001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.vis.audio; 031 032import gnu.trove.list.array.TFloatArrayList; 033 034import java.awt.Dimension; 035import java.util.ArrayList; 036 037import org.openimaj.audio.AudioFormat; 038import org.openimaj.audio.AudioStream; 039import org.openimaj.audio.SampleChunk; 040import org.openimaj.audio.processor.AudioProcessor; 041import org.openimaj.audio.samples.SampleBuffer; 042import org.openimaj.image.MBFImage; 043import org.openimaj.image.colour.RGBColour; 044import org.openimaj.image.renderer.MBFImageRenderer; 045import org.openimaj.image.typography.hershey.HersheyFont; 046import org.openimaj.math.geometry.point.Point2d; 047import org.openimaj.math.geometry.point.Point2dImpl; 048import org.openimaj.math.geometry.shape.Polygon; 049import org.openimaj.vis.DataUnitsTransformer; 050import org.openimaj.vis.VisualisationImpl; 051import org.openimaj.vis.timeline.TimelineObject; 052import org.openimaj.vis.timeline.TimelineObjectAdapter; 053 054/** 055 * Utilises an audio processor to plot the audio waveform to an image. This 056 * class is both a {@link VisualisationImpl} and a {@link TimelineObject}. This 057 * means that it can be used to plot a complete visualisation of the overview of 058 * the data or it can be used to plot temporal parts of the data into the 059 * visualisation window. 060 * <p> 061 * An internal class (AudioOverviewGenerator) can be used to generate overviews 062 * if necessary. 063 * <p> 064 * This class also extends {@link TimelineObjectAdapter} which allows an audio 065 * waveform to be put upon a timeline. 066 * 067 * @author David Dupplaw (dpd@ecs.soton.ac.uk) 068 * 069 * @created 9 Jun 2011 070 */ 071public class AudioOverviewVisualisation extends VisualisationImpl<AudioStream> 072implements TimelineObject 073{ 074 /** */ 075 private static final long serialVersionUID = 1L; 076 077 /** 078 * Generates an audio overview. This is a lower-resolution version of the 079 * audio waveform. It takes the maximum value from a set of values and 080 * stores this as the overview. By default the processor takes the maximum 081 * value from every 5000 samples. The method 082 * {@link #getAudioOverview(int, int)} allows resampling of that overview. 083 * 084 * @author David Dupplaw (dpd@ecs.soton.ac.uk) 085 * @created 21 Jul 2011 086 * 087 */ 088 public class AudioOverviewGenerator extends AudioProcessor 089 { 090 /** Number of bins in the overview */ 091 private int nSamplesPerBin = -1; 092 093 /** The maximum in the current bin for each channel */ 094 private float[] channelMax = null; 095 096 /** The number of samples so far in the current bin being processed */ 097 private int nSamplesInBin = 0; 098 099 /** The overview data */ 100 private TFloatArrayList[] audioOverview = null; 101 102 /** The number of channels in the audio data */ 103 private int nChannels = 0; 104 105 /** The audio format of the samples we're processing */ 106 private AudioFormat af = null; 107 108 /** 109 * Constructor 110 * 111 * @param nSamplesPerBin 112 * The number of samples per bin 113 * @param nChannels 114 * The number of channels 115 */ 116 public AudioOverviewGenerator(final int nSamplesPerBin, final int nChannels) 117 { 118 this.nSamplesPerBin = nSamplesPerBin; 119 this.nChannels = nChannels; 120 this.audioOverview = new TFloatArrayList[nChannels]; 121 this.channelMax = new float[nChannels]; 122 123 for (int i = 0; i < nChannels; i++) 124 this.audioOverview[i] = new TFloatArrayList(); 125 } 126 127 /** 128 * {@inheritDoc} 129 * 130 * @see org.openimaj.audio.processor.AudioProcessor#process(org.openimaj.audio.SampleChunk) 131 */ 132 @Override 133 public SampleChunk process(final SampleChunk samples) 134 { 135 // Store the format of the data 136 if (this.af == null) 137 this.af = samples.getFormat(); 138 139 // Get the sample data 140 final SampleBuffer b = samples.getSampleBuffer(); 141 142 // The number of samples (per channel) in this sample chunk 143 final int nSamples = b.size() / this.af.getNumChannels(); 144 145 // Keep a running total of how many samples we've processed 146 AudioOverviewVisualisation.this.numberOfProcessedSamples += nSamples; 147 148 for (int x = 0; x < nSamples; x++) 149 { 150 for (int c = 0; c < this.nChannels; c++) 151 { 152 // Store the maximum for the current bin 153 this.channelMax[c] = Math.max(this.channelMax[c], 154 b.get(x * this.nChannels + c)); 155 } 156 157 // If we're still within the bin 158 if (this.nSamplesInBin < this.nSamplesPerBin) 159 this.nSamplesInBin++; 160 else 161 { 162 // We've overflowed the bin 163 for (int c = 0; c < this.nChannels; c++) 164 { 165 // Store the current bin 166 this.audioOverview[c].add(this.channelMax[c]); 167 this.channelMax[c] = Integer.MIN_VALUE; 168 } 169 170 // Reset for the next bin 171 this.nSamplesInBin = 0; 172 } 173 } 174 175 return samples; 176 } 177 178 /** 179 * @return Get the overview data. 180 */ 181 public TFloatArrayList[] getAudioOverview() 182 { 183 return this.audioOverview; 184 } 185 186 /** 187 * Refactors the overview to given another overview. If the number of 188 * bins specified an overview that's finer than the actual overview the 189 * original overview is returned. The output of this function will then 190 * only return an array list of nBins or less. 191 * 192 * @param channel 193 * The channel to get 194 * @param nBins 195 * The number of bins in the overview 196 * @return A refactors overview 197 */ 198 public TFloatArrayList getAudioOverview(final int channel, final int nBins) 199 { 200 if (nBins >= this.audioOverview[channel].size()) 201 return this.audioOverview[channel]; 202 203 final TFloatArrayList ii = new TFloatArrayList(); 204 final double scalar = (double) this.audioOverview[channel].size() / (double) nBins; 205 for (int xx = 0; xx < nBins; xx++) 206 { 207 final int startBin = (int) (xx * scalar); 208 final int endBin = (int) ((xx + 1) * scalar); 209 float m = Integer.MIN_VALUE; 210 for (int yy = startBin; yy < endBin; yy++) 211 m = Math.max(m, this.audioOverview[channel].get(yy)); 212 ii.add(m); 213 } 214 return ii; 215 } 216 217 /** 218 * Returns a polygon representing the channel overview. 219 * 220 * @param channel 221 * The channel to get the polygon for 222 * @param mirror 223 * whether to mirror the polygon 224 * @param width 225 * The width of the overview to return 226 * @return A polygon 227 */ 228 public Polygon getChannelPolygon(final int channel, final boolean mirror, final int width) 229 { 230 final TFloatArrayList overview = this.getAudioOverview(channel, width); 231 final int len = overview.size(); 232 final double scalar = width / (double) len; 233 234 final ArrayList<Point2d> l = new ArrayList<Point2d>(); 235 for (int x = 0; x < len; x++) 236 l.add(new Point2dImpl((float) (x * scalar), overview.get(x))); 237 238 if (mirror) 239 { 240 for (int x = 1; x <= len; x++) 241 l.add(new Point2dImpl((float) ((len - x) * scalar), 242 -overview.get(len - x))); 243 } 244 245 // Store how long the given overview is in milliseconds 246 AudioOverviewVisualisation.this.millisecondsInView = (long) (AudioOverviewVisualisation.this.numberOfProcessedSamples / 247 this.af.getSampleRateKHz()); 248 249 return new Polygon(l); 250 } 251 } 252 253 /** 254 * The calculation of how many milliseconds are in the last generated view 255 * at the resampled overview. 256 */ 257 public long millisecondsInView = 0; 258 259 /** The number of samples that were originally read in from the data */ 260 public long numberOfProcessedSamples = 0; 261 262 /** The start time in milliseconds */ 263 private long start = 0; 264 265 /** The length of the audio data */ 266 private long length = 1000; 267 268 /** The overview generator */ 269 private AudioOverviewGenerator aap = null; 270 271 /** Number of samples per pixel */ 272 private int nSamplesPerPixel = 500; 273 274 /** Whether the generation is complete */ 275 private boolean generationComplete = false; 276 277 /** 278 * Default constructor 279 * 280 * @param as 281 * The audio data to plot 282 */ 283 public AudioOverviewVisualisation(final AudioStream as) 284 { 285 super(640, 480); 286 287 this.data = as; 288 this.length = this.data.getLength(); 289 290 // How many pixels we'll overview per pixel 291 this.nSamplesPerPixel = 500; 292 // TODO: This is currently fixed-size but should be based on audio 293 // length 294 295 // Generate the audio overview 296 this.aap = new AudioOverviewGenerator( 297 this.nSamplesPerPixel, this.data.getFormat().getNumChannels()); 298 299 new Thread(new Runnable() 300 { 301 @Override 302 public void run() 303 { 304 try 305 { 306 synchronized (AudioOverviewVisualisation.this.aap) 307 { 308 AudioOverviewVisualisation.this.aap.process(AudioOverviewVisualisation.this.data); 309 AudioOverviewVisualisation.this.generationComplete = true; 310 AudioOverviewVisualisation.this.aap.notifyAll(); 311 } 312 } 313 catch (final Exception e) 314 { 315 e.printStackTrace(); 316 AudioOverviewVisualisation.this.aap = null; 317 } 318 } 319 }).start(); 320 321 this.setPreferredSize(new Dimension(-1, 100)); 322 } 323 324 /** 325 * Generates a waveform image that fits within the given width and height 326 * and drawn in the given colour. Note that the generated image is RGBA so 327 * that the colours need to be 4 dimensions and may stipulate transparency. 328 * 329 * @param a 330 * The audio to draw 331 * @param w 332 * The width of the image to return 333 * @param h 334 * The height of the image to return 335 * @param backgroundColour 336 * The background colour to draw on the image 337 * @param colour 338 * The colour in which to draw the audio waveform. 339 * @return The input image. 340 */ 341 public static MBFImage getAudioWaveformImage(final AudioStream a, 342 final int w, final int h, final Float[] backgroundColour, 343 final Float[] colour) 344 { 345 return new AudioOverviewVisualisation(a).plotAudioWaveformImage( 346 w, h, backgroundColour, colour); 347 } 348 349 /** 350 * Generates a waveform image that fits within the given width and height 351 * and drawn in the given colour. Note that the generated image is RGBA so 352 * that the colours need to be 4 dimensions and may stipulate transparency. 353 * <p> 354 * If you require information about the plot afterwards you can check the 355 * fields that are stored within this instance. 356 * 357 * @param w 358 * The width of the image to return 359 * @param h 360 * The height of the image to return 361 * @param backgroundColour 362 * The background colour to draw on the image 363 * @param colour 364 * The colour in which to draw the audio waveform. 365 * @return The input image. 366 */ 367 public MBFImage plotAudioWaveformImage( 368 final int w, final int h, final Float[] backgroundColour, 369 final Float[] colour) 370 { 371 // Check if the overview's been generated, if not return empty image 372 if (this.aap == null) 373 { 374 this.visImage.drawText("Processing...", 20, 20, HersheyFont.TIMES_BOLD, 12, RGBColour.WHITE); 375 return this.visImage; 376 } 377 378 // If the generation isn't complete (and aap is not null) it means 379 // we're processing the overview. Wait until it's finished. 380 while (!this.generationComplete) 381 { 382 synchronized (this.aap) 383 { 384 try 385 { 386 this.aap.wait(); 387 } catch (final InterruptedException e) 388 { 389 e.printStackTrace(); 390 } 391 } 392 } 393 394 // Work out how high each channel will be 395 final double channelSize = h / (double) this.data.getFormat().getNumChannels(); 396 397 // This is the scalar from audio amplitude to pixels 398 final double ampScalar = channelSize / Integer.MAX_VALUE; 399 400 // Create the image we're going to draw on to - RGBA 401 // final MBFImage m = new MBFImage( w, h, 4 ); 402 final MBFImageRenderer renderer = this.visImage.createRenderer(); 403 this.visImage.fill(backgroundColour); 404 405 try 406 { 407 // Draw the polygon onto the image 408 final float ww = 1; 409 for (int i = 0; i < this.data.getFormat().getNumChannels(); i++) 410 { 411 final Polygon p = this.aap.getChannelPolygon(i, true, w); 412 p.scaleXY(ww, (float) -ampScalar / 2f); 413 p.translate(0f, (float) (-p.minY() + channelSize * i)); 414 renderer.drawPolygonFilled(p, colour); 415 } 416 } catch (final Exception e) 417 { 418 System.err.println("WARNING: Could not process audio " + 419 "to generate the audio overview."); 420 e.printStackTrace(); 421 } 422 423 return this.visImage; 424 } 425 426 /** 427 * Returns the length of the audio data in milliseconds. Only returns the 428 * correct value after processing. Until then, it will return 1 second. 429 * 430 * @return Length of the audio data. 431 */ 432 public long getLength() 433 { 434 return this.length; 435 } 436 437 /** 438 * {@inheritDoc} 439 * 440 * @see org.openimaj.vis.timeline.TimelineObjectAdapter#getStartTimeMilliseconds() 441 */ 442 @Override 443 public long getStartTimeMilliseconds() 444 { 445 return this.start; 446 } 447 448 /** 449 * {@inheritDoc} 450 * 451 * @see org.openimaj.vis.timeline.TimelineObjectAdapter#getEndTimeMilliseconds() 452 */ 453 @Override 454 public long getEndTimeMilliseconds() 455 { 456 return this.start + this.getLength(); 457 } 458 459 /** 460 * {@inheritDoc} 461 * 462 * @see org.openimaj.vis.VisualisationImpl#update() 463 */ 464 @Override 465 public void update() 466 { 467 if (this.visImage == null) 468 this.plotAudioWaveformImage( 469 this.visImage.getWidth(), this.visImage.getHeight(), 470 new Float[] { 1f, 1f, 0f, 1f }, new Float[] { 0f, 0f, 0f, 1f }); 471 } 472 473 /** 474 * {@inheritDoc} 475 * 476 * @see org.openimaj.vis.timeline.TimelineObject#setStartTimeMilliseconds(long) 477 */ 478 @Override 479 public void setStartTimeMilliseconds(final long l) 480 { 481 this.start = l; 482 } 483 484 /** 485 * {@inheritDoc} 486 * 487 * @see org.openimaj.vis.timeline.TimelineObject#setDataPixelTransformer(org.openimaj.vis.DataUnitsTransformer) 488 */ 489 @Override 490 public void setDataPixelTransformer(final DataUnitsTransformer<Float[], double[], int[]> dpt) 491 { 492 } 493}