001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.vis.audio;
031
032import gnu.trove.list.array.TFloatArrayList;
033
034import java.awt.Dimension;
035import java.util.ArrayList;
036
037import org.openimaj.audio.AudioFormat;
038import org.openimaj.audio.AudioStream;
039import org.openimaj.audio.SampleChunk;
040import org.openimaj.audio.processor.AudioProcessor;
041import org.openimaj.audio.samples.SampleBuffer;
042import org.openimaj.image.MBFImage;
043import org.openimaj.image.colour.RGBColour;
044import org.openimaj.image.renderer.MBFImageRenderer;
045import org.openimaj.image.typography.hershey.HersheyFont;
046import org.openimaj.math.geometry.point.Point2d;
047import org.openimaj.math.geometry.point.Point2dImpl;
048import org.openimaj.math.geometry.shape.Polygon;
049import org.openimaj.vis.DataUnitsTransformer;
050import org.openimaj.vis.VisualisationImpl;
051import org.openimaj.vis.timeline.TimelineObject;
052import org.openimaj.vis.timeline.TimelineObjectAdapter;
053
054/**
055 * Utilises an audio processor to plot the audio waveform to an image. This
056 * class is both a {@link VisualisationImpl} and a {@link TimelineObject}. This
057 * means that it can be used to plot a complete visualisation of the overview of
058 * the data or it can be used to plot temporal parts of the data into the
059 * visualisation window.
060 * <p>
061 * An internal class (AudioOverviewGenerator) can be used to generate overviews
062 * if necessary.
063 * <p>
064 * This class also extends {@link TimelineObjectAdapter} which allows an audio
065 * waveform to be put upon a timeline.
066 *
067 * @author David Dupplaw (dpd@ecs.soton.ac.uk)
068 *
069 * @created 9 Jun 2011
070 */
071public class AudioOverviewVisualisation extends VisualisationImpl<AudioStream>
072implements TimelineObject
073{
074        /** */
075        private static final long serialVersionUID = 1L;
076
077        /**
078         * Generates an audio overview. This is a lower-resolution version of the
079         * audio waveform. It takes the maximum value from a set of values and
080         * stores this as the overview. By default the processor takes the maximum
081         * value from every 5000 samples. The method
082         * {@link #getAudioOverview(int, int)} allows resampling of that overview.
083         *
084         * @author David Dupplaw (dpd@ecs.soton.ac.uk)
085         * @created 21 Jul 2011
086         *
087         */
088        public class AudioOverviewGenerator extends AudioProcessor
089        {
090                /** Number of bins in the overview */
091                private int nSamplesPerBin = -1;
092
093                /** The maximum in the current bin for each channel */
094                private float[] channelMax = null;
095
096                /** The number of samples so far in the current bin being processed */
097                private int nSamplesInBin = 0;
098
099                /** The overview data */
100                private TFloatArrayList[] audioOverview = null;
101
102                /** The number of channels in the audio data */
103                private int nChannels = 0;
104
105                /** The audio format of the samples we're processing */
106                private AudioFormat af = null;
107
108                /**
109                 * Constructor
110                 *
111                 * @param nSamplesPerBin
112                 *            The number of samples per bin
113                 * @param nChannels
114                 *            The number of channels
115                 */
116                public AudioOverviewGenerator(final int nSamplesPerBin, final int nChannels)
117                {
118                        this.nSamplesPerBin = nSamplesPerBin;
119                        this.nChannels = nChannels;
120                        this.audioOverview = new TFloatArrayList[nChannels];
121                        this.channelMax = new float[nChannels];
122
123                        for (int i = 0; i < nChannels; i++)
124                                this.audioOverview[i] = new TFloatArrayList();
125                }
126
127                /**
128                 * {@inheritDoc}
129                 *
130                 * @see org.openimaj.audio.processor.AudioProcessor#process(org.openimaj.audio.SampleChunk)
131                 */
132                @Override
133                public SampleChunk process(final SampleChunk samples)
134                {
135                        // Store the format of the data
136                        if (this.af == null)
137                                this.af = samples.getFormat();
138
139                        // Get the sample data
140                        final SampleBuffer b = samples.getSampleBuffer();
141
142                        // The number of samples (per channel) in this sample chunk
143                        final int nSamples = b.size() / this.af.getNumChannels();
144
145                        // Keep a running total of how many samples we've processed
146                        AudioOverviewVisualisation.this.numberOfProcessedSamples += nSamples;
147
148                        for (int x = 0; x < nSamples; x++)
149                        {
150                                for (int c = 0; c < this.nChannels; c++)
151                                {
152                                        // Store the maximum for the current bin
153                                        this.channelMax[c] = Math.max(this.channelMax[c],
154                                                        b.get(x * this.nChannels + c));
155                                }
156
157                                // If we're still within the bin
158                                if (this.nSamplesInBin < this.nSamplesPerBin)
159                                        this.nSamplesInBin++;
160                                else
161                                {
162                                        // We've overflowed the bin
163                                        for (int c = 0; c < this.nChannels; c++)
164                                        {
165                                                // Store the current bin
166                                                this.audioOverview[c].add(this.channelMax[c]);
167                                                this.channelMax[c] = Integer.MIN_VALUE;
168                                        }
169
170                                        // Reset for the next bin
171                                        this.nSamplesInBin = 0;
172                                }
173                        }
174
175                        return samples;
176                }
177
178                /**
179                 * @return Get the overview data.
180                 */
181                public TFloatArrayList[] getAudioOverview()
182                {
183                        return this.audioOverview;
184                }
185
186                /**
187                 * Refactors the overview to given another overview. If the number of
188                 * bins specified an overview that's finer than the actual overview the
189                 * original overview is returned. The output of this function will then
190                 * only return an array list of nBins or less.
191                 *
192                 * @param channel
193                 *            The channel to get
194                 * @param nBins
195                 *            The number of bins in the overview
196                 * @return A refactors overview
197                 */
198                public TFloatArrayList getAudioOverview(final int channel, final int nBins)
199                {
200                        if (nBins >= this.audioOverview[channel].size())
201                                return this.audioOverview[channel];
202
203                        final TFloatArrayList ii = new TFloatArrayList();
204                        final double scalar = (double) this.audioOverview[channel].size() / (double) nBins;
205                        for (int xx = 0; xx < nBins; xx++)
206                        {
207                                final int startBin = (int) (xx * scalar);
208                                final int endBin = (int) ((xx + 1) * scalar);
209                                float m = Integer.MIN_VALUE;
210                                for (int yy = startBin; yy < endBin; yy++)
211                                        m = Math.max(m, this.audioOverview[channel].get(yy));
212                                ii.add(m);
213                        }
214                        return ii;
215                }
216
217                /**
218                 * Returns a polygon representing the channel overview.
219                 *
220                 * @param channel
221                 *            The channel to get the polygon for
222                 * @param mirror
223                 *            whether to mirror the polygon
224                 * @param width
225                 *            The width of the overview to return
226                 * @return A polygon
227                 */
228                public Polygon getChannelPolygon(final int channel, final boolean mirror, final int width)
229                {
230                        final TFloatArrayList overview = this.getAudioOverview(channel, width);
231                        final int len = overview.size();
232                        final double scalar = width / (double) len;
233
234                        final ArrayList<Point2d> l = new ArrayList<Point2d>();
235                        for (int x = 0; x < len; x++)
236                                l.add(new Point2dImpl((float) (x * scalar), overview.get(x)));
237
238                        if (mirror)
239                        {
240                                for (int x = 1; x <= len; x++)
241                                        l.add(new Point2dImpl((float) ((len - x) * scalar),
242                                                        -overview.get(len - x)));
243                        }
244
245                        // Store how long the given overview is in milliseconds
246                        AudioOverviewVisualisation.this.millisecondsInView = (long) (AudioOverviewVisualisation.this.numberOfProcessedSamples /
247                                        this.af.getSampleRateKHz());
248
249                        return new Polygon(l);
250                }
251        }
252
253        /**
254         * The calculation of how many milliseconds are in the last generated view
255         * at the resampled overview.
256         */
257        public long millisecondsInView = 0;
258
259        /** The number of samples that were originally read in from the data */
260        public long numberOfProcessedSamples = 0;
261
262        /** The start time in milliseconds */
263        private long start = 0;
264
265        /** The length of the audio data */
266        private long length = 1000;
267
268        /** The overview generator */
269        private AudioOverviewGenerator aap = null;
270
271        /** Number of samples per pixel */
272        private int nSamplesPerPixel = 500;
273
274        /** Whether the generation is complete */
275        private boolean generationComplete = false;
276
277        /**
278         * Default constructor
279         *
280         * @param as
281         *            The audio data to plot
282         */
283        public AudioOverviewVisualisation(final AudioStream as)
284        {
285                super(640, 480);
286
287                this.data = as;
288                this.length = this.data.getLength();
289
290                // How many pixels we'll overview per pixel
291                this.nSamplesPerPixel = 500;
292                // TODO: This is currently fixed-size but should be based on audio
293                // length
294
295                // Generate the audio overview
296                this.aap = new AudioOverviewGenerator(
297                                this.nSamplesPerPixel, this.data.getFormat().getNumChannels());
298
299                new Thread(new Runnable()
300                {
301                        @Override
302                        public void run()
303                        {
304                                try
305                                {
306                                        synchronized (AudioOverviewVisualisation.this.aap)
307                                        {
308                                                AudioOverviewVisualisation.this.aap.process(AudioOverviewVisualisation.this.data);
309                                                AudioOverviewVisualisation.this.generationComplete = true;
310                                                AudioOverviewVisualisation.this.aap.notifyAll();
311                                        }
312                                }
313                                catch (final Exception e)
314                                {
315                                        e.printStackTrace();
316                                        AudioOverviewVisualisation.this.aap = null;
317                                }
318                        }
319                }).start();
320
321                this.setPreferredSize(new Dimension(-1, 100));
322        }
323
324        /**
325         * Generates a waveform image that fits within the given width and height
326         * and drawn in the given colour. Note that the generated image is RGBA so
327         * that the colours need to be 4 dimensions and may stipulate transparency.
328         *
329         * @param a
330         *            The audio to draw
331         * @param w
332         *            The width of the image to return
333         * @param h
334         *            The height of the image to return
335         * @param backgroundColour
336         *            The background colour to draw on the image
337         * @param colour
338         *            The colour in which to draw the audio waveform.
339         * @return The input image.
340         */
341        public static MBFImage getAudioWaveformImage(final AudioStream a,
342                        final int w, final int h, final Float[] backgroundColour,
343                        final Float[] colour)
344        {
345                return new AudioOverviewVisualisation(a).plotAudioWaveformImage(
346                                w, h, backgroundColour, colour);
347        }
348
349        /**
350         * Generates a waveform image that fits within the given width and height
351         * and drawn in the given colour. Note that the generated image is RGBA so
352         * that the colours need to be 4 dimensions and may stipulate transparency.
353         * <p>
354         * If you require information about the plot afterwards you can check the
355         * fields that are stored within this instance.
356         *
357         * @param w
358         *            The width of the image to return
359         * @param h
360         *            The height of the image to return
361         * @param backgroundColour
362         *            The background colour to draw on the image
363         * @param colour
364         *            The colour in which to draw the audio waveform.
365         * @return The input image.
366         */
367        public MBFImage plotAudioWaveformImage(
368                        final int w, final int h, final Float[] backgroundColour,
369                        final Float[] colour)
370        {
371                // Check if the overview's been generated, if not return empty image
372                if (this.aap == null)
373                {
374                        this.visImage.drawText("Processing...", 20, 20, HersheyFont.TIMES_BOLD, 12, RGBColour.WHITE);
375                        return this.visImage;
376                }
377
378                // If the generation isn't complete (and aap is not null) it means
379                // we're processing the overview. Wait until it's finished.
380                while (!this.generationComplete)
381                {
382                        synchronized (this.aap)
383                        {
384                                try
385                                {
386                                        this.aap.wait();
387                                } catch (final InterruptedException e)
388                                {
389                                        e.printStackTrace();
390                                }
391                        }
392                }
393
394                // Work out how high each channel will be
395                final double channelSize = h / (double) this.data.getFormat().getNumChannels();
396
397                // This is the scalar from audio amplitude to pixels
398                final double ampScalar = channelSize / Integer.MAX_VALUE;
399
400                // Create the image we're going to draw on to - RGBA
401                // final MBFImage m = new MBFImage( w, h, 4 );
402                final MBFImageRenderer renderer = this.visImage.createRenderer();
403                this.visImage.fill(backgroundColour);
404
405                try
406                {
407                        // Draw the polygon onto the image
408                        final float ww = 1;
409                        for (int i = 0; i < this.data.getFormat().getNumChannels(); i++)
410                        {
411                                final Polygon p = this.aap.getChannelPolygon(i, true, w);
412                                p.scaleXY(ww, (float) -ampScalar / 2f);
413                                p.translate(0f, (float) (-p.minY() + channelSize * i));
414                                renderer.drawPolygonFilled(p, colour);
415                        }
416                } catch (final Exception e)
417                {
418                        System.err.println("WARNING: Could not process audio " +
419                                        "to generate the audio overview.");
420                        e.printStackTrace();
421                }
422
423                return this.visImage;
424        }
425
426        /**
427         * Returns the length of the audio data in milliseconds. Only returns the
428         * correct value after processing. Until then, it will return 1 second.
429         *
430         * @return Length of the audio data.
431         */
432        public long getLength()
433        {
434                return this.length;
435        }
436
437        /**
438         * {@inheritDoc}
439         *
440         * @see org.openimaj.vis.timeline.TimelineObjectAdapter#getStartTimeMilliseconds()
441         */
442        @Override
443        public long getStartTimeMilliseconds()
444        {
445                return this.start;
446        }
447
448        /**
449         * {@inheritDoc}
450         *
451         * @see org.openimaj.vis.timeline.TimelineObjectAdapter#getEndTimeMilliseconds()
452         */
453        @Override
454        public long getEndTimeMilliseconds()
455        {
456                return this.start + this.getLength();
457        }
458
459        /**
460         * {@inheritDoc}
461         *
462         * @see org.openimaj.vis.VisualisationImpl#update()
463         */
464        @Override
465        public void update()
466        {
467                if (this.visImage == null)
468                        this.plotAudioWaveformImage(
469                                        this.visImage.getWidth(), this.visImage.getHeight(),
470                                        new Float[] { 1f, 1f, 0f, 1f }, new Float[] { 0f, 0f, 0f, 1f });
471        }
472
473        /**
474         * {@inheritDoc}
475         *
476         * @see org.openimaj.vis.timeline.TimelineObject#setStartTimeMilliseconds(long)
477         */
478        @Override
479        public void setStartTimeMilliseconds(final long l)
480        {
481                this.start = l;
482        }
483
484        /**
485         * {@inheritDoc}
486         *
487         * @see org.openimaj.vis.timeline.TimelineObject#setDataPixelTransformer(org.openimaj.vis.DataUnitsTransformer)
488         */
489        @Override
490        public void setDataPixelTransformer(final DataUnitsTransformer<Float[], double[], int[]> dpt)
491        {
492        }
493}