001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030/**
031 *
032 */
033package org.openimaj.audio.analysis;
034
035import org.openimaj.audio.AudioFormat;
036import org.openimaj.audio.AudioStream;
037import org.openimaj.audio.SampleChunk;
038import org.openimaj.audio.processor.AudioProcessor;
039import org.openimaj.audio.samples.SampleBuffer;
040import org.openimaj.audio.samples.SampleBufferFactory;
041
042import edu.emory.mathcs.jtransforms.fft.FloatFFT_1D;
043
044/**
045 *      Perform an FFT on an audio signal. An FFT will be calculated for every
046 *      channel in the audio separately. Use {@link #getLastFFT()} to get the
047 *      last generated frequency domain calculation.
048 *      <p>
049 *      The class also includes an inverse transform function that takes a
050 *      frequency domain array (such as that delivered by {@link #getLastFFT()})
051 *      and returns a {@link SampleChunk}. The format of the output sample chunk
052 *      is determined by the given audio format.
053 *
054 *  @author David Dupplaw (dpd@ecs.soton.ac.uk)
055 *      @created 28 Oct 2011
056 */
057public class FourierTransform extends AudioProcessor
058{
059        /** The last generated FFT */
060        private float[][] lastFFT = null;
061
062        /** The scaling factor to apply prior to the FFT */
063        private float scalingFactor = 1;
064
065        /** Whether to pad the input to the next power of 2 */
066        private boolean padToNextPowerOf2 = true;
067
068        /** Whether to divide the real return parts by the size of the input */
069        private final boolean normalise = true;
070
071        /**
072         *      Default constructor for ad-hoc processing.
073         */
074        public FourierTransform()
075        {
076        }
077
078        /**
079         *      Constructor for chaining.
080         *      @param as The stream to chain to
081         */
082        public FourierTransform( final AudioStream as )
083        {
084                super( as );
085        }
086
087        /**
088         *      {@inheritDoc}
089         *      @see org.openimaj.audio.processor.AudioProcessor#process(org.openimaj.audio.SampleChunk)
090         */
091        @Override
092    public SampleChunk process( final SampleChunk sample )
093    {
094                // Get a sample buffer object for this data
095                final SampleBuffer sb = sample.getSampleBuffer();
096                return this.process( sb ).getSampleChunk();
097    }
098
099        /**
100         *      Process the given sample buffer
101         *      @param sb The sample buffer
102         *      @return The sample buffer
103         */
104        public SampleBuffer process( final SampleBuffer sb )
105        {
106                // The number of channels we need to process
107                final int nChannels = sb.getFormat().getNumChannels();
108
109                // Number of samples we'll need to process for each channel
110                final int nSamplesPerChannel = sb.size() / nChannels;
111
112                // The size of the FFT to generate
113                final int sizeOfFFT = this.padToNextPowerOf2 ?
114                                this.nextPowerOf2( nSamplesPerChannel ) : nSamplesPerChannel;
115
116                // The Fourier transformer we're going to use
117                final FloatFFT_1D fft = new FloatFFT_1D( nSamplesPerChannel );
118
119                // Creates an FFT for each of the channels in turn
120                this.lastFFT = new float[nChannels][];
121                for( int c = 0; c < nChannels; c++ )
122                {
123                        // Twice the length to account for imaginary parts
124                        this.lastFFT[c] = new float[ sizeOfFFT*2 ];
125
126                        // Fill the array
127                        for( int x = 0; x < nSamplesPerChannel; x++ )
128                                this.lastFFT[c][x*2] = sb.get( x*nChannels+c ) * this.scalingFactor;
129
130//                      System.out.println( "FFT Input (channel "+c+"), length "+this.lastFFT[c].length+": " );
131//                      System.out.println( Arrays.toString( this.lastFFT[c] ));
132
133                        // Perform the FFT (using jTransforms)
134                        fft.complexForward( this.lastFFT[c] );
135
136                        if( this.normalise )
137                                this.normaliseReals( sizeOfFFT );
138
139//                      System.out.println( "FFT Output (channel "+c+"): " );
140//                      System.out.println( Arrays.toString( this.lastFFT[c] ));
141                }
142
143            return sb;
144    }
145
146        /**
147         *      Divides the real parts of the last FFT by the given size
148         *      @param size the divisor
149         */
150        private void normaliseReals( final int size )
151        {
152                for( int c = 0; c < this.lastFFT.length; c++ )
153                        for( int i = 0; i < this.lastFFT[c].length; i +=2 )
154                                this.lastFFT[c][i] /= size;
155        }
156
157        /**
158         *      Returns the next power of 2 superior to n.
159         *      @param n The value to find the next power of 2 above
160         *      @return The next power of 2
161         */
162        private int nextPowerOf2( final int n )
163        {
164                return (int)Math.pow( 2, 32 - Integer.numberOfLeadingZeros(n - 1) );
165        }
166
167        /**
168         *      Given some transformed audio data, will convert it back into
169         *      a sample chunk. The number of channels given audio format
170         *      must match the data that is provided in the transformedData array.
171         *
172         *      @param format The required format for the output
173         *      @param transformedData The frequency domain data
174         *      @return A {@link SampleChunk}
175         */
176        static public SampleChunk inverseTransform( final AudioFormat format,
177                        final float[][] transformedData )
178        {
179                // Check the data has something in it.
180                if( transformedData == null || transformedData.length == 0 )
181                        throw new IllegalArgumentException( "No data in data chunk" );
182
183                // Check that the transformed data has the same number of channels
184                // as the data we've been given.
185                if( transformedData.length != format.getNumChannels() )
186                        throw new IllegalArgumentException( "Number of channels in audio " +
187                                        "format does not match given data." );
188
189                // The number of channels
190                final int nChannels = transformedData.length;
191
192                // The Fourier transformer we're going to use
193                final FloatFFT_1D fft = new FloatFFT_1D( transformedData[0].length/2 );
194
195                // Create a sample buffer to put the time domain data into
196                final SampleBuffer sb = SampleBufferFactory.createSampleBuffer( format,
197                                transformedData[0].length/2 *   nChannels );
198
199                // Perform the inverse on each channel
200                for( int channel = 0; channel < transformedData.length; channel++ )
201                {
202                        // Convert frequency domain back to time domain
203                        fft.complexInverse( transformedData[channel], true );
204
205                        // Set the data in the buffer
206                        for( int x = 0; x < transformedData[channel].length/2; x++ )
207                                sb.set( x*nChannels+channel, transformedData[channel][x] );
208                }
209
210                // Return a new sample chunk
211                return sb.getSampleChunk();
212        }
213
214        /**
215         *      Get the last processed FFT frequency data.
216         *      @return The fft of the last processed window
217         */
218        public float[][] getLastFFT()
219        {
220                return this.lastFFT;
221        }
222
223        /**
224         *      Returns the magnitudes of the last FFT data. The length of the
225         *      returned array of magnitudes will be half the length of the FFT data
226         *      (up to the Nyquist frequency).
227         *
228         *      @return The magnitudes of the last FFT data.
229         */
230        public float[][] getMagnitudes()
231        {
232                final float[][] mags = new float[this.lastFFT.length][];
233                for( int c = 0; c < this.lastFFT.length; c++ )
234                {
235                        mags[c] = new float[ this.lastFFT[c].length/4 ];
236                        for( int i = 0; i < this.lastFFT[c].length/4; i++ )
237                        {
238                                final float re = this.lastFFT[c][i*2];
239                                final float im = this.lastFFT[c][i*2+1];
240                                mags[c][i] = (float)Math.sqrt( re*re + im*im );
241                        }
242                }
243
244                return mags;
245        }
246
247        /**
248         *      Returns the power magnitudes of the last FFT data. The length of the
249         *      returned array of magnitudes will be half the length of the FFT data
250         *      (up to the Nyquist frequency). The power is calculated using:
251         *      <p><code>10log10( real^2 + imaginary^2 )</code></p>
252         *
253         *      @return The magnitudes of the last FFT data.
254         */
255        public float[][] getPowerMagnitudes()
256        {
257                final float[][] mags = new float[this.lastFFT.length][];
258                for( int c = 0; c < this.lastFFT.length; c++ )
259                {
260                        mags[c] = new float[ this.lastFFT[c].length/4 ];
261                        for( int i = 0; i < this.lastFFT[c].length/4; i++ )
262                        {
263                                final float re = this.lastFFT[c][i*2];
264                                final float im = this.lastFFT[c][i*2+1];
265                                mags[c][i] = 10f * (float)Math.log10( re*re + im*im );
266                        }
267                }
268
269                return mags;
270        }
271
272        /**
273         *      Scales the real and imaginary parts by the scalar prior to
274         *      calculating the (square) magnitude for normalising the outputs.
275         *      Returns only those values up to the Nyquist frequency.
276         *
277         *      @param scalar The scalar
278         *      @return Normalised magnitudes.
279         */
280        public float[][] getNormalisedMagnitudes( final float scalar )
281        {
282                final float[][] mags = new float[this.lastFFT.length][];
283                for( int c = 0; c < this.lastFFT.length; c++ )
284                {
285                        mags[c] = new float[ this.lastFFT[c].length/4 ];
286                        for( int i = 0; i < this.lastFFT[c].length/4; i++ )
287                        {
288                                final float re = this.lastFFT[c][i*2] * scalar;
289                                final float im = this.lastFFT[c][i*2+1] * scalar;
290                                mags[c][i] = re*re + im*im;
291                        }
292                }
293
294                return mags;
295        }
296
297        /**
298         *      Returns just the real numbers from the last FFT. The result will include
299         *      the symmetrical part.
300         *      @return The real numbers
301         */
302        public float[][] getReals()
303        {
304                final float[][] reals = new float[this.lastFFT.length][];
305                for( int c = 0; c < this.lastFFT.length; c++ )
306                {
307                        reals[c] = new float[ this.lastFFT[c].length/2 ];
308                        for( int i = 0; i < this.lastFFT[c].length/2; i++ )
309                                reals[c][i] = this.lastFFT[c][i*2];
310                }
311
312                return reals;
313        }
314
315        /**
316         *      Get the scaling factor in use.
317         *      @return The scaling factor.
318         */
319        public float getScalingFactor()
320        {
321                return this.scalingFactor;
322        }
323
324        /**
325         *      Set the scaling factor to use. This factor will be applied to signal
326         *      data prior to performing the FFT. The default is, of course, 1.
327         *      @param scalingFactor The scaling factor to use.
328         */
329        public void setScalingFactor( final float scalingFactor )
330        {
331                this.scalingFactor = scalingFactor;
332        }
333
334        /**
335         *      Returns whether the input will be padded to be the length
336         *      of the next higher power of 2.
337         *      @return TRUE if the input will be padded, FALSE otherwise.
338         */
339        public boolean isPadToNextPowerOf2()
340        {
341                return this.padToNextPowerOf2;
342        }
343
344        /**
345         *      Set whether to pad the input to the next power of 2.
346         *      @param padToNextPowerOf2 TRUE to pad the input, FALSE otherwise
347         */
348        public void setPadToNextPowerOf2( final boolean padToNextPowerOf2 )
349        {
350                this.padToNextPowerOf2 = padToNextPowerOf2;
351        }
352}