1 /**
2 * Copyright (c) 2011, The University of Southampton and the individual contributors.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without modification,
6 * are permitted provided that the following conditions are met:
7 *
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * * Neither the name of the University of Southampton nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30 /**
31 *
32 */
33 package org.openimaj.audio.analysis;
34
35 import org.openimaj.audio.AudioFormat;
36 import org.openimaj.audio.AudioStream;
37 import org.openimaj.audio.SampleChunk;
38 import org.openimaj.audio.processor.AudioProcessor;
39 import org.openimaj.audio.samples.SampleBuffer;
40 import org.openimaj.audio.samples.SampleBufferFactory;
41
42 import edu.emory.mathcs.jtransforms.fft.FloatFFT_1D;
43
44 /**
45 * Perform an FFT on an audio signal. An FFT will be calculated for every
46 * channel in the audio separately. Use {@link #getLastFFT()} to get the
47 * last generated frequency domain calculation.
48 * <p>
49 * The class also includes an inverse transform function that takes a
50 * frequency domain array (such as that delivered by {@link #getLastFFT()})
51 * and returns a {@link SampleChunk}. The format of the output sample chunk
52 * is determined by the given audio format.
53 *
54 * @author David Dupplaw (dpd@ecs.soton.ac.uk)
55 * @created 28 Oct 2011
56 */
57 public class FourierTransform extends AudioProcessor
58 {
59 /** The last generated FFT */
60 private float[][] lastFFT = null;
61
62 /** The scaling factor to apply prior to the FFT */
63 private float scalingFactor = 1;
64
65 /** Whether to pad the input to the next power of 2 */
66 private boolean padToNextPowerOf2 = true;
67
68 /** Whether to divide the real return parts by the size of the input */
69 private final boolean normalise = true;
70
71 /**
72 * Default constructor for ad-hoc processing.
73 */
74 public FourierTransform()
75 {
76 }
77
78 /**
79 * Constructor for chaining.
80 * @param as The stream to chain to
81 */
82 public FourierTransform( final AudioStream as )
83 {
84 super( as );
85 }
86
87 /**
88 * {@inheritDoc}
89 * @see org.openimaj.audio.processor.AudioProcessor#process(org.openimaj.audio.SampleChunk)
90 */
91 @Override
92 public SampleChunk process( final SampleChunk sample )
93 {
94 // Get a sample buffer object for this data
95 final SampleBuffer sb = sample.getSampleBuffer();
96 return this.process( sb ).getSampleChunk();
97 }
98
99 /**
100 * Process the given sample buffer
101 * @param sb The sample buffer
102 * @return The sample buffer
103 */
104 public SampleBuffer process( final SampleBuffer sb )
105 {
106 // The number of channels we need to process
107 final int nChannels = sb.getFormat().getNumChannels();
108
109 // Number of samples we'll need to process for each channel
110 final int nSamplesPerChannel = sb.size() / nChannels;
111
112 // The size of the FFT to generate
113 final int sizeOfFFT = this.padToNextPowerOf2 ?
114 this.nextPowerOf2( nSamplesPerChannel ) : nSamplesPerChannel;
115
116 // The Fourier transformer we're going to use
117 final FloatFFT_1D fft = new FloatFFT_1D( nSamplesPerChannel );
118
119 // Creates an FFT for each of the channels in turn
120 this.lastFFT = new float[nChannels][];
121 for( int c = 0; c < nChannels; c++ )
122 {
123 // Twice the length to account for imaginary parts
124 this.lastFFT[c] = new float[ sizeOfFFT*2 ];
125
126 // Fill the array
127 for( int x = 0; x < nSamplesPerChannel; x++ )
128 this.lastFFT[c][x*2] = sb.get( x*nChannels+c ) * this.scalingFactor;
129
130 // System.out.println( "FFT Input (channel "+c+"), length "+this.lastFFT[c].length+": " );
131 // System.out.println( Arrays.toString( this.lastFFT[c] ));
132
133 // Perform the FFT (using jTransforms)
134 fft.complexForward( this.lastFFT[c] );
135
136 if( this.normalise )
137 this.normaliseReals( sizeOfFFT );
138
139 // System.out.println( "FFT Output (channel "+c+"): " );
140 // System.out.println( Arrays.toString( this.lastFFT[c] ));
141 }
142
143 return sb;
144 }
145
146 /**
147 * Divides the real parts of the last FFT by the given size
148 * @param size the divisor
149 */
150 private void normaliseReals( final int size )
151 {
152 for( int c = 0; c < this.lastFFT.length; c++ )
153 for( int i = 0; i < this.lastFFT[c].length; i +=2 )
154 this.lastFFT[c][i] /= size;
155 }
156
157 /**
158 * Returns the next power of 2 superior to n.
159 * @param n The value to find the next power of 2 above
160 * @return The next power of 2
161 */
162 private int nextPowerOf2( final int n )
163 {
164 return (int)Math.pow( 2, 32 - Integer.numberOfLeadingZeros(n - 1) );
165 }
166
167 /**
168 * Given some transformed audio data, will convert it back into
169 * a sample chunk. The number of channels given audio format
170 * must match the data that is provided in the transformedData array.
171 *
172 * @param format The required format for the output
173 * @param transformedData The frequency domain data
174 * @return A {@link SampleChunk}
175 */
176 static public SampleChunk inverseTransform( final AudioFormat format,
177 final float[][] transformedData )
178 {
179 // Check the data has something in it.
180 if( transformedData == null || transformedData.length == 0 )
181 throw new IllegalArgumentException( "No data in data chunk" );
182
183 // Check that the transformed data has the same number of channels
184 // as the data we've been given.
185 if( transformedData.length != format.getNumChannels() )
186 throw new IllegalArgumentException( "Number of channels in audio " +
187 "format does not match given data." );
188
189 // The number of channels
190 final int nChannels = transformedData.length;
191
192 // The Fourier transformer we're going to use
193 final FloatFFT_1D fft = new FloatFFT_1D( transformedData[0].length/2 );
194
195 // Create a sample buffer to put the time domain data into
196 final SampleBuffer sb = SampleBufferFactory.createSampleBuffer( format,
197 transformedData[0].length/2 * nChannels );
198
199 // Perform the inverse on each channel
200 for( int channel = 0; channel < transformedData.length; channel++ )
201 {
202 // Convert frequency domain back to time domain
203 fft.complexInverse( transformedData[channel], true );
204
205 // Set the data in the buffer
206 for( int x = 0; x < transformedData[channel].length/2; x++ )
207 sb.set( x*nChannels+channel, transformedData[channel][x] );
208 }
209
210 // Return a new sample chunk
211 return sb.getSampleChunk();
212 }
213
214 /**
215 * Get the last processed FFT frequency data.
216 * @return The fft of the last processed window
217 */
218 public float[][] getLastFFT()
219 {
220 return this.lastFFT;
221 }
222
223 /**
224 * Returns the magnitudes of the last FFT data. The length of the
225 * returned array of magnitudes will be half the length of the FFT data
226 * (up to the Nyquist frequency).
227 *
228 * @return The magnitudes of the last FFT data.
229 */
230 public float[][] getMagnitudes()
231 {
232 final float[][] mags = new float[this.lastFFT.length][];
233 for( int c = 0; c < this.lastFFT.length; c++ )
234 {
235 mags[c] = new float[ this.lastFFT[c].length/4 ];
236 for( int i = 0; i < this.lastFFT[c].length/4; i++ )
237 {
238 final float re = this.lastFFT[c][i*2];
239 final float im = this.lastFFT[c][i*2+1];
240 mags[c][i] = (float)Math.sqrt( re*re + im*im );
241 }
242 }
243
244 return mags;
245 }
246
247 /**
248 * Returns the power magnitudes of the last FFT data. The length of the
249 * returned array of magnitudes will be half the length of the FFT data
250 * (up to the Nyquist frequency). The power is calculated using:
251 * <p><code>10log10( real^2 + imaginary^2 )</code></p>
252 *
253 * @return The magnitudes of the last FFT data.
254 */
255 public float[][] getPowerMagnitudes()
256 {
257 final float[][] mags = new float[this.lastFFT.length][];
258 for( int c = 0; c < this.lastFFT.length; c++ )
259 {
260 mags[c] = new float[ this.lastFFT[c].length/4 ];
261 for( int i = 0; i < this.lastFFT[c].length/4; i++ )
262 {
263 final float re = this.lastFFT[c][i*2];
264 final float im = this.lastFFT[c][i*2+1];
265 mags[c][i] = 10f * (float)Math.log10( re*re + im*im );
266 }
267 }
268
269 return mags;
270 }
271
272 /**
273 * Scales the real and imaginary parts by the scalar prior to
274 * calculating the (square) magnitude for normalising the outputs.
275 * Returns only those values up to the Nyquist frequency.
276 *
277 * @param scalar The scalar
278 * @return Normalised magnitudes.
279 */
280 public float[][] getNormalisedMagnitudes( final float scalar )
281 {
282 final float[][] mags = new float[this.lastFFT.length][];
283 for( int c = 0; c < this.lastFFT.length; c++ )
284 {
285 mags[c] = new float[ this.lastFFT[c].length/4 ];
286 for( int i = 0; i < this.lastFFT[c].length/4; i++ )
287 {
288 final float re = this.lastFFT[c][i*2] * scalar;
289 final float im = this.lastFFT[c][i*2+1] * scalar;
290 mags[c][i] = re*re + im*im;
291 }
292 }
293
294 return mags;
295 }
296
297 /**
298 * Returns just the real numbers from the last FFT. The result will include
299 * the symmetrical part.
300 * @return The real numbers
301 */
302 public float[][] getReals()
303 {
304 final float[][] reals = new float[this.lastFFT.length][];
305 for( int c = 0; c < this.lastFFT.length; c++ )
306 {
307 reals[c] = new float[ this.lastFFT[c].length/2 ];
308 for( int i = 0; i < this.lastFFT[c].length/2; i++ )
309 reals[c][i] = this.lastFFT[c][i*2];
310 }
311
312 return reals;
313 }
314
315 /**
316 * Get the scaling factor in use.
317 * @return The scaling factor.
318 */
319 public float getScalingFactor()
320 {
321 return this.scalingFactor;
322 }
323
324 /**
325 * Set the scaling factor to use. This factor will be applied to signal
326 * data prior to performing the FFT. The default is, of course, 1.
327 * @param scalingFactor The scaling factor to use.
328 */
329 public void setScalingFactor( final float scalingFactor )
330 {
331 this.scalingFactor = scalingFactor;
332 }
333
334 /**
335 * Returns whether the input will be padded to be the length
336 * of the next higher power of 2.
337 * @return TRUE if the input will be padded, FALSE otherwise.
338 */
339 public boolean isPadToNextPowerOf2()
340 {
341 return this.padToNextPowerOf2;
342 }
343
344 /**
345 * Set whether to pad the input to the next power of 2.
346 * @param padToNextPowerOf2 TRUE to pad the input, FALSE otherwise
347 */
348 public void setPadToNextPowerOf2( final boolean padToNextPowerOf2 )
349 {
350 this.padToNextPowerOf2 = padToNextPowerOf2;
351 }
352 }