001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030/** 031 * 032 */ 033package org.openimaj.audio.analysis; 034 035import org.openimaj.audio.AudioFormat; 036import org.openimaj.audio.AudioStream; 037import org.openimaj.audio.SampleChunk; 038import org.openimaj.audio.processor.AudioProcessor; 039import org.openimaj.audio.samples.SampleBuffer; 040import org.openimaj.audio.samples.SampleBufferFactory; 041 042import edu.emory.mathcs.jtransforms.fft.FloatFFT_1D; 043 044/** 045 * Perform an FFT on an audio signal. An FFT will be calculated for every 046 * channel in the audio separately. Use {@link #getLastFFT()} to get the 047 * last generated frequency domain calculation. 048 * <p> 049 * The class also includes an inverse transform function that takes a 050 * frequency domain array (such as that delivered by {@link #getLastFFT()}) 051 * and returns a {@link SampleChunk}. The format of the output sample chunk 052 * is determined by the given audio format. 053 * 054 * @author David Dupplaw (dpd@ecs.soton.ac.uk) 055 * @created 28 Oct 2011 056 */ 057public class FourierTransform extends AudioProcessor 058{ 059 /** The last generated FFT */ 060 private float[][] lastFFT = null; 061 062 /** The scaling factor to apply prior to the FFT */ 063 private float scalingFactor = 1; 064 065 /** Whether to pad the input to the next power of 2 */ 066 private boolean padToNextPowerOf2 = true; 067 068 /** Whether to divide the real return parts by the size of the input */ 069 private final boolean normalise = true; 070 071 /** 072 * Default constructor for ad-hoc processing. 073 */ 074 public FourierTransform() 075 { 076 } 077 078 /** 079 * Constructor for chaining. 080 * @param as The stream to chain to 081 */ 082 public FourierTransform( final AudioStream as ) 083 { 084 super( as ); 085 } 086 087 /** 088 * {@inheritDoc} 089 * @see org.openimaj.audio.processor.AudioProcessor#process(org.openimaj.audio.SampleChunk) 090 */ 091 @Override 092 public SampleChunk process( final SampleChunk sample ) 093 { 094 // Get a sample buffer object for this data 095 final SampleBuffer sb = sample.getSampleBuffer(); 096 return this.process( sb ).getSampleChunk(); 097 } 098 099 /** 100 * Process the given sample buffer 101 * @param sb The sample buffer 102 * @return The sample buffer 103 */ 104 public SampleBuffer process( final SampleBuffer sb ) 105 { 106 // The number of channels we need to process 107 final int nChannels = sb.getFormat().getNumChannels(); 108 109 // Number of samples we'll need to process for each channel 110 final int nSamplesPerChannel = sb.size() / nChannels; 111 112 // The size of the FFT to generate 113 final int sizeOfFFT = this.padToNextPowerOf2 ? 114 this.nextPowerOf2( nSamplesPerChannel ) : nSamplesPerChannel; 115 116 // The Fourier transformer we're going to use 117 final FloatFFT_1D fft = new FloatFFT_1D( nSamplesPerChannel ); 118 119 // Creates an FFT for each of the channels in turn 120 this.lastFFT = new float[nChannels][]; 121 for( int c = 0; c < nChannels; c++ ) 122 { 123 // Twice the length to account for imaginary parts 124 this.lastFFT[c] = new float[ sizeOfFFT*2 ]; 125 126 // Fill the array 127 for( int x = 0; x < nSamplesPerChannel; x++ ) 128 this.lastFFT[c][x*2] = sb.get( x*nChannels+c ) * this.scalingFactor; 129 130// System.out.println( "FFT Input (channel "+c+"), length "+this.lastFFT[c].length+": " ); 131// System.out.println( Arrays.toString( this.lastFFT[c] )); 132 133 // Perform the FFT (using jTransforms) 134 fft.complexForward( this.lastFFT[c] ); 135 136 if( this.normalise ) 137 this.normaliseReals( sizeOfFFT ); 138 139// System.out.println( "FFT Output (channel "+c+"): " ); 140// System.out.println( Arrays.toString( this.lastFFT[c] )); 141 } 142 143 return sb; 144 } 145 146 /** 147 * Divides the real parts of the last FFT by the given size 148 * @param size the divisor 149 */ 150 private void normaliseReals( final int size ) 151 { 152 for( int c = 0; c < this.lastFFT.length; c++ ) 153 for( int i = 0; i < this.lastFFT[c].length; i +=2 ) 154 this.lastFFT[c][i] /= size; 155 } 156 157 /** 158 * Returns the next power of 2 superior to n. 159 * @param n The value to find the next power of 2 above 160 * @return The next power of 2 161 */ 162 private int nextPowerOf2( final int n ) 163 { 164 return (int)Math.pow( 2, 32 - Integer.numberOfLeadingZeros(n - 1) ); 165 } 166 167 /** 168 * Given some transformed audio data, will convert it back into 169 * a sample chunk. The number of channels given audio format 170 * must match the data that is provided in the transformedData array. 171 * 172 * @param format The required format for the output 173 * @param transformedData The frequency domain data 174 * @return A {@link SampleChunk} 175 */ 176 static public SampleChunk inverseTransform( final AudioFormat format, 177 final float[][] transformedData ) 178 { 179 // Check the data has something in it. 180 if( transformedData == null || transformedData.length == 0 ) 181 throw new IllegalArgumentException( "No data in data chunk" ); 182 183 // Check that the transformed data has the same number of channels 184 // as the data we've been given. 185 if( transformedData.length != format.getNumChannels() ) 186 throw new IllegalArgumentException( "Number of channels in audio " + 187 "format does not match given data." ); 188 189 // The number of channels 190 final int nChannels = transformedData.length; 191 192 // The Fourier transformer we're going to use 193 final FloatFFT_1D fft = new FloatFFT_1D( transformedData[0].length/2 ); 194 195 // Create a sample buffer to put the time domain data into 196 final SampleBuffer sb = SampleBufferFactory.createSampleBuffer( format, 197 transformedData[0].length/2 * nChannels ); 198 199 // Perform the inverse on each channel 200 for( int channel = 0; channel < transformedData.length; channel++ ) 201 { 202 // Convert frequency domain back to time domain 203 fft.complexInverse( transformedData[channel], true ); 204 205 // Set the data in the buffer 206 for( int x = 0; x < transformedData[channel].length/2; x++ ) 207 sb.set( x*nChannels+channel, transformedData[channel][x] ); 208 } 209 210 // Return a new sample chunk 211 return sb.getSampleChunk(); 212 } 213 214 /** 215 * Get the last processed FFT frequency data. 216 * @return The fft of the last processed window 217 */ 218 public float[][] getLastFFT() 219 { 220 return this.lastFFT; 221 } 222 223 /** 224 * Returns the magnitudes of the last FFT data. The length of the 225 * returned array of magnitudes will be half the length of the FFT data 226 * (up to the Nyquist frequency). 227 * 228 * @return The magnitudes of the last FFT data. 229 */ 230 public float[][] getMagnitudes() 231 { 232 final float[][] mags = new float[this.lastFFT.length][]; 233 for( int c = 0; c < this.lastFFT.length; c++ ) 234 { 235 mags[c] = new float[ this.lastFFT[c].length/4 ]; 236 for( int i = 0; i < this.lastFFT[c].length/4; i++ ) 237 { 238 final float re = this.lastFFT[c][i*2]; 239 final float im = this.lastFFT[c][i*2+1]; 240 mags[c][i] = (float)Math.sqrt( re*re + im*im ); 241 } 242 } 243 244 return mags; 245 } 246 247 /** 248 * Returns the power magnitudes of the last FFT data. The length of the 249 * returned array of magnitudes will be half the length of the FFT data 250 * (up to the Nyquist frequency). The power is calculated using: 251 * <p><code>10log10( real^2 + imaginary^2 )</code></p> 252 * 253 * @return The magnitudes of the last FFT data. 254 */ 255 public float[][] getPowerMagnitudes() 256 { 257 final float[][] mags = new float[this.lastFFT.length][]; 258 for( int c = 0; c < this.lastFFT.length; c++ ) 259 { 260 mags[c] = new float[ this.lastFFT[c].length/4 ]; 261 for( int i = 0; i < this.lastFFT[c].length/4; i++ ) 262 { 263 final float re = this.lastFFT[c][i*2]; 264 final float im = this.lastFFT[c][i*2+1]; 265 mags[c][i] = 10f * (float)Math.log10( re*re + im*im ); 266 } 267 } 268 269 return mags; 270 } 271 272 /** 273 * Scales the real and imaginary parts by the scalar prior to 274 * calculating the (square) magnitude for normalising the outputs. 275 * Returns only those values up to the Nyquist frequency. 276 * 277 * @param scalar The scalar 278 * @return Normalised magnitudes. 279 */ 280 public float[][] getNormalisedMagnitudes( final float scalar ) 281 { 282 final float[][] mags = new float[this.lastFFT.length][]; 283 for( int c = 0; c < this.lastFFT.length; c++ ) 284 { 285 mags[c] = new float[ this.lastFFT[c].length/4 ]; 286 for( int i = 0; i < this.lastFFT[c].length/4; i++ ) 287 { 288 final float re = this.lastFFT[c][i*2] * scalar; 289 final float im = this.lastFFT[c][i*2+1] * scalar; 290 mags[c][i] = re*re + im*im; 291 } 292 } 293 294 return mags; 295 } 296 297 /** 298 * Returns just the real numbers from the last FFT. The result will include 299 * the symmetrical part. 300 * @return The real numbers 301 */ 302 public float[][] getReals() 303 { 304 final float[][] reals = new float[this.lastFFT.length][]; 305 for( int c = 0; c < this.lastFFT.length; c++ ) 306 { 307 reals[c] = new float[ this.lastFFT[c].length/2 ]; 308 for( int i = 0; i < this.lastFFT[c].length/2; i++ ) 309 reals[c][i] = this.lastFFT[c][i*2]; 310 } 311 312 return reals; 313 } 314 315 /** 316 * Get the scaling factor in use. 317 * @return The scaling factor. 318 */ 319 public float getScalingFactor() 320 { 321 return this.scalingFactor; 322 } 323 324 /** 325 * Set the scaling factor to use. This factor will be applied to signal 326 * data prior to performing the FFT. The default is, of course, 1. 327 * @param scalingFactor The scaling factor to use. 328 */ 329 public void setScalingFactor( final float scalingFactor ) 330 { 331 this.scalingFactor = scalingFactor; 332 } 333 334 /** 335 * Returns whether the input will be padded to be the length 336 * of the next higher power of 2. 337 * @return TRUE if the input will be padded, FALSE otherwise. 338 */ 339 public boolean isPadToNextPowerOf2() 340 { 341 return this.padToNextPowerOf2; 342 } 343 344 /** 345 * Set whether to pad the input to the next power of 2. 346 * @param padToNextPowerOf2 TRUE to pad the input, FALSE otherwise 347 */ 348 public void setPadToNextPowerOf2( final boolean padToNextPowerOf2 ) 349 { 350 this.padToNextPowerOf2 = padToNextPowerOf2; 351 } 352}