001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030/**
031 *
032 */
033package org.openimaj.audio.filters;
034
035import java.util.ArrayList;
036import java.util.List;
037
038import org.openimaj.audio.AudioFormat;
039import org.openimaj.audio.util.AudioUtils;
040
041import Jama.Matrix;
042
043/**
044 *      Filter bank of Mel filters for applying to a frequency domain source. It
045 *      is standard that the edges of each filter in the filter bank correspond
046 *      to the centre of the neighbouring filter - so they overlap by half (in
047 *      the Mel frequencies).
048 *
049 *      @author David Dupplaw (dpd@ecs.soton.ac.uk)
050 *  @created 25 Jul 2012
051 *      @version $Author$, $Revision$, $Date$
052 */
053public class MelFilterBank
054{
055        /** The lowest frequency covered by this filter bank */
056        private double lowestFreq = 300;
057
058        /** The highest frequency coverted by this filter bank */
059        private double highestFreq = 5000;
060
061        /** The number of filters in this filter bank */
062        private int nFilters = 40;
063
064        /** The list of filters */
065        private ArrayList<TriangularFilter> filters = null;
066
067        /**
068         *      Construct a default MelFilterBank. The defaults are the lowest
069         *      frequency covered is 300Hz, the highest 5000Hz covered by 40 Mel filters.
070         */
071        public MelFilterBank()
072        {
073        }
074
075        /**
076         *      Default constructor to create a filter bank with the given number
077         *      of filters between the two given frequencies.
078         *
079         *      @param nFilters The number of filters
080         *      @param lowFreq The lowest frequency covered by the bank
081         *      @param highFreq The highest frequency covered by the bank
082         */
083        public MelFilterBank( final int nFilters, final double lowFreq, final double highFreq )
084        {
085                this.lowestFreq = lowFreq;
086                this.highestFreq = highFreq;
087                this.nFilters = nFilters;
088        }
089
090        /**
091         *      Instantiate the filter bank, if it's not already instantiated.
092         */
093        public void createFilterBank()
094        {
095                if( this.filters == null )
096                {
097                        this.filters = new ArrayList<TriangularFilter>();
098
099                        // Convert the range of the filter banks (in Hz) to Mel frequencies
100                        final double lowFreqMel = AudioUtils.frequencyToMelFrequency( this.lowestFreq );
101                        final double highFreqMel = AudioUtils.frequencyToMelFrequency( this.highestFreq );
102                        final double melFreqRange = highFreqMel-lowFreqMel;
103
104                        // The filters are evenly distributed on the Mel Scale.
105                        final double melFreqPerFilter = 2*melFreqRange /(this.nFilters+1);
106
107                        // Create the Filters
108                        for( int filter = 0; filter < this.nFilters; filter++ )
109                        {
110                                // Centre frequency of the mel triangular filter
111                                final double lf = lowFreqMel + melFreqPerFilter/2 * filter;
112                                final double cf = lf + melFreqPerFilter/2;
113                                final double hf = lf + melFreqPerFilter;
114                                this.filters.add( new TriangularFilter(
115                                                AudioUtils.melFrequencyToFrequency( lf ),
116                                                AudioUtils.melFrequencyToFrequency( cf ),
117                                                AudioUtils.melFrequencyToFrequency( hf )
118                                ) );
119                        }
120                }
121        }
122
123        /**
124         *      Returns a list of filters in this filter bank
125         *      @return The filters
126         */
127        public List<TriangularFilter> getFilters()
128        {
129                this.createFilterBank();
130                return this.filters;
131        }
132
133        /**
134         *      Process the input power spectrum with this filter bank. The output is
135         *      a set of Mel Frequency Coefficients for each channel of the audio. The
136         *      power spectrum is expected to be just the power magnitudes for the
137         *      real parts of a fourier frequency spectrum (for each channel of the
138         *      input audio). The output is a 2D array
139         *      where the first dimension is the number of audio channels and the
140         *      second dimension is each of the powers of the mel filters.
141         *
142         *      @param spectrum The power spectrum
143         *      @param format The format of the original audio used to produce the
144         *              spectrum
145         *      @return The Mel frequency coefficients
146         */
147        public float[][] process( final float[][] spectrum, final AudioFormat format )
148        {
149                // Make sure we've got some filters to apply
150                this.createFilterBank();
151
152                final float[][] output = new float[spectrum.length][this.filters.size()];
153
154                for( int c = 0; c < spectrum.length; c++ )
155                        for( int i = 0; i < this.filters.size(); i++ )
156                                output[c][i] = (float)this.filters.get(i).process( spectrum[c], format );
157
158                return output;
159        }
160
161        /**
162         *      Returns a set of values that represent the response of this filter bank
163         *      when the linear frequency is split in the given number of bins. The
164         *      result will have <code>nSpectrumBins</code> length.
165         *
166         *      @param nSpectrumBins The number of bins in a spectrum.
167         *      @param maxFreq The maximum frequency (sample rate)
168         *      @return The response curve.
169         */
170        public float[] getResponseCurve( final int nSpectrumBins, final double maxFreq )
171        {
172                final float[][] curve = new float[1][nSpectrumBins];
173                for( int i = 0; i < nSpectrumBins; i++ )
174                        curve[0][i] = 1f;
175                return this.process( curve, new AudioFormat( 8, maxFreq/500, 1 ) )[0];
176        }
177
178        /**
179         *      Set the filter amplitude for all the generated filters.
180         *      @param fa The new filter amplitude
181         */
182        public void setFilterAmplitude( final double fa )
183        {
184                if( this.filters != null )
185                        for( final TriangularFilter mf : this.filters )
186                                mf.setFilterAmplitude( fa );
187        }
188
189        /**
190         *      Returns the filters weights as a Matrix, where the rows are the
191         *      filters and the columns are the frequencies. The values in the cells are
192         *      the weights to apply to the given frequency for the given filter. The
193         *      frequency range of the spectrum is required to work out which frequency each bin
194         *      represents.
195         *
196         *      @param specSize The size of the spectrum (number of cols in the matrix)
197         *      @param minFreq The minimum frequency represented in the spectrum
198         *      @param maxFreq The maximum frequency represented in the spectrum
199         *      @return a Matrix representation of the filter bank or NULL if the
200         *              filter bank has not yet been initialised
201         */
202        public Matrix asMatrix( final int specSize, final double minFreq, final double maxFreq)
203        {
204                if( this.filters == null ) return null;
205
206                // The output matrix
207                final Matrix m = new Matrix( this.filters.size(), specSize );
208
209                // The size of each bin in Hz
210                final double binSize = (maxFreq-minFreq) / specSize;
211
212                for( int filter = 0; filter < this.filters.size(); filter++ )
213                        for( int i = 0; i < specSize; i++ )
214                                m.set( filter, i, this.filters.get(filter).getWeightAt( i*binSize ) );
215
216                return m;
217        }
218}