001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030/**
031 * 
032 */
033package org.openimaj.demos.sandbox.audio;
034
035import java.io.File;
036import java.net.URL;
037
038import org.openimaj.audio.AudioStream;
039import org.openimaj.audio.SampleChunk;
040import org.openimaj.audio.samples.SampleBuffer;
041import org.openimaj.video.xuggle.XuggleAudio;
042
043import edu.cmu.sphinx.frontend.BaseDataProcessor;
044import edu.cmu.sphinx.frontend.Data;
045import edu.cmu.sphinx.frontend.DataEndSignal;
046import edu.cmu.sphinx.frontend.DataProcessingException;
047import edu.cmu.sphinx.frontend.DataStartSignal;
048import edu.cmu.sphinx.frontend.DoubleData;
049
050/**
051 *      A wrapper that allows OpenIMAJ audio objects to be used as input
052 *      to a Sphinx4 speech recogniser.
053 * 
054 *      @author David Dupplaw (dpd@ecs.soton.ac.uk)
055 *      @created 7 Jun 2012
056 *      
057 */
058public class OpenIMAJAudioFileDataSource extends BaseDataProcessor
059{
060        /** Total number of values read */
061        private long totalValuesRead = -1;
062        
063        /** The audio stream being used */
064        private AudioStream audioStream = null;
065        
066        /** Set to true when we reach the end of the file */
067        private boolean atEOF = false;
068
069        /**
070         *      Default constructor
071         */
072        public OpenIMAJAudioFileDataSource()
073        {
074        }
075        
076        /**
077         *      Construct an OpenIMAJ audio wrapper for Sphinx
078         *      @param as The audio stream to wrap
079         */
080        public OpenIMAJAudioFileDataSource( AudioStream as )
081        {
082                this.audioStream = as;
083        }
084        
085        /**
086         *      Reads data from the OpenIMAJ stream and creates Data packets for
087         *      Sphinx. Creates a {@link DataStartSignal} and {@link DataEndSignal}
088         *      at the beginning and end of the stream. 
089         *
090         *      @see edu.cmu.sphinx.frontend.util.AudioFileDataSource#getData()
091         */
092        @Override
093        public Data getData() throws DataProcessingException
094        {
095                if( atEOF ) return null;
096                
097                getTimer().start();
098                Data output = null;
099
100                double sampleRate = audioStream.getFormat().
101                                getSampleRateKHz() * 1000;
102
103                // First time through?
104                if( totalValuesRead == -1 )
105                {
106                        // If it's the first time through, we need to generate 
107                        // a DataSignalStart packet
108                        output = new DataStartSignal( (int)sampleRate );
109                        totalValuesRead = 0;
110                }
111                else
112                {
113                        // Get some values we'll be needing
114                long collectTime = System.currentTimeMillis();
115                long firstSample = totalValuesRead;
116        
117                // Get the next sample chunk from the audio stream
118                SampleChunk sc = audioStream.nextSampleChunk();
119                
120                // If we're at the end of the stream....
121                if( sc == null )
122                {
123                        // Data End Signal (duration in milliseconds)
124                        output = new DataEndSignal( (long)(totalValuesRead/sampleRate*1000) );
125                        atEOF = true;
126                }
127                else
128                {
129                        // Get a sample buffer from the chunk
130                        SampleBuffer b = sc.getSampleBuffer();
131                        
132                        // Keep a total of how many samples we've processed
133                        totalValuesRead += b.size();
134                                
135                        // Create the data output packet
136                        output = new DoubleData( b.asDoubleArray(), (int)sampleRate, 
137                                        collectTime, firstSample );
138                }
139                }
140        
141                getTimer().stop();
142                return output;
143        }
144        
145        /**
146         *      Set the audio file to read. Will instantiate a {@link XuggleAudio}
147         *      to stream the data from.
148         *      @param url The audio file location
149         */
150        public void setAudioFile( URL url )
151        {
152                this.audioStream = new XuggleAudio( url );
153        }
154        
155        /**
156         *      Set the audio file to read. Will instantiate a {@link XuggleAudio}
157         *      to stream the data from.
158         *      @param f The audio file location
159         */
160        public void setAudioFile( File f )
161        {
162                this.audioStream = new XuggleAudio( f );
163        }
164        
165        /**
166         *      Set the audio stream from which to read data.
167         *      @param as The audio stream.
168         */
169        public void setAudioStream( AudioStream as )
170        {
171                this.audioStream = as;
172        }
173}