001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030/** 031 * 032 */ 033package org.openimaj.demos.sandbox.audio; 034 035import java.io.File; 036import java.net.URL; 037 038import org.openimaj.audio.AudioStream; 039import org.openimaj.audio.SampleChunk; 040import org.openimaj.audio.samples.SampleBuffer; 041import org.openimaj.video.xuggle.XuggleAudio; 042 043import edu.cmu.sphinx.frontend.BaseDataProcessor; 044import edu.cmu.sphinx.frontend.Data; 045import edu.cmu.sphinx.frontend.DataEndSignal; 046import edu.cmu.sphinx.frontend.DataProcessingException; 047import edu.cmu.sphinx.frontend.DataStartSignal; 048import edu.cmu.sphinx.frontend.DoubleData; 049 050/** 051 * A wrapper that allows OpenIMAJ audio objects to be used as input 052 * to a Sphinx4 speech recogniser. 053 * 054 * @author David Dupplaw (dpd@ecs.soton.ac.uk) 055 * @created 7 Jun 2012 056 * 057 */ 058public class OpenIMAJAudioFileDataSource extends BaseDataProcessor 059{ 060 /** Total number of values read */ 061 private long totalValuesRead = -1; 062 063 /** The audio stream being used */ 064 private AudioStream audioStream = null; 065 066 /** Set to true when we reach the end of the file */ 067 private boolean atEOF = false; 068 069 /** 070 * Default constructor 071 */ 072 public OpenIMAJAudioFileDataSource() 073 { 074 } 075 076 /** 077 * Construct an OpenIMAJ audio wrapper for Sphinx 078 * @param as The audio stream to wrap 079 */ 080 public OpenIMAJAudioFileDataSource( AudioStream as ) 081 { 082 this.audioStream = as; 083 } 084 085 /** 086 * Reads data from the OpenIMAJ stream and creates Data packets for 087 * Sphinx. Creates a {@link DataStartSignal} and {@link DataEndSignal} 088 * at the beginning and end of the stream. 089 * 090 * @see edu.cmu.sphinx.frontend.util.AudioFileDataSource#getData() 091 */ 092 @Override 093 public Data getData() throws DataProcessingException 094 { 095 if( atEOF ) return null; 096 097 getTimer().start(); 098 Data output = null; 099 100 double sampleRate = audioStream.getFormat(). 101 getSampleRateKHz() * 1000; 102 103 // First time through? 104 if( totalValuesRead == -1 ) 105 { 106 // If it's the first time through, we need to generate 107 // a DataSignalStart packet 108 output = new DataStartSignal( (int)sampleRate ); 109 totalValuesRead = 0; 110 } 111 else 112 { 113 // Get some values we'll be needing 114 long collectTime = System.currentTimeMillis(); 115 long firstSample = totalValuesRead; 116 117 // Get the next sample chunk from the audio stream 118 SampleChunk sc = audioStream.nextSampleChunk(); 119 120 // If we're at the end of the stream.... 121 if( sc == null ) 122 { 123 // Data End Signal (duration in milliseconds) 124 output = new DataEndSignal( (long)(totalValuesRead/sampleRate*1000) ); 125 atEOF = true; 126 } 127 else 128 { 129 // Get a sample buffer from the chunk 130 SampleBuffer b = sc.getSampleBuffer(); 131 132 // Keep a total of how many samples we've processed 133 totalValuesRead += b.size(); 134 135 // Create the data output packet 136 output = new DoubleData( b.asDoubleArray(), (int)sampleRate, 137 collectTime, firstSample ); 138 } 139 } 140 141 getTimer().stop(); 142 return output; 143 } 144 145 /** 146 * Set the audio file to read. Will instantiate a {@link XuggleAudio} 147 * to stream the data from. 148 * @param url The audio file location 149 */ 150 public void setAudioFile( URL url ) 151 { 152 this.audioStream = new XuggleAudio( url ); 153 } 154 155 /** 156 * Set the audio file to read. Will instantiate a {@link XuggleAudio} 157 * to stream the data from. 158 * @param f The audio file location 159 */ 160 public void setAudioFile( File f ) 161 { 162 this.audioStream = new XuggleAudio( f ); 163 } 164 165 /** 166 * Set the audio stream from which to read data. 167 * @param as The audio stream. 168 */ 169 public void setAudioStream( AudioStream as ) 170 { 171 this.audioStream = as; 172 } 173}