View Javadoc

1   /**
2    * Copyright (c) 2011, The University of Southampton and the individual contributors.
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without modification,
6    * are permitted provided that the following conditions are met:
7    *
8    *   * 	Redistributions of source code must retain the above copyright notice,
9    * 	this list of conditions and the following disclaimer.
10   *
11   *   *	Redistributions in binary form must reproduce the above copyright notice,
12   * 	this list of conditions and the following disclaimer in the documentation
13   * 	and/or other materials provided with the distribution.
14   *
15   *   *	Neither the name of the University of Southampton nor the names of its
16   * 	contributors may be used to endorse or promote products derived from this
17   * 	software without specific prior written permission.
18   *
19   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22   * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23   * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26   * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28   * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29   */
30  package org.openimaj.audio.analysis.benchmarking.dataset;
31  
32  import java.io.File;
33  import java.io.FileFilter;
34  import java.io.FilenameFilter;
35  
36  import org.openimaj.citation.annotation.Reference;
37  import org.openimaj.citation.annotation.ReferenceType;
38  import org.openimaj.data.dataset.Dataset;
39  import org.openimaj.data.dataset.ListBackedDataset;
40  import org.openimaj.data.dataset.MapBackedDataset;
41  import org.openimaj.experiment.annotations.DatasetDescription;
42  
43  import cern.colt.Arrays;
44  
45  /**
46   *	OpenIMAJ Dataset for the MusicSpeech Database
47   *
48   *	@author David Dupplaw (dpd@ecs.soton.ac.uk)
49   *  @created 13 Mar 2013
50   */
51  @DatasetDescription(
52  		name = "Music-Speech Dataset",
53  		description = "The 'music-speech' corpus is a small collection of some 240 " +
54  				"15-second extracts collected 'at random' from the radio by Eric Scheirer " +
55  				"during his internship at Interval Research Corporation in the summer of 1996 " +
56  				"under the supervision of Malcolm Slaney",
57  		url = "http://labrosa.ee.columbia.edu/sounds/musp/scheislan.html")
58  @Reference(
59  		type = ReferenceType.Inproceedings,
60  		author = { "Scheirer E.", "Slaney, M." },
61  		title = "Construction And Evaluation Of A Robust Multifeature Speech/music Discriminator",
62  		year = "1997",
63  		booktitle = "Proc. ICASSP-97, Munich.")
64  public class MusicSpeechDataset extends MapBackedDataset<String, Dataset<File>, File>
65  {
66  	/** Directory of the sounds */
67  	private File soundsDir;
68  
69  	/**
70  	 * 	Create the Music-Speech Dataset from the given directory.
71  	 *
72  	 *	@param baseDir The base directory of the music-speech dataset.
73  	 * 	@param testOrTrain TRUE for testing set, FALSE for training set
74  	 */
75  	public MusicSpeechDataset( final File baseDir, final boolean testOrTrain )
76  	{
77  		// construct the directory of which set to get
78  		String soundsDir = "wavfile" + File.separator;
79  		soundsDir += testOrTrain? "test" : "train";
80  
81  		this.processDir( this.soundsDir = new File( baseDir, soundsDir ) );
82  
83  		System.out.println( this );
84  	}
85  
86  	private void processDir( final File dir )
87  	{
88  		final File[] groups = dir.listFiles( new FileFilter()
89  		{
90  			@Override
91  			public boolean accept( final File pathname )
92  			{
93  				return 	pathname.isDirectory() &&
94  						!pathname.getName().equals( "." ) &&
95  						!pathname.getName().equals("..");
96  			}
97  		} );
98  
99  		System.out.println( Arrays.toString( groups ));
100 
101 		if( groups.length == 0 )
102 		{
103 			System.out.println( "Processing "+dir );
104 
105 			final File[] files = dir.listFiles( new FilenameFilter()
106 			{
107 				@Override
108 				public boolean accept( final File dir, final String name )
109 				{
110 					return name.endsWith( ".wav" );
111 				}
112 			} );
113 
114 			final ListBackedDataset<File> list = new ListBackedDataset<File>();
115 			this.map.put( dir.getAbsolutePath().substring( this.soundsDir.getAbsolutePath().length()+1 ), list );
116 
117 			for( final File file : files )
118 			{
119 				list.add( file );
120 			}
121 		}
122 		else
123 		{
124 			for( final File group: groups )
125 				this.processDir( group );
126 		}
127 	}
128 }