/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * * Neither the name of the University of Southampton nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.audio.analysis.benchmarking.dataset;

import java.io.File;
import java.io.FileFilter;
import java.io.FilenameFilter;

import org.openimaj.citation.annotation.Reference;
import org.openimaj.citation.annotation.ReferenceType;
import org.openimaj.data.dataset.Dataset;
import org.openimaj.data.dataset.ListBackedDataset;
import org.openimaj.data.dataset.MapBackedDataset;
import org.openimaj.experiment.annotations.DatasetDescription;

import cern.colt.Arrays;

/**
 * OpenIMAJ Dataset for the MusicSpeech Database.
 * <p>
 * The dataset is built by walking the <code>wavfile/test</code> or
 * <code>wavfile/train</code> directory beneath a base directory. Every
 * directory that contains no sub-directories becomes one group in the
 * underlying map; the group's key is the directory's path relative to the
 * selected set directory and its value is the list of <code>.wav</code> files
 * found directly inside it. Files that sit in a directory which also has
 * sub-directories are not added to any group.
 *
 * @author David Dupplaw (dpd@ecs.soton.ac.uk)
 * @created 13 Mar 2013
 */
@DatasetDescription(
	name = "Music-Speech Dataset",
	description = "The 'music-speech' corpus is a small collection of some 240 " +
		"15-second extracts collected 'at random' from the radio by Eric Scheirer " +
		"during his internship at Interval Research Corporation in the summer of 1996 " +
		"under the supervision of Malcolm Slaney",
	url = "http://labrosa.ee.columbia.edu/sounds/musp/scheislan.html")
@Reference(
	type = ReferenceType.Inproceedings,
	author = { "Scheirer E.", "Slaney, M." },
	title = "Construction And Evaluation Of A Robust Multifeature Speech/music Discriminator",
	year = "1997",
	booktitle = "Proc. ICASSP-97, Munich.")
public class MusicSpeechDataset extends MapBackedDataset<String, Dataset<File>, File>
{
	/** Directory of the sounds (the selected test or train directory) */
	private final File soundsDir;

	/**
	 * Create the Music-Speech Dataset from the given directory.
	 *
	 * @param baseDir The base directory of the music-speech dataset.
	 * @param testOrTrain TRUE for testing set, FALSE for training set
	 * @throws IllegalArgumentException if the selected set directory (or a
	 *             directory beneath it) cannot be listed, e.g. because it
	 *             does not exist or is not a directory
	 */
	public MusicSpeechDataset( final File baseDir, final boolean testOrTrain )
	{
		// construct the directory of which set to get
		final String setName = testOrTrain ? "test" : "train";
		this.soundsDir = new File( baseDir, "wavfile" + File.separator + setName );

		this.processDir( this.soundsDir );
	}

	/**
	 * Recursively walk the given directory. Directories that contain
	 * sub-directories are descended into; a directory with no
	 * sub-directories is registered as a group holding its .wav files.
	 *
	 * @param dir The directory to process
	 * @throws IllegalArgumentException if the directory cannot be listed
	 */
	private void processDir( final File dir )
	{
		// listFiles() never returns the "." and ".." entries, so testing for
		// a directory is sufficient here.
		final File[] groups = dir.listFiles( new FileFilter()
		{
			@Override
			public boolean accept( final File pathname )
			{
				return pathname.isDirectory();
			}
		} );

		// listFiles() returns null (rather than an empty array) when the
		// path is not a directory or an I/O error occurs.
		if( groups == null )
			throw new IllegalArgumentException( "Not a readable directory: " + dir );

		if( groups.length > 0 )
		{
			for( final File group : groups )
				this.processDir( group );
			return;
		}

		final File[] files = dir.listFiles( new FilenameFilter()
		{
			@Override
			public boolean accept( final File parent, final String name )
			{
				return name.endsWith( ".wav" );
			}
		} );

		final ListBackedDataset<File> list = new ListBackedDataset<File>();
		this.map.put( this.groupName( dir ), list );

		// Guard against the directory becoming unreadable between the two
		// listings; the group is then simply left empty.
		if( files != null )
		{
			for( final File file : files )
				list.add( file );
		}
	}

	/**
	 * Work out the map key for a group directory: its path relative to the
	 * sounds directory. When the directory is the sounds directory itself
	 * there is no relative part, so the directory's own name is used.
	 *
	 * @param dir The group directory
	 * @return The key under which the group is stored
	 */
	private String groupName( final File dir )
	{
		final String root = this.soundsDir.getAbsolutePath();
		final String path = dir.getAbsolutePath();

		if( path.length() <= root.length() )
			return dir.getName();

		// +1 skips the separator between the root and the relative part
		return path.substring( root.length() + 1 );
	}
}