001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.audio.analysis.benchmarking.dataset;
031
032import java.io.File;
033import java.io.FileFilter;
034import java.io.FilenameFilter;
035
036import org.openimaj.citation.annotation.Reference;
037import org.openimaj.citation.annotation.ReferenceType;
038import org.openimaj.data.dataset.Dataset;
039import org.openimaj.data.dataset.ListBackedDataset;
040import org.openimaj.data.dataset.MapBackedDataset;
041import org.openimaj.experiment.annotations.DatasetDescription;
042
043import cern.colt.Arrays;
044
045/**
046 *      OpenIMAJ Dataset for the MusicSpeech Database
047 *
048 *      @author David Dupplaw (dpd@ecs.soton.ac.uk)
049 *  @created 13 Mar 2013
050 */
051@DatasetDescription(
052                name = "Music-Speech Dataset",
053                description = "The 'music-speech' corpus is a small collection of some 240 " +
054                                "15-second extracts collected 'at random' from the radio by Eric Scheirer " +
055                                "during his internship at Interval Research Corporation in the summer of 1996 " +
056                                "under the supervision of Malcolm Slaney",
057                url = "http://labrosa.ee.columbia.edu/sounds/musp/scheislan.html")
058@Reference(
059                type = ReferenceType.Inproceedings,
060                author = { "Scheirer E.", "Slaney, M." },
061                title = "Construction And Evaluation Of A Robust Multifeature Speech/music Discriminator",
062                year = "1997",
063                booktitle = "Proc. ICASSP-97, Munich.")
064public class MusicSpeechDataset extends MapBackedDataset<String, Dataset<File>, File>
065{
066        /** Directory of the sounds */
067        private File soundsDir;
068
069        /**
070         *      Create the Music-Speech Dataset from the given directory.
071         *
072         *      @param baseDir The base directory of the music-speech dataset.
073         *      @param testOrTrain TRUE for testing set, FALSE for training set
074         */
075        public MusicSpeechDataset( final File baseDir, final boolean testOrTrain )
076        {
077                // construct the directory of which set to get
078                String soundsDir = "wavfile" + File.separator;
079                soundsDir += testOrTrain? "test" : "train";
080
081                this.processDir( this.soundsDir = new File( baseDir, soundsDir ) );
082
083                System.out.println( this );
084        }
085
086        private void processDir( final File dir )
087        {
088                final File[] groups = dir.listFiles( new FileFilter()
089                {
090                        @Override
091                        public boolean accept( final File pathname )
092                        {
093                                return  pathname.isDirectory() &&
094                                                !pathname.getName().equals( "." ) &&
095                                                !pathname.getName().equals("..");
096                        }
097                } );
098
099                System.out.println( Arrays.toString( groups ));
100
101                if( groups.length == 0 )
102                {
103                        System.out.println( "Processing "+dir );
104
105                        final File[] files = dir.listFiles( new FilenameFilter()
106                        {
107                                @Override
108                                public boolean accept( final File dir, final String name )
109                                {
110                                        return name.endsWith( ".wav" );
111                                }
112                        } );
113
114                        final ListBackedDataset<File> list = new ListBackedDataset<File>();
115                        this.map.put( dir.getAbsolutePath().substring( this.soundsDir.getAbsolutePath().length()+1 ), list );
116
117                        for( final File file : files )
118                        {
119                                list.add( file );
120                        }
121                }
122                else
123                {
124                        for( final File group: groups )
125                                this.processDir( group );
126                }
127        }
128}