001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030/**
031 * 
032 */
033package org.openimaj.tools.globalfeature;
034
035import java.io.File;
036import java.io.IOException;
037import java.io.PrintWriter;
038import java.io.StringWriter;
039import java.util.ArrayList;
040import java.util.Arrays;
041import java.util.HashMap;
042import java.util.List;
043import java.util.Map;
044import java.util.Set;
045
046import org.jgrapht.UndirectedGraph;
047import org.jgrapht.alg.ConnectivityInspector;
048import org.jgrapht.graph.DefaultEdge;
049import org.jgrapht.graph.SimpleGraph;
050import org.kohsuke.args4j.CmdLineException;
051import org.kohsuke.args4j.CmdLineParser;
052import org.kohsuke.args4j.Option;
053import org.kohsuke.args4j.ProxyOptionHandler;
054import org.openimaj.feature.ByteFV;
055import org.openimaj.feature.ByteFVComparison;
056import org.openimaj.feature.DoubleFV;
057import org.openimaj.feature.DoubleFVComparison;
058import org.openimaj.feature.FVComparator;
059import org.openimaj.feature.FeatureVector;
060import org.openimaj.feature.FloatFV;
061import org.openimaj.feature.FloatFVComparison;
062import org.openimaj.feature.IntFV;
063import org.openimaj.feature.IntFVComparison;
064import org.openimaj.feature.ShortFV;
065import org.openimaj.feature.ShortFVComparison;
066import org.openimaj.image.ImageUtilities;
067import org.openimaj.image.MBFImage;
068
069import Jama.Matrix;
070
071/**
072 *      This is a stand-alone tool that provides a means for comparing a
073 *      collection of images against each other.
074 *
075 *      @author David Dupplaw (dpd@ecs.soton.ac.uk)
076 *  @created Oct 21, 2011
077 *      
078 */
079public class CollectionComparisonTool
080{
081        /**
082         *      This mandatory argument provides the name of a directory in which
083         *      all images will be found.
084         */
085        @Option(name="--dir", aliases="-d", usage="Directory of images", 
086                        required=true )
087                        private String dir = null;
088
089        /**
090         *      This is a mandatory argument that is used to provide the feature
091         *      comparison metric.
092         */
093        @Option(name="--metric", aliases="-m", usage="Comparison metric", 
094                        required=true)
095                        private FeatureComparison compare = null;
096
097        /**
098         *      This is a mandatory argument that is used to provide the feature
099         *      type to generate for each image.
100         */
101        @Option(name="--feature-type", aliases="-f", 
102                        handler=ProxyOptionHandler.class, usage="Feature type", 
103                        required=true)
104                        private GlobalFeatureType feature = null;
105                        private GlobalFeatureExtractor featureOp;
106
107        /**
108         *      This optional argument provides the ability to 'binarise' the output
109         *      such that distance values over the threshold will be set to 0,
110         *      and distances under the threshold set to 1.
111         */
112        @Option(name="--threshold", aliases="-t", usage="Threshold distances",
113                        required=false)
114                        private double threshold = -1;
115
116        /**
117         *      This optional argument provides a regular expression against which
118         *      files in the directory will be filtered against.
119         */
120        @Option(name="--regex", aliases="-r", usage="Filename regex pattern",
121                        required=false )
122                        private String regex = null;
123
124        /**
125         *      This is an optional parameter that allows the user to provide a single
126         *      image against which all the other images will be compared. 
127         */
128        @Option(name="--image", aliases="-i", usage="Single comparison image", 
129                        required=false)
130                        private String image = null;
131
132        /**
133         *      This optional parameter that shows verbose output
134         */
135        @Option(name="--verbose", aliases="-v", usage="Verbose output",
136                        required=false)
137                        private boolean verbose = false;
138
139        /**
140         *      This optional parameter allows features to be cached
141         */
142        @Option(name="--cache", usage="Cache features in RAM",
143                        required=false)
144                        private boolean cache = false;
145
146        /**
147         *      This optional parameter allows the clusters of 'similar' images
148         *  to be output rather than the matrix. The threshold must be set
149         *  for this to work.
150         */
151        @Option(name="--printClusters", aliases="-pc", usage="Print the clusters rather than the matrix",
152                        required=false)
153                        private boolean printClusters = false;
154
155        private Map<File, FeatureVector> cacheData = new HashMap<File, FeatureVector>();
156
157        private FeatureVector getFeatureVector(File file) throws IOException {
158                FeatureVector fv = cacheData.get(file);
159
160                if (fv == null) {
161                        MBFImage im1 = ImageUtilities.readMBF( file );
162                        fv = featureOp.extract(im1);
163
164                        if (cache) {
165                                cacheData.put(file, fv);
166                        }
167                }
168
169                return fv;
170        }
171
172        /**
173         *      Execute the tool.
174         */
175        private String execute()
176        {
177                List<String> dir1 = null;
178
179                // Get a list of files
180                List<String> dir2 = getListOfFiles( dir, true );
181
182                // If we're going to compare a single image against a collection,
183                // then we will take a different route here.
184                if( image != null )
185                {
186                        dir1 = new ArrayList<String>();
187                        dir1.add( image );
188                }
189                // Otherwise we'll compare all against all.
190                else dir1 = dir2;
191
192                Matrix m = new Matrix( dir1.size(), dir2.size() );
193                for( int y = 0; y < dir1.size(); y++ )
194                {
195                        String s1 = dir1.get(y);
196                        try
197                        {
198                                FeatureVector fv1 = getFeatureVector( new File(s1) );
199                                FVComparator<FeatureVector> fvc = getComp(fv1, compare);
200
201                                int xx = 0;
202                                if( dir1 == dir2 )
203                                        xx = y; 
204                                for( int x = xx; x < dir2.size(); x++ )
205                                {
206                                        String s2 = dir2.get(x);
207                                        if( y == 0 )
208                                                System.out.println( ""+x+": "+s2 );
209
210                                        if( verbose )
211                                                System.out.println( "Comparing "+s1+" against "+s2 );
212
213                                        try
214                                        {
215                                                FeatureVector fv2 = getFeatureVector( new File(s2) );
216
217                                                double d = 0;
218                                                if( compare == FeatureComparison.EQUALS ) 
219                                                {
220                                                        if( Arrays.equals( 
221                                                                        fv1.asDoubleVector(), 
222                                                                        fv2.asDoubleVector() ) )
223                                                                d = 1;
224                                                        else    d = 0;
225                                                } 
226                                                else 
227                                                {
228                                                        double v = fvc.compare(fv1, fv2);
229                                                        d = (threshold==-1?v:(v>threshold?0:1));
230                                                }               
231
232                                                // Symmetric matrix
233                                                m.set( y, x, d );
234                                                m.set( x, y, d );
235                                        }
236                                        catch( Exception e )
237                                        {
238                                                e.printStackTrace();
239                                                continue;
240                                        }
241                                }
242                        }
243                        catch( Exception e )
244                        {
245                                e.printStackTrace();
246                                continue;
247                        }
248                }
249
250                if (printClusters && this.threshold != -1) {
251                        return getClusterInfo(dir1, dir2, m);
252                } else {
253                        StringWriter sw = new StringWriter();
254                        PrintWriter  pw = new PrintWriter( sw );
255                        m.print( pw, 3, 4 );
256                        return sw.toString();
257                }
258        }
259
260        private String getClusterInfo(List<String> dir1, List<String> dir2, Matrix scores) {
261                List<Set<String>> clusters = getClusters(dir1, dir2, scores);
262
263                StringBuffer sb = new StringBuffer();
264
265                sb.append("<html>\n\t<body>\n");
266                for (Set<String> set : clusters) {
267                        if (set.size() > 1) {
268                                sb.append("\t\t<div>\n");
269                                
270                                for (String s : set) {
271                                        sb.append("\t\t\t<img src=\""+s+"\" width=\"100\"/>\n");
272//                                      String id = s.replace("/Users/jsh2/youtube/", "").replace("/hqdefault.jpg", "");
273//                                      sb.append("\t\t\t<a href=\"http://www.youtube.com/watch?v="+id+"\">\n");
274//                                      sb.append("\t\t\t\t<img src=\"http://i.ytimg.com/vi/"+id+"/hqdefault.jpg\" width=\"100\"/>\n");
275//                                      sb.append("\t\t\t</a>\n");
276                                }
277                                
278                                sb.append("\t\t</div>");
279                                sb.append("\t\t<hr/>");
280                        }
281                }
282                sb.append("\t</body>\n</html>\n");
283                
284                return sb.toString();
285        }
286
287        private List<Set<String>> getClusters(List<String> dir1, List<String> dir2, Matrix scores) {
288                UndirectedGraph<String, DefaultEdge> graph = new SimpleGraph<String, DefaultEdge>(DefaultEdge.class);
289
290                for (String f : dir1) graph.addVertex(f);
291                if (dir1 != dir2) for (String f : dir2) graph.addVertex(f);
292
293                final double[][] matrixData = scores.getArray();
294                for (int r=0; r<matrixData.length; r++) {
295                        for (int c=r; c<matrixData[0].length; c++) {
296                                String f1 = dir1.get(r);
297                                String f2 = dir2.get(c);
298                                if ( matrixData[r][c] != 0 && f1!=f2 ) {
299                                        graph.addEdge(f1, f2);
300                                }
301                        }
302                }
303
304                ConnectivityInspector<String, DefaultEdge> conn = new ConnectivityInspector<String, DefaultEdge>(graph);
305                return conn.connectedSets();
306        }
307
308        /**
309         *      Returns a list of files that exist in the given dir.
310         *      @param dir The directory to start at
311         *      @param subdirs Whether to recurse into subdirs
312         *      @return The list of relative filenames
313         */
314        private List<String> getListOfFiles( String dir, boolean subdirs )
315        {
316                // We'll stick all the files in here.
317                List<String> files = new ArrayList<String>(); 
318
319                // First get a list of all files. We don't filter them as we're
320                // trying to get all the subdirs.
321                File[] f = new File(dir).listFiles();
322                for( File file : f )
323                {
324                        // If it's a directory, we recurse
325                        if( file.isDirectory() )
326                        {
327                                files.addAll( 
328                                                getListOfFiles( file.getAbsolutePath(), subdirs ) );
329                        }
330                        else
331                                // If there's no regex or the file matches our regex
332                                if( regex == null || 
333                                                (regex != null && file.getName().matches( regex ) ) )
334                                {
335                                        files.add( file.getAbsolutePath() );
336                                }
337                }
338
339                // Convert to array
340                return files;
341        }
342
343        /**
344         *      Get a feature comparison class for the given feature and metric.
345         *      @param fv The feature vector
346         *      @param type The feature comparison type
347         *      @return A comparable class
348         */
349        @SuppressWarnings("unchecked")
350        protected <T extends FeatureVector> FVComparator<T> 
351        getComp( T fv, FeatureComparison type ) 
352        {
353                if (fv instanceof ByteFV) return (FVComparator<T>) ByteFVComparison.valueOf(type.name());
354                if (fv instanceof ShortFV) return (FVComparator<T>) ShortFVComparison.valueOf(type.name());
355                if (fv instanceof IntFV) return (FVComparator<T>) IntFVComparison.valueOf(type.name());
356                if (fv instanceof FloatFV) return (FVComparator<T>) FloatFVComparison.valueOf(type.name());
357                if (fv instanceof DoubleFV) return (FVComparator<T>) DoubleFVComparison.valueOf(type.name());
358                return null;
359        }
360
361        /**
362         *      Main method.
363         * 
364         *      Example command line:
365         *  collectcomp -d D:\gfx -r .*\.jpg -f HISTOGRAM -c RGB -m EUCLIDEAN 4 4 4
366         * 
367         *      @param args Command-line arguments
368         */
369        public static void main( String[] args )
370        {
371                // Instantiate the tool and parse the arguments
372                CollectionComparisonTool cct = new CollectionComparisonTool();
373                CmdLineParser parser = new CmdLineParser( cct );
374
375                try
376                {
377                        parser.parseArgument( args );                   
378                        System.out.println( cct.execute() );
379                }
380                catch( CmdLineException e )
381                {
382                        System.err.println(e.getMessage());
383                        System.err.println("Usage: collectiontool [options...]");
384                        parser.printUsage(System.err);
385
386                        if( cct.feature == null ) 
387                        {
388                                for( GlobalFeatureType m : GlobalFeatureType.values() ) 
389                                {
390                                        System.err.println();
391                                        System.err.println(m + " options: ");
392                                        new CmdLineParser(m.getOptions()).printUsage(System.err);
393                                }
394                        }
395                }
396        }
397}