/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * 	Redistributions of source code must retain the above copyright notice,
 * 	this list of conditions and the following disclaimer.
 *
 *   *	Redistributions in binary form must reproduce the above copyright notice,
 * 	this list of conditions and the following disclaimer in the documentation
 * 	and/or other materials provided with the distribution.
 *
 *   *	Neither the name of the University of Southampton nor the names of its
 * 	contributors may be used to endorse or promote products derived from this
 * 	software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/**
 *
 */
package org.openimaj.tools.globalfeature;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.jgrapht.UndirectedGraph;
import org.jgrapht.alg.ConnectivityInspector;
import org.jgrapht.graph.DefaultEdge;
import org.jgrapht.graph.SimpleGraph;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import org.kohsuke.args4j.ProxyOptionHandler;
import org.openimaj.feature.ByteFV;
import org.openimaj.feature.ByteFVComparison;
import org.openimaj.feature.DoubleFV;
import org.openimaj.feature.DoubleFVComparison;
import org.openimaj.feature.FVComparator;
import org.openimaj.feature.FeatureVector;
import org.openimaj.feature.FloatFV;
import org.openimaj.feature.FloatFVComparison;
import org.openimaj.feature.IntFV;
import org.openimaj.feature.IntFVComparison;
import org.openimaj.feature.ShortFV;
import org.openimaj.feature.ShortFVComparison;
import org.openimaj.image.ImageUtilities;
import org.openimaj.image.MBFImage;

import Jama.Matrix;

/**
 * 	This is a stand-alone tool that provides a means for comparing a
 * 	collection of images against each other, or a single image against
 * 	a collection. The result is either a matrix of comparison scores or,
 * 	when a threshold is given and clusters are requested, an HTML page
 * 	listing groups of images that are linked by non-zero scores.
 *
 *  @author David Dupplaw (dpd@ecs.soton.ac.uk)
 *  @created Oct 21, 2011
 *
 */
public class CollectionComparisonTool
{
	/** Value of {@link #threshold} that means "no threshold was supplied". */
	private static final double NO_THRESHOLD = -1;

	/**
	 * 	This mandatory argument provides the name of a directory in which
	 * 	all images will be found.
	 */
	@Option(name="--dir", aliases="-d", usage="Directory of images",
			required=true )
	private String dir = null;

	/**
	 * 	This is a mandatory argument that is used to provide the feature
	 * 	comparison metric.
	 */
	@Option(name="--metric", aliases="-m", usage="Comparison metric",
			required=true)
	private FeatureComparison compare = null;

	/**
	 * 	This is a mandatory argument that is used to provide the feature
	 * 	type to generate for each image.
	 */
	@Option(name="--feature-type", aliases="-f",
			handler=ProxyOptionHandler.class, usage="Feature type",
			required=true)
	private GlobalFeatureType feature = null;

	// NOTE(review): never assigned in this file; presumably populated
	// reflectively by ProxyOptionHandler from the "feature" option, so the
	// field name must not change -- confirm against the handler.
	private GlobalFeatureExtractor featureOp;

	/**
	 * 	This optional argument provides the ability to 'binarise' the output
	 * 	such that distance values over the threshold will be set to 0,
	 * 	and distances at or under the threshold set to 1. The default of -1
	 * 	leaves the raw comparison values untouched.
	 */
	@Option(name="--threshold", aliases="-t", usage="Threshold distances",
			required=false)
	private double threshold = NO_THRESHOLD;

	/**
	 * 	This optional argument provides a regular expression against which
	 * 	the names of files in the directory will be filtered.
	 */
	@Option(name="--regex", aliases="-r", usage="Filename regex pattern",
			required=false )
	private String regex = null;

	/**
	 * 	This is an optional parameter that allows the user to provide a single
	 * 	image against which all the other images will be compared.
	 */
	@Option(name="--image", aliases="-i", usage="Single comparison image",
			required=false)
	private String image = null;

	/**
	 * 	This optional parameter enables verbose output.
	 */
	@Option(name="--verbose", aliases="-v", usage="Verbose output",
			required=false)
	private boolean verbose = false;

	/**
	 * 	This optional parameter allows features to be cached
	 */
	@Option(name="--cache", usage="Cache features in RAM",
			required=false)
	private boolean cache = false;

	/**
	 * 	This optional parameter allows the clusters of 'similar' images
	 * 	to be output rather than the matrix. The threshold must be set
	 * 	for this to work.
	 */
	@Option(name="--printClusters", aliases="-pc", usage="Print the clusters rather than the matrix",
			required=false)
	private boolean printClusters = false;

	/** Features extracted so far, keyed by file; only filled when {@link #cache} is set. */
	private Map<File, FeatureVector> cacheData = new HashMap<File, FeatureVector>();

	/**
	 * 	Get the feature vector for an image file, reading the image and
	 * 	extracting the feature unless a cached copy exists.
	 *
	 *  @param file The image file
	 *  @return The feature extracted by the configured extractor
	 *  @throws IOException If the image could not be read
	 */
	private FeatureVector getFeatureVector(File file) throws IOException {
		FeatureVector fv = cacheData.get(file);

		if (fv == null) {
			MBFImage im1 = ImageUtilities.readMBF( file );
			fv = featureOp.extract(im1);

			// Only remember the feature when the user asked for caching.
			if (cache) {
				cacheData.put(file, fv);
			}
		}

		return fv;
	}

	/**
	 * 	Execute the tool: compare the images and render the result.
	 *
	 * 	Failures on an individual image or image pair are reported on
	 * 	standard error and leave the corresponding matrix entries at zero.
	 *
	 *  @return The printed score matrix, or the cluster HTML when clusters
	 *  	were requested and a threshold was given.
	 */
	private String execute()
	{
		// Get a list of files
		final List<String> dir2 = getListOfFiles( dir, true );

		// If we're going to compare a single image against a collection,
		// the rows are just that image; otherwise we compare all against all.
		final boolean allAgainstAll = (image == null);
		final List<String> dir1;
		if( allAgainstAll )
		{
			dir1 = dir2;
		}
		else
		{
			dir1 = new ArrayList<String>();
			dir1.add( image );
		}

		Matrix m = new Matrix( dir1.size(), dir2.size() );
		for( int y = 0; y < dir1.size(); y++ )
		{
			String s1 = dir1.get(y);
			try
			{
				FeatureVector fv1 = getFeatureVector( new File(s1) );

				// EQUALS is evaluated directly on the vector values below, so
				// no comparator is needed for it. Looking one up regardless
				// could fail if the per-type comparison enums have no
				// constant of that name, which would skip the whole row.
				FVComparator<FeatureVector> fvc = null;
				if( compare != FeatureComparison.EQUALS )
					fvc = getComp(fv1, compare);

				// All-against-all is symmetric: only the upper triangle is
				// computed and then mirrored.
				final int xx = allAgainstAll ? y : 0;
				for( int x = xx; x < dir2.size(); x++ )
				{
					String s2 = dir2.get(x);

					// Print the column index -> file legend once.
					if( y == 0 )
						System.out.println( ""+x+": "+s2 );

					if( verbose )
						System.out.println( "Comparing "+s1+" against "+s2 );

					try
					{
						FeatureVector fv2 = getFeatureVector( new File(s2) );

						double d = 0;
						if( compare == FeatureComparison.EQUALS )
						{
							if( Arrays.equals(
									fv1.asDoubleVector(),
									fv2.asDoubleVector() ) )
								d = 1;
							else	d = 0;
						}
						else
						{
							if( fvc == null )
								throw new UnsupportedOperationException(
									"No comparator available for feature type "
									+ fv1.getClass().getName() );

							double v = fvc.compare(fv1, fv2);
							d = (threshold==NO_THRESHOLD?v:(v>threshold?0:1));
						}

						m.set( y, x, d );

						// Mirror only when the matrix is square; in single
						// image mode it has one row and (x, y) would be out
						// of bounds for every x > 0.
						if( allAgainstAll )
							m.set( x, y, d );
					}
					catch( Exception e )
					{
						// Best effort: report and move on to the next pair.
						e.printStackTrace();
						continue;
					}
				}
			}
			catch( Exception e )
			{
				// Best effort: report and move on to the next row image.
				e.printStackTrace();
				continue;
			}
		}

		if (printClusters && this.threshold != NO_THRESHOLD) {
			return getClusterInfo(dir1, dir2, m);
		} else {
			StringWriter sw = new StringWriter();
			PrintWriter pw = new PrintWriter( sw );
			m.print( pw, 3, 4 );
			return sw.toString();
		}
	}

	/**
	 * 	Render the clusters of linked images as a simple HTML page. Each
	 * 	cluster with more than one member becomes a block of image tags
	 * 	followed by a horizontal rule.
	 *
	 *  @param dir1 The files labelling the rows of the score matrix
	 *  @param dir2 The files labelling the columns of the score matrix
	 *  @param scores The score matrix
	 *  @return The HTML document as a string
	 */
	private String getClusterInfo(List<String> dir1, List<String> dir2, Matrix scores) {
		List<Set<String>> clusters = getClusters(dir1, dir2, scores);

		StringBuilder sb = new StringBuilder();

		sb.append("<html>\n\t<body>\n");
		for (Set<String> set : clusters) {
			// Singleton clusters carry no similarity information.
			if (set.size() > 1) {
				sb.append("\t\t<div>\n");

				for (String s : set) {
					sb.append("\t\t\t<img src=\""+s+"\" width=\"100\"/>\n");
				}

				sb.append("\t\t</div>");
				sb.append("\t\t<hr/>");
			}
		}
		sb.append("\t</body>\n</html>\n");

		return sb.toString();
	}

	/**
	 * 	Group the files into clusters: files are the vertices of a graph,
	 * 	an edge joins two different files whose score is non-zero, and the
	 * 	clusters are the connected sets of that graph.
	 *
	 *  @param dir1 The files labelling the rows of the score matrix
	 *  @param dir2 The files labelling the columns of the score matrix
	 *  @param scores The score matrix
	 *  @return The connected sets of files
	 */
	private List<Set<String>> getClusters(List<String> dir1, List<String> dir2, Matrix scores) {
		UndirectedGraph<String, DefaultEdge> graph = new SimpleGraph<String, DefaultEdge>(DefaultEdge.class);

		for (String f : dir1) graph.addVertex(f);
		if (dir1 != dir2) for (String f : dir2) graph.addVertex(f);

		final double[][] matrixData = scores.getArray();
		for (int r=0; r<matrixData.length; r++) {
			// Start at the diagonal: the lower triangle only mirrors it.
			for (int c=r; c<matrixData[r].length; c++) {
				String f1 = dir1.get(r);
				String f2 = dir2.get(c);

				// Compare by value, not by reference: in single image mode an
				// equal path may be a distinct String object, and adding an
				// edge from a vertex to itself is rejected by a simple graph.
				if ( matrixData[r][c] != 0 && !f1.equals(f2) ) {
					graph.addEdge(f1, f2);
				}
			}
		}

		ConnectivityInspector<String, DefaultEdge> conn = new ConnectivityInspector<String, DefaultEdge>(graph);
		return conn.connectedSets();
	}

	/**
	 * 	Returns a list of files that exist in the given dir, filtered by
	 * 	the filename regex if one was given. A directory that cannot be
	 * 	listed is reported on standard error and contributes no files.
	 *
	 *  @param dir The directory to start at
	 *  @param subdirs Whether to recurse into subdirs
	 *  @return The list of absolute filenames
	 */
	private List<String> getListOfFiles( String dir, boolean subdirs )
	{
		// We'll stick all the files in here.
		List<String> files = new ArrayList<String>();

		// First get a list of all files. We don't filter them as we're
		// trying to get all the subdirs.
		File[] f = new File(dir).listFiles();

		// listFiles() yields null when the path is not a directory or an
		// I/O error occurs.
		if( f == null )
		{
			System.err.println( "Unable to list directory: "+dir );
			return files;
		}

		for( File file : f )
		{
			if( file.isDirectory() )
			{
				// If it's a directory, we recurse (when asked to)
				if( subdirs )
				{
					files.addAll(
						getListOfFiles( file.getAbsolutePath(), subdirs ) );
				}
			}
			else
			// If there's no regex or the file matches our regex
			if( regex == null || file.getName().matches( regex ) )
			{
				files.add( file.getAbsolutePath() );
			}
		}

		return files;
	}

	/**
	 * 	Get a feature comparison class for the given feature and metric.
	 * 	The comparator is looked up by name in the comparison enum that
	 * 	corresponds to the runtime type of the feature vector.
	 *
	 *  @param <T> The type of feature vector
	 *  @param fv The feature vector
	 *  @param type The feature comparison type
	 *  @return A comparator, or null if the feature vector is not one of
	 *  	the byte, short, int, float or double types
	 */
	@SuppressWarnings("unchecked")
	protected <T extends FeatureVector> FVComparator<T>
		getComp( T fv, FeatureComparison type )
	{
		if (fv instanceof ByteFV) return (FVComparator<T>) ByteFVComparison.valueOf(type.name());
		if (fv instanceof ShortFV) return (FVComparator<T>) ShortFVComparison.valueOf(type.name());
		if (fv instanceof IntFV) return (FVComparator<T>) IntFVComparison.valueOf(type.name());
		if (fv instanceof FloatFV) return (FVComparator<T>) FloatFVComparison.valueOf(type.name());
		if (fv instanceof DoubleFV) return (FVComparator<T>) DoubleFVComparison.valueOf(type.name());
		return null;
	}

	/**
	 * 	Main method.
	 *
	 * 	Example command line:
	 * 	collectcomp -d D:\gfx -r .*\.jpg -f HISTOGRAM -c RGB -m EUCLIDEAN 4 4 4
	 *
	 *  @param args Command-line arguments
	 */
	public static void main( String[] args )
	{
		// Instantiate the tool and parse the arguments
		CollectionComparisonTool cct = new CollectionComparisonTool();
		CmdLineParser parser = new CmdLineParser( cct );

		try
		{
			parser.parseArgument( args );
			System.out.println( cct.execute() );
		}
		catch( CmdLineException e )
		{
			System.err.println(e.getMessage());
			System.err.println("Usage: collectiontool [options...]");
			parser.printUsage(System.err);

			// Without a feature type chosen, list the options of them all.
			if( cct.feature == null )
			{
				for( GlobalFeatureType m : GlobalFeatureType.values() )
				{
					System.err.println();
					System.err.println(m + " options: ");
					new CmdLineParser(m.getOptions()).printUsage(System.err);
				}
			}
		}
	}
}