/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * 	Redistributions of source code must retain the above copyright notice,
 * 	this list of conditions and the following disclaimer.
 *
 *   *	Redistributions in binary form must reproduce the above copyright notice,
 * 	this list of conditions and the following disclaimer in the documentation
 * 	and/or other materials provided with the distribution.
 *
 *   *	Neither the name of the University of Southampton nor the names of its
 * 	contributors may be used to endorse or promote products derived from this
 * 	software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.classifier.citylandscape;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

import org.openimaj.image.FImage;
import org.openimaj.image.ImageUtilities;
import org.openimaj.image.analysis.algorithm.EdgeDirectionCoherenceVector;
import org.openimaj.knn.DoubleNearestNeighbours;
import org.openimaj.knn.approximate.DoubleNearestNeighboursKDTree;

/**
 * 	Originally written by Ajay Mehta for his third year project. Reworked for
 * 	inclusion into OpenIMAJ by David Dupplaw.
 * 	<p>
 * 	This class provides classification of images. The idea of the classifier
 * 	is to classify images as either landscape images or cityscape images. It
 * 	could also be used as a natural vs. non-natural classifier, as the
 * 	technique used is based on edge-direction coherence vectors (i.e. looking
 * 	for strong lines).
 * 	The training set (src/main/resources/CityLS10000.2.no-decimal)
 * 	provides 5000 examples of edge direction coherence vectors for each of
 * 	cityscape and landscape (the original images were crawled from Flickr
 * 	based on appropriate tags). Internally the classifier uses a
 * 	nearest-neighbour search to determine the class of the query.
 * 	<p>
 * 	To use, do something like this:
 * 	<pre>
 * 	{@code
 * 		CityLandscapeTree clt = new CityLandscapeTree( "city", "landscape",
 * 			getClass().getResourceAsStream( "/CityLS10000.2.no-decimal" ), 10000 );
 * 		String clazz = clt.classifyImage( ImageUtilities.readF(
 * 			new File("myImage.jpg") ), 1 );
 * 	}
 * 	</pre>
 * 	<p>
 * 	A main method is supplied for the tool which will take an image filename
 * 	and classify the image as city or landscape.
 *
 *  @author Ajay Mehta (am24g08@ecs.soton.ac.uk)
 *  @author David Dupplaw (dpd@ecs.soton.ac.uk)
 *  @created 2011
 *
 */
public class CityLandscapeTree
{
	/** Category 1 */
	private String cat1 = null;

	/** Category 2 */
	private String cat2 = null;

	/** The number of instances in the training set */
	private int trainingSetSize = 0;

	/** The nearest neighbour classifier; null until a training set is loaded */
	private DoubleNearestNeighbours dnn = null;

	/**
	 * 	The size of the vector generated for each image: VECTOR_SIZE/2 bins
	 * 	are requested from the edge direction coherence analyser, and the
	 * 	training data is documented as holding that many bins for each of
	 * 	the coherent and incoherent histograms.
	 */
	private static final int VECTOR_SIZE = 144;

	/**
	 * 	Second constructor argument passed to the KD-tree (number of trees).
	 * 	NOTE(review): 768 trees with 8 checks is an unusual combination for
	 * 	a KD-tree ensemble; confirm these two values are not transposed.
	 */
	private final static int NTREES = 768;

	/** Third constructor argument passed to the KD-tree (number of checks) */
	private final static int NCHECKS = 8;

	/**
	 * 	Default constructor that takes the two categories
	 * 	to classify between. No training set is loaded, so
	 * 	{@link #classifyImage(FImage, int)} cannot be used on an instance
	 * 	created this way.
	 *
	 * 	@param cat1 The first category
	 * 	@param cat2 The second category
	 */
	public CityLandscapeTree( String cat1, String cat2 )
	{
		this.cat1 = cat1;
		this.cat2 = cat2;
	}

	/**
	 * 	Constructor that takes the two categories to classify between,
	 * 	an input stream that points to a training set file and the size
	 * 	of that training set.
	 * 	<p>
	 * 	If the training set cannot be read the stack trace is printed and
	 * 	the instance is left without a classifier.
	 *
	 * 	@param cat1 The first classification category
	 * 	@param cat2 The second classification category
	 * 	@param trainingSet The training set
	 * 	@param trainingSetSize The size of the training set
	 */
	public CityLandscapeTree( String cat1, String cat2,
			InputStream trainingSet, int trainingSetSize )
	{
		this( cat1, cat2 );
		try
		{
			// Load the given training set
			double[][] train = loadTrainingSet( trainingSet, trainingSetSize );

			// Construct DoubleNearestNeighbours Object
			dnn = new DoubleNearestNeighboursKDTree( train, NTREES, NCHECKS );
		}
		catch( IOException e )
		{
			e.printStackTrace();
		}
	}

	/**
	 * 	From the given image, returns the edge direction coherence
	 * 	vector as a double array.
	 *
	 * 	@param crgbimage The image to process
	 * 	@return A 2-dimensional array containing a single row of
	 * 		VECTOR_SIZE values
	 */
	private static double[][] getImageVectorAsArray( FImage crgbimage )
	{
		double[][] toReturn = new double[1][VECTOR_SIZE];

		// Calculate the Edge direction coherence on the image.
		EdgeDirectionCoherenceVector edcv = new EdgeDirectionCoherenceVector();
		edcv.setNumberOfBins( VECTOR_SIZE/2 );

		// Process the image
		crgbimage.analyseWith( edcv );

		// Get the histogram
		// (assumes it holds at least VECTOR_SIZE values - TODO confirm)
		double[] d = edcv.getLastHistogram().asDoubleFV().asDoubleVector();

		// Copy the histogram and accumulate the sum of all its bins, which
		// is handed to the normaliser as the total for this single row
		double[] edgeCounter = new double[1];
		for( int j = 0; j < VECTOR_SIZE; j++ )
		{
			toReturn[0][j] = d[j];
			edgeCounter[0] += d[j];
		}

		// Normalise the vector
		CityLandscapeUtilities.normaliseVector( toReturn, edgeCounter );

		return toReturn;
	}

	/**
	 * 	Classifies the given image.
	 * 	<p>
	 * 	Each of the k nearest training vectors votes for its category with
	 * 	a weight of 1/distance. Training instances with an index in the
	 * 	first half of the training set count towards the first category,
	 * 	the remainder towards the second.
	 *
	 * 	@param image The image to classify
	 * 	@param k The number of nearest neighbours to interrogate
	 * 	@return The expected category, or "?" if the two categories tie
	 * 	@throws IllegalStateException if no training set has been loaded
	 */
	public String classifyImage( FImage image, int k )
	{
		// Without this guard a missing classifier (two-argument constructor,
		// or a failed load) surfaces as a bare NullPointerException below.
		if( dnn == null )
			throw new IllegalStateException(
				"No training set has been loaded; cannot classify" );

		System.out.println( "Classifying image... " );

		// Get the vector for the query image.
		double[][] query = getImageVectorAsArray( image );

		// Indexes and distances of nearest neighbours for the one query image
		int[][] indexes = new int[1][k];
		double[][] distances = new double[1][k];

		// KNN search for the query image
		dnn.searchKNN( query, k, indexes, distances );

		// Counters for the two categories (City vs Landscape)
		double cat1Counter = 0, cat2Counter = 0;

		// Loop through all the results
		for( int i = 0; i < distances[0].length; i++ )
		{
			// First half of the training set is category 1, second half
			// category 2; votes are weighted by inverse distance.
			if( indexes[0][i] < trainingSetSize / 2 )
					cat1Counter += 1 / distances[0][i];
			else	cat2Counter += 1 / distances[0][i];
		}

		if( cat1Counter > cat2Counter )
			return cat1;
		else if( cat2Counter > cat1Counter )
			return cat2;
		else return "?";
	}

	/**
	 * 	Loads a training set of a given size from an input stream.
	 * 	<p>
	 * 	The stream is not closed by this method; that remains the
	 * 	responsibility of the caller.
	 *
	 * 	@param is The input stream to read the training set from
	 * 	@param vecSize The size of the training set (number of instances)
	 * 	@return A 2-dimensional double array of the training set
	 * 	@throws IOException if the input stream could not be fully read,
	 * 		contains more instances than expected, or contains a line with
	 * 		too few values
	 */
	public double[][] loadTrainingSet( InputStream is, int vecSize )
		throws IOException
	{
		this.trainingSetSize = vecSize;
		double[][] trainingVector = new double[trainingSetSize][VECTOR_SIZE];
		double[] totalEdges = new double[trainingSetSize];

		System.out.println( "Loading training data... " );

		// Read in each line from the training set data. The training set data
		// is set out as one-line per training image. The data in the line is
		// comma-separated double values, where the values are the histogram
		// bin values from the edge direction coherence vector. The number
		// of bins should be VECTOR_SIZE/2 each for coherent and incoherent
		// histograms. A final value on each line contains the total number
		// of detected coherent edges in the image.
		BufferedReader br = new BufferedReader(
				new InputStreamReader( is, "UTF-8" ) );
		int counter = 0;
		String line = null;
		while( (line = br.readLine()) != null )
		{
			// Tolerate blank lines (e.g. a trailing newline at end of file)
			if( line.trim().length() == 0 )
				continue;

			if( counter >= trainingSetSize )
				throw new IOException( "Training set contains more than the "
					+ "expected " + trainingSetSize + " instances" );

			String[] array = line.split( "," );
			if( array.length < VECTOR_SIZE + 1 )
				throw new IOException( "Training instance " + counter
					+ " has " + array.length + " values; expected at least "
					+ (VECTOR_SIZE + 1) );

			// Read both the coherent and the incoherent halves so that the
			// training vectors are comparable with the query vector, which
			// fills all VECTOR_SIZE entries.
			for( int i = 0; i < VECTOR_SIZE; i++ )
				trainingVector[counter][i] = Double.parseDouble( array[i] );

			totalEdges[counter++] = Double.parseDouble(
					array[array.length - 1] );
		}

		// Normalise the vector by the total number of edges.
		System.out.println( "Normalising training data..." );
		CityLandscapeUtilities.normaliseVector( trainingVector, totalEdges );

		return trainingVector;
	}

	/**
	 * 	Given an image filename, it will classify it as "City" or
	 * 	"Landscape" using the bundled training set and the single
	 * 	nearest neighbour.
	 *
	 *  @param args The command-line arguments; the first is the image filename
	 */
	public static void main( String[] args )
	{
		if( args.length < 1 )
		{
			System.err.println( "Please supply an image filename." );
			System.exit(1);
		}

		try
		{
			CityLandscapeTree clt = new CityLandscapeTree( "City", "Landscape",
				CityLandscapeTree.class.getResourceAsStream( "/CityLS10000.2.no-decimal" ), 10000 );
			String clazz = clt.classifyImage( ImageUtilities.readF( new File(args[0]) ), 1 );
			System.out.println( "Classified as: "+clazz );
		}
		catch( IOException e )
		{
			e.printStackTrace();
		}
	}
}