001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030/** 031 * 032 */ 033package org.openimaj.tools.ocr; 034 035import org.openimaj.image.FImage; 036import org.openimaj.image.processing.resize.ResizeProcessor; 037import org.openimaj.knn.DoubleNearestNeighbours; 038import org.openimaj.knn.approximate.DoubleNearestNeighboursKDTree; 039import org.openimaj.tools.ocr.FontSimulator.FontSimListener; 040import org.openimaj.util.pair.Pair; 041 042/** 043 * A class and tester for testing KNN classification of characters 044 * which are built randomly from the {@link FontSimulator}. 045 * <p> 046 * In this base implementation, the pixels of the character images are 047 * used as the vector for classification which will achieve somewhere in the 048 * region of 60% correct classification for the characters 0-9 and somewhat 049 * worse for larger character ranges. 050 * <p> 051 * Override the {@link #getImageVector(FImage)} method to try different 052 * features. This class provides the training, classification and testing. 053 * 054 * @author David Dupplaw (dpd@ecs.soton.ac.uk) 055 * @created 19 Aug 2011 056 * 057 */ 058public class KNNCharacterClassifier 059{ 060 /** 061 * This class is the {@link FontSimListener} implementation that receives 062 * each character as an image in some randomised font. When it receives 063 * an image, it creates the feature vector by calling 064 * {@link KNNCharacterClassifier#getImageVector(FImage)} and stores that 065 * into a double array that can be used as input to the nearest neighbour 066 * classifier for training. That array can be retrieved using 067 * {@link #getVector()}. 068 * 069 * @author David Dupplaw (dpd@ecs.soton.ac.uk) 070 * @created 19 Aug 2011 071 * 072 */ 073 protected class ImageTrainer implements FontSimListener<FImage> 074 { 075 /** The output vector that can be used as input to the NN classifier */ 076 private double[][] vector = null; 077 078 /** The current index being processed */ 079 private int index = 0; 080 081 /** 082 * Constructor that takes the size of the output vector (that is the 083 * total number of training instances). 084 * 085 * @param n The number of training instances 086 */ 087 public ImageTrainer( int n ) 088 { 089 vector = new double[n][]; 090 } 091 092 /** 093 * Implementation of the {@link FontSimListener} interface. 094 * @param img The character image. 095 */ 096 @Override 097 public void imageCreated( FImage img ) 098 { 099 vector[index++] = getImageVector(img); 100 if( index%25 == 0 ) 101 System.out.print("..."+index ); 102 } 103 104 /** 105 * Retrieve the set of training data as a double array. 106 * @return the training data 107 */ 108 public double[][] getVector() 109 { 110 return vector; 111 } 112 } 113 114 private int NTREES = 768; 115 private int NCHECKS = 8; 116 117 /** The nearest neighbour classifier */ 118 private DoubleNearestNeighbours nn = null; 119 120 /** What size to resize the images to for the feature vector */ 121 private int resize = 32; 122 123 /** Number of examples per class to generate */ 124 private int nExamplesPerClass = 100; 125 126 /** Start character in the range */ 127 private int startChar = '0'; 128 129 /** End character in the range */ 130 private int endChar = '9'; 131 132 /** 133 * Default constructor 134 */ 135 public KNNCharacterClassifier() 136 { 137 } 138 139 /** 140 * Get the feature vector for a single image. Can be overridden to 141 * try different feature vectors for classification. 142 * 143 * @param img The character image. 144 * @return The feature vector for this image. 145 */ 146 public double[] getImageVector( FImage img ) 147 { 148 // Resize the image (stretch) to a standard shape 149 FImage ff = ResizeProcessor.resample( img, resize, resize ); 150 return ff.getDoublePixelVector(); 151 } 152 153 /** 154 * Train the classifier by generating a bunch of random training examples 155 * of characters in various fonts (using the {@link FontSimulator}) 156 * and using the features extracted from those images, train a nearest 157 * neighbour classifier. 158 */ 159 public void train() 160 { 161 // Create the FontSimListener that receives images from the FontSimulator 162 ImageTrainer it = new ImageTrainer( 163 (endChar-startChar)*nExamplesPerClass); 164 165 System.out.println( "Created vector of length "+ 166 ((endChar-startChar)*nExamplesPerClass) ); 167 System.out.println( "Creating character data" ); 168 169 // For each of the characters produce nExamplesPerCase training examples 170 for( int i = startChar; i < endChar; i++ ) 171 { 172 FontSimulator<Float, FImage> fs = 173 new FontSimulator<Float,FImage>( 174 Character.toString( (char)i ) ); 175 fs.setFontPointSize( 48 ); 176 fs.makeRuns( nExamplesPerClass, it, new FImage(1,1) ); 177 } 178 179 // Train the classifier 180 System.out.println( "\nCreating KDTree..."); 181 nn = new DoubleNearestNeighboursKDTree( it.getVector(), NTREES, NCHECKS ); 182 } 183 184 /** 185 * Classify the given image with the nearest neighbour classifier. 186 * 187 * @param img the character image to classify 188 * @return The classified character. 189 */ 190 public char classify( FImage img ) 191 { 192 // Create the input vector 193 double[][] v = new double[1][]; 194 v[0] = getImageVector( img ); 195 196 // Setup the output variables 197 int k = 1; 198 int[][] indices = new int[1][k]; 199 double[][] dists = new double[1][k]; 200 201 // Do the search 202 nn.searchKNN( v, k, indices, dists ); 203 204 // Work out what character the best index represents 205 System.out.println( "Best index: "+indices[0][0] ); 206 char c = (char)(48+indices[0][0]/nExamplesPerClass); 207 System.out.println( "So, I think it's "+c ); 208 209 return c; 210 } 211 212 /** 213 * Run a bunch of tests to determine how good the classifier is. It does 214 * this by creating a load of random examples of random characters using 215 * the {@link FontSimulator} and classifies them. 216 * 217 * @param nTestRuns The number of tests to run 218 */ 219 public void test( int nTestRuns ) 220 { 221 // First in the pair is the incorrectly classified count, 222 // the second is the correctly classified count. 223 final Pair<Integer> results = new Pair<Integer>(0,0); 224 225 // Loop through all the tests 226 for( int j = 0; j < nTestRuns; j++ ) 227 { 228 // Choose a randome character that's in our training set 229 final int i = startChar + (int)(Math.random()*(endChar-startChar)); 230 231 // Create a single random image of the chosen character 232 FontSimulator<Float, FImage> fs = new FontSimulator<Float,FImage>( 233 Character.toString( (char)i ) ); 234 fs.setFontPointSize( 48 ); 235 FImage x = fs.generate( new FImage(1,1) ); 236 237 // Classify that character 238 System.out.println( "Classifying character "+(char)i ); 239 char c = classify( x ); 240 241 // Update the test count 242 if( c != i ) 243 results.setFirstObject( results.firstObject().intValue()+1 ); 244 else results.setSecondObject( results.secondObject().intValue()+1 ); 245 } 246 247 // Show the test results 248 int nWrong = results.firstObject(); 249 int nCorrect = results.secondObject(); 250 251 System.out.println( "==========================================="); 252 System.out.println( " T E S T R E S U L T S "); 253 System.out.println( "==========================================="); 254 System.out.println( "Number of runs: "+nTestRuns ); 255 System.out.println( "Correct: "+nCorrect+" ("+(100*nCorrect/nTestRuns)+"%)" ); 256 System.out.println( "Wrong: "+nWrong+" ("+(100*nWrong/nTestRuns)+"%)" ); 257 System.out.println( "==========================================="); 258 259 } 260 261 /** 262 * The main does a training run then a test run. 263 * 264 * @param args 265 */ 266 public static void main( String[] args ) 267 { 268 final KNNCharacterClassifier ocr = new KNNCharacterClassifier(); 269 ocr.train(); 270 ocr.test( 100 ); 271 } 272}