001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030/**
031 * 
032 */
033package org.openimaj.tools.ocr;
034
035import org.openimaj.image.FImage;
036import org.openimaj.image.processing.resize.ResizeProcessor;
037import org.openimaj.knn.DoubleNearestNeighbours;
038import org.openimaj.knn.approximate.DoubleNearestNeighboursKDTree;
039import org.openimaj.tools.ocr.FontSimulator.FontSimListener;
040import org.openimaj.util.pair.Pair;
041
042/**
043 *      A class and tester for testing KNN classification of characters 
044 *      which are built randomly from the {@link FontSimulator}.
045 *      <p> 
046 *      In this base implementation, the pixels of the character images are 
047 *      used as the vector for classification which will achieve somewhere in the
048 *      region of 60% correct classification for the characters 0-9 and somewhat
049 *      worse for larger character ranges. 
050 *      <p>
051 *      Override the {@link #getImageVector(FImage)} method to try different 
052 *      features. This class provides the training, classification and testing.
053 *
054 *      @author David Dupplaw (dpd@ecs.soton.ac.uk)
055 *  @created 19 Aug 2011
056 *      
057 */
058public class KNNCharacterClassifier
059{
060        /**
061         *      This class is the {@link FontSimListener} implementation that receives
062         *      each character as an image in some randomised font. When it receives
063         *      an image, it creates the feature vector by calling
064         *      {@link KNNCharacterClassifier#getImageVector(FImage)} and stores that
065         *      into a double array that can be used as input to the nearest neighbour
066         *      classifier for training. That array can be retrieved using 
067         *      {@link #getVector()}. 
068         *
069         *      @author David Dupplaw (dpd@ecs.soton.ac.uk)
070         *  @created 19 Aug 2011
071         *      
072         */
073        protected class ImageTrainer implements FontSimListener<FImage>
074        {
075                /** The output vector that can be used as input to the NN classifier */
076                private double[][] vector = null;
077                
078                /** The current index being processed */
079                private int index = 0;
080
081                /**
082                 *      Constructor that takes the size of the output vector (that is the
083                 *      total number of training instances).
084                 * 
085                 *      @param n The number of training instances
086                 */
087                public ImageTrainer( int n )
088                {
089                        vector = new double[n][];
090                }
091
092                /**
093                 *      Implementation of the {@link FontSimListener} interface.
094                 *      @param img The character image.
095                 */
096                @Override
097                public void imageCreated( FImage img )
098                {
099                        vector[index++] = getImageVector(img);
100                        if( index%25 == 0 )
101                                System.out.print("..."+index );
102                }
103                
104                /**
105                 *      Retrieve the set of training data as a double array.
106                 *      @return the training data
107                 */
108                public double[][] getVector()
109                {
110                        return vector;
111                }
112        }
113        
114        private int NTREES = 768;
115        private int NCHECKS = 8;
116        
117        /** The nearest neighbour classifier */
118        private DoubleNearestNeighbours nn = null;
119        
120        /** What size to resize the images to for the feature vector */
121        private int resize = 32;
122        
123        /** Number of examples per class to generate */
124        private int nExamplesPerClass = 100;
125        
126        /** Start character in the range */
127        private int startChar = '0';
128        
129        /** End character in the range */
130        private int endChar = '9';
131        
132        /**
133         *      Default constructor
134         */
135        public KNNCharacterClassifier()
136        {
137        }
138        
139        /**
140         *      Get the feature vector for a single image. Can be overridden to         
141         *      try different feature vectors for classification.       
142         * 
143         *      @param img The character image.
144         *      @return The feature vector for this image.
145         */
146        public double[] getImageVector( FImage img )
147        {
148                // Resize the image (stretch) to a standard shape
149                FImage ff = ResizeProcessor.resample( img, resize, resize );
150                return ff.getDoublePixelVector();               
151        }
152        
153        /**
154         *      Train the classifier by generating a bunch of random training examples
155         *      of characters in various fonts (using the {@link FontSimulator}) 
156         *      and using the features extracted from those images, train a nearest
157         *      neighbour classifier.
158         */
159        public void train()
160        {
161                // Create the FontSimListener that receives images from the FontSimulator
162                ImageTrainer it = new ImageTrainer(
163                                (endChar-startChar)*nExamplesPerClass);
164                
165                System.out.println( "Created vector of length "+
166                                ((endChar-startChar)*nExamplesPerClass) );
167                System.out.println( "Creating character data" );
168                
169                // For each of the characters produce nExamplesPerCase training examples
170                for( int i = startChar; i < endChar; i++ )
171                {
172                        FontSimulator<Float, FImage> fs = 
173                                new FontSimulator<Float,FImage>( 
174                                                Character.toString( (char)i ) );
175                        fs.setFontPointSize( 48 );
176                        fs.makeRuns( nExamplesPerClass, it, new FImage(1,1) );
177                }
178
179                // Train the classifier
180                System.out.println( "\nCreating KDTree...");
181                nn = new DoubleNearestNeighboursKDTree( it.getVector(), NTREES, NCHECKS );
182        }
183        
184        /**
185         *      Classify the given image with the nearest neighbour classifier.
186         * 
187         *      @param img the character image to classify
188         *      @return The classified character.
189         */
190        public char classify( FImage img )
191        {
192                // Create the input vector
193                double[][] v = new double[1][];
194                v[0] = getImageVector( img );
195                
196                // Setup the output variables
197                int k = 1;
198                int[][] indices  = new int[1][k];
199                double[][] dists = new double[1][k];
200                
201                // Do the search
202                nn.searchKNN( v, k, indices, dists );
203                
204                // Work out what character the best index represents
205                System.out.println( "Best index: "+indices[0][0] );
206                char c = (char)(48+indices[0][0]/nExamplesPerClass);
207                System.out.println( "So, I think it's "+c );
208                
209                return c;
210        }
211        
212        /**
213         *      Run a bunch of tests to determine how good the classifier is. It does
214         *      this by creating a load of random examples of random characters using
215         *      the {@link FontSimulator} and classifies them.
216         * 
217         *      @param nTestRuns The number of tests to run
218         */
219        public void test( int nTestRuns )
220        {
221                // First in the pair is the incorrectly classified count,
222                // the second is the correctly classified count.
223                final Pair<Integer> results = new Pair<Integer>(0,0);
224                
225                // Loop through all the tests
226                for( int j = 0; j < nTestRuns; j++ )
227                {
228                        // Choose a randome character that's in our training set
229                        final int i = startChar + (int)(Math.random()*(endChar-startChar));
230                        
231                        // Create a single random image of the chosen character
232                        FontSimulator<Float, FImage> fs = new FontSimulator<Float,FImage>( 
233                                                Character.toString( (char)i ) );
234                        fs.setFontPointSize( 48 );
235                        FImage x = fs.generate( new FImage(1,1) );
236                        
237                        // Classify that character
238                        System.out.println( "Classifying character "+(char)i );
239                        char c = classify( x );
240                        
241                        // Update the test count
242                        if( c != i )
243                                        results.setFirstObject( results.firstObject().intValue()+1 );
244                        else    results.setSecondObject( results.secondObject().intValue()+1 );
245                }
246                
247                // Show the test results
248                int nWrong = results.firstObject();
249                int nCorrect = results.secondObject();
250                
251                System.out.println( "===========================================");
252                System.out.println( "           T E S T   R E S U L T S         ");
253                System.out.println( "===========================================");
254                System.out.println( "Number of runs: "+nTestRuns );
255                System.out.println( "Correct: "+nCorrect+" ("+(100*nCorrect/nTestRuns)+"%)" );
256                System.out.println( "Wrong:   "+nWrong+" ("+(100*nWrong/nTestRuns)+"%)" );
257                System.out.println( "===========================================");
258                
259        }
260        
261        /**
262         *      The main does a training run then a test run.
263         *  
264         *      @param args
265         */
266        public static void main( String[] args )
267        {
268                final KNNCharacterClassifier ocr = new KNNCharacterClassifier();
269                ocr.train();
270                ocr.test( 100 );
271        }
272}