001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.image.ocr;
031
032import java.io.File;
033import java.io.IOException;
034import java.net.URL;
035import java.util.ArrayList;
036import java.util.Collections;
037import java.util.List;
038
039import org.openimaj.image.FImage;
040import org.openimaj.image.ImageUtilities;
041import org.openimaj.image.analysis.algorithm.TemplateMatcher;
042import org.openimaj.image.pixel.FValuePixel;
043
044/**
045 * Really simple (arabic numerical) OCR engine, specifically designed to extract
046 * the date and time from the GlacsWeb timelapse images <a
047 * href="http://data.glacsweb.info/iceland/webcam/river/">here</a>.
048 * 
049 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
050 * 
051 */
052public class SimpleOCR {
053        class NumberOccurance implements Comparable<NumberOccurance> {
054                int offset;
055                int value;
056
057                NumberOccurance(int offset, int value) {
058                        this.offset = offset;
059                        this.value = value;
060                }
061
062                @Override
063                public int compareTo(NumberOccurance o) {
064                        return ((Integer) offset).compareTo(o.offset);
065                }
066        }
067
068        private TemplateMatcher[] templates = new TemplateMatcher[10];
069
070        /**
071         * Construct the engine and load the templates.
072         * 
073         * @throws IOException
074         */
075        public SimpleOCR() throws IOException {
076                for (int i = 0; i < 10; i++) {
077                        final FImage img = ImageUtilities.readF(SimpleOCR.class.getResourceAsStream(i + ".png"));
078                        templates[i] = new TemplateMatcher(img, TemplateMatcher.Mode.NORM_CORRELATION_COEFFICIENT);
079                }
080        }
081
082        String processInternal(FImage img, String separator, int... pattern) throws Exception {
083                final List<NumberOccurance> occurances = new ArrayList<NumberOccurance>();
084
085                int num = 0;
086                for (final int i : pattern)
087                        num += i;
088
089                for (int i = 0; i < 10; i++) {
090                        templates[i].analyseImage(img);
091
092                        final FValuePixel[] resp = templates[i].getBestResponses(num);
093
094                        for (final FValuePixel pt : resp) {
095                                if (pt.value > 0.95) {
096                                        occurances.add(new NumberOccurance(pt.x, i));
097                                }
098                        }
099                }
100
101                if (occurances.size() != num)
102                        throw new Exception();
103
104                Collections.sort(occurances);
105
106                String result = "";
107                int i = 0, j = 0;
108                for (final NumberOccurance no : occurances) {
109                        if (pattern[i] == j) {
110                                j = 0;
111                                i++;
112                                result += separator;
113                        }
114
115                        result += no.value;
116
117                        j++;
118                }
119
120                return result;
121        }
122
123        /**
124         * Extract the date-time from the given image
125         * 
126         * @param image
127         * @return the date-time string
128         * @throws Exception
129         *             if there was an error
130         */
131        public String process(FImage image) throws Exception {
132                final String date = processInternal(extractDateArea(image), "/", 4, 2, 2);
133                final String time = processInternal(extractTimeArea(image), ":", 2, 2, 2);
134
135                return date + " " + time;
136        }
137
138        private FImage extractDateArea(FImage image) {
139                return image.extractROI(664, 1024, 176, 16);
140        }
141
142        private FImage extractTimeArea(FImage image) {
143                return image.extractROI(840, 1024, 144, 16);
144        }
145
146        /**
147         * Process the given images (filenames or urls)
148         * 
149         * @param args
150         * @throws Exception
151         */
152        public static void main(String[] args) throws Exception {
153                final SimpleOCR gocr = new SimpleOCR();
154
155                for (final String f : args) {
156                        try {
157                                FImage image;
158
159                                if (f.contains("://"))
160                                        image = ImageUtilities.readF(new URL(f));
161                                else
162                                        image = ImageUtilities.readF(new File(f));
163
164                                System.out.println(f + " " + gocr.process(image));
165                        } catch (final Exception e) {
166                                System.out.println(f + " error occurred performing ocr");
167                        }
168                }
169        }
170}