1 /**
2 * Copyright (c) 2011, The University of Southampton and the individual contributors.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without modification,
6 * are permitted provided that the following conditions are met:
7 *
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * * Neither the name of the University of Southampton nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30 /**
31 *
32 */
33 package org.openimaj.image.text.extraction;
34
35 import java.util.ArrayList;
36 import java.util.HashMap;
37 import java.util.List;
38 import java.util.Map;
39
40 import org.openimaj.image.Image;
41 import org.openimaj.image.processor.ImageProcessor;
42 import org.openimaj.image.text.ocr.OCRProcessor;
43 import org.openimaj.math.geometry.shape.Rectangle;
44 import org.openimaj.util.pair.IndependentPair;
45
46 /**
47 * An interface for classes that are able to extract text from images.
48 * The single method allows the retrieval of the text mapped to the
49 * bounding boxes of the text within the image.
50 * <p>
51 * Note that this is an {@link ImageProcessor} extension so that the
52 * {@link TextExtractor} should process the image prior to the
53 * {@link #getTextRegions()} method being called.
54 * <p>
55 * This class will deal with the processing of extracted text regions
56 * with the OCR processor. Use {@link #setOCRProcessor(OCRProcessor)} to
57 * choose with OCR processor will be used on the extracted regions.
58 *
59 * @author David Dupplaw (dpd@ecs.soton.ac.uk)
60 * @created 11 Aug 2011
61 *
62 * @param <T> The type of {@link Image}
63 */
64 public abstract class TextExtractor<T extends Image<?,T>>
65 implements ImageProcessor<T>
66 {
67 /** The OCR Processor to extract strings from text regions. */
68 private OCRProcessor<T> ocr = null;
69
70 /**
71 * Get the text regions that can be extracted from an image. The images
72 * in the values of the map need not be simply the extracted region that
73 * is bounded by the rectangular key (this can be done afterwards), but
74 * may be a representation that is as near to canonical as possible -
75 * that is, it may be warped or thresholded such that an OCR processor
76 * may have less trouble reading the text
77 *
78 * @return A map from bounding box in original image to a canonical
79 * representation of the text (may be warped or thresholded)
80 */
81 public abstract Map<Rectangle,T> getTextRegions();
82
83 /**
84 * Get text that can be extracted from an image. The map should map a
85 * bounding box within the processed image to a pair of extracted image vs.
86 * text string. The extracted image may not necessarily be the region
87 * of interest which the rectangle bounds; it can be as close to a
88 * canonical representation of the text as possible such that an OCR
89 * would have less difficulty in classifying the text. For example,
90 * the image may be thresholded or warped such that the text is straight.
91 *
92 * @return A map of bounding box to a pair of image and text string
93 */
94 public Map<Rectangle, IndependentPair<T, String>> getText()
95 {
96 // The result map for the method
97 Map<Rectangle, IndependentPair<T, String>> textMap =
98 new HashMap<Rectangle, IndependentPair<T,String>>();
99
100 // Get the regions
101 Map<Rectangle,T> textRegions = getTextRegions();
102
103 // OCR the text from the text regions
104 if( ocr != null )
105 {
106 for( Rectangle r : textRegions.keySet() )
107 {
108 // Process the image with the OCR Processor
109 textRegions.get(r).analyseWith( ocr );
110
111 // Get the text from the OCR Processor
112 Map<Rectangle, String> m = ocr.getText();
113
114 // For each of the rectangles returned from the OCR
115 // we add them individually into the output set.
116 for( Rectangle subR: m.keySet() )
117 {
118 String s = m.get( subR );
119
120 // Translate into image coordinates (from sub-image coords)
121 subR.translate( r.x, r.y );
122
123 // Put into the output map
124 textMap.put( subR,
125 new IndependentPair<T,String>( textRegions.get(r), s )
126 );
127 }
128 }
129 }
130 else
131 {
132 // If no OCR is done, we simply add all the extracted text
133 // regions with a null string.
134 for( Rectangle r : textRegions.keySet() )
135 {
136 textMap.put( r,
137 new IndependentPair<T,String>(
138 textRegions.get(r), null ) );
139 }
140 }
141
142 return textMap;
143 }
144
145 /**
146 * If you're not interested in where the strings are located in the image
147 * you can use this method to simply get a list of extracted strings.
148 *
149 * @return A {@link List} of strings extracted from the image.
150 */
151 public List<String> getTextStrings()
152 {
153 List<String> strings = new ArrayList<String>();
154
155 if( ocr != null )
156 {
157 // Get the regions
158 Map<Rectangle,T> textRegions = getTextRegions();
159
160 for( Rectangle r : textRegions.keySet() )
161 {
162 // Process the image with the OCR Processor
163 textRegions.get(r).analyseWith( ocr );
164
165 // Get the text from the OCR Processor
166 Map<Rectangle, String> m = ocr.getText();
167 strings.addAll( m.values() );
168 }
169 }
170
171 return strings;
172 }
173
174 /**
175 * For the text regions that are extracted to be associated with textual
176 * representations of the text regions, an OCR processor must be used.
177 * Use this function to choose which OCR processor is used to extract
178 * read text regions.
179 *
180 * @param ocr The {@link OCRProcessor} to use
181 */
182 public void setOCRProcessor( OCRProcessor<T> ocr )
183 {
184 this.ocr = ocr;
185 }
186
187 /**
188 * Return the OCR processor being used to extract text from the
189 * image.
190 *
191 * @return The {@link OCRProcessor}
192 */
193 public OCRProcessor<T> getOCRProcessor()
194 {
195 return this.ocr;
196 }
197 }