View Javadoc

1   /**
2    * Copyright (c) 2011, The University of Southampton and the individual contributors.
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without modification,
6    * are permitted provided that the following conditions are met:
7    *
8    *   * 	Redistributions of source code must retain the above copyright notice,
9    * 	this list of conditions and the following disclaimer.
10   *
11   *   *	Redistributions in binary form must reproduce the above copyright notice,
12   * 	this list of conditions and the following disclaimer in the documentation
13   * 	and/or other materials provided with the distribution.
14   *
15   *   *	Neither the name of the University of Southampton nor the names of its
16   * 	contributors may be used to endorse or promote products derived from this
17   * 	software without specific prior written permission.
18   *
19   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22   * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23   * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26   * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28   * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29   */
30  /**
31   * 
32   */
33  package org.openimaj.image.text.extraction;
34  
35  import java.util.ArrayList;
36  import java.util.HashMap;
37  import java.util.List;
38  import java.util.Map;
39  
40  import org.openimaj.image.Image;
41  import org.openimaj.image.processor.ImageProcessor;
42  import org.openimaj.image.text.ocr.OCRProcessor;
43  import org.openimaj.math.geometry.shape.Rectangle;
44  import org.openimaj.util.pair.IndependentPair;
45  
46  /**
47   *	An interface for classes that are able to extract text from images.
48   *	The single method allows the retrieval of the text mapped to the
49   *	bounding boxes of the text within the image.
50   *	<p>
51   *	Note that this is an {@link ImageProcessor} extension so that the
52   *	{@link TextExtractor} should process the image prior to the
53   *	{@link #getTextRegions()} method being called.
54   *	<p>
55   *	This class will deal with the processing of extracted text regions
56   *	with the OCR processor. Use {@link #setOCRProcessor(OCRProcessor)} to
57   *	choose with OCR processor will be used on the extracted regions. 
58   *
59   *	@author David Dupplaw (dpd@ecs.soton.ac.uk)
60   *  @created 11 Aug 2011
61   *	
62   *  @param <T> The type of {@link Image} 
63   */
64  public abstract class TextExtractor<T extends Image<?,T>> 
65  	implements ImageProcessor<T>
66  {
67  	/** The OCR Processor to extract strings from text regions. */
68  	private OCRProcessor<T> ocr = null;
69  	
70  	/**
71  	 * 	Get the text regions that can be extracted from an image. The images
72  	 * 	in the values of the map need not be simply the extracted region that
73  	 * 	is bounded by the rectangular key (this can be done afterwards), but 
74  	 * 	may be a representation that is as near to canonical as possible -
75  	 * 	that is, it may be warped or thresholded such that an OCR processor
76  	 * 	may have less trouble reading the text 
77  	 * 
78  	 *	@return A map from bounding box in original image to a canonical
79  	 *		representation of the text (may be warped or thresholded)
80  	 */
81  	public abstract Map<Rectangle,T> getTextRegions();
82  	
83  	/**
84  	 * 	Get text that can be extracted from an image. The map should map a 
85  	 * 	bounding box within the processed image to a pair of extracted image vs.
86  	 * 	text string. The extracted image may not necessarily be the region
87  	 * 	of interest which the rectangle bounds; it can be as close to a 
88  	 * 	canonical representation of the text as possible such that an OCR
89  	 * 	would have less difficulty in classifying the text. For example,
90  	 *  the image may be thresholded or warped such that the text is straight.
91  	 * 
92  	 *	@return A map of bounding box to a pair of image and text string
93  	 */
94  	public Map<Rectangle, IndependentPair<T, String>> getText()
95  	{
96  		// The result map for the method
97  		Map<Rectangle, IndependentPair<T, String>> textMap = 
98  			new HashMap<Rectangle, IndependentPair<T,String>>();
99  				
100 		// Get the regions
101 		Map<Rectangle,T> textRegions = getTextRegions();
102 		
103 		// OCR the text from the text regions
104 		if( ocr != null )
105 		{
106 			for( Rectangle r : textRegions.keySet() )
107 			{
108 				// Process the image with the OCR Processor
109 				textRegions.get(r).analyseWith( ocr );
110 				
111 				// Get the text from the OCR Processor
112 				Map<Rectangle, String> m = ocr.getText();
113 				
114 				// For each of the rectangles returned from the OCR
115 				// we add them individually into the output set.
116 				for( Rectangle subR: m.keySet() )
117 				{
118 					String s = m.get( subR );
119 					
120 					// Translate into image coordinates (from sub-image coords)
121 					subR.translate( r.x, r.y );
122 					
123 					// Put into the output map
124 					textMap.put( subR, 
125 						new IndependentPair<T,String>( textRegions.get(r), s ) 
126 					);
127 				}
128 			}
129 		}
130 		else
131 		{
132 			// If no OCR is done, we simply add all the extracted text
133 			// regions with a null string.
134 			for( Rectangle r : textRegions.keySet() )
135 			{
136 				textMap.put( r, 
137 					new IndependentPair<T,String>( 
138 						textRegions.get(r), null ) );
139 			}
140 		}		
141 		
142 		return textMap;
143 	}
144 	
145 	/**
146 	 * 	If you're not interested in where the strings are located in the image
147 	 * 	you can use this method to simply get a list of extracted strings.
148 	 * 
149 	 *	@return A {@link List} of strings extracted from the image.
150 	 */
151 	public List<String> getTextStrings()
152 	{
153 		List<String> strings = new ArrayList<String>();
154 		
155 		if( ocr != null )
156 		{
157 			// Get the regions
158 			Map<Rectangle,T> textRegions = getTextRegions();
159 
160 			for( Rectangle r : textRegions.keySet() )
161 			{
162 				// Process the image with the OCR Processor
163 				textRegions.get(r).analyseWith( ocr );
164 				
165 				// Get the text from the OCR Processor
166 				Map<Rectangle, String> m = ocr.getText();
167 				strings.addAll( m.values() );
168 			}
169 		}
170 		
171 		return strings;
172 	}
173 	
174 	/**
175 	 * 	For the text regions that are extracted to be associated with textual
176 	 * 	representations of the text regions, an OCR processor must be used.
177 	 * 	Use this function to choose which OCR processor is used to extract
178 	 *  read text regions.
179 	 * 
180 	 *	@param ocr The {@link OCRProcessor} to use
181 	 */
182 	public void setOCRProcessor( OCRProcessor<T> ocr )
183 	{
184 		this.ocr = ocr;
185 	}
186 	
187 	/**
188 	 * 	Return the OCR processor being used to extract text from the
189 	 * 	image.
190 	 * 
191 	 *	@return The {@link OCRProcessor}
192 	 */
193 	public OCRProcessor<T> getOCRProcessor()
194 	{
195 		return this.ocr;
196 	}
197 }