1 /** 2 * Copyright (c) 2011, The University of Southampton and the individual contributors. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without modification, 6 * are permitted provided that the following conditions are met: 7 * 8 * * Redistributions of source code must retain the above copyright notice, 9 * this list of conditions and the following disclaimer. 10 * 11 * * Redistributions in binary form must reproduce the above copyright notice, 12 * this list of conditions and the following disclaimer in the documentation 13 * and/or other materials provided with the distribution. 14 * 15 * * Neither the name of the University of Southampton nor the names of its 16 * contributors may be used to endorse or promote products derived from this 17 * software without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 23 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 26 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 /** 31 * 32 */ 33 package org.openimaj.image.text.extraction; 34 35 import java.util.ArrayList; 36 import java.util.HashMap; 37 import java.util.List; 38 import java.util.Map; 39 40 import org.openimaj.image.Image; 41 import org.openimaj.image.processor.ImageProcessor; 42 import org.openimaj.image.text.ocr.OCRProcessor; 43 import org.openimaj.math.geometry.shape.Rectangle; 44 import org.openimaj.util.pair.IndependentPair; 45 46 /** 47 * An interface for classes that are able to extract text from images. 48 * The single method allows the retrieval of the text mapped to the 49 * bounding boxes of the text within the image. 50 * <p> 51 * Note that this is an {@link ImageProcessor} extension so that the 52 * {@link TextExtractor} should process the image prior to the 53 * {@link #getTextRegions()} method being called. 54 * <p> 55 * This class will deal with the processing of extracted text regions 56 * with the OCR processor. Use {@link #setOCRProcessor(OCRProcessor)} to 57 * choose with OCR processor will be used on the extracted regions. 58 * 59 * @author David Dupplaw (dpd@ecs.soton.ac.uk) 60 * @created 11 Aug 2011 61 * 62 * @param <T> The type of {@link Image} 63 */ 64 public abstract class TextExtractor<T extends Image<?,T>> 65 implements ImageProcessor<T> 66 { 67 /** The OCR Processor to extract strings from text regions. */ 68 private OCRProcessor<T> ocr = null; 69 70 /** 71 * Get the text regions that can be extracted from an image. The images 72 * in the values of the map need not be simply the extracted region that 73 * is bounded by the rectangular key (this can be done afterwards), but 74 * may be a representation that is as near to canonical as possible - 75 * that is, it may be warped or thresholded such that an OCR processor 76 * may have less trouble reading the text 77 * 78 * @return A map from bounding box in original image to a canonical 79 * representation of the text (may be warped or thresholded) 80 */ 81 public abstract Map<Rectangle,T> getTextRegions(); 82 83 /** 84 * Get text that can be extracted from an image. The map should map a 85 * bounding box within the processed image to a pair of extracted image vs. 86 * text string. The extracted image may not necessarily be the region 87 * of interest which the rectangle bounds; it can be as close to a 88 * canonical representation of the text as possible such that an OCR 89 * would have less difficulty in classifying the text. For example, 90 * the image may be thresholded or warped such that the text is straight. 91 * 92 * @return A map of bounding box to a pair of image and text string 93 */ 94 public Map<Rectangle, IndependentPair<T, String>> getText() 95 { 96 // The result map for the method 97 Map<Rectangle, IndependentPair<T, String>> textMap = 98 new HashMap<Rectangle, IndependentPair<T,String>>(); 99 100 // Get the regions 101 Map<Rectangle,T> textRegions = getTextRegions(); 102 103 // OCR the text from the text regions 104 if( ocr != null ) 105 { 106 for( Rectangle r : textRegions.keySet() ) 107 { 108 // Process the image with the OCR Processor 109 textRegions.get(r).analyseWith( ocr ); 110 111 // Get the text from the OCR Processor 112 Map<Rectangle, String> m = ocr.getText(); 113 114 // For each of the rectangles returned from the OCR 115 // we add them individually into the output set. 116 for( Rectangle subR: m.keySet() ) 117 { 118 String s = m.get( subR ); 119 120 // Translate into image coordinates (from sub-image coords) 121 subR.translate( r.x, r.y ); 122 123 // Put into the output map 124 textMap.put( subR, 125 new IndependentPair<T,String>( textRegions.get(r), s ) 126 ); 127 } 128 } 129 } 130 else 131 { 132 // If no OCR is done, we simply add all the extracted text 133 // regions with a null string. 134 for( Rectangle r : textRegions.keySet() ) 135 { 136 textMap.put( r, 137 new IndependentPair<T,String>( 138 textRegions.get(r), null ) ); 139 } 140 } 141 142 return textMap; 143 } 144 145 /** 146 * If you're not interested in where the strings are located in the image 147 * you can use this method to simply get a list of extracted strings. 148 * 149 * @return A {@link List} of strings extracted from the image. 150 */ 151 public List<String> getTextStrings() 152 { 153 List<String> strings = new ArrayList<String>(); 154 155 if( ocr != null ) 156 { 157 // Get the regions 158 Map<Rectangle,T> textRegions = getTextRegions(); 159 160 for( Rectangle r : textRegions.keySet() ) 161 { 162 // Process the image with the OCR Processor 163 textRegions.get(r).analyseWith( ocr ); 164 165 // Get the text from the OCR Processor 166 Map<Rectangle, String> m = ocr.getText(); 167 strings.addAll( m.values() ); 168 } 169 } 170 171 return strings; 172 } 173 174 /** 175 * For the text regions that are extracted to be associated with textual 176 * representations of the text regions, an OCR processor must be used. 177 * Use this function to choose which OCR processor is used to extract 178 * read text regions. 179 * 180 * @param ocr The {@link OCRProcessor} to use 181 */ 182 public void setOCRProcessor( OCRProcessor<T> ocr ) 183 { 184 this.ocr = ocr; 185 } 186 187 /** 188 * Return the OCR processor being used to extract text from the 189 * image. 190 * 191 * @return The {@link OCRProcessor} 192 */ 193 public OCRProcessor<T> getOCRProcessor() 194 { 195 return this.ocr; 196 } 197 }