001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030/** 031 * 032 */ 033package org.openimaj.image.text.extraction; 034 035import java.util.ArrayList; 036import java.util.HashMap; 037import java.util.List; 038import java.util.Map; 039 040import org.openimaj.image.Image; 041import org.openimaj.image.processor.ImageProcessor; 042import org.openimaj.image.text.ocr.OCRProcessor; 043import org.openimaj.math.geometry.shape.Rectangle; 044import org.openimaj.util.pair.IndependentPair; 045 046/** 047 * An interface for classes that are able to extract text from images. 048 * The single method allows the retrieval of the text mapped to the 049 * bounding boxes of the text within the image. 050 * <p> 051 * Note that this is an {@link ImageProcessor} extension so that the 052 * {@link TextExtractor} should process the image prior to the 053 * {@link #getTextRegions()} method being called. 054 * <p> 055 * This class will deal with the processing of extracted text regions 056 * with the OCR processor. Use {@link #setOCRProcessor(OCRProcessor)} to 057 * choose with OCR processor will be used on the extracted regions. 058 * 059 * @author David Dupplaw (dpd@ecs.soton.ac.uk) 060 * @created 11 Aug 2011 061 * 062 * @param <T> The type of {@link Image} 063 */ 064public abstract class TextExtractor<T extends Image<?,T>> 065 implements ImageProcessor<T> 066{ 067 /** The OCR Processor to extract strings from text regions. */ 068 private OCRProcessor<T> ocr = null; 069 070 /** 071 * Get the text regions that can be extracted from an image. The images 072 * in the values of the map need not be simply the extracted region that 073 * is bounded by the rectangular key (this can be done afterwards), but 074 * may be a representation that is as near to canonical as possible - 075 * that is, it may be warped or thresholded such that an OCR processor 076 * may have less trouble reading the text 077 * 078 * @return A map from bounding box in original image to a canonical 079 * representation of the text (may be warped or thresholded) 080 */ 081 public abstract Map<Rectangle,T> getTextRegions(); 082 083 /** 084 * Get text that can be extracted from an image. The map should map a 085 * bounding box within the processed image to a pair of extracted image vs. 086 * text string. The extracted image may not necessarily be the region 087 * of interest which the rectangle bounds; it can be as close to a 088 * canonical representation of the text as possible such that an OCR 089 * would have less difficulty in classifying the text. For example, 090 * the image may be thresholded or warped such that the text is straight. 091 * 092 * @return A map of bounding box to a pair of image and text string 093 */ 094 public Map<Rectangle, IndependentPair<T, String>> getText() 095 { 096 // The result map for the method 097 Map<Rectangle, IndependentPair<T, String>> textMap = 098 new HashMap<Rectangle, IndependentPair<T,String>>(); 099 100 // Get the regions 101 Map<Rectangle,T> textRegions = getTextRegions(); 102 103 // OCR the text from the text regions 104 if( ocr != null ) 105 { 106 for( Rectangle r : textRegions.keySet() ) 107 { 108 // Process the image with the OCR Processor 109 textRegions.get(r).analyseWith( ocr ); 110 111 // Get the text from the OCR Processor 112 Map<Rectangle, String> m = ocr.getText(); 113 114 // For each of the rectangles returned from the OCR 115 // we add them individually into the output set. 116 for( Rectangle subR: m.keySet() ) 117 { 118 String s = m.get( subR ); 119 120 // Translate into image coordinates (from sub-image coords) 121 subR.translate( r.x, r.y ); 122 123 // Put into the output map 124 textMap.put( subR, 125 new IndependentPair<T,String>( textRegions.get(r), s ) 126 ); 127 } 128 } 129 } 130 else 131 { 132 // If no OCR is done, we simply add all the extracted text 133 // regions with a null string. 134 for( Rectangle r : textRegions.keySet() ) 135 { 136 textMap.put( r, 137 new IndependentPair<T,String>( 138 textRegions.get(r), null ) ); 139 } 140 } 141 142 return textMap; 143 } 144 145 /** 146 * If you're not interested in where the strings are located in the image 147 * you can use this method to simply get a list of extracted strings. 148 * 149 * @return A {@link List} of strings extracted from the image. 150 */ 151 public List<String> getTextStrings() 152 { 153 List<String> strings = new ArrayList<String>(); 154 155 if( ocr != null ) 156 { 157 // Get the regions 158 Map<Rectangle,T> textRegions = getTextRegions(); 159 160 for( Rectangle r : textRegions.keySet() ) 161 { 162 // Process the image with the OCR Processor 163 textRegions.get(r).analyseWith( ocr ); 164 165 // Get the text from the OCR Processor 166 Map<Rectangle, String> m = ocr.getText(); 167 strings.addAll( m.values() ); 168 } 169 } 170 171 return strings; 172 } 173 174 /** 175 * For the text regions that are extracted to be associated with textual 176 * representations of the text regions, an OCR processor must be used. 177 * Use this function to choose which OCR processor is used to extract 178 * read text regions. 179 * 180 * @param ocr The {@link OCRProcessor} to use 181 */ 182 public void setOCRProcessor( OCRProcessor<T> ocr ) 183 { 184 this.ocr = ocr; 185 } 186 187 /** 188 * Return the OCR processor being used to extract text from the 189 * image. 190 * 191 * @return The {@link OCRProcessor} 192 */ 193 public OCRProcessor<T> getOCRProcessor() 194 { 195 return this.ocr; 196 } 197}