Source code

001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030/**
031 * 
032 */
033package org.openimaj.image.text.extraction;
034
035import java.util.ArrayList;
036import java.util.HashMap;
037import java.util.List;
038import java.util.Map;
039
040import org.openimaj.image.Image;
041import org.openimaj.image.processor.ImageProcessor;
042import org.openimaj.image.text.ocr.OCRProcessor;
043import org.openimaj.math.geometry.shape.Rectangle;
044import org.openimaj.util.pair.IndependentPair;
045
046/**
047 *      An interface for classes that are able to extract text from images.
048 *      The single method allows the retrieval of the text mapped to the
049 *      bounding boxes of the text within the image.
050 *      <p>
051 *      Note that this is an {@link ImageProcessor} extension so that the
052 *      {@link TextExtractor} should process the image prior to the
053 *      {@link #getTextRegions()} method being called.
054 *      <p>
055 *      This class will deal with the processing of extracted text regions
056 *      with the OCR processor. Use {@link #setOCRProcessor(OCRProcessor)} to
057 *      choose with OCR processor will be used on the extracted regions. 
058 *
059 *      @author David Dupplaw (dpd@ecs.soton.ac.uk)
060 *  @created 11 Aug 2011
061 *      
062 *  @param <T> The type of {@link Image} 
063 */
064public abstract class TextExtractor<T extends Image<?,T>> 
065        implements ImageProcessor<T>
066{
067        /** The OCR Processor to extract strings from text regions. */
068        private OCRProcessor<T> ocr = null;
069        
070        /**
071         *      Get the text regions that can be extracted from an image. The images
072         *      in the values of the map need not be simply the extracted region that
073         *      is bounded by the rectangular key (this can be done afterwards), but 
074         *      may be a representation that is as near to canonical as possible -
075         *      that is, it may be warped or thresholded such that an OCR processor
076         *      may have less trouble reading the text 
077         * 
078         *      @return A map from bounding box in original image to a canonical
079         *              representation of the text (may be warped or thresholded)
080         */
081        public abstract Map<Rectangle,T> getTextRegions();
082        
083        /**
084         *      Get text that can be extracted from an image. The map should map a 
085         *      bounding box within the processed image to a pair of extracted image vs.
086         *      text string. The extracted image may not necessarily be the region
087         *      of interest which the rectangle bounds; it can be as close to a 
088         *      canonical representation of the text as possible such that an OCR
089         *      would have less difficulty in classifying the text. For example,
090         *  the image may be thresholded or warped such that the text is straight.
091         * 
092         *      @return A map of bounding box to a pair of image and text string
093         */
094        public Map<Rectangle, IndependentPair<T, String>> getText()
095        {
096                // The result map for the method
097                Map<Rectangle, IndependentPair<T, String>> textMap = 
098                        new HashMap<Rectangle, IndependentPair<T,String>>();
099                                
100                // Get the regions
101                Map<Rectangle,T> textRegions = getTextRegions();
102                
103                // OCR the text from the text regions
104                if( ocr != null )
105                {
106                        for( Rectangle r : textRegions.keySet() )
107                        {
108                                // Process the image with the OCR Processor
109                                textRegions.get(r).analyseWith( ocr );
110                                
111                                // Get the text from the OCR Processor
112                                Map<Rectangle, String> m = ocr.getText();
113                                
114                                // For each of the rectangles returned from the OCR
115                                // we add them individually into the output set.
116                                for( Rectangle subR: m.keySet() )
117                                {
118                                        String s = m.get( subR );
119                                        
120                                        // Translate into image coordinates (from sub-image coords)
121                                        subR.translate( r.x, r.y );
122                                        
123                                        // Put into the output map
124                                        textMap.put( subR, 
125                                                new IndependentPair<T,String>( textRegions.get(r), s ) 
126                                        );
127                                }
128                        }
129                }
130                else
131                {
132                        // If no OCR is done, we simply add all the extracted text
133                        // regions with a null string.
134                        for( Rectangle r : textRegions.keySet() )
135                        {
136                                textMap.put( r, 
137                                        new IndependentPair<T,String>( 
138                                                textRegions.get(r), null ) );
139                        }
140                }               
141                
142                return textMap;
143        }
144        
145        /**
146         *      If you're not interested in where the strings are located in the image
147         *      you can use this method to simply get a list of extracted strings.
148         * 
149         *      @return A {@link List} of strings extracted from the image.
150         */
151        public List<String> getTextStrings()
152        {
153                List<String> strings = new ArrayList<String>();
154                
155                if( ocr != null )
156                {
157                        // Get the regions
158                        Map<Rectangle,T> textRegions = getTextRegions();
159
160                        for( Rectangle r : textRegions.keySet() )
161                        {
162                                // Process the image with the OCR Processor
163                                textRegions.get(r).analyseWith( ocr );
164                                
165                                // Get the text from the OCR Processor
166                                Map<Rectangle, String> m = ocr.getText();
167                                strings.addAll( m.values() );
168                        }
169                }
170                
171                return strings;
172        }
173        
174        /**
175         *      For the text regions that are extracted to be associated with textual
176         *      representations of the text regions, an OCR processor must be used.
177         *      Use this function to choose which OCR processor is used to extract
178         *  read text regions.
179         * 
180         *      @param ocr The {@link OCRProcessor} to use
181         */
182        public void setOCRProcessor( OCRProcessor<T> ocr )
183        {
184                this.ocr = ocr;
185        }
186        
187        /**
188         *      Return the OCR processor being used to extract text from the
189         *      image.
190         * 
191         *      @return The {@link OCRProcessor}
192         */
193        public OCRProcessor<T> getOCRProcessor()
194        {
195                return this.ocr;
196        }
197}