1 /**
2 * Copyright (c) 2011, The University of Southampton and the individual contributors.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without modification,
6 * are permitted provided that the following conditions are met:
7 *
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * * Neither the name of the University of Southampton nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30 /**
31 *
32 */
33 package org.openimaj.image.text.extraction;
34
import java.util.LinkedHashMap;
import java.util.Map;

import org.openimaj.citation.annotation.Reference;
import org.openimaj.citation.annotation.ReferenceType;
import org.openimaj.image.DisplayUtilities;
import org.openimaj.image.FImage;
import org.openimaj.image.analysis.pyramid.PyramidProcessor;
import org.openimaj.image.analysis.pyramid.gaussian.GaussianOctave;
import org.openimaj.image.analysis.pyramid.gaussian.GaussianPyramid;
import org.openimaj.image.analysis.pyramid.gaussian.GaussianPyramidOptions;
import org.openimaj.image.processing.resize.ResizeProcessor;
import org.openimaj.math.geometry.shape.Rectangle;
47
48 /**
49 * An implementation of the multiscale text extractor from
50 *
51 * MULTISCALE EDGE-BASED TEXT EXTRACTION FROM COMPLEX IMAGES;
52 * Xiaoqing Liu and Jagath Samarabandu
53 * The University of Western Ontario
54 *
55 * http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=4036951.
56 * <p>
57 * This multiscale text extractor uses a Gaussian pyramid to produce the
58 * multiscale feature vector. From this, the basic text extraction algorithm
59 * is used (see the {@link LiuSamarabanduTextExtractorBasic} implementation)
60 * on each image and the results combined using across-scale addition.
61 *
62 * @author David Dupplaw (dpd@ecs.soton.ac.uk)
63 * @created 28 Jul 2011
64 *
65 */
66 @Reference(
67 type = ReferenceType.Inproceedings,
68 author = { "Liu, X.", "Samarabandu, J." },
69 title = "Multiscale Edge-Based Text Extraction from Complex Images",
70 year = "2006",
71 booktitle = "Multimedia and Expo, 2006 IEEE International Conference on",
72 pages = { "1721 ", "1724" },
73 month = "july",
74 number = "",
75 volume = "",
76 customData = { "keywords", "multiscale edge-based text extraction;printed document image;scene text;text detection;document image processing;edge detection;feature extraction;text analysis;", "doi", "10.1109/ICME.2006.262882", "ISSN", "" }
77 )
78 public class LiuSamarabanduTextExtractorMultiscale extends TextExtractor<FImage>
79 {
80 private static final boolean DEBUG = true;
81
82 /** The basic text extractor implementation */
83 private final LiuSamarabanduTextExtractorBasic basicTextExtractor =
84 new LiuSamarabanduTextExtractorBasic();
85
86 /** The extracted regions from the processing */
87 private Map<Rectangle, FImage> extractedRegions;
88
89 /** Whether to double the size of the initial image in the pyramid */
90 private boolean doubleSizePyramid = true;
91
92 /**
93 * This is the main processor for this text extractor. For each of the
94 * multiscale pyramid images, this performs the basic text extraction.
95 *
96 * @author David Dupplaw (dpd@ecs.soton.ac.uk)
97 * @created 28 Jul 2011
98 *
99 */
100 public class PyramidTextExtractor implements PyramidProcessor<FImage>
101 {
102 /** The resulting feature map */
103 private FImage featureMap = null;
104
105 /**
106 * Get the feature map for the image.
107 * @return The feature map for the image.
108 */
109 public FImage getFeatureMap()
110 {
111 return this.featureMap;
112 }
113
114 /**
115 * {@inheritDoc}
116 * @see org.openimaj.image.analysis.pyramid.PyramidProcessor#process(org.openimaj.image.analysis.pyramid.gaussian.GaussianPyramid)
117 */
118 @Override
119 public void process( final GaussianPyramid<FImage> pyramid )
120 {
121 FImage fmap = null;
122
123 // Process each of the octaves in the pyramid
124 for( final GaussianOctave<FImage> octave : pyramid )
125 {
126 // Extract text regions using the basic text extractor
127 FImage octaveFMap = LiuSamarabanduTextExtractorMultiscale.this.basicTextExtractor.textRegionDetection(
128 octave.getNextOctaveImage() );
129
130 if( fmap == null )
131 fmap = octaveFMap;
132 else
133 {
134 // Fuse across scales
135 octaveFMap = ResizeProcessor.resample( octaveFMap,
136 fmap.getWidth(), fmap.getHeight() ).normalise();
137
138 if( LiuSamarabanduTextExtractorMultiscale.DEBUG )
139 DisplayUtilities.display( octaveFMap, "Resized feature map" );
140
141 fmap.addInplace( octaveFMap );
142 }
143 }
144
145 this.featureMap = fmap;
146 }
147 }
148
149 /**
150 * Helper method for debugging when viewing images
151 */
152 protected void forceWait()
153 {
154 synchronized(this){ try { this.wait( 200000 ); } catch( final InterruptedException e1 ) {} }
155 }
156
157 /**
158 * {@inheritDoc}
159 * @see org.openimaj.image.processor.ImageProcessor#processImage(org.openimaj.image.Image)
160 */
161 @Override
162 public void processImage( final FImage image )
163 {
164 final PyramidTextExtractor ped = new PyramidTextExtractor();
165
166 // Unlike Lowe's SIFT DoG pyramid, we just need a basic pyramid
167 final GaussianPyramidOptions<FImage> gpo = new GaussianPyramidOptions<FImage>();
168 gpo.setScales( 1 );
169 gpo.setExtraScaleSteps( 1 );
170 gpo.setPyramidProcessor( ped );
171 gpo.setDoubleInitialImage( this.doubleSizePyramid );
172
173 // Create and process the pyramid
174 final GaussianPyramid<FImage> gp = new GaussianPyramid<FImage>( gpo );
175 image.analyseWith( gp );
176
177 // -------------------------------------------------------------
178 // This is not part of the Liu/Samarabandu algorithm:
179 // Multiscale feature map
180 FImage msFMap = ped.getFeatureMap();
181
182 // Single scale feature map
183 FImage fmap = this.basicTextExtractor.textRegionDetection( image );
184
185 // Need to make it match the multiscale feature map
186 if( this.doubleSizePyramid )
187 fmap = ResizeProcessor.doubleSize( fmap );
188
189 // Combine the two.
190 msFMap = fmap.add( msFMap );
191 // -------------------------------------------------------------
192
193 if( LiuSamarabanduTextExtractorMultiscale.DEBUG )
194 DisplayUtilities.display( msFMap.normalise(), "Fused Feature Map" );
195
196 // Process the feature map
197 this.basicTextExtractor.processFeatureMap( msFMap, image );
198
199 // Store the regions
200 this.extractedRegions = this.basicTextExtractor.getTextRegions();
201
202 // If we doubled the feature map, we'll have to half the size of the bounding boxes.
203 if( this.doubleSizePyramid )
204 for( final Rectangle r : this.extractedRegions.keySet() )
205 r.scale( 0.5f );
206
207 // The output of the processor is the feature map
208 image.internalAssign( fmap );
209 }
210
211 /**
212 * {@inheritDoc}
213 * @see org.openimaj.image.text.extraction.TextExtractor#getTextRegions()
214 */
215 @Override
216 public Map<Rectangle, FImage> getTextRegions()
217 {
218 return this.extractedRegions;
219 }
220
221 /**
222 * Whether the initial image in the pyramid is being double sized.
223 * @return TRUE if the initial image is double sized.
224 */
225 public boolean isDoubleSizePyramid()
226 {
227 return this.doubleSizePyramid;
228 }
229
230 /**
231 * Set whether to double the size of the pyramid
232 * @param doubleSizePyramid TRUE to double the size of the initial image.
233 */
234 public void setDoubleSizePyramid( final boolean doubleSizePyramid )
235 {
236 this.doubleSizePyramid = doubleSizePyramid;
237 }
238 }