View Javadoc

1   /**
2    * Copyright (c) 2011, The University of Southampton and the individual contributors.
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without modification,
6    * are permitted provided that the following conditions are met:
7    *
8    *   * 	Redistributions of source code must retain the above copyright notice,
9    * 	this list of conditions and the following disclaimer.
10   *
11   *   *	Redistributions in binary form must reproduce the above copyright notice,
12   * 	this list of conditions and the following disclaimer in the documentation
13   * 	and/or other materials provided with the distribution.
14   *
15   *   *	Neither the name of the University of Southampton nor the names of its
16   * 	contributors may be used to endorse or promote products derived from this
17   * 	software without specific prior written permission.
18   *
19   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22   * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23   * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26   * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28   * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29   */
30  /**
31   *
32   */
33  package org.openimaj.image.text.extraction;
34  
import java.util.LinkedHashMap;
import java.util.Map;

import org.openimaj.citation.annotation.Reference;
import org.openimaj.citation.annotation.ReferenceType;
import org.openimaj.image.DisplayUtilities;
import org.openimaj.image.FImage;
import org.openimaj.image.analysis.pyramid.PyramidProcessor;
import org.openimaj.image.analysis.pyramid.gaussian.GaussianOctave;
import org.openimaj.image.analysis.pyramid.gaussian.GaussianPyramid;
import org.openimaj.image.analysis.pyramid.gaussian.GaussianPyramidOptions;
import org.openimaj.image.processing.resize.ResizeProcessor;
import org.openimaj.math.geometry.shape.Rectangle;
47  
48  /**
49   *	An implementation of the multiscale text extractor from
50   *
51   *	MULTISCALE EDGE-BASED TEXT EXTRACTION FROM COMPLEX IMAGES;
52   *	Xiaoqing Liu and Jagath Samarabandu
53   *	The University of Western Ontario
54   *
55   *	http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=4036951.
56   *	<p>
57   *	This multiscale text extractor uses a Gaussian pyramid to produce the
58   *	multiscale feature vector. From this, the basic text extraction algorithm
59   *	is used (see the {@link LiuSamarabanduTextExtractorBasic} implementation)
60   *	on each image and the results combined using across-scale addition.
61   *
62   *	@author David Dupplaw (dpd@ecs.soton.ac.uk)
63   *  @created 28 Jul 2011
64   *
65   */
66  @Reference(
67  		type = ReferenceType.Inproceedings,
68  		author = { "Liu, X.", "Samarabandu, J." },
69  		title = "Multiscale Edge-Based Text Extraction from Complex Images",
70  		year = "2006",
71  		booktitle = "Multimedia and Expo, 2006 IEEE International Conference on",
72  		pages = { "1721 ", "1724" },
73  		month = "july",
74  		number = "",
75  		volume = "",
76  		customData = { "keywords", "multiscale edge-based text extraction;printed document image;scene text;text detection;document image processing;edge detection;feature extraction;text analysis;", "doi", "10.1109/ICME.2006.262882", "ISSN", "" }
77  	)
78  public class LiuSamarabanduTextExtractorMultiscale extends TextExtractor<FImage>
79  {
80  	private static final boolean DEBUG = true;
81  
82  	/** The basic text extractor implementation */
83  	private final LiuSamarabanduTextExtractorBasic basicTextExtractor =
84  		new LiuSamarabanduTextExtractorBasic();
85  
86  	/** The extracted regions from the processing */
87  	private Map<Rectangle, FImage> extractedRegions;
88  
89  	/** Whether to double the size of the initial image in the pyramid */
90  	private boolean doubleSizePyramid = true;
91  
92  	/**
93  	 *	This is the main processor for this text extractor. For each of the
94  	 *	multiscale pyramid images, this performs the basic text extraction.
95  	 *
96  	 *	@author David Dupplaw (dpd@ecs.soton.ac.uk)
97  	 *  @created 28 Jul 2011
98  	 *
99  	 */
100 	public class PyramidTextExtractor implements PyramidProcessor<FImage>
101 	{
102 		/** The resulting feature map */
103 		private FImage featureMap = null;
104 
105 		/**
106 		 * 	Get the feature map for the image.
107 		 *	@return The feature map for the image.
108 		 */
109 		public FImage getFeatureMap()
110 		{
111 			return this.featureMap;
112 		}
113 
114 		/**
115 		 *	{@inheritDoc}
116 		 * 	@see org.openimaj.image.analysis.pyramid.PyramidProcessor#process(org.openimaj.image.analysis.pyramid.gaussian.GaussianPyramid)
117 		 */
118 		@Override
119 		public void process( final GaussianPyramid<FImage> pyramid )
120 		{
121 			FImage fmap = null;
122 
123 			// Process each of the octaves in the pyramid
124 			for( final GaussianOctave<FImage> octave : pyramid )
125 			{
126 				// Extract text regions using the basic text extractor
127 				FImage octaveFMap = LiuSamarabanduTextExtractorMultiscale.this.basicTextExtractor.textRegionDetection(
128 						octave.getNextOctaveImage() );
129 
130 				if( fmap == null )
131 					fmap = octaveFMap;
132 				else
133 				{
134 					// Fuse across scales
135 					octaveFMap = ResizeProcessor.resample( octaveFMap,
136 							fmap.getWidth(), fmap.getHeight() ).normalise();
137 
138 					if( LiuSamarabanduTextExtractorMultiscale.DEBUG )
139 						DisplayUtilities.display( octaveFMap, "Resized feature map" );
140 
141 					fmap.addInplace( octaveFMap );
142 				}
143 			}
144 
145 			this.featureMap = fmap;
146 		}
147 	}
148 
149 	/**
150 	 * 	Helper method for debugging when viewing images
151 	 */
152 	protected void forceWait()
153 	{
154 		synchronized(this){ try	{ this.wait( 200000 ); } catch( final InterruptedException e1 ) {} }
155 	}
156 
157 	/**
158 	 *	{@inheritDoc}
159 	 * 	@see org.openimaj.image.processor.ImageProcessor#processImage(org.openimaj.image.Image)
160 	 */
161 	@Override
162 	public void processImage( final FImage image )
163 	{
164 		final PyramidTextExtractor ped = new PyramidTextExtractor();
165 
166 		// Unlike Lowe's SIFT DoG pyramid, we just need a basic pyramid
167 		final GaussianPyramidOptions<FImage> gpo = new GaussianPyramidOptions<FImage>();
168 		gpo.setScales( 1 );
169 		gpo.setExtraScaleSteps( 1 );
170 		gpo.setPyramidProcessor( ped );
171 		gpo.setDoubleInitialImage( this.doubleSizePyramid );
172 
173 		// Create and process the pyramid
174 		final GaussianPyramid<FImage> gp = new GaussianPyramid<FImage>( gpo );
175 		image.analyseWith( gp );
176 
177 		// -------------------------------------------------------------
178 		// This is not part of the Liu/Samarabandu algorithm:
179 		// Multiscale feature map
180 		FImage msFMap = ped.getFeatureMap();
181 
182 		// Single scale feature map
183 		FImage fmap = this.basicTextExtractor.textRegionDetection( image );
184 
185 		// Need to make it match the multiscale feature map
186 		if( this.doubleSizePyramid )
187 			fmap = ResizeProcessor.doubleSize( fmap );
188 
189 		// Combine the two.
190 		msFMap = fmap.add( msFMap );
191 		// -------------------------------------------------------------
192 
193 		if( LiuSamarabanduTextExtractorMultiscale.DEBUG )
194 			DisplayUtilities.display( msFMap.normalise(), "Fused Feature Map" );
195 
196 		// Process the feature map
197 		this.basicTextExtractor.processFeatureMap( msFMap, image );
198 
199 		// Store the regions
200 		this.extractedRegions = this.basicTextExtractor.getTextRegions();
201 
202 		// If we doubled the feature map, we'll have to half the size of the bounding boxes.
203 		if( this.doubleSizePyramid )
204 			for( final Rectangle r : this.extractedRegions.keySet() )
205 				r.scale( 0.5f );
206 
207 		// The output of the processor is the feature map
208 		image.internalAssign( fmap );
209 	}
210 
211 	/**
212 	 *	{@inheritDoc}
213 	 * 	@see org.openimaj.image.text.extraction.TextExtractor#getTextRegions()
214 	 */
215 	@Override
216 	public Map<Rectangle, FImage> getTextRegions()
217 	{
218 		return this.extractedRegions;
219 	}
220 
221 	/**
222 	 * 	Whether the initial image in the pyramid is being double sized.
223 	 *	@return TRUE if the initial image is double sized.
224 	 */
225 	public boolean isDoubleSizePyramid()
226 	{
227 		return this.doubleSizePyramid;
228 	}
229 
230 	/**
231 	 * 	Set whether to double the size of the pyramid
232 	 *	@param doubleSizePyramid TRUE to double the size of the initial image.
233 	 */
234 	public void setDoubleSizePyramid( final boolean doubleSizePyramid )
235 	{
236 		this.doubleSizePyramid = doubleSizePyramid;
237 	}
238 }