Source code

001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030/**
031 *
032 */
033package org.openimaj.image.text.extraction;
034
035import java.util.Map;
036
037import org.openimaj.citation.annotation.Reference;
038import org.openimaj.citation.annotation.ReferenceType;
039import org.openimaj.image.DisplayUtilities;
040import org.openimaj.image.FImage;
041import org.openimaj.image.analysis.pyramid.PyramidProcessor;
042import org.openimaj.image.analysis.pyramid.gaussian.GaussianOctave;
043import org.openimaj.image.analysis.pyramid.gaussian.GaussianPyramid;
044import org.openimaj.image.analysis.pyramid.gaussian.GaussianPyramidOptions;
045import org.openimaj.image.processing.resize.ResizeProcessor;
046import org.openimaj.math.geometry.shape.Rectangle;
047
048/**
049 *      An implementation of the multiscale text extractor from
050 *
051 *      MULTISCALE EDGE-BASED TEXT EXTRACTION FROM COMPLEX IMAGES;
052 *      Xiaoqing Liu and Jagath Samarabandu
053 *      The University of Western Ontario
054 *
055 *      http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=4036951.
056 *      <p>
057 *      This multiscale text extractor uses a Gaussian pyramid to produce the
058 *      multiscale feature vector. From this, the basic text extraction algorithm
059 *      is used (see the {@link LiuSamarabanduTextExtractorBasic} implementation)
060 *      on each image and the results combined using across-scale addition.
061 *
062 *      @author David Dupplaw (dpd@ecs.soton.ac.uk)
063 *  @created 28 Jul 2011
064 *
065 */
066@Reference(
067                type = ReferenceType.Inproceedings,
068                author = { "Liu, X.", "Samarabandu, J." },
069                title = "Multiscale Edge-Based Text Extraction from Complex Images",
070                year = "2006",
071                booktitle = "Multimedia and Expo, 2006 IEEE International Conference on",
072                pages = { "1721 ", "1724" },
073                month = "july",
074                number = "",
075                volume = "",
076                customData = { "keywords", "multiscale edge-based text extraction;printed document image;scene text;text detection;document image processing;edge detection;feature extraction;text analysis;", "doi", "10.1109/ICME.2006.262882", "ISSN", "" }
077        )
078public class LiuSamarabanduTextExtractorMultiscale extends TextExtractor<FImage>
079{
080        private static final boolean DEBUG = true;
081
082        /** The basic text extractor implementation */
083        private final LiuSamarabanduTextExtractorBasic basicTextExtractor =
084                new LiuSamarabanduTextExtractorBasic();
085
086        /** The extracted regions from the processing */
087        private Map<Rectangle, FImage> extractedRegions;
088
089        /** Whether to double the size of the initial image in the pyramid */
090        private boolean doubleSizePyramid = true;
091
092        /**
093         *      This is the main processor for this text extractor. For each of the
094         *      multiscale pyramid images, this performs the basic text extraction.
095         *
096         *      @author David Dupplaw (dpd@ecs.soton.ac.uk)
097         *  @created 28 Jul 2011
098         *
099         */
100        public class PyramidTextExtractor implements PyramidProcessor<FImage>
101        {
102                /** The resulting feature map */
103                private FImage featureMap = null;
104
105                /**
106                 *      Get the feature map for the image.
107                 *      @return The feature map for the image.
108                 */
109                public FImage getFeatureMap()
110                {
111                        return this.featureMap;
112                }
113
114                /**
115                 *      {@inheritDoc}
116                 *      @see org.openimaj.image.analysis.pyramid.PyramidProcessor#process(org.openimaj.image.analysis.pyramid.gaussian.GaussianPyramid)
117                 */
118                @Override
119                public void process( final GaussianPyramid<FImage> pyramid )
120                {
121                        FImage fmap = null;
122
123                        // Process each of the octaves in the pyramid
124                        for( final GaussianOctave<FImage> octave : pyramid )
125                        {
126                                // Extract text regions using the basic text extractor
127                                FImage octaveFMap = LiuSamarabanduTextExtractorMultiscale.this.basicTextExtractor.textRegionDetection(
128                                                octave.getNextOctaveImage() );
129
130                                if( fmap == null )
131                                        fmap = octaveFMap;
132                                else
133                                {
134                                        // Fuse across scales
135                                        octaveFMap = ResizeProcessor.resample( octaveFMap,
136                                                        fmap.getWidth(), fmap.getHeight() ).normalise();
137
138                                        if( LiuSamarabanduTextExtractorMultiscale.DEBUG )
139                                                DisplayUtilities.display( octaveFMap, "Resized feature map" );
140
141                                        fmap.addInplace( octaveFMap );
142                                }
143                        }
144
145                        this.featureMap = fmap;
146                }
147        }
148
149        /**
150         *      Helper method for debugging when viewing images
151         */
152        protected void forceWait()
153        {
154                synchronized(this){ try { this.wait( 200000 ); } catch( final InterruptedException e1 ) {} }
155        }
156
157        /**
158         *      {@inheritDoc}
159         *      @see org.openimaj.image.processor.ImageProcessor#processImage(org.openimaj.image.Image)
160         */
161        @Override
162        public void processImage( final FImage image )
163        {
164                final PyramidTextExtractor ped = new PyramidTextExtractor();
165
166                // Unlike Lowe's SIFT DoG pyramid, we just need a basic pyramid
167                final GaussianPyramidOptions<FImage> gpo = new GaussianPyramidOptions<FImage>();
168                gpo.setScales( 1 );
169                gpo.setExtraScaleSteps( 1 );
170                gpo.setPyramidProcessor( ped );
171                gpo.setDoubleInitialImage( this.doubleSizePyramid );
172
173                // Create and process the pyramid
174                final GaussianPyramid<FImage> gp = new GaussianPyramid<FImage>( gpo );
175                image.analyseWith( gp );
176
177                // -------------------------------------------------------------
178                // This is not part of the Liu/Samarabandu algorithm:
179                // Multiscale feature map
180                FImage msFMap = ped.getFeatureMap();
181
182                // Single scale feature map
183                FImage fmap = this.basicTextExtractor.textRegionDetection( image );
184
185                // Need to make it match the multiscale feature map
186                if( this.doubleSizePyramid )
187                        fmap = ResizeProcessor.doubleSize( fmap );
188
189                // Combine the two.
190                msFMap = fmap.add( msFMap );
191                // -------------------------------------------------------------
192
193                if( LiuSamarabanduTextExtractorMultiscale.DEBUG )
194                        DisplayUtilities.display( msFMap.normalise(), "Fused Feature Map" );
195
196                // Process the feature map
197                this.basicTextExtractor.processFeatureMap( msFMap, image );
198
199                // Store the regions
200                this.extractedRegions = this.basicTextExtractor.getTextRegions();
201
202                // If we doubled the feature map, we'll have to half the size of the bounding boxes.
203                if( this.doubleSizePyramid )
204                        for( final Rectangle r : this.extractedRegions.keySet() )
205                                r.scale( 0.5f );
206
207                // The output of the processor is the feature map
208                image.internalAssign( fmap );
209        }
210
211        /**
212         *      {@inheritDoc}
213         *      @see org.openimaj.image.text.extraction.TextExtractor#getTextRegions()
214         */
215        @Override
216        public Map<Rectangle, FImage> getTextRegions()
217        {
218                return this.extractedRegions;
219        }
220
221        /**
222         *      Whether the initial image in the pyramid is being double sized.
223         *      @return TRUE if the initial image is double sized.
224         */
225        public boolean isDoubleSizePyramid()
226        {
227                return this.doubleSizePyramid;
228        }
229
230        /**
231         *      Set whether to double the size of the pyramid
232         *      @param doubleSizePyramid TRUE to double the size of the initial image.
233         */
234        public void setDoubleSizePyramid( final boolean doubleSizePyramid )
235        {
236                this.doubleSizePyramid = doubleSizePyramid;
237        }
238}