/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   *  Redistributions of source code must retain the above copyright notice,
 *      this list of conditions and the following disclaimer.
 *
 *   *  Redistributions in binary form must reproduce the above copyright notice,
 *      this list of conditions and the following disclaimer in the documentation
 *      and/or other materials provided with the distribution.
 *
 *   *  Neither the name of the University of Southampton nor the names of its
 *      contributors may be used to endorse or promote products derived from this
 *      software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/**
 *
 */
package org.openimaj.image.text.extraction;

import java.util.LinkedHashMap;
import java.util.Map;

import org.openimaj.citation.annotation.Reference;
import org.openimaj.citation.annotation.ReferenceType;
import org.openimaj.image.DisplayUtilities;
import org.openimaj.image.FImage;
import org.openimaj.image.analysis.pyramid.PyramidProcessor;
import org.openimaj.image.analysis.pyramid.gaussian.GaussianOctave;
import org.openimaj.image.analysis.pyramid.gaussian.GaussianPyramid;
import org.openimaj.image.analysis.pyramid.gaussian.GaussianPyramidOptions;
import org.openimaj.image.processing.resize.ResizeProcessor;
import org.openimaj.math.geometry.shape.Rectangle;

/**
 * An implementation of the multiscale text extractor from
 *
 * MULTISCALE EDGE-BASED TEXT EXTRACTION FROM COMPLEX IMAGES;
 * Xiaoqing Liu and Jagath Samarabandu
 * The University of Western Ontario
 *
 * http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=4036951.
 * <p>
 * This multiscale text extractor uses a Gaussian pyramid to produce the
 * multiscale feature vector. The basic text extraction algorithm (see the
 * {@link LiuSamarabanduTextExtractorBasic} implementation) is then run on
 * each pyramid image and the results are combined using across-scale
 * addition.
 * <p>
 * Typical use: call {@link #processImage(FImage)} and then read the detected
 * regions with {@link #getTextRegions()}.
 *
 * @author David Dupplaw (dpd@ecs.soton.ac.uk)
 * @created 28 Jul 2011
 *
 */
@Reference(
    type = ReferenceType.Inproceedings,
    author = { "Liu, X.", "Samarabandu, J." },
    title = "Multiscale Edge-Based Text Extraction from Complex Images",
    year = "2006",
    booktitle = "Multimedia and Expo, 2006 IEEE International Conference on",
    pages = { "1721 ", "1724" },
    month = "july",
    number = "",
    volume = "",
    customData = { "keywords", "multiscale edge-based text extraction;printed document image;scene text;text detection;document image processing;edge detection;feature extraction;text analysis;", "doi", "10.1109/ICME.2006.262882", "ISSN", "" }
)
public class LiuSamarabanduTextExtractorMultiscale extends TextExtractor<FImage>
{
    /**
     * When true, intermediate feature maps are shown in windows while
     * processing. Must stay false in normal use: displaying images on every
     * call is a debugging aid only and cannot work without a display.
     */
    private static final boolean DEBUG = false;

    /** How long {@link #forceWait()} blocks for, in milliseconds */
    private static final long FORCE_WAIT_MILLIS = 200000;

    /** Factor applied to region bounding boxes to undo the pyramid doubling */
    private static final float HALF_SCALE = 0.5f;

    /** The basic text extractor implementation */
    private final LiuSamarabanduTextExtractorBasic basicTextExtractor =
        new LiuSamarabanduTextExtractorBasic();

    /** The extracted regions from the processing; null until an image has been processed */
    private Map<Rectangle, FImage> extractedRegions;

    /** Whether to double the size of the initial image in the pyramid */
    private boolean doubleSizePyramid = true;

    /**
     * This is the main processor for this text extractor. For each of the
     * multiscale pyramid images, this performs the basic text extraction
     * and accumulates the per-octave feature maps into a single map.
     *
     * @author David Dupplaw (dpd@ecs.soton.ac.uk)
     * @created 28 Jul 2011
     *
     */
    public class PyramidTextExtractor implements PyramidProcessor<FImage>
    {
        /** The resulting feature map; null until a pyramid has been processed */
        private FImage featureMap = null;

        /**
         * Get the feature map for the image.
         * @return The feature map for the image, or null if
         *  {@link #process(GaussianPyramid)} has not produced one.
         */
        public FImage getFeatureMap()
        {
            return this.featureMap;
        }

        /**
         * Runs the basic text region detection on the image of every octave
         * in the pyramid and sums the resulting feature maps. The first
         * octave's map defines the output size; every later map is resampled
         * to that size and normalised before being added.
         *
         * {@inheritDoc}
         * @see org.openimaj.image.analysis.pyramid.PyramidProcessor#process(org.openimaj.image.analysis.pyramid.gaussian.GaussianPyramid)
         */
        @Override
        public void process( final GaussianPyramid<FImage> pyramid )
        {
            final LiuSamarabanduTextExtractorBasic extractor =
                LiuSamarabanduTextExtractorMultiscale.this.basicTextExtractor;

            // Running sum of the feature maps; stays null if there are no octaves
            FImage fused = null;

            // Process each of the octaves in the pyramid
            for( final GaussianOctave<FImage> octave : pyramid )
            {
                // Extract text regions using the basic text extractor
                final FImage octaveMap = extractor.textRegionDetection(
                        octave.getNextOctaveImage() );

                if( fused == null )
                {
                    // The first map is used as the accumulator as-is
                    fused = octaveMap;
                    continue;
                }

                // Fuse across scales: bring the map to the accumulator's size first
                final FImage resized = ResizeProcessor.resample( octaveMap,
                        fused.getWidth(), fused.getHeight() ).normalise();

                if( LiuSamarabanduTextExtractorMultiscale.DEBUG )
                    DisplayUtilities.display( resized, "Resized feature map" );

                fused.addInplace( resized );
            }

            this.featureMap = fused;
        }
    }

    /**
     * Helper method for debugging when viewing images. Blocks the calling
     * thread on this object's monitor until it is notified, interrupted, or
     * the timeout elapses. If the wait is interrupted the thread's interrupt
     * status is restored rather than silently discarded.
     */
    protected void forceWait()
    {
        synchronized( this )
        {
            try
            {
                this.wait( LiuSamarabanduTextExtractorMultiscale.FORCE_WAIT_MILLIS );
            }
            catch( final InterruptedException e )
            {
                // Keep the interruption visible to whoever owns this thread
                Thread.currentThread().interrupt();
            }
        }
    }

    /**
     * Extracts text regions from the given image. A Gaussian pyramid is built
     * over the image and processed with a {@link PyramidTextExtractor}; the
     * resulting multiscale feature map is added to the single-scale feature
     * map of the image itself, and the fused map is handed to the basic
     * extractor to find the regions. The regions are then available from
     * {@link #getTextRegions()}.
     * <p>
     * Side effect: the given image is overwritten (via internalAssign) with
     * the single-scale feature map, which is double the input size when
     * {@link #isDoubleSizePyramid()} is true.
     *
     * {@inheritDoc}
     * @see org.openimaj.image.processor.ImageProcessor#processImage(org.openimaj.image.Image)
     */
    @Override
    public void processImage( final FImage image )
    {
        // Read the option once so the resize and the box rescale always agree
        final boolean doubled = this.doubleSizePyramid;

        final PyramidTextExtractor ped = new PyramidTextExtractor();

        // Unlike Lowe's SIFT DoG pyramid, we just need a basic pyramid
        final GaussianPyramidOptions<FImage> gpo = new GaussianPyramidOptions<FImage>();
        gpo.setScales( 1 );
        gpo.setExtraScaleSteps( 1 );
        gpo.setPyramidProcessor( ped );
        gpo.setDoubleInitialImage( doubled );

        // Create and process the pyramid
        final GaussianPyramid<FImage> gp = new GaussianPyramid<FImage>( gpo );
        image.analyseWith( gp );

        // -------------------------------------------------------------
        // This is not part of the Liu/Samarabandu algorithm:
        // Multiscale feature map
        FImage msFMap = ped.getFeatureMap();

        // Single scale feature map
        FImage fmap = this.basicTextExtractor.textRegionDetection( image );

        // Need to make it match the multiscale feature map
        if( doubled )
            fmap = ResizeProcessor.doubleSize( fmap );

        // Combine the two.
        msFMap = fmap.add( msFMap );
        // -------------------------------------------------------------

        // normalise() is deliberately called outside the debug guard: it used
        // to run on every call (the guard was hard-coded on), so keeping it
        // unconditional means the map passed on below does not depend on
        // whether debugging is enabled.
        final FImage normalisedFMap = msFMap.normalise();
        if( LiuSamarabanduTextExtractorMultiscale.DEBUG )
            DisplayUtilities.display( normalisedFMap, "Fused Feature Map" );

        // Process the feature map
        this.basicTextExtractor.processFeatureMap( msFMap, image );

        // Store the regions. If we doubled the feature map, the bounding
        // boxes have to be halved; that is done on copies so the keys of the
        // basic extractor's map are never modified while they are in use.
        final Map<Rectangle, FImage> regions = this.basicTextExtractor.getTextRegions();
        this.extractedRegions = doubled
                ? LiuSamarabanduTextExtractorMultiscale.scaleRegions(
                        regions, LiuSamarabanduTextExtractorMultiscale.HALF_SCALE )
                : regions;

        // The output of the processor is the feature map
        // NOTE(review): this is the single-scale map (doubled when the pyramid
        // is doubled), not the fused multiscale map - confirm that is intended.
        image.internalAssign( fmap );
    }

    /**
     * Builds a new map whose keys are scaled copies of the given map's keys.
     * The values are shared with the input map; the input map and its keys
     * are left untouched. Iteration order follows the input map.
     *
     * @param regions The regions to scale.
     * @param factor The factor passed to {@link Rectangle#scale(float)}.
     * @return A new map from scaled rectangle to the original value.
     */
    private static Map<Rectangle, FImage> scaleRegions(
            final Map<Rectangle, FImage> regions, final float factor )
    {
        final Map<Rectangle, FImage> scaled = new LinkedHashMap<Rectangle, FImage>();
        for( final Map.Entry<Rectangle, FImage> entry : regions.entrySet() )
        {
            final Rectangle source = entry.getKey();

            // Copy first, then scale the copy with the same call used before
            final Rectangle copy = new Rectangle(
                    source.x, source.y, source.width, source.height );
            copy.scale( factor );

            scaled.put( copy, entry.getValue() );
        }
        return scaled;
    }

    /**
     * Returns the regions found by the last call to
     * {@link #processImage(FImage)}.
     *
     * {@inheritDoc}
     * @see org.openimaj.image.text.extraction.TextExtractor#getTextRegions()
     */
    @Override
    public Map<Rectangle, FImage> getTextRegions()
    {
        return this.extractedRegions;
    }

    /**
     * Whether the initial image in the pyramid is being double sized.
     * @return TRUE if the initial image is double sized.
     */
    public boolean isDoubleSizePyramid()
    {
        return this.doubleSizePyramid;
    }

    /**
     * Set whether to double the size of the pyramid
     * @param doubleSizePyramid TRUE to double the size of the initial image.
     */
    public void setDoubleSizePyramid( final boolean doubleSizePyramid )
    {
        this.doubleSizePyramid = doubleSizePyramid;
    }
}