/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the name of the University of Southampton nor the names of its
 *     contributors may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.image.text.extraction.swt;

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
import org.openimaj.image.FImage;
import org.openimaj.image.pixel.ConnectedComponent;
import org.openimaj.image.pixel.Pixel;
import org.openimaj.image.pixel.PixelSet;
import org.openimaj.math.geometry.shape.Rectangle;

/**
 * This class models a candidate textual letter/character from the
 * {@link SWTTextDetector}.
 *
 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
 */
public class LetterCandidate extends Candidate {
	/** Not assigned in this class; presumably the word this letter is grouped into. */
	protected WordCandidate word;

	/** Not assigned in this class; presumably the line this letter is grouped into. */
	protected LineCandidate line;

	/** The pixels making up this candidate. */
	protected PixelSet cc;

	/** Mean value of the source image over the pixels of {@link #cc}. */
	protected float averageBrightness;

	/** Centroid pixel of {@link #cc}. */
	protected Pixel centroid;

	/** Median stroke width supplied at construction time. */
	protected float medianStrokeWidth;

	/**
	 * Construct a candidate from a set of pixels. The regular bounding box,
	 * centroid and average brightness are computed here.
	 *
	 * @param cc
	 *            the pixels of the candidate
	 * @param medianStrokeWidth
	 *            the median stroke width of the candidate
	 * @param image
	 *            the image sampled at every pixel of <code>cc</code> to compute
	 *            the average brightness
	 */
	protected LetterCandidate(PixelSet cc, float medianStrokeWidth, FImage image) {
		this.cc = cc;
		this.medianStrokeWidth = medianStrokeWidth;

		regularBoundingBox = cc.calculateRegularBoundingBox();

		centroid = cc.calculateCentroidPixel();

		averageBrightness = 0;
		for (final Pixel p : cc.pixels) {
			averageBrightness += image.pixels[p.y][p.x];
		}
		averageBrightness /= cc.pixels.size();
	}

	/**
	 * Compute the regular bounding rectangle of the given list of letter
	 * candidates
	 *
	 * @param letters
	 *            the letter candidates
	 * @return the bounds rectangle
	 */
	public static Rectangle computeBounds(List<LetterCandidate> letters) {
		float minx = Float.MAX_VALUE;
		float miny = Float.MAX_VALUE;
		float maxx = 0;
		float maxy = 0;

		for (final LetterCandidate letter : letters) {
			final Rectangle r = letter.cc.calculateRegularBoundingBox();

			if (r.x < minx) {
				minx = r.x;
			}
			if (r.y < miny) {
				miny = r.y;
			}
			if (r.x + r.width > maxx) {
				maxx = r.x + r.width;
			}
			if (r.y + r.height > maxy) {
				maxy = r.y + r.height;
			}
		}

		return new Rectangle(minx, miny, maxx - minx, maxy - miny);
	}

	@Override
	public String toString() {
		return regularBoundingBox.toString();
	}

	/**
	 * Filter the components to find likely letter candidates.
	 * <p>
	 * A component is kept only if it passes every test: minimum area,
	 * stroke-width variance relative to the mean, aspect ratio, diameter to
	 * median stroke width ratio, height range, and the number of other
	 * components whose bounding boxes overlap its own.
	 *
	 * @param components
	 *            the components to filter
	 * @param swt
	 *            the swt image
	 * @param image
	 *            the original image
	 * @param options
	 *            the thresholds used by the individual tests
	 * @return the potential letter candidates
	 */
	protected static List<LetterCandidate>
			findLetters(List<ConnectedComponent> components, FImage swt, FImage image, SWTTextDetector.Options options)
	{
		final List<LetterCandidate> output = new ArrayList<LetterCandidate>();

		// The occlusion test compares every surviving component against all
		// the others, so compute each bounding box once up front instead of
		// recomputing it for every pair. The boxes are only read below.
		final List<Rectangle> boxes = new ArrayList<Rectangle>(components.size());
		for (final ConnectedComponent cc : components) {
			boxes.add(cc.calculateRegularBoundingBox());
		}

		final DescriptiveStatistics stats = new DescriptiveStatistics();
		int index = 0;
		for (final ConnectedComponent cc : components) {
			// advance the index before any "continue" so boxes stay aligned
			final Rectangle bb = boxes.get(index);
			index++;

			// additional check for small area - speeds processing...
			if (cc.pixels.size() < options.minArea) {
				continue;
			}

			computeStats(stats, cc, swt);

			final double mean = stats.getMean();
			final double variance = stats.getVariance();
			final double median = stats.getPercentile(50);

			// test variance of stroke width
			if (variance > options.letterVarianceMean * mean) {
				continue;
			}

			// test aspect ratio
			final double aspect = Math.max(bb.width, bb.height) / Math.min(bb.width, bb.height);
			if (aspect > options.maxAspectRatio) {
				continue;
			}

			// test diameter
			final float diameter = Math.max(bb.width, bb.height);
			if (diameter / median > options.maxDiameterStrokeRatio) {
				continue;
			}

			// check height; cheap, so done before the occlusion scan (the
			// tests are independent, so the order does not affect the result)
			if (bb.height < options.minHeight || bb.height > options.maxHeight) {
				continue;
			}

			// check occlusion
			if (countOverlappingBoxes(components, boxes, cc, bb) > options.maxNumOverlappingBoxes) {
				continue;
			}

			output.add(new LetterCandidate(cc, (float) median, image));
		}

		return output;
	}

	/**
	 * Count the components, other than <code>cc</code> itself, whose bounding
	 * box has a non-zero intersection area with <code>bb</code>.
	 *
	 * @param components
	 *            all the components
	 * @param boxes
	 *            the bounding box of each component, in the same order as
	 *            <code>components</code>
	 * @param cc
	 *            the component being tested (skipped by identity)
	 * @param bb
	 *            the bounding box of <code>cc</code>
	 * @return the number of overlapping bounding boxes
	 */
	private static int countOverlappingBoxes(List<ConnectedComponent> components, List<Rectangle> boxes,
			ConnectedComponent cc, Rectangle bb)
	{
		int overlapping = 0;
		int index = 0;
		for (final ConnectedComponent other : components) {
			final Rectangle otherBox = boxes.get(index);
			index++;

			if (other == cc) {
				continue;
			}
			if (otherBox.intersectionArea(bb) > 0) {
				overlapping++;
			}
		}
		return overlapping;
	}

	/**
	 * Compute the stroke statistics of a component.
	 *
	 * @param stats
	 *            the stats object (will be reset)
	 * @param cc
	 *            the component
	 * @param swt
	 *            the swt image
	 */
	private static void computeStats(DescriptiveStatistics stats, PixelSet cc, FImage swt) {
		stats.clear();
		for (final Pixel p : cc.pixels) {
			stats.addValue(swt.pixels[p.y][p.x]);
		}
	}
}