001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.image.processing.edges; 031 032import java.util.ArrayList; 033import java.util.Collections; 034import java.util.Comparator; 035import java.util.Iterator; 036import java.util.List; 037 038import org.openimaj.citation.annotation.Reference; 039import org.openimaj.citation.annotation.ReferenceType; 040import org.openimaj.image.FImage; 041import org.openimaj.image.pixel.Pixel; 042import org.openimaj.image.pixel.util.LineIterators; 043import org.openimaj.image.processing.convolution.FSobel; 044import org.openimaj.image.processor.SinglebandImageProcessor; 045import org.openimaj.math.geometry.line.Line2d; 046import org.openimaj.math.util.FloatArrayStatsUtils; 047 048/** 049 * Implementation of the Stroke Width Transform. 050 * <p> 051 * The Stroke Width Transform detects strokes and their respective widths from 052 * an image. This implementation contains a number of enhancements to improve 053 * the quality of the detected strokes, based on ideas from <a 054 * href="http://libccv.org/lib/ccv-swt/">LibCCV</a> implementation: 055 * <ul> 056 * <li>We search around the stroke in a small window for endpoints.</li> 057 * <li>We search around the endpoint in a small window for matching gradients.</li> 058 * <li>In addition to the stroke along the gradient, we also stroke at +/-45 059 * degrees from this.</li> 060 * </ul> 061 * 062 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 063 * 064 */ 065@Reference( 066 type = ReferenceType.Inproceedings, 067 author = { "Epshtein, B.", "Ofek, E.", "Wexler, Y." }, 068 title = "Detecting text in natural scenes with stroke width transform", 069 year = "2010", 070 booktitle = "Computer Vision and Pattern Recognition (CVPR), 2010 IEEE Conference on", 071 pages = { "2963", "2970" }, 072 customData = { 073 "keywords", 074 "image processing;text analysis;image operator;image pixel;natural images;natural scenes;stroke width transform;text detection;Colored noise;Computer vision;Engines;Filter bank;Geometry;Image segmentation;Layout;Optical character recognition software;Pixel;Robustness", 075 "doi", "10.1109/CVPR.2010.5540041", 076 "ISSN", "1063-6919" 077 }) 078public class StrokeWidthTransform implements SinglebandImageProcessor<Float, FImage> { 079 private final static int[][] edgeSearchRegion = { { 0, 0 }, { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 } }; 080 private final static int[][] gradSearchRegion = { 081 { 0, 0 }, { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 }, { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } }; 082 083 private final CannyEdgeDetector canny; 084 private boolean direction; 085 private int maxStrokeWidth = 70; 086 087 /** 088 * Construct the SWT with the given Canny edge detector. 089 * 090 * @param direction 091 * direction of the SWT; true for dark on light, false for light 092 * on dark. 093 * @param canny 094 * the canny edge detector 095 */ 096 public StrokeWidthTransform(boolean direction, CannyEdgeDetector canny) { 097 this.direction = direction; 098 this.canny = canny; 099 } 100 101 /** 102 * Construct the SWT with the given sigma for smoothing in the Canny phase 103 * The Canny thresholds are chosen automatically. 104 * 105 * @param direction 106 * direction of the SWT; true for dark on light, false for light 107 * on dark. 108 * @param sigma 109 * the amount of initial blurring 110 */ 111 public StrokeWidthTransform(boolean direction, float sigma) { 112 this.direction = direction; 113 this.canny = new CannyEdgeDetector(sigma); 114 } 115 116 /** 117 * Construct with all Canny parameters set manually. 118 * 119 * @param direction 120 * direction of the SWT; true for dark on light, false for light 121 * on dark. 122 * 123 * @param lowThresh 124 * lower hysteresis threshold. 125 * @param highThresh 126 * upper hysteresis threshold. 127 * @param sigma 128 * the amount of initial blurring. 129 */ 130 public StrokeWidthTransform(boolean direction, float lowThresh, float highThresh, float sigma) { 131 this.direction = direction; 132 this.canny = new CannyEdgeDetector(lowThresh, highThresh, sigma); 133 } 134 135 /** 136 * Get the maximum stroke width 137 * 138 * @return the maximum stroke width 139 */ 140 public int getMaxStrokeWidth() { 141 return maxStrokeWidth; 142 } 143 144 /** 145 * Set the maximum stroke width 146 * 147 * @param maxStrokeWidth 148 * the maximum stroke width 149 */ 150 public void setMaxStrokeWidth(int maxStrokeWidth) { 151 this.maxStrokeWidth = maxStrokeWidth; 152 } 153 154 @Override 155 public void processImage(FImage image) { 156 final FSobel grads = new FSobel(canny.sigma); 157 158 final FImage edges = image.clone(); 159 canny.processImage(edges, grads); 160 161 image.fill(Float.POSITIVE_INFINITY); 162 163 final List<List<Pixel>> rays = generateRays(edges, grads.dx, grads.dy, direction, image); 164 medianFilter(image, rays); 165 } 166 167 private List<List<Pixel>> generateRays(FImage edges, FImage dx, FImage dy, boolean detectDark, FImage output) { 168 final List<List<Pixel>> rays = new ArrayList<List<Pixel>>(); 169 170 final float gradDirection = detectDark ? -1 : 1; 171 172 for (int y = 0; y < output.height; y++) { 173 for (int x = 0; x < output.width; x++) { 174 if (edges.pixels[y][x] > 0) { 175 traceRay(edges, dx, dy, detectDark, output, gradDirection, x, y, rays, 1, 0, 0, 1); 176 traceRay(edges, dx, dy, detectDark, output, gradDirection, x, y, rays, 1, 1, -1, 1); 177 traceRay(edges, dx, dy, detectDark, output, gradDirection, x, y, rays, 1, -1, 1, 1); 178 } 179 } 180 } 181 return rays; 182 } 183 184 private void traceRay(FImage edges, FImage dx, FImage dy, boolean 185 detectDark, FImage output, float gradDirection, 186 int x, int y, List<List<Pixel>> rays, int xx, int xy, int yx, int yy) 187 { 188 final float gradX = (xx * dx.pixels[y][x] + xy * dy.pixels[y][x]) * gradDirection; 189 final float gradY = (yy * dy.pixels[y][x] + yx * dx.pixels[y][x]) * gradDirection; 190 191 final Iterator<Pixel> iterator = LineIterators.bresenham(x, y, gradX, gradY); 192 final Pixel start = iterator.next().clone(); // start of ray 193 194 for (int j = 0; j < maxStrokeWidth; j++) { 195 final Pixel current = iterator.next(); 196 197 // check bounds 198 if (current.x < 1 || current.x >= output.width - 1 || current.y < 1 || current.y >= output.height - 1) { 199 break; 200 } 201 202 if (Math.abs(current.x - start.x) < 2 && Math.abs(current.y - start.y) < 2) 203 continue; 204 205 Pixel end = null; 206 207 // search over the around the current pixel region for an edge 208 for (int i = 0; i < edgeSearchRegion.length; i++) { 209 final int currentX = current.x + edgeSearchRegion[i][0]; 210 final int currentY = current.y + edgeSearchRegion[i][1]; 211 212 if (edges.pixels[currentY][currentX] > 0) { 213 end = new Pixel(currentX, currentY); 214 break; 215 } 216 } 217 218 if (end != null) { 219 // edge found; now search for matching gradient in surrounding 220 // area 221 boolean found = false; 222 223 final float startGradX = dx.pixels[start.y][start.x]; 224 final float startGradY = dy.pixels[start.y][start.x]; 225 226 for (int i = 0; i < gradSearchRegion.length; i++) { 227 final int currentX = end.x + gradSearchRegion[i][0]; 228 final int currentY = end.y + gradSearchRegion[i][1]; 229 230 final float currentGradX = dx.pixels[currentY][currentX]; 231 final float currentGradY = dy.pixels[currentY][currentX]; 232 233 // final float currentMag = (float) Math.sqrt((currentGradX 234 // * currentGradX) 235 // + (currentGradY * currentGradY)) 236 // * gradDirection; 237 // 238 // currentGradX = currentGradX / currentMag; 239 // currentGradY = currentGradY / currentMag; 240 // 241 // if (Math.acos(gradX * -currentGradX + gradY * 242 // -currentGradY) < Math.PI / 6.0) { 243 // found = true; 244 // break; 245 // } 246 final float tn = startGradY * currentGradX - startGradX * currentGradY; 247 final float td = startGradX * currentGradX + startGradY * currentGradY; 248 if (tn * 7 < -td * 4 && tn * 7 > td * 4) 249 { 250 found = true; 251 break; 252 } 253 } 254 255 // if we found an opposite grad, then we fill in the ray using 256 // the supercover to select all pixels that the ray passes 257 // through 258 if (found) { 259 final float length = (float) Line2d.distance(start, end); 260 final List<Pixel> ray = LineIterators.supercoverAsList(start, end); 261 for (final Pixel p : ray) { 262 output.pixels[p.y][p.x] = Math.min(length, output.pixels[p.y][p.x]); 263 } 264 265 rays.add(ray); 266 } 267 break; 268 } 269 } 270 } 271 272 private void medianFilter(FImage output, List<List<Pixel>> rays) { 273 if (rays.size() == 0) 274 return; 275 276 Collections.sort(rays, new Comparator<List<Pixel>>() { 277 @Override 278 public int compare(List<Pixel> o1, List<Pixel> o2) { 279 return o1.size() - o2.size(); 280 } 281 }); 282 283 final float[] working = new float[rays.get(rays.size() - 1).size()]; 284 285 for (final List<Pixel> ray : rays) { 286 final int length = ray.size(); 287 for (int i = 0; i < length; i++) { 288 final Pixel pixel = ray.get(i); 289 working[i] = output.pixels[pixel.y][pixel.x]; 290 } 291 292 final float median = FloatArrayStatsUtils.median(working, 0, length); 293 for (int i = 0; i < length; i++) { 294 final Pixel pixel = ray.get(i); 295 296 if (output.pixels[pixel.y][pixel.x] > median) 297 output.pixels[pixel.y][pixel.x] = median; 298 } 299 } 300 } 301 302 /** 303 * Normalise the output image of the {@link StrokeWidthTransform} for 304 * display. This replaces all {@link Float#POSITIVE_INFINITY} pixels with a 305 * value of 1, and min-max normalises all the valid stroke pixels to be 306 * between 0 and 1. 307 * 308 * @param input 309 * the image to normalise 310 * @return the normalised image 311 */ 312 public static FImage normaliseImage(FImage input) { 313 final FImage output = input.clone(); 314 315 float maxVal = 0; 316 float minVal = Float.MAX_VALUE; 317 for (int row = 0; row < input.height; row++) { 318 for (int col = 0; col < input.width; col++) { 319 final float val = input.pixels[row][col]; 320 if (val != Float.POSITIVE_INFINITY) { 321 maxVal = Math.max(val, maxVal); 322 minVal = Math.min(val, minVal); 323 } 324 } 325 } 326 327 final float difference = maxVal - minVal; 328 for (int row = 0; row < input.height; row++) { 329 for (int col = 0; col < input.width; col++) { 330 final float val = input.pixels[row][col]; 331 if (val == Float.POSITIVE_INFINITY) { 332 output.pixels[row][col] = 1; 333 } else { 334 output.pixels[row][col] = (val - minVal) / difference; 335 } 336 } 337 } 338 return output; 339 } 340 341 /** 342 * Get the direction of the SWT; true for dark on light, false for light 343 * 344 * @return the direction 345 */ 346 public boolean getDirection() { 347 return direction; 348 } 349 350 /** 351 * Set the direction of the SWT; true for dark on light, false for light 352 * 353 * @param direction 354 * the direction to set 355 */ 356 public void setDirection(boolean direction) { 357 this.direction = direction; 358 } 359 360}