View Javadoc

1   /**
2    * Copyright (c) 2011, The University of Southampton and the individual contributors.
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without modification,
6    * are permitted provided that the following conditions are met:
7    *
8    *   * 	Redistributions of source code must retain the above copyright notice,
9    * 	this list of conditions and the following disclaimer.
10   *
11   *   *	Redistributions in binary form must reproduce the above copyright notice,
12   * 	this list of conditions and the following disclaimer in the documentation
13   * 	and/or other materials provided with the distribution.
14   *
15   *   *	Neither the name of the University of Southampton nor the names of its
16   * 	contributors may be used to endorse or promote products derived from this
17   * 	software without specific prior written permission.
18   *
19   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22   * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23   * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26   * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28   * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29   */
30  package org.openimaj.image.processing.edges;
31  
32  import java.util.ArrayList;
33  import java.util.Collections;
34  import java.util.Comparator;
35  import java.util.Iterator;
36  import java.util.List;
37  
38  import org.openimaj.citation.annotation.Reference;
39  import org.openimaj.citation.annotation.ReferenceType;
40  import org.openimaj.image.FImage;
41  import org.openimaj.image.pixel.Pixel;
42  import org.openimaj.image.pixel.util.LineIterators;
43  import org.openimaj.image.processing.convolution.FSobel;
44  import org.openimaj.image.processor.SinglebandImageProcessor;
45  import org.openimaj.math.geometry.line.Line2d;
46  import org.openimaj.math.util.FloatArrayStatsUtils;
47  
48  /**
49   * Implementation of the Stroke Width Transform.
50   * <p>
51   * The Stroke Width Transform detects strokes and their respective widths from
52   * an image. This implementation contains a number of enhancements to improve
53   * the quality of the detected strokes, based on ideas from <a
54   * href="http://libccv.org/lib/ccv-swt/">LibCCV</a> implementation:
55   * <ul>
56   * <li>We search around the stroke in a small window for endpoints.</li>
57   * <li>We search around the endpoint in a small window for matching gradients.</li>
58   * <li>In addition to the stroke along the gradient, we also stroke at +/-45
59   * degrees from this.</li>
60   * </ul>
61   * 
62   * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
63   * 
64   */
65  @Reference(
66  		type = ReferenceType.Inproceedings,
67  		author = { "Epshtein, B.", "Ofek, E.", "Wexler, Y." },
68  		title = "Detecting text in natural scenes with stroke width transform",
69  		year = "2010",
70  		booktitle = "Computer Vision and Pattern Recognition (CVPR), 2010 IEEE Conference on",
71  		pages = { "2963", "2970" },
72  		customData = {
73  				"keywords",
74  				"image processing;text analysis;image operator;image pixel;natural images;natural scenes;stroke width transform;text detection;Colored noise;Computer vision;Engines;Filter bank;Geometry;Image segmentation;Layout;Optical character recognition software;Pixel;Robustness",
75  				"doi", "10.1109/CVPR.2010.5540041",
76  				"ISSN", "1063-6919"
77  		})
78  public class StrokeWidthTransform implements SinglebandImageProcessor<Float, FImage> {
79  	private final static int[][] edgeSearchRegion = { { 0, 0 }, { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 } };
80  	private final static int[][] gradSearchRegion = {
81  			{ 0, 0 }, { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 }, { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } };
82  
83  	private final CannyEdgeDetector canny;
84  	private boolean direction;
85  	private int maxStrokeWidth = 70;
86  
87  	/**
88  	 * Construct the SWT with the given Canny edge detector.
89  	 * 
90  	 * @param direction
91  	 *            direction of the SWT; true for dark on light, false for light
92  	 *            on dark.
93  	 * @param canny
94  	 *            the canny edge detector
95  	 */
96  	public StrokeWidthTransform(boolean direction, CannyEdgeDetector canny) {
97  		this.direction = direction;
98  		this.canny = canny;
99  	}
100 
101 	/**
102 	 * Construct the SWT with the given sigma for smoothing in the Canny phase
103 	 * The Canny thresholds are chosen automatically.
104 	 * 
105 	 * @param direction
106 	 *            direction of the SWT; true for dark on light, false for light
107 	 *            on dark.
108 	 * @param sigma
109 	 *            the amount of initial blurring
110 	 */
111 	public StrokeWidthTransform(boolean direction, float sigma) {
112 		this.direction = direction;
113 		this.canny = new CannyEdgeDetector(sigma);
114 	}
115 
116 	/**
117 	 * Construct with all Canny parameters set manually.
118 	 * 
119 	 * @param direction
120 	 *            direction of the SWT; true for dark on light, false for light
121 	 *            on dark.
122 	 * 
123 	 * @param lowThresh
124 	 *            lower hysteresis threshold.
125 	 * @param highThresh
126 	 *            upper hysteresis threshold.
127 	 * @param sigma
128 	 *            the amount of initial blurring.
129 	 */
130 	public StrokeWidthTransform(boolean direction, float lowThresh, float highThresh, float sigma) {
131 		this.direction = direction;
132 		this.canny = new CannyEdgeDetector(lowThresh, highThresh, sigma);
133 	}
134 
135 	/**
136 	 * Get the maximum stroke width
137 	 * 
138 	 * @return the maximum stroke width
139 	 */
140 	public int getMaxStrokeWidth() {
141 		return maxStrokeWidth;
142 	}
143 
144 	/**
145 	 * Set the maximum stroke width
146 	 * 
147 	 * @param maxStrokeWidth
148 	 *            the maximum stroke width
149 	 */
150 	public void setMaxStrokeWidth(int maxStrokeWidth) {
151 		this.maxStrokeWidth = maxStrokeWidth;
152 	}
153 
154 	@Override
155 	public void processImage(FImage image) {
156 		final FSobel grads = new FSobel(canny.sigma);
157 
158 		final FImage edges = image.clone();
159 		canny.processImage(edges, grads);
160 
161 		image.fill(Float.POSITIVE_INFINITY);
162 
163 		final List<List<Pixel>> rays = generateRays(edges, grads.dx, grads.dy, direction, image);
164 		medianFilter(image, rays);
165 	}
166 
167 	private List<List<Pixel>> generateRays(FImage edges, FImage dx, FImage dy, boolean detectDark, FImage output) {
168 		final List<List<Pixel>> rays = new ArrayList<List<Pixel>>();
169 
170 		final float gradDirection = detectDark ? -1 : 1;
171 
172 		for (int y = 0; y < output.height; y++) {
173 			for (int x = 0; x < output.width; x++) {
174 				if (edges.pixels[y][x] > 0) {
175 					traceRay(edges, dx, dy, detectDark, output, gradDirection, x, y, rays, 1, 0, 0, 1);
176 					traceRay(edges, dx, dy, detectDark, output, gradDirection, x, y, rays, 1, 1, -1, 1);
177 					traceRay(edges, dx, dy, detectDark, output, gradDirection, x, y, rays, 1, -1, 1, 1);
178 				}
179 			}
180 		}
181 		return rays;
182 	}
183 
184 	private void traceRay(FImage edges, FImage dx, FImage dy, boolean
185 			detectDark, FImage output, float gradDirection,
186 			int x, int y, List<List<Pixel>> rays, int xx, int xy, int yx, int yy)
187 	{
188 		final float gradX = (xx * dx.pixels[y][x] + xy * dy.pixels[y][x]) * gradDirection;
189 		final float gradY = (yy * dy.pixels[y][x] + yx * dx.pixels[y][x]) * gradDirection;
190 
191 		final Iterator<Pixel> iterator = LineIterators.bresenham(x, y, gradX, gradY);
192 		final Pixel start = iterator.next().clone(); // start of ray
193 
194 		for (int j = 0; j < maxStrokeWidth; j++) {
195 			final Pixel current = iterator.next();
196 
197 			// check bounds
198 			if (current.x < 1 || current.x >= output.width - 1 || current.y < 1 || current.y >= output.height - 1) {
199 				break;
200 			}
201 
202 			if (Math.abs(current.x - start.x) < 2 && Math.abs(current.y - start.y) < 2)
203 				continue;
204 
205 			Pixel end = null;
206 
207 			// search over the around the current pixel region for an edge
208 			for (int i = 0; i < edgeSearchRegion.length; i++) {
209 				final int currentX = current.x + edgeSearchRegion[i][0];
210 				final int currentY = current.y + edgeSearchRegion[i][1];
211 
212 				if (edges.pixels[currentY][currentX] > 0) {
213 					end = new Pixel(currentX, currentY);
214 					break;
215 				}
216 			}
217 
218 			if (end != null) {
219 				// edge found; now search for matching gradient in surrounding
220 				// area
221 				boolean found = false;
222 
223 				final float startGradX = dx.pixels[start.y][start.x];
224 				final float startGradY = dy.pixels[start.y][start.x];
225 
226 				for (int i = 0; i < gradSearchRegion.length; i++) {
227 					final int currentX = end.x + gradSearchRegion[i][0];
228 					final int currentY = end.y + gradSearchRegion[i][1];
229 
230 					final float currentGradX = dx.pixels[currentY][currentX];
231 					final float currentGradY = dy.pixels[currentY][currentX];
232 
233 					// final float currentMag = (float) Math.sqrt((currentGradX
234 					// * currentGradX)
235 					// + (currentGradY * currentGradY))
236 					// * gradDirection;
237 					//
238 					// currentGradX = currentGradX / currentMag;
239 					// currentGradY = currentGradY / currentMag;
240 					//
241 					// if (Math.acos(gradX * -currentGradX + gradY *
242 					// -currentGradY) < Math.PI / 6.0) {
243 					// found = true;
244 					// break;
245 					// }
246 					final float tn = startGradY * currentGradX - startGradX * currentGradY;
247 					final float td = startGradX * currentGradX + startGradY * currentGradY;
248 					if (tn * 7 < -td * 4 && tn * 7 > td * 4)
249 					{
250 						found = true;
251 						break;
252 					}
253 				}
254 
255 				// if we found an opposite grad, then we fill in the ray using
256 				// the supercover to select all pixels that the ray passes
257 				// through
258 				if (found) {
259 					final float length = (float) Line2d.distance(start, end);
260 					final List<Pixel> ray = LineIterators.supercoverAsList(start, end);
261 					for (final Pixel p : ray) {
262 						output.pixels[p.y][p.x] = Math.min(length, output.pixels[p.y][p.x]);
263 					}
264 
265 					rays.add(ray);
266 				}
267 				break;
268 			}
269 		}
270 	}
271 
272 	private void medianFilter(FImage output, List<List<Pixel>> rays) {
273 		if (rays.size() == 0)
274 			return;
275 
276 		Collections.sort(rays, new Comparator<List<Pixel>>() {
277 			@Override
278 			public int compare(List<Pixel> o1, List<Pixel> o2) {
279 				return o1.size() - o2.size();
280 			}
281 		});
282 
283 		final float[] working = new float[rays.get(rays.size() - 1).size()];
284 
285 		for (final List<Pixel> ray : rays) {
286 			final int length = ray.size();
287 			for (int i = 0; i < length; i++) {
288 				final Pixel pixel = ray.get(i);
289 				working[i] = output.pixels[pixel.y][pixel.x];
290 			}
291 
292 			final float median = FloatArrayStatsUtils.median(working, 0, length);
293 			for (int i = 0; i < length; i++) {
294 				final Pixel pixel = ray.get(i);
295 
296 				if (output.pixels[pixel.y][pixel.x] > median)
297 					output.pixels[pixel.y][pixel.x] = median;
298 			}
299 		}
300 	}
301 
302 	/**
303 	 * Normalise the output image of the {@link StrokeWidthTransform} for
304 	 * display. This replaces all {@link Float#POSITIVE_INFINITY} pixels with a
305 	 * value of 1, and min-max normalises all the valid stroke pixels to be
306 	 * between 0 and 1.
307 	 * 
308 	 * @param input
309 	 *            the image to normalise
310 	 * @return the normalised image
311 	 */
312 	public static FImage normaliseImage(FImage input) {
313 		final FImage output = input.clone();
314 
315 		float maxVal = 0;
316 		float minVal = Float.MAX_VALUE;
317 		for (int row = 0; row < input.height; row++) {
318 			for (int col = 0; col < input.width; col++) {
319 				final float val = input.pixels[row][col];
320 				if (val != Float.POSITIVE_INFINITY) {
321 					maxVal = Math.max(val, maxVal);
322 					minVal = Math.min(val, minVal);
323 				}
324 			}
325 		}
326 
327 		final float difference = maxVal - minVal;
328 		for (int row = 0; row < input.height; row++) {
329 			for (int col = 0; col < input.width; col++) {
330 				final float val = input.pixels[row][col];
331 				if (val == Float.POSITIVE_INFINITY) {
332 					output.pixels[row][col] = 1;
333 				} else {
334 					output.pixels[row][col] = (val - minVal) / difference;
335 				}
336 			}
337 		}
338 		return output;
339 	}
340 
341 	/**
342 	 * Get the direction of the SWT; true for dark on light, false for light
343 	 * 
344 	 * @return the direction
345 	 */
346 	public boolean getDirection() {
347 		return direction;
348 	}
349 
350 	/**
351 	 * Set the direction of the SWT; true for dark on light, false for light
352 	 * 
353 	 * @param direction
354 	 *            the direction to set
355 	 */
356 	public void setDirection(boolean direction) {
357 		this.direction = direction;
358 	}
359 
360 }