001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.image.processing.edges;
031
032import java.util.ArrayList;
033import java.util.Collections;
034import java.util.Comparator;
035import java.util.Iterator;
036import java.util.List;
037
038import org.openimaj.citation.annotation.Reference;
039import org.openimaj.citation.annotation.ReferenceType;
040import org.openimaj.image.FImage;
041import org.openimaj.image.pixel.Pixel;
042import org.openimaj.image.pixel.util.LineIterators;
043import org.openimaj.image.processing.convolution.FSobel;
044import org.openimaj.image.processor.SinglebandImageProcessor;
045import org.openimaj.math.geometry.line.Line2d;
046import org.openimaj.math.util.FloatArrayStatsUtils;
047
048/**
049 * Implementation of the Stroke Width Transform.
050 * <p>
051 * The Stroke Width Transform detects strokes and their respective widths from
052 * an image. This implementation contains a number of enhancements to improve
053 * the quality of the detected strokes, based on ideas from <a
054 * href="http://libccv.org/lib/ccv-swt/">LibCCV</a> implementation:
055 * <ul>
056 * <li>We search around the stroke in a small window for endpoints.</li>
057 * <li>We search around the endpoint in a small window for matching gradients.</li>
058 * <li>In addition to the stroke along the gradient, we also stroke at +/-45
059 * degrees from this.</li>
060 * </ul>
061 * 
062 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
063 * 
064 */
065@Reference(
066                type = ReferenceType.Inproceedings,
067                author = { "Epshtein, B.", "Ofek, E.", "Wexler, Y." },
068                title = "Detecting text in natural scenes with stroke width transform",
069                year = "2010",
070                booktitle = "Computer Vision and Pattern Recognition (CVPR), 2010 IEEE Conference on",
071                pages = { "2963", "2970" },
072                customData = {
073                                "keywords",
074                                "image processing;text analysis;image operator;image pixel;natural images;natural scenes;stroke width transform;text detection;Colored noise;Computer vision;Engines;Filter bank;Geometry;Image segmentation;Layout;Optical character recognition software;Pixel;Robustness",
075                                "doi", "10.1109/CVPR.2010.5540041",
076                                "ISSN", "1063-6919"
077                })
078public class StrokeWidthTransform implements SinglebandImageProcessor<Float, FImage> {
079        private final static int[][] edgeSearchRegion = { { 0, 0 }, { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 } };
080        private final static int[][] gradSearchRegion = {
081                        { 0, 0 }, { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 }, { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } };
082
083        private final CannyEdgeDetector canny;
084        private boolean direction;
085        private int maxStrokeWidth = 70;
086
087        /**
088         * Construct the SWT with the given Canny edge detector.
089         * 
090         * @param direction
091         *            direction of the SWT; true for dark on light, false for light
092         *            on dark.
093         * @param canny
094         *            the canny edge detector
095         */
096        public StrokeWidthTransform(boolean direction, CannyEdgeDetector canny) {
097                this.direction = direction;
098                this.canny = canny;
099        }
100
101        /**
102         * Construct the SWT with the given sigma for smoothing in the Canny phase
103         * The Canny thresholds are chosen automatically.
104         * 
105         * @param direction
106         *            direction of the SWT; true for dark on light, false for light
107         *            on dark.
108         * @param sigma
109         *            the amount of initial blurring
110         */
111        public StrokeWidthTransform(boolean direction, float sigma) {
112                this.direction = direction;
113                this.canny = new CannyEdgeDetector(sigma);
114        }
115
116        /**
117         * Construct with all Canny parameters set manually.
118         * 
119         * @param direction
120         *            direction of the SWT; true for dark on light, false for light
121         *            on dark.
122         * 
123         * @param lowThresh
124         *            lower hysteresis threshold.
125         * @param highThresh
126         *            upper hysteresis threshold.
127         * @param sigma
128         *            the amount of initial blurring.
129         */
130        public StrokeWidthTransform(boolean direction, float lowThresh, float highThresh, float sigma) {
131                this.direction = direction;
132                this.canny = new CannyEdgeDetector(lowThresh, highThresh, sigma);
133        }
134
135        /**
136         * Get the maximum stroke width
137         * 
138         * @return the maximum stroke width
139         */
140        public int getMaxStrokeWidth() {
141                return maxStrokeWidth;
142        }
143
144        /**
145         * Set the maximum stroke width
146         * 
147         * @param maxStrokeWidth
148         *            the maximum stroke width
149         */
150        public void setMaxStrokeWidth(int maxStrokeWidth) {
151                this.maxStrokeWidth = maxStrokeWidth;
152        }
153
154        @Override
155        public void processImage(FImage image) {
156                final FSobel grads = new FSobel(canny.sigma);
157
158                final FImage edges = image.clone();
159                canny.processImage(edges, grads);
160
161                image.fill(Float.POSITIVE_INFINITY);
162
163                final List<List<Pixel>> rays = generateRays(edges, grads.dx, grads.dy, direction, image);
164                medianFilter(image, rays);
165        }
166
167        private List<List<Pixel>> generateRays(FImage edges, FImage dx, FImage dy, boolean detectDark, FImage output) {
168                final List<List<Pixel>> rays = new ArrayList<List<Pixel>>();
169
170                final float gradDirection = detectDark ? -1 : 1;
171
172                for (int y = 0; y < output.height; y++) {
173                        for (int x = 0; x < output.width; x++) {
174                                if (edges.pixels[y][x] > 0) {
175                                        traceRay(edges, dx, dy, detectDark, output, gradDirection, x, y, rays, 1, 0, 0, 1);
176                                        traceRay(edges, dx, dy, detectDark, output, gradDirection, x, y, rays, 1, 1, -1, 1);
177                                        traceRay(edges, dx, dy, detectDark, output, gradDirection, x, y, rays, 1, -1, 1, 1);
178                                }
179                        }
180                }
181                return rays;
182        }
183
184        private void traceRay(FImage edges, FImage dx, FImage dy, boolean
185                        detectDark, FImage output, float gradDirection,
186                        int x, int y, List<List<Pixel>> rays, int xx, int xy, int yx, int yy)
187        {
188                final float gradX = (xx * dx.pixels[y][x] + xy * dy.pixels[y][x]) * gradDirection;
189                final float gradY = (yy * dy.pixels[y][x] + yx * dx.pixels[y][x]) * gradDirection;
190
191                final Iterator<Pixel> iterator = LineIterators.bresenham(x, y, gradX, gradY);
192                final Pixel start = iterator.next().clone(); // start of ray
193
194                for (int j = 0; j < maxStrokeWidth; j++) {
195                        final Pixel current = iterator.next();
196
197                        // check bounds
198                        if (current.x < 1 || current.x >= output.width - 1 || current.y < 1 || current.y >= output.height - 1) {
199                                break;
200                        }
201
202                        if (Math.abs(current.x - start.x) < 2 && Math.abs(current.y - start.y) < 2)
203                                continue;
204
205                        Pixel end = null;
206
207                        // search over the around the current pixel region for an edge
208                        for (int i = 0; i < edgeSearchRegion.length; i++) {
209                                final int currentX = current.x + edgeSearchRegion[i][0];
210                                final int currentY = current.y + edgeSearchRegion[i][1];
211
212                                if (edges.pixels[currentY][currentX] > 0) {
213                                        end = new Pixel(currentX, currentY);
214                                        break;
215                                }
216                        }
217
218                        if (end != null) {
219                                // edge found; now search for matching gradient in surrounding
220                                // area
221                                boolean found = false;
222
223                                final float startGradX = dx.pixels[start.y][start.x];
224                                final float startGradY = dy.pixels[start.y][start.x];
225
226                                for (int i = 0; i < gradSearchRegion.length; i++) {
227                                        final int currentX = end.x + gradSearchRegion[i][0];
228                                        final int currentY = end.y + gradSearchRegion[i][1];
229
230                                        final float currentGradX = dx.pixels[currentY][currentX];
231                                        final float currentGradY = dy.pixels[currentY][currentX];
232
233                                        // final float currentMag = (float) Math.sqrt((currentGradX
234                                        // * currentGradX)
235                                        // + (currentGradY * currentGradY))
236                                        // * gradDirection;
237                                        //
238                                        // currentGradX = currentGradX / currentMag;
239                                        // currentGradY = currentGradY / currentMag;
240                                        //
241                                        // if (Math.acos(gradX * -currentGradX + gradY *
242                                        // -currentGradY) < Math.PI / 6.0) {
243                                        // found = true;
244                                        // break;
245                                        // }
246                                        final float tn = startGradY * currentGradX - startGradX * currentGradY;
247                                        final float td = startGradX * currentGradX + startGradY * currentGradY;
248                                        if (tn * 7 < -td * 4 && tn * 7 > td * 4)
249                                        {
250                                                found = true;
251                                                break;
252                                        }
253                                }
254
255                                // if we found an opposite grad, then we fill in the ray using
256                                // the supercover to select all pixels that the ray passes
257                                // through
258                                if (found) {
259                                        final float length = (float) Line2d.distance(start, end);
260                                        final List<Pixel> ray = LineIterators.supercoverAsList(start, end);
261                                        for (final Pixel p : ray) {
262                                                output.pixels[p.y][p.x] = Math.min(length, output.pixels[p.y][p.x]);
263                                        }
264
265                                        rays.add(ray);
266                                }
267                                break;
268                        }
269                }
270        }
271
272        private void medianFilter(FImage output, List<List<Pixel>> rays) {
273                if (rays.size() == 0)
274                        return;
275
276                Collections.sort(rays, new Comparator<List<Pixel>>() {
277                        @Override
278                        public int compare(List<Pixel> o1, List<Pixel> o2) {
279                                return o1.size() - o2.size();
280                        }
281                });
282
283                final float[] working = new float[rays.get(rays.size() - 1).size()];
284
285                for (final List<Pixel> ray : rays) {
286                        final int length = ray.size();
287                        for (int i = 0; i < length; i++) {
288                                final Pixel pixel = ray.get(i);
289                                working[i] = output.pixels[pixel.y][pixel.x];
290                        }
291
292                        final float median = FloatArrayStatsUtils.median(working, 0, length);
293                        for (int i = 0; i < length; i++) {
294                                final Pixel pixel = ray.get(i);
295
296                                if (output.pixels[pixel.y][pixel.x] > median)
297                                        output.pixels[pixel.y][pixel.x] = median;
298                        }
299                }
300        }
301
302        /**
303         * Normalise the output image of the {@link StrokeWidthTransform} for
304         * display. This replaces all {@link Float#POSITIVE_INFINITY} pixels with a
305         * value of 1, and min-max normalises all the valid stroke pixels to be
306         * between 0 and 1.
307         * 
308         * @param input
309         *            the image to normalise
310         * @return the normalised image
311         */
312        public static FImage normaliseImage(FImage input) {
313                final FImage output = input.clone();
314
315                float maxVal = 0;
316                float minVal = Float.MAX_VALUE;
317                for (int row = 0; row < input.height; row++) {
318                        for (int col = 0; col < input.width; col++) {
319                                final float val = input.pixels[row][col];
320                                if (val != Float.POSITIVE_INFINITY) {
321                                        maxVal = Math.max(val, maxVal);
322                                        minVal = Math.min(val, minVal);
323                                }
324                        }
325                }
326
327                final float difference = maxVal - minVal;
328                for (int row = 0; row < input.height; row++) {
329                        for (int col = 0; col < input.width; col++) {
330                                final float val = input.pixels[row][col];
331                                if (val == Float.POSITIVE_INFINITY) {
332                                        output.pixels[row][col] = 1;
333                                } else {
334                                        output.pixels[row][col] = (val - minVal) / difference;
335                                }
336                        }
337                }
338                return output;
339        }
340
341        /**
342         * Get the direction of the SWT; true for dark on light, false for light
343         * 
344         * @return the direction
345         */
346        public boolean getDirection() {
347                return direction;
348        }
349
350        /**
351         * Set the direction of the SWT; true for dark on light, false for light
352         * 
353         * @param direction
354         *            the direction to set
355         */
356        public void setDirection(boolean direction) {
357                this.direction = direction;
358        }
359
360}