1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 package org.openimaj.image.processing.edges;
31
32 import java.util.ArrayList;
33 import java.util.Collections;
34 import java.util.Comparator;
35 import java.util.Iterator;
36 import java.util.List;
37
38 import org.openimaj.citation.annotation.Reference;
39 import org.openimaj.citation.annotation.ReferenceType;
40 import org.openimaj.image.FImage;
41 import org.openimaj.image.pixel.Pixel;
42 import org.openimaj.image.pixel.util.LineIterators;
43 import org.openimaj.image.processing.convolution.FSobel;
44 import org.openimaj.image.processor.SinglebandImageProcessor;
45 import org.openimaj.math.geometry.line.Line2d;
46 import org.openimaj.math.util.FloatArrayStatsUtils;
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65 @Reference(
66 type = ReferenceType.Inproceedings,
67 author = { "Epshtein, B.", "Ofek, E.", "Wexler, Y." },
68 title = "Detecting text in natural scenes with stroke width transform",
69 year = "2010",
70 booktitle = "Computer Vision and Pattern Recognition (CVPR), 2010 IEEE Conference on",
71 pages = { "2963", "2970" },
72 customData = {
73 "keywords",
74 "image processing;text analysis;image operator;image pixel;natural images;natural scenes;stroke width transform;text detection;Colored noise;Computer vision;Engines;Filter bank;Geometry;Image segmentation;Layout;Optical character recognition software;Pixel;Robustness",
75 "doi", "10.1109/CVPR.2010.5540041",
76 "ISSN", "1063-6919"
77 })
78 public class StrokeWidthTransform implements SinglebandImageProcessor<Float, FImage> {
79 private final static int[][] edgeSearchRegion = { { 0, 0 }, { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 } };
80 private final static int[][] gradSearchRegion = {
81 { 0, 0 }, { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 }, { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } };
82
83 private final CannyEdgeDetector canny;
84 private boolean direction;
85 private int maxStrokeWidth = 70;
86
87
88
89
90
91
92
93
94
95
96 public StrokeWidthTransform(boolean direction, CannyEdgeDetector canny) {
97 this.direction = direction;
98 this.canny = canny;
99 }
100
101
102
103
104
105
106
107
108
109
110
111 public StrokeWidthTransform(boolean direction, float sigma) {
112 this.direction = direction;
113 this.canny = new CannyEdgeDetector(sigma);
114 }
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130 public StrokeWidthTransform(boolean direction, float lowThresh, float highThresh, float sigma) {
131 this.direction = direction;
132 this.canny = new CannyEdgeDetector(lowThresh, highThresh, sigma);
133 }
134
135
136
137
138
139
140 public int getMaxStrokeWidth() {
141 return maxStrokeWidth;
142 }
143
144
145
146
147
148
149
150 public void setMaxStrokeWidth(int maxStrokeWidth) {
151 this.maxStrokeWidth = maxStrokeWidth;
152 }
153
154 @Override
155 public void processImage(FImage image) {
156 final FSobel grads = new FSobel(canny.sigma);
157
158 final FImage edges = image.clone();
159 canny.processImage(edges, grads);
160
161 image.fill(Float.POSITIVE_INFINITY);
162
163 final List<List<Pixel>> rays = generateRays(edges, grads.dx, grads.dy, direction, image);
164 medianFilter(image, rays);
165 }
166
167 private List<List<Pixel>> generateRays(FImage edges, FImage dx, FImage dy, boolean detectDark, FImage output) {
168 final List<List<Pixel>> rays = new ArrayList<List<Pixel>>();
169
170 final float gradDirection = detectDark ? -1 : 1;
171
172 for (int y = 0; y < output.height; y++) {
173 for (int x = 0; x < output.width; x++) {
174 if (edges.pixels[y][x] > 0) {
175 traceRay(edges, dx, dy, detectDark, output, gradDirection, x, y, rays, 1, 0, 0, 1);
176 traceRay(edges, dx, dy, detectDark, output, gradDirection, x, y, rays, 1, 1, -1, 1);
177 traceRay(edges, dx, dy, detectDark, output, gradDirection, x, y, rays, 1, -1, 1, 1);
178 }
179 }
180 }
181 return rays;
182 }
183
184 private void traceRay(FImage edges, FImage dx, FImage dy, boolean
185 detectDark, FImage output, float gradDirection,
186 int x, int y, List<List<Pixel>> rays, int xx, int xy, int yx, int yy)
187 {
188 final float gradX = (xx * dx.pixels[y][x] + xy * dy.pixels[y][x]) * gradDirection;
189 final float gradY = (yy * dy.pixels[y][x] + yx * dx.pixels[y][x]) * gradDirection;
190
191 final Iterator<Pixel> iterator = LineIterators.bresenham(x, y, gradX, gradY);
192 final Pixel start = iterator.next().clone();
193
194 for (int j = 0; j < maxStrokeWidth; j++) {
195 final Pixel current = iterator.next();
196
197
198 if (current.x < 1 || current.x >= output.width - 1 || current.y < 1 || current.y >= output.height - 1) {
199 break;
200 }
201
202 if (Math.abs(current.x - start.x) < 2 && Math.abs(current.y - start.y) < 2)
203 continue;
204
205 Pixel end = null;
206
207
208 for (int i = 0; i < edgeSearchRegion.length; i++) {
209 final int currentX = current.x + edgeSearchRegion[i][0];
210 final int currentY = current.y + edgeSearchRegion[i][1];
211
212 if (edges.pixels[currentY][currentX] > 0) {
213 end = new Pixel(currentX, currentY);
214 break;
215 }
216 }
217
218 if (end != null) {
219
220
221 boolean found = false;
222
223 final float startGradX = dx.pixels[start.y][start.x];
224 final float startGradY = dy.pixels[start.y][start.x];
225
226 for (int i = 0; i < gradSearchRegion.length; i++) {
227 final int currentX = end.x + gradSearchRegion[i][0];
228 final int currentY = end.y + gradSearchRegion[i][1];
229
230 final float currentGradX = dx.pixels[currentY][currentX];
231 final float currentGradY = dy.pixels[currentY][currentX];
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246 final float tn = startGradY * currentGradX - startGradX * currentGradY;
247 final float td = startGradX * currentGradX + startGradY * currentGradY;
248 if (tn * 7 < -td * 4 && tn * 7 > td * 4)
249 {
250 found = true;
251 break;
252 }
253 }
254
255
256
257
258 if (found) {
259 final float length = (float) Line2d.distance(start, end);
260 final List<Pixel> ray = LineIterators.supercoverAsList(start, end);
261 for (final Pixel p : ray) {
262 output.pixels[p.y][p.x] = Math.min(length, output.pixels[p.y][p.x]);
263 }
264
265 rays.add(ray);
266 }
267 break;
268 }
269 }
270 }
271
272 private void medianFilter(FImage output, List<List<Pixel>> rays) {
273 if (rays.size() == 0)
274 return;
275
276 Collections.sort(rays, new Comparator<List<Pixel>>() {
277 @Override
278 public int compare(List<Pixel> o1, List<Pixel> o2) {
279 return o1.size() - o2.size();
280 }
281 });
282
283 final float[] working = new float[rays.get(rays.size() - 1).size()];
284
285 for (final List<Pixel> ray : rays) {
286 final int length = ray.size();
287 for (int i = 0; i < length; i++) {
288 final Pixel pixel = ray.get(i);
289 working[i] = output.pixels[pixel.y][pixel.x];
290 }
291
292 final float median = FloatArrayStatsUtils.median(working, 0, length);
293 for (int i = 0; i < length; i++) {
294 final Pixel pixel = ray.get(i);
295
296 if (output.pixels[pixel.y][pixel.x] > median)
297 output.pixels[pixel.y][pixel.x] = median;
298 }
299 }
300 }
301
302
303
304
305
306
307
308
309
310
311
312 public static FImage normaliseImage(FImage input) {
313 final FImage output = input.clone();
314
315 float maxVal = 0;
316 float minVal = Float.MAX_VALUE;
317 for (int row = 0; row < input.height; row++) {
318 for (int col = 0; col < input.width; col++) {
319 final float val = input.pixels[row][col];
320 if (val != Float.POSITIVE_INFINITY) {
321 maxVal = Math.max(val, maxVal);
322 minVal = Math.min(val, minVal);
323 }
324 }
325 }
326
327 final float difference = maxVal - minVal;
328 for (int row = 0; row < input.height; row++) {
329 for (int col = 0; col < input.width; col++) {
330 final float val = input.pixels[row][col];
331 if (val == Float.POSITIVE_INFINITY) {
332 output.pixels[row][col] = 1;
333 } else {
334 output.pixels[row][col] = (val - minVal) / difference;
335 }
336 }
337 }
338 return output;
339 }
340
341
342
343
344
345
346 public boolean getDirection() {
347 return direction;
348 }
349
350
351
352
353
354
355
356 public void setDirection(boolean direction) {
357 this.direction = direction;
358 }
359
360 }