1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 package org.openimaj.image.text.extraction.swt;
31
32 import java.util.ArrayList;
33 import java.util.List;
34 import java.util.Set;
35
36 import org.openimaj.citation.annotation.Reference;
37 import org.openimaj.citation.annotation.ReferenceType;
38 import org.openimaj.image.FImage;
39 import org.openimaj.image.analyser.ImageAnalyser;
40 import org.openimaj.image.pixel.ConnectedComponent;
41 import org.openimaj.image.pixel.Pixel;
42 import org.openimaj.image.processing.edges.CannyEdgeDetector;
43 import org.openimaj.image.processing.edges.StrokeWidthTransform;
44 import org.openimaj.image.processing.resize.ResizeProcessor;
45 import org.openimaj.util.set.DisjointSetForest;
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62 @Reference(
63 type = ReferenceType.Inproceedings,
64 author = { "Epshtein, B.", "Ofek, E.", "Wexler, Y." },
65 title = "Detecting text in natural scenes with stroke width transform",
66 year = "2010",
67 booktitle = "Computer Vision and Pattern Recognition (CVPR), 2010 IEEE Conference on",
68 pages = { "2963", "2970" },
69 customData = {
70 "keywords",
71 "image processing;text analysis;image operator;image pixel;natural images;natural scenes;stroke width transform;text detection;Colored noise;Computer vision;Engines;Filter bank;Geometry;Image segmentation;Layout;Optical character recognition software;Pixel;Robustness",
72 "doi", "10.1109/CVPR.2010.5540041",
73 "ISSN", "1063-6919"
74 })
75 public class SWTTextDetector implements ImageAnalyser<FImage> {
76
77
78
79
80
81
82
83 public static enum Direction {
84
85
86
87 DarkOnLight {
88 @Override
89 protected void detect(FImage image, SWTTextDetector detector) {
90 final StrokeWidthTransform swt = new StrokeWidthTransform(true, detector.options.canny);
91 swt.setMaxStrokeWidth(detector.options.maxStrokeWidth);
92 final FImage swtImage = image.process(swt);
93 detector.analyseImage(image, swtImage);
94 }
95 },
96
97
98
99 LightOnDark {
100 @Override
101 protected void detect(FImage image, SWTTextDetector detector) {
102 final StrokeWidthTransform swt = new StrokeWidthTransform(false, detector.options.canny);
103 swt.setMaxStrokeWidth(detector.options.maxStrokeWidth);
104 final FImage swtImage = image.process(swt);
105 detector.analyseImage(image, swtImage);
106 }
107 },
108
109
110
111 Both {
112 @Override
113 protected void detect(FImage image, SWTTextDetector detector) {
114 final StrokeWidthTransform swt = new StrokeWidthTransform(true, detector.options.canny);
115 swt.setMaxStrokeWidth(detector.options.maxStrokeWidth);
116 FImage swtImage = image.process(swt);
117 detector.analyseImage(image, swtImage);
118
119 swt.setDirection(false);
120 swtImage = image.process(swt);
121 detector.analyseImage(image, swtImage);
122 }
123 };
124
125 protected abstract void detect(FImage image, SWTTextDetector detector);
126 }
127
128
129
130
131
132
133 public static class Options {
134
135
136
137
138 public Direction direction = Direction.DarkOnLight;
139
140
141
142
143 public CannyEdgeDetector canny = new CannyEdgeDetector(1);
144
145
146
147
148 public boolean doubleSize = false;
149
150
151
152
153
154 public float strokeWidthRatio = 3.0f;
155
156
157
158
159
160 public double letterVarianceMean = 0.93;
161
162
163
164
165 public double maxAspectRatio = 10;
166
167
168
169
170
171 public double maxDiameterStrokeRatio = 10;
172
173
174
175
176
177 public int minArea = 38;
178
179
180
181
182 public float minHeight = 10;
183
184
185
186
187 public float maxHeight = 300;
188
189
190
191
192 public int maxNumOverlappingBoxes = 10;
193
194
195
196
197 public int maxStrokeWidth = 70;
198
199
200
201
202
203 public float medianStrokeWidthRatio = 2;
204
205
206
207
208
209 public float letterHeightRatio = 2;
210
211
212
213
214
215 public float intensityThreshold = 0.12f;
216
217
218
219
220
221
222 public float widthMultiplier = 3;
223
224
225
226
227 public int minLettersPerLine = 3;
228
229
230
231
232
233 public float intersectRatio = 1.3f;
234
235
236
237
238
239 public float wordBreakdownRatio = 1f;
240 }
241
242
243
244
245 private final static int[][] connect8 = {
246 { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 }, { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } };
247
248
249
250
251 protected Options options;
252
253 private List<LetterCandidate> letters = null;
254 private List<LineCandidate> lines = null;
255 private List<WordCandidate> words = null;
256
257
258
259
260 public SWTTextDetector() {
261 this(new Options());
262 }
263
264
265
266
267
268
269
270 public SWTTextDetector(Options options) {
271 this.options = options;
272 }
273
274
275
276
277
278
279 public Options getOptions() {
280 return options;
281 }
282
283
284
285
286
287
288
289
290
291 private List<ConnectedComponent> findComponents(FImage image) {
292 final DisjointSetForest<Pixel> forest = new DisjointSetForest<Pixel>();
293
294 Pixel current = new Pixel();
295 Pixel next = new Pixel();
296 for (int y = 0; y < image.height; y++) {
297 for (int x = 0; x < image.width; x++) {
298 final float currentValue = image.pixels[y][x];
299
300 if (currentValue > 0 && currentValue != Float.POSITIVE_INFINITY) {
301 current.x = x;
302 current.y = y;
303
304 if (forest.makeSet(current) != null)
305 current = current.clone();
306
307 for (int i = 0; i < connect8.length; i++) {
308 final int xx = x + connect8[i][0];
309 final int yy = y + connect8[i][1];
310
311 if (xx >= 0 && xx < image.width - 1 && yy >= 0 && yy < image.height - 1) {
312 final float value = image.pixels[yy][xx];
313
314 if (value > 0 && value != Float.POSITIVE_INFINITY) {
315 next.x = xx;
316 next.y = yy;
317
318 if (forest.makeSet(next) != null)
319 next = next.clone();
320
321 if ((Math.max(currentValue, value) / Math.min(currentValue, value)) < options.strokeWidthRatio)
322 forest.union(current, next);
323 }
324 }
325 }
326 }
327 }
328 }
329
330 final List<ConnectedComponent> components = new ArrayList<ConnectedComponent>();
331 for (final Set<Pixel> pixels : forest.getSubsets()) {
332 final ConnectedComponent cc = new ConnectedComponent(pixels);
333 components.add(cc);
334 }
335
336 return components;
337 }
338
339 @Override
340 public void analyseImage(FImage image) {
341 letters = new ArrayList<LetterCandidate>();
342 lines = new ArrayList<LineCandidate>();
343 words = new ArrayList<WordCandidate>();
344
345 if (options.doubleSize)
346 image = ResizeProcessor.doubleSize(image);
347
348 options.direction.detect(image, this);
349 }
350
351 protected void analyseImage(FImage image, FImage swt) {
352 final List<ConnectedComponent> comps = findComponents(swt);
353 final List<LetterCandidate> tmpLetters = LetterCandidate.findLetters(comps, swt, image, options);
354 final List<LineCandidate> tmpLines = LineCandidate.extractLines(tmpLetters, this.options);
355
356 this.letters.addAll(tmpLetters);
357 this.lines.addAll(tmpLines);
358 for (final LineCandidate line : tmpLines) {
359 this.words.addAll(line.words);
360 }
361 }
362
363
364
365
366
367
368 public List<LineCandidate> getLines() {
369 return lines;
370 }
371
372
373
374
375
376
377
378
379
380
381 public List<LetterCandidate> getLetters() {
382 return letters;
383 }
384 }