1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33 package org.openimaj.image.text.extraction;
34
35 import java.util.ArrayList;
36 import java.util.HashMap;
37 import java.util.Iterator;
38 import java.util.List;
39 import java.util.Map;
40
41 import org.openimaj.citation.annotation.Reference;
42 import org.openimaj.citation.annotation.ReferenceType;
43 import org.openimaj.image.DisplayUtilities;
44 import org.openimaj.image.FImage;
45 import org.openimaj.image.MBFImage;
46 import org.openimaj.image.connectedcomponent.ConnectedComponentLabeler;
47 import org.openimaj.image.pixel.ConnectedComponent;
48 import org.openimaj.image.pixel.ConnectedComponent.ConnectMode;
49 import org.openimaj.image.pixel.Pixel;
50 import org.openimaj.image.pixel.PixelSet;
51 import org.openimaj.image.processing.convolution.CompassOperators.Compass0;
52 import org.openimaj.image.processing.convolution.CompassOperators.Compass135;
53 import org.openimaj.image.processing.convolution.CompassOperators.Compass45;
54 import org.openimaj.image.processing.convolution.CompassOperators.Compass90;
55 import org.openimaj.image.processing.convolution.FConvolution;
56 import org.openimaj.image.processing.morphology.Close;
57 import org.openimaj.image.processing.morphology.Dilate;
58 import org.openimaj.image.processing.morphology.StructuringElement;
59 import org.openimaj.image.processing.morphology.Thin;
60 import org.openimaj.image.processing.threshold.OtsuThreshold;
61 import org.openimaj.image.processing.transform.SkewCorrector;
62 import org.openimaj.image.processor.connectedcomponent.ConnectedComponentProcessor;
63 import org.openimaj.image.processor.connectedcomponent.render.OrientatedBoundingBoxRenderer;
64 import org.openimaj.image.text.ocr.OCRProcessor;
65 import org.openimaj.math.geometry.point.Point2d;
66 import org.openimaj.math.geometry.point.Point2dImpl;
67 import org.openimaj.math.geometry.shape.Polygon;
68 import org.openimaj.math.geometry.shape.Rectangle;
69 import org.openimaj.util.pair.IndependentPair;
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111 @Reference(
112 type = ReferenceType.Inproceedings,
113 author = { "Xiaoqing Liu", "Samarabandu, J." },
114 title = "An edge-based text region extraction algorithm for indoor mobile robot navigation",
115 year = "2005",
116 booktitle = "Mechatronics and Automation, 2005 IEEE International Conference",
117 pages = { " 701 ", " 706 Vol. 2" },
118 month = "July-1 Aug.",
119 number = "",
120 volume = "2",
121 customData = {
122 "keywords",
123 "edge-based text region extraction; feature extraction; scene text; text localization; vision-based mobile robot navigation; character recognition; edge detection; feature extraction; mobile robots; navigation; path planning; robot vision;",
124 "doi", "10.1109/ICMA.2005.1626635", "ISSN", "" })
125 public class LiuSamarabanduTextExtractorBasic extends TextExtractor<FImage>
126 {
127
128 public static final boolean DEBUG = false;
129
130
131 private float boundingBoxPaddingPc = 1.1f;
132
133
134 private Map<Rectangle, FImage> textRegions = null;
135
136
137
138
139
140
141
142
143
144
145
146
147
148 private FImage processImage(FImage img, FConvolution c)
149 {
150 return img.process(c)
151 .abs()
152 .normalise();
153 }
154
155
156
157
158
159
160 @Override
161 public void processImage(FImage image)
162 {
163
164 final FImage fmap = textRegionDetection(image);
165
166
167 processFeatureMap(fmap, image);
168
169
170 image.internalAssign(fmap);
171 }
172
173
174
175
176
177
178
179
180
181
182
183 public void processFeatureMap(FImage fmap, FImage image)
184 {
185
186 final Map<Rectangle, FImage> t = textRegionLocalisation(fmap, image);
187 this.textRegions = t;
188 }
189
190
191
192
193
194
195
196
197
198 public FImage textRegionDetection(FImage image)
199 {
200
201 final HashMap<Integer, FImage> e = new HashMap<Integer, FImage>();
202 e.put(0, processImage(image, new Compass0()));
203 e.put(45, processImage(image, new Compass45()));
204 e.put(90, processImage(image, new Compass90()));
205 e.put(135, processImage(image, new Compass135()));
206
207
208
209 final FImage e90strong = e.get(90).process(new OtsuThreshold());
210
211 if (DEBUG)
212 DisplayUtilities.display(e90strong, "Strong Edges");
213
214
215
216
217 final StructuringElement se = new StructuringElement();
218 se.positive.add(new Pixel(0, 0));
219 se.positive.add(new Pixel(-1, 0));
220 se.positive.add(new Pixel(1, 0));
221
222 if (DEBUG)
223 System.out.println("Dilating with a 1x3 structuring element");
224
225 final FImage dilated = e90strong.process(new Dilate(se));
226
227
228
229 int m = (int) (dilated.getHeight() / 25d);
230
231 if (DEBUG)
232 System.out.println("Closing with a " + m + "x1 structuring element.");
233
234 final StructuringElement se2 = new StructuringElement();
235 for (int i = 0; i < m; i++)
236 se2.positive.add(new Pixel(0, i - m / 2));
237 final FImage closed = dilated.process(new Close(se2));
238
239
240
241 FImage e90weak = closed.subtract(dilated).abs();
242 e90weak.multiplyInplace(e.get(90));
243 e90weak = e90weak.process(new OtsuThreshold());
244
245 if (DEBUG)
246 DisplayUtilities.display(e90weak, "Weak Edges");
247
248 final FImage e90edges = e90strong.add(e90weak).normalise().process(
249 new OtsuThreshold());
250
251 if (DEBUG)
252 DisplayUtilities.display(e90edges, "Edges");
253
254
255 final FImage e90thin = e90edges.process(new Thin(StructuringElement.BOX));
256
257 if (DEBUG)
258 DisplayUtilities.display(e90thin, "Thinned");
259
260 final ConnectedComponentLabeler ccl = new ConnectedComponentLabeler(
261 ConnectMode.CONNECT_4);
262 final List<ConnectedComponent> cc = ccl.findComponents(e90thin);
263
264
265 final FImage e90labelled = new FImage(e90thin.getWidth(), e90thin.getHeight());
266 final ConnectedComponentProcessor ccp = new ConnectedComponentProcessor()
267 {
268 @Override
269 public void process(ConnectedComponent cc)
270 {
271 final int a = cc.calculateArea();
272 for (final Pixel p : cc.pixels)
273 e90labelled.setPixel((int) p.getX(), (int) p.getY(), (float) a);
274 }
275 };
276 ConnectedComponent.process(cc, ccp);
277
278 if (DEBUG) {
279 DisplayUtilities.display(e90labelled.clone().normalise(), "Labelled Edges");
280 System.out.println("Max edge length: " + e90labelled.max());
281 }
282
283
284 final FImage e90short = e90labelled.clone().clip(0f, 1f).subtract(
285 e90labelled.threshold(e90labelled.max() / 4 * 3));
286
287 if (DEBUG)
288 DisplayUtilities.display(e90short.clone().normalise(), "Thresholded Lengths");
289
290
291
292
293 final StructuringElement se3 = new StructuringElement();
294 for (int i = 0; i < m; i++)
295 for (int j = 0; j < m; j++)
296 se3.positive.add(new Pixel(i - m / 2, j - m / 2));
297 final FImage e90candidate = e90short.process(new Dilate(se3));
298
299 if (DEBUG)
300 DisplayUtilities.display(e90candidate, "Candidate Regions");
301
302
303 final FImage is = e.get(0).clone().
304 addInplace(e.get(45)).
305 addInplace(e.get(90)).
306 addInplace(e.get(135));
307
308
309
310
311 final FImage refined = e90candidate.multiply(is).normalise();
312
313 if (DEBUG)
314 DisplayUtilities.display(refined, "Refined");
315
316
317
318 final int c = 5;
319 final FImage fmap = new FImage(image.getWidth(), image.getHeight());
320
321
322
323
324
325
326
327
328
329
330
331
332 final HashMap<Integer, Float> maxPixDir = new HashMap<Integer, Float>();
333
334
335 for (int j = c; j < image.getHeight() - c; j++)
336 {
337 for (int i = c; i < image.getWidth() - c; i++)
338 {
339 float pixelValue = 0;
340 final float N = c * c;
341 maxPixDir.clear();
342
343
344 for (m = -c; m < c; m++)
345 {
346 for (int n = -c; n < c; n++)
347 {
348 pixelValue += refined.getPixel(i + m, j + n);
349
350 updateMaxPixDir(maxPixDir, e, 0, i + m, j + n);
351 updateMaxPixDir(maxPixDir, e, 45, i + m, j + n);
352 updateMaxPixDir(maxPixDir, e, 90, i + m, j + n);
353 updateMaxPixDir(maxPixDir, e, 135, i + m, j + n);
354 }
355 }
356
357 float w = maxPixDir.get(0) +
358 maxPixDir.get(45) +
359 maxPixDir.get(90) +
360 maxPixDir.get(135);
361 w /= 4;
362
363 pixelValue *= w;
364 pixelValue /= N;
365
366 fmap.setPixel(i, j, pixelValue);
367 }
368 }
369
370 if (DEBUG)
371 DisplayUtilities.display(fmap.clone().normalise(), "Feature Map");
372
373 return fmap;
374 }
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391 private void updateMaxPixDir(HashMap<Integer, Float> maxPixDir, HashMap<Integer, FImage> e, int dir, int x, int y)
392 {
393 Float xx = null;
394 if ((xx = maxPixDir.get(dir)) == null)
395 maxPixDir.put(dir, e.get(dir).getPixel(x, y));
396 else
397 maxPixDir.put(dir, Math.max(xx, e.get(dir).getPixel(x, y)));
398 }
399
400
401
402
403
404
405
406
407
408
409
410
411 public Map<Rectangle, FImage> textRegionLocalisation(FImage fmap, FImage image)
412 {
413
414 final HashMap<Rectangle, FImage> textAreas = new HashMap<Rectangle, FImage>();
415
416
417 final FImage thresh = fmap.clone().normalise().process(new OtsuThreshold());
418
419
420 final StructuringElement se = new StructuringElement();
421 final int ses = 9;
422 for (int i = 0; i < ses; i++)
423 for (int j = 0; j < ses; j++)
424 se.positive.add(new Pixel(i, j));
425
426 final FImage dilated = thresh.process(new Dilate(se));
427
428 if (DEBUG)
429 DisplayUtilities.display(dilated, "Candidate text-blobs");
430
431
432 final ConnectedComponentLabeler ccl = new
433 ConnectedComponentLabeler(ConnectMode.CONNECT_4);
434 final List<ConnectedComponent> ccs = ccl.findComponents(dilated);
435
436 System.out.println("Got " + ccs.size() + " connected components.");
437
438
439
440
441 int maxArea = 0;
442 for (final PixelSet cc : ccs)
443 maxArea = Math.max(maxArea, cc.calculateArea());
444
445
446 for (final Iterator<ConnectedComponent> cci = ccs.iterator(); cci.hasNext();)
447 if (cci.next().calculateArea() < maxArea / 20d)
448 cci.remove();
449
450
451
452 for (final Iterator<ConnectedComponent> cci = ccs.iterator(); cci.hasNext();)
453 {
454 final PixelSet cc = cci.next();
455 final Rectangle r = cc.calculateRegularBoundingBox();
456 if (r.width / r.height < 0.2)
457 cci.remove();
458 }
459
460 if (DEBUG) {
461 final MBFImage bb = new MBFImage(image.getWidth(), image.getHeight(), 3);
462 bb.createRenderer().drawImage(image, 0, 0);
463 final OrientatedBoundingBoxRenderer<Float[]> obbr = new
464 OrientatedBoundingBoxRenderer<Float[]>(bb, new Float[] { 1f, 1f, 0f });
465 ConnectedComponent.process(ccs, obbr);
466 DisplayUtilities.display(bb);
467 System.out.println("Continuing with " + ccs.size() + " connected components.");
468 }
469
470
471 for (final PixelSet cc : ccs)
472 {
473 if (cc.getPixels().size() < 20)
474 continue;
475
476
477 final Rectangle r = cc.calculateRegularBoundingBox();
478 r.scaleCentroid(boundingBoxPaddingPc);
479 FImage textArea = image.extractROI(r);
480
481
482 final OtsuThreshold o = new OtsuThreshold();
483 o.processImage(textArea);
484
485 if (DEBUG)
486 DisplayUtilities.display(textArea, "text area - before distortion");
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505 final SkewCorrector sc = new SkewCorrector();
506 sc.setAccuracy(4);
507 textArea = textArea.process(sc);
508
509
510 textAreas.put(r, textArea);
511
512 if (DEBUG)
513 DisplayUtilities.display(textArea, "text area - after distortion");
514 }
515
516 return textAreas;
517 }
518
519
520
521
522
523
524
525
526
527 public List<IndependentPair<Point2d, Point2d>> calculateHomography(Polygon p)
528 {
529
530 final List<IndependentPair<Point2d, Point2d>> pointPairs = new
531 ArrayList<IndependentPair<Point2d, Point2d>>();
532
533
534
535
536
537
538
539 final List<Point2d> v = p.getVertices();
540 final Point2d p1 = v.get(0);
541 final Point2d p2 = v.get(1);
542 final Point2d p3 = v.get(2);
543 final Point2d p4 = v.get(3);
544
545
546 final Point2d p1p = new Point2dImpl(p2.getX(), p1.getY());
547
548 final Point2d p2p = v.get(1);
549 final Point2d p3p = new Point2dImpl(p3.getX(), p2.getY());
550
551 final Point2d p4p = new Point2dImpl(p3p.getX(), p1.getY());
552
553
554 pointPairs.add(new IndependentPair<Point2d, Point2d>(p1, p1p));
555 pointPairs.add(new IndependentPair<Point2d, Point2d>(p2, p2p));
556 pointPairs.add(new IndependentPair<Point2d, Point2d>(p3, p3p));
557 pointPairs.add(new IndependentPair<Point2d, Point2d>(p4, p4p));
558
559 return pointPairs;
560 }
561
562
563
564
565
566
567
568 public float getBoundingBoxPaddingPc()
569 {
570 return boundingBoxPaddingPc;
571 }
572
573
574
575
576
577
578
579 public void setBoundingBoxPaddingPc(float boundingBoxPaddingPc)
580 {
581 this.boundingBoxPaddingPc = boundingBoxPaddingPc;
582 }
583
584
585
586
587
588
589 @Override
590 public Map<Rectangle, FImage> getTextRegions()
591 {
592 return this.textRegions;
593 }
594 }