1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 package org.openimaj.image.text.extraction.swt;
31
32 import java.util.ArrayList;
33 import java.util.Collections;
34 import java.util.Comparator;
35 import java.util.List;
36
37 import org.openimaj.image.processing.threshold.OtsuThreshold;
38 import org.openimaj.util.pair.FloatFloatPair;
39
40
41
42
43
44
45
46
47 public class WordCandidate extends Candidate {
48
49
50
51 protected LineCandidate line;
52
53
54
55
56 protected List<LetterCandidate> letters = new ArrayList<LetterCandidate>();
57
58 protected static List<WordCandidate> extractWords(LineCandidate line, SWTTextDetector.Options options) {
59 final List<WordCandidate> words = new ArrayList<WordCandidate>();
60
61
62 Collections.sort(line.letters, new Comparator<LetterCandidate>() {
63 @Override
64 public int compare(LetterCandidate o1, LetterCandidate o2) {
65 return o1.centroid.x - o2.centroid.x;
66 }
67 });
68
69
70 final float[] spacings = new float[line.letters.size() - 1];
71
72 float mean = 0;
73 int rng = 0;
74 for (int i = 1; i < line.letters.size(); i++) {
75 final LetterCandidate left = line.letters.get(i - 1);
76 final LetterCandidate right = line.letters.get(i);
77
78 spacings[i - 1] = Math.max(0,
79 right.getRegularBoundingBox().x
80 - (left.getRegularBoundingBox().x + left.getRegularBoundingBox().width));
81 mean += spacings[i - 1];
82
83 if (spacings[i - 1] >= rng)
84 rng = (int) (spacings[i - 1] + 1);
85 }
86 mean /= spacings.length;
87
88
89 final FloatFloatPair threshVar = OtsuThreshold.calculateThresholdAndVariance(spacings, rng);
90 final float threshold = threshVar.first;
91 final float variance = threshVar.second;
92
93
94
95
96 if (Math.sqrt(variance) > mean * options.wordBreakdownRatio)
97 {
98 WordCandidate word = new WordCandidate();
99 word.line = line;
100 word.letters.add(line.letters.get(0));
101 words.add(word);
102 for (int i = 0; i < spacings.length; i++) {
103 if (spacings[i] > threshold) {
104 word = new WordCandidate();
105 words.add(word);
106 }
107 word.letters.add(line.letters.get(i + 1));
108 }
109 } else {
110 final WordCandidate word = new WordCandidate();
111 word.line = line;
112 word.letters = line.letters;
113 words.add(word);
114 }
115
116 for (final WordCandidate w : words) {
117 w.regularBoundingBox = LetterCandidate.computeBounds(w.letters);
118
119 for (final LetterCandidate letter : w.letters)
120 letter.word = w;
121 }
122
123 return words;
124 }
125
126
127
128
129
130
131 public List<LetterCandidate> getLetters() {
132 return letters;
133 }
134
135
136
137
138
139
140 public LineCandidate getLine() {
141 return line;
142 }
143 }