package org.openimaj.ml.classification.boosting;

import java.util.ArrayList;
import java.util.List;

import org.openimaj.ml.classification.LabelledDataProvider;
import org.openimaj.ml.classification.StumpClassifier;
import org.openimaj.util.pair.ObjectFloatPair;

/**
 * Binary AdaBoost trained over weighted decision stumps (StumpClassifier weak learners).
 */
public class AdaBoost {
    StumpClassifier.WeightedLearner factory = new StumpClassifier.WeightedLearner();

    public List<ObjectFloatPair<StumpClassifier>> learn(LabelledDataProvider trainingSet, int numberOfRounds) {
        // start with uniform weights over the training instances
        final float[] weights = new float[trainingSet.numInstances()];
        for (int i = 0; i < trainingSet.numInstances(); i++)
            weights[i] = 1.0f / trainingSet.numInstances();

        final boolean[] actualClasses = trainingSet.getClasses();

        // the boosted ensemble: each weak classifier paired with its weight (alpha)
        final List<ObjectFloatPair<StumpClassifier>> ensemble = new ArrayList<ObjectFloatPair<StumpClassifier>>();

        // perform the boosting rounds
        for (int t = 0; t < numberOfRounds; t++) {
            System.out.println("Iteration: " + t);

            // train a weak classifier (decision stump) against the current weights
            final ObjectFloatPair<StumpClassifier> h = factory.learn(trainingSet, weights);

            // evaluate the stump and accumulate its weighted training error, epsilon
            final boolean[] hClassification = new boolean[trainingSet.numInstances()];
            final float[] responses = trainingSet.getFeatureResponse(h.first.dimension);
            double epsilon = 0.0;
            for (int i = 0; i < trainingSet.numInstances(); i++) {
                hClassification[i] = h.first.classify(responses[i]);
                epsilon += hClassification[i] != actualClasses[i] ? weights[i] : 0.0;
            }

            // stop if the weak classifier is no better than chance
            if (epsilon >= 0.5)
                break;

            // classifier weight: alpha = 0.5 * ln((1 - epsilon) / epsilon)
            final float alpha = (float) (0.5 * Math.log((1 - epsilon) / epsilon));

            // re-weight the instances: correctly classified instances are down-weighted,
            // misclassified instances are up-weighted
            float weightsSum = 0.0f;
            for (int i = 0; i < trainingSet.numInstances(); i++) {
                weights[i] *= Math.exp(-alpha * (actualClasses[i] ? 1 : -1) * (hClassification[i] ? 1 : -1));
                weightsSum += weights[i];
            }

            // normalise the weights so they sum to one
            for (int i = 0; i < trainingSet.numInstances(); i++)
                weights[i] /= weightsSum;

            // add the weighted weak classifier to the ensemble
            ensemble.add(new ObjectFloatPair<StumpClassifier>(h.first, alpha));

            // stop early if the training data is perfectly classified
            if (epsilon == 0.0)
                break;
        }

        return ensemble;
    }

    public void printClassificationQuality(LabelledDataProvider data, List<ObjectFloatPair<StumpClassifier>> ensemble,
            float threshold)
    {
        // confusion matrix counts
        int tp = 0;
        int fn = 0;
        int tn = 0;
        int fp = 0;

        final int ninstances = data.numInstances();
        final boolean[] classes = data.getClasses();
        for (int i = 0; i < ninstances; i++) {
            final float[] feature = data.getInstanceFeature(i);

            final boolean predicted = AdaBoost.classify(feature, ensemble, threshold);
            final boolean actual = classes[i];

            if (actual) {
                if (predicted)
                    tp++;
                else
                    fn++;
            } else {
                if (predicted)
                    fp++;
                else
                    tn++;
            }
        }

        System.out.format("TP: %d\tFN: %d\tFP: %d\tTN: %d\n", tp, fn, fp, tn);

        // false-positive and true-positive rates
        final float fpr = (float) fp / (float) (fp + tn);
        final float tpr = (float) tp / (float) (tp + fn);

        System.out.format("FPR: %2.2f\tTPR: %2.2f\n", fpr, tpr);
    }

    // classify by thresholding the ensemble's weighted vote at zero
    public static boolean classify(float[] data, List<ObjectFloatPair<StumpClassifier>> ensemble) {
        return classify(data, ensemble, 0f);
    }

    public static boolean classify(float[] data, List<ObjectFloatPair<StumpClassifier>> ensemble, float threshold) {
        // weighted vote: each stump votes +1 or -1, scaled by its alpha
        double classification = 0.0;

        for (int t = 0; t < ensemble.size(); t++)
            classification += ensemble.get(t).second * (ensemble.get(t).first.classify(data) ? 1 : -1);

        return classification > threshold;
    }
}
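
/*
 * Illustrative usage sketch (not part of the original class). It assumes you already have
 * some LabelledDataProvider implementation, here called `provider`, wrapping your feature
 * responses and boolean labels; `someFeature` is a hypothetical feature vector to classify.
 *
 *     AdaBoost boosting = new AdaBoost();
 *     List<ObjectFloatPair<StumpClassifier>> ensemble = boosting.learn(provider, 100);
 *
 *     // confusion matrix plus FPR/TPR on the training data, thresholding the vote at 0
 *     boosting.printClassificationQuality(provider, ensemble, 0f);
 *
 *     // predict the class of a single instance
 *     boolean positive = AdaBoost.classify(someFeature, ensemble);
 */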