1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 package org.openimaj.ml.clustering.kdtree;
31
32
33 import org.apache.commons.math.stat.descriptive.rank.Max;
34 import org.openimaj.data.DataSource;
35 import org.openimaj.math.matrix.MatrixUtils;
36 import org.openimaj.ml.clustering.SpatialClusterer;
37 import org.openimaj.util.array.IntArrayView;
38 import org.openimaj.util.pair.IntDoublePair;
39 import org.openimaj.util.tree.DoubleKDTree;
40 import org.openimaj.util.tree.DoubleKDTree.SplitChooser;
41
42 import Jama.Matrix;
43
44
45
46
47
48 public class DoubleKDTreeClusterer implements SpatialClusterer<KDTreeClusters, double[]>{
49 private static final int DEFAULT_MINPTS = 4;
50 private double DEFAULT_VARIANCE_PROP = 0.1;
51
52 double varprop = DEFAULT_VARIANCE_PROP ;
53 int minpts = DEFAULT_MINPTS;
54 int ndims = -1;
55 private int startindex = -1;
56 private SplitDetectionMode detectionMode = new SplitDetectionMode.VARIABLE_MEDIAN();
57
58 class CappedVarianceSplitChooser implements SplitChooser{
59
60
61 Matrix firstVariances = null;
62 Max max = new Max();
63
64 public CappedVarianceSplitChooser() {
65 }
66
67
68 @Override
69 public IntDoublePair chooseSplit(double[][] pnts, IntArrayView inds, int depth, double[] minBounds, double[] maxBounds) {
70
71 if(inds.size() < minpts) return null;
72
73 double[][] subarr = new double[inds.size()][ndims];
74 for (int i = 0; i < subarr.length; i++) {
75 double[] pnti = pnts[inds.get(i)];
76 for (int dim = startindex; dim < startindex + ndims; dim++) {
77 subarr[i][dim-startindex] = pnti[dim];
78 }
79 }
80
81 Matrix mat = new Matrix(subarr);
82 Matrix mean = MatrixUtils.sumCols(mat).times(1./inds.size());
83 Matrix var = MatrixUtils.sumCols(mat.arrayTimes(mat)).times(1./inds.size()).minus(mean.arrayTimes(mean));
84
85 if(firstVariances == null){
86 firstVariances = var;
87 double[] variances = var.getArray()[0];
88 if(max.evaluate(variances) == 0){
89 return null;
90 }
91 }
92
93 Matrix propchange = var.arrayRightDivide(firstVariances);
94 if(max.evaluate(propchange.getArray()[0]) < varprop){
95 return null;
96 }
97 else{
98 IntDoublePair maxDim = maxDim(MatrixUtils.abs(var).getArray()[0]);
99 double[] col = mat.getMatrix(0, inds.size()-1, maxDim.first, maxDim.first).transpose().getArray()[0];
100 double mid = detectionMode.detect(col);
101 return IntDoublePair.pair(maxDim.first+startindex,mid);
102 }
103 }
104
105
106 private IntDoublePair maxDim(double[] ds) {
107 double maxv = -Double.MAX_VALUE;
108 int maxi = -1;
109 for (int i = 0; i < ds.length; i++) {
110 if(maxv < ds[i]){
111 maxi = i;
112 maxv = ds[i];
113 }
114 }
115 return IntDoublePair.pair(maxi, maxv);
116 }
117
118 }
119
120
121
122
123 public DoubleKDTreeClusterer() {
124
125 }
126
127
128
129
130
131
132 public DoubleKDTreeClusterer(double varprop, int startindex, int ndims) {
133 this.varprop = varprop;
134 this.startindex = startindex;
135 this.ndims = ndims;
136 }
137
138
139
140
141
142
143
144
145 public DoubleKDTreeClusterer(SplitDetectionMode detectionMode, double varprop, int startindex, int ndims) {
146 this.detectionMode = detectionMode;
147 this.varprop = varprop;
148 this.startindex = startindex;
149 this.ndims = ndims;
150 }
151
152
153
154
155 public DoubleKDTreeClusterer(double varprop) {
156 this.varprop = varprop;
157 }
158
159 @Override
160 public int[][] performClustering(double[][] data) {
161 return cluster(data).clusters();
162 }
163
164 @Override
165 public KDTreeClusters cluster(double[][] data) {
166 if(ndims == -1)
167 {
168 this.startindex = 0;
169 ndims = data[0].length;
170 }
171
172 DoubleKDTree tree = new DoubleKDTree(data, new CappedVarianceSplitChooser());
173 return new KDTreeClusters(tree, ndims);
174 }
175
176 @Override
177 public KDTreeClusters cluster(DataSource<double[]> data) {
178 double[][] arrdata = new double[data.size()][];
179 for (int i = 0; i < arrdata.length; i++) {
180 arrdata[i] = data.getData(i);
181 }
182 return cluster(arrdata);
183 }
184
185 }