1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 package org.openimaj.ml.clustering.kdtree;
31
32 import java.util.ArrayList;
33 import java.util.HashSet;
34 import java.util.List;
35 import java.util.Set;
36
37 import org.apache.log4j.Logger;
38
39
40
41
42
43
44 public class ClusterTestDataLoader{
45
46
47
48
49
50 public static class TestStats{
51
52
53
54 public double eps;
55
56
57
58 public int minpts;
59
60
61
62 public int ncluster;
63
64
65
66 public int noutliers;
67
68
69
70 public double mineps;
71 }
72 private int percluster = -1;
73 private boolean outliers = true;
74
75
76
77
78
79 public ClusterTestDataLoader() {
80 this.percluster = -1;
81 }
82
83
84
85
86
87
88 public ClusterTestDataLoader(int percluster, boolean outliers) {
89 this.percluster = percluster;
90 this.outliers = outliers;
91 }
92
93 private Logger logger = Logger.getLogger(ClusterTestDataLoader.class);
94 private TestStats testStats;
95 private int[][] testClusters;
96 private double[][] testData;
97
98
99
100
101 private TestStats readTestStats(String[] data) {
102 ClusterTestDataLoader.TestStats ret = new TestStats();
103 int i = 0;
104 ret.eps = Double.parseDouble(data[i++].split("=")[1].trim());
105 ret.minpts = Integer.parseInt(data[i++].split("=")[1].trim());
106 ret.ncluster = Integer.parseInt(data[i++].split("=")[1].trim());
107 ret.noutliers = Integer.parseInt(data[i++].split("=")[1].trim());
108 ret.mineps = Double.parseDouble(data[i++].split("=")[1].trim());
109 return ret;
110 }
111
112
113
114
115
116
117 private int[][] readTestClusters(String[] data) {
118 int i = 0;
119 for (;data[i].length()!=0; i++);
120 for (i=i+1;data[i].length()!=0; i++);
121 List<int[]> clusters = new ArrayList<int[]>();
122 int count = 0;
123 for (i=i+1;i<data.length; i++){
124 int[] readIntDataLine = readIntDataLine(data[i]);
125 clusters.add(readIntDataLine);
126 count += readIntDataLine.length;
127 }
128 logger .debug(String.format("Loading %d items in %d clusters\n",count,clusters.size()));
129 return clusters.toArray(new int[clusters.size()][]);
130 }
131
132
133
134
135
136
137 public int[] readIntDataLine(String string) {
138 String[] split = string.split(",");
139 int[] arr = new int[split.length-1];
140 int i = 0;
141
142 for (String s : split) {
143 if(s.contains("<"))continue;
144 s = s.replace(">", "").trim();
145 arr[i++] = Integer.parseInt(s)-1;
146
147 }
148 return arr;
149 }
150
151
152
153
154 private double[][] readTestData(String[] data) {
155
156 int i = 0;
157 for (;data[i].length()!=0; i++);
158 List<double[]> dataL = new ArrayList<double[]>();
159 int start = i+1;
160 for (i=start;data[i].length()!=0; i++){
161 dataL.add(readDataLine(data[i]));
162 }
163 logger.debug(String.format("Loading %d data items\n",dataL.size()));
164 return dataL.toArray(new double[dataL.size()][]);
165 }
166 private Set<Integer> existing(int[][] correct) {
167 Set<Integer> exist = new HashSet<Integer>();
168 for (int[] is : correct) {
169 for (int i : is) {
170 exist.add(i);
171 }
172 }
173 return exist;
174 }
175
176 private double[] readDataLine(String string) {
177 String[] split = string.split(" ");
178 double[] arr = new double[]{
179 Double.parseDouble(split[1]),
180 Double.parseDouble(split[2])
181 };
182 return arr;
183 }
184
185 public void prepare(String[] data) {
186 this.testStats = this.readTestStats(data);
187 this.testClusters = this.readTestClusters(data);
188 this.testData = this.readTestData(data);
189 correctClusters();
190 }
191
192 private void correctClusters() {
193
194 if(this.percluster != -1){
195 double[][] correctedData = null;
196 int[][] correctedClusters = new int[this.testClusters.length][this.percluster];
197 int seen ;
198 if(this.outliers){
199 seen = this.testStats.noutliers;
200 correctedData= new double[this.percluster * this.testClusters.length + seen][];
201 for (int i = 0; i < seen; i++) {
202 correctedData[i] = this.testData[i];
203 }
204
205 }
206 else{
207 seen = 0;
208 correctedData = new double[this.percluster * this.testClusters.length][];
209 }
210 for (int i = 0; i < this.testClusters.length; i++) {
211 int[] clust = this.testClusters[i];
212 for (int j = 0; j < this.percluster; j++) {
213 int d = clust[j];
214 correctedData[seen] = this.testData[d];
215 correctedClusters[i][j] = seen;
216 seen++;
217 }
218 }
219
220 this.testClusters = correctedClusters;
221 this.testData = correctedData;
222 }
223 }
224
225 public TestStats getTestStats() {
226 return this.testStats;
227 }
228
229 public double[][] getTestData() {
230 return this.testData;
231 }
232
233 public int[][] getTestClusters() {
234 return this.testClusters;
235 }
236 }