1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 package org.openimaj.ml.clustering.dbscan;
31
32 import gnu.trove.list.TIntList;
33 import gnu.trove.list.array.TIntArrayList;
34 import gnu.trove.map.hash.TIntObjectHashMap;
35 import gnu.trove.procedure.TIntObjectProcedure;
36 import gnu.trove.procedure.TIntProcedure;
37 import gnu.trove.set.hash.TIntHashSet;
38
39 import java.util.List;
40
41 import org.openimaj.ml.clustering.dbscan.neighbourhood.RegionMode;
42 import org.openimaj.util.pair.IntDoublePair;
43
44
45
46
47
48
49
50 public class DBSCAN {
51 protected boolean noiseAsClusters = false;
52
53
54
55
56 public static class State{
57
58 TIntHashSet visited = new TIntHashSet();
59 TIntHashSet noise = new TIntHashSet();
60 TIntHashSet addedToCluster = new TIntHashSet();
61 TIntObjectHashMap<TIntList> clusters = new TIntObjectHashMap<TIntList>();
62 private RegionMode<IntDoublePair> regionMode;
63 private int length;
64 private boolean noiseAsClusters;
65
66
67
68
69
70
71 public State(int length, RegionMode<IntDoublePair> regionMode)
72 {
73 this(length, regionMode, false);
74 }
75
76
77
78
79
80
81 public State(int length, RegionMode<IntDoublePair> regionMode, boolean noiseAsClusters){
82 this.regionMode = regionMode;
83 this.length = length;
84 this.noiseAsClusters = noiseAsClusters;
85 }
86 }
87
88 DoubleDBSCANClusters dbscan(final State state) {
89 final int[] clusterIndex = new int[]{0};
90 for (int p = 0; p < state.length; p++) {
91 if(state.visited.contains(p))continue;
92 state.visited.add(p);
93 List<IntDoublePair> region = state.regionMode.regionQuery(p);
94 if(!state.regionMode.validRegion(region)){
95 state.noise.add(p);
96 }
97 else{
98 TIntList cluster = new TIntArrayList();
99 state.clusters.put(clusterIndex[0], cluster);
100 expandCluster(p,region,cluster,state);
101 clusterIndex[0]++;
102 }
103 }
104
105 if(state.noiseAsClusters){
106 state.noise.forEach(new TIntProcedure() {
107
108 @Override
109 public boolean execute(int value) {
110 TIntArrayList arr = new TIntArrayList();
111 arr.add(value);
112 state.clusters.put(clusterIndex[0]++, arr);
113 return true;
114 }
115 });
116 }
117 final int[][] clusterMembers = new int[state.clusters.size()][];
118 final int[] nEntries = new int[1];
119 state.clusters.forEachEntry(new TIntObjectProcedure<TIntList>() {
120 @Override
121 public boolean execute(int cluster, TIntList b) {
122 clusterMembers[cluster] = b.toArray();
123 nEntries[0] += clusterMembers[cluster].length;
124 return true;
125 }
126 });
127 int[] noise = state.noise.toArray();
128
129 final DoubleDBSCANClusters dbscanClusters = new DoubleDBSCANClusters(noise, clusterMembers);
130 return dbscanClusters;
131 }
132
133 private void expandCluster(int p, List<IntDoublePair> region, TIntList cluster, State state) {
134 addToCluster(p,cluster,state);
135 for (int regionIndex = 0; regionIndex < region.size(); regionIndex++) {
136 int pprime = region.get(regionIndex).first;
137 if (!state.visited.contains(pprime)){
138 state.visited.add(pprime);
139 List<IntDoublePair> regionPrime = state.regionMode.regionQuery(pprime);
140 if(state.regionMode.validRegion(regionPrime))
141 region.addAll(regionPrime);
142 else
143 state.noise.add(pprime);
144 }
145
146 addToCluster(pprime, cluster, state);
147 }
148 }
149
150 private void addToCluster(int p, TIntList cluster, State state) {
151 if(!state.addedToCluster.contains(p)){
152
153 state.noise.remove(p);
154 cluster.add(p);
155 state.addedToCluster.add(p);
156 }
157 }
158
159
160
161
162
163 public void setNoiseAsClusters(boolean b) {
164 this.noiseAsClusters = b;
165 }
166 }