View Javadoc

1   /**
2    * Copyright (c) 2011, The University of Southampton and the individual contributors.
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without modification,
6    * are permitted provided that the following conditions are met:
7    *
8    *   * 	Redistributions of source code must retain the above copyright notice,
9    * 	this list of conditions and the following disclaimer.
10   *
11   *   *	Redistributions in binary form must reproduce the above copyright notice,
12   * 	this list of conditions and the following disclaimer in the documentation
13   * 	and/or other materials provided with the distribution.
14   *
15   *   *	Neither the name of the University of Southampton nor the names of its
16   * 	contributors may be used to endorse or promote products derived from this
17   * 	software without specific prior written permission.
18   *
19   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22   * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23   * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26   * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28   * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29   */
30  package org.openimaj.ml.clustering.dbscan;
31  
32  import gnu.trove.list.TIntList;
33  import gnu.trove.list.array.TIntArrayList;
34  import gnu.trove.map.hash.TIntObjectHashMap;
35  import gnu.trove.procedure.TIntObjectProcedure;
36  import gnu.trove.procedure.TIntProcedure;
37  import gnu.trove.set.hash.TIntHashSet;
38  
39  import java.util.List;
40  
41  import org.openimaj.ml.clustering.dbscan.neighbourhood.RegionMode;
42  import org.openimaj.util.pair.IntDoublePair;
43  
44  /**
45   * Implementation of DBSCAN (http://en.wikipedia.org/wiki/DBSCAN) using
46   * a
47   * @author Sina Samangooei (ss@ecs.soton.ac.uk)
48   *
49   */
50  public class DBSCAN {
51  	protected boolean noiseAsClusters = false;
52  	/**
53  	 *
54  	 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
55  	 */
56  	public static class State{
57  
58  		TIntHashSet visited = new TIntHashSet();
59  		TIntHashSet noise = new TIntHashSet();
60  		TIntHashSet addedToCluster = new TIntHashSet();
61  		TIntObjectHashMap<TIntList> clusters = new TIntObjectHashMap<TIntList>();
62  		private RegionMode<IntDoublePair> regionMode;
63  		private int length;
64  		private boolean noiseAsClusters;
65  		
66  		/**
67  		 * 
68  		 * @param length
69  		 * @param regionMode
70  		 */
71  		public State(int length, RegionMode<IntDoublePair> regionMode)
72  		{
73  			this(length, regionMode, false);
74  		}
75  		
76  		/**
77  		 * @param length
78  		 * @param regionMode
79  		 * @param noiseAsClusters treat noise as isolated clusters
80  		 */
81  		public State(int length, RegionMode<IntDoublePair> regionMode, boolean noiseAsClusters){
82  			this.regionMode = regionMode;
83  			this.length = length;
84  			this.noiseAsClusters = noiseAsClusters;
85  		}
86  	}
87  
88  	DoubleDBSCANClusters dbscan(final State state) {
89  		final int[] clusterIndex = new int[]{0};
90  		for (int p = 0; p < state.length; p++) {
91  			if(state.visited.contains(p))continue;
92  			state.visited.add(p);
93  			List<IntDoublePair> region = state.regionMode.regionQuery(p);
94  			if(!state.regionMode.validRegion(region)){
95  				state.noise.add(p);
96  			}
97  			else{
98  				TIntList cluster = new TIntArrayList();
99  				state.clusters.put(clusterIndex[0], cluster);
100 				expandCluster(p,region,cluster,state);
101 				clusterIndex[0]++;
102 			}
103 		}
104 		
105 		if(state.noiseAsClusters){
106 			state.noise.forEach(new TIntProcedure() {
107 				
108 				@Override
109 				public boolean execute(int value) {
110 					TIntArrayList arr = new TIntArrayList();
111 					arr.add(value);
112 					state.clusters.put(clusterIndex[0]++, arr);
113 					return true;
114 				}
115 			});
116 		}
117 		final int[][] clusterMembers = new int[state.clusters.size()][];
118 		final int[] nEntries = new int[1];
119 		state.clusters.forEachEntry(new TIntObjectProcedure<TIntList>() {
120 			@Override
121 			public boolean execute(int cluster, TIntList b) {
122 				clusterMembers[cluster] = b.toArray();
123 				nEntries[0] += clusterMembers[cluster].length; 
124 				return true;
125 			}
126 		});
127 		int[] noise = state.noise.toArray();
128 		
129 		final DoubleDBSCANClusters dbscanClusters = new DoubleDBSCANClusters(noise, clusterMembers);
130 		return dbscanClusters;
131 	}
132 
133 	private void expandCluster(int p, List<IntDoublePair> region, TIntList cluster, State state) {
134 		addToCluster(p,cluster,state);
135 		for (int regionIndex = 0; regionIndex < region.size(); regionIndex++) {
136 			int pprime = region.get(regionIndex).first;
137 			if (!state.visited.contains(pprime)){
138 				state.visited.add(pprime);
139 				List<IntDoublePair> regionPrime = state.regionMode.regionQuery(pprime);
140 				if(state.regionMode.validRegion(regionPrime)) 
141 					region.addAll(regionPrime);
142 				else
143 					state.noise.add(pprime);
144 			}
145 
146 			addToCluster(pprime, cluster, state);
147 		}
148 	}
149 
150 	private void addToCluster(int p, TIntList cluster, State state) {
151 		if(!state.addedToCluster.contains(p)){
152 			// This point is now certainly at the very least DENSITY reachable and must not be noise
153 			state.noise.remove(p); 
154 			cluster.add(p);
155 			state.addedToCluster.add(p);
156 		}
157 	}
158 	
159 	/**
160 	 * Treat noise as clusters on their own
161 	 * @param b
162 	 */
163 	public void setNoiseAsClusters(boolean b) {
164 		this.noiseAsClusters  = b;
165 	}
166 }