001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.ml.clustering.dbscan; 031 032import gnu.trove.list.TIntList; 033import gnu.trove.list.array.TIntArrayList; 034import gnu.trove.map.hash.TIntObjectHashMap; 035import gnu.trove.procedure.TIntObjectProcedure; 036import gnu.trove.procedure.TIntProcedure; 037import gnu.trove.set.hash.TIntHashSet; 038 039import java.util.List; 040 041import org.openimaj.ml.clustering.dbscan.neighbourhood.RegionMode; 042import org.openimaj.util.pair.IntDoublePair; 043 044/** 045 * Implementation of DBSCAN (http://en.wikipedia.org/wiki/DBSCAN) using 046 * a 047 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 048 * 049 */ 050public class DBSCAN { 051 protected boolean noiseAsClusters = false; 052 /** 053 * 054 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 055 */ 056 public static class State{ 057 058 TIntHashSet visited = new TIntHashSet(); 059 TIntHashSet noise = new TIntHashSet(); 060 TIntHashSet addedToCluster = new TIntHashSet(); 061 TIntObjectHashMap<TIntList> clusters = new TIntObjectHashMap<TIntList>(); 062 private RegionMode<IntDoublePair> regionMode; 063 private int length; 064 private boolean noiseAsClusters; 065 066 /** 067 * 068 * @param length 069 * @param regionMode 070 */ 071 public State(int length, RegionMode<IntDoublePair> regionMode) 072 { 073 this(length, regionMode, false); 074 } 075 076 /** 077 * @param length 078 * @param regionMode 079 * @param noiseAsClusters treat noise as isolated clusters 080 */ 081 public State(int length, RegionMode<IntDoublePair> regionMode, boolean noiseAsClusters){ 082 this.regionMode = regionMode; 083 this.length = length; 084 this.noiseAsClusters = noiseAsClusters; 085 } 086 } 087 088 DoubleDBSCANClusters dbscan(final State state) { 089 final int[] clusterIndex = new int[]{0}; 090 for (int p = 0; p < state.length; p++) { 091 if(state.visited.contains(p))continue; 092 state.visited.add(p); 093 List<IntDoublePair> region = state.regionMode.regionQuery(p); 094 if(!state.regionMode.validRegion(region)){ 095 state.noise.add(p); 096 } 097 else{ 098 TIntList cluster = new TIntArrayList(); 099 state.clusters.put(clusterIndex[0], cluster); 100 expandCluster(p,region,cluster,state); 101 clusterIndex[0]++; 102 } 103 } 104 105 if(state.noiseAsClusters){ 106 state.noise.forEach(new TIntProcedure() { 107 108 @Override 109 public boolean execute(int value) { 110 TIntArrayList arr = new TIntArrayList(); 111 arr.add(value); 112 state.clusters.put(clusterIndex[0]++, arr); 113 return true; 114 } 115 }); 116 } 117 final int[][] clusterMembers = new int[state.clusters.size()][]; 118 final int[] nEntries = new int[1]; 119 state.clusters.forEachEntry(new TIntObjectProcedure<TIntList>() { 120 @Override 121 public boolean execute(int cluster, TIntList b) { 122 clusterMembers[cluster] = b.toArray(); 123 nEntries[0] += clusterMembers[cluster].length; 124 return true; 125 } 126 }); 127 int[] noise = state.noise.toArray(); 128 129 final DoubleDBSCANClusters dbscanClusters = new DoubleDBSCANClusters(noise, clusterMembers); 130 return dbscanClusters; 131 } 132 133 private void expandCluster(int p, List<IntDoublePair> region, TIntList cluster, State state) { 134 addToCluster(p,cluster,state); 135 for (int regionIndex = 0; regionIndex < region.size(); regionIndex++) { 136 int pprime = region.get(regionIndex).first; 137 if (!state.visited.contains(pprime)){ 138 state.visited.add(pprime); 139 List<IntDoublePair> regionPrime = state.regionMode.regionQuery(pprime); 140 if(state.regionMode.validRegion(regionPrime)) 141 region.addAll(regionPrime); 142 else 143 state.noise.add(pprime); 144 } 145 146 addToCluster(pprime, cluster, state); 147 } 148 } 149 150 private void addToCluster(int p, TIntList cluster, State state) { 151 if(!state.addedToCluster.contains(p)){ 152 // This point is now certainly at the very least DENSITY reachable and must not be noise 153 state.noise.remove(p); 154 cluster.add(p); 155 state.addedToCluster.add(p); 156 } 157 } 158 159 /** 160 * Treat noise as clusters on their own 161 * @param b 162 */ 163 public void setNoiseAsClusters(boolean b) { 164 this.noiseAsClusters = b; 165 } 166}