001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.ml.clustering.dbscan;
031
032import gnu.trove.list.TIntList;
033import gnu.trove.list.array.TIntArrayList;
034import gnu.trove.map.hash.TIntObjectHashMap;
035import gnu.trove.procedure.TIntObjectProcedure;
036import gnu.trove.procedure.TIntProcedure;
037import gnu.trove.set.hash.TIntHashSet;
038
039import java.util.List;
040
041import org.openimaj.ml.clustering.dbscan.neighbourhood.RegionMode;
042import org.openimaj.util.pair.IntDoublePair;
043
044/**
045 * Implementation of DBSCAN (http://en.wikipedia.org/wiki/DBSCAN) using
046 * a
047 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
048 *
049 */
050public class DBSCAN {
051        protected boolean noiseAsClusters = false;
052        /**
053         *
054         * @author Sina Samangooei (ss@ecs.soton.ac.uk)
055         */
056        public static class State{
057
058                TIntHashSet visited = new TIntHashSet();
059                TIntHashSet noise = new TIntHashSet();
060                TIntHashSet addedToCluster = new TIntHashSet();
061                TIntObjectHashMap<TIntList> clusters = new TIntObjectHashMap<TIntList>();
062                private RegionMode<IntDoublePair> regionMode;
063                private int length;
064                private boolean noiseAsClusters;
065                
066                /**
067                 * 
068                 * @param length
069                 * @param regionMode
070                 */
071                public State(int length, RegionMode<IntDoublePair> regionMode)
072                {
073                        this(length, regionMode, false);
074                }
075                
076                /**
077                 * @param length
078                 * @param regionMode
079                 * @param noiseAsClusters treat noise as isolated clusters
080                 */
081                public State(int length, RegionMode<IntDoublePair> regionMode, boolean noiseAsClusters){
082                        this.regionMode = regionMode;
083                        this.length = length;
084                        this.noiseAsClusters = noiseAsClusters;
085                }
086        }
087
088        DoubleDBSCANClusters dbscan(final State state) {
089                final int[] clusterIndex = new int[]{0};
090                for (int p = 0; p < state.length; p++) {
091                        if(state.visited.contains(p))continue;
092                        state.visited.add(p);
093                        List<IntDoublePair> region = state.regionMode.regionQuery(p);
094                        if(!state.regionMode.validRegion(region)){
095                                state.noise.add(p);
096                        }
097                        else{
098                                TIntList cluster = new TIntArrayList();
099                                state.clusters.put(clusterIndex[0], cluster);
100                                expandCluster(p,region,cluster,state);
101                                clusterIndex[0]++;
102                        }
103                }
104                
105                if(state.noiseAsClusters){
106                        state.noise.forEach(new TIntProcedure() {
107                                
108                                @Override
109                                public boolean execute(int value) {
110                                        TIntArrayList arr = new TIntArrayList();
111                                        arr.add(value);
112                                        state.clusters.put(clusterIndex[0]++, arr);
113                                        return true;
114                                }
115                        });
116                }
117                final int[][] clusterMembers = new int[state.clusters.size()][];
118                final int[] nEntries = new int[1];
119                state.clusters.forEachEntry(new TIntObjectProcedure<TIntList>() {
120                        @Override
121                        public boolean execute(int cluster, TIntList b) {
122                                clusterMembers[cluster] = b.toArray();
123                                nEntries[0] += clusterMembers[cluster].length; 
124                                return true;
125                        }
126                });
127                int[] noise = state.noise.toArray();
128                
129                final DoubleDBSCANClusters dbscanClusters = new DoubleDBSCANClusters(noise, clusterMembers);
130                return dbscanClusters;
131        }
132
133        private void expandCluster(int p, List<IntDoublePair> region, TIntList cluster, State state) {
134                addToCluster(p,cluster,state);
135                for (int regionIndex = 0; regionIndex < region.size(); regionIndex++) {
136                        int pprime = region.get(regionIndex).first;
137                        if (!state.visited.contains(pprime)){
138                                state.visited.add(pprime);
139                                List<IntDoublePair> regionPrime = state.regionMode.regionQuery(pprime);
140                                if(state.regionMode.validRegion(regionPrime)) 
141                                        region.addAll(regionPrime);
142                                else
143                                        state.noise.add(pprime);
144                        }
145
146                        addToCluster(pprime, cluster, state);
147                }
148        }
149
150        private void addToCluster(int p, TIntList cluster, State state) {
151                if(!state.addedToCluster.contains(p)){
152                        // This point is now certainly at the very least DENSITY reachable and must not be noise
153                        state.noise.remove(p); 
154                        cluster.add(p);
155                        state.addedToCluster.add(p);
156                }
157        }
158        
159        /**
160         * Treat noise as clusters on their own
161         * @param b
162         */
163        public void setNoiseAsClusters(boolean b) {
164                this.noiseAsClusters  = b;
165        }
166}