001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.ml.clustering.kdtree;
031
032
033import org.apache.commons.math.stat.descriptive.rank.Max;
034import org.openimaj.data.DataSource;
035import org.openimaj.math.matrix.MatrixUtils;
036import org.openimaj.ml.clustering.SpatialClusterer;
037import org.openimaj.util.array.IntArrayView;
038import org.openimaj.util.pair.IntDoublePair;
039import org.openimaj.util.tree.DoubleKDTree;
040import org.openimaj.util.tree.DoubleKDTree.SplitChooser;
041
042import Jama.Matrix;
043
044/**
045 *
046 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
047 */
048public class DoubleKDTreeClusterer implements SpatialClusterer<KDTreeClusters, double[]>{
049        private static final int DEFAULT_MINPTS = 4;
050        private double DEFAULT_VARIANCE_PROP = 0.1;
051        
052        double varprop = DEFAULT_VARIANCE_PROP ;
053        int minpts = DEFAULT_MINPTS;
054        int ndims = -1;
055        private int startindex = -1;
056        private SplitDetectionMode detectionMode = new SplitDetectionMode.VARIABLE_MEDIAN();
057//      private SplitDetectionMode detectionMode = new SplitDetectionMode.MEAN();
058        class CappedVarianceSplitChooser implements SplitChooser{
059                
060                
061                Matrix firstVariances = null;
062                Max max = new Max();
063                
064                public CappedVarianceSplitChooser() {
065                }
066
067
068                @Override
069                public IntDoublePair chooseSplit(double[][] pnts, IntArrayView inds, int depth, double[] minBounds, double[] maxBounds) {
070                         
071                        if(inds.size() < minpts) return null;
072                        
073                        double[][] subarr = new double[inds.size()][ndims];
074                        for (int i = 0; i < subarr.length; i++) {
075                                double[] pnti = pnts[inds.get(i)];
076                                for (int dim = startindex; dim < startindex + ndims; dim++) {                                   
077                                        subarr[i][dim-startindex] = pnti[dim];
078                                }
079                        }
080                        
081                        Matrix mat = new Matrix(subarr);
082                        Matrix mean = MatrixUtils.sumCols(mat).times(1./inds.size());
083                        Matrix var = MatrixUtils.sumCols(mat.arrayTimes(mat)).times(1./inds.size()).minus(mean.arrayTimes(mean));
084                        
085                        if(firstVariances == null){
086                                firstVariances = var;
087                                double[] variances = var.getArray()[0];
088                                if(max.evaluate(variances) == 0){
089                                        return null; // special case, if the first variance is null we've been handed ALL the same, screw it
090                                }
091                        }
092                        
093                        Matrix propchange = var.arrayRightDivide(firstVariances);
094                        if(max.evaluate(propchange.getArray()[0]) < varprop){
095                                return null;
096                        }
097                        else{
098                                IntDoublePair maxDim = maxDim(MatrixUtils.abs(var).getArray()[0]);
099                                double[] col = mat.getMatrix(0, inds.size()-1, maxDim.first, maxDim.first).transpose().getArray()[0];
100                                double mid = detectionMode.detect(col);
101                                return IntDoublePair.pair(maxDim.first+startindex,mid);
102                        }
103                }
104
105
106                private IntDoublePair maxDim(double[] ds) {
107                        double maxv = -Double.MAX_VALUE;
108                        int maxi = -1;
109                        for (int i = 0; i < ds.length; i++) {
110                                if(maxv < ds[i]){
111                                        maxi = i;
112                                        maxv = ds[i];
113                                }
114                        }
115                        return IntDoublePair.pair(maxi, maxv);
116                }
117                
118        }
119        
120        /**
121         * calls: {@link #DoubleKDTreeClusterer()} with 0.01
122         */
123        public DoubleKDTreeClusterer() {
124                
125        }
126        
127        /**
128         * @param varprop the proportion of variance change from the root variance before splitting stops
129         * @param startindex the index to start from
130         * @param ndims the number of dimensions to split on
131         */
132        public DoubleKDTreeClusterer(double varprop, int startindex, int ndims) {
133                this.varprop = varprop;
134                this.startindex = startindex;
135                this.ndims = ndims;
136        }
137        
138        /**
139         * @param detectionMode The {@link SplitDetectionMode} given the feature of highest variance
140         * @param varprop The minimum proportional variance (compared to the first variance)
141         * @param startindex the feature start index
142         * @param ndims the number of features to use
143         * 
144         */
145        public DoubleKDTreeClusterer(SplitDetectionMode detectionMode, double varprop, int startindex, int ndims) {
146                this.detectionMode = detectionMode;
147                this.varprop = varprop;
148                this.startindex = startindex;
149                this.ndims = ndims;
150        }
151        
152        /**
153         * @param varprop the proportion of variance change from the root variance before splitting stops
154         */
155        public DoubleKDTreeClusterer(double varprop) {
156                this.varprop = varprop;
157        }
158
159        @Override
160        public int[][] performClustering(double[][] data) {
161                return cluster(data).clusters();
162        }
163
164        @Override
165        public KDTreeClusters cluster(double[][] data) {
166                if(ndims == -1)
167                {
168                        this.startindex = 0;
169                        ndims = data[0].length;
170                }
171                
172                DoubleKDTree tree = new DoubleKDTree(data, new CappedVarianceSplitChooser());
173                return new KDTreeClusters(tree, ndims);
174        }
175
176        @Override
177        public KDTreeClusters cluster(DataSource<double[]> data) {
178                double[][] arrdata = new double[data.size()][];
179                for (int i = 0; i < arrdata.length; i++) {
180                        arrdata[i] = data.getData(i);
181                }
182                return cluster(arrdata);
183        }
184
185}