001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.ml.clustering.kdtree; 031 032 033import org.apache.commons.math.stat.descriptive.rank.Max; 034import org.openimaj.data.DataSource; 035import org.openimaj.math.matrix.MatrixUtils; 036import org.openimaj.ml.clustering.SpatialClusterer; 037import org.openimaj.util.array.IntArrayView; 038import org.openimaj.util.pair.IntDoublePair; 039import org.openimaj.util.tree.DoubleKDTree; 040import org.openimaj.util.tree.DoubleKDTree.SplitChooser; 041 042import Jama.Matrix; 043 044/** 045 * 046 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 047 */ 048public class DoubleKDTreeClusterer implements SpatialClusterer<KDTreeClusters, double[]>{ 049 private static final int DEFAULT_MINPTS = 4; 050 private double DEFAULT_VARIANCE_PROP = 0.1; 051 052 double varprop = DEFAULT_VARIANCE_PROP ; 053 int minpts = DEFAULT_MINPTS; 054 int ndims = -1; 055 private int startindex = -1; 056 private SplitDetectionMode detectionMode = new SplitDetectionMode.VARIABLE_MEDIAN(); 057// private SplitDetectionMode detectionMode = new SplitDetectionMode.MEAN(); 058 class CappedVarianceSplitChooser implements SplitChooser{ 059 060 061 Matrix firstVariances = null; 062 Max max = new Max(); 063 064 public CappedVarianceSplitChooser() { 065 } 066 067 068 @Override 069 public IntDoublePair chooseSplit(double[][] pnts, IntArrayView inds, int depth, double[] minBounds, double[] maxBounds) { 070 071 if(inds.size() < minpts) return null; 072 073 double[][] subarr = new double[inds.size()][ndims]; 074 for (int i = 0; i < subarr.length; i++) { 075 double[] pnti = pnts[inds.get(i)]; 076 for (int dim = startindex; dim < startindex + ndims; dim++) { 077 subarr[i][dim-startindex] = pnti[dim]; 078 } 079 } 080 081 Matrix mat = new Matrix(subarr); 082 Matrix mean = MatrixUtils.sumCols(mat).times(1./inds.size()); 083 Matrix var = MatrixUtils.sumCols(mat.arrayTimes(mat)).times(1./inds.size()).minus(mean.arrayTimes(mean)); 084 085 if(firstVariances == null){ 086 firstVariances = var; 087 double[] variances = var.getArray()[0]; 088 if(max.evaluate(variances) == 0){ 089 return null; // special case, if the first variance is null we've been handed ALL the same, screw it 090 } 091 } 092 093 Matrix propchange = var.arrayRightDivide(firstVariances); 094 if(max.evaluate(propchange.getArray()[0]) < varprop){ 095 return null; 096 } 097 else{ 098 IntDoublePair maxDim = maxDim(MatrixUtils.abs(var).getArray()[0]); 099 double[] col = mat.getMatrix(0, inds.size()-1, maxDim.first, maxDim.first).transpose().getArray()[0]; 100 double mid = detectionMode.detect(col); 101 return IntDoublePair.pair(maxDim.first+startindex,mid); 102 } 103 } 104 105 106 private IntDoublePair maxDim(double[] ds) { 107 double maxv = -Double.MAX_VALUE; 108 int maxi = -1; 109 for (int i = 0; i < ds.length; i++) { 110 if(maxv < ds[i]){ 111 maxi = i; 112 maxv = ds[i]; 113 } 114 } 115 return IntDoublePair.pair(maxi, maxv); 116 } 117 118 } 119 120 /** 121 * calls: {@link #DoubleKDTreeClusterer()} with 0.01 122 */ 123 public DoubleKDTreeClusterer() { 124 125 } 126 127 /** 128 * @param varprop the proportion of variance change from the root variance before splitting stops 129 * @param startindex the index to start from 130 * @param ndims the number of dimensions to split on 131 */ 132 public DoubleKDTreeClusterer(double varprop, int startindex, int ndims) { 133 this.varprop = varprop; 134 this.startindex = startindex; 135 this.ndims = ndims; 136 } 137 138 /** 139 * @param detectionMode The {@link SplitDetectionMode} given the feature of highest variance 140 * @param varprop The minimum proportional variance (compared to the first variance) 141 * @param startindex the feature start index 142 * @param ndims the number of features to use 143 * 144 */ 145 public DoubleKDTreeClusterer(SplitDetectionMode detectionMode, double varprop, int startindex, int ndims) { 146 this.detectionMode = detectionMode; 147 this.varprop = varprop; 148 this.startindex = startindex; 149 this.ndims = ndims; 150 } 151 152 /** 153 * @param varprop the proportion of variance change from the root variance before splitting stops 154 */ 155 public DoubleKDTreeClusterer(double varprop) { 156 this.varprop = varprop; 157 } 158 159 @Override 160 public int[][] performClustering(double[][] data) { 161 return cluster(data).clusters(); 162 } 163 164 @Override 165 public KDTreeClusters cluster(double[][] data) { 166 if(ndims == -1) 167 { 168 this.startindex = 0; 169 ndims = data[0].length; 170 } 171 172 DoubleKDTree tree = new DoubleKDTree(data, new CappedVarianceSplitChooser()); 173 return new KDTreeClusters(tree, ndims); 174 } 175 176 @Override 177 public KDTreeClusters cluster(DataSource<double[]> data) { 178 double[][] arrdata = new double[data.size()][]; 179 for (int i = 0; i < arrdata.length; i++) { 180 arrdata[i] = data.getData(i); 181 } 182 return cluster(arrdata); 183 } 184 185}