View Javadoc

1   /**
2    * Copyright (c) 2011, The University of Southampton and the individual contributors.
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without modification,
6    * are permitted provided that the following conditions are met:
7    *
8    *   * 	Redistributions of source code must retain the above copyright notice,
9    * 	this list of conditions and the following disclaimer.
10   *
11   *   *	Redistributions in binary form must reproduce the above copyright notice,
12   * 	this list of conditions and the following disclaimer in the documentation
13   * 	and/or other materials provided with the distribution.
14   *
15   *   *	Neither the name of the University of Southampton nor the names of its
16   * 	contributors may be used to endorse or promote products derived from this
17   * 	software without specific prior written permission.
18   *
19   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22   * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23   * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26   * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28   * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29   */
30  package org.openimaj.ml.clustering.kdtree;
31  
32  
33  import org.apache.commons.math.stat.descriptive.rank.Max;
34  import org.openimaj.data.DataSource;
35  import org.openimaj.math.matrix.MatrixUtils;
36  import org.openimaj.ml.clustering.SpatialClusterer;
37  import org.openimaj.util.array.IntArrayView;
38  import org.openimaj.util.pair.IntDoublePair;
39  import org.openimaj.util.tree.DoubleKDTree;
40  import org.openimaj.util.tree.DoubleKDTree.SplitChooser;
41  
42  import Jama.Matrix;
43  
44  /**
45   *
46   * @author Sina Samangooei (ss@ecs.soton.ac.uk)
47   */
48  public class DoubleKDTreeClusterer implements SpatialClusterer<KDTreeClusters, double[]>{
49  	private static final int DEFAULT_MINPTS = 4;
50  	private double DEFAULT_VARIANCE_PROP = 0.1;
51  	
52  	double varprop = DEFAULT_VARIANCE_PROP ;
53  	int minpts = DEFAULT_MINPTS;
54  	int ndims = -1;
55  	private int startindex = -1;
56  	private SplitDetectionMode detectionMode = new SplitDetectionMode.VARIABLE_MEDIAN();
57  //	private SplitDetectionMode detectionMode = new SplitDetectionMode.MEAN();
58  	class CappedVarianceSplitChooser implements SplitChooser{
59  		
60  		
61  		Matrix firstVariances = null;
62  		Max max = new Max();
63  		
64  		public CappedVarianceSplitChooser() {
65  		}
66  
67  
68  		@Override
69  		public IntDoublePair chooseSplit(double[][] pnts, IntArrayView inds, int depth, double[] minBounds, double[] maxBounds) {
70  			 
71  			if(inds.size() < minpts) return null;
72  			
73  			double[][] subarr = new double[inds.size()][ndims];
74  			for (int i = 0; i < subarr.length; i++) {
75  				double[] pnti = pnts[inds.get(i)];
76  				for (int dim = startindex; dim < startindex + ndims; dim++) {					
77  					subarr[i][dim-startindex] = pnti[dim];
78  				}
79  			}
80  			
81  			Matrix mat = new Matrix(subarr);
82  			Matrix mean = MatrixUtils.sumCols(mat).times(1./inds.size());
83  			Matrix var = MatrixUtils.sumCols(mat.arrayTimes(mat)).times(1./inds.size()).minus(mean.arrayTimes(mean));
84  			
85  			if(firstVariances == null){
86  				firstVariances = var;
87  				double[] variances = var.getArray()[0];
88  				if(max.evaluate(variances) == 0){
89  					return null; // special case, if the first variance is null we've been handed ALL the same, screw it
90  				}
91  			}
92  			
93  			Matrix propchange = var.arrayRightDivide(firstVariances);
94  			if(max.evaluate(propchange.getArray()[0]) < varprop){
95  				return null;
96  			}
97  			else{
98  				IntDoublePair maxDim = maxDim(MatrixUtils.abs(var).getArray()[0]);
99  				double[] col = mat.getMatrix(0, inds.size()-1, maxDim.first, maxDim.first).transpose().getArray()[0];
100 				double mid = detectionMode.detect(col);
101 				return IntDoublePair.pair(maxDim.first+startindex,mid);
102 			}
103 		}
104 
105 
106 		private IntDoublePair maxDim(double[] ds) {
107 			double maxv = -Double.MAX_VALUE;
108 			int maxi = -1;
109 			for (int i = 0; i < ds.length; i++) {
110 				if(maxv < ds[i]){
111 					maxi = i;
112 					maxv = ds[i];
113 				}
114 			}
115 			return IntDoublePair.pair(maxi, maxv);
116 		}
117 		
118 	}
119 	
120 	/**
121 	 * calls: {@link #DoubleKDTreeClusterer()} with 0.01
122 	 */
123 	public DoubleKDTreeClusterer() {
124 		
125 	}
126 	
127 	/**
128 	 * @param varprop the proportion of variance change from the root variance before splitting stops
129 	 * @param startindex the index to start from
130 	 * @param ndims the number of dimensions to split on
131 	 */
132 	public DoubleKDTreeClusterer(double varprop, int startindex, int ndims) {
133 		this.varprop = varprop;
134 		this.startindex = startindex;
135 		this.ndims = ndims;
136 	}
137 	
138 	/**
139 	 * @param detectionMode The {@link SplitDetectionMode} given the feature of highest variance
140 	 * @param varprop The minimum proportional variance (compared to the first variance)
141 	 * @param startindex the feature start index
142 	 * @param ndims the number of features to use
143 	 * 
144 	 */
145 	public DoubleKDTreeClusterer(SplitDetectionMode detectionMode, double varprop, int startindex, int ndims) {
146 		this.detectionMode = detectionMode;
147 		this.varprop = varprop;
148 		this.startindex = startindex;
149 		this.ndims = ndims;
150 	}
151 	
152 	/**
153 	 * @param varprop the proportion of variance change from the root variance before splitting stops
154 	 */
155 	public DoubleKDTreeClusterer(double varprop) {
156 		this.varprop = varprop;
157 	}
158 
159 	@Override
160 	public int[][] performClustering(double[][] data) {
161 		return cluster(data).clusters();
162 	}
163 
164 	@Override
165 	public KDTreeClusters cluster(double[][] data) {
166 		if(ndims == -1)
167 		{
168 			this.startindex = 0;
169 			ndims = data[0].length;
170 		}
171 		
172 		DoubleKDTree tree = new DoubleKDTree(data, new CappedVarianceSplitChooser());
173 		return new KDTreeClusters(tree, ndims);
174 	}
175 
176 	@Override
177 	public KDTreeClusters cluster(DataSource<double[]> data) {
178 		double[][] arrdata = new double[data.size()][];
179 		for (int i = 0; i < arrdata.length; i++) {
180 			arrdata[i] = data.getData(i);
181 		}
182 		return cluster(arrdata);
183 	}
184 
185 }