001/* 002 AUTOMATICALLY GENERATED BY jTemp FROM 003 /Users/jsh2/Work/openimaj/target/checkout/machine-learning/clustering/src/main/jtemp/org/openimaj/ml/clustering/kmeans/Hierarchical#T#KMeans.jtemp 004*/ 005/** 006 * Copyright (c) 2011, The University of Southampton and the individual contributors. 007 * All rights reserved. 008 * 009 * Redistribution and use in source and binary forms, with or without modification, 010 * are permitted provided that the following conditions are met: 011 * 012 * * Redistributions of source code must retain the above copyright notice, 013 * this list of conditions and the following disclaimer. 014 * 015 * * Redistributions in binary form must reproduce the above copyright notice, 016 * this list of conditions and the following disclaimer in the documentation 017 * and/or other materials provided with the distribution. 018 * 019 * * Neither the name of the University of Southampton nor the names of its 020 * contributors may be used to endorse or promote products derived from this 021 * software without specific prior written permission. 022 * 023 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 024 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 025 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 026 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 027 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 028 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 029 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 030 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 032 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 033 */ 034 package org.openimaj.ml.clustering.kmeans; 035 036import gnu.trove.list.array.TIntArrayList; 037import gnu.trove.map.hash.TIntObjectHashMap; 038 039import org.openimaj.citation.annotation.Reference; 040import org.openimaj.citation.annotation.ReferenceType; 041import org.openimaj.data.DataSource; 042import org.openimaj.data.IndexedViewDataSource; 043import org.openimaj.knn.LongNearestNeighbours; 044import org.openimaj.ml.clustering.IndexClusters; 045import org.openimaj.ml.clustering.SpatialClusterer; 046import org.openimaj.ml.clustering.assignment.HardAssigner; 047import org.openimaj.ml.clustering.kmeans.HierarchicalLongKMeansResult.Node; 048import org.openimaj.util.pair.IntDoublePair; 049 050/** 051 * Hierarchical Long K-Means clustering ({@link HierarchicalLongKMeans}) is a simple 052 * hierarchical version of LongKMeans. The algorithm recursively applies 053 * @{link LongKMeans} to create more refined partitions of the data. 054 * 055 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 056 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 057 */ 058@Reference( 059 type = ReferenceType.Inproceedings, 060 author = { "David. Nist\'er", "Henrik. Stew\'enius" }, 061 title = "Scalable Recognition with a Vocabulary Tree", 062 year = "2006", 063 booktitle = "CVPR", 064 pages = { "2161", "", "2168" }, 065 customData = { 066 "Date-Added", "2010-11-12 09:33:18 +0000", 067 "Date-Modified", "2010-11-22 15:11:22 +0000" 068 } 069) 070public class HierarchicalLongKMeans implements SpatialClusterer<HierarchicalLongKMeansResult, long[]> { 071 /** data dimensionality */ 072 int M; 073 074 /** K clusters per node */ 075 int K; 076 077 /** KMeans configuration */ 078 KMeansConfiguration<LongNearestNeighbours, long[]> conf; 079 080 /** Depth of the tree */ 081 int depth; 082 083 /** 084 * Construct a new {@link HierarchicalLongKMeans} with the given parameters. 085 * 086 * @param config configuration for the underlying kmeans clustering. 087 * @param M Data dimensionality. 088 * @param K Number of clusters per node. 089 * @param depth Tree depth. 090 */ 091 public HierarchicalLongKMeans(KMeansConfiguration<LongNearestNeighbours, long[]> config, int M, int K, int depth) { 092 this.conf = config; 093 this.M = M; 094 this.K = K; 095 this.depth = depth; 096 } 097 098 /** 099 * Construct a new {@link HierarchicalLongKMeans} with the given parameters. 100 * Uses the default parameters of the {@link KMeansConfiguration}. 101 * 102 * @param M Data dimensionality. 103 * @param K Number of clusters per node. 104 * @param depth Tree depth. 105 */ 106 public HierarchicalLongKMeans(int M, int K, int depth) { 107 this(new KMeansConfiguration<LongNearestNeighbours, long[]>(), M, K, depth); 108 } 109 110 /** 111 * Extract a subset of the data to a buffer 112 * 113 * @param data Data 114 * @param ids Data labels 115 * @param id Label of data to copy 116 * 117 * @return a new buffer with a copy of the selected data. 118 */ 119 private long[][] extractSubset(final long[][] data, int[] ids, int id) { 120 int N = data.length; 121 int M = data[0].length; 122 int count = 0; 123 124 // count how many data points with this label there are 125 for (int i = 0; i < N; i++) 126 if (ids[i] == id) 127 count++; 128 129 // copy each datum to the buffer 130 long[][] newData = new long[count][M]; 131 count = 0; 132 for (int i = 0; i < N; i++) { 133 if (ids[i] == id) { 134 System.arraycopy(data[i], 0, newData[count], 0, M); 135 count++; 136 } 137 } 138 return newData; 139 } 140 141 /** 142 * Compute HierarchicalLongKMeans clustering. 143 * 144 * @param data Data to cluster. 145 * @param K Number of clusters for this node. 146 * @param height Tree height. 147 * 148 * @return a new HierarchicalLongKMeans node representing a sub-clustering. 149 **/ 150 private Node trainLevel(final long[][] data, int K, int height) { 151 Node node = new Node(); 152 node.children = (height == 1) ? null : new Node[K]; 153 154 LongKMeans kmeans = newLongKMeans(K); 155 node.result = kmeans.cluster(data); 156 157 HardAssigner<long[], double[], IntDoublePair> assigner = node.result.defaultHardAssigner(); 158 159 if (height > 1) { 160 int[] ids = assigner.assign(data); 161 162 for (int k = 0; k < K; k++) { 163 long[][] partition = extractSubset(data, ids, k); 164 165 int partitionK = Math.min(K, partition.length); 166 167 node.children[k] = trainLevel(partition, partitionK, height - 1); 168 } 169 } 170 171 return node; 172 } 173 174 /** 175 * Compute HierarchicalLongKMeans clustering. 176 * 177 * @param data Data to cluster. 178 * @param K Number of clusters for this node. 179 * @param height Tree height. 180 * 181 * @return a new HierarchicalLongKMeans node representing a sub-clustering. 182 **/ 183 private Node trainLevel(final DataSource<long[]> data, int K, int height) { 184 Node node = new Node(); 185 node.children = (height == 1) ? null : new Node[K]; 186 187 LongKMeans kmeans = newLongKMeans(K); 188 node.result = kmeans.cluster(data); 189 190 HardAssigner<long[], double[], IntDoublePair> assigner = node.result.defaultHardAssigner(); 191 192 if (height > 1) { 193 final TIntObjectHashMap<TIntArrayList> assignments = new TIntObjectHashMap<TIntArrayList>(); 194 195 final long[][] tmp = new long[1][M]; 196 for (int i = 0; i < data.size(); i++) { 197 data.getData(i, i + 1, tmp); 198 final int asgn = assigner.assign(tmp[0]); 199 200 TIntArrayList ids = assignments.get(asgn); 201 if (ids == null) 202 assignments.put(asgn, ids = new TIntArrayList()); 203 ids.add(i); 204 } 205 206 for (int k = 0; k < K; k++) { 207 final int[] indexes = assignments.get(k).toArray(); 208 final DataSource<long[]> partition = new IndexedViewDataSource<long[]>(data, indexes); 209 210 final int partitionK = Math.min(K, partition.size()); 211 212 node.children[k] = trainLevel(partition, partitionK, height - 1); 213 } 214 } 215 216 return node; 217 } 218 219 @Override 220 public HierarchicalLongKMeansResult cluster(final long[][] data) { 221 HierarchicalLongKMeansResult result = new HierarchicalLongKMeansResult(); 222 223 result.K = K; 224 result.M = M; 225 result.depth = depth; 226 result.root = trainLevel(data, Math.min(K, data.length), depth); 227 228 return result; 229 } 230 231 @Override 232 public int[][] performClustering(long[][] data) { 233 HierarchicalLongKMeansResult clusters = this.cluster(data); 234 return new IndexClusters(clusters.defaultHardAssigner().assign(data)).clusters(); 235 } 236 237 @Override 238 public HierarchicalLongKMeansResult cluster(DataSource<long[]> data) { 239 HierarchicalLongKMeansResult result = new HierarchicalLongKMeansResult(); 240 241 result.K = K; 242 result.M = M; 243 result.depth = depth; 244 result.root = trainLevel(data, Math.min(K, data.size()), depth); 245 246 return result; 247 } 248 249 private LongKMeans newLongKMeans(int K) { 250 KMeansConfiguration<LongNearestNeighbours, long[]> newConf = conf.clone(); 251 newConf.setK(K); 252 return new LongKMeans(newConf); 253 } 254}