/*
    AUTOMATICALLY GENERATED BY jTemp FROM
    /Users/jsh2/Work/openimaj/target/checkout/machine-learning/clustering/src/main/jtemp/org/openimaj/ml/clustering/random/Random#T#Clusterer.jtemp
*/
/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   *  Redistributions of source code must retain the above copyright notice,
 *      this list of conditions and the following disclaimer.
 *
 *   *  Redistributions in binary form must reproduce the above copyright notice,
 *      this list of conditions and the following disclaimer in the documentation
 *      and/or other materials provided with the distribution.
 *
 *   *  Neither the name of the University of Southampton nor the names of its
 *      contributors may be used to endorse or promote products derived from this
 *      software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.openimaj.ml.clustering.random;

import java.util.Arrays;
import java.util.Random;

import org.openimaj.data.DataSource;
import org.openimaj.ml.clustering.ShortCentroidsResult;
import org.openimaj.ml.clustering.IndexClusters;
import org.openimaj.ml.clustering.SpatialClusterer;

/**
 * A simple (yet apparently quite effective in high dimensions)
 * clustering technique trained using randomly sampled data points. A
 * configurable number of data points are selected from the training data to
 * represent the centroids.
 *
 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
 *
 */
public class RandomShortClusterer implements SpatialClusterer<ShortCentroidsResult, short[]> {
    /**
     * Sentinel value of {@link #K} meaning "draw as many centroids as there are
     * training data points".
     */
    private static final int USE_ALL_DATA = -1;

    /** Number of elements in each data vector. */
    protected int M;

    /** Number of centroids to create, or -1 to create one per training data point. */
    protected int K;

    /** Source of randomness used to pick data points. */
    protected Random random;

    /** Last seed passed to {@link #setSeed(long)}; -1 if no seed has been set. */
    protected long seed;

    /**
     * Creates a new random short clusterer used to create K centroids with data containing M elements.
     *
     * @param M number of elements in each data vector
     * @param K number of centroids to be created
     */
    public RandomShortClusterer(int M, int K) {
        this.M = M;
        this.K = K;
        this.random = new Random();
        this.seed = -1;
    }

    /**
     * Creates a new random short clusterer used to create centroids with data containing M elements. The
     * number of clusters will be equal to the number of data points provided in training.
     *
     * @param M number of elements in each data vector
     */
    public RandomShortClusterer(int M) {
        this(M, USE_ALL_DATA);
    }

    /**
     * Replaces the internal {@link Random} with a new one initialised from the given seed.
     *
     * @param seed the seed for the {@link Random} object used to select random data points.
     */
    public void setSeed(long seed) {
        // new Random(seed) is specified to be equivalent to new Random() followed by setSeed(seed).
        this.random = new Random(seed);
        this.seed = seed;
    }

    /**
     * Selects K elements from the provided data as the centroids of the clusters. If K is -1, as many
     * centroids as there are data points are drawn. Points are drawn independently (with replacement),
     * so it is not guaranteed that the same data point will not be selected many times, nor that every
     * data point is selected. Each centroid is a copy of the chosen data vector.
     *
     * @param data source of centroids
     * @return the selected centroids
     * @throws IllegalArgumentException if one or more centroids are requested but {@code data} is empty
     */
    @Override
    public ShortCentroidsResult cluster(short[][] data) {
        final int available = data.length;
        final int nc = resolveCentroidCount(available);

        final ShortCentroidsResult result = new ShortCentroidsResult();
        result.centroids = new short[nc][];

        for (int i = 0; i < nc; i++) {
            final short[] chosen = data[this.random.nextInt(available)];

            // Copy so that the centroids do not alias the caller's training vectors.
            result.centroids[i] = Arrays.copyOf(chosen, chosen.length);
        }

        return result;
    }

    /**
     * Selects centroids with {@link #cluster(short[][])}, assigns every data point using the result's
     * default hard assigner, and returns the clusters built by {@link IndexClusters} from those
     * assignments.
     *
     * @param data the data to cluster
     * @return the clusters reported by {@link IndexClusters#clusters()}
     */
    @Override
    public int[][] performClustering(short[][] data) {
        final ShortCentroidsResult res = this.cluster(data);
        return new IndexClusters(res.defaultHardAssigner().assign(data)).clusters();
    }

    /**
     * Selects K elements from the provided {@link DataSource} as the centroids of the clusters.
     * If K is -1, as many centroids as there are data points are drawn. Points are drawn independently
     * (with replacement), so it is not guaranteed that the same data point will not be selected many
     * times. Each centroid is allocated with M elements and filled in by the data source.
     *
     * @param data a data source object
     * @return the selected centroids
     * @throws IllegalArgumentException if one or more centroids are requested but {@code data} is empty
     */
    @Override
    public ShortCentroidsResult cluster(DataSource<short[]> data) {
        final int available = data.size();
        final int nc = resolveCentroidCount(available);

        final ShortCentroidsResult result = new ShortCentroidsResult();
        result.centroids = new short[nc][M];

        // Single-row view handed to the data source so it writes straight into the centroid row.
        final short[][] dataRow = new short[1][];

        for (int i = 0; i < nc; i++) {
            final int dIndex = this.random.nextInt(available);
            dataRow[0] = result.centroids[i];

            data.getData(dIndex, dIndex + 1, dataRow);
        }

        return result;
    }

    /**
     * Works out how many centroids to draw for a data set of the given size and rejects requests that
     * cannot be satisfied because there is nothing to sample from.
     *
     * @param available number of data points that can be sampled
     * @return {@code available} when K is -1, otherwise K
     * @throws IllegalArgumentException if a positive number of centroids is requested from an empty data set
     */
    private int resolveCentroidCount(int available) {
        final int count = (this.K == USE_ALL_DATA) ? available : this.K;

        // Random.nextInt(bound) would reject a non-positive bound with a generic message;
        // fail here with the same exception type but an explanation of the actual problem.
        if (count > 0 && available <= 0) {
            throw new IllegalArgumentException(
                    "Cannot select " + count + " centroid(s) from an empty data set");
        }

        return count;
    }
}