001/* 002 AUTOMATICALLY GENERATED BY jTemp FROM 003 /Users/jsh2/Work/openimaj/target/checkout/machine-learning/clustering/src/main/jtemp/org/openimaj/ml/clustering/random/Random#T#Clusterer.jtemp 004*/ 005/** 006 * Copyright (c) 2011, The University of Southampton and the individual contributors. 007 * All rights reserved. 008 * 009 * Redistribution and use in source and binary forms, with or without modification, 010 * are permitted provided that the following conditions are met: 011 * 012 * * Redistributions of source code must retain the above copyright notice, 013 * this list of conditions and the following disclaimer. 014 * 015 * * Redistributions in binary form must reproduce the above copyright notice, 016 * this list of conditions and the following disclaimer in the documentation 017 * and/or other materials provided with the distribution. 018 * 019 * * Neither the name of the University of Southampton nor the names of its 020 * contributors may be used to endorse or promote products derived from this 021 * software without specific prior written permission. 022 * 023 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 024 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 025 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 026 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 027 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 028 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 029 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 030 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 032 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 033 */ 034 035package org.openimaj.ml.clustering.random; 036 037import java.util.Arrays; 038import java.util.Random; 039 040import org.openimaj.data.DataSource; 041import org.openimaj.ml.clustering.LongCentroidsResult; 042import org.openimaj.ml.clustering.IndexClusters; 043import org.openimaj.ml.clustering.SpatialClusterer; 044 045/** 046 * A simple (yet apparently quite effective in high dimensions) 047 * clustering technique trained used randomly sampled data points. A 048 * configurable number of data points are selected from the training data to 049 * represent the centroids. 050 * 051 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 052 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 053 * 054 */ 055public class RandomLongClusterer implements SpatialClusterer<LongCentroidsResult, long[]> { 056 protected int M; 057 protected int K; 058 protected Random random; 059 protected long seed; 060 061 /** 062 * Creates a new random byte cluster used to create K centroids with data containing M elements. 063 * 064 * @param M number of elements in each data vector 065 * @param K number of centroids to be created 066 */ 067 public RandomLongClusterer(int M, int K) { 068 this.M = M; 069 this.K = K; 070 this.random = new Random(); 071 this.seed = -1; 072 } 073 074 /** 075 * Creates a new random byte cluster used to create centroids with data containing M elements. The 076 * number of clusters will be equal to the number of data points provided in training. 077 * 078 * @param M number of elements in each data vector 079 */ 080 public RandomLongClusterer(int M) { 081 this(M, -1); 082 } 083 084 /** 085 * @param seed the seed for the {@link Random} object used to select random data points. 086 */ 087 public void setSeed(long seed) { 088 this.random = new Random(); 089 random.setSeed(seed); 090 this.seed = seed; 091 } 092 093 /*** 094 * Selects K elements from the provided data as the centroids of the clusters. If K is -1 all provided 095 * data points will be selected. It is not guaranteed that the same data point will not be selected 096 * many times. 097 * 098 * @params data source of centroids 099 * @return the selected centroids 100 */ 101 @Override 102 public LongCentroidsResult cluster(long[][] data) { 103 int nc = this.K; 104 105 if (nc == -1) { 106 nc = data.length; 107 } 108 109 LongCentroidsResult result = new LongCentroidsResult(); 110 result.centroids = new long[nc][]; 111 112 for (int i = 0; i < nc; i++) { 113 int dIndex = this.random.nextInt(data.length); 114 115 result.centroids[i] = Arrays.copyOf(data[dIndex], data[dIndex].length); 116 } 117 118 return result; 119 } 120 121 @Override 122 public int[][] performClustering(long[][] data) { 123 LongCentroidsResult res = this.cluster(data); 124 return new IndexClusters(res.defaultHardAssigner().assign(data)).clusters(); 125 } 126 127 /** 128 * Selects K elements from the provided {@link DataSource} as the centroids of the clusters. 129 * If K is -1 all provided data points will be selected. It is not guaranteed that the same data 130 * point will not be selected many times. 131 * 132 * @params data a data source object 133 * @return the selected centroids 134 */ 135 @Override 136 public LongCentroidsResult cluster(DataSource<long[]> data) { 137 int nc = this.K; 138 139 if (nc == -1) { 140 nc = data.size(); 141 } 142 143 LongCentroidsResult result = new LongCentroidsResult(); 144 result.centroids = new long[nc][M]; 145 long[][] dataRow = new long[1][]; 146 147 for (int i = 0; i < nc; i++) { 148 int dIndex = this.random.nextInt(data.size()); 149 dataRow[0] = result.centroids[i]; 150 151 data.getData(dIndex, dIndex+1, dataRow); 152 } 153 154 return result; 155 } 156}