001/* 002 AUTOMATICALLY GENERATED BY jTemp FROM 003 /Users/jsh2/Work/openimaj/target/checkout/machine-learning/clustering/src/main/jtemp/org/openimaj/ml/clustering/random/RandomSet#T#Clusterer.jtemp 004*/ 005/** 006 * Copyright (c) 2011, The University of Southampton and the individual contributors. 007 * All rights reserved. 008 * 009 * Redistribution and use in source and binary forms, with or without modification, 010 * are permitted provided that the following conditions are met: 011 * 012 * * Redistributions of source code must retain the above copyright notice, 013 * this list of conditions and the following disclaimer. 014 * 015 * * Redistributions in binary form must reproduce the above copyright notice, 016 * this list of conditions and the following disclaimer in the documentation 017 * and/or other materials provided with the distribution. 018 * 019 * * Neither the name of the University of Southampton nor the names of its 020 * contributors may be used to endorse or promote products derived from this 021 * software without specific prior written permission. 022 * 023 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 024 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 025 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 026 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 027 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 028 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 029 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 030 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 032 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 033 */ 034package org.openimaj.ml.clustering.random; 035 036import java.util.Arrays; 037import java.util.Random; 038 039import org.openimaj.data.DataSource; 040import org.openimaj.data.RandomData; 041import org.openimaj.ml.clustering.LongCentroidsResult; 042 043/** 044 * A similar strategy to {@link RandomSetLongClusterer} however it is 045 * guaranteed that the same training vector will not be sampled more than once. 046 * 047 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 048 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 049 */ 050public class RandomSetLongClusterer extends RandomLongClusterer { 051 /** 052 * Creates a new random byte cluster used to create K centroids with data containing M elements. 053 * 054 * @param M number of elements in each data vector 055 */ 056 public RandomSetLongClusterer(int M) { 057 super(M); 058 } 059 060 /** 061 * Creates a new random byte cluster used to create centroids with data containing M elements. The 062 * number of clusters will be equal to the number of data points provided in training. 063 * 064 * @param M number of elements in each data vector 065 * @param K number of centroids to be created 066 */ 067 public RandomSetLongClusterer(int M, int K) { 068 super(M, K); 069 } 070 071 /** 072 * Selects K elements from the provided data as the centroids of the clusters. If K is -1 all provided 073 * data points will be selected. It is guaranteed that the same data point will not be selected 074 * many times, though this is not the case if two seperate entries provided are identical. 075 * 076 * @params data source of centroids 077 * @return the selected centroids 078 */ 079 @Override 080 public LongCentroidsResult cluster(long[][] data) { 081 LongCentroidsResult result = new LongCentroidsResult(); 082 083 if (K == -1) { 084 result.centroids = data; 085 } else { 086 if (data.length < K) { 087 throw new IllegalArgumentException("Not enough data"); 088 } 089 090 result.centroids = new long[K][]; 091 092 int[] indices; 093 if(this.seed >= 0) 094 indices = RandomData.getUniqueRandomInts(this.K, 0, data.length, new Random(this.seed)); 095 else 096 indices = RandomData.getUniqueRandomInts(this.K, 0, data.length); 097 098 for (int i = 0 ; i < indices.length; i++) { 099 int dIndex = indices[i]; 100 101 result.centroids[i] = Arrays.copyOf(data[dIndex ], data[dIndex ].length); 102 } 103 } 104 105 return result; 106 } 107 108 /** 109 * Selects K elements from the provided {@link DataSource} as the centroids of the clusters. 110 * If K is -1 all provided data points will be selected. It is guaranteed that the same data 111 * point will not be selected many times, though this is not the case if two seperate entries 112 * provided are identical. 113 * 114 * @params data a data source object 115 * @return the selected centroids 116 */ 117 @Override 118 public LongCentroidsResult cluster(DataSource<long[]> data) { 119 LongCentroidsResult result = new LongCentroidsResult(); 120 121 if(K == -1) { 122 final int nc = data.size(); 123 result.centroids = new long[nc][data.numDimensions()]; 124 } else { 125 result.centroids = new long[K][data.numDimensions()]; 126 } 127 128 data.getRandomRows(result.centroids); 129 130 return result; 131 } 132}