001/*
002        AUTOMATICALLY GENERATED BY jTemp FROM
003        /Users/jsh2/Work/openimaj/target/checkout/machine-learning/clustering/src/main/jtemp/org/openimaj/ml/clustering/random/RandomSet#T#Clusterer.jtemp
004*/
005/**
006 * Copyright (c) 2011, The University of Southampton and the individual contributors.
007 * All rights reserved.
008 *
009 * Redistribution and use in source and binary forms, with or without modification,
010 * are permitted provided that the following conditions are met:
011 *
012 *   *  Redistributions of source code must retain the above copyright notice,
013 *      this list of conditions and the following disclaimer.
014 *
015 *   *  Redistributions in binary form must reproduce the above copyright notice,
016 *      this list of conditions and the following disclaimer in the documentation
017 *      and/or other materials provided with the distribution.
018 *
019 *   *  Neither the name of the University of Southampton nor the names of its
020 *      contributors may be used to endorse or promote products derived from this
021 *      software without specific prior written permission.
022 *
023 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
024 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
025 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
026 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
027 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
028 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
029 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
030 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
032 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
033 */
034package org.openimaj.ml.clustering.random;
035
036import java.util.Arrays;
037import java.util.Random;
038
039import org.openimaj.data.DataSource;
040import org.openimaj.data.RandomData;
041import org.openimaj.ml.clustering.FloatCentroidsResult;
042
043/**
044 * A similar strategy to {@link RandomSetFloatClusterer} however it is 
045 * guaranteed that the same training vector will not be sampled more than once. 
046 * 
047 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
048 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
049 */
050public class RandomSetFloatClusterer extends RandomFloatClusterer {
051        /**
052         * Creates a new random byte cluster used to create K centroids with data containing M elements.
053         *
054         * @param M number of elements in each data vector 
055         */
056        public RandomSetFloatClusterer(int M) {
057                super(M);
058        }
059        
060        /**
061         * Creates a new random byte cluster used to create centroids with data containing M elements. The 
062         * number of clusters will be equal to the number of data points provided in training.
063         *
064         * @param M number of elements in each data vector
065         * @param K number of centroids to be created
066         */
067        public RandomSetFloatClusterer(int M, int K) {
068                super(M, K);
069        }
070        
071        /**
072         * Selects K elements from the provided data as the centroids of the clusters. If K is -1 all provided
073         * data points will be selected. It is guaranteed that the same data point will not be selected
074         * many times, though this is not the case if two seperate entries provided are identical.
075         * 
076         * @params data source of centroids
077         * @return the selected centroids
078         */
079    @Override
080        public FloatCentroidsResult cluster(float[][] data) {
081                FloatCentroidsResult result = new FloatCentroidsResult();
082                
083                if (K == -1) {
084                        result.centroids = data;
085                } else {
086                        if (data.length < K) {
087                                throw new IllegalArgumentException("Not enough data");
088                        }
089                        
090                        result.centroids = new float[K][];
091                        
092                        int[] indices;
093                        if(this.seed >= 0)
094                                indices = RandomData.getUniqueRandomInts(this.K, 0, data.length, new Random(this.seed));
095                        else
096                                indices = RandomData.getUniqueRandomInts(this.K, 0, data.length);
097                        
098                        for (int i = 0 ; i < indices.length; i++) {
099                                int dIndex = indices[i];
100                                
101                                result.centroids[i] = Arrays.copyOf(data[dIndex ], data[dIndex ].length);
102                        }
103                }
104                
105                return result;
106        }
107        
108        /**
109         * Selects K elements from the provided {@link DataSource} as the centroids of the clusters. 
110         * If K is -1 all provided data points will be selected. It is guaranteed that the same data 
111         * point will not be selected many times, though this is not the case if two seperate entries 
112         * provided are identical.
113         * 
114         * @params data a data source object
115         * @return the selected centroids
116         */
117        @Override
118        public FloatCentroidsResult cluster(DataSource<float[]> data) {
119                FloatCentroidsResult result = new FloatCentroidsResult();
120                
121                if(K == -1) {
122                        final int nc = data.size();
123                        result.centroids = new float[nc][data.numDimensions()];
124                } else {
125                        result.centroids = new float[K][data.numDimensions()];
126                }
127                
128                data.getRandomRows(result.centroids);
129                
130                return result;
131        }
132}