001/*
002        AUTOMATICALLY GENERATED BY jTemp FROM
003        /Users/jsh2/Work/openimaj/target/checkout/machine-learning/clustering/src/main/jtemp/org/openimaj/ml/clustering/random/Random#T#Clusterer.jtemp
004*/
005/**
006 * Copyright (c) 2011, The University of Southampton and the individual contributors.
007 * All rights reserved.
008 *
009 * Redistribution and use in source and binary forms, with or without modification,
010 * are permitted provided that the following conditions are met:
011 *
012 *   *  Redistributions of source code must retain the above copyright notice,
013 *      this list of conditions and the following disclaimer.
014 *
015 *   *  Redistributions in binary form must reproduce the above copyright notice,
016 *      this list of conditions and the following disclaimer in the documentation
017 *      and/or other materials provided with the distribution.
018 *
019 *   *  Neither the name of the University of Southampton nor the names of its
020 *      contributors may be used to endorse or promote products derived from this
021 *      software without specific prior written permission.
022 *
023 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
024 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
025 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
026 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
027 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
028 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
029 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
030 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
032 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
033 */
034
035package org.openimaj.ml.clustering.random;
036
037import java.util.Arrays;
038import java.util.Random;
039
040import org.openimaj.data.DataSource;
041import org.openimaj.ml.clustering.IntCentroidsResult;
042import org.openimaj.ml.clustering.IndexClusters;
043import org.openimaj.ml.clustering.SpatialClusterer;
044
045/**
046 * A simple (yet apparently quite effective in high dimensions) 
047 * clustering technique trained used randomly sampled data points. A 
048 * configurable number of data points are selected from the training data to
049 * represent the centroids.
050 *
051 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
052 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
053 *
054 */
055public class RandomIntClusterer implements SpatialClusterer<IntCentroidsResult, int[]> {
056        protected int M;
057        protected int K;
058        protected Random random;
059        protected long seed;
060        
061        /**
062         * Creates a new random byte cluster used to create K centroids with data containing M elements.
063         * 
064         * @param M number of elements in each data vector
065         * @param K number of centroids to be created
066         */
067        public RandomIntClusterer(int M, int K) {
068                this.M = M;
069                this.K = K;
070                this.random = new Random();
071                this.seed = -1;
072        }
073        
074        /**
075         * Creates a new random byte cluster used to create centroids with data containing M elements. The 
076         * number of clusters will be equal to the number of data points provided in training.
077         *
078         * @param M number of elements in each data vector
079         */
080        public RandomIntClusterer(int M) {
081                this(M, -1);
082        }
083                
084        /**
085         * @param seed the seed for the {@link Random} object used to select random data points.
086         */
087        public void setSeed(long seed) {
088                this.random = new Random();
089                random.setSeed(seed);
090                this.seed = seed;
091        }
092        
093        /***
094         * Selects K elements from the provided data as the centroids of the clusters. If K is -1 all provided
095         * data points will be selected. It is not guaranteed that the same data point will not be selected
096         * many times.
097         * 
098         * @params data source of centroids
099         * @return the selected centroids
100         */
101        @Override
102        public IntCentroidsResult cluster(int[][] data) {
103                int nc = this.K;
104                
105                if (nc == -1) {
106                        nc = data.length;
107                }
108                
109                IntCentroidsResult result = new IntCentroidsResult();
110                result.centroids = new int[nc][];
111                
112                for (int i = 0; i < nc; i++) {
113                        int dIndex = this.random.nextInt(data.length);
114                        
115                        result.centroids[i] = Arrays.copyOf(data[dIndex], data[dIndex].length);
116                }
117                
118                return result;
119        }
120        
121        @Override
122        public int[][] performClustering(int[][] data) {
123                IntCentroidsResult res = this.cluster(data);
124                return new IndexClusters(res.defaultHardAssigner().assign(data)).clusters();
125        }
126        
127        /**
128         * Selects K elements from the provided {@link DataSource} as the centroids of the clusters. 
129         * If K is -1 all provided data points will be selected. It is not guaranteed that the same data 
130         * point will not be selected many times.
131         * 
132         * @params data a data source object
133         * @return the selected centroids
134         */
135        @Override
136        public IntCentroidsResult cluster(DataSource<int[]> data) {
137                int nc = this.K;
138                
139                if (nc == -1) {
140                        nc = data.size();
141                }
142                
143                IntCentroidsResult result = new IntCentroidsResult();
144                result.centroids = new int[nc][M];
145                int[][] dataRow = new int[1][];
146                
147                for (int i = 0; i < nc; i++) {
148                        int dIndex = this.random.nextInt(data.size());
149                        dataRow[0] = result.centroids[i];
150                        
151                        data.getData(dIndex, dIndex+1, dataRow);
152                }
153                
154                return result;
155        }
156}