View Javadoc

1   /*
2   	AUTOMATICALLY GENERATED BY jTemp FROM
3   	/Users/jsh2/Work/openimaj/target/checkout/machine-learning/clustering/src/main/jtemp/org/openimaj/ml/clustering/random/Random#T#Clusterer.jtemp
4   */
5   /**
6    * Copyright (c) 2011, The University of Southampton and the individual contributors.
7    * All rights reserved.
8    *
9    * Redistribution and use in source and binary forms, with or without modification,
10   * are permitted provided that the following conditions are met:
11   *
12   *   * 	Redistributions of source code must retain the above copyright notice,
13   * 	this list of conditions and the following disclaimer.
14   *
15   *   *	Redistributions in binary form must reproduce the above copyright notice,
16   * 	this list of conditions and the following disclaimer in the documentation
17   * 	and/or other materials provided with the distribution.
18   *
19   *   *	Neither the name of the University of Southampton nor the names of its
20   * 	contributors may be used to endorse or promote products derived from this
21   * 	software without specific prior written permission.
22   *
23   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
24   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
25   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
26   * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
27   * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
28   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
30   * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32   * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33   */
34  
35  package org.openimaj.ml.clustering.random;
36  
37  import java.util.Arrays;
38  import java.util.Random;
39  
40  import org.openimaj.data.DataSource;
41  import org.openimaj.ml.clustering.ShortCentroidsResult;
42  import org.openimaj.ml.clustering.IndexClusters;
43  import org.openimaj.ml.clustering.SpatialClusterer;
44  
45  /**
46   * A simple (yet apparently quite effective in high dimensions) 
47   * clustering technique trained using randomly sampled data points. A 
48   * configurable number of data points are selected from the training data to
49   * represent the centroids.
50   *
51   * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
52   * @author Sina Samangooei (ss@ecs.soton.ac.uk)
53   *
54   */
55  public class RandomShortClusterer implements SpatialClusterer<ShortCentroidsResult, short[]> {
56  	protected int M;
57  	protected int K;
58  	protected Random random;
59  	protected long seed;
60  	
61  	/**
62  	 * Creates a new random byte cluster used to create K centroids with data containing M elements.
63  	 * 
64  	 * @param M number of elements in each data vector
65  	 * @param K number of centroids to be created
66  	 */
67  	public RandomShortClusterer(int M, int K) {
68  		this.M = M;
69  		this.K = K;
70  		this.random = new Random();
71  		this.seed = -1;
72  	}
73  	
74  	/**
75  	 * Creates a new random byte cluster used to create centroids with data containing M elements. The 
76  	 * number of clusters will be equal to the number of data points provided in training.
77  	 *
78  	 * @param M number of elements in each data vector
79  	 */
80  	public RandomShortClusterer(int M) {
81  		this(M, -1);
82  	}
83  		
84  	/**
85  	 * @param seed the seed for the {@link Random} object used to select random data points.
86  	 */
87  	public void setSeed(long seed) {
88  		this.random = new Random();
89  		random.setSeed(seed);
90  		this.seed = seed;
91  	}
92  	
93  	/***
94  	 * Selects K elements from the provided data as the centroids of the clusters. If K is -1 all provided
95  	 * data points will be selected. It is not guaranteed that the same data point will not be selected
96  	 * many times.
97  	 * 
98  	 * @params data source of centroids
99  	 * @return the selected centroids
100 	 */
101 	@Override
102 	public ShortCentroidsResult cluster(short[][] data) {
103 		int nc = this.K;
104 		
105 		if (nc == -1) {
106 			nc = data.length;
107 		}
108 		
109 		ShortCentroidsResult result = new ShortCentroidsResult();
110 		result.centroids = new short[nc][];
111 		
112 		for (int i = 0; i < nc; i++) {
113 			int dIndex = this.random.nextInt(data.length);
114 			
115 			result.centroids[i] = Arrays.copyOf(data[dIndex], data[dIndex].length);
116 		}
117 		
118 		return result;
119 	}
120 	
121 	@Override
122 	public int[][] performClustering(short[][] data) {
123 		ShortCentroidsResult res = this.cluster(data);
124 		return new IndexClusters(res.defaultHardAssigner().assign(data)).clusters();
125 	}
126 	
127 	/**
128 	 * Selects K elements from the provided {@link DataSource} as the centroids of the clusters. 
129 	 * If K is -1 all provided data points will be selected. It is not guaranteed that the same data 
130 	 * point will not be selected many times.
131 	 * 
132 	 * @params data a data source object
133 	 * @return the selected centroids
134 	 */
135 	@Override
136 	public ShortCentroidsResult cluster(DataSource<short[]> data) {
137 		int nc = this.K;
138 		
139 		if (nc == -1) {
140 			nc = data.size();
141 		}
142 		
143 		ShortCentroidsResult result = new ShortCentroidsResult();
144 		result.centroids = new short[nc][M];
145 		short[][] dataRow = new short[1][];
146 		
147 		for (int i = 0; i < nc; i++) {
148 			int dIndex = this.random.nextInt(data.size());
149 			dataRow[0] = result.centroids[i];
150 			
151 			data.getData(dIndex, dIndex+1, dataRow);
152 		}
153 		
154 		return result;
155 	}
156 }