1 /*
2 AUTOMATICALLY GENERATED BY jTemp FROM
3 /Users/jsh2/Work/openimaj/target/checkout/machine-learning/clustering/src/main/jtemp/org/openimaj/ml/clustering/random/Random#T#Clusterer.jtemp
4 */
5 /**
6 * Copyright (c) 2011, The University of Southampton and the individual contributors.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without modification,
10 * are permitted provided that the following conditions are met:
11 *
12 * * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 *
15 * * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 *
19 * * Neither the name of the University of Southampton nor the names of its
20 * contributors may be used to endorse or promote products derived from this
21 * software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
25 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
26 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
27 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
28 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
30 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
34
35 package org.openimaj.ml.clustering.random;
36
37 import java.util.Arrays;
38 import java.util.Random;
39
40 import org.openimaj.data.DataSource;
41 import org.openimaj.ml.clustering.ShortCentroidsResult;
42 import org.openimaj.ml.clustering.IndexClusters;
43 import org.openimaj.ml.clustering.SpatialClusterer;
44
45 /**
46 * A simple (yet apparently quite effective in high dimensions)
47 * clustering technique trained used randomly sampled data points. A
48 * configurable number of data points are selected from the training data to
49 * represent the centroids.
50 *
51 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
52 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
53 *
54 */
55 public class RandomShortClusterer implements SpatialClusterer<ShortCentroidsResult, short[]> {
56 protected int M;
57 protected int K;
58 protected Random random;
59 protected long seed;
60
61 /**
62 * Creates a new random byte cluster used to create K centroids with data containing M elements.
63 *
64 * @param M number of elements in each data vector
65 * @param K number of centroids to be created
66 */
67 public RandomShortClusterer(int M, int K) {
68 this.M = M;
69 this.K = K;
70 this.random = new Random();
71 this.seed = -1;
72 }
73
74 /**
75 * Creates a new random byte cluster used to create centroids with data containing M elements. The
76 * number of clusters will be equal to the number of data points provided in training.
77 *
78 * @param M number of elements in each data vector
79 */
80 public RandomShortClusterer(int M) {
81 this(M, -1);
82 }
83
84 /**
85 * @param seed the seed for the {@link Random} object used to select random data points.
86 */
87 public void setSeed(long seed) {
88 this.random = new Random();
89 random.setSeed(seed);
90 this.seed = seed;
91 }
92
93 /***
94 * Selects K elements from the provided data as the centroids of the clusters. If K is -1 all provided
95 * data points will be selected. It is not guaranteed that the same data point will not be selected
96 * many times.
97 *
98 * @params data source of centroids
99 * @return the selected centroids
100 */
101 @Override
102 public ShortCentroidsResult cluster(short[][] data) {
103 int nc = this.K;
104
105 if (nc == -1) {
106 nc = data.length;
107 }
108
109 ShortCentroidsResult result = new ShortCentroidsResult();
110 result.centroids = new short[nc][];
111
112 for (int i = 0; i < nc; i++) {
113 int dIndex = this.random.nextInt(data.length);
114
115 result.centroids[i] = Arrays.copyOf(data[dIndex], data[dIndex].length);
116 }
117
118 return result;
119 }
120
121 @Override
122 public int[][] performClustering(short[][] data) {
123 ShortCentroidsResult res = this.cluster(data);
124 return new IndexClusters(res.defaultHardAssigner().assign(data)).clusters();
125 }
126
127 /**
128 * Selects K elements from the provided {@link DataSource} as the centroids of the clusters.
129 * If K is -1 all provided data points will be selected. It is not guaranteed that the same data
130 * point will not be selected many times.
131 *
132 * @params data a data source object
133 * @return the selected centroids
134 */
135 @Override
136 public ShortCentroidsResult cluster(DataSource<short[]> data) {
137 int nc = this.K;
138
139 if (nc == -1) {
140 nc = data.size();
141 }
142
143 ShortCentroidsResult result = new ShortCentroidsResult();
144 result.centroids = new short[nc][M];
145 short[][] dataRow = new short[1][];
146
147 for (int i = 0; i < nc; i++) {
148 int dIndex = this.random.nextInt(data.size());
149 dataRow[0] = result.centroids[i];
150
151 data.getData(dIndex, dIndex+1, dataRow);
152 }
153
154 return result;
155 }
156 }