1 /*
2 AUTOMATICALLY GENERATED BY jTemp FROM
3 /Users/jsh2/Work/openimaj/target/checkout/machine-learning/clustering/src/main/jtemp/org/openimaj/knn/pq/#T#ProductQuantiserUtilities.jtemp
4 */
5 /**
6 * Copyright (c) 2011, The University of Southampton and the individual contributors.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without modification,
10 * are permitted provided that the following conditions are met:
11 *
12 * * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 *
15 * * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 *
19 * * Neither the name of the University of Southampton nor the names of its
20 * contributors may be used to endorse or promote products derived from this
21 * software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
25 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
26 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
27 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
28 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
30 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
34 package org.openimaj.knn.pq;
35
36 import org.openimaj.knn.ByteNearestNeighboursExact;
37 import org.openimaj.knn.ByteNearestNeighboursProvider;
38 import org.openimaj.ml.clustering.kmeans.ByteKMeans;
39
40 /**
41 * Utility methods for easily creating a {@link ByteProductQuantiser} using
42 * (Exact) K-Means.
43 *
44 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
45 *
46 */
47 public final class ByteProductQuantiserUtilities {
48 private ByteProductQuantiserUtilities() {
49 }
50
51 /**
52 * Learn a {@link ByteProductQuantiser} by applying exact K-Means to
53 * sub-vectors extracted from the given data. The length of the subvectors
54 * is determined by dividing the vector length by the requested number of
55 * assigners. The number of clusters per vector subset is constant, and must
56 * be less than 256.
57 *
58 * @param data
59 * the data to train the {@link ByteProductQuantiser} on.
60 * @param numAssigners
61 * the number of sub-quantisers to learn
62 * @param K
63 * the number of centroids per sub-quantiser
64 * @param nIter
65 * the maximum number of iterations for each k-means clustering
66 *
67 * @return a trained {@link ByteProductQuantiser}.
68 */
69 public static ByteProductQuantiser train(byte[][] data, int numAssigners, int K, int nIter) {
70 if (K > 256 || K <= 0)
71 throw new IllegalArgumentException("0 <= K < 256");
72
73 final int subDim = data[0].length / numAssigners;
74 final byte[][] tmp = new byte[data.length][subDim];
75
76 final ByteNearestNeighboursExact[] assigners = new ByteNearestNeighboursExact[numAssigners];
77 final ByteKMeans kmeans = ByteKMeans.createExact(K, 100);
78
79 for (int i = 0; i < numAssigners; i++) {
80 // copy data
81 for (int j = 0; j < data.length; j++) {
82 System.arraycopy(data[j], i * subDim, tmp[j], 0, subDim);
83 }
84
85 // kmeans
86 final ByteNearestNeighboursProvider centroids = (ByteNearestNeighboursProvider) kmeans.cluster(tmp);
87
88 assigners[i] = (ByteNearestNeighboursExact)centroids.getNearestNeighbours();
89 }
90
91 return new ByteProductQuantiser(assigners);
92 }
93
94 /**
95 * Learn a {@link ByteProductQuantiser} by applying exact K-Means to
96 * sub-vectors extracted from the given data. The length of the subvectors
97 * is determined by dividing the vector length by the requested number of
98 * assigners. The number of clusters per vector subset is constant, and set
99 * at 256.
100 *
101 * @param data
102 * the data to train the {@link ByteProductQuantiser} on.
103 * @param numAssigners
104 * the number of sub-quantisers to learn
105 * @param nIter
106 * the maximum number of iterations for each k-means clustering
107 *
108 * @return a trained {@link ByteProductQuantiser}.
109 */
110 public static ByteProductQuantiser train(byte[][] data, int numAssigners, int nIter) {
111 return train(data, numAssigners, 256, nIter);
112 }
113 }