001/* 002 AUTOMATICALLY GENERATED BY jTemp FROM 003 /Users/jsh2/Work/openimaj/target/checkout/machine-learning/clustering/src/main/jtemp/org/openimaj/knn/pq/#T#ProductQuantiserUtilities.jtemp 004*/ 005/** 006 * Copyright (c) 2011, The University of Southampton and the individual contributors. 007 * All rights reserved. 008 * 009 * Redistribution and use in source and binary forms, with or without modification, 010 * are permitted provided that the following conditions are met: 011 * 012 * * Redistributions of source code must retain the above copyright notice, 013 * this list of conditions and the following disclaimer. 014 * 015 * * Redistributions in binary form must reproduce the above copyright notice, 016 * this list of conditions and the following disclaimer in the documentation 017 * and/or other materials provided with the distribution. 018 * 019 * * Neither the name of the University of Southampton nor the names of its 020 * contributors may be used to endorse or promote products derived from this 021 * software without specific prior written permission. 022 * 023 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 024 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 025 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 026 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 027 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 028 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 029 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 030 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 032 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 033 */ 034package org.openimaj.knn.pq; 035 036import org.openimaj.knn.DoubleNearestNeighboursExact; 037import org.openimaj.knn.DoubleNearestNeighboursProvider; 038import org.openimaj.ml.clustering.kmeans.DoubleKMeans; 039 040/** 041 * Utility methods for easily creating a {@link ByteProductQuantiser} using 042 * (Exact) K-Means. 043 * 044 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 045 * 046 */ 047public final class DoubleProductQuantiserUtilities { 048 private DoubleProductQuantiserUtilities() { 049 } 050 051 /** 052 * Learn a {@link DoubleProductQuantiser} by applying exact K-Means to 053 * sub-vectors extracted from the given data. The length of the subvectors 054 * is determined by dividing the vector length by the requested number of 055 * assigners. The number of clusters per vector subset is constant, and must 056 * be less than 256. 057 * 058 * @param data 059 * the data to train the {@link DoubleProductQuantiser} on. 060 * @param numAssigners 061 * the number of sub-quantisers to learn 062 * @param K 063 * the number of centroids per sub-quantiser 064 * @param nIter 065 * the maximum number of iterations for each k-means clustering 066 * 067 * @return a trained {@link DoubleProductQuantiser}. 068 */ 069 public static DoubleProductQuantiser train(double[][] data, int numAssigners, int K, int nIter) { 070 if (K > 256 || K <= 0) 071 throw new IllegalArgumentException("0 <= K < 256"); 072 073 final int subDim = data[0].length / numAssigners; 074 final double[][] tmp = new double[data.length][subDim]; 075 076 final DoubleNearestNeighboursExact[] assigners = new DoubleNearestNeighboursExact[numAssigners]; 077 final DoubleKMeans kmeans = DoubleKMeans.createExact(K, 100); 078 079 for (int i = 0; i < numAssigners; i++) { 080 // copy data 081 for (int j = 0; j < data.length; j++) { 082 System.arraycopy(data[j], i * subDim, tmp[j], 0, subDim); 083 } 084 085 // kmeans 086 final DoubleNearestNeighboursProvider centroids = (DoubleNearestNeighboursProvider) kmeans.cluster(tmp); 087 088 assigners[i] = (DoubleNearestNeighboursExact)centroids.getNearestNeighbours(); 089 } 090 091 return new DoubleProductQuantiser(assigners); 092 } 093 094 /** 095 * Learn a {@link DoubleProductQuantiser} by applying exact K-Means to 096 * sub-vectors extracted from the given data. The length of the subvectors 097 * is determined by dividing the vector length by the requested number of 098 * assigners. The number of clusters per vector subset is constant, and set 099 * at 256. 100 * 101 * @param data 102 * the data to train the {@link DoubleProductQuantiser} on. 103 * @param numAssigners 104 * the number of sub-quantisers to learn 105 * @param nIter 106 * the maximum number of iterations for each k-means clustering 107 * 108 * @return a trained {@link DoubleProductQuantiser}. 109 */ 110 public static DoubleProductQuantiser train(double[][] data, int numAssigners, int nIter) { 111 return train(data, numAssigners, 256, nIter); 112 } 113}