001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.examples.ml.clustering.kmeans;
031
032import java.util.Arrays;
033
034import org.openimaj.data.RandomData;
035import org.openimaj.ml.clustering.assignment.hard.HierarchicalByteHardAssigner;
036import org.openimaj.ml.clustering.kmeans.HierarchicalByteKMeans;
037import org.openimaj.ml.clustering.kmeans.HierarchicalByteKMeansResult;
038import org.openimaj.ml.clustering.kmeans.HierarchicalByteKMeansResult.Node;
039
040/**
041 * Example showing how to use the Hierarchical KMeans clustering algorithm with
042 * in-memory data. The example sets up the clustering algorithm, generates some
043 * uniform random data to cluster, and runs the clustering algorithm. The
044 * resultant clusters are printed, and the example also shows how to perform
045 * cluster assignment (i.e. vector quantisation) in which vectors are assigned
046 * to their closest cluster.
047 * 
048 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
049 */
050public class HierarchicalKMeansExample {
051        /**
052         * Main method for the example.
053         * 
054         * @param args
055         *            Ignored.
056         */
057        public static void main(String[] args) {
058                // Set up the variables needed to define the clustering operation
059                final int dimensionality = 2;
060                final int numItems = 10000;
061                final int clustersPerNode = 4;
062                final int depth = 2;
063
064                // Create the clusterer; there are specific types for all kinds of data
065                // (we're using byte data here). There is also a constructor that allows
066                // you to set the parameters of the underlying standard k-means
067                // implementations.
068                final HierarchicalByteKMeans kmeans = new HierarchicalByteKMeans(dimensionality, clustersPerNode, depth);
069
070                // Generate some random data to cluster
071                final byte[][] data = RandomData.getRandomByteArray(numItems, dimensionality, Byte.MIN_VALUE, Byte.MAX_VALUE);
072
073                // Perform the clustering
074                final HierarchicalByteKMeansResult result = kmeans.cluster(data);
075
076                // Print the generated hierarchy
077                printNode(result.getRoot(), 0);
078
079                // Get an assigner to assign vectors to the closest cluster:
080                final HierarchicalByteHardAssigner assigner = result.defaultHardAssigner();
081
082                // Now investigate which cluster each original data item belonged to:
083                for (int i = 0; i < 10; i++) {
084                        final byte[] vector = data[i];
085
086                        // Each leaf-node of the hierarchy is assigned a unique index number
087                        // between 0 and the total number of leaf-nodes.
088                        final int globalClusterNumber = assigner.assign(vector);
089
090                        // We can also get the path taken down the tree terms of the node
091                        // number at each level of depth. At each level the index number is
092                        // between 0 and clustersPerNode.
093                        final int[] path = result.getPath(globalClusterNumber);
094
095                        System.out.format("%s was assigned to cluster %d with path %s\n", Arrays.toString(vector),
096                                        globalClusterNumber,
097                                        Arrays.toString(path));
098                }
099        }
100
101        /**
102         * Recursively print the tree of cluster centroids to {@link System#out}.
103         * 
104         * @param node
105         *            the node to start from
106         * @param indent
107         *            the amount to indent the current line
108         */
109        static void printNode(Node node, int indent) {
110                final byte[][] centroids = node.result.getCentroids();
111                final Node[] children = node.children;
112
113                if (children != null) {
114                        for (int i = 0; i < children.length; i++) {
115                                for (int j = 0; j < indent; j++)
116                                        System.out.print("\t");
117
118                                System.out.println(Arrays.toString(centroids[i]));
119                                printNode(children[i], indent + 1);
120                        }
121                } else {
122                        for (int i = 0; i < centroids.length; i++) {
123                                for (int j = 0; j < indent; j++)
124                                        System.out.print("\t");
125
126                                System.out.println(Arrays.toString(centroids[i]));
127                        }
128                }
129        }
130}