001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.ml.clustering.spectral; 031 032import java.util.Iterator; 033 034import org.apache.logging.log4j.Logger; 035import org.apache.logging.log4j.LogManager; 036import org.openimaj.ml.clustering.DataClusterer; 037import org.openimaj.ml.clustering.IndexClusters; 038import org.openimaj.ml.clustering.SpatialClusterer; 039import org.openimaj.ml.clustering.SpatialClusters; 040import org.openimaj.util.pair.DoubleObjectPair; 041import org.openimaj.util.pair.IndependentPair; 042 043import ch.akuhn.matrix.Vector; 044import ch.akuhn.matrix.Vector.Entry; 045import ch.akuhn.matrix.eigenvalues.Eigenvalues; 046 047/** 048 * For a given set of {@link Eigenvalues} perform the stages of spectral 049 * clustering which involve the selection of the best eigen values and the 050 * calling of an internal clustering algorithm 051 * 052 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 053 */ 054public class PreparedSpectralClustering implements DataClusterer<Eigenvalues, SpectralIndexedClusters> { 055 final static Logger logger = LogManager.getLogger(PreparedSpectralClustering.class); 056 private SpectralClusteringConf<double[]> conf; 057 058 /** 059 * @param conf 060 */ 061 public PreparedSpectralClustering(SpectralClusteringConf<double[]> conf) { 062 this.conf = conf; 063 } 064 065 @Override 066 public int[][] performClustering(Eigenvalues data) { 067 return cluster(data).clusters(); 068 } 069 070 @Override 071 public SpectralIndexedClusters cluster(Eigenvalues eig) { 072 // Also normalise each row 073 final IndependentPair<double[], double[][]> lowestCols = bestCols(eig); 074 // Using the eigenspace, cluster 075 return eigenspaceCluster(lowestCols); 076 } 077 078 protected SpectralIndexedClusters eigenspaceCluster(IndependentPair<double[], double[][]> lowestCols) { 079 final SpatialClusterer<? extends SpatialClusters<double[]>, double[]> clusterer = conf.internal.apply(lowestCols); 080 // Cluster the rows with the internal spatial clusterer 081 final SpatialClusters<double[]> cluster = clusterer.cluster(lowestCols.getSecondObject()); 082 // if the clusters contain the cluster indexes of the training examples 083 // use those 084 if (cluster instanceof IndexClusters) { 085 final IndexClusters clusters = new IndexClusters(((IndexClusters) cluster).clusters()); 086 // logger.debug(clusters); 087 return new SpectralIndexedClusters(clusters, lowestCols); 088 } 089 // Otherwise attempt to assign values to clusters 090 final int[] clustered = cluster.defaultHardAssigner().assign(lowestCols.getSecondObject()); 091 // done! 092 return new SpectralIndexedClusters(new IndexClusters(clustered), lowestCols); 093 } 094 095 protected IndependentPair<double[], double[][]> bestCols(final Eigenvalues eig) { 096 097 int eigenVectorSelect = conf.eigenChooser.nEigenVectors(this.conf.laplacian.eigenIterator(eig), eig.getN()); 098 final int eigenVectorSkip = this.conf.skipEigenVectors; 099 logger.debug("Selected dimensions: " + eigenVectorSelect); 100 logger.debug("Skipping dimesions: " + eigenVectorSkip); 101 eigenVectorSelect -= eigenVectorSkip; 102 103 final int nrows = eig.vector[0].size(); 104 final double[][] ret = new double[nrows][eigenVectorSelect]; 105 final double[] retSum = new double[nrows]; 106 final double[] eigvals = new double[eigenVectorSelect]; 107 final Iterator<DoubleObjectPair<Vector>> iterator = this.conf.laplacian.eigenIterator(eig); 108 // Skip a few at the beggining 109 for (int i = 0; i < eigenVectorSkip; i++) 110 iterator.next(); 111 int col = 0; 112 // Calculate U matrix (containing n smallests eigen valued columns) 113 for (; iterator.hasNext();) { 114 final DoubleObjectPair<Vector> v = iterator.next(); 115 eigvals[col] = v.first; 116 117 for (final Entry d : v.second.entries()) { 118 double elColI = d.value; 119 if (conf.eigenValueScale) { 120 elColI *= Math.sqrt(v.first); 121 } 122 ret[d.index][col] = elColI; 123 retSum[d.index] += elColI * elColI; 124 } 125 col++; 126 if (col == eigenVectorSelect) 127 break; 128 } 129 130 if (!conf.eigenValueScale) { 131 // normalise rows 132 for (int i = 0; i < ret.length; i++) { 133 final double[] row = ret[i]; 134 for (int j = 0; j < row.length; j++) { 135 row[j] /= Math.sqrt(retSum[i]); 136 } 137 } 138 } 139 140 return IndependentPair.pair(eigvals, ret); 141 } 142 143}