001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.ml.clustering.random; 031 032import gnu.trove.list.array.TIntArrayList; 033 034import java.util.HashMap; 035import java.util.Map; 036import java.util.Map.Entry; 037import java.util.Random; 038 039import org.openimaj.ml.clustering.IndexClusters; 040import org.openimaj.ml.clustering.SparseMatrixClusterer; 041 042import ch.akuhn.matrix.SparseMatrix; 043 044/** 045 * Given a similarity or distance matrix, this clusterer randomly selects a 046 * number of clusters and randomly assigned each row to each cluster. 047 * 048 * The number of clusters is a random number from 0 to 049 * {@link SparseMatrix#rowCount()} 050 * 051 * 052 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 053 */ 054public class RandomClusterer implements SparseMatrixClusterer<IndexClusters> { 055 056 private Random random; 057 private int forceClusters = -1; 058 059 /** 060 * unseeded random 061 */ 062 public RandomClusterer() { 063 this.random = new Random(); 064 } 065 066 /** 067 * seeded random 068 * 069 * @param seed 070 */ 071 public RandomClusterer(long seed) { 072 073 this.random = new Random(seed); 074 } 075 076 /** 077 * seeded random 078 * 079 * @param nclusters 080 */ 081 public RandomClusterer(int nclusters) { 082 this(); 083 this.forceClusters = nclusters; 084 } 085 086 /** 087 * seeded random 088 * 089 * @param nclusters 090 * @param seed 091 * random seed 092 */ 093 public RandomClusterer(int nclusters, long seed) { 094 this(seed); 095 this.forceClusters = nclusters; 096 } 097 098 @Override 099 public IndexClusters cluster(SparseMatrix data) { 100 int nClusters = 0; 101 102 if (this.forceClusters > 0) 103 nClusters = this.forceClusters; 104 else 105 nClusters = this.random.nextInt(data.rowCount()); 106 107 final Map<Integer, TIntArrayList> clusters = new HashMap<Integer, TIntArrayList>(); 108 109 for (int i = 0; i < data.rowCount(); i++) { 110 final int cluster = this.random.nextInt(nClusters); 111 TIntArrayList l = clusters.get(cluster); 112 113 if (l == null) { 114 clusters.put(cluster, l = new TIntArrayList()); 115 } 116 117 l.add(i); 118 } 119 120 final int[][] outClusters = new int[clusters.size()][]; 121 int i = 0; 122 for (final Entry<Integer, TIntArrayList> is : clusters.entrySet()) { 123 outClusters[i++] = is.getValue().toArray(); 124 } 125 126 return new IndexClusters(outClusters, data.rowCount()); 127 } 128 129 @Override 130 public int[][] performClustering(SparseMatrix data) { 131 return this.cluster(data).clusters(); 132 } 133 134}