001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.ml.clustering; 031 032import gnu.trove.list.array.TIntArrayList; 033 034import java.io.DataInput; 035import java.io.DataOutput; 036import java.io.IOException; 037import java.io.PrintWriter; 038import java.io.StringWriter; 039import java.util.Arrays; 040import java.util.HashMap; 041import java.util.List; 042import java.util.Map; 043import java.util.Map.Entry; 044import java.util.Scanner; 045 046import org.openimaj.io.IOUtils; 047 048/** 049 * Class to describe objects that are the result of the clustering where the 050 * training data is implicitly clustered 051 * 052 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 053 */ 054public class IndexClusters implements Clusters { 055 protected int[][] clusters; 056 protected int nEntries; 057 058 /** 059 * Used only to initailise for {@link IOUtils} 060 */ 061 public IndexClusters() { 062 } 063 064 /** 065 * @param clusters 066 * the clusters 067 * @param nEntries 068 * the number of entries 069 */ 070 public IndexClusters(int[][] clusters, int nEntries) { 071 this.nEntries = nEntries; 072 this.clusters = clusters; 073 } 074 075 /** 076 * @param clusters 077 * the clusters 078 */ 079 public IndexClusters(int[][] clusters) { 080 this.nEntries = 0; 081 this.clusters = clusters; 082 for (int i = 0; i < clusters.length; i++) { 083 this.nEntries += clusters[i].length; 084 } 085 } 086 087 /** 088 * @param assignments 089 * convert a list of cluster assignments to a 2D array to cluster 090 * to assignments 091 */ 092 public IndexClusters(int[] assignments) { 093 this.nEntries = assignments.length; 094 final Map<Integer, TIntArrayList> clusters = new HashMap<Integer, TIntArrayList>(); 095 for (int i = 0; i < assignments.length; i++) { 096 final int ass = assignments[i]; 097 TIntArrayList current = clusters.get(ass); 098 if (current == null) { 099 clusters.put(ass, current = new TIntArrayList()); 100 } 101 current.add(i); 102 } 103 int clustersSeen = 0; 104 this.clusters = new int[clusters.size()][]; 105 for (final Entry<Integer, TIntArrayList> i : clusters.entrySet()) { 106 this.clusters[clustersSeen] = i.getValue().toArray(); 107 clustersSeen++; 108 } 109 } 110 111 /** 112 * @param completedClusters 113 */ 114 public IndexClusters(List<int[]> completedClusters) { 115 this.nEntries = 0; 116 this.clusters = new int[completedClusters.size()][]; 117 for (int i = 0; i < clusters.length; i++) { 118 clusters[i] = completedClusters.get(i); 119 this.nEntries += clusters[i].length; 120 } 121 } 122 123 /** 124 * Get the number of clusters. 125 * 126 * @return number of clusters. 127 */ 128 public int[][] clusters() { 129 return clusters; 130 } 131 132 /** 133 * Get the number of data entries 134 * 135 * @return the number of data entries. 136 */ 137 public int numEntries() { 138 return nEntries; 139 } 140 141 /** 142 * Get the number of clusters. 143 * 144 * @return number of clusters. 145 */ 146 public int numClusters() { 147 return this.clusters.length; 148 } 149 150 @Override 151 public void readASCII(Scanner in) throws IOException { 152 this.clusters = new int[in.nextInt()][]; 153 this.nEntries = in.nextInt(); 154 for (int i = 0; i < this.nEntries;) { 155 final int cluster = in.nextInt(); 156 final int count = in.nextInt(); 157 i += count; 158 this.clusters[cluster] = new int[count]; 159 for (int j = 0; j < count; j++) { 160 this.clusters[cluster][j] = in.nextInt(); 161 } 162 } 163 } 164 165 @Override 166 public String asciiHeader() { 167 return "IDX" + CLUSTER_HEADER; 168 } 169 170 @Override 171 public void readBinary(DataInput in) throws IOException { 172 this.clusters = new int[in.readInt()][]; 173 this.nEntries = in.readInt(); 174 for (int i = 0; i < this.nEntries;) { 175 final int cluster = in.readInt(); 176 final int count = in.readInt(); 177 i += count; 178 this.clusters[cluster] = new int[count]; 179 for (int j = 0; j < count; j++) { 180 this.clusters[cluster][j] = in.readInt(); 181 } 182 } 183 } 184 185 @Override 186 public byte[] binaryHeader() { 187 return asciiHeader().getBytes(); 188 } 189 190 @Override 191 public void writeASCII(PrintWriter out) throws IOException { 192 out.println(this.numClusters()); 193 out.println(this.nEntries); 194 for (int i = 0; i < this.clusters.length; i++) { 195 final int[] cluster = this.clusters[i]; 196 out.println(i); 197 out.println(cluster.length); 198 for (int j = 0; j < cluster.length; j++) { 199 out.println(cluster[j]); 200 } 201 } 202 } 203 204 @Override 205 public void writeBinary(DataOutput out) throws IOException { 206 out.writeInt(this.numClusters()); 207 out.writeInt(nEntries); 208 for (int i = 0; i < this.clusters.length; i++) { 209 final int[] cluster = this.clusters[i]; 210 out.writeInt(i); 211 out.writeInt(cluster.length); 212 for (int j = 0; j < cluster.length; j++) { 213 out.writeInt(cluster[j]); 214 } 215 } 216 } 217 218 @Override 219 public String toString() { 220 final int[][] clusters = this.clusters(); 221 int i = 0; 222 final StringWriter sw = new StringWriter(); 223 final PrintWriter out = new PrintWriter(sw); 224 out.println("N-Clusters: " + this.numClusters()); 225 out.println("Entities: " + this.numEntries()); 226 String str = sw.toString(); 227 for (final int[] member : clusters) { 228 str += String.format("%d %s\n", i++, Arrays.toString(member)); 229 } 230 return str; 231 } 232}