001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.ml.clustering;
031
032import gnu.trove.list.array.TIntArrayList;
033
034import java.io.DataInput;
035import java.io.DataOutput;
036import java.io.IOException;
037import java.io.PrintWriter;
038import java.io.StringWriter;
039import java.util.Arrays;
040import java.util.HashMap;
041import java.util.List;
042import java.util.Map;
043import java.util.Map.Entry;
044import java.util.Scanner;
045
046import org.openimaj.io.IOUtils;
047
048/**
049 * Class to describe objects that are the result of the clustering where the
050 * training data is implicitly clustered
051 *
052 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
053 */
054public class IndexClusters implements Clusters {
055        protected int[][] clusters;
056        protected int nEntries;
057
058        /**
059         * Used only to initailise for {@link IOUtils}
060         */
061        public IndexClusters() {
062        }
063
064        /**
065         * @param clusters
066         *            the clusters
067         * @param nEntries
068         *            the number of entries
069         */
070        public IndexClusters(int[][] clusters, int nEntries) {
071                this.nEntries = nEntries;
072                this.clusters = clusters;
073        }
074
075        /**
076         * @param clusters
077         *            the clusters
078         */
079        public IndexClusters(int[][] clusters) {
080                this.nEntries = 0;
081                this.clusters = clusters;
082                for (int i = 0; i < clusters.length; i++) {
083                        this.nEntries += clusters[i].length;
084                }
085        }
086
087        /**
088         * @param assignments
089         *            convert a list of cluster assignments to a 2D array to cluster
090         *            to assignments
091         */
092        public IndexClusters(int[] assignments) {
093                this.nEntries = assignments.length;
094                final Map<Integer, TIntArrayList> clusters = new HashMap<Integer, TIntArrayList>();
095                for (int i = 0; i < assignments.length; i++) {
096                        final int ass = assignments[i];
097                        TIntArrayList current = clusters.get(ass);
098                        if (current == null) {
099                                clusters.put(ass, current = new TIntArrayList());
100                        }
101                        current.add(i);
102                }
103                int clustersSeen = 0;
104                this.clusters = new int[clusters.size()][];
105                for (final Entry<Integer, TIntArrayList> i : clusters.entrySet()) {
106                        this.clusters[clustersSeen] = i.getValue().toArray();
107                        clustersSeen++;
108                }
109        }
110
111        /**
112         * @param completedClusters
113         */
114        public IndexClusters(List<int[]> completedClusters) {
115                this.nEntries = 0;
116                this.clusters = new int[completedClusters.size()][];
117                for (int i = 0; i < clusters.length; i++) {
118                        clusters[i] = completedClusters.get(i);
119                        this.nEntries += clusters[i].length;
120                }
121        }
122
123        /**
124         * Get the number of clusters.
125         *
126         * @return number of clusters.
127         */
128        public int[][] clusters() {
129                return clusters;
130        }
131
132        /**
133         * Get the number of data entries
134         *
135         * @return the number of data entries.
136         */
137        public int numEntries() {
138                return nEntries;
139        }
140
141        /**
142         * Get the number of clusters.
143         *
144         * @return number of clusters.
145         */
146        public int numClusters() {
147                return this.clusters.length;
148        }
149
150        @Override
151        public void readASCII(Scanner in) throws IOException {
152                this.clusters = new int[in.nextInt()][];
153                this.nEntries = in.nextInt();
154                for (int i = 0; i < this.nEntries;) {
155                        final int cluster = in.nextInt();
156                        final int count = in.nextInt();
157                        i += count;
158                        this.clusters[cluster] = new int[count];
159                        for (int j = 0; j < count; j++) {
160                                this.clusters[cluster][j] = in.nextInt();
161                        }
162                }
163        }
164
165        @Override
166        public String asciiHeader() {
167                return "IDX" + CLUSTER_HEADER;
168        }
169
170        @Override
171        public void readBinary(DataInput in) throws IOException {
172                this.clusters = new int[in.readInt()][];
173                this.nEntries = in.readInt();
174                for (int i = 0; i < this.nEntries;) {
175                        final int cluster = in.readInt();
176                        final int count = in.readInt();
177                        i += count;
178                        this.clusters[cluster] = new int[count];
179                        for (int j = 0; j < count; j++) {
180                                this.clusters[cluster][j] = in.readInt();
181                        }
182                }
183        }
184
185        @Override
186        public byte[] binaryHeader() {
187                return asciiHeader().getBytes();
188        }
189
190        @Override
191        public void writeASCII(PrintWriter out) throws IOException {
192                out.println(this.numClusters());
193                out.println(this.nEntries);
194                for (int i = 0; i < this.clusters.length; i++) {
195                        final int[] cluster = this.clusters[i];
196                        out.println(i);
197                        out.println(cluster.length);
198                        for (int j = 0; j < cluster.length; j++) {
199                                out.println(cluster[j]);
200                        }
201                }
202        }
203
204        @Override
205        public void writeBinary(DataOutput out) throws IOException {
206                out.writeInt(this.numClusters());
207                out.writeInt(nEntries);
208                for (int i = 0; i < this.clusters.length; i++) {
209                        final int[] cluster = this.clusters[i];
210                        out.writeInt(i);
211                        out.writeInt(cluster.length);
212                        for (int j = 0; j < cluster.length; j++) {
213                                out.writeInt(cluster[j]);
214                        }
215                }
216        }
217
218        @Override
219        public String toString() {
220                final int[][] clusters = this.clusters();
221                int i = 0;
222                final StringWriter sw = new StringWriter();
223                final PrintWriter out = new PrintWriter(sw);
224                out.println("N-Clusters: " + this.numClusters());
225                out.println("Entities: " + this.numEntries());
226                String str = sw.toString();
227                for (final int[] member : clusters) {
228                        str += String.format("%d %s\n", i++, Arrays.toString(member));
229                }
230                return str;
231        }
232}