001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.util.set;
031
032import gnu.trove.map.hash.TObjectIntHashMap;
033
034import java.util.ArrayList;
035import java.util.Collection;
036import java.util.Comparator;
037import java.util.HashMap;
038import java.util.HashSet;
039import java.util.Iterator;
040import java.util.List;
041import java.util.Map;
042import java.util.Set;
043
044/**
045 * Implementation of a Disjoint Set Forest data structure.
046 * <p>
047 * A disjoint set holds a set of elements partitioned into a number of
048 * non-overlapping subsets. The disjoint set forest holds each subset as a tree
049 * structure, with the representative of each subset being the root of the
050 * respective tree.
051 * <p>
052 * See <a href="http://en.wikipedia.org/wiki/Disjoint-set_data_structure">
053 * http://en.wikipedia.org/wiki/Disjoint-set_data_structure</a> for more
054 * information.
055 * 
056 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
057 * 
058 * @param <T>
059 *            the type of elements maintained by this set
060 */
061public class DisjointSetForest<T> implements Set<T> {
062        class Node {
063                int rank;
064                T parent;
065
066                Node(T parent, int rank) {
067                        this.parent = parent;
068                        this.rank = rank;
069                }
070        }
071
072        private Map<T, Node> data;
073        private TObjectIntHashMap<T> counts = new TObjectIntHashMap<T>();
074
075        /**
076         * Construct a new Disjoint Set Forest.
077         */
078        public DisjointSetForest() {
079                data = new HashMap<T, Node>();
080        }
081
082        /**
083         * Constructs a new Disjoint Set Forest, with the specified initial
084         * capacity.
085         * 
086         * @param initialCapacity
087         *            The initial capacity
088         */
089        public DisjointSetForest(int initialCapacity) {
090                data = new HashMap<T, Node>(initialCapacity);
091        }
092
093        /**
094         * Search for the representative of the subset containing the element x.
095         * 
096         * This implementation uses path compression to flatten the trees during the
097         * find operation. This will lead to performance improvements on subsequent
098         * find operations.
099         * 
100         * @param x
101         *            the element
102         * @return the representative element or null if x isn't in the forest.
103         */
104        public T find(T x) {
105                final Node xNode = data.get(x);
106
107                if (xNode == null)
108                        return null;
109
110                if (x == xNode.parent)
111                        return x;
112
113                xNode.parent = find(xNode.parent);
114
115                return xNode.parent;
116        }
117
118        /**
119         * Make a new subset from the given item
120         * 
121         * @param o
122         *            the item
123         * @return the item if the subset was created successfully; null otherwise.
124         */
125        public T makeSet(T o) {
126                if (data.containsKey(o))
127                        return null;
128
129                data.put(o, new Node(o, 0));
130                counts.put(o, 1);
131
132                return o;
133        }
134
135        /**
136         * Joint the subsets belonging to the objects x and y.
137         * 
138         * @param x
139         *            the x object.
140         * @param y
141         *            the y object.
142         * @return the new root, or null if x or y are not in the disjoint set
143         *         forest, or x and y are already in the same set.
144         */
145        public T union(T x, T y) {
146                final T xRoot = find(x);
147                final T yRoot = find(y);
148
149                if (xRoot == yRoot || xRoot == null || yRoot == null)
150                        return null;
151
152                final Node xNode = data.get(xRoot);
153                final Node yNode = data.get(yRoot);
154
155                // x and y are not already in same set. Merge them.
156                if (xNode.rank < yNode.rank) {
157                        xNode.parent = yRoot;
158                        counts.adjustValue(yRoot, counts.remove(xRoot));
159
160                        return yRoot;
161                } else if (xNode.rank > yNode.rank) {
162                        yNode.parent = xRoot;
163                        counts.adjustValue(xRoot, counts.remove(yRoot));
164
165                        return xRoot;
166                } else {
167                        yNode.parent = xRoot;
168                        xNode.rank++;
169                        counts.adjustValue(xRoot, counts.remove(yRoot));
170                        return xRoot;
171                }
172        }
173
174        /**
175         * @return the contents of the forest as a list
176         */
177        public List<T> asList() {
178                return new ArrayList<T>(data.keySet());
179        }
180
181        @Override
182        public int size() {
183                return data.size();
184        }
185
186        @Override
187        public boolean isEmpty() {
188                return data.isEmpty();
189        }
190
191        @Override
192        public boolean contains(Object o) {
193                return data.containsKey(o);
194        }
195
196        @Override
197        public Iterator<T> iterator() {
198                return new Iterator<T>() {
199                        Iterator<T> iterator = data.keySet().iterator();
200
201                        @Override
202                        public boolean hasNext() {
203                                return iterator.hasNext();
204                        }
205
206                        @Override
207                        public T next() {
208                                return iterator.next();
209                        }
210
211                        @Override
212                        public void remove() {
213                                throw new UnsupportedOperationException("not supported");
214                        }
215
216                };
217        }
218
219        @Override
220        public Object[] toArray() {
221                return data.keySet().toArray();
222        }
223
224        @Override
225        public <E> E[] toArray(E[] a) {
226                return data.keySet().toArray(a);
227        }
228
229        @Override
230        public boolean add(T e) {
231                return makeSet(e) == e;
232        }
233
234        /**
235         * Add an element to the given set.
236         * 
237         * @param elem
238         *            Element to add
239         * @param repr
240         *            A representative of the set to add to
241         * @return The representative of the set containing the added element, or
242         *         null if elem is already contained in this set.
243         * @throws IllegalArgumentException
244         *             If repr is not an element of this forest.
245         */
246        public T add(T elem, T repr) {
247                if (repr == null)
248                        return makeSet(elem);
249
250                if (data.containsKey(elem))
251                        return null;
252                if (!data.containsKey(repr))
253                        throw new IllegalArgumentException("Set containing representative not found");
254
255                add(elem);
256
257                return union(repr, elem);
258        }
259
260        @Override
261        public boolean remove(Object o) {
262                throw new UnsupportedOperationException("not supported");
263        }
264
265        @Override
266        public boolean containsAll(Collection<?> collection) {
267                for (final Object o : collection)
268                        if (!data.containsKey(o))
269                                return false;
270
271                return true;
272        }
273
274        /**
275         * Add all the elements in the collection to this disjoint set forest. Each
276         * element will be added to its own set.
277         * 
278         * @param collection
279         *            The collection to add
280         * @return true is any elements were added; false otherwise.
281         */
282        @Override
283        public boolean addAll(Collection<? extends T> collection) {
284                boolean changed = false;
285                for (final T c : collection) {
286                        if (makeSet(c) == c) {
287                                changed = true;
288                        }
289                }
290                return changed;
291        }
292
293        /**
294         * Add all the elements in the collection to this disjoint set forest.
295         * 
296         * @param collection
297         *            The collection to add
298         * @param sameSet
299         *            Are the elements in the collection to be added to the same
300         *            set?
301         * 
302         * @return true is any elements were added; false otherwise.
303         */
304        public boolean addAll(Collection<? extends T> collection, boolean sameSet) {
305                if (collection == null || collection.isEmpty())
306                        return false;
307
308                if (!sameSet)
309                        return addAll(collection);
310
311                T root = null;
312                for (final T c : collection) {
313                        if (root == null)
314                                root = makeSet(c);
315                        else
316                                add(c, root);
317                }
318                return root != null;
319        }
320
321        @Override
322        public boolean retainAll(Collection<?> c) {
323                throw new UnsupportedOperationException("not supported");
324        }
325
326        @Override
327        public boolean removeAll(Collection<?> c) {
328                throw new UnsupportedOperationException("not supported");
329        }
330
331        @Override
332        public void clear() {
333                data.clear();
334        }
335
336        /**
337         * Get the size of a subset
338         * 
339         * @param o
340         *            an object in the subset
341         * @return the number of objects in the subset
342         */
343        public int size(T o) {
344                return counts.get(find(o));
345        }
346
347        /**
348         * Get the number of subsets
349         * 
350         * @return the number of subset
351         */
352        public int numSets() {
353                return counts.size();
354        }
355
356        /**
357         * Get all the subsets stored in this forest.
358         * 
359         * @return subsets
360         */
361        public Set<Set<T>> getSubsets() {
362                final Map<T, Set<T>> set = new HashMap<T, Set<T>>();
363
364                for (final T t : this) {
365                        final T repr = find(t);
366
367                        Set<T> reprSet = set.get(repr);
368                        if (reprSet == null)
369                                set.put(repr, reprSet = new HashSet<T>());
370                        reprSet.add(t);
371                }
372
373                return new HashSet<Set<T>>(set.values());
374        }
375
376        /**
377         * Construct a {@link DisjointSetForest} by partitioning the given values
378         * using the given {@link Comparator} to determine equality of pairs of
379         * values.
380         * 
381         * @param <T>
382         *            the type of elements maintained by this set
383         * @param values
384         *            the values to partition
385         * @param comparator
386         *            the comparator to determine equality of values
387         * @return the new {@link DisjointSetForest} representing the partitioning
388         */
389        public static <T> DisjointSetForest<T> partition(List<T> values, Comparator<T> comparator) {
390                final DisjointSetForest<T> forest = new DisjointSetForest<T>(values.size());
391                forest.addAll(values);
392
393                final int size = values.size();
394                for (int i = 0; i < size; i++) {
395                        final T vi = values.get(i);
396
397                        for (int j = i + 1; j < size; j++) {
398                                final T vj = values.get(j);
399
400                                if (comparator.compare(vi, vj) == 0)
401                                        forest.union(vi, vj);
402                        }
403                }
404
405                return forest;
406        }
407
408        /**
409         * Extract the subsets formed by constructing a {@link DisjointSetForest}
410         * which partitions the given values using the given {@link Comparator} to
411         * determine equality of pairs of values.
412         * 
413         * @param <T>
414         *            the type of elements maintained by this set
415         * @param values
416         *            the values to partition
417         * @param comparator
418         *            the comparator to determine equality of values
419         * @return the subsets representing the partitioning
420         */
421        public static <T> Set<Set<T>> partitionSubsets(List<T> values, Comparator<T> comparator) {
422                return partition(values, comparator).getSubsets();
423        }
424}