001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.util.set; 031 032import gnu.trove.map.hash.TObjectIntHashMap; 033 034import java.util.ArrayList; 035import java.util.Collection; 036import java.util.Comparator; 037import java.util.HashMap; 038import java.util.HashSet; 039import java.util.Iterator; 040import java.util.List; 041import java.util.Map; 042import java.util.Set; 043 044/** 045 * Implementation of a Disjoint Set Forest data structure. 046 * <p> 047 * A disjoint set holds a set of elements partitioned into a number of 048 * non-overlapping subsets. The disjoint set forest holds each subset as a tree 049 * structure, with the representative of each subset being the root of the 050 * respective tree. 051 * <p> 052 * See <a href="http://en.wikipedia.org/wiki/Disjoint-set_data_structure"> 053 * http://en.wikipedia.org/wiki/Disjoint-set_data_structure</a> for more 054 * information. 055 * 056 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 057 * 058 * @param <T> 059 * the type of elements maintained by this set 060 */ 061public class DisjointSetForest<T> implements Set<T> { 062 class Node { 063 int rank; 064 T parent; 065 066 Node(T parent, int rank) { 067 this.parent = parent; 068 this.rank = rank; 069 } 070 } 071 072 private Map<T, Node> data; 073 private TObjectIntHashMap<T> counts = new TObjectIntHashMap<T>(); 074 075 /** 076 * Construct a new Disjoint Set Forest. 077 */ 078 public DisjointSetForest() { 079 data = new HashMap<T, Node>(); 080 } 081 082 /** 083 * Constructs a new Disjoint Set Forest, with the specified initial 084 * capacity. 085 * 086 * @param initialCapacity 087 * The initial capacity 088 */ 089 public DisjointSetForest(int initialCapacity) { 090 data = new HashMap<T, Node>(initialCapacity); 091 } 092 093 /** 094 * Search for the representative of the subset containing the element x. 095 * 096 * This implementation uses path compression to flatten the trees during the 097 * find operation. This will lead to performance improvements on subsequent 098 * find operations. 099 * 100 * @param x 101 * the element 102 * @return the representative element or null if x isn't in the forest. 103 */ 104 public T find(T x) { 105 final Node xNode = data.get(x); 106 107 if (xNode == null) 108 return null; 109 110 if (x == xNode.parent) 111 return x; 112 113 xNode.parent = find(xNode.parent); 114 115 return xNode.parent; 116 } 117 118 /** 119 * Make a new subset from the given item 120 * 121 * @param o 122 * the item 123 * @return the item if the subset was created successfully; null otherwise. 124 */ 125 public T makeSet(T o) { 126 if (data.containsKey(o)) 127 return null; 128 129 data.put(o, new Node(o, 0)); 130 counts.put(o, 1); 131 132 return o; 133 } 134 135 /** 136 * Joint the subsets belonging to the objects x and y. 137 * 138 * @param x 139 * the x object. 140 * @param y 141 * the y object. 142 * @return the new root, or null if x or y are not in the disjoint set 143 * forest, or x and y are already in the same set. 144 */ 145 public T union(T x, T y) { 146 final T xRoot = find(x); 147 final T yRoot = find(y); 148 149 if (xRoot == yRoot || xRoot == null || yRoot == null) 150 return null; 151 152 final Node xNode = data.get(xRoot); 153 final Node yNode = data.get(yRoot); 154 155 // x and y are not already in same set. Merge them. 156 if (xNode.rank < yNode.rank) { 157 xNode.parent = yRoot; 158 counts.adjustValue(yRoot, counts.remove(xRoot)); 159 160 return yRoot; 161 } else if (xNode.rank > yNode.rank) { 162 yNode.parent = xRoot; 163 counts.adjustValue(xRoot, counts.remove(yRoot)); 164 165 return xRoot; 166 } else { 167 yNode.parent = xRoot; 168 xNode.rank++; 169 counts.adjustValue(xRoot, counts.remove(yRoot)); 170 return xRoot; 171 } 172 } 173 174 /** 175 * @return the contents of the forest as a list 176 */ 177 public List<T> asList() { 178 return new ArrayList<T>(data.keySet()); 179 } 180 181 @Override 182 public int size() { 183 return data.size(); 184 } 185 186 @Override 187 public boolean isEmpty() { 188 return data.isEmpty(); 189 } 190 191 @Override 192 public boolean contains(Object o) { 193 return data.containsKey(o); 194 } 195 196 @Override 197 public Iterator<T> iterator() { 198 return new Iterator<T>() { 199 Iterator<T> iterator = data.keySet().iterator(); 200 201 @Override 202 public boolean hasNext() { 203 return iterator.hasNext(); 204 } 205 206 @Override 207 public T next() { 208 return iterator.next(); 209 } 210 211 @Override 212 public void remove() { 213 throw new UnsupportedOperationException("not supported"); 214 } 215 216 }; 217 } 218 219 @Override 220 public Object[] toArray() { 221 return data.keySet().toArray(); 222 } 223 224 @Override 225 public <E> E[] toArray(E[] a) { 226 return data.keySet().toArray(a); 227 } 228 229 @Override 230 public boolean add(T e) { 231 return makeSet(e) == e; 232 } 233 234 /** 235 * Add an element to the given set. 236 * 237 * @param elem 238 * Element to add 239 * @param repr 240 * A representative of the set to add to 241 * @return The representative of the set containing the added element, or 242 * null if elem is already contained in this set. 243 * @throws IllegalArgumentException 244 * If repr is not an element of this forest. 245 */ 246 public T add(T elem, T repr) { 247 if (repr == null) 248 return makeSet(elem); 249 250 if (data.containsKey(elem)) 251 return null; 252 if (!data.containsKey(repr)) 253 throw new IllegalArgumentException("Set containing representative not found"); 254 255 add(elem); 256 257 return union(repr, elem); 258 } 259 260 @Override 261 public boolean remove(Object o) { 262 throw new UnsupportedOperationException("not supported"); 263 } 264 265 @Override 266 public boolean containsAll(Collection<?> collection) { 267 for (final Object o : collection) 268 if (!data.containsKey(o)) 269 return false; 270 271 return true; 272 } 273 274 /** 275 * Add all the elements in the collection to this disjoint set forest. Each 276 * element will be added to its own set. 277 * 278 * @param collection 279 * The collection to add 280 * @return true is any elements were added; false otherwise. 281 */ 282 @Override 283 public boolean addAll(Collection<? extends T> collection) { 284 boolean changed = false; 285 for (final T c : collection) { 286 if (makeSet(c) == c) { 287 changed = true; 288 } 289 } 290 return changed; 291 } 292 293 /** 294 * Add all the elements in the collection to this disjoint set forest. 295 * 296 * @param collection 297 * The collection to add 298 * @param sameSet 299 * Are the elements in the collection to be added to the same 300 * set? 301 * 302 * @return true is any elements were added; false otherwise. 303 */ 304 public boolean addAll(Collection<? extends T> collection, boolean sameSet) { 305 if (collection == null || collection.isEmpty()) 306 return false; 307 308 if (!sameSet) 309 return addAll(collection); 310 311 T root = null; 312 for (final T c : collection) { 313 if (root == null) 314 root = makeSet(c); 315 else 316 add(c, root); 317 } 318 return root != null; 319 } 320 321 @Override 322 public boolean retainAll(Collection<?> c) { 323 throw new UnsupportedOperationException("not supported"); 324 } 325 326 @Override 327 public boolean removeAll(Collection<?> c) { 328 throw new UnsupportedOperationException("not supported"); 329 } 330 331 @Override 332 public void clear() { 333 data.clear(); 334 } 335 336 /** 337 * Get the size of a subset 338 * 339 * @param o 340 * an object in the subset 341 * @return the number of objects in the subset 342 */ 343 public int size(T o) { 344 return counts.get(find(o)); 345 } 346 347 /** 348 * Get the number of subsets 349 * 350 * @return the number of subset 351 */ 352 public int numSets() { 353 return counts.size(); 354 } 355 356 /** 357 * Get all the subsets stored in this forest. 358 * 359 * @return subsets 360 */ 361 public Set<Set<T>> getSubsets() { 362 final Map<T, Set<T>> set = new HashMap<T, Set<T>>(); 363 364 for (final T t : this) { 365 final T repr = find(t); 366 367 Set<T> reprSet = set.get(repr); 368 if (reprSet == null) 369 set.put(repr, reprSet = new HashSet<T>()); 370 reprSet.add(t); 371 } 372 373 return new HashSet<Set<T>>(set.values()); 374 } 375 376 /** 377 * Construct a {@link DisjointSetForest} by partitioning the given values 378 * using the given {@link Comparator} to determine equality of pairs of 379 * values. 380 * 381 * @param <T> 382 * the type of elements maintained by this set 383 * @param values 384 * the values to partition 385 * @param comparator 386 * the comparator to determine equality of values 387 * @return the new {@link DisjointSetForest} representing the partitioning 388 */ 389 public static <T> DisjointSetForest<T> partition(List<T> values, Comparator<T> comparator) { 390 final DisjointSetForest<T> forest = new DisjointSetForest<T>(values.size()); 391 forest.addAll(values); 392 393 final int size = values.size(); 394 for (int i = 0; i < size; i++) { 395 final T vi = values.get(i); 396 397 for (int j = i + 1; j < size; j++) { 398 final T vj = values.get(j); 399 400 if (comparator.compare(vi, vj) == 0) 401 forest.union(vi, vj); 402 } 403 } 404 405 return forest; 406 } 407 408 /** 409 * Extract the subsets formed by constructing a {@link DisjointSetForest} 410 * which partitions the given values using the given {@link Comparator} to 411 * determine equality of pairs of values. 412 * 413 * @param <T> 414 * the type of elements maintained by this set 415 * @param values 416 * the values to partition 417 * @param comparator 418 * the comparator to determine equality of values 419 * @return the subsets representing the partitioning 420 */ 421 public static <T> Set<Set<T>> partitionSubsets(List<T> values, Comparator<T> comparator) { 422 return partition(values, comparator).getSubsets(); 423 } 424}