001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.data.dataset; 031 032import java.util.AbstractMap; 033import java.util.Collection; 034import java.util.HashMap; 035import java.util.Iterator; 036import java.util.Map; 037import java.util.Set; 038 039import org.openimaj.data.identity.Identifiable; 040import org.openimaj.util.iterator.ConcatenatedIterable; 041 042/** 043 * A {@link MapBackedDataset} is a concrete implementation of a 044 * {@link GroupedDataset} backed by a {@link Map}. 045 * 046 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 047 * 048 * @param <KEY> 049 * Type of dataset class key 050 * @param <DATASET> 051 * Type of sub-datasets. 052 * @param <INSTANCE> 053 * Type of objects in the dataset 054 */ 055public class MapBackedDataset<KEY extends Object, DATASET extends Dataset<INSTANCE>, INSTANCE> 056extends AbstractMap<KEY, DATASET> 057implements GroupedDataset<KEY, DATASET, INSTANCE> 058{ 059 protected Map<KEY, DATASET> map; 060 061 /** 062 * Construct an empty {@link MapBackedDataset} backed by a {@link HashMap}. 063 */ 064 public MapBackedDataset() { 065 this.map = new HashMap<KEY, DATASET>(); 066 } 067 068 /** 069 * Construct with the given map. 070 * 071 * @param map 072 * the map 073 */ 074 public MapBackedDataset(Map<KEY, DATASET> map) { 075 this.map = map; 076 } 077 078 /** 079 * Get the underlying map. 080 * 081 * @return the underlying map 082 */ 083 public Map<KEY, DATASET> getMap() { 084 return map; 085 } 086 087 @Override 088 public DATASET getInstances(KEY key) { 089 return map.get(key); 090 } 091 092 @Override 093 public Set<KEY> getGroups() { 094 return map.keySet(); 095 } 096 097 @Override 098 public INSTANCE getRandomInstance(KEY key) { 099 return map.get(key).getRandomInstance(); 100 } 101 102 @Override 103 public INSTANCE getRandomInstance() { 104 final int index = (int) (Math.random() * numInstances()); 105 int count = 0; 106 107 for (final DATASET d : map.values()) { 108 if (index >= count + d.numInstances()) { 109 count += d.numInstances(); 110 } else { 111 if (d instanceof ListDataset) { 112 return ((ListDataset<INSTANCE>) d).get(index - count); 113 } else { 114 for (final INSTANCE i : d) { 115 if (index == count) 116 return i; 117 118 count++; 119 } 120 } 121 } 122 } 123 return null; 124 } 125 126 @Override 127 public int numInstances() { 128 int size = 0; 129 130 for (final DATASET d : map.values()) { 131 size += d.numInstances(); 132 } 133 134 return size; 135 } 136 137 @Override 138 public Iterator<INSTANCE> iterator() { 139 return new ConcatenatedIterable<INSTANCE>(map.values()).iterator(); 140 } 141 142 @Override 143 public String toString() { 144 return map.toString(); 145 } 146 147 @Override 148 public Set<Entry<KEY, DATASET>> entrySet() { 149 return map.entrySet(); 150 } 151 152 /* 153 * (non-Javadoc) 154 * 155 * @see java.util.AbstractMap#put(java.lang.Object, java.lang.Object) 156 */ 157 @Override 158 public DATASET put(KEY key, DATASET value) { 159 return map.put(key, value); 160 } 161 162 /** 163 * Convenience method for populating a dataset by chaining method calls: 164 * 165 * <pre> 166 * final MapBackedDataset<String, ListDataset<String>, String> ds = new MapBackedDataset<String, ListDataset<String>, String>() 167 * .add("A", new ListBackedDataset<String>()) 168 * .add("B", new ListBackedDataset<String>()); 169 * </pre> 170 * 171 * @param key 172 * the key to insert 173 * @param dataset 174 * the value to insert 175 * @return this dataset 176 */ 177 public MapBackedDataset<KEY, DATASET, INSTANCE> add(KEY key, DATASET dataset) { 178 this.put(key, dataset); 179 return this; 180 } 181 182 /** 183 * Convenience method to construct a {@link MapBackedDataset} from a number 184 * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group, 185 * and the key is the identifier returned by {@link Identifiable#getID()}. 186 * 187 * @param datasets 188 * the datasets representing the groups 189 * @return the newly constructed grouped dataset. 190 */ 191 @SafeVarargs 192 public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable> 193 MapBackedDataset<String, DATASET, INSTANCE> of(DATASET... datasets) 194 { 195 final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>(); 196 197 for (final DATASET d : datasets) { 198 ds.put(d.getID(), d); 199 } 200 201 return ds; 202 } 203 204 /** 205 * A builder for creating {@link MapBackedDataset} instances from 206 * {@link Identifiable} sub-datasets. Example: 207 * 208 * <pre> 209 * final MapBackedDataset<String, VFSListDataset<String>, String> ds = new MapBackedDataset.IdentifiableBuilder<VFSListDataset<String>, String>() 210 * .add(new VFSListDataset<String>(...)) 211 * .add(new VFSListDataset<String>(...)) 212 * .build(); 213 * </pre> 214 * 215 * For small {@link MapBackedDataset}s, the <tt>MapBackedDataset.of()</tt> 216 * methods are even more convenient. 217 * <p> 218 * Builder instances can be reused - it is safe to call {@link #build()} 219 * multiple times to build multiple maps in series. Each map is a superset 220 * of the maps created before it. 221 * 222 * 223 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 224 * 225 * @param <DATASET> 226 * Type of sub-datasets. 227 * @param <INSTANCE> 228 * Type of objects in the dataset 229 */ 230 public static class IdentifiableBuilder<DATASET extends Dataset<INSTANCE> & Identifiable, INSTANCE> { 231 MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>(); 232 233 /** 234 * Add the sub-dataset such that it becomes a group in the 235 * {@link MapBackedDataset} returned by {@link #build()} where the key 236 * is the identifier returned by {@link Identifiable#getID()}. 237 * <p> 238 * If duplicate keys (i.e. sub-datasets with duplicate identifiers) are 239 * added, only the last one will appear in the resultant dataset 240 * produced by {@link #build()}. 241 * 242 * @param dataset 243 * the sub-dataset to add 244 * @return the builder 245 */ 246 public IdentifiableBuilder<DATASET, INSTANCE> add(DATASET dataset) { 247 ds.put(dataset.getID(), dataset); 248 249 return this; 250 } 251 252 /** 253 * Returns a newly-created {@link MapBackedDataset}. 254 * 255 * @return a newly-created {@link MapBackedDataset}. 256 */ 257 public MapBackedDataset<String, DATASET, INSTANCE> build() { 258 return new MapBackedDataset<String, DATASET, INSTANCE>(ds); 259 } 260 } 261 262 /** 263 * Returns a new builder. The generated builder is equivalent to the builder 264 * created by the {@link IdentifiableBuilder#IdentifiableBuilder()} 265 * constructor. 266 * 267 * @return a new builder. 268 */ 269 public static <DATASET extends Dataset<INSTANCE> & Identifiable, INSTANCE> 270 IdentifiableBuilder<DATASET, INSTANCE> builder() 271 { 272 return new IdentifiableBuilder<DATASET, INSTANCE>(); 273 } 274 275 /** 276 * Convenience method to construct a {@link MapBackedDataset} from a number 277 * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group, 278 * and the key is the identifier returned by {@link Identifiable#getID()}. 279 * 280 * @param d1 281 * first dataset 282 * 283 * @return the newly constructed grouped dataset. 284 */ 285 public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable> 286 MapBackedDataset<String, DATASET, INSTANCE> of(DATASET d1) 287 { 288 final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>(); 289 ds.put(d1.getID(), d1); 290 return ds; 291 } 292 293 /** 294 * Convenience method to construct a {@link MapBackedDataset} from a number 295 * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group, 296 * and the key is the identifier returned by {@link Identifiable#getID()}. 297 * 298 * @param d1 299 * first dataset 300 * @param d2 301 * second dataset 302 * 303 * @return the newly constructed grouped dataset. 304 */ 305 public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable> 306 MapBackedDataset<String, DATASET, INSTANCE> of(DATASET d1, DATASET d2) 307 { 308 final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>(); 309 ds.put(d1.getID(), d1); 310 ds.put(d2.getID(), d2); 311 return ds; 312 } 313 314 /** 315 * Convenience method to construct a {@link MapBackedDataset} from a number 316 * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group, 317 * and the key is the identifier returned by {@link Identifiable#getID()}. 318 * 319 * @param d1 320 * first dataset 321 * @param d2 322 * second dataset 323 * @param d3 324 * third dataset 325 * 326 * @return the newly constructed grouped dataset. 327 */ 328 public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable> 329 MapBackedDataset<String, DATASET, INSTANCE> of(DATASET d1, DATASET d2, DATASET d3) 330 { 331 final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>(); 332 ds.put(d1.getID(), d1); 333 ds.put(d2.getID(), d2); 334 ds.put(d3.getID(), d3); 335 return ds; 336 } 337 338 /** 339 * Convenience method to construct a {@link MapBackedDataset} from a number 340 * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group, 341 * and the key is the identifier returned by {@link Identifiable#getID()}. 342 * 343 * @param d1 344 * first dataset 345 * @param d2 346 * second dataset 347 * @param d3 348 * third dataset 349 * @param d4 350 * forth dataset 351 * @return the newly constructed grouped dataset. 352 */ 353 public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable> 354 MapBackedDataset<String, DATASET, INSTANCE> of(DATASET d1, DATASET d2, DATASET d3, DATASET d4) 355 { 356 final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>(); 357 ds.put(d1.getID(), d1); 358 ds.put(d2.getID(), d2); 359 ds.put(d3.getID(), d3); 360 ds.put(d4.getID(), d4); 361 return ds; 362 } 363 364 /** 365 * Convenience method to construct a {@link MapBackedDataset} from a number 366 * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group, 367 * and the key is the identifier returned by {@link Identifiable#getID()}. 368 * 369 * @param d1 370 * first dataset 371 * @param d2 372 * second dataset 373 * @param d3 374 * third dataset 375 * @param d4 376 * forth dataset 377 * @param d5 378 * fifth dataset 379 * @return the newly constructed grouped dataset. 380 */ 381 public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable> 382 MapBackedDataset<String, DATASET, INSTANCE> of(DATASET d1, DATASET d2, DATASET d3, DATASET d4, DATASET d5) 383 { 384 final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>(); 385 ds.put(d1.getID(), d1); 386 ds.put(d2.getID(), d2); 387 ds.put(d3.getID(), d3); 388 ds.put(d4.getID(), d4); 389 ds.put(d5.getID(), d5); 390 return ds; 391 } 392 393 /** 394 * Convenience method to construct a {@link MapBackedDataset} from a number 395 * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group, 396 * and the key is the identifier returned by {@link Identifiable#getID()}. 397 * 398 * @param datasets 399 * the datasets representing the groups 400 * @return the newly constructed grouped dataset. 401 */ 402 public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable> 403 MapBackedDataset<String, DATASET, INSTANCE> of(Collection<DATASET> datasets) 404 { 405 final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>(); 406 407 for (final DATASET d : datasets) { 408 ds.put(d.getID(), d); 409 } 410 411 return ds; 412 } 413}