001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.data.dataset;
031
032import java.util.AbstractMap;
033import java.util.Collection;
034import java.util.HashMap;
035import java.util.Iterator;
036import java.util.Map;
037import java.util.Set;
038
039import org.openimaj.data.identity.Identifiable;
040import org.openimaj.util.iterator.ConcatenatedIterable;
041
042/**
043 * A {@link MapBackedDataset} is a concrete implementation of a
044 * {@link GroupedDataset} backed by a {@link Map}.
045 *
046 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
047 *
048 * @param <KEY>
049 *            Type of dataset class key
050 * @param <DATASET>
051 *            Type of sub-datasets.
052 * @param <INSTANCE>
053 *            Type of objects in the dataset
054 */
055public class MapBackedDataset<KEY extends Object, DATASET extends Dataset<INSTANCE>, INSTANCE>
056extends AbstractMap<KEY, DATASET>
057implements GroupedDataset<KEY, DATASET, INSTANCE>
058{
059        protected Map<KEY, DATASET> map;
060
061        /**
062         * Construct an empty {@link MapBackedDataset} backed by a {@link HashMap}.
063         */
064        public MapBackedDataset() {
065                this.map = new HashMap<KEY, DATASET>();
066        }
067
068        /**
069         * Construct with the given map.
070         *
071         * @param map
072         *            the map
073         */
074        public MapBackedDataset(Map<KEY, DATASET> map) {
075                this.map = map;
076        }
077
078        /**
079         * Get the underlying map.
080         *
081         * @return the underlying map
082         */
083        public Map<KEY, DATASET> getMap() {
084                return map;
085        }
086
087        @Override
088        public DATASET getInstances(KEY key) {
089                return map.get(key);
090        }
091
092        @Override
093        public Set<KEY> getGroups() {
094                return map.keySet();
095        }
096
097        @Override
098        public INSTANCE getRandomInstance(KEY key) {
099                return map.get(key).getRandomInstance();
100        }
101
102        @Override
103        public INSTANCE getRandomInstance() {
104                final int index = (int) (Math.random() * numInstances());
105                int count = 0;
106
107                for (final DATASET d : map.values()) {
108                        if (index >= count + d.numInstances()) {
109                                count += d.numInstances();
110                        } else {
111                                if (d instanceof ListDataset) {
112                                        return ((ListDataset<INSTANCE>) d).get(index - count);
113                                } else {
114                                        for (final INSTANCE i : d) {
115                                                if (index == count)
116                                                        return i;
117
118                                                count++;
119                                        }
120                                }
121                        }
122                }
123                return null;
124        }
125
126        @Override
127        public int numInstances() {
128                int size = 0;
129
130                for (final DATASET d : map.values()) {
131                        size += d.numInstances();
132                }
133
134                return size;
135        }
136
137        @Override
138        public Iterator<INSTANCE> iterator() {
139                return new ConcatenatedIterable<INSTANCE>(map.values()).iterator();
140        }
141
142        @Override
143        public String toString() {
144                return map.toString();
145        }
146
147        @Override
148        public Set<Entry<KEY, DATASET>> entrySet() {
149                return map.entrySet();
150        }
151
152        /*
153         * (non-Javadoc)
154         *
155         * @see java.util.AbstractMap#put(java.lang.Object, java.lang.Object)
156         */
157        @Override
158        public DATASET put(KEY key, DATASET value) {
159                return map.put(key, value);
160        }
161
162        /**
163         * Convenience method for populating a dataset by chaining method calls:
164         *
165         * <pre>
166         * final MapBackedDataset&lt;String, ListDataset&lt;String&gt;, String&gt; ds = new MapBackedDataset&lt;String, ListDataset&lt;String&gt;, String&gt;()
167         *              .add(&quot;A&quot;, new ListBackedDataset&lt;String&gt;())
168         *              .add(&quot;B&quot;, new ListBackedDataset&lt;String&gt;());
169         * </pre>
170         *
171         * @param key
172         *            the key to insert
173         * @param dataset
174         *            the value to insert
175         * @return this dataset
176         */
177        public MapBackedDataset<KEY, DATASET, INSTANCE> add(KEY key, DATASET dataset) {
178                this.put(key, dataset);
179                return this;
180        }
181
182        /**
183         * Convenience method to construct a {@link MapBackedDataset} from a number
184         * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group,
185         * and the key is the identifier returned by {@link Identifiable#getID()}.
186         *
187         * @param datasets
188         *            the datasets representing the groups
189         * @return the newly constructed grouped dataset.
190         */
191        @SafeVarargs
192        public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable>
193        MapBackedDataset<String, DATASET, INSTANCE> of(DATASET... datasets)
194        {
195                final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>();
196
197                for (final DATASET d : datasets) {
198                        ds.put(d.getID(), d);
199                }
200
201                return ds;
202        }
203
204        /**
205         * A builder for creating {@link MapBackedDataset} instances from
206         * {@link Identifiable} sub-datasets. Example:
207         *
208         * <pre>
209         * final MapBackedDataset<String, VFSListDataset<String>, String> ds = new MapBackedDataset.IdentifiableBuilder<VFSListDataset<String>, String>()
210         *                                      .add(new VFSListDataset<String>(...))
211         *                                      .add(new VFSListDataset<String>(...))
212         *                                      .build();
213         * </pre>
214         *
215         * For small {@link MapBackedDataset}s, the <tt>MapBackedDataset.of()</tt>
216         * methods are even more convenient.
217         * <p>
218         * Builder instances can be reused - it is safe to call {@link #build()}
219         * multiple times to build multiple maps in series. Each map is a superset
220         * of the maps created before it.
221         *
222         *
223         * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
224         *
225         * @param <DATASET>
226         *            Type of sub-datasets.
227         * @param <INSTANCE>
228         *            Type of objects in the dataset
229         */
230        public static class IdentifiableBuilder<DATASET extends Dataset<INSTANCE> & Identifiable, INSTANCE> {
231                MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>();
232
233                /**
234                 * Add the sub-dataset such that it becomes a group in the
235                 * {@link MapBackedDataset} returned by {@link #build()} where the key
236                 * is the identifier returned by {@link Identifiable#getID()}.
237                 * <p>
238                 * If duplicate keys (i.e. sub-datasets with duplicate identifiers) are
239                 * added, only the last one will appear in the resultant dataset
240                 * produced by {@link #build()}.
241                 *
242                 * @param dataset
243                 *            the sub-dataset to add
244                 * @return the builder
245                 */
246                public IdentifiableBuilder<DATASET, INSTANCE> add(DATASET dataset) {
247                        ds.put(dataset.getID(), dataset);
248
249                        return this;
250                }
251
252                /**
253                 * Returns a newly-created {@link MapBackedDataset}.
254                 *
255                 * @return a newly-created {@link MapBackedDataset}.
256                 */
257                public MapBackedDataset<String, DATASET, INSTANCE> build() {
258                        return new MapBackedDataset<String, DATASET, INSTANCE>(ds);
259                }
260        }
261
262        /**
263         * Returns a new builder. The generated builder is equivalent to the builder
264         * created by the {@link IdentifiableBuilder#IdentifiableBuilder()}
265         * constructor.
266         *
267         * @return a new builder.
268         */
269        public static <DATASET extends Dataset<INSTANCE> & Identifiable, INSTANCE>
270        IdentifiableBuilder<DATASET, INSTANCE> builder()
271        {
272                return new IdentifiableBuilder<DATASET, INSTANCE>();
273        }
274
275        /**
276         * Convenience method to construct a {@link MapBackedDataset} from a number
277         * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group,
278         * and the key is the identifier returned by {@link Identifiable#getID()}.
279         *
280         * @param d1
281         *            first dataset
282         *
283         * @return the newly constructed grouped dataset.
284         */
285        public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable>
286        MapBackedDataset<String, DATASET, INSTANCE> of(DATASET d1)
287        {
288                final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>();
289                ds.put(d1.getID(), d1);
290                return ds;
291        }
292
293        /**
294         * Convenience method to construct a {@link MapBackedDataset} from a number
295         * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group,
296         * and the key is the identifier returned by {@link Identifiable#getID()}.
297         *
298         * @param d1
299         *            first dataset
300         * @param d2
301         *            second dataset
302         *
303         * @return the newly constructed grouped dataset.
304         */
305        public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable>
306        MapBackedDataset<String, DATASET, INSTANCE> of(DATASET d1, DATASET d2)
307        {
308                final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>();
309                ds.put(d1.getID(), d1);
310                ds.put(d2.getID(), d2);
311                return ds;
312        }
313
314        /**
315         * Convenience method to construct a {@link MapBackedDataset} from a number
316         * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group,
317         * and the key is the identifier returned by {@link Identifiable#getID()}.
318         *
319         * @param d1
320         *            first dataset
321         * @param d2
322         *            second dataset
323         * @param d3
324         *            third dataset
325         *
326         * @return the newly constructed grouped dataset.
327         */
328        public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable>
329        MapBackedDataset<String, DATASET, INSTANCE> of(DATASET d1, DATASET d2, DATASET d3)
330        {
331                final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>();
332                ds.put(d1.getID(), d1);
333                ds.put(d2.getID(), d2);
334                ds.put(d3.getID(), d3);
335                return ds;
336        }
337
338        /**
339         * Convenience method to construct a {@link MapBackedDataset} from a number
340         * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group,
341         * and the key is the identifier returned by {@link Identifiable#getID()}.
342         *
343         * @param d1
344         *            first dataset
345         * @param d2
346         *            second dataset
347         * @param d3
348         *            third dataset
349         * @param d4
350         *            forth dataset
351         * @return the newly constructed grouped dataset.
352         */
353        public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable>
354        MapBackedDataset<String, DATASET, INSTANCE> of(DATASET d1, DATASET d2, DATASET d3, DATASET d4)
355        {
356                final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>();
357                ds.put(d1.getID(), d1);
358                ds.put(d2.getID(), d2);
359                ds.put(d3.getID(), d3);
360                ds.put(d4.getID(), d4);
361                return ds;
362        }
363
364        /**
365         * Convenience method to construct a {@link MapBackedDataset} from a number
366         * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group,
367         * and the key is the identifier returned by {@link Identifiable#getID()}.
368         *
369         * @param d1
370         *            first dataset
371         * @param d2
372         *            second dataset
373         * @param d3
374         *            third dataset
375         * @param d4
376         *            forth dataset
377         * @param d5
378         *            fifth dataset
379         * @return the newly constructed grouped dataset.
380         */
381        public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable>
382        MapBackedDataset<String, DATASET, INSTANCE> of(DATASET d1, DATASET d2, DATASET d3, DATASET d4, DATASET d5)
383        {
384                final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>();
385                ds.put(d1.getID(), d1);
386                ds.put(d2.getID(), d2);
387                ds.put(d3.getID(), d3);
388                ds.put(d4.getID(), d4);
389                ds.put(d5.getID(), d5);
390                return ds;
391        }
392
393        /**
394         * Convenience method to construct a {@link MapBackedDataset} from a number
395         * of {@link Identifiable} sub-datasets. Each sub-dataset becomes a group,
396         * and the key is the identifier returned by {@link Identifiable#getID()}.
397         *
398         * @param datasets
399         *            the datasets representing the groups
400         * @return the newly constructed grouped dataset.
401         */
402        public static <INSTANCE, DATASET extends Dataset<INSTANCE> & Identifiable>
403        MapBackedDataset<String, DATASET, INSTANCE> of(Collection<DATASET> datasets)
404        {
405                final MapBackedDataset<String, DATASET, INSTANCE> ds = new MapBackedDataset<String, DATASET, INSTANCE>();
406
407                for (final DATASET d : datasets) {
408                        ds.put(d.getID(), d);
409                }
410
411                return ds;
412        }
413}