Source code

001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.experiment.dataset.util;
031
032import java.util.ArrayList;
033import java.util.List;
034import java.util.Map;
035
036import org.openimaj.data.dataset.Dataset;
037import org.openimaj.data.dataset.GroupedDataset;
038import org.openimaj.data.dataset.ListBackedDataset;
039import org.openimaj.data.dataset.ListDataset;
040import org.openimaj.data.dataset.MapBackedDataset;
041
042/**
043 * Helper methods to provide different types of view on a dataset.
044 *
045 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
046 */
047public class DatasetAdaptors {
048        /**
049         * Create a {@link List} view of the given dataset. If the dataset is a
050         * {@link ListDataset} it is returned, otherwise this method creates a new
051         * {@link List} containing all the instances in the dataset. The list is
052         * populated by iterating through the dataset.
053         *
054         * @param <INSTANCE>
055         *            The type of instances in the dataset
056         * @param dataset
057         *            The dataset.
058         * @return a list of all instances.
059         */
060        public static <INSTANCE> List<INSTANCE> asList(final Dataset<INSTANCE> dataset) {
061                if (dataset instanceof ListDataset)
062                        return (ListDataset<INSTANCE>) dataset;
063
064                final ArrayList<INSTANCE> list = new ArrayList<INSTANCE>();
065
066                for (final INSTANCE instance : dataset)
067                        list.add(instance);
068
069                return list;
070        }
071
072        /**
073         * if you have a grouped dataset where the groups contains lists of feature
074         * objects (i.e. GroupedDataset&lt;KEY,ListDataset&lt;List&lt;INSTANCE&gt;&gt;,INSTANCE&gt;)
075         * then this will flatten those internal list, so that all the instances
076         * from those lists are directly associated with the key. This type of thing
077         * might occur if your dataset element reader can extract multiple media
078         * parts from a single dataset item, that will all end up with the same key.
079         *
080         * @param dataset
081         *            The dataset
082         * @return The new dataset
083         */
084        public static <ANN, INSTANCE> GroupedDataset<ANN, ListDataset<INSTANCE>, INSTANCE>
085                        flattenListGroupedDataset(
086                                        final GroupedDataset<ANN, ? extends ListDataset<List<INSTANCE>>, ? extends List<INSTANCE>> dataset)
087        {
088                // Create a grouped dataset without the lists
089                final MapBackedDataset<ANN, ListDataset<INSTANCE>, INSTANCE> g =
090                                new MapBackedDataset<ANN, ListDataset<INSTANCE>, INSTANCE>();
091
092                // Go through each of the groups...
093                for (final ANN a : dataset.getGroups())
094                {
095                        // Get the group
096                        final ListDataset<? extends List<INSTANCE>> l = dataset.getInstances(a);
097
098                        // Add each of the instances in that dataset to a new list dataset
099                        final ListBackedDataset<INSTANCE> newListDataset = new ListBackedDataset<INSTANCE>();
100                        for (final List<INSTANCE> le : l)
101                                for (final INSTANCE ll : le)
102                                        newListDataset.add(ll);
103
104                        // Put that list dataset straight into the new grouped dataset.
105                        g.add(a, newListDataset);
106                }
107
108                return g;
109        }
110
111        /**
112         * Takes a grouped dataset and returns a new dataset that contains only
113         * those groups specified. If the given groups do not exist in the provided
114         * dataset, then they will be ignored.
115         *
116         * @param data
117         *            The dataset to take the groups from
118         * @param groups
119         *            The groups to take
120         * @return the new dataset containing only those groups.
121         */
122        @SafeVarargs
123        public static <ANN, DATASET extends Dataset<INSTANCE>, INSTANCE> GroupedDataset<ANN, DATASET, INSTANCE>
124                        getGroupedDatasetSubset(final GroupedDataset<ANN, DATASET, INSTANCE> data, final ANN... groups)
125        {
126                // New dataset
127                final MapBackedDataset<ANN, DATASET, INSTANCE> newDataset = new MapBackedDataset<ANN, DATASET, INSTANCE>();
128
129                // Loop through each of the groups specified...
130                for (final ANN group : groups)
131                {
132                        // Copy the dataset into the new dataset (if it's not null)
133                        final DATASET ds = data.getInstances(group);
134                        if (ds != null)
135                                newDataset.put(group, ds);
136                }
137
138                return newDataset;
139        }
140
141        /**
142         * Takes a grouped dataset and returns a new dataset with the groups
143         * re-shuffled as specified in the regrouping criteria.
144         *
145         * The regrouping criteria is a map from new group name to old group name.
146         * Instances in the old group names will be mapped to the new group names.
147         *
148         * Where many old groups map to a single new group, the groups will be
149         * merged.
150         *
151         * For example:
152         *
153         * <pre>
154         * <code>
155         *      old == GroupedDataset: {G1=[1,2,3],G2=[4,5,6],G3=[7,8,9]}
156         * 
157         *      new = getGroupedDatasetSubset( old, {A-&gt;[G1,G3],B-&gt;[G2]} )
158         * 
159         *      new == GroupedDataset: {A=[1,2,3,7,8,9],B=[4,5,6]}
160         *      </code>
161         * </pre>
162         *
163         * If the given groups do not exist in the provided dataset, then they will
164         * be ignored.
165         *
166         * @param data
167         *            The dataset to take the groups from
168         * @param regroupCriteria
169         *            The regrouping criteria
170         * @return the new dataset containing the new regrouping.
171         */
172        public static <ANN, DATASET extends ListDataset<INSTANCE>, INSTANCE>
173                        GroupedDataset<ANN, ListBackedDataset<INSTANCE>, INSTANCE>
174                        getRegroupedDataset(final GroupedDataset<ANN, DATASET, INSTANCE> data, final Map<ANN, ANN[]> regroupCriteria)
175        {
176                // New dataset
177                final MapBackedDataset<ANN, ListBackedDataset<INSTANCE>, INSTANCE> newDataset =
178                                new MapBackedDataset<ANN, ListBackedDataset<INSTANCE>, INSTANCE>();
179
180                // Loop through each of the new groups specified...
181                for (final ANN newGroup : regroupCriteria.keySet())
182                {
183                        for (final ANN oldGroup : regroupCriteria.get(newGroup))
184                        {
185                                // Copy the dataset into the new dataset (if it's not null)
186                                final DATASET ds = data.getInstances(oldGroup);
187                                if (ds != null)
188                                {
189                                        // Create a new list backed dataset (which we know we can
190                                        // write to)...
191                                        final ListBackedDataset<INSTANCE> lbd = new ListBackedDataset<INSTANCE>();
192                                        lbd.addAll(ds);
193
194                                        // We merge the groups if there's already one in our new
195                                        // dataset
196                                        if (newDataset.get(newGroup) != null)
197                                                newDataset.get(newGroup).addAll(lbd);
198                                        else
199                                                newDataset.put(newGroup, lbd);
200                                }
201                        }
202                }
203
204                return newDataset;
205        }
206}