/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the name of the University of Southampton nor the names of its
 *     contributors may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.experiment.dataset.util;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.openimaj.data.dataset.Dataset;
import org.openimaj.data.dataset.GroupedDataset;
import org.openimaj.data.dataset.ListBackedDataset;
import org.openimaj.data.dataset.ListDataset;
import org.openimaj.data.dataset.MapBackedDataset;

/**
 * Helper methods to provide different types of view on a dataset.
 *
 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
 */
public class DatasetAdaptors {
	/**
	 * Create a {@link List} view of the given dataset. If the dataset is a
	 * {@link ListDataset} that same object is returned (no copy is made);
	 * otherwise this method creates a new {@link List} containing all the
	 * instances in the dataset. The list is populated by iterating through the
	 * dataset.
	 *
	 * @param <INSTANCE>
	 *            The type of instances in the dataset
	 * @param dataset
	 *            The dataset.
	 * @return a list of all instances.
	 */
	public static <INSTANCE> List<INSTANCE> asList(final Dataset<INSTANCE> dataset) {
		// A ListDataset already is a List, so hand it back directly.
		if (dataset instanceof ListDataset)
			return (ListDataset<INSTANCE>) dataset;

		final List<INSTANCE> list = new ArrayList<>();

		for (final INSTANCE instance : dataset)
			list.add(instance);

		return list;
	}

	/**
	 * If you have a grouped dataset where the groups contain lists of feature
	 * objects (i.e.
	 * {@code GroupedDataset<KEY,ListDataset<List<INSTANCE>>,INSTANCE>}) then
	 * this will flatten those internal lists, so that all the instances from
	 * those lists are directly associated with the key. This type of thing
	 * might occur if your dataset element reader can extract multiple media
	 * parts from a single dataset item, that will all end up with the same key.
	 *
	 * @param <ANN>
	 *            The type of the group keys
	 * @param <INSTANCE>
	 *            The type of the instances held in the inner lists
	 * @param dataset
	 *            The dataset
	 * @return The new dataset
	 */
	public static <ANN, INSTANCE> GroupedDataset<ANN, ListDataset<INSTANCE>, INSTANCE>
			flattenListGroupedDataset(
					final GroupedDataset<ANN, ? extends ListDataset<List<INSTANCE>>, ? extends List<INSTANCE>> dataset)
	{
		// Create a grouped dataset without the lists
		final MapBackedDataset<ANN, ListDataset<INSTANCE>, INSTANCE> g = new MapBackedDataset<>();

		// Go through each of the groups...
		for (final ANN a : dataset.getGroups())
		{
			// Get the group
			final ListDataset<? extends List<INSTANCE>> l = dataset.getInstances(a);

			// Pour the contents of every inner list into one new list dataset
			final ListBackedDataset<INSTANCE> newListDataset = new ListBackedDataset<>();
			for (final List<INSTANCE> le : l)
				newListDataset.addAll(le);

			// Put that list dataset straight into the new grouped dataset.
			g.add(a, newListDataset);
		}

		return g;
	}

	/**
	 * Takes a grouped dataset and returns a new dataset that contains only
	 * those groups specified. Requested groups for which the provided dataset
	 * returns no instances (<code>null</code>) are ignored. The group datasets
	 * themselves are not copied; the returned dataset refers to the same
	 * objects as the input.
	 *
	 * @param <ANN>
	 *            The type of the group keys
	 * @param <DATASET>
	 *            The type of the per-group datasets
	 * @param <INSTANCE>
	 *            The type of instances in the dataset
	 * @param data
	 *            The dataset to take the groups from
	 * @param groups
	 *            The groups to take
	 * @return the new dataset containing only those groups.
	 */
	@SafeVarargs
	public static <ANN, DATASET extends Dataset<INSTANCE>, INSTANCE> GroupedDataset<ANN, DATASET, INSTANCE>
			getGroupedDatasetSubset(final GroupedDataset<ANN, DATASET, INSTANCE> data, final ANN... groups)
	{
		// New dataset
		final MapBackedDataset<ANN, DATASET, INSTANCE> newDataset = new MapBackedDataset<>();

		// Loop through each of the groups specified...
		for (final ANN group : groups)
		{
			// Copy the dataset into the new dataset (if it's not null)
			final DATASET ds = data.getInstances(group);
			if (ds != null)
				newDataset.put(group, ds);
		}

		return newDataset;
	}

	/**
	 * Takes a grouped dataset and returns a new dataset with the groups
	 * re-shuffled as specified in the regrouping criteria.
	 * <p>
	 * The regrouping criteria is a map from new group name to old group names.
	 * Instances in the old groups will be copied into the new groups.
	 * <p>
	 * Where many old groups map to a single new group, the groups will be
	 * merged.
	 * <p>
	 * For example:
	 *
	 * <pre>
	 * {@code
	 * old == GroupedDataset: {G1=[1,2,3],G2=[4,5,6],G3=[7,8,9]}
	 *
	 * new = getRegroupedDataset( old, {A->[G1,G3],B->[G2]} )
	 *
	 * new == GroupedDataset: {A=[1,2,3,7,8,9],B=[4,5,6]}
	 * }
	 * </pre>
	 *
	 * If the given old groups do not exist in the provided dataset, then they
	 * will be ignored. A new group only appears in the result if at least one
	 * of its old groups exists.
	 *
	 * @param <ANN>
	 *            The type of the group keys
	 * @param <DATASET>
	 *            The type of the per-group datasets in the input
	 * @param <INSTANCE>
	 *            The type of instances in the dataset
	 * @param data
	 *            The dataset to take the groups from
	 * @param regroupCriteria
	 *            The regrouping criteria
	 * @return the new dataset containing the new regrouping.
	 */
	public static <ANN, DATASET extends ListDataset<INSTANCE>, INSTANCE>
			GroupedDataset<ANN, ListBackedDataset<INSTANCE>, INSTANCE>
			getRegroupedDataset(final GroupedDataset<ANN, DATASET, INSTANCE> data, final Map<ANN, ANN[]> regroupCriteria)
	{
		// New dataset
		final MapBackedDataset<ANN, ListBackedDataset<INSTANCE>, INSTANCE> newDataset = new MapBackedDataset<>();

		// Loop through each of the new groups specified...
		for (final Map.Entry<ANN, ANN[]> criterion : regroupCriteria.entrySet())
		{
			final ANN newGroup = criterion.getKey();

			for (final ANN oldGroup : criterion.getValue())
			{
				// Old groups that are missing from the source are skipped
				final DATASET ds = data.getInstances(oldGroup);
				if (ds == null)
					continue;

				final ListBackedDataset<INSTANCE> existing = newDataset.get(newGroup);
				if (existing != null)
				{
					// Merge straight into the group we have already created
					existing.addAll(ds);
				}
				else
				{
					// Create a new list backed dataset (which we know we can
					// write to) rather than sharing the caller's dataset.
					final ListBackedDataset<INSTANCE> lbd = new ListBackedDataset<>();
					lbd.addAll(ds);
					newDataset.put(newGroup, lbd);
				}
			}
		}

		return newDataset;
	}
}