001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.ml.annotation.utils; 031 032import gnu.trove.list.array.TIntArrayList; 033 034import java.util.AbstractList; 035import java.util.HashMap; 036import java.util.HashSet; 037import java.util.List; 038import java.util.Map; 039import java.util.Set; 040 041import org.openimaj.feature.FeatureExtractor; 042import org.openimaj.ml.annotation.Annotated; 043 044 045/** 046 * Helper class for dealing with lists of annotated objects, 047 * and specifically getting objects by class and determining 048 * the set of annotations. 049 * <p> 050 * Because it might not be practical to hold all the items 051 * in the list in memory at once, the implementation only stores 052 * the index of each item, and performs an indirect lookup 053 * as required. This does mean that once you've passed the list 054 * to the constructor, you shouldn't modify it as doing so 055 * will invalidate the index. The constructor will make one 056 * single pass through all the objects in order to build the 057 * index. 058 * 059 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 060 * 061 * @param <OBJECT> The type of object 062 * @param <ANNOTATION> The type of annotation 063 */ 064public class AnnotatedListHelper<OBJECT, ANNOTATION> { 065 Map<ANNOTATION, TIntArrayList> index = new HashMap<ANNOTATION, TIntArrayList>(); 066 List<? extends Annotated<OBJECT, ANNOTATION>> data; 067 068 /** 069 * Construct the {@link AnnotatedListHelper} with the given list. 070 * @param list the list 071 */ 072 public AnnotatedListHelper(List<? extends Annotated<OBJECT, ANNOTATION>> list) { 073 this.data = list; 074 075 for (int i=0; i<list.size(); i++) { 076 Annotated<OBJECT, ANNOTATION> item = list.get(i); 077 078 //only want to add one index/annotation, so make a set 079 Set<ANNOTATION> annotations = new HashSet<ANNOTATION>(item.getAnnotations()); 080 081 for (ANNOTATION annotation : annotations) { 082 TIntArrayList indices = index.get(annotation); 083 084 if (indices == null) index.put(annotation, indices = new TIntArrayList()); 085 086 indices.add(i); 087 } 088 } 089 } 090 091 /** 092 * Retrieve all the items from the data that have a specific 093 * annotation. 094 * 095 * @param annotation the annotation to search for. 096 * @return a read-only list of annotated objects with the given annotation. 097 */ 098 public List<Annotated<OBJECT, ANNOTATION>> get(final ANNOTATION annotation) { 099 if (!index.containsKey(annotation)) 100 return null; 101 102 return new AbstractList<Annotated<OBJECT,ANNOTATION>>() { 103 TIntArrayList indices = index.get(annotation); 104 105 @Override 106 public Annotated<OBJECT, ANNOTATION> get(int index) { 107 return data.get(indices.get(index)); 108 } 109 110 @Override 111 public int size() { 112 return indices.size(); 113 } 114 }; 115 } 116 117 /** 118 * Get the set of all known annotations 119 * @return the set of known annotations 120 */ 121 public Set<ANNOTATION> getAnnotations() { 122 return index.keySet(); 123 } 124 125 /** 126 * Extract the features corresponding to a specific annotation. 127 * This method doesn't actually perform the extraction, rather 128 * the returned list will perform the extraction when 129 * {@link List#get(int)} is called. The returned list doesn't perform 130 * any kind of caching, so calling get multiple times on 131 * the same object will result in the features being extracted 132 * multiple times. 133 * <p> 134 * If you need to convert the list to a cached variety, you can write: 135 * <code> 136 * List<F> cached = new ArrayList<F>(alh.extractFeatures(annotation, extractor)); 137 * </code> 138 * 139 * @param <FEATURE> The type of feature 140 * @param annotation the annotation 141 * @param extractor the feature extractor 142 * @return the list of features. 143 */ 144 public <FEATURE> List<FEATURE> extractFeatures(final ANNOTATION annotation, final FeatureExtractor<FEATURE, OBJECT> extractor) { 145 if (!index.containsKey(annotation)) 146 return null; 147 148 return new AbstractList<FEATURE>() { 149 TIntArrayList indices = index.get(annotation); 150 151 @Override 152 public FEATURE get(int index) { 153 return extractor.extractFeature( data.get(indices.get(index)).getObject() ); 154 } 155 156 @Override 157 public int size() { 158 return indices.size(); 159 } 160 }; 161 } 162 163 /** 164 * Extract the features corresponding to everything EXCEPT 165 * the specific given annotation. 166 * <p> 167 * This method doesn't actually perform the extraction, rather 168 * the returned list will perform the extraction when 169 * {@link List#get(int)} is called. The returned list doesn't perform 170 * any kind of caching, so calling get multiple times on 171 * the same object will result in the features being extracted 172 * multiple times. 173 * <p> 174 * If you need to convert the list to a cached variety, you can write: 175 * <code> 176 * List<F> cached = new ArrayList<F>(alh.extractFeatures(annotation, extractor)); 177 * </code> 178 * 179 * @param <FEATURE> The type of feature 180 * @param annotation the annotation to exclude 181 * @param extractor the feature extractor 182 * @return the list of features. 183 */ 184 public <FEATURE> List<FEATURE> extractFeaturesExclude(final ANNOTATION annotation, final FeatureExtractor<FEATURE, OBJECT> extractor) { 185 final TIntArrayList excludedIndices = index.get(annotation); 186 final TIntArrayList selectedIndices = new TIntArrayList(data.size() - excludedIndices.size()); 187 188 for (int i=0; i<this.data.size(); i++) { 189 if (excludedIndices.binarySearch(i) < 0) 190 selectedIndices.add(i); 191 } 192 193 return new AbstractList<FEATURE>() { 194 @Override 195 public FEATURE get(int index) { 196 return extractor.extractFeature( data.get(selectedIndices.get(index)).getObject() ); 197 } 198 199 @Override 200 public int size() { 201 return selectedIndices.size(); 202 } 203 }; 204 } 205}