001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.ml.annotation.utils;
031
032import gnu.trove.list.array.TIntArrayList;
033
034import java.util.AbstractList;
035import java.util.HashMap;
036import java.util.HashSet;
037import java.util.List;
038import java.util.Map;
039import java.util.Set;
040
041import org.openimaj.feature.FeatureExtractor;
042import org.openimaj.ml.annotation.Annotated;
043
044
045/**
046 * Helper class for dealing with lists of annotated objects,
047 * and specifically getting objects by class and determining
048 * the set of annotations.
049 * <p>
050 * Because it might not be practical to hold all the items
051 * in the list in memory at once, the implementation only stores
052 * the index of each item, and performs an indirect lookup
053 * as required. This does mean that once you've passed the list
054 * to the constructor, you shouldn't modify it as doing so
055 * will invalidate the index. The constructor will make one
056 * single pass through all the objects in order to build the 
057 * index.
058 * 
059 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
060 *
061 * @param <OBJECT> The type of object
062 * @param <ANNOTATION> The type of annotation
063 */
064public class AnnotatedListHelper<OBJECT, ANNOTATION> {
065        Map<ANNOTATION, TIntArrayList> index = new HashMap<ANNOTATION, TIntArrayList>();
066        List<? extends Annotated<OBJECT, ANNOTATION>> data;
067        
068        /**
069         * Construct the {@link AnnotatedListHelper} with the given list.
070         * @param list the list
071         */
072        public AnnotatedListHelper(List<? extends Annotated<OBJECT, ANNOTATION>> list) {
073                this.data = list;
074                
075                for (int i=0; i<list.size(); i++) {
076                        Annotated<OBJECT, ANNOTATION> item = list.get(i);
077                        
078                        //only want to add one index/annotation, so make a set
079                        Set<ANNOTATION> annotations = new HashSet<ANNOTATION>(item.getAnnotations());
080                        
081                        for (ANNOTATION annotation : annotations) {
082                                TIntArrayList indices = index.get(annotation);
083                                
084                                if (indices == null) index.put(annotation, indices = new TIntArrayList());
085                                
086                                indices.add(i);
087                        }
088                }
089        }
090                
091        /**
092         * Retrieve all the items from the data that have a specific
093         * annotation.
094         * 
095         * @param annotation the annotation to search for.
096         * @return a read-only list of annotated objects with the given annotation.
097         */
098        public List<Annotated<OBJECT, ANNOTATION>> get(final ANNOTATION annotation) {
099                if (!index.containsKey(annotation))
100                        return null;
101                
102                return new AbstractList<Annotated<OBJECT,ANNOTATION>>() {
103                        TIntArrayList indices = index.get(annotation);
104                        
105                        @Override
106                        public Annotated<OBJECT, ANNOTATION> get(int index) {
107                                return data.get(indices.get(index));
108                        }
109
110                        @Override
111                        public int size() {
112                                return indices.size();
113                        }                       
114                };
115        }
116        
117        /**
118         * Get the set of all known annotations
119         * @return the set of known annotations
120         */
121        public Set<ANNOTATION> getAnnotations() {
122                return index.keySet();
123        }
124        
125        /**
126         * Extract the features corresponding to a specific annotation.
127         * This method doesn't actually perform the extraction, rather
128         * the returned list will perform the extraction when 
129         * {@link List#get(int)} is called. The returned list doesn't perform
130         * any kind of caching, so calling get multiple times on
131         * the same object will result in the features being extracted
132         * multiple times.
133         * <p>
134         * If you need to convert the list to a cached variety, you can write:
135         * <code>
136         * List<F> cached = new ArrayList<F>(alh.extractFeatures(annotation, extractor));
137         * </code>
138         * 
139         * @param <FEATURE> The type of feature
140         * @param annotation the annotation
141         * @param extractor the feature extractor
142         * @return the list of features.
143         */
144        public <FEATURE> List<FEATURE> extractFeatures(final ANNOTATION annotation, final FeatureExtractor<FEATURE, OBJECT> extractor) {
145                if (!index.containsKey(annotation))
146                        return null;
147                
148                return new AbstractList<FEATURE>() {
149                        TIntArrayList indices = index.get(annotation);
150                        
151                        @Override
152                        public FEATURE get(int index) {
153                                return extractor.extractFeature( data.get(indices.get(index)).getObject() );
154                        }
155
156                        @Override
157                        public int size() {
158                                return indices.size();
159                        }                       
160                };
161        }
162        
163        /**
164         * Extract the features corresponding to everything EXCEPT 
165         * the specific given annotation.
166         * <p>
167         * This method doesn't actually perform the extraction, rather
168         * the returned list will perform the extraction when 
169         * {@link List#get(int)} is called. The returned list doesn't perform
170         * any kind of caching, so calling get multiple times on
171         * the same object will result in the features being extracted
172         * multiple times.
173         * <p>
174         * If you need to convert the list to a cached variety, you can write:
175         * <code>
176         * List<F> cached = new ArrayList<F>(alh.extractFeatures(annotation, extractor));
177         * </code>
178         * 
179         * @param <FEATURE> The type of feature
180         * @param annotation the annotation to exclude
181         * @param extractor the feature extractor
182         * @return the list of features.
183         */
184        public <FEATURE> List<FEATURE> extractFeaturesExclude(final ANNOTATION annotation, final FeatureExtractor<FEATURE, OBJECT> extractor) {
185                final TIntArrayList excludedIndices = index.get(annotation);
186                final TIntArrayList selectedIndices = new TIntArrayList(data.size() - excludedIndices.size());
187
188                for (int i=0; i<this.data.size(); i++) {
189                        if (excludedIndices.binarySearch(i) < 0) 
190                                selectedIndices.add(i);
191                }
192                
193                return new AbstractList<FEATURE>() {
194                        @Override
195                        public FEATURE get(int index) {
196                                return extractor.extractFeature( data.get(selectedIndices.get(index)).getObject() );
197                        }
198
199                        @Override
200                        public int size() {
201                                return selectedIndices.size();
202                        }                       
203                };
204        }
205}