001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.ml.annotation; 031 032import org.openimaj.data.dataset.GroupedDataset; 033import org.openimaj.data.dataset.ListDataset; 034import org.openimaj.ml.training.IncrementalTrainer; 035 036/** 037 * An {@link Annotator} that can be trained/updated incrementally. 038 * 039 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 040 * 041 * @param <OBJECT> 042 * Type of object 043 * @param <ANNOTATION> 044 * Type of annotation 045 */ 046public abstract class IncrementalAnnotator<OBJECT, ANNOTATION> 047 extends 048 AbstractAnnotator<OBJECT, ANNOTATION> 049 implements 050 IncrementalTrainer<Annotated<OBJECT, ANNOTATION>> 051{ 052 protected IncrementalAnnotator() { 053 } 054 055 /** 056 * Train the annotator with the given data. The default implementation of 057 * this method just calls {@link #train(Object)} on each data item. 058 * Subclasses may override to do something more intelligent if necessary. 059 * 060 * @param data 061 * the training data 062 */ 063 @Override 064 public void train(Iterable<? extends Annotated<OBJECT, ANNOTATION>> data) { 065 for (final Annotated<OBJECT, ANNOTATION> d : data) 066 train(d); 067 } 068 069 /** 070 * Train the annotator with the given grouped dataset. This method assumes 071 * that each object only appears in a <b>single</b> group of the dataset 072 * (i.e. a multi-class problem). Each group corresponds to the one single 073 * annotation assigned to each object. 074 * <p> 075 * If your dataset contains multiple labels for each object (through an 076 * object appearing in multiple groups) you should use 077 * {@link #train(GroupedDataset)}. 078 * 079 * @param dataset 080 * the dataset to train on 081 */ 082 public void trainMultiClass(GroupedDataset<ANNOTATION, ? extends ListDataset<OBJECT>, OBJECT> dataset) { 083 for (final ANNOTATION grp : dataset.getGroups()) { 084 for (final OBJECT inst : dataset.getInstances(grp)) { 085 train(new AnnotatedObject<OBJECT, ANNOTATION>(inst, grp)); 086 } 087 } 088 } 089 090 /** 091 * Train the annotator with the given grouped dataset. This method assumes 092 * that each object can appear in multiple groups of the dataset (i.e. a 093 * multi-label problem). Internally, the dataset is converted to a list 094 * containing exactly one reference to each object in the dataset with 095 * (potentially) multiple annotations. 096 * <p> 097 * If the dataset is actually multi-class (i.e. each object belongs to only 098 * a single group), then calling this method is equivalent to calling 099 * {@link #trainMultiClass(GroupedDataset)}, but is less efficient as the 100 * dataset has to be converted into a list. 101 * <p> 102 * Some annotator implementations do not care whether the data is 103 * multi-class or multi-label, and might choose to override this method to 104 * just call {@link #trainMultiClass(GroupedDataset)} instead. 105 * 106 * @param dataset 107 * the dataset to train on 108 */ 109 public void train(GroupedDataset<ANNOTATION, ? extends ListDataset<OBJECT>, OBJECT> dataset) { 110 for (final AnnotatedObject<OBJECT, ANNOTATION> ao : AnnotatedObject.createList(dataset)) { 111 train(ao); 112 } 113 } 114}