001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.experiment.dataset.sampling; 031 032import java.util.List; 033 034import org.openimaj.data.RandomData; 035import org.openimaj.data.dataset.ListBackedDataset; 036import org.openimaj.data.dataset.ListDataset; 037import org.openimaj.experiment.dataset.util.DatasetAdaptors; 038import org.openimaj.util.list.AcceptingListView; 039import org.openimaj.util.list.SkippingListView; 040 041/** 042 * A uniformly random sampling scheme for {@link ListDataset}s. Both sampling 043 * with and without replacement are supported. The sampler returns a "view" on 044 * top of the input dataset that selects a predefined fraction of the data. 045 * 046 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 047 * 048 * @param <INSTANCE> 049 * Type of instances 050 */ 051public class UniformRandomisedSampler<INSTANCE> implements Sampler<ListDataset<INSTANCE>> { 052 private boolean withReplacement = false; 053 054 // this is overloaded to hold either a percentage or number of instances. 055 // Percentages are stored in the range 0..1; numbers are stored as -number. 056 private double percentage; 057 058 /** 059 * Construct a {@link UniformRandomisedSampler} with the given percentage of 060 * instances to select. By default, the sampling is without replacement 061 * (i.e. an instance can only be selected once). 062 * 063 * @param percentage 064 * percentage of instances to select 065 */ 066 public UniformRandomisedSampler(double percentage) { 067 if (percentage < 0 || percentage > 1) 068 throw new IllegalArgumentException("percentage of sample instances must be between 0 and 1"); 069 070 this.percentage = percentage; 071 } 072 073 /** 074 * Construct a {@link UniformRandomisedSampler} with the given percentage of 075 * instances to select, using with with-replacement or without-replacement 076 * sampling. 077 * 078 * @param percentage 079 * percentage of instances to select 080 * @param withReplacement 081 * should the sampling be performed with replacement (true); or 082 * without replacement (false). 083 */ 084 public UniformRandomisedSampler(double percentage, boolean withReplacement) { 085 this(percentage); 086 this.withReplacement = withReplacement; 087 } 088 089 /** 090 * Construct a {@link UniformRandomisedSampler} with the given number of 091 * instances to select. By default, the sampling is without replacement 092 * (i.e. an instance can only be selected once). 093 * 094 * @param number 095 * number of instances to select 096 */ 097 public UniformRandomisedSampler(int number) { 098 if (number < 1) 099 throw new IllegalArgumentException("number of sample instances must be bigger than 0"); 100 101 this.percentage = -number; 102 } 103 104 /** 105 * Construct a {@link UniformRandomisedSampler} with the given number of 106 * instances to select, using with with-replacement or without-replacement 107 * sampling. 108 * 109 * @param number 110 * number of instances to select 111 * @param withReplacement 112 * should the sampling be performed with replacement (true); or 113 * without replacement (false). 114 */ 115 public UniformRandomisedSampler(int number, boolean withReplacement) { 116 this(number); 117 this.withReplacement = withReplacement; 118 } 119 120 @Override 121 public ListDataset<INSTANCE> sample(ListDataset<INSTANCE> dataset) { 122 final boolean skip; 123 final int N; 124 if (percentage >= 0) { 125 // if we want more than 50%, it's better to select 1-percentage 126 // indexes to skip 127 skip = percentage > 0.5; 128 final double per = skip ? 1.0 - percentage : percentage; 129 130 N = (int) Math.round(dataset.size() * per); 131 } else { 132 N = (int) -percentage; 133 skip = N > (dataset.size() / 2); 134 } 135 136 int[] selectedIds; 137 if (withReplacement) { 138 selectedIds = RandomData.getRandomIntArray(N, 0, dataset.size()); 139 } else { 140 selectedIds = RandomData.getUniqueRandomInts(N, 0, dataset.size()); 141 } 142 143 final List<INSTANCE> listView = DatasetAdaptors.asList(dataset); 144 145 if (!skip) { 146 return new ListBackedDataset<INSTANCE>(new AcceptingListView<INSTANCE>(listView, selectedIds)); 147 } 148 149 return new ListBackedDataset<INSTANCE>(new SkippingListView<INSTANCE>(listView, selectedIds)); 150 } 151}