001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.math.model.fit; 031 032import java.util.ArrayList; 033import java.util.Arrays; 034import java.util.List; 035 036import org.apache.commons.math3.distribution.ChiSquaredDistribution; 037import org.openimaj.citation.annotation.Reference; 038import org.openimaj.citation.annotation.ReferenceType; 039import org.openimaj.math.model.EstimatableModel; 040import org.openimaj.math.model.fit.residuals.ResidualCalculator; 041import org.openimaj.math.util.DoubleArrayStatsUtils; 042import org.openimaj.util.CollectionSampler; 043import org.openimaj.util.UniformSampler; 044import org.openimaj.util.pair.IndependentPair; 045 046/** 047 * Least Median of Squares robust model fitting 048 * 049 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 050 * 051 * @param <I> 052 * type of independent data 053 * @param <D> 054 * type of dependent data 055 * @param <M> 056 * concrete type of model learned 057 */ 058@Reference( 059 type = ReferenceType.Article, 060 author = { "Peter J. Rousseeuw" }, 061 title = "Least Median of Squares Regression", 062 year = "1984", 063 journal = "Journal of the American Statistical Association", 064 pages = { "871", "", "880" }, 065 url = "http://www.jstor.org/stable/2288718", 066 month = "December", 067 number = "388", 068 volume = "79") 069public class LMedS<I, D, M extends EstimatableModel<I, D>> implements RobustModelFitting<I, D, M> { 070 /** 071 * Probability that at least one of the samples drawn is good 072 */ 073 double probability = 0.99; 074 075 /** 076 * The level of inlier noise (standard deviation of Gaussian noise inherent 077 * in the data). Set to -1 if unknown. 078 */ 079 double inlierNoiseLevel = -1; 080 081 /** 082 * Expected proportion of outliers 083 */ 084 double outlierProportion = 0.4; 085 086 /** 087 * Number of degrees of freedom of the model; used in conjunction with 088 * inlierNoiseLevel if set to estimate the inliers. 089 */ 090 private double degreesOfFreedom; 091 092 protected ResidualCalculator<I, D, M> residualEstimator; 093 094 protected boolean improveEstimate; 095 096 protected M model; 097 protected M bestModel; 098 protected List<IndependentPair<I, D>> inliers = new ArrayList<IndependentPair<I, D>>(); 099 protected List<IndependentPair<I, D>> outliers = new ArrayList<IndependentPair<I, D>>(); 100 protected CollectionSampler<IndependentPair<I, D>> sampler; 101 private double bestMedianError; 102 103 /** 104 * Construct with the given model and residual calculator. The proportion of 105 * outliers is assumed to be 0.4. The threshold for determining inliers and 106 * outliers is automatically computed. Uniform random sampling is used for 107 * creating the subsets. 108 * 109 * @param model 110 * the model to estimate 111 * @param residualEstimator 112 * the algorithm to compute residuals of the model 113 * @param impEst 114 * True if we want to perform a final fitting of the model with 115 * all inliers, false otherwise 116 */ 117 public LMedS(M model, ResidualCalculator<I, D, M> residualEstimator, boolean impEst) { 118 this(model, residualEstimator, impEst, new UniformSampler<IndependentPair<I, D>>()); 119 } 120 121 /** 122 * Construct with the given model, residual calculator and estimated 123 * proportion of outliers. The threshold for determining inliers and 124 * outliers is automatically computed. Uniform random sampling is used for 125 * creating the subsets. 126 * 127 * @param model 128 * the model to estimate 129 * @param residualEstimator 130 * the algorithm to compute residuals of the model 131 * @param outlierProportion 132 * Expected proportion of outliers 133 * @param impEst 134 * True if we want to perform a final fitting of the model with 135 * all inliers, false otherwise 136 */ 137 public LMedS(M model, ResidualCalculator<I, D, M> residualEstimator, double outlierProportion, boolean impEst) { 138 this(model, residualEstimator, impEst); 139 this.outlierProportion = outlierProportion; 140 } 141 142 /** 143 * Construct with the given model, residual calculator and estimated 144 * proportion of outliers. The given inlier noise level and number of 145 * degrees of freedom of the model are used to estimate the optimal 146 * threshold for determining inliers versus outliers. Uniform random 147 * sampling is used for creating the subsets. 148 * 149 * @param model 150 * the model to estimate 151 * @param residualEstimator 152 * the algorithm to compute residuals of the model 153 * @param outlierProportion 154 * Expected proportion of outliers 155 * @param inlierNoiseLevel 156 * The level of inlier noise (standard deviation of Gaussian 157 * noise inherent in the data). 158 * @param degreesOfFreedom 159 * Number of degrees of freedom of the model 160 * @param impEst 161 * True if we want to perform a final fitting of the model with 162 * all inliers, false otherwise 163 */ 164 public LMedS(M model, ResidualCalculator<I, D, M> residualEstimator, double outlierProportion, 165 double inlierNoiseLevel, double degreesOfFreedom, boolean impEst) 166 { 167 this(model, residualEstimator, outlierProportion, impEst); 168 this.inlierNoiseLevel = inlierNoiseLevel; 169 this.degreesOfFreedom = degreesOfFreedom; 170 } 171 172 /** 173 * Construct with the given model and residual calculator. The proportion of 174 * outliers is assumed to be 0.4. The threshold for determining inliers and 175 * outliers is automatically computed. 176 * 177 * @param model 178 * the model to estimate 179 * @param residualEstimator 180 * the algorithm to compute residuals of the model 181 * @param impEst 182 * True if we want to perform a final fitting of the model with 183 * all inliers, false otherwise 184 * @param sampler 185 * the sampling algorithm for selecting random subsets 186 */ 187 @SuppressWarnings("unchecked") 188 public LMedS(M model, ResidualCalculator<I, D, M> residualEstimator, boolean impEst, 189 CollectionSampler<IndependentPair<I, D>> sampler) 190 { 191 this.model = model; 192 this.residualEstimator = residualEstimator; 193 this.bestModel = (M) model.clone(); 194 this.improveEstimate = impEst; 195 this.sampler = sampler; 196 } 197 198 /** 199 * Construct with the given model, residual calculator and estimated 200 * proportion of outliers. The threshold for determining inliers and 201 * outliers is automatically computed. 202 * 203 * @param model 204 * the model to estimate 205 * @param residualEstimator 206 * the algorithm to compute residuals of the model 207 * @param outlierProportion 208 * Expected proportion of outliers 209 * @param impEst 210 * True if we want to perform a final fitting of the model with 211 * all inliers, false otherwise 212 * @param sampler 213 * the sampling algorithm for selecting random subsets 214 */ 215 public LMedS(M model, ResidualCalculator<I, D, M> residualEstimator, double outlierProportion, boolean impEst, 216 CollectionSampler<IndependentPair<I, D>> sampler) 217 { 218 this(model, residualEstimator, impEst, sampler); 219 this.outlierProportion = outlierProportion; 220 } 221 222 /** 223 * Construct with the given model, residual calculator and estimated 224 * proportion of outliers. The given inlier noise level and number of 225 * degrees of freedom of the model are used to estimate the optimal 226 * threshold for determining inliers versus outliers. 227 * 228 * @param model 229 * the model to estimate 230 * @param residualEstimator 231 * the algorithm to compute residuals of the model 232 * @param outlierProportion 233 * Expected proportion of outliers 234 * @param inlierNoiseLevel 235 * The level of inlier noise (standard deviation of Gaussian 236 * noise inherent in the data). 237 * @param degreesOfFreedom 238 * Number of degrees of freedom of the model 239 * @param impEst 240 * True if we want to perform a final fitting of the model with 241 * all inliers, false otherwise 242 * @param sampler 243 * the sampling algorithm for selecting random subsets 244 */ 245 public LMedS(M model, ResidualCalculator<I, D, M> residualEstimator, double outlierProportion, 246 double inlierNoiseLevel, double degreesOfFreedom, boolean impEst, 247 CollectionSampler<IndependentPair<I, D>> sampler) 248 { 249 this(model, residualEstimator, outlierProportion, impEst, sampler); 250 this.inlierNoiseLevel = inlierNoiseLevel; 251 this.degreesOfFreedom = degreesOfFreedom; 252 } 253 254 @Override 255 public boolean fitData(List<? extends IndependentPair<I, D>> data) { 256 final int sampleSize = model.numItemsToEstimate(); 257 258 if (data.size() < sampleSize) 259 return false; 260 261 final int numSamples = (int) Math.ceil(Math.log(1 - probability) 262 / Math.log(1 - Math.pow(1 - outlierProportion, sampleSize))); 263 264 double[] errors = new double[data.size()]; 265 double[] bestErrors = new double[data.size()]; 266 Arrays.fill(bestErrors, Double.MAX_VALUE); 267 bestMedianError = Double.MAX_VALUE; 268 269 sampler.setCollection(data); 270 271 for (int i = 0; i < numSamples; i++) { 272 final List<? extends IndependentPair<I, D>> sample = sampler.sample(sampleSize); 273 274 if (!model.estimate(sample)) 275 continue; 276 277 residualEstimator.setModel(model); 278 residualEstimator.computeResiduals(data, errors); 279 280 final double medianError = DoubleArrayStatsUtils.median(errors); 281 if (medianError < bestMedianError) { 282 bestMedianError = medianError; 283 284 // swap working model and best model 285 final M tmp = bestModel; 286 bestModel = model; 287 model = tmp; 288 289 final double[] tmp2 = bestErrors; 290 bestErrors = errors; 291 errors = tmp2; 292 } 293 } 294 295 findInliersOutliers(data, bestErrors); 296 297 if (improveEstimate) { 298 if (!bestModel.estimate(inliers)) 299 return false; 300 } 301 302 final double outlierProp = (double) outliers.size() / (double) data.size(); 303 304 return outlierProp < this.outlierProportion; 305 } 306 307 private void findInliersOutliers(List<? extends IndependentPair<I, D>> data, double[] bestErrors) { 308 inliers.clear(); 309 outliers.clear(); 310 311 double threshold; 312 if (inlierNoiseLevel > 0) 313 threshold = inlierNoiseLevel * inlierNoiseLevel 314 * new ChiSquaredDistribution(degreesOfFreedom).inverseCumulativeProbability(probability); 315 else { 316 final double sigmahat = 1.4826 * (1 + 5 / (Math.max(1, data.size() - model.numItemsToEstimate()))) 317 * Math.sqrt(bestMedianError); 318 // http://research.microsoft.com/en-us/um/people/zhang/INRIA/Publis/Tutorial-Estim/node25.html 319 threshold = (2.5 * sigmahat) * (2.5 * sigmahat); 320 } 321 322 for (int i = 0; i < data.size(); i++) { 323 if (bestErrors[i] < threshold) 324 inliers.add(data.get(i)); 325 else 326 outliers.add(data.get(i)); 327 } 328 } 329 330 @Override 331 public M getModel() { 332 return bestModel; 333 } 334 335 @Override 336 public List<? extends IndependentPair<I, D>> getInliers() { 337 return inliers; 338 } 339 340 @Override 341 public List<? extends IndependentPair<I, D>> getOutliers() { 342 return outliers; 343 } 344 345 @Override 346 public int numItemsToEstimate() { 347 return model.numItemsToEstimate(); 348 } 349}