001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030/* 031 * SetRetrievalComparator.java 032 * 033 * Created on November 30, 2006, 4:56 PM 034 * 035 * To change this template, choose Tools | Template Manager 036 * and open the template in the editor. 037 */ 038 039package org.lemurproject.ireval; 040 041import java.util.Map; 042import java.util.Random; 043import java.util.Set; 044import java.util.TreeSet; 045 046import org.apache.commons.math.MathException; 047import org.apache.commons.math.distribution.BinomialDistributionImpl; 048import org.apache.commons.math.distribution.TDistributionImpl; 049 050/** 051 * 052 * @author Trevor Strohman 053 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 054 */ 055public class SetRetrievalComparator { 056 double[] baseline; 057 double[] treatment; 058 059 /** 060 * Creates a new instance of SetRetrievalComparator 061 * @param baseline 062 * @param treatment 063 */ 064 public SetRetrievalComparator( Map<String, Double> baseline, Map<String, Double> treatment ) { 065 Set<String> commonQueries = new TreeSet<String>( baseline.keySet() ); 066 commonQueries.retainAll( treatment.keySet() ); 067 068 this.baseline = new double[commonQueries.size()]; 069 this.treatment = new double[commonQueries.size()]; 070 int i = 0; 071 072 for( String key : commonQueries ) { 073 this.baseline[i] = baseline.get(key); 074 this.treatment[i] = treatment.get(key); 075 i++; 076 } 077 } 078 079 private double mean( double[] numbers ) { 080 double sum = 0; 081 for( int i=0; i<numbers.length; i++ ) { 082 sum += numbers[i]; 083 } 084 085 return sum / (double) numbers.length; 086 } 087 088 /** 089 * @return mean baseline metric 090 */ 091 public double meanBaselineMetric() { 092 return mean( baseline ); 093 } 094 095 /** 096 * @return mean treatment metric 097 */ 098 public double meanTreatmentMetric() { 099 return mean( treatment ); 100 } 101 102 /** 103 * @return number of times treatment is better than baseline 104 */ 105 public int countTreatmentBetter() { 106 int better = 0; 107 108 for( int i=0; i<baseline.length; i++ ) { 109 if( baseline[i] < treatment[i] ) 110 better++; 111 } 112 113 return better; 114 } 115 116 /** 117 * @return number of times baseline is better than treatment 118 */ 119 public int countBaselineBetter() { 120 int better = 0; 121 122 for( int i=0; i<baseline.length; i++ ) { 123 if( baseline[i] > treatment[i] ) 124 better++; 125 } 126 127 return better; 128 } 129 130 /** 131 * @return number of times baseline and treatment are equal 132 */ 133 public int countEqual() { 134 int same = 0; 135 136 for( int i=0; i<baseline.length; i++ ) { 137 if( baseline[i] == treatment[i] ) 138 same++; 139 } 140 141 return same; 142 } 143 144 /** 145 * @return result of T-test between baseline and treatment 146 */ 147 public double pairedTTest() { 148 double sampleSum = 0; 149 double sampleSumSquares = 0; 150 int n = baseline.length; 151 152 for( int i=0; i<baseline.length; i++ ) { 153 double delta = treatment[i] - baseline[i]; 154 sampleSum += delta; 155 sampleSumSquares += delta*delta; 156 } 157 158 double sampleVariance = sampleSumSquares / (n - 1); 159 double sampleMean = sampleSum / baseline.length; 160 161 double sampleDeviation = Math.sqrt(sampleVariance); 162 double meanDeviation = sampleDeviation / Math.sqrt(n); 163 double t = sampleMean / meanDeviation; 164 165 try { 166 return 1.0 - new TDistributionImpl(n).cumulativeProbability(t); 167 } catch (MathException e) { 168 throw new RuntimeException(e); 169 } 170 } 171 172 /** 173 * @return result of sign test between baseline and treatment 174 */ 175 public double signTest() { 176 int treatmentIsBetter = 0; 177 int different = 0; 178 179 for( int i=0; i<treatment.length; i++ ) { 180 if( treatment[i] > baseline[i] ) 181 treatmentIsBetter++; 182 if( treatment[i] != baseline[i] ) 183 different++; 184 } 185 186 double pvalue; 187 try { 188 pvalue = 1 - new BinomialDistributionImpl(different, 0.5).cumulativeProbability(treatmentIsBetter - 1); 189 } catch (MathException e) { 190 throw new RuntimeException(e); 191 } 192 193 return pvalue; 194 } 195 196 /** 197 * @return result of randomized test between baseline and treatment 198 */ 199 public double randomizedTest() { 200 double baseMean = mean( baseline ); 201 double treatmentMean = mean( treatment ); 202 double difference = treatmentMean - baseMean; 203 int batch = 10000; 204 205 final int maxIterationsWithoutMatch = 1000000; 206 long iterations = 0; 207 long matches = 0; 208 209 double[] leftSample = new double[baseline.length]; 210 double[] rightSample = new double[baseline.length]; 211 Random random = new Random(); 212 double pValue = 0.0; 213 214 while(true) { 215 for( int i=0; i<batch; i++ ) { 216 // create a sample from both distributions 217 for( int j=0; j<baseline.length; j++ ) { 218 if( random.nextBoolean() ) { 219 leftSample[j] = baseline[j]; 220 rightSample[j] = treatment[j]; 221 } else { 222 leftSample[j] = treatment[j]; 223 rightSample[j] = baseline[j]; 224 } 225 } 226 227 double sampleDifference = mean( leftSample ) - mean( rightSample ); 228 229 if( difference <= sampleDifference ) 230 matches++; 231 } 232 233 iterations += batch; 234 235 // this is the current p-value estimate 236 pValue = (double) matches / (double) iterations; 237 238 // if we still haven't found a match, keep looking 239 if( matches == 0 ) { 240 if( iterations < maxIterationsWithoutMatch ) { 241 continue; 242 } else { 243 break; 244 } 245 } 246 247 // this is our accepted level of deviation in the p-value; we require: 248 // - accuracy at the fourth decimal place, and 249 // - less than 5% error in the p-value, or 250 // - accuracy at the sixth decimal place. 251 252 double maxDeviation = Math.max( 0.0000005 / pValue, Math.min( 0.00005 / pValue, 0.05 ) ); 253 254 // this estimate is derived in Efron and Tibshirani, p.209. 255 // this is the estimated number of iterations necessary for convergence, given 256 // our current p-value estimate. 257 double estimatedIterations = Math.sqrt( pValue * (1.0 - pValue) ) / maxDeviation; 258 259 // if( estimatedIterations > iterations ) 260 if( iterations > estimatedIterations ) 261 break; 262 263 } 264 265 return pValue; 266 } 267}