Source code

001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030/*
031 * SetRetrievalComparator.java
032 *
033 * Created on November 30, 2006, 4:56 PM
034 *
035 * To change this template, choose Tools | Template Manager
036 * and open the template in the editor.
037 */
038
039package org.lemurproject.ireval;
040
041import java.util.Map;
042import java.util.Random;
043import java.util.Set;
044import java.util.TreeSet;
045
046import org.apache.commons.math.MathException;
047import org.apache.commons.math.distribution.BinomialDistributionImpl;
048import org.apache.commons.math.distribution.TDistributionImpl;
049
050/**
051 *
052 * @author Trevor Strohman
053 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
054 */
055public class SetRetrievalComparator {
056    double[] baseline;
057    double[] treatment;
058    
059    /** 
060     * Creates a new instance of SetRetrievalComparator 
061     * @param baseline 
062     * @param treatment 
063     */
064    public SetRetrievalComparator( Map<String, Double> baseline, Map<String, Double> treatment ) {
065        Set<String> commonQueries = new TreeSet<String>( baseline.keySet() );
066        commonQueries.retainAll( treatment.keySet() );
067
068        this.baseline = new double[commonQueries.size()];
069        this.treatment = new double[commonQueries.size()];
070        int i = 0;
071        
072        for( String key : commonQueries ) {
073            this.baseline[i] = baseline.get(key);
074            this.treatment[i] = treatment.get(key);
075            i++;
076        }
077    }
078
079    private double mean( double[] numbers ) {
080        double sum = 0;
081        for( int i=0; i<numbers.length; i++ ) {
082            sum += numbers[i];
083        }
084        
085        return sum / (double) numbers.length;
086    }
087    
088    /**
089     * @return mean baseline metric
090     */
091    public double meanBaselineMetric() {
092        return mean( baseline );
093    }
094    
095    /**
096     * @return mean treatment metric
097     */
098    public double meanTreatmentMetric() {
099        return mean( treatment );
100    }
101
102    /**
103     * @return number of times treatment is better than baseline
104     */
105    public int countTreatmentBetter() {
106        int better = 0;
107
108        for( int i=0; i<baseline.length; i++ ) {
109            if( baseline[i] < treatment[i] )
110                better++;
111        }
112
113        return better;
114    }
115
116    /**
117     * @return number of times baseline is better than treatment
118     */
119    public int countBaselineBetter() {
120        int better = 0;
121
122        for( int i=0; i<baseline.length; i++ ) {
123            if( baseline[i] > treatment[i] )
124                better++;
125        }
126
127        return better;
128    }
129
130    /**
131     * @return number of times baseline and treatment are equal
132     */
133    public int countEqual() {
134        int same = 0;
135
136        for( int i=0; i<baseline.length; i++ ) {
137            if( baseline[i] == treatment[i] )
138                same++;
139        }
140
141        return same;
142    }
143    
144    /**
145     * @return result of T-test between baseline and treatment
146     */
147    public double pairedTTest() {       
148        double sampleSum = 0;
149        double sampleSumSquares = 0;
150        int n = baseline.length;
151        
152        for( int i=0; i<baseline.length; i++ ) {
153            double delta = treatment[i] - baseline[i];
154            sampleSum += delta;
155            sampleSumSquares += delta*delta;
156        }
157       
158        double sampleVariance = sampleSumSquares / (n - 1);
159        double sampleMean = sampleSum / baseline.length;
160        
161        double sampleDeviation = Math.sqrt(sampleVariance);
162        double meanDeviation = sampleDeviation / Math.sqrt(n);
163        double t = sampleMean / meanDeviation;
164        
165        try {
166                        return 1.0 - new TDistributionImpl(n).cumulativeProbability(t);
167                } catch (MathException e) {
168                        throw new RuntimeException(e);
169                }
170    }
171    
172    /**
173     * @return result of sign test between baseline and treatment
174     */
175    public double signTest() {
176        int treatmentIsBetter = 0;
177        int different = 0;
178        
179        for( int i=0; i<treatment.length; i++ ) {
180            if( treatment[i] > baseline[i] )
181                treatmentIsBetter++;
182            if( treatment[i] != baseline[i] )
183                different++;
184        }
185        
186        double pvalue;
187                try {
188                        pvalue = 1 - new BinomialDistributionImpl(different, 0.5).cumulativeProbability(treatmentIsBetter - 1);
189                } catch (MathException e) {
190                        throw new RuntimeException(e);
191                }
192        
193        return pvalue;
194    }
195
196    /**
197     * @return result of randomized test between baseline and treatment
198     */
199    public double randomizedTest() {
200        double baseMean = mean( baseline );
201        double treatmentMean = mean( treatment );
202        double difference = treatmentMean - baseMean;
203        int batch = 10000;
204        
205        final int maxIterationsWithoutMatch = 1000000;
206        long iterations = 0;
207        long matches = 0;
208        
209        double[] leftSample = new double[baseline.length];
210        double[] rightSample = new double[baseline.length];
211        Random random = new Random();
212        double pValue = 0.0;
213        
214        while(true) {
215            for( int i=0; i<batch; i++ ) {
216                // create a sample from both distributions
217                for( int j=0; j<baseline.length; j++ ) {
218                    if( random.nextBoolean() ) {
219                        leftSample[j] = baseline[j];
220                        rightSample[j] = treatment[j];
221                    } else {
222                        leftSample[j] = treatment[j];
223                        rightSample[j] = baseline[j];
224                    }
225                }
226
227                double sampleDifference = mean( leftSample ) - mean( rightSample );
228
229                if( difference <= sampleDifference )
230                    matches++;
231            }
232            
233            iterations += batch;
234
235            // this is the current p-value estimate
236            pValue = (double) matches / (double) iterations;
237            
238            // if we still haven't found a match, keep looking
239            if( matches == 0 ) {
240                if( iterations < maxIterationsWithoutMatch ) {
241                    continue;
242                } else {
243                    break;
244                }
245            }
246            
247            // this is our accepted level of deviation in the p-value; we require:
248            //      - accuracy at the fourth decimal place, and
249            //      - less than 5% error in the p-value, or
250            //      - accuracy at the sixth decimal place.
251            
252            double maxDeviation = Math.max( 0.0000005 / pValue, Math.min( 0.00005 / pValue, 0.05 ) );
253            
254            // this estimate is derived in Efron and Tibshirani, p.209.
255            // this is the estimated number of iterations necessary for convergence, given
256            // our current p-value estimate.
257            double estimatedIterations = Math.sqrt( pValue * (1.0 - pValue) ) / maxDeviation;
258
259            //            if( estimatedIterations > iterations )
260            if( iterations > estimatedIterations )
261                break;
262
263        }
264   
265        return pValue;
266    }
267}