/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the name of the University of Southampton nor the names of its
 *     contributors may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Main.java
 *
 * Created on November 30, 2006, 9:25 PM
 *
 * To change this template, choose Tools | Template Manager
 * and open the template in the editor.
 */

package org.lemurproject.ireval;

import org.lemurproject.ireval.RetrievalEvaluator.Document;
import org.lemurproject.ireval.RetrievalEvaluator.Judgment;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Map;
import java.util.TreeMap;

/**
 * A trec_eval style tool in pure java.
 *
 * @author Trevor Strohman
 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
 */
public class IREval {
    /**
     * Loads a TREC judgments file.
     *
     * @param filename The filename of the judgments file to load.
     * @return Maps from query numbers to lists of judgments for each query.
     * @throws IOException
     * @throws FileNotFoundException
     */
    public static TreeMap< String, ArrayList<Judgment> > loadJudgments( String filename ) throws IOException, FileNotFoundException {
        // open file
        BufferedReader in = new BufferedReader(new FileReader( filename ));
        String line = null;
        TreeMap< String, ArrayList<Judgment> > judgments = new TreeMap< String, ArrayList<Judgment> >();
        String recentQuery = null;
        ArrayList<Judgment> recentJudgments = null;

        while( (line = in.readLine()) != null ) {
            // allow for multiple whitespace characters between fields
            String[] fields = line.split( "\\s+" );

            String number = fields[0];
            @SuppressWarnings("unused")
            String unused = fields[1];
            String docno = fields[2];
            String judgment = fields[3];
            int jVal = 0;
            try {
                jVal = Integer.valueOf( judgment );
            } catch (NumberFormatException e) {
                jVal = (int) Math.round(Double.valueOf( judgment ));
            }

            Judgment j = new Judgment( docno, jVal );

            if( recentQuery == null || !recentQuery.equals( number ) ) {
                if( !judgments.containsKey( number ) ) {
                    judgments.put( number, new ArrayList<Judgment>() );
                }

                recentJudgments = judgments.get( number );
                recentQuery = number;
            }

            recentJudgments.add( j );
        }

        in.close();
        return judgments;
    }

    /**
     * Reads in a TREC ranking file.
     *
     * @param filename The filename of the ranking file.
     * @return A map from query numbers to document ranking lists.
     * @throws IOException
     * @throws FileNotFoundException
     */
    @SuppressWarnings("unused")
    public static TreeMap< String, ArrayList<Document> > loadRanking( String filename ) throws IOException, FileNotFoundException {
        // open file
        BufferedReader in = new BufferedReader(new FileReader( filename ));
        String line = null;
        TreeMap< String, ArrayList<Document> > ranking = new TreeMap< String, ArrayList<Document> >();
        ArrayList<Document> recentRanking = null;
        String recentQuery = null;

        while( (line = in.readLine()) != null ) {
            // allow for multiple whitespace characters between fields
            String[] fields = line.split( "\\s+" );

            // 1 Q0 WSJ880711-0086 39 -3.05948 Exp

            String number = fields[0];
            String unused = fields[1];
            String docno = fields[2];
            String rank = fields[3];
            String score = fields[4];
            String runtag = fields[5];
            // lemur can output nan (or NaN)
            double scoreNumber;
            try {
                scoreNumber = Double.valueOf( score );
            } catch (NumberFormatException ex) {
                scoreNumber = 0.0;
            }

            Document document = new Document( docno, Integer.valueOf( rank ), scoreNumber );

            if( recentQuery == null || !recentQuery.equals( number ) ) {
                if( !ranking.containsKey( number ) ) {
                    ranking.put( number, new ArrayList<Document>() );
                }

                recentQuery = number;
                recentRanking = ranking.get( number );
            }

            recentRanking.add( document );
        }

        in.close();
        return ranking;
    }

    /**
     * Creates a SetRetrievalEvaluator from data from loadRanking and loadJudgments.
     * @param allRankings
     * @param allJudgments
     * @return the evaluation result
     */
    public static SetRetrievalEvaluator create( TreeMap< String, ArrayList<Document> > allRankings, TreeMap< String, ArrayList<Judgment> > allJudgments ) {
        // Map query numbers into Integer to get proper sorting.
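        // The comparator below orders query ids numerically when both ids parse as
        // integers, and falls back to plain string comparison otherwise.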
        TreeMap< String, RetrievalEvaluator > evaluators = new TreeMap<String, RetrievalEvaluator>(new java.util.Comparator<String>() {
            @Override
            public int compare(String a, String b) {
                try {
                    Integer a1 = new Integer(a);
                    Integer b1 = new Integer(b);
                    return a1.compareTo(b1);
                } catch (NumberFormatException e) {
                    // not an integer
                    return a.compareTo(b);
                }
            }
        });

        for( String query : allRankings.keySet() ) {
            ArrayList<Judgment> judgments = allJudgments.get( query );
            ArrayList<Document> ranking = allRankings.get( query );

            /* resort ranking on score, renumber ranks */
            java.util.Collections.sort(ranking, new java.util.Comparator<Document>() {
                @Override
                public int compare(Document a, Document b) {
                    if (a.score < b.score) return 1;
                    if (a.score == b.score) return 0;
                    return -1;
                }
            });
            int i = 1;
            for (Document d : ranking) {
                d.rank = i++;
            }

            if( judgments == null || ranking == null ) {
                continue;
            }

            RetrievalEvaluator evaluator = new RetrievalEvaluator( query, ranking, judgments );
            evaluators.put( query, evaluator );
        }

        return new SetRetrievalEvaluator( evaluators.values() );
    }

    /**
     * Returns an output string very similar to that of trec_eval.
     * @param query
     * @param evaluator
     * @return the result as a {@link String}
     */
    public static String singleQuery( String query, RetrievalEvaluator evaluator ) {
        StringWriter s = new StringWriter();
        PrintWriter out = new PrintWriter(s);
        String formatString = "%2$-25s\t%1$5s\t";
        // print trec_eval relational-style output
        // counts
        out.format( formatString + "%3$6d\n", query, "num_ret", evaluator.retrievedDocuments().size() );
        out.format( formatString + "%3$6d\n", query, "num_rel", evaluator.relevantDocuments().size() );
        out.format( formatString + "%3$6d\n", query, "num_rel_ret", evaluator.relevantRetrievedDocuments().size() );

        // aggregate measures
        out.format( formatString + "%3$6.4f\n", query, "map", evaluator.averagePrecision() );
        out.format( formatString + "%3$6.4f\n", query, "ndcg", evaluator.normalizedDiscountedCumulativeGain() );
        out.format( formatString + "%3$6.4f\n", query, "ndcg15", evaluator.normalizedDiscountedCumulativeGain( 15 ) );
        out.format( formatString + "%3$6.4f\n", query, "R-prec", evaluator.rPrecision() );
        out.format( formatString + "%3$6.4f\n", query, "bpref", evaluator.binaryPreference() );
        out.format( formatString + "%3$6.4f\n", query, "recip_rank", evaluator.reciprocalRank() );

        // precision at fixed points
        int[] fixedPoints = RetrievalEvaluator.getFixedPoints();
        double[] vals = evaluator.precisionAtFixedPoints();
        for( int i = 0; i < fixedPoints.length; i++ ) {
            int point = fixedPoints[i];
            out.format( formatString + "%3$6.4f\n", query, "P" + point, vals[i] );
        }
        double[] precs = evaluator.interpolatedPrecision();
        double prec = 0;
        for( int i = 0; i < precs.length; i++ ) {
            out.format( "ircl_prn.%3$3.2f%2$-18s\t%1$5s\t%4$6.4f\n", query, " ", prec, precs[i] );
            prec += 0.1;
        }
        out.format("\n");
        return s.toString();
    }

    /**
     * Returns an output string very similar to that of trec_eval.
     * @param setEvaluator
     * @param showIndividual
     * @return the result as a {@link String}
     */
    public static String singleEvaluation( SetRetrievalEvaluator setEvaluator, boolean showIndividual ) {
        StringWriter s = new StringWriter();
        PrintWriter out = new PrintWriter(s);
        String formatString = "%2$-25s\t%1$5s\t";
        // print trec_eval relational-style output
        if (showIndividual) {
            for( RetrievalEvaluator evaluator : setEvaluator.getEvaluators() ) {
                String query = evaluator.queryName();
                out.print(singleQuery(query, evaluator));
            }
        }
        // print summary data
        out.format( formatString + "%3$6d\n", "all", "num_q", setEvaluator.getEvaluators().size() );
        out.format( formatString + "%3$6d\n", "all", "num_ret", setEvaluator.numberRetrieved() );
        out.format( formatString + "%3$6d\n", "all", "num_rel", setEvaluator.numberRelevant() );
        out.format( formatString + "%3$6d\n", "all", "num_rel_ret", setEvaluator.numberRelevantRetrieved() );

        out.format( formatString + "%3$6.4f\n", "all", "map", setEvaluator.meanAveragePrecision() );
        out.format( formatString + "%3$6.4f\n", "all", "gm_ap", setEvaluator.geometricMeanAveragePrecision() );
        out.format( formatString + "%3$6.4f\n", "all", "ndcg", setEvaluator.meanNormalizedDiscountedCumulativeGain() );
        out.format( formatString + "%3$6.4f\n", "all", "R-prec", setEvaluator.meanRPrecision() );
        out.format( formatString + "%3$6.4f\n", "all", "bpref", setEvaluator.meanBinaryPreference() );
        out.format( formatString + "%3$6.4f\n", "all", "recip_rank", setEvaluator.meanReciprocalRank() );

        // precision at fixed points
        int[] fixedPoints = SetRetrievalEvaluator.getFixedPoints();
        double[] precs = setEvaluator.precisionAtFixedPoints();

        for( int i = 0; i < fixedPoints.length; i++ ) {
            int point = fixedPoints[i];
            out.format( formatString + "%3$6.4f\n", "all", "P" + point, precs[i] );
        }
        double prec = 0;
        precs = setEvaluator.interpolatedPrecision();
        for( int i = 0; i < precs.length; i++ ) {
            out.format( "ircl_prn.%3$3.2f%2$-18s\t%1$5s\t%4$6.4f\n", "all", " ", prec, precs[i] );
            prec += 0.1;
        }
        out.format("\n");
        return s.toString();
    }

    /**
     * Compare two sets of retrieval results.
     *
     * @param baseline
     * @param treatment
     * @param baselineName
     * @param treatmentName
     * @return the result as a {@link String}.
     */
    public static String comparisonEvaluation( SetRetrievalEvaluator baseline, SetRetrievalEvaluator treatment, String baselineName, String treatmentName ) {
        StringWriter s = new StringWriter();
        PrintWriter out = new PrintWriter(s);
        String[] metrics = { "averagePrecision", "rPrecision", "ndcg", "bpref", "P5", "P10", "P20" };
        String formatString = "%1$-20s%2$-30s%3$6.4f\n";
        String integerFormatString = "%1$-20s%2$-30s%3$6d\n";
        out.println("Comparing baseline: " + baselineName + " to treatment: " + treatmentName + "\n");
        if (treatment == null) return "NOPE";
        for( String metric : metrics ) {
            Map<String, Double> baselineMetric = baseline.evaluateAll( metric );
            Map<String, Double> treatmentMetric = treatment.evaluateAll( metric );

            SetRetrievalComparator comparator = new SetRetrievalComparator( baselineMetric, treatmentMetric );

            out.format( formatString, metric, baselineName, comparator.meanBaselineMetric() );
            out.format( formatString, metric, treatmentName, comparator.meanTreatmentMetric() );

            out.format( integerFormatString, metric, "basebetter", comparator.countBaselineBetter() );
            out.format( integerFormatString, metric, "treatbetter", comparator.countTreatmentBetter() );
            out.format( integerFormatString, metric, "equal", comparator.countEqual() );

            out.format( formatString, metric, "ttest", comparator.pairedTTest() );
            out.format( formatString, metric, "randomized", comparator.randomizedTest() );
            out.format( formatString, metric, "signtest", comparator.signTest() );
        }

        return s.toString();
    }

    /**
     * Print tool usage
     */
    public static void usage( ) {
        System.err.println( "ireval: " );
        System.err.println( "  There are two ways to use this program. First, you can evaluate a single ranking: " );
        System.err.println( "    java -jar ireval.jar TREC-Ranking-File TREC-Judgments-File" );
        System.err.println( "  or, you can use it to compare two rankings with statistical tests: " );
        System.err.println( "    java -jar ireval.jar TREC-Baseline-Ranking-File TREC-Improved-Ranking-File TREC-Judgments-File" );
        System.exit(-1);
    }

    /**
     * Tool main method.
     * @param args
     * @throws IOException
     */
    public static void main( String[] args ) throws IOException {
        try {
            if( args.length == 3 ) {
                TreeMap< String, ArrayList<Document> > baselineRanking = loadRanking( args[0] );
                TreeMap< String, ArrayList<Document> > treatmentRanking = loadRanking( args[1] );
                TreeMap< String, ArrayList<Judgment> > judgments = loadJudgments( args[2] );

                SetRetrievalEvaluator baseline = create( baselineRanking, judgments );
                SetRetrievalEvaluator treatment = create( treatmentRanking, judgments );

                // adjust these to file names.
                System.out.println(comparisonEvaluation( baseline, treatment, "baseline", "treatment" ));
            } else if( args.length == 2 ) {
                TreeMap< String, ArrayList<Document> > ranking = loadRanking( args[0] );
                TreeMap< String, ArrayList<Judgment> > judgments = loadJudgments( args[1] );

                SetRetrievalEvaluator setEvaluator = create( ranking, judgments );
                System.out.println(singleEvaluation( setEvaluator, true ));
            } else {
                usage();
            }
        } catch( Exception e ) {
            e.printStackTrace();
            usage();
        }
    }
}
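
/*
 * Usage sketch (illustrative only): the evaluation that main() performs for the
 * two-argument case can also be driven programmatically. The file names below are
 * hypothetical placeholders for a TREC-format run file and qrels file.
 *
 *   TreeMap<String, ArrayList<Document>> ranking   = IREval.loadRanking( "example.run" );
 *   TreeMap<String, ArrayList<Judgment>> judgments = IREval.loadJudgments( "example.qrels" );
 *   SetRetrievalEvaluator evaluator = IREval.create( ranking, judgments );
 *   System.out.println( IREval.singleEvaluation( evaluator, false ) );
 */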