001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030/* 031 * RetrievalEvaluator 032 * 033 * Use of this software is governed by the Creative Commons Attribution license: 034 * http://creativecommons.org/licenses/by/2.5/ 035 * 036 * Trevor Strohman, September 23, 2005 037 */ 038package org.lemurproject.ireval; 039 040import java.util.ArrayList; 041import java.util.HashMap; 042import java.util.TreeMap; 043import java.util.List; 044import java.util.Collection; 045 046/** 047 * <p>A retrieval evaluator object computes a variety of standard 048 * information retrieval metrics commonly used in TREC, including 049 * binary preference (BPREF), geometric mean average precision (GMAP), 050 * mean average precision (MAP), and standard precision and recall. 051 * In addition, the object gives access to the relevant documents that 052 * were found, and the relevant documents that were missed.</p> 053 * 054 * <p>BPREF is defined in Buckley and Voorhees, "Retrieval Evaluation 055 * with Incomplete Information", SIGIR 2004.</p> 056 * 057 * @author Trevor Strohman 058 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 059 */ 060public class RetrievalEvaluator { 061 062 /** 063 * This class represents a document returned by a retrieval 064 * system. It can be subclassed if you want to add system-dependent 065 * information to the document representation. 066 */ 067 068 public static class Document { 069 /** 070 * Constructs a new Document object. 071 * 072 * @param documentNumber The document identifier. 073 * @param rank The rank of the document in a retrieved ranked list. 074 * @param score The score given to this document by the retrieval system. 075 */ 076 077 public Document( String documentNumber, int rank, double score ) { 078 this.documentNumber = documentNumber; 079 this.rank = rank; 080 this.score = score; 081 } 082 083 /** 084 * Constructs a new Document object. 085 * 086 * @param documentNumber The document identifier. 087 */ 088 089 public Document( String documentNumber ) { 090 this.documentNumber = documentNumber; 091 this.rank = Integer.MAX_VALUE; 092 this.score = Double.NEGATIVE_INFINITY; 093 } 094 095 /** The rank of the document in a retrieved ranked list. */ 096 public int rank; 097 /** The document identifier. */ 098 public String documentNumber; 099 /** The score given to this document by the retrieval system. */ 100 public double score; 101 } 102 103 /** 104 * This class represents a relevance judgment of a particular document 105 * for a specific query. 106 */ 107 108 public static class Judgment { 109 /** 110 * Constructs a new Judgment instance. 111 * 112 * @param documentNumber The document identifier. 113 * @param judgment The relevance judgment for this document, where positive values mean relevant, and zero means not relevant. 114 */ 115 public Judgment( String documentNumber, int judgment ) { 116 this.documentNumber = documentNumber; 117 this.judgment = judgment; 118 } 119 120 /** The document identifier. */ 121 public String documentNumber; 122 /** The relevance judgment for this document, where positive values mean relevant, and zero means not relevant. */ 123 public int judgment; 124 } 125 126 private String _queryName; 127 private ArrayList<Document> _retrieved; 128 private ArrayList<Document> _judgedMissed; 129 private ArrayList<Document> _relevant; 130 private ArrayList<Document> _relevantRetrieved; 131 private ArrayList<Document> _judgedIrrelevantRetrieved; 132 private ArrayList<Document> _irrelevantRetrieved; 133 private ArrayList<Document> _relevantMissed; 134 private HashMap<String, Judgment> _judgments; 135 private int _numIrrelevant; 136 137 /** 138 * Creates a new instance of RetrievalEvaluator 139 * 140 * @param queryName 141 * @param retrieved A ranked list of retrieved documents. 142 * @param judgments A collection of relevance judgments. 143 */ 144 public RetrievalEvaluator( String queryName, List<Document> retrieved, Collection<Judgment> judgments ) { 145 _queryName = queryName; 146 _retrieved = new ArrayList<Document>( retrieved ); 147 148 _buildJudgments( judgments ); 149 _judgeRetrievedDocuments(); 150 _findMissedDocuments(); 151 _findRelevantDocuments(); 152 } 153 154 private void _buildJudgments( Collection<Judgment> judgments ) { 155 _judgments = new HashMap<String, Judgment>(); 156 _numIrrelevant = 0; 157 for( Judgment judgment : judgments ) { 158 _judgments.put( judgment.documentNumber, judgment ); 159 // count total number of judged irrelevant for bpref. 160 if (judgment.judgment <= 0) _numIrrelevant++; 161 } 162 } 163 164 private void _judgeRetrievedDocuments() { 165 _irrelevantRetrieved = new ArrayList<Document>(); 166 _relevantRetrieved = new ArrayList<Document>(); 167 _judgedIrrelevantRetrieved = new ArrayList<Document>(); 168 169 for( Document document: _retrieved ) { 170 boolean relevant = false; 171 boolean judgedIrrelevant = false; 172 Judgment judgment = _judgments.get( document.documentNumber ); 173 174 relevant = ( judgment != null ) && ( judgment.judgment > 0 ); 175 judgedIrrelevant = ( judgment != null ) && ( judgment.judgment <= 0 ); 176 177 if( relevant ) { 178 _relevantRetrieved.add( document ); 179 } else { 180 _irrelevantRetrieved.add( document ); 181 182 if( judgedIrrelevant ) { 183 _judgedIrrelevantRetrieved.add( document ); 184 } 185 } 186 } 187 } 188 189 private void _findMissedDocuments() { 190 HashMap<String, Judgment> missedDocuments = new HashMap<String, Judgment>( _judgments ); 191 192 for( Document document : _relevantRetrieved ) { 193 missedDocuments.remove( document.documentNumber ); 194 } 195 196 for( Document document : _judgedIrrelevantRetrieved ) { 197 missedDocuments.remove( document.documentNumber ); 198 } 199 200 _judgedMissed = new ArrayList<Document>(); 201 _relevantMissed = new ArrayList<Document>(); 202 203 for( Judgment judgment : missedDocuments.values() ) { 204 Document document = new Document( judgment.documentNumber ); 205 _judgedMissed.add( document ); 206 207 if( judgment.judgment > 0 ) { 208 _relevantMissed.add( document ); 209 } 210 } 211 } 212 213 private void _findRelevantDocuments() { 214 _relevant = new ArrayList<Document>(); 215 _relevant.addAll( _relevantRetrieved ); 216 _relevant.addAll( _relevantMissed ); 217 } 218 219 /** 220 * @return the name of the query represented by this evaluator. 221 */ 222 public String queryName() { 223 return _queryName; 224 } 225 226 private static int[] fixedPoints = { 5, 10, 15, 20, 30, 100, 200, 500, 1000 }; 227 228 /** 229 * @return the fixed points (number of retrieved docs) at which precision is evaluated 230 */ 231 public static int[] getFixedPoints(){return fixedPoints;} 232 233 private double[] _pFP = null; 234 235 /** 236 * @return the precision at the fixed points specified by {@link #getFixedPoints()}. 237 */ 238 public double[] precisionAtFixedPoints(){ 239 if (_pFP == null ) { 240 // precision at fixed points 241 _pFP = new double [fixedPoints.length]; 242 int i = 0; 243 for( int point : fixedPoints ) { 244 _pFP[i++] = precision( point ); 245 } 246 } 247 return _pFP; 248 } 249 250 private double[] _ip = null; 251 252 /** 253 * @return the interpolated precision at 10% recall intervals 254 */ 255 public double[] interpolatedPrecision(){ 256 if (_ip == null) { 257 int size = _relevant.size(); 258 _ip = new double[11]; 259 int [] cuts = new int[11]; 260 cuts[0] = 0; 261 for (int i = 1; i < 11; i++) { 262 cuts[i] = ((size * i) + 9)/10; 263 _ip[i] = 0; 264 } 265 int current_cut = 10; 266 int relRet = _relevantRetrieved.size(); 267 while (cuts[current_cut] > relRet) current_cut--; 268 double prec = 0; 269 for (int i = relRet-1; i>=0; i--) { 270 int rank = _relevantRetrieved.get(i).rank; 271 double newprec = precision(rank); 272 prec = Math.max(prec, newprec); 273 while (current_cut >= 0 && cuts[current_cut] == (i+1)) { 274 _ip[current_cut--] = prec; 275 } 276 } 277 _ip[0] = prec; 278 } 279 return _ip; 280 } 281 282 /** 283 * Returns the precision of the retrieval at a given number of documents retrieved. 284 * The precision is the number of relevant documents retrieved 285 * divided by the total number of documents retrieved. 286 * 287 * @param documentsRetrieved The evaluation rank. 288 * @return the precision at the given number of retrieved documents. 289 */ 290 public double precision( int documentsRetrieved ) { 291 if (documentsRetrieved == 0) return 0; 292 return (double) relevantRetrieved( documentsRetrieved ) / (double) documentsRetrieved; 293 } 294 295 /** 296 * Returns the recall of the retrieval at a given number of documents retrieved. 297 * The recall is the number of relevant documents retrieved 298 * divided by the total number of relevant documents for the query. 299 * 300 * @param documentsRetrieved The evaluation rank. 301 * @return the recall at the given number of retrieved documents. 302 */ 303 public double recall( int documentsRetrieved ) { 304 if (_relevant.size() == 0) return 0; 305 return (double) relevantRetrieved( documentsRetrieved ) / (double) _relevant.size(); 306 } 307 308 /** 309 * Returns the precision at the rank equal to the total number of 310 * relevant documents retrieved. This method is equivalent to 311 * precision( relevantDocuments().size() ). If R is greater than 312 * the number of documents retrieved, the non-retrieved documents 313 * are assumed to be non-relevant (cf trec_eval 8). 314 * @return the r-precision 315 */ 316 public double rPrecision( ) { 317 int relevantCount = _relevant.size(); 318 @SuppressWarnings("unused") 319 int retrievedCount = _retrieved.size(); 320 321 return precision( relevantCount ); 322 } 323 324 /** 325 * Returns the reciprocal of the rank of the first relevant document 326 * retrieved, or zero if no relevant documents were retrieved. 327 * @return the reciprocal rank 328 */ 329 public double reciprocalRank( ) { 330 if( _relevantRetrieved.size() == 0 ) 331 return 0; 332 333 return 1.0 / (double) _relevantRetrieved.get(0).rank; 334 } 335 336 /** 337 * Returns the average precision of the query.<p> 338 * 339 * Suppose the precision is evaluated once at the rank of 340 * each relevant document in the retrieval. If a document is 341 * not retrieved, we assume that it was retrieved at rank infinity. 342 * The mean of all these precision values is the average precision. 343 * @return the average precision 344 */ 345 public double averagePrecision( ) { 346 double sumPrecision = 0; 347 int relevantCount = 0; 348 349 for( Document document : _relevantRetrieved ) { 350 relevantCount++; 351 sumPrecision += relevantCount / (double) document.rank; 352 } 353 if (_relevant.size() > 0) 354 return (double) sumPrecision / _relevant.size(); 355 return 0; 356 } 357 358 359 /** 360 * <p>The binary preference measure, as presented in Buckley, Voorhees 361 * "Retrieval Evaluation with Incomplete Information", SIGIR 2004. 362 * This implemenation is the 'pure' version, which is the one 363 * used in Buckley's trec_eval (v 8 with bpref bugfix).</p> 364 * 365 * <p>The formula is: 366 * <tt>1/R \sum_{r} 1 - |n ranked greater than r| / min(R, N)</tt> 367 * where R is the number of relevant documents for this topic, 368 * N is the number of irrelevant documents judged for this topic, 369 * and 370 * n is a member of the set of first R judged irrelevant documents 371 * retrieved.</p> 372 * @return the binary preference. 373 */ 374 public double binaryPreference( ) { 375 // Edge case: no relevant documents retrieved 376 // Edge case: more relevant documents than count retrieved 377 int totalRelevant = _relevant.size(); 378 if (totalRelevant == 0) return 0; 379 int i=0; 380 int j=0; 381 // 2006 bug fix... 382 int irrelevantCount = Math.min( totalRelevant, _numIrrelevant ); 383 List<Document> irrelevant = _judgedIrrelevantRetrieved.subList( 0, Math.min( totalRelevant, _judgedIrrelevantRetrieved.size() ) ); 384 double sum = 0; 385 // if no negative judgments, num_rel_ret/num_rel (trec_eval 8). 386 if (irrelevant.size()==0) sum = _relevantRetrieved.size(); 387 388 while( i < _relevantRetrieved.size() && j < irrelevant.size() ) { 389 Document rel = _relevantRetrieved.get(i); 390 Document irr = irrelevant.get(j); 391 392 if( rel.rank < irr.rank ) { 393 // we've just seen a relevant document; 394 // how many of the irrelevant set are ahead of us? 395 assert j <= totalRelevant; 396 sum += 1.0 - ((double) j / (double) irrelevantCount); 397 i++; 398 } else { 399 j++; 400 } 401 } 402 403 return sum / (double) totalRelevant; 404 } 405 406 /** 407 * <p>Normalized Discounted Cumulative Gain </p> 408 * 409 * This measure was introduced in Jarvelin, Kekalainen, "IR Evaluation Methods 410 * for Retrieving Highly Relevant Documents" SIGIR 2001. I copied the formula 411 * from Vassilvitskii, "Using Web-Graph Distance for Relevance Feedback in Web 412 * Search", SIGIR 2006. 413 * 414 * Score = N \sum_i (2^{r(i)} - 1) / \log(1 + i) 415 * 416 * Where N is such that the score cannot be greater than 1. We compute this 417 * by computing the DCG (unnormalized) of a perfect ranking. 418 * 419 * @return the normalized discounted cumulative gain (ndcg). 420 */ 421 public double normalizedDiscountedCumulativeGain( ) { 422 return normalizedDiscountedCumulativeGain( Math.max( _retrieved.size(), _judgments.size() ) ); 423 } 424 425 /** 426 * <p>Normalized Discounted Cumulative Gain </p> 427 * 428 * This measure was introduced in Jarvelin, Kekalainen, "IR Evaluation Methods 429 * for Retrieving Highly Relevant Documents" SIGIR 2001. I copied the formula 430 * from Vassilvitskii, "Using Web-Graph Distance for Relevance Feedback in Web 431 * Search", SIGIR 2006. 432 * 433 * Score = N \sum_i (2^{r(i)} - 1) / \log(1 + i) 434 * 435 * Where N is such that the score cannot be greater than 1. We compute this 436 * by computing the DCG (unnormalized) of a perfect ranking. 437 * 438 * @param documentsRetrieved 439 * 440 * @return the normalized discounted cumulative gain (ndcg). 441 */ 442 public double normalizedDiscountedCumulativeGain( int documentsRetrieved ) { 443 // first, compute the gain from an optimal ranking 444 double normalizer = normalizationTermNDCG( documentsRetrieved ); 445 if (normalizer == 0) return 0; 446 447 // now, compute the NDCG of the ranking and return that 448 double dcg = 0; 449 List<Document> truncated = _retrieved; 450 451 if( _retrieved.size() > documentsRetrieved ) 452 truncated = _retrieved.subList( 0, documentsRetrieved ); 453 454 455 for( Document document : truncated ) { 456 Judgment judgment = _judgments.get( document.documentNumber ); 457 458 if( judgment != null && judgment.judgment > 0 ) { 459 dcg += (Math.pow(2, judgment.judgment) - 1.0) / Math.log( 1 + document.rank ); 460 } 461 } 462 463 return dcg / normalizer; 464 } 465 466 protected double normalizationTermNDCG( int documentsRetrieved ) { 467 TreeMap<Integer, Integer> relevanceCounts = new TreeMap<Integer, Integer>(); 468 469 // the normalization term represents the highest possible DCG score 470 // that could possibly be attained. we compute this by taking the relevance 471 // judgments, ordering them by relevance value (highly relevant documents first) 472 // then calling that the ranked list, and computing its DCG value. 473 474 // we use negative judgment values so they come out of the map 475 // in order from highest to lowest relevance 476 for( Judgment judgment : _judgments.values() ) { 477 if( judgment.judgment == 0 ) 478 continue; 479 480 if( !relevanceCounts.containsKey(-judgment.judgment) ) { 481 relevanceCounts.put( -judgment.judgment, 0 ); 482 } 483 484 relevanceCounts.put( -judgment.judgment, relevanceCounts.get( -judgment.judgment ) + 1 ); 485 } 486 487 double normalizer = 0; 488 int documentsProcessed = 0; 489 490 for( Integer negativeRelevanceValue : relevanceCounts.keySet() ) { 491 int relevanceCount = (int)relevanceCounts.get( negativeRelevanceValue ); 492 int relevanceValue = -negativeRelevanceValue; 493 relevanceCount = Math.min( relevanceCount, documentsRetrieved - documentsProcessed ); 494 495 for( int i = 1; i <= relevanceCount; i++ ) { 496 normalizer += (Math.pow(2, relevanceValue) - 1.0) / Math.log( 1 + i + documentsProcessed ); 497 } 498 499 documentsProcessed += relevanceCount; 500 if( documentsProcessed >= documentsRetrieved ) 501 break; 502 } 503 504 return normalizer; 505 } 506 507 /** 508 * The number of relevant documents retrieved at a particular 509 * rank. This is equivalent to <tt>n * precision(n)</tt>. 510 * 511 * @param documentsRetrieved the rank 512 * @return the number of relevant docs at the rank. 513 */ 514 public int relevantRetrieved( int documentsRetrieved ) { 515 int low = 0; 516 int high = _relevantRetrieved.size() - 1; 517 518 // if we didn't retrieve anything relevant, we know the answer 519 if( _relevantRetrieved.size() == 0 ) 520 return 0; 521 522 // is this after the last relevant document? 523 Document lastRelevant = _relevantRetrieved.get( high ); 524 if( lastRelevant.rank <= documentsRetrieved ) 525 return _relevantRetrieved.size(); 526 527 // is this before the first relevant document? 528 Document firstRelevant = _relevantRetrieved.get( low ); 529 if( firstRelevant.rank > documentsRetrieved ) 530 return 0; 531 532 while( (high - low) >= 2 ) { 533 int middle = low + (high - low) / 2; 534 Document middleDocument = _relevantRetrieved.get( middle ); 535 536 if( middleDocument.rank == documentsRetrieved ) 537 return middle + 1; 538 else if( middleDocument.rank > documentsRetrieved ) 539 high = middle; 540 else 541 low = middle; 542 } 543 544 // if the high document had rank == documentsRetrieved, we would 545 // have already returned, either at the top, or at the return middle statement 546 assert _relevantRetrieved.get( low ).rank <= documentsRetrieved && 547 _relevantRetrieved.get( high ).rank > documentsRetrieved; 548 549 return low + 1; 550 } 551 552 /** 553 * @return The list of retrieved documents. 554 */ 555 public ArrayList<Document> retrievedDocuments() { 556 return _retrieved; 557 } 558 559 /** 560 * @return The list of all documents retrieved that were explicitly judged irrelevant. 561 */ 562 563 public ArrayList<Document> judgedIrrelevantRetrievedDocuments() { 564 return _judgedIrrelevantRetrieved; 565 } 566 567 /** 568 * This method returns a list of all documents that were retrieved 569 * but assumed to be irrelevant. This includes both documents that were 570 * judged to be irrelevant and those that were not judged at all. 571 * The list is returned in retrieval order. 572 * @return the list of all retrieved irrelevant documents. 573 */ 574 public ArrayList<Document> irrelevantRetrievedDocuments() { 575 return _irrelevantRetrieved; 576 } 577 578 /** 579 * Returns a list of retrieved documents that were judged relevant, 580 * in the order that they were retrieved. 581 * @return the list of all retrieved relevant documents 582 */ 583 public ArrayList<Document> relevantRetrievedDocuments() { 584 return _relevantRetrieved; 585 } 586 587 /** 588 * Returns a list of all documents judged relevant, whether they were 589 * retrieved or not. Documents are listed in the order they were retrieved, 590 * with those not retrieved coming last. 591 * @return the list of all relevant documents 592 */ 593 public ArrayList<Document> relevantDocuments() { 594 return _relevant; 595 } 596 597 /** 598 * Returns a list of documents that were judged relevant that 599 * were not retrieved. 600 * @return the relevant documents that were missed by the search engine 601 */ 602 public ArrayList<Document> relevantMissedDocuments() { 603 return _relevantMissed; 604 } 605}