001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030/*
031 * RetrievalEvaluator
032 *
033 * Use of this software is governed by the Creative Commons Attribution license:
034 *    http://creativecommons.org/licenses/by/2.5/
035 *
036 * Trevor Strohman, September 23, 2005
037 */
038package org.lemurproject.ireval;
039
040import java.util.ArrayList;
041import java.util.HashMap;  
042import java.util.TreeMap;
043import java.util.List;
044import java.util.Collection;
045
046/**
047 * <p>A retrieval evaluator object computes a variety of standard
048 * information retrieval metrics commonly used in TREC, including
049 * binary preference (BPREF), geometric mean average precision (GMAP),
050 * mean average precision (MAP), and standard precision and recall.
051 * In addition, the object gives access to the relevant documents that
052 * were found, and the relevant documents that were missed.</p>
053 *
054 * <p>BPREF is defined in Buckley and Voorhees, "Retrieval Evaluation
055 * with Incomplete Information", SIGIR 2004.</p>
056 *
057 * @author Trevor Strohman
058 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 
059 */
public class RetrievalEvaluator {
    
    /**
     * This class represents a document returned by a retrieval
     * system.  It can be subclassed if you want to add system-dependent
     * information to the document representation.
     */
    public static class Document {
        /**
         * Constructs a new Document object.
         *
         * @param documentNumber The document identifier.
         * @param rank The rank of the document in a retrieved ranked list.
         * @param score The score given to this document by the retrieval system.
         */
        public Document( String documentNumber, int rank, double score ) {
            this.documentNumber = documentNumber;
            this.rank = rank;
            this.score = score;
        }
        
        /**
         * Constructs a new Document object for a document that was judged
         * but not retrieved.  The rank and score take the worst possible
         * values so such a document orders after every retrieved one.
         *
         * @param documentNumber The document identifier.
         */
        public Document( String documentNumber ) {
            this.documentNumber = documentNumber;
            this.rank = Integer.MAX_VALUE;
            this.score = Double.NEGATIVE_INFINITY;
        }
        
        /** The rank of the document in a retrieved ranked list. */
        public int rank;
        /** The document identifier. */
        public String documentNumber;
        /** The score given to this document by the retrieval system. */
        public double score;
    }
    
    /**
     * This class represents a relevance judgment of a particular document
     * for a specific query.
     */
    public static class Judgment {
        /**
         * Constructs a new Judgment instance.
         *
         * @param documentNumber The document identifier.
         * @param judgment The relevance judgment for this document, where positive values mean relevant, and zero means not relevant.
         */
        public Judgment( String documentNumber, int judgment ) {
            this.documentNumber = documentNumber;
            this.judgment = judgment;
        }
        
        /** The document identifier. */
        public String documentNumber;
        /** The relevance judgment for this document, where positive values mean relevant, and zero means not relevant. */
        public int judgment;
    }
    
    /** Name of the query this evaluator describes. */
    private String _queryName;
    /** The full retrieved ranked list, in retrieval order. */
    private ArrayList<Document> _retrieved;
    /** Judged documents (any judgment value) that were not retrieved. */
    private ArrayList<Document> _judgedMissed;
    /** All documents judged relevant, whether retrieved or not. */
    private ArrayList<Document> _relevant;
    /** Retrieved documents judged relevant, in retrieval order. */
    private ArrayList<Document> _relevantRetrieved;
    /** Retrieved documents explicitly judged not relevant, in retrieval order. */
    private ArrayList<Document> _judgedIrrelevantRetrieved;
    /** Retrieved documents assumed irrelevant: judged irrelevant or unjudged. */
    private ArrayList<Document> _irrelevantRetrieved;
    /** Documents judged relevant that were not retrieved. */
    private ArrayList<Document> _relevantMissed;
    /** All judgments, keyed by document identifier. */
    private HashMap<String, Judgment> _judgments;
    /** Count of documents judged not relevant (judgment <= 0); used by bpref. */
    private int _numIrrelevant;

    /**
     * Creates a new instance of RetrievalEvaluator
     * 
     * @param queryName The name of the query being evaluated.
     * @param retrieved A ranked list of retrieved documents.
     * @param judgments A collection of relevance judgments.
     */
    public RetrievalEvaluator( String queryName, List<Document> retrieved, Collection<Judgment> judgments ) {
        _queryName = queryName;
        _retrieved = new ArrayList<Document>( retrieved );
        
        // Partition the retrieval and judgments once up front; all of the
        // metric methods read from the resulting lists.
        _buildJudgments( judgments );
        _judgeRetrievedDocuments();
        _findMissedDocuments();
        _findRelevantDocuments();
    }
    
    /** Indexes the judgments by document id and counts judged-irrelevant docs. */
    private void _buildJudgments( Collection<Judgment> judgments ) {
        _judgments = new HashMap<String, Judgment>();
        _numIrrelevant = 0;
        for( Judgment judgment : judgments ) {
            _judgments.put( judgment.documentNumber, judgment );
            // count total number of judged irrelevant for bpref.
            if (judgment.judgment <= 0) _numIrrelevant++;
        }
    }
    
    /**
     * Splits the retrieved list into relevant / irrelevant / judged-irrelevant
     * sublists.  Unjudged documents are assumed irrelevant.
     */
    private void _judgeRetrievedDocuments() {
        _irrelevantRetrieved = new ArrayList<Document>();
        _relevantRetrieved = new ArrayList<Document>();
        _judgedIrrelevantRetrieved = new ArrayList<Document>();
        
        for( Document document: _retrieved ) {
            Judgment judgment = _judgments.get( document.documentNumber );
            
            boolean relevant = ( judgment != null ) && ( judgment.judgment > 0 );
            boolean judgedIrrelevant = ( judgment != null ) && ( judgment.judgment <= 0 );
            
            if( relevant ) {
                _relevantRetrieved.add( document );
            } else {
                // Either explicitly judged irrelevant, or unjudged (assumed irrelevant).
                _irrelevantRetrieved.add( document );
                
                if( judgedIrrelevant ) {
                    _judgedIrrelevantRetrieved.add( document );
                }
            }
        }
    }
    
    /**
     * Finds judged documents that were not retrieved at all, recording them
     * with worst-case rank/score via the single-argument Document constructor.
     */
    private void _findMissedDocuments() {
        HashMap<String, Judgment> missedDocuments = new HashMap<String, Judgment>( _judgments );
        
        // Anything that appears in the retrieval (judged either way) was not missed.
        for( Document document : _relevantRetrieved ) {
            missedDocuments.remove( document.documentNumber );
        }
        
        for( Document document : _judgedIrrelevantRetrieved ) {
            missedDocuments.remove( document.documentNumber );
        }
        
        _judgedMissed = new ArrayList<Document>();
        _relevantMissed = new ArrayList<Document>();
        
        for( Judgment judgment : missedDocuments.values() ) {
            Document document = new Document( judgment.documentNumber );
            _judgedMissed.add( document );
            
            if( judgment.judgment > 0 ) {
                _relevantMissed.add( document );
            }
        }
    }
    
    /** Collects all relevant documents: those retrieved, then those missed. */
    private void _findRelevantDocuments() {
        _relevant = new ArrayList<Document>();
        _relevant.addAll( _relevantRetrieved );
        _relevant.addAll( _relevantMissed );
    }
    
    /**
     * @return the name of the query represented by this evaluator.
     */
    public String queryName() {
        return _queryName;
    }
    
    /** Ranks at which precision is conventionally reported (TREC fixed points). */
    private static final int[] fixedPoints = { 5, 10, 15, 20, 30, 100, 200, 500, 1000 };
    
    /**
     * @return a copy of the fixed points (number of retrieved docs) at which
     *         precision is evaluated.  A defensive copy is returned so callers
     *         cannot mutate the shared configuration.
     */
    public static int[] getFixedPoints() {
        return fixedPoints.clone();
    }
    
    /** Lazily computed cache for {@link #precisionAtFixedPoints()}; not thread-safe. */
    private double[] _pFP = null;
        
    /**
     * @return the precision at the fixed points specified by {@link #getFixedPoints()}.
     */
    public double[] precisionAtFixedPoints(){
        if (_pFP == null ) {
            // precision at fixed points
            _pFP = new double [fixedPoints.length];
            int i = 0;
            for( int point : fixedPoints ) {
                _pFP[i++] = precision( point );
            }
        }
        return _pFP;
    }
    
    /** Lazily computed cache for {@link #interpolatedPrecision()}; not thread-safe. */
    private double[] _ip = null;
    
    /**
     * @return the interpolated precision at 10% recall intervals
     */
    public double[] interpolatedPrecision(){
        if (_ip == null) {
            int size = _relevant.size();
            _ip = new double[11];
            // cuts[i] is the smallest number of relevant docs achieving >= i*10% recall.
            int [] cuts = new int[11];
            cuts[0] = 0;
            for (int i = 1; i < 11; i++) {                              
                cuts[i] = ((size * i) + 9)/10;   // ceiling of size*i/10
                _ip[i] = 0;
            }
            // Recall levels beyond what was actually retrieved stay at 0.
            int current_cut = 10;
            int relRet = _relevantRetrieved.size();
            while (cuts[current_cut] > relRet) current_cut--;
            // Walk the relevant retrievals backwards, carrying the running maximum
            // precision (the interpolation step) down through each recall cut.
            double prec = 0;
            for (int i = relRet-1; i>=0; i--) {
                int rank = _relevantRetrieved.get(i).rank;
                double newprec = precision(rank);
                prec = Math.max(prec, newprec);
                while (current_cut >= 0 && cuts[current_cut] == (i+1)) {
                    _ip[current_cut--] = prec;
                }
            }
            _ip[0] = prec;
        }
        return _ip;
    }
    
    /**
     * Returns the precision of the retrieval at a given number of documents retrieved.
     * The precision is the number of relevant documents retrieved
     * divided by the total number of documents retrieved.
     *
     * @param documentsRetrieved The evaluation rank.
     * @return the precision at the given number of retrieved documents.
     */
    public double precision( int documentsRetrieved ) {
        if (documentsRetrieved == 0) return 0;
        return (double) relevantRetrieved( documentsRetrieved ) / (double) documentsRetrieved;
    }
    
    /**
     * Returns the recall of the retrieval at a given number of documents retrieved.
     * The recall is the number of relevant documents retrieved
     * divided by the total number of relevant documents for the query.
     *
     * @param documentsRetrieved The evaluation rank.
     * @return the recall at the given number of retrieved documents.
     */
    public double recall( int documentsRetrieved ) {
        if (_relevant.size() == 0) return 0;
        return (double) relevantRetrieved( documentsRetrieved ) / (double) _relevant.size();
    }
    
    /**
     * Returns the precision at the rank equal to the total number of
     * relevant documents retrieved.  This method is equivalent to
     * precision( relevantDocuments().size() ). If R is greater than
     * the number of documents retrieved, the non-retrieved documents
     * are assumed to be non-relevant (cf trec_eval 8).
     * @return the r-precision 
     */
    public double rPrecision( ) {
        // precision(R) already treats ranks past the end of the retrieval as
        // non-relevant, so no special casing is needed here.
        return precision( _relevant.size() );
    }
    
    /**
     * Returns the reciprocal of the rank of the first relevant document
     * retrieved, or zero if no relevant documents were retrieved.
     * @return the reciprocal rank
     */
    public double reciprocalRank( ) {
        if( _relevantRetrieved.size() == 0 )
            return 0;
        
        return 1.0 / (double) _relevantRetrieved.get(0).rank;
    }
    
    /**
     * Returns the average precision of the query.<p>
     *
     * Suppose the precision is evaluated once at the rank of
     * each relevant document in the retrieval.  If a document is
     * not retrieved, we assume that it was retrieved at rank infinity.
     * The mean of all these precision values is the average precision.
     * @return the average precision
     */
    public double averagePrecision( ) {
        double sumPrecision = 0;
        int relevantCount = 0;
        
        // _relevantRetrieved is in rank order, so relevantCount at each step
        // is exactly the number of relevant docs at or above document.rank,
        // making relevantCount/rank the precision at that rank.
        for( Document document : _relevantRetrieved ) {
            relevantCount++;
            sumPrecision += relevantCount / (double) document.rank;
        }
        if (_relevant.size() > 0)
            return sumPrecision / _relevant.size();
        return 0;
    }

    
    /**
     * <p>The binary preference measure, as presented in Buckley, Voorhees
     * "Retrieval Evaluation with Incomplete Information", SIGIR 2004.
     * This implementation is the 'pure' version, which is the one
     * used in Buckley's trec_eval (v 8 with bpref bugfix).</p>
     *
     * <p>The formula is:
     * <tt>1/R \sum_{r} 1 - |n ranked greater than r| / min(R, N)</tt>
     * where R is the number of relevant documents for this topic,
     * N is the number of irrelevant documents judged for this topic,
     * and
     * n is a member of the set of first R judged irrelevant documents
     * retrieved.</p>
     * @return the binary preference.
     */
    public double binaryPreference( ) {
        // Edge case: no relevant documents retrieved
        // Edge case: more relevant documents than count retrieved
        int totalRelevant = _relevant.size();
        if (totalRelevant == 0) return 0;
        int i=0;
        int j=0;
        // 2006 bug fix: the denominator is min(R, N), not R.
        int irrelevantCount = Math.min( totalRelevant, _numIrrelevant );
        // Only the first R judged-irrelevant retrievals participate.
        List<Document> irrelevant = _judgedIrrelevantRetrieved.subList( 0, Math.min( totalRelevant, _judgedIrrelevantRetrieved.size() ) );
        double sum = 0;
        // if no negative judgments, num_rel_ret/num_rel (trec_eval 8).
        if (irrelevant.size()==0) sum = _relevantRetrieved.size();
        
        // Merge-walk the two rank-ordered lists; j counts judged-irrelevant
        // documents ranked ahead of the current relevant document.
        while( i < _relevantRetrieved.size() && j < irrelevant.size() ) {
            Document rel = _relevantRetrieved.get(i);
            Document irr = irrelevant.get(j);
            
            if( rel.rank < irr.rank ) {
                // we've just seen a relevant document;
                // how many of the irrelevant set are ahead of us?
                assert j <= totalRelevant;
                sum += 1.0 - ((double) j / (double) irrelevantCount);
                i++;
            } else {
                j++;
            }
        }
        
        return sum / (double) totalRelevant;
    }    

    /** 
     * <p>Normalized Discounted Cumulative Gain </p>
     *
     * This measure was introduced in Jarvelin, Kekalainen, "IR Evaluation Methods
     * for Retrieving Highly Relevant Documents" SIGIR 2001.  I copied the formula
     * from Vassilvitskii, "Using Web-Graph Distance for Relevance Feedback in Web
     * Search", SIGIR 2006.
     *
     * Score = N \sum_i (2^{r(i)} - 1) / \log(1 + i)
     *
     * Where N is such that the score cannot be greater than 1.  We compute this
     * by computing the DCG (unnormalized) of a perfect ranking.
     * 
     * @return the normalized discounted cumulative gain (ndcg). 
     */
    public double normalizedDiscountedCumulativeGain( ) {
        // Evaluate deep enough to cover both the retrieval and all judgments.
        return normalizedDiscountedCumulativeGain( Math.max( _retrieved.size(), _judgments.size() ) );
    }
    
    /** 
     * <p>Normalized Discounted Cumulative Gain </p>
     *
     * This measure was introduced in Jarvelin, Kekalainen, "IR Evaluation Methods
     * for Retrieving Highly Relevant Documents" SIGIR 2001.  I copied the formula
     * from Vassilvitskii, "Using Web-Graph Distance for Relevance Feedback in Web
     * Search", SIGIR 2006.
     *
     * Score = N \sum_i (2^{r(i)} - 1) / \log(1 + i)
     *
     * Where N is such that the score cannot be greater than 1.  We compute this
     * by computing the DCG (unnormalized) of a perfect ranking.
     * 
     * @param documentsRetrieved the rank cutoff at which to evaluate.
     * 
     * @return the normalized discounted cumulative gain (ndcg).
     */                     
    public double normalizedDiscountedCumulativeGain( int documentsRetrieved ) {
        // first, compute the gain from an optimal ranking  
        double normalizer = normalizationTermNDCG( documentsRetrieved );
        if (normalizer == 0) return 0;
        
        // now, compute the NDCG of the ranking and return that
        double dcg = 0;
        List<Document> truncated = _retrieved;
        
        if( _retrieved.size() > documentsRetrieved )
            truncated = _retrieved.subList( 0, documentsRetrieved );
        
        // Unjudged and judged-irrelevant documents contribute zero gain.
        for( Document document : truncated ) {
            Judgment judgment = _judgments.get( document.documentNumber );
            
            if( judgment != null && judgment.judgment > 0 ) {
                dcg += (Math.pow(2, judgment.judgment) - 1.0) / Math.log( 1 + document.rank );
            }
        }    
        
        return dcg / normalizer;
    }           
    
    /**
     * Computes the DCG of a perfect ranking of the judged documents,
     * truncated at the given rank; this is the NDCG normalization term.
     *
     * @param documentsRetrieved the rank cutoff at which to evaluate.
     * @return the ideal (maximum attainable) DCG.
     */
    protected double normalizationTermNDCG( int documentsRetrieved ) {
        TreeMap<Integer, Integer> relevanceCounts = new TreeMap<Integer, Integer>();
        
        // the normalization term represents the highest possible DCG score
        // that could possibly be attained.  we compute this by taking the relevance
        // judgments, ordering them by relevance value (highly relevant documents first)
        // then calling that the ranked list, and computing its DCG value.
                                      
        // we use negative judgment values so they come out of the map
        // in order from highest to lowest relevance
        for( Judgment judgment : _judgments.values() ) {
            if( judgment.judgment == 0 )
                continue;
                
            if( !relevanceCounts.containsKey(-judgment.judgment) ) {
                relevanceCounts.put( -judgment.judgment, 0 );
            }                                               
            
            relevanceCounts.put( -judgment.judgment, relevanceCounts.get( -judgment.judgment ) + 1 );
        }                                                                                          
                
        double normalizer = 0;
        int documentsProcessed = 0; 
        
        for( Integer negativeRelevanceValue : relevanceCounts.keySet() ) {        
            int relevanceCount = relevanceCounts.get( negativeRelevanceValue );
            int relevanceValue = -negativeRelevanceValue;
            // Don't count ideal documents past the evaluation cutoff.
            relevanceCount = Math.min( relevanceCount, documentsRetrieved - documentsProcessed );
            
            for( int i = 1; i <= relevanceCount; i++ ) {
                normalizer += (Math.pow(2, relevanceValue) - 1.0) / Math.log( 1 + i + documentsProcessed ); 
            }
            
            documentsProcessed += relevanceCount;
            if( documentsProcessed >= documentsRetrieved )
                break;
        }                              
        
        return normalizer;
    }
    
    /**
     * The number of relevant documents retrieved at a particular
     * rank.  This is equivalent to <tt>n * precision(n)</tt>.
     * 
     * @param documentsRetrieved the rank 
     * @return the number of relevant docs at the rank.
     */
    public int relevantRetrieved( int documentsRetrieved ) {
        int low = 0;
        int high = _relevantRetrieved.size() - 1;
        
        // if we didn't retrieve anything relevant, we know the answer
        if( _relevantRetrieved.size() == 0 )
            return 0;
        
        // is this after the last relevant document?
        Document lastRelevant = _relevantRetrieved.get( high );
        if( lastRelevant.rank <= documentsRetrieved )
            return _relevantRetrieved.size();
        
        // is this before the first relevant document?
        Document firstRelevant = _relevantRetrieved.get( low );
        if( firstRelevant.rank > documentsRetrieved )
            return 0;
        
        // binary search over the rank-sorted relevant retrievals for the
        // last relevant document at or before documentsRetrieved.
        while( (high - low) >= 2 ) {
            int middle = low + (high - low) / 2;
            Document middleDocument = _relevantRetrieved.get( middle );
            
            if( middleDocument.rank == documentsRetrieved )
                return middle + 1;
            else if( middleDocument.rank > documentsRetrieved )
                high = middle;
            else
                low = middle;
        }
        
        // if the high document had rank == documentsRetrieved, we would
        // have already returned, either at the top, or at the return middle statement
        assert _relevantRetrieved.get( low ).rank <= documentsRetrieved &&
            _relevantRetrieved.get( high ).rank > documentsRetrieved;
        
        return low + 1;
    }
    
    /**
     * @return The list of retrieved documents.
     */
    public ArrayList<Document> retrievedDocuments() {
        return _retrieved;
    }
    
    /**
     * @return The list of all documents retrieved that were explicitly judged irrelevant.
     */
    public ArrayList<Document> judgedIrrelevantRetrievedDocuments() {
        return _judgedIrrelevantRetrieved;
    }
    
    /**
     * This method returns a list of all documents that were retrieved
     * but assumed to be irrelevant.  This includes both documents that were
     * judged to be irrelevant and those that were not judged at all.
     * The list is returned in retrieval order.
     * @return the list of all retrieved irrelevant documents.
     */
    public ArrayList<Document> irrelevantRetrievedDocuments() {
        return _irrelevantRetrieved;
    }
    
    /**
     * Returns a list of retrieved documents that were judged relevant,
     * in the order that they were retrieved.
     * @return the list of all retrieved relevant documents
     */
    public ArrayList<Document> relevantRetrievedDocuments() {
        return _relevantRetrieved;
    }
    
    /**
     * Returns a list of all documents judged relevant, whether they were
     * retrieved or not.  Documents are listed in the order they were retrieved,
     * with those not retrieved coming last.
     * @return the list of all relevant documents 
     */
    public ArrayList<Document> relevantDocuments() {
        return _relevant;
    }
    
    /**
     * Returns a list of documents that were judged relevant that
     * were not retrieved.
     * @return the relevant documents that were missed by the search engine
     */
    public ArrayList<Document> relevantMissedDocuments() {
        return _relevantMissed;
    }
}