/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   *  Redistributions of source code must retain the above copyright notice,
 *      this list of conditions and the following disclaimer.
 *
 *   *  Redistributions in binary form must reproduce the above copyright notice,
 *      this list of conditions and the following disclaimer in the documentation
 *      and/or other materials provided with the distribution.
 *
 *   *  Neither the name of the University of Southampton nor the names of its
 *      contributors may be used to endorse or promote products derived from this
 *      software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * IREval.java
 *
 * Created on November 30, 2006, 9:25 PM
 */

package org.lemurproject.ireval;

import org.lemurproject.ireval.RetrievalEvaluator.Document;
import org.lemurproject.ireval.RetrievalEvaluator.Judgment;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Map;
import java.util.TreeMap;

/**
 * A trec_eval-style retrieval evaluation tool in pure Java.
 * 
 * @author Trevor Strohman
 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
 */
public class IREval {
    /**
     * Loads a TREC judgments (qrels) file.
     *
     * @param filename The filename of the judgments file to load.
     * @return A map from query numbers to the list of judgments for each query.
     * @throws IOException if the file cannot be read.
     * @throws FileNotFoundException if the file cannot be found.
     */
    public static TreeMap< String, ArrayList<Judgment> > loadJudgments( String filename ) throws IOException, FileNotFoundException {
        // open file
        BufferedReader in = new BufferedReader(new FileReader( filename ));
        String line = null;
        TreeMap< String, ArrayList<Judgment> > judgments = new TreeMap< String, ArrayList<Judgment> >();
        String recentQuery = null;
        ArrayList<Judgment> recentJudgments = null;
        
        while( (line = in.readLine()) != null ) {
            // allow for multiple whitespace characters between fields
            String[] fields = line.split( "\\s+" );
            
            String number = fields[0];
            @SuppressWarnings("unused")
            String unused = fields[1];
            String docno = fields[2];
            String judgment = fields[3];
            int jVal = 0;
            try {
                jVal = Integer.valueOf( judgment );
            } catch (NumberFormatException e) {
                jVal = (int)Math.round(Double.valueOf( judgment ));
            }
            
            Judgment j = new Judgment( docno, jVal );
            
            if( recentQuery == null || !recentQuery.equals( number ) ) {
                if( !judgments.containsKey( number ) ) {
                    judgments.put( number, new ArrayList<Judgment>() );
                }
                
                recentJudgments = judgments.get( number );
                recentQuery = number;
            }
            
            recentJudgments.add( j );
        }

        in.close();
        return judgments;
    }
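    // Illustrative sketch (not part of the API): loading a judgments file before building an
    // evaluator. The qrels path below is hypothetical; any TREC-format judgments file with lines
    // of the form "<query> <iteration> <docno> <judgment>" should work.
    //
    //   TreeMap< String, ArrayList<Judgment> > judgments = IREval.loadJudgments( "qrels.txt" );
    //   System.out.println( "Queries with judgments: " + judgments.keySet() );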

    /**
     * Reads in a TREC ranking (run) file.
     *
     * @param filename The filename of the ranking file.
     * @return A map from query numbers to document ranking lists.
     * @throws IOException if the file cannot be read.
     * @throws FileNotFoundException if the file cannot be found.
     */
    @SuppressWarnings("unused")
    public static TreeMap< String, ArrayList<Document> > loadRanking( String filename ) throws IOException, FileNotFoundException {
        // open file
        BufferedReader in = new BufferedReader(new FileReader( filename ));
        String line = null;
        TreeMap< String, ArrayList<Document> > ranking = new TreeMap< String, ArrayList<Document> >();
        ArrayList<Document> recentRanking = null;
        String recentQuery = null;
        
        while( (line = in.readLine()) != null ) {
            // allow for multiple whitespace characters between fields
            String[] fields = line.split( "\\s+" );
            
            // 1 Q0 WSJ880711-0086 39 -3.05948 Exp
            String number = fields[0];
            String unused = fields[1];
            String docno = fields[2];
            String rank = fields[3];
            String score = fields[4];
            String runtag = fields[5];
            // lemur can output nan (or NaN)
            double scoreNumber;
            try {
                scoreNumber = Double.valueOf( score );
            } catch (NumberFormatException ex) {
                scoreNumber = 0.0;
            }
            
            Document document = new Document( docno, Integer.valueOf( rank ), scoreNumber );
            
            if( recentQuery == null || !recentQuery.equals( number ) ) {
                if( !ranking.containsKey( number ) ) {
                    ranking.put( number, new ArrayList<Document>() );
                }
                
                recentQuery = number;
                recentRanking = ranking.get( number );
            }
            
            recentRanking.add( document );
        }
        
        in.close();
        return ranking;
    }
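    // Illustrative sketch (not part of the API): reading a run file produced by a retrieval system.
    // The path below is hypothetical; lines follow the standard TREC run format
    // "<query> <iteration> <docno> <rank> <score> <runtag>", e.g. "1 Q0 WSJ880711-0086 39 -3.05948 Exp".
    //
    //   TreeMap< String, ArrayList<Document> > ranking = IREval.loadRanking( "run.txt" );
    //   System.out.println( "Queries in run: " + ranking.keySet() );
    //
    // Note that create() re-sorts each list by descending score and renumbers the ranks,
    // so the rank column in the file does not need to be trusted.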

    /**
     * Creates a SetRetrievalEvaluator from the data returned by loadRanking and loadJudgments.
     * @param allRankings map from query numbers to document rankings, as returned by loadRanking.
     * @param allJudgments map from query numbers to judgment lists, as returned by loadJudgments.
     * @return the evaluation result
     */
    public static SetRetrievalEvaluator create( TreeMap< String, ArrayList<Document> > allRankings, TreeMap< String, ArrayList<Judgment> > allJudgments ) {
        // Sort query numbers numerically when possible, falling back to string order.
        TreeMap< String, RetrievalEvaluator > evaluators = new TreeMap<String, RetrievalEvaluator>(new java.util.Comparator<String>() {
            @Override
            public int compare(String a, String b) {
                try {
                    Integer a1 = Integer.valueOf(a);
                    Integer b1 = Integer.valueOf(b);
                    return a1.compareTo(b1);
                } catch (NumberFormatException e) {
                    // not an integer
                    return a.compareTo(b);
                }
            }
        });

        for( String query : allRankings.keySet() ) {
            ArrayList<Judgment> judgments = allJudgments.get( query );
            ArrayList<Document> ranking = allRankings.get( query );

            // skip queries for which there are no judgments
            if( judgments == null || ranking == null ) {
                continue;
            }

            /* re-sort ranking by descending score, then renumber the ranks */
            java.util.Collections.sort(ranking, new java.util.Comparator<Document>() {
                @Override
                public int compare(Document a, Document b) {
                    if (a.score < b.score) return 1;
                    if (a.score == b.score) return 0;
                    return -1;
                }
            });
            int i = 1;
            for (Document d : ranking) {
                d.rank = i++;
            }

            RetrievalEvaluator evaluator = new RetrievalEvaluator( query, ranking, judgments );
            evaluators.put( query, evaluator );
        }
        
        return new SetRetrievalEvaluator( evaluators.values() );
    }
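    // Illustrative sketch (not part of the API): the typical single-run evaluation pipeline,
    // mirroring the two-argument mode of main(). The file names are hypothetical.
    //
    //   TreeMap< String, ArrayList<Document> > ranking = IREval.loadRanking( "run.txt" );
    //   TreeMap< String, ArrayList<Judgment> > judgments = IREval.loadJudgments( "qrels.txt" );
    //   SetRetrievalEvaluator setEvaluator = IREval.create( ranking, judgments );
    //   System.out.println( IREval.singleEvaluation( setEvaluator, true ) );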

    /**
     * Returns an output string for a single query, very similar to that of trec_eval.
     * @param query the query number, used to label each output line.
     * @param evaluator the evaluator holding the ranking and judgments for the query.
     * @return the result as a {@link String}
     */
    public static String singleQuery( String query, RetrievalEvaluator evaluator ) {
        StringWriter s = new StringWriter();
        PrintWriter out = new PrintWriter(s);
        String formatString = "%2$-25s\t%1$5s\t";
        // print trec_eval relational-style output
        // counts
        out.format( formatString + "%3$6d\n",       query, "num_ret",     evaluator.retrievedDocuments().size() );
        out.format( formatString + "%3$6d\n",       query, "num_rel",     evaluator.relevantDocuments().size() );
        out.format( formatString + "%3$6d\n",       query, "num_rel_ret", evaluator.relevantRetrievedDocuments().size() );

        // aggregate measures
        out.format( formatString + "%3$6.4f\n",     query, "map",         evaluator.averagePrecision() );
        out.format( formatString + "%3$6.4f\n",     query, "ndcg",        evaluator.normalizedDiscountedCumulativeGain() );
        out.format( formatString + "%3$6.4f\n",     query, "ndcg15",      evaluator.normalizedDiscountedCumulativeGain( 15 ) );
        out.format( formatString + "%3$6.4f\n",     query, "R-prec",      evaluator.rPrecision() );
        out.format( formatString + "%3$6.4f\n",     query, "bpref",       evaluator.binaryPreference() );
        out.format( formatString + "%3$6.4f\n",     query, "recip_rank",  evaluator.reciprocalRank() );

        // precision at fixed points
        int[] fixedPoints = RetrievalEvaluator.getFixedPoints();
        double[] vals = evaluator.precisionAtFixedPoints();
        for( int i=0; i<fixedPoints.length; i++ ) {
            int point = fixedPoints[i];
            out.format( formatString + "%3$6.4f\n", query, "P" + point,   vals[i] );
        }
        double[] precs = evaluator.interpolatedPrecision();
        double prec = 0;
        for( int i=0; i<precs.length; i++ ) {
            out.format( "ircl_prn.%3$3.2f%2$-18s\t%1$5s\t%4$6.4f\n", query, " ", prec, precs[i] );
            prec += 0.1;
        }
        out.format("\n");
        return s.toString();
    }
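    // Illustrative sketch (not part of the API): printing the per-query block for every query in a
    // previously built SetRetrievalEvaluator (see create() above); this is essentially what
    // singleEvaluation( setEvaluator, true ) does before the summary.
    //
    //   for( RetrievalEvaluator e : setEvaluator.getEvaluators() ) {
    //       System.out.print( IREval.singleQuery( e.queryName(), e ) );
    //   }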

    /**
     * Returns an output string summarising a set of queries, very similar to that of trec_eval.
     * @param setEvaluator the evaluator for the full query set.
     * @param showIndividual if true, per-query results are printed before the summary.
     * @return the result as a {@link String}
     */
    public static String singleEvaluation( SetRetrievalEvaluator setEvaluator, boolean showIndividual ) {
        StringWriter s = new StringWriter();
        PrintWriter out = new PrintWriter(s);
        String formatString = "%2$-25s\t%1$5s\t";
        // print trec_eval relational-style output
        if (showIndividual) {
            for( RetrievalEvaluator evaluator : setEvaluator.getEvaluators() ) {
                String query = evaluator.queryName();
                out.print(singleQuery(query, evaluator));
            }
        }
        // print summary data
        out.format( formatString + "%3$6d\n",     "all", "num_q",       setEvaluator.getEvaluators().size() );
        out.format( formatString + "%3$6d\n",     "all", "num_ret",     setEvaluator.numberRetrieved() );
        out.format( formatString + "%3$6d\n",     "all", "num_rel",     setEvaluator.numberRelevant() );
        out.format( formatString + "%3$6d\n",     "all", "num_rel_ret", setEvaluator.numberRelevantRetrieved() );

        out.format( formatString + "%3$6.4f\n",   "all", "map",         setEvaluator.meanAveragePrecision() );
        out.format( formatString + "%3$6.4f\n",   "all", "gm_ap",       setEvaluator.geometricMeanAveragePrecision() );
        out.format( formatString + "%3$6.4f\n",   "all", "ndcg",        setEvaluator.meanNormalizedDiscountedCumulativeGain() );
        out.format( formatString + "%3$6.4f\n",   "all", "R-prec",      setEvaluator.meanRPrecision() );
        out.format( formatString + "%3$6.4f\n",   "all", "bpref",       setEvaluator.meanBinaryPreference() );
        out.format( formatString + "%3$6.4f\n",   "all", "recip_rank",  setEvaluator.meanReciprocalRank() );

        // precision at fixed points
        int[] fixedPoints = SetRetrievalEvaluator.getFixedPoints();
        double[] precs = setEvaluator.precisionAtFixedPoints();

        for( int i=0; i<fixedPoints.length; i++ ) {
            int point = fixedPoints[i];
            out.format( formatString + "%3$6.4f\n", "all", "P" + point,   precs[i] );
        }
        double prec = 0;
        precs = setEvaluator.interpolatedPrecision();
        for( int i=0; i<precs.length; i++ ) {
            out.format( "ircl_prn.%3$3.2f%2$-18s\t%1$5s\t%4$6.4f\n", "all", " ", prec, precs[i] );
            prec += 0.1;
        }
        out.format("\n");
        return s.toString();
    }
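    // Illustrative usage note: passing false for showIndividual produces only the "all" summary
    // block, while true (as used by main()) prepends a per-query block for every evaluated query.
    //
    //   String summaryOnly = IREval.singleEvaluation( setEvaluator, false );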

    /**
     * Compare two sets of retrieval results.
     * 
     * @param baseline evaluator for the baseline run.
     * @param treatment evaluator for the treatment run.
     * @param baselineName label used for the baseline in the output.
     * @param treatmentName label used for the treatment in the output.
     * @return the result as a {@link String}.
     */
    public static String comparisonEvaluation( SetRetrievalEvaluator baseline, SetRetrievalEvaluator treatment, String baselineName, String treatmentName ) {

        StringWriter s = new StringWriter();
        PrintWriter out = new PrintWriter(s);
        String[] metrics = { "averagePrecision", "rPrecision", "ndcg", "bpref", "P5", "P10", "P20" };
        String formatString = "%1$-20s%2$-30s%3$6.4f\n";
        String integerFormatString = "%1$-20s%2$-30s%3$6d\n";
        if (treatment == null) return "No treatment results to compare against.";
        out.println("Comparing baseline: " + baselineName + " to treatment: " + treatmentName + "\n");
        for( String metric : metrics ) {
            Map<String, Double> baselineMetric = baseline.evaluateAll( metric );
            Map<String, Double> treatmentMetric = treatment.evaluateAll( metric );

            SetRetrievalComparator comparator = new SetRetrievalComparator( baselineMetric, treatmentMetric );

            out.format( formatString, metric, baselineName, comparator.meanBaselineMetric() );
            out.format( formatString, metric, treatmentName, comparator.meanTreatmentMetric() );

            out.format( integerFormatString, metric, "basebetter", comparator.countBaselineBetter() );
            out.format( integerFormatString, metric, "treatbetter", comparator.countTreatmentBetter() );
            out.format( integerFormatString, metric, "equal", comparator.countEqual() );

            out.format( formatString, metric, "ttest", comparator.pairedTTest() );
            out.format( formatString, metric, "randomized", comparator.randomizedTest() );
            out.format( formatString, metric, "signtest", comparator.signTest() );
        }

        return s.toString();
    }
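    // Illustrative sketch (not part of the API): comparing two runs against the same judgments,
    // mirroring the three-argument mode of main(). The file names and labels are hypothetical,
    // and "judgments" is assumed to be a map previously returned by loadJudgments().
    //
    //   SetRetrievalEvaluator baseline = IREval.create( IREval.loadRanking( "run.baseline.txt" ), judgments );
    //   SetRetrievalEvaluator treatment = IREval.create( IREval.loadRanking( "run.improved.txt" ), judgments );
    //   System.out.println( IREval.comparisonEvaluation( baseline, treatment, "baseline", "improved" ) );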

    /**
     * Prints tool usage information and exits.
     */
    public static void usage( ) {
        System.err.println( "ireval: " );
        System.err.println( "   There are two ways to use this program.  First, you can evaluate a single ranking: " );
        System.err.println( "      java -jar ireval.jar TREC-Ranking-File TREC-Judgments-File" );
        System.err.println( "   or, you can use it to compare two rankings with statistical tests: " );
        System.err.println( "      java -jar ireval.jar TREC-Baseline-Ranking-File TREC-Improved-Ranking-File TREC-Judgments-File" );
        System.exit(-1);
    }

    /**
     * Tool main method.
     * @param args command-line arguments; see {@link #usage()}.
     * @throws IOException if one of the input files cannot be read.
     */
    public static void main( String[] args ) throws IOException {
        try {
            if( args.length == 3 ) {
                TreeMap< String, ArrayList<Document> > baselineRanking = loadRanking( args[0] );
                TreeMap< String, ArrayList<Document> > treatmentRanking = loadRanking( args[1] );
                TreeMap< String, ArrayList<Judgment> > judgments = loadJudgments( args[2] );

                SetRetrievalEvaluator baseline = create( baselineRanking, judgments );
                SetRetrievalEvaluator treatment = create( treatmentRanking, judgments );

                // the labels below could be replaced with the input file names
                System.out.println(comparisonEvaluation( baseline, treatment, "baseline", "treatment" ));
            } else if( args.length == 2 ) {
                TreeMap< String, ArrayList<Document> > ranking = loadRanking( args[0] );
                TreeMap< String, ArrayList<Judgment> > judgments = loadJudgments( args[1] );

                SetRetrievalEvaluator setEvaluator = create( ranking, judgments );
                System.out.println(singleEvaluation( setEvaluator, true ));
            } else {
                usage();
            }
        } catch( Exception e ) {
            e.printStackTrace();
            usage();
        }
    }
}