001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030/**
031 *
032 */
033package org.openimaj.experiment.evaluation.agreement;
034
035import gnu.trove.map.hash.TObjectIntHashMap;
036
037import java.util.Map;
038
/**
 * Calculates the interrater agreement for a given dataset between two (and only
 * two) raters.
 * <p>
 * Cohen's Kappa is defined as: (PrA - PrE) / (1 - PrE) where PrA is the
 * observed proportion of agreement, and PrE is the probability of random
 * agreement. PrA = agreement / total, and PrE is the sum, over every
 * annotation value X, of Pr(r1,X) * Pr(r2,X): the probability that both raters
 * would choose X if each picked independently at random according to their
 * own answer frequencies. For the two-category case with annotations X and Y
 * this reduces to PrE = PrX + PrY.
 * 
 * @see "http://en.wikipedia.org/wiki/Cohen's_kappa"
 * @author David Dupplaw (dpd@ecs.soton.ac.uk)
 * @created 12 Aug 2013
 */
053public class CohensKappaInterraterAgreement
054{
055        /**
056         * The input should be a {@link Map} for each rater where the keys represent
057         * all the subjects that were rated by the raters and the values represent
058         * the annotations given by the raters. Agreement between the raters is
059         * determined by {@link #equals(Object)} for the INSTANCE type. Annotations
060         * for subjects which are not in both sets are ignored.
061         * 
062         * @see "http://en.wikipedia.org/wiki/Cohen's_kappa"
063         * 
064         * @param rater1
065         *            The annotations from rater 1
066         * @param rater2
067         *            The annotations from rater 2
068         * @return Cohen's Kappa [0,1]
069         */
070        public static <K, A> double calculate(
071                        final Map<K, A> rater1,
072                        final Map<K, A> rater2)
073        {
074                int totalCount = 0;
075                int agreementCount = 0;
076                final TObjectIntHashMap<A> answerCountsR1 = new TObjectIntHashMap<A>();
077                final TObjectIntHashMap<A> answerCountsR2 = new TObjectIntHashMap<A>();
078
079                for (final K subjectKey : rater1.keySet())
080                {
081                        // We can only form an agreement if both raters rated this
082                        // specific subject, so let's check
083                        if (rater2.keySet().contains(subjectKey))
084                        {
085                                final A r1a = rater1.get(subjectKey);
086                                final A r2a = rater2.get(subjectKey);
087
088                                // It's possible that the key exists but is mapped to
089                                // a null value (for example, if majority voting was used
090                                // to generate the set and there was no majority).
091                                if (r1a == null || r2a == null)
092                                        continue;
093
094                                // Get the answers from the raters
095                                final A annotation1 = r1a;
096                                final A annotation2 = r2a;
097
098                                // Count the agreements
099                                if (annotation1.equals(annotation2))
100                                        agreementCount++;
101
102                                // Count each of the answers for each of the raters
103                                answerCountsR1.putIfAbsent(annotation1, 0);
104                                answerCountsR2.putIfAbsent(annotation2, 0);
105                                answerCountsR1.increment(annotation1);
106                                answerCountsR2.increment(annotation2);
107
108                                // Keep a running total
109                                totalCount++;
110                        }
111                }
112
113                System.out.println(answerCountsR1);
114
115                final double PrA = agreementCount / (double) totalCount;
116                System.out.println(PrA);
117
118                double PrE = 0;
119                for (final A ann : answerCountsR1.keySet())
120                {
121                        final Integer i = answerCountsR2.get(ann);
122                        final double PrAnnR1 = answerCountsR1.get(ann) / (double) totalCount;
123                        final double PrAnnR2 = (i == null ? 0 : i) / (double) totalCount;
124                        PrE += PrAnnR1 * PrAnnR2;
125                }
126                System.out.println(PrE);
127
128                final double kappa = (PrA - PrE) / (1d - PrE);
129
130                return kappa;
131        }
132}