001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.experiment.evaluation.cluster.analyser;
031
032import net.sf.jasperreports.engine.JRException;
033import net.sf.jasperreports.engine.JasperPrint;
034
035import org.openimaj.experiment.evaluation.AnalysisResult;
036
037/**
038 * A measure of how pure each cluster is.
039 * P = 1/N Sigma_k max_j | w_k AND c_j |
040 *
041 * Count the true classes of all the elements in a class, make a count of the largest group from each cluster,
042 * divide by number of elements in all clusters.
043 *
044 * High means: most of the clusters had a high number of a single class
045 * Low means: most of the clusters had a roughly equal spread of all the classes
046 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
047 */
048public class PurityAnalysis implements AnalysisResult,RandomBaselineWrappable{
049
050        /**
051         * the purity
052         */
053        public double purity;
054
055        @Override
056        public JasperPrint getSummaryReport(String title, String info) throws JRException {
057                throw new UnsupportedOperationException();
058        }
059
060        @Override
061        public JasperPrint getDetailReport(String title, String info) throws JRException {
062                throw new UnsupportedOperationException();
063        }
064
065        @Override
066        public String getSummaryReport() {
067                return toString();
068        }
069
070        @Override
071        public String getDetailReport() {
072                return toString();
073        }
074        
075        @Override
076        public String toString() {
077                return String.format("purity=%2.4f",purity);
078        }
079
080        @Override
081        public double score() {
082                return this.purity;
083        }
084
085}