/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * * Neither the name of the University of Southampton nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */ 030package org.openimaj.hadoop.tools.twitter.token.outputmode.jacard; 031 032import java.io.IOException; 033import java.io.PrintWriter; 034import java.io.StringReader; 035import java.util.Scanner; 036 037import org.openimaj.io.ReadWriteableASCII; 038 039import com.Ostermiller.util.CSVParser; 040import com.Ostermiller.util.CSVPrinter; 041 042/** 043 * An index encoding the difference between two sets 044 * 045 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 046 * 047 */ 048public class JacardIndex implements ReadWriteableASCII { 049 050 /** 051 * The number of words forming the intersection between now and historic 052 * words 053 */ 054 public long intersection; 055 /** 056 * The number of words forming the union between now and historic words 057 */ 058 public long union; 059 /** 060 * current time period 061 */ 062 public long time; 063 /** 064 * The jacard index is: J(A,B) = |intersection(A,B)| / |union(A,B)| for this 065 * time period 066 */ 067 public double jacardIndex; 068 069 /** 070 * @param time 071 * @param intersection 072 * @param union 073 */ 074 public JacardIndex(long time, long intersection, long union) { 075 this.time = time; 076 this.intersection = intersection; 077 this.union = union; 078 this.jacardIndex = (double) intersection / (double) union; 079 } 080 081 private JacardIndex() { 082 } 083 084 @Override 085 public void readASCII(Scanner in) throws IOException { 086 fromString(in.nextLine(), this); 087 } 088 089 private static void fromString(String nextLine, JacardIndex i) throws IOException { 090 final StringReader reader = new StringReader(nextLine); 091 final CSVParser csvreader = new CSVParser(reader); 092 final String[] line = csvreader.getLine(); 093 i.time = Long.parseLong(line[0]); 094 i.intersection = Long.parseLong(line[1]); 095 i.union = Long.parseLong(line[2]); 096 i.jacardIndex = (double) i.intersection / (double) i.union; 097 098 } 099 100 @Override 101 public String asciiHeader() { 102 return ""; 103 } 104 105 @Override 
106 public void writeASCII(PrintWriter out) throws IOException { 107 final CSVPrinter writer = new CSVPrinter(out); 108 writer.write(new String[] { 109 "" + this.time, 110 "" + intersection, 111 "" + union 112 }); 113 } 114 115 /** 116 * Read a new jacard index from a comma separated line 117 * 118 * @param next 119 * @return new JacardIndex 120 * @throws IOException 121 */ 122 public static JacardIndex fromString(String next) throws IOException { 123 final JacardIndex ind = new JacardIndex(); 124 fromString(next, ind); 125 return ind; 126 } 127 128 @Override 129 public boolean equals(Object other) { 130 if (!(other instanceof JacardIndex)) 131 return false; 132 final JacardIndex that = (JacardIndex) other; 133 return that.intersection == this.intersection && that.union == this.union; 134 } 135 136}