001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.hadoop.tools.twitter.token.mode.dfidf; 031 032import gnu.trove.map.hash.TLongObjectHashMap; 033import gnu.trove.procedure.TLongObjectProcedure; 034 035import java.io.DataInput; 036import java.io.DataOutput; 037import java.io.IOException; 038import java.util.Arrays; 039 040import org.openimaj.hadoop.tools.twitter.token.mode.dfidf.TimeFrequencyHolder.TimeFrequency; 041import org.openimaj.io.ReadWriteableBinary; 042 043/** 044 * A {@link ReadWriteableBinary} {@link TLongObjectHashMap} 045 * 046 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 047 * 048 */ 049public class TimeFrequencyHolder extends TLongObjectHashMap<TimeFrequency> implements 050 ReadWriteableBinary 051{ 052 053 /** 054 * Holds the number of a thing at a moment in time and the total number of 055 * that thing seen across all time 056 * 057 * 058 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 059 * 060 */ 061 public static class TimeFrequency implements ReadWriteableBinary { 062 long time; 063 long periodFrequency; 064 long cumulativeFrequency; 065 066 /** 067 * default 068 */ 069 public TimeFrequency() { 070 071 } 072 073 @Override 074 public String toString() { 075 return String.format("t(%d) tf(%d) Tf(%d)", this.time, periodFrequency, cumulativeFrequency); 076 } 077 078 /** 079 * initialise, presumed to be at the beginning of some time period so 080 * the cumulative == the period frequency 081 * 082 * @param time 083 * @param frequency 084 */ 085 public TimeFrequency(long time, long frequency) { 086 this.time = time; 087 this.periodFrequency = frequency; 088 this.cumulativeFrequency = frequency; 089 090 } 091 092 @Override 093 public void readBinary(DataInput in) throws IOException { 094 time = in.readLong(); 095 periodFrequency = in.readLong(); 096 this.cumulativeFrequency = periodFrequency; 097 } 098 099 @Override 100 public byte[] binaryHeader() { 101 return "".getBytes(); 102 } 103 104 @Override 105 public void writeBinary(DataOutput out) throws IOException { 106 out.writeLong(time); 107 out.writeLong(periodFrequency); 108 } 109 110 /** 111 * Given a {@link TimeFrequency} instance, keep count of cumulative 112 * frequency and set the periodFrequency to the one furthest along in 113 * time 114 * 115 * @param other 116 * @return a new {@link TimeFrequency} instance 117 */ 118 public TimeFrequency combine(TimeFrequency other) { 119 final TimeFrequency nHolder = new TimeFrequency(); 120 TimeFrequency future, past; 121 if (this.time > other.time) { // this is the future time instance 122 // (so should be the time we 123 // remember) 124 future = this; 125 past = other; 126 } 127 else if (other.time > this.time) { // other is the future time 128 // instance 129 future = other; 130 past = this; 131 } 132 else { // equal time instances, choose other as the "true" value 133 nHolder.time = other.time; 134 nHolder.periodFrequency = other.periodFrequency; 135 nHolder.cumulativeFrequency = other.cumulativeFrequency; 136 return nHolder; 137 } 138 nHolder.time = future.time; 139 nHolder.periodFrequency = future.periodFrequency; 140 nHolder.cumulativeFrequency = past.cumulativeFrequency + future.periodFrequency; 141 return nHolder; 142 143 } 144 145 /** 146 * @param l 147 * @param nTweets 148 * @return a new {@link TimeFrequency} instance 149 */ 150 public TimeFrequency combine(long l, long nTweets) { 151 final TimeFrequency ntf = new TimeFrequency(l, nTweets); 152 return combine(ntf); 153 } 154 } 155 156 /** 157 * default 158 */ 159 public TimeFrequencyHolder() { 160 } 161 162 /** 163 * For every held {@link TimeFrequency} reset {@link TimeFrequency} 164 * cumulativeFrequency = {@link TimeFrequency} periodFrequency and then go 165 * through each in key-value order and use 166 * {@link TimeFrequency#combine(TimeFrequency)} to calculate a cumulative 167 * count 168 */ 169 public void recalculateCumulativeFrequencies() { 170 final long[] sortedKeys = this.keys(); 171 Arrays.sort(sortedKeys); 172 TimeFrequency current = null; 173 for (int i = 0; i < sortedKeys.length; i++) { 174 final long k = sortedKeys[i]; 175 final TimeFrequency held = this.get(k); 176 held.cumulativeFrequency = held.periodFrequency; 177 if (current == null) 178 { 179 current = held; 180 } 181 else { 182 current = current.combine(this.get(k)); 183 } 184 this.put(k, current); 185 } 186 } 187 188 @Override 189 public String toString() { 190 final StringBuilder builder = new StringBuilder(); 191 final long[] sortedKeys = this.keys(); 192 Arrays.sort(sortedKeys); 193 for (int i = 0; i < sortedKeys.length; i++) { 194 final long k = sortedKeys[i]; 195 builder.append(String.format("%d - %s\n", k, this.get(k))); 196 } 197 return builder.toString(); 198 } 199 200 @Override 201 public void readBinary(DataInput in) throws IOException { 202 final long count = in.readLong(); 203 for (int i = 0; i < count; i++) { 204 final TimeFrequency tf = new TimeFrequency(); 205 tf.readBinary(in); 206 this.put(tf.time, tf); 207 } 208 } 209 210 @Override 211 public byte[] binaryHeader() { 212 return "".getBytes(); 213 } 214 215 @Override 216 public void writeBinary(final DataOutput out) throws IOException { 217 out.writeLong(this.size()); 218 this.forEachEntry(new TLongObjectProcedure<TimeFrequency>() { 219 220 @Override 221 public boolean execute(long a, TimeFrequency b) { 222 try { 223 b.writeBinary(out); 224 } catch (final IOException e) { 225 return false; 226 } 227 return true; 228 } 229 }); 230 } 231}