/**
 * Copyright (c) 2012, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the name of the University of Southampton nor the names of its
 *     contributors may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.hadoop.tools.twitter.utils;

import gnu.trove.map.hash.TObjectIntHashMap;
import gnu.trove.procedure.TObjectIntProcedure;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.openimaj.io.ReadWriteableBinary;

/**
 * Class encapsulating a number of tweets across which certain words were seen
 *
 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
 *
 */
public class TweetCountWordMap implements ReadWriteableBinary {
	/**
	 * The number of tweets the word counts were gathered from
	 */
	int ntweets;

	/**
	 * If the ntweets is set to this value, the ntweets should be ignored
	 */
	public final static int INVALID_TWEET_COUNT = -1;

	/**
	 * The count held against each word
	 */
	TObjectIntHashMap<String> wordMap;

	/**
	 * empty words and 0 tweets
	 */
	public TweetCountWordMap() {
		ntweets = 0;
		wordMap = new TObjectIntHashMap<String>();
	}

	/**
	 * Construct from an existing count and map. The map is stored by
	 * reference, not copied.
	 *
	 * @param ntweets the number of tweets
	 * @param wordMap the count held against each word
	 */
	public TweetCountWordMap(int ntweets, TObjectIntHashMap<String> wordMap) {
		this.ntweets = ntweets;
		this.wordMap = wordMap;
	}

	/**
	 * Replace the state of this instance with the state read from the input:
	 * one {@link WriteableStringIntPair} whose integer is the tweet count (its
	 * string is discarded), an int holding the number of words, then one
	 * {@link WriteableStringIntPair} per word.
	 *
	 * @param in the source to read from
	 * @throws IOException if the underlying read fails
	 */
	@Override
	public void readBinary(DataInput in) throws IOException {
		final WriteableStringIntPair tweetPair = new WriteableStringIntPair();
		tweetPair.readBinary(in);
		this.ntweets = tweetPair.secondObject();

		final int nWords = in.readInt();

		// Start from an empty map so that an instance which is read into more
		// than once (or was built with a null map) ends up holding exactly the
		// words that were written, rather than a mix of old and new entries.
		if (this.wordMap == null) {
			this.wordMap = new TObjectIntHashMap<String>();
		} else {
			this.wordMap.clear();
		}

		for (int i = 0; i < nWords; i++) {
			final WriteableStringIntPair wordPair = new WriteableStringIntPair();
			wordPair.readBinary(in);
			this.wordMap.put(wordPair.firstObject(), wordPair.secondObject());
		}
	}

	/**
	 * @return an empty header
	 */
	@Override
	public byte[] binaryHeader() {
		return "".getBytes();
	}

	/**
	 * Write the tweet count as a {@link WriteableStringIntPair} keyed
	 * "ntweets", then the number of words as an int, then one
	 * {@link WriteableStringIntPair} per word.
	 *
	 * @param out the sink to write to
	 * @throws IOException if any write fails, including the write of an
	 *             individual word entry
	 */
	@Override
	public void writeBinary(final DataOutput out) throws IOException {
		new WriteableStringIntPair("ntweets", this.ntweets).writeBinary(out);
		out.writeInt(this.wordMap.size());

		// The procedure callback cannot throw a checked exception, so the first
		// failure is parked here, iteration is stopped, and the exception is
		// rethrown once forEachEntry has returned.
		final IOException[] failure = new IOException[1];
		this.wordMap.forEachEntry(new TObjectIntProcedure<String>() {
			@Override
			public boolean execute(String word, int count) {
				try {
					new WriteableStringIntPair(word, count).writeBinary(out);
					return true;
				} catch (IOException e) {
					failure[0] = e;
					return false;
				}
			}
		});

		if (failure[0] != null) {
			throw failure[0];
		}
	}

	/**
	 * Two instances are equal when they hold the same tweet count and exactly
	 * the same words with the same counts.
	 */
	@Override
	public boolean equals(Object other) {
		if (this == other) {
			return true;
		}
		if (!(other instanceof TweetCountWordMap)) {
			return false;
		}
		final TweetCountWordMap that = (TweetCountWordMap) other;
		if (this.ntweets != that.ntweets) {
			return false;
		}
		if (this.wordMap == null || that.wordMap == null) {
			// a missing map only matches another missing map
			return this.wordMap == that.wordMap;
		}
		// Equal sizes plus "every entry of this is in that" implies the maps
		// hold the same entries, which keeps the comparison symmetric.
		if (this.wordMap.size() != that.wordMap.size()) {
			return false;
		}
		return this.wordMap.forEachEntry(new TObjectIntProcedure<String>() {
			@Override
			public boolean execute(String word, int count) {
				// containsKey distinguishes an absent word from a stored count
				// that happens to equal the value get() returns for absent keys
				return that.wordMap.containsKey(word) && that.wordMap.get(word) == count;
			}
		});
	}

	/**
	 * Hash of the tweet count and the word counts, consistent with
	 * {@link #equals(Object)} and independent of map iteration order.
	 */
	@Override
	public int hashCode() {
		final int[] mapHash = new int[1];
		if (this.wordMap != null) {
			this.wordMap.forEachEntry(new TObjectIntProcedure<String>() {
				@Override
				public boolean execute(String word, int count) {
					mapHash[0] += (word == null ? 0 : word.hashCode()) ^ count;
					return true;
				}
			});
		}
		return 31 * this.ntweets + mapHash[0];
	}

	/**
	 * @return the word map (the internal instance, not a copy)
	 */
	public TObjectIntHashMap<String> getTweetWordMap() {
		return this.wordMap;
	}

	/**
	 * @param i increment number of tweets by this amount
	 */
	public void incrementTweetCount(int i) {
		this.ntweets += i;
	}

	/**
	 * Add values from "that" to those in this if they exist, otherwise create the element
	 * and start a new count. The tweet counts are always summed; the word counts
	 * are only merged when both instances have a word map.
	 *
	 * @param that the instance whose counts are added to this one
	 */
	public void combine(TweetCountWordMap that) {
		this.ntweets += that.ntweets;
		if (this.wordMap == null || that.wordMap == null) {
			return;
		}
		that.wordMap.forEachEntry(new TObjectIntProcedure<String>() {
			@Override
			public boolean execute(String word, int count) {
				// add to an existing count, or insert the word with this count
				TweetCountWordMap.this.wordMap.adjustOrPutValue(word, count, count);
				return true;
			}
		});
	}

	/**
	 * @return the number of tweets
	 */
	public long getNTweets() {
		return this.ntweets;
	}

	/**
	 * set the number of tweets
	 *
	 * @param ntweets the new number of tweets
	 */
	public void setNTweets(int ntweets) {
		this.ntweets = ntweets;
	}
}