001/**
002 * Copyright (c) 2012, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.hadoop.tools.twitter.utils;
031
032import gnu.trove.map.hash.TObjectIntHashMap;
033import gnu.trove.procedure.TObjectIntProcedure;
034
035import java.io.DataInput;
036import java.io.DataOutput;
037import java.io.IOException;
038
039import org.openimaj.io.ReadWriteableBinary;
040
041/**
042 * Class encapsulating a number of tweets across which certain words were seen
043 * 
044 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
045 *
046 */
047public class TweetCountWordMap implements ReadWriteableBinary{
048        int ntweets ;
049        /**
050         * If the ntweets is set to this value, the ntweets should be ignored
051         */
052        public final static int INVALID_TWEET_COUNT = -1;
053        
054        TObjectIntHashMap<String> wordMap ;
055        /**
056         * empty words and 0 tweets
057         */
058        public TweetCountWordMap() {
059                ntweets = 0;
060                wordMap = new TObjectIntHashMap<String>();
061        }
062        /**
063         * @param ntweets
064         * @param wordMap
065         */
066        public TweetCountWordMap(int ntweets,TObjectIntHashMap<String> wordMap ) {
067                this.ntweets = ntweets;
068                this.wordMap = wordMap;
069        }
070        @Override
071        public void readBinary(DataInput in) throws IOException {
072                WriteableStringIntPair tweetPair = new WriteableStringIntPair();
073                tweetPair.readBinary(in);
074                this.ntweets = tweetPair.secondObject();
075                int nWords = in.readInt();
076                for (int i = 0; i < nWords; i++) {
077                        WriteableStringIntPair wordPair = new WriteableStringIntPair();
078                        wordPair.readBinary(in);
079                        this.wordMap.put(wordPair.firstObject(), wordPair.secondObject());
080                }
081        }
082
083        @Override
084        public byte[] binaryHeader() {
085                return "".getBytes();
086        }
087
088        @Override
089        public void writeBinary(final DataOutput out) throws IOException {
090                new WriteableStringIntPair("ntweets",this.ntweets).writeBinary(out);
091                out.writeInt(this.wordMap.size());
092                this.wordMap.forEachEntry(new TObjectIntProcedure<String>() {
093                        @Override
094                        public boolean execute(String word, int count) {
095                                try {
096                                        new WriteableStringIntPair(word,count).writeBinary(out);
097                                } catch (IOException e) {}
098                                return true;
099                        }
100                });
101        }
102        
103        @Override
104        public boolean equals(Object other){
105                if(!(other instanceof TweetCountWordMap))return false;
106                final TweetCountWordMap that = (TweetCountWordMap)other;
107                boolean eq = this.ntweets == that.ntweets;
108                if(!eq) return false;
109                return this.wordMap.forEachEntry(new TObjectIntProcedure<String>() {
110
111                        @Override
112                        public boolean execute(String arg0, int arg1) {
113                                return that.wordMap.get(arg0) == arg1;
114                        }
115                });
116                
117        }
118        /**
119         * @return the word map
120         */
121        public TObjectIntHashMap<String> getTweetWordMap() {
122                // TODO Auto-generated method stub
123                return this.wordMap;
124        }
125        /**
126         * @param i increment number of tweets by this amount
127         */
128        public void incrementTweetCount(int i) {
129                this.ntweets += i;
130                
131        }
132        /**
133         * Add values from "that" to those in this if they exist, otherwise create the element
134         * and start a new count
135         * @param that
136         */
137        public void combine(TweetCountWordMap that) {
138                this.ntweets += that.ntweets;
139                if(this.wordMap == null || that.wordMap == null) return;
140                that.wordMap.forEachEntry(new TObjectIntProcedure<String>() {
141
142                        @Override
143                        public boolean execute(String word, int count) {
144                                TweetCountWordMap.this.wordMap.adjustOrPutValue(word, count, count);
145                                return true;
146                        }
147                });
148        }
149        /**
150         * @return the number of tweets
151         */
152        public long getNTweets() {
153                return this.ntweets;
154        }
155        
156        /**
157         * set the number of tweets
158         * @param ntweets 
159         */
160        public void setNTweets(int ntweets) {
161                this.ntweets = ntweets;
162        }
163}