001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.hadoop.tools.twitter.token.mode.dfidf;
031
032import gnu.trove.map.hash.TLongObjectHashMap;
033import gnu.trove.procedure.TLongObjectProcedure;
034
035import java.io.DataInput;
036import java.io.DataOutput;
037import java.io.IOException;
038import java.util.Arrays;
039
040import org.openimaj.hadoop.tools.twitter.token.mode.dfidf.TimeFrequencyHolder.TimeFrequency;
041import org.openimaj.io.ReadWriteableBinary;
042
043/**
044 * A {@link ReadWriteableBinary} {@link TLongObjectHashMap}
045 * 
046 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
047 * 
048 */
049public class TimeFrequencyHolder extends TLongObjectHashMap<TimeFrequency> implements
050                ReadWriteableBinary
051{
052
053        /**
054         * Holds the number of a thing at a moment in time and the total number of
055         * that thing seen across all time
056         * 
057         * 
058         * @author Sina Samangooei (ss@ecs.soton.ac.uk)
059         * 
060         */
061        public static class TimeFrequency implements ReadWriteableBinary {
062                long time;
063                long periodFrequency;
064                long cumulativeFrequency;
065
066                /**
067                 * default
068                 */
069                public TimeFrequency() {
070
071                }
072
073                @Override
074                public String toString() {
075                        return String.format("t(%d) tf(%d) Tf(%d)", this.time, periodFrequency, cumulativeFrequency);
076                }
077
078                /**
079                 * initialise, presumed to be at the beginning of some time period so
080                 * the cumulative == the period frequency
081                 * 
082                 * @param time
083                 * @param frequency
084                 */
085                public TimeFrequency(long time, long frequency) {
086                        this.time = time;
087                        this.periodFrequency = frequency;
088                        this.cumulativeFrequency = frequency;
089
090                }
091
092                @Override
093                public void readBinary(DataInput in) throws IOException {
094                        time = in.readLong();
095                        periodFrequency = in.readLong();
096                        this.cumulativeFrequency = periodFrequency;
097                }
098
099                @Override
100                public byte[] binaryHeader() {
101                        return "".getBytes();
102                }
103
104                @Override
105                public void writeBinary(DataOutput out) throws IOException {
106                        out.writeLong(time);
107                        out.writeLong(periodFrequency);
108                }
109
110                /**
111                 * Given a {@link TimeFrequency} instance, keep count of cumulative
112                 * frequency and set the periodFrequency to the one furthest along in
113                 * time
114                 * 
115                 * @param other
116                 * @return a new {@link TimeFrequency} instance
117                 */
118                public TimeFrequency combine(TimeFrequency other) {
119                        final TimeFrequency nHolder = new TimeFrequency();
120                        TimeFrequency future, past;
121                        if (this.time > other.time) { // this is the future time instance
122                                                                                        // (so should be the time we
123                                                                                        // remember)
124                                future = this;
125                                past = other;
126                        }
127                        else if (other.time > this.time) { // other is the future time
128                                                                                                // instance
129                                future = other;
130                                past = this;
131                        }
132                        else { // equal time instances, choose other as the "true" value
133                                nHolder.time = other.time;
134                                nHolder.periodFrequency = other.periodFrequency;
135                                nHolder.cumulativeFrequency = other.cumulativeFrequency;
136                                return nHolder;
137                        }
138                        nHolder.time = future.time;
139                        nHolder.periodFrequency = future.periodFrequency;
140                        nHolder.cumulativeFrequency = past.cumulativeFrequency + future.periodFrequency;
141                        return nHolder;
142
143                }
144
145                /**
146                 * @param l
147                 * @param nTweets
148                 * @return a new {@link TimeFrequency} instance
149                 */
150                public TimeFrequency combine(long l, long nTweets) {
151                        final TimeFrequency ntf = new TimeFrequency(l, nTweets);
152                        return combine(ntf);
153                }
154        }
155
156        /**
157         * default
158         */
159        public TimeFrequencyHolder() {
160        }
161
162        /**
163         * For every held {@link TimeFrequency} reset {@link TimeFrequency}
164         * cumulativeFrequency = {@link TimeFrequency} periodFrequency and then go
165         * through each in key-value order and use
166         * {@link TimeFrequency#combine(TimeFrequency)} to calculate a cumulative
167         * count
168         */
169        public void recalculateCumulativeFrequencies() {
170                final long[] sortedKeys = this.keys();
171                Arrays.sort(sortedKeys);
172                TimeFrequency current = null;
173                for (int i = 0; i < sortedKeys.length; i++) {
174                        final long k = sortedKeys[i];
175                        final TimeFrequency held = this.get(k);
176                        held.cumulativeFrequency = held.periodFrequency;
177                        if (current == null)
178                        {
179                                current = held;
180                        }
181                        else {
182                                current = current.combine(this.get(k));
183                        }
184                        this.put(k, current);
185                }
186        }
187
188        @Override
189        public String toString() {
190                final StringBuilder builder = new StringBuilder();
191                final long[] sortedKeys = this.keys();
192                Arrays.sort(sortedKeys);
193                for (int i = 0; i < sortedKeys.length; i++) {
194                        final long k = sortedKeys[i];
195                        builder.append(String.format("%d - %s\n", k, this.get(k)));
196                }
197                return builder.toString();
198        }
199
200        @Override
201        public void readBinary(DataInput in) throws IOException {
202                final long count = in.readLong();
203                for (int i = 0; i < count; i++) {
204                        final TimeFrequency tf = new TimeFrequency();
205                        tf.readBinary(in);
206                        this.put(tf.time, tf);
207                }
208        }
209
210        @Override
211        public byte[] binaryHeader() {
212                return "".getBytes();
213        }
214
215        @Override
216        public void writeBinary(final DataOutput out) throws IOException {
217                out.writeLong(this.size());
218                this.forEachEntry(new TLongObjectProcedure<TimeFrequency>() {
219
220                        @Override
221                        public boolean execute(long a, TimeFrequency b) {
222                                try {
223                                        b.writeBinary(out);
224                                } catch (final IOException e) {
225                                        return false;
226                                }
227                                return true;
228                        }
229                });
230        }
231}