001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.demos.sandbox.ml.regression;
031
032import java.io.File;
033import java.io.IOException;
034
035import javax.swing.JFrame;
036
037import org.apache.commons.lang.StringUtils;
038import org.jfree.chart.ChartFactory;
039import org.jfree.chart.ChartPanel;
040import org.jfree.chart.JFreeChart;
041import org.jfree.data.time.Day;
042import org.jfree.data.time.TimeSeries;
043import org.joda.time.DateTime;
044import org.joda.time.format.DateTimeFormat;
045import org.joda.time.format.DateTimeFormatter;
046import org.openimaj.hadoop.tools.twitter.utils.WordDFIDF;
047import org.openimaj.hadoop.tools.twitter.utils.WordDFIDFTimeSeries;
048import org.openimaj.hadoop.tools.twitter.utils.WordDFIDFTimeSeriesCollection;
049import org.openimaj.io.Cache;
050import org.openimaj.io.IOUtils;
051import org.openimaj.ml.timeseries.IncompatibleTimeSeriesException;
052import org.openimaj.ml.timeseries.collection.TimeSeriesCollection;
053import org.openimaj.ml.timeseries.converter.DoubleProviderTimeSeriesConverter;
054import org.openimaj.ml.timeseries.processor.GaussianTimeSeriesProcessor;
055import org.openimaj.ml.timeseries.processor.IntervalSummationProcessor;
056import org.openimaj.ml.timeseries.processor.MovingAverageProcessor;
057import org.openimaj.ml.timeseries.processor.interpolation.LinearInterpolationProcessor;
058import org.openimaj.ml.timeseries.processor.interpolation.util.TimeSpanUtils;
059import org.openimaj.ml.timeseries.series.DoubleTimeSeries;
060import org.openimaj.ml.timeseries.series.DoubleTimeSeriesCollection;
061import org.openimaj.ml.timeseries.series.DoubleTimeSeriesProvider;
062import org.openimaj.twitter.finance.YahooFinanceData;
063import org.openimaj.util.pair.IndependentPair;
064
065/**
066 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
067 * 
068 */
069public class WordDFIDFTSPlayground {
070        /**
071         * @param args
072         * @throws IOException
073         * @throws IncompatibleTimeSeriesException
074         */
075        public static void main(String[] args) throws IOException, IncompatibleTimeSeriesException {
076                TSCollection coll = new TSCollection();
077                final String input = "/Users/ss/Development/data/trendminer-data/datasets/sheffield/2010/AAPLwithhashtags.specific.fixed";
078                WordDFIDFTimeSeriesCollection AAPLwords = IOUtils.read(new File(input), WordDFIDFTimeSeriesCollection.class);
079                AAPLwords = AAPLwords.collectionByNames("#apple");
080                final DateTimeFormatter f = DateTimeFormat.forPattern("YYYY MM dd");
081                final DateTime begin = f.parseDateTime("2010 01 01");
082                final DateTime end = f.parseDateTime("2010 12 31");
083                final long gap = 24 * 60 * 60 * 1000;
084                final long[] times = TimeSpanUtils.getTime(begin.getMillis(), end.getMillis(), gap);
085                AAPLwords.processInternalInplace(new IntervalSummationProcessor<WordDFIDF[], WordDFIDF, WordDFIDFTimeSeries>(
086                                times));
087                DoubleTimeSeriesCollection converted = AAPLwords.convertInternal(
088                                new DoubleProviderTimeSeriesConverter<WordDFIDF[], WordDFIDF, WordDFIDFTimeSeries>(),
089                                new MovingAverageProcessor(30 * 24 * 60 * 60 * 1000l),
090                                new DoubleTimeSeriesCollection()
091                                );
092                timeSeriesToChart(AAPLwords, coll);
093                timeSeriesToChart(converted, coll, " - movingaverage");
094                converted = AAPLwords.convertInternal(
095                                new DoubleProviderTimeSeriesConverter<WordDFIDF[], WordDFIDF, WordDFIDFTimeSeries>(),
096                                new GaussianTimeSeriesProcessor(3),
097                                new DoubleTimeSeriesCollection()
098                                );
099                timeSeriesToChart(converted, coll, " - gaussian");
100                displayTimeSeries(coll, "AAPL words DFIDF", "date", "dfidf sum");
101
102                // Load the finance data
103                YahooFinanceData data = new YahooFinanceData("AAPL", begin, end);
104                data = Cache.load(data);
105                coll = new TSCollection();
106
107                timeSeriesToChart("AAPL Moving Average",
108                                data.seriesByName("High").process(new MovingAverageProcessor(30 * 24 * 60 * 60 * 1000l)), coll);
109                timeSeriesToChart(
110                                "AAPL Interpolated",
111                                data.seriesByName("High").process(
112                                                new LinearInterpolationProcessor(begin.getMillis(), end.getMillis(), gap)), coll);
113                timeSeriesToChart("AAPL", data.seriesByName("High"), coll);
114                displayTimeSeries(coll, "AAPL High", "date", "price");
115        }
116
117        private static void displayTimeSeries(TSCollection dataset, String name, String xname, String yname) {
118                final JFreeChart chart = ChartFactory.createTimeSeriesChart(name, xname, yname, dataset, true, false, false);
119                final ChartPanel panel = new ChartPanel(chart);
120                panel.setFillZoomRectangle(true);
121                final JFrame j = new JFrame();
122                j.setContentPane(panel);
123                j.pack();
124                j.setVisible(true);
125                j.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
126        }
127
128        private static void timeSeriesToChart(TimeSeriesCollection<?, ?, ?, ? extends DoubleTimeSeriesProvider> dstsc,
129                        TSCollection coll, String... append)
130        {
131                for (final String seriesName : dstsc.getNames()) {
132                        final DoubleTimeSeries series = dstsc.series(seriesName).doubleTimeSeries();
133                        final TimeSeries ret = new TimeSeries(seriesName + StringUtils.join(append, "-"));
134                        for (final IndependentPair<Long, Double> pair : series) {
135                                final DateTime dt = new DateTime(pair.firstObject());
136                                final Day d = new Day(dt.getDayOfMonth(), dt.getMonthOfYear(), dt.getYear());
137                                ret.add(d, pair.secondObject());
138                        }
139                        coll.addSeries(ret);
140                }
141        }
142
143        private static void timeSeriesToChart(String name, DoubleTimeSeries highseries, TSCollection coll) {
144                final TimeSeries ret = new TimeSeries(name);
145                for (final IndependentPair<Long, Double> pair : highseries) {
146                        final DateTime dt = new DateTime(pair.firstObject());
147                        final Day d = new Day(dt.getDayOfMonth(), dt.getMonthOfYear(), dt.getYear());
148                        ret.add(d, pair.secondObject());
149                }
150                coll.addSeries(ret);
151        }
152}