/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the name of the University of Southampton nor the names of its
 *     contributors may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.demos.sandbox.ml.regression;

import java.io.File;
import java.io.IOException;

import javax.swing.JFrame;

import org.apache.commons.lang.StringUtils;
import org.jfree.chart.ChartFactory;
import org.jfree.chart.ChartPanel;
import org.jfree.chart.JFreeChart;
import org.jfree.data.time.Day;
import org.jfree.data.time.TimeSeries;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.openimaj.hadoop.tools.twitter.utils.WordDFIDF;
import org.openimaj.hadoop.tools.twitter.utils.WordDFIDFTimeSeries;
import org.openimaj.hadoop.tools.twitter.utils.WordDFIDFTimeSeriesCollection;
import org.openimaj.io.Cache;
import org.openimaj.io.IOUtils;
import org.openimaj.ml.timeseries.IncompatibleTimeSeriesException;
import org.openimaj.ml.timeseries.collection.TimeSeriesCollection;
import org.openimaj.ml.timeseries.converter.DoubleProviderTimeSeriesConverter;
import org.openimaj.ml.timeseries.processor.GaussianTimeSeriesProcessor;
import org.openimaj.ml.timeseries.processor.IntervalSummationProcessor;
import org.openimaj.ml.timeseries.processor.MovingAverageProcessor;
import org.openimaj.ml.timeseries.processor.interpolation.LinearInterpolationProcessor;
import org.openimaj.ml.timeseries.processor.interpolation.util.TimeSpanUtils;
import org.openimaj.ml.timeseries.series.DoubleTimeSeries;
import org.openimaj.ml.timeseries.series.DoubleTimeSeriesCollection;
import org.openimaj.ml.timeseries.series.DoubleTimeSeriesProvider;
import org.openimaj.twitter.finance.YahooFinanceData;
import org.openimaj.util.pair.IndependentPair;

/**
 * Sandbox program that charts the DFIDF time series of the "#apple" hashtag
 * next to the AAPL "High" price series for 2010, each in its own window.
 * <p>
 * NOTE(review): {@code TSCollection} is not imported here, so it is expected to
 * resolve from this package; it is used as the dataset handed to JFreeChart.
 *
 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
 *
 */
public class WordDFIDFTSPlayground {
	/** Input file read when no path is supplied on the command line. */
	private static final String DEFAULT_INPUT =
			"/Users/ss/Development/data/trendminer-data/datasets/sheffield/2010/AAPLwithhashtags.specific.fixed";

	/** Number of milliseconds in 24 hours; used as the sampling gap. */
	private static final long DAY_MILLIS = 24L * 60 * 60 * 1000;

	/** Length passed to the moving average processors: 30 times {@link #DAY_MILLIS}. */
	private static final long MOVING_AVERAGE_WINDOW_MILLIS = 30 * DAY_MILLIS;

	/**
	 * Builds and displays two charts: the "#apple" DFIDF series (raw, moving
	 * average and gaussian variants) and the AAPL "High" series (moving average,
	 * linearly interpolated and raw).
	 *
	 * @param args
	 *            optional; when present, {@code args[0]} is the path of the
	 *            serialised {@link WordDFIDFTimeSeriesCollection} to read.
	 *            Otherwise {@link #DEFAULT_INPUT} is used.
	 * @throws IOException
	 *             declared by the underlying read/load calls
	 * @throws IncompatibleTimeSeriesException
	 *             declared by the time series processing calls
	 */
	public static void main(String[] args) throws IOException, IncompatibleTimeSeriesException {
		TSCollection coll = new TSCollection();
		final String input = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
		WordDFIDFTimeSeriesCollection AAPLwords = IOUtils.read(new File(input), WordDFIDFTimeSeriesCollection.class);
		AAPLwords = AAPLwords.collectionByNames("#apple");

		final DateTimeFormatter f = DateTimeFormat.forPattern("YYYY MM dd");
		final DateTime begin = f.parseDateTime("2010 01 01");
		final DateTime end = f.parseDateTime("2010 12 31");
		final long gap = DAY_MILLIS;
		final long[] times = TimeSpanUtils.getTime(begin.getMillis(), end.getMillis(), gap);

		// Sum the word statistics into the intervals described by 'times'
		// before any conversion to plain double series.
		AAPLwords.processInternalInplace(
				new IntervalSummationProcessor<WordDFIDF[], WordDFIDF, WordDFIDFTimeSeries>(times));

		DoubleTimeSeriesCollection converted = AAPLwords.convertInternal(
				new DoubleProviderTimeSeriesConverter<WordDFIDF[], WordDFIDF, WordDFIDFTimeSeries>(),
				new MovingAverageProcessor(MOVING_AVERAGE_WINDOW_MILLIS),
				new DoubleTimeSeriesCollection()
		);
		timeSeriesToChart(AAPLwords, coll);
		timeSeriesToChart(converted, coll, " - movingaverage");

		converted = AAPLwords.convertInternal(
				new DoubleProviderTimeSeriesConverter<WordDFIDF[], WordDFIDF, WordDFIDFTimeSeries>(),
				new GaussianTimeSeriesProcessor(3),
				new DoubleTimeSeriesCollection()
		);
		timeSeriesToChart(converted, coll, " - gaussian");
		displayTimeSeries(coll, "AAPL words DFIDF", "date", "dfidf sum");

		// Load the finance data
		YahooFinanceData data = new YahooFinanceData("AAPL", begin, end);
		data = Cache.load(data);
		coll = new TSCollection();

		timeSeriesToChart("AAPL Moving Average",
				data.seriesByName("High").process(new MovingAverageProcessor(MOVING_AVERAGE_WINDOW_MILLIS)), coll);
		timeSeriesToChart(
				"AAPL Interpolated",
				data.seriesByName("High").process(
						new LinearInterpolationProcessor(begin.getMillis(), end.getMillis(), gap)), coll);
		timeSeriesToChart("AAPL", data.seriesByName("High"), coll);
		displayTimeSeries(coll, "AAPL High", "date", "price");
	}

	/**
	 * Shows the dataset as a JFreeChart time series chart in a new frame.
	 * Closing the frame exits the JVM ({@link JFrame#EXIT_ON_CLOSE}).
	 *
	 * @param dataset
	 *            the series to plot
	 * @param name
	 *            chart title
	 * @param xname
	 *            label of the time axis
	 * @param yname
	 *            label of the value axis
	 */
	private static void displayTimeSeries(TSCollection dataset, String name, String xname, String yname) {
		// legend on, tooltips and URLs off
		final JFreeChart chart = ChartFactory.createTimeSeriesChart(name, xname, yname, dataset, true, false, false);
		final ChartPanel panel = new ChartPanel(chart);
		panel.setFillZoomRectangle(true);

		final JFrame j = new JFrame();
		// Configure the close behaviour before the window becomes visible.
		j.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
		j.setContentPane(panel);
		j.pack();
		j.setVisible(true);
	}

	/**
	 * Adds every series of the collection to the chart dataset. Each chart
	 * series is named after the source series, followed by the {@code append}
	 * parts joined with "-".
	 *
	 * @param dstsc
	 *            collection whose series can be viewed as {@link DoubleTimeSeries}
	 * @param coll
	 *            chart dataset that receives the new series
	 * @param append
	 *            optional suffix parts for the chart series names
	 */
	private static void timeSeriesToChart(TimeSeriesCollection<?, ?, ?, ? extends DoubleTimeSeriesProvider> dstsc,
			TSCollection coll, String... append)
	{
		// The suffix does not depend on the series, so build it once.
		final String suffix = StringUtils.join(append, "-");
		for (final String seriesName : dstsc.getNames()) {
			final DoubleTimeSeries series = dstsc.series(seriesName).doubleTimeSeries();
			timeSeriesToChart(seriesName + suffix, series, coll);
		}
	}

	/**
	 * Converts one double time series into a JFreeChart {@link TimeSeries} with
	 * the given name and adds it to the chart dataset. Each (time, value) pair
	 * is placed on the {@link Day} that its millisecond timestamp falls on, as
	 * interpreted by {@link DateTime#DateTime(Object)}.
	 *
	 * @param name
	 *            name of the chart series
	 * @param highseries
	 *            the series to convert
	 * @param coll
	 *            chart dataset that receives the new series
	 */
	private static void timeSeriesToChart(String name, DoubleTimeSeries highseries, TSCollection coll) {
		final TimeSeries ret = new TimeSeries(name);
		for (final IndependentPair<Long, Double> pair : highseries) {
			final DateTime dt = new DateTime(pair.firstObject());
			final Day d = new Day(dt.getDayOfMonth(), dt.getMonthOfYear(), dt.getYear());
			ret.add(d, pair.secondObject());
		}
		coll.addSeries(ret);
	}
}