001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.demos.sandbox.ml.linear.learner.stream.experiments; 031 032import java.io.IOException; 033import java.net.MalformedURLException; 034import java.util.List; 035import java.util.Map; 036 037import org.openimaj.demos.sandbox.ml.linear.learner.stream.IncrementalLearnerFunction; 038import org.openimaj.demos.sandbox.ml.linear.learner.stream.IncrementalLearnerWorldSelectingEvaluator; 039import org.openimaj.demos.sandbox.ml.linear.learner.stream.ModelStats; 040import org.openimaj.demos.sandbox.ml.linear.learner.stream.YahooFinanceStream; 041import org.openimaj.demos.sandbox.ml.linear.learner.stream.twitter.TwitterPredicateFunction; 042import org.openimaj.demos.sandbox.ml.linear.learner.stream.twitter.TwitterPreprocessingFunction; 043import org.openimaj.demos.sandbox.ml.linear.learner.stream.twitter.TwitterStatusAsUSMFStatus; 044import org.openimaj.demos.sandbox.ml.linear.learner.stream.twitter.USMFStatusBagOfWords; 045import org.openimaj.ml.linear.evaluation.SumLossEvaluator; 046import org.openimaj.ml.linear.learner.BilinearLearnerParameters; 047import org.openimaj.ml.linear.learner.init.HardCodedInitStrat; 048import org.openimaj.ml.linear.learner.init.SingleValueInitStrat; 049import org.openimaj.ml.linear.learner.init.SparseZerosInitStrategy; 050import org.openimaj.stream.provider.twitter.TwitterStreamDataset; 051import org.openimaj.tools.twitter.modes.filter.LanguageFilter; 052import org.openimaj.tools.twitter.modes.preprocessing.LanguageDetectionMode; 053import org.openimaj.tools.twitter.modes.preprocessing.StopwordMode; 054import org.openimaj.tools.twitter.modes.preprocessing.TokeniseMode; 055import org.openimaj.twitter.USMFStatus; 056import org.openimaj.util.api.auth.DefaultTokenFactory; 057import org.openimaj.util.api.auth.common.TwitterAPIToken; 058import org.openimaj.util.concurrent.ArrayBlockingDroppingQueue; 059import org.openimaj.util.data.Context; 060import org.openimaj.util.function.Operation; 061import org.openimaj.util.function.context.ContextFunctionAdaptor; 062import org.openimaj.util.function.context.ContextListFilter; 063import org.openimaj.util.function.context.ContextListFunction; 064import org.openimaj.util.pair.Pair; 065import org.openimaj.util.stream.Stream; 066import org.openimaj.util.stream.combine.ContextStreamCombiner; 067import org.openimaj.util.stream.window.ContextRealTimeWindowFunction; 068import org.openimaj.util.stream.window.WindowAverage; 069 070import twitter4j.Status; 071 072/** 073 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 074 * 075 */ 076public class FinancialStreamLearningExperiment { 077 /** 078 * @param args 079 * @throws MalformedURLException 080 * @throws IOException 081 */ 082 public static void main(String[] args) throws MalformedURLException, IOException { 083 084 // The financial stream 085 final ContextRealTimeWindowFunction<Map<String, Double>> yahooWindow = new ContextRealTimeWindowFunction<Map<String, Double>>( 086 5000); 087 final Stream<Context> yahooAveragePriceStream = new YahooFinanceStream("AAPL", "GOOG") 088 .transform(yahooWindow) 089 .map( 090 new ContextFunctionAdaptor<List<Map<String, Double>>, Map<String, Double>>( 091 new WindowAverage(), "item", 092 "averageticks" 093 ) 094 ); 095 096 // The Twitter Stream 097 final ArrayBlockingDroppingQueue<Status> buffer = new ArrayBlockingDroppingQueue<Status>(1); 098 final LanguageDetectionMode languageDetectionMode = new LanguageDetectionMode(); 099 final StopwordMode stopwordMode = new StopwordMode(); 100 final TokeniseMode tokeniseMode = new TokeniseMode(); 101 102 final Stream<Context> twitterUserWordCountStream = new TwitterStreamDataset( 103 DefaultTokenFactory.get(TwitterAPIToken.class), buffer 104 ) 105 .transform(new ContextRealTimeWindowFunction<Status>(5000)) 106 .map( 107 new ContextListFunction<Status, USMFStatus>(new TwitterStatusAsUSMFStatus(), "item", 108 "usmfstatuses" 109 ) 110 ) 111 .map( 112 new ContextListFunction<USMFStatus, USMFStatus>(new TwitterPreprocessingFunction(languageDetectionMode, tokeniseMode, 113 stopwordMode), 114 "usmfstatuses" 115 ) 116 ) 117 .map(new ContextListFilter<USMFStatus>(new TwitterPredicateFunction(new LanguageFilter("en")), 118 "usmfstatuses" 119 ) 120 ) 121 .map( 122 new ContextFunctionAdaptor<List<USMFStatus>, Map<String, Map<String, Double>>>(new USMFStatusBagOfWords(new StopwordMode()), 123 "usmfstatuses", 124 "bagofwords" 125 ) 126 ); 127 128 final BilinearLearnerParameters params = new BilinearLearnerParameters(); 129 params.put(BilinearLearnerParameters.ETA0_U, 0.02); 130 params.put(BilinearLearnerParameters.ETA0_W, 0.02); 131 params.put(BilinearLearnerParameters.LAMBDA, 0.001); 132 params.put(BilinearLearnerParameters.BICONVEX_TOL, 0.01); 133 params.put(BilinearLearnerParameters.BICONVEX_MAXITER, 10); 134 params.put(BilinearLearnerParameters.BIAS, true); 135 params.put(BilinearLearnerParameters.ETA0_BIAS, 0.5); 136 params.put(BilinearLearnerParameters.WINITSTRAT, new SingleValueInitStrat(0.1)); 137 params.put(BilinearLearnerParameters.UINITSTRAT, new SparseZerosInitStrategy()); 138 final HardCodedInitStrat biasInitStrat = new HardCodedInitStrat(); 139 params.put(BilinearLearnerParameters.BIASINITSTRAT, biasInitStrat); 140 // The combined stream 141 ContextStreamCombiner 142 .combine(twitterUserWordCountStream, yahooAveragePriceStream) 143 .map( 144 new IncrementalLearnerWorldSelectingEvaluator(new SumLossEvaluator(), 145 new IncrementalLearnerFunction(params))) 146 .forEach(new Operation<Context>() { 147 148 @Override 149 public void perform(Context c) { 150 final ModelStats object = c.getTyped("modelstats"); 151 System.out.println("Loss: " + object.score); 152 System.out.println("Important words: "); 153 for (final String task : object.importantWords.keySet()) { 154 final Pair<Double> minmax = object.taskWordMinMax.get(task); 155 System.out.printf("... %s (%1.4f->%1.4f) %s\n", 156 task, 157 minmax.firstObject(), 158 minmax.secondObject(), 159 object.importantWords.get(task) 160 ); 161 } 162 } 163 }); 164 165 } 166}