/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the name of the University of Southampton nor the names of its
 *     contributors may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */ 030package org.openimaj.demos.sandbox.ml.linear.learner.stream.twitter; 031 032import java.net.UnknownHostException; 033import java.util.ArrayList; 034import java.util.List; 035 036import org.apache.log4j.ConsoleAppender; 037import org.apache.log4j.Level; 038import org.apache.log4j.Logger; 039import org.apache.log4j.PatternLayout; 040import org.openimaj.demos.sandbox.ml.linear.learner.stream.MongoDBQueryStream; 041import org.openimaj.tools.twitter.modes.preprocessing.CountryCodeMode; 042import org.openimaj.tools.twitter.modes.preprocessing.TwitterPreprocessingMode; 043import org.openimaj.twitter.USMFStatus; 044import org.openimaj.util.data.Context; 045import org.openimaj.util.function.Operation; 046import org.openimaj.util.function.context.ContextListFunction; 047 048import twitter4j.Status; 049import twitter4j.TwitterException; 050import twitter4j.json.DataObjectFactory; 051 052import com.mongodb.DBObject; 053import com.mongodb.ServerAddress; 054import com.mongodb.util.JSON; 055 056public class CountryCodeUSMFName { 057 static { 058 if(System.getProperty("os.name").toLowerCase().contains("mac")){ 059 060 ConsoleAppender console = new ConsoleAppender(); // create appender 061 // configure the appender 062 String PATTERN = "%d [%p|%c|%C{1}] %m%n"; 063 console.setLayout(new PatternLayout(PATTERN)); 064 console.setThreshold(Level.DEBUG); 065 console.activateOptions(); 066 // add appender to any Logger (here is root) 067 Logger.getRootLogger().addAppender(console); 068 } 069 } 070 public static void main(String[] args) throws UnknownHostException { 071 List<ServerAddress> servers = new ArrayList<ServerAddress>(); 072 servers.add(new ServerAddress("rumi")); 073 servers.add(new ServerAddress("hafez")); 074 final CountryCodeMode countryCodeMode = new CountryCodeMode(); 075 076 new MongoDBQueryStream<Context>(servers ) { 077 078 @Override 079 public String getCollectionName() { 080 return "searchapi_yahoo_billgeo"; 081 } 082 083 @Override 084 public String 
getDBName() { 085 return "twitterticker"; 086 } 087 088 @Override 089 public Context constructObjects(DBObject next) { 090 Context ret = new Context(); 091 List<USMFStatus> tweets = new ArrayList<USMFStatus>(); 092 List<Status> raw = new ArrayList<Status>(); 093 @SuppressWarnings("unchecked") 094 List<Object> objt = (List<Object>) next.get("tweets_raw"); 095 for (Object object : objt) { 096 try { 097 raw.add(DataObjectFactory.createStatus(JSON.serialize(object))); 098 } catch (TwitterException e) { 099 // TODO Auto-generated catch block 100 e.printStackTrace(); 101 } 102 } 103 List<Object> objl = (List<Object>) next.get("tweets"); 104 for (Object object : objl) { 105 USMFStatus status = new USMFStatus(); 106 status.fillFromString(JSON.serialize(object)); 107 tweets.add(status); 108 } 109 ret.put("usmfstatuses", tweets); 110 ret.put("tweets", raw); 111 return ret ; 112 } 113 } 114// .forEach(new Operation<Context>() { 115// 116// @Override 117// public void perform(Context object) { 118// List<Status> tweets = object.getTyped("tweets"); 119// for (Status status : tweets) { 120// System.out.println(status); 121// } 122// } 123// }); 124 .map(new ContextListFunction<USMFStatus,USMFStatus>(new TwitterPreprocessingFunction(countryCodeMode), "usmfstatuses")) 125 .forEach(new Operation<Context>() { 126 127 @Override 128 public void perform(Context object) { 129 List<USMFStatus> statuses = object.getTyped("usmfstatuses"); 130 List<String> places = new ArrayList<String>(); 131 for (USMFStatus usmfStatus : statuses) { 132 try { 133 places.add(TwitterPreprocessingMode.results(usmfStatus, countryCodeMode)); 134 } catch (Exception e) { 135 } 136 } 137 System.out.println(places); 138 } 139 }); 140 } 141}