/**
 * Copyright (c) 2012, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the name of the University of Southampton nor the names of its
 *     contributors may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.tools.twitter.modes.preprocessing;

import java.io.IOException;

import org.apache.log4j.Logger;
import org.geonames.ToponymSearchCriteria;
import org.geonames.ToponymSearchResult;
import org.geonames.WebService;
import org.openimaj.twitter.USMFStatus;
import org.openimaj.twitter.utils.Twitter4jUtil;

import twitter4j.GeoQuery;
import twitter4j.Place;
import twitter4j.ResponseList;
import twitter4j.Twitter;
import twitter4j.TwitterException;

/**
 * Resolve a country code for a status. The status's own country code is used
 * when present; otherwise the Twitter places search is queried with the
 * status/user location, and after that the GeoNames web service is queried
 * with the status/user geo coordinates or location string. Calls to both
 * services are spaced out by an {@link EnforcedWait}.
 *
 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
 *
 */
public class CountryCodeMode extends TwitterPreprocessingMode<String> {

	Logger logger = Logger.getLogger(CountryCodeMode.class);
	final static String COUNTRY_CODE = "country_code";
	/** Minimum gap between GeoNames calls in ms: one hour spread over 2000 calls. */
	private static final long GEONAMES_ENFORCED_WAIT_TIME = (60 * 60 * 1000) / 2000;
	/** Minimum gap between Twitter calls in ms: one hour spread over 100 calls. */
	private static final long TWITTER_ENFORCED_WAIT_TIME = (60 * 60 * 1000) / 100;
	/**
	 * Passed to {@link Twitter4jUtil#handleTwitterException} as its second
	 * argument; presumably the wait (ms) used when the error carries no wait
	 * time of its own -- TODO confirm against Twitter4jUtil.
	 */
	private static final long TWITTER_DEFAULT_ERROR_BUT_NO_WAIT_TIME = 5000;

	private Twitter twitter;
	private EnforcedWait geonamesLastCall = new EnforcedWait(GEONAMES_ENFORCED_WAIT_TIME);
	private EnforcedWait twitterLastCall = new EnforcedWait(TWITTER_ENFORCED_WAIT_TIME);

	/**
	 * Create the mode with a Twitter client obtained from
	 * {@link Twitter4jUtil#create()}.
	 */
	public CountryCodeMode() {
		this.twitter = Twitter4jUtil.create();
	}

	/**
	 * Tracks the time of the last admitted call to a service and refuses a new
	 * call until the larger of {@link #currentWait} and the configured minimum
	 * wait has elapsed since then.
	 */
	class EnforcedWait {
		/**
		 * The wait (ms) currently in force. Raised by callers when a service
		 * reports an error; read from {@link EnforcedWaitException} handlers to
		 * decide how long to sleep.
		 */
		long currentWait;
		private final long minimumWait;
		private long lastCall;

		/**
		 * @param minWait
		 *            the minimum number of milliseconds between two admitted
		 *            calls
		 */
		public EnforcedWait(long minWait) {
			this.minimumWait = minWait;
		}

		/**
		 * Admit a call if enough time has passed since the last admitted call,
		 * recording the current time as the new last call.
		 *
		 * @throws EnforcedWaitException
		 *             if the required wait has not yet elapsed; in that case
		 *             {@link #currentWait} holds the wait being enforced
		 */
		public void enforce() throws EnforcedWaitException {
			final long now = System.currentTimeMillis();
			final long requiredWait = Math.max(currentWait, minimumWait);
			if (now - lastCall < requiredWait) {
				currentWait = requiredWait;
				throw new EnforcedWaitException(this);
			}
			this.lastCall = now;
			// The call is admitted, so any temporarily raised back-off (e.g. one
			// set after a service error) has been served. Without this reset a
			// single long back-off would be applied to every later call.
			this.currentWait = 0;
		}
	}

	/**
	 * Thrown when a service call is refused because its {@link EnforcedWait}
	 * has not yet elapsed, or because the service reported an error.
	 */
	class EnforcedWaitException extends Exception {
		private static final long serialVersionUID = 1L;
		private final EnforcedWait wait;

		/**
		 * @param enforcedWait
		 *            the wait that refused the call
		 */
		public EnforcedWaitException(EnforcedWait enforcedWait) {
			this.wait = enforcedWait;
		}
	}

	/**
	 * Find a country code for the status.
	 *
	 * @param stat
	 *            the status to inspect
	 * @return the status's own country code if set; otherwise the result of the
	 *         first lookup that could be made (which may be <code>null</code>
	 *         or empty if that lookup found nothing); or the empty string if
	 *         the status carries nothing to search with or the thread is
	 *         interrupted while waiting for a service to become available
	 */
	@Override
	public String process(USMFStatus stat) {
		while (true) {
			long waitTime = Long.MAX_VALUE;
			// Try using the twitter API first!
			if (stat.country_code != null) {
				logger.debug("Country code from status!");
				return stat.country_code;
			}
			// Guard against a status without user details.
			final String userLocation = stat.user == null ? null : stat.user.location;
			final double[] userGeo = stat.user == null ? null : stat.user.geo;
			try {
				// NOTE(review): a null result (no place found) is returned
				// as-is and the geonames fallback below is skipped; confirm
				// that is intended.
				if (stat.location != null) {
					final String code = searchWithTwitter(stat.location);
					logger.debug("country code from status location twitter places");
					return code;
				} else if (userLocation != null) {
					final String code = searchWithTwitter(userLocation);
					logger.debug("country code from user location twitter places");
					return code;
				}
			} catch (EnforcedWaitException e) {
				waitTime = e.wait.currentWait;
			}
			// now try geonames (which we have to wait for)
			try {
				if (stat.geo != null) {
					final String code = countryCodeByGeo(stat.geo);
					logger.debug("country code from geonames status geo");
					return code;
				} else if (stat.location != null) {
					final String code = searchByString(stat.location);
					logger.debug("country code from geonames search");
					return code;
				} else if (userGeo != null) {
					final String code = countryCodeByGeo(userGeo);
					logger.debug("country code from geonames user geo");
					return code;
				} else if (userLocation != null) {
					final String code = searchByString(userLocation);
					logger.debug("country code from geonames user location");
					return code;
				}
			} catch (EnforcedWaitException e) {
				waitTime = Math.min(waitTime, e.wait.currentWait);
			}
			if (waitTime == Long.MAX_VALUE) {
				// Neither service asked us to wait and nothing was returned
				// above: there was nothing to search with.
				logger.debug("API called, no response!");
				return "";
			}
			try {
				logger.debug("APIs busy, waiting: " + waitTime);
				// Thread.sleep rejects negative values; the wait may come
				// straight from the Twitter error handler.
				Thread.sleep(Math.max(0L, waitTime));
			} catch (InterruptedException e) {
				// Restore the flag for the caller and stop retrying rather than
				// silently ignoring the interruption.
				Thread.currentThread().interrupt();
				logger.debug("Interrupted while waiting for APIs, giving up");
				return "";
			}
		}
	}

	/**
	 * Query the Twitter places search for a location string.
	 *
	 * @param location
	 *            the free-text location to search for
	 * @return the country code of the first place returned, or
	 *         <code>null</code> if no place was returned
	 * @throws EnforcedWaitException
	 *             if the Twitter wait has not elapsed, or if the call failed
	 *             (in which case the wait is set from
	 *             {@link Twitter4jUtil#handleTwitterException})
	 */
	private String searchWithTwitter(String location) throws EnforcedWaitException {
		try {
			twitterLastCall.enforce();
			final ResponseList<Place> res = this.twitter.searchPlaces(new GeoQuery(location));
			if (res.isEmpty()) {
				return null;
			}
			return res.get(0).getCountryCode();
		} catch (TwitterException e) {
			this.twitterLastCall.currentWait = Twitter4jUtil.handleTwitterException(e,
					TWITTER_DEFAULT_ERROR_BUT_NO_WAIT_TIME);
			throw new EnforcedWaitException(this.twitterLastCall);
		}
	}

	@Override
	public String getAnalysisKey() {
		return CountryCodeMode.COUNTRY_CODE;
	}

	/**
	 * Query the GeoNames search service for a location string.
	 *
	 * @param location
	 *            the free-text location to search for
	 * @return the country code of the first toponym returned, or the empty
	 *         string if there were no results or the search failed
	 * @throws EnforcedWaitException
	 *             if the GeoNames wait has not elapsed
	 */
	private String searchByString(String location) throws EnforcedWaitException {
		// Kept outside the try so the broad catch below cannot swallow it.
		geonamesLastCall.enforce();
		try {
			final ToponymSearchCriteria search = new ToponymSearchCriteria();
			search.setQ(location);
			final ToponymSearchResult result = WebService.search(search);
			if (result.getTotalResultsCount() == 0) {
				return "";
			}
			return result.getToponyms().get(0).getCountryCode();
		} catch (Exception e) {
			// Best effort: report the failure but treat it as "no result".
			logger.warn("GeoNames search failed for location: " + location, e);
			return "";
		}
	}

	/**
	 * Query the GeoNames country code service for a coordinate pair.
	 *
	 * @param geo
	 *            the coordinates; elements 0 and 1 are passed, in that order,
	 *            to {@link WebService#countryCode(double, double)}
	 * @return the country code reported by GeoNames, or <code>null</code> if
	 *         the call failed with an {@link IOException}
	 * @throws EnforcedWaitException
	 *             if the GeoNames wait has not elapsed
	 */
	private String countryCodeByGeo(double[] geo) throws EnforcedWaitException {
		try {
			geonamesLastCall.enforce();
			return WebService.countryCode(geo[0], geo[1]);
		} catch (IOException e) {
			// Best effort: report the failure but treat it as "no result".
			logger.warn("GeoNames country code lookup failed", e);
			return null;
		}
	}
}