001/**
002 * Copyright (c) 2012, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.tools.twitter.modes.preprocessing;
031
032import java.io.IOException;
033
034import org.apache.log4j.Logger;
035import org.geonames.ToponymSearchCriteria;
036import org.geonames.ToponymSearchResult;
037import org.geonames.WebService;
038import org.openimaj.twitter.USMFStatus;
039import org.openimaj.twitter.utils.Twitter4jUtil;
040
041import twitter4j.GeoQuery;
042import twitter4j.Place;
043import twitter4j.ResponseList;
044import twitter4j.Twitter;
045import twitter4j.TwitterException;
046
047/**
048 * Use the twokeniser to tokenise tweets
049 *
050 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
051 *
052 */
053public class CountryCodeMode extends TwitterPreprocessingMode<String> {
054
055        Logger logger = Logger.getLogger(CountryCodeMode.class);
056        final static String COUNTRY_CODE = "country_code";
057        private static final long GEONAMES_ENFORCED_WAIT_TIME = (60 * 60 * 1000) / 2000;
058        private static final long TWITTER_ENFORCED_WAIT_TIME = (60 * 60 * 1000) / 100;
059        private static final long TWITTER_DEFAULT_ERROR_BUT_NO_WAIT_TIME = 5000;
060
061        private Twitter twitter;
062        private EnforcedWait geonamesLastCall = new EnforcedWait(GEONAMES_ENFORCED_WAIT_TIME);
063        private EnforcedWait twitterLastCall = new EnforcedWait(TWITTER_ENFORCED_WAIT_TIME);
064
065        /**
066         *
067         */
068        public CountryCodeMode()  {
069                this.twitter = Twitter4jUtil.create();
070        }
071        class EnforcedWait{
072                long currentWait;
073                private long minimumWait;
074                private long lastCall;
075
076                public EnforcedWait(long minWait) {
077                        this.minimumWait = minWait;
078                }
079
080                public void enforce() throws EnforcedWaitException{
081                        long timeSinceLastCall = System.currentTimeMillis() - lastCall;
082                        if(timeSinceLastCall < Math.max(currentWait, minimumWait))
083                        {
084                                currentWait = Math.max(currentWait, minimumWait);
085                                throw new EnforcedWaitException(this);
086                        }
087                        else{
088                                this.lastCall = System.currentTimeMillis();
089                        }
090                }
091        }
092        class EnforcedWaitException extends Exception{
093                private EnforcedWait wait;
094
095                public EnforcedWaitException(EnforcedWait enforcedWait) {
096                        this.wait = enforcedWait;
097                }
098        }
099        @Override
100        public String process(USMFStatus stat)  {
101                while(true){
102                        long waitTime = Long.MAX_VALUE;
103                        // Try using the twitter API first!
104                        if(stat.country_code!= null ) {
105                                logger .debug("Country code from status!");
106                                return stat.country_code;
107                        }
108                        try{
109                                if(stat.location!=null){
110                                        String searchWithTwitter = searchWithTwitter(stat.location);
111                                        logger.debug("country code from status location twitter places");
112                                        return searchWithTwitter;
113                                }
114                                else if(stat.user.location!=null){
115                                        String searchWithTwitter = searchWithTwitter(stat.user.location);
116                                        logger.debug("country code from user location twitter places");
117                                        return searchWithTwitter;
118                                }
119                        }
120                        catch(EnforcedWaitException e){
121                                waitTime = e.wait.currentWait;
122                        }
123                        // now try geonames (which we have to wait for)
124                        try{
125                                if(stat.geo == null){
126                                        if(stat.location != null){
127                                                String searchByString = searchByString(stat.location);
128                                                logger.debug("country code from geonames search");
129                                                return searchByString;
130                                        }else{
131                                                if(stat.user.geo!=null){
132                                                        String countryCodeByGeo = countryCodeByGeo(stat.user.geo);
133                                                        logger.debug("country code from geonames user geo");
134                                                        return countryCodeByGeo;
135                                                }
136                                                else if(stat.user.location!=null){
137                                                        String searchByString = searchByString(stat.user.location);
138                                                        logger.debug("country code from geonames user location");
139                                                        return searchByString;
140                                                }
141                                        }
142                                }
143                                else{
144                                        String countryCodeByGeo = countryCodeByGeo(stat.geo);
145                                        logger.debug("country code from geonames status geo");
146                                        return countryCodeByGeo;
147                                }
148                        }catch(EnforcedWaitException e){
149                                waitTime = Math.min(waitTime, e.wait.currentWait);
150                        }
151                        if(waitTime == Long.MAX_VALUE){
152                                // ONLY IN THIS SITUATION RETURN, it means both APIs were called but none returned
153                                logger.debug("API called, no response!");
154                                return "";
155                        }else{
156                                try {
157                                        logger.debug("APIs busy, waiting: " + waitTime);
158                                        Thread.sleep(waitTime);
159                                } catch (InterruptedException e) {
160                                }
161                        }
162
163                }
164        }
165
166        private String searchWithTwitter(String location) throws EnforcedWaitException {
167                while(true){
168                        try {
169                                twitterLastCall.enforce();
170                                ResponseList<Place> res = this.twitter.searchPlaces(new GeoQuery(location));
171                                if(res.size() > 0)
172                                        return res.get(0).getCountryCode();
173                                else
174                                        return null;
175                        } catch (TwitterException e) {
176                                this.twitterLastCall.currentWait = Twitter4jUtil.handleTwitterException(e, TWITTER_DEFAULT_ERROR_BUT_NO_WAIT_TIME);
177                                throw new EnforcedWaitException(this.twitterLastCall);
178                        }
179                }
180        }
181
182        @Override
183        public String getAnalysisKey(){
184                return CountryCodeMode.COUNTRY_CODE;
185        }
186
187        private String searchByString(String location) throws EnforcedWaitException {
188                geonamesLastCall.enforce();
189                ToponymSearchResult x;
190                try {
191                        ToponymSearchCriteria search = new ToponymSearchCriteria();
192                        search.setQ(location);
193                        x = WebService.search(search);
194                        if(x.getTotalResultsCount() == 0)
195                        {
196                                return "";
197                        }
198                        return x.getToponyms()
199                                        .get(0)
200                                        .getCountryCode();
201                } catch (Exception e) {
202                        return "";
203                }
204        }
205
206
207
208        private String countryCodeByGeo(double[] geo) throws EnforcedWaitException {
209                try {
210                        geonamesLastCall.enforce();
211                        return WebService.countryCode(geo[0], geo[1]);
212                } catch (IOException e) {
213                        return null;
214                }
215        }
216}