package org.openimaj.picslurper;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;

import org.apache.log4j.Logger;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import org.kohsuke.args4j.ProxyOptionHandler;
import org.openimaj.io.FileUtils;
import org.openimaj.picslurper.output.OutputListener;
import org.openimaj.picslurper.output.OutputListenerMode;
import org.openimaj.text.nlp.TweetTokeniserException;
import org.openimaj.tools.FileToolsUtil;
import org.openimaj.tools.InOutToolOptions;

import twitter4j.Status;

/**
 * A tool for slurping images off twitter.
 * <p>
 * The instance doubles as its own {@link Iterator} over input streams: it
 * hands out {@link System#in} once when {@link #stdin} is set, and otherwise
 * opens the files supplied by {@link #fileIterator} one at a time.
 *
 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
 *
 */
public class PicSlurper extends InOutToolOptions implements Iterable<InputStream>, Iterator<InputStream> {

	private static final Logger logger = Logger.getLogger(PicSlurper.class);

	/** The raw command line arguments handed to the constructor. */
	String[] args;
	/** When true, {@link #next()} returns {@link System#in} (once). */
	boolean stdin;
	/** Not assigned in this class; presumably populated by collaborators in this package. */
	List<File> inputFiles;
	/** Set by {@link #validate()} when the tool options select stdout as the output. */
	boolean stdout;
	/** Output directory; only set by {@link #validate()} when not writing to stdout. */
	File outputLocation;
	/** The status file ({@value #STATUS_FILE_NAME}) inside {@link #outputLocation}. */
	File globalStatus;
	/** Source of input files for {@link #next()}; not assigned in this class. */
	Iterator<File> fileIterator;
	/** The file most recently taken from {@link #fileIterator}, or null once it is exhausted. */
	File inputFile;
	private static final String STATUS_FILE_NAME = "status.txt";
	/**
	 * System property name for whether twitter console login should be allowed
	 */
	public static final String ALLOW_CONSOLE_LOGIN = "twitter.console_login";

	@Option(
			name = "--encoding",
			aliases = "-e",
			required = false,
			usage = "The outputstreamwriter's text encoding",
			metaVar = "STRING")
	String encoding = "UTF-8";

	// NOTE(review): the field already defaults to true and a boolean flag
	// option normally sets its field to true when present, so it looks like
	// "--no-stats" cannot actually switch stats off -- confirm against the
	// args4j version in use before relying on this flag.
	@Option(
			name = "--no-stats",
			aliases = "-ns",
			required = false,
			usage = "Don't try to keep stats of the tweets seen",
			metaVar = "STRING")
	boolean stats = true;

	@Option(
			name = "--no-threads",
			aliases = "-j",
			required = false,
			usage = "Threads used to download images, defaults to n CPUs",
			metaVar = "STRING")
	int nThreads = Runtime.getRuntime().availableProcessors();

	@Option(
			name = "--use-oauth-stream",
			aliases = "-oauth",
			required = false,
			usage = "Force the useage of twitter oauth to access the stream using the twitter4j api")
	boolean forceTwitter4J = false;

	@Option(
			name = "--database",
			aliases = "-d",
			required = false,
			usage = "Force the use of a database",
			metaVar = "URL")
	String database = null;

	@Option(
			name = "--databaseTable",
			aliases = "-dt",
			required = false,
			usage = "When using a database, specify the database table",
			metaVar = "STRING")
	String databaseTable = null;

	@Option(
			name = "--databaseUser",
			aliases = "-du",
			required = false,
			usage = "When using a database, specify the database username",
			metaVar = "STRING")
	String databaseUser = null;

	@Option(
			name = "--databasePassword",
			aliases = "-dp",
			required = false,
			usage = "When using a database, specify the database password",
			metaVar = "STRING")
	String databasePW = null;

	@Option(
			name = "--output-listener",
			aliases = "-ol",
			required = false,
			usage = "Add an output listener which gets told about each image downloaded, its location, tweet and url",
			handler = ProxyOptionHandler.class,
			multiValued = true)
	List<OutputListenerMode> outputListenerMode = new ArrayList<OutputListenerMode>();
	/** Listeners prepared in {@link #validate()} and passed to every {@link StatusConsumer}. */
	List<OutputListener> outputListenerModeOp = new ArrayList<OutputListener>();

	/** Chosen in {@link #validate()} from the database / oauth options. */
	private StatusFeeder statusFeeder;

	/**
	 * @param args
	 *            tool arguments
	 */
	public PicSlurper(final String[] args) {
		this.args = args;
	}

	/**
	 * no args
	 */
	public PicSlurper() {
		this.args = new String[] {};
	}

	/**
	 * prepare the tool for running: parse the stored arguments and validate
	 * the resulting options. On a {@link CmdLineException} the error and the
	 * usage are printed to stderr and the JVM exits with status 1.
	 */
	public void prepare() {
		final CmdLineParser parser = new CmdLineParser(this);
		try {
			parser.parseArgument(this.args);
			this.validate();
		} catch (final CmdLineException e) {
			e.printStackTrace();
			System.err.println(e.getMessage());
			System.err.println("Usage: java -jar PicSlurper.jar [options...] ");
			parser.printUsage(System.err);
			System.err.println(this.getExtractUsageInfo());
			System.exit(1);
		}
	}

	/**
	 * @return the extra line printed after the usage in {@link #prepare()}
	 */
	String getExtractUsageInfo() {
		return "Grab some images and some stats";
	}

	/**
	 * Select the status feeder, set up the output location (unless writing
	 * to stdout) and prepare every output listener.
	 *
	 * @throws CmdLineException
	 *             carrying the message of any exception raised during setup
	 */
	void validate() throws CmdLineException {
		try {
			// Feeder priority: explicit database, then forced twitter4j
			// stream, then the plain input stream feeder.
			if (this.database != null) {
				this.statusFeeder = new DatabaseStatusFeeder(this.database,
						this.databaseTable, this.databaseUser, this.databasePW);
			} else if (this.forceTwitter4J) {
				this.statusFeeder = new Twitter4JStreamFeeder();
			} else {
				this.statusFeeder = new InputStreamFeeder(this);
			}

			if (FileToolsUtil.isStdout(this)) {
				this.stdout = true;
			} else {
				// NOTE(review): the third argument is the negation of
				// isContinue(); confirm this matches the intended meaning
				// of the option declared in InOutToolOptions.
				this.outputLocation = PicSlurper.validateLocalOutput(this.getOutput(), this.isForce(),
						!this.isContinue());
				this.outputLocation.mkdirs();
				this.globalStatus = new File(this.outputLocation, PicSlurper.STATUS_FILE_NAME);
				// init the output file
				PicSlurperUtils.updateStats(this.globalStatus, new StatusConsumption());
			}

			for (final OutputListener listener : this.outputListenerModeOp) {
				listener.prepare();
			}
		} catch (final Exception e) {
			e.printStackTrace();
			throw new CmdLineException(null, e.getMessage());
		}
	}

	/**
	 * Validate the (local) output from a String and return the corresponding
	 * file.
	 *
	 * @param out
	 *            where the file will go
	 * @param overwrite
	 *            whether to overwrite existing files
	 * @param contin
	 *            whether an existing output should be continued (i.e. ignored
	 *            if it exists)
	 * @return the output file location, deleted if it is allowed to be deleted
	 * @throws IOException
	 *             if no output was given, if the file exists but can't be
	 *             deleted, or if it exists and may be neither overwritten nor
	 *             continued
	 */
	public static File validateLocalOutput(final String out, final boolean overwrite, final boolean contin)
			throws IOException
	{
		if (out == null) {
			throw new IOException("No output specified");
		}
		final File output = new File(out);
		if (output.exists()) {
			if (overwrite) {
				if (!FileUtils.deleteRecursive(output)) {
					throw new IOException("Couldn't delete existing output");
				}
			} else if (!contin) {
				throw new IOException("Output already exists, didn't remove");
			}
		}
		return output;
	}

	/**
	 * @return true while stdin is still pending; otherwise whether the file
	 *         iterator (if there is one) has more files
	 */
	@Override
	public boolean hasNext() {
		if (this.stdin) {
			return true;
		}
		return this.fileIterator != null && this.fileIterator.hasNext();
	}

	/**
	 * Hand out the next input stream. {@link System#in} is returned exactly
	 * once when {@link #stdin} is set (the flag is cleared by this call);
	 * afterwards the files of {@link #fileIterator} are opened in order.
	 *
	 * @return the next stream, or null when there is no file iterator, the
	 *         iterator is exhausted, or the next file could not be opened (the
	 *         failure is logged)
	 */
	@Override
	public InputStream next() {
		if (this.stdin) {
			this.stdin = false;
			return System.in;
		}
		if (this.fileIterator == null) {
			return null;
		}
		if (!this.fileIterator.hasNext()) {
			this.inputFile = null;
			return null;
		}
		this.inputFile = this.fileIterator.next();
		try {
			return new FileInputStream(this.inputFile);
		} catch (final FileNotFoundException e) {
			// Keep the historical best-effort contract (null for an
			// unreadable file) but no longer hide the failure.
			PicSlurper.logger.error("Could not open input file: " + this.inputFile, e);
			return null;
		}
	}

	/**
	 * Not supported.
	 *
	 * @throws UnsupportedOperationException
	 *             always
	 */
	@Override
	public void remove() {
		throw new UnsupportedOperationException();
	}

	/**
	 * Consume a single status with a freshly constructed
	 * {@link StatusConsumer}. Any exception is logged and not propagated.
	 *
	 * @param status
	 *            handle this status
	 */
	public void handleStatus(final Status status) {
		try {
			final StatusConsumer consumer = new StatusConsumer(this.stats, this.globalStatus, this.outputLocation,
					this.outputListenerModeOp);
			consumer.consume(status);
		} catch (final Exception e) {
			// Pass the throwable along so the stack trace reaches the log.
			PicSlurper.logger.error("Some error with the statusconsumer: " + e.getMessage(), e);
		}
	}

	/**
	 * @return this instance, which is its own iterator
	 */
	@Override
	public Iterator<InputStream> iterator() {
		return this;
	}

	/**
	 * Load the configuration, build the tool from the arguments and run it.
	 *
	 * @param args
	 *            the command line arguments
	 * @throws IOException
	 * @throws TweetTokeniserException
	 * @throws InterruptedException
	 */
	public static void main(final String[] args) throws IOException, TweetTokeniserException, InterruptedException {
		// Load the config
		PicSlurper.loadConfig();
		final PicSlurper slurper = new PicSlurper(args);
		slurper.prepare();
		slurper.start();
	}

	/**
	 * Hand this slurper to the status feeder selected in {@link #validate()}.
	 *
	 * @throws IOException
	 */
	private void start() throws IOException {
		this.statusFeeder.feedStatus(this);
	}

	/**
	 * Load the configuration into the system properties. A
	 * <code>config.properties</code> file in the working directory is used if
	 * it exists; otherwise the <code>/config.properties</code> classpath
	 * resource is used if present. If neither exists the system properties
	 * are left unchanged. The stream read from is always closed.
	 *
	 * @throws FileNotFoundException
	 *             if the config file exists but cannot be opened
	 * @throws IOException
	 *             if the properties cannot be read
	 */
	public static void loadConfig() throws FileNotFoundException, IOException {
		final File configFile = new File("config.properties");
		PicSlurper.logger.debug("Looking for config file: " + configFile.getAbsolutePath());

		final InputStream propStream;
		if (configFile.exists()) {
			propStream = new FileInputStream(configFile);
		} else {
			// File not found, try looking for the resource!
			propStream = PicSlurper.class.getResourceAsStream("/config.properties");
		}

		final Properties prop = System.getProperties();
		if (propStream != null) {
			try {
				prop.load(propStream);
			} finally {
				// Properties.load leaves the stream open; close it here.
				propStream.close();
			}
		}
		System.setProperties(prop);

		// System.setProperty("org.apache.commons.logging.Log",
		// "org.apache.commons.logging.impl.SimpleLog");
		// System.setProperty("org.apache.commons.logging.simplelog.showdatetime",
		// "true");
		// System.setProperty("org.apache.commons.logging.simplelog.log.httpclient.wire.header",
		// "debug");
		// System.setProperty("org.apache.commons.logging.simplelog.log.org.apache.commons.httpclient",
		// "debug");
		// checkTwitterCredentials();
	}
	//
	// private static void checkTwitterCredentials() throws IOException {
	// final String user = System.getProperty("twitter.user");
	// final String password = System.getProperty("twitter.password");
	// final String consoleLogin =
	// System.getProperty(PicSlurper.ALLOW_CONSOLE_LOGIN);
	// if (user != null && password != null || (consoleLogin != null &&
	// !Boolean.parseBoolean(consoleLogin)))
	// return;
	// final Console console = System.console();
	// final String credentialsMessage =
	// "Could not find twitter credentials. Taking from input. You can add these to a config.properties file to save time.\n";
	// final String usernameMessage = "Twitter username: ";
	// final String passwordMessage = "Twitter password: ";
	// if (console != null) {
	// console.printf(credentialsMessage);
	// console.printf(usernameMessage);
	// System.setProperty("twitter.user", console.readLine());
	// console.printf(passwordMessage);
	// System.setProperty("twitter.password",
	// String.copyValueOf(console.readPassword()));
	// } else {
	// logger.debug(credentialsMessage);
	// logger.debug(usernameMessage);
	// final BufferedReader reader = new BufferedReader(new
	// InputStreamReader(System.in));
	// System.setProperty("twitter.user", reader.readLine());
	// logger.debug(passwordMessage);
	// System.setProperty("twitter.password", reader.readLine());
	// }
	//
	// }

}