001package org.openimaj.picslurper;
003import java.io.File;
004import java.io.FileInputStream;
005import java.io.FileNotFoundException;
006import java.io.IOException;
007import java.io.InputStream;
008import java.util.ArrayList;
009import java.util.Iterator;
010import java.util.List;
011import java.util.Properties;
013import org.apache.log4j.Logger;
014import org.kohsuke.args4j.CmdLineException;
015import org.kohsuke.args4j.CmdLineParser;
016import org.kohsuke.args4j.Option;
017import org.kohsuke.args4j.ProxyOptionHandler;
018import org.openimaj.io.FileUtils;
019import org.openimaj.picslurper.output.OutputListener;
020import org.openimaj.picslurper.output.OutputListenerMode;
021import org.openimaj.text.nlp.TweetTokeniserException;
022import org.openimaj.tools.FileToolsUtil;
023import org.openimaj.tools.InOutToolOptions;
025import twitter4j.Status;
/**
 * A tool for slurping images off Twitter.
 *
 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
 *
 */
033public class PicSlurper extends InOutToolOptions implements Iterable<InputStream>, Iterator<InputStream> {
035        private static Logger logger = Logger.getLogger(PicSlurper.class);
037        String[] args;
038        boolean stdin;
039        List<File> inputFiles;
040        boolean stdout;
041        File outputLocation;
042        File globalStatus;
043        Iterator<File> fileIterator;
044        File inputFile;
045        private static final String STATUS_FILE_NAME = "status.txt";
046        /**
047         * System property name for whether twitter console login should be allowed
048         */
049        public static final String ALLOW_CONSOLE_LOGIN = "twitter.console_login";
051        @Option(
052                        name = "--encoding",
053                        aliases = "-e",
054                        required = false,
055                        usage = "The outputstreamwriter's text encoding",
056                        metaVar = "STRING")
057        String encoding = "UTF-8";
059        @Option(
060                        name = "--no-stats",
061                        aliases = "-ns",
062                        required = false,
063                        usage = "Don't try to keep stats of the tweets seen",
064                        metaVar = "STRING")
065        boolean stats = true;
067        @Option(
068                        name = "--no-threads",
069                        aliases = "-j",
070                        required = false,
071                        usage = "Threads used to download images, defaults to n CPUs",
072                        metaVar = "STRING")
073        int nThreads = Runtime.getRuntime().availableProcessors();
075        @Option(
076                        name = "--use-oauth-stream",
077                        aliases = "-oauth",
078                        required = false,
079                        usage = "Force the useage of twitter oauth to access the stream using the twitter4j api")
080        boolean forceTwitter4J = false;
082        @Option(
083                        name = "--database",
084                        aliases = "-d",
085                        required = false,
086                        usage = "Force the use of a database",
087                        metaVar = "URL"
088        )
089        String database = null;
091        @Option(
092                        name = "--databaseTable",
093                        aliases = "-dt",
094                        required = false,
095                        usage = "When using a database, specify the database table",
096                        metaVar = "STRING" )
097        String databaseTable = null;
099        @Option(
100                        name = "--databaseUser",
101                        aliases = "-du",
102                        required = false,
103                        usage = "When using a database, specify the database username",
104                        metaVar = "STRING" )
105        String databaseUser = null;
107        @Option(
108                        name = "--databasePassword",
109                        aliases = "-dp",
110                        required = false,
111                        usage = "When using a database, specify the database password",
112                        metaVar = "STRING" )
113        String databasePW = null;
115        @Option(
116                        name = "--output-listener",
117                        aliases = "-ol",
118                        required = false,
119                        usage = "Add an output listener which gets told about each image downloaded, its location, tweet and url",
120                        handler = ProxyOptionHandler.class,
121                        multiValued = true)
122        List<OutputListenerMode> outputListenerMode = new ArrayList<OutputListenerMode>();
123        List<OutputListener> outputListenerModeOp = new ArrayList<OutputListener>();
125        private StatusFeeder statusFeeder;
127        /**
128         * @param args
129         *            tool arguments
130         */
131        public PicSlurper(final String[] args) {
132                this.args = args;
133        }
135        /**
136         * no args
137         */
138        public PicSlurper() {
139                this.args = new String[] {};
140        }
142        /**
143         * prepare the tool for running
144         */
145        public void prepare() {
146                final CmdLineParser parser = new CmdLineParser(this);
147                try {
148                        parser.parseArgument(this.args);
149                        this.validate();
150                } catch (final CmdLineException e) {
151                        e.printStackTrace();
152                        System.err.println(e.getMessage());
153                        System.err.println("Usage: java -jar PicSlurper.jar [options...] ");
154                        parser.printUsage(System.err);
155                        System.err.println(this.getExtractUsageInfo());
156                        System.exit(1);
157                }
158        }
160        String getExtractUsageInfo() {
161                return "Grab some images and some stats";
162        }
164        void validate() throws CmdLineException {
165                try {
166                        if( this.database != null )
167                        {
168                                this.statusFeeder = new DatabaseStatusFeeder( this.database,
169                                                this.databaseTable, this.databaseUser, this.databasePW );
170                        }
171                        else
172                        if (this.forceTwitter4J) {
173                                this.statusFeeder = new Twitter4JStreamFeeder();
174                        } else {
175                                this.statusFeeder = new InputStreamFeeder(this);
176                        }
177                        if (FileToolsUtil.isStdout(this)) {
178                                this.stdout = true;
179                        } else {
180                                this.outputLocation = PicSlurper.validateLocalOutput(this.getOutput(), this.isForce(), !this.isContinue());
181                                this.outputLocation.mkdirs();
182                                this.globalStatus = new File(this.outputLocation, PicSlurper.STATUS_FILE_NAME);
183                                // init the output file
184                                PicSlurperUtils.updateStats(this.globalStatus, new StatusConsumption());
185                        }
187                        for (final OutputListener listener : this.outputListenerModeOp) {
188                                listener.prepare();
189                        }
190                } catch (final Exception e) {
191                        e.printStackTrace();
192                        throw new CmdLineException(null, e.getMessage());
193                }
194        }
196        /**
197         * Validate the (local) ouput from an String and return the corresponding
198         * file.
199         *
200         * @param out
201         *            where the file will go
202         * @param overwrite
203         *            whether to overwrite existing files
204         * @param contin
205         *            whether an existing output should be continued (i.e. ignored
206         *            if it exists)
207         * @return the output file location, deleted if it is allowed to be deleted
208         * @throws IOException
209         *             if the file exists, but can't be deleted
210         */
211        public static File validateLocalOutput(final String out, final boolean overwrite, final boolean contin) throws IOException {
212                if (out == null) {
213                        throw new IOException("No output specified");
214                }
215                final File output = new File(out);
216                if (output.exists()) {
217                        if (overwrite) {
218                                if (!FileUtils.deleteRecursive(output))
219                                        throw new IOException("Couldn't delete existing output");
220                        } else if (!contin) {
221                                throw new IOException("Output already exists, didn't remove");
222                        }
223                }
224                return output;
225        }
227        @Override
228        public boolean hasNext() {
229                if (!this.stdin) {
230                        if (this.fileIterator == null)
231                                return false;
232                        return this.fileIterator.hasNext();
233                }
234                return true;
235        }
237        @Override
238        public InputStream next() {
239                if (this.stdin) {
240                        this.stdin = false;
241                        return System.in;
242                }
243                if (this.fileIterator == null)
244                        return null;
245                if (this.fileIterator.hasNext()) {
246                        this.inputFile = this.fileIterator.next();
247                        try {
248                                return new FileInputStream(this.inputFile);
249                        } catch (final FileNotFoundException e) {
250                        }
251                } else
252                        this.inputFile = null;
253                return null;
254        }
256        @Override
257        public void remove() {
258                throw new UnsupportedOperationException();
259        }
261        /**
262         * @param status
263         *            handle this status
264         */
265        public void handleStatus(final Status status) {
266                StatusConsumer consumer;
267                try {
268                        consumer = new StatusConsumer(this.stats, this.globalStatus, this.outputLocation, this.outputListenerModeOp);
269                        consumer.consume(status);
271                } catch (final Exception e) {
272                        PicSlurper.logger.error("Some error with the statusconsumer: " + e.getMessage());
273                }
274        }
276        @Override
277        public Iterator<InputStream> iterator() {
278                return this;
279        }
281        /**
282         * @param args
283         * @throws IOException
284         * @throws TweetTokeniserException
285         * @throws InterruptedException
286         */
287        public static void main(final String[] args) throws IOException, TweetTokeniserException, InterruptedException {
288                // Load the config
289                PicSlurper.loadConfig();
290                final PicSlurper slurper = new PicSlurper(args);
291                slurper.prepare();
292                slurper.start();
293        }
295        private void start() throws IOException {
296                this.statusFeeder.feedStatus(this);
298        }
300        /**
301         * Load the configuration file which looks for twitter usernames and
302         * passwords. If this can't be found or the values can't be found then
303         * System.in is used to get the username and password
304         *
305         * @throws FileNotFoundException
306         * @throws IOException
307         */
308        public static void loadConfig() throws FileNotFoundException, IOException {
309                final File configFile = new File("config.properties");
310                PicSlurper.logger.debug("Looking for config file: " + configFile.getAbsolutePath());
311                if (configFile.exists()) {
312                        final Properties prop = System.getProperties();
313                        prop.load(new FileInputStream(configFile));
314                        System.setProperties(prop);
315                } else {
316                        // File not found, try looking for the resource!
317                        final Properties prop = System.getProperties();
318                        final InputStream propStream = PicSlurper.class.getResourceAsStream("/config.properties");
319                        if (propStream != null) {
320                                prop.load(propStream);
321                        }
322                        System.setProperties(prop);
323                }
325                // System.setProperty("org.apache.commons.logging.Log",
326                // "org.apache.commons.logging.impl.SimpleLog");
327                // System.setProperty("org.apache.commons.logging.simplelog.showdatetime",
328                // "true");
329                // System.setProperty("org.apache.commons.logging.simplelog.log.httpclient.wire.header",
330                // "debug");
331                // System.setProperty("org.apache.commons.logging.simplelog.log.org.apache.commons.httpclient",
332                // "debug");
333                // checkTwitterCredentials();
334        }
335        //
336        // private static void checkTwitterCredentials() throws IOException {
337        // final String user = System.getProperty("twitter.user");
338        // final String password = System.getProperty("twitter.password");
339        // final String consoleLogin =
340        // System.getProperty(PicSlurper.ALLOW_CONSOLE_LOGIN);
341        // if (user != null && password != null || (consoleLogin != null &&
342        // !Boolean.parseBoolean(consoleLogin)))
343        // return;
344        // final Console console = System.console();
345        // final String credentialsMessage =
346        // "Could not find twitter credentials. Taking from input. You can add these to a config.properties file to save time.\n";
347        // final String usernameMessage = "Twitter username: ";
348        // final String passwordMessage = "Twitter password: ";
349        // if (console != null) {
350        // console.printf(credentialsMessage);
351        // console.printf(usernameMessage);
352        // System.setProperty("twitter.user", console.readLine());
353        // console.printf(passwordMessage);
354        // System.setProperty("twitter.password",
355        // String.copyValueOf(console.readPassword()));
356        // } else {
357        // logger.debug(credentialsMessage);
358        // logger.debug(usernameMessage);
359        // final BufferedReader reader = new BufferedReader(new
360        // InputStreamReader(System.in));
361        // System.setProperty("twitter.user", reader.readLine());
362        // logger.debug(passwordMessage);
363        // System.setProperty("twitter.password", reader.readLine());
364        // }
365        //
366        // }