001/**
002 * Copyright (c) 2012, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.tools.twitter;
031
032import java.io.IOException;
033import java.io.PrintWriter;
034import java.util.List;
035
036import org.kohsuke.args4j.CmdLineException;
037import org.openimaj.tools.twitter.modes.output.TwitterOutputMode;
038import org.openimaj.tools.twitter.modes.preprocessing.TwitterPreprocessingMode;
039import org.openimaj.tools.twitter.options.TwitterPreprocessingToolOptions;
040import org.openimaj.twitter.USMFStatus;
041import org.openimaj.twitter.collection.TwitterStatusList;
042import org.openimaj.utils.threads.WatchedRunner;
043
044/**
045 * A tool for applying preprocessing to a set of tweets and outputting the results in json
046 *
047 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
048 *
049 */
050public class TwitterPreprocessingTool
051{
052        static TwitterPreprocessingToolOptions options;
053
054        /**
055         * Run the tool
056         * @param args
057         * @throws IOException
058         */
059        public static void main(String[] args) throws IOException {
060                try {
061                        options = new TwitterPreprocessingToolOptions(args);
062                } catch (CmdLineException e1) {
063                        System.err.println(e1.getMessage());
064                        System.err.println("Usage: java -jar JClusterQuantiser.jar [options...] [files...]");
065                        e1.getParser().printUsage(System.err);
066                        System.exit(1);
067                }
068                TwitterOutputMode outputMode;
069                final List<TwitterPreprocessingMode<?>> modes;
070                try {
071                        modes = options.preprocessingMode();
072                        outputMode = options.ouputMode();
073                        outputMode.delimit("\n");
074                } catch (Exception e) {
075                        System.err.println("Could not create processing mode!");
076                        e.printStackTrace();
077                        return;
078                }
079
080                while(options.hasNextFile()){
081                        options.nextFile();
082                        options.progress("Preparing tweets\n");
083                        TwitterStatusList<USMFStatus> tweets = options.getTwitterStatusList();
084                        options.progress("Processing " + tweets.size() + " tweets\n");
085
086                        long done = 0;
087                        long skipped = 0;
088                        long start = System.currentTimeMillis();
089                        PrintWriter oWriter = options.outputWriter();
090                        for (final USMFStatus twitterStatus : tweets) {
091                                if(twitterStatus.isInvalid() || twitterStatus.text.isEmpty()){
092                                        if(options.veryLoud()){
093                                                System.out.println("\nTWEET INVALID, skipping.");
094                                        }
095                                        continue;
096                                }
097                                if(options.veryLoud()){
098                                        System.out.println("\nPROCESSING TWEET");
099                                        System.out.println(twitterStatus);
100                                }
101
102                                if(options.preProcessesSkip(twitterStatus)) continue;
103
104                                WatchedRunner runner = new WatchedRunner(options.getTimeBeforeSkip()){
105                                        @Override
106                                        public void doTask() {
107                                                for (TwitterPreprocessingMode<?> mode : modes) {
108                                                        try {
109                                                                TwitterPreprocessingMode.results(twitterStatus, mode);
110                                                        } catch (Exception e) {
111                                                                System.err.println("Mode failed: " + mode);
112                                                        }
113                                                }
114                                        }
115                                };
116                                runner.go();
117                                if(runner.taskCompleted()){
118                                        done++;
119                                        options.progress("\rDone: " + done);
120
121
122                                        if(!options.postProcessesSkip(twitterStatus))
123                                        {
124                                                outputMode.output(options.convertToOutputFormat(twitterStatus),oWriter);
125                                                oWriter.flush();
126                                        }
127                                }
128                                else{
129                                        skipped ++;
130                                }
131                                if(skipped > 0){
132                                        options.progress(" (Skipped: " + skipped + ") ");
133                                }
134
135
136
137                        }
138                        long end = System.currentTimeMillis();
139                        options.progress(String.format("\nTook: %d\n",(end-start)));
140                        options.progress("Done!\n");
141                }
142                options.outputWriter().flush();
143                options.outputWriter().close();
144        }
145}