001/**
002 * Copyright (c) 2012, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.tools.twitter.modes.filter;
031
032import java.text.ParseException;
033import java.util.ArrayList;
034import java.util.List;
035import org.joda.time.DateTime;
036import org.joda.time.Interval;
037import org.joda.time.format.DateTimeFormat;
038import org.kohsuke.args4j.Option;
039import org.openimaj.twitter.USMFStatus;
040
041/**
042 * The grep functionality. Should only be used as a post filter most of the time
043 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
044 *
045 */
046public class DateFilter extends TwitterPreprocessingPredicate {
047        
048        @Option(name="--date-start", aliases="-from", required=false, usage="The start date", metaVar="STRING", multiValued=true)
049        String startDateStr;
050        DateTime startDate;
051        @Option(name="--end-start", aliases="-to", required=false, usage="The start date", metaVar="STRING")
052        String endDateStr;
053        DateTime endDate;
054        @Option(name="--date-range", aliases="-drng", required=false, usage="Comma delimited start,end date range", metaVar="STRING", multiValued=true)
055        List<String> dateRanges = new ArrayList<String>();
056        List<Interval> intervals = new ArrayList<Interval>();
057        
058        
059        
060        
061        @Override
062        public boolean test(USMFStatus twitterStatus) {
063                DateTime date;
064                
065                try {
066                        date = twitterStatus.createdAt();
067                } catch (ParseException e) {
068                        System.out.println("Failed to parse: " + twitterStatus);
069                        return false;
070                }
071                
072                if(date == null) {
073                        System.out.println("no date for: " + twitterStatus);
074                        return false;
075                }
076                // valid date, is it after the start and before the end?
077                
078                if(startDate!=null && date.isBefore(startDate)) {
079                        System.out.println(date + " is before " + startDate);
080                        return false;
081                }
082                if(endDate!=null && date.isAfter(endDate)) {
083                        System.out.println(date + " is after " + endDate);
084                        return false;
085                }
086                // We are both after the start and after the end, but are we within one of the intervals?
087                boolean match = this.intervals.size() == 0;
088                for (Interval  interval : this.intervals) {
089                        match = interval.contains(date);
090                        if(match) return match; //it is inside one of the intervals
091                }
092                
093                return match;
094        }
095        
096        @Override
097        public void validate() {
098                if(startDateStr != null){
099                        startDate = DateTimeFormat.forPattern("Y/M/d").parseDateTime(startDateStr);
100                }
101                
102                if(endDateStr != null){
103                        endDate = DateTimeFormat.forPattern("Y/M/d").parseDateTime(endDateStr);
104                }
105                
106                for (String dateRange : this.dateRanges) {
107                        String[] dRangeSplit = dateRange.split(",");
108                        if(dRangeSplit.length!=2){
109                                continue;
110                        }
111                        DateTime start = DateTimeFormat.forPattern("Y/M/d").parseDateTime(dRangeSplit[0]);
112                        DateTime end = DateTimeFormat.forPattern("Y/M/d").parseDateTime(dRangeSplit[1]);
113                        this.intervals.add(new Interval(start, end));
114                        
115                }
116        }
117
118}