001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.hadoop.tools.twitter; 031 032import java.io.IOException; 033 034import org.apache.hadoop.fs.Path; 035import org.kohsuke.args4j.CmdLineException; 036import org.kohsuke.args4j.Option; 037import org.kohsuke.args4j.ProxyOptionHandler; 038import org.openimaj.hadoop.sequencefile.SequenceFileUtility; 039import org.openimaj.hadoop.tools.HadoopToolsUtil; 040import org.openimaj.tools.twitter.options.AbstractTwitterPreprocessingToolOptions; 041 042/** 043 * Hadoop specific options for twitter preprocessing 044 * 045 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 046 * 047 */ 048public class HadoopTwitterPreprocessingToolOptions extends AbstractTwitterPreprocessingToolOptions { 049 050 private boolean beforeMaps; 051 052 /** 053 * The hadoop options, assume these are the options before mapping 054 * 055 * @param args 056 * @throws CmdLineException 057 */ 058 public HadoopTwitterPreprocessingToolOptions(String[] args) throws CmdLineException { 059 this(args, false); 060 } 061 062 /** 063 * The hadoop twitter preprocessing options 064 * 065 * @param args 066 * command line optios 067 * @param beforeMaps 068 * if true, the output location is removed if the option to do so 069 * is set 070 * @throws CmdLineException 071 */ 072 public HadoopTwitterPreprocessingToolOptions(String[] args, boolean beforeMaps) throws CmdLineException { 073 super(args, false); // don't prepare using the superclass 074 this.beforeMaps = beforeMaps; 075 } 076 077 /* 078 * IO args 079 */ 080 @Option( 081 name = "--mapper-mode", 082 aliases = "-mm", 083 required = false, 084 usage = "Choose a mapper mode.", 085 handler = ProxyOptionHandler.class) 086 MapperMode mapperMode = MapperMode.STANDARD; 087 MapperMode.Mode mapperModeOp; 088 089 @Option(name = "--reudcer-mode", aliases = "-redm", required = false, usage = "Choose a reducer mode mode.") 090 ReducerModeOption reducerMode = ReducerModeOption.NULL; 091 092 @Option( 093 name = "--return-immediately", 094 aliases = "-ri", 095 required = false, 096 usage = "If set, the job is submitted to the cluster and this returns immediately") 097 boolean returnImmediately = false; 098 099 @Option( 100 name = "--lzo-compress", 101 aliases = "-lzoc", 102 required = false, 103 usage = "If set, compress the output of the preprocessing pipeline as LZO") 104 boolean lzoCompress = false; 105 106 @Override 107 public boolean validate() throws CmdLineException { 108 if (this.beforeMaps) { 109 HadoopToolsUtil.validateInput(this); 110 HadoopToolsUtil.validateOutput(this); 111 } 112 return true; 113 } 114 115 /** 116 * @return the list of input files 117 * @throws IOException 118 */ 119 public Path[] getInputPaths() throws IOException { 120 final Path[] sequenceFiles = SequenceFileUtility.getFilePaths(this.getAllInputs(), "part"); 121 return sequenceFiles; 122 } 123 124 /** 125 * @return the output path 126 */ 127 public Path getOutputPath() { 128 return new Path(this.getOutput()); 129 } 130 131}