001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.hadoop.tools.sequencefile.index;
031
032import java.io.IOException;
033import java.net.URI;
034
035import org.apache.hadoop.conf.Configuration;
036import org.apache.hadoop.fs.FileSystem;
037import org.apache.hadoop.fs.LocalFileSystem;
038import org.apache.hadoop.fs.Path;
039import org.kohsuke.args4j.CmdLineException;
040import org.kohsuke.args4j.CmdLineParser;
041import org.kohsuke.args4j.Option;
042import org.openimaj.hadoop.sequencefile.SequenceFileUtility;
043
044
045public class SequenceFileIndexerOptions {
046
047        private static String EXTRA_USAGE_INFO;
048
049        private String[] args;
050        
051        @Option(name="--remove", aliases="-rm", required=false, usage="Remove the existing output location if it exists.", metaVar="BOOLEAN")
052        private boolean replace = false;
053
054        public SequenceFileIndexerOptions(String[] args) {
055                this.args = args;
056        }
057
058        public void prepare() {
059                CmdLineParser parser = new CmdLineParser(this);
060                try {
061                        parser.parseArgument(args);
062                        this.validate();
063                } catch (CmdLineException e) {
064                        System.err.println(e.getMessage());
065                        System.err.println("Usage: java -jar JClusterQuantiser.jar [options...] [files...]");
066                        parser.printUsage(System.err);
067                        System.err.print(SequenceFileIndexerOptions.EXTRA_USAGE_INFO);
068                        
069                        System.exit(1);
070                }
071                
072        }
073        
074        private void validate() {
075                if(replace){
076                        try {
077                                URI outuri = SequenceFileUtility.convertToURI(this.getOutputString());
078                                FileSystem fs = getFileSystem(outuri);
079                                fs.delete(new Path(outuri.toString()), true);
080                        } catch (IOException e) {
081                                
082                        }
083                }
084        }
085
086        public static FileSystem getFileSystem(URI uri) throws IOException {
087                Configuration config = new Configuration();
088                FileSystem fs = FileSystem.get(uri, config);
089                if (fs instanceof LocalFileSystem) fs = ((LocalFileSystem)fs).getRaw();
090                return fs;
091        }
092
093        @Option(name="--input", aliases="-i", required=true, usage="Input Sequence File.", metaVar="STRING")
094        private String input;
095        
096        @Option(name="--output", aliases="-o", required=true, usage="Output Index File or URL.", metaVar="STRING")
097        private String output;
098        
099        
100        public String getInputString() {
101                return input;
102        }
103
104        public String getOutputString() {
105                return output;
106        }
107        
108        public Path[] getInputPaths() throws IOException {
109                Path[] sequenceFiles = SequenceFileUtility.getFilePaths(this.getInputString(), "part");
110                return sequenceFiles;
111        }
112
113        
114
115        public Path getOutputPath() {
116                return new Path(SequenceFileUtility.convertToURI(this.getOutputString()).toString());
117        }
118
119}