View Javadoc

1   /**
2    * Copyright (c) 2011, The University of Southampton and the individual contributors.
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without modification,
6    * are permitted provided that the following conditions are met:
7    *
8    *   * 	Redistributions of source code must retain the above copyright notice,
9    * 	this list of conditions and the following disclaimer.
10   *
11   *   *	Redistributions in binary form must reproduce the above copyright notice,
12   * 	this list of conditions and the following disclaimer in the documentation
13   * 	and/or other materials provided with the distribution.
14   *
15   *   *	Neither the name of the University of Southampton nor the names of its
16   * 	contributors may be used to endorse or promote products derived from this
17   * 	software without specific prior written permission.
18   *
19   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22   * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23   * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26   * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28   * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29   */
30  package org.openimaj.hadoop.tools.globalfeature;
31  
32  import java.io.ByteArrayInputStream;
33  import java.io.ByteArrayOutputStream;
34  import java.util.ArrayList;
35  import java.util.Arrays;
36  import java.util.HashMap;
37  import java.util.List;
38  import java.util.Map;
39  
40  import org.apache.hadoop.conf.Configured;
41  import org.apache.hadoop.fs.Path;
42  import org.apache.hadoop.io.BytesWritable;
43  import org.apache.hadoop.io.SequenceFile;
44  import org.apache.hadoop.io.Text;
45  import org.apache.hadoop.mapreduce.Job;
46  import org.apache.hadoop.mapreduce.Mapper;
47  import org.apache.hadoop.util.Tool;
48  import org.apache.hadoop.util.ToolRunner;
49  import org.apache.log4j.Logger;
50  import org.openimaj.feature.FeatureVector;
51  import org.openimaj.hadoop.mapreduce.TextBytesJobUtil;
52  import org.openimaj.hadoop.sequencefile.MetadataConfiguration;
53  import org.openimaj.hadoop.tools.HadoopToolsUtil;
54  import org.openimaj.image.ImageUtilities;
55  import org.openimaj.image.MBFImage;
56  import org.openimaj.io.IOUtils;
57  
58  /**
59   * A Hadoop version of the GlobalFeaturesTool. Capable of extracting global
60   * image features on very large scale corpora from images stored in
61   * {@link SequenceFile}s.
62   *
63   * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
64   */
65  public class HadoopGlobalFeaturesTool extends Configured implements Tool
66  {
67  	private static final String ARGS_KEY = "globalfeatures.args";
68  	private static Logger logger = Logger.getLogger(HadoopGlobalFeaturesTool.class);
69  
70  	static class GlobalFeaturesMapper extends Mapper<Text, BytesWritable, Text, BytesWritable> {
71  		private HadoopGlobalFeaturesOptions options;
72  
73  		public GlobalFeaturesMapper() {
74  		}
75  
76  		@Override
77  		protected void setup(Mapper<Text, BytesWritable, Text, BytesWritable>.Context context) {
78  			options = new HadoopGlobalFeaturesOptions(context.getConfiguration().getStrings(ARGS_KEY));
79  		}
80  
81  		@Override
82  		protected void
83  				map(Text key, BytesWritable value, Mapper<Text, BytesWritable, Text, BytesWritable>.Context context)
84  						throws InterruptedException
85  		{
86  			try {
87  				final MBFImage img = ImageUtilities.readMBF(new ByteArrayInputStream(value.getBytes()));
88  				final FeatureVector fv = options.featureOp.extract(img);
89  
90  				final ByteArrayOutputStream baos = new ByteArrayOutputStream();
91  				if (options.binary)
92  					IOUtils.writeBinary(baos, fv);
93  				else
94  					IOUtils.writeASCII(baos, fv);
95  
96  				context.write(key, new BytesWritable(baos.toByteArray()));
97  			} catch (final Exception e) {
98  				logger.warn("Problem processing image " + key + " (" + e + ")");
99  			}
100 		}
101 	}
102 
103 	@Override
104 	public int run(String[] args) throws Exception {
105 		final HadoopGlobalFeaturesOptions options = new HadoopGlobalFeaturesOptions(args, true);
106 
107 		final Map<String, String> metadata = new HashMap<String, String>();
108 		metadata.put(MetadataConfiguration.CONTENT_TYPE_KEY, "application/globalfeature-" + options.feature + "-"
109 				+ (options.binary ? "bin" : "ascii"));
110 
111 		metadata.put("clusterquantiser.filetype", (options.binary ? "bin" : "ascii"));
112 
113 		final List<Path> allPaths = new ArrayList<Path>();
114 		for (final String p : options.input) {
115 			allPaths.addAll(Arrays.asList(HadoopToolsUtil.getInputPaths(p)));
116 		}
117 
118 		final Job job = TextBytesJobUtil.createJob(allPaths, new Path(options.output), metadata, this.getConf());
119 		job.setJarByClass(this.getClass());
120 		job.setMapperClass(GlobalFeaturesMapper.class);
121 		job.getConfiguration().setStrings(ARGS_KEY, args);
122 		job.setNumReduceTasks(0);
123 
124 		job.waitForCompletion(true);
125 
126 		return 0;
127 	}
128 
129 	/**
130 	 * The main method for the tool.
131 	 * 
132 	 * @param args
133 	 *            the command-line arguments
134 	 * @throws Exception
135 	 *             if an error occurs
136 	 */
137 	public static void main(String[] args) throws Exception
138 	{
139 		ToolRunner.run(new HadoopGlobalFeaturesTool(), args);
140 	}
141 }