001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.hadoop.tools.exif;
031
032import java.io.ByteArrayInputStream;
033import java.io.ByteArrayOutputStream;
034import java.io.File;
035import java.io.FileOutputStream;
036import java.io.IOException;
037import java.io.PrintWriter;
038import java.util.HashMap;
039import java.util.Map;
040
041import org.apache.hadoop.conf.Configured;
042import org.apache.hadoop.fs.Path;
043import org.apache.hadoop.io.BytesWritable;
044import org.apache.hadoop.io.IOUtils;
045import org.apache.hadoop.io.Text;
046import org.apache.hadoop.mapreduce.Job;
047import org.apache.hadoop.mapreduce.Mapper;
048import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
049import org.apache.hadoop.util.Tool;
050import org.apache.hadoop.util.ToolRunner;
051import org.openimaj.hadoop.mapreduce.TextBytesJobUtil;
052import org.openimaj.hadoop.sequencefile.MetadataConfiguration;
053import org.openimaj.hadoop.sequencefile.TextBytesSequenceFileUtility;
054
055import com.thebuzzmedia.exiftool.RDFExifTool;
056/**
057 * An EXIF extraction tool based on exiftool. Allows the location of exiftool on each machine to be specified. 
058 * Loads the images from a sequence file of <imageName,image>, loads each image into a temporary file, runs exif tool
059 * and outputs the exif information as another sequence file of <imageName, exifData> where exifData is <KEY "VALUE"\n,> 
060 * 
061 * 
062 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
063 *
064 */
065public class HadoopEXIF extends Configured implements Tool{
066        private static final String ARGS_KEY = "clusterquantiser.args";
067
068        public static class HadoopEXIFMapper extends Mapper<Text, BytesWritable, Text, BytesWritable>{
069                
070                
071                private RDFExifTool tool;
072                private HadoopEXIFOptions options;
073//              private static ExifTool tool;
074                public HadoopEXIFMapper(){}
075                
076                @Override
077                protected void setup(Mapper<Text, BytesWritable, Text, BytesWritable>.Context context)throws IOException, InterruptedException {
078                        options = new HadoopEXIFOptions(context.getConfiguration().getStrings(ARGS_KEY),false);
079                        options.prepare();
080                        System.setProperty("exiftool.path",options.getExifPath());
081                        tool = new RDFExifTool(options.getInputString());
082                }
083//              
084//              private synchronized static void loadExifTool(Mapper<Text, BytesWritable, Text, BytesWritable>.Context context) {
085//                      if(tool==null){
086//                              HadoopEXIFOptions options = new HadoopEXIFOptions(context.getConfiguration().getStrings(ARGS_KEY),false);
087//                              options.prepare();
088//                              System.setProperty("exiftool.path",options.getExifPath());
089//                              tool = new ExifTool();
090//                      }
091//              }
092
093                @Override
094                protected void map(Text key, BytesWritable value, Mapper<Text, BytesWritable, Text, BytesWritable>.Context context) throws java.io.IOException, InterruptedException 
095                {
096                        try{
097                                File tmp = File.createTempFile("prefix", ".image", new File("/tmp")); 
098                                FileOutputStream fos = new FileOutputStream(tmp);
099                                IOUtils.copyBytes(new ByteArrayInputStream(value.getBytes()), fos, context.getConfiguration());
100                                fos.close();
101                                
102                                
103                                ByteArrayOutputStream bos = new ByteArrayOutputStream();
104                                PrintWriter pw = new PrintWriter(bos);
105                                options.getOutputMode().output(pw, tmp, key.toString(), tool);
106                                tmp.delete();
107                                
108                                
109                                context.write(key, new BytesWritable(bos.toByteArray()));
110                        } catch(Throwable e) {
111                                System.err.println("... Problem with this image! Keeping Calm. Carrying on.");
112                                e.printStackTrace(System.err);
113                        }
114                }
115        }
116        
117        @Override
118        public int run(String[] args) throws Exception {
119                HadoopEXIFOptions options = new HadoopEXIFOptions(args,true);
120                options.prepare();
121//              String clusterFileString = options.getInputString();
122                Path[] paths = options.getInputPaths();
123                TextBytesSequenceFileUtility util = new TextBytesSequenceFileUtility(paths[0].toUri() , true);
124                Map<String,String> metadata = new HashMap<String,String>();
125                if (util.getUUID() != null) metadata.put(MetadataConfiguration.UUID_KEY, util.getUUID());
126                metadata.put(MetadataConfiguration.CONTENT_TYPE_KEY, "application/imageexif");
127                
128                Job job = TextBytesJobUtil.createJob(paths, options.getOutputPath(), metadata,this.getConf());
129//              job.setOutputValueClass(Text.class);
130                job.setJarByClass(this.getClass());
131                job.setMapperClass(HadoopEXIF.HadoopEXIFMapper.class);
132                job.getConfiguration().setStrings(ARGS_KEY, args);
133                job.setNumReduceTasks(0);
134                SequenceFileOutputFormat.setCompressOutput(job, false);
135                long start,end;
136                start = System.currentTimeMillis();
137                job.waitForCompletion(true);
138                end = System.currentTimeMillis();
139                System.out.println("Took: " + (end - start) + "ms");
140                return 0;
141        }
142        
143        public static void main(String[] args) throws Exception {
144                ToolRunner.run(new HadoopEXIF(), args);
145        }
146}