/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * 	Redistributions of source code must retain the above copyright notice,
 * 	this list of conditions and the following disclaimer.
 *
 *   *	Redistributions in binary form must reproduce the above copyright notice,
 * 	this list of conditions and the following disclaimer in the documentation
 * 	and/or other materials provided with the distribution.
 *
 *   *	Neither the name of the University of Southampton nor the names of its
 * 	contributors may be used to endorse or promote products derived from this
 * 	software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.hadoop.tools.sequencefile.index;

import java.io.IOException;
import java.lang.reflect.Field;
import java.util.HashMap;

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.Reader;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.MapContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader;

/**
 * Mapper that emits, for every record key of a sequence file, a small
 * JSON-like description of where the record lives: the path of the file
 * backing the current split ("location") and the position reported by the
 * underlying {@link SequenceFile.Reader} ("offset").
 * <p>
 * The {@link SequenceFile.Reader} is not exposed by the map context, so it is
 * dug out reflectively in {@link #setup(Context)} through the private fields
 * "reader", "real" and "in". This relies on the internal layout of the Hadoop
 * classes involved and may need revisiting when the Hadoop version changes.
 */
public class RecordIndexMapper extends Mapper<Text, BytesWritable, Text, Text> {

	/** Low-level sequence file reader extracted from the context in {@link #setup(Context)}. */
	private SequenceFile.Reader reader;

	/**
	 * Locates the {@link SequenceFile.Reader} that feeds this mapper by
	 * walking private fields: context "reader" -> wrapper "real" ->
	 * {@link SequenceFileRecordReader} "in".
	 *
	 * @param context the mapper context
	 * @throws IOException if the reader cannot be reached; previously such
	 *             failures were only printed, which left the reader unset and
	 *             surfaced later as a NullPointerException in
	 *             {@link #map(Text, BytesWritable, Context)}
	 * @throws InterruptedException declared by the overridden method
	 */
	@Override
	protected void setup(Context context) throws IOException, InterruptedException {
		try {
			final Field readerField = MapContext.class.getDeclaredField("reader");
			readerField.setAccessible(true);
			final RecordReader<?, ?> trackingReader = (RecordReader<?, ?>) readerField.get(context);
			if (trackingReader == null) {
				throw new IOException("The map context does not hold a record reader");
			}

			// The context's reader is a wrapper; the actual reader sits in its "real" field.
			final Field realField = trackingReader.getClass().getDeclaredField("real");
			realField.setAccessible(true);
			final Object realReader = realField.get(trackingReader);
			if (!(realReader instanceof SequenceFileRecordReader)) {
				throw new IOException("Expected a SequenceFileRecordReader but found: "
						+ (realReader == null ? "null" : realReader.getClass().getName()));
			}

			// Look the field up on the declaring class so that subclasses of
			// SequenceFileRecordReader are handled as well.
			final Field inField = SequenceFileRecordReader.class.getDeclaredField("in");
			inField.setAccessible(true);
			this.reader = (Reader) inField.get(realReader);
			if (this.reader == null) {
				throw new IOException("The SequenceFileRecordReader has no underlying SequenceFile.Reader");
			}
		} catch (IllegalArgumentException e) {
			throw reflectionFailure(e);
		} catch (SecurityException e) {
			throw reflectionFailure(e);
		} catch (IllegalAccessException e) {
			throw reflectionFailure(e);
		} catch (NoSuchFieldException e) {
			throw reflectionFailure(e);
		}
	}

	/**
	 * Wraps a reflection problem in an {@link IOException}, keeping the cause.
	 *
	 * @param cause the original failure
	 * @return the exception to throw
	 */
	private static IOException reflectionFailure(Exception cause) {
		return new IOException("Unable to access the underlying SequenceFile.Reader via reflection", cause);
	}

	/**
	 * Writes one index entry for the given record key. Nothing is written
	 * when the current input split is not a {@link FileSplit}.
	 * <p>
	 * NOTE(review): the offset is read from the reader at the time this
	 * method is called, i.e. after the framework has already read the current
	 * record; it presumably points just past the record rather than at its
	 * start. Confirm against the consumers of the index.
	 *
	 * @param index the record key; used unchanged as the output key
	 * @param urlLine the record value (not inspected)
	 * @param context the mapper context used for the split and for output
	 * @throws IOException if the position cannot be read or the entry cannot
	 *             be written (previously swallowed, silently dropping the entry)
	 * @throws InterruptedException if writing the entry is interrupted
	 *             (previously swallowed)
	 */
	@Override
	public void map(Text index, BytesWritable urlLine, Context context) throws IOException, InterruptedException {
		final InputSplit inputSplit = context.getInputSplit();
		if (!(inputSplit instanceof FileSplit)) {
			return;
		}

		final FileSplit fileSplit = (FileSplit) inputSplit;
		final HashMap<String, String> metaInfo = new HashMap<String, String>();
		metaInfo.put("location", fileSplit.getPath().toString());
		// TODO: a "mimetype" entry was sketched here at some point (magic-number
		// detection on the record bytes) but was never enabled.
		metaInfo.put("offset", String.valueOf(this.reader.getPosition()));

		context.write(index, new Text(hashToString(metaInfo)));
	}

	/**
	 * Renders the map as a flat JSON object whose values are all strings,
	 * e.g. <code>{"location": "hdfs://host/file","offset": "128"}</code>.
	 * Entries appear in the iteration order of the map. Unlike the earlier
	 * implementation, no trailing comma is produced and keys and values are
	 * escaped.
	 *
	 * @param metaInfo the entries to render
	 * @return the JSON text
	 */
	private String hashToString(HashMap<String, String> metaInfo) {
		final StringBuilder json = new StringBuilder("{");
		boolean first = true;
		for (String key : metaInfo.keySet()) {
			if (!first) {
				json.append(',');
			}
			first = false;
			json.append('"').append(escapeJson(key)).append("\": \"")
					.append(escapeJson(metaInfo.get(key))).append('"');
		}
		return json.append('}').toString();
	}

	/**
	 * Escapes a value for use inside a double-quoted JSON string: backslash,
	 * double quote and control characters below U+0020.
	 *
	 * @param value the raw text; <code>null</code> is rendered as the text "null"
	 * @return the escaped text
	 */
	private static String escapeJson(String value) {
		final String text = String.valueOf(value);
		final StringBuilder escaped = new StringBuilder(text.length() + 8);
		for (int i = 0; i < text.length(); i++) {
			final char c = text.charAt(i);
			if (c == '"' || c == '\\') {
				escaped.append('\\').append(c);
			} else if (c < 0x20) {
				escaped.append(String.format("\\u%04x", (int) c));
			} else {
				escaped.append(c);
			}
		}
		return escaped.toString();
	}
}