/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the name of the University of Southampton nor the names of its
 *     contributors may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.hadoop.tools.fastkmeans;

import java.io.ByteArrayInputStream;
import java.io.IOException;

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.openimaj.tools.clusterquantiser.FileType;
import org.openimaj.tools.clusterquantiser.Header;

/**
 * Map Reduce job to count features. The mapper emits the feature count found
 * in the header of each record under a single constant key, and the reducer
 * adds those counts together.
 *
 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
 *
 */
public class FeatureCount {
    /** Configuration key holding the name of the {@link FileType} to use. */
    private static final String FILETYPE_KEY = "clusterquantiser.FileType";

    /**
     * Map class. Emits number of features per record.
     *
     * @author Sina Samangooei (ss@ecs.soton.ac.uk)
     *
     */
    public static class Map extends Mapper<Text, BytesWritable, Text, IntWritable>
    {
        /** Constant output key; every record's count is written under it. */
        private final Text word = new Text("total");

        /**
         * Reusable output value. Kept per mapper instance (not static) so that
         * several mapper instances living in one JVM cannot overwrite each
         * other's count between {@code set} and {@code write}.
         */
        private final IntWritable nfeatures = new IntWritable(1);

        /**
         * File type used to parse record headers. Kept per mapper instance and
         * re-read in {@link #setup} so a value cached by an earlier job in the
         * same JVM is never reused.
         */
        private FileType fileType = null;

        /**
         * Reads the file type named by the {@code clusterquantiser.FileType}
         * configuration entry of the current job.
         *
         * @param context
         *            the task context providing the job configuration
         * @throws IOException
         *             declared by the overridden method
         * @throws InterruptedException
         *             declared by the overridden method
         */
        @Override
        protected void setup(Mapper<Text, BytesWritable, Text, IntWritable>.Context context) throws IOException,
                InterruptedException
        {
            fileType = FileType.valueOf(context.getConfiguration().get(FILETYPE_KEY));
        }

        /**
         * Parses the header of one record and emits its feature count under
         * the constant key.
         *
         * @param key
         *            the record key (not used)
         * @param value
         *            the raw bytes of the record
         * @param context
         *            the task context the count is written to
         * @throws IOException
         *             if the header cannot be read or the output cannot be
         *             written
         * @throws InterruptedException
         *             if writing the output is interrupted
         */
        @Override
        public void map(Text key, BytesWritable value, Context context) throws IOException, InterruptedException {
            // getBytes() exposes the backing buffer, which may be longer than
            // the record; only the first getLength() bytes belong to it.
            final ByteArrayInputStream recordStream =
                    new ByteArrayInputStream(value.getBytes(), 0, value.getLength());
            final Header input = fileType.readHeader(recordStream);
            nfeatures.set(input.nfeatures);
            context.write(word, nfeatures);
        }
    }

    /**
     * Reduce class. Combines counts per map record into overall sum.
     *
     * @author Sina Samangooei (ss@ecs.soton.ac.uk)
     *
     */
    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        /**
         * Adds up all counts received for a key and writes the total.
         *
         * @param key
         *            the key the counts were emitted under
         * @param values
         *            the counts to add together
         * @param context
         *            the task context the total is written to
         * @throws IOException
         *             if the total does not fit in an {@code int}, or the
         *             output cannot be written
         * @throws InterruptedException
         *             if writing the output is interrupted
         */
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException,
                InterruptedException
        {
            // Accumulate in a long so that a total beyond the int range is
            // detected instead of silently wrapping around.
            long sum = 0;
            for (final IntWritable val : values) {
                sum += val.get();
            }
            if (sum > Integer.MAX_VALUE || sum < Integer.MIN_VALUE) {
                throw new IOException("Feature count for key '" + key + "' does not fit in an int: " + sum);
            }
            context.write(key, new IntWritable((int) sum));
        }
    }
}