/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * 	Redistributions of source code must retain the above copyright notice,
 * 	this list of conditions and the following disclaimer.
 *
 *   *	Redistributions in binary form must reproduce the above copyright notice,
 * 	this list of conditions and the following disclaimer in the documentation
 * 	and/or other materials provided with the distribution.
 *
 *   *	Neither the name of the University of Southampton nor the names of its
 * 	contributors may be used to endorse or promote products derived from this
 * 	software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.hadoop.sequencefile;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.Metadata;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.Tool;

/**
 * Utility functions for storing and retrieving metadata to be stored in a
 * {@link SequenceFile} by the {@link MetadataSequenceFileOutputFormat}.
 * <p>
 * Standard usage would be to use the {@link #setMetadata(Map, Configuration)}
 * method to add the given metadata to the {@link Configuration} in the part of
 * the code that runs locally (i.e. in a {@link Tool#run(String[])} method). The
 * configuration would then be distributed across the cluster, and any mappers
 * or reducers that use the {@link MetadataSequenceFileOutputFormat} will
 * automatically have the metadata added to their output file(s).
 * <p>
 * Each metadata value is stored under its own configuration property (the
 * key prefixed with an internal namespace), and the list of known keys is
 * kept in one additional property written with
 * {@link Configuration#setStrings(String, String...)}.
 * NOTE(review): that list is presumably comma-delimited, in which case keys
 * containing commas would not round-trip - confirm against the Hadoop
 * version in use.
 *
 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
 *
 */
public class MetadataConfiguration {
	private static final String META_PREFIX = "org.openimaj.hadoop.sequencefile.metadata.";
	private static final String META_KEYS = META_PREFIX + "__metadataKeys__";

	/**
	 * Standard key for a unique identifier metadata item
	 */
	public static final String UUID_KEY = "UUID";

	/**
	 * Standard key for a comment metadata item
	 */
	public static final String COMMENT_KEY = "Comment";

	/**
	 * Standard key for storing an indicator of the mime-type of the value
	 * fields
	 */
	public static final String CONTENT_TYPE_KEY = "ContentType";

	private MetadataConfiguration() {
	}

	/**
	 * Read any metadata stored in the {@link Configuration}.
	 * <p>
	 * Keys that are listed but have no stored value are skipped. If no
	 * metadata has been stored, an empty {@link Metadata} object is returned.
	 *
	 * @param conf
	 *            the configuration
	 * @return the metadata map
	 */
	public static Metadata getMetadata(Configuration conf) {
		final Metadata metadata = new Metadata();

		final String[] keys = conf.getStrings(META_KEYS);

		if (keys != null) {
			for (final String key : keys) {
				final String value = conf.get(META_PREFIX + key);

				if (value != null) {
					metadata.set(new Text(key), new Text(value));
				}
			}
		}

		return metadata;
	}

	/**
	 * Write the given metadata to the {@link Configuration}.
	 * <p>
	 * Metadata already present in the configuration is retained; values for
	 * keys that are given again are overwritten. Each key is recorded only
	 * once in the stored key list, however many times it is set.
	 *
	 * @param metadata
	 *            the metadata.
	 * @param conf
	 *            the configuration.
	 */
	public static void setMetadata(Map<String, String> metadata, Configuration conf) {
		final List<String> keys = new ArrayList<String>();

		// Start from the keys that are already registered, read the same way
		// getMetadata reads them, dropping any duplicates left by earlier
		// writes.
		final String[] existing = conf.getStrings(META_KEYS);
		if (existing != null) {
			for (final String key : existing) {
				addIfAbsent(keys, key);
			}
		}

		for (final Entry<String, String> entry : metadata.entrySet()) {
			conf.set(META_PREFIX + entry.getKey(), entry.getValue());

			// Only register the key once so that repeated calls with the same
			// key do not grow the stored key list.
			addIfAbsent(keys, entry.getKey());
		}

		conf.setStrings(META_KEYS, keys.toArray(new String[keys.size()]));
	}

	/**
	 * Append the key to the list unless the list already contains it.
	 */
	private static void addIfAbsent(List<String> keys, String key) {
		if (!keys.contains(key)) {
			keys.add(key);
		}
	}

}