001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.hadoop.tools.sequencefile; 031 032import java.awt.image.BufferedImage; 033import java.io.ByteArrayInputStream; 034import java.io.IOException; 035import java.util.ArrayList; 036import java.util.List; 037 038import javax.imageio.ImageIO; 039 040import org.apache.hadoop.fs.Path; 041import org.apache.hadoop.io.BytesWritable; 042import org.apache.hadoop.io.SequenceFile; 043import org.ontoware.rdf2go.model.node.impl.URIImpl; 044import org.openimaj.hadoop.sequencefile.RecordInformationExtractor; 045import org.semanticdesktop.aperture.mime.identifier.magic.MagicMimeTypeIdentifier; 046 047/** 048 * Options for controlling what is printed when listing the contents of a 049 * {@link SequenceFile} with the {@link SequenceFileTool}. 050 * 051 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 052 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 053 */ 054public enum ListModeOptions { 055 /** 056 * Print the record key 057 * 058 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 059 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 060 */ 061 KEY { 062 @Override 063 public RecordInformationExtractor getExtractor() { 064 return new RecordInformationExtractor() { 065 @Override 066 public <K, V> String extract(K key, V value, long offset, Path seqFile) { 067 return key.toString(); 068 } 069 }; 070 } 071 }, 072 /** 073 * Print the offset of the record in the {@link SequenceFile} 074 * 075 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 076 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 077 */ 078 OFFSET { 079 @Override 080 public RecordInformationExtractor getExtractor() { 081 return new RecordInformationExtractor() { 082 @Override 083 public <K, V> String extract(K key, V value, long offset, Path seqFile) { 084 return ((Long) offset).toString(); 085 } 086 }; 087 } 088 }, 089 /** 090 * Print the path to the {@link SequenceFile} in question. This is useful if 091 * you're working with a directory of {@link SequenceFile}s 092 * 093 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 094 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 095 */ 096 SEQUENCEFILE { 097 @Override 098 public RecordInformationExtractor getExtractor() { 099 return new RecordInformationExtractor() { 100 @Override 101 public <K, V> String extract(K key, V value, long offset, Path seqFile) { 102 return seqFile.toString(); 103 } 104 }; 105 } 106 }, 107 /** 108 * Print the mimetype of the value in each record 109 * 110 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 111 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 112 */ 113 MIMETYPE { 114 @Override 115 public RecordInformationExtractor getExtractor() { 116 return new RecordInformationExtractor() { 117 @Override 118 public <K, V> String extract(K key, V value, long offset, Path seqFile) { 119 if (value instanceof BytesWritable) { 120 MagicMimeTypeIdentifier match; 121 try { 122 final BytesWritable bw = (BytesWritable) value; 123 match = new MagicMimeTypeIdentifier(); 124 final String ident = match.identify(bw.getBytes(), key.toString(), new URIImpl(seqFile 125 .toUri().toString())); 126 return ident; 127 } catch (final Exception e) { 128 System.err.println("Failed!"); 129 } 130 } 131 return null; 132 } 133 }; 134 } 135 }, 136 /** 137 * Print the size of the record value in bytes 138 * 139 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 140 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 141 */ 142 SIZE { 143 @Override 144 public RecordInformationExtractor getExtractor() { 145 return new RecordInformationExtractor() { 146 @Override 147 public <K, V> String extract(K key, V value, long offset, Path seqFile) { 148 if (value instanceof BytesWritable) { 149 return "" + ((BytesWritable) value).getLength(); 150 } 151 return null; 152 } 153 }; 154 } 155 }, 156 /** 157 * Print the dimensions of each records value if it is a valid image. 158 * 159 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 160 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 161 */ 162 IMAGE_DIMENSIONS { 163 @Override 164 public RecordInformationExtractor getExtractor() { 165 return new RecordInformationExtractor() { 166 @Override 167 public <K, V> String extract(K key, V value, long offset, Path seqFile) { 168 if (value instanceof BytesWritable) { 169 try { 170 final BufferedImage im = ImageIO.read(new ByteArrayInputStream(((BytesWritable) value) 171 .getBytes())); 172 return String.format("%d %d", im.getWidth(), im.getHeight()); 173 } catch (final IOException e) { 174 return null; 175 } 176 } 177 return null; 178 } 179 }; 180 } 181 }; 182 183 /** 184 * @return a {@link RecordInformationExtractor} for extracting information 185 * from a {@link SequenceFile} record. 186 */ 187 public abstract RecordInformationExtractor getExtractor(); 188 189 /** 190 * Construct a list of extractors from the given options. 191 * 192 * @param options 193 * the options 194 * @return the extractors in the same order as the given options 195 */ 196 public static List<RecordInformationExtractor> listOptionsToExtractPolicy(List<ListModeOptions> options) { 197 final List<RecordInformationExtractor> extractors = new ArrayList<RecordInformationExtractor>(); 198 199 for (final ListModeOptions opt : options) 200 extractors.add(opt.getExtractor()); 201 202 return extractors; 203 } 204}