001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.hadoop.tools.sequencefile;
031
032import java.awt.image.BufferedImage;
033import java.io.ByteArrayInputStream;
034import java.io.IOException;
035import java.util.ArrayList;
036import java.util.List;
037
038import javax.imageio.ImageIO;
039
040import org.apache.hadoop.fs.Path;
041import org.apache.hadoop.io.BytesWritable;
042import org.apache.hadoop.io.SequenceFile;
043import org.ontoware.rdf2go.model.node.impl.URIImpl;
044import org.openimaj.hadoop.sequencefile.RecordInformationExtractor;
045import org.semanticdesktop.aperture.mime.identifier.magic.MagicMimeTypeIdentifier;
046
047/**
048 * Options for controlling what is printed when listing the contents of a
049 * {@link SequenceFile} with the {@link SequenceFileTool}.
050 *
051 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
052 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
053 */
054public enum ListModeOptions {
055        /**
056         * Print the record key
057         *
058         * @author Sina Samangooei (ss@ecs.soton.ac.uk)
059         * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
060         */
061        KEY {
062                @Override
063                public RecordInformationExtractor getExtractor() {
064                        return new RecordInformationExtractor() {
065                                @Override
066                                public <K, V> String extract(K key, V value, long offset, Path seqFile) {
067                                        return key.toString();
068                                }
069                        };
070                }
071        },
072        /**
073         * Print the offset of the record in the {@link SequenceFile}
074         *
075         * @author Sina Samangooei (ss@ecs.soton.ac.uk)
076         * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
077         */
078        OFFSET {
079                @Override
080                public RecordInformationExtractor getExtractor() {
081                        return new RecordInformationExtractor() {
082                                @Override
083                                public <K, V> String extract(K key, V value, long offset, Path seqFile) {
084                                        return ((Long) offset).toString();
085                                }
086                        };
087                }
088        },
089        /**
090         * Print the path to the {@link SequenceFile} in question. This is useful if
091         * you're working with a directory of {@link SequenceFile}s
092         *
093         * @author Sina Samangooei (ss@ecs.soton.ac.uk)
094         * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
095         */
096        SEQUENCEFILE {
097                @Override
098                public RecordInformationExtractor getExtractor() {
099                        return new RecordInformationExtractor() {
100                                @Override
101                                public <K, V> String extract(K key, V value, long offset, Path seqFile) {
102                                        return seqFile.toString();
103                                }
104                        };
105                }
106        },
107        /**
108         * Print the mimetype of the value in each record
109         *
110         * @author Sina Samangooei (ss@ecs.soton.ac.uk)
111         * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
112         */
113        MIMETYPE {
114                @Override
115                public RecordInformationExtractor getExtractor() {
116                        return new RecordInformationExtractor() {
117                                @Override
118                                public <K, V> String extract(K key, V value, long offset, Path seqFile) {
119                                        if (value instanceof BytesWritable) {
120                                                MagicMimeTypeIdentifier match;
121                                                try {
122                                                        final BytesWritable bw = (BytesWritable) value;
123                                                        match = new MagicMimeTypeIdentifier();
124                                                        final String ident = match.identify(bw.getBytes(), key.toString(), new URIImpl(seqFile
125                                                                        .toUri().toString()));
126                                                        return ident;
127                                                } catch (final Exception e) {
128                                                        System.err.println("Failed!");
129                                                }
130                                        }
131                                        return null;
132                                }
133                        };
134                }
135        },
136        /**
137         * Print the size of the record value in bytes
138         *
139         * @author Sina Samangooei (ss@ecs.soton.ac.uk)
140         * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
141         */
142        SIZE {
143                @Override
144                public RecordInformationExtractor getExtractor() {
145                        return new RecordInformationExtractor() {
146                                @Override
147                                public <K, V> String extract(K key, V value, long offset, Path seqFile) {
148                                        if (value instanceof BytesWritable) {
149                                                return "" + ((BytesWritable) value).getLength();
150                                        }
151                                        return null;
152                                }
153                        };
154                }
155        },
156        /**
157         * Print the dimensions of each records value if it is a valid image.
158         *
159         * @author Sina Samangooei (ss@ecs.soton.ac.uk)
160         * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
161         */
162        IMAGE_DIMENSIONS {
163                @Override
164                public RecordInformationExtractor getExtractor() {
165                        return new RecordInformationExtractor() {
166                                @Override
167                                public <K, V> String extract(K key, V value, long offset, Path seqFile) {
168                                        if (value instanceof BytesWritable) {
169                                                try {
170                                                        final BufferedImage im = ImageIO.read(new ByteArrayInputStream(((BytesWritable) value)
171                                                                        .getBytes()));
172                                                        return String.format("%d %d", im.getWidth(), im.getHeight());
173                                                } catch (final IOException e) {
174                                                        return null;
175                                                }
176                                        }
177                                        return null;
178                                }
179                        };
180                }
181        };
182
183        /**
184         * @return a {@link RecordInformationExtractor} for extracting information
185         *         from a {@link SequenceFile} record.
186         */
187        public abstract RecordInformationExtractor getExtractor();
188
189        /**
190         * Construct a list of extractors from the given options.
191         *
192         * @param options
193         *            the options
194         * @return the extractors in the same order as the given options
195         */
196        public static List<RecordInformationExtractor> listOptionsToExtractPolicy(List<ListModeOptions> options) {
197                final List<RecordInformationExtractor> extractors = new ArrayList<RecordInformationExtractor>();
198
199                for (final ListModeOptions opt : options)
200                        extractors.add(opt.getExtractor());
201
202                return extractors;
203        }
204}