001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.data.dataset;
031
032import java.io.BufferedInputStream;
033import java.io.IOException;
034import java.util.Iterator;
035
036import org.apache.commons.vfs2.FileContent;
037import org.apache.commons.vfs2.FileObject;
038import org.apache.commons.vfs2.FileSelectInfo;
039import org.apache.commons.vfs2.FileSelector;
040import org.apache.commons.vfs2.FileSystemException;
041import org.apache.commons.vfs2.FileSystemManager;
042import org.apache.commons.vfs2.FileType;
043import org.apache.commons.vfs2.VFS;
044import org.openimaj.data.identity.Identifiable;
045import org.openimaj.io.IOUtils;
046import org.openimaj.io.InputStreamObjectReader;
047import org.openimaj.io.ObjectReader;
048import org.openimaj.util.array.ArrayIterator;
049
050/**
051 * A {@link ListDataset} backed by a directory of items (either locally or
052 * remotely), or items stored in a compressed archive.
053 * <p>
054 * As an example, this class can be used to easily create a {@link ListDataset}
055 * from a directory of images:
056 * 
057 * <pre>
058 * ListDataset&lt;FImage&gt; dataset = new VFSListDataset&lt;FImage&gt;(&quot;/path/to/directory/of/images&quot;,
059 *              ImageUtilities.FIMAGE_READER);
060 * </pre>
061 * 
062 * a zip file of images:
063 * 
064 * <pre>
065 * ListDataset&lt;FImage&gt; dataset = new VFSListDataset&lt;FImage&gt;(
066 *              &quot;zip:file:/path/to/images.zip&quot;, ImageUtilities.FIMAGE_READER);
067 * </pre>
068 * 
069 * or even a remote zip of images hosted via http:
070 * 
071 * <pre>
072 * ListDataset&lt;FImage&gt; dataset = new VFSListDataset&lt;FImage&gt;(
073 *              &quot;zip:http://localhost/&tilde;jsh2/thumbnails.zip&quot;, ImageUtilities.FIMAGE_READER);
074 * </pre>
075 * 
076 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
077 * 
078 * @param <INSTANCE>
079 *            The type of instance in the dataset
080 */
081public class VFSListDataset<INSTANCE> extends ReadableListDataset<INSTANCE, FileObject> implements Identifiable {
082        /**
083         * An adaptor that lets {@link InputStreamObjectReader}s be used as a
084         * {@link ObjectReader} with a {@link FileObject} source type.
085         * 
086         * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
087         * 
088         * @param <INSTANCE>
089         *            The type of instance that the {@link InputStreamObjectReader}
090         *            produces
091         */
092        public static class FileObjectISReader<INSTANCE> implements ObjectReader<INSTANCE, FileObject> {
093                private InputStreamObjectReader<INSTANCE> streamReader;
094
095                /**
096                 * Construct with the given {@link InputStreamObjectReader}
097                 * 
098                 * @param reader
099                 *            the {@link InputStreamObjectReader}
100                 */
101                public FileObjectISReader(InputStreamObjectReader<INSTANCE> reader) {
102                        this.streamReader = reader;
103                }
104
105                @Override
106                public INSTANCE read(FileObject source) throws IOException {
107                        FileContent content = null;
108                        try {
109                                content = source.getContent();
110                                return streamReader.read(content.getInputStream());
111                        } finally {
112                                if (content != null)
113                                        content.close();
114                        }
115                }
116
117                @Override
118                public boolean canRead(FileObject source, String name) {
119                        BufferedInputStream stream = null;
120                        try {
121                                stream = new BufferedInputStream(source.getContent().getInputStream());
122
123                                return IOUtils.canRead(streamReader, stream, source.getName().getBaseName());
124                        } catch (final IOException e) {
125                                // ignore
126                        } finally {
127                                if (stream != null) {
128                                        try {
129                                                stream.close();
130                                        } catch (final IOException e) {
131                                                // ignore
132                                        }
133                                }
134                        }
135
136                        return false;
137                }
138
139        }
140
141        private FileObject[] files;
142        private FileObject base;
143
144        /**
145         * Construct a list dataset from any virtual file system source (local
146         * directory, remote zip file, etc).
147         * 
148         * @see "http://commons.apache.org/proper/commons-vfs/filesystems.html"
149         * @param path
150         *            the file system path or uri. See the Apache Commons VFS2
151         *            documentation for all the details.
152         * @param reader
153         *            the {@link InputStreamObjectReader} that reads the data from
154         *            the VFS
155         * @throws FileSystemException
156         *             if an error occurs accessing the VFS
157         */
158        public VFSListDataset(final String path, final InputStreamObjectReader<INSTANCE> reader) throws FileSystemException {
159                this(path, new FileObjectISReader<INSTANCE>(reader));
160        }
161
162        /**
163         * Construct a list dataset from any virtual file system source (local
164         * directory, remote zip file, etc).
165         * 
166         * @see "http://commons.apache.org/proper/commons-vfs/filesystems.html"
167         * @param path
168         *            the file system path or uri. See the Apache Commons VFS2
169         *            documentation for all the details.
170         * @param reader
171         *            the {@link ObjectReader} that reads the data from the VFS
172         * @throws FileSystemException
173         *             if an error occurs accessing the VFS
174         */
175        public VFSListDataset(final String path, final ObjectReader<INSTANCE, FileObject> reader) throws FileSystemException {
176                super(reader);
177
178                final FileSystemManager fsManager = VFS.getManager();
179                base = fsManager.resolveFile(path);
180
181                files = base.findFiles(new FileSelector() {
182
183                        @Override
184                        public boolean traverseDescendents(FileSelectInfo fileInfo) throws Exception {
185                                return true;
186                        }
187
188                        @Override
189                        public boolean includeFile(FileSelectInfo fileInfo) throws Exception {
190                                if (fileInfo.getFile().getType() == FileType.FILE) {
191                                        return IOUtils.canRead(reader, fileInfo.getFile(), fileInfo.getFile().getName().getBaseName());
192                                }
193
194                                return false;
195                        }
196                });
197        }
198
199        /**
200         * Get the underlying file descriptors of the files in the dataset
201         * 
202         * @return the array of file objects
203         */
204        public FileObject[] getFileObjects() {
205                return files;
206        }
207
208        /**
209         * Get the underlying file descriptor for a particular instance in the
210         * dataset.
211         * 
212         * @param index
213         *            index of the instance
214         * 
215         * @return the file object corresponding to the instance
216         */
217        public FileObject getFileObject(int index) {
218                return files[index];
219        }
220
221        @Override
222        public INSTANCE getInstance(int index) {
223                try {
224                        return read(files[index]);
225                } catch (final IOException e) {
226                        throw new RuntimeException(e);
227                }
228        }
229
230        @Override
231        public int numInstances() {
232                if (files == null)
233                        return 0;
234                return files.length;
235        }
236
237        private INSTANCE read(FileObject file) throws IOException {
238                return reader.read(file);
239        }
240
241        @Override
242        public Iterator<INSTANCE> iterator() {
243                return new Iterator<INSTANCE>() {
244                        ArrayIterator<FileObject> filesIterator = new ArrayIterator<FileObject>(files);
245
246                        @Override
247                        public boolean hasNext() {
248                                return filesIterator.hasNext();
249                        }
250
251                        @Override
252                        public INSTANCE next() {
253                                try {
254                                        return read(filesIterator.next());
255                                } catch (final IOException e) {
256                                        throw new RuntimeException(e);
257                                }
258                        }
259
260                        @Override
261                        public void remove() {
262                                filesIterator.remove();
263                        }
264                };
265        }
266
267        @Override
268        public String getID(int index) {
269                try {
270                        return base.getName().getRelativeName(files[index].getName());
271                } catch (final FileSystemException e) {
272                        throw new RuntimeException(e);
273                }
274        }
275
276        @Override
277        public String toString() {
278                return String.format("%s(%d instances)", this.getClass().getName(), this.files.length);
279        }
280
281        @Override
282        public String getID() {
283                return base.getName().getBaseName();
284        }
285}