/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the name of the University of Southampton nor the names of its
 *     contributors may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.data.dataset;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.util.Iterator;

import org.apache.commons.vfs2.FileContent;
import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.FileSelectInfo;
import org.apache.commons.vfs2.FileSelector;
import org.apache.commons.vfs2.FileSystemException;
import org.apache.commons.vfs2.FileSystemManager;
import org.apache.commons.vfs2.FileType;
import org.apache.commons.vfs2.VFS;
import org.openimaj.data.identity.Identifiable;
import org.openimaj.io.IOUtils;
import org.openimaj.io.InputStreamObjectReader;
import org.openimaj.io.ObjectReader;
import org.openimaj.util.array.ArrayIterator;

/**
 * A {@link ListDataset} backed by a directory of items (either locally or
 * remotely), or items stored in a compressed archive.
 * <p>
 * As an example, this class can be used to easily create a {@link ListDataset}
 * from a directory of images:
 *
 * <pre>
 * ListDataset&lt;FImage&gt; dataset = new VFSListDataset&lt;FImage&gt;("/path/to/directory/of/images",
 *         ImageUtilities.FIMAGE_READER);
 * </pre>
 *
 * a zip file of images:
 *
 * <pre>
 * ListDataset&lt;FImage&gt; dataset = new VFSListDataset&lt;FImage&gt;(
 *         "zip:file:/path/to/images.zip", ImageUtilities.FIMAGE_READER);
 * </pre>
 *
 * or even a remote zip of images hosted via http:
 *
 * <pre>
 * ListDataset&lt;FImage&gt; dataset = new VFSListDataset&lt;FImage&gt;(
 *         "zip:http://localhost/~jsh2/thumbnails.zip", ImageUtilities.FIMAGE_READER);
 * </pre>
 *
 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
 *
 * @param <INSTANCE>
 *            The type of instance in the dataset
 */
public class VFSListDataset<INSTANCE> extends ReadableListDataset<INSTANCE, FileObject> implements Identifiable {
    /**
     * An adaptor that lets {@link InputStreamObjectReader}s be used as a
     * {@link ObjectReader} with a {@link FileObject} source type.
     *
     * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
     *
     * @param <INSTANCE>
     *            The type of instance that the {@link InputStreamObjectReader}
     *            produces
     */
    public static class FileObjectISReader<INSTANCE> implements ObjectReader<INSTANCE, FileObject> {
        private final InputStreamObjectReader<INSTANCE> streamReader;

        /**
         * Construct with the given {@link InputStreamObjectReader}
         *
         * @param reader
         *            the {@link InputStreamObjectReader}
         */
        public FileObjectISReader(InputStreamObjectReader<INSTANCE> reader) {
            this.streamReader = reader;
        }

        /**
         * Read an instance from the content of the given file by delegating
         * to the wrapped stream reader. The file's content is closed once
         * the read has finished, whether or not it succeeded.
         *
         * @param source
         *            the file to read from
         * @return the instance produced by the wrapped reader
         * @throws IOException
         *             if the content cannot be opened, read or closed
         */
        @Override
        public INSTANCE read(FileObject source) throws IOException {
            FileContent content = null;
            try {
                content = source.getContent();
                return streamReader.read(content.getInputStream());
            } finally {
                if (content != null)
                    content.close();
            }
        }

        /**
         * Test whether the wrapped stream reader can read the given file.
         * <p>
         * A buffered stream over the file's content is handed to
         * {@link IOUtils}, together with the base name of the file. The
         * <code>name</code> argument is not consulted. This is a best-effort
         * check: any {@link IOException} raised whilst opening or probing the
         * stream is treated as "cannot read".
         *
         * @param source
         *            the file to test
         * @param name
         *            the name of the file (unused; the base name of
         *            <code>source</code> is used instead)
         * @return true if the wrapped reader reports the file as readable;
         *         false otherwise, or if an I/O error occurred
         */
        @Override
        public boolean canRead(FileObject source, String name) {
            BufferedInputStream stream = null;
            try {
                stream = new BufferedInputStream(source.getContent().getInputStream());

                return IOUtils.canRead(streamReader, stream, source.getName().getBaseName());
            } catch (final IOException ignored) {
                // deliberately ignored: a file that cannot be opened or
                // probed is simply reported as unreadable below
            } finally {
                if (stream != null) {
                    try {
                        stream.close();
                    } catch (final IOException ignored) {
                        // deliberately ignored: nothing useful can be done
                        // if closing the probe stream fails
                    }
                }
            }

            return false;
        }

    }

    /** The readable files found beneath {@link #base}; never null. */
    private final FileObject[] files;

    /** The resolved root of the dataset. */
    private final FileObject base;

    /**
     * Construct a list dataset from any virtual file system source (local
     * directory, remote zip file, etc).
     *
     * @see "http://commons.apache.org/proper/commons-vfs/filesystems.html"
     * @param path
     *            the file system path or uri. See the Apache Commons VFS2
     *            documentation for all the details.
     * @param reader
     *            the {@link InputStreamObjectReader} that reads the data from
     *            the VFS
     * @throws FileSystemException
     *             if an error occurs accessing the VFS
     */
    public VFSListDataset(final String path, final InputStreamObjectReader<INSTANCE> reader) throws FileSystemException {
        this(path, new FileObjectISReader<INSTANCE>(reader));
    }

    /**
     * Construct a list dataset from any virtual file system source (local
     * directory, remote zip file, etc).
     * <p>
     * All descendants of the resolved path are traversed; every entry of type
     * {@link FileType#FILE} that the reader reports as readable becomes an
     * instance of the dataset. If the search yields no result array at all,
     * the dataset is empty.
     *
     * @see "http://commons.apache.org/proper/commons-vfs/filesystems.html"
     * @param path
     *            the file system path or uri. See the Apache Commons VFS2
     *            documentation for all the details.
     * @param reader
     *            the {@link ObjectReader} that reads the data from the VFS
     * @throws FileSystemException
     *             if an error occurs accessing the VFS
     */
    public VFSListDataset(final String path, final ObjectReader<INSTANCE, FileObject> reader) throws FileSystemException {
        super(reader);

        final FileSystemManager fsManager = VFS.getManager();
        base = fsManager.resolveFile(path);

        final FileObject[] found = base.findFiles(new FileSelector() {

            @Override
            public boolean traverseDescendents(FileSelectInfo fileInfo) throws Exception {
                return true;
            }

            @Override
            public boolean includeFile(FileSelectInfo fileInfo) throws Exception {
                final FileObject candidate = fileInfo.getFile();

                if (candidate.getType() == FileType.FILE) {
                    return IOUtils.canRead(reader, candidate, candidate.getName().getBaseName());
                }

                return false;
            }
        });

        // findFiles can return null (documented for a base that does not
        // exist). Normalise to an empty array so that toString(), iterator()
        // and the indexed accessors agree with numInstances() rather than
        // failing with a NullPointerException.
        files = (found == null) ? new FileObject[0] : found;
    }

    /**
     * Get the underlying file descriptors of the files in the dataset
     *
     * @return the array of file objects; empty (never null) if no files were
     *         found. This is the internal array, not a copy.
     */
    public FileObject[] getFileObjects() {
        return files;
    }

    /**
     * Get the underlying file descriptor for a particular instance in the
     * dataset.
     *
     * @param index
     *            index of the instance
     *
     * @return the file object corresponding to the instance
     */
    public FileObject getFileObject(int index) {
        return files[index];
    }

    /**
     * Read and return the instance at the given index.
     *
     * @param index
     *            index of the instance
     * @return the instance read from the corresponding file
     * @throws RuntimeException
     *             wrapping any {@link IOException} raised whilst reading
     */
    @Override
    public INSTANCE getInstance(int index) {
        try {
            return read(files[index]);
        } catch (final IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * @return the number of files in the dataset (0 if none were found)
     */
    @Override
    public int numInstances() {
        return files.length;
    }

    /**
     * Read a single instance from a file using the dataset's reader.
     *
     * @param file
     *            the file to read
     * @return the instance
     * @throws IOException
     *             if the reader fails
     */
    private INSTANCE read(FileObject file) throws IOException {
        return reader.read(file);
    }

    /**
     * Create an iterator that reads each instance from its file as it is
     * requested.
     *
     * @return an iterator over the instances; <code>next()</code> wraps any
     *         {@link IOException} in a {@link RuntimeException}, and
     *         <code>remove()</code> is delegated to the underlying
     *         {@link ArrayIterator}
     */
    @Override
    public Iterator<INSTANCE> iterator() {
        return new Iterator<INSTANCE>() {
            private final ArrayIterator<FileObject> filesIterator = new ArrayIterator<FileObject>(files);

            @Override
            public boolean hasNext() {
                return filesIterator.hasNext();
            }

            @Override
            public INSTANCE next() {
                try {
                    return read(filesIterator.next());
                } catch (final IOException e) {
                    throw new RuntimeException(e);
                }
            }

            @Override
            public void remove() {
                filesIterator.remove();
            }
        };
    }

    /**
     * Get the identifier of the instance at the given index: the name of its
     * file relative to the base of the dataset.
     *
     * @param index
     *            index of the instance
     * @return the relative name of the corresponding file
     * @throws RuntimeException
     *             wrapping any {@link FileSystemException} raised whilst
     *             computing the relative name
     */
    @Override
    public String getID(int index) {
        try {
            return base.getName().getRelativeName(files[index].getName());
        } catch (final FileSystemException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * @return the runtime class name followed by the number of instances
     */
    @Override
    public String toString() {
        return String.format("%s(%d instances)", this.getClass().getName(), this.files.length);
    }

    /**
     * @return the base name of the root of the dataset
     */
    @Override
    public String getID() {
        return base.getName().getBaseName();
    }
}