001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.data.dataset; 031 032import java.util.Arrays; 033import java.util.Comparator; 034import java.util.LinkedHashMap; 035import java.util.Map; 036import java.util.Set; 037 038import org.apache.commons.vfs2.FileObject; 039import org.apache.commons.vfs2.FileSystemException; 040import org.apache.commons.vfs2.FileSystemManager; 041import org.apache.commons.vfs2.FileType; 042import org.apache.commons.vfs2.FileTypeSelector; 043import org.apache.commons.vfs2.VFS; 044import org.openimaj.data.identity.Identifiable; 045import org.openimaj.io.InputStreamObjectReader; 046import org.openimaj.io.ObjectReader; 047 048/** 049 * A {@link GroupedDataset} of {@link VFSListDataset}s backed by directories of 050 * items (either locally or remotely), or items stored in a hierarchical 051 * structure within a compressed archive. 052 * <p> 053 * This implementation only supports a basic grouped dataset with {@link String} 054 * keys created from the names of directories, and {@link VFSListDataset} values 055 * from all the readable files within each directory. 056 * <p> 057 * As an example, this class can be used to easily create a 058 * {@link GroupedDataset} from a directory containing directories of images: 059 * 060 * <pre> 061 * GroupedDataset<String, VFSListDataset<FImage>, FImage> dataset = new VFSGroupDataset<FImage>( 062 * "/path/to/directory/of/images", 063 * ImageUtilities.FIMAGE_READER); 064 * </pre> 065 * 066 * a zip file of directories of images: 067 * 068 * <pre> 069 * GroupedDataset<String, VFSListDataset<FImage>, FImage> dataset = new VFSGroupDataset<FImage>( 070 * "zip:file:/path/to/images.zip", ImageUtilities.FIMAGE_READER); 071 * </pre> 072 * 073 * or even a remote zip of directories of images hosted via http: 074 * 075 * <pre> 076 * GroupedDataset<String, VFSListDataset<FImage>, FImage> dataset = new VFSGroupDataset<FImage>( 077 * "zip:http://localhost/˜jsh2/thumbnails.zip", ImageUtilities.FIMAGE_READER); 078 * </pre> 079 * 080 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 081 * 082 * @param <INSTANCE> 083 * The type of instance in the dataset 084 */ 085public class VFSGroupDataset<INSTANCE> 086 extends 087 ReadableGroupDataset<String, VFSListDataset<INSTANCE>, INSTANCE, FileObject> 088 implements 089 Identifiable 090{ 091 private Map<String, VFSListDataset<INSTANCE>> files = new LinkedHashMap<String, VFSListDataset<INSTANCE>>(); 092 private Map<String, FileObject> directoryInfo = new LinkedHashMap<String, FileObject>(); 093 private FileObject base; 094 095 /** 096 * Construct a grouped dataset from any virtual file system source (local 097 * directory, remote zip file, etc). Only the child directories under the 098 * given path will be used to create groups; the contents of any 099 * sub-directories will be merged automatically. Only directories with 100 * readable items as children will be included in the resultant dataset. 101 * 102 * @see "http://commons.apache.org/proper/commons-vfs/filesystems.html" 103 * @param path 104 * the file system path or uri. See the Apache Commons VFS2 105 * documentation for all the details. 106 * @param reader 107 * the {@link InputStreamObjectReader} that reads the data from 108 * the VFS 109 * @throws FileSystemException 110 * if an error occurs accessing the VFS 111 */ 112 public VFSGroupDataset(final String path, final InputStreamObjectReader<INSTANCE> reader) throws FileSystemException { 113 this(path, new VFSListDataset.FileObjectISReader<INSTANCE>(reader)); 114 } 115 116 /** 117 * Construct a grouped dataset from any virtual file system source (local 118 * directory, remote zip file, etc). Only the child directories under the 119 * given path will be used to create groups; the contents of any 120 * sub-directories will be merged automatically. Only directories with 121 * readable items as children will be included in the resultant dataset. 122 * 123 * @see "http://commons.apache.org/proper/commons-vfs/filesystems.html" 124 * @param path 125 * the file system path or uri. See the Apache Commons VFS2 126 * documentation for all the details. 127 * @param reader 128 * the {@link InputStreamObjectReader} that reads the data from 129 * the VFS 130 * @throws FileSystemException 131 * if an error occurs accessing the VFS 132 */ 133 public VFSGroupDataset(final String path, final ObjectReader<INSTANCE, FileObject> reader) throws FileSystemException 134 { 135 super(reader); 136 137 final FileSystemManager fsManager = VFS.getManager(); 138 base = fsManager.resolveFile(path); 139 140 final FileObject[] folders = base.findFiles(new FileTypeSelector(FileType.FOLDER)); 141 142 Arrays.sort(folders, new Comparator<FileObject>() { 143 @Override 144 public int compare(FileObject o1, FileObject o2) { 145 return o1.getName().toString().compareToIgnoreCase(o2.getName().toString()); 146 } 147 }); 148 149 for (final FileObject folder : folders) { 150 if (folder.equals(base)) 151 continue; 152 153 directoryInfo.put(folder.getName().getBaseName(), folder); 154 final VFSListDataset<INSTANCE> list = new VFSListDataset<INSTANCE>(folder.getName().getURI(), reader); 155 156 if (list.size() > 0) 157 files.put(folder.getName().getBaseName(), list); 158 } 159 } 160 161 /** 162 * Get the underlying file descriptors of the directories that form the 163 * groups of the dataset 164 * 165 * @return the array of file objects 166 */ 167 public Map<String, FileObject> getGroupDirectories() { 168 return directoryInfo; 169 } 170 171 /** 172 * Get the underlying file descriptor for a particular group in the dataset. 173 * 174 * @param key 175 * key of the group 176 * 177 * @return the file object corresponding to the instance 178 */ 179 public FileObject getFileObject(String key) { 180 return directoryInfo.get(key); 181 } 182 183 @Override 184 public String toString() { 185 return String.format("%s(%d groups with a total of %d instances)", this.getClass().getName(), this.size(), 186 this.numInstances()); 187 } 188 189 @Override 190 public Set<Entry<String, VFSListDataset<INSTANCE>>> entrySet() { 191 return files.entrySet(); 192 } 193 194 @Override 195 public String getID() { 196 return base.getName().getBaseName(); 197 } 198}