/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * 	Redistributions of source code must retain the above copyright notice,
 * 	this list of conditions and the following disclaimer.
 *
 *   *	Redistributions in binary form must reproduce the above copyright notice,
 * 	this list of conditions and the following disclaimer in the documentation
 * 	and/or other materials provided with the distribution.
 *
 *   *	Neither the name of the University of Southampton nor the names of its
 * 	contributors may be used to endorse or promote products derived from this
 * 	software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.image.annotation.evaluation.datasets;

import java.io.DataInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.List;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.FileSystemException;
import org.apache.commons.vfs2.FileSystemManager;
import org.apache.commons.vfs2.VFS;
import org.openimaj.citation.annotation.Reference;
import org.openimaj.citation.annotation.ReferenceType;
import org.openimaj.data.DataUtils;
import org.openimaj.data.dataset.GroupedDataset;
import org.openimaj.data.dataset.ListBackedDataset;
import org.openimaj.data.dataset.ListDataset;
import org.openimaj.data.dataset.MapBackedDataset;
import org.openimaj.experiment.annotations.DatasetDescription;
import org.openimaj.image.MBFImage;
import org.openimaj.image.annotation.evaluation.datasets.cifar.BinaryReader;

/**
 * CIFAR-100 Dataset. Contains 60000 tiny images in 100 classes (600 per class).
 * There are 500 training images/class and 100 test. Each image is 32x32 pixels.
 * <p>
 * The binary archive is downloaded on first use to the location given by
 * {@link DataUtils#getDataLocation(String)} and read in place through Commons
 * VFS on every call; nothing is cached in memory between calls.
 *
 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
 *
 */
@Reference(
		type = ReferenceType.Article,
		author = { "Krizhevsky, A.", "Hinton, G." },
		title = "Learning multiple layers of features from tiny images",
		year = "2009",
		journal = "Master's thesis, Department of Computer Science, University of Toronto",
		publisher = "Citeseer")
@DatasetDescription(
		name = "CIFAR-100",
		description = "This dataset is just like CIFAR-10, except it has 100 "
				+ "classes containing 600 images each. There are 500 training images "
				+ "and 100 testing images per class. The 100 classes in the CIFAR-100 "
				+ "are grouped into 20 superclasses. Each image comes with a \"fine\" "
				+ "label (the class to which it belongs) and a \"coarse\" label "
				+ "(the superclass to which it belongs).",
		creator = "Alex Krizhevsky, Vinod Nair, and Geoffrey Hinton",
		url = "http://www.cs.toronto.edu/~kriz/cifar.html",
		downloadUrls = {
				"http://datasets.openimaj.org/cifar/cifar-100-binary.tar.gz",
		})
public class CIFAR100Dataset extends CIFARDataset {
	private static final String DATA_TGZ = "cifar/cifar-100-binary.tar.gz";
	private static final String DOWNLOAD_URL = "http://datasets.openimaj.org/cifar/cifar-100-binary.tar.gz";

	private static final String TRAINING_FILE = "train.bin";
	private static final String TEST_FILE = "test.bin";
	private static final String FINE_CLASSES_FILE = "fine_label_names.txt";
	private static final String COARSE_CLASSES_FILE = "coarse_label_names.txt";

	/** Number of records read from the training file (500 images for each of 100 classes). */
	private static final int TRAINING_COUNT = 50000;

	/** Number of records read from the test file (100 images for each of 100 classes). */
	private static final int TEST_COUNT = 10000;

	/**
	 * Number of image bytes in a record: 32x32 pixels at three bytes per pixel
	 * (presumably one byte per colour channel; the layout is interpreted by
	 * the {@link BinaryReader}, not by this class).
	 */
	private static final int IMAGE_BYTES = 32 * 32 * 3;

	/** Character encoding used to decode the label-name files. */
	private static final String LABEL_ENCODING = "UTF-8";

	private CIFAR100Dataset() {
	}

	/**
	 * Ensure the dataset archive is available locally, downloading it if
	 * necessary, and build the VFS URI of the directory inside the archive
	 * that holds the data files.
	 * <p>
	 * The download is written to a temporary sibling file and only renamed to
	 * the final location once it has completed, so an interrupted download is
	 * never mistaken for a valid cached archive on a later call.
	 *
	 * @return the <code>tgz:</code> VFS URI of the archive's data directory
	 * @throws IOException
	 *             if the cache directory cannot be created, or the archive
	 *             cannot be downloaded or moved into place
	 */
	private static String downloadAndGetPath() throws IOException {
		final File dataset = DataUtils.getDataLocation(DATA_TGZ);

		if (!dataset.exists()) {
			final File dir = dataset.getAbsoluteFile().getParentFile();
			if (dir != null && !dir.mkdirs() && !dir.isDirectory()) {
				throw new IOException("Unable to create directory " + dir);
			}

			// Same directory as the target so that the rename does not cross
			// file systems.
			final File partial = File.createTempFile("cifar-100-binary", ".part", dir);
			try {
				FileUtils.copyURLToFile(new URL(DOWNLOAD_URL), partial);

				// If the rename fails because something else put the archive
				// in place in the meantime, the existing file is used.
				if (!partial.renameTo(dataset) && !dataset.exists()) {
					throw new IOException("Unable to move downloaded archive to " + dataset);
				}
			} finally {
				// After a successful rename nothing is left at this path;
				// otherwise this removes the incomplete/unused download.
				if (partial.exists()) {
					partial.delete();
				}
			}
		}

		return "tgz:file:" + dataset.toString() + "!cifar-100-binary/";
	}

	/**
	 * Load the training images using the given reader. To load the images as
	 * {@link MBFImage}s grouped by their fine labels, you would do the
	 * following: <code>
	 * CIFAR100Dataset.getTrainingImages(CIFAR100Dataset.MBFIMAGE_READER, true);
	 * </code>
	 *
	 * @param reader
	 *            the reader
	 * @param fineLabels
	 *            if true, then the fine labels will be used; otherwise the
	 *            coarse superclass labels will be used.
	 * @return the training image dataset
	 * @throws IOException
	 *             if the archive cannot be downloaded or read, or its contents
	 *             are truncated or inconsistent
	 */
	public static <IMAGE> GroupedDataset<String, ListDataset<IMAGE>, IMAGE> getTrainingImages(BinaryReader<IMAGE> reader,
			boolean fineLabels)
			throws IOException
	{
		return loadImages(reader, fineLabels, TRAINING_FILE, TRAINING_COUNT);
	}

	/**
	 * Load the test images using the given reader. To load the images as
	 * {@link MBFImage}s grouped by their fine labels, you would do the
	 * following: <code>
	 * CIFAR100Dataset.getTestImages(CIFAR100Dataset.MBFIMAGE_READER, true);
	 * </code>
	 *
	 * @param reader
	 *            the reader
	 * @param fineLabels
	 *            if true, then the fine labels will be used; otherwise the
	 *            coarse superclass labels will be used.
	 * @return the test image dataset
	 * @throws IOException
	 *             if the archive cannot be downloaded or read, or its contents
	 *             are truncated or inconsistent
	 */
	public static <IMAGE> GroupedDataset<String, ListDataset<IMAGE>, IMAGE> getTestImages(BinaryReader<IMAGE> reader,
			boolean fineLabels)
			throws IOException
	{
		return loadImages(reader, fineLabels, TEST_FILE, TEST_COUNT);
	}

	/**
	 * Shared implementation of {@link #getTrainingImages(BinaryReader, boolean)}
	 * and {@link #getTestImages(BinaryReader, boolean)}: creates one group per
	 * class name and fills the groups from the named data file.
	 *
	 * @param reader
	 *            the reader used to decode each image
	 * @param fineLabels
	 *            true to group by fine label; false to group by coarse label
	 * @param fileName
	 *            name of the data file inside the archive
	 * @param count
	 *            number of records to read from the data file
	 * @return the populated dataset
	 * @throws IOException
	 *             if the archive cannot be downloaded or read, or its contents
	 *             are truncated or inconsistent
	 */
	private static <IMAGE> GroupedDataset<String, ListDataset<IMAGE>, IMAGE> loadImages(BinaryReader<IMAGE> reader,
			boolean fineLabels, String fileName, int count) throws IOException
	{
		final MapBackedDataset<String, ListDataset<IMAGE>, IMAGE> dataset = new MapBackedDataset<String, ListDataset<IMAGE>, IMAGE>();

		final FileSystemManager fsManager = VFS.getManager();
		final FileObject base = fsManager.resolveFile(downloadAndGetPath());

		DataInputStream is = null;
		try {
			final List<String> classList = loadClasses(dataset, base, fineLabels);

			is = new DataInputStream(base.resolveFile(fileName).getContent().getInputStream());
			loadData(is, dataset, classList, reader, count, fineLabels);
		} finally {
			IOUtils.closeQuietly(is);
			closeQuietly(base);
		}

		return dataset;
	}

	/**
	 * Close a VFS file, ignoring any failure.
	 *
	 * @param file
	 *            the file to close
	 */
	private static void closeQuietly(FileObject file) {
		try {
			file.close();
		} catch (final FileSystemException ignored) {
			// Best effort: by this point the data has either been loaded or a
			// more informative exception is already propagating.
		}
	}

	/**
	 * Read the class names from the archive and register an empty group in the
	 * dataset for each of them.
	 *
	 * @param dataset
	 *            the dataset to add the (empty) groups to
	 * @param base
	 *            the directory inside the archive containing the label files
	 * @param fine
	 *            true to read the fine label names; false to read the coarse
	 *            label names
	 * @return the class names in file order, so that a label index read from a
	 *         data record can be used directly as an index into the list
	 * @throws FileSystemException
	 *             if the label file cannot be resolved or opened
	 * @throws IOException
	 *             if the label file cannot be read
	 */
	private static <IMAGE> List<String> loadClasses(final MapBackedDataset<String, ListDataset<IMAGE>, IMAGE> dataset,
			final FileObject base, boolean fine) throws FileSystemException, IOException
	{
		final String labelFile = fine ? FINE_CLASSES_FILE : COARSE_CLASSES_FILE;

		InputStream classStream = null;
		final List<String> classList;
		try {
			classStream = base.resolveFile(labelFile).getContent().getInputStream();
			// Explicit encoding so the result does not depend on the platform default.
			classList = IOUtils.readLines(classStream, LABEL_ENCODING);
		} finally {
			IOUtils.closeQuietly(classStream);
		}

		for (final String clz : classList) {
			dataset.put(clz, new ListBackedDataset<IMAGE>());
		}

		return classList;
	}

	/**
	 * Read <code>num</code> records from the stream and add each decoded image
	 * to the group of its class. Each record consists of one coarse-label
	 * byte, one fine-label byte and then the image bytes.
	 *
	 * @param is
	 *            the stream positioned at the first record
	 * @param dataset
	 *            the dataset to add images to; it must already contain a group
	 *            for every name in <code>classList</code>
	 * @param classList
	 *            class names indexed by label value
	 * @param reader
	 *            the reader used to decode the image bytes
	 * @param num
	 *            the number of records to read
	 * @param fine
	 *            true to group by the fine label; false to group by the coarse
	 *            label
	 * @throws IOException
	 *             if the stream ends before <code>num</code> records have been
	 *             read, a label has no corresponding class name, or the stream
	 *             cannot be read
	 */
	private static <IMAGE> void loadData(DataInputStream is,
			MapBackedDataset<String, ListDataset<IMAGE>, IMAGE> dataset, List<String> classList,
			BinaryReader<IMAGE> reader, int num, boolean fine) throws IOException
	{
		for (int i = 0; i < num; i++) {
			// readUnsignedByte() reports a truncated stream as an EOFException
			// rather than returning -1 as read() would.
			final int coarseClz = is.readUnsignedByte();
			final int fineClz = is.readUnsignedByte();
			final int clz = fine ? fineClz : coarseClz;

			if (clz >= classList.size()) {
				throw new IOException("Record " + i + " has label index " + clz + " but only "
						+ classList.size() + " class names were loaded");
			}

			final String clzStr = classList.get(clz);

			// A fresh buffer per record, as the reader is handed the array itself.
			final byte[] record = new byte[IMAGE_BYTES];
			is.readFully(record);

			dataset.get(clzStr).add(reader.read(record));
		}
	}
}