/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the name of the University of Southampton nor the names of its
 *     contributors may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.util.list;

import java.io.Closeable;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.AbstractSequentialList;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.ListIterator;
import java.util.Scanner;

import org.openimaj.data.RandomData;
import org.openimaj.io.Readable;

/**
 * A list of records that can be consumed from a stream.
 * Records can only be read in order and concurrent access is a really
 * bad idea. Once a list is exhausted by consuming all its elements it
 * cannot be reused. The stream will be closed automatically when it is
 * exhausted.
 * <p>
 * All iterators obtained from one list share the same underlying stream
 * and the same {@link #consumed} counter, so every iterator continues
 * from wherever the stream currently is.
 *
 * @author Jonathon Hare
 *
 * @param <T> The type of object which can be read by this list
 */
public abstract class AbstractStreamBackedList<T extends Readable> extends AbstractSequentialList<T> implements RandomisableList<T> {
    /**
     * Either a {@link DataInputStream} (binary) or a {@link Scanner} (ASCII)
     * wrapped around the underlying stream; set to null once closed.
     */
    private Object streamWrapper;

    /**
     * The class from which to generate an instance of items held in the list
     */
    protected final Class<T> clz;

    /**
     * The size of the list; -1 is treated as "unknown" by the iterators
     */
    protected final int size;

    /**
     * Does the stream hold binary data (as opposed to ASCII)
     */
    protected final boolean isBinary;

    /**
     * The length of the header which identifies the stream type. For ASCII
     * streams this many lines are skipped at construction; for binary
     * streams nothing is skipped by this class.
     */
    protected final int headerLength;

    /**
     * The length (in bytes) of each item held in the list
     */
    protected final int recordLength;

    /**
     * Number of records read so far
     */
    protected int consumed = 0;

    private InputStream underlyingStream;

    /**
     * Instantiate the list and all instance variables. The stream is wrapped
     * in a {@link DataInputStream} if it is binary; otherwise it is wrapped
     * in a {@link Scanner} (using the platform default charset) and
     * <code>headerLength</code> lines are skipped.
     *
     * @param stream the stream
     * @param size number of elements
     * @param isBinary is the stream binary
     * @param headerLength how long is the header
     * @param recordLength how long is each element
     * @param clz what class instantiates elements in the list
     */
    protected AbstractStreamBackedList(InputStream stream, int size, boolean isBinary, int headerLength, int recordLength, Class<T> clz) {
        this.size = size;
        this.isBinary = isBinary;
        this.headerLength = headerLength;
        this.recordLength = recordLength;
        this.clz = clz;

        if (isBinary) {
            this.streamWrapper = new DataInputStream(stream);
        } else {
            this.streamWrapper = skipHeaderLines(new Scanner(new InputStreamReader(stream)), headerLength);
        }
        this.underlyingStream = stream;
    }

    /**
     * Instantiate the list and all instance variables. The stream is wrapped
     * in a {@link DataInputStream} if it is binary; otherwise it is wrapped
     * in a {@link Scanner} decoding with the given charset and
     * <code>headerLength</code> lines are skipped.
     *
     * @param stream the stream
     * @param size number of elements
     * @param isBinary is the stream binary
     * @param headerLength how long is the header
     * @param recordLength how long is each element
     * @param clz what class instantiates elements in the list
     * @param charset if the stream is not binary, the charsetName which is sent to the internal InputStreamReader
     *
     * @throws UnsupportedEncodingException if the stream is not binary and the named charset is not supported
     */
    protected AbstractStreamBackedList(InputStream stream, int size, boolean isBinary, int headerLength, int recordLength, Class<T> clz, String charset) throws UnsupportedEncodingException {
        this.size = size;
        this.isBinary = isBinary;
        this.headerLength = headerLength;
        this.recordLength = recordLength;
        this.clz = clz;

        if (isBinary) {
            this.streamWrapper = new DataInputStream(stream);
        } else {
            this.streamWrapper = skipHeaderLines(new Scanner(new InputStreamReader(stream, charset)), headerLength);
        }
        this.underlyingStream = stream;
    }

    /**
     * Advance the scanner past the given number of header lines.
     *
     * @param scanner the scanner positioned at the start of the stream
     * @param lines number of lines to discard
     * @return the same scanner, positioned after the header
     */
    private static Scanner skipHeaderLines(Scanner scanner, int lines) {
        for (int i = 0; i < lines; i++) {
            scanner.nextLine();
        }
        return scanner;
    }

    /**
     * Create a blank element using the no-args constructor of {@link #clz}.
     * Override this if your instances can't be constructed with a no-args ctr.
     *
     * @return a new, unpopulated element
     * @throws RuntimeException wrapping any failure to instantiate the element
     */
    protected T newElementInstance() {
        try {
            return clz.newInstance();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Read the next record from a binary source.
     *
     * @param input the binary source
     * @return a new element populated via its <code>readBinary</code> method
     * @throws IOException if reading fails
     */
    protected T readRecord(DataInput input) throws IOException {
        T element = newElementInstance();
        element.readBinary(input);
        return element;
    }

    /**
     * Read the next record from an ASCII source.
     *
     * @param br the scanner to read from
     * @return a new element populated via its <code>readASCII</code> method
     * @throws IOException if reading fails
     */
    protected T readRecordASCII(Scanner br) throws IOException {
        T element = newElementInstance();
        element.readASCII(br);
        return element;
    }

    /**
     * Forward-only, read-only iterator over the stream. Mutation is
     * unsupported and there is never a previous element.
     */
    abstract class SLIterator implements ListIterator<T> {
        @Override
        public void add(T e) {
            throw new UnsupportedOperationException();
        }

        /**
         * Test whether another record can be read. When this returns false
         * because the list is exhausted, the stream is closed.
         */
        @Override
        public boolean hasNext() {
            if (streamWrapper == null) {
                return false; // already closed
            }

            final boolean more;
            if (size == -1) {
                // Unknown size: an ASCII stream can be probed for further
                // tokens; a binary stream is assumed to have more data.
                more = !(streamWrapper instanceof Scanner) || ((Scanner) streamWrapper).hasNext();
            } else {
                more = consumed < size;
            }

            if (more) {
                return true;
            }
            close();
            return false;
        }

        @Override
        public boolean hasPrevious() {
            return false;
        }

        /**
         * @return the index of the record the next call to {@link #next()}
         *         would read, i.e. the number of records consumed so far
         *         (capped at the list size when the size is known)
         */
        @Override
        public int nextIndex() {
            return size < 0 ? consumed : Math.min(consumed, size);
        }

        /**
         * Backwards traversal is not possible.
         *
         * @return always null (this implementation does not throw)
         */
        @Override
        public T previous() {
            return null;
        }

        @Override
        public int previousIndex() {
            return -1;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }

        @Override
        public void set(T e) {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Iterator reading binary records from the {@link DataInputStream} wrapper.
     */
    class SLBinaryIterator extends SLIterator {
        /**
         * @return the next record, or null if the list is exhausted
         * @throws RuntimeException wrapping any IOException; the stream is closed first
         */
        @Override
        public T next() {
            try {
                if (hasNext()) {
                    consumed++;
                    return readRecord((DataInputStream) streamWrapper);
                }
                return null;
            } catch (IOException e) {
                close();
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Iterator reading ASCII records from the {@link Scanner} wrapper.
     */
    class SLAsciiIterator extends SLIterator {
        /**
         * @return the next record, or null if the list is exhausted
         * @throws RuntimeException wrapping any IOException; the stream is closed first
         */
        @Override
        public T next() {
            try {
                if (hasNext()) {
                    consumed++;
                    return readRecordASCII((Scanner) streamWrapper);
                }
                return null;
            } catch (IOException e) {
                close();
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Get an iterator over the records remaining in the stream.
     * <p>
     * NOTE: the <code>index</code> argument is ignored; the returned
     * iterator always continues from the current stream position. As a
     * consequence the inherited <code>get(int)</code> returns the next
     * unread record regardless of the index passed.
     *
     * @param index ignored
     * @return an iterator positioned at the next unread record
     */
    @Override
    public ListIterator<T> listIterator(int index) {
        if (isBinary) {
            return new SLBinaryIterator();
        }
        return new SLAsciiIterator();
    }

    /**
     * Close the underlying stream (if not already closed) and mark the list
     * as exhausted. Safe to call more than once.
     */
    void close() {
        if (streamWrapper == null) {
            return;
        }
        try {
            final Closeable toClose = underlyingStream;
            if (toClose != null) {
                toClose.close();
            }
        } catch (IOException ignored) {
            // Deliberate best-effort: the list is finished with the stream
            // either way, and callers have no means to react to the failure.
        } finally {
            streamWrapper = null;
        }
    }

    @Override
    public int size() {
        return size;
    }

    /**
     * Get the number of records consumed so far
     * @return number of consumed records
     */
    public int consumed() {
        return consumed;
    }

    /**
     * Get the number of records remaining in the stream
     * @return number of remaining records
     */
    public int remaining() {
        return size - consumed;
    }

    /**
     * An in-memory {@link RandomisableList} used to hold records sampled
     * from the stream.
     */
    class MemoryRandomisableList extends ArrayList<T> implements RandomisableList<T> {
        private static final long serialVersionUID = 1L;

        /**
         * Create a random sublist of this in-memory list.
         *
         * @param nelem number of elements to select
         * @return a new list holding the selected elements
         * @throws IllegalArgumentException if nelem exceeds the size of this list
         */
        @Override
        public RandomisableList<T> randomSubList(int nelem) {
            // Bound against this list's own size; the enclosing stream's
            // remaining() count is unrelated to what is held in memory here.
            if (nelem > size()) {
                throw new IllegalArgumentException("not enough elements in list");
            }

            // presumably nelem distinct indices in [0, size()) -- see RandomData
            int[] rnd = RandomData.getUniqueRandomInts(nelem, 0, size());
            MemoryRandomisableList newList = new MemoryRandomisableList();
            for (int i : rnd) {
                newList.add(get(i));
            }

            return newList;
        }
    }

    /**
     * This method creates a random sublist in ram from elements consumed
     * from the target list. Random offsets into the remaining records are
     * chosen, the stream is read forwards once (records at unselected
     * offsets are read and discarded), and the selected records are then
     * shuffled.
     *
     * @param nelem number of records to select
     * @return an in-memory list of the selected records in random order
     * @throws IllegalArgumentException if fewer than nelem records remain
     *
     * @see org.openimaj.util.list.RandomisableList#randomSubList(int)
     */
    @Override
    public RandomisableList<T> randomSubList(int nelem) {
        if (nelem > remaining()) {
            throw new IllegalArgumentException("not enough records remaining in list");
        }

        MemoryRandomisableList newList = new MemoryRandomisableList();
        // presumably nelem distinct offsets in [0, remaining()) -- see RandomData
        int[] rnd = RandomData.getUniqueRandomInts(nelem, 0, remaining());
        // Ascending order lets the forward-only stream be walked in one pass.
        Arrays.sort(rnd);

        ListIterator<T> records = listIterator(0);
        int position = 0; // offset, relative to the starting stream position, of the next unread record
        for (int target : rnd) {
            // listIterator/get ignore their index, so unselected records
            // must be skipped explicitly to reach the chosen offset.
            while (position < target) {
                records.next();
                position++;
            }
            newList.add(records.next());
            position++;
        }

        Collections.shuffle(newList);

        return newList;
    }
}