001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.util.list; 031 032import java.io.DataInput; 033import java.io.File; 034import java.io.FileInputStream; 035import java.io.IOException; 036import java.io.RandomAccessFile; 037import java.nio.charset.Charset; 038import java.util.AbstractList; 039import java.util.ArrayList; 040import java.util.Arrays; 041import java.util.Collection; 042import java.util.Iterator; 043import java.util.List; 044import java.util.Scanner; 045 046import org.openimaj.data.RandomData; 047import org.openimaj.io.Readable; 048 049/** 050 * A (currently) read-only list that iterates over a file. Both ascii and binary 051 * files are supported. Binary files are required to have fixed length records. 052 * 053 * @author Jonathon Hare 054 * 055 * @param <T> 056 * the type of readable item held in array 057 */ 058public abstract class AbstractFileBackedList<T extends Readable> extends AbstractList<T> 059 implements 060 RandomisableList<T>, 061 Cloneable 062{ 063 protected final int size; 064 protected final Class<T> clz; 065 066 protected final boolean isBinary; 067 protected final int headerLength; 068 protected final int recordLength; 069 070 protected final File file; 071 072 private int ascii_offset = 0; 073 protected String charset; 074 075 protected AbstractFileBackedList(int size, boolean isBinary, int headerLength, int recordLength, File file, 076 Class<T> clz) 077 { 078 this.size = size; 079 this.isBinary = isBinary; 080 this.headerLength = headerLength; 081 this.recordLength = recordLength; 082 this.file = file; 083 this.clz = clz; 084 this.charset = Charset.defaultCharset().name(); 085 } 086 087 protected AbstractFileBackedList(int size, boolean isBinary, int headerLength, int recordLength, File file, 088 Class<T> clz, String charset) 089 { 090 this.size = size; 091 this.isBinary = isBinary; 092 this.headerLength = headerLength; 093 this.recordLength = recordLength; 094 this.file = file; 095 this.clz = clz; 096 this.charset = charset; 097 } 098 099 /** 100 * Override this if your instances can't be constructed with a no-args 101 * constructor 102 * 103 * @return 104 */ 105 protected T newElementInstance() { 106 try { 107 return clz.newInstance(); 108 } catch (final Exception e) { 109 throw new RuntimeException(e); 110 } 111 } 112 113 protected T readRecord(DataInput input) throws IOException { 114 final T element = newElementInstance(); 115 element.readBinary(input); 116 return element; 117 } 118 119 protected T readRecordASCII(Scanner br) throws IOException { 120 final T element = newElementInstance(); 121 element.readASCII(br); 122 return element; 123 } 124 125 @Override 126 public int size() { 127 return size; 128 } 129 130 @Override 131 public boolean isEmpty() { 132 return size == 0; 133 } 134 135 @Override 136 public boolean contains(Object o) { 137 for (final T k : this) { 138 if (k.equals(o)) 139 return true; 140 } 141 142 return false; 143 } 144 145 class FLBinaryIterator implements Iterator<T> { 146 protected int count = 0; 147 protected RandomAccessFile raf; 148 149 FLBinaryIterator() { 150 try { 151 this.raf = new RandomAccessFile(file, "r"); 152 raf.seek(headerLength); 153 } catch (final IOException e) { 154 close(); 155 throw new RuntimeException(e); 156 } 157 } 158 159 @Override 160 public boolean hasNext() { 161 if (count < size()) 162 return true; 163 close(); 164 return false; 165 } 166 167 protected void close() { 168 if (raf != null) { 169 try { 170 raf.close(); 171 raf = null; 172 } catch (final IOException e) { 173 } 174 } 175 } 176 177 @Override 178 public T next() { 179 try { 180 if (raf == null) 181 return null; 182 183 final T k = readRecord(raf); 184 count++; 185 return k; 186 } catch (final IOException e) { 187 throw new RuntimeException(e); 188 } 189 } 190 191 @Override 192 public void remove() { 193 throw new UnsupportedOperationException("Modifying a FileKeypointList isn't supported"); 194 } 195 196 @Override 197 public void finalize() { 198 close(); 199 } 200 } 201 202 class FLAsciiIterator implements Iterator<T> { 203 protected int count = 0; 204 protected Scanner br; 205 206 FLAsciiIterator() { 207 reset(); 208 for (int i = 0; i < ascii_offset; i++) 209 next(); 210 count = 0; 211 } 212 213 protected void reset() { 214 try { 215 close(); 216 final FileInputStream fis = new FileInputStream(file); 217 br = new Scanner(fis, charset); 218 for (int i = 0; i < headerLength; i++) 219 br.nextLine(); 220 } catch (final IOException e) { 221 close(); 222 throw new RuntimeException(e); 223 } 224 } 225 226 @Override 227 public boolean hasNext() { 228 if (count < size()) 229 return true; 230 close(); 231 return false; 232 } 233 234 protected void close() { 235 if (br != null) { 236 br.close(); 237 br = null; 238 } 239 } 240 241 @Override 242 public T next() { 243 try { 244 if (br == null) 245 return null; 246 247 final T k = readRecordASCII(br); 248 count++; 249 return k; 250 } catch (final IOException e) { 251 throw new RuntimeException(e); 252 } 253 } 254 255 @Override 256 public void remove() { 257 throw new UnsupportedOperationException("Modifying a FileList isn't supported"); 258 } 259 260 @Override 261 public void finalize() { 262 close(); 263 } 264 } 265 266 @Override 267 public Iterator<T> iterator() { 268 if (isBinary) 269 return new FLBinaryIterator(); 270 return new FLAsciiIterator(); 271 } 272 273 @SuppressWarnings("unchecked") 274 @Override 275 public <E> E[] toArray(E[] a) { 276 if (a.length < size) 277 return (E[]) Arrays.copyOf(toArray(), size, a.getClass()); 278 279 int i = 0; 280 for (final T k : this) 281 a[i++] = (E) k; 282 283 if (a.length > size) 284 a[size] = null; 285 286 return a; 287 } 288 289 @Override 290 public boolean containsAll(Collection<?> c) { 291 final List<T> seen = new ArrayList<T>(); 292 for (final T k : this) { 293 if (c.contains(k)) 294 seen.add(k); 295 } 296 return seen.size() == c.size(); 297 } 298 299 @Override 300 public T get(int index) { 301 if (index > size) 302 throw new IllegalArgumentException("Index out of bounds"); 303 304 if (!isBinary) { 305 int count = 0; 306 for (final T k : this) { 307 if (count++ == index) 308 return k; 309 } 310 } else { 311 final long offset = (index * recordLength) + headerLength; 312 RandomAccessFile raf = null; 313 try { 314 raf = new RandomAccessFile(file, "r"); 315 raf.seek(offset); 316 return readRecord(raf); 317 } catch (final IOException e) { 318 throw new RuntimeException(e); 319 } finally { 320 if (raf != null) 321 try { 322 raf.close(); 323 } catch (final IOException e) { 324 } 325 } 326 } 327 return null; 328 } 329 330 @Override 331 public T set(int index, T element) { 332 throw new UnsupportedOperationException("Modifying a FileList isn't supported"); 333 } 334 335 @Override 336 public void add(int index, T element) { 337 throw new UnsupportedOperationException("Modifying a FileList isn't supported"); 338 } 339 340 @Override 341 public int indexOf(Object o) { 342 int count = 0; 343 for (final T k : this) { 344 if (o.equals(k)) 345 return count; 346 count++; 347 } 348 return -1; 349 } 350 351 @Override 352 public int lastIndexOf(Object o) { 353 int count = 0; 354 int found = -1; 355 for (final T k : this) { 356 if (o.equals(k)) 357 found = count; 358 count++; 359 } 360 return found; 361 } 362 363 protected abstract AbstractFileBackedList<T> newInstance(int newSize, boolean isBinary, int newHeaderLength, 364 int recordLength, File file); 365 366 @Override 367 public RandomisableList<T> subList(int fromIndex, int toIndex) { 368 if (fromIndex < 0 || fromIndex > size || toIndex < 0 || toIndex > size) 369 throw new IllegalArgumentException("bad offsets"); 370 371 if (!isBinary) { 372 final AbstractFileBackedList<T> fkl = newInstance(toIndex - fromIndex, isBinary, headerLength, recordLength, 373 file); 374 fkl.ascii_offset = ascii_offset + fromIndex; 375 return fkl; 376 } else { 377 final int newHeaderLength = headerLength + (fromIndex * recordLength); 378 final int newSize = toIndex - fromIndex; 379 380 final AbstractFileBackedList<T> fkl = newInstance(newSize, isBinary, newHeaderLength, recordLength, file); 381 return fkl; 382 } 383 } 384 385 class FLRandomSubList extends AbstractFileBackedList<T> { 386 protected final int[] indices; 387 388 FLRandomSubList(boolean isBinary, int headerLength, int recordLength, File file, int[] indices, Class<T> clz) { 389 super(indices.length, isBinary, headerLength, recordLength, file, clz); 390 this.indices = indices; 391 } 392 393 @Override 394 public int size() { 395 return indices.length; 396 } 397 398 @Override 399 public T get(int index) { 400 return super.get(indices[index]); 401 } 402 403 @Override 404 public RandomisableList<T> subList(int fromIndex, int toIndex) { 405 if (fromIndex < 0 || fromIndex > size() || toIndex < 0 || toIndex > size()) 406 throw new IllegalArgumentException("bad offsets"); 407 408 final int[] newIndices = new int[toIndex - fromIndex]; 409 410 return new FLRandomSubList(isBinary, headerLength, recordLength, file, newIndices, clz); 411 } 412 413 class FLRandomBinaryIterator extends FLBinaryIterator { 414 @Override 415 public T next() { 416 try { 417 if (count >= indices.length) 418 return null; 419 420 final int idx = indices[count]; 421 raf.seek(headerLength + (idx * recordLength)); 422 423 final T k = readRecord(raf); 424 count++; 425 return k; 426 } catch (final IOException e) { 427 throw new RuntimeException(e); 428 } 429 } 430 } 431 432 class FLRandomAsciiIterator extends FLAsciiIterator { 433 @Override 434 public T next() { 435 try { 436 if (count >= indices.length) 437 return null; 438 439 int offset = 0; 440 if (count > 0) 441 offset = indices[count] - indices[count - 1]; 442 443 if (offset < 0) { 444 reset(); 445 offset = indices[count]; 446 } 447 448 for (int i = 0; i < offset - 1; i++) { 449 readRecordASCII(br); 450 } 451 452 final T k = readRecordASCII(br); 453 count++; 454 return k; 455 } catch (final IOException e) { 456 throw new RuntimeException(e); 457 } 458 } 459 } 460 461 @Override 462 public Iterator<T> iterator() { 463 if (isBinary) 464 return new FLRandomBinaryIterator(); 465 return new FLRandomAsciiIterator(); 466 } 467 468 @Override 469 public RandomisableList<T> randomSubList(int nelem) { 470 if (nelem > size()) 471 throw new IllegalArgumentException("number of requested elements is greater than the list size"); 472 473 final int[] newindices = RandomData.getUniqueRandomInts(nelem, 0, size()); 474 475 for (int i = 0; i < newindices.length; i++) 476 newindices[i] = indices[newindices[i]]; 477 478 return new FLRandomSubList(isBinary, headerLength, recordLength, file, newindices, clz); 479 } 480 481 @Override 482 public T readRecord(DataInput input) throws IOException { 483 return AbstractFileBackedList.this.readRecord(input); 484 } 485 486 @Override 487 public T readRecordASCII(Scanner br) throws IOException { 488 return AbstractFileBackedList.this.readRecordASCII(br); 489 } 490 491 @Override 492 protected AbstractFileBackedList<T> newInstance(int newSize, boolean isBinary, int newHeaderLength, 493 int recordLength, File file) 494 { 495 return AbstractFileBackedList.this.newInstance(newSize, isBinary, newHeaderLength, recordLength, file); 496 } 497 } 498 499 @Override 500 public RandomisableList<T> randomSubList(int nelem) { 501 if (nelem > size()) 502 throw new IllegalArgumentException("number of requested elements is greater than the list size"); 503 504 final int[] indices = RandomData.getUniqueRandomInts(nelem, 0, size()); 505 506 return new FLRandomSubList(isBinary, headerLength, recordLength, file, indices, clz); 507 } 508 509 @Override 510 public Object[] toArray() { 511 final Object[] objs = new Object[size()]; 512 int i = 0; 513 for (final T o : this) { 514 objs[i++] = o; 515 } 516 return objs; 517 } 518}