001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.util.list;
031
032import java.io.DataInput;
033import java.io.File;
034import java.io.FileInputStream;
035import java.io.IOException;
036import java.io.RandomAccessFile;
037import java.nio.charset.Charset;
038import java.util.AbstractList;
039import java.util.ArrayList;
040import java.util.Arrays;
041import java.util.Collection;
042import java.util.Iterator;
043import java.util.List;
044import java.util.Scanner;
045
046import org.openimaj.data.RandomData;
047import org.openimaj.io.Readable;
048
049/**
050 * A (currently) read-only list that iterates over a file. Both ascii and binary
051 * files are supported. Binary files are required to have fixed length records.
052 * 
053 * @author Jonathon Hare
054 * 
055 * @param <T>
056 *            the type of readable item held in array
057 */
058public abstract class AbstractFileBackedList<T extends Readable> extends AbstractList<T>
059                implements
060                        RandomisableList<T>,
061                        Cloneable
062{
063        protected final int size;
064        protected final Class<T> clz;
065
066        protected final boolean isBinary;
067        protected final int headerLength;
068        protected final int recordLength;
069
070        protected final File file;
071
072        private int ascii_offset = 0;
073        protected String charset;
074
075        protected AbstractFileBackedList(int size, boolean isBinary, int headerLength, int recordLength, File file,
076                        Class<T> clz)
077        {
078                this.size = size;
079                this.isBinary = isBinary;
080                this.headerLength = headerLength;
081                this.recordLength = recordLength;
082                this.file = file;
083                this.clz = clz;
084                this.charset = Charset.defaultCharset().name();
085        }
086
087        protected AbstractFileBackedList(int size, boolean isBinary, int headerLength, int recordLength, File file,
088                        Class<T> clz, String charset)
089        {
090                this.size = size;
091                this.isBinary = isBinary;
092                this.headerLength = headerLength;
093                this.recordLength = recordLength;
094                this.file = file;
095                this.clz = clz;
096                this.charset = charset;
097        }
098
099        /**
100         * Override this if your instances can't be constructed with a no-args
101         * constructor
102         * 
103         * @return
104         */
105        protected T newElementInstance() {
106                try {
107                        return clz.newInstance();
108                } catch (final Exception e) {
109                        throw new RuntimeException(e);
110                }
111        }
112
113        protected T readRecord(DataInput input) throws IOException {
114                final T element = newElementInstance();
115                element.readBinary(input);
116                return element;
117        }
118
119        protected T readRecordASCII(Scanner br) throws IOException {
120                final T element = newElementInstance();
121                element.readASCII(br);
122                return element;
123        }
124
125        @Override
126        public int size() {
127                return size;
128        }
129
130        @Override
131        public boolean isEmpty() {
132                return size == 0;
133        }
134
135        @Override
136        public boolean contains(Object o) {
137                for (final T k : this) {
138                        if (k.equals(o))
139                                return true;
140                }
141
142                return false;
143        }
144
145        class FLBinaryIterator implements Iterator<T> {
146                protected int count = 0;
147                protected RandomAccessFile raf;
148
149                FLBinaryIterator() {
150                        try {
151                                this.raf = new RandomAccessFile(file, "r");
152                                raf.seek(headerLength);
153                        } catch (final IOException e) {
154                                close();
155                                throw new RuntimeException(e);
156                        }
157                }
158
159                @Override
160                public boolean hasNext() {
161                        if (count < size())
162                                return true;
163                        close();
164                        return false;
165                }
166
167                protected void close() {
168                        if (raf != null) {
169                                try {
170                                        raf.close();
171                                        raf = null;
172                                } catch (final IOException e) {
173                                }
174                        }
175                }
176
177                @Override
178                public T next() {
179                        try {
180                                if (raf == null)
181                                        return null;
182
183                                final T k = readRecord(raf);
184                                count++;
185                                return k;
186                        } catch (final IOException e) {
187                                throw new RuntimeException(e);
188                        }
189                }
190
191                @Override
192                public void remove() {
193                        throw new UnsupportedOperationException("Modifying a FileKeypointList isn't supported");
194                }
195
196                @Override
197                public void finalize() {
198                        close();
199                }
200        }
201
202        class FLAsciiIterator implements Iterator<T> {
203                protected int count = 0;
204                protected Scanner br;
205
206                FLAsciiIterator() {
207                        reset();
208                        for (int i = 0; i < ascii_offset; i++)
209                                next();
210                        count = 0;
211                }
212
213                protected void reset() {
214                        try {
215                                close();
216                                final FileInputStream fis = new FileInputStream(file);
217                                br = new Scanner(fis, charset);
218                                for (int i = 0; i < headerLength; i++)
219                                        br.nextLine();
220                        } catch (final IOException e) {
221                                close();
222                                throw new RuntimeException(e);
223                        }
224                }
225
226                @Override
227                public boolean hasNext() {
228                        if (count < size())
229                                return true;
230                        close();
231                        return false;
232                }
233
234                protected void close() {
235                        if (br != null) {
236                                br.close();
237                                br = null;
238                        }
239                }
240
241                @Override
242                public T next() {
243                        try {
244                                if (br == null)
245                                        return null;
246
247                                final T k = readRecordASCII(br);
248                                count++;
249                                return k;
250                        } catch (final IOException e) {
251                                throw new RuntimeException(e);
252                        }
253                }
254
255                @Override
256                public void remove() {
257                        throw new UnsupportedOperationException("Modifying a FileList isn't supported");
258                }
259
260                @Override
261                public void finalize() {
262                        close();
263                }
264        }
265
266        @Override
267        public Iterator<T> iterator() {
268                if (isBinary)
269                        return new FLBinaryIterator();
270                return new FLAsciiIterator();
271        }
272
273        @SuppressWarnings("unchecked")
274        @Override
275        public <E> E[] toArray(E[] a) {
276                if (a.length < size)
277                        return (E[]) Arrays.copyOf(toArray(), size, a.getClass());
278
279                int i = 0;
280                for (final T k : this)
281                        a[i++] = (E) k;
282
283                if (a.length > size)
284                        a[size] = null;
285
286                return a;
287        }
288
289        @Override
290        public boolean containsAll(Collection<?> c) {
291                final List<T> seen = new ArrayList<T>();
292                for (final T k : this) {
293                        if (c.contains(k))
294                                seen.add(k);
295                }
296                return seen.size() == c.size();
297        }
298
299        @Override
300        public T get(int index) {
301                if (index > size)
302                        throw new IllegalArgumentException("Index out of bounds");
303
304                if (!isBinary) {
305                        int count = 0;
306                        for (final T k : this) {
307                                if (count++ == index)
308                                        return k;
309                        }
310                } else {
311                        final long offset = (index * recordLength) + headerLength;
312                        RandomAccessFile raf = null;
313                        try {
314                                raf = new RandomAccessFile(file, "r");
315                                raf.seek(offset);
316                                return readRecord(raf);
317                        } catch (final IOException e) {
318                                throw new RuntimeException(e);
319                        } finally {
320                                if (raf != null)
321                                        try {
322                                                raf.close();
323                                        } catch (final IOException e) {
324                                        }
325                        }
326                }
327                return null;
328        }
329
330        @Override
331        public T set(int index, T element) {
332                throw new UnsupportedOperationException("Modifying a FileList isn't supported");
333        }
334
335        @Override
336        public void add(int index, T element) {
337                throw new UnsupportedOperationException("Modifying a FileList isn't supported");
338        }
339
340        @Override
341        public int indexOf(Object o) {
342                int count = 0;
343                for (final T k : this) {
344                        if (o.equals(k))
345                                return count;
346                        count++;
347                }
348                return -1;
349        }
350
351        @Override
352        public int lastIndexOf(Object o) {
353                int count = 0;
354                int found = -1;
355                for (final T k : this) {
356                        if (o.equals(k))
357                                found = count;
358                        count++;
359                }
360                return found;
361        }
362
363        protected abstract AbstractFileBackedList<T> newInstance(int newSize, boolean isBinary, int newHeaderLength,
364                        int recordLength, File file);
365
366        @Override
367        public RandomisableList<T> subList(int fromIndex, int toIndex) {
368                if (fromIndex < 0 || fromIndex > size || toIndex < 0 || toIndex > size)
369                        throw new IllegalArgumentException("bad offsets");
370
371                if (!isBinary) {
372                        final AbstractFileBackedList<T> fkl = newInstance(toIndex - fromIndex, isBinary, headerLength, recordLength,
373                                        file);
374                        fkl.ascii_offset = ascii_offset + fromIndex;
375                        return fkl;
376                } else {
377                        final int newHeaderLength = headerLength + (fromIndex * recordLength);
378                        final int newSize = toIndex - fromIndex;
379
380                        final AbstractFileBackedList<T> fkl = newInstance(newSize, isBinary, newHeaderLength, recordLength, file);
381                        return fkl;
382                }
383        }
384
385        class FLRandomSubList extends AbstractFileBackedList<T> {
386                protected final int[] indices;
387
388                FLRandomSubList(boolean isBinary, int headerLength, int recordLength, File file, int[] indices, Class<T> clz) {
389                        super(indices.length, isBinary, headerLength, recordLength, file, clz);
390                        this.indices = indices;
391                }
392
393                @Override
394                public int size() {
395                        return indices.length;
396                }
397
398                @Override
399                public T get(int index) {
400                        return super.get(indices[index]);
401                }
402
403                @Override
404                public RandomisableList<T> subList(int fromIndex, int toIndex) {
405                        if (fromIndex < 0 || fromIndex > size() || toIndex < 0 || toIndex > size())
406                                throw new IllegalArgumentException("bad offsets");
407
408                        final int[] newIndices = new int[toIndex - fromIndex];
409
410                        return new FLRandomSubList(isBinary, headerLength, recordLength, file, newIndices, clz);
411                }
412
413                class FLRandomBinaryIterator extends FLBinaryIterator {
414                        @Override
415                        public T next() {
416                                try {
417                                        if (count >= indices.length)
418                                                return null;
419
420                                        final int idx = indices[count];
421                                        raf.seek(headerLength + (idx * recordLength));
422
423                                        final T k = readRecord(raf);
424                                        count++;
425                                        return k;
426                                } catch (final IOException e) {
427                                        throw new RuntimeException(e);
428                                }
429                        }
430                }
431
432                class FLRandomAsciiIterator extends FLAsciiIterator {
433                        @Override
434                        public T next() {
435                                try {
436                                        if (count >= indices.length)
437                                                return null;
438
439                                        int offset = 0;
440                                        if (count > 0)
441                                                offset = indices[count] - indices[count - 1];
442
443                                        if (offset < 0) {
444                                                reset();
445                                                offset = indices[count];
446                                        }
447
448                                        for (int i = 0; i < offset - 1; i++) {
449                                                readRecordASCII(br);
450                                        }
451
452                                        final T k = readRecordASCII(br);
453                                        count++;
454                                        return k;
455                                } catch (final IOException e) {
456                                        throw new RuntimeException(e);
457                                }
458                        }
459                }
460
461                @Override
462                public Iterator<T> iterator() {
463                        if (isBinary)
464                                return new FLRandomBinaryIterator();
465                        return new FLRandomAsciiIterator();
466                }
467
468                @Override
469                public RandomisableList<T> randomSubList(int nelem) {
470                        if (nelem > size())
471                                throw new IllegalArgumentException("number of requested elements is greater than the list size");
472
473                        final int[] newindices = RandomData.getUniqueRandomInts(nelem, 0, size());
474
475                        for (int i = 0; i < newindices.length; i++)
476                                newindices[i] = indices[newindices[i]];
477
478                        return new FLRandomSubList(isBinary, headerLength, recordLength, file, newindices, clz);
479                }
480
481                @Override
482                public T readRecord(DataInput input) throws IOException {
483                        return AbstractFileBackedList.this.readRecord(input);
484                }
485
486                @Override
487                public T readRecordASCII(Scanner br) throws IOException {
488                        return AbstractFileBackedList.this.readRecordASCII(br);
489                }
490
491                @Override
492                protected AbstractFileBackedList<T> newInstance(int newSize, boolean isBinary, int newHeaderLength,
493                                int recordLength, File file)
494                {
495                        return AbstractFileBackedList.this.newInstance(newSize, isBinary, newHeaderLength, recordLength, file);
496                }
497        }
498
499        @Override
500        public RandomisableList<T> randomSubList(int nelem) {
501                if (nelem > size())
502                        throw new IllegalArgumentException("number of requested elements is greater than the list size");
503
504                final int[] indices = RandomData.getUniqueRandomInts(nelem, 0, size());
505
506                return new FLRandomSubList(isBinary, headerLength, recordLength, file, indices, clz);
507        }
508
509        @Override
510        public Object[] toArray() {
511                final Object[] objs = new Object[size()];
512                int i = 0;
513                for (final T o : this) {
514                        objs[i++] = o;
515                }
516                return objs;
517        }
518}