001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.util.list;
031
import java.io.Closeable;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.AbstractSequentialList;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.ListIterator;
import java.util.NoSuchElementException;
import java.util.Scanner;

import org.openimaj.data.RandomData;
import org.openimaj.io.Readable;
048
049/**
050 * A list of records that can be consumed from a stream. 
051 * Records can only be read in order and concurrent access is a really
052 * bad idea. Once a list is exhausted by consuming all it's elements it
053 * cannot be reused. The stream will be closed automatically when it is 
054 * exhausted.
055 * 
056 * @author Jonathon Hare
057 *
058 * @param <T> The type of object which can be read by this list
059 */
060public abstract class AbstractStreamBackedList<T extends Readable> extends AbstractSequentialList<T> implements RandomisableList<T> {
061        private Object streamWrapper;
062        
063        /**
064         * The class from which to generate an instance of items held in the list
065         */
066        protected final Class<T> clz;
067        
068        /**
069         * The size of the list
070         */
071        protected final int size;
072        
073        /**
074         * Does the stream hold binary data (as opposed to ASCII)
075         */
076        protected final boolean isBinary;
077        /**
078         * The length (in bytes) of the header which identifies the stream type
079         */
080        protected final int headerLength;
081        /**
082         * The length (in bytes) of each item held in the list
083         */
084        protected final int recordLength;
085        
086        /**
087         * Number of bytes read
088         */
089        protected int consumed = 0;
090
091        private InputStream underlyingStream;
092        
093        /**
094         * Instantiate the list and all instance variables. Also starts the stream as a DataInputStream if the stream is binary and a BufferedReader
095         * otherwise.
096         * 
097         * @param stream the stream
098         * @param size number of elements
099         * @param isBinary is the stream binary
100         * @param headerLength how long is the header
101         * @param recordLength how long is each element
102         * @param clz what class instantiates elements in the list
103         * @throws UnsupportedEncodingException 
104         */
105        protected AbstractStreamBackedList(InputStream stream, int size, boolean isBinary, int headerLength, int recordLength, Class<T> clz)  {
106                this.size = size;
107                this.isBinary = isBinary;
108                this.headerLength = headerLength;
109                this.recordLength = recordLength;
110                this.clz = clz;
111                
112                if (isBinary)
113                        this.streamWrapper = new DataInputStream(stream);
114                else
115                {
116                        Scanner s = new Scanner(new InputStreamReader(stream));
117                        for (int i = 0; i < headerLength; i++) {
118                                s.nextLine();
119                        }
120                        this.streamWrapper = s;
121                        
122                }
123                this.underlyingStream = stream;
124                
125        }
126        
127        /**
128         * Instantiate the list and all instance variables. Also starts the stream as a DataInputStream if the stream is binary and a BufferedReader
129         * otherwise.
130         * 
131         * @param stream the stream
132         * @param size number of elements
133         * @param isBinary is the stream binary
134         * @param headerLength how long is the header
135         * @param recordLength how long is each element
136         * @param charset if the stream is not binary, the charsetName which is sent to the internal InputStreamReader
137         * @param clz what class instantiates elements in the list
138         * 
139         * @throws UnsupportedEncodingException 
140         */
141        protected AbstractStreamBackedList(InputStream stream, int size,boolean isBinary, int headerLength, int recordLength,Class<T> clz, String charset) throws UnsupportedEncodingException {
142                this.size = size;
143                this.isBinary = isBinary;
144                this.headerLength = headerLength;
145                this.recordLength = recordLength;
146                this.clz = clz;
147                
148                if (isBinary)
149                        this.streamWrapper = new DataInputStream(stream);
150                else
151                {
152                        Scanner s = new Scanner(new InputStreamReader(stream,charset));
153                        for (int i = 0; i < headerLength; i++) {
154                                s.nextLine();
155                        }
156                        this.streamWrapper = s;
157                        
158                }
159                this.underlyingStream = stream;
160        }
161
162        /**
163         * Override this id your instances can't be constructed with a no-args ctr
164         * @return
165         */
166        protected T newElementInstance() {
167                try {
168                        return clz.newInstance();
169                } catch (Exception e) {
170                        throw new RuntimeException(e);
171                }
172        }
173        
174        protected T readRecord(DataInput input) throws IOException {
175                T element = newElementInstance();
176                element.readBinary(input);
177                return element;
178        }
179        
180        protected T readRecordASCII(Scanner br) throws IOException {
181                T element = newElementInstance();
182                element.readASCII(br);
183                return element;
184        }
185        
186        abstract class SLIterator implements ListIterator<T> {
187                @Override
188                public void add(T e) {
189                        throw new UnsupportedOperationException();
190                }
191
192                @Override
193                public boolean hasNext() {
194                        if(streamWrapper == null) return false;
195                        boolean readMore = true;
196                        readMore &= size == -1;
197                        if(streamWrapper instanceof Scanner){
198                                readMore &= ((Scanner)streamWrapper).hasNext();
199                        }
200                        readMore = readMore || consumed < size;
201                        if (readMore) return true;
202                        close();
203                        return false;
204                }
205
206                @Override
207                public boolean hasPrevious() {
208                        return false;
209                }
210
211                @Override
212                public int nextIndex() {
213                        return Math.max(consumed+1, size());
214                }
215
216                @Override
217                public T previous() {
218                        return null;
219                }
220
221                @Override
222                public int previousIndex() {
223                        return -1;
224                }
225
226                @Override
227                public void remove() {
228                        throw new UnsupportedOperationException();
229                }
230
231                @Override
232                public void set(T e) {
233                        throw new UnsupportedOperationException();
234                }
235        }
236        
237        class SLBinaryIterator extends SLIterator {
238                @Override
239                public T next() {
240                        try {
241                                if (hasNext()) {
242                                        consumed++;
243                                        return readRecord((DataInputStream)streamWrapper);
244                                }
245                                return null;
246                        } catch (IOException e) {
247                                close();
248                                throw new RuntimeException(e);
249                        }
250                }
251        }
252        
253        class SLAsciiIterator extends SLIterator {
254                @Override
255                public T next() {
256                        try {
257                                if (hasNext()) {
258                                        consumed++;
259                                        return readRecordASCII((Scanner)streamWrapper);
260                                }
261                                return null;
262                        } catch (IOException e) {
263                                close();
264                                throw new RuntimeException(e);
265                        }
266                }
267        }
268        
269        @Override
270        public ListIterator<T> listIterator(int index) {
271                if (isBinary) return new SLBinaryIterator();
272                return new SLAsciiIterator();
273        }
274
275        void close() {
276                if (streamWrapper != null) {
277                        try { ((Closeable) underlyingStream).close(); } catch (IOException e) {}
278                        streamWrapper = null;
279                }
280        }
281        
282        @Override
283        public int size() {
284                return size;
285        }
286        
287        /**
288         * Get the number of records consumed so far
289         * @return number of consumed records
290         */
291        public int consumed() {
292                return consumed;
293        }
294        
295        /**
296         * Get the number of records remaining in the stream
297         * @return number of remaining records
298         */
299        public int remaining() {
300                return size-consumed;
301        }
302        
303        class MemoryRandomisableList extends ArrayList<T> implements RandomisableList<T> {
304                private static final long serialVersionUID = 1L;
305                @Override
306                public RandomisableList<T> randomSubList(int nelem) {
307                        if (nelem > remaining()) 
308                                throw new IllegalArgumentException("not enough elements in list");
309                        
310                        int [] rnd = RandomData.getUniqueRandomInts(nelem, 0, size());
311                        MemoryRandomisableList newList = new MemoryRandomisableList();
312                        for (int i : rnd) {
313                                newList.add(get(i));
314                        }
315                        
316                        return newList;
317                }
318        }
319        
320        /** 
321         * This method creates a random sublist in ram from elements consumed
322         * from the target list. 
323         * 
324         * @see org.openimaj.util.list.RandomisableList#randomSubList(int)
325         */
326        @Override
327        public RandomisableList<T> randomSubList(int nelem) {
328                if (nelem > remaining()) 
329                        throw new IllegalArgumentException("not enough records remaining in list");
330                
331                MemoryRandomisableList newList = new MemoryRandomisableList();
332                int [] rnd = RandomData.getUniqueRandomInts(nelem, 0, remaining());
333                Arrays.sort(rnd);
334                
335                for (int i : rnd) {
336                        newList.add(get(i));
337                }
338                
339                Collections.shuffle(newList);
340                
341                return newList;
342        }
343}