001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.util.iterator; 031 032import java.io.BufferedReader; 033import java.io.File; 034import java.io.FileInputStream; 035import java.io.FileReader; 036import java.io.IOException; 037import java.io.InputStreamReader; 038import java.net.URL; 039import java.util.Iterator; 040import java.util.zip.GZIPInputStream; 041 042/** 043 * An {@link Iterable} that can provide access to lines of a text file 044 * referenced by a {@link File} or {@link URL}. It is safe to re-use this 045 * {@link Iterable} instance; a new {@link BufferedReader} will be created when 046 * {@link #iterator()} is called. Any {@link IOException}s are wrapped as 047 * {@link RuntimeException}s. 048 * 049 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 050 * 051 */ 052public class TextLineIterable implements Iterable<String> { 053 /** 054 * Interface describing things that can provide input for a 055 * {@link TextLineIterable} 056 * 057 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 058 * 059 */ 060 public static interface Provider { 061 /** 062 * Open a stream to the data 063 * 064 * @return the stream to the data 065 * @throws IOException 066 */ 067 BufferedReader open() throws IOException; 068 } 069 070 /** 071 * A {@link Provider} for gzipped text files 072 * 073 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 074 * 075 */ 076 public static class GZIPFileProvider implements Provider { 077 File f; 078 079 /** 080 * Construct with the given file 081 * 082 * @param f 083 * the file 084 */ 085 public GZIPFileProvider(File f) { 086 this.f = f; 087 } 088 089 @Override 090 public BufferedReader open() throws IOException { 091 return new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(f)))); 092 } 093 } 094 095 private Provider source; 096 097 /** 098 * Construct with the given provider 099 * 100 * @param source 101 * the provider 102 */ 103 public TextLineIterable(Provider source) { 104 this.source = source; 105 } 106 107 /** 108 * Construct with the given file 109 * 110 * @param f 111 * the file 112 */ 113 public TextLineIterable(final File f) { 114 source = new Provider() { 115 @Override 116 public BufferedReader open() throws IOException { 117 return new BufferedReader(new FileReader(f)); 118 } 119 }; 120 } 121 122 /** 123 * Construct with the given file and charset 124 * 125 * @param f 126 * the file 127 * @param charset 128 * the character set 129 */ 130 public TextLineIterable(final File f, final String charset) { 131 source = new Provider() { 132 @Override 133 public BufferedReader open() throws IOException { 134 return new BufferedReader(new InputStreamReader(new FileInputStream(f), charset)); 135 } 136 }; 137 } 138 139 /** 140 * Construct with the given url and charset 141 * 142 * @param f 143 * the url 144 * @param charset 145 * the character set 146 */ 147 public TextLineIterable(final URL f, final String charset) { 148 source = new Provider() { 149 @Override 150 public BufferedReader open() throws IOException { 151 return new BufferedReader(new InputStreamReader(f.openStream(), charset)); 152 } 153 }; 154 } 155 156 /** 157 * Construct with the given url 158 * 159 * @param f 160 * the url 161 */ 162 public TextLineIterable(final URL f) { 163 source = new Provider() { 164 @Override 165 public BufferedReader open() throws IOException { 166 return new BufferedReader(new InputStreamReader(f.openStream())); 167 } 168 }; 169 } 170 171 @Override 172 public Iterator<String> iterator() { 173 return new Iterator<String>() { 174 BufferedReader br = open(); 175 String nextLine = readLine(); 176 177 @Override 178 public boolean hasNext() { 179 return nextLine != null; 180 } 181 182 @Override 183 public String next() { 184 final String result = nextLine; 185 186 if (nextLine != null) { 187 nextLine = readLine(); 188 if (nextLine == null) 189 closeQuietly(); 190 } 191 192 return result; 193 } 194 195 @Override 196 public void remove() { 197 throw new UnsupportedOperationException("not supported"); 198 } 199 200 private String readLine() { 201 try { 202 return br.readLine(); 203 } catch (final IOException e) { 204 throw new RuntimeException(e); 205 } 206 } 207 208 private BufferedReader open() { 209 try { 210 return source.open(); 211 } catch (final IOException e) { 212 throw new RuntimeException(e); 213 } 214 } 215 216 private void closeQuietly() { 217 try { 218 br.close(); 219 } catch (final IOException e) { 220 // ignore 221 } 222 } 223 }; 224 } 225}