001/** 002 * Copyright 2011 The University of Southampton, Yahoo Inc., and the 003 * individual contributors. All rights reserved. 004 * 005 * Licensed under the Apache License, Version 2.0 (the "License"); 006 * you may not use this file except in compliance with the License. 007 * You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.openimaj.web.scraping.images; 018 019import java.io.IOException; 020import java.io.InputStreamReader; 021import java.net.MalformedURLException; 022import java.net.URL; 023import java.util.ArrayList; 024import java.util.List; 025import java.util.Map; 026import java.util.regex.Matcher; 027import java.util.regex.Pattern; 028 029import org.apache.http.HttpResponse; 030import org.apache.http.client.ClientProtocolException; 031import org.apache.http.client.methods.HttpGet; 032import org.apache.http.impl.client.DefaultHttpClient; 033import org.apache.log4j.Logger; 034 035import com.google.gson.Gson; 036 037/** 038 * An imgur client has the various functionality of the imgur.com api 039 * 040 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 041 * 042 */ 043public class ImgurClient { 044 /** 045 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 046 * 047 */ 048 public static enum ImgurType { 049 /** 050 * /a/hash 051 */ 052 ALBUM, 053 /** 054 * an image 055 */ 056 IMAGE, 057 /** 058 * a call to the raw imgur gallery 059 */ 060 GALLERY 061 } 062 063 /** 064 * The type and hash usually returned from 065 * {@link ImgurClient#imgurURLtoHash(URL)} 066 * 067 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 068 * 069 */ 070 public static class ImgurTypeHash { 071 072 protected ImgurTypeHash(ImgurType type, String hash) { 073 this.hash = hash; 074 this.type = type; 075 } 076 077 /** 078 * the {@link ImgurType} 079 */ 080 public ImgurType type; 081 /** 082 * the hash code, might be null if {@link ImgurType} is GALLERY 083 */ 084 public String hash; 085 086 @Override 087 public String toString() { 088 return String.format("Imgur [%s] %s", type, hash); 089 } 090 } 091 092 /** 093 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 094 * 095 */ 096 private static class ImgurResponse { 097 AlbumImgurResponse album; 098 ImageResponse image; 099 } 100 101 /** 102 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 103 * 104 */ 105 private static class AlbumImgurResponse { 106 List<ImageResponse> images; 107 } 108 109 /** 110 * An image response is composed of two Maps, one describing the image and 111 * another describing its links 112 * 113 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 114 * 115 */ 116 public static class ImageResponse { 117 /** 118 * Image metadata 119 */ 120 public Map<String, Object> image; 121 /** 122 * various links 123 */ 124 public Map<String, Object> links; 125 126 /** 127 * @return return links.original from the imgur API response 128 */ 129 public URL getOriginalLink() { 130 final String orig = (String) links.get("original"); 131 if (orig == null) 132 return null; 133 try { 134 return new URL(orig); 135 } catch (final MalformedURLException e) { 136 } 137 return null; 138 } 139 } 140 141 private final static Pattern hashPattern = Pattern.compile("(^[a-zA-Z0-9]+)"); 142 143 private String apiKey; 144 private DefaultHttpClient client; 145 146 private static String ENDPOINT = "http://api.imgur.com/2"; 147 private transient Gson gson = new Gson(); 148 149 @SuppressWarnings("unused") 150 private static Logger logger = Logger.getLogger(ImgurClient.class); 151 152 /** 153 * 154 */ 155 public ImgurClient() { 156 this.apiKey = System.getProperty("imgur.api_key"); 157 if (apiKey == null) { 158 // Anonymous api key for Sina Samangooei 159 apiKey = "fecf663ef507f598e8119451e17a6c29"; 160 } 161 client = new DefaultHttpClient(); 162 } 163 164 /** 165 * Depending on the {@link ImgurTypeHash} instance calls 166 * {@link #getSingleImage(String)}, {@link #getAlbumImages(String)} or 167 * {@link #getGalleryImages()} 168 * 169 * @param typehash 170 * @return a list of {@link ImageResponse} instances 171 * @throws IOException 172 * @throws ClientProtocolException 173 */ 174 public List<ImageResponse> getImages(ImgurTypeHash typehash) throws ClientProtocolException, IOException { 175 final List<ImageResponse> ret = new ArrayList<ImageResponse>(); 176 switch (typehash.type) { 177 case IMAGE: 178 ret.add(getSingleImage(typehash.hash)); 179 break; 180 case ALBUM: 181 ret.addAll(getAlbumImages(typehash.hash)); 182 break; 183 case GALLERY: 184 ret.addAll(getGalleryImages()); 185 break; 186 } 187 return ret; 188 } 189 190 /** 191 * Calls http://api.imgur.com/2/image/:HASH 192 * 193 * @param hash 194 * @return the json response 195 * @throws IOException 196 * @throws ClientProtocolException 197 */ 198 public ImageResponse getSingleImage(String hash) throws ClientProtocolException, IOException { 199 final HttpGet get = new HttpGet(String.format("%s/image/%s.json", ENDPOINT, hash)); 200 final HttpResponse response = client.execute(get); 201 final ImgurResponse resp = gson.fromJson(new InputStreamReader(response.getEntity().getContent()), 202 ImgurResponse.class); 203 return resp.image; 204 } 205 206 /** 207 * Calls http://api.imgur.com/2/album/:ID 208 * 209 * @param hash 210 * @return the json response 211 * @throws IOException 212 * @throws ClientProtocolException 213 */ 214 public List<ImageResponse> getAlbumImages(String hash) throws ClientProtocolException, IOException { 215 final HttpGet get = new HttpGet(String.format("%s/album/%s.json", ENDPOINT, hash)); 216 final HttpResponse response = client.execute(get); 217 final ImgurResponse resp = gson.fromJson(new InputStreamReader(response.getEntity().getContent()), 218 ImgurResponse.class); 219 return resp.album.images; 220 } 221 222 /** 223 * Calls http://imgur.com/gallery.json 224 * 225 * @return the json response 226 */ 227 public List<ImageResponse> getGalleryImages() { 228 return null; 229 } 230 231 /** 232 * @param url 233 * @return the imgur type and hash, or null if the URL was too tricky 234 */ 235 public static ImgurTypeHash imgurURLtoHash(URL url) { 236 if (!url.getHost().contains("imgur")) 237 return null; 238 final String path = url.getPath(); 239 final String[] split = path.split("[/]+"); 240 if (split.length == 0) 241 return null; 242 else if (split.length == 2) { 243 if (split[1].equals("gallery")) 244 return new ImgurTypeHash(ImgurType.GALLERY, null); 245 else { 246 final Matcher matcher = hashPattern.matcher(split[1]); 247 if (matcher.find()) 248 { 249 final String hash = split[1].substring(0, matcher.end()); 250 return new ImgurTypeHash(ImgurType.IMAGE, hash); 251 } 252 return null; 253 } 254 } 255 else { 256 final String hashPart = split[split.length - 1]; 257 final String typePart = split[split.length - 2]; 258 ImgurType type = ImgurType.IMAGE; 259 if (typePart.equals("a")) 260 type = ImgurType.ALBUM; 261 262 final Matcher matcher = hashPattern.matcher(hashPart); 263 matcher.find(); 264 final String hash = hashPart.substring(0, matcher.end()); 265 return new ImgurTypeHash(type, hash); 266 } 267 268 } 269}