002 * Copyright 2011 The University of Southampton, Yahoo Inc., and the
003 * individual contributors. All rights reserved.
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 *    http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.openimaj.web.scraping.images;
019import java.io.IOException;
020import java.io.InputStreamReader;
021import java.net.MalformedURLException;
022import java.net.URL;
023import java.util.ArrayList;
024import java.util.List;
025import java.util.Map;
026import java.util.regex.Matcher;
027import java.util.regex.Pattern;
029import org.apache.http.HttpResponse;
030import org.apache.http.client.ClientProtocolException;
031import org.apache.http.client.methods.HttpGet;
032import org.apache.http.impl.client.DefaultHttpClient;
033import org.apache.log4j.Logger;
035import com.google.gson.Gson;
/**
 * A client for parts of the imgur.com API: it looks up single images and
 * albums, and maps imgur URLs to their type and hash.
 *
 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
 *
 */
043public class ImgurClient {
044        /**
045         * @author Sina Samangooei (ss@ecs.soton.ac.uk)
046         * 
047         */
048        public static enum ImgurType {
049                /**
050                 * /a/hash
051                 */
052                ALBUM,
053                /**
054                 * an image
055                 */
056                IMAGE,
057                /**
058                 * a call to the raw imgur gallery
059                 */
060                GALLERY
061        }
063        /**
064         * The type and hash usually returned from
065         * {@link ImgurClient#imgurURLtoHash(URL)}
066         * 
067         * @author Sina Samangooei (ss@ecs.soton.ac.uk)
068         * 
069         */
070        public static class ImgurTypeHash {
072                protected ImgurTypeHash(ImgurType type, String hash) {
073                        this.hash = hash;
074                        this.type = type;
075                }
077                /**
078                 * the {@link ImgurType}
079                 */
080                public ImgurType type;
081                /**
082                 * the hash code, might be null if {@link ImgurType} is GALLERY
083                 */
084                public String hash;
086                @Override
087                public String toString() {
088                        return String.format("Imgur [%s] %s", type, hash);
089                }
090        }
092        /**
093         * @author Sina Samangooei (ss@ecs.soton.ac.uk)
094         * 
095         */
096        private static class ImgurResponse {
097                AlbumImgurResponse album;
098                ImageResponse image;
099        }
101        /**
102         * @author Sina Samangooei (ss@ecs.soton.ac.uk)
103         * 
104         */
105        private static class AlbumImgurResponse {
106                List<ImageResponse> images;
107        }
109        /**
110         * An image response is composed of two Maps, one describing the image and
111         * another describing its links
112         * 
113         * @author Sina Samangooei (ss@ecs.soton.ac.uk)
114         * 
115         */
116        public static class ImageResponse {
117                /**
118                 * Image metadata
119                 */
120                public Map<String, Object> image;
121                /**
122                 * various links
123                 */
124                public Map<String, Object> links;
126                /**
127                 * @return return links.original from the imgur API response
128                 */
129                public URL getOriginalLink() {
130                        final String orig = (String) links.get("original");
131                        if (orig == null)
132                                return null;
133                        try {
134                                return new URL(orig);
135                        } catch (final MalformedURLException e) {
136                        }
137                        return null;
138                }
139        }
141        private final static Pattern hashPattern = Pattern.compile("(^[a-zA-Z0-9]+)");
143        private String apiKey;
144        private DefaultHttpClient client;
146        private static String ENDPOINT = "http://api.imgur.com/2";
147        private transient Gson gson = new Gson();
149        @SuppressWarnings("unused")
150        private static Logger logger = Logger.getLogger(ImgurClient.class);
152        /**
153         * 
154         */
155        public ImgurClient() {
156                this.apiKey = System.getProperty("imgur.api_key");
157                if (apiKey == null) {
158                        // Anonymous api key for Sina Samangooei
159                        apiKey = "fecf663ef507f598e8119451e17a6c29";
160                }
161                client = new DefaultHttpClient();
162        }
164        /**
165         * Depending on the {@link ImgurTypeHash} instance calls
166         * {@link #getSingleImage(String)}, {@link #getAlbumImages(String)} or
167         * {@link #getGalleryImages()}
168         * 
169         * @param typehash
170         * @return a list of {@link ImageResponse} instances
171         * @throws IOException
172         * @throws ClientProtocolException
173         */
174        public List<ImageResponse> getImages(ImgurTypeHash typehash) throws ClientProtocolException, IOException {
175                final List<ImageResponse> ret = new ArrayList<ImageResponse>();
176                switch (typehash.type) {
177                case IMAGE:
178                        ret.add(getSingleImage(typehash.hash));
179                        break;
180                case ALBUM:
181                        ret.addAll(getAlbumImages(typehash.hash));
182                        break;
183                case GALLERY:
184                        ret.addAll(getGalleryImages());
185                        break;
186                }
187                return ret;
188        }
190        /**
191         * Calls http://api.imgur.com/2/image/:HASH
192         * 
193         * @param hash
194         * @return the json response
195         * @throws IOException
196         * @throws ClientProtocolException
197         */
198        public ImageResponse getSingleImage(String hash) throws ClientProtocolException, IOException {
199                final HttpGet get = new HttpGet(String.format("%s/image/%s.json", ENDPOINT, hash));
200                final HttpResponse response = client.execute(get);
201                final ImgurResponse resp = gson.fromJson(new InputStreamReader(response.getEntity().getContent()),
202                                ImgurResponse.class);
203                return resp.image;
204        }
206        /**
207         * Calls http://api.imgur.com/2/album/:ID
208         * 
209         * @param hash
210         * @return the json response
211         * @throws IOException
212         * @throws ClientProtocolException
213         */
214        public List<ImageResponse> getAlbumImages(String hash) throws ClientProtocolException, IOException {
215                final HttpGet get = new HttpGet(String.format("%s/album/%s.json", ENDPOINT, hash));
216                final HttpResponse response = client.execute(get);
217                final ImgurResponse resp = gson.fromJson(new InputStreamReader(response.getEntity().getContent()),
218                                ImgurResponse.class);
219                return resp.album.images;
220        }
222        /**
223         * Calls http://imgur.com/gallery.json
224         * 
225         * @return the json response
226         */
227        public List<ImageResponse> getGalleryImages() {
228                return null;
229        }
231        /**
232         * @param url
233         * @return the imgur type and hash, or null if the URL was too tricky
234         */
235        public static ImgurTypeHash imgurURLtoHash(URL url) {
236                if (!url.getHost().contains("imgur"))
237                        return null;
238                final String path = url.getPath();
239                final String[] split = path.split("[/]+");
240                if (split.length == 0)
241                        return null;
242                else if (split.length == 2) {
243                        if (split[1].equals("gallery"))
244                                return new ImgurTypeHash(ImgurType.GALLERY, null);
245                        else {
246                                final Matcher matcher = hashPattern.matcher(split[1]);
247                                if (matcher.find())
248                                {
249                                        final String hash = split[1].substring(0, matcher.end());
250                                        return new ImgurTypeHash(ImgurType.IMAGE, hash);
251                                }
252                                return null;
253                        }
254                }
255                else {
256                        final String hashPart = split[split.length - 1];
257                        final String typePart = split[split.length - 2];
258                        ImgurType type = ImgurType.IMAGE;
259                        if (typePart.equals("a"))
260                                type = ImgurType.ALBUM;
262                        final Matcher matcher = hashPattern.matcher(hashPart);
263                        matcher.find();
264                        final String hash = hashPart.substring(0, matcher.end());
265                        return new ImgurTypeHash(type, hash);
266                }
268        }