001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.image.dataset;
031
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.prefs.BackingStoreException;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.openimaj.data.dataset.ReadableListDataset;
import org.openimaj.data.identity.Identifiable;
import org.openimaj.image.DisplayUtilities;
import org.openimaj.image.FImage;
import org.openimaj.image.Image;
import org.openimaj.image.ImageUtilities;
import org.openimaj.io.HttpUtils;
import org.openimaj.io.InputStreamObjectReader;
import org.openimaj.util.api.auth.DefaultTokenFactory;
import org.openimaj.util.api.auth.common.BingAPIToken;
063
064/**
065 * Image datasets dynamically created from the Bing search API.
066 * 
067 * <h5> WARNING </h5>
 * Some of the images inside this dataset may be set to {@code null} if they could not be loaded.
069 *
070 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
071 *
072 * @param <IMAGE>
073 *            The type of {@link Image} instance held by the dataset.
074 */
075public class BingImageDataset<IMAGE extends Image<?, IMAGE>> extends ReadableListDataset<IMAGE, InputStream>
076                implements
077                Identifiable
078{
079        public static class ImageDataSourceQuery {
080                public static enum SafeSearch {
081                        Off, Moderate, Strict;
082                }
083
084                public static enum Aspect {
085                        Square, Wide, Tall, All;
086                }
087
088                public static enum Color {
089                        /**
090                         * Return color images
091                         */
092                        ColorOnly,
093                        /**
094                         * Return black and white images
095                         */
096                        Monochrome,
097                        Black,
098                        Blue,
099                        Brown,
100                        Gray,
101                        Green,
102                        Orange,
103                        Pink,
104                        Purple,
105                        Red,
106                        Teal,
107                        White,
108                        Yellow
109                }
110
111                public static enum Freshness {
112                        /**
113                         * Return images discovered within the last 24 hours
114                         */
115                        Day,
116                        /**
117                         * Return images discovered within the last 7 days
118                         */
119                        Week,
120                        /**
121                         * Return images discovered within the last 30 days
122                         */
123                        Month
124                }
125
126                /**
127                 * Filter images by content
128                 */
129                public static enum ImageContent {
130                        /**
131                         * Return images that show only a person's face
132                         */
133                        Face,
134                        /**
135                         * Return images that show only a person's head and shoulders
136                         */
137                        Portrait
138                }
139
140                /**
141                 * Filter images by image type.
142                 */
143                public static enum ImageType {
144                        /**
145                         * Return only animated GIFs
146                         */
147                        AnimatedGif,
148                        /**
149                         * Return only clip art images
150                         */
151                        Clipart,
152                        /**
153                         * Return only line drawings
154                         */
155                        Line,
156                        /**
157                         * Return only photographs (excluding line drawings, animated Gifs,
158                         * and clip art)
159                         */
160                        Photo,
161                        /**
162                         * Return only images that contain items where Bing knows of a
163                         * merchant that is selling the items.
164                         */
165                        Shopping,
166                }
167
168                public static enum License {
169                        /**
170                         * Return images where the creator has waived their exclusive
171                         * rights, to the fullest extent allowed by law.
172                         */
173                        Public,
174                        /**
175                         * Return images that may be shared with others. Changing or editing
176                         * the image might not be allowed. Also, modifying, sharing, and
177                         * using the image for commercial purposes might not be allowed.
178                         * Typically, this option returns the most images.
179                         */
180                        Share,
181                        /**
182                         * Return images that may be shared with others for personal or
183                         * commercial purposes. Changing or editing the image might not be
184                         * allowed.
185                         */
186                        ShareCommercially,
187                        /**
188                         * Return images that may be modified, shared, and used. Changing or
189                         * editing the image might not be allowed. Modifying, sharing, and
190                         * using the image for commercial purposes might not be allowed.
191                         */
192                        Modify,
193                        /**
194                         * Return images that may be modified, shared, and used for personal
195                         * or commercial purposes. Typically, this option returns the fewest
196                         * images.
197                         */
198                        ModifyCommercially,
199                        /**
200                         * Do not filter by license type. Specifying this value is the same
201                         * as not specifying the license parameter.
202                         */
203                        All
204                }
205
206                public static enum Size {
207                        /**
208                         * Return images that are less than 200x200 pixels
209                         */
210                        Small,
211                        /**
212                         * Return images that are greater than or equal to 200x200 pixels
213                         * but less than 500x500 pixels
214                         */
215                        Medium,
216                        /**
217                         * Return images that are 500x500 pixels or larger
218                         */
219                        Large,
220                        /**
221                         * Return wallpaper images.
222                         */
223                        Wallpaper,
224                        /**
225                         * Do not filter by size. Specifying this value is the same as not
226                         * specifying the size parameter.
227                         */
228                        All
229                }
230
231                SafeSearch safeSearch;
232                Aspect aspect;
233                Color color;
234                Freshness freshness;
235                int height;
236                ImageContent imageContent;
237                ImageType imageType;
238                License license;
239                Size size;
240                int width;
241                int offset;
242                int count;
243                String query;
244                private String accountKey;
245
246                /**
247                 * @return the safeSearch
248                 */
249                public SafeSearch getSafeSearch() {
250                        return safeSearch;
251                }
252
253                /**
254                 * @param safeSearch
255                 *            the safeSearch to set
256                 */
257                public void setSafeSearch(SafeSearch safeSearch) {
258                        this.safeSearch = safeSearch;
259                }
260
261                /**
262                 * @return the aspect
263                 */
264                public Aspect getAspect() {
265                        return aspect;
266                }
267
268                /**
269                 * @param aspect
270                 *            the aspect to set
271                 */
272                public void setAspect(Aspect aspect) {
273                        this.aspect = aspect;
274                }
275
276                /**
277                 * @return the color
278                 */
279                public Color getColor() {
280                        return color;
281                }
282
283                /**
284                 * @param color
285                 *            the color to set
286                 */
287                public void setColor(Color color) {
288                        this.color = color;
289                }
290
291                /**
292                 * @return the freshness
293                 */
294                public Freshness getFreshness() {
295                        return freshness;
296                }
297
298                /**
299                 * @param freshness
300                 *            the freshness to set
301                 */
302                public void setFreshness(Freshness freshness) {
303                        this.freshness = freshness;
304                }
305
306                /**
307                 * @return the height
308                 */
309                public int getHeight() {
310                        return height;
311                }
312
313                /**
314                 * @param height
315                 *            the height to set
316                 */
317                public void setHeight(int height) {
318                        this.height = height;
319                }
320
321                /**
322                 * @return the imageContent
323                 */
324                public ImageContent getImageContent() {
325                        return imageContent;
326                }
327
328                /**
329                 * @param imageContent
330                 *            the imageContent to set
331                 */
332                public void setImageContent(ImageContent imageContent) {
333                        this.imageContent = imageContent;
334                }
335
336                /**
337                 * @return the imageType
338                 */
339                public ImageType getImageType() {
340                        return imageType;
341                }
342
343                /**
344                 * @param imageType
345                 *            the imageType to set
346                 */
347                public void setImageType(ImageType imageType) {
348                        this.imageType = imageType;
349                }
350
351                /**
352                 * @return the license
353                 */
354                public License getLicense() {
355                        return license;
356                }
357
358                /**
359                 * @param license
360                 *            the license to set
361                 */
362                public void setLicense(License license) {
363                        this.license = license;
364                }
365
366                /**
367                 * @return the size
368                 */
369                public Size getSize() {
370                        return size;
371                }
372
373                /**
374                 * @param size
375                 *            the size to set
376                 */
377                public void setSize(Size size) {
378                        this.size = size;
379                }
380
381                /**
382                 * @return the width
383                 */
384                public int getWidth() {
385                        return width;
386                }
387
388                /**
389                 * @param width
390                 *            the width to set
391                 */
392                public void setWidth(int width) {
393                        this.width = width;
394                }
395
396                /**
397                 * @return the offset
398                 */
399                public int getOffset() {
400                        return offset;
401                }
402
403                /**
404                 * @param offset
405                 *            the offset to set
406                 */
407                public void setOffset(int offset) {
408                        this.offset = offset;
409                }
410
411                /**
412                 * @return the count
413                 */
414                public int getCount() {
415                        return count;
416                }
417
418                /**
419                 * @param count
420                 *            the count to set
421                 */
422                public void setCount(int count) {
423                        this.count = count;
424                }
425
426                /**
427                 * @return the query
428                 */
429                public String getQuery() {
430                        return query;
431                }
432
433                /**
434                 * @param query
435                 *            the query to set
436                 */
437                public void setQuery(String query) {
438                        this.query = query;
439                }
440
441                public void setSubscriptionKey(String accountKey) {
442                        this.accountKey = accountKey;
443                }
444
445                public URI buildURI() throws URISyntaxException {
446                        final URIBuilder builder = new URIBuilder("https://api.cognitive.microsoft.com/bing/v7.0/images/search");
447
448                        builder.setParameter("q", query);
449                        builder.setParameter("count", count + "");
450                        builder.setParameter("offset", offset + "");
451
452                        return builder.build();
453                }
454
455        }
456
457        public static class ImageDataSourceResponse {
458                String contentUrl;
459
460                public ImageDataSourceResponse(JSONObject jro) {
461                        contentUrl = (String) jro.get("contentUrl");
462                }
463
464                public String getContentUrl() {
465                        return contentUrl;
466                }
467        }
468
469        List<ImageDataSourceResponse> images;
470        ImageDataSourceQuery query;
471
472        protected BingImageDataset(InputStreamObjectReader<IMAGE> reader, List<ImageDataSourceResponse> results,
473                        ImageDataSourceQuery query)
474        {
475                super(reader);
476                this.images = results;
477                this.query = query;
478        }
479
480        @Override
481        public IMAGE getInstance(int index) {
482                return read(getImage(index));
483        }
484
485        /**
486         * Loads the image in {@code next} and converts it to the type {@code <IMAGE>}
487         * @param next the image source to load the image from
488         * @return the loaded and converted image if loading the image worked,
489         *         {@code null} otherwise
490         */
491        private IMAGE read(ImageDataSourceResponse next) {
492                if (next == null)
493                        return null;
494
495                final String imageURL = next.getContentUrl();
496                
497                InputStream stream = null;
498                try {
499                        stream = HttpUtils.readURL(new URL(imageURL));
500
501                        return reader.read(stream);
502                } catch (final MalformedURLException e) {
503                        //if the URL is malformed, something went wrong with programming
504                        throw new RuntimeException(e);
505                } catch (final IOException e) {
506                        if (e.getCause() instanceof org.apache.sanselan.ImageReadException) {
507                                // image urls that redirect to html pages will have this error (eg tinypic.com)
508                                System.out.println("The following URL didn't redirect to an image: " + imageURL);
509                        } else {
510                                // there was some issue with loading data from the URL
511                                e.printStackTrace();
512                        }
513                        return null;
514                } finally {
515                        try {
516                                if (stream != null)
517                                        stream.close();
518                        } catch (final IOException e) {
519                                // ignore
520                        }
521                }
522        }
523
524        @Override
525        public int numInstances() {
526                return images.size();
527        }
528
529        /**
530         * Get the underlying {@link ImageDataSourceResponse} objects that back the
531         * dataset.
532         *
533         * @return the underlying {@link ImageDataSourceResponse} objects
534         */
535        public List<ImageDataSourceResponse> getImages() {
536                return images;
537        }
538
539        /**
540         * Get the specific underlying {@link ImageDataSourceResponse} for the given
541         * index.
542         *
543         * @param index
544         *            the index
545         * @return the specific {@link ImageDataSourceResponse} for the given index.
546         */
547        public ImageDataSourceResponse getImage(int index) {
548                return images.get(index);
549        }
550
551        private static List<ImageDataSourceResponse> performSinglePageQuery(ImageDataSourceQuery query)
552        {
553                final HttpClient httpclient = HttpClients.createDefault();
554
555                try
556                {
557                        final URI uri = query.buildURI();
558                        final HttpGet request = new HttpGet(uri);
559                        request.setHeader("Ocp-Apim-Subscription-Key", query.accountKey);
560
561                        final HttpResponse response = httpclient.execute(request);
562                        
563                        if (response.getStatusLine().getStatusCode() == HttpStatus.SC_UNAUTHORIZED) {
564                                throw new IOException("HTTP ERROR 401: Unauthorized Recieved. "
565                                                + "You probably have the incorrect API Key");
566                        }
567                        final HttpEntity entity = response.getEntity();
568
569                        if (entity != null)
570                        {
571                                try {
572                                        final JSONParser parser = new JSONParser();
573                                        final JSONObject o = (JSONObject) parser.parse(EntityUtils.toString(entity));
574
575                                        final JSONArray jresults = ((JSONArray) o.get("value"));
576                                        final List<ImageDataSourceResponse> results = new ArrayList<>(jresults.size());
577
578                                        for (final Object jro : jresults) {
579                                                results.add(new ImageDataSourceResponse((JSONObject) jro));
580                                        }
581
582                                        return results;
583                                } catch (final Exception e) {
584                                        e.printStackTrace();
585                                }
586                        }
587                } catch (final IOException e) {
588                        e.printStackTrace();
589                } catch (final URISyntaxException e) {
590                        e.printStackTrace();
591                }
592
593                return null;
594        }
595
596        private static List<ImageDataSourceResponse> performQuery(ImageDataSourceQuery query, int number) {
597                if (number <= 0)
598                        number = 1000;
599
600                query.setOffset(0);
601                query.setCount(50);
602
603                final List<ImageDataSourceResponse> images = new ArrayList<ImageDataSourceResponse>();
604                for (int i = 0; i < 20; i++) {
605                        final List<ImageDataSourceResponse> res = performSinglePageQuery(query);
606
607                        if (res == null || res.size() == 0)
608                                break;
609
610                        images.addAll(res);
611
612                        if (images.size() >= number)
613                                break;
614
615                        query.setOffset(query.getOffset() + 50);
616                }
617
618                if (images.size() <= number)
619                        return images;
620                return images.subList(0, number);
621        }
622
623        /**
624         * Perform a search with the given query. The appid must have been set
625         * externally.
626         *
627         *
628         * @param reader
629         *            the reader with which to load the images
630         * @param query
631         *            the query
632         * @param number
633         *            the target number of results; the resultant dataset may
634         *            contain fewer images than specified.
635         * @return a new {@link BingImageDataset} created from the query.
636         */
637        public static <IMAGE extends Image<?, IMAGE>> BingImageDataset<IMAGE> create(InputStreamObjectReader<IMAGE> reader,
638                        ImageDataSourceQuery query, int number)
639        {
640                return new BingImageDataset<IMAGE>(reader, performQuery(query, number), query);
641        }
642
643        /**
644         * Perform a search with the given query. The given api token will be used
645         * to set the appid in the query object.
646         *
647         * @param reader
648         *            the reader with which to load the images
649         * @param token
650         *            the api authentication token
651         * @param query
652         *            the query
653         * @param number
654         *            the target number of results; the resultant dataset may
655         *            contain fewer images than specified.
656         * @return a new {@link BingImageDataset} created from the query.
657         */
658        public static <IMAGE extends Image<?, IMAGE>> BingImageDataset<IMAGE> create(InputStreamObjectReader<IMAGE> reader,
659                        BingAPIToken token, ImageDataSourceQuery query, int number)
660        {
661                query.setSubscriptionKey(token.accountKey);
662                return new BingImageDataset<IMAGE>(reader, performQuery(query, number), query);
663        }
664
665        /**
666         * Perform a search with the given query string.
667         *
668         * @param reader
669         *            the reader with which to load the images
670         * @param token
671         *            the api authentication token
672         * @param query
673         *            the query
674         * @param number
675         *            the target number of results; the resultant dataset may
676         *            contain fewer images than specified.
677         * @return a new {@link BingImageDataset} created from the query.
678         */
679        public static <IMAGE extends Image<?, IMAGE>> BingImageDataset<IMAGE> create(InputStreamObjectReader<IMAGE> reader,
680                        BingAPIToken token, String query, int number)
681        {
682                final ImageDataSourceQuery aq = new ImageDataSourceQuery();
683                aq.setSubscriptionKey(token.accountKey);
684                aq.setQuery(query);
685
686                return new BingImageDataset<IMAGE>(reader, performQuery(aq, number), aq);
687        }
688
689        @Override
690        public String getID() {
691                return query.getQuery();
692        }
693
694        public static void main(String[] args) throws BackingStoreException {
695                final BingAPIToken apiToken = DefaultTokenFactory.get(BingAPIToken.class);
696                final BingImageDataset<FImage> ds = BingImageDataset
697                                .create(ImageUtilities.FIMAGE_READER, apiToken, "foo", 10);
698
699                DisplayUtilities.display(ds.getRandomInstance());
700        }
701}