001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.image.dataset; 031 032import java.io.IOException; 033import java.io.InputStream; 034import java.net.MalformedURLException; 035import java.net.URI; 036import java.net.URISyntaxException; 037import java.net.URL; 038import java.util.ArrayList; 039import java.util.List; 040import java.util.prefs.BackingStoreException; 041 042import org.apache.http.HttpEntity; 043import org.apache.http.HttpResponse; 044import org.apache.http.HttpStatus; 045import org.apache.http.client.HttpClient; 046import org.apache.http.client.methods.HttpGet; 047import org.apache.http.client.utils.URIBuilder; 048import org.apache.http.impl.client.HttpClients; 049import org.apache.http.util.EntityUtils; 050import org.json.simple.JSONArray; 051import org.json.simple.JSONObject; 052import org.json.simple.parser.JSONParser; 053import org.openimaj.data.dataset.ReadableListDataset; 054import org.openimaj.data.identity.Identifiable; 055import org.openimaj.image.DisplayUtilities; 056import org.openimaj.image.FImage; 057import org.openimaj.image.Image; 058import org.openimaj.image.ImageUtilities; 059import org.openimaj.io.HttpUtils; 060import org.openimaj.io.InputStreamObjectReader; 061import org.openimaj.util.api.auth.DefaultTokenFactory; 062import org.openimaj.util.api.auth.common.BingAPIToken; 063 064/** 065 * Image datasets dynamically created from the Bing search API. 066 * 067 * <h5> WARNING </h5> 068 * Some of the images inside this dataset may be set to {@code null}if they could not be loaded. 069 * 070 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 071 * 072 * @param <IMAGE> 073 * The type of {@link Image} instance held by the dataset. 074 */ 075public class BingImageDataset<IMAGE extends Image<?, IMAGE>> extends ReadableListDataset<IMAGE, InputStream> 076 implements 077 Identifiable 078{ 079 public static class ImageDataSourceQuery { 080 public static enum SafeSearch { 081 Off, Moderate, Strict; 082 } 083 084 public static enum Aspect { 085 Square, Wide, Tall, All; 086 } 087 088 public static enum Color { 089 /** 090 * Return color images 091 */ 092 ColorOnly, 093 /** 094 * Return black and white images 095 */ 096 Monochrome, 097 Black, 098 Blue, 099 Brown, 100 Gray, 101 Green, 102 Orange, 103 Pink, 104 Purple, 105 Red, 106 Teal, 107 White, 108 Yellow 109 } 110 111 public static enum Freshness { 112 /** 113 * Return images discovered within the last 24 hours 114 */ 115 Day, 116 /** 117 * Return images discovered within the last 7 days 118 */ 119 Week, 120 /** 121 * Return images discovered within the last 30 days 122 */ 123 Month 124 } 125 126 /** 127 * Filter images by content 128 */ 129 public static enum ImageContent { 130 /** 131 * Return images that show only a person's face 132 */ 133 Face, 134 /** 135 * Return images that show only a person's head and shoulders 136 */ 137 Portrait 138 } 139 140 /** 141 * Filter images by image type. 142 */ 143 public static enum ImageType { 144 /** 145 * Return only animated GIFs 146 */ 147 AnimatedGif, 148 /** 149 * Return only clip art images 150 */ 151 Clipart, 152 /** 153 * Return only line drawings 154 */ 155 Line, 156 /** 157 * Return only photographs (excluding line drawings, animated Gifs, 158 * and clip art) 159 */ 160 Photo, 161 /** 162 * Return only images that contain items where Bing knows of a 163 * merchant that is selling the items. 164 */ 165 Shopping, 166 } 167 168 public static enum License { 169 /** 170 * Return images where the creator has waived their exclusive 171 * rights, to the fullest extent allowed by law. 172 */ 173 Public, 174 /** 175 * Return images that may be shared with others. Changing or editing 176 * the image might not be allowed. Also, modifying, sharing, and 177 * using the image for commercial purposes might not be allowed. 178 * Typically, this option returns the most images. 179 */ 180 Share, 181 /** 182 * Return images that may be shared with others for personal or 183 * commercial purposes. Changing or editing the image might not be 184 * allowed. 185 */ 186 ShareCommercially, 187 /** 188 * Return images that may be modified, shared, and used. Changing or 189 * editing the image might not be allowed. Modifying, sharing, and 190 * using the image for commercial purposes might not be allowed. 191 */ 192 Modify, 193 /** 194 * Return images that may be modified, shared, and used for personal 195 * or commercial purposes. Typically, this option returns the fewest 196 * images. 197 */ 198 ModifyCommercially, 199 /** 200 * Do not filter by license type. Specifying this value is the same 201 * as not specifying the license parameter. 202 */ 203 All 204 } 205 206 public static enum Size { 207 /** 208 * Return images that are less than 200x200 pixels 209 */ 210 Small, 211 /** 212 * Return images that are greater than or equal to 200x200 pixels 213 * but less than 500x500 pixels 214 */ 215 Medium, 216 /** 217 * Return images that are 500x500 pixels or larger 218 */ 219 Large, 220 /** 221 * Return wallpaper images. 222 */ 223 Wallpaper, 224 /** 225 * Do not filter by size. Specifying this value is the same as not 226 * specifying the size parameter. 227 */ 228 All 229 } 230 231 SafeSearch safeSearch; 232 Aspect aspect; 233 Color color; 234 Freshness freshness; 235 int height; 236 ImageContent imageContent; 237 ImageType imageType; 238 License license; 239 Size size; 240 int width; 241 int offset; 242 int count; 243 String query; 244 private String accountKey; 245 246 /** 247 * @return the safeSearch 248 */ 249 public SafeSearch getSafeSearch() { 250 return safeSearch; 251 } 252 253 /** 254 * @param safeSearch 255 * the safeSearch to set 256 */ 257 public void setSafeSearch(SafeSearch safeSearch) { 258 this.safeSearch = safeSearch; 259 } 260 261 /** 262 * @return the aspect 263 */ 264 public Aspect getAspect() { 265 return aspect; 266 } 267 268 /** 269 * @param aspect 270 * the aspect to set 271 */ 272 public void setAspect(Aspect aspect) { 273 this.aspect = aspect; 274 } 275 276 /** 277 * @return the color 278 */ 279 public Color getColor() { 280 return color; 281 } 282 283 /** 284 * @param color 285 * the color to set 286 */ 287 public void setColor(Color color) { 288 this.color = color; 289 } 290 291 /** 292 * @return the freshness 293 */ 294 public Freshness getFreshness() { 295 return freshness; 296 } 297 298 /** 299 * @param freshness 300 * the freshness to set 301 */ 302 public void setFreshness(Freshness freshness) { 303 this.freshness = freshness; 304 } 305 306 /** 307 * @return the height 308 */ 309 public int getHeight() { 310 return height; 311 } 312 313 /** 314 * @param height 315 * the height to set 316 */ 317 public void setHeight(int height) { 318 this.height = height; 319 } 320 321 /** 322 * @return the imageContent 323 */ 324 public ImageContent getImageContent() { 325 return imageContent; 326 } 327 328 /** 329 * @param imageContent 330 * the imageContent to set 331 */ 332 public void setImageContent(ImageContent imageContent) { 333 this.imageContent = imageContent; 334 } 335 336 /** 337 * @return the imageType 338 */ 339 public ImageType getImageType() { 340 return imageType; 341 } 342 343 /** 344 * @param imageType 345 * the imageType to set 346 */ 347 public void setImageType(ImageType imageType) { 348 this.imageType = imageType; 349 } 350 351 /** 352 * @return the license 353 */ 354 public License getLicense() { 355 return license; 356 } 357 358 /** 359 * @param license 360 * the license to set 361 */ 362 public void setLicense(License license) { 363 this.license = license; 364 } 365 366 /** 367 * @return the size 368 */ 369 public Size getSize() { 370 return size; 371 } 372 373 /** 374 * @param size 375 * the size to set 376 */ 377 public void setSize(Size size) { 378 this.size = size; 379 } 380 381 /** 382 * @return the width 383 */ 384 public int getWidth() { 385 return width; 386 } 387 388 /** 389 * @param width 390 * the width to set 391 */ 392 public void setWidth(int width) { 393 this.width = width; 394 } 395 396 /** 397 * @return the offset 398 */ 399 public int getOffset() { 400 return offset; 401 } 402 403 /** 404 * @param offset 405 * the offset to set 406 */ 407 public void setOffset(int offset) { 408 this.offset = offset; 409 } 410 411 /** 412 * @return the count 413 */ 414 public int getCount() { 415 return count; 416 } 417 418 /** 419 * @param count 420 * the count to set 421 */ 422 public void setCount(int count) { 423 this.count = count; 424 } 425 426 /** 427 * @return the query 428 */ 429 public String getQuery() { 430 return query; 431 } 432 433 /** 434 * @param query 435 * the query to set 436 */ 437 public void setQuery(String query) { 438 this.query = query; 439 } 440 441 public void setSubscriptionKey(String accountKey) { 442 this.accountKey = accountKey; 443 } 444 445 public URI buildURI() throws URISyntaxException { 446 final URIBuilder builder = new URIBuilder("https://api.cognitive.microsoft.com/bing/v7.0/images/search"); 447 448 builder.setParameter("q", query); 449 builder.setParameter("count", count + ""); 450 builder.setParameter("offset", offset + ""); 451 452 return builder.build(); 453 } 454 455 } 456 457 public static class ImageDataSourceResponse { 458 String contentUrl; 459 460 public ImageDataSourceResponse(JSONObject jro) { 461 contentUrl = (String) jro.get("contentUrl"); 462 } 463 464 public String getContentUrl() { 465 return contentUrl; 466 } 467 } 468 469 List<ImageDataSourceResponse> images; 470 ImageDataSourceQuery query; 471 472 protected BingImageDataset(InputStreamObjectReader<IMAGE> reader, List<ImageDataSourceResponse> results, 473 ImageDataSourceQuery query) 474 { 475 super(reader); 476 this.images = results; 477 this.query = query; 478 } 479 480 @Override 481 public IMAGE getInstance(int index) { 482 return read(getImage(index)); 483 } 484 485 /** 486 * Loads the image in {@code next} and converts it to the type {@code <IMAGE>} 487 * @param next the image source to load the image from 488 * @return the loaded and converted image if loading the image worked, 489 * {@code null} otherwise 490 */ 491 private IMAGE read(ImageDataSourceResponse next) { 492 if (next == null) 493 return null; 494 495 final String imageURL = next.getContentUrl(); 496 497 InputStream stream = null; 498 try { 499 stream = HttpUtils.readURL(new URL(imageURL)); 500 501 return reader.read(stream); 502 } catch (final MalformedURLException e) { 503 //if the URL is malformed, something went wrong with programming 504 throw new RuntimeException(e); 505 } catch (final IOException e) { 506 if (e.getCause() instanceof org.apache.sanselan.ImageReadException) { 507 // image urls that redirect to html pages will have this error (eg tinypic.com) 508 System.out.println("The following URL didn't redirect to an image: " + imageURL); 509 } else { 510 // there was some issue with loading data from the URL 511 e.printStackTrace(); 512 } 513 return null; 514 } finally { 515 try { 516 if (stream != null) 517 stream.close(); 518 } catch (final IOException e) { 519 // ignore 520 } 521 } 522 } 523 524 @Override 525 public int numInstances() { 526 return images.size(); 527 } 528 529 /** 530 * Get the underlying {@link ImageDataSourceResponse} objects that back the 531 * dataset. 532 * 533 * @return the underlying {@link ImageDataSourceResponse} objects 534 */ 535 public List<ImageDataSourceResponse> getImages() { 536 return images; 537 } 538 539 /** 540 * Get the specific underlying {@link ImageDataSourceResponse} for the given 541 * index. 542 * 543 * @param index 544 * the index 545 * @return the specific {@link ImageDataSourceResponse} for the given index. 546 */ 547 public ImageDataSourceResponse getImage(int index) { 548 return images.get(index); 549 } 550 551 private static List<ImageDataSourceResponse> performSinglePageQuery(ImageDataSourceQuery query) 552 { 553 final HttpClient httpclient = HttpClients.createDefault(); 554 555 try 556 { 557 final URI uri = query.buildURI(); 558 final HttpGet request = new HttpGet(uri); 559 request.setHeader("Ocp-Apim-Subscription-Key", query.accountKey); 560 561 final HttpResponse response = httpclient.execute(request); 562 563 if (response.getStatusLine().getStatusCode() == HttpStatus.SC_UNAUTHORIZED) { 564 throw new IOException("HTTP ERROR 401: Unauthorized Recieved. " 565 + "You probably have the incorrect API Key"); 566 } 567 final HttpEntity entity = response.getEntity(); 568 569 if (entity != null) 570 { 571 try { 572 final JSONParser parser = new JSONParser(); 573 final JSONObject o = (JSONObject) parser.parse(EntityUtils.toString(entity)); 574 575 final JSONArray jresults = ((JSONArray) o.get("value")); 576 final List<ImageDataSourceResponse> results = new ArrayList<>(jresults.size()); 577 578 for (final Object jro : jresults) { 579 results.add(new ImageDataSourceResponse((JSONObject) jro)); 580 } 581 582 return results; 583 } catch (final Exception e) { 584 e.printStackTrace(); 585 } 586 } 587 } catch (final IOException e) { 588 e.printStackTrace(); 589 } catch (final URISyntaxException e) { 590 e.printStackTrace(); 591 } 592 593 return null; 594 } 595 596 private static List<ImageDataSourceResponse> performQuery(ImageDataSourceQuery query, int number) { 597 if (number <= 0) 598 number = 1000; 599 600 query.setOffset(0); 601 query.setCount(50); 602 603 final List<ImageDataSourceResponse> images = new ArrayList<ImageDataSourceResponse>(); 604 for (int i = 0; i < 20; i++) { 605 final List<ImageDataSourceResponse> res = performSinglePageQuery(query); 606 607 if (res == null || res.size() == 0) 608 break; 609 610 images.addAll(res); 611 612 if (images.size() >= number) 613 break; 614 615 query.setOffset(query.getOffset() + 50); 616 } 617 618 if (images.size() <= number) 619 return images; 620 return images.subList(0, number); 621 } 622 623 /** 624 * Perform a search with the given query. The appid must have been set 625 * externally. 626 * 627 * 628 * @param reader 629 * the reader with which to load the images 630 * @param query 631 * the query 632 * @param number 633 * the target number of results; the resultant dataset may 634 * contain fewer images than specified. 635 * @return a new {@link BingImageDataset} created from the query. 636 */ 637 public static <IMAGE extends Image<?, IMAGE>> BingImageDataset<IMAGE> create(InputStreamObjectReader<IMAGE> reader, 638 ImageDataSourceQuery query, int number) 639 { 640 return new BingImageDataset<IMAGE>(reader, performQuery(query, number), query); 641 } 642 643 /** 644 * Perform a search with the given query. The given api token will be used 645 * to set the appid in the query object. 646 * 647 * @param reader 648 * the reader with which to load the images 649 * @param token 650 * the api authentication token 651 * @param query 652 * the query 653 * @param number 654 * the target number of results; the resultant dataset may 655 * contain fewer images than specified. 656 * @return a new {@link BingImageDataset} created from the query. 657 */ 658 public static <IMAGE extends Image<?, IMAGE>> BingImageDataset<IMAGE> create(InputStreamObjectReader<IMAGE> reader, 659 BingAPIToken token, ImageDataSourceQuery query, int number) 660 { 661 query.setSubscriptionKey(token.accountKey); 662 return new BingImageDataset<IMAGE>(reader, performQuery(query, number), query); 663 } 664 665 /** 666 * Perform a search with the given query string. 667 * 668 * @param reader 669 * the reader with which to load the images 670 * @param token 671 * the api authentication token 672 * @param query 673 * the query 674 * @param number 675 * the target number of results; the resultant dataset may 676 * contain fewer images than specified. 677 * @return a new {@link BingImageDataset} created from the query. 678 */ 679 public static <IMAGE extends Image<?, IMAGE>> BingImageDataset<IMAGE> create(InputStreamObjectReader<IMAGE> reader, 680 BingAPIToken token, String query, int number) 681 { 682 final ImageDataSourceQuery aq = new ImageDataSourceQuery(); 683 aq.setSubscriptionKey(token.accountKey); 684 aq.setQuery(query); 685 686 return new BingImageDataset<IMAGE>(reader, performQuery(aq, number), aq); 687 } 688 689 @Override 690 public String getID() { 691 return query.getQuery(); 692 } 693 694 public static void main(String[] args) throws BackingStoreException { 695 final BingAPIToken apiToken = DefaultTokenFactory.get(BingAPIToken.class); 696 final BingImageDataset<FImage> ds = BingImageDataset 697 .create(ImageUtilities.FIMAGE_READER, apiToken, "foo", 10); 698 699 DisplayUtilities.display(ds.getRandomInstance()); 700 } 701}