001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.tools.imagecollection.collection.webpage; 031 032import java.io.IOException; 033import java.net.MalformedURLException; 034import java.net.URL; 035import java.text.ParseException; 036import java.util.ArrayList; 037import java.util.HashSet; 038import java.util.Iterator; 039import java.util.List; 040import java.util.Map; 041import java.util.Set; 042 043import org.jsoup.Jsoup; 044import org.jsoup.nodes.Document; 045import org.openimaj.image.MBFImage; 046import org.openimaj.tools.imagecollection.collection.ImageCollection; 047import org.openimaj.tools.imagecollection.collection.ImageCollectionEntry; 048import org.openimaj.tools.imagecollection.collection.ImageCollectionEntrySelection; 049import org.openimaj.tools.imagecollection.collection.ImageCollectionSetupException; 050import org.openimaj.tools.imagecollection.collection.config.ImageCollectionConfig; 051import org.openimaj.util.pair.IndependentPair; 052 053public abstract class AbstractWebpageImageCollection implements ImageCollection<MBFImage>{ 054 055 private ImageCollectionEntrySelection<MBFImage> selection = null; 056 private Set<IndependentPair<URL, Map<String, String>>> imageList; 057 058 @Override 059 public Iterator<ImageCollectionEntry<MBFImage>> iterator() { 060 return new URLImageIterator(imageList,selection); 061 } 062 063 @Override 064 public void setup(ImageCollectionConfig config) throws ImageCollectionSetupException { 065 066 String url = null; 067 068 try { 069 url = config.read("webpage.url"); 070 } catch (ParseException e) { 071 throw new ImageCollectionSetupException("Could not deal with image source url, configuration error"); 072 } 073 try { 074 this.imageList = prepareURLs(new URL(url)); 075 } catch (MalformedURLException e) { 076 throw new ImageCollectionSetupException("Could not deal with image source url, invalid URL"); 077 } 078 } 079 080 public abstract Set<IndependentPair<URL, Map<String, String>>> prepareURLs(URL url) throws ImageCollectionSetupException; 081 082 083 @Override 084 public int useable(ImageCollectionConfig config) { 085 String url; 086 try { 087 url = config.read("webpage.url"); 088 } catch (ParseException e) { 089 return -1; 090 } 091 if(url!=null) return 0; 092 return -1; 093 } 094 095 @Override 096 public List<ImageCollectionEntry<MBFImage>> getAll() { 097 List<ImageCollectionEntry<MBFImage>> entries = new ArrayList<ImageCollectionEntry<MBFImage>>(); 098 for (ImageCollectionEntry<MBFImage> imageCollectionEntry : this) { 099 entries.add(imageCollectionEntry); 100 } 101 return entries; 102 } 103 104 @Override 105 public int countImages() { 106 return this.imageList.size(); 107 } 108 109 @Override 110 public void setEntrySelection(ImageCollectionEntrySelection<MBFImage> selection) { 111 this.selection = selection; 112 113 } 114 115 public static class Generic extends AbstractWebpageImageCollection{ 116 @Override 117 public Set<IndependentPair<URL, Map<String, String>>> prepareURLs(URL url) throws ImageCollectionSetupException { 118 Document doc = null; 119 try { 120 doc = Jsoup.parse(url, 1000); 121 } catch (IOException e) { 122 throw new ImageCollectionSetupException("Could not deal with image source url, problem parsing HTML"); 123 } 124 Set<IndependentPair<URL, Map<String, String>>> imageList = 125 new HashSet<IndependentPair<URL, Map<String, String>>>(); 126 imageList.addAll(WebpageUtils.allURLs(doc,"img","src")); 127 imageList.addAll(WebpageUtils.allURLs(doc,"a[href$=.png]","href")); 128 return imageList; 129 } 130 131 @Override 132 public int useable(String rawInput) { 133 // TODO Auto-generated method stub 134 return 0; 135 } 136 137 @Override 138 public ImageCollectionConfig defaultConfig(String rawInput) { 139 // TODO Auto-generated method stub 140 return null; 141 } 142 } 143}