001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.tools.imagecollection.collection.webpage;
031
032import java.io.IOException;
033import java.net.MalformedURLException;
034import java.net.URL;
035import java.text.ParseException;
036import java.util.ArrayList;
037import java.util.HashSet;
038import java.util.Iterator;
039import java.util.List;
040import java.util.Map;
041import java.util.Set;
042
043import org.jsoup.Jsoup;
044import org.jsoup.nodes.Document;
045import org.openimaj.image.MBFImage;
046import org.openimaj.tools.imagecollection.collection.ImageCollection;
047import org.openimaj.tools.imagecollection.collection.ImageCollectionEntry;
048import org.openimaj.tools.imagecollection.collection.ImageCollectionEntrySelection;
049import org.openimaj.tools.imagecollection.collection.ImageCollectionSetupException;
050import org.openimaj.tools.imagecollection.collection.config.ImageCollectionConfig;
051import org.openimaj.util.pair.IndependentPair;
052
053public abstract class AbstractWebpageImageCollection implements ImageCollection<MBFImage>{
054
055        private ImageCollectionEntrySelection<MBFImage> selection = null;
056        private Set<IndependentPair<URL, Map<String, String>>> imageList;
057
058        @Override
059        public Iterator<ImageCollectionEntry<MBFImage>> iterator() {
060                return new URLImageIterator(imageList,selection);
061        }
062        
063        @Override
064        public void setup(ImageCollectionConfig config) throws ImageCollectionSetupException {
065                
066                String url = null;
067                
068                try {
069                        url = config.read("webpage.url");
070                } catch (ParseException e) {
071                        throw new ImageCollectionSetupException("Could not deal with image source url, configuration error");
072                }
073                try {
074                        this.imageList = prepareURLs(new URL(url));
075                } catch (MalformedURLException e) {
076                        throw new ImageCollectionSetupException("Could not deal with image source url, invalid URL");
077                }
078        }
079
080        public abstract Set<IndependentPair<URL, Map<String, String>>> prepareURLs(URL url) throws ImageCollectionSetupException;
081        
082
083        @Override
084        public int useable(ImageCollectionConfig config) {
085                String url;
086                try {
087                        url = config.read("webpage.url");
088                } catch (ParseException e) {
089                        return -1;
090                }
091                if(url!=null) return 0;
092                return -1;
093        }
094
095        @Override
096        public List<ImageCollectionEntry<MBFImage>> getAll() {
097                List<ImageCollectionEntry<MBFImage>> entries = new ArrayList<ImageCollectionEntry<MBFImage>>();
098                for (ImageCollectionEntry<MBFImage> imageCollectionEntry : this) {
099                        entries.add(imageCollectionEntry);
100                }
101                return entries;
102        }
103
104        @Override
105        public int countImages() {
106                return this.imageList.size();
107        }
108
109        @Override
110        public void setEntrySelection(ImageCollectionEntrySelection<MBFImage> selection) {
111                this.selection  = selection;
112                
113        }
114        
115        public static class Generic extends AbstractWebpageImageCollection{
116                @Override
117                public Set<IndependentPair<URL, Map<String, String>>> prepareURLs(URL url) throws ImageCollectionSetupException {
118                        Document doc = null;
119                        try {
120                                doc = Jsoup.parse(url, 1000);
121                        } catch (IOException e) {
122                                throw new ImageCollectionSetupException("Could not deal with image source url, problem parsing HTML");
123                        }
124                        Set<IndependentPair<URL, Map<String, String>>> imageList = 
125                                new HashSet<IndependentPair<URL, Map<String, String>>>();
126                        imageList.addAll(WebpageUtils.allURLs(doc,"img","src"));
127                        imageList.addAll(WebpageUtils.allURLs(doc,"a[href$=.png]","href"));
128                        return imageList;
129                }
130
131                @Override
132                public int useable(String rawInput) {
133                        // TODO Auto-generated method stub
134                        return 0;
135                }
136
137                @Override
138                public ImageCollectionConfig defaultConfig(String rawInput) {
139                        // TODO Auto-generated method stub
140                        return null;
141                }
142        }
143}