1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 package org.openimaj.tools.imagecollection.collection.webpage;
31
32 import java.io.IOException;
33 import java.net.MalformedURLException;
34 import java.net.URL;
35 import java.text.ParseException;
36 import java.util.ArrayList;
37 import java.util.HashSet;
38 import java.util.Iterator;
39 import java.util.List;
40 import java.util.Map;
41 import java.util.Set;
42
43 import org.jsoup.Jsoup;
44 import org.jsoup.nodes.Document;
45 import org.openimaj.image.MBFImage;
46 import org.openimaj.tools.imagecollection.collection.ImageCollection;
47 import org.openimaj.tools.imagecollection.collection.ImageCollectionEntry;
48 import org.openimaj.tools.imagecollection.collection.ImageCollectionEntrySelection;
49 import org.openimaj.tools.imagecollection.collection.ImageCollectionSetupException;
50 import org.openimaj.tools.imagecollection.collection.config.ImageCollectionConfig;
51 import org.openimaj.util.pair.IndependentPair;
52
53 public abstract class AbstractWebpageImageCollection implements ImageCollection<MBFImage>{
54
55 private ImageCollectionEntrySelection<MBFImage> selection = null;
56 private Set<IndependentPair<URL, Map<String, String>>> imageList;
57
58 @Override
59 public Iterator<ImageCollectionEntry<MBFImage>> iterator() {
60 return new URLImageIterator(imageList,selection);
61 }
62
63 @Override
64 public void setup(ImageCollectionConfig config) throws ImageCollectionSetupException {
65
66 String url = null;
67
68 try {
69 url = config.read("webpage.url");
70 } catch (ParseException e) {
71 throw new ImageCollectionSetupException("Could not deal with image source url, configuration error");
72 }
73 try {
74 this.imageList = prepareURLs(new URL(url));
75 } catch (MalformedURLException e) {
76 throw new ImageCollectionSetupException("Could not deal with image source url, invalid URL");
77 }
78 }
79
80 public abstract Set<IndependentPair<URL, Map<String, String>>> prepareURLs(URL url) throws ImageCollectionSetupException;
81
82
83 @Override
84 public int useable(ImageCollectionConfig config) {
85 String url;
86 try {
87 url = config.read("webpage.url");
88 } catch (ParseException e) {
89 return -1;
90 }
91 if(url!=null) return 0;
92 return -1;
93 }
94
95 @Override
96 public List<ImageCollectionEntry<MBFImage>> getAll() {
97 List<ImageCollectionEntry<MBFImage>> entries = new ArrayList<ImageCollectionEntry<MBFImage>>();
98 for (ImageCollectionEntry<MBFImage> imageCollectionEntry : this) {
99 entries.add(imageCollectionEntry);
100 }
101 return entries;
102 }
103
104 @Override
105 public int countImages() {
106 return this.imageList.size();
107 }
108
109 @Override
110 public void setEntrySelection(ImageCollectionEntrySelection<MBFImage> selection) {
111 this.selection = selection;
112
113 }
114
115 public static class Generic extends AbstractWebpageImageCollection{
116 @Override
117 public Set<IndependentPair<URL, Map<String, String>>> prepareURLs(URL url) throws ImageCollectionSetupException {
118 Document doc = null;
119 try {
120 doc = Jsoup.parse(url, 1000);
121 } catch (IOException e) {
122 throw new ImageCollectionSetupException("Could not deal with image source url, problem parsing HTML");
123 }
124 Set<IndependentPair<URL, Map<String, String>>> imageList =
125 new HashSet<IndependentPair<URL, Map<String, String>>>();
126 imageList.addAll(WebpageUtils.allURLs(doc,"img","src"));
127 imageList.addAll(WebpageUtils.allURLs(doc,"a[href$=.png]","href"));
128 return imageList;
129 }
130
131 @Override
132 public int useable(String rawInput) {
133
134 return 0;
135 }
136
137 @Override
138 public ImageCollectionConfig defaultConfig(String rawInput) {
139
140 return null;
141 }
142 }
143 }