001/**
002 * Copyright 2011 The University of Southampton, Yahoo Inc., and the
003 * individual contributors. All rights reserved.
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 *    http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.openimaj.web.scraping;
018
019import java.net.URL;
020import java.util.List;
021
/**
 * A consumer that understands the URLs of one particular site. An
 * implementation reports whether it is able to handle a given URL and, when
 * asked to consume that URL, returns the URLs from which the underlying data
 * can be downloaded. The kind of data is not fixed by this interface, although
 * a specific implementation will usually only apply to a single data modality.
 * <p>
 * A typical use is downloading the images behind a URL that points at an image
 * hosting site.
 * 
 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
 * 
 */
public interface SiteSpecificConsumer {
        /**
         * Test whether this consumer is able to handle the given URL.
         * 
         * @param url
         *            the url to test
         * @return true if this consumer can handle the URL; false otherwise.
         */
        boolean canConsume(URL url);

        /**
         * Consume the given URL, producing the URLs of the data items found
         * there.
         * 
         * @param url
         *            the url to consume
         * @return A list of URLs to the data items at the given URL
         */
        List<URL> consume(URL url);
}