/**
 * Copyright 2011 The University of Southampton, Yahoo Inc., and the
 * individual contributors. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.openimaj.web.scraping;

import java.net.URL;
import java.util.List;

/**
 * Contract for a site-specific consumer: a component that reports whether it
 * is able to handle a particular URL and, on request, handles that URL by
 * returning the URL(s) from which the underlying data can be downloaded.
 * <p>
 * The kind of data behind the returned URLs is deliberately left unspecified
 * by this interface; individual implementations will usually target a single
 * data modality.
 * <p>
 * A typical use is resolving the downloadable images behind a link to a page
 * on an image hosting site.
 *
 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
 */
public interface SiteSpecificConsumer {
    /**
     * Test whether this consumer is able to handle the given URL.
     *
     * @param url
     *            the URL to check
     * @return {@code true} if this consumer can handle the URL;
     *         {@code false} otherwise
     */
    boolean canConsume(URL url);

    /**
     * Resolve the URLs of the data items found at the given URL.
     *
     * @param url
     *            the URL whose data item URLs are requested
     * @return a list of URLs to the data items at the given URL
     */
    List<URL> consume(URL url);
}