Source code

001/**
002 * Copyright 2011 The University of Southampton, Yahoo Inc., and the
003 * individual contributors. All rights reserved.
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 *    http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.openimaj.web.scraping;
018
019import java.net.URL;
020
021/**
022 * Simple scraper that just uses the given css selector to find all
023 * relevant data in the page
024 * 
025 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
026 * 
027 */
028public class SimpleHTMLScrapingConsumer extends HTMLScrapingSiteSpecificConsumer {
029        private String linkContains;
030        private String select;
031
032        /**
033         * @param linkContains
034         *            the link should contain this
035         * @param select
036         *            the css selector for the img
037         */
038        public SimpleHTMLScrapingConsumer(String linkContains, String select) {
039                this.linkContains = linkContains;
040                this.select = select;
041        }
042
043        @Override
044        public boolean canConsume(URL url) {
045                return url.getHost().contains(linkContains);
046        }
047
048        @Override
049        public String cssSelect() {
050                return select;
051        }
052
053}