/**
 * Copyright 2011 The University of Southampton, Yahoo Inc., and the
 * individual contributors. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.openimaj.web.scraping;

import java.net.URL;

/**
 * Simple scraper that just uses the given css selector to find all
 * relevant data in the page.
 * <p>
 * A URL is accepted when its host contains a configured substring; the
 * configured css selector is then handed back unchanged through
 * {@link #cssSelect()}.
 *
 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
 *
 */
public class SimpleHTMLScrapingConsumer extends HTMLScrapingSiteSpecificConsumer {
    /** Substring that the host of a URL must contain for it to be consumed. */
    private final String linkContains;

    /** The css selector returned by {@link #cssSelect()}. */
    private final String select;

    /**
     * @param linkContains
     *            the host of the link should contain this (tested with
     *            {@link String#contains(CharSequence)} against
     *            {@link URL#getHost()})
     * @param select
     *            the css selector for the img
     */
    public SimpleHTMLScrapingConsumer(String linkContains, String select) {
        this.linkContains = linkContains;
        this.select = select;
    }

    /**
     * Decide whether this consumer handles the given URL.
     *
     * @param url
     *            the URL to test
     * @return true if the host part of the URL contains the substring
     *         given at construction time
     */
    @Override
    public boolean canConsume(URL url) {
        // Only the host is inspected; the path and query are not considered.
        return url.getHost().contains(linkContains);
    }

    /**
     * @return the css selector given at construction time
     */
    @Override
    public String cssSelect() {
        return select;
    }

}