/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the name of the University of Southampton nor the names of its
 *     contributors may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.text.nlp.namedentity;

import java.util.Map;

import org.apache.commons.lang.StringEscapeUtils;

/**
 * Collection of URIs and helpers for building SPARQL queries against YAGO.
 * <p>
 * All query builders simply concatenate their arguments into the query text;
 * nothing is escaped or validated. Each method documents whether it wraps an
 * argument in angle brackets itself or expects the caller to pass a complete
 * SPARQL term.
 *
 * @author Laurence Willmore (lgw1e10@ecs.soton.ac.uk)
 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
 *
 */
public class YagoQueryUtils {
	/** The rdf/rdfs namespace declarations prepended to every generated query. */
	private static final String PREFIX = "PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
			+ "PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#> ";
	/**
	 * The openlinksw sparql endpoint
	 */
	public static String YAGO_SPARQL_ENDPOINT = "http://lod.openlinksw.com/sparql";
	/**
	 * The wordnet organisation URI
	 */
	public static String WORDNET_ORGANISATION_URI = "http://yago-knowledge.org/resource/wordnet_organization_108008335";
	/**
	 * The wordnet enterprise URI
	 */
	public static String WORDNET_ENTERPRISE_URI = "http://yago-knowledge.org/resource/wordnet_enterprise_108056231";
	/**
	 * The wordnet company URI
	 */
	public static String WORDNET_COMPANY_URI = "http://yago-knowledge.org/resource/wordnet_company_108058098";
	/**
	 * The various kinds of wordnet organisation. Entries that are commented
	 * out are deliberately excluded from the list.
	 */
	public static String[] WORDNET_ORGANISATION_ROOT_URIS = new String[] {
			"http://yago-knowledge.org/resource/wordnet_adhocracy_108009239",
			"http://yago-knowledge.org/resource/wordnet_affiliate_108009478",
			"http://yago-knowledge.org/resource/wordnet_bureaucracy_108009659",
			"http://yago-knowledge.org/resource/wordnet_nongovernmental_organization_108009834",
			// "http://yago-knowledge.org/resource/wordnet_fiefdom_108048625",
			// "http://yago-knowledge.org/resource/wordnet_line_of_defense_108048743",
			// "http://yago-knowledge.org/resource/wordnet_line_organization_108048948",
			"http://yago-knowledge.org/resource/wordnet_association_108049401",
			"http://yago-knowledge.org/resource/wordnet_polity_108050385",
			"http://yago-knowledge.org/resource/wordnet_quango_108050484",
			"http://yago-knowledge.org/resource/wordnet_institution_108053576",
			"http://yago-knowledge.org/resource/wordnet_enterprise_108056231",
			"http://yago-knowledge.org/resource/wordnet_defense_108064130",
			"http://yago-knowledge.org/resource/wordnet_establishment_108075847",
			// "http://yago-knowledge.org/resource/wordnet_fire_brigade_108121394",
			"http://yago-knowledge.org/resource/wordnet_company_108187033",
			// "http://yago-knowledge.org/resource/wordnet_unit_108189659",
			// "http://yago-knowledge.org/resource/wordnet_force_108208016",
			"http://yago-knowledge.org/resource/wordnet_union_108233056",
			// "http://yago-knowledge.org/resource/wordnet_musical_organization_108246613",
			"http://yago-knowledge.org/resource/wordnet_party_108256968",
			// "http://yago-knowledge.org/resource/wordnet_machine_108264583",
			// "http://yago-knowledge.org/resource/wordnet_machine_108264759",
			"http://yago-knowledge.org/resource/wordnet_professional_organization_108266070",
			"http://yago-knowledge.org/resource/wordnet_alliance_108293982",
			"http://yago-knowledge.org/resource/wordnet_federation_108303504",
			// "http://yago-knowledge.org/resource/wordnet_hierarchy_108376051",
			// "http://yago-knowledge.org/resource/wordnet_deputation_108402442",
			// "http://yago-knowledge.org/resource/wordnet_blue_108480847",
			// "http://yago-knowledge.org/resource/wordnet_grey_108481009",
			// "http://yago-knowledge.org/resource/wordnet_host_108481184",
			// "http://yago-knowledge.org/resource/wordnet_pool_108481369"
	};

	/*
	 * Methods to return parameterised sparql query strings.
	 */

	/**
	 * @param companyURI
	 *            bare URI of the company (wrapped in angle brackets here)
	 * @return a SPARQL query selecting, as <code>?alias</code>, the object of
	 *         every reified "isCalled" fact whose subject is companyURI
	 */
	public static String isCalledAlliasQuery(String companyURI) {
		return PREFIX + "SELECT ?alias WHERE {"
				+ "?fact rdf:predicate <http://yago-knowledge.org/resource/isCalled> ."
				+ "?fact rdf:object ?alias ."
				+ "?fact rdf:subject <" + companyURI + "> }";
	}

	/**
	 * @param companyURI
	 *            bare URI of the company (wrapped in angle brackets here)
	 * @return a SPARQL query selecting, as <code>?alias</code>, the rdfs:label
	 *         values of companyURI
	 */
	public static String labelAlliasQuery(String companyURI) {
		return PREFIX + "SELECT ?alias WHERE {" + " <" + companyURI + "> rdfs:label ?alias ." + "}";
	}

	/**
	 * @return a SPARQL query selecting, as <code>?company</code>, everything
	 *         whose rdf:type is {@link #WORDNET_COMPANY_URI}
	 */
	public static String wordnetCompanyQuery() {
		return PREFIX + "SELECT ?company WHERE {" + " ?company rdf:type <" + WORDNET_COMPANY_URI + "> . " + "}";
	}

	/**
	 * @return a SPARQL query selecting, as <code>?company</code>, everything
	 *         whose rdf:type is a direct rdfs:subClassOf
	 *         {@link #WORDNET_COMPANY_URI}
	 */
	public static String subClassWordnetCompanyQuery() {
		return PREFIX + "SELECT ?company WHERE {"
				+ " ?subclass rdfs:subClassOf <" + WORDNET_COMPANY_URI + "> . "
				+ " ?company rdf:type ?subclass . " + "}";
	}

	/**
	 * @param companyURI
	 *            bare URI of the company (wrapped in angle brackets here)
	 * @return a SPARQL query selecting, as <code>?context</code>, the subject
	 *         of reified facts with object companyURI and predicate owns
	 */
	public static String ownsContextQuery(String companyURI) {
		// The predicate must be a full IRI: a bare "owns" is not a valid SPARQL
		// term. The form mirrors the isCalled/created predicates used elsewhere
		// in this class.
		return PREFIX + "SELECT ?context WHERE { "
				+ "?fact rdf:object <" + companyURI + "> . "
				+ "?fact rdf:predicate <http://yago-knowledge.org/resource/owns> ."
				+ "?fact rdf:subject ?context}";
	}

	/**
	 * @param companyURI
	 *            bare URI of the company (wrapped in angle brackets here)
	 * @return a SPARQL query selecting, as <code>?context</code>, the object
	 *         of reified facts with subject companyURI and predicate created
	 */
	public static String createdContextQuery(String companyURI) {
		return PREFIX + "SELECT ?context WHERE {"
				+ "?fact rdf:subject <" + companyURI + "> . "
				+ "?fact rdf:predicate <http://yago-knowledge.org/resource/created> ."
				+ "?fact rdf:object ?context}";
	}

	/**
	 * @param companyURI
	 *            bare URI of the company (wrapped in angle brackets here)
	 * @return a SPARQL query selecting, as <code>?context</code>, the object
	 *         of triples with subject companyURI and predicate
	 *         hasWikipediaAnchorText
	 */
	public static String anchorContextQuery(String companyURI) {
		return PREFIX + "SELECT ?context WHERE {"
				+ "<" + companyURI + "> <http://yago-knowledge.org/resource/hasWikipediaAnchorText> ?context "
				+ "}";
	}

	/**
	 * @param companyURI
	 *            bare URI of the company (wrapped in angle brackets here)
	 * @return a SPARQL query selecting, as <code>?context</code>, the object
	 *         of triples with subject companyURI and predicate hasWikipediaUrl
	 */
	public static String wikiURLContextQuery(String companyURI) {
		return PREFIX + "SELECT ?context WHERE {"
				+ "<" + companyURI + "> <http://yago-knowledge.org/resource/hasWikipediaUrl> ?context "
				+ "}";
	}

	/**
	 * @param subjectURI
	 *            bare URI of the subject (wrapped in angle brackets here)
	 * @param predicateURI
	 *            the predicate, inserted verbatim; the caller must supply a
	 *            complete SPARQL term (e.g. <code>rdfs:subClassOf</code> or
	 *            <code>&lt;http://...&gt;</code>)
	 * @return a SPARQL query selecting, as <code>?object</code>, the object of
	 *         reified facts with subject subjectURI and predicate predicateURI
	 */
	public static String factObjectsQuery(String subjectURI, String predicateURI) {
		return PREFIX + "SELECT ?object WHERE { "
				+ "?f rdf:subject <" + subjectURI + "> . "
				+ "?f rdf:predicate " + predicateURI + " . "
				+ "?f rdf:object ?object}";
	}

	/**
	 * @param subjectURI
	 *            the subject, inserted verbatim (not wrapped in angle brackets)
	 * @param predicateURI
	 *            the predicate, inserted verbatim (not wrapped in angle
	 *            brackets)
	 * @return a SPARQL query selecting, as <code>?object</code>, the object of
	 *         triples with the given subject and predicate
	 */
	public static String tripleObjectsQuery(String subjectURI, String predicateURI) {
		return PREFIX + "SELECT ?object WHERE { " + subjectURI + " " + predicateURI + " ?object}";
	}

	/**
	 * @param objectURI
	 *            bare URI of the object (wrapped in angle brackets here)
	 * @param predicateURI
	 *            the predicate, inserted verbatim; the caller must supply a
	 *            complete SPARQL term
	 * @return a SPARQL query selecting, as <code>?subject</code>, the subject
	 *         of reified facts with object objectURI and predicate predicateURI
	 */
	public static String factSubjectsQuery(String objectURI, String predicateURI) {
		return PREFIX + "SELECT ?subject WHERE { "
				+ "?f rdf:subject ?subject . "
				+ "?f rdf:predicate " + predicateURI + " . "
				+ "?f rdf:object <" + objectURI + ">}";
	}

	/**
	 * @param objectURI
	 *            the object, inserted verbatim (not wrapped in angle brackets)
	 * @param predicateURI
	 *            the predicate, inserted verbatim (not wrapped in angle
	 *            brackets)
	 * @return a SPARQL query selecting, as <code>?subject</code>, the subject
	 *         of triples with the given predicate and object
	 */
	public static String tripleSubjectsQuery(String objectURI, String predicateURI) {
		return PREFIX + "SELECT ?subject WHERE { ?subject " + predicateURI + " " + objectURI + "}";
	}

	/**
	 * @param variableNameToPredicate
	 *            maps a result variable name (without the leading
	 *            <code>?</code>) to the predicate term, inserted verbatim,
	 *            whose object should be bound to it
	 * @param subjectUri
	 *            bare URI of the subject (wrapped in angle brackets here)
	 * @return a SPARQL query retrieving the predicate values for a given
	 *         subjectUri, one triple pattern per map entry in the map's
	 *         iteration order. Each predicate variable can be named separately
	 */
	public static String multiTripleObjectsQuery(Map<String, String> variableNameToPredicate, String subjectUri) {
		final StringBuilder query = new StringBuilder();
		query.append(PREFIX).append("SELECT * WHERE {");
		for (final Map.Entry<String, String> binding : variableNameToPredicate.entrySet()) {
			query.append("<").append(subjectUri).append("> ")
					.append(binding.getValue())
					.append(" ?").append(binding.getKey()).append(" . ");
		}
		query.append("}");
		return query.toString();
	}

	/**
	 * @param variableNameToPredicate
	 *            maps a result variable name (without the leading
	 *            <code>?</code>) to the predicate term, inserted verbatim,
	 *            whose fact object should be bound to it
	 * @param subjectUri
	 *            bare URI of the subject (wrapped in angle brackets here)
	 * @return a SPARQL query for the objects of the reified facts whose
	 *         subject is subjectUri and which have the predicates defined in
	 *         variableNameToPredicate. Each predicate is matched against its
	 *         own fact variable (<code>?fact0</code>, <code>?fact1</code>, ...)
	 *         numbered in the map's iteration order
	 */
	public static String multiFactObjectsQuery(Map<String, String> variableNameToPredicate, String subjectUri) {
		final StringBuilder query = new StringBuilder();
		query.append(PREFIX).append("SELECT * WHERE {");
		// Every predicate needs a distinct fact variable; sharing one would
		// require a single reified fact to carry all predicates at once.
		int factIndex = 0;
		for (final Map.Entry<String, String> binding : variableNameToPredicate.entrySet()) {
			final String fact = "?fact" + factIndex;
			query.append(fact).append(" rdf:subject <").append(subjectUri).append("> . ")
					.append(fact).append(" rdf:predicate ").append(binding.getValue()).append(" . ")
					.append(fact).append(" rdf:object ?").append(binding.getKey()).append(" . ");
			factIndex++;
		}
		query.append("}");
		return query.toString();
	}

	/**
	 * Strip the datatype suffix from a typed literal and unescape what is left.
	 *
	 * @param literal
	 *            a literal, optionally followed by a <code>^^http...</code>
	 *            datatype suffix
	 * @return the text before the first <code>^^http</code> (or the whole
	 *         literal when there is no such suffix), with Java-style escape
	 *         sequences unescaped
	 */
	public static String yagoLiteralToString(String literal) {
		final int typeMarker = literal.indexOf("^^http");
		// Untyped literals have no marker; unescape them whole rather than
		// letting substring(0, -1) throw.
		final String value = typeMarker < 0 ? literal : literal.substring(0, typeMarker);
		return StringEscapeUtils.unescapeJava(value);
	}

	/**
	 * @param resource
	 *            a resource URI
	 * @return the part of the resource after its last <code>/</code>, with
	 *         underscores replaced by spaces and surrounding whitespace trimmed
	 */
	public static String yagoResourceToString(String resource) {
		return resource.substring(resource.lastIndexOf('/') + 1).replace('_', ' ').trim();
	}

	/**
	 * Lightweight manual test; every call is currently commented out.
	 *
	 * @param args
	 *            unused
	 */
	public static void main(String[] args) {
		// final String apple = "http://yago-knowledge.org/resource/Apple_Inc.";
		// System.out.println(isCalledAlliasQuery(apple)); // works
		// System.out.println(labelAlliasQuery(apple)); // works
		// System.out.println(wordnetCompanyQuery()); // works
		// System.out.println(subClassWordnetCompanyQuery()); // works
		// System.out.println(ownsContextQuery(apple)); // was marked "Does not work"; predicate is now a full IRI - re-verify
		// System.out.println(createdContextQuery(apple)); // works
		// System.out.println(anchorContextQuery(apple)); // marked "Does not work"
		// System.out.println(wikiURLContextQuery(apple)); //
		// System.out.println(factSubjectsQuery("http://yago-knowledge.org/resource/wikicategory_Low-cost_airlines",
		// "rdfs:subClassOf")); // works
		// System.out.println(factObjectsQuery("http://yago-knowledge.org/resource/wikicategory_Low-cost_airlines",
		// "rdfs:subClassOf")); // works

	}

}