001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.text.nlp.namedentity;
031
032import java.util.Map;
033
034import org.apache.commons.lang.StringEscapeUtils;
035
036/**
037 * Collection of uris and tools for accessing yago
038 * 
039 * @author Laurence Willmore (lgw1e10@ecs.soton.ac.uk)
040 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
041 * 
042 */
043public class YagoQueryUtils {
044        private static String PREFIX = "PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
045                        + "PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#> ";
046        /**
047         * The openlinksw sparql endpoint
048         */
049        public static String YAGO_SPARQL_ENDPOINT = "http://lod.openlinksw.com/sparql";
050        /**
051         * The wordnet organisation URI
052         */
053        public static String WORDNET_ORGANISATION_URI = "http://yago-knowledge.org/resource/wordnet_organization_108008335";
054        /**
055         * wordnet enterprise uri
056         */
057        public static String WORDNET_ENTERPRISE_URI = "http://yago-knowledge.org/resource/wordnet_enterprise_108056231";
058        /**
059         * wordnet company uri
060         */
061        public static String WORDNET_COMPANY_URI = "http://yago-knowledge.org/resource/wordnet_company_108058098";
062        /**
063         * the various kinds of wordnet organisation
064         */
065        public static String[] WORDNET_ORGANISATION_ROOT_URIS = new String[] {
066                        "http://yago-knowledge.org/resource/wordnet_adhocracy_108009239",
067                        "http://yago-knowledge.org/resource/wordnet_affiliate_108009478",
068                        "http://yago-knowledge.org/resource/wordnet_bureaucracy_108009659",
069                        "http://yago-knowledge.org/resource/wordnet_nongovernmental_organization_108009834",
070                        // "http://yago-knowledge.org/resource/wordnet_fiefdom_108048625",
071                        // "http://yago-knowledge.org/resource/wordnet_line_of_defense_108048743",
072                        // "http://yago-knowledge.org/resource/wordnet_line_organization_108048948",
073                        "http://yago-knowledge.org/resource/wordnet_association_108049401",
074                        "http://yago-knowledge.org/resource/wordnet_polity_108050385",
075                        "http://yago-knowledge.org/resource/wordnet_quango_108050484",
076                        "http://yago-knowledge.org/resource/wordnet_institution_108053576",
077                        "http://yago-knowledge.org/resource/wordnet_enterprise_108056231",
078                        "http://yago-knowledge.org/resource/wordnet_defense_108064130",
079                        "http://yago-knowledge.org/resource/wordnet_establishment_108075847",
080                        // "http://yago-knowledge.org/resource/wordnet_fire_brigade_108121394",
081                        "http://yago-knowledge.org/resource/wordnet_company_108187033",
082                        // "http://yago-knowledge.org/resource/wordnet_unit_108189659",
083                        // "http://yago-knowledge.org/resource/wordnet_force_108208016",
084                        "http://yago-knowledge.org/resource/wordnet_union_108233056",
085                        // "http://yago-knowledge.org/resource/wordnet_musical_organization_108246613",
086                        "http://yago-knowledge.org/resource/wordnet_party_108256968",
087                        // "http://yago-knowledge.org/resource/wordnet_machine_108264583",
088                        // "http://yago-knowledge.org/resource/wordnet_machine_108264759",
089                        "http://yago-knowledge.org/resource/wordnet_professional_organization_108266070",
090                        "http://yago-knowledge.org/resource/wordnet_alliance_108293982",
091                        "http://yago-knowledge.org/resource/wordnet_federation_108303504",
092        // "http://yago-knowledge.org/resource/wordnet_hierarchy_108376051",
093        // "http://yago-knowledge.org/resource/wordnet_deputation_108402442",
094        // "http://yago-knowledge.org/resource/wordnet_blue_108480847",
095        // "http://yago-knowledge.org/resource/wordnet_grey_108481009",
096        // "http://yago-knowledge.org/resource/wordnet_host_108481184",
097        // "http://yago-knowledge.org/resource/wordnet_pool_108481369"
098        };
099
100        /*
101         * Methods to return paramatised sparql query strings.
102         */
103
104        /**
105         * @param companyURI
106         * @return a SPARQL query to find the "iscalled" fact for a companyURI
107         */
108        public static String isCalledAlliasQuery(String companyURI) {
109                return PREFIX + "SELECT ?alias WHERE {" + "?fact rdf:predicate <http://yago-knowledge.org/resource/isCalled> ."
110                                + "?fact rdf:object   ?alias ." + "?fact rdf:subject <" + companyURI + "> }";
111        }
112
113        /**
114         * @param companyURI
115         * @return query for company aliases via rdfs:label
116         */
117        public static String labelAlliasQuery(String companyURI) {
118                return PREFIX + "SELECT ?alias WHERE {" + " <" + companyURI + "> rdfs:label ?alias ." + "}";
119        }
120
121        /**
122         * @return wordnet company SPARQL using {@link #WORDNET_COMPANY_URI}
123         */
124        public static String wordnetCompanyQuery() {
125                return PREFIX + "SELECT ?company WHERE {" + " ?company rdf:type <" + WORDNET_COMPANY_URI + "> . " + "}";
126        }
127
128        /**
129         * @return things which are subclasses of {@link #WORDNET_COMPANY_URI}
130         */
131        public static String subClassWordnetCompanyQuery() {
132                return PREFIX + "SELECT ?company WHERE {" + " ?subclass rdfs:subClassOf <" + WORDNET_COMPANY_URI + "> . "
133                                + " ?company rdf:type ?subclass . " + "}";
134        }
135
136        /**
137         * @param companyURI
138         * @return the subject of facts with object compnayURI and predicate owns
139         */
140        public static String ownsContextQuery(String companyURI) {
141                return PREFIX + "SELECT ?context WHERE { " + "?fact rdf:object <" + companyURI + "> . "
142                                + "?fact rdf:predicate owns ." + "?fact rdf:subject ?context}";
143        }
144
145        /**
146         * @param companyURI
147         * @return the object of facts with subject companyURI and predicate created
148         */
149        public static String createdContextQuery(String companyURI) {
150                return PREFIX + "SELECT ?context WHERE {" + "?fact rdf:subject <" + companyURI + "> . "
151                                + "?fact rdf:predicate <http://yago-knowledge.org/resource/created> ." + "?fact rdf:object ?context}";
152        }
153
154        /**
155         * @param companyURI
156         * @return the subject of facts with object companyURI and predicate
157         *         hasWikipediaAnchorText
158         */
159        public static String anchorContextQuery(String companyURI) {
160                return PREFIX + "SELECT ?context WHERE {" + "<" + companyURI
161                                + "> <http://yago-knowledge.org/resource/hasWikipediaAnchorText> ?context " + "}";
162        }
163
164        /**
165         * @param companyURI
166         * @return the subject of facts with object companyURI and predicate
167         *         hasWikipediaUrl
168         */
169        public static String wikiURLContextQuery(String companyURI) {
170                return PREFIX + "SELECT ?context WHERE {" + "<" + companyURI
171                                + "> <http://yago-knowledge.org/resource/hasWikipediaUrl> ?context " + "}";
172        }
173
174        /**
175         * @param subjectURI
176         * @param predicateURI
177         * @return the object of facts with subject subjectURI and predicate
178         *         predicateURI
179         */
180        public static String factObjectsQuery(String subjectURI, String predicateURI) {
181                return PREFIX + "SELECT ?object WHERE { " + "?f rdf:subject <" + subjectURI + "> . " + "?f rdf:predicate "
182                                + predicateURI + " . " + "?f rdf:object ?object}";
183        }
184
185        /**
186         * @param subjectURI
187         * @param predicateURI
188         * @return the object of a triple with subjectURI and predicateURI
189         */
190        public static String tripleObjectsQuery(String subjectURI, String predicateURI) {
191                return PREFIX + "SELECT ?object WHERE { " + subjectURI + " " + predicateURI + " ?object}";
192        }
193
194        /**
195         * @param objectURI
196         * @param predicateURI
197         * @return the subject with objectURI and predicateURI
198         */
199        public static String factSubjectsQuery(String objectURI, String predicateURI) {
200                return PREFIX + "SELECT ?subject WHERE { " + "?f rdf:subject ?subject . " + "?f rdf:predicate " + predicateURI
201                                + " . " + "?f rdf:object <" + objectURI + ">}";
202        }
203
204        /**
205         * @param objectURI
206         * @param predicateURI
207         * @return the subject with objectURI and predicateURI
208         */
209        public static String tripleSubjectsQuery(String objectURI, String predicateURI) {
210                return PREFIX + "SELECT ?subject WHERE { ?subject " + predicateURI + " " + objectURI + "}";
211        }
212
213        /**
214         * @param variableNameToPredicate
215         * @param subjectUri
216         * @return retrieve all the predicate values for a given subjectUri. Each
217         *         predicate variable can be named separately
218         */
219        public static String multiTripleObjectsQuery(Map<String, String> variableNameToPredicate, String subjectUri) {
220                final StringBuffer sb = new StringBuffer();
221                sb.append(PREFIX + "SELECT * WHERE {");
222                for (final String varName : variableNameToPredicate.keySet()) {
223                        sb.append("<" + subjectUri + "> " + variableNameToPredicate.get(varName) + " ?" + varName + " . ");
224                }
225                sb.append("}");
226                return sb.toString();
227        }
228
229        /**
230         * @param variableNameToPredicate
231         * @param subjectUri
232         * @return all the facts/objects whose subjects are subjectUri and which
233         *         have the predicates defined in variableNameToPredicate
234         */
235        public static String multiFactObjectsQuery(Map<String, String> variableNameToPredicate, String subjectUri) {
236                final StringBuffer sb = new StringBuffer();
237                sb.append(PREFIX + "SELECT * WHERE {");
238                final int fcount = 0;
239                for (final String varName : variableNameToPredicate.keySet()) {
240                        sb.append("?fact" + fcount + " rdf:subject <" + subjectUri + "> . " + "?fact" + fcount + " rdf:predicate "
241                                        + variableNameToPredicate.get(varName) + " . " + "?fact" + fcount + " rdf:object ?" + varName + " . ");
242                }
243                sb.append("}");
244                return sb.toString();
245        }
246
247        /**
248         * @param literal
249         * @return turns ^^ literals to strings
250         */
251        public static String yagoLiteralToString(String literal) {
252                return StringEscapeUtils.unescapeJava(literal.substring(0, literal.indexOf("^^http")));
253        }
254
255        /**
256         * @param resource
257         * @return yago resources by name
258         */
259        public static String yagoResourceToString(String resource) {
260                return resource.substring(resource.lastIndexOf("/") + 1).replaceAll("_", " ").trim();
261        }
262
263        /**
264         * lightweight test
265         * 
266         * @param args
267         */
268        public static void main(String[] args) {
269                //final String apple = "http://yago-knowledge.org/resource/Apple_Inc.";
270                // System.out.println(isCalledAlliasQuery(apple)); //works
271                // System.out.println(labelAlliasQuery(apple)); //works
272                // System.out.println(wordnetCompanyQuery()); //works
273                // System.out.println(subClassWordnetCompanyQuery()); //works
274                // System.out.println(ownsContextQuery(apple)); /** Does not work **/
275                // System.out.println(createdContextQuery(apple)); //works
276                // System.out.println(anchorContextQuery(apple)); /** Does not work **/
277                // System.out.println(wikiURLContextQuery(apple)); //
278                //System.out.println(factSubjectsQuery("http://yago-knowledge.org/resource/wikicategory_Low-cost_airlines",
279                //              "rdfs:subClassOf")); // works
280                // System.out.println(factObjectsQuery("http://yago-knowledge.org/resource/wikicategory_Low-cost_airlines",
281                // "rdfs:subClassOf")); //works
282
283        }
284
285}