/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the name of the University of Southampton nor the names of its
 *     contributors may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.text.nlp.namedentity;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.openimaj.ml.annotation.ScoredAnnotation;
import org.openimaj.text.nlp.namedentity.YagoEntityCandidateFinderFactory.YagoEntityCandidateFinder;
import org.openimaj.text.nlp.namedentity.YagoEntityContextScorerFactory.YagoEntityContextScorer;
import org.openimaj.text.nlp.namedentity.YagoEntityExactMatcherFactory.YagoEntityExactMatcher;

/**
 * {@link EntityAnnotator} that asks a {@link YagoEntityCandidateFinder} for
 * groups of candidate entities and, for each group, keeps the candidate that a
 * {@link YagoEntityContextScorer} scores highest against the tokens surrounding
 * the match.
 *
 * @see YagoEntityExactMatcher
 *
 * @author Laurence Willmore (lgw1e10@ecs.soton.ac.uk)
 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
 */
public class YagoEntityCompleteAnnotator extends EntityAnnotator {
    private final YagoEntityContextScorer contextScorer;
    private final YagoEntityCandidateFinder candidateFinder;

    /**
     * Number of tokens by which the context window is extended below the
     * start token and above the stop token of a candidate.
     */
    private final int localContextBound = 20;

    /**
     * Default constructor.
     *
     * @param contextScorer
     *            scorer used to rate candidate entities against their local
     *            context
     * @param candidateFinder
     *            finder used to produce the candidate entities for a token
     *            list
     */
    public YagoEntityCompleteAnnotator(YagoEntityContextScorer contextScorer,
            YagoEntityCandidateFinder candidateFinder)
    {
        super();
        this.contextScorer = contextScorer;
        this.candidateFinder = candidateFinder;
    }

    /**
     * Not implemented by this annotator.
     *
     * @return always <code>null</code>
     */
    @Override
    public Set<HashMap<String, Object>> getAnnotations() {
        // Intentionally blank
        return null;
    }

    /**
     * Annotate a list of tokens. For every group of candidates returned by the
     * candidate finder, the candidates are scored against the local context of
     * the group's first entity and the candidate with the highest score
     * strictly greater than zero is emitted. Groups in which no candidate
     * scores above zero (and empty groups) produce no annotation.
     * <p>
     * Each annotation is a map holding the
     * {@link YagoEntityContextAnnotator#SCORE},
     * {@link YagoEntityContextAnnotator#URI},
     * {@link YagoEntityContextAnnotator#START_TOKEN},
     * {@link YagoEntityContextAnnotator#END_TOKEN} and
     * {@link YagoEntityContextAnnotator#TYPE} of the selected entity. The
     * context score is stored inside the map; the confidence of the
     * {@link ScoredAnnotation} itself is always 1.
     *
     * @param tokens
     *            the tokens to annotate
     * @return the annotations found; empty if there are none
     */
    @Override
    public List<ScoredAnnotation<HashMap<String, Object>>> annotate(
            List<String> tokens)
    {
        final List<ScoredAnnotation<HashMap<String, Object>>> result = new ArrayList<ScoredAnnotation<HashMap<String, Object>>>();
        final List<List<NamedEntity>> candidates = candidateFinder
                .getCandidates(tokens);
        if (candidates.isEmpty()) {
            return result;
        }
        // Use Context Scoring to disambiguate candidates
        for (final List<NamedEntity> can : candidates) {
            // An empty group has no entity to take the token span from;
            // skip it rather than fail on can.get(0).
            if (can.isEmpty()) {
                continue;
            }
            final ArrayList<String> rootNames = new ArrayList<String>(can.size());
            for (final NamedEntity ent : can) {
                rootNames.add(ent.rootName);
            }
            // The context window is taken around the first entity of the
            // group and shared by every candidate in it.
            final NamedEntity first = can.get(0);
            final List<String> localContext = getLocalContext(tokens, first.startToken, first.stopToken);
            final Map<NamedEntity, Float> contextScores = contextScorer
                    .getScoresForEntityList(rootNames, localContext);
            float topScore = 0;
            NamedEntity resEntity = null;
            for (final NamedEntity entity : can) {
                // Single lookup: null means the scorer returned nothing
                // usable for this entity.
                final Float score = contextScores.get(entity);
                if (score != null && score > topScore) {
                    resEntity = entity;
                    topScore = score;
                }
            }
            if (resEntity != null) {
                result.add(new ScoredAnnotation<HashMap<String, Object>>(
                        buildAnnotation(resEntity, topScore), 1));
            }
        }
        return result;
    }

    /**
     * Build the annotation map describing a selected entity.
     *
     * @param entity
     *            the selected entity
     * @param score
     *            the context score of the entity
     * @return a new map with the score, URI, token span and type of the entity
     */
    private HashMap<String, Object> buildAnnotation(NamedEntity entity, float score) {
        final HashMap<String, Object> annotation = new HashMap<String, Object>();
        annotation.put(YagoEntityContextAnnotator.SCORE, score);
        annotation.put(YagoEntityContextAnnotator.URI, entity.rootName);
        annotation.put(YagoEntityContextAnnotator.START_TOKEN, entity.startToken);
        annotation.put(YagoEntityContextAnnotator.END_TOKEN, entity.stopToken);
        annotation.put(YagoEntityContextAnnotator.TYPE, entity.type.toString());
        return annotation;
    }

    /**
     * Get the tokens around a token span. The window runs from
     * <code>startToken - localContextBound</code> (clamped to 0) up to, but not
     * including, <code>stopToken + localContextBound</code> (clamped to the
     * number of tokens).
     *
     * @param tokens
     *            the full token list
     * @param startToken
     *            index of the first token of the span
     * @param stopToken
     *            index of the end of the span
     * @return a view (not a copy) of the window within <code>tokens</code>
     */
    private List<String> getLocalContext(List<String> tokens, int startToken,
            int stopToken)
    {
        final int bottom = Math.max(0, startToken - localContextBound);
        final int top = Math.min(tokens.size(), stopToken + localContextBound);
        return tokens.subList(bottom, top);
    }
}