001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.text.nlp.textpipe.annotators;
031
032import java.util.List;
033
034import org.openimaj.text.nlp.namedentity.NamedEntity;
035import org.openimaj.text.nlp.namedentity.YagoEntityExactMatcherFactory;
036import org.openimaj.text.nlp.namedentity.YagoEntityExactMatcherFactory.YagoEntityExactMatcher;
037import org.openimaj.text.nlp.textpipe.annotations.NamedEntityAnnotation;
038import org.openimaj.text.nlp.textpipe.annotations.RawTextAnnotation;
039import org.openimaj.text.nlp.textpipe.annotations.SentenceAnnotation;
040import org.openimaj.text.nlp.textpipe.annotations.TokenAnnotation;
041
042public class YagoNEAnnotator extends AbstractNEAnnotator {
043
044        private int cw = 1; // On either side of current sentence.
045
046        public YagoEntityExactMatcher yagoMatcher;
047
048        public YagoNEAnnotator() {
049                yagoMatcher = YagoEntityExactMatcherFactory.getMatcher();
050        }
051
052        @Override
053        void performAnnotation(RawTextAnnotation annotation)
054                        throws MissingRequiredAnnotationException
055        {
056                // Get the sentences
057                final List<SentenceAnnotation> sentences = annotation.getAnnotationsFor(SentenceAnnotation.class);
058                for (int i = 0; i < sentences.size(); i++) {
059                        // get context for sentence
060                        final String context = getContextFrom(sentences.subList(Math.max(0, i - cw),
061                                        Math.min(i + cw + 1, sentences.size())));
062                        annotateSentence(sentences.get(i), context);
063                }
064        }
065
066        private void annotateSentence(SentenceAnnotation sentence,
067                        String context)
068        {
069
070                final List<NamedEntity> ents = yagoMatcher.matchExact(sentence.getAnnotationsFor(TokenAnnotation.class), context);
071
072                for (final NamedEntity ent : ents) {
073                        final NamedEntityAnnotation nea = new NamedEntityAnnotation();
074                        nea.namedEntity = ent;
075                        nea.tokensMatched.addAll(sentence.getAnnotationsFor(TokenAnnotation.class).subList(ent.startToken,
076                                        ent.stopToken));
077                        sentence.addAnnotation(nea);
078                }
079
080                /*
081                 * List<List<TokenAnnotation>> validEntityPhrases =
082                 * getValidEntityPhrases(sentence); for(List<TokenAnnotation> entPhrase:
083                 * validEntityPhrases){ List<NamedEntity> ents =
084                 * yagoMatcher.matchExact(entPhrase, context); for(NamedEntity ent:
085                 * ents){ NamedEntityAnnotation nea = new NamedEntityAnnotation();
086                 * nea.namedEntity=ent;
087                 * nea.tokensMatched.addAll(sentence.getAnnotationsFor
088                 * (TokenAnnotation.class).subList(ent.startToken, ent.stopToken));
089                 * sentence.addAnnotation(nea); } }
090                 */
091        }
092
093        // private List<List<TokenAnnotation>>
094        // getValidEntityPhrases(SentenceAnnotation sentence) {
095        // List<List<TokenAnnotation>> results = new
096        // ArrayList<List<TokenAnnotation>>();
097        // List<TokenAnnotation> current = new ArrayList<TokenAnnotation>();
098        // for(TokenAnnotation
099        // tok:sentence.getAnnotationsFor(TokenAnnotation.class)){
100        // if(start(tok)){
101        // current.add(tok);
102        // }
103        // else if(cont(tok)&&current.size()>0){
104        // current.add(tok);
105        // }
106        // else if(current.size()>0){
107        // results.add(current);
108        // current=new ArrayList<TokenAnnotation>();
109        // }
110        // }
111        // if(current.size()>0)results.add(current);
112        // return results;
113        // }
114        //
115        // private boolean cont(TokenAnnotation tok) {
116        // return (!tok.getAnnotationsFor(PhraseAnnotation.class).get(0).start);
117        // }
118        //
119        // private boolean start(TokenAnnotation tok) {
120        // final PhraseAnnotation phrase =
121        // tok.getAnnotationsFor(PhraseAnnotation.class).get(0);
122        // if (phrase.phrase.toString().equals("NP"))
123        // return phrase.start;
124        // else
125        // return false;
126        // }
127
128        private String getContextFrom(List<SentenceAnnotation> sents) {
129                final StringBuffer result = new StringBuffer();
130                for (final SentenceAnnotation sent : sents) {
131                        result.append(sent.text + " ");
132                }
133                return result.toString();
134        }
135
136        @Override
137        void checkForRequiredAnnotations(RawTextAnnotation annotation)
138                        throws MissingRequiredAnnotationException
139        {
140                if (!annotation.getAnnotationKeyList().contains(SentenceAnnotation.class))
141                        throw new MissingRequiredAnnotationException("No SentenceAnnotations found");
142        }
143
144}