001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.text.nlp.textpipe.annotations; 031 032/** 033 * An annotation representing a phrase as per the Penn Treebank. 034 * 035 * @author Laurence Willmore (lgw1e10@ecs.soton.ac.uk) 036 * 037 */ 038public class PhraseAnnotation extends TextPipeAnnotation { 039 040 /** 041 * Penn Treebank phrase abbreviations. 042 * 043 * @author Laurence Willmore (lgw1e10@ecs.soton.ac.uk) 044 * 045 */ 046 public enum Phrase { 047 ADJP("Adjective Phrase."), 048 ADVP("Adverb Phrase."), 049 CONJP("Conjunction Phrase."), 050 FRAG("Fragment."), 051 INTJ("Interjection. Corresponds approximately to the part-of-speech tag UH."), 052 LST("List marker. Includes surrounding punctuation."), 053 NAC("Not a Constituent; used to show the scope of certain prenominal modifiers within an NP."), 054 NP("Noun Phrase. "), 055 NX( 056 "Used within certain complex NPs to mark the head of the NP. Corresponds very roughly to N-bar level but used quite differently."), 057 PP("Prepositional Phrase."), 058 PRN("Parenthetical. "), 059 PRT("Particle. Category for words that should be tagged RP. "), 060 QP("Quantifier Phrase (i.e. complex measure/amount phrase); used within NP."), 061 RRC("Reduced Relative Clause. "), 062 UCP("Unlike Coordinated Phrase. "), 063 VP("Vereb Phrase. "), 064 WHADJP("Wh-adjective Phrase. Adjectival phrase containing a wh-adverb, as in how hot."), 065 WHAVP( 066 "Wh-adverb Phrase. Introduces a clause with an NP gap. May be null (containing the 0 complementizer) or lexical, containing a wh-adverb such as how or why."), 067 WHNP( 068 "Wh-noun Phrase. Introduces a clause with an NP gap. May be null (containing the 0 complementizer) or lexical, containing some wh-word, e.g. who, which book, whose daughter, none of which, or how many leopards."), 069 WHPP( 070 "Wh-prepositional Phrase. Prepositional phrase containing a wh-noun phrase (such as of which or by whose authority) that either introduces a PP gap or is contained by a WHNP."), 071 X( 072 "Unknown, uncertain, or unbracketable. X is often used for bracketing typos and in bracketing the...the-constructions."), 073 /** 074 * This is added for phrases with no mapping to penn bank. 075 */ 076 UK("Unknown"); 077 078 /** 079 * Penn Tree Bank short description 080 */ 081 public final String DESCRIPTION; 082 083 Phrase(String description) { 084 this.DESCRIPTION = description; 085 } 086 087 /** 088 * Returns a {@link Phrase} based on the string. 089 * 090 * @param pennAbreviation 091 * @return {@link Phrase} 092 */ 093 public static Phrase getPhrasefromString(String pennAbreviation) { 094 for (final Phrase pos : Phrase.values()) { 095 if (pos.toString().equals(pennAbreviation)) 096 return pos; 097 } 098 return Phrase.UK; 099 } 100 }; 101 102 /** 103 * The {@link Phrase} label. 104 */ 105 public Phrase phrase; 106 /** 107 * true if this is the start token of a phrase segment. false if it is a 108 * continuation. 109 */ 110 public boolean start; 111 112 public PhraseAnnotation(Phrase phrase, boolean start) { 113 super(); 114 this.phrase = phrase; 115 this.start = start; 116 } 117 118 /** 119 * Returns a string representation of the Phrase order of this phrase. 120 * 121 * @return String order. 122 */ 123 public String getOrder() { 124 if (start) 125 return "start"; 126 else 127 return "continue"; 128 } 129 130}