/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   *  Redistributions of source code must retain the above copyright notice,
 *      this list of conditions and the following disclaimer.
 *
 *   *  Redistributions in binary form must reproduce the above copyright notice,
 *      this list of conditions and the following disclaimer in the documentation
 *      and/or other materials provided with the distribution.
 *
 *   *  Neither the name of the University of Southampton nor the names of its
 *      contributors may be used to endorse or promote products derived from this
 *      software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.text.nlp.textpipe.annotations;

032/**
033 * An annotation representing a phrase as per the Penn Treebank.
034 * 
035 * @author Laurence Willmore (lgw1e10@ecs.soton.ac.uk)
036 * 
037 */
038public class PhraseAnnotation extends TextPipeAnnotation {
039
040        /**
041         * Penn Treebank phrase abbreviations.
042         * 
043         * @author Laurence Willmore (lgw1e10@ecs.soton.ac.uk)
044         * 
045         */
046        public enum Phrase {
047                ADJP("Adjective Phrase."),
048                ADVP("Adverb Phrase."),
049                CONJP("Conjunction Phrase."),
050                FRAG("Fragment."),
051                INTJ("Interjection. Corresponds approximately to the part-of-speech tag UH."),
052                LST("List marker. Includes surrounding punctuation."),
053                NAC("Not a Constituent; used to show the scope of certain prenominal modifiers within an NP."),
054                NP("Noun Phrase. "),
055                NX(
056                                "Used within certain complex NPs to mark the head of the NP. Corresponds very roughly to N-bar level but used quite differently."),
057                PP("Prepositional Phrase."),
058                PRN("Parenthetical. "),
059                PRT("Particle. Category for words that should be tagged RP. "),
060                QP("Quantifier Phrase (i.e. complex measure/amount phrase); used within NP."),
061                RRC("Reduced Relative Clause. "),
062                UCP("Unlike Coordinated Phrase. "),
063                VP("Vereb Phrase. "),
064                WHADJP("Wh-adjective Phrase. Adjectival phrase containing a wh-adverb, as in how hot."),
065                WHAVP(
066                                "Wh-adverb Phrase. Introduces a clause with an NP gap. May be null (containing the 0 complementizer) or lexical, containing a wh-adverb such as how or why."),
067                WHNP(
068                                "Wh-noun Phrase. Introduces a clause with an NP gap. May be null (containing the 0 complementizer) or lexical, containing some wh-word, e.g. who, which book, whose daughter, none of which, or how many leopards."),
069                WHPP(
070                                "Wh-prepositional Phrase. Prepositional phrase containing a wh-noun phrase (such as of which or by whose authority) that either introduces a PP gap or is contained by a WHNP."),
071                X(
072                                "Unknown, uncertain, or unbracketable. X is often used for bracketing typos and in bracketing the...the-constructions."),
073                /**
074                 * This is added for phrases with no mapping to penn bank.
075                 */
076                UK("Unknown");
077
078                /**
079                 * Penn Tree Bank short description
080                 */
081                public final String DESCRIPTION;
082
083                Phrase(String description) {
084                        this.DESCRIPTION = description;
085                }
086
087                /**
088                 * Returns a {@link Phrase} based on the string.
089                 * 
090                 * @param pennAbreviation
091                 * @return {@link Phrase}
092                 */
093                public static Phrase getPhrasefromString(String pennAbreviation) {
094                        for (final Phrase pos : Phrase.values()) {
095                                if (pos.toString().equals(pennAbreviation))
096                                        return pos;
097                        }
098                        return Phrase.UK;
099                }
100        };
101
102        /**
103         * The {@link Phrase} label.
104         */
105        public Phrase phrase;
106        /**
107         * true if this is the start token of a phrase segment. false if it is a
108         * continuation.
109         */
110        public boolean start;
111
112        public PhraseAnnotation(Phrase phrase, boolean start) {
113                super();
114                this.phrase = phrase;
115                this.start = start;
116        }
117
118        /**
119         * Returns a string representation of the Phrase order of this phrase.
120         * 
121         * @return String order.
122         */
123        public String getOrder() {
124                if (start)
125                        return "start";
126                else
127                        return "continue";
128        }
129
130}