001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.text.nlp.sentiment;
031
032import java.io.BufferedReader;
033import java.io.IOException;
034import java.io.InputStreamReader;
035import java.util.HashMap;
036import java.util.HashSet;
037import java.util.List;
038import java.util.Map;
039import java.util.logging.Level;
040import java.util.logging.Logger;
041
042/**
043 *
044 * @author bill
045 */
046public class BillMPQASentiment extends SentimentExtractor {
047    private String mpqaN = "/org/openimaj/text/nlp/sentiment/mpqan.txt";
048    private String mpqaP = "/org/openimaj/text/nlp/sentiment/mpqap.txt";
049
050    @Override
051    public Map<String, Object> extract(List<String> strings) {
052
053        HashSet<String> mpqaPSet = readSentiSet(mpqaP);
054        HashSet<String> mpqaNSet = readSentiSet(mpqaN);
055        HashMap<String, Object> output = new HashMap<String, Object>();
056        HashSet<String> positiveWords = new HashSet<String>();
057        HashSet<String> negativeWords = new HashSet<String>();
058
059        int countP = 0;
060        int countN = 0;
061
062        for (String string : strings){
063
064            if (mpqaPSet.contains(string)){
065                countP++;
066                positiveWords.add(string);
067            }
068            else if (mpqaNSet.contains(string)){
069                countN++;
070                negativeWords.add(string);
071            }
072        }
073
074        output.put("sentiment", countP - countN);
075        output.put("sentiment_positive", countP);
076        output.put("sentiment_negative", countN);
077        output.put("positive_words",positiveWords);
078        output.put("negative_words",negativeWords);
079
080        return output;
081    }
082
083    /**
084     *
085     * @param filepath
086     * @return 
087     */
088    public HashSet<String> readSentiSet (String filepath){
089        HashSet<String> sentiSet = new HashSet<String>();
090
091        BufferedReader br = null;
092        try {
093            br = new BufferedReader(new InputStreamReader(SentimentExtractor.class.getResourceAsStream(filepath)));
094        } catch (Exception ex) {
095            Logger.getLogger(SentimentExtractor.class.getName()).log(Level.SEVERE, null, ex);
096        }
097        try{
098            String line = br.readLine();
099
100            while (line != null) {
101                //System.out.println(line);
102                sentiSet.add(line.trim());
103                line = br.readLine();
104            }
105        }
106        catch (IOException ex) {
107            Logger.getLogger(SentimentExtractor.class.getName()).log(Level.SEVERE, null, ex);
108        }
109        finally {
110            try {
111                br.close();
112            } catch (IOException ex) {
113                Logger.getLogger(SentimentExtractor.class.getName()).log(Level.SEVERE, null, ex);
114            }
115        }
116
117        return sentiSet;
118    }
119}