/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * 	Redistributions of source code must retain the above copyright notice,
 * 	this list of conditions and the following disclaimer.
 *
 *   *	Redistributions in binary form must reproduce the above copyright notice,
 * 	this list of conditions and the following disclaimer in the documentation
 * 	and/or other materials provided with the distribution.
 *
 *   *	Neither the name of the University of Southampton nor the names of its
 * 	contributors may be used to endorse or promote products derived from this
 * 	software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.demos.sandbox.ml.linear.learner.stream;

import gov.sandia.cognition.math.matrix.Vector;

import java.util.Comparator;
import java.util.Iterator;
import java.util.NoSuchElementException;

import org.openimaj.ml.linear.learner.BilinearSparseOnlineLearner;
import org.openimaj.ml.linear.learner.IncrementalBilinearSparseOnlineLearner;
import org.openimaj.util.queue.BoundedPriorityQueue;

import com.google.common.collect.BiMap;

/**
 * Iterate over words in terms of their importance.
 * <p>
 * Importance of a word is its weight in the column of the bilinear learner's
 * word matrix that corresponds to a given task. At most a requested number of
 * word indexes is retained; they are selected with a
 * {@link BoundedPriorityQueue} and stored in the order in which the queue's
 * tail is polled.
 *
 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
 *
 */
public class SortedImportantWords implements Iterable<String>{
	/** The vocabulary: word to word-index mapping, taken from the incremental learner */
	BiMap<String,Integer> words;
	/** The retained word indexes, in iteration order */
	int[] indexes;
	private final int taskIndex;
	private final Vector wordWeights;


	/**
	 * @param task The task for which word importance is to be measured
	 * @param learner The incremental learner (giving access to the words)
	 * @param bilearner The source of the word parameters
	 * @param size The (maximum) number of words; fewer are held if there are
	 *            fewer word weights than this
	 */
	public SortedImportantWords(
			String task,
			IncrementalBilinearSparseOnlineLearner learner,
			BilinearSparseOnlineLearner bilearner,
			int size
	) {
		this.words = learner.getVocabulary();
		// NOTE(review): presumably fails with a NullPointerException on unboxing
		// when the task is unknown to the learner - confirm against callers.
		this.taskIndex = learner.getDependantValues().get(task);
		this.wordWeights = bilearner.getW().getColumn(taskIndex);

		final BoundedPriorityQueue<Integer> queue = new BoundedPriorityQueue<Integer>(size, new Comparator<Integer>() {

			@Override
			public int compare(Integer o1, Integer o2) {
				// Reversed (descending) weight order; equivalent to negating the
				// natural comparison but without boxing the weights.
				return Double.compare(wordWeights.getElement(o2), wordWeights.getElement(o1));
			}
		});

		final int nWeights = wordWeights.getDimensionality();
		for (int i = 0; i < nWeights; i++) {
			queue.add(i);
		}

		// Size the array from what the queue actually holds rather than from the
		// requested size: with fewer weights than requested the spare slots
		// would otherwise stay 0 and be reported as spurious extra words.
		this.indexes = new int[queue.size()];
		int i = 0;
		while(!queue.isEmpty()){
			this.indexes[i++] = queue.pollTail();
		}
	}


	/**
	 * @return an iterator over the retained words, in the order of the stored
	 *         indexes; removal is not supported
	 */
	@Override
	public Iterator<String> iterator() {
		return new Iterator<String>() {
			int index = 0;

			@Override
			public void remove() {
				throw new UnsupportedOperationException();
			}

			@Override
			public String next() {
				// Honour the Iterator contract instead of leaking an
				// ArrayIndexOutOfBoundsException when exhausted.
				if (!hasNext()) {
					throw new NoSuchElementException();
				}
				return words.inverse().get(indexes[index++]);
			}

			@Override
			public boolean hasNext() {
				return index<indexes.length;
			}
		};
	}

	/**
	 * @return the retained words, each followed by its weight to four decimal
	 *         places, as a bracketed, comma separated list
	 */
	@Override
	public String toString() {
		final StringBuilder ret = new StringBuilder("[");
		for (String word : this) {
			double wordWeight = this.wordWeights.getElement(this.words.get(word));
			ret.append(String.format("%s (%1.4f)",word,wordWeight)).append(", ");
		}
		ret.append("]");
		return ret.toString();
	}

}