1 /** 2 * Copyright (c) 2011, The University of Southampton and the individual contributors. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without modification, 6 * are permitted provided that the following conditions are met: 7 * 8 * * Redistributions of source code must retain the above copyright notice, 9 * this list of conditions and the following disclaimer. 10 * 11 * * Redistributions in binary form must reproduce the above copyright notice, 12 * this list of conditions and the following disclaimer in the documentation 13 * and/or other materials provided with the distribution. 14 * 15 * * Neither the name of the University of Southampton nor the names of its 16 * contributors may be used to endorse or promote products derived from this 17 * software without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 23 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 26 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 package org.openimaj.pgm.vb.lda.mle; 31 32 import org.openimaj.pgm.util.Document; 33 import org.openimaj.util.array.SparseIntArray.Entry; 34 35 /** 36 * The state of the E step of the MLE LDA 37 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 38 * 39 */ 40 public class LDAVariationlState{ 41 /** 42 * the n'th unique word in a document's probability for each topic 43 */ 44 public double[][] phi; 45 /** 46 * Useful for calculating the sumphi for a given document 47 */ 48 public double[] oldphi; 49 /** 50 * the dirichlet parameter for the topic multinomials 51 */ 52 public double[] varGamma; 53 54 /** 55 * The liklihood of the current topic sufficient statistics and variational parameters 56 */ 57 public double likelihood; 58 /** 59 * The old liklihood 60 */ 61 public double oldLikelihood; 62 /** 63 * The current LDAModel (i.e. the current sufficient statistics 64 */ 65 public LDAModel state; 66 /** 67 * Holds the first derivative of the gamma 68 */ 69 public double[] digamma; 70 71 72 int iteration; 73 74 75 /** 76 * The variational state holds phi and gamma states as well as 77 * information for convergence of the E step. 78 * @param state 79 */ 80 public LDAVariationlState(LDAModel state) { 81 this.oldphi = new double[state.ntopics]; 82 this.varGamma = new double[state.ntopics]; 83 this.digamma = new double[state.ntopics]; 84 this.state = state; 85 } 86 87 /** 88 * initialises the phi and sets everything to 0 89 * @param doc 90 */ 91 public void prepare(Document doc){ 92 this.phi = new double[doc.countUniqueWords()][state.ntopics]; 93 likelihood = 0; 94 oldLikelihood = Double.NEGATIVE_INFINITY; 95 for (int topici = 0; topici < phi.length; topici++) { 96 varGamma[topici] = this.state.alpha; 97 digamma[topici] = 0; // used to calculate likelihood 98 int wordi = 0; 99 for (Entry wordCount : doc.getVector().entries()) { 100 phi[wordi][topici] = 1f/this.state.ntopics; 101 varGamma[topici] += (double)wordCount.value / this.state.ntopics; 102 wordi++; 103 } 104 } 105 this.iteration = 0; 106 } 107 }