View Javadoc

1   /**
2    * Copyright (c) 2011, The University of Southampton and the individual contributors.
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without modification,
6    * are permitted provided that the following conditions are met:
7    *
8    *   * 	Redistributions of source code must retain the above copyright notice,
9    * 	this list of conditions and the following disclaimer.
10   *
11   *   *	Redistributions in binary form must reproduce the above copyright notice,
12   * 	this list of conditions and the following disclaimer in the documentation
13   * 	and/or other materials provided with the distribution.
14   *
15   *   *	Neither the name of the University of Southampton nor the names of its
16   * 	contributors may be used to endorse or promote products derived from this
17   * 	software without specific prior written permission.
18   *
19   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22   * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23   * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26   * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28   * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29   */
30  package org.openimaj.pgm.vb.lda.mle;
31  
32  import org.openimaj.pgm.util.Document;
33  import org.openimaj.util.array.SparseIntArray.Entry;
34  
35  /**
36   * The state of the E step of the MLE LDA
37   * @author Sina Samangooei (ss@ecs.soton.ac.uk)
38   *
39   */
40  public class LDAVariationlState{
41  	/**
42  	 * the n'th unique word in a document's probability for each topic
43  	 */
44  	public double[][] phi;
45  	/**
46  	 * Useful for calculating the sumphi for a given document
47  	 */
48  	public double[] oldphi;
49  	/**
50  	 * the dirichlet parameter for the topic multinomials
51  	 */
52  	public double[] varGamma;
53  	
54  	/**
55  	 * The liklihood of the current topic sufficient statistics and variational parameters
56  	 */
57  	public double likelihood; 
58  	/**
59  	 * The old liklihood
60  	 */
61  	public double oldLikelihood;
62  	/**
63  	 * The current LDAModel (i.e. the current sufficient statistics
64  	 */
65  	public LDAModel state;
66  	/**
67  	 * Holds the first derivative of the gamma 
68  	 */
69  	public double[] digamma;
70  	
71  	
72  	int iteration;
73  	
74  
75  	/**
76  	 * The variational state holds phi and gamma states as well as 
77  	 * information for convergence of the E step.
78  	 * @param state
79  	 */
80  	public LDAVariationlState(LDAModel state) {
81  		this.oldphi = new double[state.ntopics];
82  		this.varGamma = new double[state.ntopics];
83  		this.digamma = new double[state.ntopics];
84  		this.state = state;
85  	}
86  
87  	/**
88  	 * initialises the phi and sets everything to 0
89  	 * @param doc
90  	 */
91  	public void prepare(Document doc){
92  		this.phi = new double[doc.countUniqueWords()][state.ntopics];
93  		likelihood = 0;
94  		oldLikelihood = Double.NEGATIVE_INFINITY;
95  		for (int topici = 0; topici < phi.length; topici++) {
96  			varGamma[topici] = this.state.alpha;
97  			digamma[topici] = 0; // used to calculate likelihood
98  			int wordi = 0;
99  			for (Entry wordCount : doc.getVector().entries()) {
100 				phi[wordi][topici] = 1f/this.state.ntopics;
101 				varGamma[topici] += (double)wordCount.value / this.state.ntopics;
102 				wordi++;
103 			}
104 		}
105 		this.iteration = 0;
106 	}
107 }