1 /**
2 * Copyright (c) 2011, The University of Southampton and the individual contributors.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without modification,
6 * are permitted provided that the following conditions are met:
7 *
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * * Neither the name of the University of Southampton nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30 package org.openimaj.pgm.vb.lda.mle;
31
32 import org.openimaj.pgm.util.Document;
33 import org.openimaj.util.array.SparseIntArray.Entry;
34
35 /**
36 * The state of the E step of the MLE LDA
37 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
38 *
39 */
40 public class LDAVariationlState{
41 /**
42 * the n'th unique word in a document's probability for each topic
43 */
44 public double[][] phi;
45 /**
46 * Useful for calculating the sumphi for a given document
47 */
48 public double[] oldphi;
49 /**
50 * the dirichlet parameter for the topic multinomials
51 */
52 public double[] varGamma;
53
54 /**
55 * The liklihood of the current topic sufficient statistics and variational parameters
56 */
57 public double likelihood;
58 /**
59 * The old liklihood
60 */
61 public double oldLikelihood;
62 /**
63 * The current LDAModel (i.e. the current sufficient statistics
64 */
65 public LDAModel state;
66 /**
67 * Holds the first derivative of the gamma
68 */
69 public double[] digamma;
70
71
72 int iteration;
73
74
75 /**
76 * The variational state holds phi and gamma states as well as
77 * information for convergence of the E step.
78 * @param state
79 */
80 public LDAVariationlState(LDAModel state) {
81 this.oldphi = new double[state.ntopics];
82 this.varGamma = new double[state.ntopics];
83 this.digamma = new double[state.ntopics];
84 this.state = state;
85 }
86
87 /**
88 * initialises the phi and sets everything to 0
89 * @param doc
90 */
91 public void prepare(Document doc){
92 this.phi = new double[doc.countUniqueWords()][state.ntopics];
93 likelihood = 0;
94 oldLikelihood = Double.NEGATIVE_INFINITY;
95 for (int topici = 0; topici < phi.length; topici++) {
96 varGamma[topici] = this.state.alpha;
97 digamma[topici] = 0; // used to calculate likelihood
98 int wordi = 0;
99 for (Entry wordCount : doc.getVector().entries()) {
100 phi[wordi][topici] = 1f/this.state.ntopics;
101 varGamma[topici] += (double)wordCount.value / this.state.ntopics;
102 wordi++;
103 }
104 }
105 this.iteration = 0;
106 }
107 }