1 /** 2 * Copyright (c) 2011, The University of Southampton and the individual contributors. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without modification, 6 * are permitted provided that the following conditions are met: 7 * 8 * * Redistributions of source code must retain the above copyright notice, 9 * this list of conditions and the following disclaimer. 10 * 11 * * Redistributions in binary form must reproduce the above copyright notice, 12 * this list of conditions and the following disclaimer in the documentation 13 * and/or other materials provided with the distribution. 14 * 15 * * Neither the name of the University of Southampton nor the names of its 16 * contributors may be used to endorse or promote products derived from this 17 * software without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 23 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 26 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 package org.openimaj.pgm.vb.lda.mle; 31 32 import org.openimaj.pgm.util.Corpus; 33 34 /** 35 * Holds the sufficient statistics for a maximum liklihood LDA 36 * as well as a single value for alpha (the parameter of the topic 37 * dirichlet prior) 38 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 39 * 40 */ 41 public class LDAModel{ 42 /** 43 * the dirichelet perameter for every dimension of the topic multinomial prior 44 */ 45 public double alpha; 46 /** 47 * The maximum likelihood sufficient statistics for estimation of Beta. 48 * This is the number of times a given word is in a given topic 49 */ 50 public double[][] topicWord; 51 /** 52 * The maximum likelihood sufficient statistics for estimation of Beta. 53 * This is the number of words total in a given topic 54 */ 55 public double[] topicTotal; 56 /** 57 * number of topics 58 */ 59 public int ntopics; 60 int iteration; 61 double likelihood,oldLikelihood; 62 63 /** 64 * @param ntopics the number of topics in this LDA model 65 */ 66 public LDAModel(int ntopics) { 67 this.ntopics = ntopics; 68 } 69 70 /** 71 * initialises the sufficient statistic holder based on ntopics and 72 * the {@link Corpus#vocabularySize()}. Alpha remains at 0 73 * @param corpus 74 */ 75 public void prepare(Corpus corpus) { 76 this.topicWord = new double[ntopics][corpus.vocabularySize()]; 77 this.topicTotal = new double[ntopics]; 78 this.alpha = 0; 79 this.iteration = 0; 80 this.likelihood = 0; 81 this.oldLikelihood = Double.NEGATIVE_INFINITY; 82 } 83 84 /** 85 * initialises the sufficient statistic holder based on ntopics and 86 * the vocabularySize. Alpha remains at 0 87 * @param vocabularySize 88 */ 89 public void prepare(int vocabularySize) { 90 this.topicWord = new double[ntopics][vocabularySize]; 91 this.topicTotal = new double[ntopics]; 92 this.alpha = 0; 93 } 94 95 /** 96 * Increment a topic and word index by d. The totals are left untouched 97 * @param topicIndex 98 * @param wordIndex 99 * @param d 100 */ 101 public void incTopicWord(int topicIndex, int wordIndex, double d) { 102 this.topicWord[topicIndex][wordIndex] += d; 103 } 104 105 /** 106 * Increment a topic and word index by d. The totals are left untouched 107 * @param topicIndex 108 * @param d 109 */ 110 public void incTopicTotal(int topicIndex, double d) { 111 this.topicTotal[topicIndex] += d; 112 } 113 114 /** 115 * @param initialAlpha the alpha parameter for the topic multinomial dirichelet prior 116 */ 117 public void setAlpha(double initialAlpha) { 118 this.alpha = initialAlpha; 119 } 120 121 /** 122 * This method also swaps the likelihoods (i.e. oldLikelihood == likelihood, likelhood = 0) 123 * @return a blank copy with unset alpha matching the current model's configuration 124 */ 125 public LDAModel newInstance() { 126 LDAModel ret = new LDAModel(ntopics); 127 ret.prepare(this.topicWord[0].length); 128 ret.iteration = this.iteration; 129 ret.likelihood = 0; 130 ret.oldLikelihood = this.likelihood; 131 return ret; 132 } 133 }