001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.pgm.vb.lda.mle; 031 032import org.openimaj.pgm.util.Corpus; 033 034/** 035 * Holds the sufficient statistics for a maximum liklihood LDA 036 * as well as a single value for alpha (the parameter of the topic 037 * dirichlet prior) 038 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 039 * 040 */ 041public class LDAModel{ 042 /** 043 * the dirichelet perameter for every dimension of the topic multinomial prior 044 */ 045 public double alpha; 046 /** 047 * The maximum likelihood sufficient statistics for estimation of Beta. 048 * This is the number of times a given word is in a given topic 049 */ 050 public double[][] topicWord; 051 /** 052 * The maximum likelihood sufficient statistics for estimation of Beta. 053 * This is the number of words total in a given topic 054 */ 055 public double[] topicTotal; 056 /** 057 * number of topics 058 */ 059 public int ntopics; 060 int iteration; 061 double likelihood,oldLikelihood; 062 063 /** 064 * @param ntopics the number of topics in this LDA model 065 */ 066 public LDAModel(int ntopics) { 067 this.ntopics = ntopics; 068 } 069 070 /** 071 * initialises the sufficient statistic holder based on ntopics and 072 * the {@link Corpus#vocabularySize()}. Alpha remains at 0 073 * @param corpus 074 */ 075 public void prepare(Corpus corpus) { 076 this.topicWord = new double[ntopics][corpus.vocabularySize()]; 077 this.topicTotal = new double[ntopics]; 078 this.alpha = 0; 079 this.iteration = 0; 080 this.likelihood = 0; 081 this.oldLikelihood = Double.NEGATIVE_INFINITY; 082 } 083 084 /** 085 * initialises the sufficient statistic holder based on ntopics and 086 * the vocabularySize. Alpha remains at 0 087 * @param vocabularySize 088 */ 089 public void prepare(int vocabularySize) { 090 this.topicWord = new double[ntopics][vocabularySize]; 091 this.topicTotal = new double[ntopics]; 092 this.alpha = 0; 093 } 094 095 /** 096 * Increment a topic and word index by d. The totals are left untouched 097 * @param topicIndex 098 * @param wordIndex 099 * @param d 100 */ 101 public void incTopicWord(int topicIndex, int wordIndex, double d) { 102 this.topicWord[topicIndex][wordIndex] += d; 103 } 104 105 /** 106 * Increment a topic and word index by d. The totals are left untouched 107 * @param topicIndex 108 * @param d 109 */ 110 public void incTopicTotal(int topicIndex, double d) { 111 this.topicTotal[topicIndex] += d; 112 } 113 114 /** 115 * @param initialAlpha the alpha parameter for the topic multinomial dirichelet prior 116 */ 117 public void setAlpha(double initialAlpha) { 118 this.alpha = initialAlpha; 119 } 120 121 /** 122 * This method also swaps the likelihoods (i.e. oldLikelihood == likelihood, likelhood = 0) 123 * @return a blank copy with unset alpha matching the current model's configuration 124 */ 125 public LDAModel newInstance() { 126 LDAModel ret = new LDAModel(ntopics); 127 ret.prepare(this.topicWord[0].length); 128 ret.iteration = this.iteration; 129 ret.likelihood = 0; 130 ret.oldLikelihood = this.likelihood; 131 return ret; 132 } 133}