1 /**
2 * Copyright (c) 2011, The University of Southampton and the individual contributors.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without modification,
6 * are permitted provided that the following conditions are met:
7 *
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * * Neither the name of the University of Southampton nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30 package org.openimaj.pgm.vb.lda.mle;
31
32 import org.openimaj.pgm.util.Corpus;
33
34 import cern.jet.random.engine.MersenneTwister;
35
36 /**
37 * Initialisation strategies for the beta matrix in the maximum liklihood LDA.
38 * Specifically implementors are expected to initialise the sufficient statistics
39 * of beta (i.e. topicWord and topicTotal s.t. Beta_ij = topicWord_ij / topicTotal_i
40 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
41 *
42 */
43 public interface LDABetaInitStrategy{
44 /**
45 * Given a model and the corpus initialise the model's sufficient statistics
46 * @param model
47 * @param corpus
48 */
49 public void initModel(LDAModel model, Corpus corpus);
50
51 /**
52 * initialises beta randomly s.t. each each topicWord >= 1 and < 2
53 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
54 *
55 */
56 public static class RandomBetaInit implements LDABetaInitStrategy{
57 private MersenneTwister random;
58
59 /**
60 * unseeded random
61 */
62 public RandomBetaInit() {
63 random = new MersenneTwister();
64 }
65
66 /**
67 * seeded random
68 * @param seed
69 */
70 public RandomBetaInit(int seed) {
71 random = new MersenneTwister(seed);
72 }
73 @Override
74 public void initModel(LDAModel model, Corpus corpus) {
75 for (int topicIndex = 0; topicIndex < model.ntopics; topicIndex++) {
76 for (int wordIndex = 0; wordIndex < corpus.vocabularySize(); wordIndex++) {
77 double topicWord = 1 + random.nextDouble();
78 model.incTopicWord(topicIndex,wordIndex,topicWord);
79 model.incTopicTotal(topicIndex, topicWord);
80 }
81 }
82 }
83 }
84 }