001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.math.statistics.distribution;
031
032import java.util.Arrays;
033import java.util.Random;
034
035import org.openimaj.math.matrix.MatrixUtils;
036import org.openimaj.util.array.ArrayUtils;
037import org.openimaj.util.pair.IndependentPair;
038
039import Jama.CholeskyDecomposition;
040import Jama.Matrix;
041
042/**
043 * Implementation of a mixture of gaussians (gaussian mixture model). This class
044 * only models the distribution itself, and does not contain the necessary code
045 * to learn a mixture (for that see the <code>GaussianMixtureModelEM</code>
046 * class for example).
047 * 
048 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
049 */
050public class MixtureOfGaussians extends AbstractMultivariateDistribution {
051        /**
052         * Amount to add to the diagonal of the covariance matrix if it's
053         * ill-conditioned.
054         */
055        public static final double MIN_COVAR_RECONDITION = 1.0e-7;
056
057        /**
058         * The individual gaussians
059         */
060        public MultivariateGaussian[] gaussians;
061
062        /**
063         * The weight of each gaussian
064         */
065        public double[] weights;
066
067        /**
068         * Construct the mixture with the given gaussians and weights
069         * 
070         * @param gaussians
071         *            the gaussians
072         * @param weights
073         *            the weights
074         */
075        public MixtureOfGaussians(MultivariateGaussian[] gaussians, double[] weights) {
076                this.gaussians = gaussians;
077                this.weights = weights;
078        }
079
080        @Override
081        public double[] sample(Random rng) {
082                return sample(1, rng)[0];
083        }
084
085        @Override
086        public double[][] sample(int n_samples, Random rng) {
087                final double[] weight_cdf = ArrayUtils.cumulativeSum(this.weights);
088                final double[][] X = new double[n_samples][this.gaussians[0].getMean().getColumnDimension()];
089
090                // decide which component to use for each sample
091                final int[] comps = new int[n_samples];
092                for (int i = 0; i < n_samples; i++) {
093                        comps[i] = Arrays.binarySearch(weight_cdf, rng.nextDouble());
094                        if (comps[i] < 0)
095                                comps[i] = 0;
096                        if (comps[i] >= gaussians.length)
097                                comps[i] = gaussians.length - 1;
098                }
099
100                // generate samples for each component... doing it this way is more
101                // efficient as it minimises the number of times you'd need to do the
102                // cholesky decomposition
103                for (int i = 0; i < gaussians.length; i++) {
104                        final int[] idxs = ArrayUtils.search(comps, i);
105
106                        if (idxs.length == 0)
107                                continue;
108
109                        // generate the sample
110                        final double[][] samples = gaussians[i].sample(idxs.length, rng);
111
112                        for (int j = 0; j < samples.length; j++) {
113                                X[idxs[j]] = samples[j];
114                        }
115                }
116
117                return X;
118        }
119
120        @Override
121        public double estimateLogProbability(double[] sample) {
122                return estimateLogProbability(new double[][] { sample })[0];
123        }
124
125        /**
126         * Get the probability for a given points in space relative to the PDF
127         * represented by the gaussian mixture.
128         * 
129         * @param samples
130         *            the points
131         * @return the probability
132         */
133        @Override
134        public double[] estimateLogProbability(double[][] samples) {
135                if (samples[0].length != this.gaussians[0].getMean().getColumnDimension()) {
136                        throw new IllegalArgumentException(
137                                        "The number of dimensions of the given data is not compatible with the model");
138                }
139
140                final double[][] lpr = computeWeightedLogProb(samples);
141
142                final double[] logprob = new double[samples.length];
143                for (int i = 0; i < samples.length; i++) {
144                        for (int j = 0; j < lpr[0].length; j++) {
145                                logprob[i] += Math.exp(lpr[i][j]);
146                        }
147                        logprob[i] = Math.log(logprob[i]);
148                }
149
150                return logprob;
151        }
152
153        /**
154         * Compute the log probability of the given data points belonging to each of
155         * the given gaussians
156         * 
157         * @param x
158         *            the points
159         * @param gaussians
160         *            the gaussians
161         * @return the log probability of each point belonging to each gaussian
162         *         distribution
163         */
164        public static double[][] logProbability(double[][] x, MultivariateGaussian[] gaussians) {
165                final int ndims = x[0].length;
166                final int nmix = gaussians.length;
167                final int nsamples = x.length;
168                final Matrix X = new Matrix(x);
169
170                final double[][] log_prob = new double[nsamples][nmix];
171                for (int i = 0; i < nmix; i++) {
172                        final Matrix mu = gaussians[i].getMean();
173                        final Matrix cv = gaussians[i].getCovariance();
174
175                        final CholeskyDecomposition chol = cv.chol();
176                        Matrix cv_chol;
177                        if (chol.isSPD()) {
178                                cv_chol = chol.getL();
179                        } else {
180                                // covar probably doesn't have enough samples, so
181                                // recondition it
182                                final Matrix m = cv.plus(Matrix.identity(ndims, ndims).timesEquals(MIN_COVAR_RECONDITION));
183                                cv_chol = m.chol().getL();
184                        }
185
186                        double cv_log_det = 0;
187                        final double[][] cv_chol_d = cv_chol.getArray();
188                        for (int j = 0; j < ndims; j++) {
189                                cv_log_det += Math.log(cv_chol_d[j][j]);
190                        }
191                        cv_log_det *= 2;
192
193                        final Matrix cv_sol = cv_chol.solve(
194                                        MatrixUtils.minusRow(X, mu.getArray()[0]).transpose()).transpose();
195                        for (int k = 0; k < nsamples; k++) {
196                                double sum = 0;
197                                for (int j = 0; j < ndims; j++) {
198                                        sum += cv_sol.get(k, j) * cv_sol.get(k, j);
199                                }
200
201                                log_prob[k][i] = -0.5 * (sum + cv_log_det + ndims * Math.log(2 * Math.PI));
202                        }
203
204                }
205
206                return log_prob;
207        }
208
209        protected double[][] computeWeightedLogProb(double[][] samples) {
210                final double[][] lpr = logProbability(samples);
211
212                for (int j = 0; j < lpr[0].length; j++) {
213                        final double logw = Math.log(this.weights[j]);
214
215                        for (int i = 0; i < lpr.length; i++) {
216                                lpr[i][j] += logw;
217                        }
218                }
219
220                return lpr;
221        }
222
223        /**
224         * Compute the log probability of the given data points belonging to each of
225         * the gaussians
226         * 
227         * @param x
228         *            the points
229         * @return the log probability of each point belonging to each gaussian
230         *         distribution
231         */
232        public double[][] logProbability(double[][] x) {
233                final int nmix = gaussians.length;
234                final int nsamples = x.length;
235
236                final double[][] log_prob = new double[nsamples][nmix];
237                for (int i = 0; i < nmix; i++) {
238                        final double[] lp = gaussians[i].estimateLogProbability(x);
239
240                        for (int j = 0; j < nsamples; j++) {
241                                log_prob[j][i] = lp[j];
242                        }
243                }
244
245                return log_prob;
246                // return logProbability(x, gaussians);
247        }
248
249        /**
250         * Predict the log-posterior for the given sample; this is the
251         * log-probability of the sample point belonging to each of the gaussians in
252         * the mixture.
253         * 
254         * @param sample
255         *            the sample
256         * @return the log-probability for each gaussian
257         */
258        public double[] predictLogPosterior(double[] sample) {
259                return predictLogPosterior(new double[][] { sample })[0];
260        }
261
262        /**
263         * Predict the log-posterior for the given samples; this is the
264         * log-probability of each sample point belonging to each of the gaussians
265         * in the mixture.
266         * 
267         * @param samples
268         *            the samples
269         * @return the log-probability for each gaussian
270         */
271        public double[][] predictLogPosterior(double[][] samples) {
272                if (samples[0].length != this.gaussians[0].getMean().getColumnDimension()) {
273                        throw new IllegalArgumentException(
274                                        "The number of dimensions of the given data is not compatible with the model");
275                }
276
277                final double[][] lpr = computeWeightedLogProb(samples);
278                final double[] logprob = logsumexp(lpr);
279
280                final double[][] responsibilities = new double[samples.length][gaussians.length];
281                for (int i = 0; i < samples.length; i++) {
282                        for (int j = 0; j < gaussians.length; j++) {
283                                responsibilities[i][j] = lpr[i][j] - logprob[i]; // note no exp
284                                                                                                                                        // as want
285                                                                                                                                        // log prob
286                        }
287                }
288
289                return responsibilities;
290        }
291
292        /**
293         * Compute the posterior distribution of the samples, and the overall log
294         * probability of each sample as belonging to the model.
295         * 
296         * @param samples
297         *            the samples
298         * @return a pair of (log probabilities, log posterior probabilities)
299         */
300        public IndependentPair<double[], double[][]> scoreSamples(double[][] samples) {
301                if (samples[0].length != this.gaussians[0].getMean().getColumnDimension()) {
302                        throw new IllegalArgumentException(
303                                        "The number of dimensions of the given data is not compatible with the model");
304                }
305
306                final double[][] lpr = computeWeightedLogProb(samples);
307
308                final double[] logprob = logsumexp(lpr);
309
310                final double[][] responsibilities = new double[samples.length][gaussians.length];
311                for (int i = 0; i < samples.length; i++) {
312                        for (int j = 0; j < gaussians.length; j++) {
313                                responsibilities[i][j] = Math.exp(lpr[i][j] - logprob[i]);
314                        }
315                }
316
317                return IndependentPair.pair(logprob, responsibilities);
318        }
319
320        private double[] logsumexp(double[][] data) {
321                final double[] lse = new double[data.length];
322                for (int i = 0; i < data.length; i++) {
323                        final double max = ArrayUtils.maxValue(data[i]);
324
325                        for (int j = 0; j < data[0].length; j++) {
326                                lse[i] += Math.exp(data[i][j] - max);
327                        }
328                        lse[i] = max + Math.log(lse[i]);
329                }
330                return lse;
331        }
332
333        /**
334         * Get the gaussians that make up the mixture
335         * 
336         * @return the gaussians
337         */
338        public MultivariateGaussian[] getGaussians() {
339                return gaussians;
340        }
341
342        /**
343         * Get the mixture weights for each gaussian.
344         * 
345         * @return the weights
346         */
347        public double[] getWeights() {
348                return weights;
349        }
350
351        @Override
352        public double estimateProbability(double[] sample) {
353                return Math.exp(estimateLogProbability(sample));
354        }
355
356        /**
357         * Predict the class (the index of the most-probable gaussian) to which the
358         * given data point belongs.
359         * 
360         * @param data
361         *            the data point
362         * @return the class index
363         */
364        public int predict(double[] data) {
365                final double[] posterior = predictLogPosterior(data);
366                return ArrayUtils.maxIndex(posterior);
367        }
368}