001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.math.statistics.distribution; 031 032import java.util.Arrays; 033import java.util.Random; 034 035import org.openimaj.math.matrix.MatrixUtils; 036import org.openimaj.util.array.ArrayUtils; 037import org.openimaj.util.pair.IndependentPair; 038 039import Jama.CholeskyDecomposition; 040import Jama.Matrix; 041 042/** 043 * Implementation of a mixture of gaussians (gaussian mixture model). This class 044 * only models the distribution itself, and does not contain the necessary code 045 * to learn a mixture (for that see the <code>GaussianMixtureModelEM</code> 046 * class for example). 047 * 048 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 049 */ 050public class MixtureOfGaussians extends AbstractMultivariateDistribution { 051 /** 052 * Amount to add to the diagonal of the covariance matrix if it's 053 * ill-conditioned. 054 */ 055 public static final double MIN_COVAR_RECONDITION = 1.0e-7; 056 057 /** 058 * The individual gaussians 059 */ 060 public MultivariateGaussian[] gaussians; 061 062 /** 063 * The weight of each gaussian 064 */ 065 public double[] weights; 066 067 /** 068 * Construct the mixture with the given gaussians and weights 069 * 070 * @param gaussians 071 * the gaussians 072 * @param weights 073 * the weights 074 */ 075 public MixtureOfGaussians(MultivariateGaussian[] gaussians, double[] weights) { 076 this.gaussians = gaussians; 077 this.weights = weights; 078 } 079 080 @Override 081 public double[] sample(Random rng) { 082 return sample(1, rng)[0]; 083 } 084 085 @Override 086 public double[][] sample(int n_samples, Random rng) { 087 final double[] weight_cdf = ArrayUtils.cumulativeSum(this.weights); 088 final double[][] X = new double[n_samples][this.gaussians[0].getMean().getColumnDimension()]; 089 090 // decide which component to use for each sample 091 final int[] comps = new int[n_samples]; 092 for (int i = 0; i < n_samples; i++) { 093 comps[i] = Arrays.binarySearch(weight_cdf, rng.nextDouble()); 094 if (comps[i] < 0) 095 comps[i] = 0; 096 if (comps[i] >= gaussians.length) 097 comps[i] = gaussians.length - 1; 098 } 099 100 // generate samples for each component... doing it this way is more 101 // efficient as it minimises the number of times you'd need to do the 102 // cholesky decomposition 103 for (int i = 0; i < gaussians.length; i++) { 104 final int[] idxs = ArrayUtils.search(comps, i); 105 106 if (idxs.length == 0) 107 continue; 108 109 // generate the sample 110 final double[][] samples = gaussians[i].sample(idxs.length, rng); 111 112 for (int j = 0; j < samples.length; j++) { 113 X[idxs[j]] = samples[j]; 114 } 115 } 116 117 return X; 118 } 119 120 @Override 121 public double estimateLogProbability(double[] sample) { 122 return estimateLogProbability(new double[][] { sample })[0]; 123 } 124 125 /** 126 * Get the probability for a given points in space relative to the PDF 127 * represented by the gaussian mixture. 128 * 129 * @param samples 130 * the points 131 * @return the probability 132 */ 133 @Override 134 public double[] estimateLogProbability(double[][] samples) { 135 if (samples[0].length != this.gaussians[0].getMean().getColumnDimension()) { 136 throw new IllegalArgumentException( 137 "The number of dimensions of the given data is not compatible with the model"); 138 } 139 140 final double[][] lpr = computeWeightedLogProb(samples); 141 142 final double[] logprob = new double[samples.length]; 143 for (int i = 0; i < samples.length; i++) { 144 for (int j = 0; j < lpr[0].length; j++) { 145 logprob[i] += Math.exp(lpr[i][j]); 146 } 147 logprob[i] = Math.log(logprob[i]); 148 } 149 150 return logprob; 151 } 152 153 /** 154 * Compute the log probability of the given data points belonging to each of 155 * the given gaussians 156 * 157 * @param x 158 * the points 159 * @param gaussians 160 * the gaussians 161 * @return the log probability of each point belonging to each gaussian 162 * distribution 163 */ 164 public static double[][] logProbability(double[][] x, MultivariateGaussian[] gaussians) { 165 final int ndims = x[0].length; 166 final int nmix = gaussians.length; 167 final int nsamples = x.length; 168 final Matrix X = new Matrix(x); 169 170 final double[][] log_prob = new double[nsamples][nmix]; 171 for (int i = 0; i < nmix; i++) { 172 final Matrix mu = gaussians[i].getMean(); 173 final Matrix cv = gaussians[i].getCovariance(); 174 175 final CholeskyDecomposition chol = cv.chol(); 176 Matrix cv_chol; 177 if (chol.isSPD()) { 178 cv_chol = chol.getL(); 179 } else { 180 // covar probably doesn't have enough samples, so 181 // recondition it 182 final Matrix m = cv.plus(Matrix.identity(ndims, ndims).timesEquals(MIN_COVAR_RECONDITION)); 183 cv_chol = m.chol().getL(); 184 } 185 186 double cv_log_det = 0; 187 final double[][] cv_chol_d = cv_chol.getArray(); 188 for (int j = 0; j < ndims; j++) { 189 cv_log_det += Math.log(cv_chol_d[j][j]); 190 } 191 cv_log_det *= 2; 192 193 final Matrix cv_sol = cv_chol.solve( 194 MatrixUtils.minusRow(X, mu.getArray()[0]).transpose()).transpose(); 195 for (int k = 0; k < nsamples; k++) { 196 double sum = 0; 197 for (int j = 0; j < ndims; j++) { 198 sum += cv_sol.get(k, j) * cv_sol.get(k, j); 199 } 200 201 log_prob[k][i] = -0.5 * (sum + cv_log_det + ndims * Math.log(2 * Math.PI)); 202 } 203 204 } 205 206 return log_prob; 207 } 208 209 protected double[][] computeWeightedLogProb(double[][] samples) { 210 final double[][] lpr = logProbability(samples); 211 212 for (int j = 0; j < lpr[0].length; j++) { 213 final double logw = Math.log(this.weights[j]); 214 215 for (int i = 0; i < lpr.length; i++) { 216 lpr[i][j] += logw; 217 } 218 } 219 220 return lpr; 221 } 222 223 /** 224 * Compute the log probability of the given data points belonging to each of 225 * the gaussians 226 * 227 * @param x 228 * the points 229 * @return the log probability of each point belonging to each gaussian 230 * distribution 231 */ 232 public double[][] logProbability(double[][] x) { 233 final int nmix = gaussians.length; 234 final int nsamples = x.length; 235 236 final double[][] log_prob = new double[nsamples][nmix]; 237 for (int i = 0; i < nmix; i++) { 238 final double[] lp = gaussians[i].estimateLogProbability(x); 239 240 for (int j = 0; j < nsamples; j++) { 241 log_prob[j][i] = lp[j]; 242 } 243 } 244 245 return log_prob; 246 // return logProbability(x, gaussians); 247 } 248 249 /** 250 * Predict the log-posterior for the given sample; this is the 251 * log-probability of the sample point belonging to each of the gaussians in 252 * the mixture. 253 * 254 * @param sample 255 * the sample 256 * @return the log-probability for each gaussian 257 */ 258 public double[] predictLogPosterior(double[] sample) { 259 return predictLogPosterior(new double[][] { sample })[0]; 260 } 261 262 /** 263 * Predict the log-posterior for the given samples; this is the 264 * log-probability of each sample point belonging to each of the gaussians 265 * in the mixture. 266 * 267 * @param samples 268 * the samples 269 * @return the log-probability for each gaussian 270 */ 271 public double[][] predictLogPosterior(double[][] samples) { 272 if (samples[0].length != this.gaussians[0].getMean().getColumnDimension()) { 273 throw new IllegalArgumentException( 274 "The number of dimensions of the given data is not compatible with the model"); 275 } 276 277 final double[][] lpr = computeWeightedLogProb(samples); 278 final double[] logprob = logsumexp(lpr); 279 280 final double[][] responsibilities = new double[samples.length][gaussians.length]; 281 for (int i = 0; i < samples.length; i++) { 282 for (int j = 0; j < gaussians.length; j++) { 283 responsibilities[i][j] = lpr[i][j] - logprob[i]; // note no exp 284 // as want 285 // log prob 286 } 287 } 288 289 return responsibilities; 290 } 291 292 /** 293 * Compute the posterior distribution of the samples, and the overall log 294 * probability of each sample as belonging to the model. 295 * 296 * @param samples 297 * the samples 298 * @return a pair of (log probabilities, log posterior probabilities) 299 */ 300 public IndependentPair<double[], double[][]> scoreSamples(double[][] samples) { 301 if (samples[0].length != this.gaussians[0].getMean().getColumnDimension()) { 302 throw new IllegalArgumentException( 303 "The number of dimensions of the given data is not compatible with the model"); 304 } 305 306 final double[][] lpr = computeWeightedLogProb(samples); 307 308 final double[] logprob = logsumexp(lpr); 309 310 final double[][] responsibilities = new double[samples.length][gaussians.length]; 311 for (int i = 0; i < samples.length; i++) { 312 for (int j = 0; j < gaussians.length; j++) { 313 responsibilities[i][j] = Math.exp(lpr[i][j] - logprob[i]); 314 } 315 } 316 317 return IndependentPair.pair(logprob, responsibilities); 318 } 319 320 private double[] logsumexp(double[][] data) { 321 final double[] lse = new double[data.length]; 322 for (int i = 0; i < data.length; i++) { 323 final double max = ArrayUtils.maxValue(data[i]); 324 325 for (int j = 0; j < data[0].length; j++) { 326 lse[i] += Math.exp(data[i][j] - max); 327 } 328 lse[i] = max + Math.log(lse[i]); 329 } 330 return lse; 331 } 332 333 /** 334 * Get the gaussians that make up the mixture 335 * 336 * @return the gaussians 337 */ 338 public MultivariateGaussian[] getGaussians() { 339 return gaussians; 340 } 341 342 /** 343 * Get the mixture weights for each gaussian. 344 * 345 * @return the weights 346 */ 347 public double[] getWeights() { 348 return weights; 349 } 350 351 @Override 352 public double estimateProbability(double[] sample) { 353 return Math.exp(estimateLogProbability(sample)); 354 } 355 356 /** 357 * Predict the class (the index of the most-probable gaussian) to which the 358 * given data point belongs. 359 * 360 * @param data 361 * the data point 362 * @return the class index 363 */ 364 public int predict(double[] data) { 365 final double[] posterior = predictLogPosterior(data); 366 return ArrayUtils.maxIndex(posterior); 367 } 368}