001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.workinprogress; 031 032import java.io.File; 033import java.io.IOException; 034 035import org.openimaj.feature.FloatFVComparison; 036import org.openimaj.image.DisplayUtilities; 037import org.openimaj.image.FImage; 038import org.openimaj.image.Image; 039import org.openimaj.image.MBFImage; 040import org.openimaj.image.processor.SinglebandImageProcessor; 041import org.openimaj.util.array.ArrayUtils; 042import org.openimaj.video.Video; 043import org.openimaj.video.processor.VideoProcessor; 044import org.openimaj.video.xuggle.XuggleVideo; 045 046public class AdaptiveMoGBackgroundEstimator<IMAGE extends Image<?, IMAGE> & SinglebandImageProcessor.Processable<Float, FImage, IMAGE>> 047extends 048VideoProcessor<IMAGE> 049{ 050 /** 051 * The mixture means [y][x][gaussian][band] 052 */ 053 float[][][][] mu; 054 055 /** 056 * The mixture standard deviations (Gaussians are spherical) 057 * [y][x][gaussian] 058 */ 059 float[][][] sigma; 060 061 /** 062 * The mixture weights [y][x][gaussian] 063 */ 064 float[][][] weights; 065 066 /** 067 * Number of dimensions 068 */ 069 int n; 070 071 /** 072 * Number of Gaussians per mixture 073 */ 074 int K = 3; 075 076 /** 077 * Learning rate 078 */ 079 float alpha = 0.005f; 080 081 /** 082 * Initial (low) weight for new Gaussians 083 */ 084 float initialWeight = 0.05f; 085 086 /** 087 * Initial (high) standard deviation for new Gaussians 088 */ 089 float initialSigma = 30f / 255f; 090 091 /** 092 * Number of standard deviations for a pixel to be considered a match 093 */ 094 float matchThreshold = 2.5f; 095 096 private float T = 0.7f; 097 098 /** 099 * The segmentation mask 100 */ 101 private float[][] mask; 102 103 public AdaptiveMoGBackgroundEstimator(Video<IMAGE> xv) { 104 super(xv); 105 } 106 107 private float density(float[] pixel, float[] pkmu, float pksigma) { 108 if (pksigma == 0) 109 return 0; 110 111 final double norm = 1 / Math.sqrt(Math.pow(2 * Math.PI, n) * pksigma * pksigma); 112 double exp = 0; 113 for (int i = 0; i < n; i++) { 114 final float diff = pixel[i] - pkmu[i]; 115 exp += diff * diff / (pksigma * pksigma); 116 } 117 118 return (float) (Math.exp(-0.5 * exp) * norm); 119 } 120 121 void updateModel(float[][][] tImg) { 122 final int height = tImg[0].length; 123 final int width = tImg[0][0].length; 124 125 if (n != tImg.length || height != mu.length || width != mu[0].length) 126 initialiseModel(width, height, tImg.length); 127 128 for (int y = 0; y < height; y++) { 129 for (int x = 0; x < width; x++) { 130 // final float[] pixel = tImg[y][x]; 131 final float[] pixel = new float[n]; 132 for (int i = 0; i < n; i++) 133 pixel[i] = tImg[i][y][x]; 134 135 final float[][] means = mu[y][x]; 136 final float[] stddev = sigma[y][x]; 137 final float[] weight = weights[y][x]; 138 139 int match = -1; 140 for (int i = 0; i < K; i++) { 141 final double distance = FloatFVComparison.EUCLIDEAN.compare(means[i], pixel); 142 143 if (distance < stddev[i] * matchThreshold) { 144 match = i; 145 break; 146 } 147 } 148 149 if (match >= 0) { 150 // update matched distribution 151 final float p = alpha * density(pixel, means[match], stddev[match]); 152 153 float dot = 0; 154 for (int i = 0; i < n; i++) { 155 means[match][i] = (1 - p) * means[match][i] + p * pixel[i]; 156 157 dot += (pixel[i] - means[match][i]) * (pixel[i] - means[match][i]); 158 } 159 160 stddev[match] = (float) Math.sqrt((1 - p) * stddev[match] * stddev[match] + p * dot); 161 } else { 162 // find least-probable gaussian 163 float minProb = density(pixel, means[0], stddev[0]); 164 int minProbIdx = 0; 165 for (int i = 1; i < K; i++) { 166 final float prob = density(pixel, means[i], stddev[i]); 167 if (prob < minProb) { 168 minProb = prob; 169 minProbIdx = i; 170 } 171 } 172 173 // init new gaussian: 174 means[minProbIdx] = pixel.clone(); 175 stddev[minProbIdx] = initialSigma; 176 weight[minProbIdx] = initialWeight; 177 match = minProbIdx; 178 } 179 180 // update weights 181 float weightsum = 0; 182 for (int i = 0; i < K; i++) { 183 weight[i] = (1 - alpha) * weight[i]; 184 if (i == match) { 185 weight[i] += alpha; 186 } 187 weightsum += weight[i]; 188 } 189 190 // renormalise weights 191 ArrayUtils.divide(weight, weightsum); 192 193 // compute scores 194 final float[] scores = new float[K]; 195 for (int i = 0; i < K; i++) { 196 if (stddev[i] == 0) 197 scores[i] = 0; 198 else 199 scores[i] = weight[i] / stddev[i]; 200 } 201 202 final int[] indices = ArrayUtils.indexSort(scores); 203 204 float c = 0; 205 boolean found = false; 206 for (int i = indices.length - 1; i >= 0; i--) { 207 c += weight[indices[i]]; 208 if (match == indices[i]) { 209 found = true; 210 break; 211 } 212 if (c > T) { 213 break; 214 } 215 } 216 mask[y][x] = found ? 1 : 0; 217 } 218 } 219 } 220 221 /** 222 * Initialise the internal state of the model. This does not need to be 223 * called manually (the first call to {@link #updateModel(Image)} will call 224 * it automatically), however, it is public to allow the model to be reset. 225 * 226 * @param width 227 * the frame width 228 * @param height 229 * the frame height 230 * @param numBands 231 * the number of image bands 232 */ 233 public void initialiseModel(int width, int height, int numBands) { 234 this.n = numBands; 235 this.mu = new float[height][width][K][numBands]; 236 this.sigma = new float[height][width][K]; 237 this.weights = new float[height][width][K]; 238 this.mask = new float[height][width]; 239 240 // for (int y=0; y<height; y++) 241 // for (int x=0; x<width; x++) 242 // for (int k=0; k<K; k++) 243 // sigma[y][x][k] = 0f; 244 } 245 246 public void updateModel(IMAGE frame) { 247 if (frame instanceof MBFImage) { 248 final float[][][] data = new float[((MBFImage) frame).numBands()][][]; 249 250 for (int i = 0; i < data.length; i++) 251 data[i] = ((MBFImage) frame).getBand(i).pixels; 252 253 updateModel(data); 254 } else if (frame instanceof FImage) { 255 final float[][][] data = new float[1][][]; 256 data[0] = ((FImage) frame).pixels; 257 258 updateModel(data); 259 } else { 260 throw new UnsupportedOperationException("Only FImage and MBFImage are supported"); 261 } 262 } 263 264 @Override 265 public IMAGE processFrame(IMAGE frame) { 266 if (frame instanceof MBFImage) { 267 final float[][][] data = new float[((MBFImage) frame).numBands()][][]; 268 269 for (int i = 0; i < data.length; i++) 270 data[i] = ((MBFImage) frame).getBand(i).pixels; 271 272 updateModel(data); 273 } else if (frame instanceof FImage) { 274 final float[][][] data = new float[1][][]; 275 data[0] = ((FImage) frame).pixels; 276 277 updateModel(data); 278 } else { 279 throw new UnsupportedOperationException("Only FImage and MBFImage are supported"); 280 } 281 282 final FImage tmp = new FImage(mask); 283 if (frame instanceof FImage) { 284 ((FImage) frame).internalAssign(tmp); 285 } else { 286 // ((MBFImage) frame).drawImage(tmp.toRGB(), 0, 0); 287 ((MBFImage) frame).multiplyInplace(tmp.inverse().toRGB()); 288 } 289 290 return frame; 291 } 292 293 public static void main(String[] args) throws IOException { 294 // final XuggleVideo xv = new XuggleVideo(new 295 // File("/Users/jon/Desktop/merlin/tunnel480.mov")); 296 final XuggleVideo xv = new XuggleVideo(new File("/Users/jon/Downloads/ewap_dataset/seq_hotel/seq_hotel.avi")); 297 298 final AdaptiveMoGBackgroundEstimator<MBFImage> proc = new AdaptiveMoGBackgroundEstimator<MBFImage>(xv); 299 for (final MBFImage img : proc) { 300 DisplayUtilities.displayName(img, "video"); 301 } 302 } 303}