View Javadoc

1   /**
2    * Copyright (c) 2011, The University of Southampton and the individual contributors.
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without modification,
6    * are permitted provided that the following conditions are met:
7    *
8    *   * 	Redistributions of source code must retain the above copyright notice,
9    * 	this list of conditions and the following disclaimer.
10   *
11   *   *	Redistributions in binary form must reproduce the above copyright notice,
12   * 	this list of conditions and the following disclaimer in the documentation
13   * 	and/or other materials provided with the distribution.
14   *
15   *   *	Neither the name of the University of Southampton nor the names of its
16   * 	contributors may be used to endorse or promote products derived from this
17   * 	software without specific prior written permission.
18   *
19   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22   * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23   * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26   * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28   * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29   */
30  package org.openimaj.workinprogress;
31  
32  import java.io.File;
33  import java.io.IOException;
34  
35  import org.openimaj.feature.FloatFVComparison;
36  import org.openimaj.image.DisplayUtilities;
37  import org.openimaj.image.FImage;
38  import org.openimaj.image.Image;
39  import org.openimaj.image.MBFImage;
40  import org.openimaj.image.processor.SinglebandImageProcessor;
41  import org.openimaj.util.array.ArrayUtils;
42  import org.openimaj.video.Video;
43  import org.openimaj.video.processor.VideoProcessor;
44  import org.openimaj.video.xuggle.XuggleVideo;
45  
46  public class AdaptiveMoGBackgroundEstimator<IMAGE extends Image<?, IMAGE> & SinglebandImageProcessor.Processable<Float, FImage, IMAGE>>
47  extends
48  VideoProcessor<IMAGE>
49  {
50  	/**
51  	 * The mixture means [y][x][gaussian][band]
52  	 */
53  	float[][][][] mu;
54  
55  	/**
56  	 * The mixture standard deviations (Gaussians are spherical)
57  	 * [y][x][gaussian]
58  	 */
59  	float[][][] sigma;
60  
61  	/**
62  	 * The mixture weights [y][x][gaussian]
63  	 */
64  	float[][][] weights;
65  
66  	/**
67  	 * Number of dimensions
68  	 */
69  	int n;
70  
71  	/**
72  	 * Number of Gaussians per mixture
73  	 */
74  	int K = 3;
75  
76  	/**
77  	 * Learning rate
78  	 */
79  	float alpha = 0.005f;
80  
81  	/**
82  	 * Initial (low) weight for new Gaussians
83  	 */
84  	float initialWeight = 0.05f;
85  
86  	/**
87  	 * Initial (high) standard deviation for new Gaussians
88  	 */
89  	float initialSigma = 30f / 255f;
90  
91  	/**
92  	 * Number of standard deviations for a pixel to be considered a match
93  	 */
94  	float matchThreshold = 2.5f;
95  
96  	private float T = 0.7f;
97  
98  	/**
99  	 * The segmentation mask
100 	 */
101 	private float[][] mask;
102 
103 	public AdaptiveMoGBackgroundEstimator(Video<IMAGE> xv) {
104 		super(xv);
105 	}
106 
107 	private float density(float[] pixel, float[] pkmu, float pksigma) {
108 		if (pksigma == 0)
109 			return 0;
110 
111 		final double norm = 1 / Math.sqrt(Math.pow(2 * Math.PI, n) * pksigma * pksigma);
112 		double exp = 0;
113 		for (int i = 0; i < n; i++) {
114 			final float diff = pixel[i] - pkmu[i];
115 			exp += diff * diff / (pksigma * pksigma);
116 		}
117 
118 		return (float) (Math.exp(-0.5 * exp) * norm);
119 	}
120 
121 	void updateModel(float[][][] tImg) {
122 		final int height = tImg[0].length;
123 		final int width = tImg[0][0].length;
124 
125 		if (n != tImg.length || height != mu.length || width != mu[0].length)
126 			initialiseModel(width, height, tImg.length);
127 
128 		for (int y = 0; y < height; y++) {
129 			for (int x = 0; x < width; x++) {
130 				// final float[] pixel = tImg[y][x];
131 				final float[] pixel = new float[n];
132 				for (int i = 0; i < n; i++)
133 					pixel[i] = tImg[i][y][x];
134 
135 				final float[][] means = mu[y][x];
136 				final float[] stddev = sigma[y][x];
137 				final float[] weight = weights[y][x];
138 
139 				int match = -1;
140 				for (int i = 0; i < K; i++) {
141 					final double distance = FloatFVComparison.EUCLIDEAN.compare(means[i], pixel);
142 
143 					if (distance < stddev[i] * matchThreshold) {
144 						match = i;
145 						break;
146 					}
147 				}
148 
149 				if (match >= 0) {
150 					// update matched distribution
151 					final float p = alpha * density(pixel, means[match], stddev[match]);
152 
153 					float dot = 0;
154 					for (int i = 0; i < n; i++) {
155 						means[match][i] = (1 - p) * means[match][i] + p * pixel[i];
156 
157 						dot += (pixel[i] - means[match][i]) * (pixel[i] - means[match][i]);
158 					}
159 
160 					stddev[match] = (float) Math.sqrt((1 - p) * stddev[match] * stddev[match] + p * dot);
161 				} else {
162 					// find least-probable gaussian
163 					float minProb = density(pixel, means[0], stddev[0]);
164 					int minProbIdx = 0;
165 					for (int i = 1; i < K; i++) {
166 						final float prob = density(pixel, means[i], stddev[i]);
167 						if (prob < minProb) {
168 							minProb = prob;
169 							minProbIdx = i;
170 						}
171 					}
172 
173 					// init new gaussian:
174 					means[minProbIdx] = pixel.clone();
175 					stddev[minProbIdx] = initialSigma;
176 					weight[minProbIdx] = initialWeight;
177 					match = minProbIdx;
178 				}
179 
180 				// update weights
181 				float weightsum = 0;
182 				for (int i = 0; i < K; i++) {
183 					weight[i] = (1 - alpha) * weight[i];
184 					if (i == match) {
185 						weight[i] += alpha;
186 					}
187 					weightsum += weight[i];
188 				}
189 
190 				// renormalise weights
191 				ArrayUtils.divide(weight, weightsum);
192 
193 				// compute scores
194 				final float[] scores = new float[K];
195 				for (int i = 0; i < K; i++) {
196 					if (stddev[i] == 0)
197 						scores[i] = 0;
198 					else
199 						scores[i] = weight[i] / stddev[i];
200 				}
201 
202 				final int[] indices = ArrayUtils.indexSort(scores);
203 
204 				float c = 0;
205 				boolean found = false;
206 				for (int i = indices.length - 1; i >= 0; i--) {
207 					c += weight[indices[i]];
208 					if (match == indices[i]) {
209 						found = true;
210 						break;
211 					}
212 					if (c > T) {
213 						break;
214 					}
215 				}
216 				mask[y][x] = found ? 1 : 0;
217 			}
218 		}
219 	}
220 
221 	/**
222 	 * Initialise the internal state of the model. This does not need to be
223 	 * called manually (the first call to {@link #updateModel(Image)} will call
224 	 * it automatically), however, it is public to allow the model to be reset.
225 	 *
226 	 * @param width
227 	 *            the frame width
228 	 * @param height
229 	 *            the frame height
230 	 * @param numBands
231 	 *            the number of image bands
232 	 */
233 	public void initialiseModel(int width, int height, int numBands) {
234 		this.n = numBands;
235 		this.mu = new float[height][width][K][numBands];
236 		this.sigma = new float[height][width][K];
237 		this.weights = new float[height][width][K];
238 		this.mask = new float[height][width];
239 
240 		// for (int y=0; y<height; y++)
241 		// for (int x=0; x<width; x++)
242 		// for (int k=0; k<K; k++)
243 		// sigma[y][x][k] = 0f;
244 	}
245 
246 	public void updateModel(IMAGE frame) {
247 		if (frame instanceof MBFImage) {
248 			final float[][][] data = new float[((MBFImage) frame).numBands()][][];
249 
250 			for (int i = 0; i < data.length; i++)
251 				data[i] = ((MBFImage) frame).getBand(i).pixels;
252 
253 			updateModel(data);
254 		} else if (frame instanceof FImage) {
255 			final float[][][] data = new float[1][][];
256 			data[0] = ((FImage) frame).pixels;
257 
258 			updateModel(data);
259 		} else {
260 			throw new UnsupportedOperationException("Only FImage and MBFImage are supported");
261 		}
262 	}
263 
264 	@Override
265 	public IMAGE processFrame(IMAGE frame) {
266 		if (frame instanceof MBFImage) {
267 			final float[][][] data = new float[((MBFImage) frame).numBands()][][];
268 
269 			for (int i = 0; i < data.length; i++)
270 				data[i] = ((MBFImage) frame).getBand(i).pixels;
271 
272 			updateModel(data);
273 		} else if (frame instanceof FImage) {
274 			final float[][][] data = new float[1][][];
275 			data[0] = ((FImage) frame).pixels;
276 
277 			updateModel(data);
278 		} else {
279 			throw new UnsupportedOperationException("Only FImage and MBFImage are supported");
280 		}
281 
282 		final FImage tmp = new FImage(mask);
283 		if (frame instanceof FImage) {
284 			((FImage) frame).internalAssign(tmp);
285 		} else {
286 			// ((MBFImage) frame).drawImage(tmp.toRGB(), 0, 0);
287 			((MBFImage) frame).multiplyInplace(tmp.inverse().toRGB());
288 		}
289 
290 		return frame;
291 	}
292 
293 	public static void main(String[] args) throws IOException {
294 		// final XuggleVideo xv = new XuggleVideo(new
295 		// File("/Users/jon/Desktop/merlin/tunnel480.mov"));
296 		final XuggleVideo xv = new XuggleVideo(new File("/Users/jon/Downloads/ewap_dataset/seq_hotel/seq_hotel.avi"));
297 
298 		final AdaptiveMoGBackgroundEstimator<MBFImage> proc = new AdaptiveMoGBackgroundEstimator<MBFImage>(xv);
299 		for (final MBFImage img : proc) {
300 			DisplayUtilities.displayName(img, "video");
301 		}
302 	}
303 }