001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.workinprogress;
031
032import java.io.File;
033import java.io.IOException;
034
035import org.openimaj.feature.FloatFVComparison;
036import org.openimaj.image.DisplayUtilities;
037import org.openimaj.image.FImage;
038import org.openimaj.image.Image;
039import org.openimaj.image.MBFImage;
040import org.openimaj.image.processor.SinglebandImageProcessor;
041import org.openimaj.util.array.ArrayUtils;
042import org.openimaj.video.Video;
043import org.openimaj.video.processor.VideoProcessor;
044import org.openimaj.video.xuggle.XuggleVideo;
045
046public class AdaptiveMoGBackgroundEstimator<IMAGE extends Image<?, IMAGE> & SinglebandImageProcessor.Processable<Float, FImage, IMAGE>>
047extends
048VideoProcessor<IMAGE>
049{
050        /**
051         * The mixture means [y][x][gaussian][band]
052         */
053        float[][][][] mu;
054
055        /**
056         * The mixture standard deviations (Gaussians are spherical)
057         * [y][x][gaussian]
058         */
059        float[][][] sigma;
060
061        /**
062         * The mixture weights [y][x][gaussian]
063         */
064        float[][][] weights;
065
066        /**
067         * Number of dimensions
068         */
069        int n;
070
071        /**
072         * Number of Gaussians per mixture
073         */
074        int K = 3;
075
076        /**
077         * Learning rate
078         */
079        float alpha = 0.005f;
080
081        /**
082         * Initial (low) weight for new Gaussians
083         */
084        float initialWeight = 0.05f;
085
086        /**
087         * Initial (high) standard deviation for new Gaussians
088         */
089        float initialSigma = 30f / 255f;
090
091        /**
092         * Number of standard deviations for a pixel to be considered a match
093         */
094        float matchThreshold = 2.5f;
095
096        private float T = 0.7f;
097
098        /**
099         * The segmentation mask
100         */
101        private float[][] mask;
102
103        public AdaptiveMoGBackgroundEstimator(Video<IMAGE> xv) {
104                super(xv);
105        }
106
107        private float density(float[] pixel, float[] pkmu, float pksigma) {
108                if (pksigma == 0)
109                        return 0;
110
111                final double norm = 1 / Math.sqrt(Math.pow(2 * Math.PI, n) * pksigma * pksigma);
112                double exp = 0;
113                for (int i = 0; i < n; i++) {
114                        final float diff = pixel[i] - pkmu[i];
115                        exp += diff * diff / (pksigma * pksigma);
116                }
117
118                return (float) (Math.exp(-0.5 * exp) * norm);
119        }
120
121        void updateModel(float[][][] tImg) {
122                final int height = tImg[0].length;
123                final int width = tImg[0][0].length;
124
125                if (n != tImg.length || height != mu.length || width != mu[0].length)
126                        initialiseModel(width, height, tImg.length);
127
128                for (int y = 0; y < height; y++) {
129                        for (int x = 0; x < width; x++) {
130                                // final float[] pixel = tImg[y][x];
131                                final float[] pixel = new float[n];
132                                for (int i = 0; i < n; i++)
133                                        pixel[i] = tImg[i][y][x];
134
135                                final float[][] means = mu[y][x];
136                                final float[] stddev = sigma[y][x];
137                                final float[] weight = weights[y][x];
138
139                                int match = -1;
140                                for (int i = 0; i < K; i++) {
141                                        final double distance = FloatFVComparison.EUCLIDEAN.compare(means[i], pixel);
142
143                                        if (distance < stddev[i] * matchThreshold) {
144                                                match = i;
145                                                break;
146                                        }
147                                }
148
149                                if (match >= 0) {
150                                        // update matched distribution
151                                        final float p = alpha * density(pixel, means[match], stddev[match]);
152
153                                        float dot = 0;
154                                        for (int i = 0; i < n; i++) {
155                                                means[match][i] = (1 - p) * means[match][i] + p * pixel[i];
156
157                                                dot += (pixel[i] - means[match][i]) * (pixel[i] - means[match][i]);
158                                        }
159
160                                        stddev[match] = (float) Math.sqrt((1 - p) * stddev[match] * stddev[match] + p * dot);
161                                } else {
162                                        // find least-probable gaussian
163                                        float minProb = density(pixel, means[0], stddev[0]);
164                                        int minProbIdx = 0;
165                                        for (int i = 1; i < K; i++) {
166                                                final float prob = density(pixel, means[i], stddev[i]);
167                                                if (prob < minProb) {
168                                                        minProb = prob;
169                                                        minProbIdx = i;
170                                                }
171                                        }
172
173                                        // init new gaussian:
174                                        means[minProbIdx] = pixel.clone();
175                                        stddev[minProbIdx] = initialSigma;
176                                        weight[minProbIdx] = initialWeight;
177                                        match = minProbIdx;
178                                }
179
180                                // update weights
181                                float weightsum = 0;
182                                for (int i = 0; i < K; i++) {
183                                        weight[i] = (1 - alpha) * weight[i];
184                                        if (i == match) {
185                                                weight[i] += alpha;
186                                        }
187                                        weightsum += weight[i];
188                                }
189
190                                // renormalise weights
191                                ArrayUtils.divide(weight, weightsum);
192
193                                // compute scores
194                                final float[] scores = new float[K];
195                                for (int i = 0; i < K; i++) {
196                                        if (stddev[i] == 0)
197                                                scores[i] = 0;
198                                        else
199                                                scores[i] = weight[i] / stddev[i];
200                                }
201
202                                final int[] indices = ArrayUtils.indexSort(scores);
203
204                                float c = 0;
205                                boolean found = false;
206                                for (int i = indices.length - 1; i >= 0; i--) {
207                                        c += weight[indices[i]];
208                                        if (match == indices[i]) {
209                                                found = true;
210                                                break;
211                                        }
212                                        if (c > T) {
213                                                break;
214                                        }
215                                }
216                                mask[y][x] = found ? 1 : 0;
217                        }
218                }
219        }
220
221        /**
222         * Initialise the internal state of the model. This does not need to be
223         * called manually (the first call to {@link #updateModel(Image)} will call
224         * it automatically), however, it is public to allow the model to be reset.
225         *
226         * @param width
227         *            the frame width
228         * @param height
229         *            the frame height
230         * @param numBands
231         *            the number of image bands
232         */
233        public void initialiseModel(int width, int height, int numBands) {
234                this.n = numBands;
235                this.mu = new float[height][width][K][numBands];
236                this.sigma = new float[height][width][K];
237                this.weights = new float[height][width][K];
238                this.mask = new float[height][width];
239
240                // for (int y=0; y<height; y++)
241                // for (int x=0; x<width; x++)
242                // for (int k=0; k<K; k++)
243                // sigma[y][x][k] = 0f;
244        }
245
246        public void updateModel(IMAGE frame) {
247                if (frame instanceof MBFImage) {
248                        final float[][][] data = new float[((MBFImage) frame).numBands()][][];
249
250                        for (int i = 0; i < data.length; i++)
251                                data[i] = ((MBFImage) frame).getBand(i).pixels;
252
253                        updateModel(data);
254                } else if (frame instanceof FImage) {
255                        final float[][][] data = new float[1][][];
256                        data[0] = ((FImage) frame).pixels;
257
258                        updateModel(data);
259                } else {
260                        throw new UnsupportedOperationException("Only FImage and MBFImage are supported");
261                }
262        }
263
264        @Override
265        public IMAGE processFrame(IMAGE frame) {
266                if (frame instanceof MBFImage) {
267                        final float[][][] data = new float[((MBFImage) frame).numBands()][][];
268
269                        for (int i = 0; i < data.length; i++)
270                                data[i] = ((MBFImage) frame).getBand(i).pixels;
271
272                        updateModel(data);
273                } else if (frame instanceof FImage) {
274                        final float[][][] data = new float[1][][];
275                        data[0] = ((FImage) frame).pixels;
276
277                        updateModel(data);
278                } else {
279                        throw new UnsupportedOperationException("Only FImage and MBFImage are supported");
280                }
281
282                final FImage tmp = new FImage(mask);
283                if (frame instanceof FImage) {
284                        ((FImage) frame).internalAssign(tmp);
285                } else {
286                        // ((MBFImage) frame).drawImage(tmp.toRGB(), 0, 0);
287                        ((MBFImage) frame).multiplyInplace(tmp.inverse().toRGB());
288                }
289
290                return frame;
291        }
292
293        public static void main(String[] args) throws IOException {
294                // final XuggleVideo xv = new XuggleVideo(new
295                // File("/Users/jon/Desktop/merlin/tunnel480.mov"));
296                final XuggleVideo xv = new XuggleVideo(new File("/Users/jon/Downloads/ewap_dataset/seq_hotel/seq_hotel.avi"));
297
298                final AdaptiveMoGBackgroundEstimator<MBFImage> proc = new AdaptiveMoGBackgroundEstimator<MBFImage>(xv);
299                for (final MBFImage img : proc) {
300                        DisplayUtilities.displayName(img, "video");
301                }
302        }
303}