/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   *  Redistributions of source code must retain the above copyright notice,
 *      this list of conditions and the following disclaimer.
 *
 *   *  Redistributions in binary form must reproduce the above copyright notice,
 *      this list of conditions and the following disclaimer in the documentation
 *      and/or other materials provided with the distribution.
 *
 *   *  Neither the name of the University of Southampton nor the names of its
 *      contributors may be used to endorse or promote products derived from this
 *      software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.video.processing.motion;

import org.apache.commons.math.complex.Complex;
import org.apache.commons.math.linear.Array2DRowFieldMatrix;
import org.apache.commons.math.linear.FieldLUDecompositionImpl;
import org.openimaj.image.FImage;
import org.openimaj.image.analysis.algorithm.TemplateMatcher;
import org.openimaj.image.analysis.algorithm.TemplateMatcher.Mode;
import org.openimaj.image.pixel.FValuePixel;
import org.openimaj.image.processing.algorithm.FourierTransform;
import org.openimaj.math.geometry.point.Point2d;
import org.openimaj.math.geometry.point.Point2dImpl;
import org.openimaj.math.geometry.shape.Rectangle;
import org.openimaj.video.VideoFrame;
import org.openimaj.video.VideoSubFrame;

import edu.emory.mathcs.jtransforms.fft.FloatFFT_2D;

/**
 * A set of algorithms for estimating the motion between video frames.
 *
 * @author David Dupplaw (dpd@ecs.soton.ac.uk)
 * @created 1 Mar 2012
 *
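 * <p>
 * A minimal usage sketch: construct one of the concrete algorithms below and
 * hand it to a motion estimator from this package. The
 * {@code GridMotionEstimator} driver and its constructor arguments here are
 * assumptions to be checked against that class, not something this class
 * guarantees:
 * </p>
 * <pre>{@code
 * MotionEstimatorAlgorithm alg = new MotionEstimatorAlgorithm.TEMPLATE_MATCH();
 * MotionEstimator me = new GridMotionEstimator(video, alg, 20, 20, true);
 * }</pre>
 *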
 */
public abstract class MotionEstimatorAlgorithm
{

	/**
	 * Within a search window around each sub-image, detect the most likely
	 * match and thus the motion.
	 *
	 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
	 *
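	 * <p>
	 * For example (a sketch using the two constructors defined below):
	 * </p>
	 * <pre>{@code
	 * // defaults: NORM_SUM_SQUARED_DIFFERENCE, half-template-size search
	 * new MotionEstimatorAlgorithm.TEMPLATE_MATCH();
	 * // or with an explicit border proportion and matching mode
	 * new MotionEstimatorAlgorithm.TEMPLATE_MATCH(0.5f, TemplateMatcher.Mode.CORRELATION);
	 * }</pre>
	 *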
	 */
	public static class TEMPLATE_MATCH extends MotionEstimatorAlgorithm {
		private float searchProp;
		private Mode mode;

		/**
		 * Defaults to allowing a maximum movement of half the template size,
		 * using {@link Mode#NORM_SUM_SQUARED_DIFFERENCE}.
		 */
		public TEMPLATE_MATCH() {
			this.searchProp = .5f;
			this.mode = TemplateMatcher.Mode.NORM_SUM_SQUARED_DIFFERENCE;
		}

		/**
		 * Given the template's size, search around a border of size
		 * <code>templateSize * searchWindowBorderProp</code>.
		 *
		 * @param searchWindowBorderProp
		 *            the proportion of the template size to use as the search
		 *            border
		 * @param mode
		 *            the matching mode
		 */
		public TEMPLATE_MATCH(float searchWindowBorderProp, Mode mode) {
			this.searchProp = searchWindowBorderProp;
			this.mode = mode;
		}

		@Override
		Point2d estimateMotion(VideoSubFrame<FImage> img1sub,
				VideoSubFrame<FImage>... imagesSub)
		{
			final VideoFrame<FImage> current = img1sub.extract();
			final VideoFrame<FImage> prev = imagesSub[0];
			final Rectangle prevSearchRect = imagesSub[0].roi;

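			// Form the search window as a centred sub-rectangle of the
			// previous frame's ROI, scaled by searchProp.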
			final int sw = (int) (prevSearchRect.width * this.searchProp);
			final int sh = (int) (prevSearchRect.height * this.searchProp);
			final int sx = (int) (prevSearchRect.x + ((prevSearchRect.width - sw) / 2.f));
			final int sy = (int) (prevSearchRect.y + ((prevSearchRect.height - sh) / 2.f));

			final Rectangle searchRect = new Rectangle(sx, sy, sw, sh);
			final TemplateMatcher matcher = new TemplateMatcher(current.frame, mode, searchRect);
			matcher.analyseImage(prev.frame);
			final FValuePixel[] responses = matcher.getBestResponses(1);
			final FValuePixel firstBest = responses[0];
			final Point2d centerOfGrid = img1sub.roi.calculateCentroid();
			if (firstBest == null || Float.isNaN(firstBest.value))
				return new Point2dImpl(0, 0);

			return centerOfGrid.minus(firstBest);
		}
	}

	/**
	 * Basic phase-correlation algorithm that finds peaks in the cross-power
	 * spectrum between two images. This implementation does not provide
	 * sub-pixel accuracy.
	 */
	public static class PHASE_CORRELATION extends MotionEstimatorAlgorithm
	{
		/**
		 * Calculate the estimated motion vector between
		 * <code>imagesSub[0]</code>, the first image in the sequence, and
		 * <code>img2sub</code>, the second. This method uses phase
		 * correlation - the fact that translations in space can be seen as
		 * shifts in phase in the frequency domain. The returned vector will
		 * have a maximum horizontal displacement of half the image width, a
		 * minimum of minus half the width, and similarly for the vertical
		 * displacement and the height.
		 *
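		 * <p>
		 * Schematically, for Fourier transforms <code>F1</code> and
		 * <code>F2</code> of the two frames, the peak of the inverse
		 * transform of the normalised cross-power spectrum
		 * <code>F1 * conj(F2)</code> gives the translation. (That is the
		 * textbook formulation; the code below normalises by the magnitude of
		 * the matrix determinant rather than element-wise.)
		 * </p>
		 *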
		 * @param img2sub
		 *            The second image in the sequence
		 * @param imagesSub
		 *            The previous images in the sequence, most recent first
		 * @return the estimated motion vector as a {@link Point2d} in absolute
		 *         x and y coordinates.
		 */
		@Override
		public Point2d estimateMotion(VideoSubFrame<FImage> img2sub,
				VideoSubFrame<FImage>... imagesSub)
		{
			// The previous image will be the first in the images array
			final FImage img1 = imagesSub[0].extract().frame;
			final VideoFrame<FImage> img2 = img2sub.extract();

			// No previous frame?
			if (img1 == null)
				return new Point2dImpl(0, 0);

			// The images must have the same shape and must be square
			if (img1.getRows() != img2.frame.getRows() ||
					img1.getCols() != img2.frame.getCols() ||
					img1.getCols() != img2.frame.getRows())
				return new Point2dImpl(0, 0);

			// Prepare and perform an FFT for each of the incoming images.
			final int h = img1.getRows();
			final int w = img1.getCols();

			try
			{
				final FloatFFT_2D fft1 = new FloatFFT_2D(h, w);
				final FloatFFT_2D fft2 = new FloatFFT_2D(h, w);
				final float[][] data1 = FourierTransform.prepareData(img1, h, w, false);
				final float[][] data2 = FourierTransform.prepareData(img2.frame, h, w, false);
				fft1.complexForward(data1);
				fft2.complexForward(data2);

				// Multiply (element-wise) the fft and the conjugate of the fft.
				Complex[][] cfft = new Complex[h][w];
				for (int y = 0; y < h; y++)
				{
					for (int x = 0; x < w; x++)
					{
						final float re1 = data1[y][x * 2];
						final float im1 = data1[y][1 + x * 2];
						final float re2 = data2[y][x * 2];
						final float im2 = data2[y][1 + x * 2];

						final Complex c1 = new Complex(re1, im1);
						final Complex c2 = new Complex(re2, -im2);
						cfft[y][x] = c1.multiply(c2);
					}
				}
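				// Note: textbook phase correlation would normalise each
				// element of the cross-power spectrum by its own magnitude;
				// the code below instead scales the whole matrix by the
				// reciprocal of the magnitude of its determinant.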

				// ----------------------------------------
				// Normalise by the determinant
				// ----------------------------------------
				// First calculate the determinant...
				final Array2DRowFieldMatrix<Complex> cmat =
						new Array2DRowFieldMatrix<Complex>(cfft);
				final FieldLUDecompositionImpl<Complex> luDecomp =
						new FieldLUDecompositionImpl<Complex>(cmat);
				final Complex det = luDecomp.getDeterminant();

				// ...then scale by its reciprocal magnitude and convert back
				// to an array for the inverse FFT. Note that scalarMultiply()
				// returns a new matrix rather than mutating cmat, so its
				// result must be captured.
				cfft = cmat.scalarMultiply(new Complex(1d / det.abs(), 0)).getData();
				for (int y = 0; y < h; y++)
				{
					for (int x = 0; x < w; x++)
					{
						data1[y][x * 2] = (float) cfft[y][x].getReal();
						data1[y][1 + x * 2] = (float) cfft[y][x].getImaginary();
					}
				}

				// Perform the inverse FFT
				fft1.complexInverse(data1, false);

				// Get the data back out
				FourierTransform.unprepareData(data1, img1, false);

				// Get the estimated motion vector from the peak in the space
				final FValuePixel p = img1.maxPixel();
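				// Peaks past the half-way point wrap around to negative
				// displacements, so unwrap before negating.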
				return new Point2dImpl(
						-(p.x > w / 2 ? p.x - w : p.x),
						-(p.y > h / 2 ? p.y - h : p.y));
			} catch (final Exception e)
			{
				return new Point2dImpl(0, 0);
			}
		}
	}

	/**
	 * Estimate the motion to the given sub-image, <code>img1sub</code>, from
	 * the previous frames. The previous frames are given in reverse order, so
	 * that imagesSub[0] is the previous frame, imagesSub[1] the frame before
	 * that, and so on. The number of frames given will be at most that
	 * returned by {@link #requiredNumberOfFrames()}; it may be fewer near the
	 * beginning of the video. If you receive fewer frames than you require,
	 * return an empty motion vector, that is (0,0).
	 *
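	 * <p>
	 * For example, an implementation needing two previous frames might guard
	 * as follows (a sketch, not taken from any subclass above):
	 * </p>
	 * <pre>{@code
	 * if (imagesSub.length < 2)
	 *     return new Point2dImpl(0, 0);
	 * }</pre>
	 *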
	 * @param img1sub
	 *            The image to which we want to estimate the motion.
	 * @param imagesSub
	 *            The previous frames in reverse order
	 * @return The estimated motion vector.
	 */
	abstract Point2d estimateMotion(VideoSubFrame<FImage> img1sub,
			VideoSubFrame<FImage>... imagesSub);

	/**
	 * The number of previous frames required by the motion estimation
	 * algorithm. The default is 1, which means the algorithm will only
	 * receive the previous frame. If more are required, override this method
	 * and return the required number, e.g. as sketched below.
	 *
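	 * <pre>{@code
	 * // hypothetical subclass needing two previous frames
	 * protected int requiredNumberOfFrames() {
	 *     return 2;
	 * }
	 * }</pre>
	 *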
	 * @return The required number of frames to pass to the algorithm.
	 */
	protected int requiredNumberOfFrames()
	{
		return 1;
	}
}