001package org.openimaj.demos.sandbox.tldcpp.detector;
002
003import org.openimaj.demos.sandbox.tldcpp.videotld.TLDUtil;
004import org.openimaj.image.FImage;
005import org.openimaj.math.geometry.shape.Rectangle;
006
007/**
 * The detector cascade prepares, initiates and controls the 3 underlying
009 * detection steps in the TLD algorithm. Each step is more accurate than the last,
010 * and is also more costly. The name of the game of this algorithm is to stop as soon
011 * as possible. If a permissive but fast classifier says you are not correct, then odds
012 * are you are DEFINITELY not correct.
013 * 
 * The first step is a variance check: is the variance of the window at least equal to the variance detected in the patch?
 * This is done using {@link VarianceFilter} and uses integral images. Very fast, but an easy check to pass.
016 * 
017 * The second step is a {@link EnsembleClassifier}. This is more complicated but boils down to checking 
018 * very few pixels of a patch against those same few pixels in previously seen correct patches and 
019 * previously seen incorrect patches. Better than dumb variance, but also permissive
020 * 
 * The final step is a {@link NNClassifier} which quite literally does a normalised correlation between the
 * patch and various positive and negative examples. An excellent way to see if a patch is more similar to
 * correct things than incorrect things, but obviously massively slow, so this is only done when the other two
 * classifiers are sure.
025 * 
026 * Generally the first two drop 26,000 patches and 30 or so are checked with normalised correlation. 
027 * This is where TLD gets its detection speed.
028 * 
029 * The detector works across an overlapping grid of windows at different scales. These scales are controlled
030 * by the size of the original box selected. The idea is that instead of checking arbitrary windows the grid windows
031 * are checked. This means that you get checks across scales and x,y locations. The whole point is that you 
032 * make quick decisions about not checking completely incorrect windows quickly.
033 * 
034 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
035 *
036 */
037public class DetectorCascade {
038        /**
039         * The size to which TLD windows are reduced in order to be checked by {@link NNClassifier}
040         */
041        public static final int TLD_WINDOW_SIZE = 5;
042        static final int TLD_WINDOW_OFFSET_SIZE = 6;
043        
044        
045        private int numScales;
046        private Rectangle[] scales;
047        //Configurable members
048        /**
049         * The minimum scale factor to check as compared to the selected object dims.
050         */
051        public int minScale;
052        /**
053         * The maximum scale factor to check as compared to the selected object dims.
054         */
055        public int maxScale;
056        /**
057         * Whether a shift value should be applied to all scales
058         */
059        public boolean useShift;
060        /**
061         * The shift applied, 0.1f by default
062         */
063        public float shift;
064        /**
065         * The minimum window size, defaults to 25, a 5x5 pixel area. fair.
066         */
067        public int minSize;
068        
069        /**
070         * The number of features per tree in the {@link EnsembleClassifier}
071         */
072        public int numFeatures;
073        /**
074         * The number of trees in the {@link EnsembleClassifier}
075         */
076        public int numTrees;
077
078        //Needed for init
079        private int imgWidth;
080        private int imgHeight;
081        private int objWidth;
082        private int objHeight;
083
084        private int numWindows;
085        private ScaleIndexRectangle[] windows;
086        private int[][] windowOffsets; // CONCENTRATE. entries: [[x1-1,y1-1],[x1-1,y2],[x2,y1-1],[x2,y2], [featuresForScaleIndex], [areaOfBoundBox]
087        
088        VarianceFilter varianceFilter;
089        EnsembleClassifier ensembleClassifier;
090        Clustering clustering;
091        NNClassifier nnClassifier;
092
093        DetectionResult detectionResult;
094
095        //State data
096        private boolean initialised;
097
098        /**
099         * Initialise the cascade and the underlying classifiers using the default values
100         */
101        public DetectorCascade() {
102                objWidth = -1; //MUST be set before calling init
103                objHeight = -1; //MUST be set before calling init
104                useShift = true;
105                imgHeight = -1;
106                imgWidth = -1;
107
108                shift=0.1f;
109                minScale=-10;
110                maxScale=10;
111                minSize = 25;
112
113                numTrees = 13;
114                numFeatures = 10;
115
116                initialised = false;
117
118                varianceFilter = new VarianceFilter();
119                ensembleClassifier = new EnsembleClassifier();
120                nnClassifier = new NNClassifier();
121                clustering = new Clustering();
122
123                detectionResult = new DetectionResult();
124        }
125
126        /**
127         * Release all underlying classifiers and rest windows etc.
128         */
129        public void release() {
130                if(!initialised) {
131                        return; //Do nothing
132                }
133
134                initialised = false;
135
136                ensembleClassifier.release();
137                nnClassifier.release();
138                
139                clustering.release();
140
141                numWindows = 0;
142                numScales = 0;
143
144                scales = null;
145                windows = null;
146                windowOffsets = null;
147
148                objWidth = -1;
149                objHeight = -1;
150
151                detectionResult.release();
152        }
153
154        /**
155         * initialise the cascade, prepare the windows and the classifiers
156         * @throws Exception
157         */
158        public void init() throws Exception {
159                if(imgWidth == -1 || imgHeight == -1 || objWidth == -1 || objHeight == -1) {
160                        throw new Exception("The image or object dimentions were not set");
161                }
162
163                initWindowsAndScales();
164                initWindowOffsets();
165
166                propagateMembers();
167
168                ensembleClassifier.init();
169
170                initialised = true;
171        }
172
173        private void propagateMembers() {
174                detectionResult.init(numWindows, numTrees);
175
176                varianceFilter.windowOffsets = windowOffsets;
177                ensembleClassifier.setWindowOffsets(windowOffsets);
178                ensembleClassifier.setNumScales(numScales);
179                ensembleClassifier.setScales(scales);
180                ensembleClassifier.numFeatures = numFeatures;
181                ensembleClassifier.numTrees = numTrees;
182                nnClassifier.windows = windows;
183                clustering.windows = windows;
184
185                varianceFilter.detectionResult = detectionResult;
186                ensembleClassifier.detectionResult = detectionResult;
187                nnClassifier.detectionResult = detectionResult;
188                clustering.detectionResult = detectionResult;
189        }
190
191        private void initWindowOffsets() {
192                windowOffsets = new int[TLD_WINDOW_OFFSET_SIZE*numWindows][];
193                int offIndex = 0;
194
195//              int windowSize = TLD_WINDOW_SIZE;
196
197                for (int i = 0; i < numWindows; i++) {
198                        ScaleIndexRectangle windowRect = windows[i];
199                        int x = (int)windowRect.x;
200                        int y = (int)windowRect.y;
201                        int width = (int)windowRect.width;
202                        int height = (int)windowRect.height;
203                        int scaleIndex = windowRect.scaleIndex;
204                        windowOffsets[offIndex++] = new int[]{x-1,y-1}; //sub2idx(window[0]-1,window[1]-1,imgWidthStep); // x1-1,y1-1
205                        windowOffsets[offIndex++] = new int[]{x-1,y+height-1}; // x1-1,y2
206                        windowOffsets[offIndex++] = new int[]{x+width-1,y-1}; // x2,y1-1
207                        windowOffsets[offIndex++] = new int[]{x+width-1,y+height-1}; // x2,y2
208                        windowOffsets[offIndex++] = new int[]{scaleIndex*2*numFeatures*numTrees}; // pointer to features for this scale
209                        windowOffsets[offIndex++] = new int[]{width*height};//Area of bounding box
210                }
211        }
212
213        private void initWindowsAndScales() {
214                int scanAreaX = 1; // It is important to start with 1/1, because the integral images aren't defined at pos(-1,-1) due to speed reasons
215                int scanAreaY = 1;
216                int scanAreaW = imgWidth-1;
217                int scanAreaH = imgHeight-1;
218
219                int windowIndex = 0;
220
221            scales = new Rectangle[maxScale-minScale+1];
222
223                numWindows = 0;
224
225                int scaleIndex = 0;
226                for(int i = minScale; i <= maxScale; i++) {
227                        float scale = (float) Math.pow(1.2,i);
228                        int w = (int)(objWidth*scale);
229                        int h = (int)(objHeight*scale);
230                        int ssw,ssh;
231                        if(useShift) {
232                                ssw = (int) Math.max(1,w*shift);
233                                ssh = (int) Math.max(1,h*shift);
234                        } else {
235                                ssw = 1;
236                                ssh = 1;
237                        }
238
239                        if(w < minSize || h < minSize || w > scanAreaW || h > scanAreaH) continue;
240                        scales[scaleIndex] = new Rectangle(0,0,w,h);
241
242                        scaleIndex++;
243
244                        numWindows += Math.floor((float)(scanAreaW - w + ssw)/ssw)*Math.floor((float)(scanAreaH - h + ssh) / ssh);
245                }
246
247                numScales = scaleIndex;
248
249                windows = new ScaleIndexRectangle[numWindows];
250
251                for(scaleIndex = 0; scaleIndex < numScales; scaleIndex++) {
252                        int w = (int) scales[scaleIndex].width;
253                        int h = (int) scales[scaleIndex].height;
254
255                        int ssw,ssh;
256                        if(useShift) {
257                                ssw = (int) Math.max(1,w*shift);
258                                ssh = (int) Math.max(1,h*shift);
259                        } else {
260                                ssw = 1;
261                                ssh = 1;
262                        }
263
264                        for(int y = scanAreaY; y + h <= scanAreaY +scanAreaH; y+=ssh) {
265                                for(int x = scanAreaX; x + w <= scanAreaX + scanAreaW; x+=ssw) {
266                                        int bb = windowIndex;
267                                        windows[bb] = new ScaleIndexRectangle();
268                                        windows[bb].x = x;
269                                        windows[bb].y = y;
270                                        windows[bb].width = w;
271                                        windows[bb].height = h;
272                                        windows[bb].scaleIndex = scaleIndex;
273
274                                        windowIndex++;
275                                }
276                        }
277
278                }
279
280                assert(windowIndex == numWindows);
281        }
282
283        /**
284         * In their current state, apply each classifier to each window in order of 
285         * computational simplicity. i.e. variance, then ensembleclassifier then nnclassifier.
286         * 
287         * If any windows remain, call {@link Clustering} instance and cluster the selected windows.
288         * @param img 
289         */
290        public void detect(FImage img) {
291                //For every bounding box, the output is confidence, pattern, variance
292
293                detectionResult.reset();
294
295                if(!initialised) {
296                        return;
297                }
298
299                //Prepare components
300                // Forget the foreground detector for now, this is an optimisation
301//              foregroundDetector.nextIteration(img); //Calculates foreground
302                varianceFilter.nextIteration(img); //Calculates integral images
303                ensembleClassifier.nextIteration(img);
304                
305//              Rectangle windowRect = new Rectangle();
306//              System.out.println("Number of windows is: " + numWindows);
307                detectionResult.varCount = 0;
308                detectionResult.ensCount = 0;
309                detectionResult.nnClassCount = 0;
310                for (int i = 0; i < numWindows; i++) {
311
312//                      int * window = &windows[TLD_WINDOW_SIZE*i];
313//                      int window = i;
314//                      windowRect = windows[window];
315//                      if(foregroundDetector.isActive()) {
316//                              boolean isInside = false;
317//
318//                              for (Rectangle rect : this.detectionResult.fgList) {
319//
320////                                    int bgBox[4];
321////                                    tldRectToArray(detectionResult->fgList->at(j), bgBox);
322////                                    if(tldIsInside(window,bgBox)) { //TODO: This is inefficient and should be replaced by a quadtree
323////                                            isInside = true;
324////                                    }
325//                                      if(windowRect.isInside(rect)){
326//                                              isInside = true;
327//                                              break;
328//                                      }
329//                              }
330//
331//                              if(!isInside) {
332//                                      detectionResult.posteriors[i] = 0;
333//                                      continue;
334//                              }
335//                      }
336                        
337                        if(!varianceFilter.filter(i)) {
338                                detectionResult.posteriors[i] = 0;
339                                detectionResult.varCount++;
340                                continue;
341                        }
342
343                        if(!ensembleClassifier.filter(i)) {
344                                detectionResult.ensCount++;
345                                continue;
346                        }
347
348                        if(!nnClassifier.filter(img, i)) {
349                                detectionResult.nnClassCount++;
350                                continue;
351                        }
352
353                        detectionResult.confidentIndices.add(i);
354
355
356                }
357//              System.out.println("Counts: " + varCount + ", " + ensCount + ", " + nnClassCount);
358                //Cluster
359                clustering.clusterConfidentIndices();
360
361                detectionResult.containsValidData = true;
362        }
363
364        /**
365         * FIXME? arguably this should change as the BB changes? would that be too slow?
366         * @param width sets the underlying scale windows in which to search based on factors of the original object detected
367         */
368        public void setObjWidth(int width) {
369                this.objWidth = width;
370        }
371        
372        /**
373         * FIXME? arguably this should change as the BB changes? would that be too slow?
374         * @param height sets the underlying scale windows in which to search based on factors of the original object detected
375         */
376        public void setObjHeight(int height) {
377                this.objHeight = height;
378        }
379
380        /**
381         * resets the underlying {@link DetectionResult} instance
382         */
383        public void cleanPreviousData() {
384                this.detectionResult.reset();
385        }
386
387        /**
388         * @return total number of windows searching within
389         */
390        public int getNumWindows() {
391                return this.numWindows;
392        }
393
394        /**
395         * The overlap of a bounding box with each underlying window. An assumption is
396         * made that overlap is the same size {@link #getNumWindows()}
397         * @param bb
398         * @param overlap the output
399         */
400        public void windowOverlap(Rectangle bb, float[] overlap) {
401                TLDUtil.tldOverlap(windows, numWindows,bb, overlap);
402                
403        }
404
405        /**
406         * @param idx
407         * @return the underlying {@link ScaleIndexRectangle} instance which is the idxth window
408         */
409        public ScaleIndexRectangle getWindow(int idx) {
410                return this.windows[idx];
411        }
412
413        /**
414         * @return whether the cascade has been correctly initialised (i.e. whether {@link #init()} has been called)
415         */
416        public boolean isInitialised() {
417                return initialised;
418        }
419
420        /**
421         * @param imgWidth the width of images to expect
422         */
423        public void setImgWidth(int imgWidth) {
424                this.imgWidth = imgWidth; 
425        }
426        
427        /**
428         * @param imgHeight the height of images to expect
429         */
430        public void setImgHeight(int imgHeight) {
431                this.imgHeight = imgHeight; 
432        }
433
434        /**
435         * @return the underlying {@link NNClassifier} instance
436         */
437        public NNClassifier getNNClassifier() {
438                return this.nnClassifier;
439        }
440
441        /**
442         * @return the underlying {@link DetectionResult} instance
443         */
444        public DetectionResult getDetectionResult() {
445                return this.detectionResult;
446        }
447
448        /**
449         * @return the underlying {@link VarianceFilter} instance
450         */
451        public VarianceFilter getVarianceFilter() {
452                return this.varianceFilter;
453        }
454
455        /**
456         * @return the underlying {@link EnsembleClassifier} instance
457         */
458        public EnsembleClassifier getEnsembleClassifier() {
459                return this.ensembleClassifier;
460        }
461}