001package org.openimaj.demos.sandbox.tldcpp.detector; 002 003import org.openimaj.demos.sandbox.tldcpp.videotld.TLDUtil; 004import org.openimaj.image.FImage; 005import org.openimaj.math.geometry.shape.Rectangle; 006 007/** 008 * The detector cascade prepares, inititates and controls the 3 underlying 009 * detection steps in the TLD algorithm. Each step is more accurate than the last, 010 * and is also more costly. The name of the game of this algorithm is to stop as soon 011 * as possible. If a permissive but fast classifier says you are not correct, then odds 012 * are you are DEFINITELY not correct. 013 * 014 * The first step is a Variance check. If the variance at least equal to the variance detected in the patch. 015 * This is done using {@link VarianceFilter} and uses integral images. very fast. but an easy check to pass 016 * 017 * The second step is a {@link EnsembleClassifier}. This is more complicated but boils down to checking 018 * very few pixels of a patch against those same few pixels in previously seen correct patches and 019 * previously seen incorrect patches. Better than dumb variance, but also permissive 020 * 021 * The final step is a {@link NNClassifier} which quite literally does a normalised correlation between the 022 * patch and variance positive and negative examples. An excellent way to see if a patch is more similar to 023 * correct things than incorrect things, but obviously massively slow so this is only done when the other two classifiers 024 * are sure. 025 * 026 * Generally the first two drop 26,000 patches and 30 or so are checked with normalised correlation. 027 * This is where TLD gets its detection speed. 028 * 029 * The detector works across an overlapping grid of windows at different scales. These scales are controlled 030 * by the size of the original box selected. The idea is that instead of checking arbitrary windows the grid windows 031 * are checked. This means that you get checks across scales and x,y locations. The whole point is that you 032 * make quick decisions about not checking completely incorrect windows quickly. 033 * 034 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 035 * 036 */ 037public class DetectorCascade { 038 /** 039 * The size to which TLD windows are reduced in order to be checked by {@link NNClassifier} 040 */ 041 public static final int TLD_WINDOW_SIZE = 5; 042 static final int TLD_WINDOW_OFFSET_SIZE = 6; 043 044 045 private int numScales; 046 private Rectangle[] scales; 047 //Configurable members 048 /** 049 * The minimum scale factor to check as compared to the selected object dims. 050 */ 051 public int minScale; 052 /** 053 * The maximum scale factor to check as compared to the selected object dims. 054 */ 055 public int maxScale; 056 /** 057 * Whether a shift value should be applied to all scales 058 */ 059 public boolean useShift; 060 /** 061 * The shift applied, 0.1f by default 062 */ 063 public float shift; 064 /** 065 * The minimum window size, defaults to 25, a 5x5 pixel area. fair. 066 */ 067 public int minSize; 068 069 /** 070 * The number of features per tree in the {@link EnsembleClassifier} 071 */ 072 public int numFeatures; 073 /** 074 * The number of trees in the {@link EnsembleClassifier} 075 */ 076 public int numTrees; 077 078 //Needed for init 079 private int imgWidth; 080 private int imgHeight; 081 private int objWidth; 082 private int objHeight; 083 084 private int numWindows; 085 private ScaleIndexRectangle[] windows; 086 private int[][] windowOffsets; // CONCENTRATE. entries: [[x1-1,y1-1],[x1-1,y2],[x2,y1-1],[x2,y2], [featuresForScaleIndex], [areaOfBoundBox] 087 088 VarianceFilter varianceFilter; 089 EnsembleClassifier ensembleClassifier; 090 Clustering clustering; 091 NNClassifier nnClassifier; 092 093 DetectionResult detectionResult; 094 095 //State data 096 private boolean initialised; 097 098 /** 099 * Initialise the cascade and the underlying classifiers using the default values 100 */ 101 public DetectorCascade() { 102 objWidth = -1; //MUST be set before calling init 103 objHeight = -1; //MUST be set before calling init 104 useShift = true; 105 imgHeight = -1; 106 imgWidth = -1; 107 108 shift=0.1f; 109 minScale=-10; 110 maxScale=10; 111 minSize = 25; 112 113 numTrees = 13; 114 numFeatures = 10; 115 116 initialised = false; 117 118 varianceFilter = new VarianceFilter(); 119 ensembleClassifier = new EnsembleClassifier(); 120 nnClassifier = new NNClassifier(); 121 clustering = new Clustering(); 122 123 detectionResult = new DetectionResult(); 124 } 125 126 /** 127 * Release all underlying classifiers and rest windows etc. 128 */ 129 public void release() { 130 if(!initialised) { 131 return; //Do nothing 132 } 133 134 initialised = false; 135 136 ensembleClassifier.release(); 137 nnClassifier.release(); 138 139 clustering.release(); 140 141 numWindows = 0; 142 numScales = 0; 143 144 scales = null; 145 windows = null; 146 windowOffsets = null; 147 148 objWidth = -1; 149 objHeight = -1; 150 151 detectionResult.release(); 152 } 153 154 /** 155 * initialise the cascade, prepare the windows and the classifiers 156 * @throws Exception 157 */ 158 public void init() throws Exception { 159 if(imgWidth == -1 || imgHeight == -1 || objWidth == -1 || objHeight == -1) { 160 throw new Exception("The image or object dimentions were not set"); 161 } 162 163 initWindowsAndScales(); 164 initWindowOffsets(); 165 166 propagateMembers(); 167 168 ensembleClassifier.init(); 169 170 initialised = true; 171 } 172 173 private void propagateMembers() { 174 detectionResult.init(numWindows, numTrees); 175 176 varianceFilter.windowOffsets = windowOffsets; 177 ensembleClassifier.setWindowOffsets(windowOffsets); 178 ensembleClassifier.setNumScales(numScales); 179 ensembleClassifier.setScales(scales); 180 ensembleClassifier.numFeatures = numFeatures; 181 ensembleClassifier.numTrees = numTrees; 182 nnClassifier.windows = windows; 183 clustering.windows = windows; 184 185 varianceFilter.detectionResult = detectionResult; 186 ensembleClassifier.detectionResult = detectionResult; 187 nnClassifier.detectionResult = detectionResult; 188 clustering.detectionResult = detectionResult; 189 } 190 191 private void initWindowOffsets() { 192 windowOffsets = new int[TLD_WINDOW_OFFSET_SIZE*numWindows][]; 193 int offIndex = 0; 194 195// int windowSize = TLD_WINDOW_SIZE; 196 197 for (int i = 0; i < numWindows; i++) { 198 ScaleIndexRectangle windowRect = windows[i]; 199 int x = (int)windowRect.x; 200 int y = (int)windowRect.y; 201 int width = (int)windowRect.width; 202 int height = (int)windowRect.height; 203 int scaleIndex = windowRect.scaleIndex; 204 windowOffsets[offIndex++] = new int[]{x-1,y-1}; //sub2idx(window[0]-1,window[1]-1,imgWidthStep); // x1-1,y1-1 205 windowOffsets[offIndex++] = new int[]{x-1,y+height-1}; // x1-1,y2 206 windowOffsets[offIndex++] = new int[]{x+width-1,y-1}; // x2,y1-1 207 windowOffsets[offIndex++] = new int[]{x+width-1,y+height-1}; // x2,y2 208 windowOffsets[offIndex++] = new int[]{scaleIndex*2*numFeatures*numTrees}; // pointer to features for this scale 209 windowOffsets[offIndex++] = new int[]{width*height};//Area of bounding box 210 } 211 } 212 213 private void initWindowsAndScales() { 214 int scanAreaX = 1; // It is important to start with 1/1, because the integral images aren't defined at pos(-1,-1) due to speed reasons 215 int scanAreaY = 1; 216 int scanAreaW = imgWidth-1; 217 int scanAreaH = imgHeight-1; 218 219 int windowIndex = 0; 220 221 scales = new Rectangle[maxScale-minScale+1]; 222 223 numWindows = 0; 224 225 int scaleIndex = 0; 226 for(int i = minScale; i <= maxScale; i++) { 227 float scale = (float) Math.pow(1.2,i); 228 int w = (int)(objWidth*scale); 229 int h = (int)(objHeight*scale); 230 int ssw,ssh; 231 if(useShift) { 232 ssw = (int) Math.max(1,w*shift); 233 ssh = (int) Math.max(1,h*shift); 234 } else { 235 ssw = 1; 236 ssh = 1; 237 } 238 239 if(w < minSize || h < minSize || w > scanAreaW || h > scanAreaH) continue; 240 scales[scaleIndex] = new Rectangle(0,0,w,h); 241 242 scaleIndex++; 243 244 numWindows += Math.floor((float)(scanAreaW - w + ssw)/ssw)*Math.floor((float)(scanAreaH - h + ssh) / ssh); 245 } 246 247 numScales = scaleIndex; 248 249 windows = new ScaleIndexRectangle[numWindows]; 250 251 for(scaleIndex = 0; scaleIndex < numScales; scaleIndex++) { 252 int w = (int) scales[scaleIndex].width; 253 int h = (int) scales[scaleIndex].height; 254 255 int ssw,ssh; 256 if(useShift) { 257 ssw = (int) Math.max(1,w*shift); 258 ssh = (int) Math.max(1,h*shift); 259 } else { 260 ssw = 1; 261 ssh = 1; 262 } 263 264 for(int y = scanAreaY; y + h <= scanAreaY +scanAreaH; y+=ssh) { 265 for(int x = scanAreaX; x + w <= scanAreaX + scanAreaW; x+=ssw) { 266 int bb = windowIndex; 267 windows[bb] = new ScaleIndexRectangle(); 268 windows[bb].x = x; 269 windows[bb].y = y; 270 windows[bb].width = w; 271 windows[bb].height = h; 272 windows[bb].scaleIndex = scaleIndex; 273 274 windowIndex++; 275 } 276 } 277 278 } 279 280 assert(windowIndex == numWindows); 281 } 282 283 /** 284 * In their current state, apply each classifier to each window in order of 285 * computational simplicity. i.e. variance, then ensembleclassifier then nnclassifier. 286 * 287 * If any windows remain, call {@link Clustering} instance and cluster the selected windows. 288 * @param img 289 */ 290 public void detect(FImage img) { 291 //For every bounding box, the output is confidence, pattern, variance 292 293 detectionResult.reset(); 294 295 if(!initialised) { 296 return; 297 } 298 299 //Prepare components 300 // Forget the foreground detector for now, this is an optimisation 301// foregroundDetector.nextIteration(img); //Calculates foreground 302 varianceFilter.nextIteration(img); //Calculates integral images 303 ensembleClassifier.nextIteration(img); 304 305// Rectangle windowRect = new Rectangle(); 306// System.out.println("Number of windows is: " + numWindows); 307 detectionResult.varCount = 0; 308 detectionResult.ensCount = 0; 309 detectionResult.nnClassCount = 0; 310 for (int i = 0; i < numWindows; i++) { 311 312// int * window = &windows[TLD_WINDOW_SIZE*i]; 313// int window = i; 314// windowRect = windows[window]; 315// if(foregroundDetector.isActive()) { 316// boolean isInside = false; 317// 318// for (Rectangle rect : this.detectionResult.fgList) { 319// 320//// int bgBox[4]; 321//// tldRectToArray(detectionResult->fgList->at(j), bgBox); 322//// if(tldIsInside(window,bgBox)) { //TODO: This is inefficient and should be replaced by a quadtree 323//// isInside = true; 324//// } 325// if(windowRect.isInside(rect)){ 326// isInside = true; 327// break; 328// } 329// } 330// 331// if(!isInside) { 332// detectionResult.posteriors[i] = 0; 333// continue; 334// } 335// } 336 337 if(!varianceFilter.filter(i)) { 338 detectionResult.posteriors[i] = 0; 339 detectionResult.varCount++; 340 continue; 341 } 342 343 if(!ensembleClassifier.filter(i)) { 344 detectionResult.ensCount++; 345 continue; 346 } 347 348 if(!nnClassifier.filter(img, i)) { 349 detectionResult.nnClassCount++; 350 continue; 351 } 352 353 detectionResult.confidentIndices.add(i); 354 355 356 } 357// System.out.println("Counts: " + varCount + ", " + ensCount + ", " + nnClassCount); 358 //Cluster 359 clustering.clusterConfidentIndices(); 360 361 detectionResult.containsValidData = true; 362 } 363 364 /** 365 * FIXME? arguably this should change as the BB changes? would that be too slow? 366 * @param width sets the underlying scale windows in which to search based on factors of the original object detected 367 */ 368 public void setObjWidth(int width) { 369 this.objWidth = width; 370 } 371 372 /** 373 * FIXME? arguably this should change as the BB changes? would that be too slow? 374 * @param height sets the underlying scale windows in which to search based on factors of the original object detected 375 */ 376 public void setObjHeight(int height) { 377 this.objHeight = height; 378 } 379 380 /** 381 * resets the underlying {@link DetectionResult} instance 382 */ 383 public void cleanPreviousData() { 384 this.detectionResult.reset(); 385 } 386 387 /** 388 * @return total number of windows searching within 389 */ 390 public int getNumWindows() { 391 return this.numWindows; 392 } 393 394 /** 395 * The overlap of a bounding box with each underlying window. An assumption is 396 * made that overlap is the same size {@link #getNumWindows()} 397 * @param bb 398 * @param overlap the output 399 */ 400 public void windowOverlap(Rectangle bb, float[] overlap) { 401 TLDUtil.tldOverlap(windows, numWindows,bb, overlap); 402 403 } 404 405 /** 406 * @param idx 407 * @return the underlying {@link ScaleIndexRectangle} instance which is the idxth window 408 */ 409 public ScaleIndexRectangle getWindow(int idx) { 410 return this.windows[idx]; 411 } 412 413 /** 414 * @return whether the cascade has been correctly initialised (i.e. whether {@link #init()} has been called) 415 */ 416 public boolean isInitialised() { 417 return initialised; 418 } 419 420 /** 421 * @param imgWidth the width of images to expect 422 */ 423 public void setImgWidth(int imgWidth) { 424 this.imgWidth = imgWidth; 425 } 426 427 /** 428 * @param imgHeight the height of images to expect 429 */ 430 public void setImgHeight(int imgHeight) { 431 this.imgHeight = imgHeight; 432 } 433 434 /** 435 * @return the underlying {@link NNClassifier} instance 436 */ 437 public NNClassifier getNNClassifier() { 438 return this.nnClassifier; 439 } 440 441 /** 442 * @return the underlying {@link DetectionResult} instance 443 */ 444 public DetectionResult getDetectionResult() { 445 return this.detectionResult; 446 } 447 448 /** 449 * @return the underlying {@link VarianceFilter} instance 450 */ 451 public VarianceFilter getVarianceFilter() { 452 return this.varianceFilter; 453 } 454 455 /** 456 * @return the underlying {@link EnsembleClassifier} instance 457 */ 458 public EnsembleClassifier getEnsembleClassifier() { 459 return this.ensembleClassifier; 460 } 461}