/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is JTransforms.
 *
 * The Initial Developer of the Original Code is
 * Piotr Wendykier, Emory University.
 * Portions created by the Initial Developer are Copyright (C) 2007-2009
 * the Initial Developer. All Rights Reserved.
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

package edu.emory.mathcs.jtransforms.dct;

import java.util.concurrent.Future;

import edu.emory.mathcs.utils.ConcurrencyUtils;

/**
 * Computes 2D Discrete Cosine Transform (DCT) of single precision data. The
 * sizes of both dimensions can be arbitrary numbers. This is a parallel
 * implementation of split-radix and mixed-radix algorithms optimized for SMP
 * systems. <br>
 * <br>
 * Part of the code is derived from General Purpose FFT Package written by Takuya Ooura
 * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
 *
 * @author Piotr Wendykier (piotr.wendykier@gmail.com)
 *
 */
public class FloatDCT_2D {

    private final int rows;

    private final int columns;

    /**
     * Scratch buffer shared by the power-of-two code paths; each worker uses
     * its own disjoint slice. It stays <code>null</code> when either
     * dimension is not a power of two.
     */
    private float[] t;

    /** 1D transform of length <code>columns</code>, applied along each row. */
    private final FloatDCT_1D dctColumns;

    /** 1D transform of length <code>rows</code>, applied along each column. */
    private final FloatDCT_1D dctRows;

    /** Thread count for which <code>t</code> was last sized. */
    private int oldNthreads;

    private final boolean isPowerOfTwo;

    private final boolean useThreads;

    /**
     * Creates new instance of FloatDCT_2D.
     *
     * @param rows
     *            number of rows
     * @param columns
     *            number of columns
     * @throws IllegalArgumentException
     *             if <code>rows</code> or <code>columns</code> is not greater
     *             than 1
     */
    public FloatDCT_2D(int rows, int columns) {
        if (rows <= 1 || columns <= 1) {
            throw new IllegalArgumentException("rows and columns must be greater than 1");
        }
        this.rows = rows;
        this.columns = columns;
        // Widen to long: rows * columns can exceed Integer.MAX_VALUE for 2D
        // array input, and a wrapped product would silently disable threading.
        this.useThreads = (long) rows * columns >= ConcurrencyUtils.getThreadsBeginN_2D();
        this.isPowerOfTwo = ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns);
        if (isPowerOfTwo) {
            oldNthreads = ConcurrencyUtils.getNumberOfThreads();
            t = new float[workspaceLength(oldNthreads)];
        }
        dctColumns = new FloatDCT_1D(columns);
        // A square transform can share one 1D plan for both directions.
        dctRows = (columns == rows) ? dctColumns : new FloatDCT_1D(rows);
    }

    /**
     * Computes 2D forward DCT (DCT-II) leaving the result in <code>a</code>.
     * The data is stored in 1D array in row-major order.
     *
     * @param a
     *            data to transform
     * @param scale
     *            if true then scaling is performed
     */
    public void forward(final float[] a, final boolean scale) {
        transform(-1, a, scale);
    }

    /**
     * Computes 2D forward DCT (DCT-II) leaving the result in <code>a</code>.
     * The data is stored in 2D array.
     *
     * @param a
     *            data to transform
     * @param scale
     *            if true then scaling is performed
     */
    public void forward(final float[][] a, final boolean scale) {
        transform(-1, a, scale);
    }

    /**
     * Computes 2D inverse DCT (DCT-III) leaving the result in <code>a</code>.
     * The data is stored in 1D array in row-major order.
     *
     * @param a
     *            data to transform
     * @param scale
     *            if true then scaling is performed
     */
    public void inverse(final float[] a, final boolean scale) {
        transform(1, a, scale);
    }

    /**
     * Computes 2D inverse DCT (DCT-III) leaving the result in <code>a</code>.
     * The data is stored in 2D array.
     *
     * @param a
     *            data to transform
     * @param scale
     *            if true then scaling is performed
     */
    public void inverse(final float[][] a, final boolean scale) {
        transform(1, a, scale);
    }

    /* ------------------------------------------------------------------ */
    /* Shared helpers                                                      */
    /* ------------------------------------------------------------------ */

    /** Runs the forward (isgn == -1) or inverse 1D transform on data starting at offset. */
    private static void apply(FloatDCT_1D dct, int isgn, float[] data, int offset, boolean scale) {
        if (isgn == -1) {
            dct.forward(data, offset, scale);
        } else {
            dct.inverse(data, offset, scale);
        }
    }

    /** Runs the forward (isgn == -1) or inverse 1D transform on a whole array. */
    private static void apply(FloatDCT_1D dct, int isgn, float[] data, boolean scale) {
        if (isgn == -1) {
            dct.forward(data, scale);
        } else {
            dct.inverse(data, scale);
        }
    }

    /**
     * Returns the scratch-buffer length needed for the given thread count.
     * The threaded column pass runs one task per thread on 4 columns (or 2
     * when <code>columns == 2 * nthreads</code>), or one task per column when
     * there are fewer columns than that; each column needs <code>rows</code>
     * floats.
     */
    private int workspaceLength(int nthreads) {
        int length = 4 * nthreads * rows;
        if (columns == 2 * nthreads) {
            length >>= 1;
        } else if (columns < 2 * nthreads) {
            length >>= 2;
            // In this case the threaded pass uses columns * rows floats and the
            // sequential pass min(columns, 4) * rows. nthreads * rows covers
            // both only if nthreads >= columns, which is not guaranteed for a
            // thread count that is not a power of two (e.g. 3 threads and
            // 4 columns), so never allocate less than columns * rows.
            length = Math.max(length, columns * rows);
        }
        return length;
    }

    /** Re-allocates the scratch buffer if the thread count changed since it was last sized. */
    private void ensureWorkspace(int nthreads) {
        if (nthreads != oldNthreads) {
            t = new float[workspaceLength(nthreads)];
            oldNthreads = nthreads;
        }
    }

    /* ------------------------------------------------------------------ */
    /* Row-major 1D array layout                                           */
    /* ------------------------------------------------------------------ */

    /** Dispatches a forward (isgn == -1) or inverse 2D transform of row-major data. */
    private void transform(final int isgn, final float[] a, final boolean scale) {
        final int nthreads = ConcurrencyUtils.getNumberOfThreads();
        if (isPowerOfTwo) {
            ensureWorkspace(nthreads);
            if ((nthreads > 1) && useThreads) {
                ddxt2d_subth(isgn, a, scale, nthreads);
                ddxt2d0_subth(isgn, a, scale, nthreads);
            } else {
                ddxt2d_sub(isgn, a, scale);
                transformRows(isgn, a, 0, rows, scale);
            }
        } else if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
            Future<?>[] futures = new Future[nthreads];
            // Pass 1: contiguous bands of rows; the last task takes the remainder.
            int p = rows / nthreads;
            for (int l = 0; l < nthreads; l++) {
                final int firstRow = l * p;
                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
                futures[l] = ConcurrencyUtils.submit(new Runnable() {
                    @Override
                    public void run() {
                        transformRows(isgn, a, firstRow, lastRow, scale);
                    }
                });
            }
            ConcurrencyUtils.waitForCompletion(futures);
            // Pass 2: contiguous bands of columns.
            p = columns / nthreads;
            for (int l = 0; l < nthreads; l++) {
                final int firstColumn = l * p;
                final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
                futures[l] = ConcurrencyUtils.submit(new Runnable() {
                    @Override
                    public void run() {
                        transformColumns(isgn, a, firstColumn, lastColumn, scale);
                    }
                });
            }
            ConcurrencyUtils.waitForCompletion(futures);
        } else {
            transformRows(isgn, a, 0, rows, scale);
            transformColumns(isgn, a, 0, columns, scale);
        }
    }

    /** Transforms rows [firstRow, lastRow) in place with the length-<code>columns</code> 1D plan. */
    private void transformRows(int isgn, float[] a, int firstRow, int lastRow, boolean scale) {
        for (int r = firstRow; r < lastRow; r++) {
            apply(dctColumns, isgn, a, r * columns, scale);
        }
    }

    /** Transforms columns [firstColumn, lastColumn) via a private temporary of length <code>rows</code>. */
    private void transformColumns(int isgn, float[] a, int firstColumn, int lastColumn, boolean scale) {
        float[] temp = new float[rows];
        for (int c = firstColumn; c < lastColumn; c++) {
            for (int r = 0; r < rows; r++) {
                temp[r] = a[r * columns + c];
            }
            apply(dctRows, isgn, temp, scale);
            for (int r = 0; r < rows; r++) {
                a[r * columns + c] = temp[r];
            }
        }
    }

    /**
     * Copies <code>width</code> adjacent columns starting at
     * <code>firstColumn</code> into <code>t</code> (column k occupies
     * <code>t[base + k * rows ..]</code>), transforms each, and copies them
     * back.
     */
    private void transformColumnGroup(int isgn, float[] a, int firstColumn, int width, int base, boolean scale) {
        for (int r = 0; r < rows; r++) {
            final int src = r * columns + firstColumn;
            for (int k = 0; k < width; k++) {
                t[base + k * rows + r] = a[src + k];
            }
        }
        for (int k = 0; k < width; k++) {
            apply(dctRows, isgn, t, base + k * rows, scale);
        }
        for (int r = 0; r < rows; r++) {
            final int dst = r * columns + firstColumn;
            for (int k = 0; k < width; k++) {
                a[dst + k] = t[base + k * rows + r];
            }
        }
    }

    /**
     * Threaded column pass for power-of-two sizes. The thread count is passed
     * in by the caller so it matches the one used to size <code>t</code>.
     */
    private void ddxt2d_subth(final int isgn, final float[] a, final boolean scale, final int nthreads) {
        // Columns handled per step: 4 normally, 2 when there are exactly two
        // columns per thread, otherwise 1 with one task per column.
        final int width = (columns > 2 * nthreads) ? 4 : ((columns == 2 * nthreads) ? 2 : 1);
        final int tasks = (width == 1) ? columns : nthreads;
        final int stride = width * rows; // scratch floats per task
        Future<?>[] futures = new Future[tasks];

        for (int i = 0; i < tasks; i++) {
            final int n0 = i;
            final int startt = stride * i;
            futures[i] = ConcurrencyUtils.submit(new Runnable() {
                @Override
                public void run() {
                    // For width 2 and 1 this loop executes exactly once.
                    for (int c = width * n0; c < columns; c += width * tasks) {
                        transformColumnGroup(isgn, a, c, width, startt, scale);
                    }
                }
            });
        }
        ConcurrencyUtils.waitForCompletion(futures);
    }

    /** Threaded row pass for power-of-two sizes; task n0 handles rows n0, n0 + tasks, ... */
    private void ddxt2d0_subth(final int isgn, final float[] a, final boolean scale, final int nthreads) {
        final int tasks = Math.min(nthreads, rows);
        Future<?>[] futures = new Future[tasks];

        for (int i = 0; i < tasks; i++) {
            final int n0 = i;
            futures[i] = ConcurrencyUtils.submit(new Runnable() {
                @Override
                public void run() {
                    for (int r = n0; r < rows; r += tasks) {
                        apply(dctColumns, isgn, a, r * columns, scale);
                    }
                }
            });
        }
        ConcurrencyUtils.waitForCompletion(futures);
    }

    /** Sequential column pass for power-of-two sizes, 4 columns at a time (2 when columns == 2). */
    private void ddxt2d_sub(int isgn, float[] a, boolean scale) {
        final int width = (columns > 2) ? 4 : 2;
        for (int c = 0; c < columns; c += width) {
            transformColumnGroup(isgn, a, c, width, 0, scale);
        }
    }

    /* ------------------------------------------------------------------ */
    /* 2D array layout                                                     */
    /* ------------------------------------------------------------------ */

    /** Dispatches a forward (isgn == -1) or inverse 2D transform of data held in a 2D array. */
    private void transform(final int isgn, final float[][] a, final boolean scale) {
        final int nthreads = ConcurrencyUtils.getNumberOfThreads();
        if (isPowerOfTwo) {
            ensureWorkspace(nthreads);
            if ((nthreads > 1) && useThreads) {
                ddxt2d_subth(isgn, a, scale, nthreads);
                ddxt2d0_subth(isgn, a, scale, nthreads);
            } else {
                ddxt2d_sub(isgn, a, scale);
                transformRows(isgn, a, 0, rows, scale);
            }
        } else if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
            Future<?>[] futures = new Future[nthreads];
            // Pass 1: contiguous bands of rows; the last task takes the remainder.
            int p = rows / nthreads;
            for (int l = 0; l < nthreads; l++) {
                final int firstRow = l * p;
                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
                futures[l] = ConcurrencyUtils.submit(new Runnable() {
                    @Override
                    public void run() {
                        transformRows(isgn, a, firstRow, lastRow, scale);
                    }
                });
            }
            ConcurrencyUtils.waitForCompletion(futures);
            // Pass 2: contiguous bands of columns.
            p = columns / nthreads;
            for (int l = 0; l < nthreads; l++) {
                final int firstColumn = l * p;
                final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
                futures[l] = ConcurrencyUtils.submit(new Runnable() {
                    @Override
                    public void run() {
                        transformColumns(isgn, a, firstColumn, lastColumn, scale);
                    }
                });
            }
            ConcurrencyUtils.waitForCompletion(futures);
        } else {
            transformRows(isgn, a, 0, rows, scale);
            transformColumns(isgn, a, 0, columns, scale);
        }
    }

    /** Transforms rows [firstRow, lastRow) in place with the length-<code>columns</code> 1D plan. */
    private void transformRows(int isgn, float[][] a, int firstRow, int lastRow, boolean scale) {
        for (int r = firstRow; r < lastRow; r++) {
            apply(dctColumns, isgn, a[r], scale);
        }
    }

    /** Transforms columns [firstColumn, lastColumn) via a private temporary of length <code>rows</code>. */
    private void transformColumns(int isgn, float[][] a, int firstColumn, int lastColumn, boolean scale) {
        float[] temp = new float[rows];
        for (int c = firstColumn; c < lastColumn; c++) {
            for (int r = 0; r < rows; r++) {
                temp[r] = a[r][c];
            }
            apply(dctRows, isgn, temp, scale);
            for (int r = 0; r < rows; r++) {
                a[r][c] = temp[r];
            }
        }
    }

    /**
     * Copies <code>width</code> adjacent columns starting at
     * <code>firstColumn</code> into <code>t</code> (column k occupies
     * <code>t[base + k * rows ..]</code>), transforms each, and copies them
     * back.
     */
    private void transformColumnGroup(int isgn, float[][] a, int firstColumn, int width, int base, boolean scale) {
        for (int r = 0; r < rows; r++) {
            final float[] row = a[r];
            for (int k = 0; k < width; k++) {
                t[base + k * rows + r] = row[firstColumn + k];
            }
        }
        for (int k = 0; k < width; k++) {
            apply(dctRows, isgn, t, base + k * rows, scale);
        }
        for (int r = 0; r < rows; r++) {
            final float[] row = a[r];
            for (int k = 0; k < width; k++) {
                row[firstColumn + k] = t[base + k * rows + r];
            }
        }
    }

    /**
     * Threaded column pass for power-of-two sizes. The thread count is passed
     * in by the caller so it matches the one used to size <code>t</code>.
     */
    private void ddxt2d_subth(final int isgn, final float[][] a, final boolean scale, final int nthreads) {
        // Columns handled per step: 4 normally, 2 when there are exactly two
        // columns per thread, otherwise 1 with one task per column.
        final int width = (columns > 2 * nthreads) ? 4 : ((columns == 2 * nthreads) ? 2 : 1);
        final int tasks = (width == 1) ? columns : nthreads;
        final int stride = width * rows; // scratch floats per task
        Future<?>[] futures = new Future[tasks];

        for (int i = 0; i < tasks; i++) {
            final int n0 = i;
            final int startt = stride * i;
            futures[i] = ConcurrencyUtils.submit(new Runnable() {
                @Override
                public void run() {
                    // For width 2 and 1 this loop executes exactly once.
                    for (int c = width * n0; c < columns; c += width * tasks) {
                        transformColumnGroup(isgn, a, c, width, startt, scale);
                    }
                }
            });
        }
        ConcurrencyUtils.waitForCompletion(futures);
    }

    /** Threaded row pass for power-of-two sizes; task n0 handles rows n0, n0 + tasks, ... */
    private void ddxt2d0_subth(final int isgn, final float[][] a, final boolean scale, final int nthreads) {
        final int tasks = Math.min(nthreads, rows);
        Future<?>[] futures = new Future[tasks];

        for (int i = 0; i < tasks; i++) {
            final int n0 = i;
            futures[i] = ConcurrencyUtils.submit(new Runnable() {
                @Override
                public void run() {
                    for (int r = n0; r < rows; r += tasks) {
                        apply(dctColumns, isgn, a[r], scale);
                    }
                }
            });
        }
        ConcurrencyUtils.waitForCompletion(futures);
    }

    /** Sequential column pass for power-of-two sizes, 4 columns at a time (2 when columns == 2). */
    private void ddxt2d_sub(int isgn, float[][] a, boolean scale) {
        final int width = (columns > 2) ? 4 : 2;
        for (int c = 0; c < columns; c += width) {
            transformColumnGroup(isgn, a, c, width, 0, scale);
        }
    }
}