001/* ***** BEGIN LICENSE BLOCK ***** 002 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 003 * 004 * The contents of this file are subject to the Mozilla Public License Version 005 * 1.1 (the "License"); you may not use this file except in compliance with 006 * the License. You may obtain a copy of the License at 007 * http://www.mozilla.org/MPL/ 008 * 009 * Software distributed under the License is distributed on an "AS IS" basis, 010 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 011 * for the specific language governing rights and limitations under the 012 * License. 013 * 014 * The Original Code is JTransforms. 015 * 016 * The Initial Developer of the Original Code is 017 * Piotr Wendykier, Emory University. 018 * Portions created by the Initial Developer are Copyright (C) 2007-2009 019 * the Initial Developer. All Rights Reserved. 020 * 021 * Alternatively, the contents of this file may be used under the terms of 022 * either the GNU General Public License Version 2 or later (the "GPL"), or 023 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 024 * in which case the provisions of the GPL or the LGPL are applicable instead 025 * of those above. If you wish to allow use of your version of this file only 026 * under the terms of either the GPL or the LGPL, and not to allow others to 027 * use your version of this file under the terms of the MPL, indicate your 028 * decision by deleting the provisions above and replace them with the notice 029 * and other provisions required by the GPL or the LGPL. If you do not delete 030 * the provisions above, a recipient may use your version of this file under 031 * the terms of any one of the MPL, the GPL or the LGPL. 032 * 033 * ***** END LICENSE BLOCK ***** */ 034 035package edu.emory.mathcs.jtransforms.dct; 036 037import java.util.concurrent.Future; 038 039import edu.emory.mathcs.utils.ConcurrencyUtils; 040 041/** 042 * Computes 3D Discrete Cosine Transform (DCT) of double precision data. The 043 * sizes of all three dimensions can be arbitrary numbers. This is a parallel 044 * implementation of split-radix and mixed-radix algorithms optimized for SMP 045 * systems. <br> 046 * <br> 047 * Part of the code is derived from General Purpose FFT Package written by Takuya Ooura 048 * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html) 049 * 050 * @author Piotr Wendykier (piotr.wendykier@gmail.com) 051 * 052 */ 053public class DoubleDCT_3D { 054 055 private int slices; 056 057 private int rows; 058 059 private int columns; 060 061 private int sliceStride; 062 063 private int rowStride; 064 065 private double[] t; 066 067 private DoubleDCT_1D dctSlices, dctRows, dctColumns; 068 069 private int oldNthreads; 070 071 private int nt; 072 073 private boolean isPowerOfTwo = false; 074 075 private boolean useThreads = false; 076 077 /** 078 * Creates new instance of DoubleDCT_3D. 079 * 080 * @param slices 081 * number of slices 082 * @param rows 083 * number of rows 084 * @param columns 085 * number of columns 086 */ 087 public DoubleDCT_3D(int slices, int rows, int columns) { 088 if (slices <= 1 || rows <= 1 || columns <= 1) { 089 throw new IllegalArgumentException("slices, rows and columns must be greater than 1"); 090 } 091 this.slices = slices; 092 this.rows = rows; 093 this.columns = columns; 094 this.sliceStride = rows * columns; 095 this.rowStride = columns; 096 if (slices * rows * columns >= ConcurrencyUtils.getThreadsBeginN_3D()) { 097 this.useThreads = true; 098 } 099 if (ConcurrencyUtils.isPowerOf2(slices) && ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) { 100 isPowerOfTwo = true; 101 102 oldNthreads = ConcurrencyUtils.getNumberOfThreads(); 103 nt = slices; 104 if (nt < rows) { 105 nt = rows; 106 } 107 nt *= 4; 108 if (oldNthreads > 1) { 109 nt *= oldNthreads; 110 } 111 if (columns == 2) { 112 nt >>= 1; 113 } 114 t = new double[nt]; 115 } 116 dctSlices = new DoubleDCT_1D(slices); 117 if (slices == rows) { 118 dctRows = dctSlices; 119 } else { 120 dctRows = new DoubleDCT_1D(rows); 121 } 122 if (slices == columns) { 123 dctColumns = dctSlices; 124 } else if (rows == columns) { 125 dctColumns = dctRows; 126 } else { 127 dctColumns = new DoubleDCT_1D(columns); 128 } 129 } 130 131 /** 132 * Computes the 3D forward DCT (DCT-II) leaving the result in <code>a</code> 133 * . The data is stored in 1D array addressed in slice-major, then 134 * row-major, then column-major, in order of significance, i.e. the element 135 * (i,j,k) of 3D array x[slices][rows][columns] is stored in a[i*sliceStride 136 * + j*rowStride + k], where sliceStride = rows * columns and rowStride = 137 * columns. 138 * 139 * @param a 140 * data to transform 141 * @param scale 142 * if true then scaling is performed 143 */ 144 public void forward(final double[] a, final boolean scale) { 145 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 146 if (isPowerOfTwo) { 147 if (nthreads != oldNthreads) { 148 nt = slices; 149 if (nt < rows) { 150 nt = rows; 151 } 152 nt *= 4; 153 if (nthreads > 1) { 154 nt *= nthreads; 155 } 156 if (columns == 2) { 157 nt >>= 1; 158 } 159 t = new double[nt]; 160 oldNthreads = nthreads; 161 } 162 if ((nthreads > 1) && useThreads) { 163 ddxt3da_subth(-1, a, scale); 164 ddxt3db_subth(-1, a, scale); 165 } else { 166 ddxt3da_sub(-1, a, scale); 167 ddxt3db_sub(-1, a, scale); 168 } 169 } else { 170 if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) { 171 Future<?>[] futures = new Future[nthreads]; 172 int p = slices / nthreads; 173 for (int l = 0; l < nthreads; l++) { 174 final int firstSlice = l * p; 175 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 176 futures[l] = ConcurrencyUtils.submit(new Runnable() { 177 @Override 178 public void run() { 179 for (int s = firstSlice; s < lastSlice; s++) { 180 int idx1 = s * sliceStride; 181 for (int r = 0; r < rows; r++) { 182 dctColumns.forward(a, idx1 + r * rowStride, scale); 183 } 184 } 185 } 186 }); 187 } 188 ConcurrencyUtils.waitForCompletion(futures); 189 190 for (int l = 0; l < nthreads; l++) { 191 final int firstSlice = l * p; 192 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 193 futures[l] = ConcurrencyUtils.submit(new Runnable() { 194 @Override 195 public void run() { 196 double[] temp = new double[rows]; 197 for (int s = firstSlice; s < lastSlice; s++) { 198 int idx1 = s * sliceStride; 199 for (int c = 0; c < columns; c++) { 200 for (int r = 0; r < rows; r++) { 201 int idx3 = idx1 + r * rowStride + c; 202 temp[r] = a[idx3]; 203 } 204 dctRows.forward(temp, scale); 205 for (int r = 0; r < rows; r++) { 206 int idx3 = idx1 + r * rowStride + c; 207 a[idx3] = temp[r]; 208 } 209 } 210 } 211 } 212 }); 213 } 214 ConcurrencyUtils.waitForCompletion(futures); 215 216 p = rows / nthreads; 217 for (int l = 0; l < nthreads; l++) { 218 final int firstRow = l * p; 219 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 220 futures[l] = ConcurrencyUtils.submit(new Runnable() { 221 @Override 222 public void run() { 223 double[] temp = new double[slices]; 224 for (int r = firstRow; r < lastRow; r++) { 225 int idx1 = r * rowStride; 226 for (int c = 0; c < columns; c++) { 227 for (int s = 0; s < slices; s++) { 228 int idx3 = s * sliceStride + idx1 + c; 229 temp[s] = a[idx3]; 230 } 231 dctSlices.forward(temp, scale); 232 for (int s = 0; s < slices; s++) { 233 int idx3 = s * sliceStride + idx1 + c; 234 a[idx3] = temp[s]; 235 } 236 } 237 } 238 } 239 }); 240 } 241 ConcurrencyUtils.waitForCompletion(futures); 242 243 } else { 244 for (int s = 0; s < slices; s++) { 245 int idx1 = s * sliceStride; 246 for (int r = 0; r < rows; r++) { 247 dctColumns.forward(a, idx1 + r * rowStride, scale); 248 } 249 } 250 double[] temp = new double[rows]; 251 for (int s = 0; s < slices; s++) { 252 int idx1 = s * sliceStride; 253 for (int c = 0; c < columns; c++) { 254 for (int r = 0; r < rows; r++) { 255 int idx3 = idx1 + r * rowStride + c; 256 temp[r] = a[idx3]; 257 } 258 dctRows.forward(temp, scale); 259 for (int r = 0; r < rows; r++) { 260 int idx3 = idx1 + r * rowStride + c; 261 a[idx3] = temp[r]; 262 } 263 } 264 } 265 temp = new double[slices]; 266 for (int r = 0; r < rows; r++) { 267 int idx1 = r * rowStride; 268 for (int c = 0; c < columns; c++) { 269 for (int s = 0; s < slices; s++) { 270 int idx3 = s * sliceStride + idx1 + c; 271 temp[s] = a[idx3]; 272 } 273 dctSlices.forward(temp, scale); 274 for (int s = 0; s < slices; s++) { 275 int idx3 = s * sliceStride + idx1 + c; 276 a[idx3] = temp[s]; 277 } 278 } 279 } 280 } 281 } 282 } 283 284 /** 285 * Computes the 3D forward DCT (DCT-II) leaving the result in <code>a</code> 286 * . The data is stored in 3D array 287 * 288 * @param a 289 * data to transform 290 * @param scale 291 * if true then scaling is performed 292 */ 293 public void forward(final double[][][] a, final boolean scale) { 294 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 295 if (isPowerOfTwo) { 296 if (nthreads != oldNthreads) { 297 nt = slices; 298 if (nt < rows) { 299 nt = rows; 300 } 301 nt *= 4; 302 if (nthreads > 1) { 303 nt *= nthreads; 304 } 305 if (columns == 2) { 306 nt >>= 1; 307 } 308 t = new double[nt]; 309 oldNthreads = nthreads; 310 } 311 if ((nthreads > 1) && useThreads) { 312 ddxt3da_subth(-1, a, scale); 313 ddxt3db_subth(-1, a, scale); 314 } else { 315 ddxt3da_sub(-1, a, scale); 316 ddxt3db_sub(-1, a, scale); 317 } 318 } else { 319 if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) { 320 Future<?>[] futures = new Future[nthreads]; 321 int p = slices / nthreads; 322 for (int l = 0; l < nthreads; l++) { 323 final int firstSlice = l * p; 324 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 325 futures[l] = ConcurrencyUtils.submit(new Runnable() { 326 @Override 327 public void run() { 328 for (int s = firstSlice; s < lastSlice; s++) { 329 for (int r = 0; r < rows; r++) { 330 dctColumns.forward(a[s][r], scale); 331 } 332 } 333 } 334 }); 335 } 336 ConcurrencyUtils.waitForCompletion(futures); 337 338 for (int l = 0; l < nthreads; l++) { 339 final int firstSlice = l * p; 340 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 341 futures[l] = ConcurrencyUtils.submit(new Runnable() { 342 @Override 343 public void run() { 344 double[] temp = new double[rows]; 345 for (int s = firstSlice; s < lastSlice; s++) { 346 for (int c = 0; c < columns; c++) { 347 for (int r = 0; r < rows; r++) { 348 temp[r] = a[s][r][c]; 349 } 350 dctRows.forward(temp, scale); 351 for (int r = 0; r < rows; r++) { 352 a[s][r][c] = temp[r]; 353 } 354 } 355 } 356 } 357 }); 358 } 359 ConcurrencyUtils.waitForCompletion(futures); 360 361 p = rows / nthreads; 362 for (int l = 0; l < nthreads; l++) { 363 final int firstRow = l * p; 364 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 365 futures[l] = ConcurrencyUtils.submit(new Runnable() { 366 @Override 367 public void run() { 368 double[] temp = new double[slices]; 369 for (int r = firstRow; r < lastRow; r++) { 370 for (int c = 0; c < columns; c++) { 371 for (int s = 0; s < slices; s++) { 372 temp[s] = a[s][r][c]; 373 } 374 dctSlices.forward(temp, scale); 375 for (int s = 0; s < slices; s++) { 376 a[s][r][c] = temp[s]; 377 } 378 } 379 } 380 } 381 }); 382 } 383 ConcurrencyUtils.waitForCompletion(futures); 384 385 } else { 386 for (int s = 0; s < slices; s++) { 387 for (int r = 0; r < rows; r++) { 388 dctColumns.forward(a[s][r], scale); 389 } 390 } 391 double[] temp = new double[rows]; 392 for (int s = 0; s < slices; s++) { 393 for (int c = 0; c < columns; c++) { 394 for (int r = 0; r < rows; r++) { 395 temp[r] = a[s][r][c]; 396 } 397 dctRows.forward(temp, scale); 398 for (int r = 0; r < rows; r++) { 399 a[s][r][c] = temp[r]; 400 } 401 } 402 } 403 temp = new double[slices]; 404 for (int r = 0; r < rows; r++) { 405 for (int c = 0; c < columns; c++) { 406 for (int s = 0; s < slices; s++) { 407 temp[s] = a[s][r][c]; 408 } 409 dctSlices.forward(temp, scale); 410 for (int s = 0; s < slices; s++) { 411 a[s][r][c] = temp[s]; 412 } 413 } 414 } 415 } 416 } 417 } 418 419 /** 420 * Computes the 3D inverse DCT (DCT-III) leaving the result in 421 * <code>a</code>. The data is stored in 1D array addressed in slice-major, 422 * then row-major, then column-major, in order of significance, i.e. the 423 * element (i,j,k) of 3D array x[slices][rows][columns] is stored in 424 * a[i*sliceStride + j*rowStride + k], where sliceStride = rows * columns 425 * and rowStride = columns. 426 * 427 * @param a 428 * data to transform 429 * @param scale 430 * if true then scaling is performed 431 */ 432 public void inverse(final double[] a, final boolean scale) { 433 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 434 if (isPowerOfTwo) { 435 if (nthreads != oldNthreads) { 436 nt = slices; 437 if (nt < rows) { 438 nt = rows; 439 } 440 nt *= 4; 441 if (nthreads > 1) { 442 nt *= nthreads; 443 } 444 if (columns == 2) { 445 nt >>= 1; 446 } 447 t = new double[nt]; 448 oldNthreads = nthreads; 449 } 450 if ((nthreads > 1) && useThreads) { 451 ddxt3da_subth(1, a, scale); 452 ddxt3db_subth(1, a, scale); 453 } else { 454 ddxt3da_sub(1, a, scale); 455 ddxt3db_sub(1, a, scale); 456 } 457 } else { 458 if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) { 459 Future<?>[] futures = new Future[nthreads]; 460 int p = slices / nthreads; 461 for (int l = 0; l < nthreads; l++) { 462 final int firstSlice = l * p; 463 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 464 futures[l] = ConcurrencyUtils.submit(new Runnable() { 465 @Override 466 public void run() { 467 for (int s = firstSlice; s < lastSlice; s++) { 468 int idx1 = s * sliceStride; 469 for (int r = 0; r < rows; r++) { 470 dctColumns.inverse(a, idx1 + r * rowStride, scale); 471 } 472 } 473 } 474 }); 475 } 476 ConcurrencyUtils.waitForCompletion(futures); 477 478 for (int l = 0; l < nthreads; l++) { 479 final int firstSlice = l * p; 480 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 481 futures[l] = ConcurrencyUtils.submit(new Runnable() { 482 @Override 483 public void run() { 484 double[] temp = new double[rows]; 485 for (int s = firstSlice; s < lastSlice; s++) { 486 int idx1 = s * sliceStride; 487 for (int c = 0; c < columns; c++) { 488 for (int r = 0; r < rows; r++) { 489 int idx3 = idx1 + r * rowStride + c; 490 temp[r] = a[idx3]; 491 } 492 dctRows.inverse(temp, scale); 493 for (int r = 0; r < rows; r++) { 494 int idx3 = idx1 + r * rowStride + c; 495 a[idx3] = temp[r]; 496 } 497 } 498 } 499 } 500 }); 501 } 502 ConcurrencyUtils.waitForCompletion(futures); 503 504 p = rows / nthreads; 505 for (int l = 0; l < nthreads; l++) { 506 final int firstRow = l * p; 507 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 508 futures[l] = ConcurrencyUtils.submit(new Runnable() { 509 @Override 510 public void run() { 511 double[] temp = new double[slices]; 512 for (int r = firstRow; r < lastRow; r++) { 513 int idx1 = r * rowStride; 514 for (int c = 0; c < columns; c++) { 515 for (int s = 0; s < slices; s++) { 516 int idx3 = s * sliceStride + idx1 + c; 517 temp[s] = a[idx3]; 518 } 519 dctSlices.inverse(temp, scale); 520 for (int s = 0; s < slices; s++) { 521 int idx3 = s * sliceStride + idx1 + c; 522 a[idx3] = temp[s]; 523 } 524 } 525 } 526 } 527 }); 528 } 529 ConcurrencyUtils.waitForCompletion(futures); 530 531 } else { 532 for (int s = 0; s < slices; s++) { 533 int idx1 = s * sliceStride; 534 for (int r = 0; r < rows; r++) { 535 dctColumns.inverse(a, idx1 + r * rowStride, scale); 536 } 537 } 538 double[] temp = new double[rows]; 539 for (int s = 0; s < slices; s++) { 540 int idx1 = s * sliceStride; 541 for (int c = 0; c < columns; c++) { 542 for (int r = 0; r < rows; r++) { 543 int idx3 = idx1 + r * rowStride + c; 544 temp[r] = a[idx3]; 545 } 546 dctRows.inverse(temp, scale); 547 for (int r = 0; r < rows; r++) { 548 int idx3 = idx1 + r * rowStride + c; 549 a[idx3] = temp[r]; 550 } 551 } 552 } 553 temp = new double[slices]; 554 for (int r = 0; r < rows; r++) { 555 int idx1 = r * rowStride; 556 for (int c = 0; c < columns; c++) { 557 for (int s = 0; s < slices; s++) { 558 int idx3 = s * sliceStride + idx1 + c; 559 temp[s] = a[idx3]; 560 } 561 dctSlices.inverse(temp, scale); 562 for (int s = 0; s < slices; s++) { 563 int idx3 = s * sliceStride + idx1 + c; 564 a[idx3] = temp[s]; 565 } 566 } 567 } 568 } 569 } 570 } 571 572 /** 573 * Computes the 3D inverse DCT (DCT-III) leaving the result in 574 * <code>a</code>. The data is stored in 3D array. 575 * 576 * @param a 577 * data to transform 578 * @param scale 579 * if true then scaling is performed 580 */ 581 public void inverse(final double[][][] a, final boolean scale) { 582 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 583 if (isPowerOfTwo) { 584 if (nthreads != oldNthreads) { 585 nt = slices; 586 if (nt < rows) { 587 nt = rows; 588 } 589 nt *= 4; 590 if (nthreads > 1) { 591 nt *= nthreads; 592 } 593 if (columns == 2) { 594 nt >>= 1; 595 } 596 t = new double[nt]; 597 oldNthreads = nthreads; 598 } 599 if ((nthreads > 1) && useThreads) { 600 ddxt3da_subth(1, a, scale); 601 ddxt3db_subth(1, a, scale); 602 } else { 603 ddxt3da_sub(1, a, scale); 604 ddxt3db_sub(1, a, scale); 605 } 606 } else { 607 if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) { 608 Future<?>[] futures = new Future[nthreads]; 609 int p = slices / nthreads; 610 for (int l = 0; l < nthreads; l++) { 611 final int firstSlice = l * p; 612 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 613 futures[l] = ConcurrencyUtils.submit(new Runnable() { 614 @Override 615 public void run() { 616 for (int s = firstSlice; s < lastSlice; s++) { 617 for (int r = 0; r < rows; r++) { 618 dctColumns.inverse(a[s][r], scale); 619 } 620 } 621 } 622 }); 623 } 624 ConcurrencyUtils.waitForCompletion(futures); 625 626 for (int l = 0; l < nthreads; l++) { 627 final int firstSlice = l * p; 628 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 629 futures[l] = ConcurrencyUtils.submit(new Runnable() { 630 @Override 631 public void run() { 632 double[] temp = new double[rows]; 633 for (int s = firstSlice; s < lastSlice; s++) { 634 for (int c = 0; c < columns; c++) { 635 for (int r = 0; r < rows; r++) { 636 temp[r] = a[s][r][c]; 637 } 638 dctRows.inverse(temp, scale); 639 for (int r = 0; r < rows; r++) { 640 a[s][r][c] = temp[r]; 641 } 642 } 643 } 644 } 645 }); 646 } 647 ConcurrencyUtils.waitForCompletion(futures); 648 649 p = rows / nthreads; 650 for (int l = 0; l < nthreads; l++) { 651 final int firstRow = l * p; 652 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 653 futures[l] = ConcurrencyUtils.submit(new Runnable() { 654 @Override 655 public void run() { 656 double[] temp = new double[slices]; 657 for (int r = firstRow; r < lastRow; r++) { 658 for (int c = 0; c < columns; c++) { 659 for (int s = 0; s < slices; s++) { 660 temp[s] = a[s][r][c]; 661 } 662 dctSlices.inverse(temp, scale); 663 for (int s = 0; s < slices; s++) { 664 a[s][r][c] = temp[s]; 665 } 666 } 667 } 668 } 669 }); 670 } 671 ConcurrencyUtils.waitForCompletion(futures); 672 673 } else { 674 for (int s = 0; s < slices; s++) { 675 for (int r = 0; r < rows; r++) { 676 dctColumns.inverse(a[s][r], scale); 677 } 678 } 679 double[] temp = new double[rows]; 680 for (int s = 0; s < slices; s++) { 681 for (int c = 0; c < columns; c++) { 682 for (int r = 0; r < rows; r++) { 683 temp[r] = a[s][r][c]; 684 } 685 dctRows.inverse(temp, scale); 686 for (int r = 0; r < rows; r++) { 687 a[s][r][c] = temp[r]; 688 } 689 } 690 } 691 temp = new double[slices]; 692 for (int r = 0; r < rows; r++) { 693 for (int c = 0; c < columns; c++) { 694 for (int s = 0; s < slices; s++) { 695 temp[s] = a[s][r][c]; 696 } 697 dctSlices.inverse(temp, scale); 698 for (int s = 0; s < slices; s++) { 699 a[s][r][c] = temp[s]; 700 } 701 } 702 } 703 } 704 } 705 } 706 707 private void ddxt3da_sub(int isgn, double[] a, boolean scale) { 708 int idx0, idx1, idx2; 709 710 if (isgn == -1) { 711 for (int s = 0; s < slices; s++) { 712 idx0 = s * sliceStride; 713 for (int r = 0; r < rows; r++) { 714 dctColumns.forward(a, idx0 + r * rowStride, scale); 715 } 716 if (columns > 2) { 717 for (int c = 0; c < columns; c += 4) { 718 for (int r = 0; r < rows; r++) { 719 idx1 = idx0 + r * rowStride + c; 720 idx2 = rows + r; 721 t[r] = a[idx1]; 722 t[idx2] = a[idx1 + 1]; 723 t[idx2 + rows] = a[idx1 + 2]; 724 t[idx2 + 2 * rows] = a[idx1 + 3]; 725 } 726 dctRows.forward(t, 0, scale); 727 dctRows.forward(t, rows, scale); 728 dctRows.forward(t, 2 * rows, scale); 729 dctRows.forward(t, 3 * rows, scale); 730 for (int r = 0; r < rows; r++) { 731 idx1 = idx0 + r * rowStride + c; 732 idx2 = rows + r; 733 a[idx1] = t[r]; 734 a[idx1 + 1] = t[idx2]; 735 a[idx1 + 2] = t[idx2 + rows]; 736 a[idx1 + 3] = t[idx2 + 2 * rows]; 737 } 738 } 739 } else if (columns == 2) { 740 for (int r = 0; r < rows; r++) { 741 idx1 = idx0 + r * rowStride; 742 t[r] = a[idx1]; 743 t[rows + r] = a[idx1 + 1]; 744 } 745 dctRows.forward(t, 0, scale); 746 dctRows.forward(t, rows, scale); 747 for (int r = 0; r < rows; r++) { 748 idx1 = idx0 + r * rowStride; 749 a[idx1] = t[r]; 750 a[idx1 + 1] = t[rows + r]; 751 } 752 } 753 } 754 } else { 755 for (int s = 0; s < slices; s++) { 756 idx0 = s * sliceStride; 757 for (int r = 0; r < rows; r++) { 758 dctColumns.inverse(a, idx0 + r * rowStride, scale); 759 } 760 if (columns > 2) { 761 for (int c = 0; c < columns; c += 4) { 762 for (int r = 0; r < rows; r++) { 763 idx1 = idx0 + r * rowStride + c; 764 idx2 = rows + r; 765 t[r] = a[idx1]; 766 t[idx2] = a[idx1 + 1]; 767 t[idx2 + rows] = a[idx1 + 2]; 768 t[idx2 + 2 * rows] = a[idx1 + 3]; 769 } 770 dctRows.inverse(t, 0, scale); 771 dctRows.inverse(t, rows, scale); 772 dctRows.inverse(t, 2 * rows, scale); 773 dctRows.inverse(t, 3 * rows, scale); 774 for (int r = 0; r < rows; r++) { 775 idx1 = idx0 + r * rowStride + c; 776 idx2 = rows + r; 777 a[idx1] = t[r]; 778 a[idx1 + 1] = t[idx2]; 779 a[idx1 + 2] = t[idx2 + rows]; 780 a[idx1 + 3] = t[idx2 + 2 * rows]; 781 } 782 } 783 } else if (columns == 2) { 784 for (int r = 0; r < rows; r++) { 785 idx1 = idx0 + r * rowStride; 786 t[r] = a[idx1]; 787 t[rows + r] = a[idx1 + 1]; 788 } 789 dctRows.inverse(t, 0, scale); 790 dctRows.inverse(t, rows, scale); 791 for (int r = 0; r < rows; r++) { 792 idx1 = idx0 + r * rowStride; 793 a[idx1] = t[r]; 794 a[idx1 + 1] = t[rows + r]; 795 } 796 } 797 } 798 } 799 } 800 801 private void ddxt3da_sub(int isgn, double[][][] a, boolean scale) { 802 int idx2; 803 804 if (isgn == -1) { 805 for (int s = 0; s < slices; s++) { 806 for (int r = 0; r < rows; r++) { 807 dctColumns.forward(a[s][r], scale); 808 } 809 if (columns > 2) { 810 for (int c = 0; c < columns; c += 4) { 811 for (int r = 0; r < rows; r++) { 812 idx2 = rows + r; 813 t[r] = a[s][r][c]; 814 t[idx2] = a[s][r][c + 1]; 815 t[idx2 + rows] = a[s][r][c + 2]; 816 t[idx2 + 2 * rows] = a[s][r][c + 3]; 817 } 818 dctRows.forward(t, 0, scale); 819 dctRows.forward(t, rows, scale); 820 dctRows.forward(t, 2 * rows, scale); 821 dctRows.forward(t, 3 * rows, scale); 822 for (int r = 0; r < rows; r++) { 823 idx2 = rows + r; 824 a[s][r][c] = t[r]; 825 a[s][r][c + 1] = t[idx2]; 826 a[s][r][c + 2] = t[idx2 + rows]; 827 a[s][r][c + 3] = t[idx2 + 2 * rows]; 828 } 829 } 830 } else if (columns == 2) { 831 for (int r = 0; r < rows; r++) { 832 t[r] = a[s][r][0]; 833 t[rows + r] = a[s][r][1]; 834 } 835 dctRows.forward(t, 0, scale); 836 dctRows.forward(t, rows, scale); 837 for (int r = 0; r < rows; r++) { 838 a[s][r][0] = t[r]; 839 a[s][r][1] = t[rows + r]; 840 } 841 } 842 } 843 } else { 844 for (int s = 0; s < slices; s++) { 845 for (int r = 0; r < rows; r++) { 846 dctColumns.inverse(a[s][r], scale); 847 } 848 if (columns > 2) { 849 for (int c = 0; c < columns; c += 4) { 850 for (int r = 0; r < rows; r++) { 851 idx2 = rows + r; 852 t[r] = a[s][r][c]; 853 t[idx2] = a[s][r][c + 1]; 854 t[idx2 + rows] = a[s][r][c + 2]; 855 t[idx2 + 2 * rows] = a[s][r][c + 3]; 856 } 857 dctRows.inverse(t, 0, scale); 858 dctRows.inverse(t, rows, scale); 859 dctRows.inverse(t, 2 * rows, scale); 860 dctRows.inverse(t, 3 * rows, scale); 861 for (int r = 0; r < rows; r++) { 862 idx2 = rows + r; 863 a[s][r][c] = t[r]; 864 a[s][r][c + 1] = t[idx2]; 865 a[s][r][c + 2] = t[idx2 + rows]; 866 a[s][r][c + 3] = t[idx2 + 2 * rows]; 867 } 868 } 869 } else if (columns == 2) { 870 for (int r = 0; r < rows; r++) { 871 t[r] = a[s][r][0]; 872 t[rows + r] = a[s][r][1]; 873 } 874 dctRows.inverse(t, 0, scale); 875 dctRows.inverse(t, rows, scale); 876 for (int r = 0; r < rows; r++) { 877 a[s][r][0] = t[r]; 878 a[s][r][1] = t[rows + r]; 879 } 880 } 881 } 882 } 883 } 884 885 private void ddxt3db_sub(int isgn, double[] a, boolean scale) { 886 int idx0, idx1, idx2; 887 888 if (isgn == -1) { 889 if (columns > 2) { 890 for (int r = 0; r < rows; r++) { 891 idx0 = r * rowStride; 892 for (int c = 0; c < columns; c += 4) { 893 for (int s = 0; s < slices; s++) { 894 idx1 = s * sliceStride + idx0 + c; 895 idx2 = slices + s; 896 t[s] = a[idx1]; 897 t[idx2] = a[idx1 + 1]; 898 t[idx2 + slices] = a[idx1 + 2]; 899 t[idx2 + 2 * slices] = a[idx1 + 3]; 900 } 901 dctSlices.forward(t, 0, scale); 902 dctSlices.forward(t, slices, scale); 903 dctSlices.forward(t, 2 * slices, scale); 904 dctSlices.forward(t, 3 * slices, scale); 905 for (int s = 0; s < slices; s++) { 906 idx1 = s * sliceStride + idx0 + c; 907 idx2 = slices + s; 908 a[idx1] = t[s]; 909 a[idx1 + 1] = t[idx2]; 910 a[idx1 + 2] = t[idx2 + slices]; 911 a[idx1 + 3] = t[idx2 + 2 * slices]; 912 } 913 } 914 } 915 } else if (columns == 2) { 916 for (int r = 0; r < rows; r++) { 917 idx0 = r * rowStride; 918 for (int s = 0; s < slices; s++) { 919 idx1 = s * sliceStride + idx0; 920 t[s] = a[idx1]; 921 t[slices + s] = a[idx1 + 1]; 922 } 923 dctSlices.forward(t, 0, scale); 924 dctSlices.forward(t, slices, scale); 925 for (int s = 0; s < slices; s++) { 926 idx1 = s * sliceStride + idx0; 927 a[idx1] = t[s]; 928 a[idx1 + 1] = t[slices + s]; 929 } 930 } 931 } 932 } else { 933 if (columns > 2) { 934 for (int r = 0; r < rows; r++) { 935 idx0 = r * rowStride; 936 for (int c = 0; c < columns; c += 4) { 937 for (int s = 0; s < slices; s++) { 938 idx1 = s * sliceStride + idx0 + c; 939 idx2 = slices + s; 940 t[s] = a[idx1]; 941 t[idx2] = a[idx1 + 1]; 942 t[idx2 + slices] = a[idx1 + 2]; 943 t[idx2 + 2 * slices] = a[idx1 + 3]; 944 } 945 dctSlices.inverse(t, 0, scale); 946 dctSlices.inverse(t, slices, scale); 947 dctSlices.inverse(t, 2 * slices, scale); 948 dctSlices.inverse(t, 3 * slices, scale); 949 950 for (int s = 0; s < slices; s++) { 951 idx1 = s * sliceStride + idx0 + c; 952 idx2 = slices + s; 953 a[idx1] = t[s]; 954 a[idx1 + 1] = t[idx2]; 955 a[idx1 + 2] = t[idx2 + slices]; 956 a[idx1 + 3] = t[idx2 + 2 * slices]; 957 } 958 } 959 } 960 } else if (columns == 2) { 961 for (int r = 0; r < rows; r++) { 962 idx0 = r * rowStride; 963 for (int s = 0; s < slices; s++) { 964 idx1 = s * sliceStride + idx0; 965 t[s] = a[idx1]; 966 t[slices + s] = a[idx1 + 1]; 967 } 968 dctSlices.inverse(t, 0, scale); 969 dctSlices.inverse(t, slices, scale); 970 for (int s = 0; s < slices; s++) { 971 idx1 = s * sliceStride + idx0; 972 a[idx1] = t[s]; 973 a[idx1 + 1] = t[slices + s]; 974 } 975 } 976 } 977 } 978 } 979 980 private void ddxt3db_sub(int isgn, double[][][] a, boolean scale) { 981 int idx2; 982 983 if (isgn == -1) { 984 if (columns > 2) { 985 for (int r = 0; r < rows; r++) { 986 for (int c = 0; c < columns; c += 4) { 987 for (int s = 0; s < slices; s++) { 988 idx2 = slices + s; 989 t[s] = a[s][r][c]; 990 t[idx2] = a[s][r][c + 1]; 991 t[idx2 + slices] = a[s][r][c + 2]; 992 t[idx2 + 2 * slices] = a[s][r][c + 3]; 993 } 994 dctSlices.forward(t, 0, scale); 995 dctSlices.forward(t, slices, scale); 996 dctSlices.forward(t, 2 * slices, scale); 997 dctSlices.forward(t, 3 * slices, scale); 998 for (int s = 0; s < slices; s++) { 999 idx2 = slices + s; 1000 a[s][r][c] = t[s]; 1001 a[s][r][c + 1] = t[idx2]; 1002 a[s][r][c + 2] = t[idx2 + slices]; 1003 a[s][r][c + 3] = t[idx2 + 2 * slices]; 1004 } 1005 } 1006 } 1007 } else if (columns == 2) { 1008 for (int r = 0; r < rows; r++) { 1009 for (int s = 0; s < slices; s++) { 1010 t[s] = a[s][r][0]; 1011 t[slices + s] = a[s][r][1]; 1012 } 1013 dctSlices.forward(t, 0, scale); 1014 dctSlices.forward(t, slices, scale); 1015 for (int s = 0; s < slices; s++) { 1016 a[s][r][0] = t[s]; 1017 a[s][r][1] = t[slices + s]; 1018 } 1019 } 1020 } 1021 } else { 1022 if (columns > 2) { 1023 for (int r = 0; r < rows; r++) { 1024 for (int c = 0; c < columns; c += 4) { 1025 for (int s = 0; s < slices; s++) { 1026 idx2 = slices + s; 1027 t[s] = a[s][r][c]; 1028 t[idx2] = a[s][r][c + 1]; 1029 t[idx2 + slices] = a[s][r][c + 2]; 1030 t[idx2 + 2 * slices] = a[s][r][c + 3]; 1031 } 1032 dctSlices.inverse(t, 0, scale); 1033 dctSlices.inverse(t, slices, scale); 1034 dctSlices.inverse(t, 2 * slices, scale); 1035 dctSlices.inverse(t, 3 * slices, scale); 1036 1037 for (int s = 0; s < slices; s++) { 1038 idx2 = slices + s; 1039 a[s][r][c] = t[s]; 1040 a[s][r][c + 1] = t[idx2]; 1041 a[s][r][c + 2] = t[idx2 + slices]; 1042 a[s][r][c + 3] = t[idx2 + 2 * slices]; 1043 } 1044 } 1045 } 1046 } else if (columns == 2) { 1047 for (int r = 0; r < rows; r++) { 1048 for (int s = 0; s < slices; s++) { 1049 t[s] = a[s][r][0]; 1050 t[slices + s] = a[s][r][1]; 1051 } 1052 dctSlices.inverse(t, 0, scale); 1053 dctSlices.inverse(t, slices, scale); 1054 for (int s = 0; s < slices; s++) { 1055 a[s][r][0] = t[s]; 1056 a[s][r][1] = t[slices + s]; 1057 } 1058 } 1059 } 1060 } 1061 } 1062 1063 private void ddxt3da_subth(final int isgn, final double[] a, final boolean scale) { 1064 final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads(); 1065 int nt = 4 * rows; 1066 if (columns == 2) { 1067 nt >>= 1; 1068 } 1069 Future<?>[] futures = new Future[nthreads]; 1070 1071 for (int i = 0; i < nthreads; i++) { 1072 final int n0 = i; 1073 final int startt = nt * i; 1074 futures[i] = ConcurrencyUtils.submit(new Runnable() { 1075 1076 @Override 1077 public void run() { 1078 int idx0, idx1, idx2; 1079 if (isgn == -1) { 1080 for (int s = n0; s < slices; s += nthreads) { 1081 idx0 = s * sliceStride; 1082 for (int r = 0; r < rows; r++) { 1083 dctColumns.forward(a, idx0 + r * rowStride, scale); 1084 } 1085 if (columns > 2) { 1086 for (int c = 0; c < columns; c += 4) { 1087 for (int r = 0; r < rows; r++) { 1088 idx1 = idx0 + r * rowStride + c; 1089 idx2 = startt + rows + r; 1090 t[startt + r] = a[idx1]; 1091 t[idx2] = a[idx1 + 1]; 1092 t[idx2 + rows] = a[idx1 + 2]; 1093 t[idx2 + 2 * rows] = a[idx1 + 3]; 1094 } 1095 dctRows.forward(t, startt, scale); 1096 dctRows.forward(t, startt + rows, scale); 1097 dctRows.forward(t, startt + 2 * rows, scale); 1098 dctRows.forward(t, startt + 3 * rows, scale); 1099 for (int r = 0; r < rows; r++) { 1100 idx1 = idx0 + r * rowStride + c; 1101 idx2 = startt + rows + r; 1102 a[idx1] = t[startt + r]; 1103 a[idx1 + 1] = t[idx2]; 1104 a[idx1 + 2] = t[idx2 + rows]; 1105 a[idx1 + 3] = t[idx2 + 2 * rows]; 1106 } 1107 } 1108 } else if (columns == 2) { 1109 for (int r = 0; r < rows; r++) { 1110 idx1 = idx0 + r * rowStride; 1111 t[startt + r] = a[idx1]; 1112 t[startt + rows + r] = a[idx1 + 1]; 1113 } 1114 dctRows.forward(t, startt, scale); 1115 dctRows.forward(t, startt + rows, scale); 1116 for (int r = 0; r < rows; r++) { 1117 idx1 = idx0 + r * rowStride; 1118 a[idx1] = t[startt + r]; 1119 a[idx1 + 1] = t[startt + rows + r]; 1120 } 1121 } 1122 } 1123 } else { 1124 for (int s = n0; s < slices; s += nthreads) { 1125 idx0 = s * sliceStride; 1126 for (int r = 0; r < rows; r++) { 1127 dctColumns.inverse(a, idx0 + r * rowStride, scale); 1128 } 1129 if (columns > 2) { 1130 for (int c = 0; c < columns; c += 4) { 1131 for (int j = 0; j < rows; j++) { 1132 idx1 = idx0 + j * rowStride + c; 1133 idx2 = startt + rows + j; 1134 t[startt + j] = a[idx1]; 1135 t[idx2] = a[idx1 + 1]; 1136 t[idx2 + rows] = a[idx1 + 2]; 1137 t[idx2 + 2 * rows] = a[idx1 + 3]; 1138 } 1139 dctRows.inverse(t, startt, scale); 1140 dctRows.inverse(t, startt + rows, scale); 1141 dctRows.inverse(t, startt + 2 * rows, scale); 1142 dctRows.inverse(t, startt + 3 * rows, scale); 1143 for (int j = 0; j < rows; j++) { 1144 idx1 = idx0 + j * rowStride + c; 1145 idx2 = startt + rows + j; 1146 a[idx1] = t[startt + j]; 1147 a[idx1 + 1] = t[idx2]; 1148 a[idx1 + 2] = t[idx2 + rows]; 1149 a[idx1 + 3] = t[idx2 + 2 * rows]; 1150 } 1151 } 1152 } else if (columns == 2) { 1153 for (int r = 0; r < rows; r++) { 1154 idx1 = idx0 + r * rowStride; 1155 t[startt + r] = a[idx1]; 1156 t[startt + rows + r] = a[idx1 + 1]; 1157 } 1158 dctRows.inverse(t, startt, scale); 1159 dctRows.inverse(t, startt + rows, scale); 1160 for (int r = 0; r < rows; r++) { 1161 idx1 = idx0 + r * rowStride; 1162 a[idx1] = t[startt + r]; 1163 a[idx1 + 1] = t[startt + rows + r]; 1164 } 1165 } 1166 } 1167 } 1168 } 1169 }); 1170 } 1171 ConcurrencyUtils.waitForCompletion(futures); 1172 } 1173 1174 private void ddxt3da_subth(final int isgn, final double[][][] a, final boolean scale) { 1175 final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads(); 1176 int nt = 4 * rows; 1177 if (columns == 2) { 1178 nt >>= 1; 1179 } 1180 Future<?>[] futures = new Future[nthreads]; 1181 1182 for (int i = 0; i < nthreads; i++) { 1183 final int n0 = i; 1184 final int startt = nt * i; 1185 futures[i] = ConcurrencyUtils.submit(new Runnable() { 1186 1187 @Override 1188 public void run() { 1189 int idx2; 1190 if (isgn == -1) { 1191 for (int s = n0; s < slices; s += nthreads) { 1192 for (int r = 0; r < rows; r++) { 1193 dctColumns.forward(a[s][r], scale); 1194 } 1195 if (columns > 2) { 1196 for (int c = 0; c < columns; c += 4) { 1197 for (int r = 0; r < rows; r++) { 1198 idx2 = startt + rows + r; 1199 t[startt + r] = a[s][r][c]; 1200 t[idx2] = a[s][r][c + 1]; 1201 t[idx2 + rows] = a[s][r][c + 2]; 1202 t[idx2 + 2 * rows] = a[s][r][c + 3]; 1203 } 1204 dctRows.forward(t, startt, scale); 1205 dctRows.forward(t, startt + rows, scale); 1206 dctRows.forward(t, startt + 2 * rows, scale); 1207 dctRows.forward(t, startt + 3 * rows, scale); 1208 for (int r = 0; r < rows; r++) { 1209 idx2 = startt + rows + r; 1210 a[s][r][c] = t[startt + r]; 1211 a[s][r][c + 1] = t[idx2]; 1212 a[s][r][c + 2] = t[idx2 + rows]; 1213 a[s][r][c + 3] = t[idx2 + 2 * rows]; 1214 } 1215 } 1216 } else if (columns == 2) { 1217 for (int r = 0; r < rows; r++) { 1218 t[startt + r] = a[s][r][0]; 1219 t[startt + rows + r] = a[s][r][1]; 1220 } 1221 dctRows.forward(t, startt, scale); 1222 dctRows.forward(t, startt + rows, scale); 1223 for (int r = 0; r < rows; r++) { 1224 a[s][r][0] = t[startt + r]; 1225 a[s][r][1] = t[startt + rows + r]; 1226 } 1227 } 1228 } 1229 } else { 1230 for (int s = n0; s < slices; s += nthreads) { 1231 for (int r = 0; r < rows; r++) { 1232 dctColumns.inverse(a[s][r], scale); 1233 } 1234 if (columns > 2) { 1235 for (int c = 0; c < columns; c += 4) { 1236 for (int r = 0; r < rows; r++) { 1237 idx2 = startt + rows + r; 1238 t[startt + r] = a[s][r][c]; 1239 t[idx2] = a[s][r][c + 1]; 1240 t[idx2 + rows] = a[s][r][c + 2]; 1241 t[idx2 + 2 * rows] = a[s][r][c + 3]; 1242 } 1243 dctRows.inverse(t, startt, scale); 1244 dctRows.inverse(t, startt + rows, scale); 1245 dctRows.inverse(t, startt + 2 * rows, scale); 1246 dctRows.inverse(t, startt + 3 * rows, scale); 1247 for (int r = 0; r < rows; r++) { 1248 idx2 = startt + rows + r; 1249 a[s][r][c] = t[startt + r]; 1250 a[s][r][c + 1] = t[idx2]; 1251 a[s][r][c + 2] = t[idx2 + rows]; 1252 a[s][r][c + 3] = t[idx2 + 2 * rows]; 1253 } 1254 } 1255 } else if (columns == 2) { 1256 for (int r = 0; r < rows; r++) { 1257 t[startt + r] = a[s][r][0]; 1258 t[startt + rows + r] = a[s][r][1]; 1259 } 1260 dctRows.inverse(t, startt, scale); 1261 dctRows.inverse(t, startt + rows, scale); 1262 for (int r = 0; r < rows; r++) { 1263 a[s][r][0] = t[startt + r]; 1264 a[s][r][1] = t[startt + rows + r]; 1265 } 1266 } 1267 } 1268 } 1269 } 1270 }); 1271 } 1272 ConcurrencyUtils.waitForCompletion(futures); 1273 } 1274 1275 private void ddxt3db_subth(final int isgn, final double[] a, final boolean scale) { 1276 final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads(); 1277 int nt = 4 * slices; 1278 if (columns == 2) { 1279 nt >>= 1; 1280 } 1281 Future<?>[] futures = new Future[nthreads]; 1282 for (int i = 0; i < nthreads; i++) { 1283 final int n0 = i; 1284 final int startt = nt * i; 1285 futures[i] = ConcurrencyUtils.submit(new Runnable() { 1286 1287 @Override 1288 public void run() { 1289 int idx0, idx1, idx2; 1290 if (isgn == -1) { 1291 if (columns > 2) { 1292 for (int r = n0; r < rows; r += nthreads) { 1293 idx0 = r * rowStride; 1294 for (int c = 0; c < columns; c += 4) { 1295 for (int s = 0; s < slices; s++) { 1296 idx1 = s * sliceStride + idx0 + c; 1297 idx2 = startt + slices + s; 1298 t[startt + s] = a[idx1]; 1299 t[idx2] = a[idx1 + 1]; 1300 t[idx2 + slices] = a[idx1 + 2]; 1301 t[idx2 + 2 * slices] = a[idx1 + 3]; 1302 } 1303 dctSlices.forward(t, startt, scale); 1304 dctSlices.forward(t, startt + slices, scale); 1305 dctSlices.forward(t, startt + 2 * slices, scale); 1306 dctSlices.forward(t, startt + 3 * slices, scale); 1307 for (int s = 0; s < slices; s++) { 1308 idx1 = s * sliceStride + idx0 + c; 1309 idx2 = startt + slices + s; 1310 a[idx1] = t[startt + s]; 1311 a[idx1 + 1] = t[idx2]; 1312 a[idx1 + 2] = t[idx2 + slices]; 1313 a[idx1 + 3] = t[idx2 + 2 * slices]; 1314 } 1315 } 1316 } 1317 } else if (columns == 2) { 1318 for (int r = n0; r < rows; r += nthreads) { 1319 idx0 = r * rowStride; 1320 for (int s = 0; s < slices; s++) { 1321 idx1 = s * sliceStride + idx0; 1322 t[startt + s] = a[idx1]; 1323 t[startt + slices + s] = a[idx1 + 1]; 1324 } 1325 dctSlices.forward(t, startt, scale); 1326 dctSlices.forward(t, startt + slices, scale); 1327 for (int s = 0; s < slices; s++) { 1328 idx1 = s * sliceStride + idx0; 1329 a[idx1] = t[startt + s]; 1330 a[idx1 + 1] = t[startt + slices + s]; 1331 } 1332 } 1333 } 1334 } else { 1335 if (columns > 2) { 1336 for (int r = n0; r < rows; r += nthreads) { 1337 idx0 = r * rowStride; 1338 for (int c = 0; c < columns; c += 4) { 1339 for (int s = 0; s < slices; s++) { 1340 idx1 = s * sliceStride + idx0 + c; 1341 idx2 = startt + slices + s; 1342 t[startt + s] = a[idx1]; 1343 t[idx2] = a[idx1 + 1]; 1344 t[idx2 + slices] = a[idx1 + 2]; 1345 t[idx2 + 2 * slices] = a[idx1 + 3]; 1346 } 1347 dctSlices.inverse(t, startt, scale); 1348 dctSlices.inverse(t, startt + slices, scale); 1349 dctSlices.inverse(t, startt + 2 * slices, scale); 1350 dctSlices.inverse(t, startt + 3 * slices, scale); 1351 for (int s = 0; s < slices; s++) { 1352 idx1 = s * sliceStride + idx0 + c; 1353 idx2 = startt + slices + s; 1354 a[idx1] = t[startt + s]; 1355 a[idx1 + 1] = t[idx2]; 1356 a[idx1 + 2] = t[idx2 + slices]; 1357 a[idx1 + 3] = t[idx2 + 2 * slices]; 1358 } 1359 } 1360 } 1361 } else if (columns == 2) { 1362 for (int r = n0; r < rows; r += nthreads) { 1363 idx0 = r * rowStride; 1364 for (int s = 0; s < slices; s++) { 1365 idx1 = s * sliceStride + idx0; 1366 t[startt + s] = a[idx1]; 1367 t[startt + slices + s] = a[idx1 + 1]; 1368 } 1369 dctSlices.inverse(t, startt, scale); 1370 dctSlices.inverse(t, startt + slices, scale); 1371 1372 for (int s = 0; s < slices; s++) { 1373 idx1 = s * sliceStride + idx0; 1374 a[idx1] = t[startt + s]; 1375 a[idx1 + 1] = t[startt + slices + s]; 1376 } 1377 } 1378 } 1379 } 1380 } 1381 }); 1382 } 1383 ConcurrencyUtils.waitForCompletion(futures); 1384 } 1385 1386 private void ddxt3db_subth(final int isgn, final double[][][] a, final boolean scale) { 1387 final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads(); 1388 int nt = 4 * slices; 1389 if (columns == 2) { 1390 nt >>= 1; 1391 } 1392 Future<?>[] futures = new Future[nthreads]; 1393 1394 for (int i = 0; i < nthreads; i++) { 1395 final int n0 = i; 1396 final int startt = nt * i; 1397 futures[i] = ConcurrencyUtils.submit(new Runnable() { 1398 1399 @Override 1400 public void run() { 1401 int idx2; 1402 if (isgn == -1) { 1403 if (columns > 2) { 1404 for (int r = n0; r < rows; r += nthreads) { 1405 for (int c = 0; c < columns; c += 4) { 1406 for (int s = 0; s < slices; s++) { 1407 idx2 = startt + slices + s; 1408 t[startt + s] = a[s][r][c]; 1409 t[idx2] = a[s][r][c + 1]; 1410 t[idx2 + slices] = a[s][r][c + 2]; 1411 t[idx2 + 2 * slices] = a[s][r][c + 3]; 1412 } 1413 dctSlices.forward(t, startt, scale); 1414 dctSlices.forward(t, startt + slices, scale); 1415 dctSlices.forward(t, startt + 2 * slices, scale); 1416 dctSlices.forward(t, startt + 3 * slices, scale); 1417 for (int s = 0; s < slices; s++) { 1418 idx2 = startt + slices + s; 1419 a[s][r][c] = t[startt + s]; 1420 a[s][r][c + 1] = t[idx2]; 1421 a[s][r][c + 2] = t[idx2 + slices]; 1422 a[s][r][c + 3] = t[idx2 + 2 * slices]; 1423 } 1424 } 1425 } 1426 } else if (columns == 2) { 1427 for (int r = n0; r < rows; r += nthreads) { 1428 for (int s = 0; s < slices; s++) { 1429 t[startt + s] = a[s][r][0]; 1430 t[startt + slices + s] = a[s][r][1]; 1431 } 1432 dctSlices.forward(t, startt, scale); 1433 dctSlices.forward(t, startt + slices, scale); 1434 for (int s = 0; s < slices; s++) { 1435 a[s][r][0] = t[startt + s]; 1436 a[s][r][1] = t[startt + slices + s]; 1437 } 1438 } 1439 } 1440 } else { 1441 if (columns > 2) { 1442 for (int r = n0; r < rows; r += nthreads) { 1443 for (int c = 0; c < columns; c += 4) { 1444 for (int s = 0; s < slices; s++) { 1445 idx2 = startt + slices + s; 1446 t[startt + s] = a[s][r][c]; 1447 t[idx2] = a[s][r][c + 1]; 1448 t[idx2 + slices] = a[s][r][c + 2]; 1449 t[idx2 + 2 * slices] = a[s][r][c + 3]; 1450 } 1451 dctSlices.inverse(t, startt, scale); 1452 dctSlices.inverse(t, startt + slices, scale); 1453 dctSlices.inverse(t, startt + 2 * slices, scale); 1454 dctSlices.inverse(t, startt + 3 * slices, scale); 1455 for (int s = 0; s < slices; s++) { 1456 idx2 = startt + slices + s; 1457 a[s][r][c] = t[startt + s]; 1458 a[s][r][c + 1] = t[idx2]; 1459 a[s][r][c + 2] = t[idx2 + slices]; 1460 a[s][r][c + 3] = t[idx2 + 2 * slices]; 1461 } 1462 } 1463 } 1464 } else if (columns == 2) { 1465 for (int r = n0; r < rows; r += nthreads) { 1466 for (int s = 0; s < slices; s++) { 1467 t[startt + s] = a[s][r][0]; 1468 t[startt + slices + s] = a[s][r][1]; 1469 } 1470 dctSlices.inverse(t, startt, scale); 1471 dctSlices.inverse(t, startt + slices, scale); 1472 1473 for (int s = 0; s < slices; s++) { 1474 a[s][r][0] = t[startt + s]; 1475 a[s][r][1] = t[startt + slices + s]; 1476 } 1477 } 1478 } 1479 } 1480 } 1481 }); 1482 } 1483 ConcurrencyUtils.waitForCompletion(futures); 1484 } 1485}