001/* ***** BEGIN LICENSE BLOCK ***** 002 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 003 * 004 * The contents of this file are subject to the Mozilla Public License Version 005 * 1.1 (the "License"); you may not use this file except in compliance with 006 * the License. You may obtain a copy of the License at 007 * http://www.mozilla.org/MPL/ 008 * 009 * Software distributed under the License is distributed on an "AS IS" basis, 010 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 011 * for the specific language governing rights and limitations under the 012 * License. 013 * 014 * The Original Code is JTransforms. 015 * 016 * The Initial Developer of the Original Code is 017 * Piotr Wendykier, Emory University. 018 * Portions created by the Initial Developer are Copyright (C) 2007-2009 019 * the Initial Developer. All Rights Reserved. 020 * 021 * Alternatively, the contents of this file may be used under the terms of 022 * either the GNU General Public License Version 2 or later (the "GPL"), or 023 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 024 * in which case the provisions of the GPL or the LGPL are applicable instead 025 * of those above. If you wish to allow use of your version of this file only 026 * under the terms of either the GPL or the LGPL, and not to allow others to 027 * use your version of this file under the terms of the MPL, indicate your 028 * decision by deleting the provisions above and replace them with the notice 029 * and other provisions required by the GPL or the LGPL. If you do not delete 030 * the provisions above, a recipient may use your version of this file under 031 * the terms of any one of the MPL, the GPL or the LGPL. 032 * 033 * ***** END LICENSE BLOCK ***** */ 034 035package edu.emory.mathcs.jtransforms.dst; 036 037import java.util.concurrent.Future; 038 039import edu.emory.mathcs.utils.ConcurrencyUtils; 040 041/** 042 * Computes 3D Discrete Sine Transform (DST) of single precision data. The sizes 043 * of all three dimensions can be arbitrary numbers. This is a parallel 044 * implementation optimized for SMP systems.<br> 045 * <br> 046 * Part of code is derived from General Purpose FFT Package written by Takuya Ooura 047 * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html) 048 * 049 * @author Piotr Wendykier (piotr.wendykier@gmail.com) 050 * 051 */ 052public class FloatDST_3D { 053 054 private int slices; 055 056 private int rows; 057 058 private int columns; 059 060 private int sliceStride; 061 062 private int rowStride; 063 064 private float[] t; 065 066 private FloatDST_1D dstSlices, dstRows, dstColumns; 067 068 private int oldNthreads; 069 070 private int nt; 071 072 private boolean isPowerOfTwo = false; 073 074 private boolean useThreads = false; 075 076 /** 077 * Creates new instance of FloatDST_3D. 078 * 079 * @param slices 080 * number of slices 081 * @param rows 082 * number of rows 083 * @param columns 084 * number of columns 085 */ 086 public FloatDST_3D(int slices, int rows, int columns) { 087 if (slices <= 1 || rows <= 1 || columns <= 1) { 088 throw new IllegalArgumentException("slices, rows and columns must be greater than 1"); 089 } 090 this.slices = slices; 091 this.rows = rows; 092 this.columns = columns; 093 this.sliceStride = rows * columns; 094 this.rowStride = columns; 095 if (slices * rows * columns >= ConcurrencyUtils.getThreadsBeginN_3D()) { 096 this.useThreads = true; 097 } 098 if (ConcurrencyUtils.isPowerOf2(slices) && ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) { 099 isPowerOfTwo = true; 100 oldNthreads = ConcurrencyUtils.getNumberOfThreads(); 101 nt = slices; 102 if (nt < rows) { 103 nt = rows; 104 } 105 nt *= 4; 106 if (oldNthreads > 1) { 107 nt *= oldNthreads; 108 } 109 if (columns == 2) { 110 nt >>= 1; 111 } 112 t = new float[nt]; 113 } 114 dstSlices = new FloatDST_1D(slices); 115 if (slices == rows) { 116 dstRows = dstSlices; 117 } else { 118 dstRows = new FloatDST_1D(rows); 119 } 120 if (slices == columns) { 121 dstColumns = dstSlices; 122 } else if (rows == columns) { 123 dstColumns = dstRows; 124 } else { 125 dstColumns = new FloatDST_1D(columns); 126 } 127 } 128 129 /** 130 * Computes the 3D forward DST (DST-II) leaving the result in <code>a</code> 131 * . The data is stored in 1D array addressed in slice-major, then 132 * row-major, then column-major, in order of significance, i.e. the element 133 * (i,j,k) of 3D array x[slices][rows][columns] is stored in a[i*sliceStride 134 * + j*rowStride + k], where sliceStride = rows * columns and rowStride = 135 * columns. 136 * 137 * @param a 138 * data to transform 139 * @param scale 140 * if true then scaling is performed 141 */ 142 public void forward(final float[] a, final boolean scale) { 143 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 144 if (isPowerOfTwo) { 145 if (nthreads != oldNthreads) { 146 nt = slices; 147 if (nt < rows) { 148 nt = rows; 149 } 150 nt *= 4; 151 if (nthreads > 1) { 152 nt *= nthreads; 153 } 154 if (columns == 2) { 155 nt >>= 1; 156 } 157 t = new float[nt]; 158 oldNthreads = nthreads; 159 } 160 if ((nthreads > 1) && useThreads) { 161 ddxt3da_subth(-1, a, scale); 162 ddxt3db_subth(-1, a, scale); 163 } else { 164 ddxt3da_sub(-1, a, scale); 165 ddxt3db_sub(-1, a, scale); 166 } 167 } else { 168 if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) { 169 Future<?>[] futures = new Future[nthreads]; 170 int p = slices / nthreads; 171 for (int l = 0; l < nthreads; l++) { 172 final int firstSlice = l * p; 173 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 174 futures[l] = ConcurrencyUtils.submit(new Runnable() { 175 @Override 176 public void run() { 177 for (int s = firstSlice; s < lastSlice; s++) { 178 int idx1 = s * sliceStride; 179 for (int r = 0; r < rows; r++) { 180 dstColumns.forward(a, idx1 + r * rowStride, scale); 181 } 182 } 183 } 184 }); 185 } 186 ConcurrencyUtils.waitForCompletion(futures); 187 188 for (int l = 0; l < nthreads; l++) { 189 final int firstSlice = l * p; 190 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 191 futures[l] = ConcurrencyUtils.submit(new Runnable() { 192 @Override 193 public void run() { 194 float[] temp = new float[rows]; 195 for (int s = firstSlice; s < lastSlice; s++) { 196 int idx1 = s * sliceStride; 197 for (int c = 0; c < columns; c++) { 198 for (int r = 0; r < rows; r++) { 199 int idx3 = idx1 + r * rowStride + c; 200 temp[r] = a[idx3]; 201 } 202 dstRows.forward(temp, scale); 203 for (int r = 0; r < rows; r++) { 204 int idx3 = idx1 + r * rowStride + c; 205 a[idx3] = temp[r]; 206 } 207 } 208 } 209 } 210 }); 211 } 212 ConcurrencyUtils.waitForCompletion(futures); 213 214 p = rows / nthreads; 215 for (int l = 0; l < nthreads; l++) { 216 final int firstRow = l * p; 217 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 218 futures[l] = ConcurrencyUtils.submit(new Runnable() { 219 @Override 220 public void run() { 221 float[] temp = new float[slices]; 222 for (int r = firstRow; r < lastRow; r++) { 223 int idx1 = r * rowStride; 224 for (int c = 0; c < columns; c++) { 225 for (int s = 0; s < slices; s++) { 226 int idx3 = s * sliceStride + idx1 + c; 227 temp[s] = a[idx3]; 228 } 229 dstSlices.forward(temp, scale); 230 for (int s = 0; s < slices; s++) { 231 int idx3 = s * sliceStride + idx1 + c; 232 a[idx3] = temp[s]; 233 } 234 } 235 } 236 } 237 }); 238 } 239 ConcurrencyUtils.waitForCompletion(futures); 240 241 } else { 242 for (int s = 0; s < slices; s++) { 243 int idx1 = s * sliceStride; 244 for (int r = 0; r < rows; r++) { 245 dstColumns.forward(a, idx1 + r * rowStride, scale); 246 } 247 } 248 float[] temp = new float[rows]; 249 for (int s = 0; s < slices; s++) { 250 int idx1 = s * sliceStride; 251 for (int c = 0; c < columns; c++) { 252 for (int r = 0; r < rows; r++) { 253 int idx3 = idx1 + r * rowStride + c; 254 temp[r] = a[idx3]; 255 } 256 dstRows.forward(temp, scale); 257 for (int r = 0; r < rows; r++) { 258 int idx3 = idx1 + r * rowStride + c; 259 a[idx3] = temp[r]; 260 } 261 } 262 } 263 temp = new float[slices]; 264 for (int r = 0; r < rows; r++) { 265 int idx1 = r * rowStride; 266 for (int c = 0; c < columns; c++) { 267 for (int s = 0; s < slices; s++) { 268 int idx3 = s * sliceStride + idx1 + c; 269 temp[s] = a[idx3]; 270 } 271 dstSlices.forward(temp, scale); 272 for (int s = 0; s < slices; s++) { 273 int idx3 = s * sliceStride + idx1 + c; 274 a[idx3] = temp[s]; 275 } 276 } 277 } 278 } 279 } 280 } 281 282 /** 283 * Computes the 3D forward DST (DST-II) leaving the result in <code>a</code> 284 * . The data is stored in 3D array. 285 * 286 * @param a 287 * data to transform 288 * @param scale 289 * if true then scaling is performed 290 */ 291 public void forward(final float[][][] a, final boolean scale) { 292 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 293 if (isPowerOfTwo) { 294 if (nthreads != oldNthreads) { 295 nt = slices; 296 if (nt < rows) { 297 nt = rows; 298 } 299 nt *= 4; 300 if (nthreads > 1) { 301 nt *= nthreads; 302 } 303 if (columns == 2) { 304 nt >>= 1; 305 } 306 t = new float[nt]; 307 oldNthreads = nthreads; 308 } 309 if ((nthreads > 1) && useThreads) { 310 ddxt3da_subth(-1, a, scale); 311 ddxt3db_subth(-1, a, scale); 312 } else { 313 ddxt3da_sub(-1, a, scale); 314 ddxt3db_sub(-1, a, scale); 315 } 316 } else { 317 if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) { 318 Future<?>[] futures = new Future[nthreads]; 319 int p = slices / nthreads; 320 for (int l = 0; l < nthreads; l++) { 321 final int firstSlice = l * p; 322 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 323 futures[l] = ConcurrencyUtils.submit(new Runnable() { 324 @Override 325 public void run() { 326 for (int s = firstSlice; s < lastSlice; s++) { 327 for (int r = 0; r < rows; r++) { 328 dstColumns.forward(a[s][r], scale); 329 } 330 } 331 } 332 }); 333 } 334 ConcurrencyUtils.waitForCompletion(futures); 335 336 for (int l = 0; l < nthreads; l++) { 337 final int firstSlice = l * p; 338 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 339 futures[l] = ConcurrencyUtils.submit(new Runnable() { 340 @Override 341 public void run() { 342 float[] temp = new float[rows]; 343 for (int s = firstSlice; s < lastSlice; s++) { 344 for (int c = 0; c < columns; c++) { 345 for (int r = 0; r < rows; r++) { 346 temp[r] = a[s][r][c]; 347 } 348 dstRows.forward(temp, scale); 349 for (int r = 0; r < rows; r++) { 350 a[s][r][c] = temp[r]; 351 } 352 } 353 } 354 } 355 }); 356 } 357 ConcurrencyUtils.waitForCompletion(futures); 358 359 p = rows / nthreads; 360 for (int l = 0; l < nthreads; l++) { 361 final int firstRow = l * p; 362 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 363 futures[l] = ConcurrencyUtils.submit(new Runnable() { 364 @Override 365 public void run() { 366 float[] temp = new float[slices]; 367 for (int r = firstRow; r < lastRow; r++) { 368 for (int c = 0; c < columns; c++) { 369 for (int s = 0; s < slices; s++) { 370 temp[s] = a[s][r][c]; 371 } 372 dstSlices.forward(temp, scale); 373 for (int s = 0; s < slices; s++) { 374 a[s][r][c] = temp[s]; 375 } 376 } 377 } 378 } 379 }); 380 } 381 ConcurrencyUtils.waitForCompletion(futures); 382 383 } else { 384 for (int s = 0; s < slices; s++) { 385 for (int r = 0; r < rows; r++) { 386 dstColumns.forward(a[s][r], scale); 387 } 388 } 389 float[] temp = new float[rows]; 390 for (int s = 0; s < slices; s++) { 391 for (int c = 0; c < columns; c++) { 392 for (int r = 0; r < rows; r++) { 393 temp[r] = a[s][r][c]; 394 } 395 dstRows.forward(temp, scale); 396 for (int r = 0; r < rows; r++) { 397 a[s][r][c] = temp[r]; 398 } 399 } 400 } 401 temp = new float[slices]; 402 for (int r = 0; r < rows; r++) { 403 for (int c = 0; c < columns; c++) { 404 for (int s = 0; s < slices; s++) { 405 temp[s] = a[s][r][c]; 406 } 407 dstSlices.forward(temp, scale); 408 for (int s = 0; s < slices; s++) { 409 a[s][r][c] = temp[s]; 410 } 411 } 412 } 413 } 414 } 415 } 416 417 /** 418 * Computes the 3D inverse DST (DST-III) leaving the result in 419 * <code>a</code>. The data is stored in 1D array addressed in slice-major, 420 * then row-major, then column-major, in order of significance, i.e. the 421 * element (i,j,k) of 3D array x[slices][rows][columns] is stored in 422 * a[i*sliceStride + j*rowStride + k], where sliceStride = rows * columns 423 * and rowStride = columns. 424 * 425 * @param a 426 * data to transform 427 * @param scale 428 * if true then scaling is performed 429 */ 430 public void inverse(final float[] a, final boolean scale) { 431 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 432 if (isPowerOfTwo) { 433 if (nthreads != oldNthreads) { 434 nt = slices; 435 if (nt < rows) { 436 nt = rows; 437 } 438 nt *= 4; 439 if (nthreads > 1) { 440 nt *= nthreads; 441 } 442 if (columns == 2) { 443 nt >>= 1; 444 } 445 t = new float[nt]; 446 oldNthreads = nthreads; 447 } 448 if ((nthreads > 1) && useThreads) { 449 ddxt3da_subth(1, a, scale); 450 ddxt3db_subth(1, a, scale); 451 } else { 452 ddxt3da_sub(1, a, scale); 453 ddxt3db_sub(1, a, scale); 454 } 455 } else { 456 if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) { 457 Future<?>[] futures = new Future[nthreads]; 458 int p = slices / nthreads; 459 for (int l = 0; l < nthreads; l++) { 460 final int firstSlice = l * p; 461 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 462 futures[l] = ConcurrencyUtils.submit(new Runnable() { 463 @Override 464 public void run() { 465 for (int s = firstSlice; s < lastSlice; s++) { 466 int idx1 = s * sliceStride; 467 for (int r = 0; r < rows; r++) { 468 dstColumns.inverse(a, idx1 + r * rowStride, scale); 469 } 470 } 471 } 472 }); 473 } 474 ConcurrencyUtils.waitForCompletion(futures); 475 476 for (int l = 0; l < nthreads; l++) { 477 final int firstSlice = l * p; 478 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 479 futures[l] = ConcurrencyUtils.submit(new Runnable() { 480 @Override 481 public void run() { 482 float[] temp = new float[rows]; 483 for (int s = firstSlice; s < lastSlice; s++) { 484 int idx1 = s * sliceStride; 485 for (int c = 0; c < columns; c++) { 486 for (int r = 0; r < rows; r++) { 487 int idx3 = idx1 + r * rowStride + c; 488 temp[r] = a[idx3]; 489 } 490 dstRows.inverse(temp, scale); 491 for (int r = 0; r < rows; r++) { 492 int idx3 = idx1 + r * rowStride + c; 493 a[idx3] = temp[r]; 494 } 495 } 496 } 497 } 498 }); 499 } 500 ConcurrencyUtils.waitForCompletion(futures); 501 502 p = rows / nthreads; 503 for (int l = 0; l < nthreads; l++) { 504 final int firstRow = l * p; 505 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 506 futures[l] = ConcurrencyUtils.submit(new Runnable() { 507 @Override 508 public void run() { 509 float[] temp = new float[slices]; 510 for (int r = firstRow; r < lastRow; r++) { 511 int idx1 = r * rowStride; 512 for (int c = 0; c < columns; c++) { 513 for (int s = 0; s < slices; s++) { 514 int idx3 = s * sliceStride + idx1 + c; 515 temp[s] = a[idx3]; 516 } 517 dstSlices.inverse(temp, scale); 518 for (int s = 0; s < slices; s++) { 519 int idx3 = s * sliceStride + idx1 + c; 520 a[idx3] = temp[s]; 521 } 522 } 523 } 524 } 525 }); 526 } 527 ConcurrencyUtils.waitForCompletion(futures); 528 529 } else { 530 for (int s = 0; s < slices; s++) { 531 int idx1 = s * sliceStride; 532 for (int r = 0; r < rows; r++) { 533 dstColumns.inverse(a, idx1 + r * rowStride, scale); 534 } 535 } 536 float[] temp = new float[rows]; 537 for (int s = 0; s < slices; s++) { 538 int idx1 = s * sliceStride; 539 for (int c = 0; c < columns; c++) { 540 for (int r = 0; r < rows; r++) { 541 int idx3 = idx1 + r * rowStride + c; 542 temp[r] = a[idx3]; 543 } 544 dstRows.inverse(temp, scale); 545 for (int r = 0; r < rows; r++) { 546 int idx3 = idx1 + r * rowStride + c; 547 a[idx3] = temp[r]; 548 } 549 } 550 } 551 temp = new float[slices]; 552 for (int r = 0; r < rows; r++) { 553 int idx1 = r * rowStride; 554 for (int c = 0; c < columns; c++) { 555 for (int s = 0; s < slices; s++) { 556 int idx3 = s * sliceStride + idx1 + c; 557 temp[s] = a[idx3]; 558 } 559 dstSlices.inverse(temp, scale); 560 for (int s = 0; s < slices; s++) { 561 int idx3 = s * sliceStride + idx1 + c; 562 a[idx3] = temp[s]; 563 } 564 } 565 } 566 } 567 } 568 569 } 570 571 /** 572 * Computes the 3D inverse DST (DST-III) leaving the result in 573 * <code>a</code>. The data is stored in 3D array. 574 * 575 * @param a 576 * data to transform 577 * @param scale 578 * if true then scaling is performed 579 */ 580 public void inverse(final float[][][] a, final boolean scale) { 581 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 582 if (isPowerOfTwo) { 583 if (nthreads != oldNthreads) { 584 nt = slices; 585 if (nt < rows) { 586 nt = rows; 587 } 588 nt *= 4; 589 if (nthreads > 1) { 590 nt *= nthreads; 591 } 592 if (columns == 2) { 593 nt >>= 1; 594 } 595 t = new float[nt]; 596 oldNthreads = nthreads; 597 } 598 if ((nthreads > 1) && useThreads) { 599 ddxt3da_subth(1, a, scale); 600 ddxt3db_subth(1, a, scale); 601 } else { 602 ddxt3da_sub(1, a, scale); 603 ddxt3db_sub(1, a, scale); 604 } 605 } else { 606 if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) { 607 Future<?>[] futures = new Future[nthreads]; 608 int p = slices / nthreads; 609 for (int l = 0; l < nthreads; l++) { 610 final int firstSlice = l * p; 611 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 612 futures[l] = ConcurrencyUtils.submit(new Runnable() { 613 @Override 614 public void run() { 615 for (int s = firstSlice; s < lastSlice; s++) { 616 for (int r = 0; r < rows; r++) { 617 dstColumns.inverse(a[s][r], scale); 618 } 619 } 620 } 621 }); 622 } 623 ConcurrencyUtils.waitForCompletion(futures); 624 625 for (int l = 0; l < nthreads; l++) { 626 final int firstSlice = l * p; 627 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 628 futures[l] = ConcurrencyUtils.submit(new Runnable() { 629 @Override 630 public void run() { 631 float[] temp = new float[rows]; 632 for (int s = firstSlice; s < lastSlice; s++) { 633 for (int c = 0; c < columns; c++) { 634 for (int r = 0; r < rows; r++) { 635 temp[r] = a[s][r][c]; 636 } 637 dstRows.inverse(temp, scale); 638 for (int r = 0; r < rows; r++) { 639 a[s][r][c] = temp[r]; 640 } 641 } 642 } 643 } 644 }); 645 } 646 ConcurrencyUtils.waitForCompletion(futures); 647 648 p = rows / nthreads; 649 for (int l = 0; l < nthreads; l++) { 650 final int firstRow = l * p; 651 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 652 futures[l] = ConcurrencyUtils.submit(new Runnable() { 653 @Override 654 public void run() { 655 float[] temp = new float[slices]; 656 for (int r = firstRow; r < lastRow; r++) { 657 for (int c = 0; c < columns; c++) { 658 for (int s = 0; s < slices; s++) { 659 temp[s] = a[s][r][c]; 660 } 661 dstSlices.inverse(temp, scale); 662 for (int s = 0; s < slices; s++) { 663 a[s][r][c] = temp[s]; 664 } 665 } 666 } 667 } 668 }); 669 } 670 ConcurrencyUtils.waitForCompletion(futures); 671 672 } else { 673 for (int s = 0; s < slices; s++) { 674 for (int r = 0; r < rows; r++) { 675 dstColumns.inverse(a[s][r], scale); 676 } 677 } 678 float[] temp = new float[rows]; 679 for (int s = 0; s < slices; s++) { 680 for (int c = 0; c < columns; c++) { 681 for (int r = 0; r < rows; r++) { 682 temp[r] = a[s][r][c]; 683 } 684 dstRows.inverse(temp, scale); 685 for (int r = 0; r < rows; r++) { 686 a[s][r][c] = temp[r]; 687 } 688 } 689 } 690 temp = new float[slices]; 691 for (int r = 0; r < rows; r++) { 692 for (int c = 0; c < columns; c++) { 693 for (int s = 0; s < slices; s++) { 694 temp[s] = a[s][r][c]; 695 } 696 dstSlices.inverse(temp, scale); 697 for (int s = 0; s < slices; s++) { 698 a[s][r][c] = temp[s]; 699 } 700 } 701 } 702 } 703 } 704 } 705 706 private void ddxt3da_sub(int isgn, float[] a, boolean scale) { 707 int idx0, idx1, idx2; 708 709 if (isgn == -1) { 710 for (int s = 0; s < slices; s++) { 711 idx0 = s * sliceStride; 712 for (int r = 0; r < rows; r++) { 713 dstColumns.forward(a, idx0 + r * rowStride, scale); 714 } 715 if (columns > 2) { 716 for (int c = 0; c < columns; c += 4) { 717 for (int r = 0; r < rows; r++) { 718 idx1 = idx0 + r * rowStride + c; 719 idx2 = rows + r; 720 t[r] = a[idx1]; 721 t[idx2] = a[idx1 + 1]; 722 t[idx2 + rows] = a[idx1 + 2]; 723 t[idx2 + 2 * rows] = a[idx1 + 3]; 724 } 725 dstRows.forward(t, 0, scale); 726 dstRows.forward(t, rows, scale); 727 dstRows.forward(t, 2 * rows, scale); 728 dstRows.forward(t, 3 * rows, scale); 729 for (int r = 0; r < rows; r++) { 730 idx1 = idx0 + r * rowStride + c; 731 idx2 = rows + r; 732 a[idx1] = t[r]; 733 a[idx1 + 1] = t[idx2]; 734 a[idx1 + 2] = t[idx2 + rows]; 735 a[idx1 + 3] = t[idx2 + 2 * rows]; 736 } 737 } 738 } else if (columns == 2) { 739 for (int r = 0; r < rows; r++) { 740 idx1 = idx0 + r * rowStride; 741 t[r] = a[idx1]; 742 t[rows + r] = a[idx1 + 1]; 743 } 744 dstRows.forward(t, 0, scale); 745 dstRows.forward(t, rows, scale); 746 for (int r = 0; r < rows; r++) { 747 idx1 = idx0 + r * rowStride; 748 a[idx1] = t[r]; 749 a[idx1 + 1] = t[rows + r]; 750 } 751 } 752 } 753 } else { 754 for (int s = 0; s < slices; s++) { 755 idx0 = s * sliceStride; 756 for (int r = 0; r < rows; r++) { 757 dstColumns.inverse(a, idx0 + r * rowStride, scale); 758 } 759 if (columns > 2) { 760 for (int c = 0; c < columns; c += 4) { 761 for (int r = 0; r < rows; r++) { 762 idx1 = idx0 + r * rowStride + c; 763 idx2 = rows + r; 764 t[r] = a[idx1]; 765 t[idx2] = a[idx1 + 1]; 766 t[idx2 + rows] = a[idx1 + 2]; 767 t[idx2 + 2 * rows] = a[idx1 + 3]; 768 } 769 dstRows.inverse(t, 0, scale); 770 dstRows.inverse(t, rows, scale); 771 dstRows.inverse(t, 2 * rows, scale); 772 dstRows.inverse(t, 3 * rows, scale); 773 for (int r = 0; r < rows; r++) { 774 idx1 = idx0 + r * rowStride + c; 775 idx2 = rows + r; 776 a[idx1] = t[r]; 777 a[idx1 + 1] = t[idx2]; 778 a[idx1 + 2] = t[idx2 + rows]; 779 a[idx1 + 3] = t[idx2 + 2 * rows]; 780 } 781 } 782 } else if (columns == 2) { 783 for (int r = 0; r < rows; r++) { 784 idx1 = idx0 + r * rowStride; 785 t[r] = a[idx1]; 786 t[rows + r] = a[idx1 + 1]; 787 } 788 dstRows.inverse(t, 0, scale); 789 dstRows.inverse(t, rows, scale); 790 for (int r = 0; r < rows; r++) { 791 idx1 = idx0 + r * rowStride; 792 a[idx1] = t[r]; 793 a[idx1 + 1] = t[rows + r]; 794 } 795 } 796 } 797 } 798 } 799 800 private void ddxt3da_sub(int isgn, float[][][] a, boolean scale) { 801 int idx2; 802 803 if (isgn == -1) { 804 for (int s = 0; s < slices; s++) { 805 for (int r = 0; r < rows; r++) { 806 dstColumns.forward(a[s][r], scale); 807 } 808 if (columns > 2) { 809 for (int c = 0; c < columns; c += 4) { 810 for (int r = 0; r < rows; r++) { 811 idx2 = rows + r; 812 t[r] = a[s][r][c]; 813 t[idx2] = a[s][r][c + 1]; 814 t[idx2 + rows] = a[s][r][c + 2]; 815 t[idx2 + 2 * rows] = a[s][r][c + 3]; 816 } 817 dstRows.forward(t, 0, scale); 818 dstRows.forward(t, rows, scale); 819 dstRows.forward(t, 2 * rows, scale); 820 dstRows.forward(t, 3 * rows, scale); 821 for (int r = 0; r < rows; r++) { 822 idx2 = rows + r; 823 a[s][r][c] = t[r]; 824 a[s][r][c + 1] = t[idx2]; 825 a[s][r][c + 2] = t[idx2 + rows]; 826 a[s][r][c + 3] = t[idx2 + 2 * rows]; 827 } 828 } 829 } else if (columns == 2) { 830 for (int r = 0; r < rows; r++) { 831 t[r] = a[s][r][0]; 832 t[rows + r] = a[s][r][1]; 833 } 834 dstRows.forward(t, 0, scale); 835 dstRows.forward(t, rows, scale); 836 for (int r = 0; r < rows; r++) { 837 a[s][r][0] = t[r]; 838 a[s][r][1] = t[rows + r]; 839 } 840 } 841 } 842 } else { 843 for (int s = 0; s < slices; s++) { 844 for (int r = 0; r < rows; r++) { 845 dstColumns.inverse(a[s][r], scale); 846 } 847 if (columns > 2) { 848 for (int c = 0; c < columns; c += 4) { 849 for (int r = 0; r < rows; r++) { 850 idx2 = rows + r; 851 t[r] = a[s][r][c]; 852 t[idx2] = a[s][r][c + 1]; 853 t[idx2 + rows] = a[s][r][c + 2]; 854 t[idx2 + 2 * rows] = a[s][r][c + 3]; 855 } 856 dstRows.inverse(t, 0, scale); 857 dstRows.inverse(t, rows, scale); 858 dstRows.inverse(t, 2 * rows, scale); 859 dstRows.inverse(t, 3 * rows, scale); 860 for (int r = 0; r < rows; r++) { 861 idx2 = rows + r; 862 a[s][r][c] = t[r]; 863 a[s][r][c + 1] = t[idx2]; 864 a[s][r][c + 2] = t[idx2 + rows]; 865 a[s][r][c + 3] = t[idx2 + 2 * rows]; 866 } 867 } 868 } else if (columns == 2) { 869 for (int r = 0; r < rows; r++) { 870 t[r] = a[s][r][0]; 871 t[rows + r] = a[s][r][1]; 872 } 873 dstRows.inverse(t, 0, scale); 874 dstRows.inverse(t, rows, scale); 875 for (int r = 0; r < rows; r++) { 876 a[s][r][0] = t[r]; 877 a[s][r][1] = t[rows + r]; 878 } 879 } 880 } 881 } 882 } 883 884 private void ddxt3db_sub(int isgn, float[] a, boolean scale) { 885 int idx0, idx1, idx2; 886 887 if (isgn == -1) { 888 if (columns > 2) { 889 for (int r = 0; r < rows; r++) { 890 idx0 = r * rowStride; 891 for (int c = 0; c < columns; c += 4) { 892 for (int s = 0; s < slices; s++) { 893 idx1 = s * sliceStride + idx0 + c; 894 idx2 = slices + s; 895 t[s] = a[idx1]; 896 t[idx2] = a[idx1 + 1]; 897 t[idx2 + slices] = a[idx1 + 2]; 898 t[idx2 + 2 * slices] = a[idx1 + 3]; 899 } 900 dstSlices.forward(t, 0, scale); 901 dstSlices.forward(t, slices, scale); 902 dstSlices.forward(t, 2 * slices, scale); 903 dstSlices.forward(t, 3 * slices, scale); 904 for (int s = 0; s < slices; s++) { 905 idx1 = s * sliceStride + idx0 + c; 906 idx2 = slices + s; 907 a[idx1] = t[s]; 908 a[idx1 + 1] = t[idx2]; 909 a[idx1 + 2] = t[idx2 + slices]; 910 a[idx1 + 3] = t[idx2 + 2 * slices]; 911 } 912 } 913 } 914 } else if (columns == 2) { 915 for (int r = 0; r < rows; r++) { 916 idx0 = r * rowStride; 917 for (int s = 0; s < slices; s++) { 918 idx1 = s * sliceStride + idx0; 919 t[s] = a[idx1]; 920 t[slices + s] = a[idx1 + 1]; 921 } 922 dstSlices.forward(t, 0, scale); 923 dstSlices.forward(t, slices, scale); 924 for (int s = 0; s < slices; s++) { 925 idx1 = s * sliceStride + idx0; 926 a[idx1] = t[s]; 927 a[idx1 + 1] = t[slices + s]; 928 } 929 } 930 } 931 } else { 932 if (columns > 2) { 933 for (int r = 0; r < rows; r++) { 934 idx0 = r * rowStride; 935 for (int c = 0; c < columns; c += 4) { 936 for (int s = 0; s < slices; s++) { 937 idx1 = s * sliceStride + idx0 + c; 938 idx2 = slices + s; 939 t[s] = a[idx1]; 940 t[idx2] = a[idx1 + 1]; 941 t[idx2 + slices] = a[idx1 + 2]; 942 t[idx2 + 2 * slices] = a[idx1 + 3]; 943 } 944 dstSlices.inverse(t, 0, scale); 945 dstSlices.inverse(t, slices, scale); 946 dstSlices.inverse(t, 2 * slices, scale); 947 dstSlices.inverse(t, 3 * slices, scale); 948 949 for (int s = 0; s < slices; s++) { 950 idx1 = s * sliceStride + idx0 + c; 951 idx2 = slices + s; 952 a[idx1] = t[s]; 953 a[idx1 + 1] = t[idx2]; 954 a[idx1 + 2] = t[idx2 + slices]; 955 a[idx1 + 3] = t[idx2 + 2 * slices]; 956 } 957 } 958 } 959 } else if (columns == 2) { 960 for (int r = 0; r < rows; r++) { 961 idx0 = r * rowStride; 962 for (int s = 0; s < slices; s++) { 963 idx1 = s * sliceStride + idx0; 964 t[s] = a[idx1]; 965 t[slices + s] = a[idx1 + 1]; 966 } 967 dstSlices.inverse(t, 0, scale); 968 dstSlices.inverse(t, slices, scale); 969 for (int s = 0; s < slices; s++) { 970 idx1 = s * sliceStride + idx0; 971 a[idx1] = t[s]; 972 a[idx1 + 1] = t[slices + s]; 973 } 974 } 975 } 976 } 977 } 978 979 private void ddxt3db_sub(int isgn, float[][][] a, boolean scale) { 980 int idx2; 981 982 if (isgn == -1) { 983 if (columns > 2) { 984 for (int r = 0; r < rows; r++) { 985 for (int c = 0; c < columns; c += 4) { 986 for (int s = 0; s < slices; s++) { 987 idx2 = slices + s; 988 t[s] = a[s][r][c]; 989 t[idx2] = a[s][r][c + 1]; 990 t[idx2 + slices] = a[s][r][c + 2]; 991 t[idx2 + 2 * slices] = a[s][r][c + 3]; 992 } 993 dstSlices.forward(t, 0, scale); 994 dstSlices.forward(t, slices, scale); 995 dstSlices.forward(t, 2 * slices, scale); 996 dstSlices.forward(t, 3 * slices, scale); 997 for (int s = 0; s < slices; s++) { 998 idx2 = slices + s; 999 a[s][r][c] = t[s]; 1000 a[s][r][c + 1] = t[idx2]; 1001 a[s][r][c + 2] = t[idx2 + slices]; 1002 a[s][r][c + 3] = t[idx2 + 2 * slices]; 1003 } 1004 } 1005 } 1006 } else if (columns == 2) { 1007 for (int r = 0; r < rows; r++) { 1008 for (int s = 0; s < slices; s++) { 1009 t[s] = a[s][r][0]; 1010 t[slices + s] = a[s][r][1]; 1011 } 1012 dstSlices.forward(t, 0, scale); 1013 dstSlices.forward(t, slices, scale); 1014 for (int s = 0; s < slices; s++) { 1015 a[s][r][0] = t[s]; 1016 a[s][r][1] = t[slices + s]; 1017 } 1018 } 1019 } 1020 } else { 1021 if (columns > 2) { 1022 for (int r = 0; r < rows; r++) { 1023 for (int c = 0; c < columns; c += 4) { 1024 for (int s = 0; s < slices; s++) { 1025 idx2 = slices + s; 1026 t[s] = a[s][r][c]; 1027 t[idx2] = a[s][r][c + 1]; 1028 t[idx2 + slices] = a[s][r][c + 2]; 1029 t[idx2 + 2 * slices] = a[s][r][c + 3]; 1030 } 1031 dstSlices.inverse(t, 0, scale); 1032 dstSlices.inverse(t, slices, scale); 1033 dstSlices.inverse(t, 2 * slices, scale); 1034 dstSlices.inverse(t, 3 * slices, scale); 1035 1036 for (int s = 0; s < slices; s++) { 1037 idx2 = slices + s; 1038 a[s][r][c] = t[s]; 1039 a[s][r][c + 1] = t[idx2]; 1040 a[s][r][c + 2] = t[idx2 + slices]; 1041 a[s][r][c + 3] = t[idx2 + 2 * slices]; 1042 } 1043 } 1044 } 1045 } else if (columns == 2) { 1046 for (int r = 0; r < rows; r++) { 1047 for (int s = 0; s < slices; s++) { 1048 t[s] = a[s][r][0]; 1049 t[slices + s] = a[s][r][1]; 1050 } 1051 dstSlices.inverse(t, 0, scale); 1052 dstSlices.inverse(t, slices, scale); 1053 for (int s = 0; s < slices; s++) { 1054 a[s][r][0] = t[s]; 1055 a[s][r][1] = t[slices + s]; 1056 } 1057 } 1058 } 1059 } 1060 } 1061 1062 private void ddxt3da_subth(final int isgn, final float[] a, final boolean scale) { 1063 final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads(); 1064 1065 int nt = 4 * rows; 1066 if (columns == 2) { 1067 nt >>= 1; 1068 } 1069 Future<?>[] futures = new Future[nthreads]; 1070 1071 for (int i = 0; i < nthreads; i++) { 1072 final int n0 = i; 1073 final int startt = nt * i; 1074 futures[i] = ConcurrencyUtils.submit(new Runnable() { 1075 1076 @Override 1077 public void run() { 1078 int idx0, idx1, idx2; 1079 if (isgn == -1) { 1080 for (int s = n0; s < slices; s += nthreads) { 1081 idx0 = s * sliceStride; 1082 for (int r = 0; r < rows; r++) { 1083 dstColumns.forward(a, idx0 + r * rowStride, scale); 1084 } 1085 if (columns > 2) { 1086 for (int c = 0; c < columns; c += 4) { 1087 for (int r = 0; r < rows; r++) { 1088 idx1 = idx0 + r * rowStride + c; 1089 idx2 = startt + rows + r; 1090 t[startt + r] = a[idx1]; 1091 t[idx2] = a[idx1 + 1]; 1092 t[idx2 + rows] = a[idx1 + 2]; 1093 t[idx2 + 2 * rows] = a[idx1 + 3]; 1094 } 1095 dstRows.forward(t, startt, scale); 1096 dstRows.forward(t, startt + rows, scale); 1097 dstRows.forward(t, startt + 2 * rows, scale); 1098 dstRows.forward(t, startt + 3 * rows, scale); 1099 for (int r = 0; r < rows; r++) { 1100 idx1 = idx0 + r * rowStride + c; 1101 idx2 = startt + rows + r; 1102 a[idx1] = t[startt + r]; 1103 a[idx1 + 1] = t[idx2]; 1104 a[idx1 + 2] = t[idx2 + rows]; 1105 a[idx1 + 3] = t[idx2 + 2 * rows]; 1106 } 1107 } 1108 } else if (columns == 2) { 1109 for (int r = 0; r < rows; r++) { 1110 idx1 = idx0 + r * rowStride; 1111 t[startt + r] = a[idx1]; 1112 t[startt + rows + r] = a[idx1 + 1]; 1113 } 1114 dstRows.forward(t, startt, scale); 1115 dstRows.forward(t, startt + rows, scale); 1116 for (int r = 0; r < rows; r++) { 1117 idx1 = idx0 + r * rowStride; 1118 a[idx1] = t[startt + r]; 1119 a[idx1 + 1] = t[startt + rows + r]; 1120 } 1121 } 1122 } 1123 } else { 1124 for (int s = n0; s < slices; s += nthreads) { 1125 idx0 = s * sliceStride; 1126 for (int r = 0; r < rows; r++) { 1127 dstColumns.inverse(a, idx0 + r * rowStride, scale); 1128 } 1129 if (columns > 2) { 1130 for (int c = 0; c < columns; c += 4) { 1131 for (int r = 0; r < rows; r++) { 1132 idx1 = idx0 + r * rowStride + c; 1133 idx2 = startt + rows + r; 1134 t[startt + r] = a[idx1]; 1135 t[idx2] = a[idx1 + 1]; 1136 t[idx2 + rows] = a[idx1 + 2]; 1137 t[idx2 + 2 * rows] = a[idx1 + 3]; 1138 } 1139 dstRows.inverse(t, startt, scale); 1140 dstRows.inverse(t, startt + rows, scale); 1141 dstRows.inverse(t, startt + 2 * rows, scale); 1142 dstRows.inverse(t, startt + 3 * rows, scale); 1143 for (int r = 0; r < rows; r++) { 1144 idx1 = idx0 + r * rowStride + c; 1145 idx2 = startt + rows + r; 1146 a[idx1] = t[startt + r]; 1147 a[idx1 + 1] = t[idx2]; 1148 a[idx1 + 2] = t[idx2 + rows]; 1149 a[idx1 + 3] = t[idx2 + 2 * rows]; 1150 } 1151 } 1152 } else if (columns == 2) { 1153 for (int r = 0; r < rows; r++) { 1154 idx1 = idx0 + r * rowStride; 1155 t[startt + r] = a[idx1]; 1156 t[startt + rows + r] = a[idx1 + 1]; 1157 } 1158 dstRows.inverse(t, startt, scale); 1159 dstRows.inverse(t, startt + rows, scale); 1160 for (int r = 0; r < rows; r++) { 1161 idx1 = idx0 + r * rowStride; 1162 a[idx1] = t[startt + r]; 1163 a[idx1 + 1] = t[startt + rows + r]; 1164 } 1165 } 1166 } 1167 } 1168 } 1169 }); 1170 } 1171 ConcurrencyUtils.waitForCompletion(futures); 1172 } 1173 1174 private void ddxt3da_subth(final int isgn, final float[][][] a, final boolean scale) { 1175 final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads(); 1176 int nt = 4 * rows; 1177 if (columns == 2) { 1178 nt >>= 1; 1179 } 1180 Future<?>[] futures = new Future[nthreads]; 1181 1182 for (int i = 0; i < nthreads; i++) { 1183 final int n0 = i; 1184 final int startt = nt * i; 1185 futures[i] = ConcurrencyUtils.submit(new Runnable() { 1186 1187 @Override 1188 public void run() { 1189 int idx2; 1190 if (isgn == -1) { 1191 for (int s = n0; s < slices; s += nthreads) { 1192 for (int r = 0; r < rows; r++) { 1193 dstColumns.forward(a[s][r], scale); 1194 } 1195 if (columns > 2) { 1196 for (int c = 0; c < columns; c += 4) { 1197 for (int r = 0; r < rows; r++) { 1198 idx2 = startt + rows + r; 1199 t[startt + r] = a[s][r][c]; 1200 t[idx2] = a[s][r][c + 1]; 1201 t[idx2 + rows] = a[s][r][c + 2]; 1202 t[idx2 + 2 * rows] = a[s][r][c + 3]; 1203 } 1204 dstRows.forward(t, startt, scale); 1205 dstRows.forward(t, startt + rows, scale); 1206 dstRows.forward(t, startt + 2 * rows, scale); 1207 dstRows.forward(t, startt + 3 * rows, scale); 1208 for (int r = 0; r < rows; r++) { 1209 idx2 = startt + rows + r; 1210 a[s][r][c] = t[startt + r]; 1211 a[s][r][c + 1] = t[idx2]; 1212 a[s][r][c + 2] = t[idx2 + rows]; 1213 a[s][r][c + 3] = t[idx2 + 2 * rows]; 1214 } 1215 } 1216 } else if (columns == 2) { 1217 for (int r = 0; r < rows; r++) { 1218 t[startt + r] = a[s][r][0]; 1219 t[startt + rows + r] = a[s][r][1]; 1220 } 1221 dstRows.forward(t, startt, scale); 1222 dstRows.forward(t, startt + rows, scale); 1223 for (int r = 0; r < rows; r++) { 1224 a[s][r][0] = t[startt + r]; 1225 a[s][r][1] = t[startt + rows + r]; 1226 } 1227 } 1228 } 1229 } else { 1230 for (int s = n0; s < slices; s += nthreads) { 1231 for (int r = 0; r < rows; r++) { 1232 dstColumns.inverse(a[s][r], scale); 1233 } 1234 if (columns > 2) { 1235 for (int c = 0; c < columns; c += 4) { 1236 for (int r = 0; r < rows; r++) { 1237 idx2 = startt + rows + r; 1238 t[startt + r] = a[s][r][c]; 1239 t[idx2] = a[s][r][c + 1]; 1240 t[idx2 + rows] = a[s][r][c + 2]; 1241 t[idx2 + 2 * rows] = a[s][r][c + 3]; 1242 } 1243 dstRows.inverse(t, startt, scale); 1244 dstRows.inverse(t, startt + rows, scale); 1245 dstRows.inverse(t, startt + 2 * rows, scale); 1246 dstRows.inverse(t, startt + 3 * rows, scale); 1247 for (int r = 0; r < rows; r++) { 1248 idx2 = startt + rows + r; 1249 a[s][r][c] = t[startt + r]; 1250 a[s][r][c + 1] = t[idx2]; 1251 a[s][r][c + 2] = t[idx2 + rows]; 1252 a[s][r][c + 3] = t[idx2 + 2 * rows]; 1253 } 1254 } 1255 } else if (columns == 2) { 1256 for (int r = 0; r < rows; r++) { 1257 t[startt + r] = a[s][r][0]; 1258 t[startt + rows + r] = a[s][r][1]; 1259 } 1260 dstRows.inverse(t, startt, scale); 1261 dstRows.inverse(t, startt + rows, scale); 1262 for (int r = 0; r < rows; r++) { 1263 a[s][r][0] = t[startt + r]; 1264 a[s][r][1] = t[startt + rows + r]; 1265 } 1266 } 1267 } 1268 } 1269 } 1270 }); 1271 } 1272 ConcurrencyUtils.waitForCompletion(futures); 1273 } 1274 1275 private void ddxt3db_subth(final int isgn, final float[] a, final boolean scale) { 1276 final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads(); 1277 int nt = 4 * slices; 1278 if (columns == 2) { 1279 nt >>= 1; 1280 } 1281 Future<?>[] futures = new Future[nthreads]; 1282 1283 for (int i = 0; i < nthreads; i++) { 1284 final int n0 = i; 1285 final int startt = nt * i; 1286 futures[i] = ConcurrencyUtils.submit(new Runnable() { 1287 1288 @Override 1289 public void run() { 1290 int idx0, idx1, idx2; 1291 if (isgn == -1) { 1292 if (columns > 2) { 1293 for (int r = n0; r < rows; r += nthreads) { 1294 idx0 = r * rowStride; 1295 for (int c = 0; c < columns; c += 4) { 1296 for (int s = 0; s < slices; s++) { 1297 idx1 = s * sliceStride + idx0 + c; 1298 idx2 = startt + slices + s; 1299 t[startt + s] = a[idx1]; 1300 t[idx2] = a[idx1 + 1]; 1301 t[idx2 + slices] = a[idx1 + 2]; 1302 t[idx2 + 2 * slices] = a[idx1 + 3]; 1303 } 1304 dstSlices.forward(t, startt, scale); 1305 dstSlices.forward(t, startt + slices, scale); 1306 dstSlices.forward(t, startt + 2 * slices, scale); 1307 dstSlices.forward(t, startt + 3 * slices, scale); 1308 for (int s = 0; s < slices; s++) { 1309 idx1 = s * sliceStride + idx0 + c; 1310 idx2 = startt + slices + s; 1311 a[idx1] = t[startt + s]; 1312 a[idx1 + 1] = t[idx2]; 1313 a[idx1 + 2] = t[idx2 + slices]; 1314 a[idx1 + 3] = t[idx2 + 2 * slices]; 1315 } 1316 } 1317 } 1318 } else if (columns == 2) { 1319 for (int r = n0; r < rows; r += nthreads) { 1320 idx0 = r * rowStride; 1321 for (int s = 0; s < slices; s++) { 1322 idx1 = s * sliceStride + idx0; 1323 t[startt + s] = a[idx1]; 1324 t[startt + slices + s] = a[idx1 + 1]; 1325 } 1326 dstSlices.forward(t, startt, scale); 1327 dstSlices.forward(t, startt + slices, scale); 1328 for (int s = 0; s < slices; s++) { 1329 idx1 = s * sliceStride + idx0; 1330 a[idx1] = t[startt + s]; 1331 a[idx1 + 1] = t[startt + slices + s]; 1332 } 1333 } 1334 } 1335 } else { 1336 if (columns > 2) { 1337 for (int r = n0; r < rows; r += nthreads) { 1338 idx0 = r * rowStride; 1339 for (int c = 0; c < columns; c += 4) { 1340 for (int s = 0; s < slices; s++) { 1341 idx1 = s * sliceStride + idx0 + c; 1342 idx2 = startt + slices + s; 1343 t[startt + s] = a[idx1]; 1344 t[idx2] = a[idx1 + 1]; 1345 t[idx2 + slices] = a[idx1 + 2]; 1346 t[idx2 + 2 * slices] = a[idx1 + 3]; 1347 } 1348 dstSlices.inverse(t, startt, scale); 1349 dstSlices.inverse(t, startt + slices, scale); 1350 dstSlices.inverse(t, startt + 2 * slices, scale); 1351 dstSlices.inverse(t, startt + 3 * slices, scale); 1352 for (int s = 0; s < slices; s++) { 1353 idx1 = s * sliceStride + idx0 + c; 1354 idx2 = startt + slices + s; 1355 a[idx1] = t[startt + s]; 1356 a[idx1 + 1] = t[idx2]; 1357 a[idx1 + 2] = t[idx2 + slices]; 1358 a[idx1 + 3] = t[idx2 + 2 * slices]; 1359 } 1360 } 1361 } 1362 } else if (columns == 2) { 1363 for (int r = n0; r < rows; r += nthreads) { 1364 idx0 = r * rowStride; 1365 for (int s = 0; s < slices; s++) { 1366 idx1 = s * sliceStride + idx0; 1367 t[startt + s] = a[idx1]; 1368 t[startt + slices + s] = a[idx1 + 1]; 1369 } 1370 dstSlices.inverse(t, startt, scale); 1371 dstSlices.inverse(t, startt + slices, scale); 1372 1373 for (int s = 0; s < slices; s++) { 1374 idx1 = s * sliceStride + idx0; 1375 a[idx1] = t[startt + s]; 1376 a[idx1 + 1] = t[startt + slices + s]; 1377 } 1378 } 1379 } 1380 } 1381 } 1382 }); 1383 } 1384 ConcurrencyUtils.waitForCompletion(futures); 1385 } 1386 1387 private void ddxt3db_subth(final int isgn, final float[][][] a, final boolean scale) { 1388 final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads(); 1389 int nt = 4 * slices; 1390 if (columns == 2) { 1391 nt >>= 1; 1392 } 1393 Future<?>[] futures = new Future[nthreads]; 1394 1395 for (int i = 0; i < nthreads; i++) { 1396 final int n0 = i; 1397 final int startt = nt * i; 1398 futures[i] = ConcurrencyUtils.submit(new Runnable() { 1399 1400 @Override 1401 public void run() { 1402 int idx2; 1403 if (isgn == -1) { 1404 if (columns > 2) { 1405 for (int r = n0; r < rows; r += nthreads) { 1406 for (int c = 0; c < columns; c += 4) { 1407 for (int s = 0; s < slices; s++) { 1408 idx2 = startt + slices + s; 1409 t[startt + s] = a[s][r][c]; 1410 t[idx2] = a[s][r][c + 1]; 1411 t[idx2 + slices] = a[s][r][c + 2]; 1412 t[idx2 + 2 * slices] = a[s][r][c + 3]; 1413 } 1414 dstSlices.forward(t, startt, scale); 1415 dstSlices.forward(t, startt + slices, scale); 1416 dstSlices.forward(t, startt + 2 * slices, scale); 1417 dstSlices.forward(t, startt + 3 * slices, scale); 1418 for (int s = 0; s < slices; s++) { 1419 idx2 = startt + slices + s; 1420 a[s][r][c] = t[startt + s]; 1421 a[s][r][c + 1] = t[idx2]; 1422 a[s][r][c + 2] = t[idx2 + slices]; 1423 a[s][r][c + 3] = t[idx2 + 2 * slices]; 1424 } 1425 } 1426 } 1427 } else if (columns == 2) { 1428 for (int r = n0; r < rows; r += nthreads) { 1429 for (int s = 0; s < slices; s++) { 1430 t[startt + s] = a[s][r][0]; 1431 t[startt + slices + s] = a[s][r][1]; 1432 } 1433 dstSlices.forward(t, startt, scale); 1434 dstSlices.forward(t, startt + slices, scale); 1435 for (int s = 0; s < slices; s++) { 1436 a[s][r][0] = t[startt + s]; 1437 a[s][r][1] = t[startt + slices + s]; 1438 } 1439 } 1440 } 1441 } else { 1442 if (columns > 2) { 1443 for (int r = n0; r < rows; r += nthreads) { 1444 for (int c = 0; c < columns; c += 4) { 1445 for (int s = 0; s < slices; s++) { 1446 idx2 = startt + slices + s; 1447 t[startt + s] = a[s][r][c]; 1448 t[idx2] = a[s][r][c + 1]; 1449 t[idx2 + slices] = a[s][r][c + 2]; 1450 t[idx2 + 2 * slices] = a[s][r][c + 3]; 1451 } 1452 dstSlices.inverse(t, startt, scale); 1453 dstSlices.inverse(t, startt + slices, scale); 1454 dstSlices.inverse(t, startt + 2 * slices, scale); 1455 dstSlices.inverse(t, startt + 3 * slices, scale); 1456 for (int s = 0; s < slices; s++) { 1457 idx2 = startt + slices + s; 1458 a[s][r][c] = t[startt + s]; 1459 a[s][r][c + 1] = t[idx2]; 1460 a[s][r][c + 2] = t[idx2 + slices]; 1461 a[s][r][c + 3] = t[idx2 + 2 * slices]; 1462 } 1463 } 1464 } 1465 } else if (columns == 2) { 1466 for (int r = n0; r < rows; r += nthreads) { 1467 for (int s = 0; s < slices; s++) { 1468 t[startt + s] = a[s][r][0]; 1469 t[startt + slices + s] = a[s][r][1]; 1470 } 1471 dstSlices.inverse(t, startt, scale); 1472 dstSlices.inverse(t, startt + slices, scale); 1473 1474 for (int s = 0; s < slices; s++) { 1475 a[s][r][0] = t[startt + s]; 1476 a[s][r][1] = t[startt + slices + s]; 1477 } 1478 } 1479 } 1480 } 1481 } 1482 }); 1483 } 1484 ConcurrencyUtils.waitForCompletion(futures); 1485 } 1486}