001/* ***** BEGIN LICENSE BLOCK ***** 002 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 003 * 004 * The contents of this file are subject to the Mozilla Public License Version 005 * 1.1 (the "License"); you may not use this file except in compliance with 006 * the License. You may obtain a copy of the License at 007 * http://www.mozilla.org/MPL/ 008 * 009 * Software distributed under the License is distributed on an "AS IS" basis, 010 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 011 * for the specific language governing rights and limitations under the 012 * License. 013 * 014 * The Original Code is JTransforms. 015 * 016 * The Initial Developer of the Original Code is 017 * Piotr Wendykier, Emory University. 018 * Portions created by the Initial Developer are Copyright (C) 2007-2009 019 * the Initial Developer. All Rights Reserved. 020 * 021 * Alternatively, the contents of this file may be used under the terms of 022 * either the GNU General Public License Version 2 or later (the "GPL"), or 023 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 024 * in which case the provisions of the GPL or the LGPL are applicable instead 025 * of those above. If you wish to allow use of your version of this file only 026 * under the terms of either the GPL or the LGPL, and not to allow others to 027 * use your version of this file under the terms of the MPL, indicate your 028 * decision by deleting the provisions above and replace them with the notice 029 * and other provisions required by the GPL or the LGPL. If you do not delete 030 * the provisions above, a recipient may use your version of this file under 031 * the terms of any one of the MPL, the GPL or the LGPL. 032 * 033 * ***** END LICENSE BLOCK ***** */ 034 035package edu.emory.mathcs.jtransforms.dht; 036 037import java.util.concurrent.Future; 038 039import edu.emory.mathcs.utils.ConcurrencyUtils; 040 041/** 042 * Computes 3D Discrete Hartley Transform (DHT) of real, single precision data. 043 * The sizes of all three dimensions can be arbitrary numbers. This is a 044 * parallel implementation optimized for SMP systems.<br> 045 * <br> 046 * Part of code is derived from General Purpose FFT Package written by Takuya Ooura 047 * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html) 048 * 049 * @author Piotr Wendykier (piotr.wendykier@gmail.com) 050 * 051 */ 052public class FloatDHT_3D { 053 054 private int slices; 055 056 private int rows; 057 058 private int columns; 059 060 private int sliceStride; 061 062 private int rowStride; 063 064 private float[] t; 065 066 private FloatDHT_1D dhtSlices, dhtRows, dhtColumns; 067 068 private int oldNthreads; 069 070 private int nt; 071 072 private boolean isPowerOfTwo = false; 073 074 private boolean useThreads = false; 075 076 /** 077 * Creates new instance of FloatDHT_3D. 078 * 079 * @param slices 080 * number of slices 081 * @param rows 082 * number of rows 083 * @param columns 084 * number of columns 085 */ 086 public FloatDHT_3D(int slices, int rows, int columns) { 087 if (slices <= 1 || rows <= 1 || columns <= 1) { 088 throw new IllegalArgumentException("slices, rows and columns must be greater than 1"); 089 } 090 this.slices = slices; 091 this.rows = rows; 092 this.columns = columns; 093 this.sliceStride = rows * columns; 094 this.rowStride = columns; 095 if (slices * rows * columns >= ConcurrencyUtils.getThreadsBeginN_3D()) { 096 this.useThreads = true; 097 } 098 if (ConcurrencyUtils.isPowerOf2(slices) && ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) { 099 isPowerOfTwo = true; 100 oldNthreads = ConcurrencyUtils.getNumberOfThreads(); 101 nt = slices; 102 if (nt < rows) { 103 nt = rows; 104 } 105 nt *= 4; 106 if (oldNthreads > 1) { 107 nt *= oldNthreads; 108 } 109 if (columns == 2) { 110 nt >>= 1; 111 } 112 t = new float[nt]; 113 } 114 dhtSlices = new FloatDHT_1D(slices); 115 if (slices == rows) { 116 dhtRows = dhtSlices; 117 } else { 118 dhtRows = new FloatDHT_1D(rows); 119 } 120 if (slices == columns) { 121 dhtColumns = dhtSlices; 122 } else if (rows == columns) { 123 dhtColumns = dhtRows; 124 } else { 125 dhtColumns = new FloatDHT_1D(columns); 126 } 127 } 128 129 /** 130 * Computes the 3D real, forward DHT leaving the result in <code>a</code>. 131 * The data is stored in 1D array addressed in slice-major, then row-major, 132 * then column-major, in order of significance, i.e. the element (i,j,k) of 133 * 3D array x[slices][rows][columns] is stored in a[i*sliceStride + 134 * j*rowStride + k], where sliceStride = rows * columns and rowStride = 135 * columns. 136 * 137 * @param a 138 * data to transform 139 */ 140 public void forward(final float[] a) { 141 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 142 if (isPowerOfTwo) { 143 if (nthreads != oldNthreads) { 144 nt = slices; 145 if (nt < rows) { 146 nt = rows; 147 } 148 nt *= 4; 149 if (nthreads > 1) { 150 nt *= nthreads; 151 } 152 if (columns == 2) { 153 nt >>= 1; 154 } 155 t = new float[nt]; 156 oldNthreads = nthreads; 157 } 158 if ((nthreads > 1) && useThreads) { 159 ddxt3da_subth(-1, a, true); 160 ddxt3db_subth(-1, a, true); 161 } else { 162 ddxt3da_sub(-1, a, true); 163 ddxt3db_sub(-1, a, true); 164 } 165 yTransform(a); 166 } else { 167 if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) { 168 Future<?>[] futures = new Future[nthreads]; 169 int p = slices / nthreads; 170 for (int l = 0; l < nthreads; l++) { 171 final int firstSlice = l * p; 172 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 173 futures[l] = ConcurrencyUtils.submit(new Runnable() { 174 @Override 175 public void run() { 176 for (int s = firstSlice; s < lastSlice; s++) { 177 int idx1 = s * sliceStride; 178 for (int r = 0; r < rows; r++) { 179 dhtColumns.forward(a, idx1 + r * rowStride); 180 } 181 } 182 } 183 }); 184 } 185 ConcurrencyUtils.waitForCompletion(futures); 186 187 for (int l = 0; l < nthreads; l++) { 188 final int firstSlice = l * p; 189 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 190 futures[l] = ConcurrencyUtils.submit(new Runnable() { 191 @Override 192 public void run() { 193 float[] temp = new float[rows]; 194 for (int s = firstSlice; s < lastSlice; s++) { 195 int idx1 = s * sliceStride; 196 for (int c = 0; c < columns; c++) { 197 for (int r = 0; r < rows; r++) { 198 int idx3 = idx1 + r * rowStride + c; 199 temp[r] = a[idx3]; 200 } 201 dhtRows.forward(temp); 202 for (int r = 0; r < rows; r++) { 203 int idx3 = idx1 + r * rowStride + c; 204 a[idx3] = temp[r]; 205 } 206 } 207 } 208 } 209 }); 210 } 211 ConcurrencyUtils.waitForCompletion(futures); 212 213 p = rows / nthreads; 214 for (int l = 0; l < nthreads; l++) { 215 final int startRow = l * p; 216 final int stopRow; 217 if (l == nthreads - 1) { 218 stopRow = rows; 219 } else { 220 stopRow = startRow + p; 221 } 222 futures[l] = ConcurrencyUtils.submit(new Runnable() { 223 @Override 224 public void run() { 225 float[] temp = new float[slices]; 226 for (int r = startRow; r < stopRow; r++) { 227 int idx1 = r * rowStride; 228 for (int c = 0; c < columns; c++) { 229 for (int s = 0; s < slices; s++) { 230 int idx3 = s * sliceStride + idx1 + c; 231 temp[s] = a[idx3]; 232 } 233 dhtSlices.forward(temp); 234 for (int s = 0; s < slices; s++) { 235 int idx3 = s * sliceStride + idx1 + c; 236 a[idx3] = temp[s]; 237 } 238 } 239 } 240 } 241 }); 242 } 243 ConcurrencyUtils.waitForCompletion(futures); 244 245 } else { 246 for (int s = 0; s < slices; s++) { 247 int idx1 = s * sliceStride; 248 for (int r = 0; r < rows; r++) { 249 dhtColumns.forward(a, idx1 + r * rowStride); 250 } 251 } 252 float[] temp = new float[rows]; 253 for (int s = 0; s < slices; s++) { 254 int idx1 = s * sliceStride; 255 for (int c = 0; c < columns; c++) { 256 for (int r = 0; r < rows; r++) { 257 int idx3 = idx1 + r * rowStride + c; 258 temp[r] = a[idx3]; 259 } 260 dhtRows.forward(temp); 261 for (int r = 0; r < rows; r++) { 262 int idx3 = idx1 + r * rowStride + c; 263 a[idx3] = temp[r]; 264 } 265 } 266 } 267 temp = new float[slices]; 268 for (int r = 0; r < rows; r++) { 269 int idx1 = r * rowStride; 270 for (int c = 0; c < columns; c++) { 271 for (int s = 0; s < slices; s++) { 272 int idx3 = s * sliceStride + idx1 + c; 273 temp[s] = a[idx3]; 274 } 275 dhtSlices.forward(temp); 276 for (int s = 0; s < slices; s++) { 277 int idx3 = s * sliceStride + idx1 + c; 278 a[idx3] = temp[s]; 279 } 280 } 281 } 282 } 283 yTransform(a); 284 } 285 } 286 287 /** 288 * Computes the 3D real, forward DHT leaving the result in <code>a</code>. 289 * The data is stored in 3D array. 290 * 291 * @param a 292 * data to transform 293 */ 294 public void forward(final float[][][] a) { 295 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 296 if (isPowerOfTwo) { 297 if (nthreads != oldNthreads) { 298 nt = slices; 299 if (nt < rows) { 300 nt = rows; 301 } 302 nt *= 4; 303 if (nthreads > 1) { 304 nt *= nthreads; 305 } 306 if (columns == 2) { 307 nt >>= 1; 308 } 309 t = new float[nt]; 310 oldNthreads = nthreads; 311 } 312 if ((nthreads > 1) && useThreads) { 313 ddxt3da_subth(-1, a, true); 314 ddxt3db_subth(-1, a, true); 315 } else { 316 ddxt3da_sub(-1, a, true); 317 ddxt3db_sub(-1, a, true); 318 } 319 yTransform(a); 320 } else { 321 if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) { 322 Future<?>[] futures = new Future[nthreads]; 323 int p = slices / nthreads; 324 for (int l = 0; l < nthreads; l++) { 325 final int firstSlice = l * p; 326 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 327 futures[l] = ConcurrencyUtils.submit(new Runnable() { 328 @Override 329 public void run() { 330 for (int s = firstSlice; s < lastSlice; s++) { 331 for (int r = 0; r < rows; r++) { 332 dhtColumns.forward(a[s][r]); 333 } 334 } 335 } 336 }); 337 } 338 ConcurrencyUtils.waitForCompletion(futures); 339 340 for (int l = 0; l < nthreads; l++) { 341 final int firstSlice = l * p; 342 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 343 futures[l] = ConcurrencyUtils.submit(new Runnable() { 344 @Override 345 public void run() { 346 float[] temp = new float[rows]; 347 for (int s = firstSlice; s < lastSlice; s++) { 348 for (int c = 0; c < columns; c++) { 349 for (int r = 0; r < rows; r++) { 350 temp[r] = a[s][r][c]; 351 } 352 dhtRows.forward(temp); 353 for (int r = 0; r < rows; r++) { 354 a[s][r][c] = temp[r]; 355 } 356 } 357 } 358 } 359 }); 360 } 361 ConcurrencyUtils.waitForCompletion(futures); 362 363 p = rows / nthreads; 364 for (int l = 0; l < nthreads; l++) { 365 final int firstRow = l * p; 366 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 367 futures[l] = ConcurrencyUtils.submit(new Runnable() { 368 @Override 369 public void run() { 370 float[] temp = new float[slices]; 371 for (int r = firstRow; r < lastRow; r++) { 372 for (int c = 0; c < columns; c++) { 373 for (int s = 0; s < slices; s++) { 374 temp[s] = a[s][r][c]; 375 } 376 dhtSlices.forward(temp); 377 for (int s = 0; s < slices; s++) { 378 a[s][r][c] = temp[s]; 379 } 380 } 381 } 382 } 383 }); 384 } 385 ConcurrencyUtils.waitForCompletion(futures); 386 387 } else { 388 for (int s = 0; s < slices; s++) { 389 for (int r = 0; r < rows; r++) { 390 dhtColumns.forward(a[s][r]); 391 } 392 } 393 float[] temp = new float[rows]; 394 for (int s = 0; s < slices; s++) { 395 for (int c = 0; c < columns; c++) { 396 for (int r = 0; r < rows; r++) { 397 temp[r] = a[s][r][c]; 398 } 399 dhtRows.forward(temp); 400 for (int r = 0; r < rows; r++) { 401 a[s][r][c] = temp[r]; 402 } 403 } 404 } 405 temp = new float[slices]; 406 for (int r = 0; r < rows; r++) { 407 for (int c = 0; c < columns; c++) { 408 for (int s = 0; s < slices; s++) { 409 temp[s] = a[s][r][c]; 410 } 411 dhtSlices.forward(temp); 412 for (int s = 0; s < slices; s++) { 413 a[s][r][c] = temp[s]; 414 } 415 } 416 } 417 } 418 yTransform(a); 419 } 420 } 421 422 /** 423 * Computes the 3D real, inverse DHT leaving the result in <code>a</code>. 424 * The data is stored in 1D array addressed in slice-major, then row-major, 425 * then column-major, in order of significance, i.e. the element (i,j,k) of 426 * 3D array x[slices][rows][columns] is stored in a[i*sliceStride + 427 * j*rowStride + k], where sliceStride = rows * columns and rowStride = 428 * columns. 429 * 430 * @param a 431 * data to transform 432 * @param scale 433 * if true then scaling is performed 434 */ 435 public void inverse(final float[] a, final boolean scale) { 436 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 437 if (isPowerOfTwo) { 438 if (nthreads != oldNthreads) { 439 nt = slices; 440 if (nt < rows) { 441 nt = rows; 442 } 443 nt *= 4; 444 if (nthreads > 1) { 445 nt *= nthreads; 446 } 447 if (columns == 2) { 448 nt >>= 1; 449 } 450 t = new float[nt]; 451 oldNthreads = nthreads; 452 } 453 if ((nthreads > 1) && useThreads) { 454 ddxt3da_subth(1, a, scale); 455 ddxt3db_subth(1, a, scale); 456 } else { 457 ddxt3da_sub(1, a, scale); 458 ddxt3db_sub(1, a, scale); 459 } 460 yTransform(a); 461 } else { 462 if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) { 463 Future<?>[] futures = new Future[nthreads]; 464 int p = slices / nthreads; 465 for (int l = 0; l < nthreads; l++) { 466 final int firstSlice = l * p; 467 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 468 futures[l] = ConcurrencyUtils.submit(new Runnable() { 469 @Override 470 public void run() { 471 for (int s = firstSlice; s < lastSlice; s++) { 472 int idx1 = s * sliceStride; 473 for (int r = 0; r < rows; r++) { 474 dhtColumns.inverse(a, idx1 + r * rowStride, scale); 475 } 476 } 477 } 478 }); 479 } 480 ConcurrencyUtils.waitForCompletion(futures); 481 482 for (int l = 0; l < nthreads; l++) { 483 final int firstSlice = l * p; 484 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 485 futures[l] = ConcurrencyUtils.submit(new Runnable() { 486 @Override 487 public void run() { 488 float[] temp = new float[rows]; 489 for (int s = firstSlice; s < lastSlice; s++) { 490 int idx1 = s * sliceStride; 491 for (int c = 0; c < columns; c++) { 492 for (int r = 0; r < rows; r++) { 493 int idx3 = idx1 + r * rowStride + c; 494 temp[r] = a[idx3]; 495 } 496 dhtRows.inverse(temp, scale); 497 for (int r = 0; r < rows; r++) { 498 int idx3 = idx1 + r * rowStride + c; 499 a[idx3] = temp[r]; 500 } 501 } 502 } 503 } 504 }); 505 } 506 ConcurrencyUtils.waitForCompletion(futures); 507 508 p = rows / nthreads; 509 for (int l = 0; l < nthreads; l++) { 510 final int firstRow = l * p; 511 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 512 futures[l] = ConcurrencyUtils.submit(new Runnable() { 513 @Override 514 public void run() { 515 float[] temp = new float[slices]; 516 for (int r = firstRow; r < lastRow; r++) { 517 int idx1 = r * rowStride; 518 for (int c = 0; c < columns; c++) { 519 for (int s = 0; s < slices; s++) { 520 int idx3 = s * sliceStride + idx1 + c; 521 temp[s] = a[idx3]; 522 } 523 dhtSlices.inverse(temp, scale); 524 for (int s = 0; s < slices; s++) { 525 int idx3 = s * sliceStride + idx1 + c; 526 a[idx3] = temp[s]; 527 } 528 } 529 } 530 } 531 }); 532 } 533 ConcurrencyUtils.waitForCompletion(futures); 534 535 } else { 536 for (int s = 0; s < slices; s++) { 537 int idx1 = s * sliceStride; 538 for (int r = 0; r < rows; r++) { 539 dhtColumns.inverse(a, idx1 + r * rowStride, scale); 540 } 541 } 542 float[] temp = new float[rows]; 543 for (int s = 0; s < slices; s++) { 544 int idx1 = s * sliceStride; 545 for (int c = 0; c < columns; c++) { 546 for (int r = 0; r < rows; r++) { 547 int idx3 = idx1 + r * rowStride + c; 548 temp[r] = a[idx3]; 549 } 550 dhtRows.inverse(temp, scale); 551 for (int r = 0; r < rows; r++) { 552 int idx3 = idx1 + r * rowStride + c; 553 a[idx3] = temp[r]; 554 } 555 } 556 } 557 temp = new float[slices]; 558 for (int r = 0; r < rows; r++) { 559 int idx1 = r * rowStride; 560 for (int c = 0; c < columns; c++) { 561 for (int s = 0; s < slices; s++) { 562 int idx3 = s * sliceStride + idx1 + c; 563 temp[s] = a[idx3]; 564 } 565 dhtSlices.inverse(temp, scale); 566 for (int s = 0; s < slices; s++) { 567 int idx3 = s * sliceStride + idx1 + c; 568 a[idx3] = temp[s]; 569 } 570 } 571 } 572 } 573 yTransform(a); 574 } 575 } 576 577 /** 578 * Computes the 3D real, inverse DHT leaving the result in <code>a</code>. 579 * The data is stored in 3D array. 580 * 581 * @param a 582 * data to transform 583 * @param scale 584 * if true then scaling is performed 585 */ 586 public void inverse(final float[][][] a, final boolean scale) { 587 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 588 if (isPowerOfTwo) { 589 if (nthreads != oldNthreads) { 590 nt = slices; 591 if (nt < rows) { 592 nt = rows; 593 } 594 nt *= 4; 595 if (nthreads > 1) { 596 nt *= nthreads; 597 } 598 if (columns == 2) { 599 nt >>= 1; 600 } 601 t = new float[nt]; 602 oldNthreads = nthreads; 603 } 604 if ((nthreads > 1) && useThreads) { 605 ddxt3da_subth(1, a, scale); 606 ddxt3db_subth(1, a, scale); 607 } else { 608 ddxt3da_sub(1, a, scale); 609 ddxt3db_sub(1, a, scale); 610 } 611 yTransform(a); 612 } else { 613 if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) { 614 Future<?>[] futures = new Future[nthreads]; 615 int p = slices / nthreads; 616 for (int l = 0; l < nthreads; l++) { 617 final int firstSlice = l * p; 618 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 619 futures[l] = ConcurrencyUtils.submit(new Runnable() { 620 @Override 621 public void run() { 622 for (int s = firstSlice; s < lastSlice; s++) { 623 for (int r = 0; r < rows; r++) { 624 dhtColumns.inverse(a[s][r], scale); 625 } 626 } 627 } 628 }); 629 } 630 ConcurrencyUtils.waitForCompletion(futures); 631 632 for (int l = 0; l < nthreads; l++) { 633 final int firstSlice = l * p; 634 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 635 futures[l] = ConcurrencyUtils.submit(new Runnable() { 636 @Override 637 public void run() { 638 float[] temp = new float[rows]; 639 for (int s = firstSlice; s < lastSlice; s++) { 640 for (int c = 0; c < columns; c++) { 641 for (int r = 0; r < rows; r++) { 642 temp[r] = a[s][r][c]; 643 } 644 dhtRows.inverse(temp, scale); 645 for (int r = 0; r < rows; r++) { 646 a[s][r][c] = temp[r]; 647 } 648 } 649 } 650 } 651 }); 652 } 653 ConcurrencyUtils.waitForCompletion(futures); 654 655 p = rows / nthreads; 656 for (int l = 0; l < nthreads; l++) { 657 final int firstRow = l * p; 658 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 659 futures[l] = ConcurrencyUtils.submit(new Runnable() { 660 @Override 661 public void run() { 662 float[] temp = new float[slices]; 663 for (int r = firstRow; r < lastRow; r++) { 664 for (int c = 0; c < columns; c++) { 665 for (int s = 0; s < slices; s++) { 666 temp[s] = a[s][r][c]; 667 } 668 dhtSlices.inverse(temp, scale); 669 for (int s = 0; s < slices; s++) { 670 a[s][r][c] = temp[s]; 671 } 672 } 673 } 674 } 675 }); 676 } 677 ConcurrencyUtils.waitForCompletion(futures); 678 679 } else { 680 for (int s = 0; s < slices; s++) { 681 for (int r = 0; r < rows; r++) { 682 dhtColumns.inverse(a[s][r], scale); 683 } 684 } 685 float[] temp = new float[rows]; 686 for (int s = 0; s < slices; s++) { 687 for (int c = 0; c < columns; c++) { 688 for (int r = 0; r < rows; r++) { 689 temp[r] = a[s][r][c]; 690 } 691 dhtRows.inverse(temp, scale); 692 for (int r = 0; r < rows; r++) { 693 a[s][r][c] = temp[r]; 694 } 695 } 696 } 697 temp = new float[slices]; 698 for (int r = 0; r < rows; r++) { 699 for (int c = 0; c < columns; c++) { 700 for (int s = 0; s < slices; s++) { 701 temp[s] = a[s][r][c]; 702 } 703 dhtSlices.inverse(temp, scale); 704 for (int s = 0; s < slices; s++) { 705 a[s][r][c] = temp[s]; 706 } 707 } 708 } 709 } 710 yTransform(a); 711 } 712 } 713 714 private void ddxt3da_sub(int isgn, float[] a, boolean scale) { 715 int idx0, idx1, idx2; 716 717 if (isgn == -1) { 718 for (int s = 0; s < slices; s++) { 719 idx0 = s * sliceStride; 720 for (int r = 0; r < rows; r++) { 721 dhtColumns.forward(a, idx0 + r * rowStride); 722 } 723 if (columns > 2) { 724 for (int c = 0; c < columns; c += 4) { 725 for (int r = 0; r < rows; r++) { 726 idx1 = idx0 + r * rowStride + c; 727 idx2 = rows + r; 728 t[r] = a[idx1]; 729 t[idx2] = a[idx1 + 1]; 730 t[idx2 + rows] = a[idx1 + 2]; 731 t[idx2 + 2 * rows] = a[idx1 + 3]; 732 } 733 dhtRows.forward(t, 0); 734 dhtRows.forward(t, rows); 735 dhtRows.forward(t, 2 * rows); 736 dhtRows.forward(t, 3 * rows); 737 for (int r = 0; r < rows; r++) { 738 idx1 = idx0 + r * rowStride + c; 739 idx2 = rows + r; 740 a[idx1] = t[r]; 741 a[idx1 + 1] = t[idx2]; 742 a[idx1 + 2] = t[idx2 + rows]; 743 a[idx1 + 3] = t[idx2 + 2 * rows]; 744 } 745 } 746 } else if (columns == 2) { 747 for (int r = 0; r < rows; r++) { 748 idx1 = idx0 + r * rowStride; 749 t[r] = a[idx1]; 750 t[rows + r] = a[idx1 + 1]; 751 } 752 dhtRows.forward(t, 0); 753 dhtRows.forward(t, rows); 754 for (int r = 0; r < rows; r++) { 755 idx1 = idx0 + r * rowStride; 756 a[idx1] = t[r]; 757 a[idx1 + 1] = t[rows + r]; 758 } 759 } 760 } 761 } else { 762 for (int s = 0; s < slices; s++) { 763 idx0 = s * sliceStride; 764 for (int r = 0; r < rows; r++) { 765 dhtColumns.inverse(a, idx0 + r * rowStride, scale); 766 } 767 if (columns > 2) { 768 for (int c = 0; c < columns; c += 4) { 769 for (int r = 0; r < rows; r++) { 770 idx1 = idx0 + r * rowStride + c; 771 idx2 = rows + r; 772 t[r] = a[idx1]; 773 t[idx2] = a[idx1 + 1]; 774 t[idx2 + rows] = a[idx1 + 2]; 775 t[idx2 + 2 * rows] = a[idx1 + 3]; 776 } 777 dhtRows.inverse(t, 0, scale); 778 dhtRows.inverse(t, rows, scale); 779 dhtRows.inverse(t, 2 * rows, scale); 780 dhtRows.inverse(t, 3 * rows, scale); 781 for (int r = 0; r < rows; r++) { 782 idx1 = idx0 + r * rowStride + c; 783 idx2 = rows + r; 784 a[idx1] = t[r]; 785 a[idx1 + 1] = t[idx2]; 786 a[idx1 + 2] = t[idx2 + rows]; 787 a[idx1 + 3] = t[idx2 + 2 * rows]; 788 } 789 } 790 } else if (columns == 2) { 791 for (int r = 0; r < rows; r++) { 792 idx1 = idx0 + r * rowStride; 793 t[r] = a[idx1]; 794 t[rows + r] = a[idx1 + 1]; 795 } 796 dhtRows.inverse(t, 0, scale); 797 dhtRows.inverse(t, rows, scale); 798 for (int r = 0; r < rows; r++) { 799 idx1 = idx0 + r * rowStride; 800 a[idx1] = t[r]; 801 a[idx1 + 1] = t[rows + r]; 802 } 803 } 804 } 805 } 806 } 807 808 private void ddxt3da_sub(int isgn, float[][][] a, boolean scale) { 809 int idx2; 810 811 if (isgn == -1) { 812 for (int s = 0; s < slices; s++) { 813 for (int r = 0; r < rows; r++) { 814 dhtColumns.forward(a[s][r]); 815 } 816 if (columns > 2) { 817 for (int c = 0; c < columns; c += 4) { 818 for (int r = 0; r < rows; r++) { 819 idx2 = rows + r; 820 t[r] = a[s][r][c]; 821 t[idx2] = a[s][r][c + 1]; 822 t[idx2 + rows] = a[s][r][c + 2]; 823 t[idx2 + 2 * rows] = a[s][r][c + 3]; 824 } 825 dhtRows.forward(t, 0); 826 dhtRows.forward(t, rows); 827 dhtRows.forward(t, 2 * rows); 828 dhtRows.forward(t, 3 * rows); 829 for (int r = 0; r < rows; r++) { 830 idx2 = rows + r; 831 a[s][r][c] = t[r]; 832 a[s][r][c + 1] = t[idx2]; 833 a[s][r][c + 2] = t[idx2 + rows]; 834 a[s][r][c + 3] = t[idx2 + 2 * rows]; 835 } 836 } 837 } else if (columns == 2) { 838 for (int r = 0; r < rows; r++) { 839 t[r] = a[s][r][0]; 840 t[rows + r] = a[s][r][1]; 841 } 842 dhtRows.forward(t, 0); 843 dhtRows.forward(t, rows); 844 for (int r = 0; r < rows; r++) { 845 a[s][r][0] = t[r]; 846 a[s][r][1] = t[rows + r]; 847 } 848 } 849 } 850 } else { 851 for (int s = 0; s < slices; s++) { 852 for (int r = 0; r < rows; r++) { 853 dhtColumns.inverse(a[s][r], scale); 854 } 855 if (columns > 2) { 856 for (int c = 0; c < columns; c += 4) { 857 for (int r = 0; r < rows; r++) { 858 idx2 = rows + r; 859 t[r] = a[s][r][c]; 860 t[idx2] = a[s][r][c + 1]; 861 t[idx2 + rows] = a[s][r][c + 2]; 862 t[idx2 + 2 * rows] = a[s][r][c + 3]; 863 } 864 dhtRows.inverse(t, 0, scale); 865 dhtRows.inverse(t, rows, scale); 866 dhtRows.inverse(t, 2 * rows, scale); 867 dhtRows.inverse(t, 3 * rows, scale); 868 for (int r = 0; r < rows; r++) { 869 idx2 = rows + r; 870 a[s][r][c] = t[r]; 871 a[s][r][c + 1] = t[idx2]; 872 a[s][r][c + 2] = t[idx2 + rows]; 873 a[s][r][c + 3] = t[idx2 + 2 * rows]; 874 } 875 } 876 } else if (columns == 2) { 877 for (int r = 0; r < rows; r++) { 878 t[r] = a[s][r][0]; 879 t[rows + r] = a[s][r][1]; 880 } 881 dhtRows.inverse(t, 0, scale); 882 dhtRows.inverse(t, rows, scale); 883 for (int r = 0; r < rows; r++) { 884 a[s][r][0] = t[r]; 885 a[s][r][1] = t[rows + r]; 886 } 887 } 888 } 889 } 890 } 891 892 private void ddxt3db_sub(int isgn, float[] a, boolean scale) { 893 int idx0, idx1, idx2; 894 895 if (isgn == -1) { 896 if (columns > 2) { 897 for (int r = 0; r < rows; r++) { 898 idx0 = r * rowStride; 899 for (int c = 0; c < columns; c += 4) { 900 for (int s = 0; s < slices; s++) { 901 idx1 = s * sliceStride + idx0 + c; 902 idx2 = slices + s; 903 t[s] = a[idx1]; 904 t[idx2] = a[idx1 + 1]; 905 t[idx2 + slices] = a[idx1 + 2]; 906 t[idx2 + 2 * slices] = a[idx1 + 3]; 907 } 908 dhtSlices.forward(t, 0); 909 dhtSlices.forward(t, slices); 910 dhtSlices.forward(t, 2 * slices); 911 dhtSlices.forward(t, 3 * slices); 912 for (int s = 0; s < slices; s++) { 913 idx1 = s * sliceStride + idx0 + c; 914 idx2 = slices + s; 915 a[idx1] = t[s]; 916 a[idx1 + 1] = t[idx2]; 917 a[idx1 + 2] = t[idx2 + slices]; 918 a[idx1 + 3] = t[idx2 + 2 * slices]; 919 } 920 } 921 } 922 } else if (columns == 2) { 923 for (int r = 0; r < rows; r++) { 924 idx0 = r * rowStride; 925 for (int s = 0; s < slices; s++) { 926 idx1 = s * sliceStride + idx0; 927 t[s] = a[idx1]; 928 t[slices + s] = a[idx1 + 1]; 929 } 930 dhtSlices.forward(t, 0); 931 dhtSlices.forward(t, slices); 932 for (int s = 0; s < slices; s++) { 933 idx1 = s * sliceStride + idx0; 934 a[idx1] = t[s]; 935 a[idx1 + 1] = t[slices + s]; 936 } 937 } 938 } 939 } else { 940 if (columns > 2) { 941 for (int r = 0; r < rows; r++) { 942 idx0 = r * rowStride; 943 for (int c = 0; c < columns; c += 4) { 944 for (int s = 0; s < slices; s++) { 945 idx1 = s * sliceStride + idx0 + c; 946 idx2 = slices + s; 947 t[s] = a[idx1]; 948 t[idx2] = a[idx1 + 1]; 949 t[idx2 + slices] = a[idx1 + 2]; 950 t[idx2 + 2 * slices] = a[idx1 + 3]; 951 } 952 dhtSlices.inverse(t, 0, scale); 953 dhtSlices.inverse(t, slices, scale); 954 dhtSlices.inverse(t, 2 * slices, scale); 955 dhtSlices.inverse(t, 3 * slices, scale); 956 957 for (int s = 0; s < slices; s++) { 958 idx1 = s * sliceStride + idx0 + c; 959 idx2 = slices + s; 960 a[idx1] = t[s]; 961 a[idx1 + 1] = t[idx2]; 962 a[idx1 + 2] = t[idx2 + slices]; 963 a[idx1 + 3] = t[idx2 + 2 * slices]; 964 } 965 } 966 } 967 } else if (columns == 2) { 968 for (int r = 0; r < rows; r++) { 969 idx0 = r * rowStride; 970 for (int s = 0; s < slices; s++) { 971 idx1 = s * sliceStride + idx0; 972 t[s] = a[idx1]; 973 t[slices + s] = a[idx1 + 1]; 974 } 975 dhtSlices.inverse(t, 0, scale); 976 dhtSlices.inverse(t, slices, scale); 977 for (int s = 0; s < slices; s++) { 978 idx1 = s * sliceStride + idx0; 979 a[idx1] = t[s]; 980 a[idx1 + 1] = t[slices + s]; 981 } 982 } 983 } 984 } 985 } 986 987 private void ddxt3db_sub(int isgn, float[][][] a, boolean scale) { 988 int idx2; 989 990 if (isgn == -1) { 991 if (columns > 2) { 992 for (int r = 0; r < rows; r++) { 993 for (int c = 0; c < columns; c += 4) { 994 for (int s = 0; s < slices; s++) { 995 idx2 = slices + s; 996 t[s] = a[s][r][c]; 997 t[idx2] = a[s][r][c + 1]; 998 t[idx2 + slices] = a[s][r][c + 2]; 999 t[idx2 + 2 * slices] = a[s][r][c + 3]; 1000 } 1001 dhtSlices.forward(t, 0); 1002 dhtSlices.forward(t, slices); 1003 dhtSlices.forward(t, 2 * slices); 1004 dhtSlices.forward(t, 3 * slices); 1005 for (int s = 0; s < slices; s++) { 1006 idx2 = slices + s; 1007 a[s][r][c] = t[s]; 1008 a[s][r][c + 1] = t[idx2]; 1009 a[s][r][c + 2] = t[idx2 + slices]; 1010 a[s][r][c + 3] = t[idx2 + 2 * slices]; 1011 } 1012 } 1013 } 1014 } else if (columns == 2) { 1015 for (int r = 0; r < rows; r++) { 1016 for (int s = 0; s < slices; s++) { 1017 t[s] = a[s][r][0]; 1018 t[slices + s] = a[s][r][1]; 1019 } 1020 dhtSlices.forward(t, 0); 1021 dhtSlices.forward(t, slices); 1022 for (int s = 0; s < slices; s++) { 1023 a[s][r][0] = t[s]; 1024 a[s][r][1] = t[slices + s]; 1025 } 1026 } 1027 } 1028 } else { 1029 if (columns > 2) { 1030 for (int r = 0; r < rows; r++) { 1031 for (int c = 0; c < columns; c += 4) { 1032 for (int s = 0; s < slices; s++) { 1033 idx2 = slices + s; 1034 t[s] = a[s][r][c]; 1035 t[idx2] = a[s][r][c + 1]; 1036 t[idx2 + slices] = a[s][r][c + 2]; 1037 t[idx2 + 2 * slices] = a[s][r][c + 3]; 1038 } 1039 dhtSlices.inverse(t, 0, scale); 1040 dhtSlices.inverse(t, slices, scale); 1041 dhtSlices.inverse(t, 2 * slices, scale); 1042 dhtSlices.inverse(t, 3 * slices, scale); 1043 1044 for (int s = 0; s < slices; s++) { 1045 idx2 = slices + s; 1046 a[s][r][c] = t[s]; 1047 a[s][r][c + 1] = t[idx2]; 1048 a[s][r][c + 2] = t[idx2 + slices]; 1049 a[s][r][c + 3] = t[idx2 + 2 * slices]; 1050 } 1051 } 1052 } 1053 } else if (columns == 2) { 1054 for (int r = 0; r < rows; r++) { 1055 for (int s = 0; s < slices; s++) { 1056 t[s] = a[s][r][0]; 1057 t[slices + s] = a[s][r][1]; 1058 } 1059 dhtSlices.inverse(t, 0, scale); 1060 dhtSlices.inverse(t, slices, scale); 1061 for (int s = 0; s < slices; s++) { 1062 a[s][r][0] = t[s]; 1063 a[s][r][1] = t[slices + s]; 1064 } 1065 } 1066 } 1067 } 1068 } 1069 1070 private void ddxt3da_subth(final int isgn, final float[] a, final boolean scale) { 1071 final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads(); 1072 int nt = 4 * rows; 1073 if (columns == 2) { 1074 nt >>= 1; 1075 } 1076 Future<?>[] futures = new Future[nthreads]; 1077 1078 for (int i = 0; i < nthreads; i++) { 1079 final int n0 = i; 1080 final int startt = nt * i; 1081 futures[i] = ConcurrencyUtils.submit(new Runnable() { 1082 1083 @Override 1084 public void run() { 1085 int idx0, idx1, idx2; 1086 if (isgn == -1) { 1087 for (int s = n0; s < slices; s += nthreads) { 1088 idx0 = s * sliceStride; 1089 for (int r = 0; r < rows; r++) { 1090 dhtColumns.forward(a, idx0 + r * rowStride); 1091 } 1092 if (columns > 2) { 1093 for (int c = 0; c < columns; c += 4) { 1094 for (int r = 0; r < rows; r++) { 1095 idx1 = idx0 + r * rowStride + c; 1096 idx2 = startt + rows + r; 1097 t[startt + r] = a[idx1]; 1098 t[idx2] = a[idx1 + 1]; 1099 t[idx2 + rows] = a[idx1 + 2]; 1100 t[idx2 + 2 * rows] = a[idx1 + 3]; 1101 } 1102 dhtRows.forward(t, startt); 1103 dhtRows.forward(t, startt + rows); 1104 dhtRows.forward(t, startt + 2 * rows); 1105 dhtRows.forward(t, startt + 3 * rows); 1106 for (int r = 0; r < rows; r++) { 1107 idx1 = idx0 + r * rowStride + c; 1108 idx2 = startt + rows + r; 1109 a[idx1] = t[startt + r]; 1110 a[idx1 + 1] = t[idx2]; 1111 a[idx1 + 2] = t[idx2 + rows]; 1112 a[idx1 + 3] = t[idx2 + 2 * rows]; 1113 } 1114 } 1115 } else if (columns == 2) { 1116 for (int r = 0; r < rows; r++) { 1117 idx1 = idx0 + r * rowStride; 1118 t[startt + r] = a[idx1]; 1119 t[startt + rows + r] = a[idx1 + 1]; 1120 } 1121 dhtRows.forward(t, startt); 1122 dhtRows.forward(t, startt + rows); 1123 for (int r = 0; r < rows; r++) { 1124 idx1 = idx0 + r * rowStride; 1125 a[idx1] = t[startt + r]; 1126 a[idx1 + 1] = t[startt + rows + r]; 1127 } 1128 } 1129 } 1130 } else { 1131 for (int s = n0; s < slices; s += nthreads) { 1132 idx0 = s * sliceStride; 1133 for (int r = 0; r < rows; r++) { 1134 dhtColumns.inverse(a, idx0 + r * rowStride, scale); 1135 } 1136 if (columns > 2) { 1137 for (int c = 0; c < columns; c += 4) { 1138 for (int r = 0; r < rows; r++) { 1139 idx1 = idx0 + r * rowStride + c; 1140 idx2 = startt + rows + r; 1141 t[startt + r] = a[idx1]; 1142 t[idx2] = a[idx1 + 1]; 1143 t[idx2 + rows] = a[idx1 + 2]; 1144 t[idx2 + 2 * rows] = a[idx1 + 3]; 1145 } 1146 dhtRows.inverse(t, startt, scale); 1147 dhtRows.inverse(t, startt + rows, scale); 1148 dhtRows.inverse(t, startt + 2 * rows, scale); 1149 dhtRows.inverse(t, startt + 3 * rows, scale); 1150 for (int r = 0; r < rows; r++) { 1151 idx1 = idx0 + r * rowStride + c; 1152 idx2 = startt + rows + r; 1153 a[idx1] = t[startt + r]; 1154 a[idx1 + 1] = t[idx2]; 1155 a[idx1 + 2] = t[idx2 + rows]; 1156 a[idx1 + 3] = t[idx2 + 2 * rows]; 1157 } 1158 } 1159 } else if (columns == 2) { 1160 for (int r = 0; r < rows; r++) { 1161 idx1 = idx0 + r * rowStride; 1162 t[startt + r] = a[idx1]; 1163 t[startt + rows + r] = a[idx1 + 1]; 1164 } 1165 dhtRows.inverse(t, startt, scale); 1166 dhtRows.inverse(t, startt + rows, scale); 1167 for (int r = 0; r < rows; r++) { 1168 idx1 = idx0 + r * rowStride; 1169 a[idx1] = t[startt + r]; 1170 a[idx1 + 1] = t[startt + rows + r]; 1171 } 1172 } 1173 } 1174 } 1175 } 1176 }); 1177 } 1178 ConcurrencyUtils.waitForCompletion(futures); 1179 } 1180 1181 private void ddxt3da_subth(final int isgn, final float[][][] a, final boolean scale) { 1182 final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads(); 1183 int nt = 4 * rows; 1184 if (columns == 2) { 1185 nt >>= 1; 1186 } 1187 Future<?>[] futures = new Future[nthreads]; 1188 1189 for (int i = 0; i < nthreads; i++) { 1190 final int n0 = i; 1191 final int startt = nt * i; 1192 futures[i] = ConcurrencyUtils.submit(new Runnable() { 1193 1194 @Override 1195 public void run() { 1196 int idx2; 1197 if (isgn == -1) { 1198 for (int s = n0; s < slices; s += nthreads) { 1199 for (int r = 0; r < rows; r++) { 1200 dhtColumns.forward(a[s][r]); 1201 } 1202 if (columns > 2) { 1203 for (int c = 0; c < columns; c += 4) { 1204 for (int r = 0; r < rows; r++) { 1205 idx2 = startt + rows + r; 1206 t[startt + r] = a[s][r][c]; 1207 t[idx2] = a[s][r][c + 1]; 1208 t[idx2 + rows] = a[s][r][c + 2]; 1209 t[idx2 + 2 * rows] = a[s][r][c + 3]; 1210 } 1211 dhtRows.forward(t, startt); 1212 dhtRows.forward(t, startt + rows); 1213 dhtRows.forward(t, startt + 2 * rows); 1214 dhtRows.forward(t, startt + 3 * rows); 1215 for (int r = 0; r < rows; r++) { 1216 idx2 = startt + rows + r; 1217 a[s][r][c] = t[startt + r]; 1218 a[s][r][c + 1] = t[idx2]; 1219 a[s][r][c + 2] = t[idx2 + rows]; 1220 a[s][r][c + 3] = t[idx2 + 2 * rows]; 1221 } 1222 } 1223 } else if (columns == 2) { 1224 for (int r = 0; r < rows; r++) { 1225 t[startt + r] = a[s][r][0]; 1226 t[startt + rows + r] = a[s][r][1]; 1227 } 1228 dhtRows.forward(t, startt); 1229 dhtRows.forward(t, startt + rows); 1230 for (int r = 0; r < rows; r++) { 1231 a[s][r][0] = t[startt + r]; 1232 a[s][r][1] = t[startt + rows + r]; 1233 } 1234 } 1235 } 1236 } else { 1237 for (int s = n0; s < slices; s += nthreads) { 1238 for (int r = 0; r < rows; r++) { 1239 dhtColumns.inverse(a[s][r], scale); 1240 } 1241 if (columns > 2) { 1242 for (int c = 0; c < columns; c += 4) { 1243 for (int r = 0; r < rows; r++) { 1244 idx2 = startt + rows + r; 1245 t[startt + r] = a[s][r][c]; 1246 t[idx2] = a[s][r][c + 1]; 1247 t[idx2 + rows] = a[s][r][c + 2]; 1248 t[idx2 + 2 * rows] = a[s][r][c + 3]; 1249 } 1250 dhtRows.inverse(t, startt, scale); 1251 dhtRows.inverse(t, startt + rows, scale); 1252 dhtRows.inverse(t, startt + 2 * rows, scale); 1253 dhtRows.inverse(t, startt + 3 * rows, scale); 1254 for (int r = 0; r < rows; r++) { 1255 idx2 = startt + rows + r; 1256 a[s][r][c] = t[startt + r]; 1257 a[s][r][c + 1] = t[idx2]; 1258 a[s][r][c + 2] = t[idx2 + rows]; 1259 a[s][r][c + 3] = t[idx2 + 2 * rows]; 1260 } 1261 } 1262 } else if (columns == 2) { 1263 for (int r = 0; r < rows; r++) { 1264 t[startt + r] = a[s][r][0]; 1265 t[startt + rows + r] = a[s][r][1]; 1266 } 1267 dhtRows.inverse(t, startt, scale); 1268 dhtRows.inverse(t, startt + rows, scale); 1269 for (int r = 0; r < rows; r++) { 1270 a[s][r][0] = t[startt + r]; 1271 a[s][r][1] = t[startt + rows + r]; 1272 } 1273 } 1274 } 1275 } 1276 } 1277 }); 1278 } 1279 ConcurrencyUtils.waitForCompletion(futures); 1280 } 1281 1282 private void ddxt3db_subth(final int isgn, final float[] a, final boolean scale) { 1283 final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads(); 1284 int nt = 4 * slices; 1285 if (columns == 2) { 1286 nt >>= 1; 1287 } 1288 Future<?>[] futures = new Future[nthreads]; 1289 1290 for (int i = 0; i < nthreads; i++) { 1291 final int n0 = i; 1292 final int startt = nt * i; 1293 futures[i] = ConcurrencyUtils.submit(new Runnable() { 1294 1295 @Override 1296 public void run() { 1297 int idx0, idx1, idx2; 1298 if (isgn == -1) { 1299 if (columns > 2) { 1300 for (int r = n0; r < rows; r += nthreads) { 1301 idx0 = r * rowStride; 1302 for (int c = 0; c < columns; c += 4) { 1303 for (int s = 0; s < slices; s++) { 1304 idx1 = s * sliceStride + idx0 + c; 1305 idx2 = startt + slices + s; 1306 t[startt + s] = a[idx1]; 1307 t[idx2] = a[idx1 + 1]; 1308 t[idx2 + slices] = a[idx1 + 2]; 1309 t[idx2 + 2 * slices] = a[idx1 + 3]; 1310 } 1311 dhtSlices.forward(t, startt); 1312 dhtSlices.forward(t, startt + slices); 1313 dhtSlices.forward(t, startt + 2 * slices); 1314 dhtSlices.forward(t, startt + 3 * slices); 1315 for (int s = 0; s < slices; s++) { 1316 idx1 = s * sliceStride + idx0 + c; 1317 idx2 = startt + slices + s; 1318 a[idx1] = t[startt + s]; 1319 a[idx1 + 1] = t[idx2]; 1320 a[idx1 + 2] = t[idx2 + slices]; 1321 a[idx1 + 3] = t[idx2 + 2 * slices]; 1322 } 1323 } 1324 } 1325 } else if (columns == 2) { 1326 for (int r = n0; r < rows; r += nthreads) { 1327 idx0 = r * rowStride; 1328 for (int s = 0; s < slices; s++) { 1329 idx1 = s * sliceStride + idx0; 1330 t[startt + s] = a[idx1]; 1331 t[startt + slices + s] = a[idx1 + 1]; 1332 } 1333 dhtSlices.forward(t, startt); 1334 dhtSlices.forward(t, startt + slices); 1335 for (int s = 0; s < slices; s++) { 1336 idx1 = s * sliceStride + idx0; 1337 a[idx1] = t[startt + s]; 1338 a[idx1 + 1] = t[startt + slices + s]; 1339 } 1340 } 1341 } 1342 } else { 1343 if (columns > 2) { 1344 for (int r = n0; r < rows; r += nthreads) { 1345 idx0 = r * rowStride; 1346 for (int c = 0; c < columns; c += 4) { 1347 for (int s = 0; s < slices; s++) { 1348 idx1 = s * sliceStride + idx0 + c; 1349 idx2 = startt + slices + s; 1350 t[startt + s] = a[idx1]; 1351 t[idx2] = a[idx1 + 1]; 1352 t[idx2 + slices] = a[idx1 + 2]; 1353 t[idx2 + 2 * slices] = a[idx1 + 3]; 1354 } 1355 dhtSlices.inverse(t, startt, scale); 1356 dhtSlices.inverse(t, startt + slices, scale); 1357 dhtSlices.inverse(t, startt + 2 * slices, scale); 1358 dhtSlices.inverse(t, startt + 3 * slices, scale); 1359 for (int s = 0; s < slices; s++) { 1360 idx1 = s * sliceStride + idx0 + c; 1361 idx2 = startt + slices + s; 1362 a[idx1] = t[startt + s]; 1363 a[idx1 + 1] = t[idx2]; 1364 a[idx1 + 2] = t[idx2 + slices]; 1365 a[idx1 + 3] = t[idx2 + 2 * slices]; 1366 } 1367 } 1368 } 1369 } else if (columns == 2) { 1370 for (int r = n0; r < rows; r += nthreads) { 1371 idx0 = r * rowStride; 1372 for (int s = 0; s < slices; s++) { 1373 idx1 = s * sliceStride + idx0; 1374 t[startt + s] = a[idx1]; 1375 t[startt + slices + s] = a[idx1 + 1]; 1376 } 1377 dhtSlices.inverse(t, startt, scale); 1378 dhtSlices.inverse(t, startt + slices, scale); 1379 for (int s = 0; s < slices; s++) { 1380 idx1 = s * sliceStride + idx0; 1381 a[idx1] = t[startt + s]; 1382 a[idx1 + 1] = t[startt + slices + s]; 1383 } 1384 } 1385 } 1386 } 1387 } 1388 }); 1389 } 1390 ConcurrencyUtils.waitForCompletion(futures); 1391 } 1392 1393 private void ddxt3db_subth(final int isgn, final float[][][] a, final boolean scale) { 1394 final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads(); 1395 int nt = 4 * slices; 1396 if (columns == 2) { 1397 nt >>= 1; 1398 } 1399 Future<?>[] futures = new Future[nthreads]; 1400 1401 for (int i = 0; i < nthreads; i++) { 1402 final int n0 = i; 1403 final int startt = nt * i; 1404 futures[i] = ConcurrencyUtils.submit(new Runnable() { 1405 1406 @Override 1407 public void run() { 1408 int idx2; 1409 if (isgn == -1) { 1410 if (columns > 2) { 1411 for (int r = n0; r < rows; r += nthreads) { 1412 for (int c = 0; c < columns; c += 4) { 1413 for (int s = 0; s < slices; s++) { 1414 idx2 = startt + slices + s; 1415 t[startt + s] = a[s][r][c]; 1416 t[idx2] = a[s][r][c + 1]; 1417 t[idx2 + slices] = a[s][r][c + 2]; 1418 t[idx2 + 2 * slices] = a[s][r][c + 3]; 1419 } 1420 dhtSlices.forward(t, startt); 1421 dhtSlices.forward(t, startt + slices); 1422 dhtSlices.forward(t, startt + 2 * slices); 1423 dhtSlices.forward(t, startt + 3 * slices); 1424 for (int s = 0; s < slices; s++) { 1425 idx2 = startt + slices + s; 1426 a[s][r][c] = t[startt + s]; 1427 a[s][r][c + 1] = t[idx2]; 1428 a[s][r][c + 2] = t[idx2 + slices]; 1429 a[s][r][c + 3] = t[idx2 + 2 * slices]; 1430 } 1431 } 1432 } 1433 } else if (columns == 2) { 1434 for (int r = n0; r < rows; r += nthreads) { 1435 for (int s = 0; s < slices; s++) { 1436 t[startt + s] = a[s][r][0]; 1437 t[startt + slices + s] = a[s][r][1]; 1438 } 1439 dhtSlices.forward(t, startt); 1440 dhtSlices.forward(t, startt + slices); 1441 for (int s = 0; s < slices; s++) { 1442 a[s][r][0] = t[startt + s]; 1443 a[s][r][1] = t[startt + slices + s]; 1444 } 1445 } 1446 } 1447 } else { 1448 if (columns > 2) { 1449 for (int r = n0; r < rows; r += nthreads) { 1450 for (int c = 0; c < columns; c += 4) { 1451 for (int s = 0; s < slices; s++) { 1452 idx2 = startt + slices + s; 1453 t[startt + s] = a[s][r][c]; 1454 t[idx2] = a[s][r][c + 1]; 1455 t[idx2 + slices] = a[s][r][c + 2]; 1456 t[idx2 + 2 * slices] = a[s][r][c + 3]; 1457 } 1458 dhtSlices.inverse(t, startt, scale); 1459 dhtSlices.inverse(t, startt + slices, scale); 1460 dhtSlices.inverse(t, startt + 2 * slices, scale); 1461 dhtSlices.inverse(t, startt + 3 * slices, scale); 1462 for (int s = 0; s < slices; s++) { 1463 idx2 = startt + slices + s; 1464 a[s][r][c] = t[startt + s]; 1465 a[s][r][c + 1] = t[idx2]; 1466 a[s][r][c + 2] = t[idx2 + slices]; 1467 a[s][r][c + 3] = t[idx2 + 2 * slices]; 1468 } 1469 } 1470 } 1471 } else if (columns == 2) { 1472 for (int r = n0; r < rows; r += nthreads) { 1473 for (int s = 0; s < slices; s++) { 1474 t[startt + s] = a[s][r][0]; 1475 t[startt + slices + s] = a[s][r][1]; 1476 } 1477 dhtSlices.inverse(t, startt, scale); 1478 dhtSlices.inverse(t, startt + slices, scale); 1479 1480 for (int s = 0; s < slices; s++) { 1481 a[s][r][0] = t[startt + s]; 1482 a[s][r][1] = t[startt + slices + s]; 1483 } 1484 } 1485 } 1486 } 1487 } 1488 }); 1489 } 1490 ConcurrencyUtils.waitForCompletion(futures); 1491 } 1492 1493 private void yTransform(float[] a) { 1494 float A, B, C, D, E, F, G, H; 1495 int cC, rC, sC; 1496 int idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9, idx10, idx11, idx12; 1497 for (int s = 0; s <= slices / 2; s++) { 1498 sC = (slices - s) % slices; 1499 idx9 = s * sliceStride; 1500 idx10 = sC * sliceStride; 1501 for (int r = 0; r <= rows / 2; r++) { 1502 rC = (rows - r) % rows; 1503 idx11 = r * rowStride; 1504 idx12 = rC * rowStride; 1505 for (int c = 0; c <= columns / 2; c++) { 1506 cC = (columns - c) % columns; 1507 idx1 = idx9 + idx12 + c; 1508 idx2 = idx9 + idx11 + cC; 1509 idx3 = idx10 + idx11 + c; 1510 idx4 = idx10 + idx12 + cC; 1511 idx5 = idx10 + idx12 + c; 1512 idx6 = idx10 + idx11 + cC; 1513 idx7 = idx9 + idx11 + c; 1514 idx8 = idx9 + idx12 + cC; 1515 A = a[idx1]; 1516 B = a[idx2]; 1517 C = a[idx3]; 1518 D = a[idx4]; 1519 E = a[idx5]; 1520 F = a[idx6]; 1521 G = a[idx7]; 1522 H = a[idx8]; 1523 a[idx7] = (A + B + C - D) / 2; 1524 a[idx3] = (E + F + G - H) / 2; 1525 a[idx1] = (G + H + E - F) / 2; 1526 a[idx5] = (C + D + A - B) / 2; 1527 a[idx2] = (H + G + F - E) / 2; 1528 a[idx6] = (D + C + B - A) / 2; 1529 a[idx8] = (B + A + D - C) / 2; 1530 a[idx4] = (F + E + H - G) / 2; 1531 } 1532 } 1533 } 1534 } 1535 1536 private void yTransform(float[][][] a) { 1537 float A, B, C, D, E, F, G, H; 1538 int cC, rC, sC; 1539 for (int s = 0; s <= slices / 2; s++) { 1540 sC = (slices - s) % slices; 1541 for (int r = 0; r <= rows / 2; r++) { 1542 rC = (rows - r) % rows; 1543 for (int c = 0; c <= columns / 2; c++) { 1544 cC = (columns - c) % columns; 1545 A = a[s][rC][c]; 1546 B = a[s][r][cC]; 1547 C = a[sC][r][c]; 1548 D = a[sC][rC][cC]; 1549 E = a[sC][rC][c]; 1550 F = a[sC][r][cC]; 1551 G = a[s][r][c]; 1552 H = a[s][rC][cC]; 1553 a[s][r][c] = (A + B + C - D) / 2; 1554 a[sC][r][c] = (E + F + G - H) / 2; 1555 a[s][rC][c] = (G + H + E - F) / 2; 1556 a[sC][rC][c] = (C + D + A - B) / 2; 1557 a[s][r][cC] = (H + G + F - E) / 2; 1558 a[sC][r][cC] = (D + C + B - A) / 2; 1559 a[s][rC][cC] = (B + A + D - C) / 2; 1560 a[sC][rC][cC] = (F + E + H - G) / 2; 1561 } 1562 } 1563 } 1564 } 1565 1566}