001/* ***** BEGIN LICENSE BLOCK ***** 002 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 003 * 004 * The contents of this file are subject to the Mozilla Public License Version 005 * 1.1 (the "License"); you may not use this file except in compliance with 006 * the License. You may obtain a copy of the License at 007 * http://www.mozilla.org/MPL/ 008 * 009 * Software distributed under the License is distributed on an "AS IS" basis, 010 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 011 * for the specific language governing rights and limitations under the 012 * License. 013 * 014 * The Original Code is JTransforms. 015 * 016 * The Initial Developer of the Original Code is 017 * Piotr Wendykier, Emory University. 018 * Portions created by the Initial Developer are Copyright (C) 2007-2009 019 * the Initial Developer. All Rights Reserved. 020 * 021 * Alternatively, the contents of this file may be used under the terms of 022 * either the GNU General Public License Version 2 or later (the "GPL"), or 023 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 024 * in which case the provisions of the GPL or the LGPL are applicable instead 025 * of those above. If you wish to allow use of your version of this file only 026 * under the terms of either the GPL or the LGPL, and not to allow others to 027 * use your version of this file under the terms of the MPL, indicate your 028 * decision by deleting the provisions above and replace them with the notice 029 * and other provisions required by the GPL or the LGPL. If you do not delete 030 * the provisions above, a recipient may use your version of this file under 031 * the terms of any one of the MPL, the GPL or the LGPL. 032 * 033 * ***** END LICENSE BLOCK ***** */ 034 035package edu.emory.mathcs.jtransforms.fft; 036 037import java.util.concurrent.Future; 038 039import edu.emory.mathcs.utils.ConcurrencyUtils; 040 041/** 042 * Computes 3D Discrete Fourier Transform (DFT) of complex and real, single 043 * precision data. The sizes of all three dimensions can be arbitrary numbers. 044 * This is a parallel implementation of split-radix and mixed-radix algorithms 045 * optimized for SMP systems. <br> 046 * <br> 047 * Part of the code is derived from General Purpose FFT Package written by Takuya Ooura 048 * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html) 049 * 050 * @author Piotr Wendykier (piotr.wendykier@gmail.com) 051 * 052 */ 053public strictfp class FloatFFT_3D { 054 055 private int slices; 056 057 private int rows; 058 059 private int columns; 060 061 private int sliceStride; 062 063 private int rowStride; 064 065 private float[] t; 066 067 private FloatFFT_1D fftSlices, fftRows, fftColumns; 068 069 private int oldNthreads; 070 071 private int nt; 072 073 private boolean isPowerOfTwo = false; 074 075 private boolean useThreads = false; 076 077 /** 078 * Creates new instance of FloatFFT_3D. 079 * 080 * @param slices 081 * number of slices 082 * @param rows 083 * number of rows 084 * @param columns 085 * number of columns 086 * 087 */ 088 public FloatFFT_3D(int slices, int rows, int columns) { 089 if (slices <= 1 || rows <= 1 || columns <= 1) { 090 throw new IllegalArgumentException("slices, rows and columns must be greater than 1"); 091 } 092 this.slices = slices; 093 this.rows = rows; 094 this.columns = columns; 095 this.sliceStride = rows * columns; 096 this.rowStride = columns; 097 if (slices * rows * columns >= ConcurrencyUtils.getThreadsBeginN_3D()) { 098 this.useThreads = true; 099 } 100 if (ConcurrencyUtils.isPowerOf2(slices) && ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) { 101 isPowerOfTwo = true; 102 oldNthreads = ConcurrencyUtils.getNumberOfThreads(); 103 nt = slices; 104 if (nt < rows) { 105 nt = rows; 106 } 107 nt *= 8; 108 if (oldNthreads > 1) { 109 nt *= oldNthreads; 110 } 111 if (2 * columns == 4) { 112 nt >>= 1; 113 } else if (2 * columns < 4) { 114 nt >>= 2; 115 } 116 t = new float[nt]; 117 } 118 fftSlices = new FloatFFT_1D(slices); 119 if (slices == rows) { 120 fftRows = fftSlices; 121 } else { 122 fftRows = new FloatFFT_1D(rows); 123 } 124 if (slices == columns) { 125 fftColumns = fftSlices; 126 } else if (rows == columns) { 127 fftColumns = fftRows; 128 } else { 129 fftColumns = new FloatFFT_1D(columns); 130 } 131 132 } 133 134 /** 135 * Computes 3D forward DFT of complex data leaving the result in 136 * <code>a</code>. The data is stored in 1D array addressed in slice-major, 137 * then row-major, then column-major, in order of significance, i.e. element 138 * (i,j,k) of 3D array x[slices][rows][2*columns] is stored in a[i*sliceStride + 139 * j*rowStride + k], where sliceStride = rows * 2 * columns and rowStride = 2 * columns. 140 * Complex number is stored as two float values in sequence: the real and 141 * imaginary part, i.e. the input array must be of size slices*rows*2*columns. The 142 * physical layout of the input data is as follows: 143 * 144 * <pre> 145 * a[k1*sliceStride + k2*rowStride + 2*k3] = Re[k1][k2][k3], 146 * a[k1*sliceStride + k2*rowStride + 2*k3+1] = Im[k1][k2][k3], 0<=k1<slices, 0<=k2<rows, 0<=k3<columns, 147 * </pre> 148 * 149 * @param a 150 * data to transform 151 */ 152 public void complexForward(final float[] a) { 153 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 154 if (isPowerOfTwo) { 155 int oldn3 = columns; 156 columns = 2 * columns; 157 158 sliceStride = rows * columns; 159 rowStride = columns; 160 161 if (nthreads != oldNthreads) { 162 nt = slices; 163 if (nt < rows) { 164 nt = rows; 165 } 166 nt *= 8; 167 if (nthreads > 1) { 168 nt *= nthreads; 169 } 170 if (columns == 4) { 171 nt >>= 1; 172 } else if (columns < 4) { 173 nt >>= 2; 174 } 175 t = new float[nt]; 176 oldNthreads = nthreads; 177 } 178 if ((nthreads > 1) && useThreads) { 179 xdft3da_subth2(0, -1, a, true); 180 cdft3db_subth(-1, a, true); 181 } else { 182 xdft3da_sub2(0, -1, a, true); 183 cdft3db_sub(-1, a, true); 184 } 185 columns = oldn3; 186 sliceStride = rows * columns; 187 rowStride = columns; 188 } else { 189 sliceStride = 2 * rows * columns; 190 rowStride = 2 * columns; 191 if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) { 192 Future<?>[] futures = new Future[nthreads]; 193 int p = slices / nthreads; 194 for (int l = 0; l < nthreads; l++) { 195 final int firstSlice = l * p; 196 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 197 futures[l] = ConcurrencyUtils.submit(new Runnable() { 198 @Override 199 public void run() { 200 for (int s = firstSlice; s < lastSlice; s++) { 201 int idx1 = s * sliceStride; 202 for (int r = 0; r < rows; r++) { 203 fftColumns.complexForward(a, idx1 + r * rowStride); 204 } 205 } 206 } 207 }); 208 } 209 ConcurrencyUtils.waitForCompletion(futures); 210 211 for (int l = 0; l < nthreads; l++) { 212 final int firstSlice = l * p; 213 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 214 215 futures[l] = ConcurrencyUtils.submit(new Runnable() { 216 @Override 217 public void run() { 218 float[] temp = new float[2 * rows]; 219 for (int s = firstSlice; s < lastSlice; s++) { 220 int idx1 = s * sliceStride; 221 for (int c = 0; c < columns; c++) { 222 int idx2 = 2 * c; 223 for (int r = 0; r < rows; r++) { 224 int idx3 = idx1 + idx2 + r * rowStride; 225 int idx4 = 2 * r; 226 temp[idx4] = a[idx3]; 227 temp[idx4 + 1] = a[idx3 + 1]; 228 } 229 fftRows.complexForward(temp); 230 for (int r = 0; r < rows; r++) { 231 int idx3 = idx1 + idx2 + r * rowStride; 232 int idx4 = 2 * r; 233 a[idx3] = temp[idx4]; 234 a[idx3 + 1] = temp[idx4 + 1]; 235 } 236 } 237 } 238 } 239 }); 240 } 241 ConcurrencyUtils.waitForCompletion(futures); 242 243 p = rows / nthreads; 244 for (int l = 0; l < nthreads; l++) { 245 final int firstRow = l * p; 246 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 247 248 futures[l] = ConcurrencyUtils.submit(new Runnable() { 249 @Override 250 public void run() { 251 float[] temp = new float[2 * slices]; 252 for (int r = firstRow; r < lastRow; r++) { 253 int idx1 = r * rowStride; 254 for (int c = 0; c < columns; c++) { 255 int idx2 = 2 * c; 256 for (int s = 0; s < slices; s++) { 257 int idx3 = s * sliceStride + idx1 + idx2; 258 int idx4 = 2 * s; 259 temp[idx4] = a[idx3]; 260 temp[idx4 + 1] = a[idx3 + 1]; 261 } 262 fftSlices.complexForward(temp); 263 for (int s = 0; s < slices; s++) { 264 int idx3 = s * sliceStride + idx1 + idx2; 265 int idx4 = 2 * s; 266 a[idx3] = temp[idx4]; 267 a[idx3 + 1] = temp[idx4 + 1]; 268 } 269 } 270 } 271 } 272 }); 273 } 274 ConcurrencyUtils.waitForCompletion(futures); 275 276 } else { 277 for (int s = 0; s < slices; s++) { 278 int idx1 = s * sliceStride; 279 for (int r = 0; r < rows; r++) { 280 fftColumns.complexForward(a, idx1 + r * rowStride); 281 } 282 } 283 284 float[] temp = new float[2 * rows]; 285 for (int s = 0; s < slices; s++) { 286 int idx1 = s * sliceStride; 287 for (int c = 0; c < columns; c++) { 288 int idx2 = 2 * c; 289 for (int r = 0; r < rows; r++) { 290 int idx3 = idx1 + idx2 + r * rowStride; 291 int idx4 = 2 * r; 292 temp[idx4] = a[idx3]; 293 temp[idx4 + 1] = a[idx3 + 1]; 294 } 295 fftRows.complexForward(temp); 296 for (int r = 0; r < rows; r++) { 297 int idx3 = idx1 + idx2 + r * rowStride; 298 int idx4 = 2 * r; 299 a[idx3] = temp[idx4]; 300 a[idx3 + 1] = temp[idx4 + 1]; 301 } 302 } 303 } 304 305 temp = new float[2 * slices]; 306 for (int r = 0; r < rows; r++) { 307 int idx1 = r * rowStride; 308 for (int c = 0; c < columns; c++) { 309 int idx2 = 2 * c; 310 for (int s = 0; s < slices; s++) { 311 int idx3 = s * sliceStride + idx1 + idx2; 312 int idx4 = 2 * s; 313 temp[idx4] = a[idx3]; 314 temp[idx4 + 1] = a[idx3 + 1]; 315 } 316 fftSlices.complexForward(temp); 317 for (int s = 0; s < slices; s++) { 318 int idx3 = s * sliceStride + idx1 + idx2; 319 int idx4 = 2 * s; 320 a[idx3] = temp[idx4]; 321 a[idx3 + 1] = temp[idx4 + 1]; 322 } 323 } 324 } 325 } 326 sliceStride = rows * columns; 327 rowStride = columns; 328 } 329 } 330 331 /** 332 * Computes 3D forward DFT of complex data leaving the result in 333 * <code>a</code>. The data is stored in 3D array. Complex data is 334 * represented by 2 float values in sequence: the real and imaginary part, 335 * i.e. the input array must be of size slices by rows by 2*columns. The physical 336 * layout of the input data is as follows: 337 * 338 * <pre> 339 * a[k1][k2][2*k3] = Re[k1][k2][k3], 340 * a[k1][k2][2*k3+1] = Im[k1][k2][k3], 0<=k1<slices, 0<=k2<rows, 0<=k3<columns, 341 * </pre> 342 * 343 * @param a 344 * data to transform 345 */ 346 public void complexForward(final float[][][] a) { 347 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 348 if (isPowerOfTwo) { 349 int oldn3 = columns; 350 columns = 2 * columns; 351 352 sliceStride = rows * columns; 353 rowStride = columns; 354 355 if (nthreads != oldNthreads) { 356 nt = slices; 357 if (nt < rows) { 358 nt = rows; 359 } 360 nt *= 8; 361 if (nthreads > 1) { 362 nt *= nthreads; 363 } 364 if (columns == 4) { 365 nt >>= 1; 366 } else if (columns < 4) { 367 nt >>= 2; 368 } 369 t = new float[nt]; 370 oldNthreads = nthreads; 371 } 372 if ((nthreads > 1) && useThreads) { 373 xdft3da_subth2(0, -1, a, true); 374 cdft3db_subth(-1, a, true); 375 } else { 376 xdft3da_sub2(0, -1, a, true); 377 cdft3db_sub(-1, a, true); 378 } 379 columns = oldn3; 380 sliceStride = rows * columns; 381 rowStride = columns; 382 } else { 383 if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) { 384 Future<?>[] futures = new Future[nthreads]; 385 int p = slices / nthreads; 386 for (int l = 0; l < nthreads; l++) { 387 final int firstSlice = l * p; 388 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 389 futures[l] = ConcurrencyUtils.submit(new Runnable() { 390 @Override 391 public void run() { 392 for (int s = firstSlice; s < lastSlice; s++) { 393 for (int r = 0; r < rows; r++) { 394 fftColumns.complexForward(a[s][r]); 395 } 396 } 397 } 398 }); 399 } 400 ConcurrencyUtils.waitForCompletion(futures); 401 402 for (int l = 0; l < nthreads; l++) { 403 final int firstSlice = l * p; 404 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 405 406 futures[l] = ConcurrencyUtils.submit(new Runnable() { 407 @Override 408 public void run() { 409 float[] temp = new float[2 * rows]; 410 for (int s = firstSlice; s < lastSlice; s++) { 411 for (int c = 0; c < columns; c++) { 412 int idx2 = 2 * c; 413 for (int r = 0; r < rows; r++) { 414 int idx4 = 2 * r; 415 temp[idx4] = a[s][r][idx2]; 416 temp[idx4 + 1] = a[s][r][idx2 + 1]; 417 } 418 fftRows.complexForward(temp); 419 for (int r = 0; r < rows; r++) { 420 int idx4 = 2 * r; 421 a[s][r][idx2] = temp[idx4]; 422 a[s][r][idx2 + 1] = temp[idx4 + 1]; 423 } 424 } 425 } 426 } 427 }); 428 } 429 ConcurrencyUtils.waitForCompletion(futures); 430 431 p = rows / nthreads; 432 for (int l = 0; l < nthreads; l++) { 433 final int firstRow = l * p; 434 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 435 436 futures[l] = ConcurrencyUtils.submit(new Runnable() { 437 @Override 438 public void run() { 439 float[] temp = new float[2 * slices]; 440 for (int r = firstRow; r < lastRow; r++) { 441 for (int c = 0; c < columns; c++) { 442 int idx2 = 2 * c; 443 for (int s = 0; s < slices; s++) { 444 int idx4 = 2 * s; 445 temp[idx4] = a[s][r][idx2]; 446 temp[idx4 + 1] = a[s][r][idx2 + 1]; 447 } 448 fftSlices.complexForward(temp); 449 for (int s = 0; s < slices; s++) { 450 int idx4 = 2 * s; 451 a[s][r][idx2] = temp[idx4]; 452 a[s][r][idx2 + 1] = temp[idx4 + 1]; 453 } 454 } 455 } 456 } 457 }); 458 } 459 ConcurrencyUtils.waitForCompletion(futures); 460 461 } else { 462 for (int s = 0; s < slices; s++) { 463 for (int r = 0; r < rows; r++) { 464 fftColumns.complexForward(a[s][r]); 465 } 466 } 467 468 float[] temp = new float[2 * rows]; 469 for (int s = 0; s < slices; s++) { 470 for (int c = 0; c < columns; c++) { 471 int idx2 = 2 * c; 472 for (int r = 0; r < rows; r++) { 473 int idx4 = 2 * r; 474 temp[idx4] = a[s][r][idx2]; 475 temp[idx4 + 1] = a[s][r][idx2 + 1]; 476 } 477 fftRows.complexForward(temp); 478 for (int r = 0; r < rows; r++) { 479 int idx4 = 2 * r; 480 a[s][r][idx2] = temp[idx4]; 481 a[s][r][idx2 + 1] = temp[idx4 + 1]; 482 } 483 } 484 } 485 486 temp = new float[2 * slices]; 487 for (int r = 0; r < rows; r++) { 488 for (int c = 0; c < columns; c++) { 489 int idx2 = 2 * c; 490 for (int s = 0; s < slices; s++) { 491 int idx4 = 2 * s; 492 temp[idx4] = a[s][r][idx2]; 493 temp[idx4 + 1] = a[s][r][idx2 + 1]; 494 } 495 fftSlices.complexForward(temp); 496 for (int s = 0; s < slices; s++) { 497 int idx4 = 2 * s; 498 a[s][r][idx2] = temp[idx4]; 499 a[s][r][idx2 + 1] = temp[idx4 + 1]; 500 } 501 } 502 } 503 } 504 } 505 } 506 507 /** 508 * Computes 3D inverse DFT of complex data leaving the result in 509 * <code>a</code>. The data is stored in a 1D array addressed in 510 * slice-major, then row-major, then column-major, in order of significance, 511 * i.e. element (i,j,k) of 3-d array x[slices][rows][2*columns] is stored in 512 * a[i*sliceStride + j*rowStride + k], where sliceStride = rows * 2 * columns and 513 * rowStride = 2 * columns. Complex number is stored as two float values in 514 * sequence: the real and imaginary part, i.e. the input array must be of 515 * size slices*rows*2*columns. The physical layout of the input data is as follows: 516 * 517 * <pre> 518 * a[k1*sliceStride + k2*rowStride + 2*k3] = Re[k1][k2][k3], 519 * a[k1*sliceStride + k2*rowStride + 2*k3+1] = Im[k1][k2][k3], 0<=k1<slices, 0<=k2<rows, 0<=k3<columns, 520 * </pre> 521 * 522 * @param a 523 * data to transform 524 * @param scale 525 * if true then scaling is performed 526 */ 527 public void complexInverse(final float[] a, final boolean scale) { 528 529 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 530 531 if (isPowerOfTwo) { 532 int oldn3 = columns; 533 columns = 2 * columns; 534 sliceStride = rows * columns; 535 rowStride = columns; 536 if (nthreads != oldNthreads) { 537 nt = slices; 538 if (nt < rows) { 539 nt = rows; 540 } 541 nt *= 8; 542 if (nthreads > 1) { 543 nt *= nthreads; 544 } 545 if (columns == 4) { 546 nt >>= 1; 547 } else if (columns < 4) { 548 nt >>= 2; 549 } 550 t = new float[nt]; 551 oldNthreads = nthreads; 552 } 553 if ((nthreads > 1) && useThreads) { 554 xdft3da_subth2(0, 1, a, scale); 555 cdft3db_subth(1, a, scale); 556 } else { 557 xdft3da_sub2(0, 1, a, scale); 558 cdft3db_sub(1, a, scale); 559 } 560 columns = oldn3; 561 sliceStride = rows * columns; 562 rowStride = columns; 563 } else { 564 sliceStride = 2 * rows * columns; 565 rowStride = 2 * columns; 566 if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) { 567 Future<?>[] futures = new Future[nthreads]; 568 int p = slices / nthreads; 569 for (int l = 0; l < nthreads; l++) { 570 final int firstSlice = l * p; 571 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 572 573 futures[l] = ConcurrencyUtils.submit(new Runnable() { 574 @Override 575 public void run() { 576 for (int s = firstSlice; s < lastSlice; s++) { 577 int idx1 = s * sliceStride; 578 for (int r = 0; r < rows; r++) { 579 fftColumns.complexInverse(a, idx1 + r * rowStride, scale); 580 } 581 } 582 } 583 }); 584 } 585 ConcurrencyUtils.waitForCompletion(futures); 586 587 for (int l = 0; l < nthreads; l++) { 588 final int firstSlice = l * p; 589 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 590 591 futures[l] = ConcurrencyUtils.submit(new Runnable() { 592 @Override 593 public void run() { 594 float[] temp = new float[2 * rows]; 595 for (int s = firstSlice; s < lastSlice; s++) { 596 int idx1 = s * sliceStride; 597 for (int c = 0; c < columns; c++) { 598 int idx2 = 2 * c; 599 for (int r = 0; r < rows; r++) { 600 int idx3 = idx1 + idx2 + r * rowStride; 601 int idx4 = 2 * r; 602 temp[idx4] = a[idx3]; 603 temp[idx4 + 1] = a[idx3 + 1]; 604 } 605 fftRows.complexInverse(temp, scale); 606 for (int r = 0; r < rows; r++) { 607 int idx3 = idx1 + idx2 + r * rowStride; 608 int idx4 = 2 * r; 609 a[idx3] = temp[idx4]; 610 a[idx3 + 1] = temp[idx4 + 1]; 611 } 612 } 613 } 614 } 615 }); 616 } 617 ConcurrencyUtils.waitForCompletion(futures); 618 619 p = rows / nthreads; 620 for (int l = 0; l < nthreads; l++) { 621 final int firstRow = l * p; 622 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 623 624 futures[l] = ConcurrencyUtils.submit(new Runnable() { 625 @Override 626 public void run() { 627 float[] temp = new float[2 * slices]; 628 for (int r = firstRow; r < lastRow; r++) { 629 int idx1 = r * rowStride; 630 for (int c = 0; c < columns; c++) { 631 int idx2 = 2 * c; 632 for (int s = 0; s < slices; s++) { 633 int idx3 = s * sliceStride + idx1 + idx2; 634 int idx4 = 2 * s; 635 temp[idx4] = a[idx3]; 636 temp[idx4 + 1] = a[idx3 + 1]; 637 } 638 fftSlices.complexInverse(temp, scale); 639 for (int s = 0; s < slices; s++) { 640 int idx3 = s * sliceStride + idx1 + idx2; 641 int idx4 = 2 * s; 642 a[idx3] = temp[idx4]; 643 a[idx3 + 1] = temp[idx4 + 1]; 644 } 645 } 646 } 647 } 648 }); 649 } 650 ConcurrencyUtils.waitForCompletion(futures); 651 652 } else { 653 for (int s = 0; s < slices; s++) { 654 int idx1 = s * sliceStride; 655 for (int r = 0; r < rows; r++) { 656 fftColumns.complexInverse(a, idx1 + r * rowStride, scale); 657 } 658 } 659 float[] temp = new float[2 * rows]; 660 for (int s = 0; s < slices; s++) { 661 int idx1 = s * sliceStride; 662 for (int c = 0; c < columns; c++) { 663 int idx2 = 2 * c; 664 for (int r = 0; r < rows; r++) { 665 int idx3 = idx1 + idx2 + r * rowStride; 666 int idx4 = 2 * r; 667 temp[idx4] = a[idx3]; 668 temp[idx4 + 1] = a[idx3 + 1]; 669 } 670 fftRows.complexInverse(temp, scale); 671 for (int r = 0; r < rows; r++) { 672 int idx3 = idx1 + idx2 + r * rowStride; 673 int idx4 = 2 * r; 674 a[idx3] = temp[idx4]; 675 a[idx3 + 1] = temp[idx4 + 1]; 676 } 677 } 678 } 679 temp = new float[2 * slices]; 680 for (int r = 0; r < rows; r++) { 681 int idx1 = r * rowStride; 682 for (int c = 0; c < columns; c++) { 683 int idx2 = 2 * c; 684 for (int s = 0; s < slices; s++) { 685 int idx3 = s * sliceStride + idx1 + idx2; 686 int idx4 = 2 * s; 687 temp[idx4] = a[idx3]; 688 temp[idx4 + 1] = a[idx3 + 1]; 689 } 690 fftSlices.complexInverse(temp, scale); 691 for (int s = 0; s < slices; s++) { 692 int idx3 = s * sliceStride + idx1 + idx2; 693 int idx4 = 2 * s; 694 a[idx3] = temp[idx4]; 695 a[idx3 + 1] = temp[idx4 + 1]; 696 } 697 } 698 } 699 } 700 sliceStride = rows * columns; 701 rowStride = columns; 702 } 703 } 704 705 /** 706 * Computes 3D inverse DFT of complex data leaving the result in 707 * <code>a</code>. The data is stored in a 3D array. Complex data is 708 * represented by 2 float values in sequence: the real and imaginary part, 709 * i.e. the input array must be of size slices by rows by 2*columns. The physical 710 * layout of the input data is as follows: 711 * 712 * <pre> 713 * a[k1][k2][2*k3] = Re[k1][k2][k3], 714 * a[k1][k2][2*k3+1] = Im[k1][k2][k3], 0<=k1<slices, 0<=k2<rows, 0<=k3<columns, 715 * </pre> 716 * 717 * @param a 718 * data to transform 719 * @param scale 720 * if true then scaling is performed 721 */ 722 public void complexInverse(final float[][][] a, final boolean scale) { 723 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 724 if (isPowerOfTwo) { 725 int oldn3 = columns; 726 columns = 2 * columns; 727 sliceStride = rows * columns; 728 rowStride = columns; 729 if (nthreads != oldNthreads) { 730 nt = slices; 731 if (nt < rows) { 732 nt = rows; 733 } 734 nt *= 8; 735 if (nthreads > 1) { 736 nt *= nthreads; 737 } 738 if (columns == 4) { 739 nt >>= 1; 740 } else if (columns < 4) { 741 nt >>= 2; 742 } 743 t = new float[nt]; 744 oldNthreads = nthreads; 745 } 746 if ((nthreads > 1) && useThreads) { 747 xdft3da_subth2(0, 1, a, scale); 748 cdft3db_subth(1, a, scale); 749 } else { 750 xdft3da_sub2(0, 1, a, scale); 751 cdft3db_sub(1, a, scale); 752 } 753 columns = oldn3; 754 sliceStride = rows * columns; 755 rowStride = columns; 756 } else { 757 if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) { 758 Future<?>[] futures = new Future[nthreads]; 759 int p = slices / nthreads; 760 for (int l = 0; l < nthreads; l++) { 761 final int firstSlice = l * p; 762 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 763 764 futures[l] = ConcurrencyUtils.submit(new Runnable() { 765 @Override 766 public void run() { 767 for (int s = firstSlice; s < lastSlice; s++) { 768 for (int r = 0; r < rows; r++) { 769 fftColumns.complexInverse(a[s][r], scale); 770 } 771 } 772 } 773 }); 774 } 775 ConcurrencyUtils.waitForCompletion(futures); 776 777 for (int l = 0; l < nthreads; l++) { 778 final int firstSlice = l * p; 779 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 780 781 futures[l] = ConcurrencyUtils.submit(new Runnable() { 782 @Override 783 public void run() { 784 float[] temp = new float[2 * rows]; 785 for (int s = firstSlice; s < lastSlice; s++) { 786 for (int c = 0; c < columns; c++) { 787 int idx2 = 2 * c; 788 for (int r = 0; r < rows; r++) { 789 int idx4 = 2 * r; 790 temp[idx4] = a[s][r][idx2]; 791 temp[idx4 + 1] = a[s][r][idx2 + 1]; 792 } 793 fftRows.complexInverse(temp, scale); 794 for (int r = 0; r < rows; r++) { 795 int idx4 = 2 * r; 796 a[s][r][idx2] = temp[idx4]; 797 a[s][r][idx2 + 1] = temp[idx4 + 1]; 798 } 799 } 800 } 801 } 802 }); 803 } 804 ConcurrencyUtils.waitForCompletion(futures); 805 806 p = rows / nthreads; 807 for (int l = 0; l < nthreads; l++) { 808 final int firstRow = l * p; 809 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 810 811 futures[l] = ConcurrencyUtils.submit(new Runnable() { 812 @Override 813 public void run() { 814 float[] temp = new float[2 * slices]; 815 for (int r = firstRow; r < lastRow; r++) { 816 for (int c = 0; c < columns; c++) { 817 int idx2 = 2 * c; 818 for (int s = 0; s < slices; s++) { 819 int idx4 = 2 * s; 820 temp[idx4] = a[s][r][idx2]; 821 temp[idx4 + 1] = a[s][r][idx2 + 1]; 822 } 823 fftSlices.complexInverse(temp, scale); 824 for (int s = 0; s < slices; s++) { 825 int idx4 = 2 * s; 826 a[s][r][idx2] = temp[idx4]; 827 a[s][r][idx2 + 1] = temp[idx4 + 1]; 828 } 829 } 830 } 831 } 832 }); 833 } 834 ConcurrencyUtils.waitForCompletion(futures); 835 836 } else { 837 for (int s = 0; s < slices; s++) { 838 for (int r = 0; r < rows; r++) { 839 fftColumns.complexInverse(a[s][r], scale); 840 } 841 } 842 float[] temp = new float[2 * rows]; 843 for (int s = 0; s < slices; s++) { 844 for (int c = 0; c < columns; c++) { 845 int idx2 = 2 * c; 846 for (int r = 0; r < rows; r++) { 847 int idx4 = 2 * r; 848 temp[idx4] = a[s][r][idx2]; 849 temp[idx4 + 1] = a[s][r][idx2 + 1]; 850 } 851 fftRows.complexInverse(temp, scale); 852 for (int r = 0; r < rows; r++) { 853 int idx4 = 2 * r; 854 a[s][r][idx2] = temp[idx4]; 855 a[s][r][idx2 + 1] = temp[idx4 + 1]; 856 } 857 } 858 } 859 temp = new float[2 * slices]; 860 for (int r = 0; r < rows; r++) { 861 for (int c = 0; c < columns; c++) { 862 int idx2 = 2 * c; 863 for (int s = 0; s < slices; s++) { 864 int idx4 = 2 * s; 865 temp[idx4] = a[s][r][idx2]; 866 temp[idx4 + 1] = a[s][r][idx2 + 1]; 867 } 868 fftSlices.complexInverse(temp, scale); 869 for (int s = 0; s < slices; s++) { 870 int idx4 = 2 * s; 871 a[s][r][idx2] = temp[idx4]; 872 a[s][r][idx2 + 1] = temp[idx4 + 1]; 873 } 874 } 875 } 876 } 877 } 878 } 879 880 /** 881 * Computes 3D forward DFT of real data leaving the result in <code>a</code> 882 * . This method only works when the sizes of all three dimensions are 883 * power-of-two numbers. The data is stored in a 1D array addressed in 884 * slice-major, then row-major, then column-major, in order of significance, 885 * i.e. element (i,j,k) of 3-d array x[slices][rows][2*columns] is stored in 886 * a[i*sliceStride + j*rowStride + k], where sliceStride = rows * 2 * columns and 887 * rowStride = 2 * columns. The physical layout of the output data is as follows: 888 * 889 * <pre> 890 * a[k1*sliceStride + k2*rowStride + 2*k3] = Re[k1][k2][k3] 891 * = Re[(slices-k1)%slices][(rows-k2)%rows][columns-k3], 892 * a[k1*sliceStride + k2*rowStride + 2*k3+1] = Im[k1][k2][k3] 893 * = -Im[(slices-k1)%slices][(rows-k2)%rows][columns-k3], 894 * 0<=k1<slices, 0<=k2<rows, 0<k3<columns/2, 895 * a[k1*sliceStride + k2*rowStride] = Re[k1][k2][0] 896 * = Re[(slices-k1)%slices][rows-k2][0], 897 * a[k1*sliceStride + k2*rowStride + 1] = Im[k1][k2][0] 898 * = -Im[(slices-k1)%slices][rows-k2][0], 899 * a[k1*sliceStride + (rows-k2)*rowStride + 1] = Re[(slices-k1)%slices][k2][columns/2] 900 * = Re[k1][rows-k2][columns/2], 901 * a[k1*sliceStride + (rows-k2)*rowStride] = -Im[(slices-k1)%slices][k2][columns/2] 902 * = Im[k1][rows-k2][columns/2], 903 * 0<=k1<slices, 0<k2<rows/2, 904 * a[k1*sliceStride] = Re[k1][0][0] 905 * = Re[slices-k1][0][0], 906 * a[k1*sliceStride + 1] = Im[k1][0][0] 907 * = -Im[slices-k1][0][0], 908 * a[k1*sliceStride + (rows/2)*rowStride] = Re[k1][rows/2][0] 909 * = Re[slices-k1][rows/2][0], 910 * a[k1*sliceStride + (rows/2)*rowStride + 1] = Im[k1][rows/2][0] 911 * = -Im[slices-k1][rows/2][0], 912 * a[(slices-k1)*sliceStride + 1] = Re[k1][0][columns/2] 913 * = Re[slices-k1][0][columns/2], 914 * a[(slices-k1)*sliceStride] = -Im[k1][0][columns/2] 915 * = Im[slices-k1][0][columns/2], 916 * a[(slices-k1)*sliceStride + (rows/2)*rowStride + 1] = Re[k1][rows/2][columns/2] 917 * = Re[slices-k1][rows/2][columns/2], 918 * a[(slices-k1)*sliceStride + (rows/2) * rowStride] = -Im[k1][rows/2][columns/2] 919 * = Im[slices-k1][rows/2][columns/2], 920 * 0<k1<slices/2, 921 * a[0] = Re[0][0][0], 922 * a[1] = Re[0][0][columns/2], 923 * a[(rows/2)*rowStride] = Re[0][rows/2][0], 924 * a[(rows/2)*rowStride + 1] = Re[0][rows/2][columns/2], 925 * a[(slices/2)*sliceStride] = Re[slices/2][0][0], 926 * a[(slices/2)*sliceStride + 1] = Re[slices/2][0][columns/2], 927 * a[(slices/2)*sliceStride + (rows/2)*rowStride] = Re[slices/2][rows/2][0], 928 * a[(slices/2)*sliceStride + (rows/2)*rowStride + 1] = Re[slices/2][rows/2][columns/2] 929 * </pre> 930 * 931 * 932 * This method computes only half of the elements of the real transform. The 933 * other half satisfies the symmetry condition. If you want the full real 934 * forward transform, use <code>realForwardFull</code>. To get back the 935 * original data, use <code>realInverse</code> on the output of this method. 936 * 937 * @param a 938 * data to transform 939 */ 940 public void realForward(float[] a) { 941 if (isPowerOfTwo == false) { 942 throw new IllegalArgumentException("slices, rows and columns must be power of two numbers"); 943 } else { 944 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 945 if (nthreads != oldNthreads) { 946 nt = slices; 947 if (nt < rows) { 948 nt = rows; 949 } 950 nt *= 8; 951 if (nthreads > 1) { 952 nt *= nthreads; 953 } 954 if (columns == 4) { 955 nt >>= 1; 956 } else if (columns < 4) { 957 nt >>= 2; 958 } 959 t = new float[nt]; 960 oldNthreads = nthreads; 961 } 962 if ((nthreads > 1) && useThreads) { 963 xdft3da_subth1(1, -1, a, true); 964 cdft3db_subth(-1, a, true); 965 rdft3d_sub(1, a); 966 } else { 967 xdft3da_sub1(1, -1, a, true); 968 cdft3db_sub(-1, a, true); 969 rdft3d_sub(1, a); 970 } 971 } 972 } 973 974 /** 975 * Computes 3D forward DFT of real data leaving the result in <code>a</code> 976 * . This method only works when the sizes of all three dimensions are 977 * power-of-two numbers. The data is stored in a 3D array. The physical 978 * layout of the output data is as follows: 979 * 980 * <pre> 981 * a[k1][k2][2*k3] = Re[k1][k2][k3] 982 * = Re[(slices-k1)%slices][(rows-k2)%rows][columns-k3], 983 * a[k1][k2][2*k3+1] = Im[k1][k2][k3] 984 * = -Im[(slices-k1)%slices][(rows-k2)%rows][columns-k3], 985 * 0<=k1<slices, 0<=k2<rows, 0<k3<columns/2, 986 * a[k1][k2][0] = Re[k1][k2][0] 987 * = Re[(slices-k1)%slices][rows-k2][0], 988 * a[k1][k2][1] = Im[k1][k2][0] 989 * = -Im[(slices-k1)%slices][rows-k2][0], 990 * a[k1][rows-k2][1] = Re[(slices-k1)%slices][k2][columns/2] 991 * = Re[k1][rows-k2][columns/2], 992 * a[k1][rows-k2][0] = -Im[(slices-k1)%slices][k2][columns/2] 993 * = Im[k1][rows-k2][columns/2], 994 * 0<=k1<slices, 0<k2<rows/2, 995 * a[k1][0][0] = Re[k1][0][0] 996 * = Re[slices-k1][0][0], 997 * a[k1][0][1] = Im[k1][0][0] 998 * = -Im[slices-k1][0][0], 999 * a[k1][rows/2][0] = Re[k1][rows/2][0] 1000 * = Re[slices-k1][rows/2][0], 1001 * a[k1][rows/2][1] = Im[k1][rows/2][0] 1002 * = -Im[slices-k1][rows/2][0], 1003 * a[slices-k1][0][1] = Re[k1][0][columns/2] 1004 * = Re[slices-k1][0][columns/2], 1005 * a[slices-k1][0][0] = -Im[k1][0][columns/2] 1006 * = Im[slices-k1][0][columns/2], 1007 * a[slices-k1][rows/2][1] = Re[k1][rows/2][columns/2] 1008 * = Re[slices-k1][rows/2][columns/2], 1009 * a[slices-k1][rows/2][0] = -Im[k1][rows/2][columns/2] 1010 * = Im[slices-k1][rows/2][columns/2], 1011 * 0<k1<slices/2, 1012 * a[0][0][0] = Re[0][0][0], 1013 * a[0][0][1] = Re[0][0][columns/2], 1014 * a[0][rows/2][0] = Re[0][rows/2][0], 1015 * a[0][rows/2][1] = Re[0][rows/2][columns/2], 1016 * a[slices/2][0][0] = Re[slices/2][0][0], 1017 * a[slices/2][0][1] = Re[slices/2][0][columns/2], 1018 * a[slices/2][rows/2][0] = Re[slices/2][rows/2][0], 1019 * a[slices/2][rows/2][1] = Re[slices/2][rows/2][columns/2] 1020 * </pre> 1021 * 1022 * 1023 * This method computes only half of the elements of the real transform. The 1024 * other half satisfies the symmetry condition. If you want the full real 1025 * forward transform, use <code>realForwardFull</code>. To get back the 1026 * original data, use <code>realInverse</code> on the output of this method. 1027 * 1028 * @param a 1029 * data to transform 1030 */ 1031 public void realForward(float[][][] a) { 1032 if (isPowerOfTwo == false) { 1033 throw new IllegalArgumentException("slices, rows and columns must be power of two numbers"); 1034 } else { 1035 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 1036 if (nthreads != oldNthreads) { 1037 nt = slices; 1038 if (nt < rows) { 1039 nt = rows; 1040 } 1041 nt *= 8; 1042 if (nthreads > 1) { 1043 nt *= nthreads; 1044 } 1045 if (columns == 4) { 1046 nt >>= 1; 1047 } else if (columns < 4) { 1048 nt >>= 2; 1049 } 1050 t = new float[nt]; 1051 oldNthreads = nthreads; 1052 } 1053 if ((nthreads > 1) && useThreads) { 1054 xdft3da_subth1(1, -1, a, true); 1055 cdft3db_subth(-1, a, true); 1056 rdft3d_sub(1, a); 1057 } else { 1058 xdft3da_sub1(1, -1, a, true); 1059 cdft3db_sub(-1, a, true); 1060 rdft3d_sub(1, a); 1061 } 1062 } 1063 } 1064 1065 /** 1066 * Computes 3D forward DFT of real data leaving the result in <code>a</code> 1067 * . This method computes full real forward transform, i.e. you will get the 1068 * same result as from <code>complexForward</code> called with all imaginary 1069 * part equal 0. Because the result is stored in <code>a</code>, the input 1070 * array must be of size slices*rows*2*columns, with only the first slices*rows*columns elements 1071 * filled with real data. To get back the original data, use 1072 * <code>complexInverse</code> on the output of this method. 1073 * 1074 * @param a 1075 * data to transform 1076 */ 1077 public void realForwardFull(float[] a) { 1078 if (isPowerOfTwo) { 1079 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 1080 if (nthreads != oldNthreads) { 1081 nt = slices; 1082 if (nt < rows) { 1083 nt = rows; 1084 } 1085 nt *= 8; 1086 if (nthreads > 1) { 1087 nt *= nthreads; 1088 } 1089 if (columns == 4) { 1090 nt >>= 1; 1091 } else if (columns < 4) { 1092 nt >>= 2; 1093 } 1094 t = new float[nt]; 1095 oldNthreads = nthreads; 1096 } 1097 if ((nthreads > 1) && useThreads) { 1098 xdft3da_subth2(1, -1, a, true); 1099 cdft3db_subth(-1, a, true); 1100 rdft3d_sub(1, a); 1101 } else { 1102 xdft3da_sub2(1, -1, a, true); 1103 cdft3db_sub(-1, a, true); 1104 rdft3d_sub(1, a); 1105 } 1106 fillSymmetric(a); 1107 } else { 1108 mixedRadixRealForwardFull(a); 1109 } 1110 } 1111 1112 /** 1113 * Computes 3D forward DFT of real data leaving the result in <code>a</code> 1114 * . This method computes full real forward transform, i.e. you will get the 1115 * same result as from <code>complexForward</code> called with all imaginary 1116 * part equal 0. Because the result is stored in <code>a</code>, the input 1117 * array must be of size slices by rows by 2*columns, with only the first slices by rows by 1118 * columns elements filled with real data. To get back the original data, use 1119 * <code>complexInverse</code> on the output of this method. 1120 * 1121 * @param a 1122 * data to transform 1123 */ 1124 public void realForwardFull(float[][][] a) { 1125 if (isPowerOfTwo) { 1126 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 1127 if (nthreads != oldNthreads) { 1128 nt = slices; 1129 if (nt < rows) { 1130 nt = rows; 1131 } 1132 nt *= 8; 1133 if (nthreads > 1) { 1134 nt *= nthreads; 1135 } 1136 if (columns == 4) { 1137 nt >>= 1; 1138 } else if (columns < 4) { 1139 nt >>= 2; 1140 } 1141 t = new float[nt]; 1142 oldNthreads = nthreads; 1143 } 1144 if ((nthreads > 1) && useThreads) { 1145 xdft3da_subth2(1, -1, a, true); 1146 cdft3db_subth(-1, a, true); 1147 rdft3d_sub(1, a); 1148 } else { 1149 xdft3da_sub2(1, -1, a, true); 1150 cdft3db_sub(-1, a, true); 1151 rdft3d_sub(1, a); 1152 } 1153 fillSymmetric(a); 1154 } else { 1155 mixedRadixRealForwardFull(a); 1156 } 1157 } 1158 1159 /** 1160 * Computes 3D inverse DFT of real data leaving the result in <code>a</code> 1161 * . This method only works when the sizes of all three dimensions are 1162 * power-of-two numbers. The data is stored in a 1D array addressed in 1163 * slice-major, then row-major, then column-major, in order of significance, 1164 * i.e. element (i,j,k) of 3-d array x[slices][rows][2*columns] is stored in 1165 * a[i*sliceStride + j*rowStride + k], where sliceStride = rows * 2 * columns and 1166 * rowStride = 2 * columns. The physical layout of the input data has to be as 1167 * follows: 1168 * 1169 * <pre> 1170 * a[k1*sliceStride + k2*rowStride + 2*k3] = Re[k1][k2][k3] 1171 * = Re[(slices-k1)%slices][(rows-k2)%rows][columns-k3], 1172 * a[k1*sliceStride + k2*rowStride + 2*k3+1] = Im[k1][k2][k3] 1173 * = -Im[(slices-k1)%slices][(rows-k2)%rows][columns-k3], 1174 * 0<=k1<slices, 0<=k2<rows, 0<k3<columns/2, 1175 * a[k1*sliceStride + k2*rowStride] = Re[k1][k2][0] 1176 * = Re[(slices-k1)%slices][rows-k2][0], 1177 * a[k1*sliceStride + k2*rowStride + 1] = Im[k1][k2][0] 1178 * = -Im[(slices-k1)%slices][rows-k2][0], 1179 * a[k1*sliceStride + (rows-k2)*rowStride + 1] = Re[(slices-k1)%slices][k2][columns/2] 1180 * = Re[k1][rows-k2][columns/2], 1181 * a[k1*sliceStride + (rows-k2)*rowStride] = -Im[(slices-k1)%slices][k2][columns/2] 1182 * = Im[k1][rows-k2][columns/2], 1183 * 0<=k1<slices, 0<k2<rows/2, 1184 * a[k1*sliceStride] = Re[k1][0][0] 1185 * = Re[slices-k1][0][0], 1186 * a[k1*sliceStride + 1] = Im[k1][0][0] 1187 * = -Im[slices-k1][0][0], 1188 * a[k1*sliceStride + (rows/2)*rowStride] = Re[k1][rows/2][0] 1189 * = Re[slices-k1][rows/2][0], 1190 * a[k1*sliceStride + (rows/2)*rowStride + 1] = Im[k1][rows/2][0] 1191 * = -Im[slices-k1][rows/2][0], 1192 * a[(slices-k1)*sliceStride + 1] = Re[k1][0][columns/2] 1193 * = Re[slices-k1][0][columns/2], 1194 * a[(slices-k1)*sliceStride] = -Im[k1][0][columns/2] 1195 * = Im[slices-k1][0][columns/2], 1196 * a[(slices-k1)*sliceStride + (rows/2)*rowStride + 1] = Re[k1][rows/2][columns/2] 1197 * = Re[slices-k1][rows/2][columns/2], 1198 * a[(slices-k1)*sliceStride + (rows/2) * rowStride] = -Im[k1][rows/2][columns/2] 1199 * = Im[slices-k1][rows/2][columns/2], 1200 * 0<k1<slices/2, 1201 * a[0] = Re[0][0][0], 1202 * a[1] = Re[0][0][columns/2], 1203 * a[(rows/2)*rowStride] = Re[0][rows/2][0], 1204 * a[(rows/2)*rowStride + 1] = Re[0][rows/2][columns/2], 1205 * a[(slices/2)*sliceStride] = Re[slices/2][0][0], 1206 * a[(slices/2)*sliceStride + 1] = Re[slices/2][0][columns/2], 1207 * a[(slices/2)*sliceStride + (rows/2)*rowStride] = Re[slices/2][rows/2][0], 1208 * a[(slices/2)*sliceStride + (rows/2)*rowStride + 1] = Re[slices/2][rows/2][columns/2] 1209 * </pre> 1210 * 1211 * This method computes only half of the elements of the real transform. The 1212 * other half satisfies the symmetry condition. If you want the full real 1213 * inverse transform, use <code>realInverseFull</code>. 1214 * 1215 * @param a 1216 * data to transform 1217 * 1218 * @param scale 1219 * if true then scaling is performed 1220 */ 1221 public void realInverse(float[] a, boolean scale) { 1222 if (isPowerOfTwo == false) { 1223 throw new IllegalArgumentException("slices, rows and columns must be power of two numbers"); 1224 } else { 1225 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 1226 if (nthreads != oldNthreads) { 1227 nt = slices; 1228 if (nt < rows) { 1229 nt = rows; 1230 } 1231 nt *= 8; 1232 if (nthreads > 1) { 1233 nt *= nthreads; 1234 } 1235 if (columns == 4) { 1236 nt >>= 1; 1237 } else if (columns < 4) { 1238 nt >>= 2; 1239 } 1240 t = new float[nt]; 1241 oldNthreads = nthreads; 1242 } 1243 if ((nthreads > 1) && useThreads) { 1244 rdft3d_sub(-1, a); 1245 cdft3db_subth(1, a, scale); 1246 xdft3da_subth1(1, 1, a, scale); 1247 } else { 1248 rdft3d_sub(-1, a); 1249 cdft3db_sub(1, a, scale); 1250 xdft3da_sub1(1, 1, a, scale); 1251 } 1252 } 1253 } 1254 1255 /** 1256 * Computes 3D inverse DFT of real data leaving the result in <code>a</code> 1257 * . This method only works when the sizes of all three dimensions are 1258 * power-of-two numbers. The data is stored in a 3D array. The physical 1259 * layout of the input data has to be as follows: 1260 * 1261 * <pre> 1262 * a[k1][k2][2*k3] = Re[k1][k2][k3] 1263 * = Re[(slices-k1)%slices][(rows-k2)%rows][columns-k3], 1264 * a[k1][k2][2*k3+1] = Im[k1][k2][k3] 1265 * = -Im[(slices-k1)%slices][(rows-k2)%rows][columns-k3], 1266 * 0<=k1<slices, 0<=k2<rows, 0<k3<columns/2, 1267 * a[k1][k2][0] = Re[k1][k2][0] 1268 * = Re[(slices-k1)%slices][rows-k2][0], 1269 * a[k1][k2][1] = Im[k1][k2][0] 1270 * = -Im[(slices-k1)%slices][rows-k2][0], 1271 * a[k1][rows-k2][1] = Re[(slices-k1)%slices][k2][columns/2] 1272 * = Re[k1][rows-k2][columns/2], 1273 * a[k1][rows-k2][0] = -Im[(slices-k1)%slices][k2][columns/2] 1274 * = Im[k1][rows-k2][columns/2], 1275 * 0<=k1<slices, 0<k2<rows/2, 1276 * a[k1][0][0] = Re[k1][0][0] 1277 * = Re[slices-k1][0][0], 1278 * a[k1][0][1] = Im[k1][0][0] 1279 * = -Im[slices-k1][0][0], 1280 * a[k1][rows/2][0] = Re[k1][rows/2][0] 1281 * = Re[slices-k1][rows/2][0], 1282 * a[k1][rows/2][1] = Im[k1][rows/2][0] 1283 * = -Im[slices-k1][rows/2][0], 1284 * a[slices-k1][0][1] = Re[k1][0][columns/2] 1285 * = Re[slices-k1][0][columns/2], 1286 * a[slices-k1][0][0] = -Im[k1][0][columns/2] 1287 * = Im[slices-k1][0][columns/2], 1288 * a[slices-k1][rows/2][1] = Re[k1][rows/2][columns/2] 1289 * = Re[slices-k1][rows/2][columns/2], 1290 * a[slices-k1][rows/2][0] = -Im[k1][rows/2][columns/2] 1291 * = Im[slices-k1][rows/2][columns/2], 1292 * 0<k1<slices/2, 1293 * a[0][0][0] = Re[0][0][0], 1294 * a[0][0][1] = Re[0][0][columns/2], 1295 * a[0][rows/2][0] = Re[0][rows/2][0], 1296 * a[0][rows/2][1] = Re[0][rows/2][columns/2], 1297 * a[slices/2][0][0] = Re[slices/2][0][0], 1298 * a[slices/2][0][1] = Re[slices/2][0][columns/2], 1299 * a[slices/2][rows/2][0] = Re[slices/2][rows/2][0], 1300 * a[slices/2][rows/2][1] = Re[slices/2][rows/2][columns/2] 1301 * </pre> 1302 * 1303 * This method computes only half of the elements of the real transform. The 1304 * other half satisfies the symmetry condition. If you want the full real 1305 * inverse transform, use <code>realInverseFull</code>. 1306 * 1307 * @param a 1308 * data to transform 1309 * 1310 * @param scale 1311 * if true then scaling is performed 1312 */ 1313 public void realInverse(float[][][] a, boolean scale) { 1314 if (isPowerOfTwo == false) { 1315 throw new IllegalArgumentException("slices, rows and columns must be power of two numbers"); 1316 } else { 1317 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 1318 if (nthreads != oldNthreads) { 1319 nt = slices; 1320 if (nt < rows) { 1321 nt = rows; 1322 } 1323 nt *= 8; 1324 if (nthreads > 1) { 1325 nt *= nthreads; 1326 } 1327 if (columns == 4) { 1328 nt >>= 1; 1329 } else if (columns < 4) { 1330 nt >>= 2; 1331 } 1332 t = new float[nt]; 1333 oldNthreads = nthreads; 1334 } 1335 if ((nthreads > 1) && useThreads) { 1336 rdft3d_sub(-1, a); 1337 cdft3db_subth(1, a, scale); 1338 xdft3da_subth1(1, 1, a, scale); 1339 } else { 1340 rdft3d_sub(-1, a); 1341 cdft3db_sub(1, a, scale); 1342 xdft3da_sub1(1, 1, a, scale); 1343 } 1344 } 1345 } 1346 1347 /** 1348 * Computes 3D inverse DFT of real data leaving the result in <code>a</code> 1349 * . This method computes full real inverse transform, i.e. you will get the 1350 * same result as from <code>complexInverse</code> called with all imaginary 1351 * part equal 0. Because the result is stored in <code>a</code>, the input 1352 * array must be of size slices*rows*2*columns, with only the first slices*rows*columns elements 1353 * filled with real data. 1354 * 1355 * @param a 1356 * data to transform 1357 * @param scale 1358 * if true then scaling is performed 1359 */ 1360 public void realInverseFull(float[] a, boolean scale) { 1361 if (isPowerOfTwo) { 1362 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 1363 if (nthreads != oldNthreads) { 1364 nt = slices; 1365 if (nt < rows) { 1366 nt = rows; 1367 } 1368 nt *= 8; 1369 if (nthreads > 1) { 1370 nt *= nthreads; 1371 } 1372 if (columns == 4) { 1373 nt >>= 1; 1374 } else if (columns < 4) { 1375 nt >>= 2; 1376 } 1377 t = new float[nt]; 1378 oldNthreads = nthreads; 1379 } 1380 if ((nthreads > 1) && useThreads) { 1381 xdft3da_subth2(1, 1, a, scale); 1382 cdft3db_subth(1, a, scale); 1383 rdft3d_sub(1, a); 1384 } else { 1385 xdft3da_sub2(1, 1, a, scale); 1386 cdft3db_sub(1, a, scale); 1387 rdft3d_sub(1, a); 1388 } 1389 fillSymmetric(a); 1390 } else { 1391 mixedRadixRealInverseFull(a, scale); 1392 } 1393 } 1394 1395 /** 1396 * Computes 3D inverse DFT of real data leaving the result in <code>a</code> 1397 * . This method computes full real inverse transform, i.e. you will get the 1398 * same result as from <code>complexInverse</code> called with all imaginary 1399 * part equal 0. Because the result is stored in <code>a</code>, the input 1400 * array must be of size slices by rows by 2*columns, with only the first slices by rows by 1401 * columns elements filled with real data. 1402 * 1403 * @param a 1404 * data to transform 1405 * @param scale 1406 * if true then scaling is performed 1407 */ 1408 public void realInverseFull(float[][][] a, boolean scale) { 1409 if (isPowerOfTwo) { 1410 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 1411 if (nthreads != oldNthreads) { 1412 nt = slices; 1413 if (nt < rows) { 1414 nt = rows; 1415 } 1416 nt *= 8; 1417 if (nthreads > 1) { 1418 nt *= nthreads; 1419 } 1420 if (columns == 4) { 1421 nt >>= 1; 1422 } else if (columns < 4) { 1423 nt >>= 2; 1424 } 1425 t = new float[nt]; 1426 oldNthreads = nthreads; 1427 } 1428 if ((nthreads > 1) && useThreads) { 1429 xdft3da_subth2(1, 1, a, scale); 1430 cdft3db_subth(1, a, scale); 1431 rdft3d_sub(1, a); 1432 } else { 1433 xdft3da_sub2(1, 1, a, scale); 1434 cdft3db_sub(1, a, scale); 1435 rdft3d_sub(1, a); 1436 } 1437 fillSymmetric(a); 1438 } else { 1439 mixedRadixRealInverseFull(a, scale); 1440 } 1441 } 1442 1443 /* -------- child routines -------- */ 1444 1445 private void mixedRadixRealForwardFull(final float[][][] a) { 1446 float[] temp = new float[2 * rows]; 1447 int ldimn2 = rows / 2 + 1; 1448 final int newn3 = 2 * columns; 1449 final int n2d2; 1450 if (rows % 2 == 0) { 1451 n2d2 = rows / 2; 1452 } else { 1453 n2d2 = (rows + 1) / 2; 1454 } 1455 1456 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 1457 if ((nthreads > 1) && useThreads && (slices >= nthreads) && (columns >= nthreads) && (ldimn2 >= nthreads)) { 1458 Future<?>[] futures = new Future[nthreads]; 1459 int p = slices / nthreads; 1460 for (int l = 0; l < nthreads; l++) { 1461 final int firstSlice = l * p; 1462 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 1463 1464 futures[l] = ConcurrencyUtils.submit(new Runnable() { 1465 @Override 1466 public void run() { 1467 for (int s = firstSlice; s < lastSlice; s++) { 1468 for (int r = 0; r < rows; r++) { 1469 fftColumns.realForwardFull(a[s][r]); 1470 } 1471 } 1472 } 1473 }); 1474 } 1475 ConcurrencyUtils.waitForCompletion(futures); 1476 1477 for (int l = 0; l < nthreads; l++) { 1478 final int firstSlice = l * p; 1479 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 1480 1481 futures[l] = ConcurrencyUtils.submit(new Runnable() { 1482 @Override 1483 public void run() { 1484 float[] temp = new float[2 * rows]; 1485 1486 for (int s = firstSlice; s < lastSlice; s++) { 1487 for (int c = 0; c < columns; c++) { 1488 int idx2 = 2 * c; 1489 for (int r = 0; r < rows; r++) { 1490 int idx4 = 2 * r; 1491 temp[idx4] = a[s][r][idx2]; 1492 temp[idx4 + 1] = a[s][r][idx2 + 1]; 1493 } 1494 fftRows.complexForward(temp); 1495 for (int r = 0; r < rows; r++) { 1496 int idx4 = 2 * r; 1497 a[s][r][idx2] = temp[idx4]; 1498 a[s][r][idx2 + 1] = temp[idx4 + 1]; 1499 } 1500 } 1501 } 1502 } 1503 }); 1504 } 1505 ConcurrencyUtils.waitForCompletion(futures); 1506 1507 p = ldimn2 / nthreads; 1508 for (int l = 0; l < nthreads; l++) { 1509 final int firstRow = l * p; 1510 final int lastRow = (l == (nthreads - 1)) ? ldimn2 : firstRow + p; 1511 1512 futures[l] = ConcurrencyUtils.submit(new Runnable() { 1513 @Override 1514 public void run() { 1515 float[] temp = new float[2 * slices]; 1516 1517 for (int r = firstRow; r < lastRow; r++) { 1518 for (int c = 0; c < columns; c++) { 1519 int idx1 = 2 * c; 1520 for (int s = 0; s < slices; s++) { 1521 int idx2 = 2 * s; 1522 temp[idx2] = a[s][r][idx1]; 1523 temp[idx2 + 1] = a[s][r][idx1 + 1]; 1524 } 1525 fftSlices.complexForward(temp); 1526 for (int s = 0; s < slices; s++) { 1527 int idx2 = 2 * s; 1528 a[s][r][idx1] = temp[idx2]; 1529 a[s][r][idx1 + 1] = temp[idx2 + 1]; 1530 } 1531 } 1532 } 1533 } 1534 }); 1535 } 1536 ConcurrencyUtils.waitForCompletion(futures); 1537 p = slices / nthreads; 1538 for (int l = 0; l < nthreads; l++) { 1539 final int firstSlice = l * p; 1540 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 1541 1542 futures[l] = ConcurrencyUtils.submit(new Runnable() { 1543 @Override 1544 public void run() { 1545 1546 for (int s = firstSlice; s < lastSlice; s++) { 1547 int idx2 = (slices - s) % slices; 1548 for (int r = 1; r < n2d2; r++) { 1549 int idx4 = rows - r; 1550 for (int c = 0; c < columns; c++) { 1551 int idx1 = 2 * c; 1552 int idx3 = newn3 - idx1; 1553 a[idx2][idx4][idx3 % newn3] = a[s][r][idx1]; 1554 a[idx2][idx4][(idx3 + 1) % newn3] = -a[s][r][idx1 + 1]; 1555 } 1556 } 1557 } 1558 } 1559 }); 1560 } 1561 ConcurrencyUtils.waitForCompletion(futures); 1562 } else { 1563 1564 for (int s = 0; s < slices; s++) { 1565 for (int r = 0; r < rows; r++) { 1566 fftColumns.realForwardFull(a[s][r]); 1567 } 1568 } 1569 1570 for (int s = 0; s < slices; s++) { 1571 for (int c = 0; c < columns; c++) { 1572 int idx2 = 2 * c; 1573 for (int r = 0; r < rows; r++) { 1574 int idx4 = 2 * r; 1575 temp[idx4] = a[s][r][idx2]; 1576 temp[idx4 + 1] = a[s][r][idx2 + 1]; 1577 } 1578 fftRows.complexForward(temp); 1579 for (int r = 0; r < rows; r++) { 1580 int idx4 = 2 * r; 1581 a[s][r][idx2] = temp[idx4]; 1582 a[s][r][idx2 + 1] = temp[idx4 + 1]; 1583 } 1584 } 1585 } 1586 1587 temp = new float[2 * slices]; 1588 1589 for (int r = 0; r < ldimn2; r++) { 1590 for (int c = 0; c < columns; c++) { 1591 int idx1 = 2 * c; 1592 for (int s = 0; s < slices; s++) { 1593 int idx2 = 2 * s; 1594 temp[idx2] = a[s][r][idx1]; 1595 temp[idx2 + 1] = a[s][r][idx1 + 1]; 1596 } 1597 fftSlices.complexForward(temp); 1598 for (int s = 0; s < slices; s++) { 1599 int idx2 = 2 * s; 1600 a[s][r][idx1] = temp[idx2]; 1601 a[s][r][idx1 + 1] = temp[idx2 + 1]; 1602 } 1603 } 1604 } 1605 1606 for (int s = 0; s < slices; s++) { 1607 int idx2 = (slices - s) % slices; 1608 for (int r = 1; r < n2d2; r++) { 1609 int idx4 = rows - r; 1610 for (int c = 0; c < columns; c++) { 1611 int idx1 = 2 * c; 1612 int idx3 = newn3 - idx1; 1613 a[idx2][idx4][idx3 % newn3] = a[s][r][idx1]; 1614 a[idx2][idx4][(idx3 + 1) % newn3] = -a[s][r][idx1 + 1]; 1615 } 1616 } 1617 } 1618 1619 } 1620 } 1621 1622 private void mixedRadixRealInverseFull(final float[][][] a, final boolean scale) { 1623 float[] temp = new float[2 * rows]; 1624 int ldimn2 = rows / 2 + 1; 1625 final int newn3 = 2 * columns; 1626 final int n2d2; 1627 if (rows % 2 == 0) { 1628 n2d2 = rows / 2; 1629 } else { 1630 n2d2 = (rows + 1) / 2; 1631 } 1632 1633 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 1634 if ((nthreads > 1) && useThreads && (slices >= nthreads) && (columns >= nthreads) && (ldimn2 >= nthreads)) { 1635 Future<?>[] futures = new Future[nthreads]; 1636 int p = slices / nthreads; 1637 for (int l = 0; l < nthreads; l++) { 1638 final int firstSlice = l * p; 1639 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 1640 1641 futures[l] = ConcurrencyUtils.submit(new Runnable() { 1642 @Override 1643 public void run() { 1644 for (int s = firstSlice; s < lastSlice; s++) { 1645 for (int r = 0; r < rows; r++) { 1646 fftColumns.realInverseFull(a[s][r], scale); 1647 } 1648 } 1649 } 1650 }); 1651 } 1652 ConcurrencyUtils.waitForCompletion(futures); 1653 1654 for (int l = 0; l < nthreads; l++) { 1655 final int firstSlice = l * p; 1656 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 1657 1658 futures[l] = ConcurrencyUtils.submit(new Runnable() { 1659 @Override 1660 public void run() { 1661 float[] temp = new float[2 * rows]; 1662 1663 for (int s = firstSlice; s < lastSlice; s++) { 1664 for (int c = 0; c < columns; c++) { 1665 int idx2 = 2 * c; 1666 for (int r = 0; r < rows; r++) { 1667 int idx4 = 2 * r; 1668 temp[idx4] = a[s][r][idx2]; 1669 temp[idx4 + 1] = a[s][r][idx2 + 1]; 1670 } 1671 fftRows.complexInverse(temp, scale); 1672 for (int r = 0; r < rows; r++) { 1673 int idx4 = 2 * r; 1674 a[s][r][idx2] = temp[idx4]; 1675 a[s][r][idx2 + 1] = temp[idx4 + 1]; 1676 } 1677 } 1678 } 1679 } 1680 }); 1681 } 1682 ConcurrencyUtils.waitForCompletion(futures); 1683 1684 p = ldimn2 / nthreads; 1685 for (int l = 0; l < nthreads; l++) { 1686 final int firstRow = l * p; 1687 final int lastRow = (l == (nthreads - 1)) ? ldimn2 : firstRow + p; 1688 futures[l] = ConcurrencyUtils.submit(new Runnable() { 1689 @Override 1690 public void run() { 1691 float[] temp = new float[2 * slices]; 1692 1693 for (int r = firstRow; r < lastRow; r++) { 1694 for (int c = 0; c < columns; c++) { 1695 int idx1 = 2 * c; 1696 for (int s = 0; s < slices; s++) { 1697 int idx2 = 2 * s; 1698 temp[idx2] = a[s][r][idx1]; 1699 temp[idx2 + 1] = a[s][r][idx1 + 1]; 1700 } 1701 fftSlices.complexInverse(temp, scale); 1702 for (int s = 0; s < slices; s++) { 1703 int idx2 = 2 * s; 1704 a[s][r][idx1] = temp[idx2]; 1705 a[s][r][idx1 + 1] = temp[idx2 + 1]; 1706 } 1707 } 1708 } 1709 } 1710 }); 1711 } 1712 ConcurrencyUtils.waitForCompletion(futures); 1713 p = slices / nthreads; 1714 for (int l = 0; l < nthreads; l++) { 1715 final int firstSlice = l * p; 1716 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 1717 1718 futures[l] = ConcurrencyUtils.submit(new Runnable() { 1719 @Override 1720 public void run() { 1721 1722 for (int s = firstSlice; s < lastSlice; s++) { 1723 int idx2 = (slices - s) % slices; 1724 for (int r = 1; r < n2d2; r++) { 1725 int idx4 = rows - r; 1726 for (int c = 0; c < columns; c++) { 1727 int idx1 = 2 * c; 1728 int idx3 = newn3 - idx1; 1729 a[idx2][idx4][idx3 % newn3] = a[s][r][idx1]; 1730 a[idx2][idx4][(idx3 + 1) % newn3] = -a[s][r][idx1 + 1]; 1731 } 1732 } 1733 } 1734 } 1735 }); 1736 } 1737 ConcurrencyUtils.waitForCompletion(futures); 1738 } else { 1739 1740 for (int s = 0; s < slices; s++) { 1741 for (int r = 0; r < rows; r++) { 1742 fftColumns.realInverseFull(a[s][r], scale); 1743 } 1744 } 1745 1746 for (int s = 0; s < slices; s++) { 1747 for (int c = 0; c < columns; c++) { 1748 int idx2 = 2 * c; 1749 for (int r = 0; r < rows; r++) { 1750 int idx4 = 2 * r; 1751 temp[idx4] = a[s][r][idx2]; 1752 temp[idx4 + 1] = a[s][r][idx2 + 1]; 1753 } 1754 fftRows.complexInverse(temp, scale); 1755 for (int r = 0; r < rows; r++) { 1756 int idx4 = 2 * r; 1757 a[s][r][idx2] = temp[idx4]; 1758 a[s][r][idx2 + 1] = temp[idx4 + 1]; 1759 } 1760 } 1761 } 1762 1763 temp = new float[2 * slices]; 1764 1765 for (int r = 0; r < ldimn2; r++) { 1766 for (int c = 0; c < columns; c++) { 1767 int idx1 = 2 * c; 1768 for (int s = 0; s < slices; s++) { 1769 int idx2 = 2 * s; 1770 temp[idx2] = a[s][r][idx1]; 1771 temp[idx2 + 1] = a[s][r][idx1 + 1]; 1772 } 1773 fftSlices.complexInverse(temp, scale); 1774 for (int s = 0; s < slices; s++) { 1775 int idx2 = 2 * s; 1776 a[s][r][idx1] = temp[idx2]; 1777 a[s][r][idx1 + 1] = temp[idx2 + 1]; 1778 } 1779 } 1780 } 1781 1782 for (int s = 0; s < slices; s++) { 1783 int idx2 = (slices - s) % slices; 1784 for (int r = 1; r < n2d2; r++) { 1785 int idx4 = rows - r; 1786 for (int c = 0; c < columns; c++) { 1787 int idx1 = 2 * c; 1788 int idx3 = newn3 - idx1; 1789 a[idx2][idx4][idx3 % newn3] = a[s][r][idx1]; 1790 a[idx2][idx4][(idx3 + 1) % newn3] = -a[s][r][idx1 + 1]; 1791 } 1792 } 1793 } 1794 1795 } 1796 } 1797 1798 private void mixedRadixRealForwardFull(final float[] a) { 1799 final int twon3 = 2 * columns; 1800 float[] temp = new float[twon3]; 1801 int ldimn2 = rows / 2 + 1; 1802 final int n2d2; 1803 if (rows % 2 == 0) { 1804 n2d2 = rows / 2; 1805 } else { 1806 n2d2 = (rows + 1) / 2; 1807 } 1808 1809 final int twoSliceStride = 2 * sliceStride; 1810 final int twoRowStride = 2 * rowStride; 1811 int n1d2 = slices / 2; 1812 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 1813 if ((nthreads > 1) && useThreads && (n1d2 >= nthreads) && (columns >= nthreads) && (ldimn2 >= nthreads)) { 1814 Future<?>[] futures = new Future[nthreads]; 1815 int p = n1d2 / nthreads; 1816 for (int l = 0; l < nthreads; l++) { 1817 final int firstSlice = slices - 1 - l * p; 1818 final int lastSlice = (l == (nthreads - 1)) ? n1d2 + 1 : firstSlice - p; 1819 futures[l] = ConcurrencyUtils.submit(new Runnable() { 1820 @Override 1821 public void run() { 1822 float[] temp = new float[twon3]; 1823 for (int s = firstSlice; s >= lastSlice; s--) { 1824 int idx1 = s * sliceStride; 1825 int idx2 = s * twoSliceStride; 1826 for (int r = rows - 1; r >= 0; r--) { 1827 System.arraycopy(a, idx1 + r * rowStride, temp, 0, columns); 1828 fftColumns.realForwardFull(temp); 1829 System.arraycopy(temp, 0, a, idx2 + r * twoRowStride, twon3); 1830 } 1831 } 1832 } 1833 }); 1834 } 1835 ConcurrencyUtils.waitForCompletion(futures); 1836 1837 final float[][][] temp2 = new float[n1d2 + 1][rows][twon3]; 1838 1839 for (int l = 0; l < nthreads; l++) { 1840 final int firstSlice = l * p; 1841 final int lastSlice = (l == (nthreads - 1)) ? n1d2 + 1 : firstSlice + p; 1842 futures[l] = ConcurrencyUtils.submit(new Runnable() { 1843 @Override 1844 public void run() { 1845 for (int s = firstSlice; s < lastSlice; s++) { 1846 int idx1 = s * sliceStride; 1847 for (int r = 0; r < rows; r++) { 1848 System.arraycopy(a, idx1 + r * rowStride, temp2[s][r], 0, columns); 1849 fftColumns.realForwardFull(temp2[s][r]); 1850 } 1851 } 1852 } 1853 }); 1854 } 1855 ConcurrencyUtils.waitForCompletion(futures); 1856 1857 for (int l = 0; l < nthreads; l++) { 1858 final int firstSlice = l * p; 1859 final int lastSlice = (l == (nthreads - 1)) ? n1d2 + 1 : firstSlice + p; 1860 futures[l] = ConcurrencyUtils.submit(new Runnable() { 1861 @Override 1862 public void run() { 1863 for (int s = firstSlice; s < lastSlice; s++) { 1864 int idx1 = s * twoSliceStride; 1865 for (int r = 0; r < rows; r++) { 1866 System.arraycopy(temp2[s][r], 0, a, idx1 + r * twoRowStride, twon3); 1867 } 1868 } 1869 } 1870 }); 1871 } 1872 ConcurrencyUtils.waitForCompletion(futures); 1873 1874 p = slices / nthreads; 1875 for (int l = 0; l < nthreads; l++) { 1876 final int firstSlice = l * p; 1877 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 1878 futures[l] = ConcurrencyUtils.submit(new Runnable() { 1879 @Override 1880 public void run() { 1881 float[] temp = new float[2 * rows]; 1882 1883 for (int s = firstSlice; s < lastSlice; s++) { 1884 int idx1 = s * twoSliceStride; 1885 for (int c = 0; c < columns; c++) { 1886 int idx2 = 2 * c; 1887 for (int r = 0; r < rows; r++) { 1888 int idx3 = idx1 + r * twoRowStride + idx2; 1889 int idx4 = 2 * r; 1890 temp[idx4] = a[idx3]; 1891 temp[idx4 + 1] = a[idx3 + 1]; 1892 } 1893 fftRows.complexForward(temp); 1894 for (int r = 0; r < rows; r++) { 1895 int idx3 = idx1 + r * twoRowStride + idx2; 1896 int idx4 = 2 * r; 1897 a[idx3] = temp[idx4]; 1898 a[idx3 + 1] = temp[idx4 + 1]; 1899 } 1900 } 1901 } 1902 } 1903 }); 1904 } 1905 ConcurrencyUtils.waitForCompletion(futures); 1906 1907 p = ldimn2 / nthreads; 1908 for (int l = 0; l < nthreads; l++) { 1909 final int firstRow = l * p; 1910 final int lastRow = (l == (nthreads - 1)) ? ldimn2 : firstRow + p; 1911 futures[l] = ConcurrencyUtils.submit(new Runnable() { 1912 @Override 1913 public void run() { 1914 float[] temp = new float[2 * slices]; 1915 1916 for (int r = firstRow; r < lastRow; r++) { 1917 int idx3 = r * twoRowStride; 1918 for (int c = 0; c < columns; c++) { 1919 int idx1 = 2 * c; 1920 for (int s = 0; s < slices; s++) { 1921 int idx2 = 2 * s; 1922 int idx4 = s * twoSliceStride + idx3 + idx1; 1923 temp[idx2] = a[idx4]; 1924 temp[idx2 + 1] = a[idx4 + 1]; 1925 } 1926 fftSlices.complexForward(temp); 1927 for (int s = 0; s < slices; s++) { 1928 int idx2 = 2 * s; 1929 int idx4 = s * twoSliceStride + idx3 + idx1; 1930 a[idx4] = temp[idx2]; 1931 a[idx4 + 1] = temp[idx2 + 1]; 1932 } 1933 } 1934 } 1935 } 1936 }); 1937 } 1938 ConcurrencyUtils.waitForCompletion(futures); 1939 p = slices / nthreads; 1940 for (int l = 0; l < nthreads; l++) { 1941 final int firstSlice = l * p; 1942 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 1943 futures[l] = ConcurrencyUtils.submit(new Runnable() { 1944 @Override 1945 public void run() { 1946 1947 for (int s = firstSlice; s < lastSlice; s++) { 1948 int idx2 = (slices - s) % slices; 1949 int idx5 = idx2 * twoSliceStride; 1950 int idx6 = s * twoSliceStride; 1951 for (int r = 1; r < n2d2; r++) { 1952 int idx4 = rows - r; 1953 int idx7 = idx4 * twoRowStride; 1954 int idx8 = r * twoRowStride; 1955 int idx9 = idx5 + idx7; 1956 for (int c = 0; c < columns; c++) { 1957 int idx1 = 2 * c; 1958 int idx3 = twon3 - idx1; 1959 int idx10 = idx6 + idx8 + idx1; 1960 a[idx9 + idx3 % twon3] = a[idx10]; 1961 a[idx9 + (idx3 + 1) % twon3] = -a[idx10 + 1]; 1962 } 1963 } 1964 } 1965 } 1966 }); 1967 } 1968 ConcurrencyUtils.waitForCompletion(futures); 1969 } else { 1970 1971 for (int s = slices - 1; s >= 0; s--) { 1972 int idx1 = s * sliceStride; 1973 int idx2 = s * twoSliceStride; 1974 for (int r = rows - 1; r >= 0; r--) { 1975 System.arraycopy(a, idx1 + r * rowStride, temp, 0, columns); 1976 fftColumns.realForwardFull(temp); 1977 System.arraycopy(temp, 0, a, idx2 + r * twoRowStride, twon3); 1978 } 1979 } 1980 1981 temp = new float[2 * rows]; 1982 1983 for (int s = 0; s < slices; s++) { 1984 int idx1 = s * twoSliceStride; 1985 for (int c = 0; c < columns; c++) { 1986 int idx2 = 2 * c; 1987 for (int r = 0; r < rows; r++) { 1988 int idx4 = 2 * r; 1989 int idx3 = idx1 + r * twoRowStride + idx2; 1990 temp[idx4] = a[idx3]; 1991 temp[idx4 + 1] = a[idx3 + 1]; 1992 } 1993 fftRows.complexForward(temp); 1994 for (int r = 0; r < rows; r++) { 1995 int idx4 = 2 * r; 1996 int idx3 = idx1 + r * twoRowStride + idx2; 1997 a[idx3] = temp[idx4]; 1998 a[idx3 + 1] = temp[idx4 + 1]; 1999 } 2000 } 2001 } 2002 2003 temp = new float[2 * slices]; 2004 2005 for (int r = 0; r < ldimn2; r++) { 2006 int idx3 = r * twoRowStride; 2007 for (int c = 0; c < columns; c++) { 2008 int idx1 = 2 * c; 2009 for (int s = 0; s < slices; s++) { 2010 int idx2 = 2 * s; 2011 int idx4 = s * twoSliceStride + idx3 + idx1; 2012 temp[idx2] = a[idx4]; 2013 temp[idx2 + 1] = a[idx4 + 1]; 2014 } 2015 fftSlices.complexForward(temp); 2016 for (int s = 0; s < slices; s++) { 2017 int idx2 = 2 * s; 2018 int idx4 = s * twoSliceStride + idx3 + idx1; 2019 a[idx4] = temp[idx2]; 2020 a[idx4 + 1] = temp[idx2 + 1]; 2021 } 2022 } 2023 } 2024 2025 for (int s = 0; s < slices; s++) { 2026 int idx2 = (slices - s) % slices; 2027 int idx5 = idx2 * twoSliceStride; 2028 int idx6 = s * twoSliceStride; 2029 for (int r = 1; r < n2d2; r++) { 2030 int idx4 = rows - r; 2031 int idx7 = idx4 * twoRowStride; 2032 int idx8 = r * twoRowStride; 2033 int idx9 = idx5 + idx7; 2034 for (int c = 0; c < columns; c++) { 2035 int idx1 = 2 * c; 2036 int idx3 = twon3 - idx1; 2037 int idx10 = idx6 + idx8 + idx1; 2038 a[idx9 + idx3 % twon3] = a[idx10]; 2039 a[idx9 + (idx3 + 1) % twon3] = -a[idx10 + 1]; 2040 } 2041 } 2042 } 2043 2044 } 2045 } 2046 2047 private void mixedRadixRealInverseFull(final float[] a, final boolean scale) { 2048 final int twon3 = 2 * columns; 2049 float[] temp = new float[twon3]; 2050 int ldimn2 = rows / 2 + 1; 2051 final int n2d2; 2052 if (rows % 2 == 0) { 2053 n2d2 = rows / 2; 2054 } else { 2055 n2d2 = (rows + 1) / 2; 2056 } 2057 2058 final int twoSliceStride = 2 * sliceStride; 2059 final int twoRowStride = 2 * rowStride; 2060 int n1d2 = slices / 2; 2061 2062 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 2063 if ((nthreads > 1) && useThreads && (n1d2 >= nthreads) && (columns >= nthreads) && (ldimn2 >= nthreads)) { 2064 Future<?>[] futures = new Future[nthreads]; 2065 int p = n1d2 / nthreads; 2066 for (int l = 0; l < nthreads; l++) { 2067 final int firstSlice = slices - 1 - l * p; 2068 final int lastSlice = (l == (nthreads - 1)) ? n1d2 + 1 : firstSlice - p; 2069 futures[l] = ConcurrencyUtils.submit(new Runnable() { 2070 @Override 2071 public void run() { 2072 float[] temp = new float[twon3]; 2073 for (int s = firstSlice; s >= lastSlice; s--) { 2074 int idx1 = s * sliceStride; 2075 int idx2 = s * twoSliceStride; 2076 for (int r = rows - 1; r >= 0; r--) { 2077 System.arraycopy(a, idx1 + r * rowStride, temp, 0, columns); 2078 fftColumns.realInverseFull(temp, scale); 2079 System.arraycopy(temp, 0, a, idx2 + r * twoRowStride, twon3); 2080 } 2081 } 2082 } 2083 }); 2084 } 2085 ConcurrencyUtils.waitForCompletion(futures); 2086 2087 final float[][][] temp2 = new float[n1d2 + 1][rows][twon3]; 2088 2089 for (int l = 0; l < nthreads; l++) { 2090 final int firstSlice = l * p; 2091 final int lastSlice = (l == (nthreads - 1)) ? n1d2 + 1 : firstSlice + p; 2092 futures[l] = ConcurrencyUtils.submit(new Runnable() { 2093 @Override 2094 public void run() { 2095 for (int s = firstSlice; s < lastSlice; s++) { 2096 int idx1 = s * sliceStride; 2097 for (int r = 0; r < rows; r++) { 2098 System.arraycopy(a, idx1 + r * rowStride, temp2[s][r], 0, columns); 2099 fftColumns.realInverseFull(temp2[s][r], scale); 2100 } 2101 } 2102 } 2103 }); 2104 } 2105 ConcurrencyUtils.waitForCompletion(futures); 2106 2107 for (int l = 0; l < nthreads; l++) { 2108 final int firstSlice = l * p; 2109 final int lastSlice = (l == (nthreads - 1)) ? n1d2 + 1 : firstSlice + p; 2110 futures[l] = ConcurrencyUtils.submit(new Runnable() { 2111 @Override 2112 public void run() { 2113 for (int s = firstSlice; s < lastSlice; s++) { 2114 int idx1 = s * twoSliceStride; 2115 for (int r = 0; r < rows; r++) { 2116 System.arraycopy(temp2[s][r], 0, a, idx1 + r * twoRowStride, twon3); 2117 } 2118 } 2119 } 2120 }); 2121 } 2122 ConcurrencyUtils.waitForCompletion(futures); 2123 2124 p = slices / nthreads; 2125 for (int l = 0; l < nthreads; l++) { 2126 final int firstSlice = l * p; 2127 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 2128 futures[l] = ConcurrencyUtils.submit(new Runnable() { 2129 @Override 2130 public void run() { 2131 float[] temp = new float[2 * rows]; 2132 2133 for (int s = firstSlice; s < lastSlice; s++) { 2134 int idx1 = s * twoSliceStride; 2135 for (int c = 0; c < columns; c++) { 2136 int idx2 = 2 * c; 2137 for (int r = 0; r < rows; r++) { 2138 int idx3 = idx1 + r * twoRowStride + idx2; 2139 int idx4 = 2 * r; 2140 temp[idx4] = a[idx3]; 2141 temp[idx4 + 1] = a[idx3 + 1]; 2142 } 2143 fftRows.complexInverse(temp, scale); 2144 for (int r = 0; r < rows; r++) { 2145 int idx3 = idx1 + r * twoRowStride + idx2; 2146 int idx4 = 2 * r; 2147 a[idx3] = temp[idx4]; 2148 a[idx3 + 1] = temp[idx4 + 1]; 2149 } 2150 } 2151 } 2152 } 2153 }); 2154 } 2155 ConcurrencyUtils.waitForCompletion(futures); 2156 2157 p = ldimn2 / nthreads; 2158 for (int l = 0; l < nthreads; l++) { 2159 final int firstRow = l * p; 2160 final int lastRow = (l == (nthreads - 1)) ? ldimn2 : firstRow + p; 2161 futures[l] = ConcurrencyUtils.submit(new Runnable() { 2162 @Override 2163 public void run() { 2164 float[] temp = new float[2 * slices]; 2165 2166 for (int r = firstRow; r < lastRow; r++) { 2167 int idx3 = r * twoRowStride; 2168 for (int c = 0; c < columns; c++) { 2169 int idx1 = 2 * c; 2170 for (int s = 0; s < slices; s++) { 2171 int idx2 = 2 * s; 2172 int idx4 = s * twoSliceStride + idx3 + idx1; 2173 temp[idx2] = a[idx4]; 2174 temp[idx2 + 1] = a[idx4 + 1]; 2175 } 2176 fftSlices.complexInverse(temp, scale); 2177 for (int s = 0; s < slices; s++) { 2178 int idx2 = 2 * s; 2179 int idx4 = s * twoSliceStride + idx3 + idx1; 2180 a[idx4] = temp[idx2]; 2181 a[idx4 + 1] = temp[idx2 + 1]; 2182 } 2183 } 2184 } 2185 } 2186 }); 2187 } 2188 ConcurrencyUtils.waitForCompletion(futures); 2189 2190 p = slices / nthreads; 2191 for (int l = 0; l < nthreads; l++) { 2192 final int firstSlice = l * p; 2193 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 2194 futures[l] = ConcurrencyUtils.submit(new Runnable() { 2195 @Override 2196 public void run() { 2197 2198 for (int s = firstSlice; s < lastSlice; s++) { 2199 int idx2 = (slices - s) % slices; 2200 int idx5 = idx2 * twoSliceStride; 2201 int idx6 = s * twoSliceStride; 2202 for (int r = 1; r < n2d2; r++) { 2203 int idx4 = rows - r; 2204 int idx7 = idx4 * twoRowStride; 2205 int idx8 = r * twoRowStride; 2206 int idx9 = idx5 + idx7; 2207 for (int c = 0; c < columns; c++) { 2208 int idx1 = 2 * c; 2209 int idx3 = twon3 - idx1; 2210 int idx10 = idx6 + idx8 + idx1; 2211 a[idx9 + idx3 % twon3] = a[idx10]; 2212 a[idx9 + (idx3 + 1) % twon3] = -a[idx10 + 1]; 2213 } 2214 } 2215 } 2216 } 2217 }); 2218 } 2219 ConcurrencyUtils.waitForCompletion(futures); 2220 } else { 2221 2222 for (int s = slices - 1; s >= 0; s--) { 2223 int idx1 = s * sliceStride; 2224 int idx2 = s * twoSliceStride; 2225 for (int r = rows - 1; r >= 0; r--) { 2226 System.arraycopy(a, idx1 + r * rowStride, temp, 0, columns); 2227 fftColumns.realInverseFull(temp, scale); 2228 System.arraycopy(temp, 0, a, idx2 + r * twoRowStride, twon3); 2229 } 2230 } 2231 2232 temp = new float[2 * rows]; 2233 2234 for (int s = 0; s < slices; s++) { 2235 int idx1 = s * twoSliceStride; 2236 for (int c = 0; c < columns; c++) { 2237 int idx2 = 2 * c; 2238 for (int r = 0; r < rows; r++) { 2239 int idx4 = 2 * r; 2240 int idx3 = idx1 + r * twoRowStride + idx2; 2241 temp[idx4] = a[idx3]; 2242 temp[idx4 + 1] = a[idx3 + 1]; 2243 } 2244 fftRows.complexInverse(temp, scale); 2245 for (int r = 0; r < rows; r++) { 2246 int idx4 = 2 * r; 2247 int idx3 = idx1 + r * twoRowStride + idx2; 2248 a[idx3] = temp[idx4]; 2249 a[idx3 + 1] = temp[idx4 + 1]; 2250 } 2251 } 2252 } 2253 2254 temp = new float[2 * slices]; 2255 2256 for (int r = 0; r < ldimn2; r++) { 2257 int idx3 = r * twoRowStride; 2258 for (int c = 0; c < columns; c++) { 2259 int idx1 = 2 * c; 2260 for (int s = 0; s < slices; s++) { 2261 int idx2 = 2 * s; 2262 int idx4 = s * twoSliceStride + idx3 + idx1; 2263 temp[idx2] = a[idx4]; 2264 temp[idx2 + 1] = a[idx4 + 1]; 2265 } 2266 fftSlices.complexInverse(temp, scale); 2267 for (int s = 0; s < slices; s++) { 2268 int idx2 = 2 * s; 2269 int idx4 = s * twoSliceStride + idx3 + idx1; 2270 a[idx4] = temp[idx2]; 2271 a[idx4 + 1] = temp[idx2 + 1]; 2272 } 2273 } 2274 } 2275 2276 for (int s = 0; s < slices; s++) { 2277 int idx2 = (slices - s) % slices; 2278 int idx5 = idx2 * twoSliceStride; 2279 int idx6 = s * twoSliceStride; 2280 for (int r = 1; r < n2d2; r++) { 2281 int idx4 = rows - r; 2282 int idx7 = idx4 * twoRowStride; 2283 int idx8 = r * twoRowStride; 2284 int idx9 = idx5 + idx7; 2285 for (int c = 0; c < columns; c++) { 2286 int idx1 = 2 * c; 2287 int idx3 = twon3 - idx1; 2288 int idx10 = idx6 + idx8 + idx1; 2289 a[idx9 + idx3 % twon3] = a[idx10]; 2290 a[idx9 + (idx3 + 1) % twon3] = -a[idx10 + 1]; 2291 } 2292 } 2293 } 2294 2295 } 2296 } 2297 2298 private void xdft3da_sub1(int icr, int isgn, float[] a, boolean scale) { 2299 int idx0, idx1, idx2, idx3, idx4, idx5; 2300 2301 if (isgn == -1) { 2302 for (int s = 0; s < slices; s++) { 2303 idx0 = s * sliceStride; 2304 if (icr == 0) { 2305 for (int r = 0; r < rows; r++) { 2306 fftColumns.complexForward(a, idx0 + r * rowStride); 2307 } 2308 } else { 2309 for (int r = 0; r < rows; r++) { 2310 fftColumns.realForward(a, idx0 + r * rowStride); 2311 } 2312 } 2313 if (columns > 4) { 2314 for (int c = 0; c < columns; c += 8) { 2315 for (int r = 0; r < rows; r++) { 2316 idx1 = idx0 + r * rowStride + c; 2317 idx2 = 2 * r; 2318 idx3 = 2 * rows + 2 * r; 2319 idx4 = idx3 + 2 * rows; 2320 idx5 = idx4 + 2 * rows; 2321 t[idx2] = a[idx1]; 2322 t[idx2 + 1] = a[idx1 + 1]; 2323 t[idx3] = a[idx1 + 2]; 2324 t[idx3 + 1] = a[idx1 + 3]; 2325 t[idx4] = a[idx1 + 4]; 2326 t[idx4 + 1] = a[idx1 + 5]; 2327 t[idx5] = a[idx1 + 6]; 2328 t[idx5 + 1] = a[idx1 + 7]; 2329 } 2330 fftRows.complexForward(t, 0); 2331 fftRows.complexForward(t, 2 * rows); 2332 fftRows.complexForward(t, 4 * rows); 2333 fftRows.complexForward(t, 6 * rows); 2334 for (int r = 0; r < rows; r++) { 2335 idx1 = idx0 + r * rowStride + c; 2336 idx2 = 2 * r; 2337 idx3 = 2 * rows + 2 * r; 2338 idx4 = idx3 + 2 * rows; 2339 idx5 = idx4 + 2 * rows; 2340 a[idx1] = t[idx2]; 2341 a[idx1 + 1] = t[idx2 + 1]; 2342 a[idx1 + 2] = t[idx3]; 2343 a[idx1 + 3] = t[idx3 + 1]; 2344 a[idx1 + 4] = t[idx4]; 2345 a[idx1 + 5] = t[idx4 + 1]; 2346 a[idx1 + 6] = t[idx5]; 2347 a[idx1 + 7] = t[idx5 + 1]; 2348 } 2349 } 2350 } else if (columns == 4) { 2351 for (int r = 0; r < rows; r++) { 2352 idx1 = idx0 + r * rowStride; 2353 idx2 = 2 * r; 2354 idx3 = 2 * rows + 2 * r; 2355 t[idx2] = a[idx1]; 2356 t[idx2 + 1] = a[idx1 + 1]; 2357 t[idx3] = a[idx1 + 2]; 2358 t[idx3 + 1] = a[idx1 + 3]; 2359 } 2360 fftRows.complexForward(t, 0); 2361 fftRows.complexForward(t, 2 * rows); 2362 for (int r = 0; r < rows; r++) { 2363 idx1 = idx0 + r * rowStride; 2364 idx2 = 2 * r; 2365 idx3 = 2 * rows + 2 * r; 2366 a[idx1] = t[idx2]; 2367 a[idx1 + 1] = t[idx2 + 1]; 2368 a[idx1 + 2] = t[idx3]; 2369 a[idx1 + 3] = t[idx3 + 1]; 2370 } 2371 } else if (columns == 2) { 2372 for (int r = 0; r < rows; r++) { 2373 idx1 = idx0 + r * rowStride; 2374 idx2 = 2 * r; 2375 t[idx2] = a[idx1]; 2376 t[idx2 + 1] = a[idx1 + 1]; 2377 } 2378 fftRows.complexForward(t, 0); 2379 for (int r = 0; r < rows; r++) { 2380 idx1 = idx0 + r * rowStride; 2381 idx2 = 2 * r; 2382 a[idx1] = t[idx2]; 2383 a[idx1 + 1] = t[idx2 + 1]; 2384 } 2385 } 2386 } 2387 } else { 2388 for (int s = 0; s < slices; s++) { 2389 idx0 = s * sliceStride; 2390 if (icr == 0) { 2391 for (int r = 0; r < rows; r++) { 2392 fftColumns.complexInverse(a, idx0 + r * rowStride, scale); 2393 } 2394 } 2395 if (columns > 4) { 2396 for (int c = 0; c < columns; c += 8) { 2397 for (int r = 0; r < rows; r++) { 2398 idx1 = idx0 + r * rowStride + c; 2399 idx2 = 2 * r; 2400 idx3 = 2 * rows + 2 * r; 2401 idx4 = idx3 + 2 * rows; 2402 idx5 = idx4 + 2 * rows; 2403 t[idx2] = a[idx1]; 2404 t[idx2 + 1] = a[idx1 + 1]; 2405 t[idx3] = a[idx1 + 2]; 2406 t[idx3 + 1] = a[idx1 + 3]; 2407 t[idx4] = a[idx1 + 4]; 2408 t[idx4 + 1] = a[idx1 + 5]; 2409 t[idx5] = a[idx1 + 6]; 2410 t[idx5 + 1] = a[idx1 + 7]; 2411 } 2412 fftRows.complexInverse(t, 0, scale); 2413 fftRows.complexInverse(t, 2 * rows, scale); 2414 fftRows.complexInverse(t, 4 * rows, scale); 2415 fftRows.complexInverse(t, 6 * rows, scale); 2416 for (int r = 0; r < rows; r++) { 2417 idx1 = idx0 + r * rowStride + c; 2418 idx2 = 2 * r; 2419 idx3 = 2 * rows + 2 * r; 2420 idx4 = idx3 + 2 * rows; 2421 idx5 = idx4 + 2 * rows; 2422 a[idx1] = t[idx2]; 2423 a[idx1 + 1] = t[idx2 + 1]; 2424 a[idx1 + 2] = t[idx3]; 2425 a[idx1 + 3] = t[idx3 + 1]; 2426 a[idx1 + 4] = t[idx4]; 2427 a[idx1 + 5] = t[idx4 + 1]; 2428 a[idx1 + 6] = t[idx5]; 2429 a[idx1 + 7] = t[idx5 + 1]; 2430 } 2431 } 2432 } else if (columns == 4) { 2433 for (int r = 0; r < rows; r++) { 2434 idx1 = idx0 + r * rowStride; 2435 idx2 = 2 * r; 2436 idx3 = 2 * rows + 2 * r; 2437 t[idx2] = a[idx1]; 2438 t[idx2 + 1] = a[idx1 + 1]; 2439 t[idx3] = a[idx1 + 2]; 2440 t[idx3 + 1] = a[idx1 + 3]; 2441 } 2442 fftRows.complexInverse(t, 0, scale); 2443 fftRows.complexInverse(t, 2 * rows, scale); 2444 for (int r = 0; r < rows; r++) { 2445 idx1 = idx0 + r * rowStride; 2446 idx2 = 2 * r; 2447 idx3 = 2 * rows + 2 * r; 2448 a[idx1] = t[idx2]; 2449 a[idx1 + 1] = t[idx2 + 1]; 2450 a[idx1 + 2] = t[idx3]; 2451 a[idx1 + 3] = t[idx3 + 1]; 2452 } 2453 } else if (columns == 2) { 2454 for (int r = 0; r < rows; r++) { 2455 idx1 = idx0 + r * rowStride; 2456 idx2 = 2 * r; 2457 t[idx2] = a[idx1]; 2458 t[idx2 + 1] = a[idx1 + 1]; 2459 } 2460 fftRows.complexInverse(t, 0, scale); 2461 for (int r = 0; r < rows; r++) { 2462 idx1 = idx0 + r * rowStride; 2463 idx2 = 2 * r; 2464 a[idx1] = t[idx2]; 2465 a[idx1 + 1] = t[idx2 + 1]; 2466 } 2467 } 2468 if (icr != 0) { 2469 for (int r = 0; r < rows; r++) { 2470 fftColumns.realInverse(a, idx0 + r * rowStride, scale); 2471 } 2472 } 2473 } 2474 } 2475 } 2476 2477 private void xdft3da_sub2(int icr, int isgn, float[] a, boolean scale) { 2478 int idx0, idx1, idx2, idx3, idx4, idx5; 2479 2480 if (isgn == -1) { 2481 for (int s = 0; s < slices; s++) { 2482 idx0 = s * sliceStride; 2483 if (icr == 0) { 2484 for (int r = 0; r < rows; r++) { 2485 fftColumns.complexForward(a, idx0 + r * rowStride); 2486 } 2487 } else { 2488 for (int r = 0; r < rows; r++) { 2489 fftColumns.realForward(a, idx0 + r * rowStride); 2490 } 2491 } 2492 if (columns > 4) { 2493 for (int c = 0; c < columns; c += 8) { 2494 for (int r = 0; r < rows; r++) { 2495 idx1 = idx0 + r * rowStride + c; 2496 idx2 = 2 * r; 2497 idx3 = 2 * rows + 2 * r; 2498 idx4 = idx3 + 2 * rows; 2499 idx5 = idx4 + 2 * rows; 2500 t[idx2] = a[idx1]; 2501 t[idx2 + 1] = a[idx1 + 1]; 2502 t[idx3] = a[idx1 + 2]; 2503 t[idx3 + 1] = a[idx1 + 3]; 2504 t[idx4] = a[idx1 + 4]; 2505 t[idx4 + 1] = a[idx1 + 5]; 2506 t[idx5] = a[idx1 + 6]; 2507 t[idx5 + 1] = a[idx1 + 7]; 2508 } 2509 fftRows.complexForward(t, 0); 2510 fftRows.complexForward(t, 2 * rows); 2511 fftRows.complexForward(t, 4 * rows); 2512 fftRows.complexForward(t, 6 * rows); 2513 for (int r = 0; r < rows; r++) { 2514 idx1 = idx0 + r * rowStride + c; 2515 idx2 = 2 * r; 2516 idx3 = 2 * rows + 2 * r; 2517 idx4 = idx3 + 2 * rows; 2518 idx5 = idx4 + 2 * rows; 2519 a[idx1] = t[idx2]; 2520 a[idx1 + 1] = t[idx2 + 1]; 2521 a[idx1 + 2] = t[idx3]; 2522 a[idx1 + 3] = t[idx3 + 1]; 2523 a[idx1 + 4] = t[idx4]; 2524 a[idx1 + 5] = t[idx4 + 1]; 2525 a[idx1 + 6] = t[idx5]; 2526 a[idx1 + 7] = t[idx5 + 1]; 2527 } 2528 } 2529 } else if (columns == 4) { 2530 for (int r = 0; r < rows; r++) { 2531 idx1 = idx0 + r * rowStride; 2532 idx2 = 2 * r; 2533 idx3 = 2 * rows + 2 * r; 2534 t[idx2] = a[idx1]; 2535 t[idx2 + 1] = a[idx1 + 1]; 2536 t[idx3] = a[idx1 + 2]; 2537 t[idx3 + 1] = a[idx1 + 3]; 2538 } 2539 fftRows.complexForward(t, 0); 2540 fftRows.complexForward(t, 2 * rows); 2541 for (int r = 0; r < rows; r++) { 2542 idx1 = idx0 + r * rowStride; 2543 idx2 = 2 * r; 2544 idx3 = 2 * rows + 2 * r; 2545 a[idx1] = t[idx2]; 2546 a[idx1 + 1] = t[idx2 + 1]; 2547 a[idx1 + 2] = t[idx3]; 2548 a[idx1 + 3] = t[idx3 + 1]; 2549 } 2550 } else if (columns == 2) { 2551 for (int r = 0; r < rows; r++) { 2552 idx1 = idx0 + r * rowStride; 2553 idx2 = 2 * r; 2554 t[idx2] = a[idx1]; 2555 t[idx2 + 1] = a[idx1 + 1]; 2556 } 2557 fftRows.complexForward(t, 0); 2558 for (int r = 0; r < rows; r++) { 2559 idx1 = idx0 + r * rowStride; 2560 idx2 = 2 * r; 2561 a[idx1] = t[idx2]; 2562 a[idx1 + 1] = t[idx2 + 1]; 2563 } 2564 } 2565 } 2566 } else { 2567 for (int s = 0; s < slices; s++) { 2568 idx0 = s * sliceStride; 2569 if (icr == 0) { 2570 for (int r = 0; r < rows; r++) { 2571 fftColumns.complexInverse(a, idx0 + r * rowStride, scale); 2572 } 2573 } else { 2574 for (int r = 0; r < rows; r++) { 2575 fftColumns.realInverse2(a, idx0 + r * rowStride, scale); 2576 } 2577 } 2578 if (columns > 4) { 2579 for (int c = 0; c < columns; c += 8) { 2580 for (int r = 0; r < rows; r++) { 2581 idx1 = idx0 + r * rowStride + c; 2582 idx2 = 2 * r; 2583 idx3 = 2 * rows + 2 * r; 2584 idx4 = idx3 + 2 * rows; 2585 idx5 = idx4 + 2 * rows; 2586 t[idx2] = a[idx1]; 2587 t[idx2 + 1] = a[idx1 + 1]; 2588 t[idx3] = a[idx1 + 2]; 2589 t[idx3 + 1] = a[idx1 + 3]; 2590 t[idx4] = a[idx1 + 4]; 2591 t[idx4 + 1] = a[idx1 + 5]; 2592 t[idx5] = a[idx1 + 6]; 2593 t[idx5 + 1] = a[idx1 + 7]; 2594 } 2595 fftRows.complexInverse(t, 0, scale); 2596 fftRows.complexInverse(t, 2 * rows, scale); 2597 fftRows.complexInverse(t, 4 * rows, scale); 2598 fftRows.complexInverse(t, 6 * rows, scale); 2599 for (int r = 0; r < rows; r++) { 2600 idx1 = idx0 + r * rowStride + c; 2601 idx2 = 2 * r; 2602 idx3 = 2 * rows + 2 * r; 2603 idx4 = idx3 + 2 * rows; 2604 idx5 = idx4 + 2 * rows; 2605 a[idx1] = t[idx2]; 2606 a[idx1 + 1] = t[idx2 + 1]; 2607 a[idx1 + 2] = t[idx3]; 2608 a[idx1 + 3] = t[idx3 + 1]; 2609 a[idx1 + 4] = t[idx4]; 2610 a[idx1 + 5] = t[idx4 + 1]; 2611 a[idx1 + 6] = t[idx5]; 2612 a[idx1 + 7] = t[idx5 + 1]; 2613 } 2614 } 2615 } else if (columns == 4) { 2616 for (int r = 0; r < rows; r++) { 2617 idx1 = idx0 + r * rowStride; 2618 idx2 = 2 * r; 2619 idx3 = 2 * rows + 2 * r; 2620 t[idx2] = a[idx1]; 2621 t[idx2 + 1] = a[idx1 + 1]; 2622 t[idx3] = a[idx1 + 2]; 2623 t[idx3 + 1] = a[idx1 + 3]; 2624 } 2625 fftRows.complexInverse(t, 0, scale); 2626 fftRows.complexInverse(t, 2 * rows, scale); 2627 for (int r = 0; r < rows; r++) { 2628 idx1 = idx0 + r * rowStride; 2629 idx2 = 2 * r; 2630 idx3 = 2 * rows + 2 * r; 2631 a[idx1] = t[idx2]; 2632 a[idx1 + 1] = t[idx2 + 1]; 2633 a[idx1 + 2] = t[idx3]; 2634 a[idx1 + 3] = t[idx3 + 1]; 2635 } 2636 } else if (columns == 2) { 2637 for (int r = 0; r < rows; r++) { 2638 idx1 = idx0 + r * rowStride; 2639 idx2 = 2 * r; 2640 t[idx2] = a[idx1]; 2641 t[idx2 + 1] = a[idx1 + 1]; 2642 } 2643 fftRows.complexInverse(t, 0, scale); 2644 for (int r = 0; r < rows; r++) { 2645 idx1 = idx0 + r * rowStride; 2646 idx2 = 2 * r; 2647 a[idx1] = t[idx2]; 2648 a[idx1 + 1] = t[idx2 + 1]; 2649 } 2650 } 2651 } 2652 } 2653 } 2654 2655 private void xdft3da_sub1(int icr, int isgn, float[][][] a, boolean scale) { 2656 int idx2, idx3, idx4, idx5; 2657 2658 if (isgn == -1) { 2659 for (int s = 0; s < slices; s++) { 2660 if (icr == 0) { 2661 for (int r = 0; r < rows; r++) { 2662 fftColumns.complexForward(a[s][r]); 2663 } 2664 } else { 2665 for (int r = 0; r < rows; r++) { 2666 fftColumns.realForward(a[s][r], 0); 2667 } 2668 } 2669 if (columns > 4) { 2670 for (int c = 0; c < columns; c += 8) { 2671 for (int r = 0; r < rows; r++) { 2672 idx2 = 2 * r; 2673 idx3 = 2 * rows + 2 * r; 2674 idx4 = idx3 + 2 * rows; 2675 idx5 = idx4 + 2 * rows; 2676 t[idx2] = a[s][r][c]; 2677 t[idx2 + 1] = a[s][r][c + 1]; 2678 t[idx3] = a[s][r][c + 2]; 2679 t[idx3 + 1] = a[s][r][c + 3]; 2680 t[idx4] = a[s][r][c + 4]; 2681 t[idx4 + 1] = a[s][r][c + 5]; 2682 t[idx5] = a[s][r][c + 6]; 2683 t[idx5 + 1] = a[s][r][c + 7]; 2684 } 2685 fftRows.complexForward(t, 0); 2686 fftRows.complexForward(t, 2 * rows); 2687 fftRows.complexForward(t, 4 * rows); 2688 fftRows.complexForward(t, 6 * rows); 2689 for (int r = 0; r < rows; r++) { 2690 idx2 = 2 * r; 2691 idx3 = 2 * rows + 2 * r; 2692 idx4 = idx3 + 2 * rows; 2693 idx5 = idx4 + 2 * rows; 2694 a[s][r][c] = t[idx2]; 2695 a[s][r][c + 1] = t[idx2 + 1]; 2696 a[s][r][c + 2] = t[idx3]; 2697 a[s][r][c + 3] = t[idx3 + 1]; 2698 a[s][r][c + 4] = t[idx4]; 2699 a[s][r][c + 5] = t[idx4 + 1]; 2700 a[s][r][c + 6] = t[idx5]; 2701 a[s][r][c + 7] = t[idx5 + 1]; 2702 } 2703 } 2704 } else if (columns == 4) { 2705 for (int r = 0; r < rows; r++) { 2706 idx2 = 2 * r; 2707 idx3 = 2 * rows + 2 * r; 2708 t[idx2] = a[s][r][0]; 2709 t[idx2 + 1] = a[s][r][1]; 2710 t[idx3] = a[s][r][2]; 2711 t[idx3 + 1] = a[s][r][3]; 2712 } 2713 fftRows.complexForward(t, 0); 2714 fftRows.complexForward(t, 2 * rows); 2715 for (int r = 0; r < rows; r++) { 2716 idx2 = 2 * r; 2717 idx3 = 2 * rows + 2 * r; 2718 a[s][r][0] = t[idx2]; 2719 a[s][r][1] = t[idx2 + 1]; 2720 a[s][r][2] = t[idx3]; 2721 a[s][r][3] = t[idx3 + 1]; 2722 } 2723 } else if (columns == 2) { 2724 for (int r = 0; r < rows; r++) { 2725 idx2 = 2 * r; 2726 t[idx2] = a[s][r][0]; 2727 t[idx2 + 1] = a[s][r][1]; 2728 } 2729 fftRows.complexForward(t, 0); 2730 for (int r = 0; r < rows; r++) { 2731 idx2 = 2 * r; 2732 a[s][r][0] = t[idx2]; 2733 a[s][r][1] = t[idx2 + 1]; 2734 } 2735 } 2736 } 2737 } else { 2738 for (int s = 0; s < slices; s++) { 2739 if (icr == 0) { 2740 for (int r = 0; r < rows; r++) { 2741 fftColumns.complexInverse(a[s][r], scale); 2742 } 2743 } 2744 if (columns > 4) { 2745 for (int c = 0; c < columns; c += 8) { 2746 for (int r = 0; r < rows; r++) { 2747 idx2 = 2 * r; 2748 idx3 = 2 * rows + 2 * r; 2749 idx4 = idx3 + 2 * rows; 2750 idx5 = idx4 + 2 * rows; 2751 t[idx2] = a[s][r][c]; 2752 t[idx2 + 1] = a[s][r][c + 1]; 2753 t[idx3] = a[s][r][c + 2]; 2754 t[idx3 + 1] = a[s][r][c + 3]; 2755 t[idx4] = a[s][r][c + 4]; 2756 t[idx4 + 1] = a[s][r][c + 5]; 2757 t[idx5] = a[s][r][c + 6]; 2758 t[idx5 + 1] = a[s][r][c + 7]; 2759 } 2760 fftRows.complexInverse(t, 0, scale); 2761 fftRows.complexInverse(t, 2 * rows, scale); 2762 fftRows.complexInverse(t, 4 * rows, scale); 2763 fftRows.complexInverse(t, 6 * rows, scale); 2764 for (int r = 0; r < rows; r++) { 2765 idx2 = 2 * r; 2766 idx3 = 2 * rows + 2 * r; 2767 idx4 = idx3 + 2 * rows; 2768 idx5 = idx4 + 2 * rows; 2769 a[s][r][c] = t[idx2]; 2770 a[s][r][c + 1] = t[idx2 + 1]; 2771 a[s][r][c + 2] = t[idx3]; 2772 a[s][r][c + 3] = t[idx3 + 1]; 2773 a[s][r][c + 4] = t[idx4]; 2774 a[s][r][c + 5] = t[idx4 + 1]; 2775 a[s][r][c + 6] = t[idx5]; 2776 a[s][r][c + 7] = t[idx5 + 1]; 2777 } 2778 } 2779 } else if (columns == 4) { 2780 for (int r = 0; r < rows; r++) { 2781 idx2 = 2 * r; 2782 idx3 = 2 * rows + 2 * r; 2783 t[idx2] = a[s][r][0]; 2784 t[idx2 + 1] = a[s][r][1]; 2785 t[idx3] = a[s][r][2]; 2786 t[idx3 + 1] = a[s][r][3]; 2787 } 2788 fftRows.complexInverse(t, 0, scale); 2789 fftRows.complexInverse(t, 2 * rows, scale); 2790 for (int r = 0; r < rows; r++) { 2791 idx2 = 2 * r; 2792 idx3 = 2 * rows + 2 * r; 2793 a[s][r][0] = t[idx2]; 2794 a[s][r][1] = t[idx2 + 1]; 2795 a[s][r][2] = t[idx3]; 2796 a[s][r][3] = t[idx3 + 1]; 2797 } 2798 } else if (columns == 2) { 2799 for (int r = 0; r < rows; r++) { 2800 idx2 = 2 * r; 2801 t[idx2] = a[s][r][0]; 2802 t[idx2 + 1] = a[s][r][1]; 2803 } 2804 fftRows.complexInverse(t, 0, scale); 2805 for (int r = 0; r < rows; r++) { 2806 idx2 = 2 * r; 2807 a[s][r][0] = t[idx2]; 2808 a[s][r][1] = t[idx2 + 1]; 2809 } 2810 } 2811 if (icr != 0) { 2812 for (int r = 0; r < rows; r++) { 2813 fftColumns.realInverse(a[s][r], 0, scale); 2814 } 2815 } 2816 } 2817 } 2818 } 2819 2820 private void xdft3da_sub2(int icr, int isgn, float[][][] a, boolean scale) { 2821 int idx2, idx3, idx4, idx5; 2822 2823 if (isgn == -1) { 2824 for (int s = 0; s < slices; s++) { 2825 if (icr == 0) { 2826 for (int r = 0; r < rows; r++) { 2827 fftColumns.complexForward(a[s][r]); 2828 } 2829 } else { 2830 for (int r = 0; r < rows; r++) { 2831 fftColumns.realForward(a[s][r]); 2832 } 2833 } 2834 if (columns > 4) { 2835 for (int c = 0; c < columns; c += 8) { 2836 for (int r = 0; r < rows; r++) { 2837 idx2 = 2 * r; 2838 idx3 = 2 * rows + 2 * r; 2839 idx4 = idx3 + 2 * rows; 2840 idx5 = idx4 + 2 * rows; 2841 t[idx2] = a[s][r][c]; 2842 t[idx2 + 1] = a[s][r][c + 1]; 2843 t[idx3] = a[s][r][c + 2]; 2844 t[idx3 + 1] = a[s][r][c + 3]; 2845 t[idx4] = a[s][r][c + 4]; 2846 t[idx4 + 1] = a[s][r][c + 5]; 2847 t[idx5] = a[s][r][c + 6]; 2848 t[idx5 + 1] = a[s][r][c + 7]; 2849 } 2850 fftRows.complexForward(t, 0); 2851 fftRows.complexForward(t, 2 * rows); 2852 fftRows.complexForward(t, 4 * rows); 2853 fftRows.complexForward(t, 6 * rows); 2854 for (int r = 0; r < rows; r++) { 2855 idx2 = 2 * r; 2856 idx3 = 2 * rows + 2 * r; 2857 idx4 = idx3 + 2 * rows; 2858 idx5 = idx4 + 2 * rows; 2859 a[s][r][c] = t[idx2]; 2860 a[s][r][c + 1] = t[idx2 + 1]; 2861 a[s][r][c + 2] = t[idx3]; 2862 a[s][r][c + 3] = t[idx3 + 1]; 2863 a[s][r][c + 4] = t[idx4]; 2864 a[s][r][c + 5] = t[idx4 + 1]; 2865 a[s][r][c + 6] = t[idx5]; 2866 a[s][r][c + 7] = t[idx5 + 1]; 2867 } 2868 } 2869 } else if (columns == 4) { 2870 for (int r = 0; r < rows; r++) { 2871 idx2 = 2 * r; 2872 idx3 = 2 * rows + 2 * r; 2873 t[idx2] = a[s][r][0]; 2874 t[idx2 + 1] = a[s][r][1]; 2875 t[idx3] = a[s][r][2]; 2876 t[idx3 + 1] = a[s][r][3]; 2877 } 2878 fftRows.complexForward(t, 0); 2879 fftRows.complexForward(t, 2 * rows); 2880 for (int r = 0; r < rows; r++) { 2881 idx2 = 2 * r; 2882 idx3 = 2 * rows + 2 * r; 2883 a[s][r][0] = t[idx2]; 2884 a[s][r][1] = t[idx2 + 1]; 2885 a[s][r][2] = t[idx3]; 2886 a[s][r][3] = t[idx3 + 1]; 2887 } 2888 } else if (columns == 2) { 2889 for (int r = 0; r < rows; r++) { 2890 idx2 = 2 * r; 2891 t[idx2] = a[s][r][0]; 2892 t[idx2 + 1] = a[s][r][1]; 2893 } 2894 fftRows.complexForward(t, 0); 2895 for (int r = 0; r < rows; r++) { 2896 idx2 = 2 * r; 2897 a[s][r][0] = t[idx2]; 2898 a[s][r][1] = t[idx2 + 1]; 2899 } 2900 } 2901 } 2902 } else { 2903 for (int s = 0; s < slices; s++) { 2904 if (icr == 0) { 2905 for (int r = 0; r < rows; r++) { 2906 fftColumns.complexInverse(a[s][r], scale); 2907 } 2908 } else { 2909 for (int r = 0; r < rows; r++) { 2910 fftColumns.realInverse2(a[s][r], 0, scale); 2911 } 2912 } 2913 if (columns > 4) { 2914 for (int c = 0; c < columns; c += 8) { 2915 for (int r = 0; r < rows; r++) { 2916 idx2 = 2 * r; 2917 idx3 = 2 * rows + 2 * r; 2918 idx4 = idx3 + 2 * rows; 2919 idx5 = idx4 + 2 * rows; 2920 t[idx2] = a[s][r][c]; 2921 t[idx2 + 1] = a[s][r][c + 1]; 2922 t[idx3] = a[s][r][c + 2]; 2923 t[idx3 + 1] = a[s][r][c + 3]; 2924 t[idx4] = a[s][r][c + 4]; 2925 t[idx4 + 1] = a[s][r][c + 5]; 2926 t[idx5] = a[s][r][c + 6]; 2927 t[idx5 + 1] = a[s][r][c + 7]; 2928 } 2929 fftRows.complexInverse(t, 0, scale); 2930 fftRows.complexInverse(t, 2 * rows, scale); 2931 fftRows.complexInverse(t, 4 * rows, scale); 2932 fftRows.complexInverse(t, 6 * rows, scale); 2933 for (int r = 0; r < rows; r++) { 2934 idx2 = 2 * r; 2935 idx3 = 2 * rows + 2 * r; 2936 idx4 = idx3 + 2 * rows; 2937 idx5 = idx4 + 2 * rows; 2938 a[s][r][c] = t[idx2]; 2939 a[s][r][c + 1] = t[idx2 + 1]; 2940 a[s][r][c + 2] = t[idx3]; 2941 a[s][r][c + 3] = t[idx3 + 1]; 2942 a[s][r][c + 4] = t[idx4]; 2943 a[s][r][c + 5] = t[idx4 + 1]; 2944 a[s][r][c + 6] = t[idx5]; 2945 a[s][r][c + 7] = t[idx5 + 1]; 2946 } 2947 } 2948 } else if (columns == 4) { 2949 for (int r = 0; r < rows; r++) { 2950 idx2 = 2 * r; 2951 idx3 = 2 * rows + 2 * r; 2952 t[idx2] = a[s][r][0]; 2953 t[idx2 + 1] = a[s][r][1]; 2954 t[idx3] = a[s][r][2]; 2955 t[idx3 + 1] = a[s][r][3]; 2956 } 2957 fftRows.complexInverse(t, 0, scale); 2958 fftRows.complexInverse(t, 2 * rows, scale); 2959 for (int r = 0; r < rows; r++) { 2960 idx2 = 2 * r; 2961 idx3 = 2 * rows + 2 * r; 2962 a[s][r][0] = t[idx2]; 2963 a[s][r][1] = t[idx2 + 1]; 2964 a[s][r][2] = t[idx3]; 2965 a[s][r][3] = t[idx3 + 1]; 2966 } 2967 } else if (columns == 2) { 2968 for (int r = 0; r < rows; r++) { 2969 idx2 = 2 * r; 2970 t[idx2] = a[s][r][0]; 2971 t[idx2 + 1] = a[s][r][1]; 2972 } 2973 fftRows.complexInverse(t, 0, scale); 2974 for (int r = 0; r < rows; r++) { 2975 idx2 = 2 * r; 2976 a[s][r][0] = t[idx2]; 2977 a[s][r][1] = t[idx2 + 1]; 2978 } 2979 } 2980 } 2981 } 2982 } 2983 2984 private void cdft3db_sub(int isgn, float[] a, boolean scale) { 2985 int idx0, idx1, idx2, idx3, idx4, idx5; 2986 2987 if (isgn == -1) { 2988 if (columns > 4) { 2989 for (int r = 0; r < rows; r++) { 2990 idx0 = r * rowStride; 2991 for (int c = 0; c < columns; c += 8) { 2992 for (int s = 0; s < slices; s++) { 2993 idx1 = s * sliceStride + idx0 + c; 2994 idx2 = 2 * s; 2995 idx3 = 2 * slices + 2 * s; 2996 idx4 = idx3 + 2 * slices; 2997 idx5 = idx4 + 2 * slices; 2998 t[idx2] = a[idx1]; 2999 t[idx2 + 1] = a[idx1 + 1]; 3000 t[idx3] = a[idx1 + 2]; 3001 t[idx3 + 1] = a[idx1 + 3]; 3002 t[idx4] = a[idx1 + 4]; 3003 t[idx4 + 1] = a[idx1 + 5]; 3004 t[idx5] = a[idx1 + 6]; 3005 t[idx5 + 1] = a[idx1 + 7]; 3006 } 3007 fftSlices.complexForward(t, 0); 3008 fftSlices.complexForward(t, 2 * slices); 3009 fftSlices.complexForward(t, 4 * slices); 3010 fftSlices.complexForward(t, 6 * slices); 3011 for (int s = 0; s < slices; s++) { 3012 idx1 = s * sliceStride + idx0 + c; 3013 idx2 = 2 * s; 3014 idx3 = 2 * slices + 2 * s; 3015 idx4 = idx3 + 2 * slices; 3016 idx5 = idx4 + 2 * slices; 3017 a[idx1] = t[idx2]; 3018 a[idx1 + 1] = t[idx2 + 1]; 3019 a[idx1 + 2] = t[idx3]; 3020 a[idx1 + 3] = t[idx3 + 1]; 3021 a[idx1 + 4] = t[idx4]; 3022 a[idx1 + 5] = t[idx4 + 1]; 3023 a[idx1 + 6] = t[idx5]; 3024 a[idx1 + 7] = t[idx5 + 1]; 3025 } 3026 } 3027 } 3028 } else if (columns == 4) { 3029 for (int r = 0; r < rows; r++) { 3030 idx0 = r * rowStride; 3031 for (int s = 0; s < slices; s++) { 3032 idx1 = s * sliceStride + idx0; 3033 idx2 = 2 * s; 3034 idx3 = 2 * slices + 2 * s; 3035 t[idx2] = a[idx1]; 3036 t[idx2 + 1] = a[idx1 + 1]; 3037 t[idx3] = a[idx1 + 2]; 3038 t[idx3 + 1] = a[idx1 + 3]; 3039 } 3040 fftSlices.complexForward(t, 0); 3041 fftSlices.complexForward(t, 2 * slices); 3042 for (int s = 0; s < slices; s++) { 3043 idx1 = s * sliceStride + idx0; 3044 idx2 = 2 * s; 3045 idx3 = 2 * slices + 2 * s; 3046 a[idx1] = t[idx2]; 3047 a[idx1 + 1] = t[idx2 + 1]; 3048 a[idx1 + 2] = t[idx3]; 3049 a[idx1 + 3] = t[idx3 + 1]; 3050 } 3051 } 3052 } else if (columns == 2) { 3053 for (int r = 0; r < rows; r++) { 3054 idx0 = r * rowStride; 3055 for (int s = 0; s < slices; s++) { 3056 idx1 = s * sliceStride + idx0; 3057 idx2 = 2 * s; 3058 t[idx2] = a[idx1]; 3059 t[idx2 + 1] = a[idx1 + 1]; 3060 } 3061 fftSlices.complexForward(t, 0); 3062 for (int s = 0; s < slices; s++) { 3063 idx1 = s * sliceStride + idx0; 3064 idx2 = 2 * s; 3065 a[idx1] = t[idx2]; 3066 a[idx1 + 1] = t[idx2 + 1]; 3067 } 3068 } 3069 } 3070 } else { 3071 if (columns > 4) { 3072 for (int r = 0; r < rows; r++) { 3073 idx0 = r * rowStride; 3074 for (int c = 0; c < columns; c += 8) { 3075 for (int s = 0; s < slices; s++) { 3076 idx1 = s * sliceStride + idx0 + c; 3077 idx2 = 2 * s; 3078 idx3 = 2 * slices + 2 * s; 3079 idx4 = idx3 + 2 * slices; 3080 idx5 = idx4 + 2 * slices; 3081 t[idx2] = a[idx1]; 3082 t[idx2 + 1] = a[idx1 + 1]; 3083 t[idx3] = a[idx1 + 2]; 3084 t[idx3 + 1] = a[idx1 + 3]; 3085 t[idx4] = a[idx1 + 4]; 3086 t[idx4 + 1] = a[idx1 + 5]; 3087 t[idx5] = a[idx1 + 6]; 3088 t[idx5 + 1] = a[idx1 + 7]; 3089 } 3090 fftSlices.complexInverse(t, 0, scale); 3091 fftSlices.complexInverse(t, 2 * slices, scale); 3092 fftSlices.complexInverse(t, 4 * slices, scale); 3093 fftSlices.complexInverse(t, 6 * slices, scale); 3094 for (int s = 0; s < slices; s++) { 3095 idx1 = s * sliceStride + idx0 + c; 3096 idx2 = 2 * s; 3097 idx3 = 2 * slices + 2 * s; 3098 idx4 = idx3 + 2 * slices; 3099 idx5 = idx4 + 2 * slices; 3100 a[idx1] = t[idx2]; 3101 a[idx1 + 1] = t[idx2 + 1]; 3102 a[idx1 + 2] = t[idx3]; 3103 a[idx1 + 3] = t[idx3 + 1]; 3104 a[idx1 + 4] = t[idx4]; 3105 a[idx1 + 5] = t[idx4 + 1]; 3106 a[idx1 + 6] = t[idx5]; 3107 a[idx1 + 7] = t[idx5 + 1]; 3108 } 3109 } 3110 } 3111 } else if (columns == 4) { 3112 for (int r = 0; r < rows; r++) { 3113 idx0 = r * rowStride; 3114 for (int s = 0; s < slices; s++) { 3115 idx1 = s * sliceStride + idx0; 3116 idx2 = 2 * s; 3117 idx3 = 2 * slices + 2 * s; 3118 t[idx2] = a[idx1]; 3119 t[idx2 + 1] = a[idx1 + 1]; 3120 t[idx3] = a[idx1 + 2]; 3121 t[idx3 + 1] = a[idx1 + 3]; 3122 } 3123 fftSlices.complexInverse(t, 0, scale); 3124 fftSlices.complexInverse(t, 2 * slices, scale); 3125 for (int s = 0; s < slices; s++) { 3126 idx1 = s * sliceStride + idx0; 3127 idx2 = 2 * s; 3128 idx3 = 2 * slices + 2 * s; 3129 a[idx1] = t[idx2]; 3130 a[idx1 + 1] = t[idx2 + 1]; 3131 a[idx1 + 2] = t[idx3]; 3132 a[idx1 + 3] = t[idx3 + 1]; 3133 } 3134 } 3135 } else if (columns == 2) { 3136 for (int r = 0; r < rows; r++) { 3137 idx0 = r * rowStride; 3138 for (int s = 0; s < slices; s++) { 3139 idx1 = s * sliceStride + idx0; 3140 idx2 = 2 * s; 3141 t[idx2] = a[idx1]; 3142 t[idx2 + 1] = a[idx1 + 1]; 3143 } 3144 fftSlices.complexInverse(t, 0, scale); 3145 for (int s = 0; s < slices; s++) { 3146 idx1 = s * sliceStride + idx0; 3147 idx2 = 2 * s; 3148 a[idx1] = t[idx2]; 3149 a[idx1 + 1] = t[idx2 + 1]; 3150 } 3151 } 3152 } 3153 } 3154 } 3155 3156 private void cdft3db_sub(int isgn, float[][][] a, boolean scale) { 3157 int idx2, idx3, idx4, idx5; 3158 3159 if (isgn == -1) { 3160 if (columns > 4) { 3161 for (int r = 0; r < rows; r++) { 3162 for (int c = 0; c < columns; c += 8) { 3163 for (int s = 0; s < slices; s++) { 3164 idx2 = 2 * s; 3165 idx3 = 2 * slices + 2 * s; 3166 idx4 = idx3 + 2 * slices; 3167 idx5 = idx4 + 2 * slices; 3168 t[idx2] = a[s][r][c]; 3169 t[idx2 + 1] = a[s][r][c + 1]; 3170 t[idx3] = a[s][r][c + 2]; 3171 t[idx3 + 1] = a[s][r][c + 3]; 3172 t[idx4] = a[s][r][c + 4]; 3173 t[idx4 + 1] = a[s][r][c + 5]; 3174 t[idx5] = a[s][r][c + 6]; 3175 t[idx5 + 1] = a[s][r][c + 7]; 3176 } 3177 fftSlices.complexForward(t, 0); 3178 fftSlices.complexForward(t, 2 * slices); 3179 fftSlices.complexForward(t, 4 * slices); 3180 fftSlices.complexForward(t, 6 * slices); 3181 for (int s = 0; s < slices; s++) { 3182 idx2 = 2 * s; 3183 idx3 = 2 * slices + 2 * s; 3184 idx4 = idx3 + 2 * slices; 3185 idx5 = idx4 + 2 * slices; 3186 a[s][r][c] = t[idx2]; 3187 a[s][r][c + 1] = t[idx2 + 1]; 3188 a[s][r][c + 2] = t[idx3]; 3189 a[s][r][c + 3] = t[idx3 + 1]; 3190 a[s][r][c + 4] = t[idx4]; 3191 a[s][r][c + 5] = t[idx4 + 1]; 3192 a[s][r][c + 6] = t[idx5]; 3193 a[s][r][c + 7] = t[idx5 + 1]; 3194 } 3195 } 3196 } 3197 } else if (columns == 4) { 3198 for (int r = 0; r < rows; r++) { 3199 for (int s = 0; s < slices; s++) { 3200 idx2 = 2 * s; 3201 idx3 = 2 * slices + 2 * s; 3202 t[idx2] = a[s][r][0]; 3203 t[idx2 + 1] = a[s][r][1]; 3204 t[idx3] = a[s][r][2]; 3205 t[idx3 + 1] = a[s][r][3]; 3206 } 3207 fftSlices.complexForward(t, 0); 3208 fftSlices.complexForward(t, 2 * slices); 3209 for (int s = 0; s < slices; s++) { 3210 idx2 = 2 * s; 3211 idx3 = 2 * slices + 2 * s; 3212 a[s][r][0] = t[idx2]; 3213 a[s][r][1] = t[idx2 + 1]; 3214 a[s][r][2] = t[idx3]; 3215 a[s][r][3] = t[idx3 + 1]; 3216 } 3217 } 3218 } else if (columns == 2) { 3219 for (int r = 0; r < rows; r++) { 3220 for (int s = 0; s < slices; s++) { 3221 idx2 = 2 * s; 3222 t[idx2] = a[s][r][0]; 3223 t[idx2 + 1] = a[s][r][1]; 3224 } 3225 fftSlices.complexForward(t, 0); 3226 for (int s = 0; s < slices; s++) { 3227 idx2 = 2 * s; 3228 a[s][r][0] = t[idx2]; 3229 a[s][r][1] = t[idx2 + 1]; 3230 } 3231 } 3232 } 3233 } else { 3234 if (columns > 4) { 3235 for (int r = 0; r < rows; r++) { 3236 for (int c = 0; c < columns; c += 8) { 3237 for (int s = 0; s < slices; s++) { 3238 idx2 = 2 * s; 3239 idx3 = 2 * slices + 2 * s; 3240 idx4 = idx3 + 2 * slices; 3241 idx5 = idx4 + 2 * slices; 3242 t[idx2] = a[s][r][c]; 3243 t[idx2 + 1] = a[s][r][c + 1]; 3244 t[idx3] = a[s][r][c + 2]; 3245 t[idx3 + 1] = a[s][r][c + 3]; 3246 t[idx4] = a[s][r][c + 4]; 3247 t[idx4 + 1] = a[s][r][c + 5]; 3248 t[idx5] = a[s][r][c + 6]; 3249 t[idx5 + 1] = a[s][r][c + 7]; 3250 } 3251 fftSlices.complexInverse(t, 0, scale); 3252 fftSlices.complexInverse(t, 2 * slices, scale); 3253 fftSlices.complexInverse(t, 4 * slices, scale); 3254 fftSlices.complexInverse(t, 6 * slices, scale); 3255 for (int s = 0; s < slices; s++) { 3256 idx2 = 2 * s; 3257 idx3 = 2 * slices + 2 * s; 3258 idx4 = idx3 + 2 * slices; 3259 idx5 = idx4 + 2 * slices; 3260 a[s][r][c] = t[idx2]; 3261 a[s][r][c + 1] = t[idx2 + 1]; 3262 a[s][r][c + 2] = t[idx3]; 3263 a[s][r][c + 3] = t[idx3 + 1]; 3264 a[s][r][c + 4] = t[idx4]; 3265 a[s][r][c + 5] = t[idx4 + 1]; 3266 a[s][r][c + 6] = t[idx5]; 3267 a[s][r][c + 7] = t[idx5 + 1]; 3268 } 3269 } 3270 } 3271 } else if (columns == 4) { 3272 for (int r = 0; r < rows; r++) { 3273 for (int s = 0; s < slices; s++) { 3274 idx2 = 2 * s; 3275 idx3 = 2 * slices + 2 * s; 3276 t[idx2] = a[s][r][0]; 3277 t[idx2 + 1] = a[s][r][1]; 3278 t[idx3] = a[s][r][2]; 3279 t[idx3 + 1] = a[s][r][3]; 3280 } 3281 fftSlices.complexInverse(t, 0, scale); 3282 fftSlices.complexInverse(t, 2 * slices, scale); 3283 for (int s = 0; s < slices; s++) { 3284 idx2 = 2 * s; 3285 idx3 = 2 * slices + 2 * s; 3286 a[s][r][0] = t[idx2]; 3287 a[s][r][1] = t[idx2 + 1]; 3288 a[s][r][2] = t[idx3]; 3289 a[s][r][3] = t[idx3 + 1]; 3290 } 3291 } 3292 } else if (columns == 2) { 3293 for (int r = 0; r < rows; r++) { 3294 for (int s = 0; s < slices; s++) { 3295 idx2 = 2 * s; 3296 t[idx2] = a[s][r][0]; 3297 t[idx2 + 1] = a[s][r][1]; 3298 } 3299 fftSlices.complexInverse(t, 0, scale); 3300 for (int s = 0; s < slices; s++) { 3301 idx2 = 2 * s; 3302 a[s][r][0] = t[idx2]; 3303 a[s][r][1] = t[idx2 + 1]; 3304 } 3305 } 3306 } 3307 } 3308 } 3309 3310 private void xdft3da_subth1(final int icr, final int isgn, final float[] a, final boolean scale) { 3311 int nt, i; 3312 final int nthreads = Math.min(ConcurrencyUtils.getNumberOfThreads(), slices); 3313 nt = 8 * rows; 3314 if (columns == 4) { 3315 nt >>= 1; 3316 } else if (columns < 4) { 3317 nt >>= 2; 3318 } 3319 Future<?>[] futures = new Future[nthreads]; 3320 for (i = 0; i < nthreads; i++) { 3321 final int n0 = i; 3322 final int startt = nt * i; 3323 futures[i] = ConcurrencyUtils.submit(new Runnable() { 3324 @Override 3325 public void run() { 3326 int idx0, idx1, idx2, idx3, idx4, idx5; 3327 3328 if (isgn == -1) { 3329 for (int s = n0; s < slices; s += nthreads) { 3330 idx0 = s * sliceStride; 3331 if (icr == 0) { 3332 for (int r = 0; r < rows; r++) { 3333 fftColumns.complexForward(a, idx0 + r * rowStride); 3334 } 3335 } else { 3336 for (int r = 0; r < rows; r++) { 3337 fftColumns.realForward(a, idx0 + r * rowStride); 3338 } 3339 } 3340 if (columns > 4) { 3341 for (int c = 0; c < columns; c += 8) { 3342 for (int r = 0; r < rows; r++) { 3343 idx1 = idx0 + r * rowStride + c; 3344 idx2 = startt + 2 * r; 3345 idx3 = startt + 2 * rows + 2 * r; 3346 idx4 = idx3 + 2 * rows; 3347 idx5 = idx4 + 2 * rows; 3348 t[idx2] = a[idx1]; 3349 t[idx2 + 1] = a[idx1 + 1]; 3350 t[idx3] = a[idx1 + 2]; 3351 t[idx3 + 1] = a[idx1 + 3]; 3352 t[idx4] = a[idx1 + 4]; 3353 t[idx4 + 1] = a[idx1 + 5]; 3354 t[idx5] = a[idx1 + 6]; 3355 t[idx5 + 1] = a[idx1 + 7]; 3356 } 3357 fftRows.complexForward(t, startt); 3358 fftRows.complexForward(t, startt + 2 * rows); 3359 fftRows.complexForward(t, startt + 4 * rows); 3360 fftRows.complexForward(t, startt + 6 * rows); 3361 for (int r = 0; r < rows; r++) { 3362 idx1 = idx0 + r * rowStride + c; 3363 idx2 = startt + 2 * r; 3364 idx3 = startt + 2 * rows + 2 * r; 3365 idx4 = idx3 + 2 * rows; 3366 idx5 = idx4 + 2 * rows; 3367 a[idx1] = t[idx2]; 3368 a[idx1 + 1] = t[idx2 + 1]; 3369 a[idx1 + 2] = t[idx3]; 3370 a[idx1 + 3] = t[idx3 + 1]; 3371 a[idx1 + 4] = t[idx4]; 3372 a[idx1 + 5] = t[idx4 + 1]; 3373 a[idx1 + 6] = t[idx5]; 3374 a[idx1 + 7] = t[idx5 + 1]; 3375 } 3376 } 3377 } else if (columns == 4) { 3378 for (int r = 0; r < rows; r++) { 3379 idx1 = idx0 + r * rowStride; 3380 idx2 = startt + 2 * r; 3381 idx3 = startt + 2 * rows + 2 * r; 3382 t[idx2] = a[idx1]; 3383 t[idx2 + 1] = a[idx1 + 1]; 3384 t[idx3] = a[idx1 + 2]; 3385 t[idx3 + 1] = a[idx1 + 3]; 3386 } 3387 fftRows.complexForward(t, startt); 3388 fftRows.complexForward(t, startt + 2 * rows); 3389 for (int r = 0; r < rows; r++) { 3390 idx1 = idx0 + r * rowStride; 3391 idx2 = startt + 2 * r; 3392 idx3 = startt + 2 * rows + 2 * r; 3393 a[idx1] = t[idx2]; 3394 a[idx1 + 1] = t[idx2 + 1]; 3395 a[idx1 + 2] = t[idx3]; 3396 a[idx1 + 3] = t[idx3 + 1]; 3397 } 3398 } else if (columns == 2) { 3399 for (int r = 0; r < rows; r++) { 3400 idx1 = idx0 + r * rowStride; 3401 idx2 = startt + 2 * r; 3402 t[idx2] = a[idx1]; 3403 t[idx2 + 1] = a[idx1 + 1]; 3404 } 3405 fftRows.complexForward(t, startt); 3406 for (int r = 0; r < rows; r++) { 3407 idx1 = idx0 + r * rowStride; 3408 idx2 = startt + 2 * r; 3409 a[idx1] = t[idx2]; 3410 a[idx1 + 1] = t[idx2 + 1]; 3411 } 3412 } 3413 3414 } 3415 } else { 3416 for (int s = n0; s < slices; s += nthreads) { 3417 idx0 = s * sliceStride; 3418 if (icr == 0) { 3419 for (int r = 0; r < rows; r++) { 3420 fftColumns.complexInverse(a, idx0 + r * rowStride, scale); 3421 } 3422 } 3423 if (columns > 4) { 3424 for (int c = 0; c < columns; c += 8) { 3425 for (int r = 0; r < rows; r++) { 3426 idx1 = idx0 + r * rowStride + c; 3427 idx2 = startt + 2 * r; 3428 idx3 = startt + 2 * rows + 2 * r; 3429 idx4 = idx3 + 2 * rows; 3430 idx5 = idx4 + 2 * rows; 3431 t[idx2] = a[idx1]; 3432 t[idx2 + 1] = a[idx1 + 1]; 3433 t[idx3] = a[idx1 + 2]; 3434 t[idx3 + 1] = a[idx1 + 3]; 3435 t[idx4] = a[idx1 + 4]; 3436 t[idx4 + 1] = a[idx1 + 5]; 3437 t[idx5] = a[idx1 + 6]; 3438 t[idx5 + 1] = a[idx1 + 7]; 3439 } 3440 fftRows.complexInverse(t, startt, scale); 3441 fftRows.complexInverse(t, startt + 2 * rows, scale); 3442 fftRows.complexInverse(t, startt + 4 * rows, scale); 3443 fftRows.complexInverse(t, startt + 6 * rows, scale); 3444 for (int r = 0; r < rows; r++) { 3445 idx1 = idx0 + r * rowStride + c; 3446 idx2 = startt + 2 * r; 3447 idx3 = startt + 2 * rows + 2 * r; 3448 idx4 = idx3 + 2 * rows; 3449 idx5 = idx4 + 2 * rows; 3450 a[idx1] = t[idx2]; 3451 a[idx1 + 1] = t[idx2 + 1]; 3452 a[idx1 + 2] = t[idx3]; 3453 a[idx1 + 3] = t[idx3 + 1]; 3454 a[idx1 + 4] = t[idx4]; 3455 a[idx1 + 5] = t[idx4 + 1]; 3456 a[idx1 + 6] = t[idx5]; 3457 a[idx1 + 7] = t[idx5 + 1]; 3458 } 3459 } 3460 } else if (columns == 4) { 3461 for (int r = 0; r < rows; r++) { 3462 idx1 = idx0 + r * rowStride; 3463 idx2 = startt + 2 * r; 3464 idx3 = startt + 2 * rows + 2 * r; 3465 t[idx2] = a[idx1]; 3466 t[idx2 + 1] = a[idx1 + 1]; 3467 t[idx3] = a[idx1 + 2]; 3468 t[idx3 + 1] = a[idx1 + 3]; 3469 } 3470 fftRows.complexInverse(t, startt, scale); 3471 fftRows.complexInverse(t, startt + 2 * rows, scale); 3472 for (int r = 0; r < rows; r++) { 3473 idx1 = idx0 + r * rowStride; 3474 idx2 = startt + 2 * r; 3475 idx3 = startt + 2 * rows + 2 * r; 3476 a[idx1] = t[idx2]; 3477 a[idx1 + 1] = t[idx2 + 1]; 3478 a[idx1 + 2] = t[idx3]; 3479 a[idx1 + 3] = t[idx3 + 1]; 3480 } 3481 } else if (columns == 2) { 3482 for (int r = 0; r < rows; r++) { 3483 idx1 = idx0 + r * rowStride; 3484 idx2 = startt + 2 * r; 3485 t[idx2] = a[idx1]; 3486 t[idx2 + 1] = a[idx1 + 1]; 3487 } 3488 fftRows.complexInverse(t, startt, scale); 3489 for (int r = 0; r < rows; r++) { 3490 idx1 = idx0 + r * rowStride; 3491 idx2 = startt + 2 * r; 3492 a[idx1] = t[idx2]; 3493 a[idx1 + 1] = t[idx2 + 1]; 3494 } 3495 } 3496 if (icr != 0) { 3497 for (int r = 0; r < rows; r++) { 3498 fftColumns.realInverse(a, idx0 + r * rowStride, scale); 3499 } 3500 } 3501 } 3502 } 3503 } 3504 }); 3505 } 3506 ConcurrencyUtils.waitForCompletion(futures); 3507 } 3508 3509 private void xdft3da_subth2(final int icr, final int isgn, final float[] a, final boolean scale) { 3510 int nt, i; 3511 3512 final int nthreads = Math.min(ConcurrencyUtils.getNumberOfThreads(), slices); 3513 nt = 8 * rows; 3514 if (columns == 4) { 3515 nt >>= 1; 3516 } else if (columns < 4) { 3517 nt >>= 2; 3518 } 3519 Future<?>[] futures = new Future[nthreads]; 3520 for (i = 0; i < nthreads; i++) { 3521 final int n0 = i; 3522 final int startt = nt * i; 3523 futures[i] = ConcurrencyUtils.submit(new Runnable() { 3524 @Override 3525 public void run() { 3526 int idx0, idx1, idx2, idx3, idx4, idx5; 3527 3528 if (isgn == -1) { 3529 for (int s = n0; s < slices; s += nthreads) { 3530 idx0 = s * sliceStride; 3531 if (icr == 0) { 3532 for (int r = 0; r < rows; r++) { 3533 fftColumns.complexForward(a, idx0 + r * rowStride); 3534 } 3535 } else { 3536 for (int r = 0; r < rows; r++) { 3537 fftColumns.realForward(a, idx0 + r * rowStride); 3538 } 3539 } 3540 if (columns > 4) { 3541 for (int c = 0; c < columns; c += 8) { 3542 for (int r = 0; r < rows; r++) { 3543 idx1 = idx0 + r * rowStride + c; 3544 idx2 = startt + 2 * r; 3545 idx3 = startt + 2 * rows + 2 * r; 3546 idx4 = idx3 + 2 * rows; 3547 idx5 = idx4 + 2 * rows; 3548 t[idx2] = a[idx1]; 3549 t[idx2 + 1] = a[idx1 + 1]; 3550 t[idx3] = a[idx1 + 2]; 3551 t[idx3 + 1] = a[idx1 + 3]; 3552 t[idx4] = a[idx1 + 4]; 3553 t[idx4 + 1] = a[idx1 + 5]; 3554 t[idx5] = a[idx1 + 6]; 3555 t[idx5 + 1] = a[idx1 + 7]; 3556 } 3557 fftRows.complexForward(t, startt); 3558 fftRows.complexForward(t, startt + 2 * rows); 3559 fftRows.complexForward(t, startt + 4 * rows); 3560 fftRows.complexForward(t, startt + 6 * rows); 3561 for (int r = 0; r < rows; r++) { 3562 idx1 = idx0 + r * rowStride + c; 3563 idx2 = startt + 2 * r; 3564 idx3 = startt + 2 * rows + 2 * r; 3565 idx4 = idx3 + 2 * rows; 3566 idx5 = idx4 + 2 * rows; 3567 a[idx1] = t[idx2]; 3568 a[idx1 + 1] = t[idx2 + 1]; 3569 a[idx1 + 2] = t[idx3]; 3570 a[idx1 + 3] = t[idx3 + 1]; 3571 a[idx1 + 4] = t[idx4]; 3572 a[idx1 + 5] = t[idx4 + 1]; 3573 a[idx1 + 6] = t[idx5]; 3574 a[idx1 + 7] = t[idx5 + 1]; 3575 } 3576 } 3577 } else if (columns == 4) { 3578 for (int r = 0; r < rows; r++) { 3579 idx1 = idx0 + r * rowStride; 3580 idx2 = startt + 2 * r; 3581 idx3 = startt + 2 * rows + 2 * r; 3582 t[idx2] = a[idx1]; 3583 t[idx2 + 1] = a[idx1 + 1]; 3584 t[idx3] = a[idx1 + 2]; 3585 t[idx3 + 1] = a[idx1 + 3]; 3586 } 3587 fftRows.complexForward(t, startt); 3588 fftRows.complexForward(t, startt + 2 * rows); 3589 for (int r = 0; r < rows; r++) { 3590 idx1 = idx0 + r * rowStride; 3591 idx2 = startt + 2 * r; 3592 idx3 = startt + 2 * rows + 2 * r; 3593 a[idx1] = t[idx2]; 3594 a[idx1 + 1] = t[idx2 + 1]; 3595 a[idx1 + 2] = t[idx3]; 3596 a[idx1 + 3] = t[idx3 + 1]; 3597 } 3598 } else if (columns == 2) { 3599 for (int r = 0; r < rows; r++) { 3600 idx1 = idx0 + r * rowStride; 3601 idx2 = startt + 2 * r; 3602 t[idx2] = a[idx1]; 3603 t[idx2 + 1] = a[idx1 + 1]; 3604 } 3605 fftRows.complexForward(t, startt); 3606 for (int r = 0; r < rows; r++) { 3607 idx1 = idx0 + r * rowStride; 3608 idx2 = startt + 2 * r; 3609 a[idx1] = t[idx2]; 3610 a[idx1 + 1] = t[idx2 + 1]; 3611 } 3612 } 3613 3614 } 3615 } else { 3616 for (int s = n0; s < slices; s += nthreads) { 3617 idx0 = s * sliceStride; 3618 if (icr == 0) { 3619 for (int r = 0; r < rows; r++) { 3620 fftColumns.complexInverse(a, idx0 + r * rowStride, scale); 3621 } 3622 } else { 3623 for (int r = 0; r < rows; r++) { 3624 fftColumns.realInverse2(a, idx0 + r * rowStride, scale); 3625 } 3626 } 3627 if (columns > 4) { 3628 for (int c = 0; c < columns; c += 8) { 3629 for (int r = 0; r < rows; r++) { 3630 idx1 = idx0 + r * rowStride + c; 3631 idx2 = startt + 2 * r; 3632 idx3 = startt + 2 * rows + 2 * r; 3633 idx4 = idx3 + 2 * rows; 3634 idx5 = idx4 + 2 * rows; 3635 t[idx2] = a[idx1]; 3636 t[idx2 + 1] = a[idx1 + 1]; 3637 t[idx3] = a[idx1 + 2]; 3638 t[idx3 + 1] = a[idx1 + 3]; 3639 t[idx4] = a[idx1 + 4]; 3640 t[idx4 + 1] = a[idx1 + 5]; 3641 t[idx5] = a[idx1 + 6]; 3642 t[idx5 + 1] = a[idx1 + 7]; 3643 } 3644 fftRows.complexInverse(t, startt, scale); 3645 fftRows.complexInverse(t, startt + 2 * rows, scale); 3646 fftRows.complexInverse(t, startt + 4 * rows, scale); 3647 fftRows.complexInverse(t, startt + 6 * rows, scale); 3648 for (int r = 0; r < rows; r++) { 3649 idx1 = idx0 + r * rowStride + c; 3650 idx2 = startt + 2 * r; 3651 idx3 = startt + 2 * rows + 2 * r; 3652 idx4 = idx3 + 2 * rows; 3653 idx5 = idx4 + 2 * rows; 3654 a[idx1] = t[idx2]; 3655 a[idx1 + 1] = t[idx2 + 1]; 3656 a[idx1 + 2] = t[idx3]; 3657 a[idx1 + 3] = t[idx3 + 1]; 3658 a[idx1 + 4] = t[idx4]; 3659 a[idx1 + 5] = t[idx4 + 1]; 3660 a[idx1 + 6] = t[idx5]; 3661 a[idx1 + 7] = t[idx5 + 1]; 3662 } 3663 } 3664 } else if (columns == 4) { 3665 for (int r = 0; r < rows; r++) { 3666 idx1 = idx0 + r * rowStride; 3667 idx2 = startt + 2 * r; 3668 idx3 = startt + 2 * rows + 2 * r; 3669 t[idx2] = a[idx1]; 3670 t[idx2 + 1] = a[idx1 + 1]; 3671 t[idx3] = a[idx1 + 2]; 3672 t[idx3 + 1] = a[idx1 + 3]; 3673 } 3674 fftRows.complexInverse(t, startt, scale); 3675 fftRows.complexInverse(t, startt + 2 * rows, scale); 3676 for (int r = 0; r < rows; r++) { 3677 idx1 = idx0 + r * rowStride; 3678 idx2 = startt + 2 * r; 3679 idx3 = startt + 2 * rows + 2 * r; 3680 a[idx1] = t[idx2]; 3681 a[idx1 + 1] = t[idx2 + 1]; 3682 a[idx1 + 2] = t[idx3]; 3683 a[idx1 + 3] = t[idx3 + 1]; 3684 } 3685 } else if (columns == 2) { 3686 for (int r = 0; r < rows; r++) { 3687 idx1 = idx0 + r * rowStride; 3688 idx2 = startt + 2 * r; 3689 t[idx2] = a[idx1]; 3690 t[idx2 + 1] = a[idx1 + 1]; 3691 } 3692 fftRows.complexInverse(t, startt, scale); 3693 for (int r = 0; r < rows; r++) { 3694 idx1 = idx0 + r * rowStride; 3695 idx2 = startt + 2 * r; 3696 a[idx1] = t[idx2]; 3697 a[idx1 + 1] = t[idx2 + 1]; 3698 } 3699 } 3700 } 3701 } 3702 } 3703 }); 3704 } 3705 ConcurrencyUtils.waitForCompletion(futures); 3706 } 3707 3708 private void xdft3da_subth1(final int icr, final int isgn, final float[][][] a, final boolean scale) { 3709 int nt, i; 3710 3711 final int nthreads = Math.min(ConcurrencyUtils.getNumberOfThreads(), slices); 3712 nt = 8 * rows; 3713 if (columns == 4) { 3714 nt >>= 1; 3715 } else if (columns < 4) { 3716 nt >>= 2; 3717 } 3718 Future<?>[] futures = new Future[nthreads]; 3719 for (i = 0; i < nthreads; i++) { 3720 final int n0 = i; 3721 final int startt = nt * i; 3722 futures[i] = ConcurrencyUtils.submit(new Runnable() { 3723 @Override 3724 public void run() { 3725 int idx2, idx3, idx4, idx5; 3726 3727 if (isgn == -1) { 3728 for (int s = n0; s < slices; s += nthreads) { 3729 if (icr == 0) { 3730 for (int r = 0; r < rows; r++) { 3731 fftColumns.complexForward(a[s][r]); 3732 } 3733 } else { 3734 for (int r = 0; r < rows; r++) { 3735 fftColumns.realForward(a[s][r], 0); 3736 } 3737 } 3738 if (columns > 4) { 3739 for (int c = 0; c < columns; c += 8) { 3740 for (int r = 0; r < rows; r++) { 3741 idx2 = startt + 2 * r; 3742 idx3 = startt + 2 * rows + 2 * r; 3743 idx4 = idx3 + 2 * rows; 3744 idx5 = idx4 + 2 * rows; 3745 t[idx2] = a[s][r][c]; 3746 t[idx2 + 1] = a[s][r][c + 1]; 3747 t[idx3] = a[s][r][c + 2]; 3748 t[idx3 + 1] = a[s][r][c + 3]; 3749 t[idx4] = a[s][r][c + 4]; 3750 t[idx4 + 1] = a[s][r][c + 5]; 3751 t[idx5] = a[s][r][c + 6]; 3752 t[idx5 + 1] = a[s][r][c + 7]; 3753 } 3754 fftRows.complexForward(t, startt); 3755 fftRows.complexForward(t, startt + 2 * rows); 3756 fftRows.complexForward(t, startt + 4 * rows); 3757 fftRows.complexForward(t, startt + 6 * rows); 3758 for (int r = 0; r < rows; r++) { 3759 idx2 = startt + 2 * r; 3760 idx3 = startt + 2 * rows + 2 * r; 3761 idx4 = idx3 + 2 * rows; 3762 idx5 = idx4 + 2 * rows; 3763 a[s][r][c] = t[idx2]; 3764 a[s][r][c + 1] = t[idx2 + 1]; 3765 a[s][r][c + 2] = t[idx3]; 3766 a[s][r][c + 3] = t[idx3 + 1]; 3767 a[s][r][c + 4] = t[idx4]; 3768 a[s][r][c + 5] = t[idx4 + 1]; 3769 a[s][r][c + 6] = t[idx5]; 3770 a[s][r][c + 7] = t[idx5 + 1]; 3771 } 3772 } 3773 } else if (columns == 4) { 3774 for (int r = 0; r < rows; r++) { 3775 idx2 = startt + 2 * r; 3776 idx3 = startt + 2 * rows + 2 * r; 3777 t[idx2] = a[s][r][0]; 3778 t[idx2 + 1] = a[s][r][1]; 3779 t[idx3] = a[s][r][2]; 3780 t[idx3 + 1] = a[s][r][3]; 3781 } 3782 fftRows.complexForward(t, startt); 3783 fftRows.complexForward(t, startt + 2 * rows); 3784 for (int r = 0; r < rows; r++) { 3785 idx2 = startt + 2 * r; 3786 idx3 = startt + 2 * rows + 2 * r; 3787 a[s][r][0] = t[idx2]; 3788 a[s][r][1] = t[idx2 + 1]; 3789 a[s][r][2] = t[idx3]; 3790 a[s][r][3] = t[idx3 + 1]; 3791 } 3792 } else if (columns == 2) { 3793 for (int r = 0; r < rows; r++) { 3794 idx2 = startt + 2 * r; 3795 t[idx2] = a[s][r][0]; 3796 t[idx2 + 1] = a[s][r][1]; 3797 } 3798 fftRows.complexForward(t, startt); 3799 for (int r = 0; r < rows; r++) { 3800 idx2 = startt + 2 * r; 3801 a[s][r][0] = t[idx2]; 3802 a[s][r][1] = t[idx2 + 1]; 3803 } 3804 } 3805 3806 } 3807 } else { 3808 for (int s = n0; s < slices; s += nthreads) { 3809 if (icr == 0) { 3810 for (int r = 0; r < rows; r++) { 3811 fftColumns.complexInverse(a[s][r], scale); 3812 } 3813 } 3814 if (columns > 4) { 3815 for (int c = 0; c < columns; c += 8) { 3816 for (int r = 0; r < rows; r++) { 3817 idx2 = startt + 2 * r; 3818 idx3 = startt + 2 * rows + 2 * r; 3819 idx4 = idx3 + 2 * rows; 3820 idx5 = idx4 + 2 * rows; 3821 t[idx2] = a[s][r][c]; 3822 t[idx2 + 1] = a[s][r][c + 1]; 3823 t[idx3] = a[s][r][c + 2]; 3824 t[idx3 + 1] = a[s][r][c + 3]; 3825 t[idx4] = a[s][r][c + 4]; 3826 t[idx4 + 1] = a[s][r][c + 5]; 3827 t[idx5] = a[s][r][c + 6]; 3828 t[idx5 + 1] = a[s][r][c + 7]; 3829 } 3830 fftRows.complexInverse(t, startt, scale); 3831 fftRows.complexInverse(t, startt + 2 * rows, scale); 3832 fftRows.complexInverse(t, startt + 4 * rows, scale); 3833 fftRows.complexInverse(t, startt + 6 * rows, scale); 3834 for (int r = 0; r < rows; r++) { 3835 idx2 = startt + 2 * r; 3836 idx3 = startt + 2 * rows + 2 * r; 3837 idx4 = idx3 + 2 * rows; 3838 idx5 = idx4 + 2 * rows; 3839 a[s][r][c] = t[idx2]; 3840 a[s][r][c + 1] = t[idx2 + 1]; 3841 a[s][r][c + 2] = t[idx3]; 3842 a[s][r][c + 3] = t[idx3 + 1]; 3843 a[s][r][c + 4] = t[idx4]; 3844 a[s][r][c + 5] = t[idx4 + 1]; 3845 a[s][r][c + 6] = t[idx5]; 3846 a[s][r][c + 7] = t[idx5 + 1]; 3847 } 3848 } 3849 } else if (columns == 4) { 3850 for (int r = 0; r < rows; r++) { 3851 idx2 = startt + 2 * r; 3852 idx3 = startt + 2 * rows + 2 * r; 3853 t[idx2] = a[s][r][0]; 3854 t[idx2 + 1] = a[s][r][1]; 3855 t[idx3] = a[s][r][2]; 3856 t[idx3 + 1] = a[s][r][3]; 3857 } 3858 fftRows.complexInverse(t, startt, scale); 3859 fftRows.complexInverse(t, startt + 2 * rows, scale); 3860 for (int r = 0; r < rows; r++) { 3861 idx2 = startt + 2 * r; 3862 idx3 = startt + 2 * rows + 2 * r; 3863 a[s][r][0] = t[idx2]; 3864 a[s][r][1] = t[idx2 + 1]; 3865 a[s][r][2] = t[idx3]; 3866 a[s][r][3] = t[idx3 + 1]; 3867 } 3868 } else if (columns == 2) { 3869 for (int r = 0; r < rows; r++) { 3870 idx2 = startt + 2 * r; 3871 t[idx2] = a[s][r][0]; 3872 t[idx2 + 1] = a[s][r][1]; 3873 } 3874 fftRows.complexInverse(t, startt, scale); 3875 for (int r = 0; r < rows; r++) { 3876 idx2 = startt + 2 * r; 3877 a[s][r][0] = t[idx2]; 3878 a[s][r][1] = t[idx2 + 1]; 3879 } 3880 } 3881 if (icr != 0) { 3882 for (int r = 0; r < rows; r++) { 3883 fftColumns.realInverse(a[s][r], scale); 3884 } 3885 } 3886 } 3887 } 3888 } 3889 }); 3890 } 3891 ConcurrencyUtils.waitForCompletion(futures); 3892 } 3893 3894 private void xdft3da_subth2(final int icr, final int isgn, final float[][][] a, final boolean scale) { 3895 int nt, i; 3896 3897 final int nthreads = Math.min(ConcurrencyUtils.getNumberOfThreads(), slices); 3898 nt = 8 * rows; 3899 if (columns == 4) { 3900 nt >>= 1; 3901 } else if (columns < 4) { 3902 nt >>= 2; 3903 } 3904 Future<?>[] futures = new Future[nthreads]; 3905 for (i = 0; i < nthreads; i++) { 3906 final int n0 = i; 3907 final int startt = nt * i; 3908 futures[i] = ConcurrencyUtils.submit(new Runnable() { 3909 @Override 3910 public void run() { 3911 int idx2, idx3, idx4, idx5; 3912 3913 if (isgn == -1) { 3914 for (int s = n0; s < slices; s += nthreads) { 3915 if (icr == 0) { 3916 for (int r = 0; r < rows; r++) { 3917 fftColumns.complexForward(a[s][r]); 3918 } 3919 } else { 3920 for (int r = 0; r < rows; r++) { 3921 fftColumns.realForward(a[s][r]); 3922 } 3923 } 3924 if (columns > 4) { 3925 for (int c = 0; c < columns; c += 8) { 3926 for (int r = 0; r < rows; r++) { 3927 idx2 = startt + 2 * r; 3928 idx3 = startt + 2 * rows + 2 * r; 3929 idx4 = idx3 + 2 * rows; 3930 idx5 = idx4 + 2 * rows; 3931 t[idx2] = a[s][r][c]; 3932 t[idx2 + 1] = a[s][r][c + 1]; 3933 t[idx3] = a[s][r][c + 2]; 3934 t[idx3 + 1] = a[s][r][c + 3]; 3935 t[idx4] = a[s][r][c + 4]; 3936 t[idx4 + 1] = a[s][r][c + 5]; 3937 t[idx5] = a[s][r][c + 6]; 3938 t[idx5 + 1] = a[s][r][c + 7]; 3939 } 3940 fftRows.complexForward(t, startt); 3941 fftRows.complexForward(t, startt + 2 * rows); 3942 fftRows.complexForward(t, startt + 4 * rows); 3943 fftRows.complexForward(t, startt + 6 * rows); 3944 for (int r = 0; r < rows; r++) { 3945 idx2 = startt + 2 * r; 3946 idx3 = startt + 2 * rows + 2 * r; 3947 idx4 = idx3 + 2 * rows; 3948 idx5 = idx4 + 2 * rows; 3949 a[s][r][c] = t[idx2]; 3950 a[s][r][c + 1] = t[idx2 + 1]; 3951 a[s][r][c + 2] = t[idx3]; 3952 a[s][r][c + 3] = t[idx3 + 1]; 3953 a[s][r][c + 4] = t[idx4]; 3954 a[s][r][c + 5] = t[idx4 + 1]; 3955 a[s][r][c + 6] = t[idx5]; 3956 a[s][r][c + 7] = t[idx5 + 1]; 3957 } 3958 } 3959 } else if (columns == 4) { 3960 for (int r = 0; r < rows; r++) { 3961 idx2 = startt + 2 * r; 3962 idx3 = startt + 2 * rows + 2 * r; 3963 t[idx2] = a[s][r][0]; 3964 t[idx2 + 1] = a[s][r][1]; 3965 t[idx3] = a[s][r][2]; 3966 t[idx3 + 1] = a[s][r][3]; 3967 } 3968 fftRows.complexForward(t, startt); 3969 fftRows.complexForward(t, startt + 2 * rows); 3970 for (int r = 0; r < rows; r++) { 3971 idx2 = startt + 2 * r; 3972 idx3 = startt + 2 * rows + 2 * r; 3973 a[s][r][0] = t[idx2]; 3974 a[s][r][1] = t[idx2 + 1]; 3975 a[s][r][2] = t[idx3]; 3976 a[s][r][3] = t[idx3 + 1]; 3977 } 3978 } else if (columns == 2) { 3979 for (int r = 0; r < rows; r++) { 3980 idx2 = startt + 2 * r; 3981 t[idx2] = a[s][r][0]; 3982 t[idx2 + 1] = a[s][r][1]; 3983 } 3984 fftRows.complexForward(t, startt); 3985 for (int r = 0; r < rows; r++) { 3986 idx2 = startt + 2 * r; 3987 a[s][r][0] = t[idx2]; 3988 a[s][r][1] = t[idx2 + 1]; 3989 } 3990 } 3991 3992 } 3993 } else { 3994 for (int s = n0; s < slices; s += nthreads) { 3995 if (icr == 0) { 3996 for (int r = 0; r < rows; r++) { 3997 fftColumns.complexInverse(a[s][r], scale); 3998 } 3999 } else { 4000 for (int r = 0; r < rows; r++) { 4001 fftColumns.realInverse2(a[s][r], 0, scale); 4002 } 4003 } 4004 if (columns > 4) { 4005 for (int c = 0; c < columns; c += 8) { 4006 for (int r = 0; r < rows; r++) { 4007 idx2 = startt + 2 * r; 4008 idx3 = startt + 2 * rows + 2 * r; 4009 idx4 = idx3 + 2 * rows; 4010 idx5 = idx4 + 2 * rows; 4011 t[idx2] = a[s][r][c]; 4012 t[idx2 + 1] = a[s][r][c + 1]; 4013 t[idx3] = a[s][r][c + 2]; 4014 t[idx3 + 1] = a[s][r][c + 3]; 4015 t[idx4] = a[s][r][c + 4]; 4016 t[idx4 + 1] = a[s][r][c + 5]; 4017 t[idx5] = a[s][r][c + 6]; 4018 t[idx5 + 1] = a[s][r][c + 7]; 4019 } 4020 fftRows.complexInverse(t, startt, scale); 4021 fftRows.complexInverse(t, startt + 2 * rows, scale); 4022 fftRows.complexInverse(t, startt + 4 * rows, scale); 4023 fftRows.complexInverse(t, startt + 6 * rows, scale); 4024 for (int r = 0; r < rows; r++) { 4025 idx2 = startt + 2 * r; 4026 idx3 = startt + 2 * rows + 2 * r; 4027 idx4 = idx3 + 2 * rows; 4028 idx5 = idx4 + 2 * rows; 4029 a[s][r][c] = t[idx2]; 4030 a[s][r][c + 1] = t[idx2 + 1]; 4031 a[s][r][c + 2] = t[idx3]; 4032 a[s][r][c + 3] = t[idx3 + 1]; 4033 a[s][r][c + 4] = t[idx4]; 4034 a[s][r][c + 5] = t[idx4 + 1]; 4035 a[s][r][c + 6] = t[idx5]; 4036 a[s][r][c + 7] = t[idx5 + 1]; 4037 } 4038 } 4039 } else if (columns == 4) { 4040 for (int r = 0; r < rows; r++) { 4041 idx2 = startt + 2 * r; 4042 idx3 = startt + 2 * rows + 2 * r; 4043 t[idx2] = a[s][r][0]; 4044 t[idx2 + 1] = a[s][r][1]; 4045 t[idx3] = a[s][r][2]; 4046 t[idx3 + 1] = a[s][r][3]; 4047 } 4048 fftRows.complexInverse(t, startt, scale); 4049 fftRows.complexInverse(t, startt + 2 * rows, scale); 4050 for (int r = 0; r < rows; r++) { 4051 idx2 = startt + 2 * r; 4052 idx3 = startt + 2 * rows + 2 * r; 4053 a[s][r][0] = t[idx2]; 4054 a[s][r][1] = t[idx2 + 1]; 4055 a[s][r][2] = t[idx3]; 4056 a[s][r][3] = t[idx3 + 1]; 4057 } 4058 } else if (columns == 2) { 4059 for (int r = 0; r < rows; r++) { 4060 idx2 = startt + 2 * r; 4061 t[idx2] = a[s][r][0]; 4062 t[idx2 + 1] = a[s][r][1]; 4063 } 4064 fftRows.complexInverse(t, startt, scale); 4065 for (int r = 0; r < rows; r++) { 4066 idx2 = startt + 2 * r; 4067 a[s][r][0] = t[idx2]; 4068 a[s][r][1] = t[idx2 + 1]; 4069 } 4070 } 4071 } 4072 } 4073 } 4074 }); 4075 } 4076 ConcurrencyUtils.waitForCompletion(futures); 4077 } 4078 4079 private void cdft3db_subth(final int isgn, final float[] a, final boolean scale) { 4080 int nt, i; 4081 4082 final int nthreads = Math.min(ConcurrencyUtils.getNumberOfThreads(), rows); 4083 nt = 8 * slices; 4084 if (columns == 4) { 4085 nt >>= 1; 4086 } else if (columns < 4) { 4087 nt >>= 2; 4088 } 4089 Future<?>[] futures = new Future[nthreads]; 4090 for (i = 0; i < nthreads; i++) { 4091 final int n0 = i; 4092 final int startt = nt * i; 4093 futures[i] = ConcurrencyUtils.submit(new Runnable() { 4094 4095 @Override 4096 public void run() { 4097 int idx0, idx1, idx2, idx3, idx4, idx5; 4098 4099 if (isgn == -1) { 4100 if (columns > 4) { 4101 for (int r = n0; r < rows; r += nthreads) { 4102 idx0 = r * rowStride; 4103 for (int c = 0; c < columns; c += 8) { 4104 for (int s = 0; s < slices; s++) { 4105 idx1 = s * sliceStride + idx0 + c; 4106 idx2 = startt + 2 * s; 4107 idx3 = startt + 2 * slices + 2 * s; 4108 idx4 = idx3 + 2 * slices; 4109 idx5 = idx4 + 2 * slices; 4110 t[idx2] = a[idx1]; 4111 t[idx2 + 1] = a[idx1 + 1]; 4112 t[idx3] = a[idx1 + 2]; 4113 t[idx3 + 1] = a[idx1 + 3]; 4114 t[idx4] = a[idx1 + 4]; 4115 t[idx4 + 1] = a[idx1 + 5]; 4116 t[idx5] = a[idx1 + 6]; 4117 t[idx5 + 1] = a[idx1 + 7]; 4118 } 4119 fftSlices.complexForward(t, startt); 4120 fftSlices.complexForward(t, startt + 2 * slices); 4121 fftSlices.complexForward(t, startt + 4 * slices); 4122 fftSlices.complexForward(t, startt + 6 * slices); 4123 for (int s = 0; s < slices; s++) { 4124 idx1 = s * sliceStride + idx0 + c; 4125 idx2 = startt + 2 * s; 4126 idx3 = startt + 2 * slices + 2 * s; 4127 idx4 = idx3 + 2 * slices; 4128 idx5 = idx4 + 2 * slices; 4129 a[idx1] = t[idx2]; 4130 a[idx1 + 1] = t[idx2 + 1]; 4131 a[idx1 + 2] = t[idx3]; 4132 a[idx1 + 3] = t[idx3 + 1]; 4133 a[idx1 + 4] = t[idx4]; 4134 a[idx1 + 5] = t[idx4 + 1]; 4135 a[idx1 + 6] = t[idx5]; 4136 a[idx1 + 7] = t[idx5 + 1]; 4137 } 4138 } 4139 } 4140 } else if (columns == 4) { 4141 for (int r = n0; r < rows; r += nthreads) { 4142 idx0 = r * rowStride; 4143 for (int s = 0; s < slices; s++) { 4144 idx1 = s * sliceStride + idx0; 4145 idx2 = startt + 2 * s; 4146 idx3 = startt + 2 * slices + 2 * s; 4147 t[idx2] = a[idx1]; 4148 t[idx2 + 1] = a[idx1 + 1]; 4149 t[idx3] = a[idx1 + 2]; 4150 t[idx3 + 1] = a[idx1 + 3]; 4151 } 4152 fftSlices.complexForward(t, startt); 4153 fftSlices.complexForward(t, startt + 2 * slices); 4154 for (int s = 0; s < slices; s++) { 4155 idx1 = s * sliceStride + idx0; 4156 idx2 = startt + 2 * s; 4157 idx3 = startt + 2 * slices + 2 * s; 4158 a[idx1] = t[idx2]; 4159 a[idx1 + 1] = t[idx2 + 1]; 4160 a[idx1 + 2] = t[idx3]; 4161 a[idx1 + 3] = t[idx3 + 1]; 4162 } 4163 } 4164 } else if (columns == 2) { 4165 for (int r = n0; r < rows; r += nthreads) { 4166 idx0 = r * rowStride; 4167 for (int s = 0; s < slices; s++) { 4168 idx1 = s * sliceStride + idx0; 4169 idx2 = startt + 2 * s; 4170 t[idx2] = a[idx1]; 4171 t[idx2 + 1] = a[idx1 + 1]; 4172 } 4173 fftSlices.complexForward(t, startt); 4174 for (int s = 0; s < slices; s++) { 4175 idx1 = s * sliceStride + idx0; 4176 idx2 = startt + 2 * s; 4177 a[idx1] = t[idx2]; 4178 a[idx1 + 1] = t[idx2 + 1]; 4179 } 4180 } 4181 } 4182 } else { 4183 if (columns > 4) { 4184 for (int r = n0; r < rows; r += nthreads) { 4185 idx0 = r * rowStride; 4186 for (int c = 0; c < columns; c += 8) { 4187 for (int s = 0; s < slices; s++) { 4188 idx1 = s * sliceStride + idx0 + c; 4189 idx2 = startt + 2 * s; 4190 idx3 = startt + 2 * slices + 2 * s; 4191 idx4 = idx3 + 2 * slices; 4192 idx5 = idx4 + 2 * slices; 4193 t[idx2] = a[idx1]; 4194 t[idx2 + 1] = a[idx1 + 1]; 4195 t[idx3] = a[idx1 + 2]; 4196 t[idx3 + 1] = a[idx1 + 3]; 4197 t[idx4] = a[idx1 + 4]; 4198 t[idx4 + 1] = a[idx1 + 5]; 4199 t[idx5] = a[idx1 + 6]; 4200 t[idx5 + 1] = a[idx1 + 7]; 4201 } 4202 fftSlices.complexInverse(t, startt, scale); 4203 fftSlices.complexInverse(t, startt + 2 * slices, scale); 4204 fftSlices.complexInverse(t, startt + 4 * slices, scale); 4205 fftSlices.complexInverse(t, startt + 6 * slices, scale); 4206 for (int s = 0; s < slices; s++) { 4207 idx1 = s * sliceStride + idx0 + c; 4208 idx2 = startt + 2 * s; 4209 idx3 = startt + 2 * slices + 2 * s; 4210 idx4 = idx3 + 2 * slices; 4211 idx5 = idx4 + 2 * slices; 4212 a[idx1] = t[idx2]; 4213 a[idx1 + 1] = t[idx2 + 1]; 4214 a[idx1 + 2] = t[idx3]; 4215 a[idx1 + 3] = t[idx3 + 1]; 4216 a[idx1 + 4] = t[idx4]; 4217 a[idx1 + 5] = t[idx4 + 1]; 4218 a[idx1 + 6] = t[idx5]; 4219 a[idx1 + 7] = t[idx5 + 1]; 4220 } 4221 } 4222 } 4223 } else if (columns == 4) { 4224 for (int r = n0; r < rows; r += nthreads) { 4225 idx0 = r * rowStride; 4226 for (int s = 0; s < slices; s++) { 4227 idx1 = s * sliceStride + idx0; 4228 idx2 = startt + 2 * s; 4229 idx3 = startt + 2 * slices + 2 * s; 4230 t[idx2] = a[idx1]; 4231 t[idx2 + 1] = a[idx1 + 1]; 4232 t[idx3] = a[idx1 + 2]; 4233 t[idx3 + 1] = a[idx1 + 3]; 4234 } 4235 fftSlices.complexInverse(t, startt, scale); 4236 fftSlices.complexInverse(t, startt + 2 * slices, scale); 4237 for (int s = 0; s < slices; s++) { 4238 idx1 = s * sliceStride + idx0; 4239 idx2 = startt + 2 * s; 4240 idx3 = startt + 2 * slices + 2 * s; 4241 a[idx1] = t[idx2]; 4242 a[idx1 + 1] = t[idx2 + 1]; 4243 a[idx1 + 2] = t[idx3]; 4244 a[idx1 + 3] = t[idx3 + 1]; 4245 } 4246 } 4247 } else if (columns == 2) { 4248 for (int r = n0; r < rows; r += nthreads) { 4249 idx0 = r * rowStride; 4250 for (int s = 0; s < slices; s++) { 4251 idx1 = s * sliceStride + idx0; 4252 idx2 = startt + 2 * s; 4253 t[idx2] = a[idx1]; 4254 t[idx2 + 1] = a[idx1 + 1]; 4255 } 4256 fftSlices.complexInverse(t, startt, scale); 4257 for (int s = 0; s < slices; s++) { 4258 idx1 = s * sliceStride + idx0; 4259 idx2 = startt + 2 * s; 4260 a[idx1] = t[idx2]; 4261 a[idx1 + 1] = t[idx2 + 1]; 4262 } 4263 } 4264 } 4265 } 4266 4267 } 4268 }); 4269 } 4270 ConcurrencyUtils.waitForCompletion(futures); 4271 } 4272 4273 private void cdft3db_subth(final int isgn, final float[][][] a, final boolean scale) { 4274 int nt, i; 4275 4276 final int nthreads = Math.min(ConcurrencyUtils.getNumberOfThreads(), rows); 4277 nt = 8 * slices; 4278 if (columns == 4) { 4279 nt >>= 1; 4280 } else if (columns < 4) { 4281 nt >>= 2; 4282 } 4283 Future<?>[] futures = new Future[nthreads]; 4284 for (i = 0; i < nthreads; i++) { 4285 final int n0 = i; 4286 final int startt = nt * i; 4287 futures[i] = ConcurrencyUtils.submit(new Runnable() { 4288 4289 @Override 4290 public void run() { 4291 int idx2, idx3, idx4, idx5; 4292 4293 if (isgn == -1) { 4294 if (columns > 4) { 4295 for (int r = n0; r < rows; r += nthreads) { 4296 for (int c = 0; c < columns; c += 8) { 4297 for (int s = 0; s < slices; s++) { 4298 idx2 = startt + 2 * s; 4299 idx3 = startt + 2 * slices + 2 * s; 4300 idx4 = idx3 + 2 * slices; 4301 idx5 = idx4 + 2 * slices; 4302 t[idx2] = a[s][r][c]; 4303 t[idx2 + 1] = a[s][r][c + 1]; 4304 t[idx3] = a[s][r][c + 2]; 4305 t[idx3 + 1] = a[s][r][c + 3]; 4306 t[idx4] = a[s][r][c + 4]; 4307 t[idx4 + 1] = a[s][r][c + 5]; 4308 t[idx5] = a[s][r][c + 6]; 4309 t[idx5 + 1] = a[s][r][c + 7]; 4310 } 4311 fftSlices.complexForward(t, startt); 4312 fftSlices.complexForward(t, startt + 2 * slices); 4313 fftSlices.complexForward(t, startt + 4 * slices); 4314 fftSlices.complexForward(t, startt + 6 * slices); 4315 for (int s = 0; s < slices; s++) { 4316 idx2 = startt + 2 * s; 4317 idx3 = startt + 2 * slices + 2 * s; 4318 idx4 = idx3 + 2 * slices; 4319 idx5 = idx4 + 2 * slices; 4320 a[s][r][c] = t[idx2]; 4321 a[s][r][c + 1] = t[idx2 + 1]; 4322 a[s][r][c + 2] = t[idx3]; 4323 a[s][r][c + 3] = t[idx3 + 1]; 4324 a[s][r][c + 4] = t[idx4]; 4325 a[s][r][c + 5] = t[idx4 + 1]; 4326 a[s][r][c + 6] = t[idx5]; 4327 a[s][r][c + 7] = t[idx5 + 1]; 4328 } 4329 } 4330 } 4331 } else if (columns == 4) { 4332 for (int r = n0; r < rows; r += nthreads) { 4333 for (int s = 0; s < slices; s++) { 4334 idx2 = startt + 2 * s; 4335 idx3 = startt + 2 * slices + 2 * s; 4336 t[idx2] = a[s][r][0]; 4337 t[idx2 + 1] = a[s][r][1]; 4338 t[idx3] = a[s][r][2]; 4339 t[idx3 + 1] = a[s][r][3]; 4340 } 4341 fftSlices.complexForward(t, startt); 4342 fftSlices.complexForward(t, startt + 2 * slices); 4343 for (int s = 0; s < slices; s++) { 4344 idx2 = startt + 2 * s; 4345 idx3 = startt + 2 * slices + 2 * s; 4346 a[s][r][0] = t[idx2]; 4347 a[s][r][1] = t[idx2 + 1]; 4348 a[s][r][2] = t[idx3]; 4349 a[s][r][3] = t[idx3 + 1]; 4350 } 4351 } 4352 } else if (columns == 2) { 4353 for (int r = n0; r < rows; r += nthreads) { 4354 for (int s = 0; s < slices; s++) { 4355 idx2 = startt + 2 * s; 4356 t[idx2] = a[s][r][0]; 4357 t[idx2 + 1] = a[s][r][1]; 4358 } 4359 fftSlices.complexForward(t, startt); 4360 for (int s = 0; s < slices; s++) { 4361 idx2 = startt + 2 * s; 4362 a[s][r][0] = t[idx2]; 4363 a[s][r][1] = t[idx2 + 1]; 4364 } 4365 } 4366 } 4367 } else { 4368 if (columns > 4) { 4369 for (int r = n0; r < rows; r += nthreads) { 4370 for (int c = 0; c < columns; c += 8) { 4371 for (int s = 0; s < slices; s++) { 4372 idx2 = startt + 2 * s; 4373 idx3 = startt + 2 * slices + 2 * s; 4374 idx4 = idx3 + 2 * slices; 4375 idx5 = idx4 + 2 * slices; 4376 t[idx2] = a[s][r][c]; 4377 t[idx2 + 1] = a[s][r][c + 1]; 4378 t[idx3] = a[s][r][c + 2]; 4379 t[idx3 + 1] = a[s][r][c + 3]; 4380 t[idx4] = a[s][r][c + 4]; 4381 t[idx4 + 1] = a[s][r][c + 5]; 4382 t[idx5] = a[s][r][c + 6]; 4383 t[idx5 + 1] = a[s][r][c + 7]; 4384 } 4385 fftSlices.complexInverse(t, startt, scale); 4386 fftSlices.complexInverse(t, startt + 2 * slices, scale); 4387 fftSlices.complexInverse(t, startt + 4 * slices, scale); 4388 fftSlices.complexInverse(t, startt + 6 * slices, scale); 4389 for (int s = 0; s < slices; s++) { 4390 idx2 = startt + 2 * s; 4391 idx3 = startt + 2 * slices + 2 * s; 4392 idx4 = idx3 + 2 * slices; 4393 idx5 = idx4 + 2 * slices; 4394 a[s][r][c] = t[idx2]; 4395 a[s][r][c + 1] = t[idx2 + 1]; 4396 a[s][r][c + 2] = t[idx3]; 4397 a[s][r][c + 3] = t[idx3 + 1]; 4398 a[s][r][c + 4] = t[idx4]; 4399 a[s][r][c + 5] = t[idx4 + 1]; 4400 a[s][r][c + 6] = t[idx5]; 4401 a[s][r][c + 7] = t[idx5 + 1]; 4402 } 4403 } 4404 } 4405 } else if (columns == 4) { 4406 for (int r = n0; r < rows; r += nthreads) { 4407 for (int s = 0; s < slices; s++) { 4408 idx2 = startt + 2 * s; 4409 idx3 = startt + 2 * slices + 2 * s; 4410 t[idx2] = a[s][r][0]; 4411 t[idx2 + 1] = a[s][r][1]; 4412 t[idx3] = a[s][r][2]; 4413 t[idx3 + 1] = a[s][r][3]; 4414 } 4415 fftSlices.complexInverse(t, startt, scale); 4416 fftSlices.complexInverse(t, startt + 2 * slices, scale); 4417 for (int s = 0; s < slices; s++) { 4418 idx2 = startt + 2 * s; 4419 idx3 = startt + 2 * slices + 2 * s; 4420 a[s][r][0] = t[idx2]; 4421 a[s][r][1] = t[idx2 + 1]; 4422 a[s][r][2] = t[idx3]; 4423 a[s][r][3] = t[idx3 + 1]; 4424 } 4425 } 4426 } else if (columns == 2) { 4427 for (int r = n0; r < rows; r += nthreads) { 4428 for (int s = 0; s < slices; s++) { 4429 idx2 = startt + 2 * s; 4430 t[idx2] = a[s][r][0]; 4431 t[idx2 + 1] = a[s][r][1]; 4432 } 4433 fftSlices.complexInverse(t, startt, scale); 4434 for (int s = 0; s < slices; s++) { 4435 idx2 = startt + 2 * s; 4436 a[s][r][0] = t[idx2]; 4437 a[s][r][1] = t[idx2 + 1]; 4438 } 4439 } 4440 } 4441 } 4442 4443 } 4444 }); 4445 } 4446 ConcurrencyUtils.waitForCompletion(futures); 4447 } 4448 4449 private void rdft3d_sub(int isgn, float[] a) { 4450 int n1h, n2h, i, j, k, l, idx1, idx2, idx3, idx4; 4451 float xi; 4452 4453 n1h = slices >> 1; 4454 n2h = rows >> 1; 4455 if (isgn < 0) { 4456 for (i = 1; i < n1h; i++) { 4457 j = slices - i; 4458 idx1 = i * sliceStride; 4459 idx2 = j * sliceStride; 4460 idx3 = i * sliceStride + n2h * rowStride; 4461 idx4 = j * sliceStride + n2h * rowStride; 4462 xi = a[idx1] - a[idx2]; 4463 a[idx1] += a[idx2]; 4464 a[idx2] = xi; 4465 xi = a[idx2 + 1] - a[idx1 + 1]; 4466 a[idx1 + 1] += a[idx2 + 1]; 4467 a[idx2 + 1] = xi; 4468 xi = a[idx3] - a[idx4]; 4469 a[idx3] += a[idx4]; 4470 a[idx4] = xi; 4471 xi = a[idx4 + 1] - a[idx3 + 1]; 4472 a[idx3 + 1] += a[idx4 + 1]; 4473 a[idx4 + 1] = xi; 4474 for (k = 1; k < n2h; k++) { 4475 l = rows - k; 4476 idx1 = i * sliceStride + k * rowStride; 4477 idx2 = j * sliceStride + l * rowStride; 4478 xi = a[idx1] - a[idx2]; 4479 a[idx1] += a[idx2]; 4480 a[idx2] = xi; 4481 xi = a[idx2 + 1] - a[idx1 + 1]; 4482 a[idx1 + 1] += a[idx2 + 1]; 4483 a[idx2 + 1] = xi; 4484 idx3 = j * sliceStride + k * rowStride; 4485 idx4 = i * sliceStride + l * rowStride; 4486 xi = a[idx3] - a[idx4]; 4487 a[idx3] += a[idx4]; 4488 a[idx4] = xi; 4489 xi = a[idx4 + 1] - a[idx3 + 1]; 4490 a[idx3 + 1] += a[idx4 + 1]; 4491 a[idx4 + 1] = xi; 4492 } 4493 } 4494 for (k = 1; k < n2h; k++) { 4495 l = rows - k; 4496 idx1 = k * rowStride; 4497 idx2 = l * rowStride; 4498 xi = a[idx1] - a[idx2]; 4499 a[idx1] += a[idx2]; 4500 a[idx2] = xi; 4501 xi = a[idx2 + 1] - a[idx1 + 1]; 4502 a[idx1 + 1] += a[idx2 + 1]; 4503 a[idx2 + 1] = xi; 4504 idx3 = n1h * sliceStride + k * rowStride; 4505 idx4 = n1h * sliceStride + l * rowStride; 4506 xi = a[idx3] - a[idx4]; 4507 a[idx3] += a[idx4]; 4508 a[idx4] = xi; 4509 xi = a[idx4 + 1] - a[idx3 + 1]; 4510 a[idx3 + 1] += a[idx4 + 1]; 4511 a[idx4 + 1] = xi; 4512 } 4513 } else { 4514 for (i = 1; i < n1h; i++) { 4515 j = slices - i; 4516 idx1 = j * sliceStride; 4517 idx2 = i * sliceStride; 4518 a[idx1] = 0.5f * (a[idx2] - a[idx1]); 4519 a[idx2] -= a[idx1]; 4520 a[idx1 + 1] = 0.5f * (a[idx2 + 1] + a[idx1 + 1]); 4521 a[idx2 + 1] -= a[idx1 + 1]; 4522 idx3 = j * sliceStride + n2h * rowStride; 4523 idx4 = i * sliceStride + n2h * rowStride; 4524 a[idx3] = 0.5f * (a[idx4] - a[idx3]); 4525 a[idx4] -= a[idx3]; 4526 a[idx3 + 1] = 0.5f * (a[idx4 + 1] + a[idx3 + 1]); 4527 a[idx4 + 1] -= a[idx3 + 1]; 4528 for (k = 1; k < n2h; k++) { 4529 l = rows - k; 4530 idx1 = j * sliceStride + l * rowStride; 4531 idx2 = i * sliceStride + k * rowStride; 4532 a[idx1] = 0.5f * (a[idx2] - a[idx1]); 4533 a[idx2] -= a[idx1]; 4534 a[idx1 + 1] = 0.5f * (a[idx2 + 1] + a[idx1 + 1]); 4535 a[idx2 + 1] -= a[idx1 + 1]; 4536 idx3 = i * sliceStride + l * rowStride; 4537 idx4 = j * sliceStride + k * rowStride; 4538 a[idx3] = 0.5f * (a[idx4] - a[idx3]); 4539 a[idx4] -= a[idx3]; 4540 a[idx3 + 1] = 0.5f * (a[idx4 + 1] + a[idx3 + 1]); 4541 a[idx4 + 1] -= a[idx3 + 1]; 4542 } 4543 } 4544 for (k = 1; k < n2h; k++) { 4545 l = rows - k; 4546 idx1 = l * rowStride; 4547 idx2 = k * rowStride; 4548 a[idx1] = 0.5f * (a[idx2] - a[idx1]); 4549 a[idx2] -= a[idx1]; 4550 a[idx1 + 1] = 0.5f * (a[idx2 + 1] + a[idx1 + 1]); 4551 a[idx2 + 1] -= a[idx1 + 1]; 4552 idx3 = n1h * sliceStride + l * rowStride; 4553 idx4 = n1h * sliceStride + k * rowStride; 4554 a[idx3] = 0.5f * (a[idx4] - a[idx3]); 4555 a[idx4] -= a[idx3]; 4556 a[idx3 + 1] = 0.5f * (a[idx4 + 1] + a[idx3 + 1]); 4557 a[idx4 + 1] -= a[idx3 + 1]; 4558 } 4559 } 4560 } 4561 4562 private void rdft3d_sub(int isgn, float[][][] a) { 4563 int n1h, n2h, i, j, k, l; 4564 float xi; 4565 4566 n1h = slices >> 1; 4567 n2h = rows >> 1; 4568 if (isgn < 0) { 4569 for (i = 1; i < n1h; i++) { 4570 j = slices - i; 4571 xi = a[i][0][0] - a[j][0][0]; 4572 a[i][0][0] += a[j][0][0]; 4573 a[j][0][0] = xi; 4574 xi = a[j][0][1] - a[i][0][1]; 4575 a[i][0][1] += a[j][0][1]; 4576 a[j][0][1] = xi; 4577 xi = a[i][n2h][0] - a[j][n2h][0]; 4578 a[i][n2h][0] += a[j][n2h][0]; 4579 a[j][n2h][0] = xi; 4580 xi = a[j][n2h][1] - a[i][n2h][1]; 4581 a[i][n2h][1] += a[j][n2h][1]; 4582 a[j][n2h][1] = xi; 4583 for (k = 1; k < n2h; k++) { 4584 l = rows - k; 4585 xi = a[i][k][0] - a[j][l][0]; 4586 a[i][k][0] += a[j][l][0]; 4587 a[j][l][0] = xi; 4588 xi = a[j][l][1] - a[i][k][1]; 4589 a[i][k][1] += a[j][l][1]; 4590 a[j][l][1] = xi; 4591 xi = a[j][k][0] - a[i][l][0]; 4592 a[j][k][0] += a[i][l][0]; 4593 a[i][l][0] = xi; 4594 xi = a[i][l][1] - a[j][k][1]; 4595 a[j][k][1] += a[i][l][1]; 4596 a[i][l][1] = xi; 4597 } 4598 } 4599 for (k = 1; k < n2h; k++) { 4600 l = rows - k; 4601 xi = a[0][k][0] - a[0][l][0]; 4602 a[0][k][0] += a[0][l][0]; 4603 a[0][l][0] = xi; 4604 xi = a[0][l][1] - a[0][k][1]; 4605 a[0][k][1] += a[0][l][1]; 4606 a[0][l][1] = xi; 4607 xi = a[n1h][k][0] - a[n1h][l][0]; 4608 a[n1h][k][0] += a[n1h][l][0]; 4609 a[n1h][l][0] = xi; 4610 xi = a[n1h][l][1] - a[n1h][k][1]; 4611 a[n1h][k][1] += a[n1h][l][1]; 4612 a[n1h][l][1] = xi; 4613 } 4614 } else { 4615 for (i = 1; i < n1h; i++) { 4616 j = slices - i; 4617 a[j][0][0] = 0.5f * (a[i][0][0] - a[j][0][0]); 4618 a[i][0][0] -= a[j][0][0]; 4619 a[j][0][1] = 0.5f * (a[i][0][1] + a[j][0][1]); 4620 a[i][0][1] -= a[j][0][1]; 4621 a[j][n2h][0] = 0.5f * (a[i][n2h][0] - a[j][n2h][0]); 4622 a[i][n2h][0] -= a[j][n2h][0]; 4623 a[j][n2h][1] = 0.5f * (a[i][n2h][1] + a[j][n2h][1]); 4624 a[i][n2h][1] -= a[j][n2h][1]; 4625 for (k = 1; k < n2h; k++) { 4626 l = rows - k; 4627 a[j][l][0] = 0.5f * (a[i][k][0] - a[j][l][0]); 4628 a[i][k][0] -= a[j][l][0]; 4629 a[j][l][1] = 0.5f * (a[i][k][1] + a[j][l][1]); 4630 a[i][k][1] -= a[j][l][1]; 4631 a[i][l][0] = 0.5f * (a[j][k][0] - a[i][l][0]); 4632 a[j][k][0] -= a[i][l][0]; 4633 a[i][l][1] = 0.5f * (a[j][k][1] + a[i][l][1]); 4634 a[j][k][1] -= a[i][l][1]; 4635 } 4636 } 4637 for (k = 1; k < n2h; k++) { 4638 l = rows - k; 4639 a[0][l][0] = 0.5f * (a[0][k][0] - a[0][l][0]); 4640 a[0][k][0] -= a[0][l][0]; 4641 a[0][l][1] = 0.5f * (a[0][k][1] + a[0][l][1]); 4642 a[0][k][1] -= a[0][l][1]; 4643 a[n1h][l][0] = 0.5f * (a[n1h][k][0] - a[n1h][l][0]); 4644 a[n1h][k][0] -= a[n1h][l][0]; 4645 a[n1h][l][1] = 0.5f * (a[n1h][k][1] + a[n1h][l][1]); 4646 a[n1h][k][1] -= a[n1h][l][1]; 4647 } 4648 } 4649 } 4650 4651 private void fillSymmetric(final float[][][] a) { 4652 final int twon3 = 2 * columns; 4653 final int n2d2 = rows / 2; 4654 int n1d2 = slices / 2; 4655 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 4656 if ((nthreads > 1) && useThreads && (slices >= nthreads)) { 4657 Future<?>[] futures = new Future[nthreads]; 4658 int p = slices / nthreads; 4659 for (int l = 0; l < nthreads; l++) { 4660 final int firstSlice = l * p; 4661 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 4662 futures[l] = ConcurrencyUtils.submit(new Runnable() { 4663 @Override 4664 public void run() { 4665 for (int s = firstSlice; s < lastSlice; s++) { 4666 int idx1 = (slices - s) % slices; 4667 for (int r = 0; r < rows; r++) { 4668 int idx2 = (rows - r) % rows; 4669 for (int c = 1; c < columns; c += 2) { 4670 int idx3 = twon3 - c; 4671 a[idx1][idx2][idx3] = -a[s][r][c + 2]; 4672 a[idx1][idx2][idx3 - 1] = a[s][r][c + 1]; 4673 } 4674 } 4675 } 4676 } 4677 }); 4678 } 4679 ConcurrencyUtils.waitForCompletion(futures); 4680 4681 // --------------------------------------------- 4682 4683 for (int l = 0; l < nthreads; l++) { 4684 final int firstSlice = l * p; 4685 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 4686 futures[l] = ConcurrencyUtils.submit(new Runnable() { 4687 @Override 4688 public void run() { 4689 for (int s = firstSlice; s < lastSlice; s++) { 4690 int idx1 = (slices - s) % slices; 4691 for (int r = 1; r < n2d2; r++) { 4692 int idx2 = rows - r; 4693 a[idx1][r][columns] = a[s][idx2][1]; 4694 a[s][idx2][columns] = a[s][idx2][1]; 4695 a[idx1][r][columns + 1] = -a[s][idx2][0]; 4696 a[s][idx2][columns + 1] = a[s][idx2][0]; 4697 } 4698 } 4699 } 4700 }); 4701 } 4702 ConcurrencyUtils.waitForCompletion(futures); 4703 4704 for (int l = 0; l < nthreads; l++) { 4705 final int firstSlice = l * p; 4706 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 4707 futures[l] = ConcurrencyUtils.submit(new Runnable() { 4708 @Override 4709 public void run() { 4710 for (int s = firstSlice; s < lastSlice; s++) { 4711 int idx1 = (slices - s) % slices; 4712 for (int r = 1; r < n2d2; r++) { 4713 int idx2 = rows - r; 4714 a[idx1][idx2][0] = a[s][r][0]; 4715 a[idx1][idx2][1] = -a[s][r][1]; 4716 } 4717 } 4718 } 4719 }); 4720 } 4721 ConcurrencyUtils.waitForCompletion(futures); 4722 4723 } else { 4724 4725 for (int s = 0; s < slices; s++) { 4726 int idx1 = (slices - s) % slices; 4727 for (int r = 0; r < rows; r++) { 4728 int idx2 = (rows - r) % rows; 4729 for (int c = 1; c < columns; c += 2) { 4730 int idx3 = twon3 - c; 4731 a[idx1][idx2][idx3] = -a[s][r][c + 2]; 4732 a[idx1][idx2][idx3 - 1] = a[s][r][c + 1]; 4733 } 4734 } 4735 } 4736 4737 // --------------------------------------------- 4738 4739 for (int s = 0; s < slices; s++) { 4740 int idx1 = (slices - s) % slices; 4741 for (int r = 1; r < n2d2; r++) { 4742 int idx2 = rows - r; 4743 a[idx1][r][columns] = a[s][idx2][1]; 4744 a[s][idx2][columns] = a[s][idx2][1]; 4745 a[idx1][r][columns + 1] = -a[s][idx2][0]; 4746 a[s][idx2][columns + 1] = a[s][idx2][0]; 4747 } 4748 } 4749 4750 for (int s = 0; s < slices; s++) { 4751 int idx1 = (slices - s) % slices; 4752 for (int r = 1; r < n2d2; r++) { 4753 int idx2 = rows - r; 4754 a[idx1][idx2][0] = a[s][r][0]; 4755 a[idx1][idx2][1] = -a[s][r][1]; 4756 } 4757 } 4758 } 4759 4760 // ---------------------------------------------------------- 4761 4762 for (int s = 1; s < n1d2; s++) { 4763 int idx1 = slices - s; 4764 a[s][0][columns] = a[idx1][0][1]; 4765 a[idx1][0][columns] = a[idx1][0][1]; 4766 a[s][0][columns + 1] = -a[idx1][0][0]; 4767 a[idx1][0][columns + 1] = a[idx1][0][0]; 4768 a[s][n2d2][columns] = a[idx1][n2d2][1]; 4769 a[idx1][n2d2][columns] = a[idx1][n2d2][1]; 4770 a[s][n2d2][columns + 1] = -a[idx1][n2d2][0]; 4771 a[idx1][n2d2][columns + 1] = a[idx1][n2d2][0]; 4772 a[idx1][0][0] = a[s][0][0]; 4773 a[idx1][0][1] = -a[s][0][1]; 4774 a[idx1][n2d2][0] = a[s][n2d2][0]; 4775 a[idx1][n2d2][1] = -a[s][n2d2][1]; 4776 4777 } 4778 // ---------------------------------------- 4779 4780 a[0][0][columns] = a[0][0][1]; 4781 a[0][0][1] = 0; 4782 a[0][n2d2][columns] = a[0][n2d2][1]; 4783 a[0][n2d2][1] = 0; 4784 a[n1d2][0][columns] = a[n1d2][0][1]; 4785 a[n1d2][0][1] = 0; 4786 a[n1d2][n2d2][columns] = a[n1d2][n2d2][1]; 4787 a[n1d2][n2d2][1] = 0; 4788 a[n1d2][0][columns + 1] = 0; 4789 a[n1d2][n2d2][columns + 1] = 0; 4790 } 4791 4792 private void fillSymmetric(final float[] a) { 4793 final int twon3 = 2 * columns; 4794 final int n2d2 = rows / 2; 4795 int n1d2 = slices / 2; 4796 4797 final int twoSliceStride = rows * twon3; 4798 final int twoRowStride = twon3; 4799 4800 int idx1, idx2, idx3, idx4, idx5, idx6; 4801 4802 for (int s = (slices - 1); s >= 1; s--) { 4803 idx3 = s * sliceStride; 4804 idx4 = 2 * idx3; 4805 for (int r = 0; r < rows; r++) { 4806 idx5 = r * rowStride; 4807 idx6 = 2 * idx5; 4808 for (int c = 0; c < columns; c += 2) { 4809 idx1 = idx3 + idx5 + c; 4810 idx2 = idx4 + idx6 + c; 4811 a[idx2] = a[idx1]; 4812 a[idx1] = 0; 4813 idx1++; 4814 idx2++; 4815 a[idx2] = a[idx1]; 4816 a[idx1] = 0; 4817 } 4818 } 4819 } 4820 4821 for (int r = 1; r < rows; r++) { 4822 idx3 = (rows - r) * rowStride; 4823 idx4 = (rows - r) * twoRowStride; 4824 for (int c = 0; c < columns; c += 2) { 4825 idx1 = idx3 + c; 4826 idx2 = idx4 + c; 4827 a[idx2] = a[idx1]; 4828 a[idx1] = 0; 4829 idx1++; 4830 idx2++; 4831 a[idx2] = a[idx1]; 4832 a[idx1] = 0; 4833 } 4834 } 4835 4836 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 4837 if ((nthreads > 1) && useThreads && (slices >= nthreads)) { 4838 Future<?>[] futures = new Future[nthreads]; 4839 int p = slices / nthreads; 4840 for (int l = 0; l < nthreads; l++) { 4841 final int firstSlice = l * p; 4842 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 4843 futures[l] = ConcurrencyUtils.submit(new Runnable() { 4844 @Override 4845 public void run() { 4846 for (int s = firstSlice; s < lastSlice; s++) { 4847 int idx3 = ((slices - s) % slices) * twoSliceStride; 4848 int idx5 = s * twoSliceStride; 4849 for (int r = 0; r < rows; r++) { 4850 int idx4 = ((rows - r) % rows) * twoRowStride; 4851 int idx6 = r * twoRowStride; 4852 for (int c = 1; c < columns; c += 2) { 4853 int idx1 = idx3 + idx4 + twon3 - c; 4854 int idx2 = idx5 + idx6 + c; 4855 a[idx1] = -a[idx2 + 2]; 4856 a[idx1 - 1] = a[idx2 + 1]; 4857 } 4858 } 4859 } 4860 } 4861 }); 4862 } 4863 ConcurrencyUtils.waitForCompletion(futures); 4864 4865 // --------------------------------------------- 4866 4867 for (int l = 0; l < nthreads; l++) { 4868 final int firstSlice = l * p; 4869 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 4870 futures[l] = ConcurrencyUtils.submit(new Runnable() { 4871 @Override 4872 public void run() { 4873 for (int s = firstSlice; s < lastSlice; s++) { 4874 int idx5 = ((slices - s) % slices) * twoSliceStride; 4875 int idx6 = s * twoSliceStride; 4876 for (int r = 1; r < n2d2; r++) { 4877 int idx4 = idx6 + (rows - r) * twoRowStride; 4878 int idx1 = idx5 + r * twoRowStride + columns; 4879 int idx2 = idx4 + columns; 4880 int idx3 = idx4 + 1; 4881 a[idx1] = a[idx3]; 4882 a[idx2] = a[idx3]; 4883 a[idx1 + 1] = -a[idx4]; 4884 a[idx2 + 1] = a[idx4]; 4885 4886 } 4887 } 4888 } 4889 }); 4890 } 4891 ConcurrencyUtils.waitForCompletion(futures); 4892 for (int l = 0; l < nthreads; l++) { 4893 final int firstSlice = l * p; 4894 final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p; 4895 futures[l] = ConcurrencyUtils.submit(new Runnable() { 4896 @Override 4897 public void run() { 4898 for (int s = firstSlice; s < lastSlice; s++) { 4899 int idx3 = ((slices - s) % slices) * twoSliceStride; 4900 int idx4 = s * twoSliceStride; 4901 for (int r = 1; r < n2d2; r++) { 4902 int idx1 = idx3 + (rows - r) * twoRowStride; 4903 int idx2 = idx4 + r * twoRowStride; 4904 a[idx1] = a[idx2]; 4905 a[idx1 + 1] = -a[idx2 + 1]; 4906 4907 } 4908 } 4909 } 4910 }); 4911 } 4912 ConcurrencyUtils.waitForCompletion(futures); 4913 } else { 4914 4915 // ----------------------------------------------- 4916 for (int s = 0; s < slices; s++) { 4917 idx3 = ((slices - s) % slices) * twoSliceStride; 4918 idx5 = s * twoSliceStride; 4919 for (int r = 0; r < rows; r++) { 4920 idx4 = ((rows - r) % rows) * twoRowStride; 4921 idx6 = r * twoRowStride; 4922 for (int c = 1; c < columns; c += 2) { 4923 idx1 = idx3 + idx4 + twon3 - c; 4924 idx2 = idx5 + idx6 + c; 4925 a[idx1] = -a[idx2 + 2]; 4926 a[idx1 - 1] = a[idx2 + 1]; 4927 } 4928 } 4929 } 4930 4931 // --------------------------------------------- 4932 4933 for (int s = 0; s < slices; s++) { 4934 idx5 = ((slices - s) % slices) * twoSliceStride; 4935 idx6 = s * twoSliceStride; 4936 for (int r = 1; r < n2d2; r++) { 4937 idx4 = idx6 + (rows - r) * twoRowStride; 4938 idx1 = idx5 + r * twoRowStride + columns; 4939 idx2 = idx4 + columns; 4940 idx3 = idx4 + 1; 4941 a[idx1] = a[idx3]; 4942 a[idx2] = a[idx3]; 4943 a[idx1 + 1] = -a[idx4]; 4944 a[idx2 + 1] = a[idx4]; 4945 4946 } 4947 } 4948 4949 for (int s = 0; s < slices; s++) { 4950 idx3 = ((slices - s) % slices) * twoSliceStride; 4951 idx4 = s * twoSliceStride; 4952 for (int r = 1; r < n2d2; r++) { 4953 idx1 = idx3 + (rows - r) * twoRowStride; 4954 idx2 = idx4 + r * twoRowStride; 4955 a[idx1] = a[idx2]; 4956 a[idx1 + 1] = -a[idx2 + 1]; 4957 4958 } 4959 } 4960 } 4961 4962 // ---------------------------------------------------------- 4963 4964 for (int s = 1; s < n1d2; s++) { 4965 idx1 = s * twoSliceStride; 4966 idx2 = (slices - s) * twoSliceStride; 4967 idx3 = n2d2 * twoRowStride; 4968 idx4 = idx1 + idx3; 4969 idx5 = idx2 + idx3; 4970 a[idx1 + columns] = a[idx2 + 1]; 4971 a[idx2 + columns] = a[idx2 + 1]; 4972 a[idx1 + columns + 1] = -a[idx2]; 4973 a[idx2 + columns + 1] = a[idx2]; 4974 a[idx4 + columns] = a[idx5 + 1]; 4975 a[idx5 + columns] = a[idx5 + 1]; 4976 a[idx4 + columns + 1] = -a[idx5]; 4977 a[idx5 + columns + 1] = a[idx5]; 4978 a[idx2] = a[idx1]; 4979 a[idx2 + 1] = -a[idx1 + 1]; 4980 a[idx5] = a[idx4]; 4981 a[idx5 + 1] = -a[idx4 + 1]; 4982 4983 } 4984 4985 // ---------------------------------------- 4986 4987 a[columns] = a[1]; 4988 a[1] = 0; 4989 idx1 = n2d2 * twoRowStride; 4990 idx2 = n1d2 * twoSliceStride; 4991 idx3 = idx1 + idx2; 4992 a[idx1 + columns] = a[idx1 + 1]; 4993 a[idx1 + 1] = 0; 4994 a[idx2 + columns] = a[idx2 + 1]; 4995 a[idx2 + 1] = 0; 4996 a[idx3 + columns] = a[idx3 + 1]; 4997 a[idx3 + 1] = 0; 4998 a[idx2 + columns + 1] = 0; 4999 a[idx3 + columns + 1] = 0; 5000 } 5001}