001/* ***** BEGIN LICENSE BLOCK *****
002 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
003 *
004 * The contents of this file are subject to the Mozilla Public License Version
005 * 1.1 (the "License"); you may not use this file except in compliance with
006 * the License. You may obtain a copy of the License at
007 * http://www.mozilla.org/MPL/
008 *
009 * Software distributed under the License is distributed on an "AS IS" basis,
010 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
011 * for the specific language governing rights and limitations under the
012 * License.
013 *
014 * The Original Code is JTransforms.
015 *
016 * The Initial Developer of the Original Code is
017 * Piotr Wendykier, Emory University.
018 * Portions created by the Initial Developer are Copyright (C) 2007-2009
019 * the Initial Developer. All Rights Reserved.
020 *
021 * Alternatively, the contents of this file may be used under the terms of
022 * either the GNU General Public License Version 2 or later (the "GPL"), or
023 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
024 * in which case the provisions of the GPL or the LGPL are applicable instead
025 * of those above. If you wish to allow use of your version of this file only
026 * under the terms of either the GPL or the LGPL, and not to allow others to
027 * use your version of this file under the terms of the MPL, indicate your
028 * decision by deleting the provisions above and replace them with the notice
029 * and other provisions required by the GPL or the LGPL. If you do not delete
030 * the provisions above, a recipient may use your version of this file under
031 * the terms of any one of the MPL, the GPL or the LGPL.
032 *
033 * ***** END LICENSE BLOCK ***** */
034
035package edu.emory.mathcs.jtransforms.fft;
036
037import java.util.concurrent.Future;
038
039import edu.emory.mathcs.utils.ConcurrencyUtils;
040
041/**
042 * Computes 3D Discrete Fourier Transform (DFT) of complex and real, single
043 * precision data. The sizes of all three dimensions can be arbitrary numbers.
044 * This is a parallel implementation of split-radix and mixed-radix algorithms
045 * optimized for SMP systems. <br>
046 * <br>
047 * Part of the code is derived from General Purpose FFT Package written by Takuya Ooura
048 * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
049 *
050 * @author Piotr Wendykier (piotr.wendykier@gmail.com)
051 *
052 */
053public strictfp class FloatFFT_3D {
054
055    private int slices;
056
057    private int rows;
058
059    private int columns;
060
061    private int sliceStride;
062
063    private int rowStride;
064
065    private float[] t;
066
067    private FloatFFT_1D fftSlices, fftRows, fftColumns;
068
069    private int oldNthreads;
070
071    private int nt;
072
073    private boolean isPowerOfTwo = false;
074
075    private boolean useThreads = false;
076
077    /**
078     * Creates new instance of FloatFFT_3D.
079     *
080     * @param slices
081     *            number of slices
082     * @param rows
083     *            number of rows
084     * @param columns
085     *            number of columns
086     *
087     */
088    public FloatFFT_3D(int slices, int rows, int columns) {
089        if (slices <= 1 || rows <= 1 || columns <= 1) {
090            throw new IllegalArgumentException("slices, rows and columns must be greater than 1");
091        }
092        this.slices = slices;
093        this.rows = rows;
094        this.columns = columns;
095        this.sliceStride = rows * columns;
096        this.rowStride = columns;
097        if (slices * rows * columns >= ConcurrencyUtils.getThreadsBeginN_3D()) {
098            this.useThreads = true;
099        }
100        if (ConcurrencyUtils.isPowerOf2(slices) && ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) {
101            isPowerOfTwo = true;
102            oldNthreads = ConcurrencyUtils.getNumberOfThreads();
103            nt = slices;
104            if (nt < rows) {
105                nt = rows;
106            }
107            nt *= 8;
108            if (oldNthreads > 1) {
109                nt *= oldNthreads;
110            }
111            if (2 * columns == 4) {
112                nt >>= 1;
113            } else if (2 * columns < 4) {
114                nt >>= 2;
115            }
116            t = new float[nt];
117        }
118        fftSlices = new FloatFFT_1D(slices);
119        if (slices == rows) {
120            fftRows = fftSlices;
121        } else {
122            fftRows = new FloatFFT_1D(rows);
123        }
124        if (slices == columns) {
125            fftColumns = fftSlices;
126        } else if (rows == columns) {
127            fftColumns = fftRows;
128        } else {
129            fftColumns = new FloatFFT_1D(columns);
130        }
131
132    }
133
134    /**
135     * Computes 3D forward DFT of complex data leaving the result in
136     * <code>a</code>. The data is stored in 1D array addressed in slice-major,
137     * then row-major, then column-major, in order of significance, i.e. element
138     * (i,j,k) of 3D array x[slices][rows][2*columns] is stored in a[i*sliceStride +
139     * j*rowStride + k], where sliceStride = rows * 2 * columns and rowStride = 2 * columns.
140     * Complex number is stored as two float values in sequence: the real and
141     * imaginary part, i.e. the input array must be of size slices*rows*2*columns. The
142     * physical layout of the input data is as follows:
143     *
144     * <pre>
145     * a[k1*sliceStride + k2*rowStride + 2*k3] = Re[k1][k2][k3],
146     * a[k1*sliceStride + k2*rowStride + 2*k3+1] = Im[k1][k2][k3], 0&lt;=k1&lt;slices, 0&lt;=k2&lt;rows, 0&lt;=k3&lt;columns,
147     * </pre>
148     *
149     * @param a
150     *            data to transform
151     */
152    public void complexForward(final float[] a) {
153        int nthreads = ConcurrencyUtils.getNumberOfThreads();
154        if (isPowerOfTwo) {
155            int oldn3 = columns;
156            columns = 2 * columns;
157
158            sliceStride = rows * columns;
159            rowStride = columns;
160
161            if (nthreads != oldNthreads) {
162                nt = slices;
163                if (nt < rows) {
164                    nt = rows;
165                }
166                nt *= 8;
167                if (nthreads > 1) {
168                    nt *= nthreads;
169                }
170                if (columns == 4) {
171                    nt >>= 1;
172                } else if (columns < 4) {
173                    nt >>= 2;
174                }
175                t = new float[nt];
176                oldNthreads = nthreads;
177            }
178            if ((nthreads > 1) && useThreads) {
179                xdft3da_subth2(0, -1, a, true);
180                cdft3db_subth(-1, a, true);
181            } else {
182                xdft3da_sub2(0, -1, a, true);
183                cdft3db_sub(-1, a, true);
184            }
185            columns = oldn3;
186            sliceStride = rows * columns;
187            rowStride = columns;
188        } else {
189            sliceStride = 2 * rows * columns;
190            rowStride = 2 * columns;
191            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
192                Future<?>[] futures = new Future[nthreads];
193                int p = slices / nthreads;
194                for (int l = 0; l < nthreads; l++) {
195                    final int firstSlice = l * p;
196                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
197                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
198                        @Override
199                                                public void run() {
200                            for (int s = firstSlice; s < lastSlice; s++) {
201                                int idx1 = s * sliceStride;
202                                for (int r = 0; r < rows; r++) {
203                                    fftColumns.complexForward(a, idx1 + r * rowStride);
204                                }
205                            }
206                        }
207                    });
208                }
209                ConcurrencyUtils.waitForCompletion(futures);
210
211                for (int l = 0; l < nthreads; l++) {
212                    final int firstSlice = l * p;
213                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
214
215                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
216                        @Override
217                                                public void run() {
218                            float[] temp = new float[2 * rows];
219                            for (int s = firstSlice; s < lastSlice; s++) {
220                                int idx1 = s * sliceStride;
221                                for (int c = 0; c < columns; c++) {
222                                    int idx2 = 2 * c;
223                                    for (int r = 0; r < rows; r++) {
224                                        int idx3 = idx1 + idx2 + r * rowStride;
225                                        int idx4 = 2 * r;
226                                        temp[idx4] = a[idx3];
227                                        temp[idx4 + 1] = a[idx3 + 1];
228                                    }
229                                    fftRows.complexForward(temp);
230                                    for (int r = 0; r < rows; r++) {
231                                        int idx3 = idx1 + idx2 + r * rowStride;
232                                        int idx4 = 2 * r;
233                                        a[idx3] = temp[idx4];
234                                        a[idx3 + 1] = temp[idx4 + 1];
235                                    }
236                                }
237                            }
238                        }
239                    });
240                }
241                ConcurrencyUtils.waitForCompletion(futures);
242
243                p = rows / nthreads;
244                for (int l = 0; l < nthreads; l++) {
245                    final int firstRow = l * p;
246                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
247
248                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
249                        @Override
250                                                public void run() {
251                            float[] temp = new float[2 * slices];
252                            for (int r = firstRow; r < lastRow; r++) {
253                                int idx1 = r * rowStride;
254                                for (int c = 0; c < columns; c++) {
255                                    int idx2 = 2 * c;
256                                    for (int s = 0; s < slices; s++) {
257                                        int idx3 = s * sliceStride + idx1 + idx2;
258                                        int idx4 = 2 * s;
259                                        temp[idx4] = a[idx3];
260                                        temp[idx4 + 1] = a[idx3 + 1];
261                                    }
262                                    fftSlices.complexForward(temp);
263                                    for (int s = 0; s < slices; s++) {
264                                        int idx3 = s * sliceStride + idx1 + idx2;
265                                        int idx4 = 2 * s;
266                                        a[idx3] = temp[idx4];
267                                        a[idx3 + 1] = temp[idx4 + 1];
268                                    }
269                                }
270                            }
271                        }
272                    });
273                }
274                ConcurrencyUtils.waitForCompletion(futures);
275
276            } else {
277                for (int s = 0; s < slices; s++) {
278                    int idx1 = s * sliceStride;
279                    for (int r = 0; r < rows; r++) {
280                        fftColumns.complexForward(a, idx1 + r * rowStride);
281                    }
282                }
283
284                float[] temp = new float[2 * rows];
285                for (int s = 0; s < slices; s++) {
286                    int idx1 = s * sliceStride;
287                    for (int c = 0; c < columns; c++) {
288                        int idx2 = 2 * c;
289                        for (int r = 0; r < rows; r++) {
290                            int idx3 = idx1 + idx2 + r * rowStride;
291                            int idx4 = 2 * r;
292                            temp[idx4] = a[idx3];
293                            temp[idx4 + 1] = a[idx3 + 1];
294                        }
295                        fftRows.complexForward(temp);
296                        for (int r = 0; r < rows; r++) {
297                            int idx3 = idx1 + idx2 + r * rowStride;
298                            int idx4 = 2 * r;
299                            a[idx3] = temp[idx4];
300                            a[idx3 + 1] = temp[idx4 + 1];
301                        }
302                    }
303                }
304
305                temp = new float[2 * slices];
306                for (int r = 0; r < rows; r++) {
307                    int idx1 = r * rowStride;
308                    for (int c = 0; c < columns; c++) {
309                        int idx2 = 2 * c;
310                        for (int s = 0; s < slices; s++) {
311                            int idx3 = s * sliceStride + idx1 + idx2;
312                            int idx4 = 2 * s;
313                            temp[idx4] = a[idx3];
314                            temp[idx4 + 1] = a[idx3 + 1];
315                        }
316                        fftSlices.complexForward(temp);
317                        for (int s = 0; s < slices; s++) {
318                            int idx3 = s * sliceStride + idx1 + idx2;
319                            int idx4 = 2 * s;
320                            a[idx3] = temp[idx4];
321                            a[idx3 + 1] = temp[idx4 + 1];
322                        }
323                    }
324                }
325            }
326            sliceStride = rows * columns;
327            rowStride = columns;
328        }
329    }
330
331    /**
332     * Computes 3D forward DFT of complex data leaving the result in
333     * <code>a</code>. The data is stored in 3D array. Complex data is
334     * represented by 2 float values in sequence: the real and imaginary part,
335     * i.e. the input array must be of size slices by rows by 2*columns. The physical
336     * layout of the input data is as follows:
337     *
338     * <pre>
339     * a[k1][k2][2*k3] = Re[k1][k2][k3],
340     * a[k1][k2][2*k3+1] = Im[k1][k2][k3], 0&lt;=k1&lt;slices, 0&lt;=k2&lt;rows, 0&lt;=k3&lt;columns,
341     * </pre>
342     *
343     * @param a
344     *            data to transform
345     */
346    public void complexForward(final float[][][] a) {
347        int nthreads = ConcurrencyUtils.getNumberOfThreads();
348        if (isPowerOfTwo) {
349            int oldn3 = columns;
350            columns = 2 * columns;
351
352            sliceStride = rows * columns;
353            rowStride = columns;
354
355            if (nthreads != oldNthreads) {
356                nt = slices;
357                if (nt < rows) {
358                    nt = rows;
359                }
360                nt *= 8;
361                if (nthreads > 1) {
362                    nt *= nthreads;
363                }
364                if (columns == 4) {
365                    nt >>= 1;
366                } else if (columns < 4) {
367                    nt >>= 2;
368                }
369                t = new float[nt];
370                oldNthreads = nthreads;
371            }
372            if ((nthreads > 1) && useThreads) {
373                xdft3da_subth2(0, -1, a, true);
374                cdft3db_subth(-1, a, true);
375            } else {
376                xdft3da_sub2(0, -1, a, true);
377                cdft3db_sub(-1, a, true);
378            }
379            columns = oldn3;
380            sliceStride = rows * columns;
381            rowStride = columns;
382        } else {
383            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
384                Future<?>[] futures = new Future[nthreads];
385                int p = slices / nthreads;
386                for (int l = 0; l < nthreads; l++) {
387                    final int firstSlice = l * p;
388                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
389                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
390                        @Override
391                                                public void run() {
392                            for (int s = firstSlice; s < lastSlice; s++) {
393                                for (int r = 0; r < rows; r++) {
394                                    fftColumns.complexForward(a[s][r]);
395                                }
396                            }
397                        }
398                    });
399                }
400                ConcurrencyUtils.waitForCompletion(futures);
401
402                for (int l = 0; l < nthreads; l++) {
403                    final int firstSlice = l * p;
404                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
405
406                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
407                        @Override
408                                                public void run() {
409                            float[] temp = new float[2 * rows];
410                            for (int s = firstSlice; s < lastSlice; s++) {
411                                for (int c = 0; c < columns; c++) {
412                                    int idx2 = 2 * c;
413                                    for (int r = 0; r < rows; r++) {
414                                        int idx4 = 2 * r;
415                                        temp[idx4] = a[s][r][idx2];
416                                        temp[idx4 + 1] = a[s][r][idx2 + 1];
417                                    }
418                                    fftRows.complexForward(temp);
419                                    for (int r = 0; r < rows; r++) {
420                                        int idx4 = 2 * r;
421                                        a[s][r][idx2] = temp[idx4];
422                                        a[s][r][idx2 + 1] = temp[idx4 + 1];
423                                    }
424                                }
425                            }
426                        }
427                    });
428                }
429                ConcurrencyUtils.waitForCompletion(futures);
430
431                p = rows / nthreads;
432                for (int l = 0; l < nthreads; l++) {
433                    final int firstRow = l * p;
434                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
435
436                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
437                        @Override
438                                                public void run() {
439                            float[] temp = new float[2 * slices];
440                            for (int r = firstRow; r < lastRow; r++) {
441                                for (int c = 0; c < columns; c++) {
442                                    int idx2 = 2 * c;
443                                    for (int s = 0; s < slices; s++) {
444                                        int idx4 = 2 * s;
445                                        temp[idx4] = a[s][r][idx2];
446                                        temp[idx4 + 1] = a[s][r][idx2 + 1];
447                                    }
448                                    fftSlices.complexForward(temp);
449                                    for (int s = 0; s < slices; s++) {
450                                        int idx4 = 2 * s;
451                                        a[s][r][idx2] = temp[idx4];
452                                        a[s][r][idx2 + 1] = temp[idx4 + 1];
453                                    }
454                                }
455                            }
456                        }
457                    });
458                }
459                ConcurrencyUtils.waitForCompletion(futures);
460
461            } else {
462                for (int s = 0; s < slices; s++) {
463                    for (int r = 0; r < rows; r++) {
464                        fftColumns.complexForward(a[s][r]);
465                    }
466                }
467
468                float[] temp = new float[2 * rows];
469                for (int s = 0; s < slices; s++) {
470                    for (int c = 0; c < columns; c++) {
471                        int idx2 = 2 * c;
472                        for (int r = 0; r < rows; r++) {
473                            int idx4 = 2 * r;
474                            temp[idx4] = a[s][r][idx2];
475                            temp[idx4 + 1] = a[s][r][idx2 + 1];
476                        }
477                        fftRows.complexForward(temp);
478                        for (int r = 0; r < rows; r++) {
479                            int idx4 = 2 * r;
480                            a[s][r][idx2] = temp[idx4];
481                            a[s][r][idx2 + 1] = temp[idx4 + 1];
482                        }
483                    }
484                }
485
486                temp = new float[2 * slices];
487                for (int r = 0; r < rows; r++) {
488                    for (int c = 0; c < columns; c++) {
489                        int idx2 = 2 * c;
490                        for (int s = 0; s < slices; s++) {
491                            int idx4 = 2 * s;
492                            temp[idx4] = a[s][r][idx2];
493                            temp[idx4 + 1] = a[s][r][idx2 + 1];
494                        }
495                        fftSlices.complexForward(temp);
496                        for (int s = 0; s < slices; s++) {
497                            int idx4 = 2 * s;
498                            a[s][r][idx2] = temp[idx4];
499                            a[s][r][idx2 + 1] = temp[idx4 + 1];
500                        }
501                    }
502                }
503            }
504        }
505    }
506
507    /**
508     * Computes 3D inverse DFT of complex data leaving the result in
509     * <code>a</code>. The data is stored in a 1D array addressed in
510     * slice-major, then row-major, then column-major, in order of significance,
511     * i.e. element (i,j,k) of 3-d array x[slices][rows][2*columns] is stored in
512     * a[i*sliceStride + j*rowStride + k], where sliceStride = rows * 2 * columns and
513     * rowStride = 2 * columns. Complex number is stored as two float values in
514     * sequence: the real and imaginary part, i.e. the input array must be of
515     * size slices*rows*2*columns. The physical layout of the input data is as follows:
516     *
517     * <pre>
518     * a[k1*sliceStride + k2*rowStride + 2*k3] = Re[k1][k2][k3],
519     * a[k1*sliceStride + k2*rowStride + 2*k3+1] = Im[k1][k2][k3], 0&lt;=k1&lt;slices, 0&lt;=k2&lt;rows, 0&lt;=k3&lt;columns,
520     * </pre>
521     *
522     * @param a
523     *            data to transform
524     * @param scale
525     *            if true then scaling is performed
526     */
527    public void complexInverse(final float[] a, final boolean scale) {
528
529        int nthreads = ConcurrencyUtils.getNumberOfThreads();
530
531        if (isPowerOfTwo) {
532            int oldn3 = columns;
533            columns = 2 * columns;
534            sliceStride = rows * columns;
535            rowStride = columns;
536            if (nthreads != oldNthreads) {
537                nt = slices;
538                if (nt < rows) {
539                    nt = rows;
540                }
541                nt *= 8;
542                if (nthreads > 1) {
543                    nt *= nthreads;
544                }
545                if (columns == 4) {
546                    nt >>= 1;
547                } else if (columns < 4) {
548                    nt >>= 2;
549                }
550                t = new float[nt];
551                oldNthreads = nthreads;
552            }
553            if ((nthreads > 1) && useThreads) {
554                xdft3da_subth2(0, 1, a, scale);
555                cdft3db_subth(1, a, scale);
556            } else {
557                xdft3da_sub2(0, 1, a, scale);
558                cdft3db_sub(1, a, scale);
559            }
560            columns = oldn3;
561            sliceStride = rows * columns;
562            rowStride = columns;
563        } else {
564            sliceStride = 2 * rows * columns;
565            rowStride = 2 * columns;
566            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
567                Future<?>[] futures = new Future[nthreads];
568                int p = slices / nthreads;
569                for (int l = 0; l < nthreads; l++) {
570                    final int firstSlice = l * p;
571                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
572
573                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
574                        @Override
575                                                public void run() {
576                            for (int s = firstSlice; s < lastSlice; s++) {
577                                int idx1 = s * sliceStride;
578                                for (int r = 0; r < rows; r++) {
579                                    fftColumns.complexInverse(a, idx1 + r * rowStride, scale);
580                                }
581                            }
582                        }
583                    });
584                }
585                ConcurrencyUtils.waitForCompletion(futures);
586
587                for (int l = 0; l < nthreads; l++) {
588                    final int firstSlice = l * p;
589                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
590
591                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
592                        @Override
593                                                public void run() {
594                            float[] temp = new float[2 * rows];
595                            for (int s = firstSlice; s < lastSlice; s++) {
596                                int idx1 = s * sliceStride;
597                                for (int c = 0; c < columns; c++) {
598                                    int idx2 = 2 * c;
599                                    for (int r = 0; r < rows; r++) {
600                                        int idx3 = idx1 + idx2 + r * rowStride;
601                                        int idx4 = 2 * r;
602                                        temp[idx4] = a[idx3];
603                                        temp[idx4 + 1] = a[idx3 + 1];
604                                    }
605                                    fftRows.complexInverse(temp, scale);
606                                    for (int r = 0; r < rows; r++) {
607                                        int idx3 = idx1 + idx2 + r * rowStride;
608                                        int idx4 = 2 * r;
609                                        a[idx3] = temp[idx4];
610                                        a[idx3 + 1] = temp[idx4 + 1];
611                                    }
612                                }
613                            }
614                        }
615                    });
616                }
617                ConcurrencyUtils.waitForCompletion(futures);
618
619                p = rows / nthreads;
620                for (int l = 0; l < nthreads; l++) {
621                    final int firstRow = l * p;
622                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
623
624                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
625                        @Override
626                                                public void run() {
627                            float[] temp = new float[2 * slices];
628                            for (int r = firstRow; r < lastRow; r++) {
629                                int idx1 = r * rowStride;
630                                for (int c = 0; c < columns; c++) {
631                                    int idx2 = 2 * c;
632                                    for (int s = 0; s < slices; s++) {
633                                        int idx3 = s * sliceStride + idx1 + idx2;
634                                        int idx4 = 2 * s;
635                                        temp[idx4] = a[idx3];
636                                        temp[idx4 + 1] = a[idx3 + 1];
637                                    }
638                                    fftSlices.complexInverse(temp, scale);
639                                    for (int s = 0; s < slices; s++) {
640                                        int idx3 = s * sliceStride + idx1 + idx2;
641                                        int idx4 = 2 * s;
642                                        a[idx3] = temp[idx4];
643                                        a[idx3 + 1] = temp[idx4 + 1];
644                                    }
645                                }
646                            }
647                        }
648                    });
649                }
650                ConcurrencyUtils.waitForCompletion(futures);
651
652            } else {
653                for (int s = 0; s < slices; s++) {
654                    int idx1 = s * sliceStride;
655                    for (int r = 0; r < rows; r++) {
656                        fftColumns.complexInverse(a, idx1 + r * rowStride, scale);
657                    }
658                }
659                float[] temp = new float[2 * rows];
660                for (int s = 0; s < slices; s++) {
661                    int idx1 = s * sliceStride;
662                    for (int c = 0; c < columns; c++) {
663                        int idx2 = 2 * c;
664                        for (int r = 0; r < rows; r++) {
665                            int idx3 = idx1 + idx2 + r * rowStride;
666                            int idx4 = 2 * r;
667                            temp[idx4] = a[idx3];
668                            temp[idx4 + 1] = a[idx3 + 1];
669                        }
670                        fftRows.complexInverse(temp, scale);
671                        for (int r = 0; r < rows; r++) {
672                            int idx3 = idx1 + idx2 + r * rowStride;
673                            int idx4 = 2 * r;
674                            a[idx3] = temp[idx4];
675                            a[idx3 + 1] = temp[idx4 + 1];
676                        }
677                    }
678                }
679                temp = new float[2 * slices];
680                for (int r = 0; r < rows; r++) {
681                    int idx1 = r * rowStride;
682                    for (int c = 0; c < columns; c++) {
683                        int idx2 = 2 * c;
684                        for (int s = 0; s < slices; s++) {
685                            int idx3 = s * sliceStride + idx1 + idx2;
686                            int idx4 = 2 * s;
687                            temp[idx4] = a[idx3];
688                            temp[idx4 + 1] = a[idx3 + 1];
689                        }
690                        fftSlices.complexInverse(temp, scale);
691                        for (int s = 0; s < slices; s++) {
692                            int idx3 = s * sliceStride + idx1 + idx2;
693                            int idx4 = 2 * s;
694                            a[idx3] = temp[idx4];
695                            a[idx3 + 1] = temp[idx4 + 1];
696                        }
697                    }
698                }
699            }
700            sliceStride = rows * columns;
701            rowStride = columns;
702        }
703    }
704
705    /**
706     * Computes 3D inverse DFT of complex data leaving the result in
707     * <code>a</code>. The data is stored in a 3D array. Complex data is
708     * represented by 2 float values in sequence: the real and imaginary part,
709     * i.e. the input array must be of size slices by rows by 2*columns. The physical
710     * layout of the input data is as follows:
711     *
712     * <pre>
713     * a[k1][k2][2*k3] = Re[k1][k2][k3],
714     * a[k1][k2][2*k3+1] = Im[k1][k2][k3], 0&lt;=k1&lt;slices, 0&lt;=k2&lt;rows, 0&lt;=k3&lt;columns,
715     * </pre>
716     *
717     * @param a
718     *            data to transform
719     * @param scale
720     *            if true then scaling is performed
721     */
722    public void complexInverse(final float[][][] a, final boolean scale) {
723        int nthreads = ConcurrencyUtils.getNumberOfThreads();
724        if (isPowerOfTwo) {
725            int oldn3 = columns;
726            columns = 2 * columns;
727            sliceStride = rows * columns;
728            rowStride = columns;
729            if (nthreads != oldNthreads) {
730                nt = slices;
731                if (nt < rows) {
732                    nt = rows;
733                }
734                nt *= 8;
735                if (nthreads > 1) {
736                    nt *= nthreads;
737                }
738                if (columns == 4) {
739                    nt >>= 1;
740                } else if (columns < 4) {
741                    nt >>= 2;
742                }
743                t = new float[nt];
744                oldNthreads = nthreads;
745            }
746            if ((nthreads > 1) && useThreads) {
747                xdft3da_subth2(0, 1, a, scale);
748                cdft3db_subth(1, a, scale);
749            } else {
750                xdft3da_sub2(0, 1, a, scale);
751                cdft3db_sub(1, a, scale);
752            }
753            columns = oldn3;
754            sliceStride = rows * columns;
755            rowStride = columns;
756        } else {
757            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
758                Future<?>[] futures = new Future[nthreads];
759                int p = slices / nthreads;
760                for (int l = 0; l < nthreads; l++) {
761                    final int firstSlice = l * p;
762                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
763
764                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
765                        @Override
766                                                public void run() {
767                            for (int s = firstSlice; s < lastSlice; s++) {
768                                for (int r = 0; r < rows; r++) {
769                                    fftColumns.complexInverse(a[s][r], scale);
770                                }
771                            }
772                        }
773                    });
774                }
775                ConcurrencyUtils.waitForCompletion(futures);
776
777                for (int l = 0; l < nthreads; l++) {
778                    final int firstSlice = l * p;
779                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
780
781                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
782                        @Override
783                                                public void run() {
784                            float[] temp = new float[2 * rows];
785                            for (int s = firstSlice; s < lastSlice; s++) {
786                                for (int c = 0; c < columns; c++) {
787                                    int idx2 = 2 * c;
788                                    for (int r = 0; r < rows; r++) {
789                                        int idx4 = 2 * r;
790                                        temp[idx4] = a[s][r][idx2];
791                                        temp[idx4 + 1] = a[s][r][idx2 + 1];
792                                    }
793                                    fftRows.complexInverse(temp, scale);
794                                    for (int r = 0; r < rows; r++) {
795                                        int idx4 = 2 * r;
796                                        a[s][r][idx2] = temp[idx4];
797                                        a[s][r][idx2 + 1] = temp[idx4 + 1];
798                                    }
799                                }
800                            }
801                        }
802                    });
803                }
804                ConcurrencyUtils.waitForCompletion(futures);
805
806                p = rows / nthreads;
807                for (int l = 0; l < nthreads; l++) {
808                    final int firstRow = l * p;
809                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
810
811                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
812                        @Override
813                                                public void run() {
814                            float[] temp = new float[2 * slices];
815                            for (int r = firstRow; r < lastRow; r++) {
816                                for (int c = 0; c < columns; c++) {
817                                    int idx2 = 2 * c;
818                                    for (int s = 0; s < slices; s++) {
819                                        int idx4 = 2 * s;
820                                        temp[idx4] = a[s][r][idx2];
821                                        temp[idx4 + 1] = a[s][r][idx2 + 1];
822                                    }
823                                    fftSlices.complexInverse(temp, scale);
824                                    for (int s = 0; s < slices; s++) {
825                                        int idx4 = 2 * s;
826                                        a[s][r][idx2] = temp[idx4];
827                                        a[s][r][idx2 + 1] = temp[idx4 + 1];
828                                    }
829                                }
830                            }
831                        }
832                    });
833                }
834                ConcurrencyUtils.waitForCompletion(futures);
835
836            } else {
837                for (int s = 0; s < slices; s++) {
838                    for (int r = 0; r < rows; r++) {
839                        fftColumns.complexInverse(a[s][r], scale);
840                    }
841                }
842                float[] temp = new float[2 * rows];
843                for (int s = 0; s < slices; s++) {
844                    for (int c = 0; c < columns; c++) {
845                        int idx2 = 2 * c;
846                        for (int r = 0; r < rows; r++) {
847                            int idx4 = 2 * r;
848                            temp[idx4] = a[s][r][idx2];
849                            temp[idx4 + 1] = a[s][r][idx2 + 1];
850                        }
851                        fftRows.complexInverse(temp, scale);
852                        for (int r = 0; r < rows; r++) {
853                            int idx4 = 2 * r;
854                            a[s][r][idx2] = temp[idx4];
855                            a[s][r][idx2 + 1] = temp[idx4 + 1];
856                        }
857                    }
858                }
859                temp = new float[2 * slices];
860                for (int r = 0; r < rows; r++) {
861                    for (int c = 0; c < columns; c++) {
862                        int idx2 = 2 * c;
863                        for (int s = 0; s < slices; s++) {
864                            int idx4 = 2 * s;
865                            temp[idx4] = a[s][r][idx2];
866                            temp[idx4 + 1] = a[s][r][idx2 + 1];
867                        }
868                        fftSlices.complexInverse(temp, scale);
869                        for (int s = 0; s < slices; s++) {
870                            int idx4 = 2 * s;
871                            a[s][r][idx2] = temp[idx4];
872                            a[s][r][idx2 + 1] = temp[idx4 + 1];
873                        }
874                    }
875                }
876            }
877        }
878    }
879
880    /**
881     * Computes 3D forward DFT of real data leaving the result in <code>a</code>
882     * . This method only works when the sizes of all three dimensions are
883     * power-of-two numbers. The data is stored in a 1D array addressed in
884     * slice-major, then row-major, then column-major, in order of significance,
885     * i.e. element (i,j,k) of 3-d array x[slices][rows][2*columns] is stored in
886     * a[i*sliceStride + j*rowStride + k], where sliceStride = rows * 2 * columns and
887     * rowStride = 2 * columns. The physical layout of the output data is as follows:
888     *
889     * <pre>
890     * a[k1*sliceStride + k2*rowStride + 2*k3] = Re[k1][k2][k3]
891     *                 = Re[(slices-k1)%slices][(rows-k2)%rows][columns-k3],
892     * a[k1*sliceStride + k2*rowStride + 2*k3+1] = Im[k1][k2][k3]
893     *                   = -Im[(slices-k1)%slices][(rows-k2)%rows][columns-k3],
894     *     0&lt;=k1&lt;slices, 0&lt;=k2&lt;rows, 0&lt;k3&lt;columns/2,
895     * a[k1*sliceStride + k2*rowStride] = Re[k1][k2][0]
896     *              = Re[(slices-k1)%slices][rows-k2][0],
897     * a[k1*sliceStride + k2*rowStride + 1] = Im[k1][k2][0]
898     *              = -Im[(slices-k1)%slices][rows-k2][0],
899     * a[k1*sliceStride + (rows-k2)*rowStride + 1] = Re[(slices-k1)%slices][k2][columns/2]
900     *                 = Re[k1][rows-k2][columns/2],
901     * a[k1*sliceStride + (rows-k2)*rowStride] = -Im[(slices-k1)%slices][k2][columns/2]
902     *                 = Im[k1][rows-k2][columns/2],
903     *     0&lt;=k1&lt;slices, 0&lt;k2&lt;rows/2,
904     * a[k1*sliceStride] = Re[k1][0][0]
905     *             = Re[slices-k1][0][0],
906     * a[k1*sliceStride + 1] = Im[k1][0][0]
907     *             = -Im[slices-k1][0][0],
908     * a[k1*sliceStride + (rows/2)*rowStride] = Re[k1][rows/2][0]
909     *                = Re[slices-k1][rows/2][0],
910     * a[k1*sliceStride + (rows/2)*rowStride + 1] = Im[k1][rows/2][0]
911     *                = -Im[slices-k1][rows/2][0],
912     * a[(slices-k1)*sliceStride + 1] = Re[k1][0][columns/2]
913     *                = Re[slices-k1][0][columns/2],
914     * a[(slices-k1)*sliceStride] = -Im[k1][0][columns/2]
915     *                = Im[slices-k1][0][columns/2],
916     * a[(slices-k1)*sliceStride + (rows/2)*rowStride + 1] = Re[k1][rows/2][columns/2]
917     *                   = Re[slices-k1][rows/2][columns/2],
918     * a[(slices-k1)*sliceStride + (rows/2) * rowStride] = -Im[k1][rows/2][columns/2]
919     *                   = Im[slices-k1][rows/2][columns/2],
920     *     0&lt;k1&lt;slices/2,
921     * a[0] = Re[0][0][0],
922     * a[1] = Re[0][0][columns/2],
923     * a[(rows/2)*rowStride] = Re[0][rows/2][0],
924     * a[(rows/2)*rowStride + 1] = Re[0][rows/2][columns/2],
925     * a[(slices/2)*sliceStride] = Re[slices/2][0][0],
926     * a[(slices/2)*sliceStride + 1] = Re[slices/2][0][columns/2],
927     * a[(slices/2)*sliceStride + (rows/2)*rowStride] = Re[slices/2][rows/2][0],
928     * a[(slices/2)*sliceStride + (rows/2)*rowStride + 1] = Re[slices/2][rows/2][columns/2]
929     * </pre>
930     *
931     *
932     * This method computes only half of the elements of the real transform. The
933     * other half satisfies the symmetry condition. If you want the full real
934     * forward transform, use <code>realForwardFull</code>. To get back the
935     * original data, use <code>realInverse</code> on the output of this method.
936     *
937     * @param a
938     *            data to transform
939     */
940    public void realForward(float[] a) {
941        if (isPowerOfTwo == false) {
942            throw new IllegalArgumentException("slices, rows and columns must be power of two numbers");
943        } else {
944            int nthreads = ConcurrencyUtils.getNumberOfThreads();
945            if (nthreads != oldNthreads) {
946                nt = slices;
947                if (nt < rows) {
948                    nt = rows;
949                }
950                nt *= 8;
951                if (nthreads > 1) {
952                    nt *= nthreads;
953                }
954                if (columns == 4) {
955                    nt >>= 1;
956                } else if (columns < 4) {
957                    nt >>= 2;
958                }
959                t = new float[nt];
960                oldNthreads = nthreads;
961            }
962            if ((nthreads > 1) && useThreads) {
963                xdft3da_subth1(1, -1, a, true);
964                cdft3db_subth(-1, a, true);
965                rdft3d_sub(1, a);
966            } else {
967                xdft3da_sub1(1, -1, a, true);
968                cdft3db_sub(-1, a, true);
969                rdft3d_sub(1, a);
970            }
971        }
972    }
973
974    /**
975     * Computes 3D forward DFT of real data leaving the result in <code>a</code>
976     * . This method only works when the sizes of all three dimensions are
977     * power-of-two numbers. The data is stored in a 3D array. The physical
978     * layout of the output data is as follows:
979     *
980     * <pre>
981     * a[k1][k2][2*k3] = Re[k1][k2][k3]
982     *                 = Re[(slices-k1)%slices][(rows-k2)%rows][columns-k3],
983     * a[k1][k2][2*k3+1] = Im[k1][k2][k3]
984     *                   = -Im[(slices-k1)%slices][(rows-k2)%rows][columns-k3],
985     *     0&lt;=k1&lt;slices, 0&lt;=k2&lt;rows, 0&lt;k3&lt;columns/2,
986     * a[k1][k2][0] = Re[k1][k2][0]
987     *              = Re[(slices-k1)%slices][rows-k2][0],
988     * a[k1][k2][1] = Im[k1][k2][0]
989     *              = -Im[(slices-k1)%slices][rows-k2][0],
990     * a[k1][rows-k2][1] = Re[(slices-k1)%slices][k2][columns/2]
991     *                 = Re[k1][rows-k2][columns/2],
992     * a[k1][rows-k2][0] = -Im[(slices-k1)%slices][k2][columns/2]
993     *                 = Im[k1][rows-k2][columns/2],
994     *     0&lt;=k1&lt;slices, 0&lt;k2&lt;rows/2,
995     * a[k1][0][0] = Re[k1][0][0]
996     *             = Re[slices-k1][0][0],
997     * a[k1][0][1] = Im[k1][0][0]
998     *             = -Im[slices-k1][0][0],
999     * a[k1][rows/2][0] = Re[k1][rows/2][0]
1000     *                = Re[slices-k1][rows/2][0],
1001     * a[k1][rows/2][1] = Im[k1][rows/2][0]
1002     *                = -Im[slices-k1][rows/2][0],
1003     * a[slices-k1][0][1] = Re[k1][0][columns/2]
1004     *                = Re[slices-k1][0][columns/2],
1005     * a[slices-k1][0][0] = -Im[k1][0][columns/2]
1006     *                = Im[slices-k1][0][columns/2],
1007     * a[slices-k1][rows/2][1] = Re[k1][rows/2][columns/2]
1008     *                   = Re[slices-k1][rows/2][columns/2],
1009     * a[slices-k1][rows/2][0] = -Im[k1][rows/2][columns/2]
1010     *                   = Im[slices-k1][rows/2][columns/2],
1011     *     0&lt;k1&lt;slices/2,
1012     * a[0][0][0] = Re[0][0][0],
1013     * a[0][0][1] = Re[0][0][columns/2],
1014     * a[0][rows/2][0] = Re[0][rows/2][0],
1015     * a[0][rows/2][1] = Re[0][rows/2][columns/2],
1016     * a[slices/2][0][0] = Re[slices/2][0][0],
1017     * a[slices/2][0][1] = Re[slices/2][0][columns/2],
1018     * a[slices/2][rows/2][0] = Re[slices/2][rows/2][0],
1019     * a[slices/2][rows/2][1] = Re[slices/2][rows/2][columns/2]
1020     * </pre>
1021     *
1022     *
1023     * This method computes only half of the elements of the real transform. The
1024     * other half satisfies the symmetry condition. If you want the full real
1025     * forward transform, use <code>realForwardFull</code>. To get back the
1026     * original data, use <code>realInverse</code> on the output of this method.
1027     *
1028     * @param a
1029     *            data to transform
1030     */
1031    public void realForward(float[][][] a) {
1032        if (isPowerOfTwo == false) {
1033            throw new IllegalArgumentException("slices, rows and columns must be power of two numbers");
1034        } else {
1035            int nthreads = ConcurrencyUtils.getNumberOfThreads();
1036            if (nthreads != oldNthreads) {
1037                nt = slices;
1038                if (nt < rows) {
1039                    nt = rows;
1040                }
1041                nt *= 8;
1042                if (nthreads > 1) {
1043                    nt *= nthreads;
1044                }
1045                if (columns == 4) {
1046                    nt >>= 1;
1047                } else if (columns < 4) {
1048                    nt >>= 2;
1049                }
1050                t = new float[nt];
1051                oldNthreads = nthreads;
1052            }
1053            if ((nthreads > 1) && useThreads) {
1054                xdft3da_subth1(1, -1, a, true);
1055                cdft3db_subth(-1, a, true);
1056                rdft3d_sub(1, a);
1057            } else {
1058                xdft3da_sub1(1, -1, a, true);
1059                cdft3db_sub(-1, a, true);
1060                rdft3d_sub(1, a);
1061            }
1062        }
1063    }
1064
1065    /**
1066     * Computes 3D forward DFT of real data leaving the result in <code>a</code>
1067     * . This method computes full real forward transform, i.e. you will get the
1068     * same result as from <code>complexForward</code> called with all imaginary
1069     * part equal 0. Because the result is stored in <code>a</code>, the input
1070     * array must be of size slices*rows*2*columns, with only the first slices*rows*columns elements
1071     * filled with real data. To get back the original data, use
1072     * <code>complexInverse</code> on the output of this method.
1073     *
1074     * @param a
1075     *            data to transform
1076     */
1077    public void realForwardFull(float[] a) {
1078        if (isPowerOfTwo) {
1079            int nthreads = ConcurrencyUtils.getNumberOfThreads();
1080            if (nthreads != oldNthreads) {
1081                nt = slices;
1082                if (nt < rows) {
1083                    nt = rows;
1084                }
1085                nt *= 8;
1086                if (nthreads > 1) {
1087                    nt *= nthreads;
1088                }
1089                if (columns == 4) {
1090                    nt >>= 1;
1091                } else if (columns < 4) {
1092                    nt >>= 2;
1093                }
1094                t = new float[nt];
1095                oldNthreads = nthreads;
1096            }
1097            if ((nthreads > 1) && useThreads) {
1098                xdft3da_subth2(1, -1, a, true);
1099                cdft3db_subth(-1, a, true);
1100                rdft3d_sub(1, a);
1101            } else {
1102                xdft3da_sub2(1, -1, a, true);
1103                cdft3db_sub(-1, a, true);
1104                rdft3d_sub(1, a);
1105            }
1106            fillSymmetric(a);
1107        } else {
1108            mixedRadixRealForwardFull(a);
1109        }
1110    }
1111
1112    /**
1113     * Computes 3D forward DFT of real data leaving the result in <code>a</code>
1114     * . This method computes full real forward transform, i.e. you will get the
1115     * same result as from <code>complexForward</code> called with all imaginary
1116     * part equal 0. Because the result is stored in <code>a</code>, the input
1117     * array must be of size slices by rows by 2*columns, with only the first slices by rows by
1118     * columns elements filled with real data. To get back the original data, use
1119     * <code>complexInverse</code> on the output of this method.
1120     *
1121     * @param a
1122     *            data to transform
1123     */
1124    public void realForwardFull(float[][][] a) {
1125        if (isPowerOfTwo) {
1126            int nthreads = ConcurrencyUtils.getNumberOfThreads();
1127            if (nthreads != oldNthreads) {
1128                nt = slices;
1129                if (nt < rows) {
1130                    nt = rows;
1131                }
1132                nt *= 8;
1133                if (nthreads > 1) {
1134                    nt *= nthreads;
1135                }
1136                if (columns == 4) {
1137                    nt >>= 1;
1138                } else if (columns < 4) {
1139                    nt >>= 2;
1140                }
1141                t = new float[nt];
1142                oldNthreads = nthreads;
1143            }
1144            if ((nthreads > 1) && useThreads) {
1145                xdft3da_subth2(1, -1, a, true);
1146                cdft3db_subth(-1, a, true);
1147                rdft3d_sub(1, a);
1148            } else {
1149                xdft3da_sub2(1, -1, a, true);
1150                cdft3db_sub(-1, a, true);
1151                rdft3d_sub(1, a);
1152            }
1153            fillSymmetric(a);
1154        } else {
1155            mixedRadixRealForwardFull(a);
1156        }
1157    }
1158
1159    /**
1160     * Computes 3D inverse DFT of real data leaving the result in <code>a</code>
1161     * . This method only works when the sizes of all three dimensions are
1162     * power-of-two numbers. The data is stored in a 1D array addressed in
1163     * slice-major, then row-major, then column-major, in order of significance,
1164     * i.e. element (i,j,k) of 3-d array x[slices][rows][2*columns] is stored in
1165     * a[i*sliceStride + j*rowStride + k], where sliceStride = rows * 2 * columns and
1166     * rowStride = 2 * columns. The physical layout of the input data has to be as
1167     * follows:
1168     *
1169     * <pre>
1170     * a[k1*sliceStride + k2*rowStride + 2*k3] = Re[k1][k2][k3]
1171     *                 = Re[(slices-k1)%slices][(rows-k2)%rows][columns-k3],
1172     * a[k1*sliceStride + k2*rowStride + 2*k3+1] = Im[k1][k2][k3]
1173     *                   = -Im[(slices-k1)%slices][(rows-k2)%rows][columns-k3],
1174     *     0&lt;=k1&lt;slices, 0&lt;=k2&lt;rows, 0&lt;k3&lt;columns/2,
1175     * a[k1*sliceStride + k2*rowStride] = Re[k1][k2][0]
1176     *              = Re[(slices-k1)%slices][rows-k2][0],
1177     * a[k1*sliceStride + k2*rowStride + 1] = Im[k1][k2][0]
1178     *              = -Im[(slices-k1)%slices][rows-k2][0],
1179     * a[k1*sliceStride + (rows-k2)*rowStride + 1] = Re[(slices-k1)%slices][k2][columns/2]
1180     *                 = Re[k1][rows-k2][columns/2],
1181     * a[k1*sliceStride + (rows-k2)*rowStride] = -Im[(slices-k1)%slices][k2][columns/2]
1182     *                 = Im[k1][rows-k2][columns/2],
1183     *     0&lt;=k1&lt;slices, 0&lt;k2&lt;rows/2,
1184     * a[k1*sliceStride] = Re[k1][0][0]
1185     *             = Re[slices-k1][0][0],
1186     * a[k1*sliceStride + 1] = Im[k1][0][0]
1187     *             = -Im[slices-k1][0][0],
1188     * a[k1*sliceStride + (rows/2)*rowStride] = Re[k1][rows/2][0]
1189     *                = Re[slices-k1][rows/2][0],
1190     * a[k1*sliceStride + (rows/2)*rowStride + 1] = Im[k1][rows/2][0]
1191     *                = -Im[slices-k1][rows/2][0],
1192     * a[(slices-k1)*sliceStride + 1] = Re[k1][0][columns/2]
1193     *                = Re[slices-k1][0][columns/2],
1194     * a[(slices-k1)*sliceStride] = -Im[k1][0][columns/2]
1195     *                = Im[slices-k1][0][columns/2],
1196     * a[(slices-k1)*sliceStride + (rows/2)*rowStride + 1] = Re[k1][rows/2][columns/2]
1197     *                   = Re[slices-k1][rows/2][columns/2],
1198     * a[(slices-k1)*sliceStride + (rows/2) * rowStride] = -Im[k1][rows/2][columns/2]
1199     *                   = Im[slices-k1][rows/2][columns/2],
1200     *     0&lt;k1&lt;slices/2,
1201     * a[0] = Re[0][0][0],
1202     * a[1] = Re[0][0][columns/2],
1203     * a[(rows/2)*rowStride] = Re[0][rows/2][0],
1204     * a[(rows/2)*rowStride + 1] = Re[0][rows/2][columns/2],
1205     * a[(slices/2)*sliceStride] = Re[slices/2][0][0],
1206     * a[(slices/2)*sliceStride + 1] = Re[slices/2][0][columns/2],
1207     * a[(slices/2)*sliceStride + (rows/2)*rowStride] = Re[slices/2][rows/2][0],
1208     * a[(slices/2)*sliceStride + (rows/2)*rowStride + 1] = Re[slices/2][rows/2][columns/2]
1209     * </pre>
1210     *
1211     * This method computes only half of the elements of the real transform. The
1212     * other half satisfies the symmetry condition. If you want the full real
1213     * inverse transform, use <code>realInverseFull</code>.
1214     *
1215     * @param a
1216     *            data to transform
1217     *
1218     * @param scale
1219     *            if true then scaling is performed
1220     */
1221    public void realInverse(float[] a, boolean scale) {
1222        if (isPowerOfTwo == false) {
1223            throw new IllegalArgumentException("slices, rows and columns must be power of two numbers");
1224        } else {
1225            int nthreads = ConcurrencyUtils.getNumberOfThreads();
1226            if (nthreads != oldNthreads) {
1227                nt = slices;
1228                if (nt < rows) {
1229                    nt = rows;
1230                }
1231                nt *= 8;
1232                if (nthreads > 1) {
1233                    nt *= nthreads;
1234                }
1235                if (columns == 4) {
1236                    nt >>= 1;
1237                } else if (columns < 4) {
1238                    nt >>= 2;
1239                }
1240                t = new float[nt];
1241                oldNthreads = nthreads;
1242            }
1243            if ((nthreads > 1) && useThreads) {
1244                rdft3d_sub(-1, a);
1245                cdft3db_subth(1, a, scale);
1246                xdft3da_subth1(1, 1, a, scale);
1247            } else {
1248                rdft3d_sub(-1, a);
1249                cdft3db_sub(1, a, scale);
1250                xdft3da_sub1(1, 1, a, scale);
1251            }
1252        }
1253    }
1254
1255    /**
1256     * Computes 3D inverse DFT of real data leaving the result in <code>a</code>
1257     * . This method only works when the sizes of all three dimensions are
1258     * power-of-two numbers. The data is stored in a 3D array. The physical
1259     * layout of the input data has to be as follows:
1260     *
1261     * <pre>
1262     * a[k1][k2][2*k3] = Re[k1][k2][k3]
1263     *                 = Re[(slices-k1)%slices][(rows-k2)%rows][columns-k3],
1264     * a[k1][k2][2*k3+1] = Im[k1][k2][k3]
1265     *                   = -Im[(slices-k1)%slices][(rows-k2)%rows][columns-k3],
1266     *     0&lt;=k1&lt;slices, 0&lt;=k2&lt;rows, 0&lt;k3&lt;columns/2,
1267     * a[k1][k2][0] = Re[k1][k2][0]
1268     *              = Re[(slices-k1)%slices][rows-k2][0],
1269     * a[k1][k2][1] = Im[k1][k2][0]
1270     *              = -Im[(slices-k1)%slices][rows-k2][0],
1271     * a[k1][rows-k2][1] = Re[(slices-k1)%slices][k2][columns/2]
1272     *                 = Re[k1][rows-k2][columns/2],
1273     * a[k1][rows-k2][0] = -Im[(slices-k1)%slices][k2][columns/2]
1274     *                 = Im[k1][rows-k2][columns/2],
1275     *     0&lt;=k1&lt;slices, 0&lt;k2&lt;rows/2,
1276     * a[k1][0][0] = Re[k1][0][0]
1277     *             = Re[slices-k1][0][0],
1278     * a[k1][0][1] = Im[k1][0][0]
1279     *             = -Im[slices-k1][0][0],
1280     * a[k1][rows/2][0] = Re[k1][rows/2][0]
1281     *                = Re[slices-k1][rows/2][0],
1282     * a[k1][rows/2][1] = Im[k1][rows/2][0]
1283     *                = -Im[slices-k1][rows/2][0],
1284     * a[slices-k1][0][1] = Re[k1][0][columns/2]
1285     *                = Re[slices-k1][0][columns/2],
1286     * a[slices-k1][0][0] = -Im[k1][0][columns/2]
1287     *                = Im[slices-k1][0][columns/2],
1288     * a[slices-k1][rows/2][1] = Re[k1][rows/2][columns/2]
1289     *                   = Re[slices-k1][rows/2][columns/2],
1290     * a[slices-k1][rows/2][0] = -Im[k1][rows/2][columns/2]
1291     *                   = Im[slices-k1][rows/2][columns/2],
1292     *     0&lt;k1&lt;slices/2,
1293     * a[0][0][0] = Re[0][0][0],
1294     * a[0][0][1] = Re[0][0][columns/2],
1295     * a[0][rows/2][0] = Re[0][rows/2][0],
1296     * a[0][rows/2][1] = Re[0][rows/2][columns/2],
1297     * a[slices/2][0][0] = Re[slices/2][0][0],
1298     * a[slices/2][0][1] = Re[slices/2][0][columns/2],
1299     * a[slices/2][rows/2][0] = Re[slices/2][rows/2][0],
1300     * a[slices/2][rows/2][1] = Re[slices/2][rows/2][columns/2]
1301     * </pre>
1302     *
1303     * This method computes only half of the elements of the real transform. The
1304     * other half satisfies the symmetry condition. If you want the full real
1305     * inverse transform, use <code>realInverseFull</code>.
1306     *
1307     * @param a
1308     *            data to transform
1309     *
1310     * @param scale
1311     *            if true then scaling is performed
1312     */
1313    public void realInverse(float[][][] a, boolean scale) {
1314        if (isPowerOfTwo == false) {
1315            throw new IllegalArgumentException("slices, rows and columns must be power of two numbers");
1316        } else {
1317            int nthreads = ConcurrencyUtils.getNumberOfThreads();
1318            if (nthreads != oldNthreads) {
1319                nt = slices;
1320                if (nt < rows) {
1321                    nt = rows;
1322                }
1323                nt *= 8;
1324                if (nthreads > 1) {
1325                    nt *= nthreads;
1326                }
1327                if (columns == 4) {
1328                    nt >>= 1;
1329                } else if (columns < 4) {
1330                    nt >>= 2;
1331                }
1332                t = new float[nt];
1333                oldNthreads = nthreads;
1334            }
1335            if ((nthreads > 1) && useThreads) {
1336                rdft3d_sub(-1, a);
1337                cdft3db_subth(1, a, scale);
1338                xdft3da_subth1(1, 1, a, scale);
1339            } else {
1340                rdft3d_sub(-1, a);
1341                cdft3db_sub(1, a, scale);
1342                xdft3da_sub1(1, 1, a, scale);
1343            }
1344        }
1345    }
1346
1347    /**
1348     * Computes 3D inverse DFT of real data leaving the result in <code>a</code>
1349     * . This method computes full real inverse transform, i.e. you will get the
1350     * same result as from <code>complexInverse</code> called with all imaginary
1351     * part equal 0. Because the result is stored in <code>a</code>, the input
1352     * array must be of size slices*rows*2*columns, with only the first slices*rows*columns elements
1353     * filled with real data.
1354     *
1355     * @param a
1356     *            data to transform
1357     * @param scale
1358     *            if true then scaling is performed
1359     */
1360    public void realInverseFull(float[] a, boolean scale) {
1361        if (isPowerOfTwo) {
1362            int nthreads = ConcurrencyUtils.getNumberOfThreads();
1363            if (nthreads != oldNthreads) {
1364                nt = slices;
1365                if (nt < rows) {
1366                    nt = rows;
1367                }
1368                nt *= 8;
1369                if (nthreads > 1) {
1370                    nt *= nthreads;
1371                }
1372                if (columns == 4) {
1373                    nt >>= 1;
1374                } else if (columns < 4) {
1375                    nt >>= 2;
1376                }
1377                t = new float[nt];
1378                oldNthreads = nthreads;
1379            }
1380            if ((nthreads > 1) && useThreads) {
1381                xdft3da_subth2(1, 1, a, scale);
1382                cdft3db_subth(1, a, scale);
1383                rdft3d_sub(1, a);
1384            } else {
1385                xdft3da_sub2(1, 1, a, scale);
1386                cdft3db_sub(1, a, scale);
1387                rdft3d_sub(1, a);
1388            }
1389            fillSymmetric(a);
1390        } else {
1391            mixedRadixRealInverseFull(a, scale);
1392        }
1393    }
1394
1395    /**
1396     * Computes 3D inverse DFT of real data leaving the result in <code>a</code>
1397     * . This method computes full real inverse transform, i.e. you will get the
1398     * same result as from <code>complexInverse</code> called with all imaginary
1399     * part equal 0. Because the result is stored in <code>a</code>, the input
1400     * array must be of size slices by rows by 2*columns, with only the first slices by rows by
1401     * columns elements filled with real data.
1402     *
1403     * @param a
1404     *            data to transform
1405     * @param scale
1406     *            if true then scaling is performed
1407     */
1408    public void realInverseFull(float[][][] a, boolean scale) {
1409        if (isPowerOfTwo) {
1410            int nthreads = ConcurrencyUtils.getNumberOfThreads();
1411            if (nthreads != oldNthreads) {
1412                nt = slices;
1413                if (nt < rows) {
1414                    nt = rows;
1415                }
1416                nt *= 8;
1417                if (nthreads > 1) {
1418                    nt *= nthreads;
1419                }
1420                if (columns == 4) {
1421                    nt >>= 1;
1422                } else if (columns < 4) {
1423                    nt >>= 2;
1424                }
1425                t = new float[nt];
1426                oldNthreads = nthreads;
1427            }
1428            if ((nthreads > 1) && useThreads) {
1429                xdft3da_subth2(1, 1, a, scale);
1430                cdft3db_subth(1, a, scale);
1431                rdft3d_sub(1, a);
1432            } else {
1433                xdft3da_sub2(1, 1, a, scale);
1434                cdft3db_sub(1, a, scale);
1435                rdft3d_sub(1, a);
1436            }
1437            fillSymmetric(a);
1438        } else {
1439            mixedRadixRealInverseFull(a, scale);
1440        }
1441    }
1442
1443    /* -------- child routines -------- */
1444
1445    private void mixedRadixRealForwardFull(final float[][][] a) {
1446        float[] temp = new float[2 * rows];
1447        int ldimn2 = rows / 2 + 1;
1448        final int newn3 = 2 * columns;
1449        final int n2d2;
1450        if (rows % 2 == 0) {
1451            n2d2 = rows / 2;
1452        } else {
1453            n2d2 = (rows + 1) / 2;
1454        }
1455
1456        int nthreads = ConcurrencyUtils.getNumberOfThreads();
1457        if ((nthreads > 1) && useThreads && (slices >= nthreads) && (columns >= nthreads) && (ldimn2 >= nthreads)) {
1458            Future<?>[] futures = new Future[nthreads];
1459            int p = slices / nthreads;
1460            for (int l = 0; l < nthreads; l++) {
1461                final int firstSlice = l * p;
1462                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
1463
1464                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1465                    @Override
1466                                        public void run() {
1467                        for (int s = firstSlice; s < lastSlice; s++) {
1468                            for (int r = 0; r < rows; r++) {
1469                                fftColumns.realForwardFull(a[s][r]);
1470                            }
1471                        }
1472                    }
1473                });
1474            }
1475            ConcurrencyUtils.waitForCompletion(futures);
1476
1477            for (int l = 0; l < nthreads; l++) {
1478                final int firstSlice = l * p;
1479                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
1480
1481                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1482                    @Override
1483                                        public void run() {
1484                        float[] temp = new float[2 * rows];
1485
1486                        for (int s = firstSlice; s < lastSlice; s++) {
1487                            for (int c = 0; c < columns; c++) {
1488                                int idx2 = 2 * c;
1489                                for (int r = 0; r < rows; r++) {
1490                                    int idx4 = 2 * r;
1491                                    temp[idx4] = a[s][r][idx2];
1492                                    temp[idx4 + 1] = a[s][r][idx2 + 1];
1493                                }
1494                                fftRows.complexForward(temp);
1495                                for (int r = 0; r < rows; r++) {
1496                                    int idx4 = 2 * r;
1497                                    a[s][r][idx2] = temp[idx4];
1498                                    a[s][r][idx2 + 1] = temp[idx4 + 1];
1499                                }
1500                            }
1501                        }
1502                    }
1503                });
1504            }
1505            ConcurrencyUtils.waitForCompletion(futures);
1506
1507            p = ldimn2 / nthreads;
1508            for (int l = 0; l < nthreads; l++) {
1509                final int firstRow = l * p;
1510                final int lastRow = (l == (nthreads - 1)) ? ldimn2 : firstRow + p;
1511
1512                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1513                    @Override
1514                                        public void run() {
1515                        float[] temp = new float[2 * slices];
1516
1517                        for (int r = firstRow; r < lastRow; r++) {
1518                            for (int c = 0; c < columns; c++) {
1519                                int idx1 = 2 * c;
1520                                for (int s = 0; s < slices; s++) {
1521                                    int idx2 = 2 * s;
1522                                    temp[idx2] = a[s][r][idx1];
1523                                    temp[idx2 + 1] = a[s][r][idx1 + 1];
1524                                }
1525                                fftSlices.complexForward(temp);
1526                                for (int s = 0; s < slices; s++) {
1527                                    int idx2 = 2 * s;
1528                                    a[s][r][idx1] = temp[idx2];
1529                                    a[s][r][idx1 + 1] = temp[idx2 + 1];
1530                                }
1531                            }
1532                        }
1533                    }
1534                });
1535            }
1536            ConcurrencyUtils.waitForCompletion(futures);
1537            p = slices / nthreads;
1538            for (int l = 0; l < nthreads; l++) {
1539                final int firstSlice = l * p;
1540                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
1541
1542                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1543                    @Override
1544                                        public void run() {
1545
1546                        for (int s = firstSlice; s < lastSlice; s++) {
1547                            int idx2 = (slices - s) % slices;
1548                            for (int r = 1; r < n2d2; r++) {
1549                                int idx4 = rows - r;
1550                                for (int c = 0; c < columns; c++) {
1551                                    int idx1 = 2 * c;
1552                                    int idx3 = newn3 - idx1;
1553                                    a[idx2][idx4][idx3 % newn3] = a[s][r][idx1];
1554                                    a[idx2][idx4][(idx3 + 1) % newn3] = -a[s][r][idx1 + 1];
1555                                }
1556                            }
1557                        }
1558                    }
1559                });
1560            }
1561            ConcurrencyUtils.waitForCompletion(futures);
1562        } else {
1563
1564            for (int s = 0; s < slices; s++) {
1565                for (int r = 0; r < rows; r++) {
1566                    fftColumns.realForwardFull(a[s][r]);
1567                }
1568            }
1569
1570            for (int s = 0; s < slices; s++) {
1571                for (int c = 0; c < columns; c++) {
1572                    int idx2 = 2 * c;
1573                    for (int r = 0; r < rows; r++) {
1574                        int idx4 = 2 * r;
1575                        temp[idx4] = a[s][r][idx2];
1576                        temp[idx4 + 1] = a[s][r][idx2 + 1];
1577                    }
1578                    fftRows.complexForward(temp);
1579                    for (int r = 0; r < rows; r++) {
1580                        int idx4 = 2 * r;
1581                        a[s][r][idx2] = temp[idx4];
1582                        a[s][r][idx2 + 1] = temp[idx4 + 1];
1583                    }
1584                }
1585            }
1586
1587            temp = new float[2 * slices];
1588
1589            for (int r = 0; r < ldimn2; r++) {
1590                for (int c = 0; c < columns; c++) {
1591                    int idx1 = 2 * c;
1592                    for (int s = 0; s < slices; s++) {
1593                        int idx2 = 2 * s;
1594                        temp[idx2] = a[s][r][idx1];
1595                        temp[idx2 + 1] = a[s][r][idx1 + 1];
1596                    }
1597                    fftSlices.complexForward(temp);
1598                    for (int s = 0; s < slices; s++) {
1599                        int idx2 = 2 * s;
1600                        a[s][r][idx1] = temp[idx2];
1601                        a[s][r][idx1 + 1] = temp[idx2 + 1];
1602                    }
1603                }
1604            }
1605
1606            for (int s = 0; s < slices; s++) {
1607                int idx2 = (slices - s) % slices;
1608                for (int r = 1; r < n2d2; r++) {
1609                    int idx4 = rows - r;
1610                    for (int c = 0; c < columns; c++) {
1611                        int idx1 = 2 * c;
1612                        int idx3 = newn3 - idx1;
1613                        a[idx2][idx4][idx3 % newn3] = a[s][r][idx1];
1614                        a[idx2][idx4][(idx3 + 1) % newn3] = -a[s][r][idx1 + 1];
1615                    }
1616                }
1617            }
1618
1619        }
1620    }
1621
1622    private void mixedRadixRealInverseFull(final float[][][] a, final boolean scale) {
1623        float[] temp = new float[2 * rows];
1624        int ldimn2 = rows / 2 + 1;
1625        final int newn3 = 2 * columns;
1626        final int n2d2;
1627        if (rows % 2 == 0) {
1628            n2d2 = rows / 2;
1629        } else {
1630            n2d2 = (rows + 1) / 2;
1631        }
1632
1633        int nthreads = ConcurrencyUtils.getNumberOfThreads();
1634        if ((nthreads > 1) && useThreads && (slices >= nthreads) && (columns >= nthreads) && (ldimn2 >= nthreads)) {
1635            Future<?>[] futures = new Future[nthreads];
1636            int p = slices / nthreads;
1637            for (int l = 0; l < nthreads; l++) {
1638                final int firstSlice = l * p;
1639                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
1640
1641                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1642                    @Override
1643                                        public void run() {
1644                        for (int s = firstSlice; s < lastSlice; s++) {
1645                            for (int r = 0; r < rows; r++) {
1646                                fftColumns.realInverseFull(a[s][r], scale);
1647                            }
1648                        }
1649                    }
1650                });
1651            }
1652            ConcurrencyUtils.waitForCompletion(futures);
1653
1654            for (int l = 0; l < nthreads; l++) {
1655                final int firstSlice = l * p;
1656                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
1657
1658                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1659                    @Override
1660                                        public void run() {
1661                        float[] temp = new float[2 * rows];
1662
1663                        for (int s = firstSlice; s < lastSlice; s++) {
1664                            for (int c = 0; c < columns; c++) {
1665                                int idx2 = 2 * c;
1666                                for (int r = 0; r < rows; r++) {
1667                                    int idx4 = 2 * r;
1668                                    temp[idx4] = a[s][r][idx2];
1669                                    temp[idx4 + 1] = a[s][r][idx2 + 1];
1670                                }
1671                                fftRows.complexInverse(temp, scale);
1672                                for (int r = 0; r < rows; r++) {
1673                                    int idx4 = 2 * r;
1674                                    a[s][r][idx2] = temp[idx4];
1675                                    a[s][r][idx2 + 1] = temp[idx4 + 1];
1676                                }
1677                            }
1678                        }
1679                    }
1680                });
1681            }
1682            ConcurrencyUtils.waitForCompletion(futures);
1683
1684            p = ldimn2 / nthreads;
1685            for (int l = 0; l < nthreads; l++) {
1686                final int firstRow = l * p;
1687                final int lastRow = (l == (nthreads - 1)) ? ldimn2 : firstRow + p;
1688                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1689                    @Override
1690                                        public void run() {
1691                        float[] temp = new float[2 * slices];
1692
1693                        for (int r = firstRow; r < lastRow; r++) {
1694                            for (int c = 0; c < columns; c++) {
1695                                int idx1 = 2 * c;
1696                                for (int s = 0; s < slices; s++) {
1697                                    int idx2 = 2 * s;
1698                                    temp[idx2] = a[s][r][idx1];
1699                                    temp[idx2 + 1] = a[s][r][idx1 + 1];
1700                                }
1701                                fftSlices.complexInverse(temp, scale);
1702                                for (int s = 0; s < slices; s++) {
1703                                    int idx2 = 2 * s;
1704                                    a[s][r][idx1] = temp[idx2];
1705                                    a[s][r][idx1 + 1] = temp[idx2 + 1];
1706                                }
1707                            }
1708                        }
1709                    }
1710                });
1711            }
1712            ConcurrencyUtils.waitForCompletion(futures);
1713            p = slices / nthreads;
1714            for (int l = 0; l < nthreads; l++) {
1715                final int firstSlice = l * p;
1716                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
1717
1718                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1719                    @Override
1720                                        public void run() {
1721
1722                        for (int s = firstSlice; s < lastSlice; s++) {
1723                            int idx2 = (slices - s) % slices;
1724                            for (int r = 1; r < n2d2; r++) {
1725                                int idx4 = rows - r;
1726                                for (int c = 0; c < columns; c++) {
1727                                    int idx1 = 2 * c;
1728                                    int idx3 = newn3 - idx1;
1729                                    a[idx2][idx4][idx3 % newn3] = a[s][r][idx1];
1730                                    a[idx2][idx4][(idx3 + 1) % newn3] = -a[s][r][idx1 + 1];
1731                                }
1732                            }
1733                        }
1734                    }
1735                });
1736            }
1737            ConcurrencyUtils.waitForCompletion(futures);
1738        } else {
1739
1740            for (int s = 0; s < slices; s++) {
1741                for (int r = 0; r < rows; r++) {
1742                    fftColumns.realInverseFull(a[s][r], scale);
1743                }
1744            }
1745
1746            for (int s = 0; s < slices; s++) {
1747                for (int c = 0; c < columns; c++) {
1748                    int idx2 = 2 * c;
1749                    for (int r = 0; r < rows; r++) {
1750                        int idx4 = 2 * r;
1751                        temp[idx4] = a[s][r][idx2];
1752                        temp[idx4 + 1] = a[s][r][idx2 + 1];
1753                    }
1754                    fftRows.complexInverse(temp, scale);
1755                    for (int r = 0; r < rows; r++) {
1756                        int idx4 = 2 * r;
1757                        a[s][r][idx2] = temp[idx4];
1758                        a[s][r][idx2 + 1] = temp[idx4 + 1];
1759                    }
1760                }
1761            }
1762
1763            temp = new float[2 * slices];
1764
1765            for (int r = 0; r < ldimn2; r++) {
1766                for (int c = 0; c < columns; c++) {
1767                    int idx1 = 2 * c;
1768                    for (int s = 0; s < slices; s++) {
1769                        int idx2 = 2 * s;
1770                        temp[idx2] = a[s][r][idx1];
1771                        temp[idx2 + 1] = a[s][r][idx1 + 1];
1772                    }
1773                    fftSlices.complexInverse(temp, scale);
1774                    for (int s = 0; s < slices; s++) {
1775                        int idx2 = 2 * s;
1776                        a[s][r][idx1] = temp[idx2];
1777                        a[s][r][idx1 + 1] = temp[idx2 + 1];
1778                    }
1779                }
1780            }
1781
1782            for (int s = 0; s < slices; s++) {
1783                int idx2 = (slices - s) % slices;
1784                for (int r = 1; r < n2d2; r++) {
1785                    int idx4 = rows - r;
1786                    for (int c = 0; c < columns; c++) {
1787                        int idx1 = 2 * c;
1788                        int idx3 = newn3 - idx1;
1789                        a[idx2][idx4][idx3 % newn3] = a[s][r][idx1];
1790                        a[idx2][idx4][(idx3 + 1) % newn3] = -a[s][r][idx1 + 1];
1791                    }
1792                }
1793            }
1794
1795        }
1796    }
1797
1798    private void mixedRadixRealForwardFull(final float[] a) {
1799        final int twon3 = 2 * columns;
1800        float[] temp = new float[twon3];
1801        int ldimn2 = rows / 2 + 1;
1802        final int n2d2;
1803        if (rows % 2 == 0) {
1804            n2d2 = rows / 2;
1805        } else {
1806            n2d2 = (rows + 1) / 2;
1807        }
1808
1809        final int twoSliceStride = 2 * sliceStride;
1810        final int twoRowStride = 2 * rowStride;
1811        int n1d2 = slices / 2;
1812        int nthreads = ConcurrencyUtils.getNumberOfThreads();
1813        if ((nthreads > 1) && useThreads && (n1d2 >= nthreads) && (columns >= nthreads) && (ldimn2 >= nthreads)) {
1814            Future<?>[] futures = new Future[nthreads];
1815            int p = n1d2 / nthreads;
1816            for (int l = 0; l < nthreads; l++) {
1817                final int firstSlice = slices - 1 - l * p;
1818                final int lastSlice = (l == (nthreads - 1)) ? n1d2 + 1 : firstSlice - p;
1819                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1820                    @Override
1821                                        public void run() {
1822                        float[] temp = new float[twon3];
1823                        for (int s = firstSlice; s >= lastSlice; s--) {
1824                            int idx1 = s * sliceStride;
1825                            int idx2 = s * twoSliceStride;
1826                            for (int r = rows - 1; r >= 0; r--) {
1827                                System.arraycopy(a, idx1 + r * rowStride, temp, 0, columns);
1828                                fftColumns.realForwardFull(temp);
1829                                System.arraycopy(temp, 0, a, idx2 + r * twoRowStride, twon3);
1830                            }
1831                        }
1832                    }
1833                });
1834            }
1835            ConcurrencyUtils.waitForCompletion(futures);
1836
1837            final float[][][] temp2 = new float[n1d2 + 1][rows][twon3];
1838
1839            for (int l = 0; l < nthreads; l++) {
1840                final int firstSlice = l * p;
1841                final int lastSlice = (l == (nthreads - 1)) ? n1d2 + 1 : firstSlice + p;
1842                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1843                    @Override
1844                                        public void run() {
1845                        for (int s = firstSlice; s < lastSlice; s++) {
1846                            int idx1 = s * sliceStride;
1847                            for (int r = 0; r < rows; r++) {
1848                                System.arraycopy(a, idx1 + r * rowStride, temp2[s][r], 0, columns);
1849                                fftColumns.realForwardFull(temp2[s][r]);
1850                            }
1851                        }
1852                    }
1853                });
1854            }
1855            ConcurrencyUtils.waitForCompletion(futures);
1856
1857            for (int l = 0; l < nthreads; l++) {
1858                final int firstSlice = l * p;
1859                final int lastSlice = (l == (nthreads - 1)) ? n1d2 + 1 : firstSlice + p;
1860                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1861                    @Override
1862                                        public void run() {
1863                        for (int s = firstSlice; s < lastSlice; s++) {
1864                            int idx1 = s * twoSliceStride;
1865                            for (int r = 0; r < rows; r++) {
1866                                System.arraycopy(temp2[s][r], 0, a, idx1 + r * twoRowStride, twon3);
1867                            }
1868                        }
1869                    }
1870                });
1871            }
1872            ConcurrencyUtils.waitForCompletion(futures);
1873
1874            p = slices / nthreads;
1875            for (int l = 0; l < nthreads; l++) {
1876                final int firstSlice = l * p;
1877                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
1878                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1879                    @Override
1880                                        public void run() {
1881                        float[] temp = new float[2 * rows];
1882
1883                        for (int s = firstSlice; s < lastSlice; s++) {
1884                            int idx1 = s * twoSliceStride;
1885                            for (int c = 0; c < columns; c++) {
1886                                int idx2 = 2 * c;
1887                                for (int r = 0; r < rows; r++) {
1888                                    int idx3 = idx1 + r * twoRowStride + idx2;
1889                                    int idx4 = 2 * r;
1890                                    temp[idx4] = a[idx3];
1891                                    temp[idx4 + 1] = a[idx3 + 1];
1892                                }
1893                                fftRows.complexForward(temp);
1894                                for (int r = 0; r < rows; r++) {
1895                                    int idx3 = idx1 + r * twoRowStride + idx2;
1896                                    int idx4 = 2 * r;
1897                                    a[idx3] = temp[idx4];
1898                                    a[idx3 + 1] = temp[idx4 + 1];
1899                                }
1900                            }
1901                        }
1902                    }
1903                });
1904            }
1905            ConcurrencyUtils.waitForCompletion(futures);
1906
1907            p = ldimn2 / nthreads;
1908            for (int l = 0; l < nthreads; l++) {
1909                final int firstRow = l * p;
1910                final int lastRow = (l == (nthreads - 1)) ? ldimn2 : firstRow + p;
1911                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1912                    @Override
1913                                        public void run() {
1914                        float[] temp = new float[2 * slices];
1915
1916                        for (int r = firstRow; r < lastRow; r++) {
1917                            int idx3 = r * twoRowStride;
1918                            for (int c = 0; c < columns; c++) {
1919                                int idx1 = 2 * c;
1920                                for (int s = 0; s < slices; s++) {
1921                                    int idx2 = 2 * s;
1922                                    int idx4 = s * twoSliceStride + idx3 + idx1;
1923                                    temp[idx2] = a[idx4];
1924                                    temp[idx2 + 1] = a[idx4 + 1];
1925                                }
1926                                fftSlices.complexForward(temp);
1927                                for (int s = 0; s < slices; s++) {
1928                                    int idx2 = 2 * s;
1929                                    int idx4 = s * twoSliceStride + idx3 + idx1;
1930                                    a[idx4] = temp[idx2];
1931                                    a[idx4 + 1] = temp[idx2 + 1];
1932                                }
1933                            }
1934                        }
1935                    }
1936                });
1937            }
1938            ConcurrencyUtils.waitForCompletion(futures);
1939            p = slices / nthreads;
1940            for (int l = 0; l < nthreads; l++) {
1941                final int firstSlice = l * p;
1942                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
1943                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1944                    @Override
1945                                        public void run() {
1946
1947                        for (int s = firstSlice; s < lastSlice; s++) {
1948                            int idx2 = (slices - s) % slices;
1949                            int idx5 = idx2 * twoSliceStride;
1950                            int idx6 = s * twoSliceStride;
1951                            for (int r = 1; r < n2d2; r++) {
1952                                int idx4 = rows - r;
1953                                int idx7 = idx4 * twoRowStride;
1954                                int idx8 = r * twoRowStride;
1955                                int idx9 = idx5 + idx7;
1956                                for (int c = 0; c < columns; c++) {
1957                                    int idx1 = 2 * c;
1958                                    int idx3 = twon3 - idx1;
1959                                    int idx10 = idx6 + idx8 + idx1;
1960                                    a[idx9 + idx3 % twon3] = a[idx10];
1961                                    a[idx9 + (idx3 + 1) % twon3] = -a[idx10 + 1];
1962                                }
1963                            }
1964                        }
1965                    }
1966                });
1967            }
1968            ConcurrencyUtils.waitForCompletion(futures);
1969        } else {
1970
1971            for (int s = slices - 1; s >= 0; s--) {
1972                int idx1 = s * sliceStride;
1973                int idx2 = s * twoSliceStride;
1974                for (int r = rows - 1; r >= 0; r--) {
1975                    System.arraycopy(a, idx1 + r * rowStride, temp, 0, columns);
1976                    fftColumns.realForwardFull(temp);
1977                    System.arraycopy(temp, 0, a, idx2 + r * twoRowStride, twon3);
1978                }
1979            }
1980
1981            temp = new float[2 * rows];
1982
1983            for (int s = 0; s < slices; s++) {
1984                int idx1 = s * twoSliceStride;
1985                for (int c = 0; c < columns; c++) {
1986                    int idx2 = 2 * c;
1987                    for (int r = 0; r < rows; r++) {
1988                        int idx4 = 2 * r;
1989                        int idx3 = idx1 + r * twoRowStride + idx2;
1990                        temp[idx4] = a[idx3];
1991                        temp[idx4 + 1] = a[idx3 + 1];
1992                    }
1993                    fftRows.complexForward(temp);
1994                    for (int r = 0; r < rows; r++) {
1995                        int idx4 = 2 * r;
1996                        int idx3 = idx1 + r * twoRowStride + idx2;
1997                        a[idx3] = temp[idx4];
1998                        a[idx3 + 1] = temp[idx4 + 1];
1999                    }
2000                }
2001            }
2002
2003            temp = new float[2 * slices];
2004
2005            for (int r = 0; r < ldimn2; r++) {
2006                int idx3 = r * twoRowStride;
2007                for (int c = 0; c < columns; c++) {
2008                    int idx1 = 2 * c;
2009                    for (int s = 0; s < slices; s++) {
2010                        int idx2 = 2 * s;
2011                        int idx4 = s * twoSliceStride + idx3 + idx1;
2012                        temp[idx2] = a[idx4];
2013                        temp[idx2 + 1] = a[idx4 + 1];
2014                    }
2015                    fftSlices.complexForward(temp);
2016                    for (int s = 0; s < slices; s++) {
2017                        int idx2 = 2 * s;
2018                        int idx4 = s * twoSliceStride + idx3 + idx1;
2019                        a[idx4] = temp[idx2];
2020                        a[idx4 + 1] = temp[idx2 + 1];
2021                    }
2022                }
2023            }
2024
2025            for (int s = 0; s < slices; s++) {
2026                int idx2 = (slices - s) % slices;
2027                int idx5 = idx2 * twoSliceStride;
2028                int idx6 = s * twoSliceStride;
2029                for (int r = 1; r < n2d2; r++) {
2030                    int idx4 = rows - r;
2031                    int idx7 = idx4 * twoRowStride;
2032                    int idx8 = r * twoRowStride;
2033                    int idx9 = idx5 + idx7;
2034                    for (int c = 0; c < columns; c++) {
2035                        int idx1 = 2 * c;
2036                        int idx3 = twon3 - idx1;
2037                        int idx10 = idx6 + idx8 + idx1;
2038                        a[idx9 + idx3 % twon3] = a[idx10];
2039                        a[idx9 + (idx3 + 1) % twon3] = -a[idx10 + 1];
2040                    }
2041                }
2042            }
2043
2044        }
2045    }
2046
2047    private void mixedRadixRealInverseFull(final float[] a, final boolean scale) {
2048        final int twon3 = 2 * columns;
2049        float[] temp = new float[twon3];
2050        int ldimn2 = rows / 2 + 1;
2051        final int n2d2;
2052        if (rows % 2 == 0) {
2053            n2d2 = rows / 2;
2054        } else {
2055            n2d2 = (rows + 1) / 2;
2056        }
2057
2058        final int twoSliceStride = 2 * sliceStride;
2059        final int twoRowStride = 2 * rowStride;
2060        int n1d2 = slices / 2;
2061
2062        int nthreads = ConcurrencyUtils.getNumberOfThreads();
2063        if ((nthreads > 1) && useThreads && (n1d2 >= nthreads) && (columns >= nthreads) && (ldimn2 >= nthreads)) {
2064            Future<?>[] futures = new Future[nthreads];
2065            int p = n1d2 / nthreads;
2066            for (int l = 0; l < nthreads; l++) {
2067                final int firstSlice = slices - 1 - l * p;
2068                final int lastSlice = (l == (nthreads - 1)) ? n1d2 + 1 : firstSlice - p;
2069                futures[l] = ConcurrencyUtils.submit(new Runnable() {
2070                    @Override
2071                                        public void run() {
2072                        float[] temp = new float[twon3];
2073                        for (int s = firstSlice; s >= lastSlice; s--) {
2074                            int idx1 = s * sliceStride;
2075                            int idx2 = s * twoSliceStride;
2076                            for (int r = rows - 1; r >= 0; r--) {
2077                                System.arraycopy(a, idx1 + r * rowStride, temp, 0, columns);
2078                                fftColumns.realInverseFull(temp, scale);
2079                                System.arraycopy(temp, 0, a, idx2 + r * twoRowStride, twon3);
2080                            }
2081                        }
2082                    }
2083                });
2084            }
2085            ConcurrencyUtils.waitForCompletion(futures);
2086
2087            final float[][][] temp2 = new float[n1d2 + 1][rows][twon3];
2088
2089            for (int l = 0; l < nthreads; l++) {
2090                final int firstSlice = l * p;
2091                final int lastSlice = (l == (nthreads - 1)) ? n1d2 + 1 : firstSlice + p;
2092                futures[l] = ConcurrencyUtils.submit(new Runnable() {
2093                    @Override
2094                                        public void run() {
2095                        for (int s = firstSlice; s < lastSlice; s++) {
2096                            int idx1 = s * sliceStride;
2097                            for (int r = 0; r < rows; r++) {
2098                                System.arraycopy(a, idx1 + r * rowStride, temp2[s][r], 0, columns);
2099                                fftColumns.realInverseFull(temp2[s][r], scale);
2100                            }
2101                        }
2102                    }
2103                });
2104            }
2105            ConcurrencyUtils.waitForCompletion(futures);
2106
2107            for (int l = 0; l < nthreads; l++) {
2108                final int firstSlice = l * p;
2109                final int lastSlice = (l == (nthreads - 1)) ? n1d2 + 1 : firstSlice + p;
2110                futures[l] = ConcurrencyUtils.submit(new Runnable() {
2111                    @Override
2112                                        public void run() {
2113                        for (int s = firstSlice; s < lastSlice; s++) {
2114                            int idx1 = s * twoSliceStride;
2115                            for (int r = 0; r < rows; r++) {
2116                                System.arraycopy(temp2[s][r], 0, a, idx1 + r * twoRowStride, twon3);
2117                            }
2118                        }
2119                    }
2120                });
2121            }
2122            ConcurrencyUtils.waitForCompletion(futures);
2123
2124            p = slices / nthreads;
2125            for (int l = 0; l < nthreads; l++) {
2126                final int firstSlice = l * p;
2127                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
2128                futures[l] = ConcurrencyUtils.submit(new Runnable() {
2129                    @Override
2130                                        public void run() {
2131                        float[] temp = new float[2 * rows];
2132
2133                        for (int s = firstSlice; s < lastSlice; s++) {
2134                            int idx1 = s * twoSliceStride;
2135                            for (int c = 0; c < columns; c++) {
2136                                int idx2 = 2 * c;
2137                                for (int r = 0; r < rows; r++) {
2138                                    int idx3 = idx1 + r * twoRowStride + idx2;
2139                                    int idx4 = 2 * r;
2140                                    temp[idx4] = a[idx3];
2141                                    temp[idx4 + 1] = a[idx3 + 1];
2142                                }
2143                                fftRows.complexInverse(temp, scale);
2144                                for (int r = 0; r < rows; r++) {
2145                                    int idx3 = idx1 + r * twoRowStride + idx2;
2146                                    int idx4 = 2 * r;
2147                                    a[idx3] = temp[idx4];
2148                                    a[idx3 + 1] = temp[idx4 + 1];
2149                                }
2150                            }
2151                        }
2152                    }
2153                });
2154            }
2155            ConcurrencyUtils.waitForCompletion(futures);
2156
2157            p = ldimn2 / nthreads;
2158            for (int l = 0; l < nthreads; l++) {
2159                final int firstRow = l * p;
2160                final int lastRow = (l == (nthreads - 1)) ? ldimn2 : firstRow + p;
2161                futures[l] = ConcurrencyUtils.submit(new Runnable() {
2162                    @Override
2163                                        public void run() {
2164                        float[] temp = new float[2 * slices];
2165
2166                        for (int r = firstRow; r < lastRow; r++) {
2167                            int idx3 = r * twoRowStride;
2168                            for (int c = 0; c < columns; c++) {
2169                                int idx1 = 2 * c;
2170                                for (int s = 0; s < slices; s++) {
2171                                    int idx2 = 2 * s;
2172                                    int idx4 = s * twoSliceStride + idx3 + idx1;
2173                                    temp[idx2] = a[idx4];
2174                                    temp[idx2 + 1] = a[idx4 + 1];
2175                                }
2176                                fftSlices.complexInverse(temp, scale);
2177                                for (int s = 0; s < slices; s++) {
2178                                    int idx2 = 2 * s;
2179                                    int idx4 = s * twoSliceStride + idx3 + idx1;
2180                                    a[idx4] = temp[idx2];
2181                                    a[idx4 + 1] = temp[idx2 + 1];
2182                                }
2183                            }
2184                        }
2185                    }
2186                });
2187            }
2188            ConcurrencyUtils.waitForCompletion(futures);
2189
2190            p = slices / nthreads;
2191            for (int l = 0; l < nthreads; l++) {
2192                final int firstSlice = l * p;
2193                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
2194                futures[l] = ConcurrencyUtils.submit(new Runnable() {
2195                    @Override
2196                                        public void run() {
2197
2198                        for (int s = firstSlice; s < lastSlice; s++) {
2199                            int idx2 = (slices - s) % slices;
2200                            int idx5 = idx2 * twoSliceStride;
2201                            int idx6 = s * twoSliceStride;
2202                            for (int r = 1; r < n2d2; r++) {
2203                                int idx4 = rows - r;
2204                                int idx7 = idx4 * twoRowStride;
2205                                int idx8 = r * twoRowStride;
2206                                int idx9 = idx5 + idx7;
2207                                for (int c = 0; c < columns; c++) {
2208                                    int idx1 = 2 * c;
2209                                    int idx3 = twon3 - idx1;
2210                                    int idx10 = idx6 + idx8 + idx1;
2211                                    a[idx9 + idx3 % twon3] = a[idx10];
2212                                    a[idx9 + (idx3 + 1) % twon3] = -a[idx10 + 1];
2213                                }
2214                            }
2215                        }
2216                    }
2217                });
2218            }
2219            ConcurrencyUtils.waitForCompletion(futures);
2220        } else {
2221
2222            for (int s = slices - 1; s >= 0; s--) {
2223                int idx1 = s * sliceStride;
2224                int idx2 = s * twoSliceStride;
2225                for (int r = rows - 1; r >= 0; r--) {
2226                    System.arraycopy(a, idx1 + r * rowStride, temp, 0, columns);
2227                    fftColumns.realInverseFull(temp, scale);
2228                    System.arraycopy(temp, 0, a, idx2 + r * twoRowStride, twon3);
2229                }
2230            }
2231
2232            temp = new float[2 * rows];
2233
2234            for (int s = 0; s < slices; s++) {
2235                int idx1 = s * twoSliceStride;
2236                for (int c = 0; c < columns; c++) {
2237                    int idx2 = 2 * c;
2238                    for (int r = 0; r < rows; r++) {
2239                        int idx4 = 2 * r;
2240                        int idx3 = idx1 + r * twoRowStride + idx2;
2241                        temp[idx4] = a[idx3];
2242                        temp[idx4 + 1] = a[idx3 + 1];
2243                    }
2244                    fftRows.complexInverse(temp, scale);
2245                    for (int r = 0; r < rows; r++) {
2246                        int idx4 = 2 * r;
2247                        int idx3 = idx1 + r * twoRowStride + idx2;
2248                        a[idx3] = temp[idx4];
2249                        a[idx3 + 1] = temp[idx4 + 1];
2250                    }
2251                }
2252            }
2253
2254            temp = new float[2 * slices];
2255
2256            for (int r = 0; r < ldimn2; r++) {
2257                int idx3 = r * twoRowStride;
2258                for (int c = 0; c < columns; c++) {
2259                    int idx1 = 2 * c;
2260                    for (int s = 0; s < slices; s++) {
2261                        int idx2 = 2 * s;
2262                        int idx4 = s * twoSliceStride + idx3 + idx1;
2263                        temp[idx2] = a[idx4];
2264                        temp[idx2 + 1] = a[idx4 + 1];
2265                    }
2266                    fftSlices.complexInverse(temp, scale);
2267                    for (int s = 0; s < slices; s++) {
2268                        int idx2 = 2 * s;
2269                        int idx4 = s * twoSliceStride + idx3 + idx1;
2270                        a[idx4] = temp[idx2];
2271                        a[idx4 + 1] = temp[idx2 + 1];
2272                    }
2273                }
2274            }
2275
2276            for (int s = 0; s < slices; s++) {
2277                int idx2 = (slices - s) % slices;
2278                int idx5 = idx2 * twoSliceStride;
2279                int idx6 = s * twoSliceStride;
2280                for (int r = 1; r < n2d2; r++) {
2281                    int idx4 = rows - r;
2282                    int idx7 = idx4 * twoRowStride;
2283                    int idx8 = r * twoRowStride;
2284                    int idx9 = idx5 + idx7;
2285                    for (int c = 0; c < columns; c++) {
2286                        int idx1 = 2 * c;
2287                        int idx3 = twon3 - idx1;
2288                        int idx10 = idx6 + idx8 + idx1;
2289                        a[idx9 + idx3 % twon3] = a[idx10];
2290                        a[idx9 + (idx3 + 1) % twon3] = -a[idx10 + 1];
2291                    }
2292                }
2293            }
2294
2295        }
2296    }
2297
2298    private void xdft3da_sub1(int icr, int isgn, float[] a, boolean scale) {
2299        int idx0, idx1, idx2, idx3, idx4, idx5;
2300
2301        if (isgn == -1) {
2302            for (int s = 0; s < slices; s++) {
2303                idx0 = s * sliceStride;
2304                if (icr == 0) {
2305                    for (int r = 0; r < rows; r++) {
2306                        fftColumns.complexForward(a, idx0 + r * rowStride);
2307                    }
2308                } else {
2309                    for (int r = 0; r < rows; r++) {
2310                        fftColumns.realForward(a, idx0 + r * rowStride);
2311                    }
2312                }
2313                if (columns > 4) {
2314                    for (int c = 0; c < columns; c += 8) {
2315                        for (int r = 0; r < rows; r++) {
2316                            idx1 = idx0 + r * rowStride + c;
2317                            idx2 = 2 * r;
2318                            idx3 = 2 * rows + 2 * r;
2319                            idx4 = idx3 + 2 * rows;
2320                            idx5 = idx4 + 2 * rows;
2321                            t[idx2] = a[idx1];
2322                            t[idx2 + 1] = a[idx1 + 1];
2323                            t[idx3] = a[idx1 + 2];
2324                            t[idx3 + 1] = a[idx1 + 3];
2325                            t[idx4] = a[idx1 + 4];
2326                            t[idx4 + 1] = a[idx1 + 5];
2327                            t[idx5] = a[idx1 + 6];
2328                            t[idx5 + 1] = a[idx1 + 7];
2329                        }
2330                        fftRows.complexForward(t, 0);
2331                        fftRows.complexForward(t, 2 * rows);
2332                        fftRows.complexForward(t, 4 * rows);
2333                        fftRows.complexForward(t, 6 * rows);
2334                        for (int r = 0; r < rows; r++) {
2335                            idx1 = idx0 + r * rowStride + c;
2336                            idx2 = 2 * r;
2337                            idx3 = 2 * rows + 2 * r;
2338                            idx4 = idx3 + 2 * rows;
2339                            idx5 = idx4 + 2 * rows;
2340                            a[idx1] = t[idx2];
2341                            a[idx1 + 1] = t[idx2 + 1];
2342                            a[idx1 + 2] = t[idx3];
2343                            a[idx1 + 3] = t[idx3 + 1];
2344                            a[idx1 + 4] = t[idx4];
2345                            a[idx1 + 5] = t[idx4 + 1];
2346                            a[idx1 + 6] = t[idx5];
2347                            a[idx1 + 7] = t[idx5 + 1];
2348                        }
2349                    }
2350                } else if (columns == 4) {
2351                    for (int r = 0; r < rows; r++) {
2352                        idx1 = idx0 + r * rowStride;
2353                        idx2 = 2 * r;
2354                        idx3 = 2 * rows + 2 * r;
2355                        t[idx2] = a[idx1];
2356                        t[idx2 + 1] = a[idx1 + 1];
2357                        t[idx3] = a[idx1 + 2];
2358                        t[idx3 + 1] = a[idx1 + 3];
2359                    }
2360                    fftRows.complexForward(t, 0);
2361                    fftRows.complexForward(t, 2 * rows);
2362                    for (int r = 0; r < rows; r++) {
2363                        idx1 = idx0 + r * rowStride;
2364                        idx2 = 2 * r;
2365                        idx3 = 2 * rows + 2 * r;
2366                        a[idx1] = t[idx2];
2367                        a[idx1 + 1] = t[idx2 + 1];
2368                        a[idx1 + 2] = t[idx3];
2369                        a[idx1 + 3] = t[idx3 + 1];
2370                    }
2371                } else if (columns == 2) {
2372                    for (int r = 0; r < rows; r++) {
2373                        idx1 = idx0 + r * rowStride;
2374                        idx2 = 2 * r;
2375                        t[idx2] = a[idx1];
2376                        t[idx2 + 1] = a[idx1 + 1];
2377                    }
2378                    fftRows.complexForward(t, 0);
2379                    for (int r = 0; r < rows; r++) {
2380                        idx1 = idx0 + r * rowStride;
2381                        idx2 = 2 * r;
2382                        a[idx1] = t[idx2];
2383                        a[idx1 + 1] = t[idx2 + 1];
2384                    }
2385                }
2386            }
2387        } else {
2388            for (int s = 0; s < slices; s++) {
2389                idx0 = s * sliceStride;
2390                if (icr == 0) {
2391                    for (int r = 0; r < rows; r++) {
2392                        fftColumns.complexInverse(a, idx0 + r * rowStride, scale);
2393                    }
2394                }
2395                if (columns > 4) {
2396                    for (int c = 0; c < columns; c += 8) {
2397                        for (int r = 0; r < rows; r++) {
2398                            idx1 = idx0 + r * rowStride + c;
2399                            idx2 = 2 * r;
2400                            idx3 = 2 * rows + 2 * r;
2401                            idx4 = idx3 + 2 * rows;
2402                            idx5 = idx4 + 2 * rows;
2403                            t[idx2] = a[idx1];
2404                            t[idx2 + 1] = a[idx1 + 1];
2405                            t[idx3] = a[idx1 + 2];
2406                            t[idx3 + 1] = a[idx1 + 3];
2407                            t[idx4] = a[idx1 + 4];
2408                            t[idx4 + 1] = a[idx1 + 5];
2409                            t[idx5] = a[idx1 + 6];
2410                            t[idx5 + 1] = a[idx1 + 7];
2411                        }
2412                        fftRows.complexInverse(t, 0, scale);
2413                        fftRows.complexInverse(t, 2 * rows, scale);
2414                        fftRows.complexInverse(t, 4 * rows, scale);
2415                        fftRows.complexInverse(t, 6 * rows, scale);
2416                        for (int r = 0; r < rows; r++) {
2417                            idx1 = idx0 + r * rowStride + c;
2418                            idx2 = 2 * r;
2419                            idx3 = 2 * rows + 2 * r;
2420                            idx4 = idx3 + 2 * rows;
2421                            idx5 = idx4 + 2 * rows;
2422                            a[idx1] = t[idx2];
2423                            a[idx1 + 1] = t[idx2 + 1];
2424                            a[idx1 + 2] = t[idx3];
2425                            a[idx1 + 3] = t[idx3 + 1];
2426                            a[idx1 + 4] = t[idx4];
2427                            a[idx1 + 5] = t[idx4 + 1];
2428                            a[idx1 + 6] = t[idx5];
2429                            a[idx1 + 7] = t[idx5 + 1];
2430                        }
2431                    }
2432                } else if (columns == 4) {
2433                    for (int r = 0; r < rows; r++) {
2434                        idx1 = idx0 + r * rowStride;
2435                        idx2 = 2 * r;
2436                        idx3 = 2 * rows + 2 * r;
2437                        t[idx2] = a[idx1];
2438                        t[idx2 + 1] = a[idx1 + 1];
2439                        t[idx3] = a[idx1 + 2];
2440                        t[idx3 + 1] = a[idx1 + 3];
2441                    }
2442                    fftRows.complexInverse(t, 0, scale);
2443                    fftRows.complexInverse(t, 2 * rows, scale);
2444                    for (int r = 0; r < rows; r++) {
2445                        idx1 = idx0 + r * rowStride;
2446                        idx2 = 2 * r;
2447                        idx3 = 2 * rows + 2 * r;
2448                        a[idx1] = t[idx2];
2449                        a[idx1 + 1] = t[idx2 + 1];
2450                        a[idx1 + 2] = t[idx3];
2451                        a[idx1 + 3] = t[idx3 + 1];
2452                    }
2453                } else if (columns == 2) {
2454                    for (int r = 0; r < rows; r++) {
2455                        idx1 = idx0 + r * rowStride;
2456                        idx2 = 2 * r;
2457                        t[idx2] = a[idx1];
2458                        t[idx2 + 1] = a[idx1 + 1];
2459                    }
2460                    fftRows.complexInverse(t, 0, scale);
2461                    for (int r = 0; r < rows; r++) {
2462                        idx1 = idx0 + r * rowStride;
2463                        idx2 = 2 * r;
2464                        a[idx1] = t[idx2];
2465                        a[idx1 + 1] = t[idx2 + 1];
2466                    }
2467                }
2468                if (icr != 0) {
2469                    for (int r = 0; r < rows; r++) {
2470                        fftColumns.realInverse(a, idx0 + r * rowStride, scale);
2471                    }
2472                }
2473            }
2474        }
2475    }
2476
2477    private void xdft3da_sub2(int icr, int isgn, float[] a, boolean scale) {
2478        int idx0, idx1, idx2, idx3, idx4, idx5;
2479
2480        if (isgn == -1) {
2481            for (int s = 0; s < slices; s++) {
2482                idx0 = s * sliceStride;
2483                if (icr == 0) {
2484                    for (int r = 0; r < rows; r++) {
2485                        fftColumns.complexForward(a, idx0 + r * rowStride);
2486                    }
2487                } else {
2488                    for (int r = 0; r < rows; r++) {
2489                        fftColumns.realForward(a, idx0 + r * rowStride);
2490                    }
2491                }
2492                if (columns > 4) {
2493                    for (int c = 0; c < columns; c += 8) {
2494                        for (int r = 0; r < rows; r++) {
2495                            idx1 = idx0 + r * rowStride + c;
2496                            idx2 = 2 * r;
2497                            idx3 = 2 * rows + 2 * r;
2498                            idx4 = idx3 + 2 * rows;
2499                            idx5 = idx4 + 2 * rows;
2500                            t[idx2] = a[idx1];
2501                            t[idx2 + 1] = a[idx1 + 1];
2502                            t[idx3] = a[idx1 + 2];
2503                            t[idx3 + 1] = a[idx1 + 3];
2504                            t[idx4] = a[idx1 + 4];
2505                            t[idx4 + 1] = a[idx1 + 5];
2506                            t[idx5] = a[idx1 + 6];
2507                            t[idx5 + 1] = a[idx1 + 7];
2508                        }
2509                        fftRows.complexForward(t, 0);
2510                        fftRows.complexForward(t, 2 * rows);
2511                        fftRows.complexForward(t, 4 * rows);
2512                        fftRows.complexForward(t, 6 * rows);
2513                        for (int r = 0; r < rows; r++) {
2514                            idx1 = idx0 + r * rowStride + c;
2515                            idx2 = 2 * r;
2516                            idx3 = 2 * rows + 2 * r;
2517                            idx4 = idx3 + 2 * rows;
2518                            idx5 = idx4 + 2 * rows;
2519                            a[idx1] = t[idx2];
2520                            a[idx1 + 1] = t[idx2 + 1];
2521                            a[idx1 + 2] = t[idx3];
2522                            a[idx1 + 3] = t[idx3 + 1];
2523                            a[idx1 + 4] = t[idx4];
2524                            a[idx1 + 5] = t[idx4 + 1];
2525                            a[idx1 + 6] = t[idx5];
2526                            a[idx1 + 7] = t[idx5 + 1];
2527                        }
2528                    }
2529                } else if (columns == 4) {
2530                    for (int r = 0; r < rows; r++) {
2531                        idx1 = idx0 + r * rowStride;
2532                        idx2 = 2 * r;
2533                        idx3 = 2 * rows + 2 * r;
2534                        t[idx2] = a[idx1];
2535                        t[idx2 + 1] = a[idx1 + 1];
2536                        t[idx3] = a[idx1 + 2];
2537                        t[idx3 + 1] = a[idx1 + 3];
2538                    }
2539                    fftRows.complexForward(t, 0);
2540                    fftRows.complexForward(t, 2 * rows);
2541                    for (int r = 0; r < rows; r++) {
2542                        idx1 = idx0 + r * rowStride;
2543                        idx2 = 2 * r;
2544                        idx3 = 2 * rows + 2 * r;
2545                        a[idx1] = t[idx2];
2546                        a[idx1 + 1] = t[idx2 + 1];
2547                        a[idx1 + 2] = t[idx3];
2548                        a[idx1 + 3] = t[idx3 + 1];
2549                    }
2550                } else if (columns == 2) {
2551                    for (int r = 0; r < rows; r++) {
2552                        idx1 = idx0 + r * rowStride;
2553                        idx2 = 2 * r;
2554                        t[idx2] = a[idx1];
2555                        t[idx2 + 1] = a[idx1 + 1];
2556                    }
2557                    fftRows.complexForward(t, 0);
2558                    for (int r = 0; r < rows; r++) {
2559                        idx1 = idx0 + r * rowStride;
2560                        idx2 = 2 * r;
2561                        a[idx1] = t[idx2];
2562                        a[idx1 + 1] = t[idx2 + 1];
2563                    }
2564                }
2565            }
2566        } else {
2567            for (int s = 0; s < slices; s++) {
2568                idx0 = s * sliceStride;
2569                if (icr == 0) {
2570                    for (int r = 0; r < rows; r++) {
2571                        fftColumns.complexInverse(a, idx0 + r * rowStride, scale);
2572                    }
2573                } else {
2574                    for (int r = 0; r < rows; r++) {
2575                        fftColumns.realInverse2(a, idx0 + r * rowStride, scale);
2576                    }
2577                }
2578                if (columns > 4) {
2579                    for (int c = 0; c < columns; c += 8) {
2580                        for (int r = 0; r < rows; r++) {
2581                            idx1 = idx0 + r * rowStride + c;
2582                            idx2 = 2 * r;
2583                            idx3 = 2 * rows + 2 * r;
2584                            idx4 = idx3 + 2 * rows;
2585                            idx5 = idx4 + 2 * rows;
2586                            t[idx2] = a[idx1];
2587                            t[idx2 + 1] = a[idx1 + 1];
2588                            t[idx3] = a[idx1 + 2];
2589                            t[idx3 + 1] = a[idx1 + 3];
2590                            t[idx4] = a[idx1 + 4];
2591                            t[idx4 + 1] = a[idx1 + 5];
2592                            t[idx5] = a[idx1 + 6];
2593                            t[idx5 + 1] = a[idx1 + 7];
2594                        }
2595                        fftRows.complexInverse(t, 0, scale);
2596                        fftRows.complexInverse(t, 2 * rows, scale);
2597                        fftRows.complexInverse(t, 4 * rows, scale);
2598                        fftRows.complexInverse(t, 6 * rows, scale);
2599                        for (int r = 0; r < rows; r++) {
2600                            idx1 = idx0 + r * rowStride + c;
2601                            idx2 = 2 * r;
2602                            idx3 = 2 * rows + 2 * r;
2603                            idx4 = idx3 + 2 * rows;
2604                            idx5 = idx4 + 2 * rows;
2605                            a[idx1] = t[idx2];
2606                            a[idx1 + 1] = t[idx2 + 1];
2607                            a[idx1 + 2] = t[idx3];
2608                            a[idx1 + 3] = t[idx3 + 1];
2609                            a[idx1 + 4] = t[idx4];
2610                            a[idx1 + 5] = t[idx4 + 1];
2611                            a[idx1 + 6] = t[idx5];
2612                            a[idx1 + 7] = t[idx5 + 1];
2613                        }
2614                    }
2615                } else if (columns == 4) {
2616                    for (int r = 0; r < rows; r++) {
2617                        idx1 = idx0 + r * rowStride;
2618                        idx2 = 2 * r;
2619                        idx3 = 2 * rows + 2 * r;
2620                        t[idx2] = a[idx1];
2621                        t[idx2 + 1] = a[idx1 + 1];
2622                        t[idx3] = a[idx1 + 2];
2623                        t[idx3 + 1] = a[idx1 + 3];
2624                    }
2625                    fftRows.complexInverse(t, 0, scale);
2626                    fftRows.complexInverse(t, 2 * rows, scale);
2627                    for (int r = 0; r < rows; r++) {
2628                        idx1 = idx0 + r * rowStride;
2629                        idx2 = 2 * r;
2630                        idx3 = 2 * rows + 2 * r;
2631                        a[idx1] = t[idx2];
2632                        a[idx1 + 1] = t[idx2 + 1];
2633                        a[idx1 + 2] = t[idx3];
2634                        a[idx1 + 3] = t[idx3 + 1];
2635                    }
2636                } else if (columns == 2) {
2637                    for (int r = 0; r < rows; r++) {
2638                        idx1 = idx0 + r * rowStride;
2639                        idx2 = 2 * r;
2640                        t[idx2] = a[idx1];
2641                        t[idx2 + 1] = a[idx1 + 1];
2642                    }
2643                    fftRows.complexInverse(t, 0, scale);
2644                    for (int r = 0; r < rows; r++) {
2645                        idx1 = idx0 + r * rowStride;
2646                        idx2 = 2 * r;
2647                        a[idx1] = t[idx2];
2648                        a[idx1 + 1] = t[idx2 + 1];
2649                    }
2650                }
2651            }
2652        }
2653    }
2654
2655    private void xdft3da_sub1(int icr, int isgn, float[][][] a, boolean scale) {
2656        int idx2, idx3, idx4, idx5;
2657
2658        if (isgn == -1) {
2659            for (int s = 0; s < slices; s++) {
2660                if (icr == 0) {
2661                    for (int r = 0; r < rows; r++) {
2662                        fftColumns.complexForward(a[s][r]);
2663                    }
2664                } else {
2665                    for (int r = 0; r < rows; r++) {
2666                        fftColumns.realForward(a[s][r], 0);
2667                    }
2668                }
2669                if (columns > 4) {
2670                    for (int c = 0; c < columns; c += 8) {
2671                        for (int r = 0; r < rows; r++) {
2672                            idx2 = 2 * r;
2673                            idx3 = 2 * rows + 2 * r;
2674                            idx4 = idx3 + 2 * rows;
2675                            idx5 = idx4 + 2 * rows;
2676                            t[idx2] = a[s][r][c];
2677                            t[idx2 + 1] = a[s][r][c + 1];
2678                            t[idx3] = a[s][r][c + 2];
2679                            t[idx3 + 1] = a[s][r][c + 3];
2680                            t[idx4] = a[s][r][c + 4];
2681                            t[idx4 + 1] = a[s][r][c + 5];
2682                            t[idx5] = a[s][r][c + 6];
2683                            t[idx5 + 1] = a[s][r][c + 7];
2684                        }
2685                        fftRows.complexForward(t, 0);
2686                        fftRows.complexForward(t, 2 * rows);
2687                        fftRows.complexForward(t, 4 * rows);
2688                        fftRows.complexForward(t, 6 * rows);
2689                        for (int r = 0; r < rows; r++) {
2690                            idx2 = 2 * r;
2691                            idx3 = 2 * rows + 2 * r;
2692                            idx4 = idx3 + 2 * rows;
2693                            idx5 = idx4 + 2 * rows;
2694                            a[s][r][c] = t[idx2];
2695                            a[s][r][c + 1] = t[idx2 + 1];
2696                            a[s][r][c + 2] = t[idx3];
2697                            a[s][r][c + 3] = t[idx3 + 1];
2698                            a[s][r][c + 4] = t[idx4];
2699                            a[s][r][c + 5] = t[idx4 + 1];
2700                            a[s][r][c + 6] = t[idx5];
2701                            a[s][r][c + 7] = t[idx5 + 1];
2702                        }
2703                    }
2704                } else if (columns == 4) {
2705                    for (int r = 0; r < rows; r++) {
2706                        idx2 = 2 * r;
2707                        idx3 = 2 * rows + 2 * r;
2708                        t[idx2] = a[s][r][0];
2709                        t[idx2 + 1] = a[s][r][1];
2710                        t[idx3] = a[s][r][2];
2711                        t[idx3 + 1] = a[s][r][3];
2712                    }
2713                    fftRows.complexForward(t, 0);
2714                    fftRows.complexForward(t, 2 * rows);
2715                    for (int r = 0; r < rows; r++) {
2716                        idx2 = 2 * r;
2717                        idx3 = 2 * rows + 2 * r;
2718                        a[s][r][0] = t[idx2];
2719                        a[s][r][1] = t[idx2 + 1];
2720                        a[s][r][2] = t[idx3];
2721                        a[s][r][3] = t[idx3 + 1];
2722                    }
2723                } else if (columns == 2) {
2724                    for (int r = 0; r < rows; r++) {
2725                        idx2 = 2 * r;
2726                        t[idx2] = a[s][r][0];
2727                        t[idx2 + 1] = a[s][r][1];
2728                    }
2729                    fftRows.complexForward(t, 0);
2730                    for (int r = 0; r < rows; r++) {
2731                        idx2 = 2 * r;
2732                        a[s][r][0] = t[idx2];
2733                        a[s][r][1] = t[idx2 + 1];
2734                    }
2735                }
2736            }
2737        } else {
2738            for (int s = 0; s < slices; s++) {
2739                if (icr == 0) {
2740                    for (int r = 0; r < rows; r++) {
2741                        fftColumns.complexInverse(a[s][r], scale);
2742                    }
2743                }
2744                if (columns > 4) {
2745                    for (int c = 0; c < columns; c += 8) {
2746                        for (int r = 0; r < rows; r++) {
2747                            idx2 = 2 * r;
2748                            idx3 = 2 * rows + 2 * r;
2749                            idx4 = idx3 + 2 * rows;
2750                            idx5 = idx4 + 2 * rows;
2751                            t[idx2] = a[s][r][c];
2752                            t[idx2 + 1] = a[s][r][c + 1];
2753                            t[idx3] = a[s][r][c + 2];
2754                            t[idx3 + 1] = a[s][r][c + 3];
2755                            t[idx4] = a[s][r][c + 4];
2756                            t[idx4 + 1] = a[s][r][c + 5];
2757                            t[idx5] = a[s][r][c + 6];
2758                            t[idx5 + 1] = a[s][r][c + 7];
2759                        }
2760                        fftRows.complexInverse(t, 0, scale);
2761                        fftRows.complexInverse(t, 2 * rows, scale);
2762                        fftRows.complexInverse(t, 4 * rows, scale);
2763                        fftRows.complexInverse(t, 6 * rows, scale);
2764                        for (int r = 0; r < rows; r++) {
2765                            idx2 = 2 * r;
2766                            idx3 = 2 * rows + 2 * r;
2767                            idx4 = idx3 + 2 * rows;
2768                            idx5 = idx4 + 2 * rows;
2769                            a[s][r][c] = t[idx2];
2770                            a[s][r][c + 1] = t[idx2 + 1];
2771                            a[s][r][c + 2] = t[idx3];
2772                            a[s][r][c + 3] = t[idx3 + 1];
2773                            a[s][r][c + 4] = t[idx4];
2774                            a[s][r][c + 5] = t[idx4 + 1];
2775                            a[s][r][c + 6] = t[idx5];
2776                            a[s][r][c + 7] = t[idx5 + 1];
2777                        }
2778                    }
2779                } else if (columns == 4) {
2780                    for (int r = 0; r < rows; r++) {
2781                        idx2 = 2 * r;
2782                        idx3 = 2 * rows + 2 * r;
2783                        t[idx2] = a[s][r][0];
2784                        t[idx2 + 1] = a[s][r][1];
2785                        t[idx3] = a[s][r][2];
2786                        t[idx3 + 1] = a[s][r][3];
2787                    }
2788                    fftRows.complexInverse(t, 0, scale);
2789                    fftRows.complexInverse(t, 2 * rows, scale);
2790                    for (int r = 0; r < rows; r++) {
2791                        idx2 = 2 * r;
2792                        idx3 = 2 * rows + 2 * r;
2793                        a[s][r][0] = t[idx2];
2794                        a[s][r][1] = t[idx2 + 1];
2795                        a[s][r][2] = t[idx3];
2796                        a[s][r][3] = t[idx3 + 1];
2797                    }
2798                } else if (columns == 2) {
2799                    for (int r = 0; r < rows; r++) {
2800                        idx2 = 2 * r;
2801                        t[idx2] = a[s][r][0];
2802                        t[idx2 + 1] = a[s][r][1];
2803                    }
2804                    fftRows.complexInverse(t, 0, scale);
2805                    for (int r = 0; r < rows; r++) {
2806                        idx2 = 2 * r;
2807                        a[s][r][0] = t[idx2];
2808                        a[s][r][1] = t[idx2 + 1];
2809                    }
2810                }
2811                if (icr != 0) {
2812                    for (int r = 0; r < rows; r++) {
2813                        fftColumns.realInverse(a[s][r], 0, scale);
2814                    }
2815                }
2816            }
2817        }
2818    }
2819
2820    private void xdft3da_sub2(int icr, int isgn, float[][][] a, boolean scale) {
2821        int idx2, idx3, idx4, idx5;
2822
2823        if (isgn == -1) {
2824            for (int s = 0; s < slices; s++) {
2825                if (icr == 0) {
2826                    for (int r = 0; r < rows; r++) {
2827                        fftColumns.complexForward(a[s][r]);
2828                    }
2829                } else {
2830                    for (int r = 0; r < rows; r++) {
2831                        fftColumns.realForward(a[s][r]);
2832                    }
2833                }
2834                if (columns > 4) {
2835                    for (int c = 0; c < columns; c += 8) {
2836                        for (int r = 0; r < rows; r++) {
2837                            idx2 = 2 * r;
2838                            idx3 = 2 * rows + 2 * r;
2839                            idx4 = idx3 + 2 * rows;
2840                            idx5 = idx4 + 2 * rows;
2841                            t[idx2] = a[s][r][c];
2842                            t[idx2 + 1] = a[s][r][c + 1];
2843                            t[idx3] = a[s][r][c + 2];
2844                            t[idx3 + 1] = a[s][r][c + 3];
2845                            t[idx4] = a[s][r][c + 4];
2846                            t[idx4 + 1] = a[s][r][c + 5];
2847                            t[idx5] = a[s][r][c + 6];
2848                            t[idx5 + 1] = a[s][r][c + 7];
2849                        }
2850                        fftRows.complexForward(t, 0);
2851                        fftRows.complexForward(t, 2 * rows);
2852                        fftRows.complexForward(t, 4 * rows);
2853                        fftRows.complexForward(t, 6 * rows);
2854                        for (int r = 0; r < rows; r++) {
2855                            idx2 = 2 * r;
2856                            idx3 = 2 * rows + 2 * r;
2857                            idx4 = idx3 + 2 * rows;
2858                            idx5 = idx4 + 2 * rows;
2859                            a[s][r][c] = t[idx2];
2860                            a[s][r][c + 1] = t[idx2 + 1];
2861                            a[s][r][c + 2] = t[idx3];
2862                            a[s][r][c + 3] = t[idx3 + 1];
2863                            a[s][r][c + 4] = t[idx4];
2864                            a[s][r][c + 5] = t[idx4 + 1];
2865                            a[s][r][c + 6] = t[idx5];
2866                            a[s][r][c + 7] = t[idx5 + 1];
2867                        }
2868                    }
2869                } else if (columns == 4) {
2870                    for (int r = 0; r < rows; r++) {
2871                        idx2 = 2 * r;
2872                        idx3 = 2 * rows + 2 * r;
2873                        t[idx2] = a[s][r][0];
2874                        t[idx2 + 1] = a[s][r][1];
2875                        t[idx3] = a[s][r][2];
2876                        t[idx3 + 1] = a[s][r][3];
2877                    }
2878                    fftRows.complexForward(t, 0);
2879                    fftRows.complexForward(t, 2 * rows);
2880                    for (int r = 0; r < rows; r++) {
2881                        idx2 = 2 * r;
2882                        idx3 = 2 * rows + 2 * r;
2883                        a[s][r][0] = t[idx2];
2884                        a[s][r][1] = t[idx2 + 1];
2885                        a[s][r][2] = t[idx3];
2886                        a[s][r][3] = t[idx3 + 1];
2887                    }
2888                } else if (columns == 2) {
2889                    for (int r = 0; r < rows; r++) {
2890                        idx2 = 2 * r;
2891                        t[idx2] = a[s][r][0];
2892                        t[idx2 + 1] = a[s][r][1];
2893                    }
2894                    fftRows.complexForward(t, 0);
2895                    for (int r = 0; r < rows; r++) {
2896                        idx2 = 2 * r;
2897                        a[s][r][0] = t[idx2];
2898                        a[s][r][1] = t[idx2 + 1];
2899                    }
2900                }
2901            }
2902        } else {
2903            for (int s = 0; s < slices; s++) {
2904                if (icr == 0) {
2905                    for (int r = 0; r < rows; r++) {
2906                        fftColumns.complexInverse(a[s][r], scale);
2907                    }
2908                } else {
2909                    for (int r = 0; r < rows; r++) {
2910                        fftColumns.realInverse2(a[s][r], 0, scale);
2911                    }
2912                }
2913                if (columns > 4) {
2914                    for (int c = 0; c < columns; c += 8) {
2915                        for (int r = 0; r < rows; r++) {
2916                            idx2 = 2 * r;
2917                            idx3 = 2 * rows + 2 * r;
2918                            idx4 = idx3 + 2 * rows;
2919                            idx5 = idx4 + 2 * rows;
2920                            t[idx2] = a[s][r][c];
2921                            t[idx2 + 1] = a[s][r][c + 1];
2922                            t[idx3] = a[s][r][c + 2];
2923                            t[idx3 + 1] = a[s][r][c + 3];
2924                            t[idx4] = a[s][r][c + 4];
2925                            t[idx4 + 1] = a[s][r][c + 5];
2926                            t[idx5] = a[s][r][c + 6];
2927                            t[idx5 + 1] = a[s][r][c + 7];
2928                        }
2929                        fftRows.complexInverse(t, 0, scale);
2930                        fftRows.complexInverse(t, 2 * rows, scale);
2931                        fftRows.complexInverse(t, 4 * rows, scale);
2932                        fftRows.complexInverse(t, 6 * rows, scale);
2933                        for (int r = 0; r < rows; r++) {
2934                            idx2 = 2 * r;
2935                            idx3 = 2 * rows + 2 * r;
2936                            idx4 = idx3 + 2 * rows;
2937                            idx5 = idx4 + 2 * rows;
2938                            a[s][r][c] = t[idx2];
2939                            a[s][r][c + 1] = t[idx2 + 1];
2940                            a[s][r][c + 2] = t[idx3];
2941                            a[s][r][c + 3] = t[idx3 + 1];
2942                            a[s][r][c + 4] = t[idx4];
2943                            a[s][r][c + 5] = t[idx4 + 1];
2944                            a[s][r][c + 6] = t[idx5];
2945                            a[s][r][c + 7] = t[idx5 + 1];
2946                        }
2947                    }
2948                } else if (columns == 4) {
2949                    for (int r = 0; r < rows; r++) {
2950                        idx2 = 2 * r;
2951                        idx3 = 2 * rows + 2 * r;
2952                        t[idx2] = a[s][r][0];
2953                        t[idx2 + 1] = a[s][r][1];
2954                        t[idx3] = a[s][r][2];
2955                        t[idx3 + 1] = a[s][r][3];
2956                    }
2957                    fftRows.complexInverse(t, 0, scale);
2958                    fftRows.complexInverse(t, 2 * rows, scale);
2959                    for (int r = 0; r < rows; r++) {
2960                        idx2 = 2 * r;
2961                        idx3 = 2 * rows + 2 * r;
2962                        a[s][r][0] = t[idx2];
2963                        a[s][r][1] = t[idx2 + 1];
2964                        a[s][r][2] = t[idx3];
2965                        a[s][r][3] = t[idx3 + 1];
2966                    }
2967                } else if (columns == 2) {
2968                    for (int r = 0; r < rows; r++) {
2969                        idx2 = 2 * r;
2970                        t[idx2] = a[s][r][0];
2971                        t[idx2 + 1] = a[s][r][1];
2972                    }
2973                    fftRows.complexInverse(t, 0, scale);
2974                    for (int r = 0; r < rows; r++) {
2975                        idx2 = 2 * r;
2976                        a[s][r][0] = t[idx2];
2977                        a[s][r][1] = t[idx2 + 1];
2978                    }
2979                }
2980            }
2981        }
2982    }
2983
2984    private void cdft3db_sub(int isgn, float[] a, boolean scale) {
2985        int idx0, idx1, idx2, idx3, idx4, idx5;
2986
2987        if (isgn == -1) {
2988            if (columns > 4) {
2989                for (int r = 0; r < rows; r++) {
2990                    idx0 = r * rowStride;
2991                    for (int c = 0; c < columns; c += 8) {
2992                        for (int s = 0; s < slices; s++) {
2993                            idx1 = s * sliceStride + idx0 + c;
2994                            idx2 = 2 * s;
2995                            idx3 = 2 * slices + 2 * s;
2996                            idx4 = idx3 + 2 * slices;
2997                            idx5 = idx4 + 2 * slices;
2998                            t[idx2] = a[idx1];
2999                            t[idx2 + 1] = a[idx1 + 1];
3000                            t[idx3] = a[idx1 + 2];
3001                            t[idx3 + 1] = a[idx1 + 3];
3002                            t[idx4] = a[idx1 + 4];
3003                            t[idx4 + 1] = a[idx1 + 5];
3004                            t[idx5] = a[idx1 + 6];
3005                            t[idx5 + 1] = a[idx1 + 7];
3006                        }
3007                        fftSlices.complexForward(t, 0);
3008                        fftSlices.complexForward(t, 2 * slices);
3009                        fftSlices.complexForward(t, 4 * slices);
3010                        fftSlices.complexForward(t, 6 * slices);
3011                        for (int s = 0; s < slices; s++) {
3012                            idx1 = s * sliceStride + idx0 + c;
3013                            idx2 = 2 * s;
3014                            idx3 = 2 * slices + 2 * s;
3015                            idx4 = idx3 + 2 * slices;
3016                            idx5 = idx4 + 2 * slices;
3017                            a[idx1] = t[idx2];
3018                            a[idx1 + 1] = t[idx2 + 1];
3019                            a[idx1 + 2] = t[idx3];
3020                            a[idx1 + 3] = t[idx3 + 1];
3021                            a[idx1 + 4] = t[idx4];
3022                            a[idx1 + 5] = t[idx4 + 1];
3023                            a[idx1 + 6] = t[idx5];
3024                            a[idx1 + 7] = t[idx5 + 1];
3025                        }
3026                    }
3027                }
3028            } else if (columns == 4) {
3029                for (int r = 0; r < rows; r++) {
3030                    idx0 = r * rowStride;
3031                    for (int s = 0; s < slices; s++) {
3032                        idx1 = s * sliceStride + idx0;
3033                        idx2 = 2 * s;
3034                        idx3 = 2 * slices + 2 * s;
3035                        t[idx2] = a[idx1];
3036                        t[idx2 + 1] = a[idx1 + 1];
3037                        t[idx3] = a[idx1 + 2];
3038                        t[idx3 + 1] = a[idx1 + 3];
3039                    }
3040                    fftSlices.complexForward(t, 0);
3041                    fftSlices.complexForward(t, 2 * slices);
3042                    for (int s = 0; s < slices; s++) {
3043                        idx1 = s * sliceStride + idx0;
3044                        idx2 = 2 * s;
3045                        idx3 = 2 * slices + 2 * s;
3046                        a[idx1] = t[idx2];
3047                        a[idx1 + 1] = t[idx2 + 1];
3048                        a[idx1 + 2] = t[idx3];
3049                        a[idx1 + 3] = t[idx3 + 1];
3050                    }
3051                }
3052            } else if (columns == 2) {
3053                for (int r = 0; r < rows; r++) {
3054                    idx0 = r * rowStride;
3055                    for (int s = 0; s < slices; s++) {
3056                        idx1 = s * sliceStride + idx0;
3057                        idx2 = 2 * s;
3058                        t[idx2] = a[idx1];
3059                        t[idx2 + 1] = a[idx1 + 1];
3060                    }
3061                    fftSlices.complexForward(t, 0);
3062                    for (int s = 0; s < slices; s++) {
3063                        idx1 = s * sliceStride + idx0;
3064                        idx2 = 2 * s;
3065                        a[idx1] = t[idx2];
3066                        a[idx1 + 1] = t[idx2 + 1];
3067                    }
3068                }
3069            }
3070        } else {
3071            if (columns > 4) {
3072                for (int r = 0; r < rows; r++) {
3073                    idx0 = r * rowStride;
3074                    for (int c = 0; c < columns; c += 8) {
3075                        for (int s = 0; s < slices; s++) {
3076                            idx1 = s * sliceStride + idx0 + c;
3077                            idx2 = 2 * s;
3078                            idx3 = 2 * slices + 2 * s;
3079                            idx4 = idx3 + 2 * slices;
3080                            idx5 = idx4 + 2 * slices;
3081                            t[idx2] = a[idx1];
3082                            t[idx2 + 1] = a[idx1 + 1];
3083                            t[idx3] = a[idx1 + 2];
3084                            t[idx3 + 1] = a[idx1 + 3];
3085                            t[idx4] = a[idx1 + 4];
3086                            t[idx4 + 1] = a[idx1 + 5];
3087                            t[idx5] = a[idx1 + 6];
3088                            t[idx5 + 1] = a[idx1 + 7];
3089                        }
3090                        fftSlices.complexInverse(t, 0, scale);
3091                        fftSlices.complexInverse(t, 2 * slices, scale);
3092                        fftSlices.complexInverse(t, 4 * slices, scale);
3093                        fftSlices.complexInverse(t, 6 * slices, scale);
3094                        for (int s = 0; s < slices; s++) {
3095                            idx1 = s * sliceStride + idx0 + c;
3096                            idx2 = 2 * s;
3097                            idx3 = 2 * slices + 2 * s;
3098                            idx4 = idx3 + 2 * slices;
3099                            idx5 = idx4 + 2 * slices;
3100                            a[idx1] = t[idx2];
3101                            a[idx1 + 1] = t[idx2 + 1];
3102                            a[idx1 + 2] = t[idx3];
3103                            a[idx1 + 3] = t[idx3 + 1];
3104                            a[idx1 + 4] = t[idx4];
3105                            a[idx1 + 5] = t[idx4 + 1];
3106                            a[idx1 + 6] = t[idx5];
3107                            a[idx1 + 7] = t[idx5 + 1];
3108                        }
3109                    }
3110                }
3111            } else if (columns == 4) {
3112                for (int r = 0; r < rows; r++) {
3113                    idx0 = r * rowStride;
3114                    for (int s = 0; s < slices; s++) {
3115                        idx1 = s * sliceStride + idx0;
3116                        idx2 = 2 * s;
3117                        idx3 = 2 * slices + 2 * s;
3118                        t[idx2] = a[idx1];
3119                        t[idx2 + 1] = a[idx1 + 1];
3120                        t[idx3] = a[idx1 + 2];
3121                        t[idx3 + 1] = a[idx1 + 3];
3122                    }
3123                    fftSlices.complexInverse(t, 0, scale);
3124                    fftSlices.complexInverse(t, 2 * slices, scale);
3125                    for (int s = 0; s < slices; s++) {
3126                        idx1 = s * sliceStride + idx0;
3127                        idx2 = 2 * s;
3128                        idx3 = 2 * slices + 2 * s;
3129                        a[idx1] = t[idx2];
3130                        a[idx1 + 1] = t[idx2 + 1];
3131                        a[idx1 + 2] = t[idx3];
3132                        a[idx1 + 3] = t[idx3 + 1];
3133                    }
3134                }
3135            } else if (columns == 2) {
3136                for (int r = 0; r < rows; r++) {
3137                    idx0 = r * rowStride;
3138                    for (int s = 0; s < slices; s++) {
3139                        idx1 = s * sliceStride + idx0;
3140                        idx2 = 2 * s;
3141                        t[idx2] = a[idx1];
3142                        t[idx2 + 1] = a[idx1 + 1];
3143                    }
3144                    fftSlices.complexInverse(t, 0, scale);
3145                    for (int s = 0; s < slices; s++) {
3146                        idx1 = s * sliceStride + idx0;
3147                        idx2 = 2 * s;
3148                        a[idx1] = t[idx2];
3149                        a[idx1 + 1] = t[idx2 + 1];
3150                    }
3151                }
3152            }
3153        }
3154    }
3155
3156    private void cdft3db_sub(int isgn, float[][][] a, boolean scale) {
3157        int idx2, idx3, idx4, idx5;
3158
3159        if (isgn == -1) {
3160            if (columns > 4) {
3161                for (int r = 0; r < rows; r++) {
3162                    for (int c = 0; c < columns; c += 8) {
3163                        for (int s = 0; s < slices; s++) {
3164                            idx2 = 2 * s;
3165                            idx3 = 2 * slices + 2 * s;
3166                            idx4 = idx3 + 2 * slices;
3167                            idx5 = idx4 + 2 * slices;
3168                            t[idx2] = a[s][r][c];
3169                            t[idx2 + 1] = a[s][r][c + 1];
3170                            t[idx3] = a[s][r][c + 2];
3171                            t[idx3 + 1] = a[s][r][c + 3];
3172                            t[idx4] = a[s][r][c + 4];
3173                            t[idx4 + 1] = a[s][r][c + 5];
3174                            t[idx5] = a[s][r][c + 6];
3175                            t[idx5 + 1] = a[s][r][c + 7];
3176                        }
3177                        fftSlices.complexForward(t, 0);
3178                        fftSlices.complexForward(t, 2 * slices);
3179                        fftSlices.complexForward(t, 4 * slices);
3180                        fftSlices.complexForward(t, 6 * slices);
3181                        for (int s = 0; s < slices; s++) {
3182                            idx2 = 2 * s;
3183                            idx3 = 2 * slices + 2 * s;
3184                            idx4 = idx3 + 2 * slices;
3185                            idx5 = idx4 + 2 * slices;
3186                            a[s][r][c] = t[idx2];
3187                            a[s][r][c + 1] = t[idx2 + 1];
3188                            a[s][r][c + 2] = t[idx3];
3189                            a[s][r][c + 3] = t[idx3 + 1];
3190                            a[s][r][c + 4] = t[idx4];
3191                            a[s][r][c + 5] = t[idx4 + 1];
3192                            a[s][r][c + 6] = t[idx5];
3193                            a[s][r][c + 7] = t[idx5 + 1];
3194                        }
3195                    }
3196                }
3197            } else if (columns == 4) {
3198                for (int r = 0; r < rows; r++) {
3199                    for (int s = 0; s < slices; s++) {
3200                        idx2 = 2 * s;
3201                        idx3 = 2 * slices + 2 * s;
3202                        t[idx2] = a[s][r][0];
3203                        t[idx2 + 1] = a[s][r][1];
3204                        t[idx3] = a[s][r][2];
3205                        t[idx3 + 1] = a[s][r][3];
3206                    }
3207                    fftSlices.complexForward(t, 0);
3208                    fftSlices.complexForward(t, 2 * slices);
3209                    for (int s = 0; s < slices; s++) {
3210                        idx2 = 2 * s;
3211                        idx3 = 2 * slices + 2 * s;
3212                        a[s][r][0] = t[idx2];
3213                        a[s][r][1] = t[idx2 + 1];
3214                        a[s][r][2] = t[idx3];
3215                        a[s][r][3] = t[idx3 + 1];
3216                    }
3217                }
3218            } else if (columns == 2) {
3219                for (int r = 0; r < rows; r++) {
3220                    for (int s = 0; s < slices; s++) {
3221                        idx2 = 2 * s;
3222                        t[idx2] = a[s][r][0];
3223                        t[idx2 + 1] = a[s][r][1];
3224                    }
3225                    fftSlices.complexForward(t, 0);
3226                    for (int s = 0; s < slices; s++) {
3227                        idx2 = 2 * s;
3228                        a[s][r][0] = t[idx2];
3229                        a[s][r][1] = t[idx2 + 1];
3230                    }
3231                }
3232            }
3233        } else {
3234            if (columns > 4) {
3235                for (int r = 0; r < rows; r++) {
3236                    for (int c = 0; c < columns; c += 8) {
3237                        for (int s = 0; s < slices; s++) {
3238                            idx2 = 2 * s;
3239                            idx3 = 2 * slices + 2 * s;
3240                            idx4 = idx3 + 2 * slices;
3241                            idx5 = idx4 + 2 * slices;
3242                            t[idx2] = a[s][r][c];
3243                            t[idx2 + 1] = a[s][r][c + 1];
3244                            t[idx3] = a[s][r][c + 2];
3245                            t[idx3 + 1] = a[s][r][c + 3];
3246                            t[idx4] = a[s][r][c + 4];
3247                            t[idx4 + 1] = a[s][r][c + 5];
3248                            t[idx5] = a[s][r][c + 6];
3249                            t[idx5 + 1] = a[s][r][c + 7];
3250                        }
3251                        fftSlices.complexInverse(t, 0, scale);
3252                        fftSlices.complexInverse(t, 2 * slices, scale);
3253                        fftSlices.complexInverse(t, 4 * slices, scale);
3254                        fftSlices.complexInverse(t, 6 * slices, scale);
3255                        for (int s = 0; s < slices; s++) {
3256                            idx2 = 2 * s;
3257                            idx3 = 2 * slices + 2 * s;
3258                            idx4 = idx3 + 2 * slices;
3259                            idx5 = idx4 + 2 * slices;
3260                            a[s][r][c] = t[idx2];
3261                            a[s][r][c + 1] = t[idx2 + 1];
3262                            a[s][r][c + 2] = t[idx3];
3263                            a[s][r][c + 3] = t[idx3 + 1];
3264                            a[s][r][c + 4] = t[idx4];
3265                            a[s][r][c + 5] = t[idx4 + 1];
3266                            a[s][r][c + 6] = t[idx5];
3267                            a[s][r][c + 7] = t[idx5 + 1];
3268                        }
3269                    }
3270                }
3271            } else if (columns == 4) {
3272                for (int r = 0; r < rows; r++) {
3273                    for (int s = 0; s < slices; s++) {
3274                        idx2 = 2 * s;
3275                        idx3 = 2 * slices + 2 * s;
3276                        t[idx2] = a[s][r][0];
3277                        t[idx2 + 1] = a[s][r][1];
3278                        t[idx3] = a[s][r][2];
3279                        t[idx3 + 1] = a[s][r][3];
3280                    }
3281                    fftSlices.complexInverse(t, 0, scale);
3282                    fftSlices.complexInverse(t, 2 * slices, scale);
3283                    for (int s = 0; s < slices; s++) {
3284                        idx2 = 2 * s;
3285                        idx3 = 2 * slices + 2 * s;
3286                        a[s][r][0] = t[idx2];
3287                        a[s][r][1] = t[idx2 + 1];
3288                        a[s][r][2] = t[idx3];
3289                        a[s][r][3] = t[idx3 + 1];
3290                    }
3291                }
3292            } else if (columns == 2) {
3293                for (int r = 0; r < rows; r++) {
3294                    for (int s = 0; s < slices; s++) {
3295                        idx2 = 2 * s;
3296                        t[idx2] = a[s][r][0];
3297                        t[idx2 + 1] = a[s][r][1];
3298                    }
3299                    fftSlices.complexInverse(t, 0, scale);
3300                    for (int s = 0; s < slices; s++) {
3301                        idx2 = 2 * s;
3302                        a[s][r][0] = t[idx2];
3303                        a[s][r][1] = t[idx2 + 1];
3304                    }
3305                }
3306            }
3307        }
3308    }
3309
3310    private void xdft3da_subth1(final int icr, final int isgn, final float[] a, final boolean scale) {
3311        int nt, i;
3312        final int nthreads = Math.min(ConcurrencyUtils.getNumberOfThreads(), slices);
3313        nt = 8 * rows;
3314        if (columns == 4) {
3315            nt >>= 1;
3316        } else if (columns < 4) {
3317            nt >>= 2;
3318        }
3319        Future<?>[] futures = new Future[nthreads];
3320        for (i = 0; i < nthreads; i++) {
3321            final int n0 = i;
3322            final int startt = nt * i;
3323            futures[i] = ConcurrencyUtils.submit(new Runnable() {
3324                @Override
3325                                public void run() {
3326                    int idx0, idx1, idx2, idx3, idx4, idx5;
3327
3328                    if (isgn == -1) {
3329                        for (int s = n0; s < slices; s += nthreads) {
3330                            idx0 = s * sliceStride;
3331                            if (icr == 0) {
3332                                for (int r = 0; r < rows; r++) {
3333                                    fftColumns.complexForward(a, idx0 + r * rowStride);
3334                                }
3335                            } else {
3336                                for (int r = 0; r < rows; r++) {
3337                                    fftColumns.realForward(a, idx0 + r * rowStride);
3338                                }
3339                            }
3340                            if (columns > 4) {
3341                                for (int c = 0; c < columns; c += 8) {
3342                                    for (int r = 0; r < rows; r++) {
3343                                        idx1 = idx0 + r * rowStride + c;
3344                                        idx2 = startt + 2 * r;
3345                                        idx3 = startt + 2 * rows + 2 * r;
3346                                        idx4 = idx3 + 2 * rows;
3347                                        idx5 = idx4 + 2 * rows;
3348                                        t[idx2] = a[idx1];
3349                                        t[idx2 + 1] = a[idx1 + 1];
3350                                        t[idx3] = a[idx1 + 2];
3351                                        t[idx3 + 1] = a[idx1 + 3];
3352                                        t[idx4] = a[idx1 + 4];
3353                                        t[idx4 + 1] = a[idx1 + 5];
3354                                        t[idx5] = a[idx1 + 6];
3355                                        t[idx5 + 1] = a[idx1 + 7];
3356                                    }
3357                                    fftRows.complexForward(t, startt);
3358                                    fftRows.complexForward(t, startt + 2 * rows);
3359                                    fftRows.complexForward(t, startt + 4 * rows);
3360                                    fftRows.complexForward(t, startt + 6 * rows);
3361                                    for (int r = 0; r < rows; r++) {
3362                                        idx1 = idx0 + r * rowStride + c;
3363                                        idx2 = startt + 2 * r;
3364                                        idx3 = startt + 2 * rows + 2 * r;
3365                                        idx4 = idx3 + 2 * rows;
3366                                        idx5 = idx4 + 2 * rows;
3367                                        a[idx1] = t[idx2];
3368                                        a[idx1 + 1] = t[idx2 + 1];
3369                                        a[idx1 + 2] = t[idx3];
3370                                        a[idx1 + 3] = t[idx3 + 1];
3371                                        a[idx1 + 4] = t[idx4];
3372                                        a[idx1 + 5] = t[idx4 + 1];
3373                                        a[idx1 + 6] = t[idx5];
3374                                        a[idx1 + 7] = t[idx5 + 1];
3375                                    }
3376                                }
3377                            } else if (columns == 4) {
3378                                for (int r = 0; r < rows; r++) {
3379                                    idx1 = idx0 + r * rowStride;
3380                                    idx2 = startt + 2 * r;
3381                                    idx3 = startt + 2 * rows + 2 * r;
3382                                    t[idx2] = a[idx1];
3383                                    t[idx2 + 1] = a[idx1 + 1];
3384                                    t[idx3] = a[idx1 + 2];
3385                                    t[idx3 + 1] = a[idx1 + 3];
3386                                }
3387                                fftRows.complexForward(t, startt);
3388                                fftRows.complexForward(t, startt + 2 * rows);
3389                                for (int r = 0; r < rows; r++) {
3390                                    idx1 = idx0 + r * rowStride;
3391                                    idx2 = startt + 2 * r;
3392                                    idx3 = startt + 2 * rows + 2 * r;
3393                                    a[idx1] = t[idx2];
3394                                    a[idx1 + 1] = t[idx2 + 1];
3395                                    a[idx1 + 2] = t[idx3];
3396                                    a[idx1 + 3] = t[idx3 + 1];
3397                                }
3398                            } else if (columns == 2) {
3399                                for (int r = 0; r < rows; r++) {
3400                                    idx1 = idx0 + r * rowStride;
3401                                    idx2 = startt + 2 * r;
3402                                    t[idx2] = a[idx1];
3403                                    t[idx2 + 1] = a[idx1 + 1];
3404                                }
3405                                fftRows.complexForward(t, startt);
3406                                for (int r = 0; r < rows; r++) {
3407                                    idx1 = idx0 + r * rowStride;
3408                                    idx2 = startt + 2 * r;
3409                                    a[idx1] = t[idx2];
3410                                    a[idx1 + 1] = t[idx2 + 1];
3411                                }
3412                            }
3413
3414                        }
3415                    } else {
3416                        for (int s = n0; s < slices; s += nthreads) {
3417                            idx0 = s * sliceStride;
3418                            if (icr == 0) {
3419                                for (int r = 0; r < rows; r++) {
3420                                    fftColumns.complexInverse(a, idx0 + r * rowStride, scale);
3421                                }
3422                            }
3423                            if (columns > 4) {
3424                                for (int c = 0; c < columns; c += 8) {
3425                                    for (int r = 0; r < rows; r++) {
3426                                        idx1 = idx0 + r * rowStride + c;
3427                                        idx2 = startt + 2 * r;
3428                                        idx3 = startt + 2 * rows + 2 * r;
3429                                        idx4 = idx3 + 2 * rows;
3430                                        idx5 = idx4 + 2 * rows;
3431                                        t[idx2] = a[idx1];
3432                                        t[idx2 + 1] = a[idx1 + 1];
3433                                        t[idx3] = a[idx1 + 2];
3434                                        t[idx3 + 1] = a[idx1 + 3];
3435                                        t[idx4] = a[idx1 + 4];
3436                                        t[idx4 + 1] = a[idx1 + 5];
3437                                        t[idx5] = a[idx1 + 6];
3438                                        t[idx5 + 1] = a[idx1 + 7];
3439                                    }
3440                                    fftRows.complexInverse(t, startt, scale);
3441                                    fftRows.complexInverse(t, startt + 2 * rows, scale);
3442                                    fftRows.complexInverse(t, startt + 4 * rows, scale);
3443                                    fftRows.complexInverse(t, startt + 6 * rows, scale);
3444                                    for (int r = 0; r < rows; r++) {
3445                                        idx1 = idx0 + r * rowStride + c;
3446                                        idx2 = startt + 2 * r;
3447                                        idx3 = startt + 2 * rows + 2 * r;
3448                                        idx4 = idx3 + 2 * rows;
3449                                        idx5 = idx4 + 2 * rows;
3450                                        a[idx1] = t[idx2];
3451                                        a[idx1 + 1] = t[idx2 + 1];
3452                                        a[idx1 + 2] = t[idx3];
3453                                        a[idx1 + 3] = t[idx3 + 1];
3454                                        a[idx1 + 4] = t[idx4];
3455                                        a[idx1 + 5] = t[idx4 + 1];
3456                                        a[idx1 + 6] = t[idx5];
3457                                        a[idx1 + 7] = t[idx5 + 1];
3458                                    }
3459                                }
3460                            } else if (columns == 4) {
3461                                for (int r = 0; r < rows; r++) {
3462                                    idx1 = idx0 + r * rowStride;
3463                                    idx2 = startt + 2 * r;
3464                                    idx3 = startt + 2 * rows + 2 * r;
3465                                    t[idx2] = a[idx1];
3466                                    t[idx2 + 1] = a[idx1 + 1];
3467                                    t[idx3] = a[idx1 + 2];
3468                                    t[idx3 + 1] = a[idx1 + 3];
3469                                }
3470                                fftRows.complexInverse(t, startt, scale);
3471                                fftRows.complexInverse(t, startt + 2 * rows, scale);
3472                                for (int r = 0; r < rows; r++) {
3473                                    idx1 = idx0 + r * rowStride;
3474                                    idx2 = startt + 2 * r;
3475                                    idx3 = startt + 2 * rows + 2 * r;
3476                                    a[idx1] = t[idx2];
3477                                    a[idx1 + 1] = t[idx2 + 1];
3478                                    a[idx1 + 2] = t[idx3];
3479                                    a[idx1 + 3] = t[idx3 + 1];
3480                                }
3481                            } else if (columns == 2) {
3482                                for (int r = 0; r < rows; r++) {
3483                                    idx1 = idx0 + r * rowStride;
3484                                    idx2 = startt + 2 * r;
3485                                    t[idx2] = a[idx1];
3486                                    t[idx2 + 1] = a[idx1 + 1];
3487                                }
3488                                fftRows.complexInverse(t, startt, scale);
3489                                for (int r = 0; r < rows; r++) {
3490                                    idx1 = idx0 + r * rowStride;
3491                                    idx2 = startt + 2 * r;
3492                                    a[idx1] = t[idx2];
3493                                    a[idx1 + 1] = t[idx2 + 1];
3494                                }
3495                            }
3496                            if (icr != 0) {
3497                                for (int r = 0; r < rows; r++) {
3498                                    fftColumns.realInverse(a, idx0 + r * rowStride, scale);
3499                                }
3500                            }
3501                        }
3502                    }
3503                }
3504            });
3505        }
3506        ConcurrencyUtils.waitForCompletion(futures);
3507    }
3508
3509    private void xdft3da_subth2(final int icr, final int isgn, final float[] a, final boolean scale) {
3510        int nt, i;
3511
3512        final int nthreads = Math.min(ConcurrencyUtils.getNumberOfThreads(), slices);
3513        nt = 8 * rows;
3514        if (columns == 4) {
3515            nt >>= 1;
3516        } else if (columns < 4) {
3517            nt >>= 2;
3518        }
3519        Future<?>[] futures = new Future[nthreads];
3520        for (i = 0; i < nthreads; i++) {
3521            final int n0 = i;
3522            final int startt = nt * i;
3523            futures[i] = ConcurrencyUtils.submit(new Runnable() {
3524                @Override
3525                                public void run() {
3526                    int idx0, idx1, idx2, idx3, idx4, idx5;
3527
3528                    if (isgn == -1) {
3529                        for (int s = n0; s < slices; s += nthreads) {
3530                            idx0 = s * sliceStride;
3531                            if (icr == 0) {
3532                                for (int r = 0; r < rows; r++) {
3533                                    fftColumns.complexForward(a, idx0 + r * rowStride);
3534                                }
3535                            } else {
3536                                for (int r = 0; r < rows; r++) {
3537                                    fftColumns.realForward(a, idx0 + r * rowStride);
3538                                }
3539                            }
3540                            if (columns > 4) {
3541                                for (int c = 0; c < columns; c += 8) {
3542                                    for (int r = 0; r < rows; r++) {
3543                                        idx1 = idx0 + r * rowStride + c;
3544                                        idx2 = startt + 2 * r;
3545                                        idx3 = startt + 2 * rows + 2 * r;
3546                                        idx4 = idx3 + 2 * rows;
3547                                        idx5 = idx4 + 2 * rows;
3548                                        t[idx2] = a[idx1];
3549                                        t[idx2 + 1] = a[idx1 + 1];
3550                                        t[idx3] = a[idx1 + 2];
3551                                        t[idx3 + 1] = a[idx1 + 3];
3552                                        t[idx4] = a[idx1 + 4];
3553                                        t[idx4 + 1] = a[idx1 + 5];
3554                                        t[idx5] = a[idx1 + 6];
3555                                        t[idx5 + 1] = a[idx1 + 7];
3556                                    }
3557                                    fftRows.complexForward(t, startt);
3558                                    fftRows.complexForward(t, startt + 2 * rows);
3559                                    fftRows.complexForward(t, startt + 4 * rows);
3560                                    fftRows.complexForward(t, startt + 6 * rows);
3561                                    for (int r = 0; r < rows; r++) {
3562                                        idx1 = idx0 + r * rowStride + c;
3563                                        idx2 = startt + 2 * r;
3564                                        idx3 = startt + 2 * rows + 2 * r;
3565                                        idx4 = idx3 + 2 * rows;
3566                                        idx5 = idx4 + 2 * rows;
3567                                        a[idx1] = t[idx2];
3568                                        a[idx1 + 1] = t[idx2 + 1];
3569                                        a[idx1 + 2] = t[idx3];
3570                                        a[idx1 + 3] = t[idx3 + 1];
3571                                        a[idx1 + 4] = t[idx4];
3572                                        a[idx1 + 5] = t[idx4 + 1];
3573                                        a[idx1 + 6] = t[idx5];
3574                                        a[idx1 + 7] = t[idx5 + 1];
3575                                    }
3576                                }
3577                            } else if (columns == 4) {
3578                                for (int r = 0; r < rows; r++) {
3579                                    idx1 = idx0 + r * rowStride;
3580                                    idx2 = startt + 2 * r;
3581                                    idx3 = startt + 2 * rows + 2 * r;
3582                                    t[idx2] = a[idx1];
3583                                    t[idx2 + 1] = a[idx1 + 1];
3584                                    t[idx3] = a[idx1 + 2];
3585                                    t[idx3 + 1] = a[idx1 + 3];
3586                                }
3587                                fftRows.complexForward(t, startt);
3588                                fftRows.complexForward(t, startt + 2 * rows);
3589                                for (int r = 0; r < rows; r++) {
3590                                    idx1 = idx0 + r * rowStride;
3591                                    idx2 = startt + 2 * r;
3592                                    idx3 = startt + 2 * rows + 2 * r;
3593                                    a[idx1] = t[idx2];
3594                                    a[idx1 + 1] = t[idx2 + 1];
3595                                    a[idx1 + 2] = t[idx3];
3596                                    a[idx1 + 3] = t[idx3 + 1];
3597                                }
3598                            } else if (columns == 2) {
3599                                for (int r = 0; r < rows; r++) {
3600                                    idx1 = idx0 + r * rowStride;
3601                                    idx2 = startt + 2 * r;
3602                                    t[idx2] = a[idx1];
3603                                    t[idx2 + 1] = a[idx1 + 1];
3604                                }
3605                                fftRows.complexForward(t, startt);
3606                                for (int r = 0; r < rows; r++) {
3607                                    idx1 = idx0 + r * rowStride;
3608                                    idx2 = startt + 2 * r;
3609                                    a[idx1] = t[idx2];
3610                                    a[idx1 + 1] = t[idx2 + 1];
3611                                }
3612                            }
3613
3614                        }
3615                    } else {
3616                        for (int s = n0; s < slices; s += nthreads) {
3617                            idx0 = s * sliceStride;
3618                            if (icr == 0) {
3619                                for (int r = 0; r < rows; r++) {
3620                                    fftColumns.complexInverse(a, idx0 + r * rowStride, scale);
3621                                }
3622                            } else {
3623                                for (int r = 0; r < rows; r++) {
3624                                    fftColumns.realInverse2(a, idx0 + r * rowStride, scale);
3625                                }
3626                            }
3627                            if (columns > 4) {
3628                                for (int c = 0; c < columns; c += 8) {
3629                                    for (int r = 0; r < rows; r++) {
3630                                        idx1 = idx0 + r * rowStride + c;
3631                                        idx2 = startt + 2 * r;
3632                                        idx3 = startt + 2 * rows + 2 * r;
3633                                        idx4 = idx3 + 2 * rows;
3634                                        idx5 = idx4 + 2 * rows;
3635                                        t[idx2] = a[idx1];
3636                                        t[idx2 + 1] = a[idx1 + 1];
3637                                        t[idx3] = a[idx1 + 2];
3638                                        t[idx3 + 1] = a[idx1 + 3];
3639                                        t[idx4] = a[idx1 + 4];
3640                                        t[idx4 + 1] = a[idx1 + 5];
3641                                        t[idx5] = a[idx1 + 6];
3642                                        t[idx5 + 1] = a[idx1 + 7];
3643                                    }
3644                                    fftRows.complexInverse(t, startt, scale);
3645                                    fftRows.complexInverse(t, startt + 2 * rows, scale);
3646                                    fftRows.complexInverse(t, startt + 4 * rows, scale);
3647                                    fftRows.complexInverse(t, startt + 6 * rows, scale);
3648                                    for (int r = 0; r < rows; r++) {
3649                                        idx1 = idx0 + r * rowStride + c;
3650                                        idx2 = startt + 2 * r;
3651                                        idx3 = startt + 2 * rows + 2 * r;
3652                                        idx4 = idx3 + 2 * rows;
3653                                        idx5 = idx4 + 2 * rows;
3654                                        a[idx1] = t[idx2];
3655                                        a[idx1 + 1] = t[idx2 + 1];
3656                                        a[idx1 + 2] = t[idx3];
3657                                        a[idx1 + 3] = t[idx3 + 1];
3658                                        a[idx1 + 4] = t[idx4];
3659                                        a[idx1 + 5] = t[idx4 + 1];
3660                                        a[idx1 + 6] = t[idx5];
3661                                        a[idx1 + 7] = t[idx5 + 1];
3662                                    }
3663                                }
3664                            } else if (columns == 4) {
3665                                for (int r = 0; r < rows; r++) {
3666                                    idx1 = idx0 + r * rowStride;
3667                                    idx2 = startt + 2 * r;
3668                                    idx3 = startt + 2 * rows + 2 * r;
3669                                    t[idx2] = a[idx1];
3670                                    t[idx2 + 1] = a[idx1 + 1];
3671                                    t[idx3] = a[idx1 + 2];
3672                                    t[idx3 + 1] = a[idx1 + 3];
3673                                }
3674                                fftRows.complexInverse(t, startt, scale);
3675                                fftRows.complexInverse(t, startt + 2 * rows, scale);
3676                                for (int r = 0; r < rows; r++) {
3677                                    idx1 = idx0 + r * rowStride;
3678                                    idx2 = startt + 2 * r;
3679                                    idx3 = startt + 2 * rows + 2 * r;
3680                                    a[idx1] = t[idx2];
3681                                    a[idx1 + 1] = t[idx2 + 1];
3682                                    a[idx1 + 2] = t[idx3];
3683                                    a[idx1 + 3] = t[idx3 + 1];
3684                                }
3685                            } else if (columns == 2) {
3686                                for (int r = 0; r < rows; r++) {
3687                                    idx1 = idx0 + r * rowStride;
3688                                    idx2 = startt + 2 * r;
3689                                    t[idx2] = a[idx1];
3690                                    t[idx2 + 1] = a[idx1 + 1];
3691                                }
3692                                fftRows.complexInverse(t, startt, scale);
3693                                for (int r = 0; r < rows; r++) {
3694                                    idx1 = idx0 + r * rowStride;
3695                                    idx2 = startt + 2 * r;
3696                                    a[idx1] = t[idx2];
3697                                    a[idx1 + 1] = t[idx2 + 1];
3698                                }
3699                            }
3700                        }
3701                    }
3702                }
3703            });
3704        }
3705        ConcurrencyUtils.waitForCompletion(futures);
3706    }
3707
3708    private void xdft3da_subth1(final int icr, final int isgn, final float[][][] a, final boolean scale) {
3709        int nt, i;
3710
3711        final int nthreads = Math.min(ConcurrencyUtils.getNumberOfThreads(), slices);
3712        nt = 8 * rows;
3713        if (columns == 4) {
3714            nt >>= 1;
3715        } else if (columns < 4) {
3716            nt >>= 2;
3717        }
3718        Future<?>[] futures = new Future[nthreads];
3719        for (i = 0; i < nthreads; i++) {
3720            final int n0 = i;
3721            final int startt = nt * i;
3722            futures[i] = ConcurrencyUtils.submit(new Runnable() {
3723                @Override
3724                                public void run() {
3725                    int idx2, idx3, idx4, idx5;
3726
3727                    if (isgn == -1) {
3728                        for (int s = n0; s < slices; s += nthreads) {
3729                            if (icr == 0) {
3730                                for (int r = 0; r < rows; r++) {
3731                                    fftColumns.complexForward(a[s][r]);
3732                                }
3733                            } else {
3734                                for (int r = 0; r < rows; r++) {
3735                                    fftColumns.realForward(a[s][r], 0);
3736                                }
3737                            }
3738                            if (columns > 4) {
3739                                for (int c = 0; c < columns; c += 8) {
3740                                    for (int r = 0; r < rows; r++) {
3741                                        idx2 = startt + 2 * r;
3742                                        idx3 = startt + 2 * rows + 2 * r;
3743                                        idx4 = idx3 + 2 * rows;
3744                                        idx5 = idx4 + 2 * rows;
3745                                        t[idx2] = a[s][r][c];
3746                                        t[idx2 + 1] = a[s][r][c + 1];
3747                                        t[idx3] = a[s][r][c + 2];
3748                                        t[idx3 + 1] = a[s][r][c + 3];
3749                                        t[idx4] = a[s][r][c + 4];
3750                                        t[idx4 + 1] = a[s][r][c + 5];
3751                                        t[idx5] = a[s][r][c + 6];
3752                                        t[idx5 + 1] = a[s][r][c + 7];
3753                                    }
3754                                    fftRows.complexForward(t, startt);
3755                                    fftRows.complexForward(t, startt + 2 * rows);
3756                                    fftRows.complexForward(t, startt + 4 * rows);
3757                                    fftRows.complexForward(t, startt + 6 * rows);
3758                                    for (int r = 0; r < rows; r++) {
3759                                        idx2 = startt + 2 * r;
3760                                        idx3 = startt + 2 * rows + 2 * r;
3761                                        idx4 = idx3 + 2 * rows;
3762                                        idx5 = idx4 + 2 * rows;
3763                                        a[s][r][c] = t[idx2];
3764                                        a[s][r][c + 1] = t[idx2 + 1];
3765                                        a[s][r][c + 2] = t[idx3];
3766                                        a[s][r][c + 3] = t[idx3 + 1];
3767                                        a[s][r][c + 4] = t[idx4];
3768                                        a[s][r][c + 5] = t[idx4 + 1];
3769                                        a[s][r][c + 6] = t[idx5];
3770                                        a[s][r][c + 7] = t[idx5 + 1];
3771                                    }
3772                                }
3773                            } else if (columns == 4) {
3774                                for (int r = 0; r < rows; r++) {
3775                                    idx2 = startt + 2 * r;
3776                                    idx3 = startt + 2 * rows + 2 * r;
3777                                    t[idx2] = a[s][r][0];
3778                                    t[idx2 + 1] = a[s][r][1];
3779                                    t[idx3] = a[s][r][2];
3780                                    t[idx3 + 1] = a[s][r][3];
3781                                }
3782                                fftRows.complexForward(t, startt);
3783                                fftRows.complexForward(t, startt + 2 * rows);
3784                                for (int r = 0; r < rows; r++) {
3785                                    idx2 = startt + 2 * r;
3786                                    idx3 = startt + 2 * rows + 2 * r;
3787                                    a[s][r][0] = t[idx2];
3788                                    a[s][r][1] = t[idx2 + 1];
3789                                    a[s][r][2] = t[idx3];
3790                                    a[s][r][3] = t[idx3 + 1];
3791                                }
3792                            } else if (columns == 2) {
3793                                for (int r = 0; r < rows; r++) {
3794                                    idx2 = startt + 2 * r;
3795                                    t[idx2] = a[s][r][0];
3796                                    t[idx2 + 1] = a[s][r][1];
3797                                }
3798                                fftRows.complexForward(t, startt);
3799                                for (int r = 0; r < rows; r++) {
3800                                    idx2 = startt + 2 * r;
3801                                    a[s][r][0] = t[idx2];
3802                                    a[s][r][1] = t[idx2 + 1];
3803                                }
3804                            }
3805
3806                        }
3807                    } else {
3808                        for (int s = n0; s < slices; s += nthreads) {
3809                            if (icr == 0) {
3810                                for (int r = 0; r < rows; r++) {
3811                                    fftColumns.complexInverse(a[s][r], scale);
3812                                }
3813                            }
3814                            if (columns > 4) {
3815                                for (int c = 0; c < columns; c += 8) {
3816                                    for (int r = 0; r < rows; r++) {
3817                                        idx2 = startt + 2 * r;
3818                                        idx3 = startt + 2 * rows + 2 * r;
3819                                        idx4 = idx3 + 2 * rows;
3820                                        idx5 = idx4 + 2 * rows;
3821                                        t[idx2] = a[s][r][c];
3822                                        t[idx2 + 1] = a[s][r][c + 1];
3823                                        t[idx3] = a[s][r][c + 2];
3824                                        t[idx3 + 1] = a[s][r][c + 3];
3825                                        t[idx4] = a[s][r][c + 4];
3826                                        t[idx4 + 1] = a[s][r][c + 5];
3827                                        t[idx5] = a[s][r][c + 6];
3828                                        t[idx5 + 1] = a[s][r][c + 7];
3829                                    }
3830                                    fftRows.complexInverse(t, startt, scale);
3831                                    fftRows.complexInverse(t, startt + 2 * rows, scale);
3832                                    fftRows.complexInverse(t, startt + 4 * rows, scale);
3833                                    fftRows.complexInverse(t, startt + 6 * rows, scale);
3834                                    for (int r = 0; r < rows; r++) {
3835                                        idx2 = startt + 2 * r;
3836                                        idx3 = startt + 2 * rows + 2 * r;
3837                                        idx4 = idx3 + 2 * rows;
3838                                        idx5 = idx4 + 2 * rows;
3839                                        a[s][r][c] = t[idx2];
3840                                        a[s][r][c + 1] = t[idx2 + 1];
3841                                        a[s][r][c + 2] = t[idx3];
3842                                        a[s][r][c + 3] = t[idx3 + 1];
3843                                        a[s][r][c + 4] = t[idx4];
3844                                        a[s][r][c + 5] = t[idx4 + 1];
3845                                        a[s][r][c + 6] = t[idx5];
3846                                        a[s][r][c + 7] = t[idx5 + 1];
3847                                    }
3848                                }
3849                            } else if (columns == 4) {
3850                                for (int r = 0; r < rows; r++) {
3851                                    idx2 = startt + 2 * r;
3852                                    idx3 = startt + 2 * rows + 2 * r;
3853                                    t[idx2] = a[s][r][0];
3854                                    t[idx2 + 1] = a[s][r][1];
3855                                    t[idx3] = a[s][r][2];
3856                                    t[idx3 + 1] = a[s][r][3];
3857                                }
3858                                fftRows.complexInverse(t, startt, scale);
3859                                fftRows.complexInverse(t, startt + 2 * rows, scale);
3860                                for (int r = 0; r < rows; r++) {
3861                                    idx2 = startt + 2 * r;
3862                                    idx3 = startt + 2 * rows + 2 * r;
3863                                    a[s][r][0] = t[idx2];
3864                                    a[s][r][1] = t[idx2 + 1];
3865                                    a[s][r][2] = t[idx3];
3866                                    a[s][r][3] = t[idx3 + 1];
3867                                }
3868                            } else if (columns == 2) {
3869                                for (int r = 0; r < rows; r++) {
3870                                    idx2 = startt + 2 * r;
3871                                    t[idx2] = a[s][r][0];
3872                                    t[idx2 + 1] = a[s][r][1];
3873                                }
3874                                fftRows.complexInverse(t, startt, scale);
3875                                for (int r = 0; r < rows; r++) {
3876                                    idx2 = startt + 2 * r;
3877                                    a[s][r][0] = t[idx2];
3878                                    a[s][r][1] = t[idx2 + 1];
3879                                }
3880                            }
3881                            if (icr != 0) {
3882                                for (int r = 0; r < rows; r++) {
3883                                    fftColumns.realInverse(a[s][r], scale);
3884                                }
3885                            }
3886                        }
3887                    }
3888                }
3889            });
3890        }
3891        ConcurrencyUtils.waitForCompletion(futures);
3892    }
3893
3894    private void xdft3da_subth2(final int icr, final int isgn, final float[][][] a, final boolean scale) {
3895        int nt, i;
3896
3897        final int nthreads = Math.min(ConcurrencyUtils.getNumberOfThreads(), slices);
3898        nt = 8 * rows;
3899        if (columns == 4) {
3900            nt >>= 1;
3901        } else if (columns < 4) {
3902            nt >>= 2;
3903        }
3904        Future<?>[] futures = new Future[nthreads];
3905        for (i = 0; i < nthreads; i++) {
3906            final int n0 = i;
3907            final int startt = nt * i;
3908            futures[i] = ConcurrencyUtils.submit(new Runnable() {
3909                @Override
3910                                public void run() {
3911                    int idx2, idx3, idx4, idx5;
3912
3913                    if (isgn == -1) {
3914                        for (int s = n0; s < slices; s += nthreads) {
3915                            if (icr == 0) {
3916                                for (int r = 0; r < rows; r++) {
3917                                    fftColumns.complexForward(a[s][r]);
3918                                }
3919                            } else {
3920                                for (int r = 0; r < rows; r++) {
3921                                    fftColumns.realForward(a[s][r]);
3922                                }
3923                            }
3924                            if (columns > 4) {
3925                                for (int c = 0; c < columns; c += 8) {
3926                                    for (int r = 0; r < rows; r++) {
3927                                        idx2 = startt + 2 * r;
3928                                        idx3 = startt + 2 * rows + 2 * r;
3929                                        idx4 = idx3 + 2 * rows;
3930                                        idx5 = idx4 + 2 * rows;
3931                                        t[idx2] = a[s][r][c];
3932                                        t[idx2 + 1] = a[s][r][c + 1];
3933                                        t[idx3] = a[s][r][c + 2];
3934                                        t[idx3 + 1] = a[s][r][c + 3];
3935                                        t[idx4] = a[s][r][c + 4];
3936                                        t[idx4 + 1] = a[s][r][c + 5];
3937                                        t[idx5] = a[s][r][c + 6];
3938                                        t[idx5 + 1] = a[s][r][c + 7];
3939                                    }
3940                                    fftRows.complexForward(t, startt);
3941                                    fftRows.complexForward(t, startt + 2 * rows);
3942                                    fftRows.complexForward(t, startt + 4 * rows);
3943                                    fftRows.complexForward(t, startt + 6 * rows);
3944                                    for (int r = 0; r < rows; r++) {
3945                                        idx2 = startt + 2 * r;
3946                                        idx3 = startt + 2 * rows + 2 * r;
3947                                        idx4 = idx3 + 2 * rows;
3948                                        idx5 = idx4 + 2 * rows;
3949                                        a[s][r][c] = t[idx2];
3950                                        a[s][r][c + 1] = t[idx2 + 1];
3951                                        a[s][r][c + 2] = t[idx3];
3952                                        a[s][r][c + 3] = t[idx3 + 1];
3953                                        a[s][r][c + 4] = t[idx4];
3954                                        a[s][r][c + 5] = t[idx4 + 1];
3955                                        a[s][r][c + 6] = t[idx5];
3956                                        a[s][r][c + 7] = t[idx5 + 1];
3957                                    }
3958                                }
3959                            } else if (columns == 4) {
3960                                for (int r = 0; r < rows; r++) {
3961                                    idx2 = startt + 2 * r;
3962                                    idx3 = startt + 2 * rows + 2 * r;
3963                                    t[idx2] = a[s][r][0];
3964                                    t[idx2 + 1] = a[s][r][1];
3965                                    t[idx3] = a[s][r][2];
3966                                    t[idx3 + 1] = a[s][r][3];
3967                                }
3968                                fftRows.complexForward(t, startt);
3969                                fftRows.complexForward(t, startt + 2 * rows);
3970                                for (int r = 0; r < rows; r++) {
3971                                    idx2 = startt + 2 * r;
3972                                    idx3 = startt + 2 * rows + 2 * r;
3973                                    a[s][r][0] = t[idx2];
3974                                    a[s][r][1] = t[idx2 + 1];
3975                                    a[s][r][2] = t[idx3];
3976                                    a[s][r][3] = t[idx3 + 1];
3977                                }
3978                            } else if (columns == 2) {
3979                                for (int r = 0; r < rows; r++) {
3980                                    idx2 = startt + 2 * r;
3981                                    t[idx2] = a[s][r][0];
3982                                    t[idx2 + 1] = a[s][r][1];
3983                                }
3984                                fftRows.complexForward(t, startt);
3985                                for (int r = 0; r < rows; r++) {
3986                                    idx2 = startt + 2 * r;
3987                                    a[s][r][0] = t[idx2];
3988                                    a[s][r][1] = t[idx2 + 1];
3989                                }
3990                            }
3991
3992                        }
3993                    } else {
3994                        for (int s = n0; s < slices; s += nthreads) {
3995                            if (icr == 0) {
3996                                for (int r = 0; r < rows; r++) {
3997                                    fftColumns.complexInverse(a[s][r], scale);
3998                                }
3999                            } else {
4000                                for (int r = 0; r < rows; r++) {
4001                                    fftColumns.realInverse2(a[s][r], 0, scale);
4002                                }
4003                            }
4004                            if (columns > 4) {
4005                                for (int c = 0; c < columns; c += 8) {
4006                                    for (int r = 0; r < rows; r++) {
4007                                        idx2 = startt + 2 * r;
4008                                        idx3 = startt + 2 * rows + 2 * r;
4009                                        idx4 = idx3 + 2 * rows;
4010                                        idx5 = idx4 + 2 * rows;
4011                                        t[idx2] = a[s][r][c];
4012                                        t[idx2 + 1] = a[s][r][c + 1];
4013                                        t[idx3] = a[s][r][c + 2];
4014                                        t[idx3 + 1] = a[s][r][c + 3];
4015                                        t[idx4] = a[s][r][c + 4];
4016                                        t[idx4 + 1] = a[s][r][c + 5];
4017                                        t[idx5] = a[s][r][c + 6];
4018                                        t[idx5 + 1] = a[s][r][c + 7];
4019                                    }
4020                                    fftRows.complexInverse(t, startt, scale);
4021                                    fftRows.complexInverse(t, startt + 2 * rows, scale);
4022                                    fftRows.complexInverse(t, startt + 4 * rows, scale);
4023                                    fftRows.complexInverse(t, startt + 6 * rows, scale);
4024                                    for (int r = 0; r < rows; r++) {
4025                                        idx2 = startt + 2 * r;
4026                                        idx3 = startt + 2 * rows + 2 * r;
4027                                        idx4 = idx3 + 2 * rows;
4028                                        idx5 = idx4 + 2 * rows;
4029                                        a[s][r][c] = t[idx2];
4030                                        a[s][r][c + 1] = t[idx2 + 1];
4031                                        a[s][r][c + 2] = t[idx3];
4032                                        a[s][r][c + 3] = t[idx3 + 1];
4033                                        a[s][r][c + 4] = t[idx4];
4034                                        a[s][r][c + 5] = t[idx4 + 1];
4035                                        a[s][r][c + 6] = t[idx5];
4036                                        a[s][r][c + 7] = t[idx5 + 1];
4037                                    }
4038                                }
4039                            } else if (columns == 4) {
4040                                for (int r = 0; r < rows; r++) {
4041                                    idx2 = startt + 2 * r;
4042                                    idx3 = startt + 2 * rows + 2 * r;
4043                                    t[idx2] = a[s][r][0];
4044                                    t[idx2 + 1] = a[s][r][1];
4045                                    t[idx3] = a[s][r][2];
4046                                    t[idx3 + 1] = a[s][r][3];
4047                                }
4048                                fftRows.complexInverse(t, startt, scale);
4049                                fftRows.complexInverse(t, startt + 2 * rows, scale);
4050                                for (int r = 0; r < rows; r++) {
4051                                    idx2 = startt + 2 * r;
4052                                    idx3 = startt + 2 * rows + 2 * r;
4053                                    a[s][r][0] = t[idx2];
4054                                    a[s][r][1] = t[idx2 + 1];
4055                                    a[s][r][2] = t[idx3];
4056                                    a[s][r][3] = t[idx3 + 1];
4057                                }
4058                            } else if (columns == 2) {
4059                                for (int r = 0; r < rows; r++) {
4060                                    idx2 = startt + 2 * r;
4061                                    t[idx2] = a[s][r][0];
4062                                    t[idx2 + 1] = a[s][r][1];
4063                                }
4064                                fftRows.complexInverse(t, startt, scale);
4065                                for (int r = 0; r < rows; r++) {
4066                                    idx2 = startt + 2 * r;
4067                                    a[s][r][0] = t[idx2];
4068                                    a[s][r][1] = t[idx2 + 1];
4069                                }
4070                            }
4071                        }
4072                    }
4073                }
4074            });
4075        }
4076        ConcurrencyUtils.waitForCompletion(futures);
4077    }
4078
4079    private void cdft3db_subth(final int isgn, final float[] a, final boolean scale) {
4080        int nt, i;
4081
4082        final int nthreads = Math.min(ConcurrencyUtils.getNumberOfThreads(), rows);
4083        nt = 8 * slices;
4084        if (columns == 4) {
4085            nt >>= 1;
4086        } else if (columns < 4) {
4087            nt >>= 2;
4088        }
4089        Future<?>[] futures = new Future[nthreads];
4090        for (i = 0; i < nthreads; i++) {
4091            final int n0 = i;
4092            final int startt = nt * i;
4093            futures[i] = ConcurrencyUtils.submit(new Runnable() {
4094
4095                @Override
4096                                public void run() {
4097                    int idx0, idx1, idx2, idx3, idx4, idx5;
4098
4099                    if (isgn == -1) {
4100                        if (columns > 4) {
4101                            for (int r = n0; r < rows; r += nthreads) {
4102                                idx0 = r * rowStride;
4103                                for (int c = 0; c < columns; c += 8) {
4104                                    for (int s = 0; s < slices; s++) {
4105                                        idx1 = s * sliceStride + idx0 + c;
4106                                        idx2 = startt + 2 * s;
4107                                        idx3 = startt + 2 * slices + 2 * s;
4108                                        idx4 = idx3 + 2 * slices;
4109                                        idx5 = idx4 + 2 * slices;
4110                                        t[idx2] = a[idx1];
4111                                        t[idx2 + 1] = a[idx1 + 1];
4112                                        t[idx3] = a[idx1 + 2];
4113                                        t[idx3 + 1] = a[idx1 + 3];
4114                                        t[idx4] = a[idx1 + 4];
4115                                        t[idx4 + 1] = a[idx1 + 5];
4116                                        t[idx5] = a[idx1 + 6];
4117                                        t[idx5 + 1] = a[idx1 + 7];
4118                                    }
4119                                    fftSlices.complexForward(t, startt);
4120                                    fftSlices.complexForward(t, startt + 2 * slices);
4121                                    fftSlices.complexForward(t, startt + 4 * slices);
4122                                    fftSlices.complexForward(t, startt + 6 * slices);
4123                                    for (int s = 0; s < slices; s++) {
4124                                        idx1 = s * sliceStride + idx0 + c;
4125                                        idx2 = startt + 2 * s;
4126                                        idx3 = startt + 2 * slices + 2 * s;
4127                                        idx4 = idx3 + 2 * slices;
4128                                        idx5 = idx4 + 2 * slices;
4129                                        a[idx1] = t[idx2];
4130                                        a[idx1 + 1] = t[idx2 + 1];
4131                                        a[idx1 + 2] = t[idx3];
4132                                        a[idx1 + 3] = t[idx3 + 1];
4133                                        a[idx1 + 4] = t[idx4];
4134                                        a[idx1 + 5] = t[idx4 + 1];
4135                                        a[idx1 + 6] = t[idx5];
4136                                        a[idx1 + 7] = t[idx5 + 1];
4137                                    }
4138                                }
4139                            }
4140                        } else if (columns == 4) {
4141                            for (int r = n0; r < rows; r += nthreads) {
4142                                idx0 = r * rowStride;
4143                                for (int s = 0; s < slices; s++) {
4144                                    idx1 = s * sliceStride + idx0;
4145                                    idx2 = startt + 2 * s;
4146                                    idx3 = startt + 2 * slices + 2 * s;
4147                                    t[idx2] = a[idx1];
4148                                    t[idx2 + 1] = a[idx1 + 1];
4149                                    t[idx3] = a[idx1 + 2];
4150                                    t[idx3 + 1] = a[idx1 + 3];
4151                                }
4152                                fftSlices.complexForward(t, startt);
4153                                fftSlices.complexForward(t, startt + 2 * slices);
4154                                for (int s = 0; s < slices; s++) {
4155                                    idx1 = s * sliceStride + idx0;
4156                                    idx2 = startt + 2 * s;
4157                                    idx3 = startt + 2 * slices + 2 * s;
4158                                    a[idx1] = t[idx2];
4159                                    a[idx1 + 1] = t[idx2 + 1];
4160                                    a[idx1 + 2] = t[idx3];
4161                                    a[idx1 + 3] = t[idx3 + 1];
4162                                }
4163                            }
4164                        } else if (columns == 2) {
4165                            for (int r = n0; r < rows; r += nthreads) {
4166                                idx0 = r * rowStride;
4167                                for (int s = 0; s < slices; s++) {
4168                                    idx1 = s * sliceStride + idx0;
4169                                    idx2 = startt + 2 * s;
4170                                    t[idx2] = a[idx1];
4171                                    t[idx2 + 1] = a[idx1 + 1];
4172                                }
4173                                fftSlices.complexForward(t, startt);
4174                                for (int s = 0; s < slices; s++) {
4175                                    idx1 = s * sliceStride + idx0;
4176                                    idx2 = startt + 2 * s;
4177                                    a[idx1] = t[idx2];
4178                                    a[idx1 + 1] = t[idx2 + 1];
4179                                }
4180                            }
4181                        }
4182                    } else {
4183                        if (columns > 4) {
4184                            for (int r = n0; r < rows; r += nthreads) {
4185                                idx0 = r * rowStride;
4186                                for (int c = 0; c < columns; c += 8) {
4187                                    for (int s = 0; s < slices; s++) {
4188                                        idx1 = s * sliceStride + idx0 + c;
4189                                        idx2 = startt + 2 * s;
4190                                        idx3 = startt + 2 * slices + 2 * s;
4191                                        idx4 = idx3 + 2 * slices;
4192                                        idx5 = idx4 + 2 * slices;
4193                                        t[idx2] = a[idx1];
4194                                        t[idx2 + 1] = a[idx1 + 1];
4195                                        t[idx3] = a[idx1 + 2];
4196                                        t[idx3 + 1] = a[idx1 + 3];
4197                                        t[idx4] = a[idx1 + 4];
4198                                        t[idx4 + 1] = a[idx1 + 5];
4199                                        t[idx5] = a[idx1 + 6];
4200                                        t[idx5 + 1] = a[idx1 + 7];
4201                                    }
4202                                    fftSlices.complexInverse(t, startt, scale);
4203                                    fftSlices.complexInverse(t, startt + 2 * slices, scale);
4204                                    fftSlices.complexInverse(t, startt + 4 * slices, scale);
4205                                    fftSlices.complexInverse(t, startt + 6 * slices, scale);
4206                                    for (int s = 0; s < slices; s++) {
4207                                        idx1 = s * sliceStride + idx0 + c;
4208                                        idx2 = startt + 2 * s;
4209                                        idx3 = startt + 2 * slices + 2 * s;
4210                                        idx4 = idx3 + 2 * slices;
4211                                        idx5 = idx4 + 2 * slices;
4212                                        a[idx1] = t[idx2];
4213                                        a[idx1 + 1] = t[idx2 + 1];
4214                                        a[idx1 + 2] = t[idx3];
4215                                        a[idx1 + 3] = t[idx3 + 1];
4216                                        a[idx1 + 4] = t[idx4];
4217                                        a[idx1 + 5] = t[idx4 + 1];
4218                                        a[idx1 + 6] = t[idx5];
4219                                        a[idx1 + 7] = t[idx5 + 1];
4220                                    }
4221                                }
4222                            }
4223                        } else if (columns == 4) {
4224                            for (int r = n0; r < rows; r += nthreads) {
4225                                idx0 = r * rowStride;
4226                                for (int s = 0; s < slices; s++) {
4227                                    idx1 = s * sliceStride + idx0;
4228                                    idx2 = startt + 2 * s;
4229                                    idx3 = startt + 2 * slices + 2 * s;
4230                                    t[idx2] = a[idx1];
4231                                    t[idx2 + 1] = a[idx1 + 1];
4232                                    t[idx3] = a[idx1 + 2];
4233                                    t[idx3 + 1] = a[idx1 + 3];
4234                                }
4235                                fftSlices.complexInverse(t, startt, scale);
4236                                fftSlices.complexInverse(t, startt + 2 * slices, scale);
4237                                for (int s = 0; s < slices; s++) {
4238                                    idx1 = s * sliceStride + idx0;
4239                                    idx2 = startt + 2 * s;
4240                                    idx3 = startt + 2 * slices + 2 * s;
4241                                    a[idx1] = t[idx2];
4242                                    a[idx1 + 1] = t[idx2 + 1];
4243                                    a[idx1 + 2] = t[idx3];
4244                                    a[idx1 + 3] = t[idx3 + 1];
4245                                }
4246                            }
4247                        } else if (columns == 2) {
4248                            for (int r = n0; r < rows; r += nthreads) {
4249                                idx0 = r * rowStride;
4250                                for (int s = 0; s < slices; s++) {
4251                                    idx1 = s * sliceStride + idx0;
4252                                    idx2 = startt + 2 * s;
4253                                    t[idx2] = a[idx1];
4254                                    t[idx2 + 1] = a[idx1 + 1];
4255                                }
4256                                fftSlices.complexInverse(t, startt, scale);
4257                                for (int s = 0; s < slices; s++) {
4258                                    idx1 = s * sliceStride + idx0;
4259                                    idx2 = startt + 2 * s;
4260                                    a[idx1] = t[idx2];
4261                                    a[idx1 + 1] = t[idx2 + 1];
4262                                }
4263                            }
4264                        }
4265                    }
4266
4267                }
4268            });
4269        }
4270        ConcurrencyUtils.waitForCompletion(futures);
4271    }
4272
4273    private void cdft3db_subth(final int isgn, final float[][][] a, final boolean scale) {
4274        int nt, i;
4275
4276        final int nthreads = Math.min(ConcurrencyUtils.getNumberOfThreads(), rows);
4277        nt = 8 * slices;
4278        if (columns == 4) {
4279            nt >>= 1;
4280        } else if (columns < 4) {
4281            nt >>= 2;
4282        }
4283        Future<?>[] futures = new Future[nthreads];
4284        for (i = 0; i < nthreads; i++) {
4285            final int n0 = i;
4286            final int startt = nt * i;
4287            futures[i] = ConcurrencyUtils.submit(new Runnable() {
4288
4289                @Override
4290                                public void run() {
4291                    int idx2, idx3, idx4, idx5;
4292
4293                    if (isgn == -1) {
4294                        if (columns > 4) {
4295                            for (int r = n0; r < rows; r += nthreads) {
4296                                for (int c = 0; c < columns; c += 8) {
4297                                    for (int s = 0; s < slices; s++) {
4298                                        idx2 = startt + 2 * s;
4299                                        idx3 = startt + 2 * slices + 2 * s;
4300                                        idx4 = idx3 + 2 * slices;
4301                                        idx5 = idx4 + 2 * slices;
4302                                        t[idx2] = a[s][r][c];
4303                                        t[idx2 + 1] = a[s][r][c + 1];
4304                                        t[idx3] = a[s][r][c + 2];
4305                                        t[idx3 + 1] = a[s][r][c + 3];
4306                                        t[idx4] = a[s][r][c + 4];
4307                                        t[idx4 + 1] = a[s][r][c + 5];
4308                                        t[idx5] = a[s][r][c + 6];
4309                                        t[idx5 + 1] = a[s][r][c + 7];
4310                                    }
4311                                    fftSlices.complexForward(t, startt);
4312                                    fftSlices.complexForward(t, startt + 2 * slices);
4313                                    fftSlices.complexForward(t, startt + 4 * slices);
4314                                    fftSlices.complexForward(t, startt + 6 * slices);
4315                                    for (int s = 0; s < slices; s++) {
4316                                        idx2 = startt + 2 * s;
4317                                        idx3 = startt + 2 * slices + 2 * s;
4318                                        idx4 = idx3 + 2 * slices;
4319                                        idx5 = idx4 + 2 * slices;
4320                                        a[s][r][c] = t[idx2];
4321                                        a[s][r][c + 1] = t[idx2 + 1];
4322                                        a[s][r][c + 2] = t[idx3];
4323                                        a[s][r][c + 3] = t[idx3 + 1];
4324                                        a[s][r][c + 4] = t[idx4];
4325                                        a[s][r][c + 5] = t[idx4 + 1];
4326                                        a[s][r][c + 6] = t[idx5];
4327                                        a[s][r][c + 7] = t[idx5 + 1];
4328                                    }
4329                                }
4330                            }
4331                        } else if (columns == 4) {
4332                            for (int r = n0; r < rows; r += nthreads) {
4333                                for (int s = 0; s < slices; s++) {
4334                                    idx2 = startt + 2 * s;
4335                                    idx3 = startt + 2 * slices + 2 * s;
4336                                    t[idx2] = a[s][r][0];
4337                                    t[idx2 + 1] = a[s][r][1];
4338                                    t[idx3] = a[s][r][2];
4339                                    t[idx3 + 1] = a[s][r][3];
4340                                }
4341                                fftSlices.complexForward(t, startt);
4342                                fftSlices.complexForward(t, startt + 2 * slices);
4343                                for (int s = 0; s < slices; s++) {
4344                                    idx2 = startt + 2 * s;
4345                                    idx3 = startt + 2 * slices + 2 * s;
4346                                    a[s][r][0] = t[idx2];
4347                                    a[s][r][1] = t[idx2 + 1];
4348                                    a[s][r][2] = t[idx3];
4349                                    a[s][r][3] = t[idx3 + 1];
4350                                }
4351                            }
4352                        } else if (columns == 2) {
4353                            for (int r = n0; r < rows; r += nthreads) {
4354                                for (int s = 0; s < slices; s++) {
4355                                    idx2 = startt + 2 * s;
4356                                    t[idx2] = a[s][r][0];
4357                                    t[idx2 + 1] = a[s][r][1];
4358                                }
4359                                fftSlices.complexForward(t, startt);
4360                                for (int s = 0; s < slices; s++) {
4361                                    idx2 = startt + 2 * s;
4362                                    a[s][r][0] = t[idx2];
4363                                    a[s][r][1] = t[idx2 + 1];
4364                                }
4365                            }
4366                        }
4367                    } else {
4368                        if (columns > 4) {
4369                            for (int r = n0; r < rows; r += nthreads) {
4370                                for (int c = 0; c < columns; c += 8) {
4371                                    for (int s = 0; s < slices; s++) {
4372                                        idx2 = startt + 2 * s;
4373                                        idx3 = startt + 2 * slices + 2 * s;
4374                                        idx4 = idx3 + 2 * slices;
4375                                        idx5 = idx4 + 2 * slices;
4376                                        t[idx2] = a[s][r][c];
4377                                        t[idx2 + 1] = a[s][r][c + 1];
4378                                        t[idx3] = a[s][r][c + 2];
4379                                        t[idx3 + 1] = a[s][r][c + 3];
4380                                        t[idx4] = a[s][r][c + 4];
4381                                        t[idx4 + 1] = a[s][r][c + 5];
4382                                        t[idx5] = a[s][r][c + 6];
4383                                        t[idx5 + 1] = a[s][r][c + 7];
4384                                    }
4385                                    fftSlices.complexInverse(t, startt, scale);
4386                                    fftSlices.complexInverse(t, startt + 2 * slices, scale);
4387                                    fftSlices.complexInverse(t, startt + 4 * slices, scale);
4388                                    fftSlices.complexInverse(t, startt + 6 * slices, scale);
4389                                    for (int s = 0; s < slices; s++) {
4390                                        idx2 = startt + 2 * s;
4391                                        idx3 = startt + 2 * slices + 2 * s;
4392                                        idx4 = idx3 + 2 * slices;
4393                                        idx5 = idx4 + 2 * slices;
4394                                        a[s][r][c] = t[idx2];
4395                                        a[s][r][c + 1] = t[idx2 + 1];
4396                                        a[s][r][c + 2] = t[idx3];
4397                                        a[s][r][c + 3] = t[idx3 + 1];
4398                                        a[s][r][c + 4] = t[idx4];
4399                                        a[s][r][c + 5] = t[idx4 + 1];
4400                                        a[s][r][c + 6] = t[idx5];
4401                                        a[s][r][c + 7] = t[idx5 + 1];
4402                                    }
4403                                }
4404                            }
4405                        } else if (columns == 4) {
4406                            for (int r = n0; r < rows; r += nthreads) {
4407                                for (int s = 0; s < slices; s++) {
4408                                    idx2 = startt + 2 * s;
4409                                    idx3 = startt + 2 * slices + 2 * s;
4410                                    t[idx2] = a[s][r][0];
4411                                    t[idx2 + 1] = a[s][r][1];
4412                                    t[idx3] = a[s][r][2];
4413                                    t[idx3 + 1] = a[s][r][3];
4414                                }
4415                                fftSlices.complexInverse(t, startt, scale);
4416                                fftSlices.complexInverse(t, startt + 2 * slices, scale);
4417                                for (int s = 0; s < slices; s++) {
4418                                    idx2 = startt + 2 * s;
4419                                    idx3 = startt + 2 * slices + 2 * s;
4420                                    a[s][r][0] = t[idx2];
4421                                    a[s][r][1] = t[idx2 + 1];
4422                                    a[s][r][2] = t[idx3];
4423                                    a[s][r][3] = t[idx3 + 1];
4424                                }
4425                            }
4426                        } else if (columns == 2) {
4427                            for (int r = n0; r < rows; r += nthreads) {
4428                                for (int s = 0; s < slices; s++) {
4429                                    idx2 = startt + 2 * s;
4430                                    t[idx2] = a[s][r][0];
4431                                    t[idx2 + 1] = a[s][r][1];
4432                                }
4433                                fftSlices.complexInverse(t, startt, scale);
4434                                for (int s = 0; s < slices; s++) {
4435                                    idx2 = startt + 2 * s;
4436                                    a[s][r][0] = t[idx2];
4437                                    a[s][r][1] = t[idx2 + 1];
4438                                }
4439                            }
4440                        }
4441                    }
4442
4443                }
4444            });
4445        }
4446        ConcurrencyUtils.waitForCompletion(futures);
4447    }
4448
4449    private void rdft3d_sub(int isgn, float[] a) {
4450        int n1h, n2h, i, j, k, l, idx1, idx2, idx3, idx4;
4451        float xi;
4452
4453        n1h = slices >> 1;
4454        n2h = rows >> 1;
4455        if (isgn < 0) {
4456            for (i = 1; i < n1h; i++) {
4457                j = slices - i;
4458                idx1 = i * sliceStride;
4459                idx2 = j * sliceStride;
4460                idx3 = i * sliceStride + n2h * rowStride;
4461                idx4 = j * sliceStride + n2h * rowStride;
4462                xi = a[idx1] - a[idx2];
4463                a[idx1] += a[idx2];
4464                a[idx2] = xi;
4465                xi = a[idx2 + 1] - a[idx1 + 1];
4466                a[idx1 + 1] += a[idx2 + 1];
4467                a[idx2 + 1] = xi;
4468                xi = a[idx3] - a[idx4];
4469                a[idx3] += a[idx4];
4470                a[idx4] = xi;
4471                xi = a[idx4 + 1] - a[idx3 + 1];
4472                a[idx3 + 1] += a[idx4 + 1];
4473                a[idx4 + 1] = xi;
4474                for (k = 1; k < n2h; k++) {
4475                    l = rows - k;
4476                    idx1 = i * sliceStride + k * rowStride;
4477                    idx2 = j * sliceStride + l * rowStride;
4478                    xi = a[idx1] - a[idx2];
4479                    a[idx1] += a[idx2];
4480                    a[idx2] = xi;
4481                    xi = a[idx2 + 1] - a[idx1 + 1];
4482                    a[idx1 + 1] += a[idx2 + 1];
4483                    a[idx2 + 1] = xi;
4484                    idx3 = j * sliceStride + k * rowStride;
4485                    idx4 = i * sliceStride + l * rowStride;
4486                    xi = a[idx3] - a[idx4];
4487                    a[idx3] += a[idx4];
4488                    a[idx4] = xi;
4489                    xi = a[idx4 + 1] - a[idx3 + 1];
4490                    a[idx3 + 1] += a[idx4 + 1];
4491                    a[idx4 + 1] = xi;
4492                }
4493            }
4494            for (k = 1; k < n2h; k++) {
4495                l = rows - k;
4496                idx1 = k * rowStride;
4497                idx2 = l * rowStride;
4498                xi = a[idx1] - a[idx2];
4499                a[idx1] += a[idx2];
4500                a[idx2] = xi;
4501                xi = a[idx2 + 1] - a[idx1 + 1];
4502                a[idx1 + 1] += a[idx2 + 1];
4503                a[idx2 + 1] = xi;
4504                idx3 = n1h * sliceStride + k * rowStride;
4505                idx4 = n1h * sliceStride + l * rowStride;
4506                xi = a[idx3] - a[idx4];
4507                a[idx3] += a[idx4];
4508                a[idx4] = xi;
4509                xi = a[idx4 + 1] - a[idx3 + 1];
4510                a[idx3 + 1] += a[idx4 + 1];
4511                a[idx4 + 1] = xi;
4512            }
4513        } else {
4514            for (i = 1; i < n1h; i++) {
4515                j = slices - i;
4516                idx1 = j * sliceStride;
4517                idx2 = i * sliceStride;
4518                a[idx1] = 0.5f * (a[idx2] - a[idx1]);
4519                a[idx2] -= a[idx1];
4520                a[idx1 + 1] = 0.5f * (a[idx2 + 1] + a[idx1 + 1]);
4521                a[idx2 + 1] -= a[idx1 + 1];
4522                idx3 = j * sliceStride + n2h * rowStride;
4523                idx4 = i * sliceStride + n2h * rowStride;
4524                a[idx3] = 0.5f * (a[idx4] - a[idx3]);
4525                a[idx4] -= a[idx3];
4526                a[idx3 + 1] = 0.5f * (a[idx4 + 1] + a[idx3 + 1]);
4527                a[idx4 + 1] -= a[idx3 + 1];
4528                for (k = 1; k < n2h; k++) {
4529                    l = rows - k;
4530                    idx1 = j * sliceStride + l * rowStride;
4531                    idx2 = i * sliceStride + k * rowStride;
4532                    a[idx1] = 0.5f * (a[idx2] - a[idx1]);
4533                    a[idx2] -= a[idx1];
4534                    a[idx1 + 1] = 0.5f * (a[idx2 + 1] + a[idx1 + 1]);
4535                    a[idx2 + 1] -= a[idx1 + 1];
4536                    idx3 = i * sliceStride + l * rowStride;
4537                    idx4 = j * sliceStride + k * rowStride;
4538                    a[idx3] = 0.5f * (a[idx4] - a[idx3]);
4539                    a[idx4] -= a[idx3];
4540                    a[idx3 + 1] = 0.5f * (a[idx4 + 1] + a[idx3 + 1]);
4541                    a[idx4 + 1] -= a[idx3 + 1];
4542                }
4543            }
4544            for (k = 1; k < n2h; k++) {
4545                l = rows - k;
4546                idx1 = l * rowStride;
4547                idx2 = k * rowStride;
4548                a[idx1] = 0.5f * (a[idx2] - a[idx1]);
4549                a[idx2] -= a[idx1];
4550                a[idx1 + 1] = 0.5f * (a[idx2 + 1] + a[idx1 + 1]);
4551                a[idx2 + 1] -= a[idx1 + 1];
4552                idx3 = n1h * sliceStride + l * rowStride;
4553                idx4 = n1h * sliceStride + k * rowStride;
4554                a[idx3] = 0.5f * (a[idx4] - a[idx3]);
4555                a[idx4] -= a[idx3];
4556                a[idx3 + 1] = 0.5f * (a[idx4 + 1] + a[idx3 + 1]);
4557                a[idx4 + 1] -= a[idx3 + 1];
4558            }
4559        }
4560    }
4561
4562    private void rdft3d_sub(int isgn, float[][][] a) {
4563        int n1h, n2h, i, j, k, l;
4564        float xi;
4565
4566        n1h = slices >> 1;
4567        n2h = rows >> 1;
4568        if (isgn < 0) {
4569            for (i = 1; i < n1h; i++) {
4570                j = slices - i;
4571                xi = a[i][0][0] - a[j][0][0];
4572                a[i][0][0] += a[j][0][0];
4573                a[j][0][0] = xi;
4574                xi = a[j][0][1] - a[i][0][1];
4575                a[i][0][1] += a[j][0][1];
4576                a[j][0][1] = xi;
4577                xi = a[i][n2h][0] - a[j][n2h][0];
4578                a[i][n2h][0] += a[j][n2h][0];
4579                a[j][n2h][0] = xi;
4580                xi = a[j][n2h][1] - a[i][n2h][1];
4581                a[i][n2h][1] += a[j][n2h][1];
4582                a[j][n2h][1] = xi;
4583                for (k = 1; k < n2h; k++) {
4584                    l = rows - k;
4585                    xi = a[i][k][0] - a[j][l][0];
4586                    a[i][k][0] += a[j][l][0];
4587                    a[j][l][0] = xi;
4588                    xi = a[j][l][1] - a[i][k][1];
4589                    a[i][k][1] += a[j][l][1];
4590                    a[j][l][1] = xi;
4591                    xi = a[j][k][0] - a[i][l][0];
4592                    a[j][k][0] += a[i][l][0];
4593                    a[i][l][0] = xi;
4594                    xi = a[i][l][1] - a[j][k][1];
4595                    a[j][k][1] += a[i][l][1];
4596                    a[i][l][1] = xi;
4597                }
4598            }
4599            for (k = 1; k < n2h; k++) {
4600                l = rows - k;
4601                xi = a[0][k][0] - a[0][l][0];
4602                a[0][k][0] += a[0][l][0];
4603                a[0][l][0] = xi;
4604                xi = a[0][l][1] - a[0][k][1];
4605                a[0][k][1] += a[0][l][1];
4606                a[0][l][1] = xi;
4607                xi = a[n1h][k][0] - a[n1h][l][0];
4608                a[n1h][k][0] += a[n1h][l][0];
4609                a[n1h][l][0] = xi;
4610                xi = a[n1h][l][1] - a[n1h][k][1];
4611                a[n1h][k][1] += a[n1h][l][1];
4612                a[n1h][l][1] = xi;
4613            }
4614        } else {
4615            for (i = 1; i < n1h; i++) {
4616                j = slices - i;
4617                a[j][0][0] = 0.5f * (a[i][0][0] - a[j][0][0]);
4618                a[i][0][0] -= a[j][0][0];
4619                a[j][0][1] = 0.5f * (a[i][0][1] + a[j][0][1]);
4620                a[i][0][1] -= a[j][0][1];
4621                a[j][n2h][0] = 0.5f * (a[i][n2h][0] - a[j][n2h][0]);
4622                a[i][n2h][0] -= a[j][n2h][0];
4623                a[j][n2h][1] = 0.5f * (a[i][n2h][1] + a[j][n2h][1]);
4624                a[i][n2h][1] -= a[j][n2h][1];
4625                for (k = 1; k < n2h; k++) {
4626                    l = rows - k;
4627                    a[j][l][0] = 0.5f * (a[i][k][0] - a[j][l][0]);
4628                    a[i][k][0] -= a[j][l][0];
4629                    a[j][l][1] = 0.5f * (a[i][k][1] + a[j][l][1]);
4630                    a[i][k][1] -= a[j][l][1];
4631                    a[i][l][0] = 0.5f * (a[j][k][0] - a[i][l][0]);
4632                    a[j][k][0] -= a[i][l][0];
4633                    a[i][l][1] = 0.5f * (a[j][k][1] + a[i][l][1]);
4634                    a[j][k][1] -= a[i][l][1];
4635                }
4636            }
4637            for (k = 1; k < n2h; k++) {
4638                l = rows - k;
4639                a[0][l][0] = 0.5f * (a[0][k][0] - a[0][l][0]);
4640                a[0][k][0] -= a[0][l][0];
4641                a[0][l][1] = 0.5f * (a[0][k][1] + a[0][l][1]);
4642                a[0][k][1] -= a[0][l][1];
4643                a[n1h][l][0] = 0.5f * (a[n1h][k][0] - a[n1h][l][0]);
4644                a[n1h][k][0] -= a[n1h][l][0];
4645                a[n1h][l][1] = 0.5f * (a[n1h][k][1] + a[n1h][l][1]);
4646                a[n1h][k][1] -= a[n1h][l][1];
4647            }
4648        }
4649    }
4650
4651    private void fillSymmetric(final float[][][] a) {
4652        final int twon3 = 2 * columns;
4653        final int n2d2 = rows / 2;
4654        int n1d2 = slices / 2;
4655        int nthreads = ConcurrencyUtils.getNumberOfThreads();
4656        if ((nthreads > 1) && useThreads && (slices >= nthreads)) {
4657            Future<?>[] futures = new Future[nthreads];
4658            int p = slices / nthreads;
4659            for (int l = 0; l < nthreads; l++) {
4660                final int firstSlice = l * p;
4661                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
4662                futures[l] = ConcurrencyUtils.submit(new Runnable() {
4663                    @Override
4664                                        public void run() {
4665                        for (int s = firstSlice; s < lastSlice; s++) {
4666                            int idx1 = (slices - s) % slices;
4667                            for (int r = 0; r < rows; r++) {
4668                                int idx2 = (rows - r) % rows;
4669                                for (int c = 1; c < columns; c += 2) {
4670                                    int idx3 = twon3 - c;
4671                                    a[idx1][idx2][idx3] = -a[s][r][c + 2];
4672                                    a[idx1][idx2][idx3 - 1] = a[s][r][c + 1];
4673                                }
4674                            }
4675                        }
4676                    }
4677                });
4678            }
4679            ConcurrencyUtils.waitForCompletion(futures);
4680
4681            // ---------------------------------------------
4682
4683            for (int l = 0; l < nthreads; l++) {
4684                final int firstSlice = l * p;
4685                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
4686                futures[l] = ConcurrencyUtils.submit(new Runnable() {
4687                    @Override
4688                                        public void run() {
4689                        for (int s = firstSlice; s < lastSlice; s++) {
4690                            int idx1 = (slices - s) % slices;
4691                            for (int r = 1; r < n2d2; r++) {
4692                                int idx2 = rows - r;
4693                                a[idx1][r][columns] = a[s][idx2][1];
4694                                a[s][idx2][columns] = a[s][idx2][1];
4695                                a[idx1][r][columns + 1] = -a[s][idx2][0];
4696                                a[s][idx2][columns + 1] = a[s][idx2][0];
4697                            }
4698                        }
4699                    }
4700                });
4701            }
4702            ConcurrencyUtils.waitForCompletion(futures);
4703
4704            for (int l = 0; l < nthreads; l++) {
4705                final int firstSlice = l * p;
4706                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
4707                futures[l] = ConcurrencyUtils.submit(new Runnable() {
4708                    @Override
4709                                        public void run() {
4710                        for (int s = firstSlice; s < lastSlice; s++) {
4711                            int idx1 = (slices - s) % slices;
4712                            for (int r = 1; r < n2d2; r++) {
4713                                int idx2 = rows - r;
4714                                a[idx1][idx2][0] = a[s][r][0];
4715                                a[idx1][idx2][1] = -a[s][r][1];
4716                            }
4717                        }
4718                    }
4719                });
4720            }
4721            ConcurrencyUtils.waitForCompletion(futures);
4722
4723        } else {
4724
4725            for (int s = 0; s < slices; s++) {
4726                int idx1 = (slices - s) % slices;
4727                for (int r = 0; r < rows; r++) {
4728                    int idx2 = (rows - r) % rows;
4729                    for (int c = 1; c < columns; c += 2) {
4730                        int idx3 = twon3 - c;
4731                        a[idx1][idx2][idx3] = -a[s][r][c + 2];
4732                        a[idx1][idx2][idx3 - 1] = a[s][r][c + 1];
4733                    }
4734                }
4735            }
4736
4737            // ---------------------------------------------
4738
4739            for (int s = 0; s < slices; s++) {
4740                int idx1 = (slices - s) % slices;
4741                for (int r = 1; r < n2d2; r++) {
4742                    int idx2 = rows - r;
4743                    a[idx1][r][columns] = a[s][idx2][1];
4744                    a[s][idx2][columns] = a[s][idx2][1];
4745                    a[idx1][r][columns + 1] = -a[s][idx2][0];
4746                    a[s][idx2][columns + 1] = a[s][idx2][0];
4747                }
4748            }
4749
4750            for (int s = 0; s < slices; s++) {
4751                int idx1 = (slices - s) % slices;
4752                for (int r = 1; r < n2d2; r++) {
4753                    int idx2 = rows - r;
4754                    a[idx1][idx2][0] = a[s][r][0];
4755                    a[idx1][idx2][1] = -a[s][r][1];
4756                }
4757            }
4758        }
4759
4760        // ----------------------------------------------------------
4761
4762        for (int s = 1; s < n1d2; s++) {
4763            int idx1 = slices - s;
4764            a[s][0][columns] = a[idx1][0][1];
4765            a[idx1][0][columns] = a[idx1][0][1];
4766            a[s][0][columns + 1] = -a[idx1][0][0];
4767            a[idx1][0][columns + 1] = a[idx1][0][0];
4768            a[s][n2d2][columns] = a[idx1][n2d2][1];
4769            a[idx1][n2d2][columns] = a[idx1][n2d2][1];
4770            a[s][n2d2][columns + 1] = -a[idx1][n2d2][0];
4771            a[idx1][n2d2][columns + 1] = a[idx1][n2d2][0];
4772            a[idx1][0][0] = a[s][0][0];
4773            a[idx1][0][1] = -a[s][0][1];
4774            a[idx1][n2d2][0] = a[s][n2d2][0];
4775            a[idx1][n2d2][1] = -a[s][n2d2][1];
4776
4777        }
4778        // ----------------------------------------
4779
4780        a[0][0][columns] = a[0][0][1];
4781        a[0][0][1] = 0;
4782        a[0][n2d2][columns] = a[0][n2d2][1];
4783        a[0][n2d2][1] = 0;
4784        a[n1d2][0][columns] = a[n1d2][0][1];
4785        a[n1d2][0][1] = 0;
4786        a[n1d2][n2d2][columns] = a[n1d2][n2d2][1];
4787        a[n1d2][n2d2][1] = 0;
4788        a[n1d2][0][columns + 1] = 0;
4789        a[n1d2][n2d2][columns + 1] = 0;
4790    }
4791
4792    private void fillSymmetric(final float[] a) {
4793        final int twon3 = 2 * columns;
4794        final int n2d2 = rows / 2;
4795        int n1d2 = slices / 2;
4796
4797        final int twoSliceStride = rows * twon3;
4798        final int twoRowStride = twon3;
4799
4800        int idx1, idx2, idx3, idx4, idx5, idx6;
4801
4802        for (int s = (slices - 1); s >= 1; s--) {
4803            idx3 = s * sliceStride;
4804            idx4 = 2 * idx3;
4805            for (int r = 0; r < rows; r++) {
4806                idx5 = r * rowStride;
4807                idx6 = 2 * idx5;
4808                for (int c = 0; c < columns; c += 2) {
4809                    idx1 = idx3 + idx5 + c;
4810                    idx2 = idx4 + idx6 + c;
4811                    a[idx2] = a[idx1];
4812                    a[idx1] = 0;
4813                    idx1++;
4814                    idx2++;
4815                    a[idx2] = a[idx1];
4816                    a[idx1] = 0;
4817                }
4818            }
4819        }
4820
4821        for (int r = 1; r < rows; r++) {
4822            idx3 = (rows - r) * rowStride;
4823            idx4 = (rows - r) * twoRowStride;
4824            for (int c = 0; c < columns; c += 2) {
4825                idx1 = idx3 + c;
4826                idx2 = idx4 + c;
4827                a[idx2] = a[idx1];
4828                a[idx1] = 0;
4829                idx1++;
4830                idx2++;
4831                a[idx2] = a[idx1];
4832                a[idx1] = 0;
4833            }
4834        }
4835
4836        int nthreads = ConcurrencyUtils.getNumberOfThreads();
4837        if ((nthreads > 1) && useThreads && (slices >= nthreads)) {
4838            Future<?>[] futures = new Future[nthreads];
4839            int p = slices / nthreads;
4840            for (int l = 0; l < nthreads; l++) {
4841                final int firstSlice = l * p;
4842                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
4843                futures[l] = ConcurrencyUtils.submit(new Runnable() {
4844                    @Override
4845                                        public void run() {
4846                        for (int s = firstSlice; s < lastSlice; s++) {
4847                            int idx3 = ((slices - s) % slices) * twoSliceStride;
4848                            int idx5 = s * twoSliceStride;
4849                            for (int r = 0; r < rows; r++) {
4850                                int idx4 = ((rows - r) % rows) * twoRowStride;
4851                                int idx6 = r * twoRowStride;
4852                                for (int c = 1; c < columns; c += 2) {
4853                                    int idx1 = idx3 + idx4 + twon3 - c;
4854                                    int idx2 = idx5 + idx6 + c;
4855                                    a[idx1] = -a[idx2 + 2];
4856                                    a[idx1 - 1] = a[idx2 + 1];
4857                                }
4858                            }
4859                        }
4860                    }
4861                });
4862            }
4863            ConcurrencyUtils.waitForCompletion(futures);
4864
4865            // ---------------------------------------------
4866
4867            for (int l = 0; l < nthreads; l++) {
4868                final int firstSlice = l * p;
4869                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
4870                futures[l] = ConcurrencyUtils.submit(new Runnable() {
4871                    @Override
4872                                        public void run() {
4873                        for (int s = firstSlice; s < lastSlice; s++) {
4874                            int idx5 = ((slices - s) % slices) * twoSliceStride;
4875                            int idx6 = s * twoSliceStride;
4876                            for (int r = 1; r < n2d2; r++) {
4877                                int idx4 = idx6 + (rows - r) * twoRowStride;
4878                                int idx1 = idx5 + r * twoRowStride + columns;
4879                                int idx2 = idx4 + columns;
4880                                int idx3 = idx4 + 1;
4881                                a[idx1] = a[idx3];
4882                                a[idx2] = a[idx3];
4883                                a[idx1 + 1] = -a[idx4];
4884                                a[idx2 + 1] = a[idx4];
4885
4886                            }
4887                        }
4888                    }
4889                });
4890            }
4891            ConcurrencyUtils.waitForCompletion(futures);
4892            for (int l = 0; l < nthreads; l++) {
4893                final int firstSlice = l * p;
4894                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
4895                futures[l] = ConcurrencyUtils.submit(new Runnable() {
4896                    @Override
4897                                        public void run() {
4898                        for (int s = firstSlice; s < lastSlice; s++) {
4899                            int idx3 = ((slices - s) % slices) * twoSliceStride;
4900                            int idx4 = s * twoSliceStride;
4901                            for (int r = 1; r < n2d2; r++) {
4902                                int idx1 = idx3 + (rows - r) * twoRowStride;
4903                                int idx2 = idx4 + r * twoRowStride;
4904                                a[idx1] = a[idx2];
4905                                a[idx1 + 1] = -a[idx2 + 1];
4906
4907                            }
4908                        }
4909                    }
4910                });
4911            }
4912            ConcurrencyUtils.waitForCompletion(futures);
4913        } else {
4914
4915            // -----------------------------------------------
4916            for (int s = 0; s < slices; s++) {
4917                idx3 = ((slices - s) % slices) * twoSliceStride;
4918                idx5 = s * twoSliceStride;
4919                for (int r = 0; r < rows; r++) {
4920                    idx4 = ((rows - r) % rows) * twoRowStride;
4921                    idx6 = r * twoRowStride;
4922                    for (int c = 1; c < columns; c += 2) {
4923                        idx1 = idx3 + idx4 + twon3 - c;
4924                        idx2 = idx5 + idx6 + c;
4925                        a[idx1] = -a[idx2 + 2];
4926                        a[idx1 - 1] = a[idx2 + 1];
4927                    }
4928                }
4929            }
4930
4931            // ---------------------------------------------
4932
4933            for (int s = 0; s < slices; s++) {
4934                idx5 = ((slices - s) % slices) * twoSliceStride;
4935                idx6 = s * twoSliceStride;
4936                for (int r = 1; r < n2d2; r++) {
4937                    idx4 = idx6 + (rows - r) * twoRowStride;
4938                    idx1 = idx5 + r * twoRowStride + columns;
4939                    idx2 = idx4 + columns;
4940                    idx3 = idx4 + 1;
4941                    a[idx1] = a[idx3];
4942                    a[idx2] = a[idx3];
4943                    a[idx1 + 1] = -a[idx4];
4944                    a[idx2 + 1] = a[idx4];
4945
4946                }
4947            }
4948
4949            for (int s = 0; s < slices; s++) {
4950                idx3 = ((slices - s) % slices) * twoSliceStride;
4951                idx4 = s * twoSliceStride;
4952                for (int r = 1; r < n2d2; r++) {
4953                    idx1 = idx3 + (rows - r) * twoRowStride;
4954                    idx2 = idx4 + r * twoRowStride;
4955                    a[idx1] = a[idx2];
4956                    a[idx1 + 1] = -a[idx2 + 1];
4957
4958                }
4959            }
4960        }
4961
4962        // ----------------------------------------------------------
4963
4964        for (int s = 1; s < n1d2; s++) {
4965            idx1 = s * twoSliceStride;
4966            idx2 = (slices - s) * twoSliceStride;
4967            idx3 = n2d2 * twoRowStride;
4968            idx4 = idx1 + idx3;
4969            idx5 = idx2 + idx3;
4970            a[idx1 + columns] = a[idx2 + 1];
4971            a[idx2 + columns] = a[idx2 + 1];
4972            a[idx1 + columns + 1] = -a[idx2];
4973            a[idx2 + columns + 1] = a[idx2];
4974            a[idx4 + columns] = a[idx5 + 1];
4975            a[idx5 + columns] = a[idx5 + 1];
4976            a[idx4 + columns + 1] = -a[idx5];
4977            a[idx5 + columns + 1] = a[idx5];
4978            a[idx2] = a[idx1];
4979            a[idx2 + 1] = -a[idx1 + 1];
4980            a[idx5] = a[idx4];
4981            a[idx5 + 1] = -a[idx4 + 1];
4982
4983        }
4984
4985        // ----------------------------------------
4986
4987        a[columns] = a[1];
4988        a[1] = 0;
4989        idx1 = n2d2 * twoRowStride;
4990        idx2 = n1d2 * twoSliceStride;
4991        idx3 = idx1 + idx2;
4992        a[idx1 + columns] = a[idx1 + 1];
4993        a[idx1 + 1] = 0;
4994        a[idx2 + columns] = a[idx2 + 1];
4995        a[idx2 + 1] = 0;
4996        a[idx3 + columns] = a[idx3 + 1];
4997        a[idx3 + 1] = 0;
4998        a[idx2 + columns + 1] = 0;
4999        a[idx3 + columns + 1] = 0;
5000    }
5001}