001/* ***** BEGIN LICENSE BLOCK *****
002 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
003 *
004 * The contents of this file are subject to the Mozilla Public License Version
005 * 1.1 (the "License"); you may not use this file except in compliance with
006 * the License. You may obtain a copy of the License at
007 * http://www.mozilla.org/MPL/
008 *
009 * Software distributed under the License is distributed on an "AS IS" basis,
010 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
011 * for the specific language governing rights and limitations under the
012 * License.
013 *
014 * The Original Code is JTransforms.
015 *
016 * The Initial Developer of the Original Code is
017 * Piotr Wendykier, Emory University.
018 * Portions created by the Initial Developer are Copyright (C) 2007-2009
019 * the Initial Developer. All Rights Reserved.
020 *
021 * Alternatively, the contents of this file may be used under the terms of
022 * either the GNU General Public License Version 2 or later (the "GPL"), or
023 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
024 * in which case the provisions of the GPL or the LGPL are applicable instead
025 * of those above. If you wish to allow use of your version of this file only
026 * under the terms of either the GPL or the LGPL, and not to allow others to
027 * use your version of this file under the terms of the MPL, indicate your
028 * decision by deleting the provisions above and replace them with the notice
029 * and other provisions required by the GPL or the LGPL. If you do not delete
030 * the provisions above, a recipient may use your version of this file under
031 * the terms of any one of the MPL, the GPL or the LGPL.
032 *
033 * ***** END LICENSE BLOCK ***** */
034
035package edu.emory.mathcs.jtransforms.fft;
036
037import java.util.concurrent.Future;
038
039import edu.emory.mathcs.utils.ConcurrencyUtils;
040
041/**
042 * Computes 3D Discrete Fourier Transform (DFT) of complex and real, double
043 * precision data. The sizes of all three dimensions can be arbitrary numbers.
044 * This is a parallel implementation of split-radix and mixed-radix algorithms
045 * optimized for SMP systems. <br>
046 * <br>
047 * Part of the code is derived from General Purpose FFT Package written by Takuya Ooura
048 * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
049 * 
050 * @author Piotr Wendykier (piotr.wendykier@gmail.com)
051 * 
052 */
053public class DoubleFFT_3D {
054
055    private int slices;
056
057    private int rows;
058
059    private int columns;
060
061    private int sliceStride;
062
063    private int rowStride;
064
065    private double[] t;
066
067    private DoubleFFT_1D fftSlices, fftRows, fftColumns;
068
069    private int oldNthreads;
070
071    private int nt;
072
073    private boolean isPowerOfTwo = false;
074
075    private boolean useThreads = false;
076
077    /**
078     * Creates new instance of DoubleFFT_3D.
079     * 
080     * @param slices
081     *            number of slices
082     * @param rows
083     *            number of rows
084     * @param columns
085     *            number of columns
086     * 
087     */
088    public DoubleFFT_3D(int slices, int rows, int columns) {
089        if (slices <= 1 || rows <= 1 || columns <= 1) {
090            throw new IllegalArgumentException("slices, rows and columns must be greater than 1");
091        }
092        this.slices = slices;
093        this.rows = rows;
094        this.columns = columns;
095        this.sliceStride = rows * columns;
096        this.rowStride = columns;
097        if (slices * rows * columns >= ConcurrencyUtils.getThreadsBeginN_3D()) {
098            this.useThreads = true;
099        }
100        if (ConcurrencyUtils.isPowerOf2(slices) && ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) {
101            isPowerOfTwo = true;
102            oldNthreads = ConcurrencyUtils.getNumberOfThreads();
103            nt = slices;
104            if (nt < rows) {
105                nt = rows;
106            }
107            nt *= 8;
108            if (oldNthreads > 1) {
109                nt *= oldNthreads;
110            }
111            if (2 * columns == 4) {
112                nt >>= 1;
113            } else if (2 * columns < 4) {
114                nt >>= 2;
115            }
116            t = new double[nt];
117        }
118        fftSlices = new DoubleFFT_1D(slices);
119        if (slices == rows) {
120            fftRows = fftSlices;
121        } else {
122            fftRows = new DoubleFFT_1D(rows);
123        }
124        if (slices == columns) {
125            fftColumns = fftSlices;
126        } else if (rows == columns) {
127            fftColumns = fftRows;
128        } else {
129            fftColumns = new DoubleFFT_1D(columns);
130        }
131
132    }
133
134    /**
135     * Computes 3D forward DFT of complex data leaving the result in
136     * <code>a</code>. The data is stored in 1D array addressed in slice-major,
137     * then row-major, then column-major, in order of significance, i.e. element
138     * (i,j,k) of 3D array x[slices][rows][2*columns] is stored in a[i*sliceStride +
139     * j*rowStride + k], where sliceStride = rows * 2 * columns and rowStride = 2 * columns.
140     * Complex number is stored as two double values in sequence: the real and
141     * imaginary part, i.e. the input array must be of size slices*rows*2*columns. The
142     * physical layout of the input data is as follows:
143     * 
144     * <pre>
145     * a[k1*sliceStride + k2*rowStride + 2*k3] = Re[k1][k2][k3], 
146     * a[k1*sliceStride + k2*rowStride + 2*k3+1] = Im[k1][k2][k3], 0&lt;=k1&lt;slices, 0&lt;=k2&lt;rows, 0&lt;=k3&lt;columns,
147     * </pre>
148     * 
149     * @param a
150     *            data to transform
151     */
152    public void complexForward(final double[] a) {
153        int nthreads = ConcurrencyUtils.getNumberOfThreads();
154        if (isPowerOfTwo) {
155            int oldn3 = columns;
156            columns = 2 * columns;
157
158            sliceStride = rows * columns;
159            rowStride = columns;
160
161            if (nthreads != oldNthreads) {
162                nt = slices;
163                if (nt < rows) {
164                    nt = rows;
165                }
166                nt *= 8;
167                if (nthreads > 1) {
168                    nt *= nthreads;
169                }
170                if (columns == 4) {
171                    nt >>= 1;
172                } else if (columns < 4) {
173                    nt >>= 2;
174                }
175                t = new double[nt];
176                oldNthreads = nthreads;
177            }
178            if ((nthreads > 1) && useThreads) {
179                xdft3da_subth2(0, -1, a, true);
180                cdft3db_subth(-1, a, true);
181            } else {
182                xdft3da_sub2(0, -1, a, true);
183                cdft3db_sub(-1, a, true);
184            }
185            columns = oldn3;
186            sliceStride = rows * columns;
187            rowStride = columns;
188        } else {
189            sliceStride = 2 * rows * columns;
190            rowStride = 2 * columns;
191            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
192                Future<?>[] futures = new Future[nthreads];
193                int p = slices / nthreads;
194                for (int l = 0; l < nthreads; l++) {
195                    final int firstSlice = l * p;
196                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
197                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
198                        @Override
199                                                public void run() {
200                            for (int s = firstSlice; s < lastSlice; s++) {
201                                int idx1 = s * sliceStride;
202                                for (int r = 0; r < rows; r++) {
203                                    fftColumns.complexForward(a, idx1 + r * rowStride);
204                                }
205                            }
206                        }
207                    });
208                }
209                ConcurrencyUtils.waitForCompletion(futures);
210
211                for (int l = 0; l < nthreads; l++) {
212                    final int firstSlice = l * p;
213                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
214
215                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
216                        @Override
217                                                public void run() {
218                            double[] temp = new double[2 * rows];
219                            for (int s = firstSlice; s < lastSlice; s++) {
220                                int idx1 = s * sliceStride;
221                                for (int c = 0; c < columns; c++) {
222                                    int idx2 = 2 * c;
223                                    for (int r = 0; r < rows; r++) {
224                                        int idx3 = idx1 + idx2 + r * rowStride;
225                                        int idx4 = 2 * r;
226                                        temp[idx4] = a[idx3];
227                                        temp[idx4 + 1] = a[idx3 + 1];
228                                    }
229                                    fftRows.complexForward(temp);
230                                    for (int r = 0; r < rows; r++) {
231                                        int idx3 = idx1 + idx2 + r * rowStride;
232                                        int idx4 = 2 * r;
233                                        a[idx3] = temp[idx4];
234                                        a[idx3 + 1] = temp[idx4 + 1];
235                                    }
236                                }
237                            }
238                        }
239                    });
240                }
241                ConcurrencyUtils.waitForCompletion(futures);
242
243                p = rows / nthreads;
244                for (int l = 0; l < nthreads; l++) {
245                    final int firstRow = l * p;
246                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
247
248                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
249                        @Override
250                                                public void run() {
251                            double[] temp = new double[2 * slices];
252                            for (int r = firstRow; r < lastRow; r++) {
253                                int idx1 = r * rowStride;
254                                for (int c = 0; c < columns; c++) {
255                                    int idx2 = 2 * c;
256                                    for (int s = 0; s < slices; s++) {
257                                        int idx3 = s * sliceStride + idx1 + idx2;
258                                        int idx4 = 2 * s;
259                                        temp[idx4] = a[idx3];
260                                        temp[idx4 + 1] = a[idx3 + 1];
261                                    }
262                                    fftSlices.complexForward(temp);
263                                    for (int s = 0; s < slices; s++) {
264                                        int idx3 = s * sliceStride + idx1 + idx2;
265                                        int idx4 = 2 * s;
266                                        a[idx3] = temp[idx4];
267                                        a[idx3 + 1] = temp[idx4 + 1];
268                                    }
269                                }
270                            }
271                        }
272                    });
273                }
274                ConcurrencyUtils.waitForCompletion(futures);
275
276            } else {
277                for (int s = 0; s < slices; s++) {
278                    int idx1 = s * sliceStride;
279                    for (int r = 0; r < rows; r++) {
280                        fftColumns.complexForward(a, idx1 + r * rowStride);
281                    }
282                }
283
284                double[] temp = new double[2 * rows];
285                for (int s = 0; s < slices; s++) {
286                    int idx1 = s * sliceStride;
287                    for (int c = 0; c < columns; c++) {
288                        int idx2 = 2 * c;
289                        for (int r = 0; r < rows; r++) {
290                            int idx3 = idx1 + idx2 + r * rowStride;
291                            int idx4 = 2 * r;
292                            temp[idx4] = a[idx3];
293                            temp[idx4 + 1] = a[idx3 + 1];
294                        }
295                        fftRows.complexForward(temp);
296                        for (int r = 0; r < rows; r++) {
297                            int idx3 = idx1 + idx2 + r * rowStride;
298                            int idx4 = 2 * r;
299                            a[idx3] = temp[idx4];
300                            a[idx3 + 1] = temp[idx4 + 1];
301                        }
302                    }
303                }
304
305                temp = new double[2 * slices];
306                for (int r = 0; r < rows; r++) {
307                    int idx1 = r * rowStride;
308                    for (int c = 0; c < columns; c++) {
309                        int idx2 = 2 * c;
310                        for (int s = 0; s < slices; s++) {
311                            int idx3 = s * sliceStride + idx1 + idx2;
312                            int idx4 = 2 * s;
313                            temp[idx4] = a[idx3];
314                            temp[idx4 + 1] = a[idx3 + 1];
315                        }
316                        fftSlices.complexForward(temp);
317                        for (int s = 0; s < slices; s++) {
318                            int idx3 = s * sliceStride + idx1 + idx2;
319                            int idx4 = 2 * s;
320                            a[idx3] = temp[idx4];
321                            a[idx3 + 1] = temp[idx4 + 1];
322                        }
323                    }
324                }
325            }
326            sliceStride = rows * columns;
327            rowStride = columns;
328        }
329    }
330
331    /**
332     * Computes 3D forward DFT of complex data leaving the result in
333     * <code>a</code>. The data is stored in 3D array. Complex data is
334     * represented by 2 double values in sequence: the real and imaginary part,
335     * i.e. the input array must be of size slices by rows by 2*columns. The physical
336     * layout of the input data is as follows:
337     * 
338     * <pre>
339     * a[k1][k2][2*k3] = Re[k1][k2][k3], 
340     * a[k1][k2][2*k3+1] = Im[k1][k2][k3], 0&lt;=k1&lt;slices, 0&lt;=k2&lt;rows, 0&lt;=k3&lt;columns,
341     * </pre>
342     * 
343     * @param a
344     *            data to transform
345     */
346    public void complexForward(final double[][][] a) {
347        int nthreads = ConcurrencyUtils.getNumberOfThreads();
348        if (isPowerOfTwo) {
349            int oldn3 = columns;
350            columns = 2 * columns;
351
352            sliceStride = rows * columns;
353            rowStride = columns;
354
355            if (nthreads != oldNthreads) {
356                nt = slices;
357                if (nt < rows) {
358                    nt = rows;
359                }
360                nt *= 8;
361                if (nthreads > 1) {
362                    nt *= nthreads;
363                }
364                if (columns == 4) {
365                    nt >>= 1;
366                } else if (columns < 4) {
367                    nt >>= 2;
368                }
369                t = new double[nt];
370                oldNthreads = nthreads;
371            }
372            if ((nthreads > 1) && useThreads) {
373                xdft3da_subth2(0, -1, a, true);
374                cdft3db_subth(-1, a, true);
375            } else {
376                xdft3da_sub2(0, -1, a, true);
377                cdft3db_sub(-1, a, true);
378            }
379            columns = oldn3;
380            sliceStride = rows * columns;
381            rowStride = columns;
382        } else {
383            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
384                Future<?>[] futures = new Future[nthreads];
385                int p = slices / nthreads;
386                for (int l = 0; l < nthreads; l++) {
387                    final int firstSlice = l * p;
388                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
389                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
390                        @Override
391                                                public void run() {
392                            for (int s = firstSlice; s < lastSlice; s++) {
393                                for (int r = 0; r < rows; r++) {
394                                    fftColumns.complexForward(a[s][r]);
395                                }
396                            }
397                        }
398                    });
399                }
400                ConcurrencyUtils.waitForCompletion(futures);
401
402                for (int l = 0; l < nthreads; l++) {
403                    final int firstSlice = l * p;
404                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
405
406                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
407                        @Override
408                                                public void run() {
409                            double[] temp = new double[2 * rows];
410                            for (int s = firstSlice; s < lastSlice; s++) {
411                                for (int c = 0; c < columns; c++) {
412                                    int idx2 = 2 * c;
413                                    for (int r = 0; r < rows; r++) {
414                                        int idx4 = 2 * r;
415                                        temp[idx4] = a[s][r][idx2];
416                                        temp[idx4 + 1] = a[s][r][idx2 + 1];
417                                    }
418                                    fftRows.complexForward(temp);
419                                    for (int r = 0; r < rows; r++) {
420                                        int idx4 = 2 * r;
421                                        a[s][r][idx2] = temp[idx4];
422                                        a[s][r][idx2 + 1] = temp[idx4 + 1];
423                                    }
424                                }
425                            }
426                        }
427                    });
428                }
429                ConcurrencyUtils.waitForCompletion(futures);
430
431                p = rows / nthreads;
432                for (int l = 0; l < nthreads; l++) {
433                    final int firstRow = l * p;
434                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
435
436                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
437                        @Override
438                                                public void run() {
439                            double[] temp = new double[2 * slices];
440                            for (int r = firstRow; r < lastRow; r++) {
441                                for (int c = 0; c < columns; c++) {
442                                    int idx2 = 2 * c;
443                                    for (int s = 0; s < slices; s++) {
444                                        int idx4 = 2 * s;
445                                        temp[idx4] = a[s][r][idx2];
446                                        temp[idx4 + 1] = a[s][r][idx2 + 1];
447                                    }
448                                    fftSlices.complexForward(temp);
449                                    for (int s = 0; s < slices; s++) {
450                                        int idx4 = 2 * s;
451                                        a[s][r][idx2] = temp[idx4];
452                                        a[s][r][idx2 + 1] = temp[idx4 + 1];
453                                    }
454                                }
455                            }
456                        }
457                    });
458                }
459                ConcurrencyUtils.waitForCompletion(futures);
460
461            } else {
462                for (int s = 0; s < slices; s++) {
463                    for (int r = 0; r < rows; r++) {
464                        fftColumns.complexForward(a[s][r]);
465                    }
466                }
467
468                double[] temp = new double[2 * rows];
469                for (int s = 0; s < slices; s++) {
470                    for (int c = 0; c < columns; c++) {
471                        int idx2 = 2 * c;
472                        for (int r = 0; r < rows; r++) {
473                            int idx4 = 2 * r;
474                            temp[idx4] = a[s][r][idx2];
475                            temp[idx4 + 1] = a[s][r][idx2 + 1];
476                        }
477                        fftRows.complexForward(temp);
478                        for (int r = 0; r < rows; r++) {
479                            int idx4 = 2 * r;
480                            a[s][r][idx2] = temp[idx4];
481                            a[s][r][idx2 + 1] = temp[idx4 + 1];
482                        }
483                    }
484                }
485
486                temp = new double[2 * slices];
487                for (int r = 0; r < rows; r++) {
488                    for (int c = 0; c < columns; c++) {
489                        int idx2 = 2 * c;
490                        for (int s = 0; s < slices; s++) {
491                            int idx4 = 2 * s;
492                            temp[idx4] = a[s][r][idx2];
493                            temp[idx4 + 1] = a[s][r][idx2 + 1];
494                        }
495                        fftSlices.complexForward(temp);
496                        for (int s = 0; s < slices; s++) {
497                            int idx4 = 2 * s;
498                            a[s][r][idx2] = temp[idx4];
499                            a[s][r][idx2 + 1] = temp[idx4 + 1];
500                        }
501                    }
502                }
503            }
504        }
505    }
506
507    /**
508     * Computes 3D inverse DFT of complex data leaving the result in
509     * <code>a</code>. The data is stored in a 1D array addressed in
510     * slice-major, then row-major, then column-major, in order of significance,
511     * i.e. element (i,j,k) of 3-d array x[slices][rows][2*columns] is stored in
512     * a[i*sliceStride + j*rowStride + k], where sliceStride = rows * 2 * columns and
513     * rowStride = 2 * columns. Complex number is stored as two double values in
514     * sequence: the real and imaginary part, i.e. the input array must be of
515     * size slices*rows*2*columns. The physical layout of the input data is as follows:
516     * 
517     * <pre>
518     * a[k1*sliceStride + k2*rowStride + 2*k3] = Re[k1][k2][k3], 
519     * a[k1*sliceStride + k2*rowStride + 2*k3+1] = Im[k1][k2][k3], 0&lt;=k1&lt;slices, 0&lt;=k2&lt;rows, 0&lt;=k3&lt;columns,
520     * </pre>
521     * 
522     * @param a
523     *            data to transform
524     * @param scale
525     *            if true then scaling is performed
526     */
527    public void complexInverse(final double[] a, final boolean scale) {
528
529        int nthreads = ConcurrencyUtils.getNumberOfThreads();
530
531        if (isPowerOfTwo) {
532            int oldn3 = columns;
533            columns = 2 * columns;
534            sliceStride = rows * columns;
535            rowStride = columns;
536            if (nthreads != oldNthreads) {
537                nt = slices;
538                if (nt < rows) {
539                    nt = rows;
540                }
541                nt *= 8;
542                if (nthreads > 1) {
543                    nt *= nthreads;
544                }
545                if (columns == 4) {
546                    nt >>= 1;
547                } else if (columns < 4) {
548                    nt >>= 2;
549                }
550                t = new double[nt];
551                oldNthreads = nthreads;
552            }
553            if ((nthreads > 1) && useThreads) {
554                xdft3da_subth2(0, 1, a, scale);
555                cdft3db_subth(1, a, scale);
556            } else {
557                xdft3da_sub2(0, 1, a, scale);
558                cdft3db_sub(1, a, scale);
559            }
560            columns = oldn3;
561            sliceStride = rows * columns;
562            rowStride = columns;
563        } else {
564            sliceStride = 2 * rows * columns;
565            rowStride = 2 * columns;
566            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
567                Future<?>[] futures = new Future[nthreads];
568                int p = slices / nthreads;
569                for (int l = 0; l < nthreads; l++) {
570                    final int firstSlice = l * p;
571                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
572
573                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
574                        @Override
575                                                public void run() {
576                            for (int s = firstSlice; s < lastSlice; s++) {
577                                int idx1 = s * sliceStride;
578                                for (int r = 0; r < rows; r++) {
579                                    fftColumns.complexInverse(a, idx1 + r * rowStride, scale);
580                                }
581                            }
582                        }
583                    });
584                }
585                ConcurrencyUtils.waitForCompletion(futures);
586
587                for (int l = 0; l < nthreads; l++) {
588                    final int firstSlice = l * p;
589                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
590
591                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
592                        @Override
593                                                public void run() {
594                            double[] temp = new double[2 * rows];
595                            for (int s = firstSlice; s < lastSlice; s++) {
596                                int idx1 = s * sliceStride;
597                                for (int c = 0; c < columns; c++) {
598                                    int idx2 = 2 * c;
599                                    for (int r = 0; r < rows; r++) {
600                                        int idx3 = idx1 + idx2 + r * rowStride;
601                                        int idx4 = 2 * r;
602                                        temp[idx4] = a[idx3];
603                                        temp[idx4 + 1] = a[idx3 + 1];
604                                    }
605                                    fftRows.complexInverse(temp, scale);
606                                    for (int r = 0; r < rows; r++) {
607                                        int idx3 = idx1 + idx2 + r * rowStride;
608                                        int idx4 = 2 * r;
609                                        a[idx3] = temp[idx4];
610                                        a[idx3 + 1] = temp[idx4 + 1];
611                                    }
612                                }
613                            }
614                        }
615                    });
616                }
617                ConcurrencyUtils.waitForCompletion(futures);
618
619                p = rows / nthreads;
620                for (int l = 0; l < nthreads; l++) {
621                    final int firstRow = l * p;
622                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
623
624                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
625                        @Override
626                                                public void run() {
627                            double[] temp = new double[2 * slices];
628                            for (int r = firstRow; r < lastRow; r++) {
629                                int idx1 = r * rowStride;
630                                for (int c = 0; c < columns; c++) {
631                                    int idx2 = 2 * c;
632                                    for (int s = 0; s < slices; s++) {
633                                        int idx3 = s * sliceStride + idx1 + idx2;
634                                        int idx4 = 2 * s;
635                                        temp[idx4] = a[idx3];
636                                        temp[idx4 + 1] = a[idx3 + 1];
637                                    }
638                                    fftSlices.complexInverse(temp, scale);
639                                    for (int s = 0; s < slices; s++) {
640                                        int idx3 = s * sliceStride + idx1 + idx2;
641                                        int idx4 = 2 * s;
642                                        a[idx3] = temp[idx4];
643                                        a[idx3 + 1] = temp[idx4 + 1];
644                                    }
645                                }
646                            }
647                        }
648                    });
649                }
650                ConcurrencyUtils.waitForCompletion(futures);
651
652            } else {
653                for (int s = 0; s < slices; s++) {
654                    int idx1 = s * sliceStride;
655                    for (int r = 0; r < rows; r++) {
656                        fftColumns.complexInverse(a, idx1 + r * rowStride, scale);
657                    }
658                }
659                double[] temp = new double[2 * rows];
660                for (int s = 0; s < slices; s++) {
661                    int idx1 = s * sliceStride;
662                    for (int c = 0; c < columns; c++) {
663                        int idx2 = 2 * c;
664                        for (int r = 0; r < rows; r++) {
665                            int idx3 = idx1 + idx2 + r * rowStride;
666                            int idx4 = 2 * r;
667                            temp[idx4] = a[idx3];
668                            temp[idx4 + 1] = a[idx3 + 1];
669                        }
670                        fftRows.complexInverse(temp, scale);
671                        for (int r = 0; r < rows; r++) {
672                            int idx3 = idx1 + idx2 + r * rowStride;
673                            int idx4 = 2 * r;
674                            a[idx3] = temp[idx4];
675                            a[idx3 + 1] = temp[idx4 + 1];
676                        }
677                    }
678                }
679                temp = new double[2 * slices];
680                for (int r = 0; r < rows; r++) {
681                    int idx1 = r * rowStride;
682                    for (int c = 0; c < columns; c++) {
683                        int idx2 = 2 * c;
684                        for (int s = 0; s < slices; s++) {
685                            int idx3 = s * sliceStride + idx1 + idx2;
686                            int idx4 = 2 * s;
687                            temp[idx4] = a[idx3];
688                            temp[idx4 + 1] = a[idx3 + 1];
689                        }
690                        fftSlices.complexInverse(temp, scale);
691                        for (int s = 0; s < slices; s++) {
692                            int idx3 = s * sliceStride + idx1 + idx2;
693                            int idx4 = 2 * s;
694                            a[idx3] = temp[idx4];
695                            a[idx3 + 1] = temp[idx4 + 1];
696                        }
697                    }
698                }
699            }
700            sliceStride = rows * columns;
701            rowStride = columns;
702        }
703    }
704
705    /**
706     * Computes 3D inverse DFT of complex data leaving the result in
707     * <code>a</code>. The data is stored in a 3D array. Complex data is
708     * represented by 2 double values in sequence: the real and imaginary part,
709     * i.e. the input array must be of size slices by rows by 2*columns. The physical
710     * layout of the input data is as follows:
711     * 
712     * <pre>
713     * a[k1][k2][2*k3] = Re[k1][k2][k3], 
714     * a[k1][k2][2*k3+1] = Im[k1][k2][k3], 0&lt;=k1&lt;slices, 0&lt;=k2&lt;rows, 0&lt;=k3&lt;columns,
715     * </pre>
716     * 
717     * @param a
718     *            data to transform
719     * @param scale
720     *            if true then scaling is performed
721     */
722    public void complexInverse(final double[][][] a, final boolean scale) {
723        int nthreads = ConcurrencyUtils.getNumberOfThreads();
724        if (isPowerOfTwo) {
725            int oldn3 = columns;
726            columns = 2 * columns;
727            sliceStride = rows * columns;
728            rowStride = columns;
729            if (nthreads != oldNthreads) {
730                nt = slices;
731                if (nt < rows) {
732                    nt = rows;
733                }
734                nt *= 8;
735                if (nthreads > 1) {
736                    nt *= nthreads;
737                }
738                if (columns == 4) {
739                    nt >>= 1;
740                } else if (columns < 4) {
741                    nt >>= 2;
742                }
743                t = new double[nt];
744                oldNthreads = nthreads;
745            }
746            if ((nthreads > 1) && useThreads) {
747                xdft3da_subth2(0, 1, a, scale);
748                cdft3db_subth(1, a, scale);
749            } else {
750                xdft3da_sub2(0, 1, a, scale);
751                cdft3db_sub(1, a, scale);
752            }
753            columns = oldn3;
754            sliceStride = rows * columns;
755            rowStride = columns;
756        } else {
757            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
758                Future<?>[] futures = new Future[nthreads];
759                int p = slices / nthreads;
760                for (int l = 0; l < nthreads; l++) {
761                    final int firstSlice = l * p;
762                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
763
764                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
765                        @Override
766                                                public void run() {
767                            for (int s = firstSlice; s < lastSlice; s++) {
768                                for (int r = 0; r < rows; r++) {
769                                    fftColumns.complexInverse(a[s][r], scale);
770                                }
771                            }
772                        }
773                    });
774                }
775                ConcurrencyUtils.waitForCompletion(futures);
776
777                for (int l = 0; l < nthreads; l++) {
778                    final int firstSlice = l * p;
779                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
780
781                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
782                        @Override
783                                                public void run() {
784                            double[] temp = new double[2 * rows];
785                            for (int s = firstSlice; s < lastSlice; s++) {
786                                for (int c = 0; c < columns; c++) {
787                                    int idx2 = 2 * c;
788                                    for (int r = 0; r < rows; r++) {
789                                        int idx4 = 2 * r;
790                                        temp[idx4] = a[s][r][idx2];
791                                        temp[idx4 + 1] = a[s][r][idx2 + 1];
792                                    }
793                                    fftRows.complexInverse(temp, scale);
794                                    for (int r = 0; r < rows; r++) {
795                                        int idx4 = 2 * r;
796                                        a[s][r][idx2] = temp[idx4];
797                                        a[s][r][idx2 + 1] = temp[idx4 + 1];
798                                    }
799                                }
800                            }
801                        }
802                    });
803                }
804                ConcurrencyUtils.waitForCompletion(futures);
805
806                p = rows / nthreads;
807                for (int l = 0; l < nthreads; l++) {
808                    final int firstRow = l * p;
809                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
810
811                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
812                        @Override
813                                                public void run() {
814                            double[] temp = new double[2 * slices];
815                            for (int r = firstRow; r < lastRow; r++) {
816                                for (int c = 0; c < columns; c++) {
817                                    int idx2 = 2 * c;
818                                    for (int s = 0; s < slices; s++) {
819                                        int idx4 = 2 * s;
820                                        temp[idx4] = a[s][r][idx2];
821                                        temp[idx4 + 1] = a[s][r][idx2 + 1];
822                                    }
823                                    fftSlices.complexInverse(temp, scale);
824                                    for (int s = 0; s < slices; s++) {
825                                        int idx4 = 2 * s;
826                                        a[s][r][idx2] = temp[idx4];
827                                        a[s][r][idx2 + 1] = temp[idx4 + 1];
828                                    }
829                                }
830                            }
831                        }
832                    });
833                }
834                ConcurrencyUtils.waitForCompletion(futures);
835
836            } else {
837                for (int s = 0; s < slices; s++) {
838                    for (int r = 0; r < rows; r++) {
839                        fftColumns.complexInverse(a[s][r], scale);
840                    }
841                }
842                double[] temp = new double[2 * rows];
843                for (int s = 0; s < slices; s++) {
844                    for (int c = 0; c < columns; c++) {
845                        int idx2 = 2 * c;
846                        for (int r = 0; r < rows; r++) {
847                            int idx4 = 2 * r;
848                            temp[idx4] = a[s][r][idx2];
849                            temp[idx4 + 1] = a[s][r][idx2 + 1];
850                        }
851                        fftRows.complexInverse(temp, scale);
852                        for (int r = 0; r < rows; r++) {
853                            int idx4 = 2 * r;
854                            a[s][r][idx2] = temp[idx4];
855                            a[s][r][idx2 + 1] = temp[idx4 + 1];
856                        }
857                    }
858                }
859                temp = new double[2 * slices];
860                for (int r = 0; r < rows; r++) {
861                    for (int c = 0; c < columns; c++) {
862                        int idx2 = 2 * c;
863                        for (int s = 0; s < slices; s++) {
864                            int idx4 = 2 * s;
865                            temp[idx4] = a[s][r][idx2];
866                            temp[idx4 + 1] = a[s][r][idx2 + 1];
867                        }
868                        fftSlices.complexInverse(temp, scale);
869                        for (int s = 0; s < slices; s++) {
870                            int idx4 = 2 * s;
871                            a[s][r][idx2] = temp[idx4];
872                            a[s][r][idx2 + 1] = temp[idx4 + 1];
873                        }
874                    }
875                }
876            }
877        }
878    }
879
880    /**
881     * Computes 3D forward DFT of real data leaving the result in <code>a</code>
882     * . This method only works when the sizes of all three dimensions are
883     * power-of-two numbers. The data is stored in a 1D array addressed in
884     * slice-major, then row-major, then column-major, in order of significance,
885     * i.e. element (i,j,k) of 3-d array x[slices][rows][2*columns] is stored in
886     * a[i*sliceStride + j*rowStride + k], where sliceStride = rows * 2 * columns and
887     * rowStride = 2 * columns. The physical layout of the output data is as follows:
888     * 
889     * <pre>
890     * a[k1*sliceStride + k2*rowStride + 2*k3] = Re[k1][k2][k3]
891     *                 = Re[(slices-k1)%slices][(rows-k2)%rows][columns-k3], 
892     * a[k1*sliceStride + k2*rowStride + 2*k3+1] = Im[k1][k2][k3]
893     *                   = -Im[(slices-k1)%slices][(rows-k2)%rows][columns-k3], 
894     *     0&lt;=k1&lt;slices, 0&lt;=k2&lt;rows, 0&lt;k3&lt;columns/2, 
895     * a[k1*sliceStride + k2*rowStride] = Re[k1][k2][0]
896     *              = Re[(slices-k1)%slices][rows-k2][0], 
897     * a[k1*sliceStride + k2*rowStride + 1] = Im[k1][k2][0]
898     *              = -Im[(slices-k1)%slices][rows-k2][0], 
899     * a[k1*sliceStride + (rows-k2)*rowStride + 1] = Re[(slices-k1)%slices][k2][columns/2]
900     *                 = Re[k1][rows-k2][columns/2], 
901     * a[k1*sliceStride + (rows-k2)*rowStride] = -Im[(slices-k1)%slices][k2][columns/2]
902     *                 = Im[k1][rows-k2][columns/2], 
903     *     0&lt;=k1&lt;slices, 0&lt;k2&lt;rows/2, 
904     * a[k1*sliceStride] = Re[k1][0][0]
905     *             = Re[slices-k1][0][0], 
906     * a[k1*sliceStride + 1] = Im[k1][0][0]
907     *             = -Im[slices-k1][0][0], 
908     * a[k1*sliceStride + (rows/2)*rowStride] = Re[k1][rows/2][0]
909     *                = Re[slices-k1][rows/2][0], 
910     * a[k1*sliceStride + (rows/2)*rowStride + 1] = Im[k1][rows/2][0]
911     *                = -Im[slices-k1][rows/2][0], 
912     * a[(slices-k1)*sliceStride + 1] = Re[k1][0][columns/2]
913     *                = Re[slices-k1][0][columns/2], 
914     * a[(slices-k1)*sliceStride] = -Im[k1][0][columns/2]
915     *                = Im[slices-k1][0][columns/2], 
916     * a[(slices-k1)*sliceStride + (rows/2)*rowStride + 1] = Re[k1][rows/2][columns/2]
917     *                   = Re[slices-k1][rows/2][columns/2], 
918     * a[(slices-k1)*sliceStride + (rows/2) * rowStride] = -Im[k1][rows/2][columns/2]
919     *                   = Im[slices-k1][rows/2][columns/2], 
920     *     0&lt;k1&lt;slices/2, 
921     * a[0] = Re[0][0][0], 
922     * a[1] = Re[0][0][columns/2], 
923     * a[(rows/2)*rowStride] = Re[0][rows/2][0], 
924     * a[(rows/2)*rowStride + 1] = Re[0][rows/2][columns/2], 
925     * a[(slices/2)*sliceStride] = Re[slices/2][0][0], 
926     * a[(slices/2)*sliceStride + 1] = Re[slices/2][0][columns/2], 
927     * a[(slices/2)*sliceStride + (rows/2)*rowStride] = Re[slices/2][rows/2][0], 
928     * a[(slices/2)*sliceStride + (rows/2)*rowStride + 1] = Re[slices/2][rows/2][columns/2]
929     * </pre>
930     * 
931     * 
932     * This method computes only half of the elements of the real transform. The
933     * other half satisfies the symmetry condition. If you want the full real
934     * forward transform, use <code>realForwardFull</code>. To get back the
935     * original data, use <code>realInverse</code> on the output of this method.
936     * 
937     * @param a
938     *            data to transform
939     */
940    public void realForward(double[] a) {
941        if (isPowerOfTwo == false) {
942            throw new IllegalArgumentException("slices, rows and columns must be power of two numbers");
943        } else {
944            int nthreads = ConcurrencyUtils.getNumberOfThreads();
945            if (nthreads != oldNthreads) {
946                nt = slices;
947                if (nt < rows) {
948                    nt = rows;
949                }
950                nt *= 8;
951                if (nthreads > 1) {
952                    nt *= nthreads;
953                }
954                if (columns == 4) {
955                    nt >>= 1;
956                } else if (columns < 4) {
957                    nt >>= 2;
958                }
959                t = new double[nt];
960                oldNthreads = nthreads;
961            }
962            if ((nthreads > 1) && useThreads) {
963                xdft3da_subth1(1, -1, a, true);
964                cdft3db_subth(-1, a, true);
965                rdft3d_sub(1, a);
966            } else {
967                xdft3da_sub1(1, -1, a, true);
968                cdft3db_sub(-1, a, true);
969                rdft3d_sub(1, a);
970            }
971        }
972    }
973
974    /**
975     * Computes 3D forward DFT of real data leaving the result in <code>a</code>
976     * . This method only works when the sizes of all three dimensions are
977     * power-of-two numbers. The data is stored in a 3D array. The physical
978     * layout of the output data is as follows:
979     * 
980     * <pre>
981     * a[k1][k2][2*k3] = Re[k1][k2][k3]
982     *                 = Re[(slices-k1)%slices][(rows-k2)%rows][columns-k3], 
983     * a[k1][k2][2*k3+1] = Im[k1][k2][k3]
984     *                   = -Im[(slices-k1)%slices][(rows-k2)%rows][columns-k3], 
985     *     0&lt;=k1&lt;slices, 0&lt;=k2&lt;rows, 0&lt;k3&lt;columns/2, 
986     * a[k1][k2][0] = Re[k1][k2][0]
987     *              = Re[(slices-k1)%slices][rows-k2][0], 
988     * a[k1][k2][1] = Im[k1][k2][0]
989     *              = -Im[(slices-k1)%slices][rows-k2][0], 
990     * a[k1][rows-k2][1] = Re[(slices-k1)%slices][k2][columns/2]
991     *                 = Re[k1][rows-k2][columns/2], 
992     * a[k1][rows-k2][0] = -Im[(slices-k1)%slices][k2][columns/2]
993     *                 = Im[k1][rows-k2][columns/2], 
994     *     0&lt;=k1&lt;slices, 0&lt;k2&lt;rows/2, 
995     * a[k1][0][0] = Re[k1][0][0]
996     *             = Re[slices-k1][0][0], 
997     * a[k1][0][1] = Im[k1][0][0]
998     *             = -Im[slices-k1][0][0], 
999     * a[k1][rows/2][0] = Re[k1][rows/2][0]
1000     *                = Re[slices-k1][rows/2][0], 
1001     * a[k1][rows/2][1] = Im[k1][rows/2][0]
1002     *                = -Im[slices-k1][rows/2][0], 
1003     * a[slices-k1][0][1] = Re[k1][0][columns/2]
1004     *                = Re[slices-k1][0][columns/2], 
1005     * a[slices-k1][0][0] = -Im[k1][0][columns/2]
1006     *                = Im[slices-k1][0][columns/2], 
1007     * a[slices-k1][rows/2][1] = Re[k1][rows/2][columns/2]
1008     *                   = Re[slices-k1][rows/2][columns/2], 
1009     * a[slices-k1][rows/2][0] = -Im[k1][rows/2][columns/2]
1010     *                   = Im[slices-k1][rows/2][columns/2], 
1011     *     0&lt;k1&lt;slices/2, 
1012     * a[0][0][0] = Re[0][0][0], 
1013     * a[0][0][1] = Re[0][0][columns/2], 
1014     * a[0][rows/2][0] = Re[0][rows/2][0], 
1015     * a[0][rows/2][1] = Re[0][rows/2][columns/2], 
1016     * a[slices/2][0][0] = Re[slices/2][0][0], 
1017     * a[slices/2][0][1] = Re[slices/2][0][columns/2], 
1018     * a[slices/2][rows/2][0] = Re[slices/2][rows/2][0], 
1019     * a[slices/2][rows/2][1] = Re[slices/2][rows/2][columns/2]
1020     * </pre>
1021     * 
1022     * 
1023     * This method computes only half of the elements of the real transform. The
1024     * other half satisfies the symmetry condition. If you want the full real
1025     * forward transform, use <code>realForwardFull</code>. To get back the
1026     * original data, use <code>realInverse</code> on the output of this method.
1027     * 
1028     * @param a
1029     *            data to transform
1030     */
1031    public void realForward(double[][][] a) {
1032        if (isPowerOfTwo == false) {
1033            throw new IllegalArgumentException("slices, rows and columns must be power of two numbers");
1034        } else {
1035            int nthreads = ConcurrencyUtils.getNumberOfThreads();
1036            if (nthreads != oldNthreads) {
1037                nt = slices;
1038                if (nt < rows) {
1039                    nt = rows;
1040                }
1041                nt *= 8;
1042                if (nthreads > 1) {
1043                    nt *= nthreads;
1044                }
1045                if (columns == 4) {
1046                    nt >>= 1;
1047                } else if (columns < 4) {
1048                    nt >>= 2;
1049                }
1050                t = new double[nt];
1051                oldNthreads = nthreads;
1052            }
1053            if ((nthreads > 1) && useThreads) {
1054                xdft3da_subth1(1, -1, a, true);
1055                cdft3db_subth(-1, a, true);
1056                rdft3d_sub(1, a);
1057            } else {
1058                xdft3da_sub1(1, -1, a, true);
1059                cdft3db_sub(-1, a, true);
1060                rdft3d_sub(1, a);
1061            }
1062        }
1063    }
1064
1065    /**
1066     * Computes 3D forward DFT of real data leaving the result in <code>a</code>
1067     * . This method computes full real forward transform, i.e. you will get the
1068     * same result as from <code>complexForward</code> called with all imaginary
1069     * part equal 0. Because the result is stored in <code>a</code>, the input
1070     * array must be of size slices*rows*2*columns, with only the first slices*rows*columns elements
1071     * filled with real data. To get back the original data, use
1072     * <code>complexInverse</code> on the output of this method.
1073     * 
1074     * @param a
1075     *            data to transform
1076     */
1077    public void realForwardFull(double[] a) {
1078        if (isPowerOfTwo) {
1079            int nthreads = ConcurrencyUtils.getNumberOfThreads();
1080            if (nthreads != oldNthreads) {
1081                nt = slices;
1082                if (nt < rows) {
1083                    nt = rows;
1084                }
1085                nt *= 8;
1086                if (nthreads > 1) {
1087                    nt *= nthreads;
1088                }
1089                if (columns == 4) {
1090                    nt >>= 1;
1091                } else if (columns < 4) {
1092                    nt >>= 2;
1093                }
1094                t = new double[nt];
1095                oldNthreads = nthreads;
1096            }
1097            if ((nthreads > 1) && useThreads) {
1098                xdft3da_subth2(1, -1, a, true);
1099                cdft3db_subth(-1, a, true);
1100                rdft3d_sub(1, a);
1101            } else {
1102                xdft3da_sub2(1, -1, a, true);
1103                cdft3db_sub(-1, a, true);
1104                rdft3d_sub(1, a);
1105            }
1106            fillSymmetric(a);
1107        } else {
1108            mixedRadixRealForwardFull(a);
1109        }
1110    }
1111
1112    /**
1113     * Computes 3D forward DFT of real data leaving the result in <code>a</code>
1114     * . This method computes full real forward transform, i.e. you will get the
1115     * same result as from <code>complexForward</code> called with all imaginary
1116     * part equal 0. Because the result is stored in <code>a</code>, the input
1117     * array must be of size slices by rows by 2*columns, with only the first slices by rows by
1118     * columns elements filled with real data. To get back the original data, use
1119     * <code>complexInverse</code> on the output of this method.
1120     * 
1121     * @param a
1122     *            data to transform
1123     */
1124    public void realForwardFull(double[][][] a) {
1125        if (isPowerOfTwo) {
1126            int nthreads = ConcurrencyUtils.getNumberOfThreads();
1127            if (nthreads != oldNthreads) {
1128                nt = slices;
1129                if (nt < rows) {
1130                    nt = rows;
1131                }
1132                nt *= 8;
1133                if (nthreads > 1) {
1134                    nt *= nthreads;
1135                }
1136                if (columns == 4) {
1137                    nt >>= 1;
1138                } else if (columns < 4) {
1139                    nt >>= 2;
1140                }
1141                t = new double[nt];
1142                oldNthreads = nthreads;
1143            }
1144            if ((nthreads > 1) && useThreads) {
1145                xdft3da_subth2(1, -1, a, true);
1146                cdft3db_subth(-1, a, true);
1147                rdft3d_sub(1, a);
1148            } else {
1149                xdft3da_sub2(1, -1, a, true);
1150                cdft3db_sub(-1, a, true);
1151                rdft3d_sub(1, a);
1152            }
1153            fillSymmetric(a);
1154        } else {
1155            mixedRadixRealForwardFull(a);
1156        }
1157    }
1158
1159    /**
1160     * Computes 3D inverse DFT of real data leaving the result in <code>a</code>
1161     * . This method only works when the sizes of all three dimensions are
1162     * power-of-two numbers. The data is stored in a 1D array addressed in
1163     * slice-major, then row-major, then column-major, in order of significance,
1164     * i.e. element (i,j,k) of 3-d array x[slices][rows][2*columns] is stored in
1165     * a[i*sliceStride + j*rowStride + k], where sliceStride = rows * 2 * columns and
1166     * rowStride = 2 * columns. The physical layout of the input data has to be as
1167     * follows:
1168     * 
1169     * <pre>
1170     * a[k1*sliceStride + k2*rowStride + 2*k3] = Re[k1][k2][k3]
1171     *                 = Re[(slices-k1)%slices][(rows-k2)%rows][columns-k3], 
1172     * a[k1*sliceStride + k2*rowStride + 2*k3+1] = Im[k1][k2][k3]
1173     *                   = -Im[(slices-k1)%slices][(rows-k2)%rows][columns-k3], 
1174     *     0&lt;=k1&lt;slices, 0&lt;=k2&lt;rows, 0&lt;k3&lt;columns/2, 
1175     * a[k1*sliceStride + k2*rowStride] = Re[k1][k2][0]
1176     *              = Re[(slices-k1)%slices][rows-k2][0], 
1177     * a[k1*sliceStride + k2*rowStride + 1] = Im[k1][k2][0]
1178     *              = -Im[(slices-k1)%slices][rows-k2][0], 
1179     * a[k1*sliceStride + (rows-k2)*rowStride + 1] = Re[(slices-k1)%slices][k2][columns/2]
1180     *                 = Re[k1][rows-k2][columns/2], 
1181     * a[k1*sliceStride + (rows-k2)*rowStride] = -Im[(slices-k1)%slices][k2][columns/2]
1182     *                 = Im[k1][rows-k2][columns/2], 
1183     *     0&lt;=k1&lt;slices, 0&lt;k2&lt;rows/2, 
1184     * a[k1*sliceStride] = Re[k1][0][0]
1185     *             = Re[slices-k1][0][0], 
1186     * a[k1*sliceStride + 1] = Im[k1][0][0]
1187     *             = -Im[slices-k1][0][0], 
1188     * a[k1*sliceStride + (rows/2)*rowStride] = Re[k1][rows/2][0]
1189     *                = Re[slices-k1][rows/2][0], 
1190     * a[k1*sliceStride + (rows/2)*rowStride + 1] = Im[k1][rows/2][0]
1191     *                = -Im[slices-k1][rows/2][0], 
1192     * a[(slices-k1)*sliceStride + 1] = Re[k1][0][columns/2]
1193     *                = Re[slices-k1][0][columns/2], 
1194     * a[(slices-k1)*sliceStride] = -Im[k1][0][columns/2]
1195     *                = Im[slices-k1][0][columns/2], 
1196     * a[(slices-k1)*sliceStride + (rows/2)*rowStride + 1] = Re[k1][rows/2][columns/2]
1197     *                   = Re[slices-k1][rows/2][columns/2], 
1198     * a[(slices-k1)*sliceStride + (rows/2) * rowStride] = -Im[k1][rows/2][columns/2]
1199     *                   = Im[slices-k1][rows/2][columns/2], 
1200     *     0&lt;k1&lt;slices/2, 
1201     * a[0] = Re[0][0][0], 
1202     * a[1] = Re[0][0][columns/2], 
1203     * a[(rows/2)*rowStride] = Re[0][rows/2][0], 
1204     * a[(rows/2)*rowStride + 1] = Re[0][rows/2][columns/2], 
1205     * a[(slices/2)*sliceStride] = Re[slices/2][0][0], 
1206     * a[(slices/2)*sliceStride + 1] = Re[slices/2][0][columns/2], 
1207     * a[(slices/2)*sliceStride + (rows/2)*rowStride] = Re[slices/2][rows/2][0], 
1208     * a[(slices/2)*sliceStride + (rows/2)*rowStride + 1] = Re[slices/2][rows/2][columns/2]
1209     * </pre>
1210     * 
1211     * This method computes only half of the elements of the real transform. The
1212     * other half satisfies the symmetry condition. If you want the full real
1213     * inverse transform, use <code>realInverseFull</code>.
1214     * 
1215     * @param a
1216     *            data to transform
1217     * 
1218     * @param scale
1219     *            if true then scaling is performed
1220     */
1221    public void realInverse(double[] a, boolean scale) {
1222        if (isPowerOfTwo == false) {
1223            throw new IllegalArgumentException("slices, rows and columns must be power of two numbers");
1224        } else {
1225            int nthreads = ConcurrencyUtils.getNumberOfThreads();
1226            if (nthreads != oldNthreads) {
1227                nt = slices;
1228                if (nt < rows) {
1229                    nt = rows;
1230                }
1231                nt *= 8;
1232                if (nthreads > 1) {
1233                    nt *= nthreads;
1234                }
1235                if (columns == 4) {
1236                    nt >>= 1;
1237                } else if (columns < 4) {
1238                    nt >>= 2;
1239                }
1240                t = new double[nt];
1241                oldNthreads = nthreads;
1242            }
1243            if ((nthreads > 1) && useThreads) {
1244                rdft3d_sub(-1, a);
1245                cdft3db_subth(1, a, scale);
1246                xdft3da_subth1(1, 1, a, scale);
1247            } else {
1248                rdft3d_sub(-1, a);
1249                cdft3db_sub(1, a, scale);
1250                xdft3da_sub1(1, 1, a, scale);
1251            }
1252        }
1253    }
1254
1255    /**
1256     * Computes 3D inverse DFT of real data leaving the result in <code>a</code>
1257     * . This method only works when the sizes of all three dimensions are
1258     * power-of-two numbers. The data is stored in a 3D array. The physical
1259     * layout of the input data has to be as follows:
1260     * 
1261     * <pre>
1262     * a[k1][k2][2*k3] = Re[k1][k2][k3]
1263     *                 = Re[(slices-k1)%slices][(rows-k2)%rows][columns-k3], 
1264     * a[k1][k2][2*k3+1] = Im[k1][k2][k3]
1265     *                   = -Im[(slices-k1)%slices][(rows-k2)%rows][columns-k3], 
1266     *     0&lt;=k1&lt;slices, 0&lt;=k2&lt;rows, 0&lt;k3&lt;columns/2, 
1267     * a[k1][k2][0] = Re[k1][k2][0]
1268     *              = Re[(slices-k1)%slices][rows-k2][0], 
1269     * a[k1][k2][1] = Im[k1][k2][0]
1270     *              = -Im[(slices-k1)%slices][rows-k2][0], 
1271     * a[k1][rows-k2][1] = Re[(slices-k1)%slices][k2][columns/2]
1272     *                 = Re[k1][rows-k2][columns/2], 
1273     * a[k1][rows-k2][0] = -Im[(slices-k1)%slices][k2][columns/2]
1274     *                 = Im[k1][rows-k2][columns/2], 
1275     *     0&lt;=k1&lt;slices, 0&lt;k2&lt;rows/2, 
1276     * a[k1][0][0] = Re[k1][0][0]
1277     *             = Re[slices-k1][0][0], 
1278     * a[k1][0][1] = Im[k1][0][0]
1279     *             = -Im[slices-k1][0][0], 
1280     * a[k1][rows/2][0] = Re[k1][rows/2][0]
1281     *                = Re[slices-k1][rows/2][0], 
1282     * a[k1][rows/2][1] = Im[k1][rows/2][0]
1283     *                = -Im[slices-k1][rows/2][0], 
1284     * a[slices-k1][0][1] = Re[k1][0][columns/2]
1285     *                = Re[slices-k1][0][columns/2], 
1286     * a[slices-k1][0][0] = -Im[k1][0][columns/2]
1287     *                = Im[slices-k1][0][columns/2], 
1288     * a[slices-k1][rows/2][1] = Re[k1][rows/2][columns/2]
1289     *                   = Re[slices-k1][rows/2][columns/2], 
1290     * a[slices-k1][rows/2][0] = -Im[k1][rows/2][columns/2]
1291     *                   = Im[slices-k1][rows/2][columns/2], 
1292     *     0&lt;k1&lt;slices/2, 
1293     * a[0][0][0] = Re[0][0][0], 
1294     * a[0][0][1] = Re[0][0][columns/2], 
1295     * a[0][rows/2][0] = Re[0][rows/2][0], 
1296     * a[0][rows/2][1] = Re[0][rows/2][columns/2], 
1297     * a[slices/2][0][0] = Re[slices/2][0][0], 
1298     * a[slices/2][0][1] = Re[slices/2][0][columns/2], 
1299     * a[slices/2][rows/2][0] = Re[slices/2][rows/2][0], 
1300     * a[slices/2][rows/2][1] = Re[slices/2][rows/2][columns/2]
1301     * </pre>
1302     * 
1303     * This method computes only half of the elements of the real transform. The
1304     * other half satisfies the symmetry condition. If you want the full real
1305     * inverse transform, use <code>realInverseFull</code>.
1306     * 
1307     * @param a
1308     *            data to transform
1309     * 
1310     * @param scale
1311     *            if true then scaling is performed
1312     */
1313    public void realInverse(double[][][] a, boolean scale) {
1314        if (isPowerOfTwo == false) {
1315            throw new IllegalArgumentException("slices, rows and columns must be power of two numbers");
1316        } else {
1317            int nthreads = ConcurrencyUtils.getNumberOfThreads();
1318            if (nthreads != oldNthreads) {
1319                nt = slices;
1320                if (nt < rows) {
1321                    nt = rows;
1322                }
1323                nt *= 8;
1324                if (nthreads > 1) {
1325                    nt *= nthreads;
1326                }
1327                if (columns == 4) {
1328                    nt >>= 1;
1329                } else if (columns < 4) {
1330                    nt >>= 2;
1331                }
1332                t = new double[nt];
1333                oldNthreads = nthreads;
1334            }
1335            if ((nthreads > 1) && useThreads) {
1336                rdft3d_sub(-1, a);
1337                cdft3db_subth(1, a, scale);
1338                xdft3da_subth1(1, 1, a, scale);
1339            } else {
1340                rdft3d_sub(-1, a);
1341                cdft3db_sub(1, a, scale);
1342                xdft3da_sub1(1, 1, a, scale);
1343            }
1344        }
1345    }
1346
1347    /**
1348     * Computes 3D inverse DFT of real data leaving the result in <code>a</code>
1349     * . This method computes full real inverse transform, i.e. you will get the
1350     * same result as from <code>complexInverse</code> called with all imaginary
1351     * part equal 0. Because the result is stored in <code>a</code>, the input
1352     * array must be of size slices*rows*2*columns, with only the first slices*rows*columns elements
1353     * filled with real data.
1354     * 
1355     * @param a
1356     *            data to transform
1357     * @param scale
1358     *            if true then scaling is performed
1359     */
1360    public void realInverseFull(double[] a, boolean scale) {
1361        if (isPowerOfTwo) {
1362            int nthreads = ConcurrencyUtils.getNumberOfThreads();
1363            if (nthreads != oldNthreads) {
1364                nt = slices;
1365                if (nt < rows) {
1366                    nt = rows;
1367                }
1368                nt *= 8;
1369                if (nthreads > 1) {
1370                    nt *= nthreads;
1371                }
1372                if (columns == 4) {
1373                    nt >>= 1;
1374                } else if (columns < 4) {
1375                    nt >>= 2;
1376                }
1377                t = new double[nt];
1378                oldNthreads = nthreads;
1379            }
1380            if ((nthreads > 1) && useThreads) {
1381                xdft3da_subth2(1, 1, a, scale);
1382                cdft3db_subth(1, a, scale);
1383                rdft3d_sub(1, a);
1384            } else {
1385                xdft3da_sub2(1, 1, a, scale);
1386                cdft3db_sub(1, a, scale);
1387                rdft3d_sub(1, a);
1388            }
1389            fillSymmetric(a);
1390        } else {
1391            mixedRadixRealInverseFull(a, scale);
1392        }
1393    }
1394
1395    /**
1396     * Computes 3D inverse DFT of real data leaving the result in <code>a</code>
1397     * . This method computes full real inverse transform, i.e. you will get the
1398     * same result as from <code>complexInverse</code> called with all imaginary
1399     * part equal 0. Because the result is stored in <code>a</code>, the input
1400     * array must be of size slices by rows by 2*columns, with only the first slices by rows by
1401     * columns elements filled with real data.
1402     * 
1403     * @param a
1404     *            data to transform
1405     * @param scale
1406     *            if true then scaling is performed
1407     */
1408    public void realInverseFull(double[][][] a, boolean scale) {
1409        if (isPowerOfTwo) {
1410            int nthreads = ConcurrencyUtils.getNumberOfThreads();
1411            if (nthreads != oldNthreads) {
1412                nt = slices;
1413                if (nt < rows) {
1414                    nt = rows;
1415                }
1416                nt *= 8;
1417                if (nthreads > 1) {
1418                    nt *= nthreads;
1419                }
1420                if (columns == 4) {
1421                    nt >>= 1;
1422                } else if (columns < 4) {
1423                    nt >>= 2;
1424                }
1425                t = new double[nt];
1426                oldNthreads = nthreads;
1427            }
1428            if ((nthreads > 1) && useThreads) {
1429                xdft3da_subth2(1, 1, a, scale);
1430                cdft3db_subth(1, a, scale);
1431                rdft3d_sub(1, a);
1432            } else {
1433                xdft3da_sub2(1, 1, a, scale);
1434                cdft3db_sub(1, a, scale);
1435                rdft3d_sub(1, a);
1436            }
1437            fillSymmetric(a);
1438        } else {
1439            mixedRadixRealInverseFull(a, scale);
1440        }
1441    }
1442
1443    /* -------- child routines -------- */
1444
1445    private void mixedRadixRealForwardFull(final double[][][] a) {
1446        double[] temp = new double[2 * rows];
1447        int ldimn2 = rows / 2 + 1;
1448        final int newn3 = 2 * columns;
1449        final int n2d2;
1450        if (rows % 2 == 0) {
1451            n2d2 = rows / 2;
1452        } else {
1453            n2d2 = (rows + 1) / 2;
1454        }
1455
1456        int nthreads = ConcurrencyUtils.getNumberOfThreads();
1457        if ((nthreads > 1) && useThreads && (slices >= nthreads) && (columns >= nthreads) && (ldimn2 >= nthreads)) {
1458            Future<?>[] futures = new Future[nthreads];
1459            int p = slices / nthreads;
1460            for (int l = 0; l < nthreads; l++) {
1461                final int firstSlice = l * p;
1462                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
1463
1464                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1465                    @Override
1466                                        public void run() {
1467                        for (int s = firstSlice; s < lastSlice; s++) {
1468                            for (int r = 0; r < rows; r++) {
1469                                fftColumns.realForwardFull(a[s][r]);
1470                            }
1471                        }
1472                    }
1473                });
1474            }
1475            ConcurrencyUtils.waitForCompletion(futures);
1476
1477            for (int l = 0; l < nthreads; l++) {
1478                final int firstSlice = l * p;
1479                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
1480
1481                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1482                    @Override
1483                                        public void run() {
1484                        double[] temp = new double[2 * rows];
1485
1486                        for (int s = firstSlice; s < lastSlice; s++) {
1487                            for (int c = 0; c < columns; c++) {
1488                                int idx2 = 2 * c;
1489                                for (int r = 0; r < rows; r++) {
1490                                    int idx4 = 2 * r;
1491                                    temp[idx4] = a[s][r][idx2];
1492                                    temp[idx4 + 1] = a[s][r][idx2 + 1];
1493                                }
1494                                fftRows.complexForward(temp);
1495                                for (int r = 0; r < rows; r++) {
1496                                    int idx4 = 2 * r;
1497                                    a[s][r][idx2] = temp[idx4];
1498                                    a[s][r][idx2 + 1] = temp[idx4 + 1];
1499                                }
1500                            }
1501                        }
1502                    }
1503                });
1504            }
1505            ConcurrencyUtils.waitForCompletion(futures);
1506
1507            p = ldimn2 / nthreads;
1508            for (int l = 0; l < nthreads; l++) {
1509                final int firstRow = l * p;
1510                final int lastRow = (l == (nthreads - 1)) ? ldimn2 : firstRow + p;
1511
1512                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1513                    @Override
1514                                        public void run() {
1515                        double[] temp = new double[2 * slices];
1516
1517                        for (int r = firstRow; r < lastRow; r++) {
1518                            for (int c = 0; c < columns; c++) {
1519                                int idx1 = 2 * c;
1520                                for (int s = 0; s < slices; s++) {
1521                                    int idx2 = 2 * s;
1522                                    temp[idx2] = a[s][r][idx1];
1523                                    temp[idx2 + 1] = a[s][r][idx1 + 1];
1524                                }
1525                                fftSlices.complexForward(temp);
1526                                for (int s = 0; s < slices; s++) {
1527                                    int idx2 = 2 * s;
1528                                    a[s][r][idx1] = temp[idx2];
1529                                    a[s][r][idx1 + 1] = temp[idx2 + 1];
1530                                }
1531                            }
1532                        }
1533                    }
1534                });
1535            }
1536            ConcurrencyUtils.waitForCompletion(futures);
1537            p = slices / nthreads;
1538            for (int l = 0; l < nthreads; l++) {
1539                final int firstSlice = l * p;
1540                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
1541
1542                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1543                    @Override
1544                                        public void run() {
1545
1546                        for (int s = firstSlice; s < lastSlice; s++) {
1547                            int idx2 = (slices - s) % slices;
1548                            for (int r = 1; r < n2d2; r++) {
1549                                int idx4 = rows - r;
1550                                for (int c = 0; c < columns; c++) {
1551                                    int idx1 = 2 * c;
1552                                    int idx3 = newn3 - idx1;
1553                                    a[idx2][idx4][idx3 % newn3] = a[s][r][idx1];
1554                                    a[idx2][idx4][(idx3 + 1) % newn3] = -a[s][r][idx1 + 1];
1555                                }
1556                            }
1557                        }
1558                    }
1559                });
1560            }
1561            ConcurrencyUtils.waitForCompletion(futures);
1562        } else {
1563
1564            for (int s = 0; s < slices; s++) {
1565                for (int r = 0; r < rows; r++) {
1566                    fftColumns.realForwardFull(a[s][r]);
1567                }
1568            }
1569
1570            for (int s = 0; s < slices; s++) {
1571                for (int c = 0; c < columns; c++) {
1572                    int idx2 = 2 * c;
1573                    for (int r = 0; r < rows; r++) {
1574                        int idx4 = 2 * r;
1575                        temp[idx4] = a[s][r][idx2];
1576                        temp[idx4 + 1] = a[s][r][idx2 + 1];
1577                    }
1578                    fftRows.complexForward(temp);
1579                    for (int r = 0; r < rows; r++) {
1580                        int idx4 = 2 * r;
1581                        a[s][r][idx2] = temp[idx4];
1582                        a[s][r][idx2 + 1] = temp[idx4 + 1];
1583                    }
1584                }
1585            }
1586
1587            temp = new double[2 * slices];
1588
1589            for (int r = 0; r < ldimn2; r++) {
1590                for (int c = 0; c < columns; c++) {
1591                    int idx1 = 2 * c;
1592                    for (int s = 0; s < slices; s++) {
1593                        int idx2 = 2 * s;
1594                        temp[idx2] = a[s][r][idx1];
1595                        temp[idx2 + 1] = a[s][r][idx1 + 1];
1596                    }
1597                    fftSlices.complexForward(temp);
1598                    for (int s = 0; s < slices; s++) {
1599                        int idx2 = 2 * s;
1600                        a[s][r][idx1] = temp[idx2];
1601                        a[s][r][idx1 + 1] = temp[idx2 + 1];
1602                    }
1603                }
1604            }
1605
1606            for (int s = 0; s < slices; s++) {
1607                int idx2 = (slices - s) % slices;
1608                for (int r = 1; r < n2d2; r++) {
1609                    int idx4 = rows - r;
1610                    for (int c = 0; c < columns; c++) {
1611                        int idx1 = 2 * c;
1612                        int idx3 = newn3 - idx1;
1613                        a[idx2][idx4][idx3 % newn3] = a[s][r][idx1];
1614                        a[idx2][idx4][(idx3 + 1) % newn3] = -a[s][r][idx1 + 1];
1615                    }
1616                }
1617            }
1618
1619        }
1620    }
1621
1622    private void mixedRadixRealInverseFull(final double[][][] a, final boolean scale) {
1623        double[] temp = new double[2 * rows];
1624        int ldimn2 = rows / 2 + 1;
1625        final int newn3 = 2 * columns;
1626        final int n2d2;
1627        if (rows % 2 == 0) {
1628            n2d2 = rows / 2;
1629        } else {
1630            n2d2 = (rows + 1) / 2;
1631        }
1632
1633        int nthreads = ConcurrencyUtils.getNumberOfThreads();
1634        if ((nthreads > 1) && useThreads && (slices >= nthreads) && (columns >= nthreads) && (ldimn2 >= nthreads)) {
1635            Future<?>[] futures = new Future[nthreads];
1636            int p = slices / nthreads;
1637            for (int l = 0; l < nthreads; l++) {
1638                final int firstSlice = l * p;
1639                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
1640
1641                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1642                    @Override
1643                                        public void run() {
1644                        for (int s = firstSlice; s < lastSlice; s++) {
1645                            for (int r = 0; r < rows; r++) {
1646                                fftColumns.realInverseFull(a[s][r], scale);
1647                            }
1648                        }
1649                    }
1650                });
1651            }
1652            ConcurrencyUtils.waitForCompletion(futures);
1653
1654            for (int l = 0; l < nthreads; l++) {
1655                final int firstSlice = l * p;
1656                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
1657
1658                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1659                    @Override
1660                                        public void run() {
1661                        double[] temp = new double[2 * rows];
1662
1663                        for (int s = firstSlice; s < lastSlice; s++) {
1664                            for (int c = 0; c < columns; c++) {
1665                                int idx2 = 2 * c;
1666                                for (int r = 0; r < rows; r++) {
1667                                    int idx4 = 2 * r;
1668                                    temp[idx4] = a[s][r][idx2];
1669                                    temp[idx4 + 1] = a[s][r][idx2 + 1];
1670                                }
1671                                fftRows.complexInverse(temp, scale);
1672                                for (int r = 0; r < rows; r++) {
1673                                    int idx4 = 2 * r;
1674                                    a[s][r][idx2] = temp[idx4];
1675                                    a[s][r][idx2 + 1] = temp[idx4 + 1];
1676                                }
1677                            }
1678                        }
1679                    }
1680                });
1681            }
1682            ConcurrencyUtils.waitForCompletion(futures);
1683
1684            p = ldimn2 / nthreads;
1685            for (int l = 0; l < nthreads; l++) {
1686                final int firstRow = l * p;
1687                final int lastRow = (l == (nthreads - 1)) ? ldimn2 : firstRow + p;
1688                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1689                    @Override
1690                                        public void run() {
1691                        double[] temp = new double[2 * slices];
1692
1693                        for (int r = firstRow; r < lastRow; r++) {
1694                            for (int c = 0; c < columns; c++) {
1695                                int idx1 = 2 * c;
1696                                for (int s = 0; s < slices; s++) {
1697                                    int idx2 = 2 * s;
1698                                    temp[idx2] = a[s][r][idx1];
1699                                    temp[idx2 + 1] = a[s][r][idx1 + 1];
1700                                }
1701                                fftSlices.complexInverse(temp, scale);
1702                                for (int s = 0; s < slices; s++) {
1703                                    int idx2 = 2 * s;
1704                                    a[s][r][idx1] = temp[idx2];
1705                                    a[s][r][idx1 + 1] = temp[idx2 + 1];
1706                                }
1707                            }
1708                        }
1709                    }
1710                });
1711            }
1712            ConcurrencyUtils.waitForCompletion(futures);
1713            p = slices / nthreads;
1714            for (int l = 0; l < nthreads; l++) {
1715                final int firstSlice = l * p;
1716                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
1717
1718                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1719                    @Override
1720                                        public void run() {
1721
1722                        for (int s = firstSlice; s < lastSlice; s++) {
1723                            int idx2 = (slices - s) % slices;
1724                            for (int r = 1; r < n2d2; r++) {
1725                                int idx4 = rows - r;
1726                                for (int c = 0; c < columns; c++) {
1727                                    int idx1 = 2 * c;
1728                                    int idx3 = newn3 - idx1;
1729                                    a[idx2][idx4][idx3 % newn3] = a[s][r][idx1];
1730                                    a[idx2][idx4][(idx3 + 1) % newn3] = -a[s][r][idx1 + 1];
1731                                }
1732                            }
1733                        }
1734                    }
1735                });
1736            }
1737            ConcurrencyUtils.waitForCompletion(futures);
1738        } else {
1739
1740            for (int s = 0; s < slices; s++) {
1741                for (int r = 0; r < rows; r++) {
1742                    fftColumns.realInverseFull(a[s][r], scale);
1743                }
1744            }
1745
1746            for (int s = 0; s < slices; s++) {
1747                for (int c = 0; c < columns; c++) {
1748                    int idx2 = 2 * c;
1749                    for (int r = 0; r < rows; r++) {
1750                        int idx4 = 2 * r;
1751                        temp[idx4] = a[s][r][idx2];
1752                        temp[idx4 + 1] = a[s][r][idx2 + 1];
1753                    }
1754                    fftRows.complexInverse(temp, scale);
1755                    for (int r = 0; r < rows; r++) {
1756                        int idx4 = 2 * r;
1757                        a[s][r][idx2] = temp[idx4];
1758                        a[s][r][idx2 + 1] = temp[idx4 + 1];
1759                    }
1760                }
1761            }
1762
1763            temp = new double[2 * slices];
1764
1765            for (int r = 0; r < ldimn2; r++) {
1766                for (int c = 0; c < columns; c++) {
1767                    int idx1 = 2 * c;
1768                    for (int s = 0; s < slices; s++) {
1769                        int idx2 = 2 * s;
1770                        temp[idx2] = a[s][r][idx1];
1771                        temp[idx2 + 1] = a[s][r][idx1 + 1];
1772                    }
1773                    fftSlices.complexInverse(temp, scale);
1774                    for (int s = 0; s < slices; s++) {
1775                        int idx2 = 2 * s;
1776                        a[s][r][idx1] = temp[idx2];
1777                        a[s][r][idx1 + 1] = temp[idx2 + 1];
1778                    }
1779                }
1780            }
1781
1782            for (int s = 0; s < slices; s++) {
1783                int idx2 = (slices - s) % slices;
1784                for (int r = 1; r < n2d2; r++) {
1785                    int idx4 = rows - r;
1786                    for (int c = 0; c < columns; c++) {
1787                        int idx1 = 2 * c;
1788                        int idx3 = newn3 - idx1;
1789                        a[idx2][idx4][idx3 % newn3] = a[s][r][idx1];
1790                        a[idx2][idx4][(idx3 + 1) % newn3] = -a[s][r][idx1 + 1];
1791                    }
1792                }
1793            }
1794
1795        }
1796    }
1797
1798    private void mixedRadixRealForwardFull(final double[] a) {
1799        final int twon3 = 2 * columns;
1800        double[] temp = new double[twon3];
1801        int ldimn2 = rows / 2 + 1;
1802        final int n2d2;
1803        if (rows % 2 == 0) {
1804            n2d2 = rows / 2;
1805        } else {
1806            n2d2 = (rows + 1) / 2;
1807        }
1808
1809        final int twoSliceStride = 2 * sliceStride;
1810        final int twoRowStride = 2 * rowStride;
1811        int n1d2 = slices / 2;
1812        int nthreads = ConcurrencyUtils.getNumberOfThreads();
1813        if ((nthreads > 1) && useThreads && (n1d2 >= nthreads) && (columns >= nthreads) && (ldimn2 >= nthreads)) {
1814            Future<?>[] futures = new Future[nthreads];
1815            int p = n1d2 / nthreads;
1816            for (int l = 0; l < nthreads; l++) {
1817                final int firstSlice = slices - 1 - l * p;
1818                final int lastSlice = (l == (nthreads - 1)) ? n1d2 + 1 : firstSlice - p;
1819                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1820                    @Override
1821                                        public void run() {
1822                        double[] temp = new double[twon3];
1823                        for (int s = firstSlice; s >= lastSlice; s--) {
1824                            int idx1 = s * sliceStride;
1825                            int idx2 = s * twoSliceStride;
1826                            for (int r = rows - 1; r >= 0; r--) {
1827                                System.arraycopy(a, idx1 + r * rowStride, temp, 0, columns);
1828                                fftColumns.realForwardFull(temp);
1829                                System.arraycopy(temp, 0, a, idx2 + r * twoRowStride, twon3);
1830                            }
1831                        }
1832                    }
1833                });
1834            }
1835            ConcurrencyUtils.waitForCompletion(futures);
1836
1837            final double[][][] temp2 = new double[n1d2 + 1][rows][twon3];
1838
1839            for (int l = 0; l < nthreads; l++) {
1840                final int firstSlice = l * p;
1841                final int lastSlice = (l == (nthreads - 1)) ? n1d2 + 1 : firstSlice + p;
1842                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1843                    @Override
1844                                        public void run() {
1845                        for (int s = firstSlice; s < lastSlice; s++) {
1846                            int idx1 = s * sliceStride;
1847                            for (int r = 0; r < rows; r++) {
1848                                System.arraycopy(a, idx1 + r * rowStride, temp2[s][r], 0, columns);
1849                                fftColumns.realForwardFull(temp2[s][r]);
1850                            }
1851                        }
1852                    }
1853                });
1854            }
1855            ConcurrencyUtils.waitForCompletion(futures);
1856
1857            for (int l = 0; l < nthreads; l++) {
1858                final int firstSlice = l * p;
1859                final int lastSlice = (l == (nthreads - 1)) ? n1d2 + 1 : firstSlice + p;
1860                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1861                    @Override
1862                                        public void run() {
1863                        for (int s = firstSlice; s < lastSlice; s++) {
1864                            int idx1 = s * twoSliceStride;
1865                            for (int r = 0; r < rows; r++) {
1866                                System.arraycopy(temp2[s][r], 0, a, idx1 + r * twoRowStride, twon3);
1867                            }
1868                        }
1869                    }
1870                });
1871            }
1872            ConcurrencyUtils.waitForCompletion(futures);
1873
1874            p = slices / nthreads;
1875            for (int l = 0; l < nthreads; l++) {
1876                final int firstSlice = l * p;
1877                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
1878                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1879                    @Override
1880                                        public void run() {
1881                        double[] temp = new double[2 * rows];
1882
1883                        for (int s = firstSlice; s < lastSlice; s++) {
1884                            int idx1 = s * twoSliceStride;
1885                            for (int c = 0; c < columns; c++) {
1886                                int idx2 = 2 * c;
1887                                for (int r = 0; r < rows; r++) {
1888                                    int idx3 = idx1 + r * twoRowStride + idx2;
1889                                    int idx4 = 2 * r;
1890                                    temp[idx4] = a[idx3];
1891                                    temp[idx4 + 1] = a[idx3 + 1];
1892                                }
1893                                fftRows.complexForward(temp);
1894                                for (int r = 0; r < rows; r++) {
1895                                    int idx3 = idx1 + r * twoRowStride + idx2;
1896                                    int idx4 = 2 * r;
1897                                    a[idx3] = temp[idx4];
1898                                    a[idx3 + 1] = temp[idx4 + 1];
1899                                }
1900                            }
1901                        }
1902                    }
1903                });
1904            }
1905            ConcurrencyUtils.waitForCompletion(futures);
1906
1907            p = ldimn2 / nthreads;
1908            for (int l = 0; l < nthreads; l++) {
1909                final int firstRow = l * p;
1910                final int lastRow = (l == (nthreads - 1)) ? ldimn2 : firstRow + p;
1911                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1912                    @Override
1913                                        public void run() {
1914                        double[] temp = new double[2 * slices];
1915
1916                        for (int r = firstRow; r < lastRow; r++) {
1917                            int idx3 = r * twoRowStride;
1918                            for (int c = 0; c < columns; c++) {
1919                                int idx1 = 2 * c;
1920                                for (int s = 0; s < slices; s++) {
1921                                    int idx2 = 2 * s;
1922                                    int idx4 = s * twoSliceStride + idx3 + idx1;
1923                                    temp[idx2] = a[idx4];
1924                                    temp[idx2 + 1] = a[idx4 + 1];
1925                                }
1926                                fftSlices.complexForward(temp);
1927                                for (int s = 0; s < slices; s++) {
1928                                    int idx2 = 2 * s;
1929                                    int idx4 = s * twoSliceStride + idx3 + idx1;
1930                                    a[idx4] = temp[idx2];
1931                                    a[idx4 + 1] = temp[idx2 + 1];
1932                                }
1933                            }
1934                        }
1935                    }
1936                });
1937            }
1938            ConcurrencyUtils.waitForCompletion(futures);
1939            p = slices / nthreads;
1940            for (int l = 0; l < nthreads; l++) {
1941                final int firstSlice = l * p;
1942                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
1943                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1944                    @Override
1945                                        public void run() {
1946
1947                        for (int s = firstSlice; s < lastSlice; s++) {
1948                            int idx2 = (slices - s) % slices;
1949                            int idx5 = idx2 * twoSliceStride;
1950                            int idx6 = s * twoSliceStride;
1951                            for (int r = 1; r < n2d2; r++) {
1952                                int idx4 = rows - r;
1953                                int idx7 = idx4 * twoRowStride;
1954                                int idx8 = r * twoRowStride;
1955                                int idx9 = idx5 + idx7;
1956                                for (int c = 0; c < columns; c++) {
1957                                    int idx1 = 2 * c;
1958                                    int idx3 = twon3 - idx1;
1959                                    int idx10 = idx6 + idx8 + idx1;
1960                                    a[idx9 + idx3 % twon3] = a[idx10];
1961                                    a[idx9 + (idx3 + 1) % twon3] = -a[idx10 + 1];
1962                                }
1963                            }
1964                        }
1965                    }
1966                });
1967            }
1968            ConcurrencyUtils.waitForCompletion(futures);
1969        } else {
1970
1971            for (int s = slices - 1; s >= 0; s--) {
1972                int idx1 = s * sliceStride;
1973                int idx2 = s * twoSliceStride;
1974                for (int r = rows - 1; r >= 0; r--) {
1975                    System.arraycopy(a, idx1 + r * rowStride, temp, 0, columns);
1976                    fftColumns.realForwardFull(temp);
1977                    System.arraycopy(temp, 0, a, idx2 + r * twoRowStride, twon3);
1978                }
1979            }
1980
1981            temp = new double[2 * rows];
1982
1983            for (int s = 0; s < slices; s++) {
1984                int idx1 = s * twoSliceStride;
1985                for (int c = 0; c < columns; c++) {
1986                    int idx2 = 2 * c;
1987                    for (int r = 0; r < rows; r++) {
1988                        int idx4 = 2 * r;
1989                        int idx3 = idx1 + r * twoRowStride + idx2;
1990                        temp[idx4] = a[idx3];
1991                        temp[idx4 + 1] = a[idx3 + 1];
1992                    }
1993                    fftRows.complexForward(temp);
1994                    for (int r = 0; r < rows; r++) {
1995                        int idx4 = 2 * r;
1996                        int idx3 = idx1 + r * twoRowStride + idx2;
1997                        a[idx3] = temp[idx4];
1998                        a[idx3 + 1] = temp[idx4 + 1];
1999                    }
2000                }
2001            }
2002
2003            temp = new double[2 * slices];
2004
2005            for (int r = 0; r < ldimn2; r++) {
2006                int idx3 = r * twoRowStride;
2007                for (int c = 0; c < columns; c++) {
2008                    int idx1 = 2 * c;
2009                    for (int s = 0; s < slices; s++) {
2010                        int idx2 = 2 * s;
2011                        int idx4 = s * twoSliceStride + idx3 + idx1;
2012                        temp[idx2] = a[idx4];
2013                        temp[idx2 + 1] = a[idx4 + 1];
2014                    }
2015                    fftSlices.complexForward(temp);
2016                    for (int s = 0; s < slices; s++) {
2017                        int idx2 = 2 * s;
2018                        int idx4 = s * twoSliceStride + idx3 + idx1;
2019                        a[idx4] = temp[idx2];
2020                        a[idx4 + 1] = temp[idx2 + 1];
2021                    }
2022                }
2023            }
2024
2025            for (int s = 0; s < slices; s++) {
2026                int idx2 = (slices - s) % slices;
2027                int idx5 = idx2 * twoSliceStride;
2028                int idx6 = s * twoSliceStride;
2029                for (int r = 1; r < n2d2; r++) {
2030                    int idx4 = rows - r;
2031                    int idx7 = idx4 * twoRowStride;
2032                    int idx8 = r * twoRowStride;
2033                    int idx9 = idx5 + idx7;
2034                    for (int c = 0; c < columns; c++) {
2035                        int idx1 = 2 * c;
2036                        int idx3 = twon3 - idx1;
2037                        int idx10 = idx6 + idx8 + idx1;
2038                        a[idx9 + idx3 % twon3] = a[idx10];
2039                        a[idx9 + (idx3 + 1) % twon3] = -a[idx10 + 1];
2040                    }
2041                }
2042            }
2043
2044        }
2045    }
2046
2047    private void mixedRadixRealInverseFull(final double[] a, final boolean scale) {
2048        final int twon3 = 2 * columns;
2049        double[] temp = new double[twon3];
2050        int ldimn2 = rows / 2 + 1;
2051        final int n2d2;
2052        if (rows % 2 == 0) {
2053            n2d2 = rows / 2;
2054        } else {
2055            n2d2 = (rows + 1) / 2;
2056        }
2057
2058        final int twoSliceStride = 2 * sliceStride;
2059        final int twoRowStride = 2 * rowStride;
2060        int n1d2 = slices / 2;
2061
2062        int nthreads = ConcurrencyUtils.getNumberOfThreads();
2063        if ((nthreads > 1) && useThreads && (n1d2 >= nthreads) && (columns >= nthreads) && (ldimn2 >= nthreads)) {
2064            Future<?>[] futures = new Future[nthreads];
2065            int p = n1d2 / nthreads;
2066            for (int l = 0; l < nthreads; l++) {
2067                final int firstSlice = slices - 1 - l * p;
2068                final int lastSlice = (l == (nthreads - 1)) ? n1d2 + 1 : firstSlice - p;
2069                futures[l] = ConcurrencyUtils.submit(new Runnable() {
2070                    @Override
2071                                        public void run() {
2072                        double[] temp = new double[twon3];
2073                        for (int s = firstSlice; s >= lastSlice; s--) {
2074                            int idx1 = s * sliceStride;
2075                            int idx2 = s * twoSliceStride;
2076                            for (int r = rows - 1; r >= 0; r--) {
2077                                System.arraycopy(a, idx1 + r * rowStride, temp, 0, columns);
2078                                fftColumns.realInverseFull(temp, scale);
2079                                System.arraycopy(temp, 0, a, idx2 + r * twoRowStride, twon3);
2080                            }
2081                        }
2082                    }
2083                });
2084            }
2085            ConcurrencyUtils.waitForCompletion(futures);
2086
2087            final double[][][] temp2 = new double[n1d2 + 1][rows][twon3];
2088
2089            for (int l = 0; l < nthreads; l++) {
2090                final int firstSlice = l * p;
2091                final int lastSlice = (l == (nthreads - 1)) ? n1d2 + 1 : firstSlice + p;
2092                futures[l] = ConcurrencyUtils.submit(new Runnable() {
2093                    @Override
2094                                        public void run() {
2095                        for (int s = firstSlice; s < lastSlice; s++) {
2096                            int idx1 = s * sliceStride;
2097                            for (int r = 0; r < rows; r++) {
2098                                System.arraycopy(a, idx1 + r * rowStride, temp2[s][r], 0, columns);
2099                                fftColumns.realInverseFull(temp2[s][r], scale);
2100                            }
2101                        }
2102                    }
2103                });
2104            }
2105            ConcurrencyUtils.waitForCompletion(futures);
2106
2107            for (int l = 0; l < nthreads; l++) {
2108                final int firstSlice = l * p;
2109                final int lastSlice = (l == (nthreads - 1)) ? n1d2 + 1 : firstSlice + p;
2110                futures[l] = ConcurrencyUtils.submit(new Runnable() {
2111                    @Override
2112                                        public void run() {
2113                        for (int s = firstSlice; s < lastSlice; s++) {
2114                            int idx1 = s * twoSliceStride;
2115                            for (int r = 0; r < rows; r++) {
2116                                System.arraycopy(temp2[s][r], 0, a, idx1 + r * twoRowStride, twon3);
2117                            }
2118                        }
2119                    }
2120                });
2121            }
2122            ConcurrencyUtils.waitForCompletion(futures);
2123
2124            p = slices / nthreads;
2125            for (int l = 0; l < nthreads; l++) {
2126                final int firstSlice = l * p;
2127                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
2128                futures[l] = ConcurrencyUtils.submit(new Runnable() {
2129                    @Override
2130                                        public void run() {
2131                        double[] temp = new double[2 * rows];
2132
2133                        for (int s = firstSlice; s < lastSlice; s++) {
2134                            int idx1 = s * twoSliceStride;
2135                            for (int c = 0; c < columns; c++) {
2136                                int idx2 = 2 * c;
2137                                for (int r = 0; r < rows; r++) {
2138                                    int idx3 = idx1 + r * twoRowStride + idx2;
2139                                    int idx4 = 2 * r;
2140                                    temp[idx4] = a[idx3];
2141                                    temp[idx4 + 1] = a[idx3 + 1];
2142                                }
2143                                fftRows.complexInverse(temp, scale);
2144                                for (int r = 0; r < rows; r++) {
2145                                    int idx3 = idx1 + r * twoRowStride + idx2;
2146                                    int idx4 = 2 * r;
2147                                    a[idx3] = temp[idx4];
2148                                    a[idx3 + 1] = temp[idx4 + 1];
2149                                }
2150                            }
2151                        }
2152                    }
2153                });
2154            }
2155            ConcurrencyUtils.waitForCompletion(futures);
2156
2157            p = ldimn2 / nthreads;
2158            for (int l = 0; l < nthreads; l++) {
2159                final int firstRow = l * p;
2160                final int lastRow = (l == (nthreads - 1)) ? ldimn2 : firstRow + p;
2161                futures[l] = ConcurrencyUtils.submit(new Runnable() {
2162                    @Override
2163                                        public void run() {
2164                        double[] temp = new double[2 * slices];
2165
2166                        for (int r = firstRow; r < lastRow; r++) {
2167                            int idx3 = r * twoRowStride;
2168                            for (int c = 0; c < columns; c++) {
2169                                int idx1 = 2 * c;
2170                                for (int s = 0; s < slices; s++) {
2171                                    int idx2 = 2 * s;
2172                                    int idx4 = s * twoSliceStride + idx3 + idx1;
2173                                    temp[idx2] = a[idx4];
2174                                    temp[idx2 + 1] = a[idx4 + 1];
2175                                }
2176                                fftSlices.complexInverse(temp, scale);
2177                                for (int s = 0; s < slices; s++) {
2178                                    int idx2 = 2 * s;
2179                                    int idx4 = s * twoSliceStride + idx3 + idx1;
2180                                    a[idx4] = temp[idx2];
2181                                    a[idx4 + 1] = temp[idx2 + 1];
2182                                }
2183                            }
2184                        }
2185                    }
2186                });
2187            }
2188            ConcurrencyUtils.waitForCompletion(futures);
2189
2190            p = slices / nthreads;
2191            for (int l = 0; l < nthreads; l++) {
2192                final int firstSlice = l * p;
2193                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
2194                futures[l] = ConcurrencyUtils.submit(new Runnable() {
2195                    @Override
2196                                        public void run() {
2197
2198                        for (int s = firstSlice; s < lastSlice; s++) {
2199                            int idx2 = (slices - s) % slices;
2200                            int idx5 = idx2 * twoSliceStride;
2201                            int idx6 = s * twoSliceStride;
2202                            for (int r = 1; r < n2d2; r++) {
2203                                int idx4 = rows - r;
2204                                int idx7 = idx4 * twoRowStride;
2205                                int idx8 = r * twoRowStride;
2206                                int idx9 = idx5 + idx7;
2207                                for (int c = 0; c < columns; c++) {
2208                                    int idx1 = 2 * c;
2209                                    int idx3 = twon3 - idx1;
2210                                    int idx10 = idx6 + idx8 + idx1;
2211                                    a[idx9 + idx3 % twon3] = a[idx10];
2212                                    a[idx9 + (idx3 + 1) % twon3] = -a[idx10 + 1];
2213                                }
2214                            }
2215                        }
2216                    }
2217                });
2218            }
2219            ConcurrencyUtils.waitForCompletion(futures);
2220        } else {
2221
2222            for (int s = slices - 1; s >= 0; s--) {
2223                int idx1 = s * sliceStride;
2224                int idx2 = s * twoSliceStride;
2225                for (int r = rows - 1; r >= 0; r--) {
2226                    System.arraycopy(a, idx1 + r * rowStride, temp, 0, columns);
2227                    fftColumns.realInverseFull(temp, scale);
2228                    System.arraycopy(temp, 0, a, idx2 + r * twoRowStride, twon3);
2229                }
2230            }
2231
2232            temp = new double[2 * rows];
2233
2234            for (int s = 0; s < slices; s++) {
2235                int idx1 = s * twoSliceStride;
2236                for (int c = 0; c < columns; c++) {
2237                    int idx2 = 2 * c;
2238                    for (int r = 0; r < rows; r++) {
2239                        int idx4 = 2 * r;
2240                        int idx3 = idx1 + r * twoRowStride + idx2;
2241                        temp[idx4] = a[idx3];
2242                        temp[idx4 + 1] = a[idx3 + 1];
2243                    }
2244                    fftRows.complexInverse(temp, scale);
2245                    for (int r = 0; r < rows; r++) {
2246                        int idx4 = 2 * r;
2247                        int idx3 = idx1 + r * twoRowStride + idx2;
2248                        a[idx3] = temp[idx4];
2249                        a[idx3 + 1] = temp[idx4 + 1];
2250                    }
2251                }
2252            }
2253
2254            temp = new double[2 * slices];
2255
2256            for (int r = 0; r < ldimn2; r++) {
2257                int idx3 = r * twoRowStride;
2258                for (int c = 0; c < columns; c++) {
2259                    int idx1 = 2 * c;
2260                    for (int s = 0; s < slices; s++) {
2261                        int idx2 = 2 * s;
2262                        int idx4 = s * twoSliceStride + idx3 + idx1;
2263                        temp[idx2] = a[idx4];
2264                        temp[idx2 + 1] = a[idx4 + 1];
2265                    }
2266                    fftSlices.complexInverse(temp, scale);
2267                    for (int s = 0; s < slices; s++) {
2268                        int idx2 = 2 * s;
2269                        int idx4 = s * twoSliceStride + idx3 + idx1;
2270                        a[idx4] = temp[idx2];
2271                        a[idx4 + 1] = temp[idx2 + 1];
2272                    }
2273                }
2274            }
2275
2276            for (int s = 0; s < slices; s++) {
2277                int idx2 = (slices - s) % slices;
2278                int idx5 = idx2 * twoSliceStride;
2279                int idx6 = s * twoSliceStride;
2280                for (int r = 1; r < n2d2; r++) {
2281                    int idx4 = rows - r;
2282                    int idx7 = idx4 * twoRowStride;
2283                    int idx8 = r * twoRowStride;
2284                    int idx9 = idx5 + idx7;
2285                    for (int c = 0; c < columns; c++) {
2286                        int idx1 = 2 * c;
2287                        int idx3 = twon3 - idx1;
2288                        int idx10 = idx6 + idx8 + idx1;
2289                        a[idx9 + idx3 % twon3] = a[idx10];
2290                        a[idx9 + (idx3 + 1) % twon3] = -a[idx10 + 1];
2291                    }
2292                }
2293            }
2294
2295        }
2296    }
2297
2298    private void xdft3da_sub1(int icr, int isgn, double[] a, boolean scale) {
2299        int idx0, idx1, idx2, idx3, idx4, idx5;
2300
2301        if (isgn == -1) {
2302            for (int s = 0; s < slices; s++) {
2303                idx0 = s * sliceStride;
2304                if (icr == 0) {
2305                    for (int r = 0; r < rows; r++) {
2306                        fftColumns.complexForward(a, idx0 + r * rowStride);
2307                    }
2308                } else {
2309                    for (int r = 0; r < rows; r++) {
2310                        fftColumns.realForward(a, idx0 + r * rowStride);
2311                    }
2312                }
2313                if (columns > 4) {
2314                    for (int c = 0; c < columns; c += 8) {
2315                        for (int r = 0; r < rows; r++) {
2316                            idx1 = idx0 + r * rowStride + c;
2317                            idx2 = 2 * r;
2318                            idx3 = 2 * rows + 2 * r;
2319                            idx4 = idx3 + 2 * rows;
2320                            idx5 = idx4 + 2 * rows;
2321                            t[idx2] = a[idx1];
2322                            t[idx2 + 1] = a[idx1 + 1];
2323                            t[idx3] = a[idx1 + 2];
2324                            t[idx3 + 1] = a[idx1 + 3];
2325                            t[idx4] = a[idx1 + 4];
2326                            t[idx4 + 1] = a[idx1 + 5];
2327                            t[idx5] = a[idx1 + 6];
2328                            t[idx5 + 1] = a[idx1 + 7];
2329                        }
2330                        fftRows.complexForward(t, 0);
2331                        fftRows.complexForward(t, 2 * rows);
2332                        fftRows.complexForward(t, 4 * rows);
2333                        fftRows.complexForward(t, 6 * rows);
2334                        for (int r = 0; r < rows; r++) {
2335                            idx1 = idx0 + r * rowStride + c;
2336                            idx2 = 2 * r;
2337                            idx3 = 2 * rows + 2 * r;
2338                            idx4 = idx3 + 2 * rows;
2339                            idx5 = idx4 + 2 * rows;
2340                            a[idx1] = t[idx2];
2341                            a[idx1 + 1] = t[idx2 + 1];
2342                            a[idx1 + 2] = t[idx3];
2343                            a[idx1 + 3] = t[idx3 + 1];
2344                            a[idx1 + 4] = t[idx4];
2345                            a[idx1 + 5] = t[idx4 + 1];
2346                            a[idx1 + 6] = t[idx5];
2347                            a[idx1 + 7] = t[idx5 + 1];
2348                        }
2349                    }
2350                } else if (columns == 4) {
2351                    for (int r = 0; r < rows; r++) {
2352                        idx1 = idx0 + r * rowStride;
2353                        idx2 = 2 * r;
2354                        idx3 = 2 * rows + 2 * r;
2355                        t[idx2] = a[idx1];
2356                        t[idx2 + 1] = a[idx1 + 1];
2357                        t[idx3] = a[idx1 + 2];
2358                        t[idx3 + 1] = a[idx1 + 3];
2359                    }
2360                    fftRows.complexForward(t, 0);
2361                    fftRows.complexForward(t, 2 * rows);
2362                    for (int r = 0; r < rows; r++) {
2363                        idx1 = idx0 + r * rowStride;
2364                        idx2 = 2 * r;
2365                        idx3 = 2 * rows + 2 * r;
2366                        a[idx1] = t[idx2];
2367                        a[idx1 + 1] = t[idx2 + 1];
2368                        a[idx1 + 2] = t[idx3];
2369                        a[idx1 + 3] = t[idx3 + 1];
2370                    }
2371                } else if (columns == 2) {
2372                    for (int r = 0; r < rows; r++) {
2373                        idx1 = idx0 + r * rowStride;
2374                        idx2 = 2 * r;
2375                        t[idx2] = a[idx1];
2376                        t[idx2 + 1] = a[idx1 + 1];
2377                    }
2378                    fftRows.complexForward(t, 0);
2379                    for (int r = 0; r < rows; r++) {
2380                        idx1 = idx0 + r * rowStride;
2381                        idx2 = 2 * r;
2382                        a[idx1] = t[idx2];
2383                        a[idx1 + 1] = t[idx2 + 1];
2384                    }
2385                }
2386            }
2387        } else {
2388            for (int s = 0; s < slices; s++) {
2389                idx0 = s * sliceStride;
2390                if (icr == 0) {
2391                    for (int r = 0; r < rows; r++) {
2392                        fftColumns.complexInverse(a, idx0 + r * rowStride, scale);
2393                    }
2394                }
2395                if (columns > 4) {
2396                    for (int c = 0; c < columns; c += 8) {
2397                        for (int r = 0; r < rows; r++) {
2398                            idx1 = idx0 + r * rowStride + c;
2399                            idx2 = 2 * r;
2400                            idx3 = 2 * rows + 2 * r;
2401                            idx4 = idx3 + 2 * rows;
2402                            idx5 = idx4 + 2 * rows;
2403                            t[idx2] = a[idx1];
2404                            t[idx2 + 1] = a[idx1 + 1];
2405                            t[idx3] = a[idx1 + 2];
2406                            t[idx3 + 1] = a[idx1 + 3];
2407                            t[idx4] = a[idx1 + 4];
2408                            t[idx4 + 1] = a[idx1 + 5];
2409                            t[idx5] = a[idx1 + 6];
2410                            t[idx5 + 1] = a[idx1 + 7];
2411                        }
2412                        fftRows.complexInverse(t, 0, scale);
2413                        fftRows.complexInverse(t, 2 * rows, scale);
2414                        fftRows.complexInverse(t, 4 * rows, scale);
2415                        fftRows.complexInverse(t, 6 * rows, scale);
2416                        for (int r = 0; r < rows; r++) {
2417                            idx1 = idx0 + r * rowStride + c;
2418                            idx2 = 2 * r;
2419                            idx3 = 2 * rows + 2 * r;
2420                            idx4 = idx3 + 2 * rows;
2421                            idx5 = idx4 + 2 * rows;
2422                            a[idx1] = t[idx2];
2423                            a[idx1 + 1] = t[idx2 + 1];
2424                            a[idx1 + 2] = t[idx3];
2425                            a[idx1 + 3] = t[idx3 + 1];
2426                            a[idx1 + 4] = t[idx4];
2427                            a[idx1 + 5] = t[idx4 + 1];
2428                            a[idx1 + 6] = t[idx5];
2429                            a[idx1 + 7] = t[idx5 + 1];
2430                        }
2431                    }
2432                } else if (columns == 4) {
2433                    for (int r = 0; r < rows; r++) {
2434                        idx1 = idx0 + r * rowStride;
2435                        idx2 = 2 * r;
2436                        idx3 = 2 * rows + 2 * r;
2437                        t[idx2] = a[idx1];
2438                        t[idx2 + 1] = a[idx1 + 1];
2439                        t[idx3] = a[idx1 + 2];
2440                        t[idx3 + 1] = a[idx1 + 3];
2441                    }
2442                    fftRows.complexInverse(t, 0, scale);
2443                    fftRows.complexInverse(t, 2 * rows, scale);
2444                    for (int r = 0; r < rows; r++) {
2445                        idx1 = idx0 + r * rowStride;
2446                        idx2 = 2 * r;
2447                        idx3 = 2 * rows + 2 * r;
2448                        a[idx1] = t[idx2];
2449                        a[idx1 + 1] = t[idx2 + 1];
2450                        a[idx1 + 2] = t[idx3];
2451                        a[idx1 + 3] = t[idx3 + 1];
2452                    }
2453                } else if (columns == 2) {
2454                    for (int r = 0; r < rows; r++) {
2455                        idx1 = idx0 + r * rowStride;
2456                        idx2 = 2 * r;
2457                        t[idx2] = a[idx1];
2458                        t[idx2 + 1] = a[idx1 + 1];
2459                    }
2460                    fftRows.complexInverse(t, 0, scale);
2461                    for (int r = 0; r < rows; r++) {
2462                        idx1 = idx0 + r * rowStride;
2463                        idx2 = 2 * r;
2464                        a[idx1] = t[idx2];
2465                        a[idx1 + 1] = t[idx2 + 1];
2466                    }
2467                }
2468                if (icr != 0) {
2469                    for (int r = 0; r < rows; r++) {
2470                        fftColumns.realInverse(a, idx0 + r * rowStride, scale);
2471                    }
2472                }
2473            }
2474        }
2475    }
2476
2477    private void xdft3da_sub2(int icr, int isgn, double[] a, boolean scale) {
2478        int idx0, idx1, idx2, idx3, idx4, idx5;
2479
2480        if (isgn == -1) {
2481            for (int s = 0; s < slices; s++) {
2482                idx0 = s * sliceStride;
2483                if (icr == 0) {
2484                    for (int r = 0; r < rows; r++) {
2485                        fftColumns.complexForward(a, idx0 + r * rowStride);
2486                    }
2487                } else {
2488                    for (int r = 0; r < rows; r++) {
2489                        fftColumns.realForward(a, idx0 + r * rowStride);
2490                    }
2491                }
2492                if (columns > 4) {
2493                    for (int c = 0; c < columns; c += 8) {
2494                        for (int r = 0; r < rows; r++) {
2495                            idx1 = idx0 + r * rowStride + c;
2496                            idx2 = 2 * r;
2497                            idx3 = 2 * rows + 2 * r;
2498                            idx4 = idx3 + 2 * rows;
2499                            idx5 = idx4 + 2 * rows;
2500                            t[idx2] = a[idx1];
2501                            t[idx2 + 1] = a[idx1 + 1];
2502                            t[idx3] = a[idx1 + 2];
2503                            t[idx3 + 1] = a[idx1 + 3];
2504                            t[idx4] = a[idx1 + 4];
2505                            t[idx4 + 1] = a[idx1 + 5];
2506                            t[idx5] = a[idx1 + 6];
2507                            t[idx5 + 1] = a[idx1 + 7];
2508                        }
2509                        fftRows.complexForward(t, 0);
2510                        fftRows.complexForward(t, 2 * rows);
2511                        fftRows.complexForward(t, 4 * rows);
2512                        fftRows.complexForward(t, 6 * rows);
2513                        for (int r = 0; r < rows; r++) {
2514                            idx1 = idx0 + r * rowStride + c;
2515                            idx2 = 2 * r;
2516                            idx3 = 2 * rows + 2 * r;
2517                            idx4 = idx3 + 2 * rows;
2518                            idx5 = idx4 + 2 * rows;
2519                            a[idx1] = t[idx2];
2520                            a[idx1 + 1] = t[idx2 + 1];
2521                            a[idx1 + 2] = t[idx3];
2522                            a[idx1 + 3] = t[idx3 + 1];
2523                            a[idx1 + 4] = t[idx4];
2524                            a[idx1 + 5] = t[idx4 + 1];
2525                            a[idx1 + 6] = t[idx5];
2526                            a[idx1 + 7] = t[idx5 + 1];
2527                        }
2528                    }
2529                } else if (columns == 4) {
2530                    for (int r = 0; r < rows; r++) {
2531                        idx1 = idx0 + r * rowStride;
2532                        idx2 = 2 * r;
2533                        idx3 = 2 * rows + 2 * r;
2534                        t[idx2] = a[idx1];
2535                        t[idx2 + 1] = a[idx1 + 1];
2536                        t[idx3] = a[idx1 + 2];
2537                        t[idx3 + 1] = a[idx1 + 3];
2538                    }
2539                    fftRows.complexForward(t, 0);
2540                    fftRows.complexForward(t, 2 * rows);
2541                    for (int r = 0; r < rows; r++) {
2542                        idx1 = idx0 + r * rowStride;
2543                        idx2 = 2 * r;
2544                        idx3 = 2 * rows + 2 * r;
2545                        a[idx1] = t[idx2];
2546                        a[idx1 + 1] = t[idx2 + 1];
2547                        a[idx1 + 2] = t[idx3];
2548                        a[idx1 + 3] = t[idx3 + 1];
2549                    }
2550                } else if (columns == 2) {
2551                    for (int r = 0; r < rows; r++) {
2552                        idx1 = idx0 + r * rowStride;
2553                        idx2 = 2 * r;
2554                        t[idx2] = a[idx1];
2555                        t[idx2 + 1] = a[idx1 + 1];
2556                    }
2557                    fftRows.complexForward(t, 0);
2558                    for (int r = 0; r < rows; r++) {
2559                        idx1 = idx0 + r * rowStride;
2560                        idx2 = 2 * r;
2561                        a[idx1] = t[idx2];
2562                        a[idx1 + 1] = t[idx2 + 1];
2563                    }
2564                }
2565            }
2566        } else {
2567            for (int s = 0; s < slices; s++) {
2568                idx0 = s * sliceStride;
2569                if (icr == 0) {
2570                    for (int r = 0; r < rows; r++) {
2571                        fftColumns.complexInverse(a, idx0 + r * rowStride, scale);
2572                    }
2573                } else {
2574                    for (int r = 0; r < rows; r++) {
2575                        fftColumns.realInverse2(a, idx0 + r * rowStride, scale);
2576                    }
2577                }
2578                if (columns > 4) {
2579                    for (int c = 0; c < columns; c += 8) {
2580                        for (int r = 0; r < rows; r++) {
2581                            idx1 = idx0 + r * rowStride + c;
2582                            idx2 = 2 * r;
2583                            idx3 = 2 * rows + 2 * r;
2584                            idx4 = idx3 + 2 * rows;
2585                            idx5 = idx4 + 2 * rows;
2586                            t[idx2] = a[idx1];
2587                            t[idx2 + 1] = a[idx1 + 1];
2588                            t[idx3] = a[idx1 + 2];
2589                            t[idx3 + 1] = a[idx1 + 3];
2590                            t[idx4] = a[idx1 + 4];
2591                            t[idx4 + 1] = a[idx1 + 5];
2592                            t[idx5] = a[idx1 + 6];
2593                            t[idx5 + 1] = a[idx1 + 7];
2594                        }
2595                        fftRows.complexInverse(t, 0, scale);
2596                        fftRows.complexInverse(t, 2 * rows, scale);
2597                        fftRows.complexInverse(t, 4 * rows, scale);
2598                        fftRows.complexInverse(t, 6 * rows, scale);
2599                        for (int r = 0; r < rows; r++) {
2600                            idx1 = idx0 + r * rowStride + c;
2601                            idx2 = 2 * r;
2602                            idx3 = 2 * rows + 2 * r;
2603                            idx4 = idx3 + 2 * rows;
2604                            idx5 = idx4 + 2 * rows;
2605                            a[idx1] = t[idx2];
2606                            a[idx1 + 1] = t[idx2 + 1];
2607                            a[idx1 + 2] = t[idx3];
2608                            a[idx1 + 3] = t[idx3 + 1];
2609                            a[idx1 + 4] = t[idx4];
2610                            a[idx1 + 5] = t[idx4 + 1];
2611                            a[idx1 + 6] = t[idx5];
2612                            a[idx1 + 7] = t[idx5 + 1];
2613                        }
2614                    }
2615                } else if (columns == 4) {
2616                    for (int r = 0; r < rows; r++) {
2617                        idx1 = idx0 + r * rowStride;
2618                        idx2 = 2 * r;
2619                        idx3 = 2 * rows + 2 * r;
2620                        t[idx2] = a[idx1];
2621                        t[idx2 + 1] = a[idx1 + 1];
2622                        t[idx3] = a[idx1 + 2];
2623                        t[idx3 + 1] = a[idx1 + 3];
2624                    }
2625                    fftRows.complexInverse(t, 0, scale);
2626                    fftRows.complexInverse(t, 2 * rows, scale);
2627                    for (int r = 0; r < rows; r++) {
2628                        idx1 = idx0 + r * rowStride;
2629                        idx2 = 2 * r;
2630                        idx3 = 2 * rows + 2 * r;
2631                        a[idx1] = t[idx2];
2632                        a[idx1 + 1] = t[idx2 + 1];
2633                        a[idx1 + 2] = t[idx3];
2634                        a[idx1 + 3] = t[idx3 + 1];
2635                    }
2636                } else if (columns == 2) {
2637                    for (int r = 0; r < rows; r++) {
2638                        idx1 = idx0 + r * rowStride;
2639                        idx2 = 2 * r;
2640                        t[idx2] = a[idx1];
2641                        t[idx2 + 1] = a[idx1 + 1];
2642                    }
2643                    fftRows.complexInverse(t, 0, scale);
2644                    for (int r = 0; r < rows; r++) {
2645                        idx1 = idx0 + r * rowStride;
2646                        idx2 = 2 * r;
2647                        a[idx1] = t[idx2];
2648                        a[idx1 + 1] = t[idx2 + 1];
2649                    }
2650                }
2651            }
2652        }
2653    }
2654
2655    private void xdft3da_sub1(int icr, int isgn, double[][][] a, boolean scale) {
2656        int idx2, idx3, idx4, idx5;
2657
2658        if (isgn == -1) {
2659            for (int s = 0; s < slices; s++) {
2660                if (icr == 0) {
2661                    for (int r = 0; r < rows; r++) {
2662                        fftColumns.complexForward(a[s][r]);
2663                    }
2664                } else {
2665                    for (int r = 0; r < rows; r++) {
2666                        fftColumns.realForward(a[s][r], 0);
2667                    }
2668                }
2669                if (columns > 4) {
2670                    for (int c = 0; c < columns; c += 8) {
2671                        for (int r = 0; r < rows; r++) {
2672                            idx2 = 2 * r;
2673                            idx3 = 2 * rows + 2 * r;
2674                            idx4 = idx3 + 2 * rows;
2675                            idx5 = idx4 + 2 * rows;
2676                            t[idx2] = a[s][r][c];
2677                            t[idx2 + 1] = a[s][r][c + 1];
2678                            t[idx3] = a[s][r][c + 2];
2679                            t[idx3 + 1] = a[s][r][c + 3];
2680                            t[idx4] = a[s][r][c + 4];
2681                            t[idx4 + 1] = a[s][r][c + 5];
2682                            t[idx5] = a[s][r][c + 6];
2683                            t[idx5 + 1] = a[s][r][c + 7];
2684                        }
2685                        fftRows.complexForward(t, 0);
2686                        fftRows.complexForward(t, 2 * rows);
2687                        fftRows.complexForward(t, 4 * rows);
2688                        fftRows.complexForward(t, 6 * rows);
2689                        for (int r = 0; r < rows; r++) {
2690                            idx2 = 2 * r;
2691                            idx3 = 2 * rows + 2 * r;
2692                            idx4 = idx3 + 2 * rows;
2693                            idx5 = idx4 + 2 * rows;
2694                            a[s][r][c] = t[idx2];
2695                            a[s][r][c + 1] = t[idx2 + 1];
2696                            a[s][r][c + 2] = t[idx3];
2697                            a[s][r][c + 3] = t[idx3 + 1];
2698                            a[s][r][c + 4] = t[idx4];
2699                            a[s][r][c + 5] = t[idx4 + 1];
2700                            a[s][r][c + 6] = t[idx5];
2701                            a[s][r][c + 7] = t[idx5 + 1];
2702                        }
2703                    }
2704                } else if (columns == 4) {
2705                    for (int r = 0; r < rows; r++) {
2706                        idx2 = 2 * r;
2707                        idx3 = 2 * rows + 2 * r;
2708                        t[idx2] = a[s][r][0];
2709                        t[idx2 + 1] = a[s][r][1];
2710                        t[idx3] = a[s][r][2];
2711                        t[idx3 + 1] = a[s][r][3];
2712                    }
2713                    fftRows.complexForward(t, 0);
2714                    fftRows.complexForward(t, 2 * rows);
2715                    for (int r = 0; r < rows; r++) {
2716                        idx2 = 2 * r;
2717                        idx3 = 2 * rows + 2 * r;
2718                        a[s][r][0] = t[idx2];
2719                        a[s][r][1] = t[idx2 + 1];
2720                        a[s][r][2] = t[idx3];
2721                        a[s][r][3] = t[idx3 + 1];
2722                    }
2723                } else if (columns == 2) {
2724                    for (int r = 0; r < rows; r++) {
2725                        idx2 = 2 * r;
2726                        t[idx2] = a[s][r][0];
2727                        t[idx2 + 1] = a[s][r][1];
2728                    }
2729                    fftRows.complexForward(t, 0);
2730                    for (int r = 0; r < rows; r++) {
2731                        idx2 = 2 * r;
2732                        a[s][r][0] = t[idx2];
2733                        a[s][r][1] = t[idx2 + 1];
2734                    }
2735                }
2736            }
2737        } else {
2738            for (int s = 0; s < slices; s++) {
2739                if (icr == 0) {
2740                    for (int r = 0; r < rows; r++) {
2741                        fftColumns.complexInverse(a[s][r], scale);
2742                    }
2743                }
2744                if (columns > 4) {
2745                    for (int c = 0; c < columns; c += 8) {
2746                        for (int r = 0; r < rows; r++) {
2747                            idx2 = 2 * r;
2748                            idx3 = 2 * rows + 2 * r;
2749                            idx4 = idx3 + 2 * rows;
2750                            idx5 = idx4 + 2 * rows;
2751                            t[idx2] = a[s][r][c];
2752                            t[idx2 + 1] = a[s][r][c + 1];
2753                            t[idx3] = a[s][r][c + 2];
2754                            t[idx3 + 1] = a[s][r][c + 3];
2755                            t[idx4] = a[s][r][c + 4];
2756                            t[idx4 + 1] = a[s][r][c + 5];
2757                            t[idx5] = a[s][r][c + 6];
2758                            t[idx5 + 1] = a[s][r][c + 7];
2759                        }
2760                        fftRows.complexInverse(t, 0, scale);
2761                        fftRows.complexInverse(t, 2 * rows, scale);
2762                        fftRows.complexInverse(t, 4 * rows, scale);
2763                        fftRows.complexInverse(t, 6 * rows, scale);
2764                        for (int r = 0; r < rows; r++) {
2765                            idx2 = 2 * r;
2766                            idx3 = 2 * rows + 2 * r;
2767                            idx4 = idx3 + 2 * rows;
2768                            idx5 = idx4 + 2 * rows;
2769                            a[s][r][c] = t[idx2];
2770                            a[s][r][c + 1] = t[idx2 + 1];
2771                            a[s][r][c + 2] = t[idx3];
2772                            a[s][r][c + 3] = t[idx3 + 1];
2773                            a[s][r][c + 4] = t[idx4];
2774                            a[s][r][c + 5] = t[idx4 + 1];
2775                            a[s][r][c + 6] = t[idx5];
2776                            a[s][r][c + 7] = t[idx5 + 1];
2777                        }
2778                    }
2779                } else if (columns == 4) {
2780                    for (int r = 0; r < rows; r++) {
2781                        idx2 = 2 * r;
2782                        idx3 = 2 * rows + 2 * r;
2783                        t[idx2] = a[s][r][0];
2784                        t[idx2 + 1] = a[s][r][1];
2785                        t[idx3] = a[s][r][2];
2786                        t[idx3 + 1] = a[s][r][3];
2787                    }
2788                    fftRows.complexInverse(t, 0, scale);
2789                    fftRows.complexInverse(t, 2 * rows, scale);
2790                    for (int r = 0; r < rows; r++) {
2791                        idx2 = 2 * r;
2792                        idx3 = 2 * rows + 2 * r;
2793                        a[s][r][0] = t[idx2];
2794                        a[s][r][1] = t[idx2 + 1];
2795                        a[s][r][2] = t[idx3];
2796                        a[s][r][3] = t[idx3 + 1];
2797                    }
2798                } else if (columns == 2) {
2799                    for (int r = 0; r < rows; r++) {
2800                        idx2 = 2 * r;
2801                        t[idx2] = a[s][r][0];
2802                        t[idx2 + 1] = a[s][r][1];
2803                    }
2804                    fftRows.complexInverse(t, 0, scale);
2805                    for (int r = 0; r < rows; r++) {
2806                        idx2 = 2 * r;
2807                        a[s][r][0] = t[idx2];
2808                        a[s][r][1] = t[idx2 + 1];
2809                    }
2810                }
2811                if (icr != 0) {
2812                    for (int r = 0; r < rows; r++) {
2813                        fftColumns.realInverse(a[s][r], scale);
2814                    }
2815                }
2816            }
2817        }
2818    }
2819
2820    private void xdft3da_sub2(int icr, int isgn, double[][][] a, boolean scale) {
2821        int idx2, idx3, idx4, idx5;
2822
2823        if (isgn == -1) {
2824            for (int s = 0; s < slices; s++) {
2825                if (icr == 0) {
2826                    for (int r = 0; r < rows; r++) {
2827                        fftColumns.complexForward(a[s][r]);
2828                    }
2829                } else {
2830                    for (int r = 0; r < rows; r++) {
2831                        fftColumns.realForward(a[s][r]);
2832                    }
2833                }
2834                if (columns > 4) {
2835                    for (int c = 0; c < columns; c += 8) {
2836                        for (int r = 0; r < rows; r++) {
2837                            idx2 = 2 * r;
2838                            idx3 = 2 * rows + 2 * r;
2839                            idx4 = idx3 + 2 * rows;
2840                            idx5 = idx4 + 2 * rows;
2841                            t[idx2] = a[s][r][c];
2842                            t[idx2 + 1] = a[s][r][c + 1];
2843                            t[idx3] = a[s][r][c + 2];
2844                            t[idx3 + 1] = a[s][r][c + 3];
2845                            t[idx4] = a[s][r][c + 4];
2846                            t[idx4 + 1] = a[s][r][c + 5];
2847                            t[idx5] = a[s][r][c + 6];
2848                            t[idx5 + 1] = a[s][r][c + 7];
2849                        }
2850                        fftRows.complexForward(t, 0);
2851                        fftRows.complexForward(t, 2 * rows);
2852                        fftRows.complexForward(t, 4 * rows);
2853                        fftRows.complexForward(t, 6 * rows);
2854                        for (int r = 0; r < rows; r++) {
2855                            idx2 = 2 * r;
2856                            idx3 = 2 * rows + 2 * r;
2857                            idx4 = idx3 + 2 * rows;
2858                            idx5 = idx4 + 2 * rows;
2859                            a[s][r][c] = t[idx2];
2860                            a[s][r][c + 1] = t[idx2 + 1];
2861                            a[s][r][c + 2] = t[idx3];
2862                            a[s][r][c + 3] = t[idx3 + 1];
2863                            a[s][r][c + 4] = t[idx4];
2864                            a[s][r][c + 5] = t[idx4 + 1];
2865                            a[s][r][c + 6] = t[idx5];
2866                            a[s][r][c + 7] = t[idx5 + 1];
2867                        }
2868                    }
2869                } else if (columns == 4) {
2870                    for (int r = 0; r < rows; r++) {
2871                        idx2 = 2 * r;
2872                        idx3 = 2 * rows + 2 * r;
2873                        t[idx2] = a[s][r][0];
2874                        t[idx2 + 1] = a[s][r][1];
2875                        t[idx3] = a[s][r][2];
2876                        t[idx3 + 1] = a[s][r][3];
2877                    }
2878                    fftRows.complexForward(t, 0);
2879                    fftRows.complexForward(t, 2 * rows);
2880                    for (int r = 0; r < rows; r++) {
2881                        idx2 = 2 * r;
2882                        idx3 = 2 * rows + 2 * r;
2883                        a[s][r][0] = t[idx2];
2884                        a[s][r][1] = t[idx2 + 1];
2885                        a[s][r][2] = t[idx3];
2886                        a[s][r][3] = t[idx3 + 1];
2887                    }
2888                } else if (columns == 2) {
2889                    for (int r = 0; r < rows; r++) {
2890                        idx2 = 2 * r;
2891                        t[idx2] = a[s][r][0];
2892                        t[idx2 + 1] = a[s][r][1];
2893                    }
2894                    fftRows.complexForward(t, 0);
2895                    for (int r = 0; r < rows; r++) {
2896                        idx2 = 2 * r;
2897                        a[s][r][0] = t[idx2];
2898                        a[s][r][1] = t[idx2 + 1];
2899                    }
2900                }
2901            }
2902        } else {
2903            for (int s = 0; s < slices; s++) {
2904                if (icr == 0) {
2905                    for (int r = 0; r < rows; r++) {
2906                        fftColumns.complexInverse(a[s][r], scale);
2907                    }
2908                } else {
2909                    for (int r = 0; r < rows; r++) {
2910                        fftColumns.realInverse2(a[s][r], 0, scale);
2911                    }
2912                }
2913                if (columns > 4) {
2914                    for (int c = 0; c < columns; c += 8) {
2915                        for (int r = 0; r < rows; r++) {
2916                            idx2 = 2 * r;
2917                            idx3 = 2 * rows + 2 * r;
2918                            idx4 = idx3 + 2 * rows;
2919                            idx5 = idx4 + 2 * rows;
2920                            t[idx2] = a[s][r][c];
2921                            t[idx2 + 1] = a[s][r][c + 1];
2922                            t[idx3] = a[s][r][c + 2];
2923                            t[idx3 + 1] = a[s][r][c + 3];
2924                            t[idx4] = a[s][r][c + 4];
2925                            t[idx4 + 1] = a[s][r][c + 5];
2926                            t[idx5] = a[s][r][c + 6];
2927                            t[idx5 + 1] = a[s][r][c + 7];
2928                        }
2929                        fftRows.complexInverse(t, 0, scale);
2930                        fftRows.complexInverse(t, 2 * rows, scale);
2931                        fftRows.complexInverse(t, 4 * rows, scale);
2932                        fftRows.complexInverse(t, 6 * rows, scale);
2933                        for (int r = 0; r < rows; r++) {
2934                            idx2 = 2 * r;
2935                            idx3 = 2 * rows + 2 * r;
2936                            idx4 = idx3 + 2 * rows;
2937                            idx5 = idx4 + 2 * rows;
2938                            a[s][r][c] = t[idx2];
2939                            a[s][r][c + 1] = t[idx2 + 1];
2940                            a[s][r][c + 2] = t[idx3];
2941                            a[s][r][c + 3] = t[idx3 + 1];
2942                            a[s][r][c + 4] = t[idx4];
2943                            a[s][r][c + 5] = t[idx4 + 1];
2944                            a[s][r][c + 6] = t[idx5];
2945                            a[s][r][c + 7] = t[idx5 + 1];
2946                        }
2947                    }
2948                } else if (columns == 4) {
2949                    for (int r = 0; r < rows; r++) {
2950                        idx2 = 2 * r;
2951                        idx3 = 2 * rows + 2 * r;
2952                        t[idx2] = a[s][r][0];
2953                        t[idx2 + 1] = a[s][r][1];
2954                        t[idx3] = a[s][r][2];
2955                        t[idx3 + 1] = a[s][r][3];
2956                    }
2957                    fftRows.complexInverse(t, 0, scale);
2958                    fftRows.complexInverse(t, 2 * rows, scale);
2959                    for (int r = 0; r < rows; r++) {
2960                        idx2 = 2 * r;
2961                        idx3 = 2 * rows + 2 * r;
2962                        a[s][r][0] = t[idx2];
2963                        a[s][r][1] = t[idx2 + 1];
2964                        a[s][r][2] = t[idx3];
2965                        a[s][r][3] = t[idx3 + 1];
2966                    }
2967                } else if (columns == 2) {
2968                    for (int r = 0; r < rows; r++) {
2969                        idx2 = 2 * r;
2970                        t[idx2] = a[s][r][0];
2971                        t[idx2 + 1] = a[s][r][1];
2972                    }
2973                    fftRows.complexInverse(t, 0, scale);
2974                    for (int r = 0; r < rows; r++) {
2975                        idx2 = 2 * r;
2976                        a[s][r][0] = t[idx2];
2977                        a[s][r][1] = t[idx2 + 1];
2978                    }
2979                }
2980            }
2981        }
2982    }
2983
2984    private void cdft3db_sub(int isgn, double[] a, boolean scale) {
2985        int idx0, idx1, idx2, idx3, idx4, idx5;
2986
2987        if (isgn == -1) {
2988            if (columns > 4) {
2989                for (int r = 0; r < rows; r++) {
2990                    idx0 = r * rowStride;
2991                    for (int c = 0; c < columns; c += 8) {
2992                        for (int s = 0; s < slices; s++) {
2993                            idx1 = s * sliceStride + idx0 + c;
2994                            idx2 = 2 * s;
2995                            idx3 = 2 * slices + 2 * s;
2996                            idx4 = idx3 + 2 * slices;
2997                            idx5 = idx4 + 2 * slices;
2998                            t[idx2] = a[idx1];
2999                            t[idx2 + 1] = a[idx1 + 1];
3000                            t[idx3] = a[idx1 + 2];
3001                            t[idx3 + 1] = a[idx1 + 3];
3002                            t[idx4] = a[idx1 + 4];
3003                            t[idx4 + 1] = a[idx1 + 5];
3004                            t[idx5] = a[idx1 + 6];
3005                            t[idx5 + 1] = a[idx1 + 7];
3006                        }
3007                        fftSlices.complexForward(t, 0);
3008                        fftSlices.complexForward(t, 2 * slices);
3009                        fftSlices.complexForward(t, 4 * slices);
3010                        fftSlices.complexForward(t, 6 * slices);
3011                        for (int s = 0; s < slices; s++) {
3012                            idx1 = s * sliceStride + idx0 + c;
3013                            idx2 = 2 * s;
3014                            idx3 = 2 * slices + 2 * s;
3015                            idx4 = idx3 + 2 * slices;
3016                            idx5 = idx4 + 2 * slices;
3017                            a[idx1] = t[idx2];
3018                            a[idx1 + 1] = t[idx2 + 1];
3019                            a[idx1 + 2] = t[idx3];
3020                            a[idx1 + 3] = t[idx3 + 1];
3021                            a[idx1 + 4] = t[idx4];
3022                            a[idx1 + 5] = t[idx4 + 1];
3023                            a[idx1 + 6] = t[idx5];
3024                            a[idx1 + 7] = t[idx5 + 1];
3025                        }
3026                    }
3027                }
3028            } else if (columns == 4) {
3029                for (int r = 0; r < rows; r++) {
3030                    idx0 = r * rowStride;
3031                    for (int s = 0; s < slices; s++) {
3032                        idx1 = s * sliceStride + idx0;
3033                        idx2 = 2 * s;
3034                        idx3 = 2 * slices + 2 * s;
3035                        t[idx2] = a[idx1];
3036                        t[idx2 + 1] = a[idx1 + 1];
3037                        t[idx3] = a[idx1 + 2];
3038                        t[idx3 + 1] = a[idx1 + 3];
3039                    }
3040                    fftSlices.complexForward(t, 0);
3041                    fftSlices.complexForward(t, 2 * slices);
3042                    for (int s = 0; s < slices; s++) {
3043                        idx1 = s * sliceStride + idx0;
3044                        idx2 = 2 * s;
3045                        idx3 = 2 * slices + 2 * s;
3046                        a[idx1] = t[idx2];
3047                        a[idx1 + 1] = t[idx2 + 1];
3048                        a[idx1 + 2] = t[idx3];
3049                        a[idx1 + 3] = t[idx3 + 1];
3050                    }
3051                }
3052            } else if (columns == 2) {
3053                for (int r = 0; r < rows; r++) {
3054                    idx0 = r * rowStride;
3055                    for (int s = 0; s < slices; s++) {
3056                        idx1 = s * sliceStride + idx0;
3057                        idx2 = 2 * s;
3058                        t[idx2] = a[idx1];
3059                        t[idx2 + 1] = a[idx1 + 1];
3060                    }
3061                    fftSlices.complexForward(t, 0);
3062                    for (int s = 0; s < slices; s++) {
3063                        idx1 = s * sliceStride + idx0;
3064                        idx2 = 2 * s;
3065                        a[idx1] = t[idx2];
3066                        a[idx1 + 1] = t[idx2 + 1];
3067                    }
3068                }
3069            }
3070        } else {
3071            if (columns > 4) {
3072                for (int r = 0; r < rows; r++) {
3073                    idx0 = r * rowStride;
3074                    for (int c = 0; c < columns; c += 8) {
3075                        for (int s = 0; s < slices; s++) {
3076                            idx1 = s * sliceStride + idx0 + c;
3077                            idx2 = 2 * s;
3078                            idx3 = 2 * slices + 2 * s;
3079                            idx4 = idx3 + 2 * slices;
3080                            idx5 = idx4 + 2 * slices;
3081                            t[idx2] = a[idx1];
3082                            t[idx2 + 1] = a[idx1 + 1];
3083                            t[idx3] = a[idx1 + 2];
3084                            t[idx3 + 1] = a[idx1 + 3];
3085                            t[idx4] = a[idx1 + 4];
3086                            t[idx4 + 1] = a[idx1 + 5];
3087                            t[idx5] = a[idx1 + 6];
3088                            t[idx5 + 1] = a[idx1 + 7];
3089                        }
3090                        fftSlices.complexInverse(t, 0, scale);
3091                        fftSlices.complexInverse(t, 2 * slices, scale);
3092                        fftSlices.complexInverse(t, 4 * slices, scale);
3093                        fftSlices.complexInverse(t, 6 * slices, scale);
3094                        for (int s = 0; s < slices; s++) {
3095                            idx1 = s * sliceStride + idx0 + c;
3096                            idx2 = 2 * s;
3097                            idx3 = 2 * slices + 2 * s;
3098                            idx4 = idx3 + 2 * slices;
3099                            idx5 = idx4 + 2 * slices;
3100                            a[idx1] = t[idx2];
3101                            a[idx1 + 1] = t[idx2 + 1];
3102                            a[idx1 + 2] = t[idx3];
3103                            a[idx1 + 3] = t[idx3 + 1];
3104                            a[idx1 + 4] = t[idx4];
3105                            a[idx1 + 5] = t[idx4 + 1];
3106                            a[idx1 + 6] = t[idx5];
3107                            a[idx1 + 7] = t[idx5 + 1];
3108                        }
3109                    }
3110                }
3111            } else if (columns == 4) {
3112                for (int r = 0; r < rows; r++) {
3113                    idx0 = r * rowStride;
3114                    for (int s = 0; s < slices; s++) {
3115                        idx1 = s * sliceStride + idx0;
3116                        idx2 = 2 * s;
3117                        idx3 = 2 * slices + 2 * s;
3118                        t[idx2] = a[idx1];
3119                        t[idx2 + 1] = a[idx1 + 1];
3120                        t[idx3] = a[idx1 + 2];
3121                        t[idx3 + 1] = a[idx1 + 3];
3122                    }
3123                    fftSlices.complexInverse(t, 0, scale);
3124                    fftSlices.complexInverse(t, 2 * slices, scale);
3125                    for (int s = 0; s < slices; s++) {
3126                        idx1 = s * sliceStride + idx0;
3127                        idx2 = 2 * s;
3128                        idx3 = 2 * slices + 2 * s;
3129                        a[idx1] = t[idx2];
3130                        a[idx1 + 1] = t[idx2 + 1];
3131                        a[idx1 + 2] = t[idx3];
3132                        a[idx1 + 3] = t[idx3 + 1];
3133                    }
3134                }
3135            } else if (columns == 2) {
3136                for (int r = 0; r < rows; r++) {
3137                    idx0 = r * rowStride;
3138                    for (int s = 0; s < slices; s++) {
3139                        idx1 = s * sliceStride + idx0;
3140                        idx2 = 2 * s;
3141                        t[idx2] = a[idx1];
3142                        t[idx2 + 1] = a[idx1 + 1];
3143                    }
3144                    fftSlices.complexInverse(t, 0, scale);
3145                    for (int s = 0; s < slices; s++) {
3146                        idx1 = s * sliceStride + idx0;
3147                        idx2 = 2 * s;
3148                        a[idx1] = t[idx2];
3149                        a[idx1 + 1] = t[idx2 + 1];
3150                    }
3151                }
3152            }
3153        }
3154    }
3155
3156    private void cdft3db_sub(int isgn, double[][][] a, boolean scale) {
3157        int idx2, idx3, idx4, idx5;
3158
3159        if (isgn == -1) {
3160            if (columns > 4) {
3161                for (int r = 0; r < rows; r++) {
3162                    for (int c = 0; c < columns; c += 8) {
3163                        for (int s = 0; s < slices; s++) {
3164                            idx2 = 2 * s;
3165                            idx3 = 2 * slices + 2 * s;
3166                            idx4 = idx3 + 2 * slices;
3167                            idx5 = idx4 + 2 * slices;
3168                            t[idx2] = a[s][r][c];
3169                            t[idx2 + 1] = a[s][r][c + 1];
3170                            t[idx3] = a[s][r][c + 2];
3171                            t[idx3 + 1] = a[s][r][c + 3];
3172                            t[idx4] = a[s][r][c + 4];
3173                            t[idx4 + 1] = a[s][r][c + 5];
3174                            t[idx5] = a[s][r][c + 6];
3175                            t[idx5 + 1] = a[s][r][c + 7];
3176                        }
3177                        fftSlices.complexForward(t, 0);
3178                        fftSlices.complexForward(t, 2 * slices);
3179                        fftSlices.complexForward(t, 4 * slices);
3180                        fftSlices.complexForward(t, 6 * slices);
3181                        for (int s = 0; s < slices; s++) {
3182                            idx2 = 2 * s;
3183                            idx3 = 2 * slices + 2 * s;
3184                            idx4 = idx3 + 2 * slices;
3185                            idx5 = idx4 + 2 * slices;
3186                            a[s][r][c] = t[idx2];
3187                            a[s][r][c + 1] = t[idx2 + 1];
3188                            a[s][r][c + 2] = t[idx3];
3189                            a[s][r][c + 3] = t[idx3 + 1];
3190                            a[s][r][c + 4] = t[idx4];
3191                            a[s][r][c + 5] = t[idx4 + 1];
3192                            a[s][r][c + 6] = t[idx5];
3193                            a[s][r][c + 7] = t[idx5 + 1];
3194                        }
3195                    }
3196                }
3197            } else if (columns == 4) {
3198                for (int r = 0; r < rows; r++) {
3199                    for (int s = 0; s < slices; s++) {
3200                        idx2 = 2 * s;
3201                        idx3 = 2 * slices + 2 * s;
3202                        t[idx2] = a[s][r][0];
3203                        t[idx2 + 1] = a[s][r][1];
3204                        t[idx3] = a[s][r][2];
3205                        t[idx3 + 1] = a[s][r][3];
3206                    }
3207                    fftSlices.complexForward(t, 0);
3208                    fftSlices.complexForward(t, 2 * slices);
3209                    for (int s = 0; s < slices; s++) {
3210                        idx2 = 2 * s;
3211                        idx3 = 2 * slices + 2 * s;
3212                        a[s][r][0] = t[idx2];
3213                        a[s][r][1] = t[idx2 + 1];
3214                        a[s][r][2] = t[idx3];
3215                        a[s][r][3] = t[idx3 + 1];
3216                    }
3217                }
3218            } else if (columns == 2) {
3219                for (int r = 0; r < rows; r++) {
3220                    for (int s = 0; s < slices; s++) {
3221                        idx2 = 2 * s;
3222                        t[idx2] = a[s][r][0];
3223                        t[idx2 + 1] = a[s][r][1];
3224                    }
3225                    fftSlices.complexForward(t, 0);
3226                    for (int s = 0; s < slices; s++) {
3227                        idx2 = 2 * s;
3228                        a[s][r][0] = t[idx2];
3229                        a[s][r][1] = t[idx2 + 1];
3230                    }
3231                }
3232            }
3233        } else {
3234            if (columns > 4) {
3235                for (int r = 0; r < rows; r++) {
3236                    for (int c = 0; c < columns; c += 8) {
3237                        for (int s = 0; s < slices; s++) {
3238                            idx2 = 2 * s;
3239                            idx3 = 2 * slices + 2 * s;
3240                            idx4 = idx3 + 2 * slices;
3241                            idx5 = idx4 + 2 * slices;
3242                            t[idx2] = a[s][r][c];
3243                            t[idx2 + 1] = a[s][r][c + 1];
3244                            t[idx3] = a[s][r][c + 2];
3245                            t[idx3 + 1] = a[s][r][c + 3];
3246                            t[idx4] = a[s][r][c + 4];
3247                            t[idx4 + 1] = a[s][r][c + 5];
3248                            t[idx5] = a[s][r][c + 6];
3249                            t[idx5 + 1] = a[s][r][c + 7];
3250                        }
3251                        fftSlices.complexInverse(t, 0, scale);
3252                        fftSlices.complexInverse(t, 2 * slices, scale);
3253                        fftSlices.complexInverse(t, 4 * slices, scale);
3254                        fftSlices.complexInverse(t, 6 * slices, scale);
3255                        for (int s = 0; s < slices; s++) {
3256                            idx2 = 2 * s;
3257                            idx3 = 2 * slices + 2 * s;
3258                            idx4 = idx3 + 2 * slices;
3259                            idx5 = idx4 + 2 * slices;
3260                            a[s][r][c] = t[idx2];
3261                            a[s][r][c + 1] = t[idx2 + 1];
3262                            a[s][r][c + 2] = t[idx3];
3263                            a[s][r][c + 3] = t[idx3 + 1];
3264                            a[s][r][c + 4] = t[idx4];
3265                            a[s][r][c + 5] = t[idx4 + 1];
3266                            a[s][r][c + 6] = t[idx5];
3267                            a[s][r][c + 7] = t[idx5 + 1];
3268                        }
3269                    }
3270                }
3271            } else if (columns == 4) {
3272                for (int r = 0; r < rows; r++) {
3273                    for (int s = 0; s < slices; s++) {
3274                        idx2 = 2 * s;
3275                        idx3 = 2 * slices + 2 * s;
3276                        t[idx2] = a[s][r][0];
3277                        t[idx2 + 1] = a[s][r][1];
3278                        t[idx3] = a[s][r][2];
3279                        t[idx3 + 1] = a[s][r][3];
3280                    }
3281                    fftSlices.complexInverse(t, 0, scale);
3282                    fftSlices.complexInverse(t, 2 * slices, scale);
3283                    for (int s = 0; s < slices; s++) {
3284                        idx2 = 2 * s;
3285                        idx3 = 2 * slices + 2 * s;
3286                        a[s][r][0] = t[idx2];
3287                        a[s][r][1] = t[idx2 + 1];
3288                        a[s][r][2] = t[idx3];
3289                        a[s][r][3] = t[idx3 + 1];
3290                    }
3291                }
3292            } else if (columns == 2) {
3293                for (int r = 0; r < rows; r++) {
3294                    for (int s = 0; s < slices; s++) {
3295                        idx2 = 2 * s;
3296                        t[idx2] = a[s][r][0];
3297                        t[idx2 + 1] = a[s][r][1];
3298                    }
3299                    fftSlices.complexInverse(t, 0, scale);
3300                    for (int s = 0; s < slices; s++) {
3301                        idx2 = 2 * s;
3302                        a[s][r][0] = t[idx2];
3303                        a[s][r][1] = t[idx2 + 1];
3304                    }
3305                }
3306            }
3307        }
3308    }
3309
3310    private void xdft3da_subth1(final int icr, final int isgn, final double[] a, final boolean scale) {
3311        int nt, i;
3312        final int nthreads = Math.min(ConcurrencyUtils.getNumberOfThreads(), slices);
3313        nt = 8 * rows;
3314        if (columns == 4) {
3315            nt >>= 1;
3316        } else if (columns < 4) {
3317            nt >>= 2;
3318        }
3319        Future<?>[] futures = new Future[nthreads];
3320        for (i = 0; i < nthreads; i++) {
3321            final int n0 = i;
3322            final int startt = nt * i;
3323            futures[i] = ConcurrencyUtils.submit(new Runnable() {
3324                @Override
3325                                public void run() {
3326                    int idx0, idx1, idx2, idx3, idx4, idx5;
3327
3328                    if (isgn == -1) {
3329                        for (int s = n0; s < slices; s += nthreads) {
3330                            idx0 = s * sliceStride;
3331                            if (icr == 0) {
3332                                for (int r = 0; r < rows; r++) {
3333                                    fftColumns.complexForward(a, idx0 + r * rowStride);
3334                                }
3335                            } else {
3336                                for (int r = 0; r < rows; r++) {
3337                                    fftColumns.realForward(a, idx0 + r * rowStride);
3338                                }
3339                            }
3340                            if (columns > 4) {
3341                                for (int c = 0; c < columns; c += 8) {
3342                                    for (int r = 0; r < rows; r++) {
3343                                        idx1 = idx0 + r * rowStride + c;
3344                                        idx2 = startt + 2 * r;
3345                                        idx3 = startt + 2 * rows + 2 * r;
3346                                        idx4 = idx3 + 2 * rows;
3347                                        idx5 = idx4 + 2 * rows;
3348                                        t[idx2] = a[idx1];
3349                                        t[idx2 + 1] = a[idx1 + 1];
3350                                        t[idx3] = a[idx1 + 2];
3351                                        t[idx3 + 1] = a[idx1 + 3];
3352                                        t[idx4] = a[idx1 + 4];
3353                                        t[idx4 + 1] = a[idx1 + 5];
3354                                        t[idx5] = a[idx1 + 6];
3355                                        t[idx5 + 1] = a[idx1 + 7];
3356                                    }
3357                                    fftRows.complexForward(t, startt);
3358                                    fftRows.complexForward(t, startt + 2 * rows);
3359                                    fftRows.complexForward(t, startt + 4 * rows);
3360                                    fftRows.complexForward(t, startt + 6 * rows);
3361                                    for (int r = 0; r < rows; r++) {
3362                                        idx1 = idx0 + r * rowStride + c;
3363                                        idx2 = startt + 2 * r;
3364                                        idx3 = startt + 2 * rows + 2 * r;
3365                                        idx4 = idx3 + 2 * rows;
3366                                        idx5 = idx4 + 2 * rows;
3367                                        a[idx1] = t[idx2];
3368                                        a[idx1 + 1] = t[idx2 + 1];
3369                                        a[idx1 + 2] = t[idx3];
3370                                        a[idx1 + 3] = t[idx3 + 1];
3371                                        a[idx1 + 4] = t[idx4];
3372                                        a[idx1 + 5] = t[idx4 + 1];
3373                                        a[idx1 + 6] = t[idx5];
3374                                        a[idx1 + 7] = t[idx5 + 1];
3375                                    }
3376                                }
3377                            } else if (columns == 4) {
3378                                for (int r = 0; r < rows; r++) {
3379                                    idx1 = idx0 + r * rowStride;
3380                                    idx2 = startt + 2 * r;
3381                                    idx3 = startt + 2 * rows + 2 * r;
3382                                    t[idx2] = a[idx1];
3383                                    t[idx2 + 1] = a[idx1 + 1];
3384                                    t[idx3] = a[idx1 + 2];
3385                                    t[idx3 + 1] = a[idx1 + 3];
3386                                }
3387                                fftRows.complexForward(t, startt);
3388                                fftRows.complexForward(t, startt + 2 * rows);
3389                                for (int r = 0; r < rows; r++) {
3390                                    idx1 = idx0 + r * rowStride;
3391                                    idx2 = startt + 2 * r;
3392                                    idx3 = startt + 2 * rows + 2 * r;
3393                                    a[idx1] = t[idx2];
3394                                    a[idx1 + 1] = t[idx2 + 1];
3395                                    a[idx1 + 2] = t[idx3];
3396                                    a[idx1 + 3] = t[idx3 + 1];
3397                                }
3398                            } else if (columns == 2) {
3399                                for (int r = 0; r < rows; r++) {
3400                                    idx1 = idx0 + r * rowStride;
3401                                    idx2 = startt + 2 * r;
3402                                    t[idx2] = a[idx1];
3403                                    t[idx2 + 1] = a[idx1 + 1];
3404                                }
3405                                fftRows.complexForward(t, startt);
3406                                for (int r = 0; r < rows; r++) {
3407                                    idx1 = idx0 + r * rowStride;
3408                                    idx2 = startt + 2 * r;
3409                                    a[idx1] = t[idx2];
3410                                    a[idx1 + 1] = t[idx2 + 1];
3411                                }
3412                            }
3413
3414                        }
3415                    } else {
3416                        for (int s = n0; s < slices; s += nthreads) {
3417                            idx0 = s * sliceStride;
3418                            if (icr == 0) {
3419                                for (int r = 0; r < rows; r++) {
3420                                    fftColumns.complexInverse(a, idx0 + r * rowStride, scale);
3421                                }
3422                            }
3423                            if (columns > 4) {
3424                                for (int c = 0; c < columns; c += 8) {
3425                                    for (int r = 0; r < rows; r++) {
3426                                        idx1 = idx0 + r * rowStride + c;
3427                                        idx2 = startt + 2 * r;
3428                                        idx3 = startt + 2 * rows + 2 * r;
3429                                        idx4 = idx3 + 2 * rows;
3430                                        idx5 = idx4 + 2 * rows;
3431                                        t[idx2] = a[idx1];
3432                                        t[idx2 + 1] = a[idx1 + 1];
3433                                        t[idx3] = a[idx1 + 2];
3434                                        t[idx3 + 1] = a[idx1 + 3];
3435                                        t[idx4] = a[idx1 + 4];
3436                                        t[idx4 + 1] = a[idx1 + 5];
3437                                        t[idx5] = a[idx1 + 6];
3438                                        t[idx5 + 1] = a[idx1 + 7];
3439                                    }
3440                                    fftRows.complexInverse(t, startt, scale);
3441                                    fftRows.complexInverse(t, startt + 2 * rows, scale);
3442                                    fftRows.complexInverse(t, startt + 4 * rows, scale);
3443                                    fftRows.complexInverse(t, startt + 6 * rows, scale);
3444                                    for (int r = 0; r < rows; r++) {
3445                                        idx1 = idx0 + r * rowStride + c;
3446                                        idx2 = startt + 2 * r;
3447                                        idx3 = startt + 2 * rows + 2 * r;
3448                                        idx4 = idx3 + 2 * rows;
3449                                        idx5 = idx4 + 2 * rows;
3450                                        a[idx1] = t[idx2];
3451                                        a[idx1 + 1] = t[idx2 + 1];
3452                                        a[idx1 + 2] = t[idx3];
3453                                        a[idx1 + 3] = t[idx3 + 1];
3454                                        a[idx1 + 4] = t[idx4];
3455                                        a[idx1 + 5] = t[idx4 + 1];
3456                                        a[idx1 + 6] = t[idx5];
3457                                        a[idx1 + 7] = t[idx5 + 1];
3458                                    }
3459                                }
3460                            } else if (columns == 4) {
3461                                for (int r = 0; r < rows; r++) {
3462                                    idx1 = idx0 + r * rowStride;
3463                                    idx2 = startt + 2 * r;
3464                                    idx3 = startt + 2 * rows + 2 * r;
3465                                    t[idx2] = a[idx1];
3466                                    t[idx2 + 1] = a[idx1 + 1];
3467                                    t[idx3] = a[idx1 + 2];
3468                                    t[idx3 + 1] = a[idx1 + 3];
3469                                }
3470                                fftRows.complexInverse(t, startt, scale);
3471                                fftRows.complexInverse(t, startt + 2 * rows, scale);
3472                                for (int r = 0; r < rows; r++) {
3473                                    idx1 = idx0 + r * rowStride;
3474                                    idx2 = startt + 2 * r;
3475                                    idx3 = startt + 2 * rows + 2 * r;
3476                                    a[idx1] = t[idx2];
3477                                    a[idx1 + 1] = t[idx2 + 1];
3478                                    a[idx1 + 2] = t[idx3];
3479                                    a[idx1 + 3] = t[idx3 + 1];
3480                                }
3481                            } else if (columns == 2) {
3482                                for (int r = 0; r < rows; r++) {
3483                                    idx1 = idx0 + r * rowStride;
3484                                    idx2 = startt + 2 * r;
3485                                    t[idx2] = a[idx1];
3486                                    t[idx2 + 1] = a[idx1 + 1];
3487                                }
3488                                fftRows.complexInverse(t, startt, scale);
3489                                for (int r = 0; r < rows; r++) {
3490                                    idx1 = idx0 + r * rowStride;
3491                                    idx2 = startt + 2 * r;
3492                                    a[idx1] = t[idx2];
3493                                    a[idx1 + 1] = t[idx2 + 1];
3494                                }
3495                            }
3496                            if (icr != 0) {
3497                                for (int r = 0; r < rows; r++) {
3498                                    fftColumns.realInverse(a, idx0 + r
3499                                            * rowStride, scale);
3500                                }
3501                            }
3502                        }
3503                    }
3504                }
3505            });
3506        }
3507        ConcurrencyUtils.waitForCompletion(futures);
3508    }
3509
3510    private void xdft3da_subth2(final int icr, final int isgn, final double[] a, final boolean scale) {
3511        int nt, i;
3512
3513        final int nthreads = Math.min(ConcurrencyUtils.getNumberOfThreads(), slices);
3514        nt = 8 * rows;
3515        if (columns == 4) {
3516            nt >>= 1;
3517        } else if (columns < 4) {
3518            nt >>= 2;
3519        }
3520        Future<?>[] futures = new Future[nthreads];
3521        for (i = 0; i < nthreads; i++) {
3522            final int n0 = i;
3523            final int startt = nt * i;
3524            futures[i] = ConcurrencyUtils.submit(new Runnable() {
3525                @Override
3526                                public void run() {
3527                    int idx0, idx1, idx2, idx3, idx4, idx5;
3528
3529                    if (isgn == -1) {
3530                        for (int s = n0; s < slices; s += nthreads) {
3531                            idx0 = s * sliceStride;
3532                            if (icr == 0) {
3533                                for (int r = 0; r < rows; r++) {
3534                                    fftColumns.complexForward(a, idx0 + r * rowStride);
3535                                }
3536                            } else {
3537                                for (int r = 0; r < rows; r++) {
3538                                    fftColumns.realForward(a, idx0 + r * rowStride);
3539                                }
3540                            }
3541                            if (columns > 4) {
3542                                for (int c = 0; c < columns; c += 8) {
3543                                    for (int r = 0; r < rows; r++) {
3544                                        idx1 = idx0 + r * rowStride + c;
3545                                        idx2 = startt + 2 * r;
3546                                        idx3 = startt + 2 * rows + 2 * r;
3547                                        idx4 = idx3 + 2 * rows;
3548                                        idx5 = idx4 + 2 * rows;
3549                                        t[idx2] = a[idx1];
3550                                        t[idx2 + 1] = a[idx1 + 1];
3551                                        t[idx3] = a[idx1 + 2];
3552                                        t[idx3 + 1] = a[idx1 + 3];
3553                                        t[idx4] = a[idx1 + 4];
3554                                        t[idx4 + 1] = a[idx1 + 5];
3555                                        t[idx5] = a[idx1 + 6];
3556                                        t[idx5 + 1] = a[idx1 + 7];
3557                                    }
3558                                    fftRows.complexForward(t, startt);
3559                                    fftRows.complexForward(t, startt + 2 * rows);
3560                                    fftRows.complexForward(t, startt + 4 * rows);
3561                                    fftRows.complexForward(t, startt + 6 * rows);
3562                                    for (int r = 0; r < rows; r++) {
3563                                        idx1 = idx0 + r * rowStride + c;
3564                                        idx2 = startt + 2 * r;
3565                                        idx3 = startt + 2 * rows + 2 * r;
3566                                        idx4 = idx3 + 2 * rows;
3567                                        idx5 = idx4 + 2 * rows;
3568                                        a[idx1] = t[idx2];
3569                                        a[idx1 + 1] = t[idx2 + 1];
3570                                        a[idx1 + 2] = t[idx3];
3571                                        a[idx1 + 3] = t[idx3 + 1];
3572                                        a[idx1 + 4] = t[idx4];
3573                                        a[idx1 + 5] = t[idx4 + 1];
3574                                        a[idx1 + 6] = t[idx5];
3575                                        a[idx1 + 7] = t[idx5 + 1];
3576                                    }
3577                                }
3578                            } else if (columns == 4) {
3579                                for (int r = 0; r < rows; r++) {
3580                                    idx1 = idx0 + r * rowStride;
3581                                    idx2 = startt + 2 * r;
3582                                    idx3 = startt + 2 * rows + 2 * r;
3583                                    t[idx2] = a[idx1];
3584                                    t[idx2 + 1] = a[idx1 + 1];
3585                                    t[idx3] = a[idx1 + 2];
3586                                    t[idx3 + 1] = a[idx1 + 3];
3587                                }
3588                                fftRows.complexForward(t, startt);
3589                                fftRows.complexForward(t, startt + 2 * rows);
3590                                for (int r = 0; r < rows; r++) {
3591                                    idx1 = idx0 + r * rowStride;
3592                                    idx2 = startt + 2 * r;
3593                                    idx3 = startt + 2 * rows + 2 * r;
3594                                    a[idx1] = t[idx2];
3595                                    a[idx1 + 1] = t[idx2 + 1];
3596                                    a[idx1 + 2] = t[idx3];
3597                                    a[idx1 + 3] = t[idx3 + 1];
3598                                }
3599                            } else if (columns == 2) {
3600                                for (int r = 0; r < rows; r++) {
3601                                    idx1 = idx0 + r * rowStride;
3602                                    idx2 = startt + 2 * r;
3603                                    t[idx2] = a[idx1];
3604                                    t[idx2 + 1] = a[idx1 + 1];
3605                                }
3606                                fftRows.complexForward(t, startt);
3607                                for (int r = 0; r < rows; r++) {
3608                                    idx1 = idx0 + r * rowStride;
3609                                    idx2 = startt + 2 * r;
3610                                    a[idx1] = t[idx2];
3611                                    a[idx1 + 1] = t[idx2 + 1];
3612                                }
3613                            }
3614
3615                        }
3616                    } else {
3617                        for (int s = n0; s < slices; s += nthreads) {
3618                            idx0 = s * sliceStride;
3619                            if (icr == 0) {
3620                                for (int r = 0; r < rows; r++) {
3621                                    fftColumns.complexInverse(a, idx0 + r * rowStride, scale);
3622                                }
3623                            } else {
3624                                for (int r = 0; r < rows; r++) {
3625                                    fftColumns.realInverse2(a, idx0 + r * rowStride, scale);
3626                                }
3627                            }
3628                            if (columns > 4) {
3629                                for (int c = 0; c < columns; c += 8) {
3630                                    for (int r = 0; r < rows; r++) {
3631                                        idx1 = idx0 + r * rowStride + c;
3632                                        idx2 = startt + 2 * r;
3633                                        idx3 = startt + 2 * rows + 2 * r;
3634                                        idx4 = idx3 + 2 * rows;
3635                                        idx5 = idx4 + 2 * rows;
3636                                        t[idx2] = a[idx1];
3637                                        t[idx2 + 1] = a[idx1 + 1];
3638                                        t[idx3] = a[idx1 + 2];
3639                                        t[idx3 + 1] = a[idx1 + 3];
3640                                        t[idx4] = a[idx1 + 4];
3641                                        t[idx4 + 1] = a[idx1 + 5];
3642                                        t[idx5] = a[idx1 + 6];
3643                                        t[idx5 + 1] = a[idx1 + 7];
3644                                    }
3645                                    fftRows.complexInverse(t, startt, scale);
3646                                    fftRows.complexInverse(t, startt + 2 * rows, scale);
3647                                    fftRows.complexInverse(t, startt + 4 * rows, scale);
3648                                    fftRows.complexInverse(t, startt + 6 * rows, scale);
3649                                    for (int r = 0; r < rows; r++) {
3650                                        idx1 = idx0 + r * rowStride + c;
3651                                        idx2 = startt + 2 * r;
3652                                        idx3 = startt + 2 * rows + 2 * r;
3653                                        idx4 = idx3 + 2 * rows;
3654                                        idx5 = idx4 + 2 * rows;
3655                                        a[idx1] = t[idx2];
3656                                        a[idx1 + 1] = t[idx2 + 1];
3657                                        a[idx1 + 2] = t[idx3];
3658                                        a[idx1 + 3] = t[idx3 + 1];
3659                                        a[idx1 + 4] = t[idx4];
3660                                        a[idx1 + 5] = t[idx4 + 1];
3661                                        a[idx1 + 6] = t[idx5];
3662                                        a[idx1 + 7] = t[idx5 + 1];
3663                                    }
3664                                }
3665                            } else if (columns == 4) {
3666                                for (int r = 0; r < rows; r++) {
3667                                    idx1 = idx0 + r * rowStride;
3668                                    idx2 = startt + 2 * r;
3669                                    idx3 = startt + 2 * rows + 2 * r;
3670                                    t[idx2] = a[idx1];
3671                                    t[idx2 + 1] = a[idx1 + 1];
3672                                    t[idx3] = a[idx1 + 2];
3673                                    t[idx3 + 1] = a[idx1 + 3];
3674                                }
3675                                fftRows.complexInverse(t, startt, scale);
3676                                fftRows.complexInverse(t, startt + 2 * rows, scale);
3677                                for (int r = 0; r < rows; r++) {
3678                                    idx1 = idx0 + r * rowStride;
3679                                    idx2 = startt + 2 * r;
3680                                    idx3 = startt + 2 * rows + 2 * r;
3681                                    a[idx1] = t[idx2];
3682                                    a[idx1 + 1] = t[idx2 + 1];
3683                                    a[idx1 + 2] = t[idx3];
3684                                    a[idx1 + 3] = t[idx3 + 1];
3685                                }
3686                            } else if (columns == 2) {
3687                                for (int r = 0; r < rows; r++) {
3688                                    idx1 = idx0 + r * rowStride;
3689                                    idx2 = startt + 2 * r;
3690                                    t[idx2] = a[idx1];
3691                                    t[idx2 + 1] = a[idx1 + 1];
3692                                }
3693                                fftRows.complexInverse(t, startt, scale);
3694                                for (int r = 0; r < rows; r++) {
3695                                    idx1 = idx0 + r * rowStride;
3696                                    idx2 = startt + 2 * r;
3697                                    a[idx1] = t[idx2];
3698                                    a[idx1 + 1] = t[idx2 + 1];
3699                                }
3700                            }
3701                        }
3702                    }
3703                }
3704            });
3705        }
3706        ConcurrencyUtils.waitForCompletion(futures);
3707    }
3708
3709    private void xdft3da_subth1(final int icr, final int isgn, final double[][][] a, final boolean scale) {
3710        int nt, i;
3711
3712        final int nthreads = Math.min(ConcurrencyUtils.getNumberOfThreads(), slices);
3713        nt = 8 * rows;
3714        if (columns == 4) {
3715            nt >>= 1;
3716        } else if (columns < 4) {
3717            nt >>= 2;
3718        }
3719        Future<?>[] futures = new Future[nthreads];
3720        for (i = 0; i < nthreads; i++) {
3721            final int n0 = i;
3722            final int startt = nt * i;
3723            futures[i] = ConcurrencyUtils.submit(new Runnable() {
3724                @Override
3725                                public void run() {
3726                    int idx2, idx3, idx4, idx5;
3727
3728                    if (isgn == -1) {
3729                        for (int s = n0; s < slices; s += nthreads) {
3730                            if (icr == 0) {
3731                                for (int r = 0; r < rows; r++) {
3732                                    fftColumns.complexForward(a[s][r]);
3733                                }
3734                            } else {
3735                                for (int r = 0; r < rows; r++) {
3736                                    fftColumns.realForward(a[s][r], 0);
3737                                }
3738                            }
3739                            if (columns > 4) {
3740                                for (int c = 0; c < columns; c += 8) {
3741                                    for (int r = 0; r < rows; r++) {
3742                                        idx2 = startt + 2 * r;
3743                                        idx3 = startt + 2 * rows + 2 * r;
3744                                        idx4 = idx3 + 2 * rows;
3745                                        idx5 = idx4 + 2 * rows;
3746                                        t[idx2] = a[s][r][c];
3747                                        t[idx2 + 1] = a[s][r][c + 1];
3748                                        t[idx3] = a[s][r][c + 2];
3749                                        t[idx3 + 1] = a[s][r][c + 3];
3750                                        t[idx4] = a[s][r][c + 4];
3751                                        t[idx4 + 1] = a[s][r][c + 5];
3752                                        t[idx5] = a[s][r][c + 6];
3753                                        t[idx5 + 1] = a[s][r][c + 7];
3754                                    }
3755                                    fftRows.complexForward(t, startt);
3756                                    fftRows.complexForward(t, startt + 2 * rows);
3757                                    fftRows.complexForward(t, startt + 4 * rows);
3758                                    fftRows.complexForward(t, startt + 6 * rows);
3759                                    for (int r = 0; r < rows; r++) {
3760                                        idx2 = startt + 2 * r;
3761                                        idx3 = startt + 2 * rows + 2 * r;
3762                                        idx4 = idx3 + 2 * rows;
3763                                        idx5 = idx4 + 2 * rows;
3764                                        a[s][r][c] = t[idx2];
3765                                        a[s][r][c + 1] = t[idx2 + 1];
3766                                        a[s][r][c + 2] = t[idx3];
3767                                        a[s][r][c + 3] = t[idx3 + 1];
3768                                        a[s][r][c + 4] = t[idx4];
3769                                        a[s][r][c + 5] = t[idx4 + 1];
3770                                        a[s][r][c + 6] = t[idx5];
3771                                        a[s][r][c + 7] = t[idx5 + 1];
3772                                    }
3773                                }
3774                            } else if (columns == 4) {
3775                                for (int r = 0; r < rows; r++) {
3776                                    idx2 = startt + 2 * r;
3777                                    idx3 = startt + 2 * rows + 2 * r;
3778                                    t[idx2] = a[s][r][0];
3779                                    t[idx2 + 1] = a[s][r][1];
3780                                    t[idx3] = a[s][r][2];
3781                                    t[idx3 + 1] = a[s][r][3];
3782                                }
3783                                fftRows.complexForward(t, startt);
3784                                fftRows.complexForward(t, startt + 2 * rows);
3785                                for (int r = 0; r < rows; r++) {
3786                                    idx2 = startt + 2 * r;
3787                                    idx3 = startt + 2 * rows + 2 * r;
3788                                    a[s][r][0] = t[idx2];
3789                                    a[s][r][1] = t[idx2 + 1];
3790                                    a[s][r][2] = t[idx3];
3791                                    a[s][r][3] = t[idx3 + 1];
3792                                }
3793                            } else if (columns == 2) {
3794                                for (int r = 0; r < rows; r++) {
3795                                    idx2 = startt + 2 * r;
3796                                    t[idx2] = a[s][r][0];
3797                                    t[idx2 + 1] = a[s][r][1];
3798                                }
3799                                fftRows.complexForward(t, startt);
3800                                for (int r = 0; r < rows; r++) {
3801                                    idx2 = startt + 2 * r;
3802                                    a[s][r][0] = t[idx2];
3803                                    a[s][r][1] = t[idx2 + 1];
3804                                }
3805                            }
3806
3807                        }
3808                    } else {
3809                        for (int s = n0; s < slices; s += nthreads) {
3810                            if (icr == 0) {
3811                                for (int r = 0; r < rows; r++) {
3812                                    fftColumns.complexInverse(a[s][r], scale);
3813                                }
3814                            }
3815                            if (columns > 4) {
3816                                for (int c = 0; c < columns; c += 8) {
3817                                    for (int r = 0; r < rows; r++) {
3818                                        idx2 = startt + 2 * r;
3819                                        idx3 = startt + 2 * rows + 2 * r;
3820                                        idx4 = idx3 + 2 * rows;
3821                                        idx5 = idx4 + 2 * rows;
3822                                        t[idx2] = a[s][r][c];
3823                                        t[idx2 + 1] = a[s][r][c + 1];
3824                                        t[idx3] = a[s][r][c + 2];
3825                                        t[idx3 + 1] = a[s][r][c + 3];
3826                                        t[idx4] = a[s][r][c + 4];
3827                                        t[idx4 + 1] = a[s][r][c + 5];
3828                                        t[idx5] = a[s][r][c + 6];
3829                                        t[idx5 + 1] = a[s][r][c + 7];
3830                                    }
3831                                    fftRows.complexInverse(t, startt, scale);
3832                                    fftRows.complexInverse(t, startt + 2 * rows, scale);
3833                                    fftRows.complexInverse(t, startt + 4 * rows, scale);
3834                                    fftRows.complexInverse(t, startt + 6 * rows, scale);
3835                                    for (int r = 0; r < rows; r++) {
3836                                        idx2 = startt + 2 * r;
3837                                        idx3 = startt + 2 * rows + 2 * r;
3838                                        idx4 = idx3 + 2 * rows;
3839                                        idx5 = idx4 + 2 * rows;
3840                                        a[s][r][c] = t[idx2];
3841                                        a[s][r][c + 1] = t[idx2 + 1];
3842                                        a[s][r][c + 2] = t[idx3];
3843                                        a[s][r][c + 3] = t[idx3 + 1];
3844                                        a[s][r][c + 4] = t[idx4];
3845                                        a[s][r][c + 5] = t[idx4 + 1];
3846                                        a[s][r][c + 6] = t[idx5];
3847                                        a[s][r][c + 7] = t[idx5 + 1];
3848                                    }
3849                                }
3850                            } else if (columns == 4) {
3851                                for (int r = 0; r < rows; r++) {
3852                                    idx2 = startt + 2 * r;
3853                                    idx3 = startt + 2 * rows + 2 * r;
3854                                    t[idx2] = a[s][r][0];
3855                                    t[idx2 + 1] = a[s][r][1];
3856                                    t[idx3] = a[s][r][2];
3857                                    t[idx3 + 1] = a[s][r][3];
3858                                }
3859                                fftRows.complexInverse(t, startt, scale);
3860                                fftRows.complexInverse(t, startt + 2 * rows, scale);
3861                                for (int r = 0; r < rows; r++) {
3862                                    idx2 = startt + 2 * r;
3863                                    idx3 = startt + 2 * rows + 2 * r;
3864                                    a[s][r][0] = t[idx2];
3865                                    a[s][r][1] = t[idx2 + 1];
3866                                    a[s][r][2] = t[idx3];
3867                                    a[s][r][3] = t[idx3 + 1];
3868                                }
3869                            } else if (columns == 2) {
3870                                for (int r = 0; r < rows; r++) {
3871                                    idx2 = startt + 2 * r;
3872                                    t[idx2] = a[s][r][0];
3873                                    t[idx2 + 1] = a[s][r][1];
3874                                }
3875                                fftRows.complexInverse(t, startt, scale);
3876                                for (int r = 0; r < rows; r++) {
3877                                    idx2 = startt + 2 * r;
3878                                    a[s][r][0] = t[idx2];
3879                                    a[s][r][1] = t[idx2 + 1];
3880                                }
3881                            }
3882                            if (icr != 0) {
3883                                for (int r = 0; r < rows; r++) {
3884                                    fftColumns.realInverse(a[s][r], scale);
3885                                }
3886                            }
3887                        }
3888                    }
3889                }
3890            });
3891        }
3892        ConcurrencyUtils.waitForCompletion(futures);
3893    }
3894
3895    private void xdft3da_subth2(final int icr, final int isgn, final double[][][] a, final boolean scale) {
3896        int nt, i;
3897
3898        final int nthreads = Math.min(ConcurrencyUtils.getNumberOfThreads(), slices);
3899        nt = 8 * rows;
3900        if (columns == 4) {
3901            nt >>= 1;
3902        } else if (columns < 4) {
3903            nt >>= 2;
3904        }
3905        Future<?>[] futures = new Future[nthreads];
3906        for (i = 0; i < nthreads; i++) {
3907            final int n0 = i;
3908            final int startt = nt * i;
3909            futures[i] = ConcurrencyUtils.submit(new Runnable() {
3910                @Override
3911                                public void run() {
3912                    int idx2, idx3, idx4, idx5;
3913
3914                    if (isgn == -1) {
3915                        for (int s = n0; s < slices; s += nthreads) {
3916                            if (icr == 0) {
3917                                for (int r = 0; r < rows; r++) {
3918                                    fftColumns.complexForward(a[s][r]);
3919                                }
3920                            } else {
3921                                for (int r = 0; r < rows; r++) {
3922                                    fftColumns.realForward(a[s][r]);
3923                                }
3924                            }
3925                            if (columns > 4) {
3926                                for (int c = 0; c < columns; c += 8) {
3927                                    for (int r = 0; r < rows; r++) {
3928                                        idx2 = startt + 2 * r;
3929                                        idx3 = startt + 2 * rows + 2 * r;
3930                                        idx4 = idx3 + 2 * rows;
3931                                        idx5 = idx4 + 2 * rows;
3932                                        t[idx2] = a[s][r][c];
3933                                        t[idx2 + 1] = a[s][r][c + 1];
3934                                        t[idx3] = a[s][r][c + 2];
3935                                        t[idx3 + 1] = a[s][r][c + 3];
3936                                        t[idx4] = a[s][r][c + 4];
3937                                        t[idx4 + 1] = a[s][r][c + 5];
3938                                        t[idx5] = a[s][r][c + 6];
3939                                        t[idx5 + 1] = a[s][r][c + 7];
3940                                    }
3941                                    fftRows.complexForward(t, startt);
3942                                    fftRows.complexForward(t, startt + 2 * rows);
3943                                    fftRows.complexForward(t, startt + 4 * rows);
3944                                    fftRows.complexForward(t, startt + 6 * rows);
3945                                    for (int r = 0; r < rows; r++) {
3946                                        idx2 = startt + 2 * r;
3947                                        idx3 = startt + 2 * rows + 2 * r;
3948                                        idx4 = idx3 + 2 * rows;
3949                                        idx5 = idx4 + 2 * rows;
3950                                        a[s][r][c] = t[idx2];
3951                                        a[s][r][c + 1] = t[idx2 + 1];
3952                                        a[s][r][c + 2] = t[idx3];
3953                                        a[s][r][c + 3] = t[idx3 + 1];
3954                                        a[s][r][c + 4] = t[idx4];
3955                                        a[s][r][c + 5] = t[idx4 + 1];
3956                                        a[s][r][c + 6] = t[idx5];
3957                                        a[s][r][c + 7] = t[idx5 + 1];
3958                                    }
3959                                }
3960                            } else if (columns == 4) {
3961                                for (int r = 0; r < rows; r++) {
3962                                    idx2 = startt + 2 * r;
3963                                    idx3 = startt + 2 * rows + 2 * r;
3964                                    t[idx2] = a[s][r][0];
3965                                    t[idx2 + 1] = a[s][r][1];
3966                                    t[idx3] = a[s][r][2];
3967                                    t[idx3 + 1] = a[s][r][3];
3968                                }
3969                                fftRows.complexForward(t, startt);
3970                                fftRows.complexForward(t, startt + 2 * rows);
3971                                for (int r = 0; r < rows; r++) {
3972                                    idx2 = startt + 2 * r;
3973                                    idx3 = startt + 2 * rows + 2 * r;
3974                                    a[s][r][0] = t[idx2];
3975                                    a[s][r][1] = t[idx2 + 1];
3976                                    a[s][r][2] = t[idx3];
3977                                    a[s][r][3] = t[idx3 + 1];
3978                                }
3979                            } else if (columns == 2) {
3980                                for (int r = 0; r < rows; r++) {
3981                                    idx2 = startt + 2 * r;
3982                                    t[idx2] = a[s][r][0];
3983                                    t[idx2 + 1] = a[s][r][1];
3984                                }
3985                                fftRows.complexForward(t, startt);
3986                                for (int r = 0; r < rows; r++) {
3987                                    idx2 = startt + 2 * r;
3988                                    a[s][r][0] = t[idx2];
3989                                    a[s][r][1] = t[idx2 + 1];
3990                                }
3991                            }
3992
3993                        }
3994                    } else {
3995                        for (int s = n0; s < slices; s += nthreads) {
3996                            if (icr == 0) {
3997                                for (int r = 0; r < rows; r++) {
3998                                    fftColumns.complexInverse(a[s][r], scale);
3999                                }
4000                            } else {
4001                                for (int r = 0; r < rows; r++) {
4002                                    fftColumns.realInverse2(a[s][r], 0, scale);
4003                                }
4004                            }
4005                            if (columns > 4) {
4006                                for (int c = 0; c < columns; c += 8) {
4007                                    for (int r = 0; r < rows; r++) {
4008                                        idx2 = startt + 2 * r;
4009                                        idx3 = startt + 2 * rows + 2 * r;
4010                                        idx4 = idx3 + 2 * rows;
4011                                        idx5 = idx4 + 2 * rows;
4012                                        t[idx2] = a[s][r][c];
4013                                        t[idx2 + 1] = a[s][r][c + 1];
4014                                        t[idx3] = a[s][r][c + 2];
4015                                        t[idx3 + 1] = a[s][r][c + 3];
4016                                        t[idx4] = a[s][r][c + 4];
4017                                        t[idx4 + 1] = a[s][r][c + 5];
4018                                        t[idx5] = a[s][r][c + 6];
4019                                        t[idx5 + 1] = a[s][r][c + 7];
4020                                    }
4021                                    fftRows.complexInverse(t, startt, scale);
4022                                    fftRows.complexInverse(t, startt + 2 * rows, scale);
4023                                    fftRows.complexInverse(t, startt + 4 * rows, scale);
4024                                    fftRows.complexInverse(t, startt + 6 * rows, scale);
4025                                    for (int r = 0; r < rows; r++) {
4026                                        idx2 = startt + 2 * r;
4027                                        idx3 = startt + 2 * rows + 2 * r;
4028                                        idx4 = idx3 + 2 * rows;
4029                                        idx5 = idx4 + 2 * rows;
4030                                        a[s][r][c] = t[idx2];
4031                                        a[s][r][c + 1] = t[idx2 + 1];
4032                                        a[s][r][c + 2] = t[idx3];
4033                                        a[s][r][c + 3] = t[idx3 + 1];
4034                                        a[s][r][c + 4] = t[idx4];
4035                                        a[s][r][c + 5] = t[idx4 + 1];
4036                                        a[s][r][c + 6] = t[idx5];
4037                                        a[s][r][c + 7] = t[idx5 + 1];
4038                                    }
4039                                }
4040                            } else if (columns == 4) {
4041                                for (int r = 0; r < rows; r++) {
4042                                    idx2 = startt + 2 * r;
4043                                    idx3 = startt + 2 * rows + 2 * r;
4044                                    t[idx2] = a[s][r][0];
4045                                    t[idx2 + 1] = a[s][r][1];
4046                                    t[idx3] = a[s][r][2];
4047                                    t[idx3 + 1] = a[s][r][3];
4048                                }
4049                                fftRows.complexInverse(t, startt, scale);
4050                                fftRows.complexInverse(t, startt + 2 * rows, scale);
4051                                for (int r = 0; r < rows; r++) {
4052                                    idx2 = startt + 2 * r;
4053                                    idx3 = startt + 2 * rows + 2 * r;
4054                                    a[s][r][0] = t[idx2];
4055                                    a[s][r][1] = t[idx2 + 1];
4056                                    a[s][r][2] = t[idx3];
4057                                    a[s][r][3] = t[idx3 + 1];
4058                                }
4059                            } else if (columns == 2) {
4060                                for (int r = 0; r < rows; r++) {
4061                                    idx2 = startt + 2 * r;
4062                                    t[idx2] = a[s][r][0];
4063                                    t[idx2 + 1] = a[s][r][1];
4064                                }
4065                                fftRows.complexInverse(t, startt, scale);
4066                                for (int r = 0; r < rows; r++) {
4067                                    idx2 = startt + 2 * r;
4068                                    a[s][r][0] = t[idx2];
4069                                    a[s][r][1] = t[idx2 + 1];
4070                                }
4071                            }
4072                        }
4073                    }
4074                }
4075            });
4076        }
4077        ConcurrencyUtils.waitForCompletion(futures);
4078    }
4079
4080    private void cdft3db_subth(final int isgn, final double[] a, final boolean scale) {
4081        int nt, i;
4082
4083        final int nthreads = Math.min(ConcurrencyUtils.getNumberOfThreads(), rows);
4084        nt = 8 * slices;
4085        if (columns == 4) {
4086            nt >>= 1;
4087        } else if (columns < 4) {
4088            nt >>= 2;
4089        }
4090        Future<?>[] futures = new Future[nthreads];
4091        for (i = 0; i < nthreads; i++) {
4092            final int n0 = i;
4093            final int startt = nt * i;
4094            futures[i] = ConcurrencyUtils.submit(new Runnable() {
4095
4096                @Override
4097                                public void run() {
4098                    int idx0, idx1, idx2, idx3, idx4, idx5;
4099
4100                    if (isgn == -1) {
4101                        if (columns > 4) {
4102                            for (int r = n0; r < rows; r += nthreads) {
4103                                idx0 = r * rowStride;
4104                                for (int c = 0; c < columns; c += 8) {
4105                                    for (int s = 0; s < slices; s++) {
4106                                        idx1 = s * sliceStride + idx0 + c;
4107                                        idx2 = startt + 2 * s;
4108                                        idx3 = startt + 2 * slices + 2 * s;
4109                                        idx4 = idx3 + 2 * slices;
4110                                        idx5 = idx4 + 2 * slices;
4111                                        t[idx2] = a[idx1];
4112                                        t[idx2 + 1] = a[idx1 + 1];
4113                                        t[idx3] = a[idx1 + 2];
4114                                        t[idx3 + 1] = a[idx1 + 3];
4115                                        t[idx4] = a[idx1 + 4];
4116                                        t[idx4 + 1] = a[idx1 + 5];
4117                                        t[idx5] = a[idx1 + 6];
4118                                        t[idx5 + 1] = a[idx1 + 7];
4119                                    }
4120                                    fftSlices.complexForward(t, startt);
4121                                    fftSlices.complexForward(t, startt + 2 * slices);
4122                                    fftSlices.complexForward(t, startt + 4 * slices);
4123                                    fftSlices.complexForward(t, startt + 6 * slices);
4124                                    for (int s = 0; s < slices; s++) {
4125                                        idx1 = s * sliceStride + idx0 + c;
4126                                        idx2 = startt + 2 * s;
4127                                        idx3 = startt + 2 * slices + 2 * s;
4128                                        idx4 = idx3 + 2 * slices;
4129                                        idx5 = idx4 + 2 * slices;
4130                                        a[idx1] = t[idx2];
4131                                        a[idx1 + 1] = t[idx2 + 1];
4132                                        a[idx1 + 2] = t[idx3];
4133                                        a[idx1 + 3] = t[idx3 + 1];
4134                                        a[idx1 + 4] = t[idx4];
4135                                        a[idx1 + 5] = t[idx4 + 1];
4136                                        a[idx1 + 6] = t[idx5];
4137                                        a[idx1 + 7] = t[idx5 + 1];
4138                                    }
4139                                }
4140                            }
4141                        } else if (columns == 4) {
4142                            for (int r = n0; r < rows; r += nthreads) {
4143                                idx0 = r * rowStride;
4144                                for (int s = 0; s < slices; s++) {
4145                                    idx1 = s * sliceStride + idx0;
4146                                    idx2 = startt + 2 * s;
4147                                    idx3 = startt + 2 * slices + 2 * s;
4148                                    t[idx2] = a[idx1];
4149                                    t[idx2 + 1] = a[idx1 + 1];
4150                                    t[idx3] = a[idx1 + 2];
4151                                    t[idx3 + 1] = a[idx1 + 3];
4152                                }
4153                                fftSlices.complexForward(t, startt);
4154                                fftSlices.complexForward(t, startt + 2 * slices);
4155                                for (int s = 0; s < slices; s++) {
4156                                    idx1 = s * sliceStride + idx0;
4157                                    idx2 = startt + 2 * s;
4158                                    idx3 = startt + 2 * slices + 2 * s;
4159                                    a[idx1] = t[idx2];
4160                                    a[idx1 + 1] = t[idx2 + 1];
4161                                    a[idx1 + 2] = t[idx3];
4162                                    a[idx1 + 3] = t[idx3 + 1];
4163                                }
4164                            }
4165                        } else if (columns == 2) {
4166                            for (int r = n0; r < rows; r += nthreads) {
4167                                idx0 = r * rowStride;
4168                                for (int s = 0; s < slices; s++) {
4169                                    idx1 = s * sliceStride + idx0;
4170                                    idx2 = startt + 2 * s;
4171                                    t[idx2] = a[idx1];
4172                                    t[idx2 + 1] = a[idx1 + 1];
4173                                }
4174                                fftSlices.complexForward(t, startt);
4175                                for (int s = 0; s < slices; s++) {
4176                                    idx1 = s * sliceStride + idx0;
4177                                    idx2 = startt + 2 * s;
4178                                    a[idx1] = t[idx2];
4179                                    a[idx1 + 1] = t[idx2 + 1];
4180                                }
4181                            }
4182                        }
4183                    } else {
4184                        if (columns > 4) {
4185                            for (int r = n0; r < rows; r += nthreads) {
4186                                idx0 = r * rowStride;
4187                                for (int c = 0; c < columns; c += 8) {
4188                                    for (int s = 0; s < slices; s++) {
4189                                        idx1 = s * sliceStride + idx0 + c;
4190                                        idx2 = startt + 2 * s;
4191                                        idx3 = startt + 2 * slices + 2 * s;
4192                                        idx4 = idx3 + 2 * slices;
4193                                        idx5 = idx4 + 2 * slices;
4194                                        t[idx2] = a[idx1];
4195                                        t[idx2 + 1] = a[idx1 + 1];
4196                                        t[idx3] = a[idx1 + 2];
4197                                        t[idx3 + 1] = a[idx1 + 3];
4198                                        t[idx4] = a[idx1 + 4];
4199                                        t[idx4 + 1] = a[idx1 + 5];
4200                                        t[idx5] = a[idx1 + 6];
4201                                        t[idx5 + 1] = a[idx1 + 7];
4202                                    }
4203                                    fftSlices.complexInverse(t, startt, scale);
4204                                    fftSlices.complexInverse(t, startt + 2 * slices, scale);
4205                                    fftSlices.complexInverse(t, startt + 4 * slices, scale);
4206                                    fftSlices.complexInverse(t, startt + 6 * slices, scale);
4207                                    for (int s = 0; s < slices; s++) {
4208                                        idx1 = s * sliceStride + idx0 + c;
4209                                        idx2 = startt + 2 * s;
4210                                        idx3 = startt + 2 * slices + 2 * s;
4211                                        idx4 = idx3 + 2 * slices;
4212                                        idx5 = idx4 + 2 * slices;
4213                                        a[idx1] = t[idx2];
4214                                        a[idx1 + 1] = t[idx2 + 1];
4215                                        a[idx1 + 2] = t[idx3];
4216                                        a[idx1 + 3] = t[idx3 + 1];
4217                                        a[idx1 + 4] = t[idx4];
4218                                        a[idx1 + 5] = t[idx4 + 1];
4219                                        a[idx1 + 6] = t[idx5];
4220                                        a[idx1 + 7] = t[idx5 + 1];
4221                                    }
4222                                }
4223                            }
4224                        } else if (columns == 4) {
4225                            for (int r = n0; r < rows; r += nthreads) {
4226                                idx0 = r * rowStride;
4227                                for (int s = 0; s < slices; s++) {
4228                                    idx1 = s * sliceStride + idx0;
4229                                    idx2 = startt + 2 * s;
4230                                    idx3 = startt + 2 * slices + 2 * s;
4231                                    t[idx2] = a[idx1];
4232                                    t[idx2 + 1] = a[idx1 + 1];
4233                                    t[idx3] = a[idx1 + 2];
4234                                    t[idx3 + 1] = a[idx1 + 3];
4235                                }
4236                                fftSlices.complexInverse(t, startt, scale);
4237                                fftSlices.complexInverse(t, startt + 2 * slices, scale);
4238                                for (int s = 0; s < slices; s++) {
4239                                    idx1 = s * sliceStride + idx0;
4240                                    idx2 = startt + 2 * s;
4241                                    idx3 = startt + 2 * slices + 2 * s;
4242                                    a[idx1] = t[idx2];
4243                                    a[idx1 + 1] = t[idx2 + 1];
4244                                    a[idx1 + 2] = t[idx3];
4245                                    a[idx1 + 3] = t[idx3 + 1];
4246                                }
4247                            }
4248                        } else if (columns == 2) {
4249                            for (int r = n0; r < rows; r += nthreads) {
4250                                idx0 = r * rowStride;
4251                                for (int s = 0; s < slices; s++) {
4252                                    idx1 = s * sliceStride + idx0;
4253                                    idx2 = startt + 2 * s;
4254                                    t[idx2] = a[idx1];
4255                                    t[idx2 + 1] = a[idx1 + 1];
4256                                }
4257                                fftSlices.complexInverse(t, startt, scale);
4258                                for (int s = 0; s < slices; s++) {
4259                                    idx1 = s * sliceStride + idx0;
4260                                    idx2 = startt + 2 * s;
4261                                    a[idx1] = t[idx2];
4262                                    a[idx1 + 1] = t[idx2 + 1];
4263                                }
4264                            }
4265                        }
4266                    }
4267
4268                }
4269            });
4270        }
4271        ConcurrencyUtils.waitForCompletion(futures);
4272    }
4273
4274    private void cdft3db_subth(final int isgn, final double[][][] a, final boolean scale) {
4275        int nt, i;
4276
4277        final int nthreads = Math.min(ConcurrencyUtils.getNumberOfThreads(), rows);
4278        nt = 8 * slices;
4279        if (columns == 4) {
4280            nt >>= 1;
4281        } else if (columns < 4) {
4282            nt >>= 2;
4283        }
4284        Future<?>[] futures = new Future[nthreads];
4285        for (i = 0; i < nthreads; i++) {
4286            final int n0 = i;
4287            final int startt = nt * i;
4288            futures[i] = ConcurrencyUtils.submit(new Runnable() {
4289
4290                @Override
4291                                public void run() {
4292                    int idx2, idx3, idx4, idx5;
4293
4294                    if (isgn == -1) {
4295                        if (columns > 4) {
4296                            for (int r = n0; r < rows; r += nthreads) {
4297                                for (int c = 0; c < columns; c += 8) {
4298                                    for (int s = 0; s < slices; s++) {
4299                                        idx2 = startt + 2 * s;
4300                                        idx3 = startt + 2 * slices + 2 * s;
4301                                        idx4 = idx3 + 2 * slices;
4302                                        idx5 = idx4 + 2 * slices;
4303                                        t[idx2] = a[s][r][c];
4304                                        t[idx2 + 1] = a[s][r][c + 1];
4305                                        t[idx3] = a[s][r][c + 2];
4306                                        t[idx3 + 1] = a[s][r][c + 3];
4307                                        t[idx4] = a[s][r][c + 4];
4308                                        t[idx4 + 1] = a[s][r][c + 5];
4309                                        t[idx5] = a[s][r][c + 6];
4310                                        t[idx5 + 1] = a[s][r][c + 7];
4311                                    }
4312                                    fftSlices.complexForward(t, startt);
4313                                    fftSlices.complexForward(t, startt + 2 * slices);
4314                                    fftSlices.complexForward(t, startt + 4 * slices);
4315                                    fftSlices.complexForward(t, startt + 6 * slices);
4316                                    for (int s = 0; s < slices; s++) {
4317                                        idx2 = startt + 2 * s;
4318                                        idx3 = startt + 2 * slices + 2 * s;
4319                                        idx4 = idx3 + 2 * slices;
4320                                        idx5 = idx4 + 2 * slices;
4321                                        a[s][r][c] = t[idx2];
4322                                        a[s][r][c + 1] = t[idx2 + 1];
4323                                        a[s][r][c + 2] = t[idx3];
4324                                        a[s][r][c + 3] = t[idx3 + 1];
4325                                        a[s][r][c + 4] = t[idx4];
4326                                        a[s][r][c + 5] = t[idx4 + 1];
4327                                        a[s][r][c + 6] = t[idx5];
4328                                        a[s][r][c + 7] = t[idx5 + 1];
4329                                    }
4330                                }
4331                            }
4332                        } else if (columns == 4) {
4333                            for (int r = n0; r < rows; r += nthreads) {
4334                                for (int s = 0; s < slices; s++) {
4335                                    idx2 = startt + 2 * s;
4336                                    idx3 = startt + 2 * slices + 2 * s;
4337                                    t[idx2] = a[s][r][0];
4338                                    t[idx2 + 1] = a[s][r][1];
4339                                    t[idx3] = a[s][r][2];
4340                                    t[idx3 + 1] = a[s][r][3];
4341                                }
4342                                fftSlices.complexForward(t, startt);
4343                                fftSlices.complexForward(t, startt + 2 * slices);
4344                                for (int s = 0; s < slices; s++) {
4345                                    idx2 = startt + 2 * s;
4346                                    idx3 = startt + 2 * slices + 2 * s;
4347                                    a[s][r][0] = t[idx2];
4348                                    a[s][r][1] = t[idx2 + 1];
4349                                    a[s][r][2] = t[idx3];
4350                                    a[s][r][3] = t[idx3 + 1];
4351                                }
4352                            }
4353                        } else if (columns == 2) {
4354                            for (int r = n0; r < rows; r += nthreads) {
4355                                for (int s = 0; s < slices; s++) {
4356                                    idx2 = startt + 2 * s;
4357                                    t[idx2] = a[s][r][0];
4358                                    t[idx2 + 1] = a[s][r][1];
4359                                }
4360                                fftSlices.complexForward(t, startt);
4361                                for (int s = 0; s < slices; s++) {
4362                                    idx2 = startt + 2 * s;
4363                                    a[s][r][0] = t[idx2];
4364                                    a[s][r][1] = t[idx2 + 1];
4365                                }
4366                            }
4367                        }
4368                    } else {
4369                        if (columns > 4) {
4370                            for (int r = n0; r < rows; r += nthreads) {
4371                                for (int c = 0; c < columns; c += 8) {
4372                                    for (int s = 0; s < slices; s++) {
4373                                        idx2 = startt + 2 * s;
4374                                        idx3 = startt + 2 * slices + 2 * s;
4375                                        idx4 = idx3 + 2 * slices;
4376                                        idx5 = idx4 + 2 * slices;
4377                                        t[idx2] = a[s][r][c];
4378                                        t[idx2 + 1] = a[s][r][c + 1];
4379                                        t[idx3] = a[s][r][c + 2];
4380                                        t[idx3 + 1] = a[s][r][c + 3];
4381                                        t[idx4] = a[s][r][c + 4];
4382                                        t[idx4 + 1] = a[s][r][c + 5];
4383                                        t[idx5] = a[s][r][c + 6];
4384                                        t[idx5 + 1] = a[s][r][c + 7];
4385                                    }
4386                                    fftSlices.complexInverse(t, startt, scale);
4387                                    fftSlices.complexInverse(t, startt + 2 * slices, scale);
4388                                    fftSlices.complexInverse(t, startt + 4 * slices, scale);
4389                                    fftSlices.complexInverse(t, startt + 6 * slices, scale);
4390                                    for (int s = 0; s < slices; s++) {
4391                                        idx2 = startt + 2 * s;
4392                                        idx3 = startt + 2 * slices + 2 * s;
4393                                        idx4 = idx3 + 2 * slices;
4394                                        idx5 = idx4 + 2 * slices;
4395                                        a[s][r][c] = t[idx2];
4396                                        a[s][r][c + 1] = t[idx2 + 1];
4397                                        a[s][r][c + 2] = t[idx3];
4398                                        a[s][r][c + 3] = t[idx3 + 1];
4399                                        a[s][r][c + 4] = t[idx4];
4400                                        a[s][r][c + 5] = t[idx4 + 1];
4401                                        a[s][r][c + 6] = t[idx5];
4402                                        a[s][r][c + 7] = t[idx5 + 1];
4403                                    }
4404                                }
4405                            }
4406                        } else if (columns == 4) {
4407                            for (int r = n0; r < rows; r += nthreads) {
4408                                for (int s = 0; s < slices; s++) {
4409                                    idx2 = startt + 2 * s;
4410                                    idx3 = startt + 2 * slices + 2 * s;
4411                                    t[idx2] = a[s][r][0];
4412                                    t[idx2 + 1] = a[s][r][1];
4413                                    t[idx3] = a[s][r][2];
4414                                    t[idx3 + 1] = a[s][r][3];
4415                                }
4416                                fftSlices.complexInverse(t, startt, scale);
4417                                fftSlices.complexInverse(t, startt + 2 * slices, scale);
4418                                for (int s = 0; s < slices; s++) {
4419                                    idx2 = startt + 2 * s;
4420                                    idx3 = startt + 2 * slices + 2 * s;
4421                                    a[s][r][0] = t[idx2];
4422                                    a[s][r][1] = t[idx2 + 1];
4423                                    a[s][r][2] = t[idx3];
4424                                    a[s][r][3] = t[idx3 + 1];
4425                                }
4426                            }
4427                        } else if (columns == 2) {
4428                            for (int r = n0; r < rows; r += nthreads) {
4429                                for (int s = 0; s < slices; s++) {
4430                                    idx2 = startt + 2 * s;
4431                                    t[idx2] = a[s][r][0];
4432                                    t[idx2 + 1] = a[s][r][1];
4433                                }
4434                                fftSlices.complexInverse(t, startt, scale);
4435                                for (int s = 0; s < slices; s++) {
4436                                    idx2 = startt + 2 * s;
4437                                    a[s][r][0] = t[idx2];
4438                                    a[s][r][1] = t[idx2 + 1];
4439                                }
4440                            }
4441                        }
4442                    }
4443
4444                }
4445            });
4446        }
4447        ConcurrencyUtils.waitForCompletion(futures);
4448    }
4449
4450    private void rdft3d_sub(int isgn, double[] a) {
4451        int n1h, n2h, i, j, k, l, idx1, idx2, idx3, idx4;
4452        double xi;
4453
4454        n1h = slices >> 1;
4455        n2h = rows >> 1;
4456        if (isgn < 0) {
4457            for (i = 1; i < n1h; i++) {
4458                j = slices - i;
4459                idx1 = i * sliceStride;
4460                idx2 = j * sliceStride;
4461                idx3 = i * sliceStride + n2h * rowStride;
4462                idx4 = j * sliceStride + n2h * rowStride;
4463                xi = a[idx1] - a[idx2];
4464                a[idx1] += a[idx2];
4465                a[idx2] = xi;
4466                xi = a[idx2 + 1] - a[idx1 + 1];
4467                a[idx1 + 1] += a[idx2 + 1];
4468                a[idx2 + 1] = xi;
4469                xi = a[idx3] - a[idx4];
4470                a[idx3] += a[idx4];
4471                a[idx4] = xi;
4472                xi = a[idx4 + 1] - a[idx3 + 1];
4473                a[idx3 + 1] += a[idx4 + 1];
4474                a[idx4 + 1] = xi;
4475                for (k = 1; k < n2h; k++) {
4476                    l = rows - k;
4477                    idx1 = i * sliceStride + k * rowStride;
4478                    idx2 = j * sliceStride + l * rowStride;
4479                    xi = a[idx1] - a[idx2];
4480                    a[idx1] += a[idx2];
4481                    a[idx2] = xi;
4482                    xi = a[idx2 + 1] - a[idx1 + 1];
4483                    a[idx1 + 1] += a[idx2 + 1];
4484                    a[idx2 + 1] = xi;
4485                    idx3 = j * sliceStride + k * rowStride;
4486                    idx4 = i * sliceStride + l * rowStride;
4487                    xi = a[idx3] - a[idx4];
4488                    a[idx3] += a[idx4];
4489                    a[idx4] = xi;
4490                    xi = a[idx4 + 1] - a[idx3 + 1];
4491                    a[idx3 + 1] += a[idx4 + 1];
4492                    a[idx4 + 1] = xi;
4493                }
4494            }
4495            for (k = 1; k < n2h; k++) {
4496                l = rows - k;
4497                idx1 = k * rowStride;
4498                idx2 = l * rowStride;
4499                xi = a[idx1] - a[idx2];
4500                a[idx1] += a[idx2];
4501                a[idx2] = xi;
4502                xi = a[idx2 + 1] - a[idx1 + 1];
4503                a[idx1 + 1] += a[idx2 + 1];
4504                a[idx2 + 1] = xi;
4505                idx3 = n1h * sliceStride + k * rowStride;
4506                idx4 = n1h * sliceStride + l * rowStride;
4507                xi = a[idx3] - a[idx4];
4508                a[idx3] += a[idx4];
4509                a[idx4] = xi;
4510                xi = a[idx4 + 1] - a[idx3 + 1];
4511                a[idx3 + 1] += a[idx4 + 1];
4512                a[idx4 + 1] = xi;
4513            }
4514        } else {
4515            for (i = 1; i < n1h; i++) {
4516                j = slices - i;
4517                idx1 = j * sliceStride;
4518                idx2 = i * sliceStride;
4519                a[idx1] = 0.5f * (a[idx2] - a[idx1]);
4520                a[idx2] -= a[idx1];
4521                a[idx1 + 1] = 0.5f * (a[idx2 + 1] + a[idx1 + 1]);
4522                a[idx2 + 1] -= a[idx1 + 1];
4523                idx3 = j * sliceStride + n2h * rowStride;
4524                idx4 = i * sliceStride + n2h * rowStride;
4525                a[idx3] = 0.5f * (a[idx4] - a[idx3]);
4526                a[idx4] -= a[idx3];
4527                a[idx3 + 1] = 0.5f * (a[idx4 + 1] + a[idx3 + 1]);
4528                a[idx4 + 1] -= a[idx3 + 1];
4529                for (k = 1; k < n2h; k++) {
4530                    l = rows - k;
4531                    idx1 = j * sliceStride + l * rowStride;
4532                    idx2 = i * sliceStride + k * rowStride;
4533                    a[idx1] = 0.5f * (a[idx2] - a[idx1]);
4534                    a[idx2] -= a[idx1];
4535                    a[idx1 + 1] = 0.5f * (a[idx2 + 1] + a[idx1 + 1]);
4536                    a[idx2 + 1] -= a[idx1 + 1];
4537                    idx3 = i * sliceStride + l * rowStride;
4538                    idx4 = j * sliceStride + k * rowStride;
4539                    a[idx3] = 0.5f * (a[idx4] - a[idx3]);
4540                    a[idx4] -= a[idx3];
4541                    a[idx3 + 1] = 0.5f * (a[idx4 + 1] + a[idx3 + 1]);
4542                    a[idx4 + 1] -= a[idx3 + 1];
4543                }
4544            }
4545            for (k = 1; k < n2h; k++) {
4546                l = rows - k;
4547                idx1 = l * rowStride;
4548                idx2 = k * rowStride;
4549                a[idx1] = 0.5f * (a[idx2] - a[idx1]);
4550                a[idx2] -= a[idx1];
4551                a[idx1 + 1] = 0.5f * (a[idx2 + 1] + a[idx1 + 1]);
4552                a[idx2 + 1] -= a[idx1 + 1];
4553                idx3 = n1h * sliceStride + l * rowStride;
4554                idx4 = n1h * sliceStride + k * rowStride;
4555                a[idx3] = 0.5f * (a[idx4] - a[idx3]);
4556                a[idx4] -= a[idx3];
4557                a[idx3 + 1] = 0.5f * (a[idx4 + 1] + a[idx3 + 1]);
4558                a[idx4 + 1] -= a[idx3 + 1];
4559            }
4560        }
4561    }
4562
4563    private void rdft3d_sub(int isgn, double[][][] a) {
4564        int n1h, n2h, i, j, k, l;
4565        double xi;
4566
4567        n1h = slices >> 1;
4568        n2h = rows >> 1;
4569        if (isgn < 0) {
4570            for (i = 1; i < n1h; i++) {
4571                j = slices - i;
4572                xi = a[i][0][0] - a[j][0][0];
4573                a[i][0][0] += a[j][0][0];
4574                a[j][0][0] = xi;
4575                xi = a[j][0][1] - a[i][0][1];
4576                a[i][0][1] += a[j][0][1];
4577                a[j][0][1] = xi;
4578                xi = a[i][n2h][0] - a[j][n2h][0];
4579                a[i][n2h][0] += a[j][n2h][0];
4580                a[j][n2h][0] = xi;
4581                xi = a[j][n2h][1] - a[i][n2h][1];
4582                a[i][n2h][1] += a[j][n2h][1];
4583                a[j][n2h][1] = xi;
4584                for (k = 1; k < n2h; k++) {
4585                    l = rows - k;
4586                    xi = a[i][k][0] - a[j][l][0];
4587                    a[i][k][0] += a[j][l][0];
4588                    a[j][l][0] = xi;
4589                    xi = a[j][l][1] - a[i][k][1];
4590                    a[i][k][1] += a[j][l][1];
4591                    a[j][l][1] = xi;
4592                    xi = a[j][k][0] - a[i][l][0];
4593                    a[j][k][0] += a[i][l][0];
4594                    a[i][l][0] = xi;
4595                    xi = a[i][l][1] - a[j][k][1];
4596                    a[j][k][1] += a[i][l][1];
4597                    a[i][l][1] = xi;
4598                }
4599            }
4600            for (k = 1; k < n2h; k++) {
4601                l = rows - k;
4602                xi = a[0][k][0] - a[0][l][0];
4603                a[0][k][0] += a[0][l][0];
4604                a[0][l][0] = xi;
4605                xi = a[0][l][1] - a[0][k][1];
4606                a[0][k][1] += a[0][l][1];
4607                a[0][l][1] = xi;
4608                xi = a[n1h][k][0] - a[n1h][l][0];
4609                a[n1h][k][0] += a[n1h][l][0];
4610                a[n1h][l][0] = xi;
4611                xi = a[n1h][l][1] - a[n1h][k][1];
4612                a[n1h][k][1] += a[n1h][l][1];
4613                a[n1h][l][1] = xi;
4614            }
4615        } else {
4616            for (i = 1; i < n1h; i++) {
4617                j = slices - i;
4618                a[j][0][0] = 0.5f * (a[i][0][0] - a[j][0][0]);
4619                a[i][0][0] -= a[j][0][0];
4620                a[j][0][1] = 0.5f * (a[i][0][1] + a[j][0][1]);
4621                a[i][0][1] -= a[j][0][1];
4622                a[j][n2h][0] = 0.5f * (a[i][n2h][0] - a[j][n2h][0]);
4623                a[i][n2h][0] -= a[j][n2h][0];
4624                a[j][n2h][1] = 0.5f * (a[i][n2h][1] + a[j][n2h][1]);
4625                a[i][n2h][1] -= a[j][n2h][1];
4626                for (k = 1; k < n2h; k++) {
4627                    l = rows - k;
4628                    a[j][l][0] = 0.5f * (a[i][k][0] - a[j][l][0]);
4629                    a[i][k][0] -= a[j][l][0];
4630                    a[j][l][1] = 0.5f * (a[i][k][1] + a[j][l][1]);
4631                    a[i][k][1] -= a[j][l][1];
4632                    a[i][l][0] = 0.5f * (a[j][k][0] - a[i][l][0]);
4633                    a[j][k][0] -= a[i][l][0];
4634                    a[i][l][1] = 0.5f * (a[j][k][1] + a[i][l][1]);
4635                    a[j][k][1] -= a[i][l][1];
4636                }
4637            }
4638            for (k = 1; k < n2h; k++) {
4639                l = rows - k;
4640                a[0][l][0] = 0.5f * (a[0][k][0] - a[0][l][0]);
4641                a[0][k][0] -= a[0][l][0];
4642                a[0][l][1] = 0.5f * (a[0][k][1] + a[0][l][1]);
4643                a[0][k][1] -= a[0][l][1];
4644                a[n1h][l][0] = 0.5f * (a[n1h][k][0] - a[n1h][l][0]);
4645                a[n1h][k][0] -= a[n1h][l][0];
4646                a[n1h][l][1] = 0.5f * (a[n1h][k][1] + a[n1h][l][1]);
4647                a[n1h][k][1] -= a[n1h][l][1];
4648            }
4649        }
4650    }
4651
4652    private void fillSymmetric(final double[][][] a) {
4653        final int twon3 = 2 * columns;
4654        final int n2d2 = rows / 2;
4655        int n1d2 = slices / 2;
4656        int nthreads = ConcurrencyUtils.getNumberOfThreads();
4657        if ((nthreads > 1) && useThreads && (slices >= nthreads)) {
4658            Future<?>[] futures = new Future[nthreads];
4659            int p = slices / nthreads;
4660            for (int l = 0; l < nthreads; l++) {
4661                final int firstSlice = l * p;
4662                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
4663                futures[l] = ConcurrencyUtils.submit(new Runnable() {
4664                    @Override
4665                                        public void run() {
4666                        for (int s = firstSlice; s < lastSlice; s++) {
4667                            int idx1 = (slices - s) % slices;
4668                            for (int r = 0; r < rows; r++) {
4669                                int idx2 = (rows - r) % rows;
4670                                for (int c = 1; c < columns; c += 2) {
4671                                    int idx3 = twon3 - c;
4672                                    a[idx1][idx2][idx3] = -a[s][r][c + 2];
4673                                    a[idx1][idx2][idx3 - 1] = a[s][r][c + 1];
4674                                }
4675                            }
4676                        }
4677                    }
4678                });
4679            }
4680            ConcurrencyUtils.waitForCompletion(futures);
4681
4682            // ---------------------------------------------
4683
4684            for (int l = 0; l < nthreads; l++) {
4685                final int firstSlice = l * p;
4686                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
4687                futures[l] = ConcurrencyUtils.submit(new Runnable() {
4688                    @Override
4689                                        public void run() {
4690                        for (int s = firstSlice; s < lastSlice; s++) {
4691                            int idx1 = (slices - s) % slices;
4692                            for (int r = 1; r < n2d2; r++) {
4693                                int idx2 = rows - r;
4694                                a[idx1][r][columns] = a[s][idx2][1];
4695                                a[s][idx2][columns] = a[s][idx2][1];
4696                                a[idx1][r][columns + 1] = -a[s][idx2][0];
4697                                a[s][idx2][columns + 1] = a[s][idx2][0];
4698                            }
4699                        }
4700                    }
4701                });
4702            }
4703            ConcurrencyUtils.waitForCompletion(futures);
4704
4705            for (int l = 0; l < nthreads; l++) {
4706                final int firstSlice = l * p;
4707                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
4708                futures[l] = ConcurrencyUtils.submit(new Runnable() {
4709                    @Override
4710                                        public void run() {
4711                        for (int s = firstSlice; s < lastSlice; s++) {
4712                            int idx1 = (slices - s) % slices;
4713                            for (int r = 1; r < n2d2; r++) {
4714                                int idx2 = rows - r;
4715                                a[idx1][idx2][0] = a[s][r][0];
4716                                a[idx1][idx2][1] = -a[s][r][1];
4717                            }
4718                        }
4719                    }
4720                });
4721            }
4722            ConcurrencyUtils.waitForCompletion(futures);
4723
4724        } else {
4725
4726            for (int s = 0; s < slices; s++) {
4727                int idx1 = (slices - s) % slices;
4728                for (int r = 0; r < rows; r++) {
4729                    int idx2 = (rows - r) % rows;
4730                    for (int c = 1; c < columns; c += 2) {
4731                        int idx3 = twon3 - c;
4732                        a[idx1][idx2][idx3] = -a[s][r][c + 2];
4733                        a[idx1][idx2][idx3 - 1] = a[s][r][c + 1];
4734                    }
4735                }
4736            }
4737
4738            // ---------------------------------------------
4739
4740            for (int s = 0; s < slices; s++) {
4741                int idx1 = (slices - s) % slices;
4742                for (int r = 1; r < n2d2; r++) {
4743                    int idx2 = rows - r;
4744                    a[idx1][r][columns] = a[s][idx2][1];
4745                    a[s][idx2][columns] = a[s][idx2][1];
4746                    a[idx1][r][columns + 1] = -a[s][idx2][0];
4747                    a[s][idx2][columns + 1] = a[s][idx2][0];
4748                }
4749            }
4750
4751            for (int s = 0; s < slices; s++) {
4752                int idx1 = (slices - s) % slices;
4753                for (int r = 1; r < n2d2; r++) {
4754                    int idx2 = rows - r;
4755                    a[idx1][idx2][0] = a[s][r][0];
4756                    a[idx1][idx2][1] = -a[s][r][1];
4757                }
4758            }
4759        }
4760
4761        // ----------------------------------------------------------
4762
4763        for (int s = 1; s < n1d2; s++) {
4764            int idx1 = slices - s;
4765            a[s][0][columns] = a[idx1][0][1];
4766            a[idx1][0][columns] = a[idx1][0][1];
4767            a[s][0][columns + 1] = -a[idx1][0][0];
4768            a[idx1][0][columns + 1] = a[idx1][0][0];
4769            a[s][n2d2][columns] = a[idx1][n2d2][1];
4770            a[idx1][n2d2][columns] = a[idx1][n2d2][1];
4771            a[s][n2d2][columns + 1] = -a[idx1][n2d2][0];
4772            a[idx1][n2d2][columns + 1] = a[idx1][n2d2][0];
4773            a[idx1][0][0] = a[s][0][0];
4774            a[idx1][0][1] = -a[s][0][1];
4775            a[idx1][n2d2][0] = a[s][n2d2][0];
4776            a[idx1][n2d2][1] = -a[s][n2d2][1];
4777
4778        }
4779        // ----------------------------------------
4780
4781        a[0][0][columns] = a[0][0][1];
4782        a[0][0][1] = 0;
4783        a[0][n2d2][columns] = a[0][n2d2][1];
4784        a[0][n2d2][1] = 0;
4785        a[n1d2][0][columns] = a[n1d2][0][1];
4786        a[n1d2][0][1] = 0;
4787        a[n1d2][n2d2][columns] = a[n1d2][n2d2][1];
4788        a[n1d2][n2d2][1] = 0;
4789        a[n1d2][0][columns + 1] = 0;
4790        a[n1d2][n2d2][columns + 1] = 0;
4791    }
4792
4793    private void fillSymmetric(final double[] a) {
4794        final int twon3 = 2 * columns;
4795        final int n2d2 = rows / 2;
4796        int n1d2 = slices / 2;
4797
4798        final int twoSliceStride = rows * twon3;
4799        final int twoRowStride = twon3;
4800
4801        int idx1, idx2, idx3, idx4, idx5, idx6;
4802
4803        for (int s = (slices - 1); s >= 1; s--) {
4804            idx3 = s * sliceStride;
4805            idx4 = 2 * idx3;
4806            for (int r = 0; r < rows; r++) {
4807                idx5 = r * rowStride;
4808                idx6 = 2 * idx5;
4809                for (int c = 0; c < columns; c += 2) {
4810                    idx1 = idx3 + idx5 + c;
4811                    idx2 = idx4 + idx6 + c;
4812                    a[idx2] = a[idx1];
4813                    a[idx1] = 0;
4814                    idx1++;
4815                    idx2++;
4816                    a[idx2] = a[idx1];
4817                    a[idx1] = 0;
4818                }
4819            }
4820        }
4821
4822        for (int r = 1; r < rows; r++) {
4823            idx3 = (rows - r) * rowStride;
4824            idx4 = (rows - r) * twoRowStride;
4825            for (int c = 0; c < columns; c += 2) {
4826                idx1 = idx3 + c;
4827                idx2 = idx4 + c;
4828                a[idx2] = a[idx1];
4829                a[idx1] = 0;
4830                idx1++;
4831                idx2++;
4832                a[idx2] = a[idx1];
4833                a[idx1] = 0;
4834            }
4835        }
4836
4837        int nthreads = ConcurrencyUtils.getNumberOfThreads();
4838        if ((nthreads > 1) && useThreads && (slices >= nthreads)) {
4839            Future<?>[] futures = new Future[nthreads];
4840            int p = slices / nthreads;
4841            for (int l = 0; l < nthreads; l++) {
4842                final int firstSlice = l * p;
4843                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
4844                futures[l] = ConcurrencyUtils.submit(new Runnable() {
4845                    @Override
4846                                        public void run() {
4847                        for (int s = firstSlice; s < lastSlice; s++) {
4848                            int idx3 = ((slices - s) % slices) * twoSliceStride;
4849                            int idx5 = s * twoSliceStride;
4850                            for (int r = 0; r < rows; r++) {
4851                                int idx4 = ((rows - r) % rows) * twoRowStride;
4852                                int idx6 = r * twoRowStride;
4853                                for (int c = 1; c < columns; c += 2) {
4854                                    int idx1 = idx3 + idx4 + twon3 - c;
4855                                    int idx2 = idx5 + idx6 + c;
4856                                    a[idx1] = -a[idx2 + 2];
4857                                    a[idx1 - 1] = a[idx2 + 1];
4858                                }
4859                            }
4860                        }
4861                    }
4862                });
4863            }
4864            ConcurrencyUtils.waitForCompletion(futures);
4865
4866            // ---------------------------------------------
4867
4868            for (int l = 0; l < nthreads; l++) {
4869                final int firstSlice = l * p;
4870                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
4871                futures[l] = ConcurrencyUtils.submit(new Runnable() {
4872                    @Override
4873                                        public void run() {
4874                        for (int s = firstSlice; s < lastSlice; s++) {
4875                            int idx5 = ((slices - s) % slices) * twoSliceStride;
4876                            int idx6 = s * twoSliceStride;
4877                            for (int r = 1; r < n2d2; r++) {
4878                                int idx4 = idx6 + (rows - r) * twoRowStride;
4879                                int idx1 = idx5 + r * twoRowStride + columns;
4880                                int idx2 = idx4 + columns;
4881                                int idx3 = idx4 + 1;
4882                                a[idx1] = a[idx3];
4883                                a[idx2] = a[idx3];
4884                                a[idx1 + 1] = -a[idx4];
4885                                a[idx2 + 1] = a[idx4];
4886
4887                            }
4888                        }
4889                    }
4890                });
4891            }
4892            ConcurrencyUtils.waitForCompletion(futures);
4893            for (int l = 0; l < nthreads; l++) {
4894                final int firstSlice = l * p;
4895                final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
4896                futures[l] = ConcurrencyUtils.submit(new Runnable() {
4897                    @Override
4898                                        public void run() {
4899                        for (int s = firstSlice; s < lastSlice; s++) {
4900                            int idx3 = ((slices - s) % slices) * twoSliceStride;
4901                            int idx4 = s * twoSliceStride;
4902                            for (int r = 1; r < n2d2; r++) {
4903                                int idx1 = idx3 + (rows - r) * twoRowStride;
4904                                int idx2 = idx4 + r * twoRowStride;
4905                                a[idx1] = a[idx2];
4906                                a[idx1 + 1] = -a[idx2 + 1];
4907
4908                            }
4909                        }
4910                    }
4911                });
4912            }
4913            ConcurrencyUtils.waitForCompletion(futures);
4914        } else {
4915
4916            // -----------------------------------------------
4917            for (int s = 0; s < slices; s++) {
4918                idx3 = ((slices - s) % slices) * twoSliceStride;
4919                idx5 = s * twoSliceStride;
4920                for (int r = 0; r < rows; r++) {
4921                    idx4 = ((rows - r) % rows) * twoRowStride;
4922                    idx6 = r * twoRowStride;
4923                    for (int c = 1; c < columns; c += 2) {
4924                        idx1 = idx3 + idx4 + twon3 - c;
4925                        idx2 = idx5 + idx6 + c;
4926                        a[idx1] = -a[idx2 + 2];
4927                        a[idx1 - 1] = a[idx2 + 1];
4928                    }
4929                }
4930            }
4931
4932            // ---------------------------------------------
4933
4934            for (int s = 0; s < slices; s++) {
4935                idx5 = ((slices - s) % slices) * twoSliceStride;
4936                idx6 = s * twoSliceStride;
4937                for (int r = 1; r < n2d2; r++) {
4938                    idx4 = idx6 + (rows - r) * twoRowStride;
4939                    idx1 = idx5 + r * twoRowStride + columns;
4940                    idx2 = idx4 + columns;
4941                    idx3 = idx4 + 1;
4942                    a[idx1] = a[idx3];
4943                    a[idx2] = a[idx3];
4944                    a[idx1 + 1] = -a[idx4];
4945                    a[idx2 + 1] = a[idx4];
4946
4947                }
4948            }
4949
4950            for (int s = 0; s < slices; s++) {
4951                idx3 = ((slices - s) % slices) * twoSliceStride;
4952                idx4 = s * twoSliceStride;
4953                for (int r = 1; r < n2d2; r++) {
4954                    idx1 = idx3 + (rows - r) * twoRowStride;
4955                    idx2 = idx4 + r * twoRowStride;
4956                    a[idx1] = a[idx2];
4957                    a[idx1 + 1] = -a[idx2 + 1];
4958
4959                }
4960            }
4961        }
4962
4963        // ----------------------------------------------------------
4964
4965        for (int s = 1; s < n1d2; s++) {
4966            idx1 = s * twoSliceStride;
4967            idx2 = (slices - s) * twoSliceStride;
4968            idx3 = n2d2 * twoRowStride;
4969            idx4 = idx1 + idx3;
4970            idx5 = idx2 + idx3;
4971            a[idx1 + columns] = a[idx2 + 1];
4972            a[idx2 + columns] = a[idx2 + 1];
4973            a[idx1 + columns + 1] = -a[idx2];
4974            a[idx2 + columns + 1] = a[idx2];
4975            a[idx4 + columns] = a[idx5 + 1];
4976            a[idx5 + columns] = a[idx5 + 1];
4977            a[idx4 + columns + 1] = -a[idx5];
4978            a[idx5 + columns + 1] = a[idx5];
4979            a[idx2] = a[idx1];
4980            a[idx2 + 1] = -a[idx1 + 1];
4981            a[idx5] = a[idx4];
4982            a[idx5 + 1] = -a[idx4 + 1];
4983
4984        }
4985
4986        // ----------------------------------------
4987
4988        a[columns] = a[1];
4989        a[1] = 0;
4990        idx1 = n2d2 * twoRowStride;
4991        idx2 = n1d2 * twoSliceStride;
4992        idx3 = idx1 + idx2;
4993        a[idx1 + columns] = a[idx1 + 1];
4994        a[idx1 + 1] = 0;
4995        a[idx2 + columns] = a[idx2 + 1];
4996        a[idx2 + 1] = 0;
4997        a[idx3 + columns] = a[idx3 + 1];
4998        a[idx3 + 1] = 0;
4999        a[idx2 + columns + 1] = 0;
5000        a[idx3 + columns + 1] = 0;
5001    }
5002}