001/* ***** BEGIN LICENSE BLOCK *****
002 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
003 *
004 * The contents of this file are subject to the Mozilla Public License Version
005 * 1.1 (the "License"); you may not use this file except in compliance with
006 * the License. You may obtain a copy of the License at
007 * http://www.mozilla.org/MPL/
008 *
009 * Software distributed under the License is distributed on an "AS IS" basis,
010 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
011 * for the specific language governing rights and limitations under the
012 * License.
013 *
014 * The Original Code is JTransforms.
015 *
016 * The Initial Developer of the Original Code is
017 * Piotr Wendykier, Emory University.
018 * Portions created by the Initial Developer are Copyright (C) 2007-2009
019 * the Initial Developer. All Rights Reserved.
020 *
021 * Alternatively, the contents of this file may be used under the terms of
022 * either the GNU General Public License Version 2 or later (the "GPL"), or
023 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
024 * in which case the provisions of the GPL or the LGPL are applicable instead
025 * of those above. If you wish to allow use of your version of this file only
026 * under the terms of either the GPL or the LGPL, and not to allow others to
027 * use your version of this file under the terms of the MPL, indicate your
028 * decision by deleting the provisions above and replace them with the notice
029 * and other provisions required by the GPL or the LGPL. If you do not delete
030 * the provisions above, a recipient may use your version of this file under
031 * the terms of any one of the MPL, the GPL or the LGPL.
032 *
033 * ***** END LICENSE BLOCK ***** */
034
035package edu.emory.mathcs.jtransforms.dst;
036
037import java.util.concurrent.Future;
038
039import edu.emory.mathcs.utils.ConcurrencyUtils;
040
041/**
042 * Computes 3D Discrete Sine Transform (DST) of single precision data. The sizes
043 * of all three dimensions can be arbitrary numbers. This is a parallel
044 * implementation optimized for SMP systems.<br>
045 * <br>
046 * Part of code is derived from General Purpose FFT Package written by Takuya Ooura
047 * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
048 * 
049 * @author Piotr Wendykier (piotr.wendykier@gmail.com)
050 * 
051 */
052public class FloatDST_3D {
053
054    private int slices;
055
056    private int rows;
057
058    private int columns;
059
060    private int sliceStride;
061
062    private int rowStride;
063
064    private float[] t;
065
066    private FloatDST_1D dstSlices, dstRows, dstColumns;
067
068    private int oldNthreads;
069
070    private int nt;
071
072    private boolean isPowerOfTwo = false;
073
074    private boolean useThreads = false;
075
076    /**
077     * Creates new instance of FloatDST_3D.
078     * 
079     * @param slices
080     *            number of slices
081     * @param rows
082     *            number of rows
083     * @param columns
084     *            number of columns
085     */
086    public FloatDST_3D(int slices, int rows, int columns) {
087        if (slices <= 1 || rows <= 1 || columns <= 1) {
088            throw new IllegalArgumentException("slices, rows and columns must be greater than 1");
089        }
090        this.slices = slices;
091        this.rows = rows;
092        this.columns = columns;
093        this.sliceStride = rows * columns;
094        this.rowStride = columns;
095        if (slices * rows * columns >= ConcurrencyUtils.getThreadsBeginN_3D()) {
096            this.useThreads = true;
097        }
098        if (ConcurrencyUtils.isPowerOf2(slices) && ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) {
099            isPowerOfTwo = true;
100            oldNthreads = ConcurrencyUtils.getNumberOfThreads();
101            nt = slices;
102            if (nt < rows) {
103                nt = rows;
104            }
105            nt *= 4;
106            if (oldNthreads > 1) {
107                nt *= oldNthreads;
108            }
109            if (columns == 2) {
110                nt >>= 1;
111            }
112            t = new float[nt];
113        }
114        dstSlices = new FloatDST_1D(slices);
115        if (slices == rows) {
116            dstRows = dstSlices;
117        } else {
118            dstRows = new FloatDST_1D(rows);
119        }
120        if (slices == columns) {
121            dstColumns = dstSlices;
122        } else if (rows == columns) {
123            dstColumns = dstRows;
124        } else {
125            dstColumns = new FloatDST_1D(columns);
126        }
127    }
128
129    /**
130     * Computes the 3D forward DST (DST-II) leaving the result in <code>a</code>
131     * . The data is stored in 1D array addressed in slice-major, then
132     * row-major, then column-major, in order of significance, i.e. the element
133     * (i,j,k) of 3D array x[slices][rows][columns] is stored in a[i*sliceStride
134     * + j*rowStride + k], where sliceStride = rows * columns and rowStride =
135     * columns.
136     * 
137     * @param a
138     *            data to transform
139     * @param scale
140     *            if true then scaling is performed
141     */
142    public void forward(final float[] a, final boolean scale) {
143        int nthreads = ConcurrencyUtils.getNumberOfThreads();
144        if (isPowerOfTwo) {
145            if (nthreads != oldNthreads) {
146                nt = slices;
147                if (nt < rows) {
148                    nt = rows;
149                }
150                nt *= 4;
151                if (nthreads > 1) {
152                    nt *= nthreads;
153                }
154                if (columns == 2) {
155                    nt >>= 1;
156                }
157                t = new float[nt];
158                oldNthreads = nthreads;
159            }
160            if ((nthreads > 1) && useThreads) {
161                ddxt3da_subth(-1, a, scale);
162                ddxt3db_subth(-1, a, scale);
163            } else {
164                ddxt3da_sub(-1, a, scale);
165                ddxt3db_sub(-1, a, scale);
166            }
167        } else {
168            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
169                Future<?>[] futures = new Future[nthreads];
170                int p = slices / nthreads;
171                for (int l = 0; l < nthreads; l++) {
172                    final int firstSlice = l * p;
173                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
174                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
175                        @Override
176                                                public void run() {
177                            for (int s = firstSlice; s < lastSlice; s++) {
178                                int idx1 = s * sliceStride;
179                                for (int r = 0; r < rows; r++) {
180                                    dstColumns.forward(a, idx1 + r * rowStride, scale);
181                                }
182                            }
183                        }
184                    });
185                }
186                ConcurrencyUtils.waitForCompletion(futures);
187
188                for (int l = 0; l < nthreads; l++) {
189                    final int firstSlice = l * p;
190                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
191                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
192                        @Override
193                                                public void run() {
194                            float[] temp = new float[rows];
195                            for (int s = firstSlice; s < lastSlice; s++) {
196                                int idx1 = s * sliceStride;
197                                for (int c = 0; c < columns; c++) {
198                                    for (int r = 0; r < rows; r++) {
199                                        int idx3 = idx1 + r * rowStride + c;
200                                        temp[r] = a[idx3];
201                                    }
202                                    dstRows.forward(temp, scale);
203                                    for (int r = 0; r < rows; r++) {
204                                        int idx3 = idx1 + r * rowStride + c;
205                                        a[idx3] = temp[r];
206                                    }
207                                }
208                            }
209                        }
210                    });
211                }
212                ConcurrencyUtils.waitForCompletion(futures);
213
214                p = rows / nthreads;
215                for (int l = 0; l < nthreads; l++) {
216                    final int firstRow = l * p;
217                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
218                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
219                        @Override
220                                                public void run() {
221                            float[] temp = new float[slices];
222                            for (int r = firstRow; r < lastRow; r++) {
223                                int idx1 = r * rowStride;
224                                for (int c = 0; c < columns; c++) {
225                                    for (int s = 0; s < slices; s++) {
226                                        int idx3 = s * sliceStride + idx1 + c;
227                                        temp[s] = a[idx3];
228                                    }
229                                    dstSlices.forward(temp, scale);
230                                    for (int s = 0; s < slices; s++) {
231                                        int idx3 = s * sliceStride + idx1 + c;
232                                        a[idx3] = temp[s];
233                                    }
234                                }
235                            }
236                        }
237                    });
238                }
239                ConcurrencyUtils.waitForCompletion(futures);
240
241            } else {
242                for (int s = 0; s < slices; s++) {
243                    int idx1 = s * sliceStride;
244                    for (int r = 0; r < rows; r++) {
245                        dstColumns.forward(a, idx1 + r * rowStride, scale);
246                    }
247                }
248                float[] temp = new float[rows];
249                for (int s = 0; s < slices; s++) {
250                    int idx1 = s * sliceStride;
251                    for (int c = 0; c < columns; c++) {
252                        for (int r = 0; r < rows; r++) {
253                            int idx3 = idx1 + r * rowStride + c;
254                            temp[r] = a[idx3];
255                        }
256                        dstRows.forward(temp, scale);
257                        for (int r = 0; r < rows; r++) {
258                            int idx3 = idx1 + r * rowStride + c;
259                            a[idx3] = temp[r];
260                        }
261                    }
262                }
263                temp = new float[slices];
264                for (int r = 0; r < rows; r++) {
265                    int idx1 = r * rowStride;
266                    for (int c = 0; c < columns; c++) {
267                        for (int s = 0; s < slices; s++) {
268                            int idx3 = s * sliceStride + idx1 + c;
269                            temp[s] = a[idx3];
270                        }
271                        dstSlices.forward(temp, scale);
272                        for (int s = 0; s < slices; s++) {
273                            int idx3 = s * sliceStride + idx1 + c;
274                            a[idx3] = temp[s];
275                        }
276                    }
277                }
278            }
279        }
280    }
281
282    /**
283     * Computes the 3D forward DST (DST-II) leaving the result in <code>a</code>
284     * . The data is stored in 3D array.
285     * 
286     * @param a
287     *            data to transform
288     * @param scale
289     *            if true then scaling is performed
290     */
291    public void forward(final float[][][] a, final boolean scale) {
292        int nthreads = ConcurrencyUtils.getNumberOfThreads();
293        if (isPowerOfTwo) {
294            if (nthreads != oldNthreads) {
295                nt = slices;
296                if (nt < rows) {
297                    nt = rows;
298                }
299                nt *= 4;
300                if (nthreads > 1) {
301                    nt *= nthreads;
302                }
303                if (columns == 2) {
304                    nt >>= 1;
305                }
306                t = new float[nt];
307                oldNthreads = nthreads;
308            }
309            if ((nthreads > 1) && useThreads) {
310                ddxt3da_subth(-1, a, scale);
311                ddxt3db_subth(-1, a, scale);
312            } else {
313                ddxt3da_sub(-1, a, scale);
314                ddxt3db_sub(-1, a, scale);
315            }
316        } else {
317            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
318                Future<?>[] futures = new Future[nthreads];
319                int p = slices / nthreads;
320                for (int l = 0; l < nthreads; l++) {
321                    final int firstSlice = l * p;
322                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
323                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
324                        @Override
325                                                public void run() {
326                            for (int s = firstSlice; s < lastSlice; s++) {
327                                for (int r = 0; r < rows; r++) {
328                                    dstColumns.forward(a[s][r], scale);
329                                }
330                            }
331                        }
332                    });
333                }
334                ConcurrencyUtils.waitForCompletion(futures);
335
336                for (int l = 0; l < nthreads; l++) {
337                    final int firstSlice = l * p;
338                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
339                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
340                        @Override
341                                                public void run() {
342                            float[] temp = new float[rows];
343                            for (int s = firstSlice; s < lastSlice; s++) {
344                                for (int c = 0; c < columns; c++) {
345                                    for (int r = 0; r < rows; r++) {
346                                        temp[r] = a[s][r][c];
347                                    }
348                                    dstRows.forward(temp, scale);
349                                    for (int r = 0; r < rows; r++) {
350                                        a[s][r][c] = temp[r];
351                                    }
352                                }
353                            }
354                        }
355                    });
356                }
357                ConcurrencyUtils.waitForCompletion(futures);
358
359                p = rows / nthreads;
360                for (int l = 0; l < nthreads; l++) {
361                    final int firstRow = l * p;
362                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
363                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
364                        @Override
365                                                public void run() {
366                            float[] temp = new float[slices];
367                            for (int r = firstRow; r < lastRow; r++) {
368                                for (int c = 0; c < columns; c++) {
369                                    for (int s = 0; s < slices; s++) {
370                                        temp[s] = a[s][r][c];
371                                    }
372                                    dstSlices.forward(temp, scale);
373                                    for (int s = 0; s < slices; s++) {
374                                        a[s][r][c] = temp[s];
375                                    }
376                                }
377                            }
378                        }
379                    });
380                }
381                ConcurrencyUtils.waitForCompletion(futures);
382
383            } else {
384                for (int s = 0; s < slices; s++) {
385                    for (int r = 0; r < rows; r++) {
386                        dstColumns.forward(a[s][r], scale);
387                    }
388                }
389                float[] temp = new float[rows];
390                for (int s = 0; s < slices; s++) {
391                    for (int c = 0; c < columns; c++) {
392                        for (int r = 0; r < rows; r++) {
393                            temp[r] = a[s][r][c];
394                        }
395                        dstRows.forward(temp, scale);
396                        for (int r = 0; r < rows; r++) {
397                            a[s][r][c] = temp[r];
398                        }
399                    }
400                }
401                temp = new float[slices];
402                for (int r = 0; r < rows; r++) {
403                    for (int c = 0; c < columns; c++) {
404                        for (int s = 0; s < slices; s++) {
405                            temp[s] = a[s][r][c];
406                        }
407                        dstSlices.forward(temp, scale);
408                        for (int s = 0; s < slices; s++) {
409                            a[s][r][c] = temp[s];
410                        }
411                    }
412                }
413            }
414        }
415    }
416
417    /**
418     * Computes the 3D inverse DST (DST-III) leaving the result in
419     * <code>a</code>. The data is stored in 1D array addressed in slice-major,
420     * then row-major, then column-major, in order of significance, i.e. the
421     * element (i,j,k) of 3D array x[slices][rows][columns] is stored in
422     * a[i*sliceStride + j*rowStride + k], where sliceStride = rows * columns
423     * and rowStride = columns.
424     * 
425     * @param a
426     *            data to transform
427     * @param scale
428     *            if true then scaling is performed
429     */
430    public void inverse(final float[] a, final boolean scale) {
431        int nthreads = ConcurrencyUtils.getNumberOfThreads();
432        if (isPowerOfTwo) {
433            if (nthreads != oldNthreads) {
434                nt = slices;
435                if (nt < rows) {
436                    nt = rows;
437                }
438                nt *= 4;
439                if (nthreads > 1) {
440                    nt *= nthreads;
441                }
442                if (columns == 2) {
443                    nt >>= 1;
444                }
445                t = new float[nt];
446                oldNthreads = nthreads;
447            }
448            if ((nthreads > 1) && useThreads) {
449                ddxt3da_subth(1, a, scale);
450                ddxt3db_subth(1, a, scale);
451            } else {
452                ddxt3da_sub(1, a, scale);
453                ddxt3db_sub(1, a, scale);
454            }
455        } else {
456            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
457                Future<?>[] futures = new Future[nthreads];
458                int p = slices / nthreads;
459                for (int l = 0; l < nthreads; l++) {
460                    final int firstSlice = l * p;
461                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
462                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
463                        @Override
464                                                public void run() {
465                            for (int s = firstSlice; s < lastSlice; s++) {
466                                int idx1 = s * sliceStride;
467                                for (int r = 0; r < rows; r++) {
468                                    dstColumns.inverse(a, idx1 + r * rowStride, scale);
469                                }
470                            }
471                        }
472                    });
473                }
474                ConcurrencyUtils.waitForCompletion(futures);
475
476                for (int l = 0; l < nthreads; l++) {
477                    final int firstSlice = l * p;
478                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
479                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
480                        @Override
481                                                public void run() {
482                            float[] temp = new float[rows];
483                            for (int s = firstSlice; s < lastSlice; s++) {
484                                int idx1 = s * sliceStride;
485                                for (int c = 0; c < columns; c++) {
486                                    for (int r = 0; r < rows; r++) {
487                                        int idx3 = idx1 + r * rowStride + c;
488                                        temp[r] = a[idx3];
489                                    }
490                                    dstRows.inverse(temp, scale);
491                                    for (int r = 0; r < rows; r++) {
492                                        int idx3 = idx1 + r * rowStride + c;
493                                        a[idx3] = temp[r];
494                                    }
495                                }
496                            }
497                        }
498                    });
499                }
500                ConcurrencyUtils.waitForCompletion(futures);
501
502                p = rows / nthreads;
503                for (int l = 0; l < nthreads; l++) {
504                    final int firstRow = l * p;
505                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
506                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
507                        @Override
508                                                public void run() {
509                            float[] temp = new float[slices];
510                            for (int r = firstRow; r < lastRow; r++) {
511                                int idx1 = r * rowStride;
512                                for (int c = 0; c < columns; c++) {
513                                    for (int s = 0; s < slices; s++) {
514                                        int idx3 = s * sliceStride + idx1 + c;
515                                        temp[s] = a[idx3];
516                                    }
517                                    dstSlices.inverse(temp, scale);
518                                    for (int s = 0; s < slices; s++) {
519                                        int idx3 = s * sliceStride + idx1 + c;
520                                        a[idx3] = temp[s];
521                                    }
522                                }
523                            }
524                        }
525                    });
526                }
527                ConcurrencyUtils.waitForCompletion(futures);
528
529            } else {
530                for (int s = 0; s < slices; s++) {
531                    int idx1 = s * sliceStride;
532                    for (int r = 0; r < rows; r++) {
533                        dstColumns.inverse(a, idx1 + r * rowStride, scale);
534                    }
535                }
536                float[] temp = new float[rows];
537                for (int s = 0; s < slices; s++) {
538                    int idx1 = s * sliceStride;
539                    for (int c = 0; c < columns; c++) {
540                        for (int r = 0; r < rows; r++) {
541                            int idx3 = idx1 + r * rowStride + c;
542                            temp[r] = a[idx3];
543                        }
544                        dstRows.inverse(temp, scale);
545                        for (int r = 0; r < rows; r++) {
546                            int idx3 = idx1 + r * rowStride + c;
547                            a[idx3] = temp[r];
548                        }
549                    }
550                }
551                temp = new float[slices];
552                for (int r = 0; r < rows; r++) {
553                    int idx1 = r * rowStride;
554                    for (int c = 0; c < columns; c++) {
555                        for (int s = 0; s < slices; s++) {
556                            int idx3 = s * sliceStride + idx1 + c;
557                            temp[s] = a[idx3];
558                        }
559                        dstSlices.inverse(temp, scale);
560                        for (int s = 0; s < slices; s++) {
561                            int idx3 = s * sliceStride + idx1 + c;
562                            a[idx3] = temp[s];
563                        }
564                    }
565                }
566            }
567        }
568
569    }
570
571    /**
572     * Computes the 3D inverse DST (DST-III) leaving the result in
573     * <code>a</code>. The data is stored in 3D array.
574     * 
575     * @param a
576     *            data to transform
577     * @param scale
578     *            if true then scaling is performed
579     */
580    public void inverse(final float[][][] a, final boolean scale) {
581        int nthreads = ConcurrencyUtils.getNumberOfThreads();
582        if (isPowerOfTwo) {
583            if (nthreads != oldNthreads) {
584                nt = slices;
585                if (nt < rows) {
586                    nt = rows;
587                }
588                nt *= 4;
589                if (nthreads > 1) {
590                    nt *= nthreads;
591                }
592                if (columns == 2) {
593                    nt >>= 1;
594                }
595                t = new float[nt];
596                oldNthreads = nthreads;
597            }
598            if ((nthreads > 1) && useThreads) {
599                ddxt3da_subth(1, a, scale);
600                ddxt3db_subth(1, a, scale);
601            } else {
602                ddxt3da_sub(1, a, scale);
603                ddxt3db_sub(1, a, scale);
604            }
605        } else {
606            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
607                Future<?>[] futures = new Future[nthreads];
608                int p = slices / nthreads;
609                for (int l = 0; l < nthreads; l++) {
610                    final int firstSlice = l * p;
611                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
612                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
613                        @Override
614                                                public void run() {
615                            for (int s = firstSlice; s < lastSlice; s++) {
616                                for (int r = 0; r < rows; r++) {
617                                    dstColumns.inverse(a[s][r], scale);
618                                }
619                            }
620                        }
621                    });
622                }
623                ConcurrencyUtils.waitForCompletion(futures);
624
625                for (int l = 0; l < nthreads; l++) {
626                    final int firstSlice = l * p;
627                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
628                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
629                        @Override
630                                                public void run() {
631                            float[] temp = new float[rows];
632                            for (int s = firstSlice; s < lastSlice; s++) {
633                                for (int c = 0; c < columns; c++) {
634                                    for (int r = 0; r < rows; r++) {
635                                        temp[r] = a[s][r][c];
636                                    }
637                                    dstRows.inverse(temp, scale);
638                                    for (int r = 0; r < rows; r++) {
639                                        a[s][r][c] = temp[r];
640                                    }
641                                }
642                            }
643                        }
644                    });
645                }
646                ConcurrencyUtils.waitForCompletion(futures);
647
648                p = rows / nthreads;
649                for (int l = 0; l < nthreads; l++) {
650                    final int firstRow = l * p;
651                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
652                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
653                        @Override
654                                                public void run() {
655                            float[] temp = new float[slices];
656                            for (int r = firstRow; r < lastRow; r++) {
657                                for (int c = 0; c < columns; c++) {
658                                    for (int s = 0; s < slices; s++) {
659                                        temp[s] = a[s][r][c];
660                                    }
661                                    dstSlices.inverse(temp, scale);
662                                    for (int s = 0; s < slices; s++) {
663                                        a[s][r][c] = temp[s];
664                                    }
665                                }
666                            }
667                        }
668                    });
669                }
670                ConcurrencyUtils.waitForCompletion(futures);
671
672            } else {
673                for (int s = 0; s < slices; s++) {
674                    for (int r = 0; r < rows; r++) {
675                        dstColumns.inverse(a[s][r], scale);
676                    }
677                }
678                float[] temp = new float[rows];
679                for (int s = 0; s < slices; s++) {
680                    for (int c = 0; c < columns; c++) {
681                        for (int r = 0; r < rows; r++) {
682                            temp[r] = a[s][r][c];
683                        }
684                        dstRows.inverse(temp, scale);
685                        for (int r = 0; r < rows; r++) {
686                            a[s][r][c] = temp[r];
687                        }
688                    }
689                }
690                temp = new float[slices];
691                for (int r = 0; r < rows; r++) {
692                    for (int c = 0; c < columns; c++) {
693                        for (int s = 0; s < slices; s++) {
694                            temp[s] = a[s][r][c];
695                        }
696                        dstSlices.inverse(temp, scale);
697                        for (int s = 0; s < slices; s++) {
698                            a[s][r][c] = temp[s];
699                        }
700                    }
701                }
702            }
703        }
704    }
705
706    private void ddxt3da_sub(int isgn, float[] a, boolean scale) {
707        int idx0, idx1, idx2;
708
709        if (isgn == -1) {
710            for (int s = 0; s < slices; s++) {
711                idx0 = s * sliceStride;
712                for (int r = 0; r < rows; r++) {
713                    dstColumns.forward(a, idx0 + r * rowStride, scale);
714                }
715                if (columns > 2) {
716                    for (int c = 0; c < columns; c += 4) {
717                        for (int r = 0; r < rows; r++) {
718                            idx1 = idx0 + r * rowStride + c;
719                            idx2 = rows + r;
720                            t[r] = a[idx1];
721                            t[idx2] = a[idx1 + 1];
722                            t[idx2 + rows] = a[idx1 + 2];
723                            t[idx2 + 2 * rows] = a[idx1 + 3];
724                        }
725                        dstRows.forward(t, 0, scale);
726                        dstRows.forward(t, rows, scale);
727                        dstRows.forward(t, 2 * rows, scale);
728                        dstRows.forward(t, 3 * rows, scale);
729                        for (int r = 0; r < rows; r++) {
730                            idx1 = idx0 + r * rowStride + c;
731                            idx2 = rows + r;
732                            a[idx1] = t[r];
733                            a[idx1 + 1] = t[idx2];
734                            a[idx1 + 2] = t[idx2 + rows];
735                            a[idx1 + 3] = t[idx2 + 2 * rows];
736                        }
737                    }
738                } else if (columns == 2) {
739                    for (int r = 0; r < rows; r++) {
740                        idx1 = idx0 + r * rowStride;
741                        t[r] = a[idx1];
742                        t[rows + r] = a[idx1 + 1];
743                    }
744                    dstRows.forward(t, 0, scale);
745                    dstRows.forward(t, rows, scale);
746                    for (int r = 0; r < rows; r++) {
747                        idx1 = idx0 + r * rowStride;
748                        a[idx1] = t[r];
749                        a[idx1 + 1] = t[rows + r];
750                    }
751                }
752            }
753        } else {
754            for (int s = 0; s < slices; s++) {
755                idx0 = s * sliceStride;
756                for (int r = 0; r < rows; r++) {
757                    dstColumns.inverse(a, idx0 + r * rowStride, scale);
758                }
759                if (columns > 2) {
760                    for (int c = 0; c < columns; c += 4) {
761                        for (int r = 0; r < rows; r++) {
762                            idx1 = idx0 + r * rowStride + c;
763                            idx2 = rows + r;
764                            t[r] = a[idx1];
765                            t[idx2] = a[idx1 + 1];
766                            t[idx2 + rows] = a[idx1 + 2];
767                            t[idx2 + 2 * rows] = a[idx1 + 3];
768                        }
769                        dstRows.inverse(t, 0, scale);
770                        dstRows.inverse(t, rows, scale);
771                        dstRows.inverse(t, 2 * rows, scale);
772                        dstRows.inverse(t, 3 * rows, scale);
773                        for (int r = 0; r < rows; r++) {
774                            idx1 = idx0 + r * rowStride + c;
775                            idx2 = rows + r;
776                            a[idx1] = t[r];
777                            a[idx1 + 1] = t[idx2];
778                            a[idx1 + 2] = t[idx2 + rows];
779                            a[idx1 + 3] = t[idx2 + 2 * rows];
780                        }
781                    }
782                } else if (columns == 2) {
783                    for (int r = 0; r < rows; r++) {
784                        idx1 = idx0 + r * rowStride;
785                        t[r] = a[idx1];
786                        t[rows + r] = a[idx1 + 1];
787                    }
788                    dstRows.inverse(t, 0, scale);
789                    dstRows.inverse(t, rows, scale);
790                    for (int r = 0; r < rows; r++) {
791                        idx1 = idx0 + r * rowStride;
792                        a[idx1] = t[r];
793                        a[idx1 + 1] = t[rows + r];
794                    }
795                }
796            }
797        }
798    }
799
800    private void ddxt3da_sub(int isgn, float[][][] a, boolean scale) {
801        int idx2;
802
803        if (isgn == -1) {
804            for (int s = 0; s < slices; s++) {
805                for (int r = 0; r < rows; r++) {
806                    dstColumns.forward(a[s][r], scale);
807                }
808                if (columns > 2) {
809                    for (int c = 0; c < columns; c += 4) {
810                        for (int r = 0; r < rows; r++) {
811                            idx2 = rows + r;
812                            t[r] = a[s][r][c];
813                            t[idx2] = a[s][r][c + 1];
814                            t[idx2 + rows] = a[s][r][c + 2];
815                            t[idx2 + 2 * rows] = a[s][r][c + 3];
816                        }
817                        dstRows.forward(t, 0, scale);
818                        dstRows.forward(t, rows, scale);
819                        dstRows.forward(t, 2 * rows, scale);
820                        dstRows.forward(t, 3 * rows, scale);
821                        for (int r = 0; r < rows; r++) {
822                            idx2 = rows + r;
823                            a[s][r][c] = t[r];
824                            a[s][r][c + 1] = t[idx2];
825                            a[s][r][c + 2] = t[idx2 + rows];
826                            a[s][r][c + 3] = t[idx2 + 2 * rows];
827                        }
828                    }
829                } else if (columns == 2) {
830                    for (int r = 0; r < rows; r++) {
831                        t[r] = a[s][r][0];
832                        t[rows + r] = a[s][r][1];
833                    }
834                    dstRows.forward(t, 0, scale);
835                    dstRows.forward(t, rows, scale);
836                    for (int r = 0; r < rows; r++) {
837                        a[s][r][0] = t[r];
838                        a[s][r][1] = t[rows + r];
839                    }
840                }
841            }
842        } else {
843            for (int s = 0; s < slices; s++) {
844                for (int r = 0; r < rows; r++) {
845                    dstColumns.inverse(a[s][r], scale);
846                }
847                if (columns > 2) {
848                    for (int c = 0; c < columns; c += 4) {
849                        for (int r = 0; r < rows; r++) {
850                            idx2 = rows + r;
851                            t[r] = a[s][r][c];
852                            t[idx2] = a[s][r][c + 1];
853                            t[idx2 + rows] = a[s][r][c + 2];
854                            t[idx2 + 2 * rows] = a[s][r][c + 3];
855                        }
856                        dstRows.inverse(t, 0, scale);
857                        dstRows.inverse(t, rows, scale);
858                        dstRows.inverse(t, 2 * rows, scale);
859                        dstRows.inverse(t, 3 * rows, scale);
860                        for (int r = 0; r < rows; r++) {
861                            idx2 = rows + r;
862                            a[s][r][c] = t[r];
863                            a[s][r][c + 1] = t[idx2];
864                            a[s][r][c + 2] = t[idx2 + rows];
865                            a[s][r][c + 3] = t[idx2 + 2 * rows];
866                        }
867                    }
868                } else if (columns == 2) {
869                    for (int r = 0; r < rows; r++) {
870                        t[r] = a[s][r][0];
871                        t[rows + r] = a[s][r][1];
872                    }
873                    dstRows.inverse(t, 0, scale);
874                    dstRows.inverse(t, rows, scale);
875                    for (int r = 0; r < rows; r++) {
876                        a[s][r][0] = t[r];
877                        a[s][r][1] = t[rows + r];
878                    }
879                }
880            }
881        }
882    }
883
884    private void ddxt3db_sub(int isgn, float[] a, boolean scale) {
885        int idx0, idx1, idx2;
886
887        if (isgn == -1) {
888            if (columns > 2) {
889                for (int r = 0; r < rows; r++) {
890                    idx0 = r * rowStride;
891                    for (int c = 0; c < columns; c += 4) {
892                        for (int s = 0; s < slices; s++) {
893                            idx1 = s * sliceStride + idx0 + c;
894                            idx2 = slices + s;
895                            t[s] = a[idx1];
896                            t[idx2] = a[idx1 + 1];
897                            t[idx2 + slices] = a[idx1 + 2];
898                            t[idx2 + 2 * slices] = a[idx1 + 3];
899                        }
900                        dstSlices.forward(t, 0, scale);
901                        dstSlices.forward(t, slices, scale);
902                        dstSlices.forward(t, 2 * slices, scale);
903                        dstSlices.forward(t, 3 * slices, scale);
904                        for (int s = 0; s < slices; s++) {
905                            idx1 = s * sliceStride + idx0 + c;
906                            idx2 = slices + s;
907                            a[idx1] = t[s];
908                            a[idx1 + 1] = t[idx2];
909                            a[idx1 + 2] = t[idx2 + slices];
910                            a[idx1 + 3] = t[idx2 + 2 * slices];
911                        }
912                    }
913                }
914            } else if (columns == 2) {
915                for (int r = 0; r < rows; r++) {
916                    idx0 = r * rowStride;
917                    for (int s = 0; s < slices; s++) {
918                        idx1 = s * sliceStride + idx0;
919                        t[s] = a[idx1];
920                        t[slices + s] = a[idx1 + 1];
921                    }
922                    dstSlices.forward(t, 0, scale);
923                    dstSlices.forward(t, slices, scale);
924                    for (int s = 0; s < slices; s++) {
925                        idx1 = s * sliceStride + idx0;
926                        a[idx1] = t[s];
927                        a[idx1 + 1] = t[slices + s];
928                    }
929                }
930            }
931        } else {
932            if (columns > 2) {
933                for (int r = 0; r < rows; r++) {
934                    idx0 = r * rowStride;
935                    for (int c = 0; c < columns; c += 4) {
936                        for (int s = 0; s < slices; s++) {
937                            idx1 = s * sliceStride + idx0 + c;
938                            idx2 = slices + s;
939                            t[s] = a[idx1];
940                            t[idx2] = a[idx1 + 1];
941                            t[idx2 + slices] = a[idx1 + 2];
942                            t[idx2 + 2 * slices] = a[idx1 + 3];
943                        }
944                        dstSlices.inverse(t, 0, scale);
945                        dstSlices.inverse(t, slices, scale);
946                        dstSlices.inverse(t, 2 * slices, scale);
947                        dstSlices.inverse(t, 3 * slices, scale);
948
949                        for (int s = 0; s < slices; s++) {
950                            idx1 = s * sliceStride + idx0 + c;
951                            idx2 = slices + s;
952                            a[idx1] = t[s];
953                            a[idx1 + 1] = t[idx2];
954                            a[idx1 + 2] = t[idx2 + slices];
955                            a[idx1 + 3] = t[idx2 + 2 * slices];
956                        }
957                    }
958                }
959            } else if (columns == 2) {
960                for (int r = 0; r < rows; r++) {
961                    idx0 = r * rowStride;
962                    for (int s = 0; s < slices; s++) {
963                        idx1 = s * sliceStride + idx0;
964                        t[s] = a[idx1];
965                        t[slices + s] = a[idx1 + 1];
966                    }
967                    dstSlices.inverse(t, 0, scale);
968                    dstSlices.inverse(t, slices, scale);
969                    for (int s = 0; s < slices; s++) {
970                        idx1 = s * sliceStride + idx0;
971                        a[idx1] = t[s];
972                        a[idx1 + 1] = t[slices + s];
973                    }
974                }
975            }
976        }
977    }
978
979    private void ddxt3db_sub(int isgn, float[][][] a, boolean scale) {
980        int idx2;
981
982        if (isgn == -1) {
983            if (columns > 2) {
984                for (int r = 0; r < rows; r++) {
985                    for (int c = 0; c < columns; c += 4) {
986                        for (int s = 0; s < slices; s++) {
987                            idx2 = slices + s;
988                            t[s] = a[s][r][c];
989                            t[idx2] = a[s][r][c + 1];
990                            t[idx2 + slices] = a[s][r][c + 2];
991                            t[idx2 + 2 * slices] = a[s][r][c + 3];
992                        }
993                        dstSlices.forward(t, 0, scale);
994                        dstSlices.forward(t, slices, scale);
995                        dstSlices.forward(t, 2 * slices, scale);
996                        dstSlices.forward(t, 3 * slices, scale);
997                        for (int s = 0; s < slices; s++) {
998                            idx2 = slices + s;
999                            a[s][r][c] = t[s];
1000                            a[s][r][c + 1] = t[idx2];
1001                            a[s][r][c + 2] = t[idx2 + slices];
1002                            a[s][r][c + 3] = t[idx2 + 2 * slices];
1003                        }
1004                    }
1005                }
1006            } else if (columns == 2) {
1007                for (int r = 0; r < rows; r++) {
1008                    for (int s = 0; s < slices; s++) {
1009                        t[s] = a[s][r][0];
1010                        t[slices + s] = a[s][r][1];
1011                    }
1012                    dstSlices.forward(t, 0, scale);
1013                    dstSlices.forward(t, slices, scale);
1014                    for (int s = 0; s < slices; s++) {
1015                        a[s][r][0] = t[s];
1016                        a[s][r][1] = t[slices + s];
1017                    }
1018                }
1019            }
1020        } else {
1021            if (columns > 2) {
1022                for (int r = 0; r < rows; r++) {
1023                    for (int c = 0; c < columns; c += 4) {
1024                        for (int s = 0; s < slices; s++) {
1025                            idx2 = slices + s;
1026                            t[s] = a[s][r][c];
1027                            t[idx2] = a[s][r][c + 1];
1028                            t[idx2 + slices] = a[s][r][c + 2];
1029                            t[idx2 + 2 * slices] = a[s][r][c + 3];
1030                        }
1031                        dstSlices.inverse(t, 0, scale);
1032                        dstSlices.inverse(t, slices, scale);
1033                        dstSlices.inverse(t, 2 * slices, scale);
1034                        dstSlices.inverse(t, 3 * slices, scale);
1035
1036                        for (int s = 0; s < slices; s++) {
1037                            idx2 = slices + s;
1038                            a[s][r][c] = t[s];
1039                            a[s][r][c + 1] = t[idx2];
1040                            a[s][r][c + 2] = t[idx2 + slices];
1041                            a[s][r][c + 3] = t[idx2 + 2 * slices];
1042                        }
1043                    }
1044                }
1045            } else if (columns == 2) {
1046                for (int r = 0; r < rows; r++) {
1047                    for (int s = 0; s < slices; s++) {
1048                        t[s] = a[s][r][0];
1049                        t[slices + s] = a[s][r][1];
1050                    }
1051                    dstSlices.inverse(t, 0, scale);
1052                    dstSlices.inverse(t, slices, scale);
1053                    for (int s = 0; s < slices; s++) {
1054                        a[s][r][0] = t[s];
1055                        a[s][r][1] = t[slices + s];
1056                    }
1057                }
1058            }
1059        }
1060    }
1061
1062    private void ddxt3da_subth(final int isgn, final float[] a, final boolean scale) {
1063        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads();
1064
1065        int nt = 4 * rows;
1066        if (columns == 2) {
1067            nt >>= 1;
1068        }
1069        Future<?>[] futures = new Future[nthreads];
1070
1071        for (int i = 0; i < nthreads; i++) {
1072            final int n0 = i;
1073            final int startt = nt * i;
1074            futures[i] = ConcurrencyUtils.submit(new Runnable() {
1075
1076                @Override
1077                                public void run() {
1078                    int idx0, idx1, idx2;
1079                    if (isgn == -1) {
1080                        for (int s = n0; s < slices; s += nthreads) {
1081                            idx0 = s * sliceStride;
1082                            for (int r = 0; r < rows; r++) {
1083                                dstColumns.forward(a, idx0 + r * rowStride, scale);
1084                            }
1085                            if (columns > 2) {
1086                                for (int c = 0; c < columns; c += 4) {
1087                                    for (int r = 0; r < rows; r++) {
1088                                        idx1 = idx0 + r * rowStride + c;
1089                                        idx2 = startt + rows + r;
1090                                        t[startt + r] = a[idx1];
1091                                        t[idx2] = a[idx1 + 1];
1092                                        t[idx2 + rows] = a[idx1 + 2];
1093                                        t[idx2 + 2 * rows] = a[idx1 + 3];
1094                                    }
1095                                    dstRows.forward(t, startt, scale);
1096                                    dstRows.forward(t, startt + rows, scale);
1097                                    dstRows.forward(t, startt + 2 * rows, scale);
1098                                    dstRows.forward(t, startt + 3 * rows, scale);
1099                                    for (int r = 0; r < rows; r++) {
1100                                        idx1 = idx0 + r * rowStride + c;
1101                                        idx2 = startt + rows + r;
1102                                        a[idx1] = t[startt + r];
1103                                        a[idx1 + 1] = t[idx2];
1104                                        a[idx1 + 2] = t[idx2 + rows];
1105                                        a[idx1 + 3] = t[idx2 + 2 * rows];
1106                                    }
1107                                }
1108                            } else if (columns == 2) {
1109                                for (int r = 0; r < rows; r++) {
1110                                    idx1 = idx0 + r * rowStride;
1111                                    t[startt + r] = a[idx1];
1112                                    t[startt + rows + r] = a[idx1 + 1];
1113                                }
1114                                dstRows.forward(t, startt, scale);
1115                                dstRows.forward(t, startt + rows, scale);
1116                                for (int r = 0; r < rows; r++) {
1117                                    idx1 = idx0 + r * rowStride;
1118                                    a[idx1] = t[startt + r];
1119                                    a[idx1 + 1] = t[startt + rows + r];
1120                                }
1121                            }
1122                        }
1123                    } else {
1124                        for (int s = n0; s < slices; s += nthreads) {
1125                            idx0 = s * sliceStride;
1126                            for (int r = 0; r < rows; r++) {
1127                                dstColumns.inverse(a, idx0 + r * rowStride, scale);
1128                            }
1129                            if (columns > 2) {
1130                                for (int c = 0; c < columns; c += 4) {
1131                                    for (int r = 0; r < rows; r++) {
1132                                        idx1 = idx0 + r * rowStride + c;
1133                                        idx2 = startt + rows + r;
1134                                        t[startt + r] = a[idx1];
1135                                        t[idx2] = a[idx1 + 1];
1136                                        t[idx2 + rows] = a[idx1 + 2];
1137                                        t[idx2 + 2 * rows] = a[idx1 + 3];
1138                                    }
1139                                    dstRows.inverse(t, startt, scale);
1140                                    dstRows.inverse(t, startt + rows, scale);
1141                                    dstRows.inverse(t, startt + 2 * rows, scale);
1142                                    dstRows.inverse(t, startt + 3 * rows, scale);
1143                                    for (int r = 0; r < rows; r++) {
1144                                        idx1 = idx0 + r * rowStride + c;
1145                                        idx2 = startt + rows + r;
1146                                        a[idx1] = t[startt + r];
1147                                        a[idx1 + 1] = t[idx2];
1148                                        a[idx1 + 2] = t[idx2 + rows];
1149                                        a[idx1 + 3] = t[idx2 + 2 * rows];
1150                                    }
1151                                }
1152                            } else if (columns == 2) {
1153                                for (int r = 0; r < rows; r++) {
1154                                    idx1 = idx0 + r * rowStride;
1155                                    t[startt + r] = a[idx1];
1156                                    t[startt + rows + r] = a[idx1 + 1];
1157                                }
1158                                dstRows.inverse(t, startt, scale);
1159                                dstRows.inverse(t, startt + rows, scale);
1160                                for (int r = 0; r < rows; r++) {
1161                                    idx1 = idx0 + r * rowStride;
1162                                    a[idx1] = t[startt + r];
1163                                    a[idx1 + 1] = t[startt + rows + r];
1164                                }
1165                            }
1166                        }
1167                    }
1168                }
1169            });
1170        }
1171        ConcurrencyUtils.waitForCompletion(futures);
1172    }
1173
1174    private void ddxt3da_subth(final int isgn, final float[][][] a, final boolean scale) {
1175        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads();
1176        int nt = 4 * rows;
1177        if (columns == 2) {
1178            nt >>= 1;
1179        }
1180        Future<?>[] futures = new Future[nthreads];
1181
1182        for (int i = 0; i < nthreads; i++) {
1183            final int n0 = i;
1184            final int startt = nt * i;
1185            futures[i] = ConcurrencyUtils.submit(new Runnable() {
1186
1187                @Override
1188                                public void run() {
1189                    int idx2;
1190                    if (isgn == -1) {
1191                        for (int s = n0; s < slices; s += nthreads) {
1192                            for (int r = 0; r < rows; r++) {
1193                                dstColumns.forward(a[s][r], scale);
1194                            }
1195                            if (columns > 2) {
1196                                for (int c = 0; c < columns; c += 4) {
1197                                    for (int r = 0; r < rows; r++) {
1198                                        idx2 = startt + rows + r;
1199                                        t[startt + r] = a[s][r][c];
1200                                        t[idx2] = a[s][r][c + 1];
1201                                        t[idx2 + rows] = a[s][r][c + 2];
1202                                        t[idx2 + 2 * rows] = a[s][r][c + 3];
1203                                    }
1204                                    dstRows.forward(t, startt, scale);
1205                                    dstRows.forward(t, startt + rows, scale);
1206                                    dstRows.forward(t, startt + 2 * rows, scale);
1207                                    dstRows.forward(t, startt + 3 * rows, scale);
1208                                    for (int r = 0; r < rows; r++) {
1209                                        idx2 = startt + rows + r;
1210                                        a[s][r][c] = t[startt + r];
1211                                        a[s][r][c + 1] = t[idx2];
1212                                        a[s][r][c + 2] = t[idx2 + rows];
1213                                        a[s][r][c + 3] = t[idx2 + 2 * rows];
1214                                    }
1215                                }
1216                            } else if (columns == 2) {
1217                                for (int r = 0; r < rows; r++) {
1218                                    t[startt + r] = a[s][r][0];
1219                                    t[startt + rows + r] = a[s][r][1];
1220                                }
1221                                dstRows.forward(t, startt, scale);
1222                                dstRows.forward(t, startt + rows, scale);
1223                                for (int r = 0; r < rows; r++) {
1224                                    a[s][r][0] = t[startt + r];
1225                                    a[s][r][1] = t[startt + rows + r];
1226                                }
1227                            }
1228                        }
1229                    } else {
1230                        for (int s = n0; s < slices; s += nthreads) {
1231                            for (int r = 0; r < rows; r++) {
1232                                dstColumns.inverse(a[s][r], scale);
1233                            }
1234                            if (columns > 2) {
1235                                for (int c = 0; c < columns; c += 4) {
1236                                    for (int r = 0; r < rows; r++) {
1237                                        idx2 = startt + rows + r;
1238                                        t[startt + r] = a[s][r][c];
1239                                        t[idx2] = a[s][r][c + 1];
1240                                        t[idx2 + rows] = a[s][r][c + 2];
1241                                        t[idx2 + 2 * rows] = a[s][r][c + 3];
1242                                    }
1243                                    dstRows.inverse(t, startt, scale);
1244                                    dstRows.inverse(t, startt + rows, scale);
1245                                    dstRows.inverse(t, startt + 2 * rows, scale);
1246                                    dstRows.inverse(t, startt + 3 * rows, scale);
1247                                    for (int r = 0; r < rows; r++) {
1248                                        idx2 = startt + rows + r;
1249                                        a[s][r][c] = t[startt + r];
1250                                        a[s][r][c + 1] = t[idx2];
1251                                        a[s][r][c + 2] = t[idx2 + rows];
1252                                        a[s][r][c + 3] = t[idx2 + 2 * rows];
1253                                    }
1254                                }
1255                            } else if (columns == 2) {
1256                                for (int r = 0; r < rows; r++) {
1257                                    t[startt + r] = a[s][r][0];
1258                                    t[startt + rows + r] = a[s][r][1];
1259                                }
1260                                dstRows.inverse(t, startt, scale);
1261                                dstRows.inverse(t, startt + rows, scale);
1262                                for (int r = 0; r < rows; r++) {
1263                                    a[s][r][0] = t[startt + r];
1264                                    a[s][r][1] = t[startt + rows + r];
1265                                }
1266                            }
1267                        }
1268                    }
1269                }
1270            });
1271        }
1272        ConcurrencyUtils.waitForCompletion(futures);
1273    }
1274
1275    private void ddxt3db_subth(final int isgn, final float[] a, final boolean scale) {
1276        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
1277        int nt = 4 * slices;
1278        if (columns == 2) {
1279            nt >>= 1;
1280        }
1281        Future<?>[] futures = new Future[nthreads];
1282
1283        for (int i = 0; i < nthreads; i++) {
1284            final int n0 = i;
1285            final int startt = nt * i;
1286            futures[i] = ConcurrencyUtils.submit(new Runnable() {
1287
1288                @Override
1289                                public void run() {
1290                    int idx0, idx1, idx2;
1291                    if (isgn == -1) {
1292                        if (columns > 2) {
1293                            for (int r = n0; r < rows; r += nthreads) {
1294                                idx0 = r * rowStride;
1295                                for (int c = 0; c < columns; c += 4) {
1296                                    for (int s = 0; s < slices; s++) {
1297                                        idx1 = s * sliceStride + idx0 + c;
1298                                        idx2 = startt + slices + s;
1299                                        t[startt + s] = a[idx1];
1300                                        t[idx2] = a[idx1 + 1];
1301                                        t[idx2 + slices] = a[idx1 + 2];
1302                                        t[idx2 + 2 * slices] = a[idx1 + 3];
1303                                    }
1304                                    dstSlices.forward(t, startt, scale);
1305                                    dstSlices.forward(t, startt + slices, scale);
1306                                    dstSlices.forward(t, startt + 2 * slices, scale);
1307                                    dstSlices.forward(t, startt + 3 * slices, scale);
1308                                    for (int s = 0; s < slices; s++) {
1309                                        idx1 = s * sliceStride + idx0 + c;
1310                                        idx2 = startt + slices + s;
1311                                        a[idx1] = t[startt + s];
1312                                        a[idx1 + 1] = t[idx2];
1313                                        a[idx1 + 2] = t[idx2 + slices];
1314                                        a[idx1 + 3] = t[idx2 + 2 * slices];
1315                                    }
1316                                }
1317                            }
1318                        } else if (columns == 2) {
1319                            for (int r = n0; r < rows; r += nthreads) {
1320                                idx0 = r * rowStride;
1321                                for (int s = 0; s < slices; s++) {
1322                                    idx1 = s * sliceStride + idx0;
1323                                    t[startt + s] = a[idx1];
1324                                    t[startt + slices + s] = a[idx1 + 1];
1325                                }
1326                                dstSlices.forward(t, startt, scale);
1327                                dstSlices.forward(t, startt + slices, scale);
1328                                for (int s = 0; s < slices; s++) {
1329                                    idx1 = s * sliceStride + idx0;
1330                                    a[idx1] = t[startt + s];
1331                                    a[idx1 + 1] = t[startt + slices + s];
1332                                }
1333                            }
1334                        }
1335                    } else {
1336                        if (columns > 2) {
1337                            for (int r = n0; r < rows; r += nthreads) {
1338                                idx0 = r * rowStride;
1339                                for (int c = 0; c < columns; c += 4) {
1340                                    for (int s = 0; s < slices; s++) {
1341                                        idx1 = s * sliceStride + idx0 + c;
1342                                        idx2 = startt + slices + s;
1343                                        t[startt + s] = a[idx1];
1344                                        t[idx2] = a[idx1 + 1];
1345                                        t[idx2 + slices] = a[idx1 + 2];
1346                                        t[idx2 + 2 * slices] = a[idx1 + 3];
1347                                    }
1348                                    dstSlices.inverse(t, startt, scale);
1349                                    dstSlices.inverse(t, startt + slices, scale);
1350                                    dstSlices.inverse(t, startt + 2 * slices, scale);
1351                                    dstSlices.inverse(t, startt + 3 * slices, scale);
1352                                    for (int s = 0; s < slices; s++) {
1353                                        idx1 = s * sliceStride + idx0 + c;
1354                                        idx2 = startt + slices + s;
1355                                        a[idx1] = t[startt + s];
1356                                        a[idx1 + 1] = t[idx2];
1357                                        a[idx1 + 2] = t[idx2 + slices];
1358                                        a[idx1 + 3] = t[idx2 + 2 * slices];
1359                                    }
1360                                }
1361                            }
1362                        } else if (columns == 2) {
1363                            for (int r = n0; r < rows; r += nthreads) {
1364                                idx0 = r * rowStride;
1365                                for (int s = 0; s < slices; s++) {
1366                                    idx1 = s * sliceStride + idx0;
1367                                    t[startt + s] = a[idx1];
1368                                    t[startt + slices + s] = a[idx1 + 1];
1369                                }
1370                                dstSlices.inverse(t, startt, scale);
1371                                dstSlices.inverse(t, startt + slices, scale);
1372
1373                                for (int s = 0; s < slices; s++) {
1374                                    idx1 = s * sliceStride + idx0;
1375                                    a[idx1] = t[startt + s];
1376                                    a[idx1 + 1] = t[startt + slices + s];
1377                                }
1378                            }
1379                        }
1380                    }
1381                }
1382            });
1383        }
1384        ConcurrencyUtils.waitForCompletion(futures);
1385    }
1386
1387    private void ddxt3db_subth(final int isgn, final float[][][] a, final boolean scale) {
1388        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
1389        int nt = 4 * slices;
1390        if (columns == 2) {
1391            nt >>= 1;
1392        }
1393        Future<?>[] futures = new Future[nthreads];
1394
1395        for (int i = 0; i < nthreads; i++) {
1396            final int n0 = i;
1397            final int startt = nt * i;
1398            futures[i] = ConcurrencyUtils.submit(new Runnable() {
1399
1400                @Override
1401                                public void run() {
1402                    int idx2;
1403                    if (isgn == -1) {
1404                        if (columns > 2) {
1405                            for (int r = n0; r < rows; r += nthreads) {
1406                                for (int c = 0; c < columns; c += 4) {
1407                                    for (int s = 0; s < slices; s++) {
1408                                        idx2 = startt + slices + s;
1409                                        t[startt + s] = a[s][r][c];
1410                                        t[idx2] = a[s][r][c + 1];
1411                                        t[idx2 + slices] = a[s][r][c + 2];
1412                                        t[idx2 + 2 * slices] = a[s][r][c + 3];
1413                                    }
1414                                    dstSlices.forward(t, startt, scale);
1415                                    dstSlices.forward(t, startt + slices, scale);
1416                                    dstSlices.forward(t, startt + 2 * slices, scale);
1417                                    dstSlices.forward(t, startt + 3 * slices, scale);
1418                                    for (int s = 0; s < slices; s++) {
1419                                        idx2 = startt + slices + s;
1420                                        a[s][r][c] = t[startt + s];
1421                                        a[s][r][c + 1] = t[idx2];
1422                                        a[s][r][c + 2] = t[idx2 + slices];
1423                                        a[s][r][c + 3] = t[idx2 + 2 * slices];
1424                                    }
1425                                }
1426                            }
1427                        } else if (columns == 2) {
1428                            for (int r = n0; r < rows; r += nthreads) {
1429                                for (int s = 0; s < slices; s++) {
1430                                    t[startt + s] = a[s][r][0];
1431                                    t[startt + slices + s] = a[s][r][1];
1432                                }
1433                                dstSlices.forward(t, startt, scale);
1434                                dstSlices.forward(t, startt + slices, scale);
1435                                for (int s = 0; s < slices; s++) {
1436                                    a[s][r][0] = t[startt + s];
1437                                    a[s][r][1] = t[startt + slices + s];
1438                                }
1439                            }
1440                        }
1441                    } else {
1442                        if (columns > 2) {
1443                            for (int r = n0; r < rows; r += nthreads) {
1444                                for (int c = 0; c < columns; c += 4) {
1445                                    for (int s = 0; s < slices; s++) {
1446                                        idx2 = startt + slices + s;
1447                                        t[startt + s] = a[s][r][c];
1448                                        t[idx2] = a[s][r][c + 1];
1449                                        t[idx2 + slices] = a[s][r][c + 2];
1450                                        t[idx2 + 2 * slices] = a[s][r][c + 3];
1451                                    }
1452                                    dstSlices.inverse(t, startt, scale);
1453                                    dstSlices.inverse(t, startt + slices, scale);
1454                                    dstSlices.inverse(t, startt + 2 * slices, scale);
1455                                    dstSlices.inverse(t, startt + 3 * slices, scale);
1456                                    for (int s = 0; s < slices; s++) {
1457                                        idx2 = startt + slices + s;
1458                                        a[s][r][c] = t[startt + s];
1459                                        a[s][r][c + 1] = t[idx2];
1460                                        a[s][r][c + 2] = t[idx2 + slices];
1461                                        a[s][r][c + 3] = t[idx2 + 2 * slices];
1462                                    }
1463                                }
1464                            }
1465                        } else if (columns == 2) {
1466                            for (int r = n0; r < rows; r += nthreads) {
1467                                for (int s = 0; s < slices; s++) {
1468                                    t[startt + s] = a[s][r][0];
1469                                    t[startt + slices + s] = a[s][r][1];
1470                                }
1471                                dstSlices.inverse(t, startt, scale);
1472                                dstSlices.inverse(t, startt + slices, scale);
1473
1474                                for (int s = 0; s < slices; s++) {
1475                                    a[s][r][0] = t[startt + s];
1476                                    a[s][r][1] = t[startt + slices + s];
1477                                }
1478                            }
1479                        }
1480                    }
1481                }
1482            });
1483        }
1484        ConcurrencyUtils.waitForCompletion(futures);
1485    }
1486}