001/* ***** BEGIN LICENSE BLOCK *****
002 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
003 *
004 * The contents of this file are subject to the Mozilla Public License Version
005 * 1.1 (the "License"); you may not use this file except in compliance with
006 * the License. You may obtain a copy of the License at
007 * http://www.mozilla.org/MPL/
008 *
009 * Software distributed under the License is distributed on an "AS IS" basis,
010 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
011 * for the specific language governing rights and limitations under the
012 * License.
013 *
014 * The Original Code is JTransforms.
015 *
016 * The Initial Developer of the Original Code is
017 * Piotr Wendykier, Emory University.
018 * Portions created by the Initial Developer are Copyright (C) 2007-2009
019 * the Initial Developer. All Rights Reserved.
020 *
021 * Alternatively, the contents of this file may be used under the terms of
022 * either the GNU General Public License Version 2 or later (the "GPL"), or
023 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
024 * in which case the provisions of the GPL or the LGPL are applicable instead
025 * of those above. If you wish to allow use of your version of this file only
026 * under the terms of either the GPL or the LGPL, and not to allow others to
027 * use your version of this file under the terms of the MPL, indicate your
028 * decision by deleting the provisions above and replace them with the notice
029 * and other provisions required by the GPL or the LGPL. If you do not delete
030 * the provisions above, a recipient may use your version of this file under
031 * the terms of any one of the MPL, the GPL or the LGPL.
032 *
033 * ***** END LICENSE BLOCK ***** */
034
035package edu.emory.mathcs.jtransforms.dht;
036
037import java.util.concurrent.Future;
038
039import edu.emory.mathcs.utils.ConcurrencyUtils;
040
041/**
042 * Computes 3D Discrete Hartley Transform (DHT) of real, double precision data.
043 * The sizes of all three dimensions can be arbitrary numbers. This is a
044 * parallel implementation optimized for SMP systems.<br>
045 * <br>
046 * Part of code is derived from General Purpose FFT Package written by Takuya Ooura
047 * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
048 * 
049 * @author Piotr Wendykier (piotr.wendykier@gmail.com)
050 * 
051 */
052public class DoubleDHT_3D {
053
054    private int slices;
055
056    private int rows;
057
058    private int columns;
059
060    private int sliceStride;
061
062    private int rowStride;
063
064    private double[] t;
065
066    private DoubleDHT_1D dhtSlices, dhtRows, dhtColumns;
067
068    private int oldNthreads;
069
070    private int nt;
071
072    private boolean isPowerOfTwo = false;
073
074    private boolean useThreads = false;
075
076    /**
077     * Creates new instance of DoubleDHT_3D.
078     * 
079     * @param slices
080     *            number of slices
081     * @param rows
082     *            number of rows
083     * @param columns
084     *            number of columns
085     */
086    public DoubleDHT_3D(int slices, int rows, int columns) {
087        if (slices <= 1 || rows <= 1 || columns <= 1) {
088            throw new IllegalArgumentException("slices, rows and columns must be greater than 1");
089        }
090        this.slices = slices;
091        this.rows = rows;
092        this.columns = columns;
093        this.sliceStride = rows * columns;
094        this.rowStride = columns;
095        if (slices * rows * columns >= ConcurrencyUtils.getThreadsBeginN_3D()) {
096            this.useThreads = true;
097        }
098        if (ConcurrencyUtils.isPowerOf2(slices) && ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) {
099            isPowerOfTwo = true;
100            oldNthreads = ConcurrencyUtils.getNumberOfThreads();
101            nt = slices;
102            if (nt < rows) {
103                nt = rows;
104            }
105            nt *= 4;
106            if (oldNthreads > 1) {
107                nt *= oldNthreads;
108            }
109            if (columns == 2) {
110                nt >>= 1;
111            }
112            t = new double[nt];
113        }
114        dhtSlices = new DoubleDHT_1D(slices);
115        if (slices == rows) {
116            dhtRows = dhtSlices;
117        } else {
118            dhtRows = new DoubleDHT_1D(rows);
119        }
120        if (slices == columns) {
121            dhtColumns = dhtSlices;
122        } else if (rows == columns) {
123            dhtColumns = dhtRows;
124        } else {
125            dhtColumns = new DoubleDHT_1D(columns);
126        }
127    }
128
129    /**
130     * Computes the 3D real, forward DHT leaving the result in <code>a</code>.
131     * The data is stored in 1D array addressed in slice-major, then row-major,
132     * then column-major, in order of significance, i.e. the element (i,j,k) of
133     * 3D array x[slices][rows][columns] is stored in a[i*sliceStride +
134     * j*rowStride + k], where sliceStride = rows * columns and rowStride =
135     * columns.
136     * 
137     * @param a
138     *            data to transform
139     */
140    public void forward(final double[] a) {
141        int nthreads = ConcurrencyUtils.getNumberOfThreads();
142        if (isPowerOfTwo) {
143            if (nthreads != oldNthreads) {
144                nt = slices;
145                if (nt < rows) {
146                    nt = rows;
147                }
148                nt *= 4;
149                if (nthreads > 1) {
150                    nt *= nthreads;
151                }
152                if (columns == 2) {
153                    nt >>= 1;
154                }
155                t = new double[nt];
156                oldNthreads = nthreads;
157            }
158            if ((nthreads > 1) && useThreads) {
159                ddxt3da_subth(-1, a, true);
160                ddxt3db_subth(-1, a, true);
161            } else {
162                ddxt3da_sub(-1, a, true);
163                ddxt3db_sub(-1, a, true);
164            }
165            yTransform(a);
166        } else {
167            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
168                Future<?>[] futures = new Future[nthreads];
169                int p = slices / nthreads;
170                for (int l = 0; l < nthreads; l++) {
171                    final int firstSlice = l * p;
172                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
173                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
174                        @Override
175                                                public void run() {
176                            for (int s = firstSlice; s < lastSlice; s++) {
177                                int idx1 = s * sliceStride;
178                                for (int r = 0; r < rows; r++) {
179                                    dhtColumns.forward(a, idx1 + r * rowStride);
180                                }
181                            }
182                        }
183                    });
184                }
185                ConcurrencyUtils.waitForCompletion(futures);
186
187                for (int l = 0; l < nthreads; l++) {
188                    final int firstSlice = l * p;
189                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
190                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
191                        @Override
192                                                public void run() {
193                            double[] temp = new double[rows];
194                            for (int s = firstSlice; s < lastSlice; s++) {
195                                int idx1 = s * sliceStride;
196                                for (int c = 0; c < columns; c++) {
197                                    for (int r = 0; r < rows; r++) {
198                                        int idx3 = idx1 + r * rowStride + c;
199                                        temp[r] = a[idx3];
200                                    }
201                                    dhtRows.forward(temp);
202                                    for (int r = 0; r < rows; r++) {
203                                        int idx3 = idx1 + r * rowStride + c;
204                                        a[idx3] = temp[r];
205                                    }
206                                }
207                            }
208                        }
209                    });
210                }
211                ConcurrencyUtils.waitForCompletion(futures);
212
213                p = rows / nthreads;
214                for (int l = 0; l < nthreads; l++) {
215                    final int startRow = l * p;
216                    final int stopRow;
217                    if (l == nthreads - 1) {
218                        stopRow = rows;
219                    } else {
220                        stopRow = startRow + p;
221                    }
222                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
223                        @Override
224                                                public void run() {
225                            double[] temp = new double[slices];
226                            for (int r = startRow; r < stopRow; r++) {
227                                int idx1 = r * rowStride;
228                                for (int c = 0; c < columns; c++) {
229                                    for (int s = 0; s < slices; s++) {
230                                        int idx3 = s * sliceStride + idx1 + c;
231                                        temp[s] = a[idx3];
232                                    }
233                                    dhtSlices.forward(temp);
234                                    for (int s = 0; s < slices; s++) {
235                                        int idx3 = s * sliceStride + idx1 + c;
236                                        a[idx3] = temp[s];
237                                    }
238                                }
239                            }
240                        }
241                    });
242                }
243                ConcurrencyUtils.waitForCompletion(futures);
244
245            } else {
246                for (int s = 0; s < slices; s++) {
247                    int idx1 = s * sliceStride;
248                    for (int r = 0; r < rows; r++) {
249                        dhtColumns.forward(a, idx1 + r * rowStride);
250                    }
251                }
252                double[] temp = new double[rows];
253                for (int s = 0; s < slices; s++) {
254                    int idx1 = s * sliceStride;
255                    for (int c = 0; c < columns; c++) {
256                        for (int r = 0; r < rows; r++) {
257                            int idx3 = idx1 + r * rowStride + c;
258                            temp[r] = a[idx3];
259                        }
260                        dhtRows.forward(temp);
261                        for (int r = 0; r < rows; r++) {
262                            int idx3 = idx1 + r * rowStride + c;
263                            a[idx3] = temp[r];
264                        }
265                    }
266                }
267                temp = new double[slices];
268                for (int r = 0; r < rows; r++) {
269                    int idx1 = r * rowStride;
270                    for (int c = 0; c < columns; c++) {
271                        for (int s = 0; s < slices; s++) {
272                            int idx3 = s * sliceStride + idx1 + c;
273                            temp[s] = a[idx3];
274                        }
275                        dhtSlices.forward(temp);
276                        for (int s = 0; s < slices; s++) {
277                            int idx3 = s * sliceStride + idx1 + c;
278                            a[idx3] = temp[s];
279                        }
280                    }
281                }
282            }
283            yTransform(a);
284        }
285    }
286
287    /**
288     * Computes the 3D real, forward DHT leaving the result in <code>a</code>.
289     * The data is stored in 3D array.
290     * 
291     * @param a
292     *            data to transform
293     */
294    public void forward(final double[][][] a) {
295        int nthreads = ConcurrencyUtils.getNumberOfThreads();
296        if (isPowerOfTwo) {
297            if (nthreads != oldNthreads) {
298                nt = slices;
299                if (nt < rows) {
300                    nt = rows;
301                }
302                nt *= 4;
303                if (nthreads > 1) {
304                    nt *= nthreads;
305                }
306                if (columns == 2) {
307                    nt >>= 1;
308                }
309                t = new double[nt];
310                oldNthreads = nthreads;
311            }
312            if ((nthreads > 1) && useThreads) {
313                ddxt3da_subth(-1, a, true);
314                ddxt3db_subth(-1, a, true);
315            } else {
316                ddxt3da_sub(-1, a, true);
317                ddxt3db_sub(-1, a, true);
318            }
319            yTransform(a);
320        } else {
321            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
322                Future<?>[] futures = new Future[nthreads];
323                int p = slices / nthreads;
324                for (int l = 0; l < nthreads; l++) {
325                    final int firstSlice = l * p;
326                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
327                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
328                        @Override
329                                                public void run() {
330                            for (int s = firstSlice; s < lastSlice; s++) {
331                                for (int r = 0; r < rows; r++) {
332                                    dhtColumns.forward(a[s][r]);
333                                }
334                            }
335                        }
336                    });
337                }
338                ConcurrencyUtils.waitForCompletion(futures);
339
340                for (int l = 0; l < nthreads; l++) {
341                    final int firstSlice = l * p;
342                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
343                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
344                        @Override
345                                                public void run() {
346                            double[] temp = new double[rows];
347                            for (int s = firstSlice; s < lastSlice; s++) {
348                                for (int c = 0; c < columns; c++) {
349                                    for (int r = 0; r < rows; r++) {
350                                        temp[r] = a[s][r][c];
351                                    }
352                                    dhtRows.forward(temp);
353                                    for (int r = 0; r < rows; r++) {
354                                        a[s][r][c] = temp[r];
355                                    }
356                                }
357                            }
358                        }
359                    });
360                }
361                ConcurrencyUtils.waitForCompletion(futures);
362
363                p = rows / nthreads;
364                for (int l = 0; l < nthreads; l++) {
365                    final int firstRow = l * p;
366                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
367                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
368                        @Override
369                                                public void run() {
370                            double[] temp = new double[slices];
371                            for (int r = firstRow; r < lastRow; r++) {
372                                for (int c = 0; c < columns; c++) {
373                                    for (int s = 0; s < slices; s++) {
374                                        temp[s] = a[s][r][c];
375                                    }
376                                    dhtSlices.forward(temp);
377                                    for (int s = 0; s < slices; s++) {
378                                        a[s][r][c] = temp[s];
379                                    }
380                                }
381                            }
382                        }
383                    });
384                }
385                ConcurrencyUtils.waitForCompletion(futures);
386
387            } else {
388                for (int s = 0; s < slices; s++) {
389                    for (int r = 0; r < rows; r++) {
390                        dhtColumns.forward(a[s][r]);
391                    }
392                }
393                double[] temp = new double[rows];
394                for (int s = 0; s < slices; s++) {
395                    for (int c = 0; c < columns; c++) {
396                        for (int r = 0; r < rows; r++) {
397                            temp[r] = a[s][r][c];
398                        }
399                        dhtRows.forward(temp);
400                        for (int r = 0; r < rows; r++) {
401                            a[s][r][c] = temp[r];
402                        }
403                    }
404                }
405                temp = new double[slices];
406                for (int r = 0; r < rows; r++) {
407                    for (int c = 0; c < columns; c++) {
408                        for (int s = 0; s < slices; s++) {
409                            temp[s] = a[s][r][c];
410                        }
411                        dhtSlices.forward(temp);
412                        for (int s = 0; s < slices; s++) {
413                            a[s][r][c] = temp[s];
414                        }
415                    }
416                }
417            }
418            yTransform(a);
419        }
420    }
421
422    /**
423     * Computes the 3D real, inverse DHT leaving the result in <code>a</code>.
424     * The data is stored in 1D array addressed in slice-major, then row-major,
425     * then column-major, in order of significance, i.e. the element (i,j,k) of
426     * 3D array x[slices][rows][columns] is stored in a[i*sliceStride +
427     * j*rowStride + k], where sliceStride = rows * columns and rowStride =
428     * columns.
429     * 
430     * @param a
431     *            data to transform
432     * @param scale
433     *            if true then scaling is performed
434     */
435    public void inverse(final double[] a, final boolean scale) {
436        int nthreads = ConcurrencyUtils.getNumberOfThreads();
437        if (isPowerOfTwo) {
438            if (nthreads != oldNthreads) {
439                nt = slices;
440                if (nt < rows) {
441                    nt = rows;
442                }
443                nt *= 4;
444                if (nthreads > 1) {
445                    nt *= nthreads;
446                }
447                if (columns == 2) {
448                    nt >>= 1;
449                }
450                t = new double[nt];
451                oldNthreads = nthreads;
452            }
453            if ((nthreads > 1) && useThreads) {
454                ddxt3da_subth(1, a, scale);
455                ddxt3db_subth(1, a, scale);
456            } else {
457                ddxt3da_sub(1, a, scale);
458                ddxt3db_sub(1, a, scale);
459            }
460            yTransform(a);
461        } else {
462            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
463                Future<?>[] futures = new Future[nthreads];
464                int p = slices / nthreads;
465                for (int l = 0; l < nthreads; l++) {
466                    final int firstSlice = l * p;
467                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
468                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
469                        @Override
470                                                public void run() {
471                            for (int s = firstSlice; s < lastSlice; s++) {
472                                int idx1 = s * sliceStride;
473                                for (int r = 0; r < rows; r++) {
474                                    dhtColumns.inverse(a, idx1 + r * rowStride, scale);
475                                }
476                            }
477                        }
478                    });
479                }
480                ConcurrencyUtils.waitForCompletion(futures);
481
482                for (int l = 0; l < nthreads; l++) {
483                    final int firstSlice = l * p;
484                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
485                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
486                        @Override
487                                                public void run() {
488                            double[] temp = new double[rows];
489                            for (int s = firstSlice; s < lastSlice; s++) {
490                                int idx1 = s * sliceStride;
491                                for (int c = 0; c < columns; c++) {
492                                    for (int r = 0; r < rows; r++) {
493                                        int idx3 = idx1 + r * rowStride + c;
494                                        temp[r] = a[idx3];
495                                    }
496                                    dhtRows.inverse(temp, scale);
497                                    for (int r = 0; r < rows; r++) {
498                                        int idx3 = idx1 + r * rowStride + c;
499                                        a[idx3] = temp[r];
500                                    }
501                                }
502                            }
503                        }
504                    });
505                }
506                ConcurrencyUtils.waitForCompletion(futures);
507
508                p = rows / nthreads;
509                for (int l = 0; l < nthreads; l++) {
510                    final int firstRow = l * p;
511                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
512                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
513                        @Override
514                                                public void run() {
515                            double[] temp = new double[slices];
516                            for (int r = firstRow; r < lastRow; r++) {
517                                int idx1 = r * rowStride;
518                                for (int c = 0; c < columns; c++) {
519                                    for (int s = 0; s < slices; s++) {
520                                        int idx3 = s * sliceStride + idx1 + c;
521                                        temp[s] = a[idx3];
522                                    }
523                                    dhtSlices.inverse(temp, scale);
524                                    for (int s = 0; s < slices; s++) {
525                                        int idx3 = s * sliceStride + idx1 + c;
526                                        a[idx3] = temp[s];
527                                    }
528                                }
529                            }
530                        }
531                    });
532                }
533                ConcurrencyUtils.waitForCompletion(futures);
534
535            } else {
536                for (int s = 0; s < slices; s++) {
537                    int idx1 = s * sliceStride;
538                    for (int r = 0; r < rows; r++) {
539                        dhtColumns.inverse(a, idx1 + r * rowStride, scale);
540                    }
541                }
542                double[] temp = new double[rows];
543                for (int s = 0; s < slices; s++) {
544                    int idx1 = s * sliceStride;
545                    for (int c = 0; c < columns; c++) {
546                        for (int r = 0; r < rows; r++) {
547                            int idx3 = idx1 + r * rowStride + c;
548                            temp[r] = a[idx3];
549                        }
550                        dhtRows.inverse(temp, scale);
551                        for (int r = 0; r < rows; r++) {
552                            int idx3 = idx1 + r * rowStride + c;
553                            a[idx3] = temp[r];
554                        }
555                    }
556                }
557                temp = new double[slices];
558                for (int r = 0; r < rows; r++) {
559                    int idx1 = r * rowStride;
560                    for (int c = 0; c < columns; c++) {
561                        for (int s = 0; s < slices; s++) {
562                            int idx3 = s * sliceStride + idx1 + c;
563                            temp[s] = a[idx3];
564                        }
565                        dhtSlices.inverse(temp, scale);
566                        for (int s = 0; s < slices; s++) {
567                            int idx3 = s * sliceStride + idx1 + c;
568                            a[idx3] = temp[s];
569                        }
570                    }
571                }
572            }
573            yTransform(a);
574        }
575    }
576
577    /**
578     * Computes the 3D real, inverse DHT leaving the result in <code>a</code>.
579     * The data is stored in 3D array.
580     * 
581     * @param a
582     *            data to transform
583     * @param scale
584     *            if true then scaling is performed
585     */
586    public void inverse(final double[][][] a, final boolean scale) {
587        int nthreads = ConcurrencyUtils.getNumberOfThreads();
588        if (isPowerOfTwo) {
589            if (nthreads != oldNthreads) {
590                nt = slices;
591                if (nt < rows) {
592                    nt = rows;
593                }
594                nt *= 4;
595                if (nthreads > 1) {
596                    nt *= nthreads;
597                }
598                if (columns == 2) {
599                    nt >>= 1;
600                }
601                t = new double[nt];
602                oldNthreads = nthreads;
603            }
604            if ((nthreads > 1) && useThreads) {
605                ddxt3da_subth(1, a, scale);
606                ddxt3db_subth(1, a, scale);
607            } else {
608                ddxt3da_sub(1, a, scale);
609                ddxt3db_sub(1, a, scale);
610            }
611            yTransform(a);
612        } else {
613            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
614                Future<?>[] futures = new Future[nthreads];
615                int p = slices / nthreads;
616                for (int l = 0; l < nthreads; l++) {
617                    final int firstSlice = l * p;
618                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
619                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
620                        @Override
621                                                public void run() {
622                            for (int s = firstSlice; s < lastSlice; s++) {
623                                for (int r = 0; r < rows; r++) {
624                                    dhtColumns.inverse(a[s][r], scale);
625                                }
626                            }
627                        }
628                    });
629                }
630                ConcurrencyUtils.waitForCompletion(futures);
631
632                for (int l = 0; l < nthreads; l++) {
633                    final int firstSlice = l * p;
634                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
635                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
636                        @Override
637                                                public void run() {
638                            double[] temp = new double[rows];
639                            for (int s = firstSlice; s < lastSlice; s++) {
640                                for (int c = 0; c < columns; c++) {
641                                    for (int r = 0; r < rows; r++) {
642                                        temp[r] = a[s][r][c];
643                                    }
644                                    dhtRows.inverse(temp, scale);
645                                    for (int r = 0; r < rows; r++) {
646                                        a[s][r][c] = temp[r];
647                                    }
648                                }
649                            }
650                        }
651                    });
652                }
653                ConcurrencyUtils.waitForCompletion(futures);
654
655                p = rows / nthreads;
656                for (int l = 0; l < nthreads; l++) {
657                    final int firstRow = l * p;
658                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
659                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
660                        @Override
661                                                public void run() {
662                            double[] temp = new double[slices];
663                            for (int r = firstRow; r < lastRow; r++) {
664                                for (int c = 0; c < columns; c++) {
665                                    for (int s = 0; s < slices; s++) {
666                                        temp[s] = a[s][r][c];
667                                    }
668                                    dhtSlices.inverse(temp, scale);
669                                    for (int s = 0; s < slices; s++) {
670                                        a[s][r][c] = temp[s];
671                                    }
672                                }
673                            }
674                        }
675                    });
676                }
677                ConcurrencyUtils.waitForCompletion(futures);
678
679            } else {
680                for (int s = 0; s < slices; s++) {
681                    for (int r = 0; r < rows; r++) {
682                        dhtColumns.inverse(a[s][r], scale);
683                    }
684                }
685                double[] temp = new double[rows];
686                for (int s = 0; s < slices; s++) {
687                    for (int c = 0; c < columns; c++) {
688                        for (int r = 0; r < rows; r++) {
689                            temp[r] = a[s][r][c];
690                        }
691                        dhtRows.inverse(temp, scale);
692                        for (int r = 0; r < rows; r++) {
693                            a[s][r][c] = temp[r];
694                        }
695                    }
696                }
697                temp = new double[slices];
698                for (int r = 0; r < rows; r++) {
699                    for (int c = 0; c < columns; c++) {
700                        for (int s = 0; s < slices; s++) {
701                            temp[s] = a[s][r][c];
702                        }
703                        dhtSlices.inverse(temp, scale);
704                        for (int s = 0; s < slices; s++) {
705                            a[s][r][c] = temp[s];
706                        }
707                    }
708                }
709            }
710            yTransform(a);
711        }
712    }
713
714    private void ddxt3da_sub(int isgn, double[] a, boolean scale) {
715        int idx0, idx1, idx2;
716
717        if (isgn == -1) {
718            for (int s = 0; s < slices; s++) {
719                idx0 = s * sliceStride;
720                for (int r = 0; r < rows; r++) {
721                    dhtColumns.forward(a, idx0 + r * rowStride);
722                }
723                if (columns > 2) {
724                    for (int c = 0; c < columns; c += 4) {
725                        for (int r = 0; r < rows; r++) {
726                            idx1 = idx0 + r * rowStride + c;
727                            idx2 = rows + r;
728                            t[r] = a[idx1];
729                            t[idx2] = a[idx1 + 1];
730                            t[idx2 + rows] = a[idx1 + 2];
731                            t[idx2 + 2 * rows] = a[idx1 + 3];
732                        }
733                        dhtRows.forward(t, 0);
734                        dhtRows.forward(t, rows);
735                        dhtRows.forward(t, 2 * rows);
736                        dhtRows.forward(t, 3 * rows);
737                        for (int r = 0; r < rows; r++) {
738                            idx1 = idx0 + r * rowStride + c;
739                            idx2 = rows + r;
740                            a[idx1] = t[r];
741                            a[idx1 + 1] = t[idx2];
742                            a[idx1 + 2] = t[idx2 + rows];
743                            a[idx1 + 3] = t[idx2 + 2 * rows];
744                        }
745                    }
746                } else if (columns == 2) {
747                    for (int r = 0; r < rows; r++) {
748                        idx1 = idx0 + r * rowStride;
749                        t[r] = a[idx1];
750                        t[rows + r] = a[idx1 + 1];
751                    }
752                    dhtRows.forward(t, 0);
753                    dhtRows.forward(t, rows);
754                    for (int r = 0; r < rows; r++) {
755                        idx1 = idx0 + r * rowStride;
756                        a[idx1] = t[r];
757                        a[idx1 + 1] = t[rows + r];
758                    }
759                }
760            }
761        } else {
762            for (int s = 0; s < slices; s++) {
763                idx0 = s * sliceStride;
764                for (int r = 0; r < rows; r++) {
765                    dhtColumns.inverse(a, idx0 + r * rowStride, scale);
766                }
767                if (columns > 2) {
768                    for (int c = 0; c < columns; c += 4) {
769                        for (int r = 0; r < rows; r++) {
770                            idx1 = idx0 + r * rowStride + c;
771                            idx2 = rows + r;
772                            t[r] = a[idx1];
773                            t[idx2] = a[idx1 + 1];
774                            t[idx2 + rows] = a[idx1 + 2];
775                            t[idx2 + 2 * rows] = a[idx1 + 3];
776                        }
777                        dhtRows.inverse(t, 0, scale);
778                        dhtRows.inverse(t, rows, scale);
779                        dhtRows.inverse(t, 2 * rows, scale);
780                        dhtRows.inverse(t, 3 * rows, scale);
781                        for (int r = 0; r < rows; r++) {
782                            idx1 = idx0 + r * rowStride + c;
783                            idx2 = rows + r;
784                            a[idx1] = t[r];
785                            a[idx1 + 1] = t[idx2];
786                            a[idx1 + 2] = t[idx2 + rows];
787                            a[idx1 + 3] = t[idx2 + 2 * rows];
788                        }
789                    }
790                } else if (columns == 2) {
791                    for (int r = 0; r < rows; r++) {
792                        idx1 = idx0 + r * rowStride;
793                        t[r] = a[idx1];
794                        t[rows + r] = a[idx1 + 1];
795                    }
796                    dhtRows.inverse(t, 0, scale);
797                    dhtRows.inverse(t, rows, scale);
798                    for (int r = 0; r < rows; r++) {
799                        idx1 = idx0 + r * rowStride;
800                        a[idx1] = t[r];
801                        a[idx1 + 1] = t[rows + r];
802                    }
803                }
804            }
805        }
806    }
807
808    private void ddxt3da_sub(int isgn, double[][][] a, boolean scale) {
809        int idx2;
810
811        if (isgn == -1) {
812            for (int s = 0; s < slices; s++) {
813                for (int r = 0; r < rows; r++) {
814                    dhtColumns.forward(a[s][r]);
815                }
816                if (columns > 2) {
817                    for (int c = 0; c < columns; c += 4) {
818                        for (int r = 0; r < rows; r++) {
819                            idx2 = rows + r;
820                            t[r] = a[s][r][c];
821                            t[idx2] = a[s][r][c + 1];
822                            t[idx2 + rows] = a[s][r][c + 2];
823                            t[idx2 + 2 * rows] = a[s][r][c + 3];
824                        }
825                        dhtRows.forward(t, 0);
826                        dhtRows.forward(t, rows);
827                        dhtRows.forward(t, 2 * rows);
828                        dhtRows.forward(t, 3 * rows);
829                        for (int r = 0; r < rows; r++) {
830                            idx2 = rows + r;
831                            a[s][r][c] = t[r];
832                            a[s][r][c + 1] = t[idx2];
833                            a[s][r][c + 2] = t[idx2 + rows];
834                            a[s][r][c + 3] = t[idx2 + 2 * rows];
835                        }
836                    }
837                } else if (columns == 2) {
838                    for (int r = 0; r < rows; r++) {
839                        t[r] = a[s][r][0];
840                        t[rows + r] = a[s][r][1];
841                    }
842                    dhtRows.forward(t, 0);
843                    dhtRows.forward(t, rows);
844                    for (int r = 0; r < rows; r++) {
845                        a[s][r][0] = t[r];
846                        a[s][r][1] = t[rows + r];
847                    }
848                }
849            }
850        } else {
851            for (int s = 0; s < slices; s++) {
852                for (int r = 0; r < rows; r++) {
853                    dhtColumns.inverse(a[s][r], scale);
854                }
855                if (columns > 2) {
856                    for (int c = 0; c < columns; c += 4) {
857                        for (int r = 0; r < rows; r++) {
858                            idx2 = rows + r;
859                            t[r] = a[s][r][c];
860                            t[idx2] = a[s][r][c + 1];
861                            t[idx2 + rows] = a[s][r][c + 2];
862                            t[idx2 + 2 * rows] = a[s][r][c + 3];
863                        }
864                        dhtRows.inverse(t, 0, scale);
865                        dhtRows.inverse(t, rows, scale);
866                        dhtRows.inverse(t, 2 * rows, scale);
867                        dhtRows.inverse(t, 3 * rows, scale);
868                        for (int r = 0; r < rows; r++) {
869                            idx2 = rows + r;
870                            a[s][r][c] = t[r];
871                            a[s][r][c + 1] = t[idx2];
872                            a[s][r][c + 2] = t[idx2 + rows];
873                            a[s][r][c + 3] = t[idx2 + 2 * rows];
874                        }
875                    }
876                } else if (columns == 2) {
877                    for (int r = 0; r < rows; r++) {
878                        t[r] = a[s][r][0];
879                        t[rows + r] = a[s][r][1];
880                    }
881                    dhtRows.inverse(t, 0, scale);
882                    dhtRows.inverse(t, rows, scale);
883                    for (int r = 0; r < rows; r++) {
884                        a[s][r][0] = t[r];
885                        a[s][r][1] = t[rows + r];
886                    }
887                }
888            }
889        }
890    }
891
892    private void ddxt3db_sub(int isgn, double[] a, boolean scale) {
893        int idx0, idx1, idx2;
894
895        if (isgn == -1) {
896            if (columns > 2) {
897                for (int r = 0; r < rows; r++) {
898                    idx0 = r * rowStride;
899                    for (int c = 0; c < columns; c += 4) {
900                        for (int s = 0; s < slices; s++) {
901                            idx1 = s * sliceStride + idx0 + c;
902                            idx2 = slices + s;
903                            t[s] = a[idx1];
904                            t[idx2] = a[idx1 + 1];
905                            t[idx2 + slices] = a[idx1 + 2];
906                            t[idx2 + 2 * slices] = a[idx1 + 3];
907                        }
908                        dhtSlices.forward(t, 0);
909                        dhtSlices.forward(t, slices);
910                        dhtSlices.forward(t, 2 * slices);
911                        dhtSlices.forward(t, 3 * slices);
912                        for (int s = 0; s < slices; s++) {
913                            idx1 = s * sliceStride + idx0 + c;
914                            idx2 = slices + s;
915                            a[idx1] = t[s];
916                            a[idx1 + 1] = t[idx2];
917                            a[idx1 + 2] = t[idx2 + slices];
918                            a[idx1 + 3] = t[idx2 + 2 * slices];
919                        }
920                    }
921                }
922            } else if (columns == 2) {
923                for (int r = 0; r < rows; r++) {
924                    idx0 = r * rowStride;
925                    for (int s = 0; s < slices; s++) {
926                        idx1 = s * sliceStride + idx0;
927                        t[s] = a[idx1];
928                        t[slices + s] = a[idx1 + 1];
929                    }
930                    dhtSlices.forward(t, 0);
931                    dhtSlices.forward(t, slices);
932                    for (int s = 0; s < slices; s++) {
933                        idx1 = s * sliceStride + idx0;
934                        a[idx1] = t[s];
935                        a[idx1 + 1] = t[slices + s];
936                    }
937                }
938            }
939        } else {
940            if (columns > 2) {
941                for (int r = 0; r < rows; r++) {
942                    idx0 = r * rowStride;
943                    for (int c = 0; c < columns; c += 4) {
944                        for (int s = 0; s < slices; s++) {
945                            idx1 = s * sliceStride + idx0 + c;
946                            idx2 = slices + s;
947                            t[s] = a[idx1];
948                            t[idx2] = a[idx1 + 1];
949                            t[idx2 + slices] = a[idx1 + 2];
950                            t[idx2 + 2 * slices] = a[idx1 + 3];
951                        }
952                        dhtSlices.inverse(t, 0, scale);
953                        dhtSlices.inverse(t, slices, scale);
954                        dhtSlices.inverse(t, 2 * slices, scale);
955                        dhtSlices.inverse(t, 3 * slices, scale);
956
957                        for (int s = 0; s < slices; s++) {
958                            idx1 = s * sliceStride + idx0 + c;
959                            idx2 = slices + s;
960                            a[idx1] = t[s];
961                            a[idx1 + 1] = t[idx2];
962                            a[idx1 + 2] = t[idx2 + slices];
963                            a[idx1 + 3] = t[idx2 + 2 * slices];
964                        }
965                    }
966                }
967            } else if (columns == 2) {
968                for (int r = 0; r < rows; r++) {
969                    idx0 = r * rowStride;
970                    for (int s = 0; s < slices; s++) {
971                        idx1 = s * sliceStride + idx0;
972                        t[s] = a[idx1];
973                        t[slices + s] = a[idx1 + 1];
974                    }
975                    dhtSlices.inverse(t, 0, scale);
976                    dhtSlices.inverse(t, slices, scale);
977                    for (int s = 0; s < slices; s++) {
978                        idx1 = s * sliceStride + idx0;
979                        a[idx1] = t[s];
980                        a[idx1 + 1] = t[slices + s];
981                    }
982                }
983            }
984        }
985    }
986
987    private void ddxt3db_sub(int isgn, double[][][] a, boolean scale) {
988        int idx2;
989
990        if (isgn == -1) {
991            if (columns > 2) {
992                for (int r = 0; r < rows; r++) {
993                    for (int c = 0; c < columns; c += 4) {
994                        for (int s = 0; s < slices; s++) {
995                            idx2 = slices + s;
996                            t[s] = a[s][r][c];
997                            t[idx2] = a[s][r][c + 1];
998                            t[idx2 + slices] = a[s][r][c + 2];
999                            t[idx2 + 2 * slices] = a[s][r][c + 3];
1000                        }
1001                        dhtSlices.forward(t, 0);
1002                        dhtSlices.forward(t, slices);
1003                        dhtSlices.forward(t, 2 * slices);
1004                        dhtSlices.forward(t, 3 * slices);
1005                        for (int s = 0; s < slices; s++) {
1006                            idx2 = slices + s;
1007                            a[s][r][c] = t[s];
1008                            a[s][r][c + 1] = t[idx2];
1009                            a[s][r][c + 2] = t[idx2 + slices];
1010                            a[s][r][c + 3] = t[idx2 + 2 * slices];
1011                        }
1012                    }
1013                }
1014            } else if (columns == 2) {
1015                for (int r = 0; r < rows; r++) {
1016                    for (int s = 0; s < slices; s++) {
1017                        t[s] = a[s][r][0];
1018                        t[slices + s] = a[s][r][1];
1019                    }
1020                    dhtSlices.forward(t, 0);
1021                    dhtSlices.forward(t, slices);
1022                    for (int s = 0; s < slices; s++) {
1023                        a[s][r][0] = t[s];
1024                        a[s][r][1] = t[slices + s];
1025                    }
1026                }
1027            }
1028        } else {
1029            if (columns > 2) {
1030                for (int r = 0; r < rows; r++) {
1031                    for (int c = 0; c < columns; c += 4) {
1032                        for (int s = 0; s < slices; s++) {
1033                            idx2 = slices + s;
1034                            t[s] = a[s][r][c];
1035                            t[idx2] = a[s][r][c + 1];
1036                            t[idx2 + slices] = a[s][r][c + 2];
1037                            t[idx2 + 2 * slices] = a[s][r][c + 3];
1038                        }
1039                        dhtSlices.inverse(t, 0, scale);
1040                        dhtSlices.inverse(t, slices, scale);
1041                        dhtSlices.inverse(t, 2 * slices, scale);
1042                        dhtSlices.inverse(t, 3 * slices, scale);
1043
1044                        for (int s = 0; s < slices; s++) {
1045                            idx2 = slices + s;
1046                            a[s][r][c] = t[s];
1047                            a[s][r][c + 1] = t[idx2];
1048                            a[s][r][c + 2] = t[idx2 + slices];
1049                            a[s][r][c + 3] = t[idx2 + 2 * slices];
1050                        }
1051                    }
1052                }
1053            } else if (columns == 2) {
1054                for (int r = 0; r < rows; r++) {
1055                    for (int s = 0; s < slices; s++) {
1056                        t[s] = a[s][r][0];
1057                        t[slices + s] = a[s][r][1];
1058                    }
1059                    dhtSlices.inverse(t, 0, scale);
1060                    dhtSlices.inverse(t, slices, scale);
1061                    for (int s = 0; s < slices; s++) {
1062                        a[s][r][0] = t[s];
1063                        a[s][r][1] = t[slices + s];
1064                    }
1065                }
1066            }
1067        }
1068    }
1069
1070    private void ddxt3da_subth(final int isgn, final double[] a, final boolean scale) {
1071        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads();
1072        int nt = 4 * rows;
1073        if (columns == 2) {
1074            nt >>= 1;
1075        }
1076        Future<?>[] futures = new Future[nthreads];
1077
1078        for (int i = 0; i < nthreads; i++) {
1079            final int n0 = i;
1080            final int startt = nt * i;
1081            futures[i] = ConcurrencyUtils.submit(new Runnable() {
1082
1083                @Override
1084                                public void run() {
1085                    int idx0, idx1, idx2;
1086                    if (isgn == -1) {
1087                        for (int s = n0; s < slices; s += nthreads) {
1088                            idx0 = s * sliceStride;
1089                            for (int r = 0; r < rows; r++) {
1090                                dhtColumns.forward(a, idx0 + r * rowStride);
1091                            }
1092                            if (columns > 2) {
1093                                for (int c = 0; c < columns; c += 4) {
1094                                    for (int r = 0; r < rows; r++) {
1095                                        idx1 = idx0 + r * rowStride + c;
1096                                        idx2 = startt + rows + r;
1097                                        t[startt + r] = a[idx1];
1098                                        t[idx2] = a[idx1 + 1];
1099                                        t[idx2 + rows] = a[idx1 + 2];
1100                                        t[idx2 + 2 * rows] = a[idx1 + 3];
1101                                    }
1102                                    dhtRows.forward(t, startt);
1103                                    dhtRows.forward(t, startt + rows);
1104                                    dhtRows.forward(t, startt + 2 * rows);
1105                                    dhtRows.forward(t, startt + 3 * rows);
1106                                    for (int r = 0; r < rows; r++) {
1107                                        idx1 = idx0 + r * rowStride + c;
1108                                        idx2 = startt + rows + r;
1109                                        a[idx1] = t[startt + r];
1110                                        a[idx1 + 1] = t[idx2];
1111                                        a[idx1 + 2] = t[idx2 + rows];
1112                                        a[idx1 + 3] = t[idx2 + 2 * rows];
1113                                    }
1114                                }
1115                            } else if (columns == 2) {
1116                                for (int r = 0; r < rows; r++) {
1117                                    idx1 = idx0 + r * rowStride;
1118                                    t[startt + r] = a[idx1];
1119                                    t[startt + rows + r] = a[idx1 + 1];
1120                                }
1121                                dhtRows.forward(t, startt);
1122                                dhtRows.forward(t, startt + rows);
1123                                for (int r = 0; r < rows; r++) {
1124                                    idx1 = idx0 + r * rowStride;
1125                                    a[idx1] = t[startt + r];
1126                                    a[idx1 + 1] = t[startt + rows + r];
1127                                }
1128                            }
1129                        }
1130                    } else {
1131                        for (int s = n0; s < slices; s += nthreads) {
1132                            idx0 = s * sliceStride;
1133                            for (int r = 0; r < rows; r++) {
1134                                dhtColumns.inverse(a, idx0 + r * rowStride, scale);
1135                            }
1136                            if (columns > 2) {
1137                                for (int c = 0; c < columns; c += 4) {
1138                                    for (int r = 0; r < rows; r++) {
1139                                        idx1 = idx0 + r * rowStride + c;
1140                                        idx2 = startt + rows + r;
1141                                        t[startt + r] = a[idx1];
1142                                        t[idx2] = a[idx1 + 1];
1143                                        t[idx2 + rows] = a[idx1 + 2];
1144                                        t[idx2 + 2 * rows] = a[idx1 + 3];
1145                                    }
1146                                    dhtRows.inverse(t, startt, scale);
1147                                    dhtRows.inverse(t, startt + rows, scale);
1148                                    dhtRows.inverse(t, startt + 2 * rows, scale);
1149                                    dhtRows.inverse(t, startt + 3 * rows, scale);
1150                                    for (int r = 0; r < rows; r++) {
1151                                        idx1 = idx0 + r * rowStride + c;
1152                                        idx2 = startt + rows + r;
1153                                        a[idx1] = t[startt + r];
1154                                        a[idx1 + 1] = t[idx2];
1155                                        a[idx1 + 2] = t[idx2 + rows];
1156                                        a[idx1 + 3] = t[idx2 + 2 * rows];
1157                                    }
1158                                }
1159                            } else if (columns == 2) {
1160                                for (int r = 0; r < rows; r++) {
1161                                    idx1 = idx0 + r * rowStride;
1162                                    t[startt + r] = a[idx1];
1163                                    t[startt + rows + r] = a[idx1 + 1];
1164                                }
1165                                dhtRows.inverse(t, startt, scale);
1166                                dhtRows.inverse(t, startt + rows, scale);
1167                                for (int r = 0; r < rows; r++) {
1168                                    idx1 = idx0 + r * rowStride;
1169                                    a[idx1] = t[startt + r];
1170                                    a[idx1 + 1] = t[startt + rows + r];
1171                                }
1172                            }
1173                        }
1174                    }
1175                }
1176            });
1177        }
1178        ConcurrencyUtils.waitForCompletion(futures);
1179    }
1180
1181    private void ddxt3da_subth(final int isgn, final double[][][] a, final boolean scale) {
1182        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads();
1183        int nt = 4 * rows;
1184        if (columns == 2) {
1185            nt >>= 1;
1186        }
1187        Future<?>[] futures = new Future[nthreads];
1188
1189        for (int i = 0; i < nthreads; i++) {
1190            final int n0 = i;
1191            final int startt = nt * i;
1192            futures[i] = ConcurrencyUtils.submit(new Runnable() {
1193
1194                @Override
1195                                public void run() {
1196                    int idx2;
1197                    if (isgn == -1) {
1198                        for (int s = n0; s < slices; s += nthreads) {
1199                            for (int r = 0; r < rows; r++) {
1200                                dhtColumns.forward(a[s][r]);
1201                            }
1202                            if (columns > 2) {
1203                                for (int c = 0; c < columns; c += 4) {
1204                                    for (int r = 0; r < rows; r++) {
1205                                        idx2 = startt + rows + r;
1206                                        t[startt + r] = a[s][r][c];
1207                                        t[idx2] = a[s][r][c + 1];
1208                                        t[idx2 + rows] = a[s][r][c + 2];
1209                                        t[idx2 + 2 * rows] = a[s][r][c + 3];
1210                                    }
1211                                    dhtRows.forward(t, startt);
1212                                    dhtRows.forward(t, startt + rows);
1213                                    dhtRows.forward(t, startt + 2 * rows);
1214                                    dhtRows.forward(t, startt + 3 * rows);
1215                                    for (int r = 0; r < rows; r++) {
1216                                        idx2 = startt + rows + r;
1217                                        a[s][r][c] = t[startt + r];
1218                                        a[s][r][c + 1] = t[idx2];
1219                                        a[s][r][c + 2] = t[idx2 + rows];
1220                                        a[s][r][c + 3] = t[idx2 + 2 * rows];
1221                                    }
1222                                }
1223                            } else if (columns == 2) {
1224                                for (int r = 0; r < rows; r++) {
1225                                    t[startt + r] = a[s][r][0];
1226                                    t[startt + rows + r] = a[s][r][1];
1227                                }
1228                                dhtRows.forward(t, startt);
1229                                dhtRows.forward(t, startt + rows);
1230                                for (int r = 0; r < rows; r++) {
1231                                    a[s][r][0] = t[startt + r];
1232                                    a[s][r][1] = t[startt + rows + r];
1233                                }
1234                            }
1235                        }
1236                    } else {
1237                        for (int s = n0; s < slices; s += nthreads) {
1238                            for (int r = 0; r < rows; r++) {
1239                                dhtColumns.inverse(a[s][r], scale);
1240                            }
1241                            if (columns > 2) {
1242                                for (int c = 0; c < columns; c += 4) {
1243                                    for (int r = 0; r < rows; r++) {
1244                                        idx2 = startt + rows + r;
1245                                        t[startt + r] = a[s][r][c];
1246                                        t[idx2] = a[s][r][c + 1];
1247                                        t[idx2 + rows] = a[s][r][c + 2];
1248                                        t[idx2 + 2 * rows] = a[s][r][c + 3];
1249                                    }
1250                                    dhtRows.inverse(t, startt, scale);
1251                                    dhtRows.inverse(t, startt + rows, scale);
1252                                    dhtRows.inverse(t, startt + 2 * rows, scale);
1253                                    dhtRows.inverse(t, startt + 3 * rows, scale);
1254                                    for (int r = 0; r < rows; r++) {
1255                                        idx2 = startt + rows + r;
1256                                        a[s][r][c] = t[startt + r];
1257                                        a[s][r][c + 1] = t[idx2];
1258                                        a[s][r][c + 2] = t[idx2 + rows];
1259                                        a[s][r][c + 3] = t[idx2 + 2 * rows];
1260                                    }
1261                                }
1262                            } else if (columns == 2) {
1263                                for (int r = 0; r < rows; r++) {
1264                                    t[startt + r] = a[s][r][0];
1265                                    t[startt + rows + r] = a[s][r][1];
1266                                }
1267                                dhtRows.inverse(t, startt, scale);
1268                                dhtRows.inverse(t, startt + rows, scale);
1269                                for (int r = 0; r < rows; r++) {
1270                                    a[s][r][0] = t[startt + r];
1271                                    a[s][r][1] = t[startt + rows + r];
1272                                }
1273                            }
1274                        }
1275                    }
1276                }
1277            });
1278        }
1279        ConcurrencyUtils.waitForCompletion(futures);
1280    }
1281
1282    private void ddxt3db_subth(final int isgn, final double[] a, final boolean scale) {
1283        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
1284        int nt = 4 * slices;
1285        if (columns == 2) {
1286            nt >>= 1;
1287        }
1288        Future<?>[] futures = new Future[nthreads];
1289
1290        for (int i = 0; i < nthreads; i++) {
1291            final int n0 = i;
1292            final int startt = nt * i;
1293            futures[i] = ConcurrencyUtils.submit(new Runnable() {
1294
1295                @Override
1296                                public void run() {
1297                    int idx0, idx1, idx2;
1298                    if (isgn == -1) {
1299                        if (columns > 2) {
1300                            for (int r = n0; r < rows; r += nthreads) {
1301                                idx0 = r * rowStride;
1302                                for (int c = 0; c < columns; c += 4) {
1303                                    for (int s = 0; s < slices; s++) {
1304                                        idx1 = s * sliceStride + idx0 + c;
1305                                        idx2 = startt + slices + s;
1306                                        t[startt + s] = a[idx1];
1307                                        t[idx2] = a[idx1 + 1];
1308                                        t[idx2 + slices] = a[idx1 + 2];
1309                                        t[idx2 + 2 * slices] = a[idx1 + 3];
1310                                    }
1311                                    dhtSlices.forward(t, startt);
1312                                    dhtSlices.forward(t, startt + slices);
1313                                    dhtSlices.forward(t, startt + 2 * slices);
1314                                    dhtSlices.forward(t, startt + 3 * slices);
1315                                    for (int s = 0; s < slices; s++) {
1316                                        idx1 = s * sliceStride + idx0 + c;
1317                                        idx2 = startt + slices + s;
1318                                        a[idx1] = t[startt + s];
1319                                        a[idx1 + 1] = t[idx2];
1320                                        a[idx1 + 2] = t[idx2 + slices];
1321                                        a[idx1 + 3] = t[idx2 + 2 * slices];
1322                                    }
1323                                }
1324                            }
1325                        } else if (columns == 2) {
1326                            for (int r = n0; r < rows; r += nthreads) {
1327                                idx0 = r * rowStride;
1328                                for (int s = 0; s < slices; s++) {
1329                                    idx1 = s * sliceStride + idx0;
1330                                    t[startt + s] = a[idx1];
1331                                    t[startt + slices + s] = a[idx1 + 1];
1332                                }
1333                                dhtSlices.forward(t, startt);
1334                                dhtSlices.forward(t, startt + slices);
1335                                for (int s = 0; s < slices; s++) {
1336                                    idx1 = s * sliceStride + idx0;
1337                                    a[idx1] = t[startt + s];
1338                                    a[idx1 + 1] = t[startt + slices + s];
1339                                }
1340                            }
1341                        }
1342                    } else {
1343                        if (columns > 2) {
1344                            for (int r = n0; r < rows; r += nthreads) {
1345                                idx0 = r * rowStride;
1346                                for (int c = 0; c < columns; c += 4) {
1347                                    for (int s = 0; s < slices; s++) {
1348                                        idx1 = s * sliceStride + idx0 + c;
1349                                        idx2 = startt + slices + s;
1350                                        t[startt + s] = a[idx1];
1351                                        t[idx2] = a[idx1 + 1];
1352                                        t[idx2 + slices] = a[idx1 + 2];
1353                                        t[idx2 + 2 * slices] = a[idx1 + 3];
1354                                    }
1355                                    dhtSlices.inverse(t, startt, scale);
1356                                    dhtSlices.inverse(t, startt + slices, scale);
1357                                    dhtSlices.inverse(t, startt + 2 * slices, scale);
1358                                    dhtSlices.inverse(t, startt + 3 * slices, scale);
1359                                    for (int s = 0; s < slices; s++) {
1360                                        idx1 = s * sliceStride + idx0 + c;
1361                                        idx2 = startt + slices + s;
1362                                        a[idx1] = t[startt + s];
1363                                        a[idx1 + 1] = t[idx2];
1364                                        a[idx1 + 2] = t[idx2 + slices];
1365                                        a[idx1 + 3] = t[idx2 + 2 * slices];
1366                                    }
1367                                }
1368                            }
1369                        } else if (columns == 2) {
1370                            for (int r = n0; r < rows; r += nthreads) {
1371                                idx0 = r * rowStride;
1372                                for (int s = 0; s < slices; s++) {
1373                                    idx1 = s * sliceStride + idx0;
1374                                    t[startt + s] = a[idx1];
1375                                    t[startt + slices + s] = a[idx1 + 1];
1376                                }
1377                                dhtSlices.inverse(t, startt, scale);
1378                                dhtSlices.inverse(t, startt + slices, scale);
1379                                for (int s = 0; s < slices; s++) {
1380                                    idx1 = s * sliceStride + idx0;
1381                                    a[idx1] = t[startt + s];
1382                                    a[idx1 + 1] = t[startt + slices + s];
1383                                }
1384                            }
1385                        }
1386                    }
1387                }
1388            });
1389        }
1390        ConcurrencyUtils.waitForCompletion(futures);
1391    }
1392
1393    private void ddxt3db_subth(final int isgn, final double[][][] a, final boolean scale) {
1394        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
1395        int nt = 4 * slices;
1396        if (columns == 2) {
1397            nt >>= 1;
1398        }
1399        Future<?>[] futures = new Future[nthreads];
1400
1401        for (int i = 0; i < nthreads; i++) {
1402            final int n0 = i;
1403            final int startt = nt * i;
1404            futures[i] = ConcurrencyUtils.submit(new Runnable() {
1405
1406                @Override
1407                                public void run() {
1408                    int idx2;
1409                    if (isgn == -1) {
1410                        if (columns > 2) {
1411                            for (int r = n0; r < rows; r += nthreads) {
1412                                for (int c = 0; c < columns; c += 4) {
1413                                    for (int s = 0; s < slices; s++) {
1414                                        idx2 = startt + slices + s;
1415                                        t[startt + s] = a[s][r][c];
1416                                        t[idx2] = a[s][r][c + 1];
1417                                        t[idx2 + slices] = a[s][r][c + 2];
1418                                        t[idx2 + 2 * slices] = a[s][r][c + 3];
1419                                    }
1420                                    dhtSlices.forward(t, startt);
1421                                    dhtSlices.forward(t, startt + slices);
1422                                    dhtSlices.forward(t, startt + 2 * slices);
1423                                    dhtSlices.forward(t, startt + 3 * slices);
1424                                    for (int s = 0; s < slices; s++) {
1425                                        idx2 = startt + slices + s;
1426                                        a[s][r][c] = t[startt + s];
1427                                        a[s][r][c + 1] = t[idx2];
1428                                        a[s][r][c + 2] = t[idx2 + slices];
1429                                        a[s][r][c + 3] = t[idx2 + 2 * slices];
1430                                    }
1431                                }
1432                            }
1433                        } else if (columns == 2) {
1434                            for (int r = n0; r < rows; r += nthreads) {
1435                                for (int s = 0; s < slices; s++) {
1436                                    t[startt + s] = a[s][r][0];
1437                                    t[startt + slices + s] = a[s][r][1];
1438                                }
1439                                dhtSlices.forward(t, startt);
1440                                dhtSlices.forward(t, startt + slices);
1441                                for (int s = 0; s < slices; s++) {
1442                                    a[s][r][0] = t[startt + s];
1443                                    a[s][r][1] = t[startt + slices + s];
1444                                }
1445                            }
1446                        }
1447                    } else {
1448                        if (columns > 2) {
1449                            for (int r = n0; r < rows; r += nthreads) {
1450                                for (int c = 0; c < columns; c += 4) {
1451                                    for (int s = 0; s < slices; s++) {
1452                                        idx2 = startt + slices + s;
1453                                        t[startt + s] = a[s][r][c];
1454                                        t[idx2] = a[s][r][c + 1];
1455                                        t[idx2 + slices] = a[s][r][c + 2];
1456                                        t[idx2 + 2 * slices] = a[s][r][c + 3];
1457                                    }
1458                                    dhtSlices.inverse(t, startt, scale);
1459                                    dhtSlices.inverse(t, startt + slices, scale);
1460                                    dhtSlices.inverse(t, startt + 2 * slices, scale);
1461                                    dhtSlices.inverse(t, startt + 3 * slices, scale);
1462                                    for (int s = 0; s < slices; s++) {
1463                                        idx2 = startt + slices + s;
1464                                        a[s][r][c] = t[startt + s];
1465                                        a[s][r][c + 1] = t[idx2];
1466                                        a[s][r][c + 2] = t[idx2 + slices];
1467                                        a[s][r][c + 3] = t[idx2 + 2 * slices];
1468                                    }
1469                                }
1470                            }
1471                        } else if (columns == 2) {
1472                            for (int r = n0; r < rows; r += nthreads) {
1473                                for (int s = 0; s < slices; s++) {
1474                                    t[startt + s] = a[s][r][0];
1475                                    t[startt + slices + s] = a[s][r][1];
1476                                }
1477                                dhtSlices.inverse(t, startt, scale);
1478                                dhtSlices.inverse(t, startt + slices, scale);
1479
1480                                for (int s = 0; s < slices; s++) {
1481                                    a[s][r][0] = t[startt + s];
1482                                    a[s][r][1] = t[startt + slices + s];
1483                                }
1484                            }
1485                        }
1486                    }
1487                }
1488            });
1489        }
1490        ConcurrencyUtils.waitForCompletion(futures);
1491    }
1492
1493    private void yTransform(double[] a) {
1494        double A, B, C, D, E, F, G, H;
1495        int cC, rC, sC;
1496        int idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9, idx10, idx11, idx12;
1497        for (int s = 0; s <= slices / 2; s++) {
1498            sC = (slices - s) % slices;
1499            idx9 = s * sliceStride;
1500            idx10 = sC * sliceStride;
1501            for (int r = 0; r <= rows / 2; r++) {
1502                rC = (rows - r) % rows;
1503                idx11 = r * rowStride;
1504                idx12 = rC * rowStride;
1505                for (int c = 0; c <= columns / 2; c++) {
1506                    cC = (columns - c) % columns;
1507                    idx1 = idx9 + idx12 + c;
1508                    idx2 = idx9 + idx11 + cC;
1509                    idx3 = idx10 + idx11 + c;
1510                    idx4 = idx10 + idx12 + cC;
1511                    idx5 = idx10 + idx12 + c;
1512                    idx6 = idx10 + idx11 + cC;
1513                    idx7 = idx9 + idx11 + c;
1514                    idx8 = idx9 + idx12 + cC;
1515                    A = a[idx1];
1516                    B = a[idx2];
1517                    C = a[idx3];
1518                    D = a[idx4];
1519                    E = a[idx5];
1520                    F = a[idx6];
1521                    G = a[idx7];
1522                    H = a[idx8];
1523                    a[idx7] = (A + B + C - D) / 2;
1524                    a[idx3] = (E + F + G - H) / 2;
1525                    a[idx1] = (G + H + E - F) / 2;
1526                    a[idx5] = (C + D + A - B) / 2;
1527                    a[idx2] = (H + G + F - E) / 2;
1528                    a[idx6] = (D + C + B - A) / 2;
1529                    a[idx8] = (B + A + D - C) / 2;
1530                    a[idx4] = (F + E + H - G) / 2;
1531                }
1532            }
1533        }
1534    }
1535
1536    private void yTransform(double[][][] a) {
1537        double A, B, C, D, E, F, G, H;
1538        int cC, rC, sC;
1539        for (int s = 0; s <= slices / 2; s++) {
1540            sC = (slices - s) % slices;
1541            for (int r = 0; r <= rows / 2; r++) {
1542                rC = (rows - r) % rows;
1543                for (int c = 0; c <= columns / 2; c++) {
1544                    cC = (columns - c) % columns;
1545                    A = a[s][rC][c];
1546                    B = a[s][r][cC];
1547                    C = a[sC][r][c];
1548                    D = a[sC][rC][cC];
1549                    E = a[sC][rC][c];
1550                    F = a[sC][r][cC];
1551                    G = a[s][r][c];
1552                    H = a[s][rC][cC];
1553                    a[s][r][c] = (A + B + C - D) / 2;
1554                    a[sC][r][c] = (E + F + G - H) / 2;
1555                    a[s][rC][c] = (G + H + E - F) / 2;
1556                    a[sC][rC][c] = (C + D + A - B) / 2;
1557                    a[s][r][cC] = (H + G + F - E) / 2;
1558                    a[sC][r][cC] = (D + C + B - A) / 2;
1559                    a[s][rC][cC] = (B + A + D - C) / 2;
1560                    a[sC][rC][cC] = (F + E + H - G) / 2;
1561                }
1562            }
1563        }
1564    }
1565
1566}