001/* ***** BEGIN LICENSE BLOCK *****
002 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
003 *
004 * The contents of this file are subject to the Mozilla Public License Version
005 * 1.1 (the "License"); you may not use this file except in compliance with
006 * the License. You may obtain a copy of the License at
007 * http://www.mozilla.org/MPL/
008 *
009 * Software distributed under the License is distributed on an "AS IS" basis,
010 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
011 * for the specific language governing rights and limitations under the
012 * License.
013 *
014 * The Original Code is JTransforms.
015 *
016 * The Initial Developer of the Original Code is
017 * Piotr Wendykier, Emory University.
018 * Portions created by the Initial Developer are Copyright (C) 2007-2009
019 * the Initial Developer. All Rights Reserved.
020 *
021 * Alternatively, the contents of this file may be used under the terms of
022 * either the GNU General Public License Version 2 or later (the "GPL"), or
023 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
024 * in which case the provisions of the GPL or the LGPL are applicable instead
025 * of those above. If you wish to allow use of your version of this file only
026 * under the terms of either the GPL or the LGPL, and not to allow others to
027 * use your version of this file under the terms of the MPL, indicate your
028 * decision by deleting the provisions above and replace them with the notice
029 * and other provisions required by the GPL or the LGPL. If you do not delete
030 * the provisions above, a recipient may use your version of this file under
031 * the terms of any one of the MPL, the GPL or the LGPL.
032 *
033 * ***** END LICENSE BLOCK ***** */
034
035package edu.emory.mathcs.jtransforms.dct;
036
037import java.util.concurrent.Future;
038
039import edu.emory.mathcs.utils.ConcurrencyUtils;
040
041/**
042 * Computes 3D Discrete Cosine Transform (DCT) of single precision data. The
043 * sizes of all three dimensions can be arbitrary numbers. This is a parallel
044 * implementation of split-radix and mixed-radix algorithms optimized for SMP
045 * systems. <br>
046 * <br>
 * Part of the code is derived from the General Purpose FFT Package written by
 * Takuya Ooura (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
049 * 
050 * @author Piotr Wendykier (piotr.wendykier@gmail.com)
051 * 
052 */
053public class FloatDCT_3D {
054
055    private int slices;
056
057    private int rows;
058
059    private int columns;
060
061    private int sliceStride;
062
063    private int rowStride;
064
065    private float[] t;
066
067    private FloatDCT_1D dctSlices, dctRows, dctColumns;
068
069    private int oldNthreads;
070
071    private int nt;
072
073    private boolean isPowerOfTwo = false;
074
075    private boolean useThreads = false;
076
077    /**
078     * Creates new instance of FloatDCT_3D.
079     * 
080     * @param slices
081     *            number of slices
082     * @param rows
083     *            number of rows
084     * @param columns
085     *            number of columns
086     */
087    public FloatDCT_3D(int slices, int rows, int columns) {
088        if (slices <= 1 || rows <= 1 || columns <= 1) {
089            throw new IllegalArgumentException("slices, rows and columns must be greater than 1");
090        }
091        this.slices = slices;
092        this.rows = rows;
093        this.columns = columns;
094        this.sliceStride = rows * columns;
095        this.rowStride = columns;
096        if (slices * rows * columns >= ConcurrencyUtils.getThreadsBeginN_3D()) {
097            this.useThreads = true;
098        }
099        if (ConcurrencyUtils.isPowerOf2(slices) && ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) {
100            isPowerOfTwo = true;
101
102            oldNthreads = ConcurrencyUtils.getNumberOfThreads();
103            nt = slices;
104            if (nt < rows) {
105                nt = rows;
106            }
107            nt *= 4;
108            if (oldNthreads > 1) {
109                nt *= oldNthreads;
110            }
111            if (columns == 2) {
112                nt >>= 1;
113            }
114            t = new float[nt];
115        }
116        dctSlices = new FloatDCT_1D(slices);
117        if (slices == rows) {
118            dctRows = dctSlices;
119        } else {
120            dctRows = new FloatDCT_1D(rows);
121        }
122        if (slices == columns) {
123            dctColumns = dctSlices;
124        } else if (rows == columns) {
125            dctColumns = dctRows;
126        } else {
127            dctColumns = new FloatDCT_1D(columns);
128        }
129    }
130
131    /**
     * Computes the 3D forward DCT (DCT-II) leaving the result in
     * <code>a</code>. The data is stored in 1D array addressed in slice-major,
     * then row-major, then column-major, in order of significance, i.e. the
     * element (i,j,k) of 3D array x[slices][rows][columns] is stored in
     * a[i*sliceStride + j*rowStride + k], where sliceStride = rows * columns
     * and rowStride = columns.
138     * 
139     * @param a
140     *            data to transform
141     * @param scale
142     *            if true then scaling is performed
143     */
144    public void forward(final float[] a, final boolean scale) {
145        int nthreads = ConcurrencyUtils.getNumberOfThreads();
146        if (isPowerOfTwo) {
147            if (nthreads != oldNthreads) {
148                nt = slices;
149                if (nt < rows) {
150                    nt = rows;
151                }
152                nt *= 4;
153                if (nthreads > 1) {
154                    nt *= nthreads;
155                }
156                if (columns == 2) {
157                    nt >>= 1;
158                }
159                t = new float[nt];
160                oldNthreads = nthreads;
161            }
162            if ((nthreads > 1) && useThreads) {
163                ddxt3da_subth(-1, a, scale);
164                ddxt3db_subth(-1, a, scale);
165            } else {
166                ddxt3da_sub(-1, a, scale);
167                ddxt3db_sub(-1, a, scale);
168            }
169        } else {
170            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
171                Future<?>[] futures = new Future[nthreads];
172                int p = slices / nthreads;
173                for (int l = 0; l < nthreads; l++) {
174                    final int firstSlice = l * p;
175                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
176                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
177                        @Override
178                                                public void run() {
179                            for (int s = firstSlice; s < lastSlice; s++) {
180                                int idx1 = s * sliceStride;
181                                for (int r = 0; r < rows; r++) {
182                                    dctColumns.forward(a, idx1 + r * rowStride, scale);
183                                }
184                            }
185                        }
186                    });
187                }
188                ConcurrencyUtils.waitForCompletion(futures);
189
190                for (int l = 0; l < nthreads; l++) {
191                    final int firstSlice = l * p;
192                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
193                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
194                        @Override
195                                                public void run() {
196                            float[] temp = new float[rows];
197                            for (int s = firstSlice; s < lastSlice; s++) {
198                                int idx1 = s * sliceStride;
199                                for (int c = 0; c < columns; c++) {
200                                    for (int r = 0; r < rows; r++) {
201                                        int idx3 = idx1 + r * rowStride + c;
202                                        temp[r] = a[idx3];
203                                    }
204                                    dctRows.forward(temp, scale);
205                                    for (int r = 0; r < rows; r++) {
206                                        int idx3 = idx1 + r * rowStride + c;
207                                        a[idx3] = temp[r];
208                                    }
209                                }
210                            }
211                        }
212                    });
213                }
214                ConcurrencyUtils.waitForCompletion(futures);
215
216                p = rows / nthreads;
217                for (int l = 0; l < nthreads; l++) {
218                    final int firstRow = l * p;
219                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
220                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
221                        @Override
222                                                public void run() {
223                            float[] temp = new float[slices];
224                            for (int r = firstRow; r < lastRow; r++) {
225                                int idx1 = r * rowStride;
226                                for (int c = 0; c < columns; c++) {
227                                    for (int s = 0; s < slices; s++) {
228                                        int idx3 = s * sliceStride + idx1 + c;
229                                        temp[s] = a[idx3];
230                                    }
231                                    dctSlices.forward(temp, scale);
232                                    for (int s = 0; s < slices; s++) {
233                                        int idx3 = s * sliceStride + idx1 + c;
234                                        a[idx3] = temp[s];
235                                    }
236                                }
237                            }
238                        }
239                    });
240                }
241                ConcurrencyUtils.waitForCompletion(futures);
242
243            } else {
244                for (int s = 0; s < slices; s++) {
245                    int idx1 = s * sliceStride;
246                    for (int r = 0; r < rows; r++) {
247                        dctColumns.forward(a, idx1 + r * rowStride, scale);
248                    }
249                }
250                float[] temp = new float[rows];
251                for (int s = 0; s < slices; s++) {
252                    int idx1 = s * sliceStride;
253                    for (int c = 0; c < columns; c++) {
254                        for (int r = 0; r < rows; r++) {
255                            int idx3 = idx1 + r * rowStride + c;
256                            temp[r] = a[idx3];
257                        }
258                        dctRows.forward(temp, scale);
259                        for (int r = 0; r < rows; r++) {
260                            int idx3 = idx1 + r * rowStride + c;
261                            a[idx3] = temp[r];
262                        }
263                    }
264                }
265                temp = new float[slices];
266                for (int r = 0; r < rows; r++) {
267                    int idx1 = r * rowStride;
268                    for (int c = 0; c < columns; c++) {
269                        for (int s = 0; s < slices; s++) {
270                            int idx3 = s * sliceStride + idx1 + c;
271                            temp[s] = a[idx3];
272                        }
273                        dctSlices.forward(temp, scale);
274                        for (int s = 0; s < slices; s++) {
275                            int idx3 = s * sliceStride + idx1 + c;
276                            a[idx3] = temp[s];
277                        }
278                    }
279                }
280            }
281        }
282    }
283
284    /**
     * Computes the 3D forward DCT (DCT-II) leaving the result in
     * <code>a</code>. The data is stored in 3D array.
287     * 
288     * @param a
289     *            data to transform
290     * @param scale
291     *            if true then scaling is performed
292     */
293    public void forward(final float[][][] a, final boolean scale) {
294        int nthreads = ConcurrencyUtils.getNumberOfThreads();
295        if (isPowerOfTwo) {
296            if (nthreads != oldNthreads) {
297                nt = slices;
298                if (nt < rows) {
299                    nt = rows;
300                }
301                nt *= 4;
302                if (nthreads > 1) {
303                    nt *= nthreads;
304                }
305                if (columns == 2) {
306                    nt >>= 1;
307                }
308                t = new float[nt];
309                oldNthreads = nthreads;
310            }
311            if ((nthreads > 1) && useThreads) {
312                ddxt3da_subth(-1, a, scale);
313                ddxt3db_subth(-1, a, scale);
314            } else {
315                ddxt3da_sub(-1, a, scale);
316                ddxt3db_sub(-1, a, scale);
317            }
318        } else {
319            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
320                Future<?>[] futures = new Future[nthreads];
321                int p = slices / nthreads;
322                for (int l = 0; l < nthreads; l++) {
323                    final int firstSlice = l * p;
324                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
325                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
326                        @Override
327                                                public void run() {
328                            for (int s = firstSlice; s < lastSlice; s++) {
329                                for (int r = 0; r < rows; r++) {
330                                    dctColumns.forward(a[s][r], scale);
331                                }
332                            }
333                        }
334                    });
335                }
336                ConcurrencyUtils.waitForCompletion(futures);
337
338                for (int l = 0; l < nthreads; l++) {
339                    final int firstSlice = l * p;
340                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
341                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
342                        @Override
343                                                public void run() {
344                            float[] temp = new float[rows];
345                            for (int s = firstSlice; s < lastSlice; s++) {
346                                for (int c = 0; c < columns; c++) {
347                                    for (int r = 0; r < rows; r++) {
348                                        temp[r] = a[s][r][c];
349                                    }
350                                    dctRows.forward(temp, scale);
351                                    for (int r = 0; r < rows; r++) {
352                                        a[s][r][c] = temp[r];
353                                    }
354                                }
355                            }
356                        }
357                    });
358                }
359                ConcurrencyUtils.waitForCompletion(futures);
360
361                p = rows / nthreads;
362                for (int l = 0; l < nthreads; l++) {
363                    final int firstRow = l * p;
364                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
365                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
366                        @Override
367                                                public void run() {
368                            float[] temp = new float[slices];
369                            for (int r = firstRow; r < lastRow; r++) {
370                                for (int c = 0; c < columns; c++) {
371                                    for (int s = 0; s < slices; s++) {
372                                        temp[s] = a[s][r][c];
373                                    }
374                                    dctSlices.forward(temp, scale);
375                                    for (int s = 0; s < slices; s++) {
376                                        a[s][r][c] = temp[s];
377                                    }
378                                }
379                            }
380                        }
381                    });
382                }
383                ConcurrencyUtils.waitForCompletion(futures);
384
385            } else {
386                for (int s = 0; s < slices; s++) {
387                    for (int r = 0; r < rows; r++) {
388                        dctColumns.forward(a[s][r], scale);
389                    }
390                }
391                float[] temp = new float[rows];
392                for (int s = 0; s < slices; s++) {
393                    for (int c = 0; c < columns; c++) {
394                        for (int r = 0; r < rows; r++) {
395                            temp[r] = a[s][r][c];
396                        }
397                        dctRows.forward(temp, scale);
398                        for (int r = 0; r < rows; r++) {
399                            a[s][r][c] = temp[r];
400                        }
401                    }
402                }
403                temp = new float[slices];
404                for (int r = 0; r < rows; r++) {
405                    for (int c = 0; c < columns; c++) {
406                        for (int s = 0; s < slices; s++) {
407                            temp[s] = a[s][r][c];
408                        }
409                        dctSlices.forward(temp, scale);
410                        for (int s = 0; s < slices; s++) {
411                            a[s][r][c] = temp[s];
412                        }
413                    }
414                }
415            }
416        }
417    }
418
419    /**
420     * Computes the 3D inverse DCT (DCT-III) leaving the result in
421     * <code>a</code>. The data is stored in 1D array addressed in slice-major,
422     * then row-major, then column-major, in order of significance, i.e. the
423     * element (i,j,k) of 3D array x[slices][rows][columns] is stored in
424     * a[i*sliceStride + j*rowStride + k], where sliceStride = rows * columns
425     * and rowStride = columns.
426     * 
427     * @param a
428     *            data to transform
429     * @param scale
430     *            if true then scaling is performed
431     */
432    public void inverse(final float[] a, final boolean scale) {
433        int nthreads = ConcurrencyUtils.getNumberOfThreads();
434        if (isPowerOfTwo) {
435            if (nthreads != oldNthreads) {
436                nt = slices;
437                if (nt < rows) {
438                    nt = rows;
439                }
440                nt *= 4;
441                if (nthreads > 1) {
442                    nt *= nthreads;
443                }
444                if (columns == 2) {
445                    nt >>= 1;
446                }
447                t = new float[nt];
448                oldNthreads = nthreads;
449            }
450            if ((nthreads > 1) && useThreads) {
451                ddxt3da_subth(1, a, scale);
452                ddxt3db_subth(1, a, scale);
453            } else {
454                ddxt3da_sub(1, a, scale);
455                ddxt3db_sub(1, a, scale);
456            }
457        } else {
458            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
459                Future<?>[] futures = new Future[nthreads];
460                int p = slices / nthreads;
461                for (int l = 0; l < nthreads; l++) {
462                    final int firstSlice = l * p;
463                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
464                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
465                        @Override
466                                                public void run() {
467                            for (int s = firstSlice; s < lastSlice; s++) {
468                                int idx1 = s * sliceStride;
469                                for (int r = 0; r < rows; r++) {
470                                    dctColumns.inverse(a, idx1 + r * rowStride, scale);
471                                }
472                            }
473                        }
474                    });
475                }
476                ConcurrencyUtils.waitForCompletion(futures);
477
478                for (int l = 0; l < nthreads; l++) {
479                    final int firstSlice = l * p;
480                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
481                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
482                        @Override
483                                                public void run() {
484                            float[] temp = new float[rows];
485                            for (int s = firstSlice; s < lastSlice; s++) {
486                                int idx1 = s * sliceStride;
487                                for (int c = 0; c < columns; c++) {
488                                    for (int r = 0; r < rows; r++) {
489                                        int idx3 = idx1 + r * rowStride + c;
490                                        temp[r] = a[idx3];
491                                    }
492                                    dctRows.inverse(temp, scale);
493                                    for (int r = 0; r < rows; r++) {
494                                        int idx3 = idx1 + r * rowStride + c;
495                                        a[idx3] = temp[r];
496                                    }
497                                }
498                            }
499                        }
500                    });
501                }
502                ConcurrencyUtils.waitForCompletion(futures);
503
504                p = rows / nthreads;
505                for (int l = 0; l < nthreads; l++) {
506                    final int firstRow = l * p;
507                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
508                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
509                        @Override
510                                                public void run() {
511                            float[] temp = new float[slices];
512                            for (int r = firstRow; r < lastRow; r++) {
513                                int idx1 = r * rowStride;
514                                for (int c = 0; c < columns; c++) {
515                                    for (int s = 0; s < slices; s++) {
516                                        int idx3 = s * sliceStride + idx1 + c;
517                                        temp[s] = a[idx3];
518                                    }
519                                    dctSlices.inverse(temp, scale);
520                                    for (int s = 0; s < slices; s++) {
521                                        int idx3 = s * sliceStride + idx1 + c;
522                                        a[idx3] = temp[s];
523                                    }
524                                }
525                            }
526                        }
527                    });
528                }
529                ConcurrencyUtils.waitForCompletion(futures);
530
531            } else {
532                for (int s = 0; s < slices; s++) {
533                    int idx1 = s * sliceStride;
534                    for (int r = 0; r < rows; r++) {
535                        dctColumns.inverse(a, idx1 + r * rowStride, scale);
536                    }
537                }
538                float[] temp = new float[rows];
539                for (int s = 0; s < slices; s++) {
540                    int idx1 = s * sliceStride;
541                    for (int c = 0; c < columns; c++) {
542                        for (int r = 0; r < rows; r++) {
543                            int idx3 = idx1 + r * rowStride + c;
544                            temp[r] = a[idx3];
545                        }
546                        dctRows.inverse(temp, scale);
547                        for (int r = 0; r < rows; r++) {
548                            int idx3 = idx1 + r * rowStride + c;
549                            a[idx3] = temp[r];
550                        }
551                    }
552                }
553                temp = new float[slices];
554                for (int r = 0; r < rows; r++) {
555                    int idx1 = r * rowStride;
556                    for (int c = 0; c < columns; c++) {
557                        for (int s = 0; s < slices; s++) {
558                            int idx3 = s * sliceStride + idx1 + c;
559                            temp[s] = a[idx3];
560                        }
561                        dctSlices.inverse(temp, scale);
562                        for (int s = 0; s < slices; s++) {
563                            int idx3 = s * sliceStride + idx1 + c;
564                            a[idx3] = temp[s];
565                        }
566                    }
567                }
568            }
569        }
570    }
571
572    /**
573     * Computes the 3D inverse DCT (DCT-III) leaving the result in
574     * <code>a</code>. The data is stored in 3D array.
575     * 
576     * @param a
577     *            data to transform
578     * @param scale
579     *            if true then scaling is performed
580     */
581    public void inverse(final float[][][] a, final boolean scale) {
582        int nthreads = ConcurrencyUtils.getNumberOfThreads();
583        if (isPowerOfTwo) {
584            if (nthreads != oldNthreads) {
585                nt = slices;
586                if (nt < rows) {
587                    nt = rows;
588                }
589                nt *= 4;
590                if (nthreads > 1) {
591                    nt *= nthreads;
592                }
593                if (columns == 2) {
594                    nt >>= 1;
595                }
596                t = new float[nt];
597                oldNthreads = nthreads;
598            }
599            if ((nthreads > 1) && useThreads) {
600                ddxt3da_subth(1, a, scale);
601                ddxt3db_subth(1, a, scale);
602            } else {
603                ddxt3da_sub(1, a, scale);
604                ddxt3db_sub(1, a, scale);
605            }
606        } else {
607            if ((nthreads > 1) && useThreads && (slices >= nthreads) && (rows >= nthreads) && (columns >= nthreads)) {
608                Future<?>[] futures = new Future[nthreads];
609                int p = slices / nthreads;
610                for (int l = 0; l < nthreads; l++) {
611                    final int firstSlice = l * p;
612                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
613                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
614                        @Override
615                                                public void run() {
616                            for (int s = firstSlice; s < lastSlice; s++) {
617                                for (int r = 0; r < rows; r++) {
618                                    dctColumns.inverse(a[s][r], scale);
619                                }
620                            }
621                        }
622                    });
623                }
624                ConcurrencyUtils.waitForCompletion(futures);
625
626                for (int l = 0; l < nthreads; l++) {
627                    final int firstSlice = l * p;
628                    final int lastSlice = (l == (nthreads - 1)) ? slices : firstSlice + p;
629                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
630                        @Override
631                                                public void run() {
632                            float[] temp = new float[rows];
633                            for (int s = firstSlice; s < lastSlice; s++) {
634                                for (int c = 0; c < columns; c++) {
635                                    for (int r = 0; r < rows; r++) {
636                                        temp[r] = a[s][r][c];
637                                    }
638                                    dctRows.inverse(temp, scale);
639                                    for (int r = 0; r < rows; r++) {
640                                        a[s][r][c] = temp[r];
641                                    }
642                                }
643                            }
644                        }
645                    });
646                }
647                ConcurrencyUtils.waitForCompletion(futures);
648
649                p = rows / nthreads;
650                for (int l = 0; l < nthreads; l++) {
651                    final int firstRow = l * p;
652                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
653                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
654                        @Override
655                                                public void run() {
656                            float[] temp = new float[slices];
657                            for (int r = firstRow; r < lastRow; r++) {
658                                for (int c = 0; c < columns; c++) {
659                                    for (int s = 0; s < slices; s++) {
660                                        temp[s] = a[s][r][c];
661                                    }
662                                    dctSlices.inverse(temp, scale);
663                                    for (int s = 0; s < slices; s++) {
664                                        a[s][r][c] = temp[s];
665                                    }
666                                }
667                            }
668                        }
669                    });
670                }
671                ConcurrencyUtils.waitForCompletion(futures);
672
673            } else {
674                for (int s = 0; s < slices; s++) {
675                    for (int r = 0; r < rows; r++) {
676                        dctColumns.inverse(a[s][r], scale);
677                    }
678                }
679                float[] temp = new float[rows];
680                for (int s = 0; s < slices; s++) {
681                    for (int c = 0; c < columns; c++) {
682                        for (int r = 0; r < rows; r++) {
683                            temp[r] = a[s][r][c];
684                        }
685                        dctRows.inverse(temp, scale);
686                        for (int r = 0; r < rows; r++) {
687                            a[s][r][c] = temp[r];
688                        }
689                    }
690                }
691                temp = new float[slices];
692                for (int r = 0; r < rows; r++) {
693                    for (int c = 0; c < columns; c++) {
694                        for (int s = 0; s < slices; s++) {
695                            temp[s] = a[s][r][c];
696                        }
697                        dctSlices.inverse(temp, scale);
698                        for (int s = 0; s < slices; s++) {
699                            a[s][r][c] = temp[s];
700                        }
701                    }
702                }
703            }
704        }
705    }
706
707    private void ddxt3da_sub(int isgn, float[] a, boolean scale) {
708        int idx0, idx1, idx2;
709
710        if (isgn == -1) {
711            for (int s = 0; s < slices; s++) {
712                idx0 = s * sliceStride;
713                for (int r = 0; r < rows; r++) {
714                    dctColumns.forward(a, idx0 + r * rowStride, scale);
715                }
716                if (columns > 2) {
717                    for (int c = 0; c < columns; c += 4) {
718                        for (int r = 0; r < rows; r++) {
719                            idx1 = idx0 + r * rowStride + c;
720                            idx2 = rows + r;
721                            t[r] = a[idx1];
722                            t[idx2] = a[idx1 + 1];
723                            t[idx2 + rows] = a[idx1 + 2];
724                            t[idx2 + 2 * rows] = a[idx1 + 3];
725                        }
726                        dctRows.forward(t, 0, scale);
727                        dctRows.forward(t, rows, scale);
728                        dctRows.forward(t, 2 * rows, scale);
729                        dctRows.forward(t, 3 * rows, scale);
730                        for (int r = 0; r < rows; r++) {
731                            idx1 = idx0 + r * rowStride + c;
732                            idx2 = rows + r;
733                            a[idx1] = t[r];
734                            a[idx1 + 1] = t[idx2];
735                            a[idx1 + 2] = t[idx2 + rows];
736                            a[idx1 + 3] = t[idx2 + 2 * rows];
737                        }
738                    }
739                } else if (columns == 2) {
740                    for (int r = 0; r < rows; r++) {
741                        idx1 = idx0 + r * rowStride;
742                        t[r] = a[idx1];
743                        t[rows + r] = a[idx1 + 1];
744                    }
745                    dctRows.forward(t, 0, scale);
746                    dctRows.forward(t, rows, scale);
747                    for (int r = 0; r < rows; r++) {
748                        idx1 = idx0 + r * rowStride;
749                        a[idx1] = t[r];
750                        a[idx1 + 1] = t[rows + r];
751                    }
752                }
753            }
754        } else {
755            for (int s = 0; s < slices; s++) {
756                idx0 = s * sliceStride;
757                for (int r = 0; r < rows; r++) {
758                    dctColumns.inverse(a, idx0 + r * rowStride, scale);
759                }
760                if (columns > 2) {
761                    for (int c = 0; c < columns; c += 4) {
762                        for (int r = 0; r < rows; r++) {
763                            idx1 = idx0 + r * rowStride + c;
764                            idx2 = rows + r;
765                            t[r] = a[idx1];
766                            t[idx2] = a[idx1 + 1];
767                            t[idx2 + rows] = a[idx1 + 2];
768                            t[idx2 + 2 * rows] = a[idx1 + 3];
769                        }
770                        dctRows.inverse(t, 0, scale);
771                        dctRows.inverse(t, rows, scale);
772                        dctRows.inverse(t, 2 * rows, scale);
773                        dctRows.inverse(t, 3 * rows, scale);
774                        for (int r = 0; r < rows; r++) {
775                            idx1 = idx0 + r * rowStride + c;
776                            idx2 = rows + r;
777                            a[idx1] = t[r];
778                            a[idx1 + 1] = t[idx2];
779                            a[idx1 + 2] = t[idx2 + rows];
780                            a[idx1 + 3] = t[idx2 + 2 * rows];
781                        }
782                    }
783                } else if (columns == 2) {
784                    for (int r = 0; r < rows; r++) {
785                        idx1 = idx0 + r * rowStride;
786                        t[r] = a[idx1];
787                        t[rows + r] = a[idx1 + 1];
788                    }
789                    dctRows.inverse(t, 0, scale);
790                    dctRows.inverse(t, rows, scale);
791                    for (int r = 0; r < rows; r++) {
792                        idx1 = idx0 + r * rowStride;
793                        a[idx1] = t[r];
794                        a[idx1 + 1] = t[rows + r];
795                    }
796                }
797            }
798        }
799    }
800
801    private void ddxt3da_sub(int isgn, float[][][] a, boolean scale) {
802        int idx2;
803
804        if (isgn == -1) {
805            for (int s = 0; s < slices; s++) {
806                for (int r = 0; r < rows; r++) {
807                    dctColumns.forward(a[s][r], scale);
808                }
809                if (columns > 2) {
810                    for (int c = 0; c < columns; c += 4) {
811                        for (int r = 0; r < rows; r++) {
812                            idx2 = rows + r;
813                            t[r] = a[s][r][c];
814                            t[idx2] = a[s][r][c + 1];
815                            t[idx2 + rows] = a[s][r][c + 2];
816                            t[idx2 + 2 * rows] = a[s][r][c + 3];
817                        }
818                        dctRows.forward(t, 0, scale);
819                        dctRows.forward(t, rows, scale);
820                        dctRows.forward(t, 2 * rows, scale);
821                        dctRows.forward(t, 3 * rows, scale);
822                        for (int r = 0; r < rows; r++) {
823                            idx2 = rows + r;
824                            a[s][r][c] = t[r];
825                            a[s][r][c + 1] = t[idx2];
826                            a[s][r][c + 2] = t[idx2 + rows];
827                            a[s][r][c + 3] = t[idx2 + 2 * rows];
828                        }
829                    }
830                } else if (columns == 2) {
831                    for (int r = 0; r < rows; r++) {
832                        t[r] = a[s][r][0];
833                        t[rows + r] = a[s][r][1];
834                    }
835                    dctRows.forward(t, 0, scale);
836                    dctRows.forward(t, rows, scale);
837                    for (int r = 0; r < rows; r++) {
838                        a[s][r][0] = t[r];
839                        a[s][r][1] = t[rows + r];
840                    }
841                }
842            }
843        } else {
844            for (int s = 0; s < slices; s++) {
845                for (int r = 0; r < rows; r++) {
846                    dctColumns.inverse(a[s][r], scale);
847                }
848                if (columns > 2) {
849                    for (int c = 0; c < columns; c += 4) {
850                        for (int r = 0; r < rows; r++) {
851                            idx2 = rows + r;
852                            t[r] = a[s][r][c];
853                            t[idx2] = a[s][r][c + 1];
854                            t[idx2 + rows] = a[s][r][c + 2];
855                            t[idx2 + 2 * rows] = a[s][r][c + 3];
856                        }
857                        dctRows.inverse(t, 0, scale);
858                        dctRows.inverse(t, rows, scale);
859                        dctRows.inverse(t, 2 * rows, scale);
860                        dctRows.inverse(t, 3 * rows, scale);
861                        for (int r = 0; r < rows; r++) {
862                            idx2 = rows + r;
863                            a[s][r][c] = t[r];
864                            a[s][r][c + 1] = t[idx2];
865                            a[s][r][c + 2] = t[idx2 + rows];
866                            a[s][r][c + 3] = t[idx2 + 2 * rows];
867                        }
868                    }
869                } else if (columns == 2) {
870                    for (int r = 0; r < rows; r++) {
871                        t[r] = a[s][r][0];
872                        t[rows + r] = a[s][r][1];
873                    }
874                    dctRows.inverse(t, 0, scale);
875                    dctRows.inverse(t, rows, scale);
876                    for (int r = 0; r < rows; r++) {
877                        a[s][r][0] = t[r];
878                        a[s][r][1] = t[rows + r];
879                    }
880                }
881            }
882        }
883    }
884
885    private void ddxt3db_sub(int isgn, float[] a, boolean scale) {
886        int idx0, idx1, idx2;
887
888        if (isgn == -1) {
889            if (columns > 2) {
890                for (int r = 0; r < rows; r++) {
891                    idx0 = r * rowStride;
892                    for (int c = 0; c < columns; c += 4) {
893                        for (int s = 0; s < slices; s++) {
894                            idx1 = s * sliceStride + idx0 + c;
895                            idx2 = slices + s;
896                            t[s] = a[idx1];
897                            t[idx2] = a[idx1 + 1];
898                            t[idx2 + slices] = a[idx1 + 2];
899                            t[idx2 + 2 * slices] = a[idx1 + 3];
900                        }
901                        dctSlices.forward(t, 0, scale);
902                        dctSlices.forward(t, slices, scale);
903                        dctSlices.forward(t, 2 * slices, scale);
904                        dctSlices.forward(t, 3 * slices, scale);
905                        for (int s = 0; s < slices; s++) {
906                            idx1 = s * sliceStride + idx0 + c;
907                            idx2 = slices + s;
908                            a[idx1] = t[s];
909                            a[idx1 + 1] = t[idx2];
910                            a[idx1 + 2] = t[idx2 + slices];
911                            a[idx1 + 3] = t[idx2 + 2 * slices];
912                        }
913                    }
914                }
915            } else if (columns == 2) {
916                for (int r = 0; r < rows; r++) {
917                    idx0 = r * rowStride;
918                    for (int s = 0; s < slices; s++) {
919                        idx1 = s * sliceStride + idx0;
920                        t[s] = a[idx1];
921                        t[slices + s] = a[idx1 + 1];
922                    }
923                    dctSlices.forward(t, 0, scale);
924                    dctSlices.forward(t, slices, scale);
925                    for (int s = 0; s < slices; s++) {
926                        idx1 = s * sliceStride + idx0;
927                        a[idx1] = t[s];
928                        a[idx1 + 1] = t[slices + s];
929                    }
930                }
931            }
932        } else {
933            if (columns > 2) {
934                for (int r = 0; r < rows; r++) {
935                    idx0 = r * rowStride;
936                    for (int c = 0; c < columns; c += 4) {
937                        for (int s = 0; s < slices; s++) {
938                            idx1 = s * sliceStride + idx0 + c;
939                            idx2 = slices + s;
940                            t[s] = a[idx1];
941                            t[idx2] = a[idx1 + 1];
942                            t[idx2 + slices] = a[idx1 + 2];
943                            t[idx2 + 2 * slices] = a[idx1 + 3];
944                        }
945                        dctSlices.inverse(t, 0, scale);
946                        dctSlices.inverse(t, slices, scale);
947                        dctSlices.inverse(t, 2 * slices, scale);
948                        dctSlices.inverse(t, 3 * slices, scale);
949
950                        for (int s = 0; s < slices; s++) {
951                            idx1 = s * sliceStride + idx0 + c;
952                            idx2 = slices + s;
953                            a[idx1] = t[s];
954                            a[idx1 + 1] = t[idx2];
955                            a[idx1 + 2] = t[idx2 + slices];
956                            a[idx1 + 3] = t[idx2 + 2 * slices];
957                        }
958                    }
959                }
960            } else if (columns == 2) {
961                for (int r = 0; r < rows; r++) {
962                    idx0 = r * rowStride;
963                    for (int s = 0; s < slices; s++) {
964                        idx1 = s * sliceStride + idx0;
965                        t[s] = a[idx1];
966                        t[slices + s] = a[idx1 + 1];
967                    }
968                    dctSlices.inverse(t, 0, scale);
969                    dctSlices.inverse(t, slices, scale);
970                    for (int s = 0; s < slices; s++) {
971                        idx1 = s * sliceStride + idx0;
972                        a[idx1] = t[s];
973                        a[idx1 + 1] = t[slices + s];
974                    }
975                }
976            }
977        }
978    }
979
980    private void ddxt3db_sub(int isgn, float[][][] a, boolean scale) {
981        int idx2;
982
983        if (isgn == -1) {
984            if (columns > 2) {
985                for (int r = 0; r < rows; r++) {
986                    for (int c = 0; c < columns; c += 4) {
987                        for (int s = 0; s < slices; s++) {
988                            idx2 = slices + s;
989                            t[s] = a[s][r][c];
990                            t[idx2] = a[s][r][c + 1];
991                            t[idx2 + slices] = a[s][r][c + 2];
992                            t[idx2 + 2 * slices] = a[s][r][c + 3];
993                        }
994                        dctSlices.forward(t, 0, scale);
995                        dctSlices.forward(t, slices, scale);
996                        dctSlices.forward(t, 2 * slices, scale);
997                        dctSlices.forward(t, 3 * slices, scale);
998                        for (int s = 0; s < slices; s++) {
999                            idx2 = slices + s;
1000                            a[s][r][c] = t[s];
1001                            a[s][r][c + 1] = t[idx2];
1002                            a[s][r][c + 2] = t[idx2 + slices];
1003                            a[s][r][c + 3] = t[idx2 + 2 * slices];
1004                        }
1005                    }
1006                }
1007            } else if (columns == 2) {
1008                for (int r = 0; r < rows; r++) {
1009                    for (int s = 0; s < slices; s++) {
1010                        t[s] = a[s][r][0];
1011                        t[slices + s] = a[s][r][1];
1012                    }
1013                    dctSlices.forward(t, 0, scale);
1014                    dctSlices.forward(t, slices, scale);
1015                    for (int s = 0; s < slices; s++) {
1016                        a[s][r][0] = t[s];
1017                        a[s][r][1] = t[slices + s];
1018                    }
1019                }
1020            }
1021        } else {
1022            if (columns > 2) {
1023                for (int r = 0; r < rows; r++) {
1024                    for (int c = 0; c < columns; c += 4) {
1025                        for (int s = 0; s < slices; s++) {
1026                            idx2 = slices + s;
1027                            t[s] = a[s][r][c];
1028                            t[idx2] = a[s][r][c + 1];
1029                            t[idx2 + slices] = a[s][r][c + 2];
1030                            t[idx2 + 2 * slices] = a[s][r][c + 3];
1031                        }
1032                        dctSlices.inverse(t, 0, scale);
1033                        dctSlices.inverse(t, slices, scale);
1034                        dctSlices.inverse(t, 2 * slices, scale);
1035                        dctSlices.inverse(t, 3 * slices, scale);
1036
1037                        for (int s = 0; s < slices; s++) {
1038                            idx2 = slices + s;
1039                            a[s][r][c] = t[s];
1040                            a[s][r][c + 1] = t[idx2];
1041                            a[s][r][c + 2] = t[idx2 + slices];
1042                            a[s][r][c + 3] = t[idx2 + 2 * slices];
1043                        }
1044                    }
1045                }
1046            } else if (columns == 2) {
1047                for (int r = 0; r < rows; r++) {
1048                    for (int s = 0; s < slices; s++) {
1049                        t[s] = a[s][r][0];
1050                        t[slices + s] = a[s][r][1];
1051                    }
1052                    dctSlices.inverse(t, 0, scale);
1053                    dctSlices.inverse(t, slices, scale);
1054                    for (int s = 0; s < slices; s++) {
1055                        a[s][r][0] = t[s];
1056                        a[s][r][1] = t[slices + s];
1057                    }
1058                }
1059            }
1060        }
1061    }
1062
1063    private void ddxt3da_subth(final int isgn, final float[] a, final boolean scale) {
1064        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads();
1065        int nt = 4 * rows;
1066        if (columns == 2) {
1067            nt >>= 1;
1068        }
1069        Future<?>[] futures = new Future[nthreads];
1070
1071        for (int i = 0; i < nthreads; i++) {
1072            final int n0 = i;
1073            final int startt = nt * i;
1074            futures[i] = ConcurrencyUtils.submit(new Runnable() {
1075
1076                @Override
1077                                public void run() {
1078                    int idx0, idx1, idx2;
1079                    if (isgn == -1) {
1080                        for (int s = n0; s < slices; s += nthreads) {
1081                            idx0 = s * sliceStride;
1082                            for (int r = 0; r < rows; r++) {
1083                                dctColumns.forward(a, idx0 + r * rowStride, scale);
1084                            }
1085                            if (columns > 2) {
1086                                for (int c = 0; c < columns; c += 4) {
1087                                    for (int r = 0; r < rows; r++) {
1088                                        idx1 = idx0 + r * rowStride + c;
1089                                        idx2 = startt + rows + r;
1090                                        t[startt + r] = a[idx1];
1091                                        t[idx2] = a[idx1 + 1];
1092                                        t[idx2 + rows] = a[idx1 + 2];
1093                                        t[idx2 + 2 * rows] = a[idx1 + 3];
1094                                    }
1095                                    dctRows.forward(t, startt, scale);
1096                                    dctRows.forward(t, startt + rows, scale);
1097                                    dctRows.forward(t, startt + 2 * rows, scale);
1098                                    dctRows.forward(t, startt + 3 * rows, scale);
1099                                    for (int r = 0; r < rows; r++) {
1100                                        idx1 = idx0 + r * rowStride + c;
1101                                        idx2 = startt + rows + r;
1102                                        a[idx1] = t[startt + r];
1103                                        a[idx1 + 1] = t[idx2];
1104                                        a[idx1 + 2] = t[idx2 + rows];
1105                                        a[idx1 + 3] = t[idx2 + 2 * rows];
1106                                    }
1107                                }
1108                            } else if (columns == 2) {
1109                                for (int r = 0; r < rows; r++) {
1110                                    idx1 = idx0 + r * rowStride;
1111                                    t[startt + r] = a[idx1];
1112                                    t[startt + rows + r] = a[idx1 + 1];
1113                                }
1114                                dctRows.forward(t, startt, scale);
1115                                dctRows.forward(t, startt + rows, scale);
1116                                for (int r = 0; r < rows; r++) {
1117                                    idx1 = idx0 + r * rowStride;
1118                                    a[idx1] = t[startt + r];
1119                                    a[idx1 + 1] = t[startt + rows + r];
1120                                }
1121                            }
1122                        }
1123                    } else {
1124                        for (int s = n0; s < slices; s += nthreads) {
1125                            idx0 = s * sliceStride;
1126                            for (int r = 0; r < rows; r++) {
1127                                dctColumns.inverse(a, idx0 + r * rowStride, scale);
1128                            }
1129                            if (columns > 2) {
1130                                for (int c = 0; c < columns; c += 4) {
1131                                    for (int j = 0; j < rows; j++) {
1132                                        idx1 = idx0 + j * rowStride + c;
1133                                        idx2 = startt + rows + j;
1134                                        t[startt + j] = a[idx1];
1135                                        t[idx2] = a[idx1 + 1];
1136                                        t[idx2 + rows] = a[idx1 + 2];
1137                                        t[idx2 + 2 * rows] = a[idx1 + 3];
1138                                    }
1139                                    dctRows.inverse(t, startt, scale);
1140                                    dctRows.inverse(t, startt + rows, scale);
1141                                    dctRows.inverse(t, startt + 2 * rows, scale);
1142                                    dctRows.inverse(t, startt + 3 * rows, scale);
1143                                    for (int j = 0; j < rows; j++) {
1144                                        idx1 = idx0 + j * rowStride + c;
1145                                        idx2 = startt + rows + j;
1146                                        a[idx1] = t[startt + j];
1147                                        a[idx1 + 1] = t[idx2];
1148                                        a[idx1 + 2] = t[idx2 + rows];
1149                                        a[idx1 + 3] = t[idx2 + 2 * rows];
1150                                    }
1151                                }
1152                            } else if (columns == 2) {
1153                                for (int r = 0; r < rows; r++) {
1154                                    idx1 = idx0 + r * rowStride;
1155                                    t[startt + r] = a[idx1];
1156                                    t[startt + rows + r] = a[idx1 + 1];
1157                                }
1158                                dctRows.inverse(t, startt, scale);
1159                                dctRows.inverse(t, startt + rows, scale);
1160                                for (int r = 0; r < rows; r++) {
1161                                    idx1 = idx0 + r * rowStride;
1162                                    a[idx1] = t[startt + r];
1163                                    a[idx1 + 1] = t[startt + rows + r];
1164                                }
1165                            }
1166                        }
1167                    }
1168                }
1169            });
1170        }
1171        ConcurrencyUtils.waitForCompletion(futures);
1172    }
1173
1174    private void ddxt3da_subth(final int isgn, final float[][][] a, final boolean scale) {
1175        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > slices ? slices : ConcurrencyUtils.getNumberOfThreads();
1176        int nt = 4 * rows;
1177        if (columns == 2) {
1178            nt >>= 1;
1179        }
1180        Future<?>[] futures = new Future[nthreads];
1181
1182        for (int i = 0; i < nthreads; i++) {
1183            final int n0 = i;
1184            final int startt = nt * i;
1185            futures[i] = ConcurrencyUtils.submit(new Runnable() {
1186
1187                @Override
1188                                public void run() {
1189                    int idx2;
1190                    if (isgn == -1) {
1191                        for (int s = n0; s < slices; s += nthreads) {
1192                            for (int r = 0; r < rows; r++) {
1193                                dctColumns.forward(a[s][r], scale);
1194                            }
1195                            if (columns > 2) {
1196                                for (int c = 0; c < columns; c += 4) {
1197                                    for (int r = 0; r < rows; r++) {
1198                                        idx2 = startt + rows + r;
1199                                        t[startt + r] = a[s][r][c];
1200                                        t[idx2] = a[s][r][c + 1];
1201                                        t[idx2 + rows] = a[s][r][c + 2];
1202                                        t[idx2 + 2 * rows] = a[s][r][c + 3];
1203                                    }
1204                                    dctRows.forward(t, startt, scale);
1205                                    dctRows.forward(t, startt + rows, scale);
1206                                    dctRows.forward(t, startt + 2 * rows, scale);
1207                                    dctRows.forward(t, startt + 3 * rows, scale);
1208                                    for (int r = 0; r < rows; r++) {
1209                                        idx2 = startt + rows + r;
1210                                        a[s][r][c] = t[startt + r];
1211                                        a[s][r][c + 1] = t[idx2];
1212                                        a[s][r][c + 2] = t[idx2 + rows];
1213                                        a[s][r][c + 3] = t[idx2 + 2 * rows];
1214                                    }
1215                                }
1216                            } else if (columns == 2) {
1217                                for (int r = 0; r < rows; r++) {
1218                                    t[startt + r] = a[s][r][0];
1219                                    t[startt + rows + r] = a[s][r][1];
1220                                }
1221                                dctRows.forward(t, startt, scale);
1222                                dctRows.forward(t, startt + rows, scale);
1223                                for (int r = 0; r < rows; r++) {
1224                                    a[s][r][0] = t[startt + r];
1225                                    a[s][r][1] = t[startt + rows + r];
1226                                }
1227                            }
1228                        }
1229                    } else {
1230                        for (int s = n0; s < slices; s += nthreads) {
1231                            for (int r = 0; r < rows; r++) {
1232                                dctColumns.inverse(a[s][r], scale);
1233                            }
1234                            if (columns > 2) {
1235                                for (int c = 0; c < columns; c += 4) {
1236                                    for (int r = 0; r < rows; r++) {
1237                                        idx2 = startt + rows + r;
1238                                        t[startt + r] = a[s][r][c];
1239                                        t[idx2] = a[s][r][c + 1];
1240                                        t[idx2 + rows] = a[s][r][c + 2];
1241                                        t[idx2 + 2 * rows] = a[s][r][c + 3];
1242                                    }
1243                                    dctRows.inverse(t, startt, scale);
1244                                    dctRows.inverse(t, startt + rows, scale);
1245                                    dctRows.inverse(t, startt + 2 * rows, scale);
1246                                    dctRows.inverse(t, startt + 3 * rows, scale);
1247                                    for (int r = 0; r < rows; r++) {
1248                                        idx2 = startt + rows + r;
1249                                        a[s][r][c] = t[startt + r];
1250                                        a[s][r][c + 1] = t[idx2];
1251                                        a[s][r][c + 2] = t[idx2 + rows];
1252                                        a[s][r][c + 3] = t[idx2 + 2 * rows];
1253                                    }
1254                                }
1255                            } else if (columns == 2) {
1256                                for (int r = 0; r < rows; r++) {
1257                                    t[startt + r] = a[s][r][0];
1258                                    t[startt + rows + r] = a[s][r][1];
1259                                }
1260                                dctRows.inverse(t, startt, scale);
1261                                dctRows.inverse(t, startt + rows, scale);
1262                                for (int r = 0; r < rows; r++) {
1263                                    a[s][r][0] = t[startt + r];
1264                                    a[s][r][1] = t[startt + rows + r];
1265                                }
1266                            }
1267                        }
1268                    }
1269                }
1270            });
1271        }
1272        ConcurrencyUtils.waitForCompletion(futures);
1273    }
1274
1275    private void ddxt3db_subth(final int isgn, final float[] a, final boolean scale) {
1276        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
1277        int nt = 4 * slices;
1278        if (columns == 2) {
1279            nt >>= 1;
1280        }
1281        Future<?>[] futures = new Future[nthreads];
1282        for (int i = 0; i < nthreads; i++) {
1283            final int n0 = i;
1284            final int startt = nt * i;
1285            futures[i] = ConcurrencyUtils.submit(new Runnable() {
1286
1287                @Override
1288                                public void run() {
1289                    int idx0, idx1, idx2;
1290                    if (isgn == -1) {
1291                        if (columns > 2) {
1292                            for (int r = n0; r < rows; r += nthreads) {
1293                                idx0 = r * rowStride;
1294                                for (int c = 0; c < columns; c += 4) {
1295                                    for (int s = 0; s < slices; s++) {
1296                                        idx1 = s * sliceStride + idx0 + c;
1297                                        idx2 = startt + slices + s;
1298                                        t[startt + s] = a[idx1];
1299                                        t[idx2] = a[idx1 + 1];
1300                                        t[idx2 + slices] = a[idx1 + 2];
1301                                        t[idx2 + 2 * slices] = a[idx1 + 3];
1302                                    }
1303                                    dctSlices.forward(t, startt, scale);
1304                                    dctSlices.forward(t, startt + slices, scale);
1305                                    dctSlices.forward(t, startt + 2 * slices, scale);
1306                                    dctSlices.forward(t, startt + 3 * slices, scale);
1307                                    for (int s = 0; s < slices; s++) {
1308                                        idx1 = s * sliceStride + idx0 + c;
1309                                        idx2 = startt + slices + s;
1310                                        a[idx1] = t[startt + s];
1311                                        a[idx1 + 1] = t[idx2];
1312                                        a[idx1 + 2] = t[idx2 + slices];
1313                                        a[idx1 + 3] = t[idx2 + 2 * slices];
1314                                    }
1315                                }
1316                            }
1317                        } else if (columns == 2) {
1318                            for (int r = n0; r < rows; r += nthreads) {
1319                                idx0 = r * rowStride;
1320                                for (int s = 0; s < slices; s++) {
1321                                    idx1 = s * sliceStride + idx0;
1322                                    t[startt + s] = a[idx1];
1323                                    t[startt + slices + s] = a[idx1 + 1];
1324                                }
1325                                dctSlices.forward(t, startt, scale);
1326                                dctSlices.forward(t, startt + slices, scale);
1327                                for (int s = 0; s < slices; s++) {
1328                                    idx1 = s * sliceStride + idx0;
1329                                    a[idx1] = t[startt + s];
1330                                    a[idx1 + 1] = t[startt + slices + s];
1331                                }
1332                            }
1333                        }
1334                    } else {
1335                        if (columns > 2) {
1336                            for (int r = n0; r < rows; r += nthreads) {
1337                                idx0 = r * rowStride;
1338                                for (int c = 0; c < columns; c += 4) {
1339                                    for (int s = 0; s < slices; s++) {
1340                                        idx1 = s * sliceStride + idx0 + c;
1341                                        idx2 = startt + slices + s;
1342                                        t[startt + s] = a[idx1];
1343                                        t[idx2] = a[idx1 + 1];
1344                                        t[idx2 + slices] = a[idx1 + 2];
1345                                        t[idx2 + 2 * slices] = a[idx1 + 3];
1346                                    }
1347                                    dctSlices.inverse(t, startt, scale);
1348                                    dctSlices.inverse(t, startt + slices, scale);
1349                                    dctSlices.inverse(t, startt + 2 * slices, scale);
1350                                    dctSlices.inverse(t, startt + 3 * slices, scale);
1351                                    for (int s = 0; s < slices; s++) {
1352                                        idx1 = s * sliceStride + idx0 + c;
1353                                        idx2 = startt + slices + s;
1354                                        a[idx1] = t[startt + s];
1355                                        a[idx1 + 1] = t[idx2];
1356                                        a[idx1 + 2] = t[idx2 + slices];
1357                                        a[idx1 + 3] = t[idx2 + 2 * slices];
1358                                    }
1359                                }
1360                            }
1361                        } else if (columns == 2) {
1362                            for (int r = n0; r < rows; r += nthreads) {
1363                                idx0 = r * rowStride;
1364                                for (int s = 0; s < slices; s++) {
1365                                    idx1 = s * sliceStride + idx0;
1366                                    t[startt + s] = a[idx1];
1367                                    t[startt + slices + s] = a[idx1 + 1];
1368                                }
1369                                dctSlices.inverse(t, startt, scale);
1370                                dctSlices.inverse(t, startt + slices, scale);
1371
1372                                for (int s = 0; s < slices; s++) {
1373                                    idx1 = s * sliceStride + idx0;
1374                                    a[idx1] = t[startt + s];
1375                                    a[idx1 + 1] = t[startt + slices + s];
1376                                }
1377                            }
1378                        }
1379                    }
1380                }
1381            });
1382        }
1383        ConcurrencyUtils.waitForCompletion(futures);
1384    }
1385
1386    private void ddxt3db_subth(final int isgn, final float[][][] a, final boolean scale) {
1387        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
1388        int nt = 4 * slices;
1389        if (columns == 2) {
1390            nt >>= 1;
1391        }
1392        Future<?>[] futures = new Future[nthreads];
1393
1394        for (int i = 0; i < nthreads; i++) {
1395            final int n0 = i;
1396            final int startt = nt * i;
1397            futures[i] = ConcurrencyUtils.submit(new Runnable() {
1398
1399                @Override
1400                                public void run() {
1401                    int idx2;
1402                    if (isgn == -1) {
1403                        if (columns > 2) {
1404                            for (int r = n0; r < rows; r += nthreads) {
1405                                for (int c = 0; c < columns; c += 4) {
1406                                    for (int s = 0; s < slices; s++) {
1407                                        idx2 = startt + slices + s;
1408                                        t[startt + s] = a[s][r][c];
1409                                        t[idx2] = a[s][r][c + 1];
1410                                        t[idx2 + slices] = a[s][r][c + 2];
1411                                        t[idx2 + 2 * slices] = a[s][r][c + 3];
1412                                    }
1413                                    dctSlices.forward(t, startt, scale);
1414                                    dctSlices.forward(t, startt + slices, scale);
1415                                    dctSlices.forward(t, startt + 2 * slices, scale);
1416                                    dctSlices.forward(t, startt + 3 * slices, scale);
1417                                    for (int s = 0; s < slices; s++) {
1418                                        idx2 = startt + slices + s;
1419                                        a[s][r][c] = t[startt + s];
1420                                        a[s][r][c + 1] = t[idx2];
1421                                        a[s][r][c + 2] = t[idx2 + slices];
1422                                        a[s][r][c + 3] = t[idx2 + 2 * slices];
1423                                    }
1424                                }
1425                            }
1426                        } else if (columns == 2) {
1427                            for (int r = n0; r < rows; r += nthreads) {
1428                                for (int s = 0; s < slices; s++) {
1429                                    t[startt + s] = a[s][r][0];
1430                                    t[startt + slices + s] = a[s][r][1];
1431                                }
1432                                dctSlices.forward(t, startt, scale);
1433                                dctSlices.forward(t, startt + slices, scale);
1434                                for (int s = 0; s < slices; s++) {
1435                                    a[s][r][0] = t[startt + s];
1436                                    a[s][r][1] = t[startt + slices + s];
1437                                }
1438                            }
1439                        }
1440                    } else {
1441                        if (columns > 2) {
1442                            for (int r = n0; r < rows; r += nthreads) {
1443                                for (int c = 0; c < columns; c += 4) {
1444                                    for (int s = 0; s < slices; s++) {
1445                                        idx2 = startt + slices + s;
1446                                        t[startt + s] = a[s][r][c];
1447                                        t[idx2] = a[s][r][c + 1];
1448                                        t[idx2 + slices] = a[s][r][c + 2];
1449                                        t[idx2 + 2 * slices] = a[s][r][c + 3];
1450                                    }
1451                                    dctSlices.inverse(t, startt, scale);
1452                                    dctSlices.inverse(t, startt + slices, scale);
1453                                    dctSlices.inverse(t, startt + 2 * slices, scale);
1454                                    dctSlices.inverse(t, startt + 3 * slices, scale);
1455                                    for (int s = 0; s < slices; s++) {
1456                                        idx2 = startt + slices + s;
1457                                        a[s][r][c] = t[startt + s];
1458                                        a[s][r][c + 1] = t[idx2];
1459                                        a[s][r][c + 2] = t[idx2 + slices];
1460                                        a[s][r][c + 3] = t[idx2 + 2 * slices];
1461                                    }
1462                                }
1463                            }
1464                        } else if (columns == 2) {
1465                            for (int r = n0; r < rows; r += nthreads) {
1466                                for (int s = 0; s < slices; s++) {
1467                                    t[startt + s] = a[s][r][0];
1468                                    t[startt + slices + s] = a[s][r][1];
1469                                }
1470                                dctSlices.inverse(t, startt, scale);
1471                                dctSlices.inverse(t, startt + slices, scale);
1472
1473                                for (int s = 0; s < slices; s++) {
1474                                    a[s][r][0] = t[startt + s];
1475                                    a[s][r][1] = t[startt + slices + s];
1476                                }
1477                            }
1478                        }
1479                    }
1480                }
1481            });
1482        }
1483        ConcurrencyUtils.waitForCompletion(futures);
1484    }
1485}