001/* ***** BEGIN LICENSE BLOCK *****
002 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
003 *
004 * The contents of this file are subject to the Mozilla Public License Version
005 * 1.1 (the "License"); you may not use this file except in compliance with
006 * the License. You may obtain a copy of the License at
007 * http://www.mozilla.org/MPL/
008 *
009 * Software distributed under the License is distributed on an "AS IS" basis,
010 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
011 * for the specific language governing rights and limitations under the
012 * License.
013 *
014 * The Original Code is JTransforms.
015 *
016 * The Initial Developer of the Original Code is
017 * Piotr Wendykier, Emory University.
018 * Portions created by the Initial Developer are Copyright (C) 2007-2009
019 * the Initial Developer. All Rights Reserved.
020 *
021 * Alternatively, the contents of this file may be used under the terms of
022 * either the GNU General Public License Version 2 or later (the "GPL"), or
023 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
024 * in which case the provisions of the GPL or the LGPL are applicable instead
025 * of those above. If you wish to allow use of your version of this file only
026 * under the terms of either the GPL or the LGPL, and not to allow others to
027 * use your version of this file under the terms of the MPL, indicate your
028 * decision by deleting the provisions above and replace them with the notice
029 * and other provisions required by the GPL or the LGPL. If you do not delete
030 * the provisions above, a recipient may use your version of this file under
031 * the terms of any one of the MPL, the GPL or the LGPL.
032 *
033 * ***** END LICENSE BLOCK ***** */
034
035package edu.emory.mathcs.jtransforms.dct;
036
037import java.util.concurrent.Future;
038
039import edu.emory.mathcs.utils.ConcurrencyUtils;
040
041/**
042 * Computes 2D Discrete Cosine Transform (DCT) of single precision data. The
043 * sizes of both dimensions can be arbitrary numbers. This is a parallel
044 * implementation of split-radix and mixed-radix algorithms optimized for SMP
045 * systems. <br>
046 * <br>
047 * Part of the code is derived from General Purpose FFT Package written by Takuya Ooura
048 * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
049 * 
050 * @author Piotr Wendykier (piotr.wendykier@gmail.com)
051 * 
052 */
053public class FloatDCT_2D {
054
055    private int rows;
056
057    private int columns;
058
059    private float[] t;
060
061    private FloatDCT_1D dctColumns, dctRows;
062
063    private int nt;
064
065    private int oldNthreads;
066
067    private boolean isPowerOfTwo = false;
068
069    private boolean useThreads = false;
070
071    /**
072     * Creates new instance of FloatDCT_2D.
073     * 
074     * @param rows
075     *            number of rows
076     * @param columns
077     *            number of columns
078     */
079    public FloatDCT_2D(int rows, int columns) {
080        if (rows <= 1 || columns <= 1) {
081            throw new IllegalArgumentException("rows and columns must be greater than 1");
082        }
083        this.rows = rows;
084        this.columns = columns;
085        if (rows * columns >= ConcurrencyUtils.getThreadsBeginN_2D()) {
086            this.useThreads = true;
087        }
088        if (ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) {
089            isPowerOfTwo = true;
090            oldNthreads = ConcurrencyUtils.getNumberOfThreads();
091            nt = 4 * oldNthreads * rows;
092            if (columns == 2 * oldNthreads) {
093                nt >>= 1;
094            } else if (columns < 2 * oldNthreads) {
095                nt >>= 2;
096            }
097            t = new float[nt];
098        }
099        dctColumns = new FloatDCT_1D(columns);
100        if (columns == rows) {
101            dctRows = dctColumns;
102        } else {
103            dctRows = new FloatDCT_1D(rows);
104        }
105    }
106
107    /**
108     * Computes 2D forward DCT (DCT-II) leaving the result in <code>a</code>.
109     * The data is stored in 1D array in row-major order.
110     * 
111     * @param a
112     *            data to transform
113     * @param scale
114     *            if true then scaling is performed
115     */
116    public void forward(final float[] a, final boolean scale) {
117        int nthreads = ConcurrencyUtils.getNumberOfThreads();
118        if (isPowerOfTwo) {
119            if (nthreads != oldNthreads) {
120                nt = 4 * nthreads * rows;
121                if (columns == 2 * nthreads) {
122                    nt >>= 1;
123                } else if (columns < 2 * nthreads) {
124                    nt >>= 2;
125                }
126                t = new float[nt];
127                oldNthreads = nthreads;
128            }
129            if ((nthreads > 1) && useThreads) {
130                ddxt2d_subth(-1, a, scale);
131                ddxt2d0_subth(-1, a, scale);
132            } else {
133                ddxt2d_sub(-1, a, scale);
134                for (int i = 0; i < rows; i++) {
135                    dctColumns.forward(a, i * columns, scale);
136                }
137            }
138        } else {
139            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
140                Future<?>[] futures = new Future[nthreads];
141                int p = rows / nthreads;
142                for (int l = 0; l < nthreads; l++) {
143                    final int firstRow = l * p;
144                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
145                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
146                        @Override
147                                                public void run() {
148                            for (int r = firstRow; r < lastRow; r++) {
149                                dctColumns.forward(a, r * columns, scale);
150                            }
151                        }
152                    });
153                }
154                ConcurrencyUtils.waitForCompletion(futures);
155                p = columns / nthreads;
156                for (int l = 0; l < nthreads; l++) {
157                    final int firstColumn = l * p;
158                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
159                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
160                        @Override
161                                                public void run() {
162                            float[] temp = new float[rows];
163                            for (int c = firstColumn; c < lastColumn; c++) {
164                                for (int r = 0; r < rows; r++) {
165                                    temp[r] = a[r * columns + c];
166                                }
167                                dctRows.forward(temp, scale);
168                                for (int r = 0; r < rows; r++) {
169                                    a[r * columns + c] = temp[r];
170                                }
171                            }
172                        }
173                    });
174                }
175                ConcurrencyUtils.waitForCompletion(futures);
176            } else {
177                for (int i = 0; i < rows; i++) {
178                    dctColumns.forward(a, i * columns, scale);
179                }
180                float[] temp = new float[rows];
181                for (int c = 0; c < columns; c++) {
182                    for (int r = 0; r < rows; r++) {
183                        temp[r] = a[r * columns + c];
184                    }
185                    dctRows.forward(temp, scale);
186                    for (int r = 0; r < rows; r++) {
187                        a[r * columns + c] = temp[r];
188                    }
189                }
190            }
191        }
192    }
193
194    /**
195     * Computes 2D forward DCT (DCT-II) leaving the result in <code>a</code>.
196     * The data is stored in 2D array.
197     * 
198     * @param a
199     *            data to transform
200     * @param scale
201     *            if true then scaling is performed
202     */
203    public void forward(final float[][] a, final boolean scale) {
204        int nthreads = ConcurrencyUtils.getNumberOfThreads();
205        if (isPowerOfTwo) {
206            if (nthreads != oldNthreads) {
207                nt = 4 * nthreads * rows;
208                if (columns == 2 * nthreads) {
209                    nt >>= 1;
210                } else if (columns < 2 * nthreads) {
211                    nt >>= 2;
212                }
213                t = new float[nt];
214                oldNthreads = nthreads;
215            }
216            if ((nthreads > 1) && useThreads) {
217                ddxt2d_subth(-1, a, scale);
218                ddxt2d0_subth(-1, a, scale);
219            } else {
220                ddxt2d_sub(-1, a, scale);
221                for (int i = 0; i < rows; i++) {
222                    dctColumns.forward(a[i], scale);
223                }
224            }
225        } else {
226            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
227                Future<?>[] futures = new Future[nthreads];
228                int p = rows / nthreads;
229                for (int l = 0; l < nthreads; l++) {
230                    final int firstRow = l * p;
231                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
232                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
233                        @Override
234                                                public void run() {
235                            for (int i = firstRow; i < lastRow; i++) {
236                                dctColumns.forward(a[i], scale);
237                            }
238                        }
239                    });
240                }
241                ConcurrencyUtils.waitForCompletion(futures);
242                p = columns / nthreads;
243                for (int l = 0; l < nthreads; l++) {
244                    final int firstColumn = l * p;
245                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
246                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
247                        @Override
248                                                public void run() {
249                            float[] temp = new float[rows];
250                            for (int c = firstColumn; c < lastColumn; c++) {
251                                for (int r = 0; r < rows; r++) {
252                                    temp[r] = a[r][c];
253                                }
254                                dctRows.forward(temp, scale);
255                                for (int r = 0; r < rows; r++) {
256                                    a[r][c] = temp[r];
257                                }
258                            }
259                        }
260                    });
261                }
262                ConcurrencyUtils.waitForCompletion(futures);
263            } else {
264                for (int i = 0; i < rows; i++) {
265                    dctColumns.forward(a[i], scale);
266                }
267                float[] temp = new float[rows];
268                for (int c = 0; c < columns; c++) {
269                    for (int r = 0; r < rows; r++) {
270                        temp[r] = a[r][c];
271                    }
272                    dctRows.forward(temp, scale);
273                    for (int r = 0; r < rows; r++) {
274                        a[r][c] = temp[r];
275                    }
276                }
277            }
278        }
279    }
280
281    /**
282     * Computes 2D inverse DCT (DCT-III) leaving the result in <code>a</code>.
283     * The data is stored in 1D array in row-major order.
284     * 
285     * @param a
286     *            data to transform
287     * @param scale
288     *            if true then scaling is performed
289     */
290    public void inverse(final float[] a, final boolean scale) {
291        int nthreads = ConcurrencyUtils.getNumberOfThreads();
292        if (isPowerOfTwo) {
293            if (nthreads != oldNthreads) {
294                nt = 4 * nthreads * rows;
295                if (columns == 2 * nthreads) {
296                    nt >>= 1;
297                } else if (columns < 2 * nthreads) {
298                    nt >>= 2;
299                }
300                t = new float[nt];
301                oldNthreads = nthreads;
302            }
303            if ((nthreads > 1) && useThreads) {
304                ddxt2d_subth(1, a, scale);
305                ddxt2d0_subth(1, a, scale);
306            } else {
307                ddxt2d_sub(1, a, scale);
308                for (int i = 0; i < rows; i++) {
309                    dctColumns.inverse(a, i * columns, scale);
310                }
311            }
312        } else {
313            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
314                Future<?>[] futures = new Future[nthreads];
315                int p = rows / nthreads;
316                for (int l = 0; l < nthreads; l++) {
317                    final int firstRow = l * p;
318                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
319                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
320                        @Override
321                                                public void run() {
322                            for (int i = firstRow; i < lastRow; i++) {
323                                dctColumns.inverse(a, i * columns, scale);
324                            }
325                        }
326                    });
327                }
328                ConcurrencyUtils.waitForCompletion(futures);
329                p = columns / nthreads;
330                for (int l = 0; l < nthreads; l++) {
331                    final int firstColumn = l * p;
332                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
333                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
334                        @Override
335                                                public void run() {
336                            float[] temp = new float[rows];
337                            for (int c = firstColumn; c < lastColumn; c++) {
338                                for (int r = 0; r < rows; r++) {
339                                    temp[r] = a[r * columns + c];
340                                }
341                                dctRows.inverse(temp, scale);
342                                for (int r = 0; r < rows; r++) {
343                                    a[r * columns + c] = temp[r];
344                                }
345                            }
346                        }
347                    });
348                }
349                ConcurrencyUtils.waitForCompletion(futures);
350            } else {
351                for (int i = 0; i < rows; i++) {
352                    dctColumns.inverse(a, i * columns, scale);
353                }
354                float[] temp = new float[rows];
355                for (int c = 0; c < columns; c++) {
356                    for (int r = 0; r < rows; r++) {
357                        temp[r] = a[r * columns + c];
358                    }
359                    dctRows.inverse(temp, scale);
360                    for (int r = 0; r < rows; r++) {
361                        a[r * columns + c] = temp[r];
362                    }
363                }
364            }
365        }
366    }
367
368    /**
369     * Computes 2D inverse DCT (DCT-III) leaving the result in <code>a</code>.
370     * The data is stored in 2D array.
371     * 
372     * @param a
373     *            data to transform
374     * @param scale
375     *            if true then scaling is performed
376     */
377    public void inverse(final float[][] a, final boolean scale) {
378        int nthreads = ConcurrencyUtils.getNumberOfThreads();
379        if (isPowerOfTwo) {
380            if (nthreads != oldNthreads) {
381                nt = 4 * nthreads * rows;
382                if (columns == 2 * nthreads) {
383                    nt >>= 1;
384                } else if (columns < 2 * nthreads) {
385                    nt >>= 2;
386                }
387                t = new float[nt];
388                oldNthreads = nthreads;
389            }
390            if ((nthreads > 1) && useThreads) {
391                ddxt2d_subth(1, a, scale);
392                ddxt2d0_subth(1, a, scale);
393            } else {
394                ddxt2d_sub(1, a, scale);
395                for (int i = 0; i < rows; i++) {
396                    dctColumns.inverse(a[i], scale);
397                }
398            }
399        } else {
400            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
401                Future<?>[] futures = new Future[nthreads];
402                int p = rows / nthreads;
403                for (int l = 0; l < nthreads; l++) {
404                    final int firstRow = l * p;
405                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
406                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
407                        @Override
408                                                public void run() {
409                            for (int i = firstRow; i < lastRow; i++) {
410                                dctColumns.inverse(a[i], scale);
411                            }
412                        }
413                    });
414                }
415                ConcurrencyUtils.waitForCompletion(futures);
416                p = columns / nthreads;
417                for (int l = 0; l < nthreads; l++) {
418                    final int firstColumn = l * p;
419                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
420                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
421                        @Override
422                                                public void run() {
423                            float[] temp = new float[rows];
424                            for (int c = firstColumn; c < lastColumn; c++) {
425                                for (int r = 0; r < rows; r++) {
426                                    temp[r] = a[r][c];
427                                }
428                                dctRows.inverse(temp, scale);
429                                for (int r = 0; r < rows; r++) {
430                                    a[r][c] = temp[r];
431                                }
432                            }
433                        }
434                    });
435                }
436                ConcurrencyUtils.waitForCompletion(futures);
437            } else {
438                for (int r = 0; r < rows; r++) {
439                    dctColumns.inverse(a[r], scale);
440                }
441                float[] temp = new float[rows];
442                for (int c = 0; c < columns; c++) {
443                    for (int r = 0; r < rows; r++) {
444                        temp[r] = a[r][c];
445                    }
446                    dctRows.inverse(temp, scale);
447                    for (int r = 0; r < rows; r++) {
448                        a[r][c] = temp[r];
449                    }
450                }
451            }
452        }
453    }
454
455    private void ddxt2d_subth(final int isgn, final float[] a, final boolean scale) {
456        int nthread = ConcurrencyUtils.getNumberOfThreads();
457        int nt = 4 * rows;
458        if (columns == 2 * nthread) {
459            nt >>= 1;
460        } else if (columns < 2 * nthread) {
461            nthread = columns;
462            nt >>= 2;
463        }
464        final int nthreads = nthread;
465        Future<?>[] futures = new Future[nthread];
466
467        for (int i = 0; i < nthread; i++) {
468            final int n0 = i;
469            final int startt = nt * i;
470            futures[i] = ConcurrencyUtils.submit(new Runnable() {
471                @Override
472                                public void run() {
473                    int idx1, idx2;
474                    if (columns > 2 * nthreads) {
475                        if (isgn == -1) {
476                            for (int c = 4 * n0; c < columns; c += 4 * nthreads) {
477                                for (int r = 0; r < rows; r++) {
478                                    idx1 = r * columns + c;
479                                    idx2 = startt + rows + r;
480                                    t[startt + r] = a[idx1];
481                                    t[idx2] = a[idx1 + 1];
482                                    t[idx2 + rows] = a[idx1 + 2];
483                                    t[idx2 + 2 * rows] = a[idx1 + 3];
484                                }
485                                dctRows.forward(t, startt, scale);
486                                dctRows.forward(t, startt + rows, scale);
487                                dctRows.forward(t, startt + 2 * rows, scale);
488                                dctRows.forward(t, startt + 3 * rows, scale);
489                                for (int r = 0; r < rows; r++) {
490                                    idx1 = r * columns + c;
491                                    idx2 = startt + rows + r;
492                                    a[idx1] = t[startt + r];
493                                    a[idx1 + 1] = t[idx2];
494                                    a[idx1 + 2] = t[idx2 + rows];
495                                    a[idx1 + 3] = t[idx2 + 2 * rows];
496                                }
497                            }
498                        } else {
499                            for (int c = 4 * n0; c < columns; c += 4 * nthreads) {
500                                for (int r = 0; r < rows; r++) {
501                                    idx1 = r * columns + c;
502                                    idx2 = startt + rows + r;
503                                    t[startt + r] = a[idx1];
504                                    t[idx2] = a[idx1 + 1];
505                                    t[idx2 + rows] = a[idx1 + 2];
506                                    t[idx2 + 2 * rows] = a[idx1 + 3];
507                                }
508                                dctRows.inverse(t, startt, scale);
509                                dctRows.inverse(t, startt + rows, scale);
510                                dctRows.inverse(t, startt + 2 * rows, scale);
511                                dctRows.inverse(t, startt + 3 * rows, scale);
512                                for (int r = 0; r < rows; r++) {
513                                    idx1 = r * columns + c;
514                                    idx2 = startt + rows + r;
515                                    a[idx1] = t[startt + r];
516                                    a[idx1 + 1] = t[idx2];
517                                    a[idx1 + 2] = t[idx2 + rows];
518                                    a[idx1 + 3] = t[idx2 + 2 * rows];
519                                }
520                            }
521                        }
522                    } else if (columns == 2 * nthreads) {
523                        for (int r = 0; r < rows; r++) {
524                            idx1 = r * columns + 2 * n0;
525                            idx2 = startt + r;
526                            t[idx2] = a[idx1];
527                            t[idx2 + rows] = a[idx1 + 1];
528                        }
529                        if (isgn == -1) {
530                            dctRows.forward(t, startt, scale);
531                            dctRows.forward(t, startt + rows, scale);
532                        } else {
533                            dctRows.inverse(t, startt, scale);
534                            dctRows.inverse(t, startt + rows, scale);
535                        }
536                        for (int r = 0; r < rows; r++) {
537                            idx1 = r * columns + 2 * n0;
538                            idx2 = startt + r;
539                            a[idx1] = t[idx2];
540                            a[idx1 + 1] = t[idx2 + rows];
541                        }
542                    } else if (columns == nthreads) {
543                        for (int r = 0; r < rows; r++) {
544                            t[startt + r] = a[r * columns + n0];
545                        }
546                        if (isgn == -1) {
547                            dctRows.forward(t, startt, scale);
548                        } else {
549                            dctRows.inverse(t, startt, scale);
550                        }
551                        for (int r = 0; r < rows; r++) {
552                            a[r * columns + n0] = t[startt + r];
553                        }
554                    }
555                }
556            });
557        }
558        ConcurrencyUtils.waitForCompletion(futures);
559    }
560
561    private void ddxt2d_subth(final int isgn, final float[][] a, final boolean scale) {
562        int nthread = ConcurrencyUtils.getNumberOfThreads();
563        int nt = 4 * rows;
564        if (columns == 2 * nthread) {
565            nt >>= 1;
566        } else if (columns < 2 * nthread) {
567            nthread = columns;
568            nt >>= 2;
569        }
570        final int nthreads = nthread;
571        Future<?>[] futures = new Future[nthread];
572
573        for (int i = 0; i < nthread; i++) {
574            final int n0 = i;
575            final int startt = nt * i;
576            futures[i] = ConcurrencyUtils.submit(new Runnable() {
577                @Override
578                                public void run() {
579                    int idx2;
580                    if (columns > 2 * nthreads) {
581                        if (isgn == -1) {
582                            for (int c = 4 * n0; c < columns; c += 4 * nthreads) {
583                                for (int r = 0; r < rows; r++) {
584                                    idx2 = startt + rows + r;
585                                    t[startt + r] = a[r][c];
586                                    t[idx2] = a[r][c + 1];
587                                    t[idx2 + rows] = a[r][c + 2];
588                                    t[idx2 + 2 * rows] = a[r][c + 3];
589                                }
590                                dctRows.forward(t, startt, scale);
591                                dctRows.forward(t, startt + rows, scale);
592                                dctRows.forward(t, startt + 2 * rows, scale);
593                                dctRows.forward(t, startt + 3 * rows, scale);
594                                for (int r = 0; r < rows; r++) {
595                                    idx2 = startt + rows + r;
596                                    a[r][c] = t[startt + r];
597                                    a[r][c + 1] = t[idx2];
598                                    a[r][c + 2] = t[idx2 + rows];
599                                    a[r][c + 3] = t[idx2 + 2 * rows];
600                                }
601                            }
602                        } else {
603                            for (int c = 4 * n0; c < columns; c += 4 * nthreads) {
604                                for (int r = 0; r < rows; r++) {
605                                    idx2 = startt + rows + r;
606                                    t[startt + r] = a[r][c];
607                                    t[idx2] = a[r][c + 1];
608                                    t[idx2 + rows] = a[r][c + 2];
609                                    t[idx2 + 2 * rows] = a[r][c + 3];
610                                }
611                                dctRows.inverse(t, startt, scale);
612                                dctRows.inverse(t, startt + rows, scale);
613                                dctRows.inverse(t, startt + 2 * rows, scale);
614                                dctRows.inverse(t, startt + 3 * rows, scale);
615                                for (int r = 0; r < rows; r++) {
616                                    idx2 = startt + rows + r;
617                                    a[r][c] = t[startt + r];
618                                    a[r][c + 1] = t[idx2];
619                                    a[r][c + 2] = t[idx2 + rows];
620                                    a[r][c + 3] = t[idx2 + 2 * rows];
621                                }
622                            }
623                        }
624                    } else if (columns == 2 * nthreads) {
625                        for (int r = 0; r < rows; r++) {
626                            idx2 = startt + r;
627                            t[idx2] = a[r][2 * n0];
628                            t[idx2 + rows] = a[r][2 * n0 + 1];
629                        }
630                        if (isgn == -1) {
631                            dctRows.forward(t, startt, scale);
632                            dctRows.forward(t, startt + rows, scale);
633                        } else {
634                            dctRows.inverse(t, startt, scale);
635                            dctRows.inverse(t, startt + rows, scale);
636                        }
637                        for (int r = 0; r < rows; r++) {
638                            idx2 = startt + r;
639                            a[r][2 * n0] = t[idx2];
640                            a[r][2 * n0 + 1] = t[idx2 + rows];
641                        }
642                    } else if (columns == nthreads) {
643                        for (int r = 0; r < rows; r++) {
644                            t[startt + r] = a[r][n0];
645                        }
646                        if (isgn == -1) {
647                            dctRows.forward(t, startt, scale);
648                        } else {
649                            dctRows.inverse(t, startt, scale);
650                        }
651                        for (int r = 0; r < rows; r++) {
652                            a[r][n0] = t[startt + r];
653                        }
654                    }
655                }
656            });
657        }
658        ConcurrencyUtils.waitForCompletion(futures);
659    }
660
661    private void ddxt2d0_subth(final int isgn, final float[] a, final boolean scale) {
662        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
663
664        Future<?>[] futures = new Future[nthreads];
665
666        for (int i = 0; i < nthreads; i++) {
667            final int n0 = i;
668            futures[i] = ConcurrencyUtils.submit(new Runnable() {
669
670                @Override
671                                public void run() {
672                    if (isgn == -1) {
673                        for (int r = n0; r < rows; r += nthreads) {
674                            dctColumns.forward(a, r * columns, scale);
675                        }
676                    } else {
677                        for (int r = n0; r < rows; r += nthreads) {
678                            dctColumns.inverse(a, r * columns, scale);
679                        }
680                    }
681                }
682            });
683        }
684        ConcurrencyUtils.waitForCompletion(futures);
685    }
686
687    private void ddxt2d0_subth(final int isgn, final float[][] a, final boolean scale) {
688        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
689
690        Future<?>[] futures = new Future[nthreads];
691
692        for (int i = 0; i < nthreads; i++) {
693            final int n0 = i;
694            futures[i] = ConcurrencyUtils.submit(new Runnable() {
695
696                @Override
697                                public void run() {
698                    if (isgn == -1) {
699                        for (int r = n0; r < rows; r += nthreads) {
700                            dctColumns.forward(a[r], scale);
701                        }
702                    } else {
703                        for (int r = n0; r < rows; r += nthreads) {
704                            dctColumns.inverse(a[r], scale);
705                        }
706                    }
707                }
708            });
709        }
710        ConcurrencyUtils.waitForCompletion(futures);
711    }
712
713    private void ddxt2d_sub(int isgn, float[] a, boolean scale) {
714        int idx1, idx2;
715
716        if (columns > 2) {
717            if (isgn == -1) {
718                for (int c = 0; c < columns; c += 4) {
719                    for (int r = 0; r < rows; r++) {
720                        idx1 = r * columns + c;
721                        idx2 = rows + r;
722                        t[r] = a[idx1];
723                        t[idx2] = a[idx1 + 1];
724                        t[idx2 + rows] = a[idx1 + 2];
725                        t[idx2 + 2 * rows] = a[idx1 + 3];
726                    }
727                    dctRows.forward(t, 0, scale);
728                    dctRows.forward(t, rows, scale);
729                    dctRows.forward(t, 2 * rows, scale);
730                    dctRows.forward(t, 3 * rows, scale);
731                    for (int r = 0; r < rows; r++) {
732                        idx1 = r * columns + c;
733                        idx2 = rows + r;
734                        a[idx1] = t[r];
735                        a[idx1 + 1] = t[idx2];
736                        a[idx1 + 2] = t[idx2 + rows];
737                        a[idx1 + 3] = t[idx2 + 2 * rows];
738                    }
739                }
740            } else {
741                for (int c = 0; c < columns; c += 4) {
742                    for (int r = 0; r < rows; r++) {
743                        idx1 = r * columns + c;
744                        idx2 = rows + r;
745                        t[r] = a[idx1];
746                        t[idx2] = a[idx1 + 1];
747                        t[idx2 + rows] = a[idx1 + 2];
748                        t[idx2 + 2 * rows] = a[idx1 + 3];
749                    }
750                    dctRows.inverse(t, 0, scale);
751                    dctRows.inverse(t, rows, scale);
752                    dctRows.inverse(t, 2 * rows, scale);
753                    dctRows.inverse(t, 3 * rows, scale);
754                    for (int r = 0; r < rows; r++) {
755                        idx1 = r * columns + c;
756                        idx2 = rows + r;
757                        a[idx1] = t[r];
758                        a[idx1 + 1] = t[idx2];
759                        a[idx1 + 2] = t[idx2 + rows];
760                        a[idx1 + 3] = t[idx2 + 2 * rows];
761                    }
762                }
763            }
764        } else if (columns == 2) {
765            for (int r = 0; r < rows; r++) {
766                idx1 = r * columns;
767                t[r] = a[idx1];
768                t[rows + r] = a[idx1 + 1];
769            }
770            if (isgn == -1) {
771                dctRows.forward(t, 0, scale);
772                dctRows.forward(t, rows, scale);
773            } else {
774                dctRows.inverse(t, 0, scale);
775                dctRows.inverse(t, rows, scale);
776            }
777            for (int r = 0; r < rows; r++) {
778                idx1 = r * columns;
779                a[idx1] = t[r];
780                a[idx1 + 1] = t[rows + r];
781            }
782        }
783    }
784
785    private void ddxt2d_sub(int isgn, float[][] a, boolean scale) {
786        int idx2;
787
788        if (columns > 2) {
789            if (isgn == -1) {
790                for (int c = 0; c < columns; c += 4) {
791                    for (int r = 0; r < rows; r++) {
792                        idx2 = rows + r;
793                        t[r] = a[r][c];
794                        t[idx2] = a[r][c + 1];
795                        t[idx2 + rows] = a[r][c + 2];
796                        t[idx2 + 2 * rows] = a[r][c + 3];
797                    }
798                    dctRows.forward(t, 0, scale);
799                    dctRows.forward(t, rows, scale);
800                    dctRows.forward(t, 2 * rows, scale);
801                    dctRows.forward(t, 3 * rows, scale);
802                    for (int r = 0; r < rows; r++) {
803                        idx2 = rows + r;
804                        a[r][c] = t[r];
805                        a[r][c + 1] = t[idx2];
806                        a[r][c + 2] = t[idx2 + rows];
807                        a[r][c + 3] = t[idx2 + 2 * rows];
808                    }
809                }
810            } else {
811                for (int c = 0; c < columns; c += 4) {
812                    for (int r = 0; r < rows; r++) {
813                        idx2 = rows + r;
814                        t[r] = a[r][c];
815                        t[idx2] = a[r][c + 1];
816                        t[idx2 + rows] = a[r][c + 2];
817                        t[idx2 + 2 * rows] = a[r][c + 3];
818                    }
819                    dctRows.inverse(t, 0, scale);
820                    dctRows.inverse(t, rows, scale);
821                    dctRows.inverse(t, 2 * rows, scale);
822                    dctRows.inverse(t, 3 * rows, scale);
823                    for (int r = 0; r < rows; r++) {
824                        idx2 = rows + r;
825                        a[r][c] = t[r];
826                        a[r][c + 1] = t[idx2];
827                        a[r][c + 2] = t[idx2 + rows];
828                        a[r][c + 3] = t[idx2 + 2 * rows];
829                    }
830                }
831            }
832        } else if (columns == 2) {
833            for (int r = 0; r < rows; r++) {
834                t[r] = a[r][0];
835                t[rows + r] = a[r][1];
836            }
837            if (isgn == -1) {
838                dctRows.forward(t, 0, scale);
839                dctRows.forward(t, rows, scale);
840            } else {
841                dctRows.inverse(t, 0, scale);
842                dctRows.inverse(t, rows, scale);
843            }
844            for (int r = 0; r < rows; r++) {
845                a[r][0] = t[r];
846                a[r][1] = t[rows + r];
847            }
848        }
849    }
850}