001/* ***** BEGIN LICENSE BLOCK *****
002 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
003 *
004 * The contents of this file are subject to the Mozilla Public License Version
005 * 1.1 (the "License"); you may not use this file except in compliance with
006 * the License. You may obtain a copy of the License at
007 * http://www.mozilla.org/MPL/
008 *
009 * Software distributed under the License is distributed on an "AS IS" basis,
010 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
011 * for the specific language governing rights and limitations under the
012 * License.
013 *
014 * The Original Code is JTransforms.
015 *
016 * The Initial Developer of the Original Code is
017 * Piotr Wendykier, Emory University.
018 * Portions created by the Initial Developer are Copyright (C) 2007-2009
019 * the Initial Developer. All Rights Reserved.
020 *
021 * Alternatively, the contents of this file may be used under the terms of
022 * either the GNU General Public License Version 2 or later (the "GPL"), or
023 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
024 * in which case the provisions of the GPL or the LGPL are applicable instead
025 * of those above. If you wish to allow use of your version of this file only
026 * under the terms of either the GPL or the LGPL, and not to allow others to
027 * use your version of this file under the terms of the MPL, indicate your
028 * decision by deleting the provisions above and replace them with the notice
029 * and other provisions required by the GPL or the LGPL. If you do not delete
030 * the provisions above, a recipient may use your version of this file under
031 * the terms of any one of the MPL, the GPL or the LGPL.
032 *
033 * ***** END LICENSE BLOCK ***** */
034
035package edu.emory.mathcs.jtransforms.dst;
036
037import java.util.concurrent.Future;
038
039import edu.emory.mathcs.utils.ConcurrencyUtils;
040
041/**
042 * Computes 2D Discrete Sine Transform (DST) of single precision data. The sizes
043 * of both dimensions can be arbitrary numbers. This is a parallel
044 * implementation optimized for SMP systems.<br>
045 * <br>
046 * Part of code is derived from General Purpose FFT Package written by Takuya Ooura
047 * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
048 * 
049 * @author Piotr Wendykier (piotr.wendykier@gmail.com)
050 * 
051 */
052public class FloatDST_2D {
053
054    private int rows;
055
056    private int columns;
057
058    private float[] t;
059
060    private FloatDST_1D dstColumns, dstRows;
061
062    private int oldNthreads;
063
064    private int nt;
065
066    private boolean isPowerOfTwo = false;
067
068    private boolean useThreads = false;
069
070    /**
071     * Creates new instance of FloatDST_2D.
072     * 
073     * @param rows
074     *            number of rows
075     * @param columns
076     *            number of columns
077     */
078    public FloatDST_2D(int rows, int columns) {
079        if (rows <= 1 || columns <= 1) {
080            throw new IllegalArgumentException("rows and columns must be greater than 1");
081        }
082        this.rows = rows;
083        this.columns = columns;
084        if (rows * columns >= ConcurrencyUtils.getThreadsBeginN_2D()) {
085            useThreads = true;
086        }
087        if (ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) {
088            isPowerOfTwo = true;
089            oldNthreads = ConcurrencyUtils.getNumberOfThreads();
090            nt = 4 * oldNthreads * rows;
091            if (columns == 2 * oldNthreads) {
092                nt >>= 1;
093            } else if (columns < 2 * oldNthreads) {
094                nt >>= 2;
095            }
096            t = new float[nt];
097        }
098        dstColumns = new FloatDST_1D(columns);
099        if (columns == rows) {
100            dstRows = dstColumns;
101        } else {
102            dstRows = new FloatDST_1D(rows);
103        }
104    }
105
106    /**
107     * Computes 2D forward DST (DST-II) leaving the result in <code>a</code>.
108     * The data is stored in 1D array in row-major order.
109     * 
110     * @param a
111     *            data to transform
112     * @param scale
113     *            if true then scaling is performed
114     */
115    public void forward(final float[] a, final boolean scale) {
116        int nthreads = ConcurrencyUtils.getNumberOfThreads();
117        if (isPowerOfTwo) {
118            if (nthreads != oldNthreads) {
119                nt = 4 * nthreads * rows;
120                if (columns == 2 * nthreads) {
121                    nt >>= 1;
122                } else if (columns < 2 * nthreads) {
123                    nt >>= 2;
124                }
125                t = new float[nt];
126                oldNthreads = nthreads;
127            }
128            if ((nthreads > 1) && useThreads) {
129                ddxt2d_subth(-1, a, scale);
130                ddxt2d0_subth(-1, a, scale);
131            } else {
132                ddxt2d_sub(-1, a, scale);
133                for (int i = 0; i < rows; i++) {
134                    dstColumns.forward(a, i * columns, scale);
135                }
136            }
137        } else {
138            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
139                Future<?>[] futures = new Future[nthreads];
140                int p = rows / nthreads;
141                for (int l = 0; l < nthreads; l++) {
142                    final int firstRow = l * p;
143                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
144                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
145                        @Override
146                                                public void run() {
147                            for (int i = firstRow; i < lastRow; i++) {
148                                dstColumns.forward(a, i * columns, scale);
149                            }
150                        }
151                    });
152                }
153                ConcurrencyUtils.waitForCompletion(futures);
154                p = columns / nthreads;
155                for (int l = 0; l < nthreads; l++) {
156                    final int firstColumn = l * p;
157                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
158                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
159                        @Override
160                                                public void run() {
161                            float[] temp = new float[rows];
162                            for (int c = firstColumn; c < lastColumn; c++) {
163                                for (int r = 0; r < rows; r++) {
164                                    temp[r] = a[r * columns + c];
165                                }
166                                dstRows.forward(temp, scale);
167                                for (int r = 0; r < rows; r++) {
168                                    a[r * columns + c] = temp[r];
169                                }
170                            }
171                        }
172                    });
173                }
174                ConcurrencyUtils.waitForCompletion(futures);
175            } else {
176                for (int i = 0; i < rows; i++) {
177                    dstColumns.forward(a, i * columns, scale);
178                }
179                float[] temp = new float[rows];
180                for (int c = 0; c < columns; c++) {
181                    for (int r = 0; r < rows; r++) {
182                        temp[r] = a[r * columns + c];
183                    }
184                    dstRows.forward(temp, scale);
185                    for (int r = 0; r < rows; r++) {
186                        a[r * columns + c] = temp[r];
187                    }
188                }
189            }
190        }
191    }
192
193    /**
194     * Computes 2D forward DST (DST-II) leaving the result in <code>a</code>.
195     * The data is stored in 2D array.
196     * 
197     * @param a
198     *            data to transform
199     * @param scale
200     *            if true then scaling is performed
201     */
202    public void forward(final float[][] a, final boolean scale) {
203        int nthreads = ConcurrencyUtils.getNumberOfThreads();
204        if (isPowerOfTwo) {
205            if (nthreads != oldNthreads) {
206                nt = 4 * nthreads * rows;
207                if (columns == 2 * nthreads) {
208                    nt >>= 1;
209                } else if (columns < 2 * nthreads) {
210                    nt >>= 2;
211                }
212                t = new float[nt];
213                oldNthreads = nthreads;
214            }
215            if ((nthreads > 1) && useThreads) {
216                ddxt2d_subth(-1, a, scale);
217                ddxt2d0_subth(-1, a, scale);
218            } else {
219                ddxt2d_sub(-1, a, scale);
220                for (int i = 0; i < rows; i++) {
221                    dstColumns.forward(a[i], scale);
222                }
223            }
224        } else {
225            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
226                Future<?>[] futures = new Future[nthreads];
227                int p = rows / nthreads;
228                for (int l = 0; l < nthreads; l++) {
229                    final int firstRow = l * p;
230                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
231                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
232                        @Override
233                                                public void run() {
234                            for (int i = firstRow; i < lastRow; i++) {
235                                dstColumns.forward(a[i], scale);
236                            }
237                        }
238                    });
239                }
240                ConcurrencyUtils.waitForCompletion(futures);
241                p = columns / nthreads;
242                for (int l = 0; l < nthreads; l++) {
243                    final int firstColumn = l * p;
244                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
245                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
246                        @Override
247                                                public void run() {
248                            float[] temp = new float[rows];
249                            for (int c = firstColumn; c < lastColumn; c++) {
250                                for (int r = 0; r < rows; r++) {
251                                    temp[r] = a[r][c];
252                                }
253                                dstRows.forward(temp, scale);
254                                for (int r = 0; r < rows; r++) {
255                                    a[r][c] = temp[r];
256                                }
257                            }
258                        }
259                    });
260                }
261                ConcurrencyUtils.waitForCompletion(futures);
262            } else {
263                for (int i = 0; i < rows; i++) {
264                    dstColumns.forward(a[i], scale);
265                }
266                float[] temp = new float[rows];
267                for (int c = 0; c < columns; c++) {
268                    for (int r = 0; r < rows; r++) {
269                        temp[r] = a[r][c];
270                    }
271                    dstRows.forward(temp, scale);
272                    for (int r = 0; r < rows; r++) {
273                        a[r][c] = temp[r];
274                    }
275                }
276            }
277        }
278    }
279
280    /**
281     * Computes 2D inverse DST (DST-III) leaving the result in <code>a</code>.
282     * The data is stored in 1D array in row-major order.
283     * 
284     * @param a
285     *            data to transform
286     * @param scale
287     *            if true then scaling is performed
288     */
289    public void inverse(final float[] a, final boolean scale) {
290        int nthreads = ConcurrencyUtils.getNumberOfThreads();
291        if (isPowerOfTwo) {
292            if (nthreads != oldNthreads) {
293                nt = 4 * nthreads * rows;
294                if (columns == 2 * nthreads) {
295                    nt >>= 1;
296                } else if (columns < 2 * nthreads) {
297                    nt >>= 2;
298                }
299                t = new float[nt];
300                oldNthreads = nthreads;
301            }
302            if ((nthreads > 1) && useThreads) {
303                ddxt2d_subth(1, a, scale);
304                ddxt2d0_subth(1, a, scale);
305            } else {
306                ddxt2d_sub(1, a, scale);
307                for (int i = 0; i < rows; i++) {
308                    dstColumns.inverse(a, i * columns, scale);
309                }
310            }
311        } else {
312            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
313                Future<?>[] futures = new Future[nthreads];
314                int p = rows / nthreads;
315                for (int l = 0; l < nthreads; l++) {
316                    final int firstRow = l * p;
317                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
318                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
319                        @Override
320                                                public void run() {
321                            for (int i = firstRow; i < lastRow; i++) {
322                                dstColumns.inverse(a, i * columns, scale);
323                            }
324                        }
325                    });
326                }
327                ConcurrencyUtils.waitForCompletion(futures);
328                p = columns / nthreads;
329                for (int l = 0; l < nthreads; l++) {
330                    final int firstColumn = l * p;
331                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
332                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
333                        @Override
334                                                public void run() {
335                            float[] temp = new float[rows];
336                            for (int c = firstColumn; c < lastColumn; c++) {
337                                for (int r = 0; r < rows; r++) {
338                                    temp[r] = a[r * columns + c];
339                                }
340                                dstRows.inverse(temp, scale);
341                                for (int r = 0; r < rows; r++) {
342                                    a[r * columns + c] = temp[r];
343                                }
344                            }
345                        }
346                    });
347                }
348                ConcurrencyUtils.waitForCompletion(futures);
349            } else {
350                for (int i = 0; i < rows; i++) {
351                    dstColumns.inverse(a, i * columns, scale);
352                }
353                float[] temp = new float[rows];
354                for (int c = 0; c < columns; c++) {
355                    for (int r = 0; r < rows; r++) {
356                        temp[r] = a[r * columns + c];
357                    }
358                    dstRows.inverse(temp, scale);
359                    for (int r = 0; r < rows; r++) {
360                        a[r * columns + c] = temp[r];
361                    }
362                }
363            }
364        }
365    }
366
367    /**
368     * Computes 2D inverse DST (DST-III) leaving the result in <code>a</code>.
369     * The data is stored in 2D array.
370     * 
371     * @param a
372     *            data to transform
373     * @param scale
374     *            if true then scaling is performed
375     */
376    public void inverse(final float[][] a, final boolean scale) {
377        int nthreads = ConcurrencyUtils.getNumberOfThreads();
378        if (isPowerOfTwo) {
379            if (nthreads != oldNthreads) {
380                nt = 4 * nthreads * rows;
381                if (columns == 2 * nthreads) {
382                    nt >>= 1;
383                } else if (columns < 2 * nthreads) {
384                    nt >>= 2;
385                }
386                t = new float[nt];
387                oldNthreads = nthreads;
388            }
389            if ((nthreads > 1) && useThreads) {
390                ddxt2d_subth(1, a, scale);
391                ddxt2d0_subth(1, a, scale);
392            } else {
393                ddxt2d_sub(1, a, scale);
394                for (int i = 0; i < rows; i++) {
395                    dstColumns.inverse(a[i], scale);
396                }
397            }
398        } else {
399            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
400                Future<?>[] futures = new Future[nthreads];
401                int p = rows / nthreads;
402                for (int l = 0; l < nthreads; l++) {
403                    final int firstRow = l * p;
404                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
405                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
406                        @Override
407                                                public void run() {
408                            for (int i = firstRow; i < lastRow; i++) {
409                                dstColumns.inverse(a[i], scale);
410                            }
411                        }
412                    });
413                }
414                ConcurrencyUtils.waitForCompletion(futures);
415                p = columns / nthreads;
416                for (int l = 0; l < nthreads; l++) {
417                    final int firstColumn = l * p;
418                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
419                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
420                        @Override
421                                                public void run() {
422                            float[] temp = new float[rows];
423                            for (int c = firstColumn; c < lastColumn; c++) {
424                                for (int r = 0; r < rows; r++) {
425                                    temp[r] = a[r][c];
426                                }
427                                dstRows.inverse(temp, scale);
428                                for (int r = 0; r < rows; r++) {
429                                    a[r][c] = temp[r];
430                                }
431                            }
432                        }
433                    });
434                }
435                ConcurrencyUtils.waitForCompletion(futures);
436            } else {
437                for (int i = 0; i < rows; i++) {
438                    dstColumns.inverse(a[i], scale);
439                }
440                float[] temp = new float[rows];
441                for (int c = 0; c < columns; c++) {
442                    for (int r = 0; r < rows; r++) {
443                        temp[r] = a[r][c];
444                    }
445                    dstRows.inverse(temp, scale);
446                    for (int r = 0; r < rows; r++) {
447                        a[r][c] = temp[r];
448                    }
449                }
450            }
451        }
452    }
453
454    private void ddxt2d_subth(final int isgn, final float[] a, final boolean scale) {
455        int nthread = ConcurrencyUtils.getNumberOfThreads();
456        int nt = 4 * rows;
457        if (columns == 2 * nthread) {
458            nt >>= 1;
459        } else if (columns < 2 * nthread) {
460            nthread = columns;
461            nt >>= 2;
462        }
463        final int nthreads = nthread;
464        Future<?>[] futures = new Future[nthreads];
465
466        for (int i = 0; i < nthreads; i++) {
467            final int n0 = i;
468            final int startt = nt * i;
469            futures[i] = ConcurrencyUtils.submit(new Runnable() {
470                @Override
471                                public void run() {
472                    int idx1, idx2;
473                    if (columns > 2 * nthreads) {
474                        if (isgn == -1) {
475                            for (int c = 4 * n0; c < columns; c += 4 * nthreads) {
476                                for (int r = 0; r < rows; r++) {
477                                    idx1 = r * columns + c;
478                                    idx2 = startt + rows + r;
479                                    t[startt + r] = a[idx1];
480                                    t[idx2] = a[idx1 + 1];
481                                    t[idx2 + rows] = a[idx1 + 2];
482                                    t[idx2 + 2 * rows] = a[idx1 + 3];
483                                }
484                                dstRows.forward(t, startt, scale);
485                                dstRows.forward(t, startt + rows, scale);
486                                dstRows.forward(t, startt + 2 * rows, scale);
487                                dstRows.forward(t, startt + 3 * rows, scale);
488                                for (int r = 0; r < rows; r++) {
489                                    idx1 = r * columns + c;
490                                    idx2 = startt + rows + r;
491                                    a[idx1] = t[startt + r];
492                                    a[idx1 + 1] = t[idx2];
493                                    a[idx1 + 2] = t[idx2 + rows];
494                                    a[idx1 + 3] = t[idx2 + 2 * rows];
495                                }
496                            }
497                        } else {
498                            for (int c = 4 * n0; c < columns; c += 4 * nthreads) {
499                                for (int r = 0; r < rows; r++) {
500                                    idx1 = r * columns + c;
501                                    idx2 = startt + rows + r;
502                                    t[startt + r] = a[idx1];
503                                    t[idx2] = a[idx1 + 1];
504                                    t[idx2 + rows] = a[idx1 + 2];
505                                    t[idx2 + 2 * rows] = a[idx1 + 3];
506                                }
507                                dstRows.inverse(t, startt, scale);
508                                dstRows.inverse(t, startt + rows, scale);
509                                dstRows.inverse(t, startt + 2 * rows, scale);
510                                dstRows.inverse(t, startt + 3 * rows, scale);
511                                for (int r = 0; r < rows; r++) {
512                                    idx1 = r * columns + c;
513                                    idx2 = startt + rows + r;
514                                    a[idx1] = t[startt + r];
515                                    a[idx1 + 1] = t[idx2];
516                                    a[idx1 + 2] = t[idx2 + rows];
517                                    a[idx1 + 3] = t[idx2 + 2 * rows];
518                                }
519                            }
520                        }
521                    } else if (columns == 2 * nthreads) {
522                        for (int r = 0; r < rows; r++) {
523                            idx1 = r * columns + 2 * n0;
524                            idx2 = startt + r;
525                            t[idx2] = a[idx1];
526                            t[idx2 + rows] = a[idx1 + 1];
527                        }
528                        if (isgn == -1) {
529                            dstRows.forward(t, startt, scale);
530                            dstRows.forward(t, startt + rows, scale);
531                        } else {
532                            dstRows.inverse(t, startt, scale);
533                            dstRows.inverse(t, startt + rows, scale);
534                        }
535                        for (int r = 0; r < rows; r++) {
536                            idx1 = r * columns + 2 * n0;
537                            idx2 = startt + r;
538                            a[idx1] = t[idx2];
539                            a[idx1 + 1] = t[idx2 + rows];
540                        }
541                    } else if (columns == nthreads) {
542                        for (int r = 0; r < rows; r++) {
543                            t[startt + r] = a[r * columns + n0];
544                        }
545                        if (isgn == -1) {
546                            dstRows.forward(t, startt, scale);
547                        } else {
548                            dstRows.inverse(t, startt, scale);
549                        }
550                        for (int r = 0; r < rows; r++) {
551                            a[r * columns + n0] = t[startt + r];
552                        }
553                    }
554                }
555            });
556        }
557        ConcurrencyUtils.waitForCompletion(futures);
558    }
559
560    private void ddxt2d_subth(final int isgn, final float[][] a, final boolean scale) {
561        int nthread = ConcurrencyUtils.getNumberOfThreads();
562        int nt = 4 * rows;
563        if (columns == 2 * nthread) {
564            nt >>= 1;
565        } else if (columns < 2 * nthread) {
566            nthread = columns;
567            nt >>= 2;
568        }
569        final int nthreads = nthread;
570        Future<?>[] futures = new Future[nthreads];
571
572        for (int i = 0; i < nthreads; i++) {
573            final int n0 = i;
574            final int startt = nt * i;
575            futures[i] = ConcurrencyUtils.submit(new Runnable() {
576                @Override
577                                public void run() {
578                    int idx2;
579                    if (columns > 2 * nthreads) {
580                        if (isgn == -1) {
581                            for (int c = 4 * n0; c < columns; c += 4 * nthreads) {
582                                for (int r = 0; r < rows; r++) {
583                                    idx2 = startt + rows + r;
584                                    t[startt + r] = a[r][c];
585                                    t[idx2] = a[r][c + 1];
586                                    t[idx2 + rows] = a[r][c + 2];
587                                    t[idx2 + 2 * rows] = a[r][c + 3];
588                                }
589                                dstRows.forward(t, startt, scale);
590                                dstRows.forward(t, startt + rows, scale);
591                                dstRows.forward(t, startt + 2 * rows, scale);
592                                dstRows.forward(t, startt + 3 * rows, scale);
593                                for (int r = 0; r < rows; r++) {
594                                    idx2 = startt + rows + r;
595                                    a[r][c] = t[startt + r];
596                                    a[r][c + 1] = t[idx2];
597                                    a[r][c + 2] = t[idx2 + rows];
598                                    a[r][c + 3] = t[idx2 + 2 * rows];
599                                }
600                            }
601                        } else {
602                            for (int c = 4 * n0; c < columns; c += 4 * nthreads) {
603                                for (int r = 0; r < rows; r++) {
604                                    idx2 = startt + rows + r;
605                                    t[startt + r] = a[r][c];
606                                    t[idx2] = a[r][c + 1];
607                                    t[idx2 + rows] = a[r][c + 2];
608                                    t[idx2 + 2 * rows] = a[r][c + 3];
609                                }
610                                dstRows.inverse(t, startt, scale);
611                                dstRows.inverse(t, startt + rows, scale);
612                                dstRows.inverse(t, startt + 2 * rows, scale);
613                                dstRows.inverse(t, startt + 3 * rows, scale);
614                                for (int r = 0; r < rows; r++) {
615                                    idx2 = startt + rows + r;
616                                    a[r][c] = t[startt + r];
617                                    a[r][c + 1] = t[idx2];
618                                    a[r][c + 2] = t[idx2 + rows];
619                                    a[r][c + 3] = t[idx2 + 2 * rows];
620                                }
621                            }
622                        }
623                    } else if (columns == 2 * nthreads) {
624                        for (int r = 0; r < rows; r++) {
625                            idx2 = startt + r;
626                            t[idx2] = a[r][2 * n0];
627                            t[idx2 + rows] = a[r][2 * n0 + 1];
628                        }
629                        if (isgn == -1) {
630                            dstRows.forward(t, startt, scale);
631                            dstRows.forward(t, startt + rows, scale);
632                        } else {
633                            dstRows.inverse(t, startt, scale);
634                            dstRows.inverse(t, startt + rows, scale);
635                        }
636                        for (int r = 0; r < rows; r++) {
637                            idx2 = startt + r;
638                            a[r][2 * n0] = t[idx2];
639                            a[r][2 * n0 + 1] = t[idx2 + rows];
640                        }
641                    } else if (columns == nthreads) {
642                        for (int r = 0; r < rows; r++) {
643                            t[startt + r] = a[r][n0];
644                        }
645                        if (isgn == -1) {
646                            dstRows.forward(t, startt, scale);
647                        } else {
648                            dstRows.inverse(t, startt, scale);
649                        }
650                        for (int r = 0; r < rows; r++) {
651                            a[r][n0] = t[startt + r];
652                        }
653                    }
654                }
655            });
656        }
657        ConcurrencyUtils.waitForCompletion(futures);
658    }
659
660    private void ddxt2d0_subth(final int isgn, final float[] a, final boolean scale) {
661        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
662
663        Future<?>[] futures = new Future[nthreads];
664
665        for (int i = 0; i < nthreads; i++) {
666            final int n0 = i;
667            futures[i] = ConcurrencyUtils.submit(new Runnable() {
668
669                @Override
670                                public void run() {
671                    if (isgn == -1) {
672                        for (int r = n0; r < rows; r += nthreads) {
673                            dstColumns.forward(a, r * columns, scale);
674                        }
675                    } else {
676                        for (int r = n0; r < rows; r += nthreads) {
677                            dstColumns.inverse(a, r * columns, scale);
678                        }
679                    }
680                }
681            });
682        }
683        ConcurrencyUtils.waitForCompletion(futures);
684    }
685
686    private void ddxt2d0_subth(final int isgn, final float[][] a, final boolean scale) {
687        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
688
689        Future<?>[] futures = new Future[nthreads];
690
691        for (int i = 0; i < nthreads; i++) {
692            final int n0 = i;
693            futures[i] = ConcurrencyUtils.submit(new Runnable() {
694
695                @Override
696                                public void run() {
697                    if (isgn == -1) {
698                        for (int r = n0; r < rows; r += nthreads) {
699                            dstColumns.forward(a[r], scale);
700                        }
701                    } else {
702                        for (int r = n0; r < rows; r += nthreads) {
703                            dstColumns.inverse(a[r], scale);
704                        }
705                    }
706                }
707            });
708        }
709        ConcurrencyUtils.waitForCompletion(futures);
710    }
711
712    private void ddxt2d_sub(int isgn, float[] a, boolean scale) {
713        int idx1, idx2;
714
715        if (columns > 2) {
716            if (isgn == -1) {
717                for (int c = 0; c < columns; c += 4) {
718                    for (int r = 0; r < rows; r++) {
719                        idx1 = r * columns + c;
720                        idx2 = rows + r;
721                        t[r] = a[idx1];
722                        t[idx2] = a[idx1 + 1];
723                        t[idx2 + rows] = a[idx1 + 2];
724                        t[idx2 + 2 * rows] = a[idx1 + 3];
725                    }
726                    dstRows.forward(t, 0, scale);
727                    dstRows.forward(t, rows, scale);
728                    dstRows.forward(t, 2 * rows, scale);
729                    dstRows.forward(t, 3 * rows, scale);
730                    for (int r = 0; r < rows; r++) {
731                        idx1 = r * columns + c;
732                        idx2 = rows + r;
733                        a[idx1] = t[r];
734                        a[idx1 + 1] = t[idx2];
735                        a[idx1 + 2] = t[idx2 + rows];
736                        a[idx1 + 3] = t[idx2 + 2 * rows];
737                    }
738                }
739            } else {
740                for (int c = 0; c < columns; c += 4) {
741                    for (int r = 0; r < rows; r++) {
742                        idx1 = r * columns + c;
743                        idx2 = rows + r;
744                        t[r] = a[idx1];
745                        t[idx2] = a[idx1 + 1];
746                        t[idx2 + rows] = a[idx1 + 2];
747                        t[idx2 + 2 * rows] = a[idx1 + 3];
748                    }
749                    dstRows.inverse(t, 0, scale);
750                    dstRows.inverse(t, rows, scale);
751                    dstRows.inverse(t, 2 * rows, scale);
752                    dstRows.inverse(t, 3 * rows, scale);
753                    for (int r = 0; r < rows; r++) {
754                        idx1 = r * columns + c;
755                        idx2 = rows + r;
756                        a[idx1] = t[r];
757                        a[idx1 + 1] = t[idx2];
758                        a[idx1 + 2] = t[idx2 + rows];
759                        a[idx1 + 3] = t[idx2 + 2 * rows];
760                    }
761                }
762            }
763        } else if (columns == 2) {
764            for (int r = 0; r < rows; r++) {
765                idx1 = r * columns;
766                t[r] = a[idx1];
767                t[rows + r] = a[idx1 + 1];
768            }
769            if (isgn == -1) {
770                dstRows.forward(t, 0, scale);
771                dstRows.forward(t, rows, scale);
772            } else {
773                dstRows.inverse(t, 0, scale);
774                dstRows.inverse(t, rows, scale);
775            }
776            for (int r = 0; r < rows; r++) {
777                idx1 = r * columns;
778                a[idx1] = t[r];
779                a[idx1 + 1] = t[rows + r];
780            }
781        }
782    }
783
784    private void ddxt2d_sub(int isgn, float[][] a, boolean scale) {
785        int idx2;
786
787        if (columns > 2) {
788            if (isgn == -1) {
789                for (int c = 0; c < columns; c += 4) {
790                    for (int r = 0; r < rows; r++) {
791                        idx2 = rows + r;
792                        t[r] = a[r][c];
793                        t[idx2] = a[r][c + 1];
794                        t[idx2 + rows] = a[r][c + 2];
795                        t[idx2 + 2 * rows] = a[r][c + 3];
796                    }
797                    dstRows.forward(t, 0, scale);
798                    dstRows.forward(t, rows, scale);
799                    dstRows.forward(t, 2 * rows, scale);
800                    dstRows.forward(t, 3 * rows, scale);
801                    for (int r = 0; r < rows; r++) {
802                        idx2 = rows + r;
803                        a[r][c] = t[r];
804                        a[r][c + 1] = t[idx2];
805                        a[r][c + 2] = t[idx2 + rows];
806                        a[r][c + 3] = t[idx2 + 2 * rows];
807                    }
808                }
809            } else {
810                for (int c = 0; c < columns; c += 4) {
811                    for (int r = 0; r < rows; r++) {
812                        idx2 = rows + r;
813                        t[r] = a[r][c];
814                        t[idx2] = a[r][c + 1];
815                        t[idx2 + rows] = a[r][c + 2];
816                        t[idx2 + 2 * rows] = a[r][c + 3];
817                    }
818                    dstRows.inverse(t, 0, scale);
819                    dstRows.inverse(t, rows, scale);
820                    dstRows.inverse(t, 2 * rows, scale);
821                    dstRows.inverse(t, 3 * rows, scale);
822                    for (int r = 0; r < rows; r++) {
823                        idx2 = rows + r;
824                        a[r][c] = t[r];
825                        a[r][c + 1] = t[idx2];
826                        a[r][c + 2] = t[idx2 + rows];
827                        a[r][c + 3] = t[idx2 + 2 * rows];
828                    }
829                }
830            }
831        } else if (columns == 2) {
832            for (int r = 0; r < rows; r++) {
833                t[r] = a[r][0];
834                t[rows + r] = a[r][1];
835            }
836            if (isgn == -1) {
837                dstRows.forward(t, 0, scale);
838                dstRows.forward(t, rows, scale);
839            } else {
840                dstRows.inverse(t, 0, scale);
841                dstRows.inverse(t, rows, scale);
842            }
843            for (int r = 0; r < rows; r++) {
844                a[r][0] = t[r];
845                a[r][1] = t[rows + r];
846            }
847        }
848    }
849}