001/* ***** BEGIN LICENSE BLOCK *****
002 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
003 *
004 * The contents of this file are subject to the Mozilla Public License Version
005 * 1.1 (the "License"); you may not use this file except in compliance with
006 * the License. You may obtain a copy of the License at
007 * http://www.mozilla.org/MPL/
008 *
009 * Software distributed under the License is distributed on an "AS IS" basis,
010 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
011 * for the specific language governing rights and limitations under the
012 * License.
013 *
014 * The Original Code is JTransforms.
015 *
016 * The Initial Developer of the Original Code is
017 * Piotr Wendykier, Emory University.
018 * Portions created by the Initial Developer are Copyright (C) 2007-2009
019 * the Initial Developer. All Rights Reserved.
020 *
021 * Alternatively, the contents of this file may be used under the terms of
022 * either the GNU General Public License Version 2 or later (the "GPL"), or
023 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
024 * in which case the provisions of the GPL or the LGPL are applicable instead
025 * of those above. If you wish to allow use of your version of this file only
026 * under the terms of either the GPL or the LGPL, and not to allow others to
027 * use your version of this file under the terms of the MPL, indicate your
028 * decision by deleting the provisions above and replace them with the notice
029 * and other provisions required by the GPL or the LGPL. If you do not delete
030 * the provisions above, a recipient may use your version of this file under
031 * the terms of any one of the MPL, the GPL or the LGPL.
032 *
033 * ***** END LICENSE BLOCK ***** */
034
035package edu.emory.mathcs.jtransforms.dht;
036
037import java.util.concurrent.Future;
038
039import edu.emory.mathcs.utils.ConcurrencyUtils;
040
041/**
042 * Computes 2D Discrete Hartley Transform (DHT) of real, single precision data.
043 * The sizes of both dimensions can be arbitrary numbers. This is a parallel
044 * implementation optimized for SMP systems.<br>
045 * <br>
046 * Part of code is derived from General Purpose FFT Package written by Takuya Ooura
047 * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
048 * 
049 * @author Piotr Wendykier (piotr.wendykier@gmail.com)
050 * 
051 */
052public class FloatDHT_2D {
053
054    private int rows;
055
056    private int columns;
057
058    private float[] t;
059
060    private FloatDHT_1D dhtColumns, dhtRows;
061
062    private int oldNthreads;
063
064    private int nt;
065
066    private boolean isPowerOfTwo = false;
067
068    private boolean useThreads = false;
069
070    /**
071     * Creates new instance of FloatDHT_2D.
072     * 
073     * @param rows
074     *            number of rows
075     * @param column
076     *            number of columns
077     */
078    public FloatDHT_2D(int rows, int column) {
079        if (rows <= 1 || column <= 1) {
080            throw new IllegalArgumentException("rows and columns must be greater than 1");
081        }
082        this.rows = rows;
083        this.columns = column;
084        if (rows * column >= ConcurrencyUtils.getThreadsBeginN_2D()) {
085            this.useThreads = true;
086        }
087        if (ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(column)) {
088            isPowerOfTwo = true;
089            oldNthreads = ConcurrencyUtils.getNumberOfThreads();
090            nt = 4 * oldNthreads * rows;
091            if (column == 2 * oldNthreads) {
092                nt >>= 1;
093            } else if (column < 2 * oldNthreads) {
094                nt >>= 2;
095            }
096            t = new float[nt];
097        }
098        dhtColumns = new FloatDHT_1D(column);
099        if (column == rows) {
100            dhtRows = dhtColumns;
101        } else {
102            dhtRows = new FloatDHT_1D(rows);
103        }
104    }
105
106    /**
107     * Computes 2D real, forward DHT leaving the result in <code>a</code>. The
108     * data is stored in 1D array in row-major order.
109     * 
110     * @param a
111     *            data to transform
112     */
113    public void forward(final float[] a) {
114        int nthreads = ConcurrencyUtils.getNumberOfThreads();
115        if (isPowerOfTwo) {
116            if (nthreads != oldNthreads) {
117                nt = 4 * nthreads * rows;
118                if (columns == 2 * nthreads) {
119                    nt >>= 1;
120                } else if (columns < 2 * nthreads) {
121                    nt >>= 2;
122                }
123                t = new float[nt];
124                oldNthreads = nthreads;
125            }
126            if ((nthreads > 1) && useThreads) {
127                ddxt2d_subth(-1, a, true);
128                ddxt2d0_subth(-1, a, true);
129            } else {
130                ddxt2d_sub(-1, a, true);
131                for (int i = 0; i < rows; i++) {
132                    dhtColumns.forward(a, i * columns);
133                }
134            }
135            yTransform(a);
136        } else {
137            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
138                Future<?>[] futures = new Future[nthreads];
139                int p = rows / nthreads;
140                for (int l = 0; l < nthreads; l++) {
141                    final int firstRow = l * p;
142                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
143                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
144                        @Override
145                                                public void run() {
146                            for (int i = firstRow; i < lastRow; i++) {
147                                dhtColumns.forward(a, i * columns);
148                            }
149                        }
150                    });
151                }
152                ConcurrencyUtils.waitForCompletion(futures);
153                p = columns / nthreads;
154                for (int l = 0; l < nthreads; l++) {
155                    final int firstColumn = l * p;
156                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
157                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
158                        @Override
159                                                public void run() {
160                            float[] temp = new float[rows];
161                            for (int c = firstColumn; c < lastColumn; c++) {
162                                for (int r = 0; r < rows; r++) {
163                                    temp[r] = a[r * columns + c];
164                                }
165                                dhtRows.forward(temp);
166                                for (int r = 0; r < rows; r++) {
167                                    a[r * columns + c] = temp[r];
168                                }
169                            }
170                        }
171                    });
172                }
173                ConcurrencyUtils.waitForCompletion(futures);
174
175            } else {
176                for (int i = 0; i < rows; i++) {
177                    dhtColumns.forward(a, i * columns);
178                }
179                float[] temp = new float[rows];
180                for (int c = 0; c < columns; c++) {
181                    for (int r = 0; r < rows; r++) {
182                        temp[r] = a[r * columns + c];
183                    }
184                    dhtRows.forward(temp);
185                    for (int r = 0; r < rows; r++) {
186                        a[r * columns + c] = temp[r];
187                    }
188                }
189            }
190            yTransform(a);
191        }
192    }
193
194    /**
195     * Computes 2D real, forward DHT leaving the result in <code>a</code>. The
196     * data is stored in 2D array.
197     * 
198     * @param a
199     *            data to transform
200     */
201    public void forward(final float[][] a) {
202        int nthreads = ConcurrencyUtils.getNumberOfThreads();
203        if (isPowerOfTwo) {
204            if (nthreads != oldNthreads) {
205                nt = 4 * nthreads * rows;
206                if (columns == 2 * nthreads) {
207                    nt >>= 1;
208                } else if (columns < 2 * nthreads) {
209                    nt >>= 2;
210                }
211                t = new float[nt];
212                oldNthreads = nthreads;
213            }
214            if ((nthreads > 1) && useThreads) {
215                ddxt2d_subth(-1, a, true);
216                ddxt2d0_subth(-1, a, true);
217            } else {
218                ddxt2d_sub(-1, a, true);
219                for (int i = 0; i < rows; i++) {
220                    dhtColumns.forward(a[i]);
221                }
222            }
223            y_transform(a);
224        } else {
225            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
226                Future<?>[] futures = new Future[nthreads];
227                int p = rows / nthreads;
228                for (int l = 0; l < nthreads; l++) {
229                    final int firstRow = l * p;
230                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
231                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
232                        @Override
233                                                public void run() {
234                            for (int i = firstRow; i < lastRow; i++) {
235                                dhtColumns.forward(a[i]);
236                            }
237                        }
238                    });
239                }
240                ConcurrencyUtils.waitForCompletion(futures);
241
242                p = columns / nthreads;
243                for (int l = 0; l < nthreads; l++) {
244                    final int firstColumn = l * p;
245                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
246                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
247                        @Override
248                                                public void run() {
249                            float[] temp = new float[rows];
250                            for (int c = firstColumn; c < lastColumn; c++) {
251                                for (int r = 0; r < rows; r++) {
252                                    temp[r] = a[r][c];
253                                }
254                                dhtRows.forward(temp);
255                                for (int r = 0; r < rows; r++) {
256                                    a[r][c] = temp[r];
257                                }
258                            }
259                        }
260                    });
261                }
262                ConcurrencyUtils.waitForCompletion(futures);
263
264            } else {
265                for (int i = 0; i < rows; i++) {
266                    dhtColumns.forward(a[i]);
267                }
268                float[] temp = new float[rows];
269                for (int c = 0; c < columns; c++) {
270                    for (int r = 0; r < rows; r++) {
271                        temp[r] = a[r][c];
272                    }
273                    dhtRows.forward(temp);
274                    for (int r = 0; r < rows; r++) {
275                        a[r][c] = temp[r];
276                    }
277                }
278            }
279            y_transform(a);
280        }
281    }
282
283    /**
284     * Computes 2D real, inverse DHT leaving the result in <code>a</code>. The
285     * data is stored in 1D array in row-major order.
286     * 
287     * @param a
288     *            data to transform
289     * @param scale
290     *            if true then scaling is performed
291     */
292    public void inverse(final float[] a, final boolean scale) {
293        int nthreads = ConcurrencyUtils.getNumberOfThreads();
294        if (isPowerOfTwo) {
295            if (nthreads != oldNthreads) {
296                nt = 4 * nthreads * rows;
297                if (columns == 2 * nthreads) {
298                    nt >>= 1;
299                } else if (columns < 2 * nthreads) {
300                    nt >>= 2;
301                }
302                t = new float[nt];
303                oldNthreads = nthreads;
304            }
305            if ((nthreads > 1) && useThreads) {
306                ddxt2d_subth(1, a, scale);
307                ddxt2d0_subth(1, a, scale);
308            } else {
309                ddxt2d_sub(1, a, scale);
310                for (int i = 0; i < rows; i++) {
311                    dhtColumns.inverse(a, i * columns, scale);
312                }
313            }
314            yTransform(a);
315        } else {
316            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
317                Future<?>[] futures = new Future[nthreads];
318                int p = rows / nthreads;
319                for (int l = 0; l < nthreads; l++) {
320                    final int firstRow = l * p;
321                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
322                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
323                        @Override
324                                                public void run() {
325                            for (int i = firstRow; i < lastRow; i++) {
326                                dhtColumns.inverse(a, i * columns, scale);
327                            }
328                        }
329                    });
330                }
331                ConcurrencyUtils.waitForCompletion(futures);
332
333                p = columns / nthreads;
334                for (int l = 0; l < nthreads; l++) {
335                    final int firstColumn = l * p;
336                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
337                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
338                        @Override
339                                                public void run() {
340                            float[] temp = new float[rows];
341                            for (int c = firstColumn; c < lastColumn; c++) {
342                                for (int r = 0; r < rows; r++) {
343                                    temp[r] = a[r * columns + c];
344                                }
345                                dhtRows.inverse(temp, scale);
346                                for (int r = 0; r < rows; r++) {
347                                    a[r * columns + c] = temp[r];
348                                }
349                            }
350                        }
351                    });
352                }
353                ConcurrencyUtils.waitForCompletion(futures);
354
355            } else {
356                for (int i = 0; i < rows; i++) {
357                    dhtColumns.inverse(a, i * columns, scale);
358                }
359                float[] temp = new float[rows];
360                for (int c = 0; c < columns; c++) {
361                    for (int r = 0; r < rows; r++) {
362                        temp[r] = a[r * columns + c];
363                    }
364                    dhtRows.inverse(temp, scale);
365                    for (int r = 0; r < rows; r++) {
366                        a[r * columns + c] = temp[r];
367                    }
368                }
369            }
370            yTransform(a);
371        }
372    }
373
374    /**
375     * Computes 2D real, inverse DHT leaving the result in <code>a</code>. The
376     * data is stored in 2D array.
377     * 
378     * @param a
379     *            data to transform
380     * @param scale
381     *            if true then scaling is performed
382     */
383    public void inverse(final float[][] a, final boolean scale) {
384        int nthreads = ConcurrencyUtils.getNumberOfThreads();
385        if (isPowerOfTwo) {
386            if (nthreads != oldNthreads) {
387                nt = 4 * nthreads * rows;
388                if (columns == 2 * nthreads) {
389                    nt >>= 1;
390                } else if (columns < 2 * nthreads) {
391                    nt >>= 2;
392                }
393                t = new float[nt];
394                oldNthreads = nthreads;
395            }
396            if ((nthreads > 1) && useThreads) {
397                ddxt2d_subth(1, a, scale);
398                ddxt2d0_subth(1, a, scale);
399            } else {
400                ddxt2d_sub(1, a, scale);
401                for (int i = 0; i < rows; i++) {
402                    dhtColumns.inverse(a[i], scale);
403                }
404            }
405            y_transform(a);
406        } else {
407            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
408                Future<?>[] futures = new Future[nthreads];
409                int p = rows / nthreads;
410                for (int l = 0; l < nthreads; l++) {
411                    final int firstRow = l * p;
412                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
413                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
414                        @Override
415                                                public void run() {
416                            for (int i = firstRow; i < lastRow; i++) {
417                                dhtColumns.inverse(a[i], scale);
418                            }
419                        }
420                    });
421                }
422                ConcurrencyUtils.waitForCompletion(futures);
423
424                p = columns / nthreads;
425                for (int l = 0; l < nthreads; l++) {
426                    final int firstColumn = l * p;
427                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
428                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
429                        @Override
430                                                public void run() {
431                            float[] temp = new float[rows];
432                            for (int c = firstColumn; c < lastColumn; c++) {
433                                for (int r = 0; r < rows; r++) {
434                                    temp[r] = a[r][c];
435                                }
436                                dhtRows.inverse(temp, scale);
437                                for (int r = 0; r < rows; r++) {
438                                    a[r][c] = temp[r];
439                                }
440                            }
441                        }
442                    });
443                }
444                ConcurrencyUtils.waitForCompletion(futures);
445
446            } else {
447                for (int i = 0; i < rows; i++) {
448                    dhtColumns.inverse(a[i], scale);
449                }
450                float[] temp = new float[rows];
451                for (int c = 0; c < columns; c++) {
452                    for (int r = 0; r < rows; r++) {
453                        temp[r] = a[r][c];
454                    }
455                    dhtRows.inverse(temp, scale);
456                    for (int r = 0; r < rows; r++) {
457                        a[r][c] = temp[r];
458                    }
459                }
460            }
461            y_transform(a);
462        }
463    }
464
465    private void ddxt2d_subth(final int isgn, final float[] a, final boolean scale) {
466        int nthread = ConcurrencyUtils.getNumberOfThreads();
467        int nt = 4 * rows;
468        if (columns == 2 * nthread) {
469            nt >>= 1;
470        } else if (columns < 2 * nthread) {
471            nthread = columns;
472            nt >>= 2;
473        }
474        final int nthreads = nthread;
475        Future<?>[] futures = new Future[nthreads];
476
477        for (int i = 0; i < nthreads; i++) {
478            final int n0 = i;
479            final int startt = nt * i;
480            futures[i] = ConcurrencyUtils.submit(new Runnable() {
481                @Override
482                                public void run() {
483                    int idx1, idx2;
484                    if (columns > 2 * nthreads) {
485                        if (isgn == -1) {
486                            for (int c = 4 * n0; c < columns; c += 4 * nthreads) {
487                                for (int r = 0; r < rows; r++) {
488                                    idx1 = r * columns + c;
489                                    idx2 = startt + rows + r;
490                                    t[startt + r] = a[idx1];
491                                    t[idx2] = a[idx1 + 1];
492                                    t[idx2 + rows] = a[idx1 + 2];
493                                    t[idx2 + 2 * rows] = a[idx1 + 3];
494                                }
495                                dhtRows.forward(t, startt);
496                                dhtRows.forward(t, startt + rows);
497                                dhtRows.forward(t, startt + 2 * rows);
498                                dhtRows.forward(t, startt + 3 * rows);
499                                for (int r = 0; r < rows; r++) {
500                                    idx1 = r * columns + c;
501                                    idx2 = startt + rows + r;
502                                    a[idx1] = t[startt + r];
503                                    a[idx1 + 1] = t[idx2];
504                                    a[idx1 + 2] = t[idx2 + rows];
505                                    a[idx1 + 3] = t[idx2 + 2 * rows];
506                                }
507                            }
508                        } else {
509                            for (int c = 4 * n0; c < columns; c += 4 * nthreads) {
510                                for (int r = 0; r < rows; r++) {
511                                    idx1 = r * columns + c;
512                                    idx2 = startt + rows + r;
513                                    t[startt + r] = a[idx1];
514                                    t[idx2] = a[idx1 + 1];
515                                    t[idx2 + rows] = a[idx1 + 2];
516                                    t[idx2 + 2 * rows] = a[idx1 + 3];
517                                }
518                                dhtRows.inverse(t, startt, scale);
519                                dhtRows.inverse(t, startt + rows, scale);
520                                dhtRows.inverse(t, startt + 2 * rows, scale);
521                                dhtRows.inverse(t, startt + 3 * rows, scale);
522                                for (int r = 0; r < rows; r++) {
523                                    idx1 = r * columns + c;
524                                    idx2 = startt + rows + r;
525                                    a[idx1] = t[startt + r];
526                                    a[idx1 + 1] = t[idx2];
527                                    a[idx1 + 2] = t[idx2 + rows];
528                                    a[idx1 + 3] = t[idx2 + 2 * rows];
529                                }
530                            }
531                        }
532                    } else if (columns == 2 * nthreads) {
533                        for (int r = 0; r < rows; r++) {
534                            idx1 = r * columns + 2 * n0;
535                            idx2 = startt + r;
536                            t[idx2] = a[idx1];
537                            t[idx2 + rows] = a[idx1 + 1];
538                        }
539                        if (isgn == -1) {
540                            dhtRows.forward(t, startt);
541                            dhtRows.forward(t, startt + rows);
542                        } else {
543                            dhtRows.inverse(t, startt, scale);
544                            dhtRows.inverse(t, startt + rows, scale);
545                        }
546                        for (int r = 0; r < rows; r++) {
547                            idx1 = r * columns + 2 * n0;
548                            idx2 = startt + r;
549                            a[idx1] = t[idx2];
550                            a[idx1 + 1] = t[idx2 + rows];
551                        }
552                    } else if (columns == nthreads) {
553                        for (int r = 0; r < rows; r++) {
554                            t[startt + r] = a[r * columns + n0];
555                        }
556                        if (isgn == -1) {
557                            dhtRows.forward(t, startt);
558                        } else {
559                            dhtRows.inverse(t, startt, scale);
560                        }
561                        for (int r = 0; r < rows; r++) {
562                            a[r * columns + n0] = t[startt + r];
563                        }
564                    }
565                }
566            });
567        }
568        ConcurrencyUtils.waitForCompletion(futures);
569    }
570
571    private void ddxt2d_subth(final int isgn, final float[][] a, final boolean scale) {
572        int nthread = ConcurrencyUtils.getNumberOfThreads();
573        int nt = 4 * rows;
574        if (columns == 2 * nthread) {
575            nt >>= 1;
576        } else if (columns < 2 * nthread) {
577            nthread = columns;
578            nt >>= 2;
579        }
580        final int nthreads = nthread;
581        Future<?>[] futures = new Future[nthreads];
582
583        for (int i = 0; i < nthreads; i++) {
584            final int n0 = i;
585            final int startt = nt * i;
586            futures[i] = ConcurrencyUtils.submit(new Runnable() {
587                @Override
588                                public void run() {
589                    int idx2;
590                    if (columns > 2 * nthreads) {
591                        if (isgn == -1) {
592                            for (int c = 4 * n0; c < columns; c += 4 * nthreads) {
593                                for (int r = 0; r < rows; r++) {
594                                    idx2 = startt + rows + r;
595                                    t[startt + r] = a[r][c];
596                                    t[idx2] = a[r][c + 1];
597                                    t[idx2 + rows] = a[r][c + 2];
598                                    t[idx2 + 2 * rows] = a[r][c + 3];
599                                }
600                                dhtRows.forward(t, startt);
601                                dhtRows.forward(t, startt + rows);
602                                dhtRows.forward(t, startt + 2 * rows);
603                                dhtRows.forward(t, startt + 3 * rows);
604                                for (int r = 0; r < rows; r++) {
605                                    idx2 = startt + rows + r;
606                                    a[r][c] = t[startt + r];
607                                    a[r][c + 1] = t[idx2];
608                                    a[r][c + 2] = t[idx2 + rows];
609                                    a[r][c + 3] = t[idx2 + 2 * rows];
610                                }
611                            }
612                        } else {
613                            for (int c = 4 * n0; c < columns; c += 4 * nthreads) {
614                                for (int r = 0; r < rows; r++) {
615                                    idx2 = startt + rows + r;
616                                    t[startt + r] = a[r][c];
617                                    t[idx2] = a[r][c + 1];
618                                    t[idx2 + rows] = a[r][c + 2];
619                                    t[idx2 + 2 * rows] = a[r][c + 3];
620                                }
621                                dhtRows.inverse(t, startt, scale);
622                                dhtRows.inverse(t, startt + rows, scale);
623                                dhtRows.inverse(t, startt + 2 * rows, scale);
624                                dhtRows.inverse(t, startt + 3 * rows, scale);
625                                for (int r = 0; r < rows; r++) {
626                                    idx2 = startt + rows + r;
627                                    a[r][c] = t[startt + r];
628                                    a[r][c + 1] = t[idx2];
629                                    a[r][c + 2] = t[idx2 + rows];
630                                    a[r][c + 3] = t[idx2 + 2 * rows];
631                                }
632                            }
633                        }
634                    } else if (columns == 2 * nthreads) {
635                        for (int r = 0; r < rows; r++) {
636                            idx2 = startt + r;
637                            t[idx2] = a[r][2 * n0];
638                            t[idx2 + rows] = a[r][2 * n0 + 1];
639                        }
640                        if (isgn == -1) {
641                            dhtRows.forward(t, startt);
642                            dhtRows.forward(t, startt + rows);
643                        } else {
644                            dhtRows.inverse(t, startt, scale);
645                            dhtRows.inverse(t, startt + rows, scale);
646                        }
647                        for (int r = 0; r < rows; r++) {
648                            idx2 = startt + r;
649                            a[r][2 * n0] = t[idx2];
650                            a[r][2 * n0 + 1] = t[idx2 + rows];
651                        }
652                    } else if (columns == nthreads) {
653                        for (int r = 0; r < rows; r++) {
654                            t[startt + r] = a[r][n0];
655                        }
656                        if (isgn == -1) {
657                            dhtRows.forward(t, startt);
658                        } else {
659                            dhtRows.inverse(t, startt, scale);
660                        }
661                        for (int r = 0; r < rows; r++) {
662                            a[r][n0] = t[startt + r];
663                        }
664                    }
665                }
666            });
667        }
668        ConcurrencyUtils.waitForCompletion(futures);
669    }
670
671    private void ddxt2d0_subth(final int isgn, final float[] a, final boolean scale) {
672        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
673
674        Future<?>[] futures = new Future[nthreads];
675
676        for (int i = 0; i < nthreads; i++) {
677            final int n0 = i;
678            futures[i] = ConcurrencyUtils.submit(new Runnable() {
679
680                @Override
681                                public void run() {
682                    if (isgn == -1) {
683                        for (int r = n0; r < rows; r += nthreads) {
684                            dhtColumns.forward(a, r * columns);
685                        }
686                    } else {
687                        for (int r = n0; r < rows; r += nthreads) {
688                            dhtColumns.inverse(a, r * columns, scale);
689                        }
690                    }
691                }
692            });
693        }
694        ConcurrencyUtils.waitForCompletion(futures);
695    }
696
697    private void ddxt2d0_subth(final int isgn, final float[][] a, final boolean scale) {
698        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
699
700        Future<?>[] futures = new Future[nthreads];
701
702        for (int i = 0; i < nthreads; i++) {
703            final int n0 = i;
704            futures[i] = ConcurrencyUtils.submit(new Runnable() {
705
706                @Override
707                                public void run() {
708                    if (isgn == -1) {
709                        for (int r = n0; r < rows; r += nthreads) {
710                            dhtColumns.forward(a[r]);
711                        }
712                    } else {
713                        for (int r = n0; r < rows; r += nthreads) {
714                            dhtColumns.inverse(a[r], scale);
715                        }
716                    }
717                }
718            });
719        }
720        ConcurrencyUtils.waitForCompletion(futures);
721    }
722
723    private void ddxt2d_sub(int isgn, float[] a, boolean scale) {
724        int idx1, idx2;
725
726        if (columns > 2) {
727            if (isgn == -1) {
728                for (int c = 0; c < columns; c += 4) {
729                    for (int r = 0; r < rows; r++) {
730                        idx1 = r * columns + c;
731                        idx2 = rows + r;
732                        t[r] = a[idx1];
733                        t[idx2] = a[idx1 + 1];
734                        t[idx2 + rows] = a[idx1 + 2];
735                        t[idx2 + 2 * rows] = a[idx1 + 3];
736                    }
737                    dhtRows.forward(t, 0);
738                    dhtRows.forward(t, rows);
739                    dhtRows.forward(t, 2 * rows);
740                    dhtRows.forward(t, 3 * rows);
741                    for (int r = 0; r < rows; r++) {
742                        idx1 = r * columns + c;
743                        idx2 = rows + r;
744                        a[idx1] = t[r];
745                        a[idx1 + 1] = t[idx2];
746                        a[idx1 + 2] = t[idx2 + rows];
747                        a[idx1 + 3] = t[idx2 + 2 * rows];
748                    }
749                }
750            } else {
751                for (int c = 0; c < columns; c += 4) {
752                    for (int r = 0; r < rows; r++) {
753                        idx1 = r * columns + c;
754                        idx2 = rows + r;
755                        t[r] = a[idx1];
756                        t[idx2] = a[idx1 + 1];
757                        t[idx2 + rows] = a[idx1 + 2];
758                        t[idx2 + 2 * rows] = a[idx1 + 3];
759                    }
760                    dhtRows.inverse(t, 0, scale);
761                    dhtRows.inverse(t, rows, scale);
762                    dhtRows.inverse(t, 2 * rows, scale);
763                    dhtRows.inverse(t, 3 * rows, scale);
764                    for (int r = 0; r < rows; r++) {
765                        idx1 = r * columns + c;
766                        idx2 = rows + r;
767                        a[idx1] = t[r];
768                        a[idx1 + 1] = t[idx2];
769                        a[idx1 + 2] = t[idx2 + rows];
770                        a[idx1 + 3] = t[idx2 + 2 * rows];
771                    }
772                }
773            }
774        } else if (columns == 2) {
775            for (int r = 0; r < rows; r++) {
776                idx1 = r * columns;
777                t[r] = a[idx1];
778                t[rows + r] = a[idx1 + 1];
779            }
780            if (isgn == -1) {
781                dhtRows.forward(t, 0);
782                dhtRows.forward(t, rows);
783            } else {
784                dhtRows.inverse(t, 0, scale);
785                dhtRows.inverse(t, rows, scale);
786            }
787            for (int r = 0; r < rows; r++) {
788                idx1 = r * columns;
789                a[idx1] = t[r];
790                a[idx1 + 1] = t[rows + r];
791            }
792        }
793    }
794
795    private void ddxt2d_sub(int isgn, float[][] a, boolean scale) {
796        int idx2;
797
798        if (columns > 2) {
799            if (isgn == -1) {
800                for (int c = 0; c < columns; c += 4) {
801                    for (int r = 0; r < rows; r++) {
802                        idx2 = rows + r;
803                        t[r] = a[r][c];
804                        t[idx2] = a[r][c + 1];
805                        t[idx2 + rows] = a[r][c + 2];
806                        t[idx2 + 2 * rows] = a[r][c + 3];
807                    }
808                    dhtRows.forward(t, 0);
809                    dhtRows.forward(t, rows);
810                    dhtRows.forward(t, 2 * rows);
811                    dhtRows.forward(t, 3 * rows);
812                    for (int r = 0; r < rows; r++) {
813                        idx2 = rows + r;
814                        a[r][c] = t[r];
815                        a[r][c + 1] = t[idx2];
816                        a[r][c + 2] = t[idx2 + rows];
817                        a[r][c + 3] = t[idx2 + 2 * rows];
818                    }
819                }
820            } else {
821                for (int c = 0; c < columns; c += 4) {
822                    for (int r = 0; r < rows; r++) {
823                        idx2 = rows + r;
824                        t[r] = a[r][c];
825                        t[idx2] = a[r][c + 1];
826                        t[idx2 + rows] = a[r][c + 2];
827                        t[idx2 + 2 * rows] = a[r][c + 3];
828                    }
829                    dhtRows.inverse(t, 0, scale);
830                    dhtRows.inverse(t, rows, scale);
831                    dhtRows.inverse(t, 2 * rows, scale);
832                    dhtRows.inverse(t, 3 * rows, scale);
833                    for (int r = 0; r < rows; r++) {
834                        idx2 = rows + r;
835                        a[r][c] = t[r];
836                        a[r][c + 1] = t[idx2];
837                        a[r][c + 2] = t[idx2 + rows];
838                        a[r][c + 3] = t[idx2 + 2 * rows];
839                    }
840                }
841            }
842        } else if (columns == 2) {
843            for (int r = 0; r < rows; r++) {
844                t[r] = a[r][0];
845                t[rows + r] = a[r][1];
846            }
847            if (isgn == -1) {
848                dhtRows.forward(t, 0);
849                dhtRows.forward(t, rows);
850            } else {
851                dhtRows.inverse(t, 0, scale);
852                dhtRows.inverse(t, rows, scale);
853            }
854            for (int r = 0; r < rows; r++) {
855                a[r][0] = t[r];
856                a[r][1] = t[rows + r];
857            }
858        }
859    }
860
861    private void yTransform(float[] a) {
862        int mRow, mCol, idx1, idx2;
863        float A, B, C, D, E;
864        for (int r = 0; r <= rows / 2; r++) {
865            mRow = (rows - r) % rows;
866            idx1 = r * columns;
867            idx2 = mRow * columns;
868            for (int c = 0; c <= columns / 2; c++) {
869                mCol = (columns - c) % columns;
870                A = a[idx1 + c];
871                B = a[idx2 + c];
872                C = a[idx1 + mCol];
873                D = a[idx2 + mCol];
874                E = ((A + D) - (B + C)) / 2;
875                a[idx1 + c] = A - E;
876                a[idx2 + c] = B + E;
877                a[idx1 + mCol] = C + E;
878                a[idx2 + mCol] = D - E;
879            }
880        }
881    }
882
883    private void y_transform(float[][] a) {
884        int mRow, mCol;
885        float A, B, C, D, E;
886        for (int r = 0; r <= rows / 2; r++) {
887            mRow = (rows - r) % rows;
888            for (int c = 0; c <= columns / 2; c++) {
889                mCol = (columns - c) % columns;
890                A = a[r][c];
891                B = a[mRow][c];
892                C = a[r][mCol];
893                D = a[mRow][mCol];
894                E = ((A + D) - (B + C)) / 2;
895                a[r][c] = A - E;
896                a[mRow][c] = B + E;
897                a[r][mCol] = C + E;
898                a[mRow][mCol] = D - E;
899            }
900        }
901    }
902
903}