001/* ***** BEGIN LICENSE BLOCK *****
002 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
003 *
004 * The contents of this file are subject to the Mozilla Public License Version
005 * 1.1 (the "License"); you may not use this file except in compliance with
006 * the License. You may obtain a copy of the License at
007 * http://www.mozilla.org/MPL/
008 *
009 * Software distributed under the License is distributed on an "AS IS" basis,
010 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
011 * for the specific language governing rights and limitations under the
012 * License.
013 *
014 * The Original Code is JTransforms.
015 *
016 * The Initial Developer of the Original Code is
017 * Piotr Wendykier, Emory University.
018 * Portions created by the Initial Developer are Copyright (C) 2007-2009
019 * the Initial Developer. All Rights Reserved.
020 *
021 * Alternatively, the contents of this file may be used under the terms of
022 * either the GNU General Public License Version 2 or later (the "GPL"), or
023 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
024 * in which case the provisions of the GPL or the LGPL are applicable instead
025 * of those above. If you wish to allow use of your version of this file only
026 * under the terms of either the GPL or the LGPL, and not to allow others to
027 * use your version of this file under the terms of the MPL, indicate your
028 * decision by deleting the provisions above and replace them with the notice
029 * and other provisions required by the GPL or the LGPL. If you do not delete
030 * the provisions above, a recipient may use your version of this file under
031 * the terms of any one of the MPL, the GPL or the LGPL.
032 *
033 * ***** END LICENSE BLOCK ***** */
034
035package edu.emory.mathcs.jtransforms.fft;
036
037import java.util.concurrent.Future;
038
039import edu.emory.mathcs.utils.ConcurrencyUtils;
040
041/**
042 * Computes 2D Discrete Fourier Transform (DFT) of complex and real, single
043 * precision data. The sizes of both dimensions can be arbitrary numbers. This
044 * is a parallel implementation of split-radix and mixed-radix algorithms
045 * optimized for SMP systems. <br>
046 * <br>
047 * Part of the code is derived from General Purpose FFT Package written by Takuya Ooura
048 * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
049 * 
050 * @author Piotr Wendykier (piotr.wendykier@gmail.com)
051 * 
052 */
053public strictfp class FloatFFT_2D {
054
055    private int rows;
056
057    private int columns;
058
059    private float[] t;
060
061    private FloatFFT_1D fftColumns, fftRows;
062
063    private int oldNthreads;
064
065    private int nt;
066
067    private boolean isPowerOfTwo = false;
068
069    private boolean useThreads = false;
070
071    /**
072     * Creates new instance of FloatFFT_2D.
073     * 
074     * @param rows
075     *            number of rows
076     * @param columns
077     *            number of columns
078     */
079    public FloatFFT_2D(int rows, int columns) {
080        if (rows <= 1 || columns <= 1) {
081            throw new IllegalArgumentException("rows and columns must be greater than 1");
082        }
083        this.rows = rows;
084        this.columns = columns;
085        if (rows * columns >= ConcurrencyUtils.getThreadsBeginN_2D()) {
086            this.useThreads = true;
087        }
088        if (ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) {
089            isPowerOfTwo = true;
090            oldNthreads = ConcurrencyUtils.getNumberOfThreads();
091            nt = 8 * oldNthreads * rows;
092            if (2 * columns == 4 * oldNthreads) {
093                nt >>= 1;
094            } else if (2 * columns < 4 * oldNthreads) {
095                nt >>= 2;
096            }
097            t = new float[nt];
098        }
099        fftRows = new FloatFFT_1D(rows);
100        if (rows == columns) {
101            fftColumns = fftRows;
102        } else {
103            fftColumns = new FloatFFT_1D(columns);
104        }
105    }
106
107    /**
108     * Computes 2D forward DFT of complex data leaving the result in
109     * <code>a</code>. The data is stored in 1D array in row-major order.
110     * Complex number is stored as two float values in sequence: the real and
111     * imaginary part, i.e. the input array must be of size rows*2*columns. The
112     * physical layout of the input data has to be as follows:<br>
113     * 
114     * <pre>
115     * a[k1*2*columns+2*k2] = Re[k1][k2], 
116     * a[k1*2*columns+2*k2+1] = Im[k1][k2], 0&lt;=k1&lt;rows, 0&lt;=k2&lt;columns,
117     * </pre>
118     * 
119     * @param a
120     *            data to transform
121     */
122    public void complexForward(final float[] a) {
123        int nthreads = ConcurrencyUtils.getNumberOfThreads();
124        if (isPowerOfTwo) {
125            int oldn2 = columns;
126            columns = 2 * columns;
127            if (nthreads != oldNthreads) {
128                nt = 8 * nthreads * rows;
129                if (columns == 4 * nthreads) {
130                    nt >>= 1;
131                } else if (columns < 4 * nthreads) {
132                    nt >>= 2;
133                }
134                t = new float[nt];
135                oldNthreads = nthreads;
136            }
137            if ((nthreads > 1) && useThreads) {
138                xdft2d0_subth1(0, -1, a, true);
139                cdft2d_subth(-1, a, true);
140            } else {
141                for (int r = 0; r < rows; r++) {
142                    fftColumns.complexForward(a, r * columns);
143                }
144                cdft2d_sub(-1, a, true);
145            }
146            columns = oldn2;
147        } else {
148            final int rowStride = 2 * columns;
149            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
150                Future<?>[] futures = new Future[nthreads];
151                int p = rows / nthreads;
152                for (int l = 0; l < nthreads; l++) {
153                    final int firstRow = l * p;
154                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
155                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
156                        @Override
157                                                public void run() {
158                            for (int r = firstRow; r < lastRow; r++) {
159                                fftColumns.complexForward(a, r * rowStride);
160                            }
161                        }
162                    });
163                }
164                ConcurrencyUtils.waitForCompletion(futures);
165                p = columns / nthreads;
166                for (int l = 0; l < nthreads; l++) {
167                    final int firstColumn = l * p;
168                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
169                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
170                        @Override
171                                                public void run() {
172                            float[] temp = new float[2 * rows];
173                            for (int c = firstColumn; c < lastColumn; c++) {
174                                int idx0 = 2 * c;
175                                for (int r = 0; r < rows; r++) {
176                                    int idx1 = 2 * r;
177                                    int idx2 = r * rowStride + idx0;
178                                    temp[idx1] = a[idx2];
179                                    temp[idx1 + 1] = a[idx2 + 1];
180                                }
181                                fftRows.complexForward(temp);
182                                for (int r = 0; r < rows; r++) {
183                                    int idx1 = 2 * r;
184                                    int idx2 = r * rowStride + idx0;
185                                    a[idx2] = temp[idx1];
186                                    a[idx2 + 1] = temp[idx1 + 1];
187                                }
188                            }
189                        }
190                    });
191                }
192                ConcurrencyUtils.waitForCompletion(futures);
193            } else {
194                for (int r = 0; r < rows; r++) {
195                    fftColumns.complexForward(a, r * rowStride);
196                }
197                float[] temp = new float[2 * rows];
198                for (int c = 0; c < columns; c++) {
199                    int idx0 = 2 * c;
200                    for (int r = 0; r < rows; r++) {
201                        int idx1 = 2 * r;
202                        int idx2 = r * rowStride + idx0;
203                        temp[idx1] = a[idx2];
204                        temp[idx1 + 1] = a[idx2 + 1];
205                    }
206                    fftRows.complexForward(temp);
207                    for (int r = 0; r < rows; r++) {
208                        int idx1 = 2 * r;
209                        int idx2 = r * rowStride + idx0;
210                        a[idx2] = temp[idx1];
211                        a[idx2 + 1] = temp[idx1 + 1];
212                    }
213                }
214            }
215        }
216    }
217
218    /**
219     * Computes 2D forward DFT of complex data leaving the result in
220     * <code>a</code>. The data is stored in 2D array. Complex data is
221     * represented by 2 float values in sequence: the real and imaginary part,
222     * i.e. the input array must be of size rows by 2*columns. The physical
223     * layout of the input data has to be as follows:<br>
224     * 
225     * <pre>
226     * a[k1][2*k2] = Re[k1][k2], 
227     * a[k1][2*k2+1] = Im[k1][k2], 0&lt;=k1&lt;rows, 0&lt;=k2&lt;columns,
228     * </pre>
229     * 
230     * @param a
231     *            data to transform
232     */
233    public void complexForward(final float[][] a) {
234        int nthreads = ConcurrencyUtils.getNumberOfThreads();
235        if (isPowerOfTwo) {
236            int oldn2 = columns;
237            columns = 2 * columns;
238            if (nthreads != oldNthreads) {
239                nt = 8 * nthreads * rows;
240                if (columns == 4 * nthreads) {
241                    nt >>= 1;
242                } else if (columns < 4 * nthreads) {
243                    nt >>= 2;
244                }
245                t = new float[nt];
246                oldNthreads = nthreads;
247            }
248            if ((nthreads > 1) && useThreads) {
249                xdft2d0_subth1(0, -1, a, true);
250                cdft2d_subth(-1, a, true);
251            } else {
252                for (int r = 0; r < rows; r++) {
253                    fftColumns.complexForward(a[r]);
254                }
255                cdft2d_sub(-1, a, true);
256            }
257            columns = oldn2;
258        } else {
259            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
260                Future<?>[] futures = new Future[nthreads];
261                int p = rows / nthreads;
262                for (int l = 0; l < nthreads; l++) {
263                    final int firstRow = l * p;
264                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
265                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
266                        @Override
267                                                public void run() {
268                            for (int r = firstRow; r < lastRow; r++) {
269                                fftColumns.complexForward(a[r]);
270                            }
271                        }
272                    });
273                }
274                ConcurrencyUtils.waitForCompletion(futures);
275                p = columns / nthreads;
276                for (int l = 0; l < nthreads; l++) {
277                    final int firstColumn = l * p;
278                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
279                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
280                        @Override
281                                                public void run() {
282                            float[] temp = new float[2 * rows];
283                            for (int c = firstColumn; c < lastColumn; c++) {
284                                int idx1 = 2 * c;
285                                for (int r = 0; r < rows; r++) {
286                                    int idx2 = 2 * r;
287                                    temp[idx2] = a[r][idx1];
288                                    temp[idx2 + 1] = a[r][idx1 + 1];
289                                }
290                                fftRows.complexForward(temp);
291                                for (int r = 0; r < rows; r++) {
292                                    int idx2 = 2 * r;
293                                    a[r][idx1] = temp[idx2];
294                                    a[r][idx1 + 1] = temp[idx2 + 1];
295                                }
296                            }
297                        }
298                    });
299                }
300                ConcurrencyUtils.waitForCompletion(futures);
301            } else {
302                for (int r = 0; r < rows; r++) {
303                    fftColumns.complexForward(a[r]);
304                }
305                float[] temp = new float[2 * rows];
306                for (int c = 0; c < columns; c++) {
307                    int idx1 = 2 * c;
308                    for (int r = 0; r < rows; r++) {
309                        int idx2 = 2 * r;
310                        temp[idx2] = a[r][idx1];
311                        temp[idx2 + 1] = a[r][idx1 + 1];
312                    }
313                    fftRows.complexForward(temp);
314                    for (int r = 0; r < rows; r++) {
315                        int idx2 = 2 * r;
316                        a[r][idx1] = temp[idx2];
317                        a[r][idx1 + 1] = temp[idx2 + 1];
318                    }
319                }
320            }
321        }
322    }
323
324    /**
325     * Computes 2D inverse DFT of complex data leaving the result in
326     * <code>a</code>. The data is stored in 1D array in row-major order.
327     * Complex number is stored as two float values in sequence: the real and
328     * imaginary part, i.e. the input array must be of size rows*2*columns. The
329     * physical layout of the input data has to be as follows:<br>
330     * 
331     * <pre>
332     * a[k1*2*columns+2*k2] = Re[k1][k2], 
333     * a[k1*2*columns+2*k2+1] = Im[k1][k2], 0&lt;=k1&lt;rows, 0&lt;=k2&lt;columns,
334     * </pre>
335     * 
336     * @param a
337     *            data to transform
338     * @param scale
339     *            if true then scaling is performed
340     * 
341     */
342    public void complexInverse(final float[] a, final boolean scale) {
343        int nthreads = ConcurrencyUtils.getNumberOfThreads();
344        if (isPowerOfTwo) {
345            int oldn2 = columns;
346            columns = 2 * columns;
347            if (nthreads != oldNthreads) {
348                nt = 8 * nthreads * rows;
349                if (columns == 4 * nthreads) {
350                    nt >>= 1;
351                } else if (columns < 4 * nthreads) {
352                    nt >>= 2;
353                }
354                t = new float[nt];
355                oldNthreads = nthreads;
356            }
357            if ((nthreads > 1) && useThreads) {
358                xdft2d0_subth1(0, 1, a, scale);
359                cdft2d_subth(1, a, scale);
360            } else {
361
362                for (int r = 0; r < rows; r++) {
363                    fftColumns.complexInverse(a, r * columns, scale);
364                }
365                cdft2d_sub(1, a, scale);
366            }
367            columns = oldn2;
368        } else {
369            final int rowspan = 2 * columns;
370            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
371                Future<?>[] futures = new Future[nthreads];
372                int p = rows / nthreads;
373                for (int l = 0; l < nthreads; l++) {
374                    final int firstRow = l * p;
375                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
376                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
377                        @Override
378                                                public void run() {
379                            for (int r = firstRow; r < lastRow; r++) {
380                                fftColumns.complexInverse(a, r * rowspan, scale);
381                            }
382                        }
383                    });
384                }
385                ConcurrencyUtils.waitForCompletion(futures);
386                p = columns / nthreads;
387                for (int l = 0; l < nthreads; l++) {
388                    final int firstColumn = l * p;
389                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
390                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
391                        @Override
392                                                public void run() {
393                            float[] temp = new float[2 * rows];
394                            for (int c = firstColumn; c < lastColumn; c++) {
395                                int idx1 = 2 * c;
396                                for (int r = 0; r < rows; r++) {
397                                    int idx2 = 2 * r;
398                                    int idx3 = r * rowspan + idx1;
399                                    temp[idx2] = a[idx3];
400                                    temp[idx2 + 1] = a[idx3 + 1];
401                                }
402                                fftRows.complexInverse(temp, scale);
403                                for (int r = 0; r < rows; r++) {
404                                    int idx2 = 2 * r;
405                                    int idx3 = r * rowspan + idx1;
406                                    a[idx3] = temp[idx2];
407                                    a[idx3 + 1] = temp[idx2 + 1];
408                                }
409                            }
410                        }
411                    });
412                }
413                ConcurrencyUtils.waitForCompletion(futures);
414            } else {
415                for (int r = 0; r < rows; r++) {
416                    fftColumns.complexInverse(a, r * rowspan, scale);
417                }
418                float[] temp = new float[2 * rows];
419                for (int c = 0; c < columns; c++) {
420                    int idx1 = 2 * c;
421                    for (int r = 0; r < rows; r++) {
422                        int idx2 = 2 * r;
423                        int idx3 = r * rowspan + idx1;
424                        temp[idx2] = a[idx3];
425                        temp[idx2 + 1] = a[idx3 + 1];
426                    }
427                    fftRows.complexInverse(temp, scale);
428                    for (int r = 0; r < rows; r++) {
429                        int idx2 = 2 * r;
430                        int idx3 = r * rowspan + idx1;
431                        a[idx3] = temp[idx2];
432                        a[idx3 + 1] = temp[idx2 + 1];
433                    }
434                }
435            }
436        }
437    }
438
439    /**
440     * Computes 2D inverse DFT of complex data leaving the result in
441     * <code>a</code>. The data is stored in 2D array. Complex data is
442     * represented by 2 float values in sequence: the real and imaginary part,
443     * i.e. the input array must be of size rows by 2*columns. The physical
444     * layout of the input data has to be as follows:<br>
445     * 
446     * <pre>
447     * a[k1][2*k2] = Re[k1][k2], 
448     * a[k1][2*k2+1] = Im[k1][k2], 0&lt;=k1&lt;rows, 0&lt;=k2&lt;columns,
449     * </pre>
450     * 
451     * @param a
452     *            data to transform
453     * @param scale
454     *            if true then scaling is performed
455     * 
456     */
457    public void complexInverse(final float[][] a, final boolean scale) {
458        int nthreads = ConcurrencyUtils.getNumberOfThreads();
459        if (isPowerOfTwo) {
460            int oldn2 = columns;
461            columns = 2 * columns;
462
463            if (nthreads != oldNthreads) {
464                nt = 8 * nthreads * rows;
465                if (columns == 4 * nthreads) {
466                    nt >>= 1;
467                } else if (columns < 4 * nthreads) {
468                    nt >>= 2;
469                }
470                t = new float[nt];
471                oldNthreads = nthreads;
472            }
473            if ((nthreads > 1) && useThreads) {
474                xdft2d0_subth1(0, 1, a, scale);
475                cdft2d_subth(1, a, scale);
476            } else {
477
478                for (int r = 0; r < rows; r++) {
479                    fftColumns.complexInverse(a[r], scale);
480                }
481                cdft2d_sub(1, a, scale);
482            }
483            columns = oldn2;
484        } else {
485            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
486                Future<?>[] futures = new Future[nthreads];
487                int p = rows / nthreads;
488                for (int l = 0; l < nthreads; l++) {
489                    final int firstRow = l * p;
490                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
491                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
492                        @Override
493                                                public void run() {
494                            for (int r = firstRow; r < lastRow; r++) {
495                                fftColumns.complexInverse(a[r], scale);
496                            }
497                        }
498                    });
499                }
500                ConcurrencyUtils.waitForCompletion(futures);
501                p = columns / nthreads;
502                for (int l = 0; l < nthreads; l++) {
503                    final int firstColumn = l * p;
504                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
505                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
506                        @Override
507                                                public void run() {
508                            float[] temp = new float[2 * rows];
509                            for (int c = firstColumn; c < lastColumn; c++) {
510                                int idx1 = 2 * c;
511                                for (int r = 0; r < rows; r++) {
512                                    int idx2 = 2 * r;
513                                    temp[idx2] = a[r][idx1];
514                                    temp[idx2 + 1] = a[r][idx1 + 1];
515                                }
516                                fftRows.complexInverse(temp, scale);
517                                for (int r = 0; r < rows; r++) {
518                                    int idx2 = 2 * r;
519                                    a[r][idx1] = temp[idx2];
520                                    a[r][idx1 + 1] = temp[idx2 + 1];
521                                }
522                            }
523                        }
524                    });
525                }
526                ConcurrencyUtils.waitForCompletion(futures);
527            } else {
528                for (int r = 0; r < rows; r++) {
529                    fftColumns.complexInverse(a[r], scale);
530                }
531                float[] temp = new float[2 * rows];
532                for (int c = 0; c < columns; c++) {
533                    int idx1 = 2 * c;
534                    for (int r = 0; r < rows; r++) {
535                        int idx2 = 2 * r;
536                        temp[idx2] = a[r][idx1];
537                        temp[idx2 + 1] = a[r][idx1 + 1];
538                    }
539                    fftRows.complexInverse(temp, scale);
540                    for (int r = 0; r < rows; r++) {
541                        int idx2 = 2 * r;
542                        a[r][idx1] = temp[idx2];
543                        a[r][idx1 + 1] = temp[idx2 + 1];
544                    }
545                }
546            }
547        }
548    }
549
550    /**
551     * Computes 2D forward DFT of real data leaving the result in <code>a</code>
552     * . This method only works when the sizes of both dimensions are
553     * power-of-two numbers. The physical layout of the output data is as
554     * follows:
555     * 
556     * <pre>
557     * a[k1*columns+2*k2] = Re[k1][k2] = Re[rows-k1][columns-k2], 
558     * a[k1*columns+2*k2+1] = Im[k1][k2] = -Im[rows-k1][columns-k2], 
559     *       0&lt;k1&lt;rows, 0&lt;k2&lt;columns/2, 
560     * a[2*k2] = Re[0][k2] = Re[0][columns-k2], 
561     * a[2*k2+1] = Im[0][k2] = -Im[0][columns-k2], 
562     *       0&lt;k2&lt;columns/2, 
563     * a[k1*columns] = Re[k1][0] = Re[rows-k1][0], 
564     * a[k1*columns+1] = Im[k1][0] = -Im[rows-k1][0], 
565     * a[(rows-k1)*columns+1] = Re[k1][columns/2] = Re[rows-k1][columns/2], 
566     * a[(rows-k1)*columns] = -Im[k1][columns/2] = Im[rows-k1][columns/2], 
567     *       0&lt;k1&lt;rows/2, 
568     * a[0] = Re[0][0], 
569     * a[1] = Re[0][columns/2], 
570     * a[(rows/2)*columns] = Re[rows/2][0], 
571     * a[(rows/2)*columns+1] = Re[rows/2][columns/2]
572     * </pre>
573     * 
574     * This method computes only half of the elements of the real transform. The
575     * other half satisfies the symmetry condition. If you want the full real
576     * forward transform, use <code>realForwardFull</code>. To get back the
577     * original data, use <code>realInverse</code> on the output of this method.
578     * 
579     * @param a
580     *            data to transform
581     */
582    public void realForward(float[] a) {
583        if (isPowerOfTwo == false) {
584            throw new IllegalArgumentException("rows and columns must be power of two numbers");
585        } else {
586            int nthreads;
587
588            nthreads = ConcurrencyUtils.getNumberOfThreads();
589            if (nthreads != oldNthreads) {
590                nt = 8 * nthreads * rows;
591                if (columns == 4 * nthreads) {
592                    nt >>= 1;
593                } else if (columns < 4 * nthreads) {
594                    nt >>= 2;
595                }
596                t = new float[nt];
597                oldNthreads = nthreads;
598            }
599            if ((nthreads > 1) && useThreads) {
600                xdft2d0_subth1(1, 1, a, true);
601                cdft2d_subth(-1, a, true);
602                rdft2d_sub(1, a);
603            } else {
604                for (int r = 0; r < rows; r++) {
605                    fftColumns.realForward(a, r * columns);
606                }
607                cdft2d_sub(-1, a, true);
608                rdft2d_sub(1, a);
609            }
610        }
611    }
612
613    /**
614     * Computes 2D forward DFT of real data leaving the result in <code>a</code>
615     * . This method only works when the sizes of both dimensions are
616     * power-of-two numbers. The physical layout of the output data is as
617     * follows:
618     * 
619     * <pre>
620     * a[k1][2*k2] = Re[k1][k2] = Re[rows-k1][columns-k2], 
621     * a[k1][2*k2+1] = Im[k1][k2] = -Im[rows-k1][columns-k2], 
622     *       0&lt;k1&lt;rows, 0&lt;k2&lt;columns/2, 
623     * a[0][2*k2] = Re[0][k2] = Re[0][columns-k2], 
624     * a[0][2*k2+1] = Im[0][k2] = -Im[0][columns-k2], 
625     *       0&lt;k2&lt;columns/2, 
626     * a[k1][0] = Re[k1][0] = Re[rows-k1][0], 
627     * a[k1][1] = Im[k1][0] = -Im[rows-k1][0], 
628     * a[rows-k1][1] = Re[k1][columns/2] = Re[rows-k1][columns/2], 
629     * a[rows-k1][0] = -Im[k1][columns/2] = Im[rows-k1][columns/2], 
630     *       0&lt;k1&lt;rows/2, 
631     * a[0][0] = Re[0][0], 
632     * a[0][1] = Re[0][columns/2], 
633     * a[rows/2][0] = Re[rows/2][0], 
634     * a[rows/2][1] = Re[rows/2][columns/2]
635     * </pre>
636     * 
637     * This method computes only half of the elements of the real transform. The
638     * other half satisfies the symmetry condition. If you want the full real
639     * forward transform, use <code>realForwardFull</code>. To get back the
640     * original data, use <code>realInverse</code> on the output of this method.
641     * 
642     * @param a
643     *            data to transform
644     */
645    public void realForward(float[][] a) {
646        if (isPowerOfTwo == false) {
647            throw new IllegalArgumentException("rows and columns must be power of two numbers");
648        } else {
649            int nthreads;
650
651            nthreads = ConcurrencyUtils.getNumberOfThreads();
652            if (nthreads != oldNthreads) {
653                nt = 8 * nthreads * rows;
654                if (columns == 4 * nthreads) {
655                    nt >>= 1;
656                } else if (columns < 4 * nthreads) {
657                    nt >>= 2;
658                }
659                t = new float[nt];
660                oldNthreads = nthreads;
661            }
662            if ((nthreads > 1) && useThreads) {
663                xdft2d0_subth1(1, 1, a, true);
664                cdft2d_subth(-1, a, true);
665                rdft2d_sub(1, a);
666            } else {
667                for (int r = 0; r < rows; r++) {
668                    fftColumns.realForward(a[r]);
669                }
670                cdft2d_sub(-1, a, true);
671                rdft2d_sub(1, a);
672            }
673        }
674    }
675
676    /**
677     * Computes 2D forward DFT of real data leaving the result in <code>a</code>
678     * . This method computes full real forward transform, i.e. you will get the
679     * same result as from <code>complexForward</code> called with all imaginary
680     * part equal 0. Because the result is stored in <code>a</code>, the input
681     * array must be of size rows*2*columns, with only the first rows*columns
682     * elements filled with real data. To get back the original data, use
683     * <code>complexInverse</code> on the output of this method.
684     * 
685     * @param a
686     *            data to transform
687     */
688    public void realForwardFull(float[] a) {
689        if (isPowerOfTwo) {
690            int nthreads;
691
692            nthreads = ConcurrencyUtils.getNumberOfThreads();
693            if (nthreads != oldNthreads) {
694                nt = 8 * nthreads * rows;
695                if (columns == 4 * nthreads) {
696                    nt >>= 1;
697                } else if (columns < 4 * nthreads) {
698                    nt >>= 2;
699                }
700                t = new float[nt];
701                oldNthreads = nthreads;
702            }
703            if ((nthreads > 1) && useThreads) {
704                xdft2d0_subth1(1, 1, a, true);
705                cdft2d_subth(-1, a, true);
706                rdft2d_sub(1, a);
707            } else {
708                for (int r = 0; r < rows; r++) {
709                    fftColumns.realForward(a, r * columns);
710                }
711                cdft2d_sub(-1, a, true);
712                rdft2d_sub(1, a);
713            }
714            fillSymmetric(a);
715        } else {
716            mixedRadixRealForwardFull(a);
717        }
718    }
719
720    /**
721     * Computes 2D forward DFT of real data leaving the result in <code>a</code>
722     * . This method computes full real forward transform, i.e. you will get the
723     * same result as from <code>complexForward</code> called with all imaginary
724     * part equal 0. Because the result is stored in <code>a</code>, the input
725     * array must be of size rows by 2*columns, with only the first rows by
726     * columns elements filled with real data. To get back the original data,
727     * use <code>complexInverse</code> on the output of this method.
728     * 
729     * @param a
730     *            data to transform
731     */
732    public void realForwardFull(float[][] a) {
733        if (isPowerOfTwo) {
734            int nthreads;
735
736            nthreads = ConcurrencyUtils.getNumberOfThreads();
737            if (nthreads != oldNthreads) {
738                nt = 8 * nthreads * rows;
739                if (columns == 4 * nthreads) {
740                    nt >>= 1;
741                } else if (columns < 4 * nthreads) {
742                    nt >>= 2;
743                }
744                t = new float[nt];
745                oldNthreads = nthreads;
746            }
747            if ((nthreads > 1) && useThreads) {
748                xdft2d0_subth1(1, 1, a, true);
749                cdft2d_subth(-1, a, true);
750                rdft2d_sub(1, a);
751            } else {
752                for (int r = 0; r < rows; r++) {
753                    fftColumns.realForward(a[r]);
754                }
755                cdft2d_sub(-1, a, true);
756                rdft2d_sub(1, a);
757            }
758            fillSymmetric(a);
759        } else {
760            mixedRadixRealForwardFull(a);
761        }
762    }
763
764    /**
765     * Computes 2D inverse DFT of real data leaving the result in <code>a</code>
766     * . This method only works when the sizes of both dimensions are
767     * power-of-two numbers. The physical layout of the input data has to be as
768     * follows:
769     * 
770     * <pre>
771     * a[k1*columns+2*k2] = Re[k1][k2] = Re[rows-k1][columns-k2], 
772     * a[k1*columns+2*k2+1] = Im[k1][k2] = -Im[rows-k1][columns-k2], 
773     *       0&lt;k1&lt;rows, 0&lt;k2&lt;columns/2, 
774     * a[2*k2] = Re[0][k2] = Re[0][columns-k2], 
775     * a[2*k2+1] = Im[0][k2] = -Im[0][columns-k2], 
776     *       0&lt;k2&lt;columns/2, 
777     * a[k1*columns] = Re[k1][0] = Re[rows-k1][0], 
778     * a[k1*columns+1] = Im[k1][0] = -Im[rows-k1][0], 
779     * a[(rows-k1)*columns+1] = Re[k1][columns/2] = Re[rows-k1][columns/2], 
780     * a[(rows-k1)*columns] = -Im[k1][columns/2] = Im[rows-k1][columns/2], 
781     *       0&lt;k1&lt;rows/2, 
782     * a[0] = Re[0][0], 
783     * a[1] = Re[0][columns/2], 
784     * a[(rows/2)*columns] = Re[rows/2][0], 
785     * a[(rows/2)*columns+1] = Re[rows/2][columns/2]
786     * </pre>
787     * 
788     * This method computes only half of the elements of the real transform. The
789     * other half satisfies the symmetry condition. If you want the full real
790     * inverse transform, use <code>realInverseFull</code>.
791     * 
792     * @param a
793     *            data to transform
794     * 
795     * @param scale
796     *            if true then scaling is performed
797     */
798    public void realInverse(float[] a, boolean scale) {
799        if (isPowerOfTwo == false) {
800            throw new IllegalArgumentException("rows and columns must be power of two numbers");
801        } else {
802            int nthreads;
803            nthreads = ConcurrencyUtils.getNumberOfThreads();
804            if (nthreads != oldNthreads) {
805                nt = 8 * nthreads * rows;
806                if (columns == 4 * nthreads) {
807                    nt >>= 1;
808                } else if (columns < 4 * nthreads) {
809                    nt >>= 2;
810                }
811                t = new float[nt];
812                oldNthreads = nthreads;
813            }
814            if ((nthreads > 1) && useThreads) {
815                rdft2d_sub(-1, a);
816                cdft2d_subth(1, a, scale);
817                xdft2d0_subth1(1, -1, a, scale);
818            } else {
819                rdft2d_sub(-1, a);
820                cdft2d_sub(1, a, scale);
821                for (int r = 0; r < rows; r++) {
822                    fftColumns.realInverse(a, r * columns, scale);
823                }
824            }
825        }
826    }
827
828    /**
829     * Computes 2D inverse DFT of real data leaving the result in <code>a</code>
830     * . This method only works when the sizes of both dimensions are
831     * power-of-two numbers. The physical layout of the input data has to be as
832     * follows:
833     * 
834     * <pre>
835     * a[k1][2*k2] = Re[k1][k2] = Re[rows-k1][columns-k2], 
836     * a[k1][2*k2+1] = Im[k1][k2] = -Im[rows-k1][columns-k2], 
837     *       0&lt;k1&lt;rows, 0&lt;k2&lt;columns/2, 
838     * a[0][2*k2] = Re[0][k2] = Re[0][columns-k2], 
839     * a[0][2*k2+1] = Im[0][k2] = -Im[0][columns-k2], 
840     *       0&lt;k2&lt;columns/2, 
841     * a[k1][0] = Re[k1][0] = Re[rows-k1][0], 
842     * a[k1][1] = Im[k1][0] = -Im[rows-k1][0], 
843     * a[rows-k1][1] = Re[k1][columns/2] = Re[rows-k1][columns/2], 
844     * a[rows-k1][0] = -Im[k1][columns/2] = Im[rows-k1][columns/2], 
845     *       0&lt;k1&lt;rows/2, 
846     * a[0][0] = Re[0][0], 
847     * a[0][1] = Re[0][columns/2], 
848     * a[rows/2][0] = Re[rows/2][0], 
849     * a[rows/2][1] = Re[rows/2][columns/2]
850     * </pre>
851     * 
852     * This method computes only half of the elements of the real transform. The
853     * other half satisfies the symmetry condition. If you want the full real
854     * inverse transform, use <code>realInverseFull</code>.
855     * 
856     * @param a
857     *            data to transform
858     * 
859     * @param scale
860     *            if true then scaling is performed
861     */
862    public void realInverse(float[][] a, boolean scale) {
863        if (isPowerOfTwo == false) {
864            throw new IllegalArgumentException("rows and columns must be power of two numbers");
865        } else {
866            int nthreads;
867
868            nthreads = ConcurrencyUtils.getNumberOfThreads();
869            if (nthreads != oldNthreads) {
870                nt = 8 * nthreads * rows;
871                if (columns == 4 * nthreads) {
872                    nt >>= 1;
873                } else if (columns < 4 * nthreads) {
874                    nt >>= 2;
875                }
876                t = new float[nt];
877                oldNthreads = nthreads;
878            }
879            if ((nthreads > 1) && useThreads) {
880                rdft2d_sub(-1, a);
881                cdft2d_subth(1, a, scale);
882                xdft2d0_subth1(1, -1, a, scale);
883            } else {
884                rdft2d_sub(-1, a);
885                cdft2d_sub(1, a, scale);
886                for (int r = 0; r < rows; r++) {
887                    fftColumns.realInverse(a[r], scale);
888                }
889            }
890        }
891    }
892
893    /**
894     * Computes 2D inverse DFT of real data leaving the result in <code>a</code>
895     * . This method computes full real inverse transform, i.e. you will get the
896     * same result as from <code>complexInverse</code> called with all imaginary
897     * part equal 0. Because the result is stored in <code>a</code>, the input
898     * array must be of size rows*2*columns, with only the first rows*columns
899     * elements filled with real data.
900     * 
901     * @param a
902     *            data to transform
903     * 
904     * @param scale
905     *            if true then scaling is performed
906     */
907    public void realInverseFull(float[] a, boolean scale) {
908        if (isPowerOfTwo) {
909            int nthreads;
910
911            nthreads = ConcurrencyUtils.getNumberOfThreads();
912            if (nthreads != oldNthreads) {
913                nt = 8 * nthreads * rows;
914                if (columns == 4 * nthreads) {
915                    nt >>= 1;
916                } else if (columns < 4 * nthreads) {
917                    nt >>= 2;
918                }
919                t = new float[nt];
920                oldNthreads = nthreads;
921            }
922            if ((nthreads > 1) && useThreads) {
923                xdft2d0_subth2(1, -1, a, scale);
924                cdft2d_subth(1, a, scale);
925                rdft2d_sub(1, a);
926            } else {
927                for (int r = 0; r < rows; r++) {
928                    fftColumns.realInverse2(a, r * columns, scale);
929                }
930                cdft2d_sub(1, a, scale);
931                rdft2d_sub(1, a);
932            }
933            fillSymmetric(a);
934        } else {
935            mixedRadixRealInverseFull(a, scale);
936        }
937    }
938
939    /**
940     * Computes 2D inverse DFT of real data leaving the result in <code>a</code>
941     * . This method computes full real inverse transform, i.e. you will get the
942     * same result as from <code>complexInverse</code> called with all imaginary
943     * part equal 0. Because the result is stored in <code>a</code>, the input
944     * array must be of size rows by 2*columns, with only the first rows by
945     * columns elements filled with real data.
946     * 
947     * @param a
948     *            data to transform
949     * 
950     * @param scale
951     *            if true then scaling is performed
952     */
953    public void realInverseFull(float[][] a, boolean scale) {
954        if (isPowerOfTwo) {
955            int nthreads;
956
957            nthreads = ConcurrencyUtils.getNumberOfThreads();
958            if (nthreads != oldNthreads) {
959                nt = 8 * nthreads * rows;
960                if (columns == 4 * nthreads) {
961                    nt >>= 1;
962                } else if (columns < 4 * nthreads) {
963                    nt >>= 2;
964                }
965                t = new float[nt];
966                oldNthreads = nthreads;
967            }
968            if ((nthreads > 1) && useThreads) {
969                xdft2d0_subth2(1, -1, a, scale);
970                cdft2d_subth(1, a, scale);
971                rdft2d_sub(1, a);
972            } else {
973                for (int r = 0; r < rows; r++) {
974                    fftColumns.realInverse2(a[r], 0, scale);
975                }
976                cdft2d_sub(1, a, scale);
977                rdft2d_sub(1, a);
978            }
979            fillSymmetric(a);
980        } else {
981            mixedRadixRealInverseFull(a, scale);
982        }
983    }
984
985    private void mixedRadixRealForwardFull(final float[][] a) {
986        final int n2d2 = columns / 2 + 1;
987        final float[][] temp = new float[n2d2][2 * rows];
988
989        int nthreads = ConcurrencyUtils.getNumberOfThreads();
990        if ((nthreads > 1) && useThreads && (rows >= nthreads) && (n2d2 - 2 >= nthreads)) {
991            Future<?>[] futures = new Future[nthreads];
992            int p = rows / nthreads;
993            for (int l = 0; l < nthreads; l++) {
994                final int firstRow = l * p;
995                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
996                futures[l] = ConcurrencyUtils.submit(new Runnable() {
997                    @Override
998                                        public void run() {
999                        for (int i = firstRow; i < lastRow; i++) {
1000                            fftColumns.realForward(a[i]);
1001                        }
1002                    }
1003                });
1004            }
1005            ConcurrencyUtils.waitForCompletion(futures);
1006
1007            for (int r = 0; r < rows; r++) {
1008                temp[0][r] = a[r][0]; //first column is always real
1009            }
1010            fftRows.realForwardFull(temp[0]);
1011
1012            p = (n2d2 - 2) / nthreads;
1013            for (int l = 0; l < nthreads; l++) {
1014                final int firstColumn = 1 + l * p;
1015                final int lastColumn = (l == (nthreads - 1)) ? n2d2 - 1 : firstColumn + p;
1016                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1017                    @Override
1018                                        public void run() {
1019                        for (int c = firstColumn; c < lastColumn; c++) {
1020                            int idx2 = 2 * c;
1021                            for (int r = 0; r < rows; r++) {
1022                                int idx1 = 2 * r;
1023                                temp[c][idx1] = a[r][idx2];
1024                                temp[c][idx1 + 1] = a[r][idx2 + 1];
1025                            }
1026                            fftRows.complexForward(temp[c]);
1027                        }
1028                    }
1029                });
1030            }
1031            ConcurrencyUtils.waitForCompletion(futures);
1032
1033            if ((columns % 2) == 0) {
1034                for (int r = 0; r < rows; r++) {
1035                    temp[n2d2 - 1][r] = a[r][1];
1036                    //imaginary part = 0;
1037                }
1038                fftRows.realForwardFull(temp[n2d2 - 1]);
1039
1040            } else {
1041                for (int r = 0; r < rows; r++) {
1042                    int idx1 = 2 * r;
1043                    int idx2 = n2d2 - 1;
1044                    temp[idx2][idx1] = a[r][2 * idx2];
1045                    temp[idx2][idx1 + 1] = a[r][1];
1046                }
1047                fftRows.complexForward(temp[n2d2 - 1]);
1048
1049            }
1050
1051            p = rows / nthreads;
1052            for (int l = 0; l < nthreads; l++) {
1053                final int firstRow = l * p;
1054                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1055                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1056                    @Override
1057                                        public void run() {
1058                        for (int r = firstRow; r < lastRow; r++) {
1059                            int idx1 = 2 * r;
1060                            for (int c = 0; c < n2d2; c++) {
1061                                int idx2 = 2 * c;
1062                                a[r][idx2] = temp[c][idx1];
1063                                a[r][idx2 + 1] = temp[c][idx1 + 1];
1064                            }
1065                        }
1066                    }
1067                });
1068            }
1069            ConcurrencyUtils.waitForCompletion(futures);
1070
1071            for (int l = 0; l < nthreads; l++) {
1072                final int firstRow = 1 + l * p;
1073                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1074                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1075                    @Override
1076                                        public void run() {
1077                        for (int r = firstRow; r < lastRow; r++) {
1078                            int idx3 = rows - r;
1079                            for (int c = n2d2; c < columns; c++) {
1080                                int idx1 = 2 * c;
1081                                int idx2 = 2 * (columns - c);
1082                                a[0][idx1] = a[0][idx2];
1083                                a[0][idx1 + 1] = -a[0][idx2 + 1];
1084                                a[r][idx1] = a[idx3][idx2];
1085                                a[r][idx1 + 1] = -a[idx3][idx2 + 1];
1086                            }
1087                        }
1088                    }
1089                });
1090            }
1091            ConcurrencyUtils.waitForCompletion(futures);
1092
1093        } else {
1094            for (int r = 0; r < rows; r++) {
1095                fftColumns.realForward(a[r]);
1096            }
1097
1098            for (int r = 0; r < rows; r++) {
1099                temp[0][r] = a[r][0]; //first column is always real
1100            }
1101            fftRows.realForwardFull(temp[0]);
1102
1103            for (int c = 1; c < n2d2 - 1; c++) {
1104                int idx2 = 2 * c;
1105                for (int r = 0; r < rows; r++) {
1106                    int idx1 = 2 * r;
1107                    temp[c][idx1] = a[r][idx2];
1108                    temp[c][idx1 + 1] = a[r][idx2 + 1];
1109                }
1110                fftRows.complexForward(temp[c]);
1111            }
1112
1113            if ((columns % 2) == 0) {
1114                for (int r = 0; r < rows; r++) {
1115                    temp[n2d2 - 1][r] = a[r][1];
1116                    //imaginary part = 0;
1117                }
1118                fftRows.realForwardFull(temp[n2d2 - 1]);
1119
1120            } else {
1121                for (int r = 0; r < rows; r++) {
1122                    int idx1 = 2 * r;
1123                    int idx2 = n2d2 - 1;
1124                    temp[idx2][idx1] = a[r][2 * idx2];
1125                    temp[idx2][idx1 + 1] = a[r][1];
1126                }
1127                fftRows.complexForward(temp[n2d2 - 1]);
1128
1129            }
1130
1131            for (int r = 0; r < rows; r++) {
1132                int idx1 = 2 * r;
1133                for (int c = 0; c < n2d2; c++) {
1134                    int idx2 = 2 * c;
1135                    a[r][idx2] = temp[c][idx1];
1136                    a[r][idx2 + 1] = temp[c][idx1 + 1];
1137                }
1138            }
1139
1140            //fill symmetric
1141            for (int r = 1; r < rows; r++) {
1142                int idx3 = rows - r;
1143                for (int c = n2d2; c < columns; c++) {
1144                    int idx1 = 2 * c;
1145                    int idx2 = 2 * (columns - c);
1146                    a[0][idx1] = a[0][idx2];
1147                    a[0][idx1 + 1] = -a[0][idx2 + 1];
1148                    a[r][idx1] = a[idx3][idx2];
1149                    a[r][idx1 + 1] = -a[idx3][idx2 + 1];
1150                }
1151            }
1152        }
1153    }
1154
1155    private void mixedRadixRealForwardFull(final float[] a) {
1156        final int rowStride = 2 * columns;
1157        final int n2d2 = columns / 2 + 1;
1158        final float[][] temp = new float[n2d2][2 * rows];
1159
1160        int nthreads = ConcurrencyUtils.getNumberOfThreads();
1161        if ((nthreads > 1) && useThreads && (rows >= nthreads) && (n2d2 - 2 >= nthreads)) {
1162            Future<?>[] futures = new Future[nthreads];
1163            int p = rows / nthreads;
1164            for (int l = 0; l < nthreads; l++) {
1165                final int firstRow = l * p;
1166                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1167                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1168                    @Override
1169                                        public void run() {
1170                        for (int i = firstRow; i < lastRow; i++) {
1171                            fftColumns.realForward(a, i * columns);
1172                        }
1173                    }
1174                });
1175            }
1176            ConcurrencyUtils.waitForCompletion(futures);
1177
1178            for (int r = 0; r < rows; r++) {
1179                temp[0][r] = a[r * columns]; //first column is always real
1180            }
1181            fftRows.realForwardFull(temp[0]);
1182
1183            p = (n2d2 - 2) / nthreads;
1184            for (int l = 0; l < nthreads; l++) {
1185                final int firstColumn = 1 + l * p;
1186                final int lastColumn = (l == (nthreads - 1)) ? n2d2 - 1 : firstColumn + p;
1187                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1188                    @Override
1189                                        public void run() {
1190                        for (int c = firstColumn; c < lastColumn; c++) {
1191                            int idx0 = 2 * c;
1192                            for (int r = 0; r < rows; r++) {
1193                                int idx1 = 2 * r;
1194                                int idx2 = r * columns + idx0;
1195                                temp[c][idx1] = a[idx2];
1196                                temp[c][idx1 + 1] = a[idx2 + 1];
1197                            }
1198                            fftRows.complexForward(temp[c]);
1199                        }
1200                    }
1201                });
1202            }
1203            ConcurrencyUtils.waitForCompletion(futures);
1204
1205            if ((columns % 2) == 0) {
1206                for (int r = 0; r < rows; r++) {
1207                    temp[n2d2 - 1][r] = a[r * columns + 1];
1208                    //imaginary part = 0;
1209                }
1210                fftRows.realForwardFull(temp[n2d2 - 1]);
1211
1212            } else {
1213                for (int r = 0; r < rows; r++) {
1214                    int idx1 = 2 * r;
1215                    int idx2 = r * columns;
1216                    int idx3 = n2d2 - 1;
1217                    temp[idx3][idx1] = a[idx2 + 2 * idx3];
1218                    temp[idx3][idx1 + 1] = a[idx2 + 1];
1219                }
1220                fftRows.complexForward(temp[n2d2 - 1]);
1221            }
1222
1223            p = rows / nthreads;
1224            for (int l = 0; l < nthreads; l++) {
1225                final int firstRow = l * p;
1226                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1227                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1228                    @Override
1229                                        public void run() {
1230                        for (int r = firstRow; r < lastRow; r++) {
1231                            int idx1 = 2 * r;
1232                            for (int c = 0; c < n2d2; c++) {
1233                                int idx0 = 2 * c;
1234                                int idx2 = r * rowStride + idx0;
1235                                a[idx2] = temp[c][idx1];
1236                                a[idx2 + 1] = temp[c][idx1 + 1];
1237                            }
1238                        }
1239                    }
1240                });
1241            }
1242            ConcurrencyUtils.waitForCompletion(futures);
1243
1244            for (int l = 0; l < nthreads; l++) {
1245                final int firstRow = 1 + l * p;
1246                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1247                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1248                    @Override
1249                                        public void run() {
1250                        for (int r = firstRow; r < lastRow; r++) {
1251                            int idx5 = r * rowStride;
1252                            int idx6 = (rows - r + 1) * rowStride;
1253                            for (int c = n2d2; c < columns; c++) {
1254                                int idx1 = 2 * c;
1255                                int idx2 = 2 * (columns - c);
1256                                a[idx1] = a[idx2];
1257                                a[idx1 + 1] = -a[idx2 + 1];
1258                                int idx3 = idx5 + idx1;
1259                                int idx4 = idx6 - idx1;
1260                                a[idx3] = a[idx4];
1261                                a[idx3 + 1] = -a[idx4 + 1];
1262                            }
1263                        }
1264                    }
1265                });
1266            }
1267            ConcurrencyUtils.waitForCompletion(futures);
1268        } else {
1269            for (int r = 0; r < rows; r++) {
1270                fftColumns.realForward(a, r * columns);
1271            }
1272            for (int r = 0; r < rows; r++) {
1273                temp[0][r] = a[r * columns]; //first column is always real
1274            }
1275            fftRows.realForwardFull(temp[0]);
1276
1277            for (int c = 1; c < n2d2 - 1; c++) {
1278                int idx0 = 2 * c;
1279                for (int r = 0; r < rows; r++) {
1280                    int idx1 = 2 * r;
1281                    int idx2 = r * columns + idx0;
1282                    temp[c][idx1] = a[idx2];
1283                    temp[c][idx1 + 1] = a[idx2 + 1];
1284                }
1285                fftRows.complexForward(temp[c]);
1286            }
1287
1288            if ((columns % 2) == 0) {
1289                for (int r = 0; r < rows; r++) {
1290                    temp[n2d2 - 1][r] = a[r * columns + 1];
1291                    //imaginary part = 0;
1292                }
1293                fftRows.realForwardFull(temp[n2d2 - 1]);
1294
1295            } else {
1296                for (int r = 0; r < rows; r++) {
1297                    int idx1 = 2 * r;
1298                    int idx2 = r * columns;
1299                    int idx3 = n2d2 - 1;
1300                    temp[idx3][idx1] = a[idx2 + 2 * idx3];
1301                    temp[idx3][idx1 + 1] = a[idx2 + 1];
1302                }
1303                fftRows.complexForward(temp[n2d2 - 1]);
1304            }
1305
1306            for (int r = 0; r < rows; r++) {
1307                int idx1 = 2 * r;
1308                for (int c = 0; c < n2d2; c++) {
1309                    int idx0 = 2 * c;
1310                    int idx2 = r * rowStride + idx0;
1311                    a[idx2] = temp[c][idx1];
1312                    a[idx2 + 1] = temp[c][idx1 + 1];
1313                }
1314            }
1315
1316            //fill symmetric
1317            for (int r = 1; r < rows; r++) {
1318                int idx5 = r * rowStride;
1319                int idx6 = (rows - r + 1) * rowStride;
1320                for (int c = n2d2; c < columns; c++) {
1321                    int idx1 = 2 * c;
1322                    int idx2 = 2 * (columns - c);
1323                    a[idx1] = a[idx2];
1324                    a[idx1 + 1] = -a[idx2 + 1];
1325                    int idx3 = idx5 + idx1;
1326                    int idx4 = idx6 - idx1;
1327                    a[idx3] = a[idx4];
1328                    a[idx3 + 1] = -a[idx4 + 1];
1329                }
1330            }
1331        }
1332    }
1333
1334    private void mixedRadixRealInverseFull(final float[][] a, final boolean scale) {
1335        final int n2d2 = columns / 2 + 1;
1336        final float[][] temp = new float[n2d2][2 * rows];
1337
1338        int nthreads = ConcurrencyUtils.getNumberOfThreads();
1339        if ((nthreads > 1) && useThreads && (rows >= nthreads) && (n2d2 - 2 >= nthreads)) {
1340            Future<?>[] futures = new Future[nthreads];
1341            int p = rows / nthreads;
1342            for (int l = 0; l < nthreads; l++) {
1343                final int firstRow = l * p;
1344                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1345                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1346                    @Override
1347                                        public void run() {
1348                        for (int i = firstRow; i < lastRow; i++) {
1349                            fftColumns.realInverse2(a[i], 0, scale);
1350                        }
1351                    }
1352                });
1353            }
1354            ConcurrencyUtils.waitForCompletion(futures);
1355
1356            for (int r = 0; r < rows; r++) {
1357                temp[0][r] = a[r][0]; //first column is always real
1358            }
1359            fftRows.realInverseFull(temp[0], scale);
1360
1361            p = (n2d2 - 2) / nthreads;
1362            for (int l = 0; l < nthreads; l++) {
1363                final int firstColumn = 1 + l * p;
1364                final int lastColumn = (l == (nthreads - 1)) ? n2d2 - 1 : firstColumn + p;
1365                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1366                    @Override
1367                                        public void run() {
1368                        for (int c = firstColumn; c < lastColumn; c++) {
1369                            int idx2 = 2 * c;
1370                            for (int r = 0; r < rows; r++) {
1371                                int idx1 = 2 * r;
1372                                temp[c][idx1] = a[r][idx2];
1373                                temp[c][idx1 + 1] = a[r][idx2 + 1];
1374                            }
1375                            fftRows.complexInverse(temp[c], scale);
1376                        }
1377                    }
1378                });
1379            }
1380            ConcurrencyUtils.waitForCompletion(futures);
1381
1382            if ((columns % 2) == 0) {
1383                for (int r = 0; r < rows; r++) {
1384                    temp[n2d2 - 1][r] = a[r][1];
1385                    //imaginary part = 0;
1386                }
1387                fftRows.realInverseFull(temp[n2d2 - 1], scale);
1388
1389            } else {
1390                for (int r = 0; r < rows; r++) {
1391                    int idx1 = 2 * r;
1392                    int idx2 = n2d2 - 1;
1393                    temp[idx2][idx1] = a[r][2 * idx2];
1394                    temp[idx2][idx1 + 1] = a[r][1];
1395                }
1396                fftRows.complexInverse(temp[n2d2 - 1], scale);
1397
1398            }
1399
1400            p = rows / nthreads;
1401            for (int l = 0; l < nthreads; l++) {
1402                final int firstRow = l * p;
1403                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1404                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1405                    @Override
1406                                        public void run() {
1407                        for (int r = firstRow; r < lastRow; r++) {
1408                            int idx1 = 2 * r;
1409                            for (int c = 0; c < n2d2; c++) {
1410                                int idx2 = 2 * c;
1411                                a[r][idx2] = temp[c][idx1];
1412                                a[r][idx2 + 1] = temp[c][idx1 + 1];
1413                            }
1414                        }
1415                    }
1416                });
1417            }
1418            ConcurrencyUtils.waitForCompletion(futures);
1419
1420            for (int l = 0; l < nthreads; l++) {
1421                final int firstRow = 1 + l * p;
1422                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1423                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1424                    @Override
1425                                        public void run() {
1426                        for (int r = firstRow; r < lastRow; r++) {
1427                            int idx3 = rows - r;
1428                            for (int c = n2d2; c < columns; c++) {
1429                                int idx1 = 2 * c;
1430                                int idx2 = 2 * (columns - c);
1431                                a[0][idx1] = a[0][idx2];
1432                                a[0][idx1 + 1] = -a[0][idx2 + 1];
1433                                a[r][idx1] = a[idx3][idx2];
1434                                a[r][idx1 + 1] = -a[idx3][idx2 + 1];
1435                            }
1436                        }
1437                    }
1438                });
1439            }
1440            ConcurrencyUtils.waitForCompletion(futures);
1441
1442        } else {
1443            for (int r = 0; r < rows; r++) {
1444                fftColumns.realInverse2(a[r], 0, scale);
1445            }
1446
1447            for (int r = 0; r < rows; r++) {
1448                temp[0][r] = a[r][0]; //first column is always real
1449            }
1450            fftRows.realInverseFull(temp[0], scale);
1451
1452            for (int c = 1; c < n2d2 - 1; c++) {
1453                int idx2 = 2 * c;
1454                for (int r = 0; r < rows; r++) {
1455                    int idx1 = 2 * r;
1456                    temp[c][idx1] = a[r][idx2];
1457                    temp[c][idx1 + 1] = a[r][idx2 + 1];
1458                }
1459                fftRows.complexInverse(temp[c], scale);
1460            }
1461
1462            if ((columns % 2) == 0) {
1463                for (int r = 0; r < rows; r++) {
1464                    temp[n2d2 - 1][r] = a[r][1];
1465                    //imaginary part = 0;
1466                }
1467                fftRows.realInverseFull(temp[n2d2 - 1], scale);
1468
1469            } else {
1470                for (int r = 0; r < rows; r++) {
1471                    int idx1 = 2 * r;
1472                    int idx2 = n2d2 - 1;
1473                    temp[idx2][idx1] = a[r][2 * idx2];
1474                    temp[idx2][idx1 + 1] = a[r][1];
1475                }
1476                fftRows.complexInverse(temp[n2d2 - 1], scale);
1477
1478            }
1479
1480            for (int r = 0; r < rows; r++) {
1481                int idx1 = 2 * r;
1482                for (int c = 0; c < n2d2; c++) {
1483                    int idx2 = 2 * c;
1484                    a[r][idx2] = temp[c][idx1];
1485                    a[r][idx2 + 1] = temp[c][idx1 + 1];
1486                }
1487            }
1488
1489            //fill symmetric
1490            for (int r = 1; r < rows; r++) {
1491                int idx3 = rows - r;
1492                for (int c = n2d2; c < columns; c++) {
1493                    int idx1 = 2 * c;
1494                    int idx2 = 2 * (columns - c);
1495                    a[0][idx1] = a[0][idx2];
1496                    a[0][idx1 + 1] = -a[0][idx2 + 1];
1497                    a[r][idx1] = a[idx3][idx2];
1498                    a[r][idx1 + 1] = -a[idx3][idx2 + 1];
1499                }
1500            }
1501        }
1502    }
1503
1504    private void mixedRadixRealInverseFull(final float[] a, final boolean scale) {
1505        final int rowStride = 2 * columns;
1506        final int n2d2 = columns / 2 + 1;
1507        final float[][] temp = new float[n2d2][2 * rows];
1508
1509        int nthreads = ConcurrencyUtils.getNumberOfThreads();
1510        if ((nthreads > 1) && useThreads && (rows >= nthreads) && (n2d2 - 2 >= nthreads)) {
1511            Future<?>[] futures = new Future[nthreads];
1512            int p = rows / nthreads;
1513            for (int l = 0; l < nthreads; l++) {
1514                final int firstRow = l * p;
1515                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1516                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1517                    @Override
1518                                        public void run() {
1519                        for (int i = firstRow; i < lastRow; i++) {
1520                            fftColumns.realInverse2(a, i * columns, scale);
1521                        }
1522                    }
1523                });
1524            }
1525            ConcurrencyUtils.waitForCompletion(futures);
1526
1527            for (int r = 0; r < rows; r++) {
1528                temp[0][r] = a[r * columns]; //first column is always real
1529            }
1530            fftRows.realInverseFull(temp[0], scale);
1531
1532            p = (n2d2 - 2) / nthreads;
1533            for (int l = 0; l < nthreads; l++) {
1534                final int firstColumn = 1 + l * p;
1535                final int lastColumn = (l == (nthreads - 1)) ? n2d2 - 1 : firstColumn + p;
1536                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1537                    @Override
1538                                        public void run() {
1539                        for (int c = firstColumn; c < lastColumn; c++) {
1540                            int idx0 = 2 * c;
1541                            for (int r = 0; r < rows; r++) {
1542                                int idx1 = 2 * r;
1543                                int idx2 = r * columns + idx0;
1544                                temp[c][idx1] = a[idx2];
1545                                temp[c][idx1 + 1] = a[idx2 + 1];
1546                            }
1547                            fftRows.complexInverse(temp[c], scale);
1548                        }
1549                    }
1550                });
1551            }
1552            ConcurrencyUtils.waitForCompletion(futures);
1553
1554            if ((columns % 2) == 0) {
1555                for (int r = 0; r < rows; r++) {
1556                    temp[n2d2 - 1][r] = a[r * columns + 1];
1557                    //imaginary part = 0;
1558                }
1559                fftRows.realInverseFull(temp[n2d2 - 1], scale);
1560
1561            } else {
1562                for (int r = 0; r < rows; r++) {
1563                    int idx1 = 2 * r;
1564                    int idx2 = r * columns;
1565                    int idx3 = n2d2 - 1;
1566                    temp[idx3][idx1] = a[idx2 + 2 * idx3];
1567                    temp[idx3][idx1 + 1] = a[idx2 + 1];
1568                }
1569                fftRows.complexInverse(temp[n2d2 - 1], scale);
1570            }
1571
1572            p = rows / nthreads;
1573            for (int l = 0; l < nthreads; l++) {
1574                final int firstRow = l * p;
1575                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1576                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1577                    @Override
1578                                        public void run() {
1579                        for (int r = firstRow; r < lastRow; r++) {
1580                            int idx1 = 2 * r;
1581                            for (int c = 0; c < n2d2; c++) {
1582                                int idx0 = 2 * c;
1583                                int idx2 = r * rowStride + idx0;
1584                                a[idx2] = temp[c][idx1];
1585                                a[idx2 + 1] = temp[c][idx1 + 1];
1586                            }
1587                        }
1588                    }
1589                });
1590            }
1591            ConcurrencyUtils.waitForCompletion(futures);
1592
1593            for (int l = 0; l < nthreads; l++) {
1594                final int firstRow = 1 + l * p;
1595                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1596                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1597                    @Override
1598                                        public void run() {
1599                        for (int r = firstRow; r < lastRow; r++) {
1600                            int idx5 = r * rowStride;
1601                            int idx6 = (rows - r + 1) * rowStride;
1602                            for (int c = n2d2; c < columns; c++) {
1603                                int idx1 = 2 * c;
1604                                int idx2 = 2 * (columns - c);
1605                                a[idx1] = a[idx2];
1606                                a[idx1 + 1] = -a[idx2 + 1];
1607                                int idx3 = idx5 + idx1;
1608                                int idx4 = idx6 - idx1;
1609                                a[idx3] = a[idx4];
1610                                a[idx3 + 1] = -a[idx4 + 1];
1611                            }
1612                        }
1613                    }
1614                });
1615            }
1616            ConcurrencyUtils.waitForCompletion(futures);
1617        } else {
1618            for (int r = 0; r < rows; r++) {
1619                fftColumns.realInverse2(a, r * columns, scale);
1620            }
1621            for (int r = 0; r < rows; r++) {
1622                temp[0][r] = a[r * columns]; //first column is always real
1623            }
1624            fftRows.realInverseFull(temp[0], scale);
1625
1626            for (int c = 1; c < n2d2 - 1; c++) {
1627                int idx0 = 2 * c;
1628                for (int r = 0; r < rows; r++) {
1629                    int idx1 = 2 * r;
1630                    int idx2 = r * columns + idx0;
1631                    temp[c][idx1] = a[idx2];
1632                    temp[c][idx1 + 1] = a[idx2 + 1];
1633                }
1634                fftRows.complexInverse(temp[c], scale);
1635            }
1636
1637            if ((columns % 2) == 0) {
1638                for (int r = 0; r < rows; r++) {
1639                    temp[n2d2 - 1][r] = a[r * columns + 1];
1640                    //imaginary part = 0;
1641                }
1642                fftRows.realInverseFull(temp[n2d2 - 1], scale);
1643
1644            } else {
1645                for (int r = 0; r < rows; r++) {
1646                    int idx1 = 2 * r;
1647                    int idx2 = r * columns;
1648                    int idx3 = n2d2 - 1;
1649                    temp[idx3][idx1] = a[idx2 + 2 * idx3];
1650                    temp[idx3][idx1 + 1] = a[idx2 + 1];
1651                }
1652                fftRows.complexInverse(temp[n2d2 - 1], scale);
1653            }
1654
1655            for (int r = 0; r < rows; r++) {
1656                int idx1 = 2 * r;
1657                for (int c = 0; c < n2d2; c++) {
1658                    int idx0 = 2 * c;
1659                    int idx2 = r * rowStride + idx0;
1660                    a[idx2] = temp[c][idx1];
1661                    a[idx2 + 1] = temp[c][idx1 + 1];
1662                }
1663            }
1664
1665            //fill symmetric
1666            for (int r = 1; r < rows; r++) {
1667                int idx5 = r * rowStride;
1668                int idx6 = (rows - r + 1) * rowStride;
1669                for (int c = n2d2; c < columns; c++) {
1670                    int idx1 = 2 * c;
1671                    int idx2 = 2 * (columns - c);
1672                    a[idx1] = a[idx2];
1673                    a[idx1 + 1] = -a[idx2 + 1];
1674                    int idx3 = idx5 + idx1;
1675                    int idx4 = idx6 - idx1;
1676                    a[idx3] = a[idx4];
1677                    a[idx3 + 1] = -a[idx4 + 1];
1678                }
1679            }
1680        }
1681    }
1682
1683    private void rdft2d_sub(int isgn, float[] a) {
1684        int n1h, j;
1685        float xi;
1686        int idx1, idx2;
1687
1688        n1h = rows >> 1;
1689        if (isgn < 0) {
1690            for (int i = 1; i < n1h; i++) {
1691                j = rows - i;
1692                idx1 = i * columns;
1693                idx2 = j * columns;
1694                xi = a[idx1] - a[idx2];
1695                a[idx1] += a[idx2];
1696                a[idx2] = xi;
1697                xi = a[idx2 + 1] - a[idx1 + 1];
1698                a[idx1 + 1] += a[idx2 + 1];
1699                a[idx2 + 1] = xi;
1700            }
1701        } else {
1702            for (int i = 1; i < n1h; i++) {
1703                j = rows - i;
1704                idx1 = i * columns;
1705                idx2 = j * columns;
1706                a[idx2] = 0.5f * (a[idx1] - a[idx2]);
1707                a[idx1] -= a[idx2];
1708                a[idx2 + 1] = 0.5f * (a[idx1 + 1] + a[idx2 + 1]);
1709                a[idx1 + 1] -= a[idx2 + 1];
1710            }
1711        }
1712    }
1713
1714    private void rdft2d_sub(int isgn, float[][] a) {
1715        int n1h, j;
1716        float xi;
1717
1718        n1h = rows >> 1;
1719        if (isgn < 0) {
1720            for (int i = 1; i < n1h; i++) {
1721                j = rows - i;
1722                xi = a[i][0] - a[j][0];
1723                a[i][0] += a[j][0];
1724                a[j][0] = xi;
1725                xi = a[j][1] - a[i][1];
1726                a[i][1] += a[j][1];
1727                a[j][1] = xi;
1728            }
1729        } else {
1730            for (int i = 1; i < n1h; i++) {
1731                j = rows - i;
1732                a[j][0] = 0.5f * (a[i][0] - a[j][0]);
1733                a[i][0] -= a[j][0];
1734                a[j][1] = 0.5f * (a[i][1] + a[j][1]);
1735                a[i][1] -= a[j][1];
1736            }
1737        }
1738    }
1739
1740    private void cdft2d_sub(int isgn, float[] a, boolean scale) {
1741        int idx1, idx2, idx3, idx4, idx5;
1742        if (isgn == -1) {
1743            if (columns > 4) {
1744                for (int c = 0; c < columns; c += 8) {
1745                    for (int r = 0; r < rows; r++) {
1746                        idx1 = r * columns + c;
1747                        idx2 = 2 * r;
1748                        idx3 = 2 * rows + 2 * r;
1749                        idx4 = idx3 + 2 * rows;
1750                        idx5 = idx4 + 2 * rows;
1751                        t[idx2] = a[idx1];
1752                        t[idx2 + 1] = a[idx1 + 1];
1753                        t[idx3] = a[idx1 + 2];
1754                        t[idx3 + 1] = a[idx1 + 3];
1755                        t[idx4] = a[idx1 + 4];
1756                        t[idx4 + 1] = a[idx1 + 5];
1757                        t[idx5] = a[idx1 + 6];
1758                        t[idx5 + 1] = a[idx1 + 7];
1759                    }
1760                    fftRows.complexForward(t, 0);
1761                    fftRows.complexForward(t, 2 * rows);
1762                    fftRows.complexForward(t, 4 * rows);
1763                    fftRows.complexForward(t, 6 * rows);
1764                    for (int r = 0; r < rows; r++) {
1765                        idx1 = r * columns + c;
1766                        idx2 = 2 * r;
1767                        idx3 = 2 * rows + 2 * r;
1768                        idx4 = idx3 + 2 * rows;
1769                        idx5 = idx4 + 2 * rows;
1770                        a[idx1] = t[idx2];
1771                        a[idx1 + 1] = t[idx2 + 1];
1772                        a[idx1 + 2] = t[idx3];
1773                        a[idx1 + 3] = t[idx3 + 1];
1774                        a[idx1 + 4] = t[idx4];
1775                        a[idx1 + 5] = t[idx4 + 1];
1776                        a[idx1 + 6] = t[idx5];
1777                        a[idx1 + 7] = t[idx5 + 1];
1778                    }
1779                }
1780            } else if (columns == 4) {
1781                for (int r = 0; r < rows; r++) {
1782                    idx1 = r * columns;
1783                    idx2 = 2 * r;
1784                    idx3 = 2 * rows + 2 * r;
1785                    t[idx2] = a[idx1];
1786                    t[idx2 + 1] = a[idx1 + 1];
1787                    t[idx3] = a[idx1 + 2];
1788                    t[idx3 + 1] = a[idx1 + 3];
1789                }
1790                fftRows.complexForward(t, 0);
1791                fftRows.complexForward(t, 2 * rows);
1792                for (int r = 0; r < rows; r++) {
1793                    idx1 = r * columns;
1794                    idx2 = 2 * r;
1795                    idx3 = 2 * rows + 2 * r;
1796                    a[idx1] = t[idx2];
1797                    a[idx1 + 1] = t[idx2 + 1];
1798                    a[idx1 + 2] = t[idx3];
1799                    a[idx1 + 3] = t[idx3 + 1];
1800                }
1801            } else if (columns == 2) {
1802                for (int r = 0; r < rows; r++) {
1803                    idx1 = r * columns;
1804                    idx2 = 2 * r;
1805                    t[idx2] = a[idx1];
1806                    t[idx2 + 1] = a[idx1 + 1];
1807                }
1808                fftRows.complexForward(t, 0);
1809                for (int r = 0; r < rows; r++) {
1810                    idx1 = r * columns;
1811                    idx2 = 2 * r;
1812                    a[idx1] = t[idx2];
1813                    a[idx1 + 1] = t[idx2 + 1];
1814                }
1815            }
1816        } else {
1817            if (columns > 4) {
1818                for (int c = 0; c < columns; c += 8) {
1819                    for (int r = 0; r < rows; r++) {
1820                        idx1 = r * columns + c;
1821                        idx2 = 2 * r;
1822                        idx3 = 2 * rows + 2 * r;
1823                        idx4 = idx3 + 2 * rows;
1824                        idx5 = idx4 + 2 * rows;
1825                        t[idx2] = a[idx1];
1826                        t[idx2 + 1] = a[idx1 + 1];
1827                        t[idx3] = a[idx1 + 2];
1828                        t[idx3 + 1] = a[idx1 + 3];
1829                        t[idx4] = a[idx1 + 4];
1830                        t[idx4 + 1] = a[idx1 + 5];
1831                        t[idx5] = a[idx1 + 6];
1832                        t[idx5 + 1] = a[idx1 + 7];
1833                    }
1834                    fftRows.complexInverse(t, 0, scale);
1835                    fftRows.complexInverse(t, 2 * rows, scale);
1836                    fftRows.complexInverse(t, 4 * rows, scale);
1837                    fftRows.complexInverse(t, 6 * rows, scale);
1838                    for (int r = 0; r < rows; r++) {
1839                        idx1 = r * columns + c;
1840                        idx2 = 2 * r;
1841                        idx3 = 2 * rows + 2 * r;
1842                        idx4 = idx3 + 2 * rows;
1843                        idx5 = idx4 + 2 * rows;
1844                        a[idx1] = t[idx2];
1845                        a[idx1 + 1] = t[idx2 + 1];
1846                        a[idx1 + 2] = t[idx3];
1847                        a[idx1 + 3] = t[idx3 + 1];
1848                        a[idx1 + 4] = t[idx4];
1849                        a[idx1 + 5] = t[idx4 + 1];
1850                        a[idx1 + 6] = t[idx5];
1851                        a[idx1 + 7] = t[idx5 + 1];
1852                    }
1853                }
1854            } else if (columns == 4) {
1855                for (int r = 0; r < rows; r++) {
1856                    idx1 = r * columns;
1857                    idx2 = 2 * r;
1858                    idx3 = 2 * rows + 2 * r;
1859                    t[idx2] = a[idx1];
1860                    t[idx2 + 1] = a[idx1 + 1];
1861                    t[idx3] = a[idx1 + 2];
1862                    t[idx3 + 1] = a[idx1 + 3];
1863                }
1864                fftRows.complexInverse(t, 0, scale);
1865                fftRows.complexInverse(t, 2 * rows, scale);
1866                for (int r = 0; r < rows; r++) {
1867                    idx1 = r * columns;
1868                    idx2 = 2 * r;
1869                    idx3 = 2 * rows + 2 * r;
1870                    a[idx1] = t[idx2];
1871                    a[idx1 + 1] = t[idx2 + 1];
1872                    a[idx1 + 2] = t[idx3];
1873                    a[idx1 + 3] = t[idx3 + 1];
1874                }
1875            } else if (columns == 2) {
1876                for (int r = 0; r < rows; r++) {
1877                    idx1 = r * columns;
1878                    idx2 = 2 * r;
1879                    t[idx2] = a[idx1];
1880                    t[idx2 + 1] = a[idx1 + 1];
1881                }
1882                fftRows.complexInverse(t, 0, scale);
1883                for (int r = 0; r < rows; r++) {
1884                    idx1 = r * columns;
1885                    idx2 = 2 * r;
1886                    a[idx1] = t[idx2];
1887                    a[idx1 + 1] = t[idx2 + 1];
1888                }
1889            }
1890        }
1891    }
1892
1893    private void cdft2d_sub(int isgn, float[][] a, boolean scale) {
1894        int idx2, idx3, idx4, idx5;
1895        if (isgn == -1) {
1896            if (columns > 4) {
1897                for (int c = 0; c < columns; c += 8) {
1898                    for (int r = 0; r < rows; r++) {
1899                        idx2 = 2 * r;
1900                        idx3 = 2 * rows + 2 * r;
1901                        idx4 = idx3 + 2 * rows;
1902                        idx5 = idx4 + 2 * rows;
1903                        t[idx2] = a[r][c];
1904                        t[idx2 + 1] = a[r][c + 1];
1905                        t[idx3] = a[r][c + 2];
1906                        t[idx3 + 1] = a[r][c + 3];
1907                        t[idx4] = a[r][c + 4];
1908                        t[idx4 + 1] = a[r][c + 5];
1909                        t[idx5] = a[r][c + 6];
1910                        t[idx5 + 1] = a[r][c + 7];
1911                    }
1912                    fftRows.complexForward(t, 0);
1913                    fftRows.complexForward(t, 2 * rows);
1914                    fftRows.complexForward(t, 4 * rows);
1915                    fftRows.complexForward(t, 6 * rows);
1916                    for (int r = 0; r < rows; r++) {
1917                        idx2 = 2 * r;
1918                        idx3 = 2 * rows + 2 * r;
1919                        idx4 = idx3 + 2 * rows;
1920                        idx5 = idx4 + 2 * rows;
1921                        a[r][c] = t[idx2];
1922                        a[r][c + 1] = t[idx2 + 1];
1923                        a[r][c + 2] = t[idx3];
1924                        a[r][c + 3] = t[idx3 + 1];
1925                        a[r][c + 4] = t[idx4];
1926                        a[r][c + 5] = t[idx4 + 1];
1927                        a[r][c + 6] = t[idx5];
1928                        a[r][c + 7] = t[idx5 + 1];
1929                    }
1930                }
1931            } else if (columns == 4) {
1932                for (int r = 0; r < rows; r++) {
1933                    idx2 = 2 * r;
1934                    idx3 = 2 * rows + 2 * r;
1935                    t[idx2] = a[r][0];
1936                    t[idx2 + 1] = a[r][1];
1937                    t[idx3] = a[r][2];
1938                    t[idx3 + 1] = a[r][3];
1939                }
1940                fftRows.complexForward(t, 0);
1941                fftRows.complexForward(t, 2 * rows);
1942                for (int r = 0; r < rows; r++) {
1943                    idx2 = 2 * r;
1944                    idx3 = 2 * rows + 2 * r;
1945                    a[r][0] = t[idx2];
1946                    a[r][1] = t[idx2 + 1];
1947                    a[r][2] = t[idx3];
1948                    a[r][3] = t[idx3 + 1];
1949                }
1950            } else if (columns == 2) {
1951                for (int r = 0; r < rows; r++) {
1952                    idx2 = 2 * r;
1953                    t[idx2] = a[r][0];
1954                    t[idx2 + 1] = a[r][1];
1955                }
1956                fftRows.complexForward(t, 0);
1957                for (int r = 0; r < rows; r++) {
1958                    idx2 = 2 * r;
1959                    a[r][0] = t[idx2];
1960                    a[r][1] = t[idx2 + 1];
1961                }
1962            }
1963        } else {
1964            if (columns > 4) {
1965                for (int c = 0; c < columns; c += 8) {
1966                    for (int r = 0; r < rows; r++) {
1967                        idx2 = 2 * r;
1968                        idx3 = 2 * rows + 2 * r;
1969                        idx4 = idx3 + 2 * rows;
1970                        idx5 = idx4 + 2 * rows;
1971                        t[idx2] = a[r][c];
1972                        t[idx2 + 1] = a[r][c + 1];
1973                        t[idx3] = a[r][c + 2];
1974                        t[idx3 + 1] = a[r][c + 3];
1975                        t[idx4] = a[r][c + 4];
1976                        t[idx4 + 1] = a[r][c + 5];
1977                        t[idx5] = a[r][c + 6];
1978                        t[idx5 + 1] = a[r][c + 7];
1979                    }
1980                    fftRows.complexInverse(t, 0, scale);
1981                    fftRows.complexInverse(t, 2 * rows, scale);
1982                    fftRows.complexInverse(t, 4 * rows, scale);
1983                    fftRows.complexInverse(t, 6 * rows, scale);
1984                    for (int r = 0; r < rows; r++) {
1985                        idx2 = 2 * r;
1986                        idx3 = 2 * rows + 2 * r;
1987                        idx4 = idx3 + 2 * rows;
1988                        idx5 = idx4 + 2 * rows;
1989                        a[r][c] = t[idx2];
1990                        a[r][c + 1] = t[idx2 + 1];
1991                        a[r][c + 2] = t[idx3];
1992                        a[r][c + 3] = t[idx3 + 1];
1993                        a[r][c + 4] = t[idx4];
1994                        a[r][c + 5] = t[idx4 + 1];
1995                        a[r][c + 6] = t[idx5];
1996                        a[r][c + 7] = t[idx5 + 1];
1997                    }
1998                }
1999            } else if (columns == 4) {
2000                for (int r = 0; r < rows; r++) {
2001                    idx2 = 2 * r;
2002                    idx3 = 2 * rows + 2 * r;
2003                    t[idx2] = a[r][0];
2004                    t[idx2 + 1] = a[r][1];
2005                    t[idx3] = a[r][2];
2006                    t[idx3 + 1] = a[r][3];
2007                }
2008                fftRows.complexInverse(t, 0, scale);
2009                fftRows.complexInverse(t, 2 * rows, scale);
2010                for (int r = 0; r < rows; r++) {
2011                    idx2 = 2 * r;
2012                    idx3 = 2 * rows + 2 * r;
2013                    a[r][0] = t[idx2];
2014                    a[r][1] = t[idx2 + 1];
2015                    a[r][2] = t[idx3];
2016                    a[r][3] = t[idx3 + 1];
2017                }
2018            } else if (columns == 2) {
2019                for (int r = 0; r < rows; r++) {
2020                    idx2 = 2 * r;
2021                    t[idx2] = a[r][0];
2022                    t[idx2 + 1] = a[r][1];
2023                }
2024                fftRows.complexInverse(t, 0, scale);
2025                for (int r = 0; r < rows; r++) {
2026                    idx2 = 2 * r;
2027                    a[r][0] = t[idx2];
2028                    a[r][1] = t[idx2 + 1];
2029                }
2030            }
2031        }
2032    }
2033
2034    private void xdft2d0_subth1(final int icr, final int isgn, final float[] a, final boolean scale) {
2035        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
2036
2037        Future<?>[] futures = new Future[nthreads];
2038        for (int i = 0; i < nthreads; i++) {
2039            final int n0 = i;
2040            futures[i] = ConcurrencyUtils.submit(new Runnable() {
2041                @Override
2042                                public void run() {
2043                    if (icr == 0) {
2044                        if (isgn == -1) {
2045                            for (int r = n0; r < rows; r += nthreads) {
2046                                fftColumns.complexForward(a, r * columns);
2047                            }
2048                        } else {
2049                            for (int r = n0; r < rows; r += nthreads) {
2050                                fftColumns.complexInverse(a, r * columns, scale);
2051                            }
2052                        }
2053                    } else {
2054                        if (isgn == 1) {
2055                            for (int r = n0; r < rows; r += nthreads) {
2056                                fftColumns.realForward(a, r * columns);
2057                            }
2058                        } else {
2059                            for (int r = n0; r < rows; r += nthreads) {
2060                                fftColumns.realInverse(a, r * columns, scale);
2061                            }
2062                        }
2063                    }
2064                }
2065            });
2066        }
2067        ConcurrencyUtils.waitForCompletion(futures);
2068    }
2069
2070    private void xdft2d0_subth2(final int icr, final int isgn, final float[] a, final boolean scale) {
2071        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
2072
2073        Future<?>[] futures = new Future[nthreads];
2074        for (int i = 0; i < nthreads; i++) {
2075            final int n0 = i;
2076            futures[i] = ConcurrencyUtils.submit(new Runnable() {
2077                @Override
2078                                public void run() {
2079                    if (icr == 0) {
2080                        if (isgn == -1) {
2081                            for (int r = n0; r < rows; r += nthreads) {
2082                                fftColumns.complexForward(a, r * columns);
2083                            }
2084                        } else {
2085                            for (int r = n0; r < rows; r += nthreads) {
2086                                fftColumns.complexInverse(a, r * columns, scale);
2087                            }
2088                        }
2089                    } else {
2090                        if (isgn == 1) {
2091                            for (int r = n0; r < rows; r += nthreads) {
2092                                fftColumns.realForward(a, r * columns);
2093                            }
2094                        } else {
2095                            for (int r = n0; r < rows; r += nthreads) {
2096                                fftColumns.realInverse2(a, r * columns, scale);
2097                            }
2098                        }
2099                    }
2100                }
2101            });
2102        }
2103        ConcurrencyUtils.waitForCompletion(futures);
2104    }
2105
2106    private void xdft2d0_subth1(final int icr, final int isgn, final float[][] a, final boolean scale) {
2107        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
2108
2109        Future<?>[] futures = new Future[nthreads];
2110        for (int i = 0; i < nthreads; i++) {
2111            final int n0 = i;
2112            futures[i] = ConcurrencyUtils.submit(new Runnable() {
2113                @Override
2114                                public void run() {
2115                    if (icr == 0) {
2116                        if (isgn == -1) {
2117                            for (int r = n0; r < rows; r += nthreads) {
2118                                fftColumns.complexForward(a[r]);
2119                            }
2120                        } else {
2121                            for (int r = n0; r < rows; r += nthreads) {
2122                                fftColumns.complexInverse(a[r], scale);
2123                            }
2124                        }
2125                    } else {
2126                        if (isgn == 1) {
2127                            for (int r = n0; r < rows; r += nthreads) {
2128                                fftColumns.realForward(a[r]);
2129                            }
2130                        } else {
2131                            for (int r = n0; r < rows; r += nthreads) {
2132                                fftColumns.realInverse(a[r], scale);
2133                            }
2134                        }
2135                    }
2136                }
2137            });
2138        }
2139        ConcurrencyUtils.waitForCompletion(futures);
2140    }
2141
2142    private void xdft2d0_subth2(final int icr, final int isgn, final float[][] a, final boolean scale) {
2143        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
2144
2145        Future<?>[] futures = new Future[nthreads];
2146        for (int i = 0; i < nthreads; i++) {
2147            final int n0 = i;
2148            futures[i] = ConcurrencyUtils.submit(new Runnable() {
2149                @Override
2150                                public void run() {
2151                    if (icr == 0) {
2152                        if (isgn == -1) {
2153                            for (int r = n0; r < rows; r += nthreads) {
2154                                fftColumns.complexForward(a[r]);
2155                            }
2156                        } else {
2157                            for (int r = n0; r < rows; r += nthreads) {
2158                                fftColumns.complexInverse(a[r], scale);
2159                            }
2160                        }
2161                    } else {
2162                        if (isgn == 1) {
2163                            for (int r = n0; r < rows; r += nthreads) {
2164                                fftColumns.realForward(a[r]);
2165                            }
2166                        } else {
2167                            for (int r = n0; r < rows; r += nthreads) {
2168                                fftColumns.realInverse2(a[r], 0, scale);
2169                            }
2170                        }
2171                    }
2172                }
2173            });
2174        }
2175        ConcurrencyUtils.waitForCompletion(futures);
2176    }
2177
2178    private void cdft2d_subth(final int isgn, final float[] a, final boolean scale) {
2179        int nthread = ConcurrencyUtils.getNumberOfThreads();
2180        int nt = 8 * rows;
2181        if (columns == 4 * nthread) {
2182            nt >>= 1;
2183        } else if (columns < 4 * nthread) {
2184            nthread = columns >> 1;
2185            nt >>= 2;
2186        }
2187        Future<?>[] futures = new Future[nthread];
2188        final int nthreads = nthread;
2189        for (int i = 0; i < nthread; i++) {
2190            final int n0 = i;
2191            final int startt = nt * i;
2192            futures[i] = ConcurrencyUtils.submit(new Runnable() {
2193                @Override
2194                                public void run() {
2195                    int idx1, idx2, idx3, idx4, idx5;
2196                    if (isgn == -1) {
2197                        if (columns > 4 * nthreads) {
2198                            for (int c = 8 * n0; c < columns; c += 8 * nthreads) {
2199                                for (int r = 0; r < rows; r++) {
2200                                    idx1 = r * columns + c;
2201                                    idx2 = startt + 2 * r;
2202                                    idx3 = startt + 2 * rows + 2 * r;
2203                                    idx4 = idx3 + 2 * rows;
2204                                    idx5 = idx4 + 2 * rows;
2205                                    t[idx2] = a[idx1];
2206                                    t[idx2 + 1] = a[idx1 + 1];
2207                                    t[idx3] = a[idx1 + 2];
2208                                    t[idx3 + 1] = a[idx1 + 3];
2209                                    t[idx4] = a[idx1 + 4];
2210                                    t[idx4 + 1] = a[idx1 + 5];
2211                                    t[idx5] = a[idx1 + 6];
2212                                    t[idx5 + 1] = a[idx1 + 7];
2213                                }
2214                                fftRows.complexForward(t, startt);
2215                                fftRows.complexForward(t, startt + 2 * rows);
2216                                fftRows.complexForward(t, startt + 4 * rows);
2217                                fftRows.complexForward(t, startt + 6 * rows);
2218                                for (int r = 0; r < rows; r++) {
2219                                    idx1 = r * columns + c;
2220                                    idx2 = startt + 2 * r;
2221                                    idx3 = startt + 2 * rows + 2 * r;
2222                                    idx4 = idx3 + 2 * rows;
2223                                    idx5 = idx4 + 2 * rows;
2224                                    a[idx1] = t[idx2];
2225                                    a[idx1 + 1] = t[idx2 + 1];
2226                                    a[idx1 + 2] = t[idx3];
2227                                    a[idx1 + 3] = t[idx3 + 1];
2228                                    a[idx1 + 4] = t[idx4];
2229                                    a[idx1 + 5] = t[idx4 + 1];
2230                                    a[idx1 + 6] = t[idx5];
2231                                    a[idx1 + 7] = t[idx5 + 1];
2232                                }
2233                            }
2234                        } else if (columns == 4 * nthreads) {
2235                            for (int r = 0; r < rows; r++) {
2236                                idx1 = r * columns + 4 * n0;
2237                                idx2 = startt + 2 * r;
2238                                idx3 = startt + 2 * rows + 2 * r;
2239                                t[idx2] = a[idx1];
2240                                t[idx2 + 1] = a[idx1 + 1];
2241                                t[idx3] = a[idx1 + 2];
2242                                t[idx3 + 1] = a[idx1 + 3];
2243                            }
2244                            fftRows.complexForward(t, startt);
2245                            fftRows.complexForward(t, startt + 2 * rows);
2246                            for (int r = 0; r < rows; r++) {
2247                                idx1 = r * columns + 4 * n0;
2248                                idx2 = startt + 2 * r;
2249                                idx3 = startt + 2 * rows + 2 * r;
2250                                a[idx1] = t[idx2];
2251                                a[idx1 + 1] = t[idx2 + 1];
2252                                a[idx1 + 2] = t[idx3];
2253                                a[idx1 + 3] = t[idx3 + 1];
2254                            }
2255                        } else if (columns == 2 * nthreads) {
2256                            for (int r = 0; r < rows; r++) {
2257                                idx1 = r * columns + 2 * n0;
2258                                idx2 = startt + 2 * r;
2259                                t[idx2] = a[idx1];
2260                                t[idx2 + 1] = a[idx1 + 1];
2261                            }
2262                            fftRows.complexForward(t, startt);
2263                            for (int r = 0; r < rows; r++) {
2264                                idx1 = r * columns + 2 * n0;
2265                                idx2 = startt + 2 * r;
2266                                a[idx1] = t[idx2];
2267                                a[idx1 + 1] = t[idx2 + 1];
2268                            }
2269                        }
2270                    } else {
2271                        if (columns > 4 * nthreads) {
2272                            for (int c = 8 * n0; c < columns; c += 8 * nthreads) {
2273                                for (int r = 0; r < rows; r++) {
2274                                    idx1 = r * columns + c;
2275                                    idx2 = startt + 2 * r;
2276                                    idx3 = startt + 2 * rows + 2 * r;
2277                                    idx4 = idx3 + 2 * rows;
2278                                    idx5 = idx4 + 2 * rows;
2279                                    t[idx2] = a[idx1];
2280                                    t[idx2 + 1] = a[idx1 + 1];
2281                                    t[idx3] = a[idx1 + 2];
2282                                    t[idx3 + 1] = a[idx1 + 3];
2283                                    t[idx4] = a[idx1 + 4];
2284                                    t[idx4 + 1] = a[idx1 + 5];
2285                                    t[idx5] = a[idx1 + 6];
2286                                    t[idx5 + 1] = a[idx1 + 7];
2287                                }
2288                                fftRows.complexInverse(t, startt, scale);
2289                                fftRows.complexInverse(t, startt + 2 * rows, scale);
2290                                fftRows.complexInverse(t, startt + 4 * rows, scale);
2291                                fftRows.complexInverse(t, startt + 6 * rows, scale);
2292                                for (int r = 0; r < rows; r++) {
2293                                    idx1 = r * columns + c;
2294                                    idx2 = startt + 2 * r;
2295                                    idx3 = startt + 2 * rows + 2 * r;
2296                                    idx4 = idx3 + 2 * rows;
2297                                    idx5 = idx4 + 2 * rows;
2298                                    a[idx1] = t[idx2];
2299                                    a[idx1 + 1] = t[idx2 + 1];
2300                                    a[idx1 + 2] = t[idx3];
2301                                    a[idx1 + 3] = t[idx3 + 1];
2302                                    a[idx1 + 4] = t[idx4];
2303                                    a[idx1 + 5] = t[idx4 + 1];
2304                                    a[idx1 + 6] = t[idx5];
2305                                    a[idx1 + 7] = t[idx5 + 1];
2306                                }
2307                            }
2308                        } else if (columns == 4 * nthreads) {
2309                            for (int r = 0; r < rows; r++) {
2310                                idx1 = r * columns + 4 * n0;
2311                                idx2 = startt + 2 * r;
2312                                idx3 = startt + 2 * rows + 2 * r;
2313                                t[idx2] = a[idx1];
2314                                t[idx2 + 1] = a[idx1 + 1];
2315                                t[idx3] = a[idx1 + 2];
2316                                t[idx3 + 1] = a[idx1 + 3];
2317                            }
2318                            fftRows.complexInverse(t, startt, scale);
2319                            fftRows.complexInverse(t, startt + 2 * rows, scale);
2320                            for (int r = 0; r < rows; r++) {
2321                                idx1 = r * columns + 4 * n0;
2322                                idx2 = startt + 2 * r;
2323                                idx3 = startt + 2 * rows + 2 * r;
2324                                a[idx1] = t[idx2];
2325                                a[idx1 + 1] = t[idx2 + 1];
2326                                a[idx1 + 2] = t[idx3];
2327                                a[idx1 + 3] = t[idx3 + 1];
2328                            }
2329                        } else if (columns == 2 * nthreads) {
2330                            for (int r = 0; r < rows; r++) {
2331                                idx1 = r * columns + 2 * n0;
2332                                idx2 = startt + 2 * r;
2333                                t[idx2] = a[idx1];
2334                                t[idx2 + 1] = a[idx1 + 1];
2335                            }
2336                            fftRows.complexInverse(t, startt, scale);
2337                            for (int r = 0; r < rows; r++) {
2338                                idx1 = r * columns + 2 * n0;
2339                                idx2 = startt + 2 * r;
2340                                a[idx1] = t[idx2];
2341                                a[idx1 + 1] = t[idx2 + 1];
2342                            }
2343                        }
2344                    }
2345                }
2346            });
2347        }
2348        ConcurrencyUtils.waitForCompletion(futures);
2349    }
2350
2351    private void cdft2d_subth(final int isgn, final float[][] a, final boolean scale) {
2352        int nthread = ConcurrencyUtils.getNumberOfThreads();
2353        int nt = 8 * rows;
2354        if (columns == 4 * nthread) {
2355            nt >>= 1;
2356        } else if (columns < 4 * nthread) {
2357            nthread = columns >> 1;
2358            nt >>= 2;
2359        }
2360        Future<?>[] futures = new Future[nthread];
2361        final int nthreads = nthread;
2362        for (int i = 0; i < nthreads; i++) {
2363            final int n0 = i;
2364            final int startt = nt * i;
2365            futures[i] = ConcurrencyUtils.submit(new Runnable() {
2366                @Override
2367                                public void run() {
2368                    int idx2, idx3, idx4, idx5;
2369                    if (isgn == -1) {
2370                        if (columns > 4 * nthreads) {
2371                            for (int c = 8 * n0; c < columns; c += 8 * nthreads) {
2372                                for (int r = 0; r < rows; r++) {
2373                                    idx2 = startt + 2 * r;
2374                                    idx3 = startt + 2 * rows + 2 * r;
2375                                    idx4 = idx3 + 2 * rows;
2376                                    idx5 = idx4 + 2 * rows;
2377                                    t[idx2] = a[r][c];
2378                                    t[idx2 + 1] = a[r][c + 1];
2379                                    t[idx3] = a[r][c + 2];
2380                                    t[idx3 + 1] = a[r][c + 3];
2381                                    t[idx4] = a[r][c + 4];
2382                                    t[idx4 + 1] = a[r][c + 5];
2383                                    t[idx5] = a[r][c + 6];
2384                                    t[idx5 + 1] = a[r][c + 7];
2385                                }
2386                                fftRows.complexForward(t, startt);
2387                                fftRows.complexForward(t, startt + 2 * rows);
2388                                fftRows.complexForward(t, startt + 4 * rows);
2389                                fftRows.complexForward(t, startt + 6 * rows);
2390                                for (int r = 0; r < rows; r++) {
2391                                    idx2 = startt + 2 * r;
2392                                    idx3 = startt + 2 * rows + 2 * r;
2393                                    idx4 = idx3 + 2 * rows;
2394                                    idx5 = idx4 + 2 * rows;
2395                                    a[r][c] = t[idx2];
2396                                    a[r][c + 1] = t[idx2 + 1];
2397                                    a[r][c + 2] = t[idx3];
2398                                    a[r][c + 3] = t[idx3 + 1];
2399                                    a[r][c + 4] = t[idx4];
2400                                    a[r][c + 5] = t[idx4 + 1];
2401                                    a[r][c + 6] = t[idx5];
2402                                    a[r][c + 7] = t[idx5 + 1];
2403                                }
2404                            }
2405                        } else if (columns == 4 * nthreads) {
2406                            for (int r = 0; r < rows; r++) {
2407                                idx2 = startt + 2 * r;
2408                                idx3 = startt + 2 * rows + 2 * r;
2409                                t[idx2] = a[r][4 * n0];
2410                                t[idx2 + 1] = a[r][4 * n0 + 1];
2411                                t[idx3] = a[r][4 * n0 + 2];
2412                                t[idx3 + 1] = a[r][4 * n0 + 3];
2413                            }
2414                            fftRows.complexForward(t, startt);
2415                            fftRows.complexForward(t, startt + 2 * rows);
2416                            for (int r = 0; r < rows; r++) {
2417                                idx2 = startt + 2 * r;
2418                                idx3 = startt + 2 * rows + 2 * r;
2419                                a[r][4 * n0] = t[idx2];
2420                                a[r][4 * n0 + 1] = t[idx2 + 1];
2421                                a[r][4 * n0 + 2] = t[idx3];
2422                                a[r][4 * n0 + 3] = t[idx3 + 1];
2423                            }
2424                        } else if (columns == 2 * nthreads) {
2425                            for (int r = 0; r < rows; r++) {
2426                                idx2 = startt + 2 * r;
2427                                t[idx2] = a[r][2 * n0];
2428                                t[idx2 + 1] = a[r][2 * n0 + 1];
2429                            }
2430                            fftRows.complexForward(t, startt);
2431                            for (int r = 0; r < rows; r++) {
2432                                idx2 = startt + 2 * r;
2433                                a[r][2 * n0] = t[idx2];
2434                                a[r][2 * n0 + 1] = t[idx2 + 1];
2435                            }
2436                        }
2437                    } else {
2438                        if (columns > 4 * nthreads) {
2439                            for (int c = 8 * n0; c < columns; c += 8 * nthreads) {
2440                                for (int r = 0; r < rows; r++) {
2441                                    idx2 = startt + 2 * r;
2442                                    idx3 = startt + 2 * rows + 2 * r;
2443                                    idx4 = idx3 + 2 * rows;
2444                                    idx5 = idx4 + 2 * rows;
2445                                    t[idx2] = a[r][c];
2446                                    t[idx2 + 1] = a[r][c + 1];
2447                                    t[idx3] = a[r][c + 2];
2448                                    t[idx3 + 1] = a[r][c + 3];
2449                                    t[idx4] = a[r][c + 4];
2450                                    t[idx4 + 1] = a[r][c + 5];
2451                                    t[idx5] = a[r][c + 6];
2452                                    t[idx5 + 1] = a[r][c + 7];
2453                                }
2454                                fftRows.complexInverse(t, startt, scale);
2455                                fftRows.complexInverse(t, startt + 2 * rows, scale);
2456                                fftRows.complexInverse(t, startt + 4 * rows, scale);
2457                                fftRows.complexInverse(t, startt + 6 * rows, scale);
2458                                for (int r = 0; r < rows; r++) {
2459                                    idx2 = startt + 2 * r;
2460                                    idx3 = startt + 2 * rows + 2 * r;
2461                                    idx4 = idx3 + 2 * rows;
2462                                    idx5 = idx4 + 2 * rows;
2463                                    a[r][c] = t[idx2];
2464                                    a[r][c + 1] = t[idx2 + 1];
2465                                    a[r][c + 2] = t[idx3];
2466                                    a[r][c + 3] = t[idx3 + 1];
2467                                    a[r][c + 4] = t[idx4];
2468                                    a[r][c + 5] = t[idx4 + 1];
2469                                    a[r][c + 6] = t[idx5];
2470                                    a[r][c + 7] = t[idx5 + 1];
2471                                }
2472                            }
2473                        } else if (columns == 4 * nthreads) {
2474                            for (int r = 0; r < rows; r++) {
2475                                idx2 = startt + 2 * r;
2476                                idx3 = startt + 2 * rows + 2 * r;
2477                                t[idx2] = a[r][4 * n0];
2478                                t[idx2 + 1] = a[r][4 * n0 + 1];
2479                                t[idx3] = a[r][4 * n0 + 2];
2480                                t[idx3 + 1] = a[r][4 * n0 + 3];
2481                            }
2482                            fftRows.complexInverse(t, startt, scale);
2483                            fftRows.complexInverse(t, startt + 2 * rows, scale);
2484                            for (int r = 0; r < rows; r++) {
2485                                idx2 = startt + 2 * r;
2486                                idx3 = startt + 2 * rows + 2 * r;
2487                                a[r][4 * n0] = t[idx2];
2488                                a[r][4 * n0 + 1] = t[idx2 + 1];
2489                                a[r][4 * n0 + 2] = t[idx3];
2490                                a[r][4 * n0 + 3] = t[idx3 + 1];
2491                            }
2492                        } else if (columns == 2 * nthreads) {
2493                            for (int r = 0; r < rows; r++) {
2494                                idx2 = startt + 2 * r;
2495                                t[idx2] = a[r][2 * n0];
2496                                t[idx2 + 1] = a[r][2 * n0 + 1];
2497                            }
2498                            fftRows.complexInverse(t, startt, scale);
2499                            for (int r = 0; r < rows; r++) {
2500                                idx2 = startt + 2 * r;
2501                                a[r][2 * n0] = t[idx2];
2502                                a[r][2 * n0 + 1] = t[idx2 + 1];
2503                            }
2504                        }
2505                    }
2506                }
2507            });
2508        }
2509        ConcurrencyUtils.waitForCompletion(futures);
2510    }
2511
2512    private void fillSymmetric(final float[] a) {
2513        final int twon2 = 2 * columns;
2514        int idx1, idx2, idx3, idx4;
2515        int n1d2 = rows / 2;
2516
2517        for (int r = (rows - 1); r >= 1; r--) {
2518            idx1 = r * columns;
2519            idx2 = 2 * idx1;
2520            for (int c = 0; c < columns; c += 2) {
2521                a[idx2 + c] = a[idx1 + c];
2522                a[idx1 + c] = 0;
2523                a[idx2 + c + 1] = a[idx1 + c + 1];
2524                a[idx1 + c + 1] = 0;
2525            }
2526        }
2527        int nthreads = ConcurrencyUtils.getNumberOfThreads();
2528        if ((nthreads > 1) && useThreads && (n1d2 >= nthreads)) {
2529            Future<?>[] futures = new Future[nthreads];
2530            int l1k = n1d2 / nthreads;
2531            final int newn2 = 2 * columns;
2532            for (int i = 0; i < nthreads; i++) {
2533                final int l1offa, l1stopa, l2offa, l2stopa;
2534                if (i == 0)
2535                    l1offa = i * l1k + 1;
2536                else {
2537                    l1offa = i * l1k;
2538                }
2539                l1stopa = i * l1k + l1k;
2540                l2offa = i * l1k;
2541                if (i == nthreads - 1) {
2542                    l2stopa = i * l1k + l1k + 1;
2543                } else {
2544                    l2stopa = i * l1k + l1k;
2545                }
2546                futures[i] = ConcurrencyUtils.submit(new Runnable() {
2547                    @Override
2548                                        public void run() {
2549                        int idx1, idx2, idx3, idx4;
2550
2551                        for (int r = l1offa; r < l1stopa; r++) {
2552                            idx1 = r * newn2;
2553                            idx2 = (rows - r) * newn2;
2554                            idx3 = idx1 + columns;
2555                            a[idx3] = a[idx2 + 1];
2556                            a[idx3 + 1] = -a[idx2];
2557                        }
2558                        for (int r = l1offa; r < l1stopa; r++) {
2559                            idx1 = r * newn2;
2560                            idx3 = (rows - r + 1) * newn2;
2561                            for (int c = columns + 2; c < newn2; c += 2) {
2562                                idx2 = idx3 - c;
2563                                idx4 = idx1 + c;
2564                                a[idx4] = a[idx2];
2565                                a[idx4 + 1] = -a[idx2 + 1];
2566
2567                            }
2568                        }
2569                        for (int r = l2offa; r < l2stopa; r++) {
2570                            idx3 = ((rows - r) % rows) * newn2;
2571                            idx4 = r * newn2;
2572                            for (int c = 0; c < newn2; c += 2) {
2573                                idx1 = idx3 + (newn2 - c) % newn2;
2574                                idx2 = idx4 + c;
2575                                a[idx1] = a[idx2];
2576                                a[idx1 + 1] = -a[idx2 + 1];
2577                            }
2578                        }
2579                    }
2580                });
2581            }
2582            ConcurrencyUtils.waitForCompletion(futures);
2583
2584        } else {
2585
2586            for (int r = 1; r < n1d2; r++) {
2587                idx2 = r * twon2;
2588                idx3 = (rows - r) * twon2;
2589                a[idx2 + columns] = a[idx3 + 1];
2590                a[idx2 + columns + 1] = -a[idx3];
2591            }
2592
2593            for (int r = 1; r < n1d2; r++) {
2594                idx2 = r * twon2;
2595                idx3 = (rows - r + 1) * twon2;
2596                for (int c = columns + 2; c < twon2; c += 2) {
2597                    a[idx2 + c] = a[idx3 - c];
2598                    a[idx2 + c + 1] = -a[idx3 - c + 1];
2599
2600                }
2601            }
2602            for (int r = 0; r <= rows / 2; r++) {
2603                idx1 = r * twon2;
2604                idx4 = ((rows - r) % rows) * twon2;
2605                for (int c = 0; c < twon2; c += 2) {
2606                    idx2 = idx1 + c;
2607                    idx3 = idx4 + (twon2 - c) % twon2;
2608                    a[idx3] = a[idx2];
2609                    a[idx3 + 1] = -a[idx2 + 1];
2610                }
2611            }
2612        }
2613        a[columns] = -a[1];
2614        a[1] = 0;
2615        idx1 = n1d2 * twon2;
2616        a[idx1 + columns] = -a[idx1 + 1];
2617        a[idx1 + 1] = 0;
2618        a[idx1 + columns + 1] = 0;
2619    }
2620
2621    private void fillSymmetric(final float[][] a) {
2622        final int newn2 = 2 * columns;
2623        int n1d2 = rows / 2;
2624
2625        int nthreads = ConcurrencyUtils.getNumberOfThreads();
2626        if ((nthreads > 1) && useThreads && (n1d2 >= nthreads)) {
2627            Future<?>[] futures = new Future[nthreads];
2628            int l1k = n1d2 / nthreads;
2629            for (int i = 0; i < nthreads; i++) {
2630                final int l1offa, l1stopa, l2offa, l2stopa;
2631                if (i == 0)
2632                    l1offa = i * l1k + 1;
2633                else {
2634                    l1offa = i * l1k;
2635                }
2636                l1stopa = i * l1k + l1k;
2637                l2offa = i * l1k;
2638                if (i == nthreads - 1) {
2639                    l2stopa = i * l1k + l1k + 1;
2640                } else {
2641                    l2stopa = i * l1k + l1k;
2642                }
2643                futures[i] = ConcurrencyUtils.submit(new Runnable() {
2644                    @Override
2645                                        public void run() {
2646                        int idx1, idx2;
2647                        for (int r = l1offa; r < l1stopa; r++) {
2648                            idx1 = rows - r;
2649                            a[r][columns] = a[idx1][1];
2650                            a[r][columns + 1] = -a[idx1][0];
2651                        }
2652                        for (int r = l1offa; r < l1stopa; r++) {
2653                            idx1 = rows - r;
2654                            for (int c = columns + 2; c < newn2; c += 2) {
2655                                idx2 = newn2 - c;
2656                                a[r][c] = a[idx1][idx2];
2657                                a[r][c + 1] = -a[idx1][idx2 + 1];
2658
2659                            }
2660                        }
2661                        for (int r = l2offa; r < l2stopa; r++) {
2662                            idx1 = (rows - r) % rows;
2663                            for (int c = 0; c < newn2; c = c + 2) {
2664                                idx2 = (newn2 - c) % newn2;
2665                                a[idx1][idx2] = a[r][c];
2666                                a[idx1][idx2 + 1] = -a[r][c + 1];
2667                            }
2668                        }
2669                    }
2670                });
2671            }
2672            ConcurrencyUtils.waitForCompletion(futures);
2673        } else {
2674
2675            for (int r = 1; r < n1d2; r++) {
2676                int idx1 = rows - r;
2677                a[r][columns] = a[idx1][1];
2678                a[r][columns + 1] = -a[idx1][0];
2679            }
2680            for (int r = 1; r < n1d2; r++) {
2681                int idx1 = rows - r;
2682                for (int c = columns + 2; c < newn2; c += 2) {
2683                    int idx2 = newn2 - c;
2684                    a[r][c] = a[idx1][idx2];
2685                    a[r][c + 1] = -a[idx1][idx2 + 1];
2686                }
2687            }
2688            for (int r = 0; r <= rows / 2; r++) {
2689                int idx1 = (rows - r) % rows;
2690                for (int c = 0; c < newn2; c += 2) {
2691                    int idx2 = (newn2 - c) % newn2;
2692                    a[idx1][idx2] = a[r][c];
2693                    a[idx1][idx2 + 1] = -a[r][c + 1];
2694                }
2695            }
2696        }
2697        a[0][columns] = -a[0][1];
2698        a[0][1] = 0;
2699        a[n1d2][columns] = -a[n1d2][1];
2700        a[n1d2][1] = 0;
2701        a[n1d2][columns + 1] = 0;
2702    }
2703}