001/* ***** BEGIN LICENSE BLOCK *****
002 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
003 *
004 * The contents of this file are subject to the Mozilla Public License Version
005 * 1.1 (the "License"); you may not use this file except in compliance with
006 * the License. You may obtain a copy of the License at
007 * http://www.mozilla.org/MPL/
008 *
009 * Software distributed under the License is distributed on an "AS IS" basis,
010 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
011 * for the specific language governing rights and limitations under the
012 * License.
013 *
014 * The Original Code is JTransforms.
015 *
016 * The Initial Developer of the Original Code is
017 * Piotr Wendykier, Emory University.
018 * Portions created by the Initial Developer are Copyright (C) 2007-2009
019 * the Initial Developer. All Rights Reserved.
020 *
021 * Alternatively, the contents of this file may be used under the terms of
022 * either the GNU General Public License Version 2 or later (the "GPL"), or
023 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
024 * in which case the provisions of the GPL or the LGPL are applicable instead
025 * of those above. If you wish to allow use of your version of this file only
026 * under the terms of either the GPL or the LGPL, and not to allow others to
027 * use your version of this file under the terms of the MPL, indicate your
028 * decision by deleting the provisions above and replace them with the notice
029 * and other provisions required by the GPL or the LGPL. If you do not delete
030 * the provisions above, a recipient may use your version of this file under
031 * the terms of any one of the MPL, the GPL or the LGPL.
032 *
033 * ***** END LICENSE BLOCK ***** */
034
035package edu.emory.mathcs.jtransforms.fft;
036
037import java.util.concurrent.Future;
038
039import edu.emory.mathcs.utils.ConcurrencyUtils;
040
041/**
042 * Computes 2D Discrete Fourier Transform (DFT) of complex and real, double
043 * precision data. The sizes of both dimensions can be arbitrary numbers. This
044 * is a parallel implementation of split-radix and mixed-radix algorithms
045 * optimized for SMP systems. <br>
046 * <br>
047 * Part of the code is derived from General Purpose FFT Package written by Takuya Ooura
048 * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
049 * 
050 * @author Piotr Wendykier (piotr.wendykier@gmail.com)
051 * 
052 */
053public class DoubleFFT_2D {
054
055    private int rows;
056
057    private int columns;
058
059    private double[] t;
060
061    private DoubleFFT_1D fftColumns, fftRows;
062
063    private int oldNthreads;
064
065    private int nt;
066
067    private boolean isPowerOfTwo = false;
068
069    private boolean useThreads = false;
070
071    /**
072     * Creates new instance of DoubleFFT_2D.
073     * 
074     * @param rows
075     *            number of rows
076     * @param columns
077     *            number of columns
078     */
079    public DoubleFFT_2D(int rows, int columns) {
080        if (rows <= 1 || columns <= 1) {
081            throw new IllegalArgumentException("rows and columns must be greater than 1");
082        }
083        this.rows = rows;
084        this.columns = columns;
085        if (rows * columns >= ConcurrencyUtils.getThreadsBeginN_2D()) {
086            this.useThreads = true;
087        }
088        if (ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) {
089            isPowerOfTwo = true;
090            oldNthreads = ConcurrencyUtils.getNumberOfThreads();
091            nt = 8 * oldNthreads * rows;
092            if (2 * columns == 4 * oldNthreads) {
093                nt >>= 1;
094            } else if (2 * columns < 4 * oldNthreads) {
095                nt >>= 2;
096            }
097            t = new double[nt];
098        }
099        fftRows = new DoubleFFT_1D(rows);
100        if (rows == columns) {
101            fftColumns = fftRows;
102        } else {
103            fftColumns = new DoubleFFT_1D(columns);
104        }
105    }
106
107    /**
108     * Computes 2D forward DFT of complex data leaving the result in
109     * <code>a</code>. The data is stored in 1D array in row-major order.
110     * Complex number is stored as two double values in sequence: the real and
111     * imaginary part, i.e. the input array must be of size rows*2*columns. The
112     * physical layout of the input data has to be as follows:<br>
113     * 
114     * <pre>
115     * a[k1*2*columns+2*k2] = Re[k1][k2], 
116     * a[k1*2*columns+2*k2+1] = Im[k1][k2], 0&lt;=k1&lt;rows, 0&lt;=k2&lt;columns,
117     * </pre>
118     * 
119     * @param a
120     *            data to transform
121     */
122    public void complexForward(final double[] a) {
123        int nthreads = ConcurrencyUtils.getNumberOfThreads();
124        if (isPowerOfTwo) {
125            int oldn2 = columns;
126            columns = 2 * columns;
127            if (nthreads != oldNthreads) {
128                nt = 8 * nthreads * rows;
129                if (columns == 4 * nthreads) {
130                    nt >>= 1;
131                } else if (columns < 4 * nthreads) {
132                    nt >>= 2;
133                }
134                t = new double[nt];
135                oldNthreads = nthreads;
136            }
137            if ((nthreads > 1) && useThreads) {
138                xdft2d0_subth1(0, -1, a, true);
139                cdft2d_subth(-1, a, true);
140            } else {
141                for (int r = 0; r < rows; r++) {
142                    fftColumns.complexForward(a, r * columns);
143                }
144                cdft2d_sub(-1, a, true);
145            }
146            columns = oldn2;
147        } else {
148            final int rowStride = 2 * columns;
149            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
150                Future<?>[] futures = new Future[nthreads];
151                int p = rows / nthreads;
152                for (int l = 0; l < nthreads; l++) {
153                    final int firstRow = l * p;
154                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
155                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
156                        @Override
157                                                public void run() {
158                            for (int r = firstRow; r < lastRow; r++) {
159                                fftColumns.complexForward(a, r * rowStride);
160                            }
161                        }
162                    });
163                }
164                ConcurrencyUtils.waitForCompletion(futures);
165                p = columns / nthreads;
166                for (int l = 0; l < nthreads; l++) {
167                    final int firstColumn = l * p;
168                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
169                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
170                        @Override
171                                                public void run() {
172                            double[] temp = new double[2 * rows];
173                            for (int c = firstColumn; c < lastColumn; c++) {
174                                int idx0 = 2 * c;
175                                for (int r = 0; r < rows; r++) {
176                                    int idx1 = 2 * r;
177                                    int idx2 = r * rowStride + idx0;
178                                    temp[idx1] = a[idx2];
179                                    temp[idx1 + 1] = a[idx2 + 1];
180                                }
181                                fftRows.complexForward(temp);
182                                for (int r = 0; r < rows; r++) {
183                                    int idx1 = 2 * r;
184                                    int idx2 = r * rowStride + idx0;
185                                    a[idx2] = temp[idx1];
186                                    a[idx2 + 1] = temp[idx1 + 1];
187                                }
188                            }
189                        }
190                    });
191                }
192                ConcurrencyUtils.waitForCompletion(futures);
193            } else {
194                for (int r = 0; r < rows; r++) {
195                    fftColumns.complexForward(a, r * rowStride);
196                }
197                double[] temp = new double[2 * rows];
198                for (int c = 0; c < columns; c++) {
199                    int idx0 = 2 * c;
200                    for (int r = 0; r < rows; r++) {
201                        int idx1 = 2 * r;
202                        int idx2 = r * rowStride + idx0;
203                        temp[idx1] = a[idx2];
204                        temp[idx1 + 1] = a[idx2 + 1];
205                    }
206                    fftRows.complexForward(temp);
207                    for (int r = 0; r < rows; r++) {
208                        int idx1 = 2 * r;
209                        int idx2 = r * rowStride + idx0;
210                        a[idx2] = temp[idx1];
211                        a[idx2 + 1] = temp[idx1 + 1];
212                    }
213                }
214            }
215        }
216    }
217
218    /**
219     * Computes 2D forward DFT of complex data leaving the result in
220     * <code>a</code>. The data is stored in 2D array. Complex data is
221     * represented by 2 double values in sequence: the real and imaginary part,
222     * i.e. the input array must be of size rows by 2*columns. The physical
223     * layout of the input data has to be as follows:<br>
224     * 
225     * <pre>
226     * a[k1][2*k2] = Re[k1][k2], 
227     * a[k1][2*k2+1] = Im[k1][k2], 0&lt;=k1&lt;rows, 0&lt;=k2&lt;columns,
228     * </pre>
229     * 
230     * @param a
231     *            data to transform
232     */
233    public void complexForward(final double[][] a) {
234        int nthreads = ConcurrencyUtils.getNumberOfThreads();
235        if (isPowerOfTwo) {
236            int oldn2 = columns;
237            columns = 2 * columns;
238            if (nthreads != oldNthreads) {
239                nt = 8 * nthreads * rows;
240                if (columns == 4 * nthreads) {
241                    nt >>= 1;
242                } else if (columns < 4 * nthreads) {
243                    nt >>= 2;
244                }
245                t = new double[nt];
246                oldNthreads = nthreads;
247            }
248            if ((nthreads > 1) && useThreads) {
249                xdft2d0_subth1(0, -1, a, true);
250                cdft2d_subth(-1, a, true);
251            } else {
252                for (int r = 0; r < rows; r++) {
253                    fftColumns.complexForward(a[r]);
254                }
255                cdft2d_sub(-1, a, true);
256            }
257            columns = oldn2;
258        } else {
259            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
260                Future<?>[] futures = new Future[nthreads];
261                int p = rows / nthreads;
262                for (int l = 0; l < nthreads; l++) {
263                    final int firstRow = l * p;
264                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
265                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
266                        @Override
267                                                public void run() {
268                            for (int r = firstRow; r < lastRow; r++) {
269                                fftColumns.complexForward(a[r]);
270                            }
271                        }
272                    });
273                }
274                ConcurrencyUtils.waitForCompletion(futures);
275                p = columns / nthreads;
276                for (int l = 0; l < nthreads; l++) {
277                    final int firstColumn = l * p;
278                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
279                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
280                        @Override
281                                                public void run() {
282                            double[] temp = new double[2 * rows];
283                            for (int c = firstColumn; c < lastColumn; c++) {
284                                int idx1 = 2 * c;
285                                for (int r = 0; r < rows; r++) {
286                                    int idx2 = 2 * r;
287                                    temp[idx2] = a[r][idx1];
288                                    temp[idx2 + 1] = a[r][idx1 + 1];
289                                }
290                                fftRows.complexForward(temp);
291                                for (int r = 0; r < rows; r++) {
292                                    int idx2 = 2 * r;
293                                    a[r][idx1] = temp[idx2];
294                                    a[r][idx1 + 1] = temp[idx2 + 1];
295                                }
296                            }
297                        }
298                    });
299                }
300                ConcurrencyUtils.waitForCompletion(futures);
301            } else {
302                for (int r = 0; r < rows; r++) {
303                    fftColumns.complexForward(a[r]);
304                }
305                double[] temp = new double[2 * rows];
306                for (int c = 0; c < columns; c++) {
307                    int idx1 = 2 * c;
308                    for (int r = 0; r < rows; r++) {
309                        int idx2 = 2 * r;
310                        temp[idx2] = a[r][idx1];
311                        temp[idx2 + 1] = a[r][idx1 + 1];
312                    }
313                    fftRows.complexForward(temp);
314                    for (int r = 0; r < rows; r++) {
315                        int idx2 = 2 * r;
316                        a[r][idx1] = temp[idx2];
317                        a[r][idx1 + 1] = temp[idx2 + 1];
318                    }
319                }
320            }
321        }
322    }
323
324    /**
325     * Computes 2D inverse DFT of complex data leaving the result in
326     * <code>a</code>. The data is stored in 1D array in row-major order.
327     * Complex number is stored as two double values in sequence: the real and
328     * imaginary part, i.e. the input array must be of size rows*2*columns. The
329     * physical layout of the input data has to be as follows:<br>
330     * 
331     * <pre>
332     * a[k1*2*columns+2*k2] = Re[k1][k2], 
333     * a[k1*2*columns+2*k2+1] = Im[k1][k2], 0&lt;=k1&lt;rows, 0&lt;=k2&lt;columns,
334     * </pre>
335     * 
336     * @param a
337     *            data to transform
338     * @param scale
339     *            if true then scaling is performed
340     * 
341     */
342    public void complexInverse(final double[] a, final boolean scale) {
343        int nthreads = ConcurrencyUtils.getNumberOfThreads();
344        if (isPowerOfTwo) {
345            int oldn2 = columns;
346            columns = 2 * columns;
347            if (nthreads != oldNthreads) {
348                nt = 8 * nthreads * rows;
349                if (columns == 4 * nthreads) {
350                    nt >>= 1;
351                } else if (columns < 4 * nthreads) {
352                    nt >>= 2;
353                }
354                t = new double[nt];
355                oldNthreads = nthreads;
356            }
357            if ((nthreads > 1) && useThreads) {
358                xdft2d0_subth1(0, 1, a, scale);
359                cdft2d_subth(1, a, scale);
360            } else {
361
362                for (int r = 0; r < rows; r++) {
363                    fftColumns.complexInverse(a, r * columns, scale);
364                }
365                cdft2d_sub(1, a, scale);
366            }
367            columns = oldn2;
368        } else {
369            final int rowspan = 2 * columns;
370            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
371                Future<?>[] futures = new Future[nthreads];
372                int p = rows / nthreads;
373                for (int l = 0; l < nthreads; l++) {
374                    final int firstRow = l * p;
375                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
376                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
377                        @Override
378                                                public void run() {
379                            for (int r = firstRow; r < lastRow; r++) {
380                                fftColumns.complexInverse(a, r * rowspan, scale);
381                            }
382                        }
383                    });
384                }
385                ConcurrencyUtils.waitForCompletion(futures);
386                p = columns / nthreads;
387                for (int l = 0; l < nthreads; l++) {
388                    final int firstColumn = l * p;
389                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
390                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
391                        @Override
392                                                public void run() {
393                            double[] temp = new double[2 * rows];
394                            for (int c = firstColumn; c < lastColumn; c++) {
395                                int idx1 = 2 * c;
396                                for (int r = 0; r < rows; r++) {
397                                    int idx2 = 2 * r;
398                                    int idx3 = r * rowspan + idx1;
399                                    temp[idx2] = a[idx3];
400                                    temp[idx2 + 1] = a[idx3 + 1];
401                                }
402                                fftRows.complexInverse(temp, scale);
403                                for (int r = 0; r < rows; r++) {
404                                    int idx2 = 2 * r;
405                                    int idx3 = r * rowspan + idx1;
406                                    a[idx3] = temp[idx2];
407                                    a[idx3 + 1] = temp[idx2 + 1];
408                                }
409                            }
410                        }
411                    });
412                }
413                ConcurrencyUtils.waitForCompletion(futures);
414            } else {
415                for (int r = 0; r < rows; r++) {
416                    fftColumns.complexInverse(a, r * rowspan, scale);
417                }
418                double[] temp = new double[2 * rows];
419                for (int c = 0; c < columns; c++) {
420                    int idx1 = 2 * c;
421                    for (int r = 0; r < rows; r++) {
422                        int idx2 = 2 * r;
423                        int idx3 = r * rowspan + idx1;
424                        temp[idx2] = a[idx3];
425                        temp[idx2 + 1] = a[idx3 + 1];
426                    }
427                    fftRows.complexInverse(temp, scale);
428                    for (int r = 0; r < rows; r++) {
429                        int idx2 = 2 * r;
430                        int idx3 = r * rowspan + idx1;
431                        a[idx3] = temp[idx2];
432                        a[idx3 + 1] = temp[idx2 + 1];
433                    }
434                }
435            }
436        }
437    }
438
439    /**
440     * Computes 2D inverse DFT of complex data leaving the result in
441     * <code>a</code>. The data is stored in 2D array. Complex data is
442     * represented by 2 double values in sequence: the real and imaginary part,
443     * i.e. the input array must be of size rows by 2*columns. The physical
444     * layout of the input data has to be as follows:<br>
445     * 
446     * <pre>
447     * a[k1][2*k2] = Re[k1][k2], 
448     * a[k1][2*k2+1] = Im[k1][k2], 0&lt;=k1&lt;rows, 0&lt;=k2&lt;columns,
449     * </pre>
450     * 
451     * @param a
452     *            data to transform
453     * @param scale
454     *            if true then scaling is performed
455     * 
456     */
457    public void complexInverse(final double[][] a, final boolean scale) {
458        int nthreads = ConcurrencyUtils.getNumberOfThreads();
459        if (isPowerOfTwo) {
460            int oldn2 = columns;
461            columns = 2 * columns;
462
463            if (nthreads != oldNthreads) {
464                nt = 8 * nthreads * rows;
465                if (columns == 4 * nthreads) {
466                    nt >>= 1;
467                } else if (columns < 4 * nthreads) {
468                    nt >>= 2;
469                }
470                t = new double[nt];
471                oldNthreads = nthreads;
472            }
473            if ((nthreads > 1) && useThreads) {
474                xdft2d0_subth1(0, 1, a, scale);
475                cdft2d_subth(1, a, scale);
476            } else {
477
478                for (int r = 0; r < rows; r++) {
479                    fftColumns.complexInverse(a[r], scale);
480                }
481                cdft2d_sub(1, a, scale);
482            }
483            columns = oldn2;
484        } else {
485            if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) {
486                Future<?>[] futures = new Future[nthreads];
487                int p = rows / nthreads;
488                for (int l = 0; l < nthreads; l++) {
489                    final int firstRow = l * p;
490                    final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
491                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
492                        @Override
493                                                public void run() {
494                            for (int r = firstRow; r < lastRow; r++) {
495                                fftColumns.complexInverse(a[r], scale);
496                            }
497                        }
498                    });
499                }
500                ConcurrencyUtils.waitForCompletion(futures);
501                p = columns / nthreads;
502                for (int l = 0; l < nthreads; l++) {
503                    final int firstColumn = l * p;
504                    final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p;
505                    futures[l] = ConcurrencyUtils.submit(new Runnable() {
506                        @Override
507                                                public void run() {
508                            double[] temp = new double[2 * rows];
509                            for (int c = firstColumn; c < lastColumn; c++) {
510                                int idx1 = 2 * c;
511                                for (int r = 0; r < rows; r++) {
512                                    int idx2 = 2 * r;
513                                    temp[idx2] = a[r][idx1];
514                                    temp[idx2 + 1] = a[r][idx1 + 1];
515                                }
516                                fftRows.complexInverse(temp, scale);
517                                for (int r = 0; r < rows; r++) {
518                                    int idx2 = 2 * r;
519                                    a[r][idx1] = temp[idx2];
520                                    a[r][idx1 + 1] = temp[idx2 + 1];
521                                }
522                            }
523                        }
524                    });
525                }
526                ConcurrencyUtils.waitForCompletion(futures);
527            } else {
528                for (int r = 0; r < rows; r++) {
529                    fftColumns.complexInverse(a[r], scale);
530                }
531                double[] temp = new double[2 * rows];
532                for (int c = 0; c < columns; c++) {
533                    int idx1 = 2 * c;
534                    for (int r = 0; r < rows; r++) {
535                        int idx2 = 2 * r;
536                        temp[idx2] = a[r][idx1];
537                        temp[idx2 + 1] = a[r][idx1 + 1];
538                    }
539                    fftRows.complexInverse(temp, scale);
540                    for (int r = 0; r < rows; r++) {
541                        int idx2 = 2 * r;
542                        a[r][idx1] = temp[idx2];
543                        a[r][idx1 + 1] = temp[idx2 + 1];
544                    }
545                }
546            }
547        }
548    }
549
550    /**
551     * Computes 2D forward DFT of real data leaving the result in <code>a</code>
552     * . This method only works when the sizes of both dimensions are
553     * power-of-two numbers. The physical layout of the output data is as
554     * follows:
555     * 
556     * <pre>
557     * a[k1*columns+2*k2] = Re[k1][k2] = Re[rows-k1][columns-k2], 
558     * a[k1*columns+2*k2+1] = Im[k1][k2] = -Im[rows-k1][columns-k2], 
559     *       0&lt;k1&lt;rows, 0&lt;k2&lt;columns/2, 
560     * a[2*k2] = Re[0][k2] = Re[0][columns-k2], 
561     * a[2*k2+1] = Im[0][k2] = -Im[0][columns-k2], 
562     *       0&lt;k2&lt;columns/2, 
563     * a[k1*columns] = Re[k1][0] = Re[rows-k1][0], 
564     * a[k1*columns+1] = Im[k1][0] = -Im[rows-k1][0], 
565     * a[(rows-k1)*columns+1] = Re[k1][columns/2] = Re[rows-k1][columns/2], 
566     * a[(rows-k1)*columns] = -Im[k1][columns/2] = Im[rows-k1][columns/2], 
567     *       0&lt;k1&lt;rows/2, 
568     * a[0] = Re[0][0], 
569     * a[1] = Re[0][columns/2], 
570     * a[(rows/2)*columns] = Re[rows/2][0], 
571     * a[(rows/2)*columns+1] = Re[rows/2][columns/2]
572     * </pre>
573     * 
574     * This method computes only half of the elements of the real transform. The
575     * other half satisfies the symmetry condition. If you want the full real
576     * forward transform, use <code>realForwardFull</code>. To get back the
577     * original data, use <code>realInverse</code> on the output of this method.
578     * 
579     * @param a
580     *            data to transform
581     */
582    public void realForward(double[] a) {
583        if (isPowerOfTwo == false) {
584            throw new IllegalArgumentException("rows and columns must be power of two numbers");
585        } else {
586            int nthreads;
587
588            nthreads = ConcurrencyUtils.getNumberOfThreads();
589            if (nthreads != oldNthreads) {
590                nt = 8 * nthreads * rows;
591                if (columns == 4 * nthreads) {
592                    nt >>= 1;
593                } else if (columns < 4 * nthreads) {
594                    nt >>= 2;
595                }
596                t = new double[nt];
597                oldNthreads = nthreads;
598            }
599            if ((nthreads > 1) && useThreads) {
600                xdft2d0_subth1(1, 1, a, true);
601                cdft2d_subth(-1, a, true);
602                rdft2d_sub(1, a);
603            } else {
604                for (int r = 0; r < rows; r++) {
605                    fftColumns.realForward(a, r * columns);
606                }
607                cdft2d_sub(-1, a, true);
608                rdft2d_sub(1, a);
609            }
610        }
611    }
612
613    /**
614     * Computes 2D forward DFT of real data leaving the result in <code>a</code>
615     * . This method only works when the sizes of both dimensions are
616     * power-of-two numbers. The physical layout of the output data is as
617     * follows:
618     * 
619     * <pre>
620     * a[k1][2*k2] = Re[k1][k2] = Re[rows-k1][columns-k2], 
621     * a[k1][2*k2+1] = Im[k1][k2] = -Im[rows-k1][columns-k2], 
622     *       0&lt;k1&lt;rows, 0&lt;k2&lt;columns/2, 
623     * a[0][2*k2] = Re[0][k2] = Re[0][columns-k2], 
624     * a[0][2*k2+1] = Im[0][k2] = -Im[0][columns-k2], 
625     *       0&lt;k2&lt;columns/2, 
626     * a[k1][0] = Re[k1][0] = Re[rows-k1][0], 
627     * a[k1][1] = Im[k1][0] = -Im[rows-k1][0], 
628     * a[rows-k1][1] = Re[k1][columns/2] = Re[rows-k1][columns/2], 
629     * a[rows-k1][0] = -Im[k1][columns/2] = Im[rows-k1][columns/2], 
630     *       0&lt;k1&lt;rows/2, 
631     * a[0][0] = Re[0][0], 
632     * a[0][1] = Re[0][columns/2], 
633     * a[rows/2][0] = Re[rows/2][0], 
634     * a[rows/2][1] = Re[rows/2][columns/2]
635     * </pre>
636     * 
637     * This method computes only half of the elements of the real transform. The
638     * other half satisfies the symmetry condition. If you want the full real
639     * forward transform, use <code>realForwardFull</code>. To get back the
640     * original data, use <code>realInverse</code> on the output of this method.
641     * 
642     * @param a
643     *            data to transform
644     */
645    public void realForward(double[][] a) {
646        if (isPowerOfTwo == false) {
647            throw new IllegalArgumentException("rows and columns must be power of two numbers");
648        } else {
649            int nthreads;
650
651            nthreads = ConcurrencyUtils.getNumberOfThreads();
652            if (nthreads != oldNthreads) {
653                nt = 8 * nthreads * rows;
654                if (columns == 4 * nthreads) {
655                    nt >>= 1;
656                } else if (columns < 4 * nthreads) {
657                    nt >>= 2;
658                }
659                t = new double[nt];
660                oldNthreads = nthreads;
661            }
662            if ((nthreads > 1) && useThreads) {
663                xdft2d0_subth1(1, 1, a, true);
664                cdft2d_subth(-1, a, true);
665                rdft2d_sub(1, a);
666            } else {
667                for (int r = 0; r < rows; r++) {
668                    fftColumns.realForward(a[r]);
669                }
670                cdft2d_sub(-1, a, true);
671                rdft2d_sub(1, a);
672            }
673        }
674    }
675
676    /**
677     * Computes 2D forward DFT of real data leaving the result in <code>a</code>
678     * . This method computes full real forward transform, i.e. you will get the
679     * same result as from <code>complexForward</code> called with all imaginary
680     * part equal 0. Because the result is stored in <code>a</code>, the input
681     * array must be of size rows*2*columns, with only the first rows*columns
682     * elements filled with real data. To get back the original data, use
683     * <code>complexInverse</code> on the output of this method.
684     * 
685     * @param a
686     *            data to transform
687     */
688    public void realForwardFull(double[] a) {
689        if (isPowerOfTwo) {
690            int nthreads;
691
692            nthreads = ConcurrencyUtils.getNumberOfThreads();
693            if (nthreads != oldNthreads) {
694                nt = 8 * nthreads * rows;
695                if (columns == 4 * nthreads) {
696                    nt >>= 1;
697                } else if (columns < 4 * nthreads) {
698                    nt >>= 2;
699                }
700                t = new double[nt];
701                oldNthreads = nthreads;
702            }
703            if ((nthreads > 1) && useThreads) {
704                xdft2d0_subth1(1, 1, a, true);
705                cdft2d_subth(-1, a, true);
706                rdft2d_sub(1, a);
707            } else {
708                for (int r = 0; r < rows; r++) {
709                    fftColumns.realForward(a, r * columns);
710                }
711                cdft2d_sub(-1, a, true);
712                rdft2d_sub(1, a);
713            }
714            fillSymmetric(a);
715        } else {
716            mixedRadixRealForwardFull(a);
717        }
718    }
719
720    /**
721     * Computes 2D forward DFT of real data leaving the result in <code>a</code>
722     * . This method computes full real forward transform, i.e. you will get the
723     * same result as from <code>complexForward</code> called with all imaginary
724     * part equal 0. Because the result is stored in <code>a</code>, the input
725     * array must be of size rows by 2*columns, with only the first rows by
726     * columns elements filled with real data. To get back the original data,
727     * use <code>complexInverse</code> on the output of this method.
728     * 
729     * @param a
730     *            data to transform
731     */
732    public void realForwardFull(double[][] a) {
733        if (isPowerOfTwo) {
734            int nthreads;
735
736            nthreads = ConcurrencyUtils.getNumberOfThreads();
737            if (nthreads != oldNthreads) {
738                nt = 8 * nthreads * rows;
739                if (columns == 4 * nthreads) {
740                    nt >>= 1;
741                } else if (columns < 4 * nthreads) {
742                    nt >>= 2;
743                }
744                t = new double[nt];
745                oldNthreads = nthreads;
746            }
747            if ((nthreads > 1) && useThreads) {
748                xdft2d0_subth1(1, 1, a, true);
749                cdft2d_subth(-1, a, true);
750                rdft2d_sub(1, a);
751            } else {
752                for (int r = 0; r < rows; r++) {
753                    fftColumns.realForward(a[r]);
754                }
755                cdft2d_sub(-1, a, true);
756                rdft2d_sub(1, a);
757            }
758            fillSymmetric(a);
759        } else {
760            mixedRadixRealForwardFull(a);
761        }
762    }
763
764    /**
765     * Computes 2D inverse DFT of real data leaving the result in <code>a</code>
766     * . This method only works when the sizes of both dimensions are
767     * power-of-two numbers. The physical layout of the input data has to be as
768     * follows:
769     * 
770     * <pre>
771     * a[k1*columns+2*k2] = Re[k1][k2] = Re[rows-k1][columns-k2], 
772     * a[k1*columns+2*k2+1] = Im[k1][k2] = -Im[rows-k1][columns-k2], 
773     *       0&lt;k1&lt;rows, 0&lt;k2&lt;columns/2, 
774     * a[2*k2] = Re[0][k2] = Re[0][columns-k2], 
775     * a[2*k2+1] = Im[0][k2] = -Im[0][columns-k2], 
776     *       0&lt;k2&lt;columns/2, 
777     * a[k1*columns] = Re[k1][0] = Re[rows-k1][0], 
778     * a[k1*columns+1] = Im[k1][0] = -Im[rows-k1][0], 
779     * a[(rows-k1)*columns+1] = Re[k1][columns/2] = Re[rows-k1][columns/2], 
780     * a[(rows-k1)*columns] = -Im[k1][columns/2] = Im[rows-k1][columns/2], 
781     *       0&lt;k1&lt;rows/2, 
782     * a[0] = Re[0][0], 
783     * a[1] = Re[0][columns/2], 
784     * a[(rows/2)*columns] = Re[rows/2][0], 
785     * a[(rows/2)*columns+1] = Re[rows/2][columns/2]
786     * </pre>
787     * 
788     * This method computes only half of the elements of the real transform. The
789     * other half satisfies the symmetry condition. If you want the full real
790     * inverse transform, use <code>realInverseFull</code>.
791     * 
792     * @param a
793     *            data to transform
794     * 
795     * @param scale
796     *            if true then scaling is performed
797     */
798    public void realInverse(double[] a, boolean scale) {
799        if (isPowerOfTwo == false) {
800            throw new IllegalArgumentException("rows and columns must be power of two numbers");
801        } else {
802            int nthreads;
803            nthreads = ConcurrencyUtils.getNumberOfThreads();
804            if (nthreads != oldNthreads) {
805                nt = 8 * nthreads * rows;
806                if (columns == 4 * nthreads) {
807                    nt >>= 1;
808                } else if (columns < 4 * nthreads) {
809                    nt >>= 2;
810                }
811                t = new double[nt];
812                oldNthreads = nthreads;
813            }
814            if ((nthreads > 1) && useThreads) {
815                rdft2d_sub(-1, a);
816                cdft2d_subth(1, a, scale);
817                xdft2d0_subth1(1, -1, a, scale);
818            } else {
819                rdft2d_sub(-1, a);
820                cdft2d_sub(1, a, scale);
821                for (int r = 0; r < rows; r++) {
822                    fftColumns.realInverse(a, r * columns, scale);
823                }
824            }
825        }
826    }
827
828    /**
829     * Computes 2D inverse DFT of real data leaving the result in <code>a</code>
830     * . This method only works when the sizes of both dimensions are
831     * power-of-two numbers. The physical layout of the input data has to be as
832     * follows:
833     * 
834     * <pre>
835     * a[k1][2*k2] = Re[k1][k2] = Re[rows-k1][columns-k2], 
836     * a[k1][2*k2+1] = Im[k1][k2] = -Im[rows-k1][columns-k2], 
837     *       0&lt;k1&lt;rows, 0&lt;k2&lt;columns/2, 
838     * a[0][2*k2] = Re[0][k2] = Re[0][columns-k2], 
839     * a[0][2*k2+1] = Im[0][k2] = -Im[0][columns-k2], 
840     *       0&lt;k2&lt;columns/2, 
841     * a[k1][0] = Re[k1][0] = Re[rows-k1][0], 
842     * a[k1][1] = Im[k1][0] = -Im[rows-k1][0], 
843     * a[rows-k1][1] = Re[k1][columns/2] = Re[rows-k1][columns/2], 
844     * a[rows-k1][0] = -Im[k1][columns/2] = Im[rows-k1][columns/2], 
845     *       0&lt;k1&lt;rows/2, 
846     * a[0][0] = Re[0][0], 
847     * a[0][1] = Re[0][columns/2], 
848     * a[rows/2][0] = Re[rows/2][0], 
849     * a[rows/2][1] = Re[rows/2][columns/2]
850     * </pre>
851     * 
852     * This method computes only half of the elements of the real transform. The
853     * other half satisfies the symmetry condition. If you want the full real
854     * inverse transform, use <code>realInverseFull</code>.
855     * 
856     * @param a
857     *            data to transform
858     * 
859     * @param scale
860     *            if true then scaling is performed
861     */
862    public void realInverse(double[][] a, boolean scale) {
863        if (isPowerOfTwo == false) {
864            throw new IllegalArgumentException("rows and columns must be power of two numbers");
865        } else {
866            int nthreads;
867
868            nthreads = ConcurrencyUtils.getNumberOfThreads();
869            if (nthreads != oldNthreads) {
870                nt = 8 * nthreads * rows;
871                if (columns == 4 * nthreads) {
872                    nt >>= 1;
873                } else if (columns < 4 * nthreads) {
874                    nt >>= 2;
875                }
876                t = new double[nt];
877                oldNthreads = nthreads;
878            }
879            if ((nthreads > 1) && useThreads) {
880                rdft2d_sub(-1, a);
881                cdft2d_subth(1, a, scale);
882                xdft2d0_subth1(1, -1, a, scale);
883            } else {
884                rdft2d_sub(-1, a);
885                cdft2d_sub(1, a, scale);
886                for (int r = 0; r < rows; r++) {
887                    fftColumns.realInverse(a[r], scale);
888                }
889            }
890        }
891    }
892
893    /**
894     * Computes 2D inverse DFT of real data leaving the result in <code>a</code>
895     * . This method computes full real inverse transform, i.e. you will get the
896     * same result as from <code>complexInverse</code> called with all imaginary
897     * part equal 0. Because the result is stored in <code>a</code>, the input
898     * array must be of size rows*2*columns, with only the first rows*columns
899     * elements filled with real data.
900     * 
901     * @param a
902     *            data to transform
903     * 
904     * @param scale
905     *            if true then scaling is performed
906     */
907    public void realInverseFull(double[] a, boolean scale) {
908        if (isPowerOfTwo) {
909            int nthreads;
910
911            nthreads = ConcurrencyUtils.getNumberOfThreads();
912            if (nthreads != oldNthreads) {
913                nt = 8 * nthreads * rows;
914                if (columns == 4 * nthreads) {
915                    nt >>= 1;
916                } else if (columns < 4 * nthreads) {
917                    nt >>= 2;
918                }
919                t = new double[nt];
920                oldNthreads = nthreads;
921            }
922            if ((nthreads > 1) && useThreads) {
923                xdft2d0_subth2(1, -1, a, scale);
924                cdft2d_subth(1, a, scale);
925                rdft2d_sub(1, a);
926            } else {
927                for (int r = 0; r < rows; r++) {
928                    fftColumns.realInverse2(a, r * columns, scale);
929                }
930                cdft2d_sub(1, a, scale);
931                rdft2d_sub(1, a);
932            }
933            fillSymmetric(a);
934        } else {
935            mixedRadixRealInverseFull(a, scale);
936        }
937    }
938
939    /**
940     * Computes 2D inverse DFT of real data leaving the result in <code>a</code>
941     * . This method computes full real inverse transform, i.e. you will get the
942     * same result as from <code>complexInverse</code> called with all imaginary
943     * part equal 0. Because the result is stored in <code>a</code>, the input
944     * array must be of size rows by 2*columns, with only the first rows by
945     * columns elements filled with real data.
946     * 
947     * @param a
948     *            data to transform
949     * 
950     * @param scale
951     *            if true then scaling is performed
952     */
953    public void realInverseFull(double[][] a, boolean scale) {
954        if (isPowerOfTwo) {
955            int nthreads;
956
957            nthreads = ConcurrencyUtils.getNumberOfThreads();
958            if (nthreads != oldNthreads) {
959                nt = 8 * nthreads * rows;
960                if (columns == 4 * nthreads) {
961                    nt >>= 1;
962                } else if (columns < 4 * nthreads) {
963                    nt >>= 2;
964                }
965                t = new double[nt];
966                oldNthreads = nthreads;
967            }
968            if ((nthreads > 1) && useThreads) {
969                xdft2d0_subth2(1, -1, a, scale);
970                cdft2d_subth(1, a, scale);
971                rdft2d_sub(1, a);
972            } else {
973                for (int r = 0; r < rows; r++) {
974                    fftColumns.realInverse2(a[r], 0, scale);
975                }
976                cdft2d_sub(1, a, scale);
977                rdft2d_sub(1, a);
978            }
979            fillSymmetric(a);
980        } else {
981            mixedRadixRealInverseFull(a, scale);
982        }
983    }
984
985    private void mixedRadixRealForwardFull(final double[][] a) {
986        final int n2d2 = columns / 2 + 1;
987        final double[][] temp = new double[n2d2][2 * rows];
988
989        int nthreads = ConcurrencyUtils.getNumberOfThreads();
990        if ((nthreads > 1) && useThreads && (rows >= nthreads) && (n2d2 - 2 >= nthreads)) {
991            Future<?>[] futures = new Future[nthreads];
992            int p = rows / nthreads;
993            for (int l = 0; l < nthreads; l++) {
994                final int firstRow = l * p;
995                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
996                futures[l] = ConcurrencyUtils.submit(new Runnable() {
997                    @Override
998                                        public void run() {
999                        for (int i = firstRow; i < lastRow; i++) {
1000                            fftColumns.realForward(a[i]);
1001                        }
1002                    }
1003                });
1004            }
1005            ConcurrencyUtils.waitForCompletion(futures);
1006
1007            for (int r = 0; r < rows; r++) {
1008                temp[0][r] = a[r][0]; //first column is always real
1009            }
1010            fftRows.realForwardFull(temp[0]);
1011
1012            p = (n2d2 - 2) / nthreads;
1013            for (int l = 0; l < nthreads; l++) {
1014                final int firstColumn = 1 + l * p;
1015                final int lastColumn = (l == (nthreads - 1)) ? n2d2 - 1 : firstColumn + p;
1016                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1017                    @Override
1018                                        public void run() {
1019                        for (int c = firstColumn; c < lastColumn; c++) {
1020                            int idx2 = 2 * c;
1021                            for (int r = 0; r < rows; r++) {
1022                                int idx1 = 2 * r;
1023                                temp[c][idx1] = a[r][idx2];
1024                                temp[c][idx1 + 1] = a[r][idx2 + 1];
1025                            }
1026                            fftRows.complexForward(temp[c]);
1027                        }
1028                    }
1029                });
1030            }
1031            ConcurrencyUtils.waitForCompletion(futures);
1032
1033            if ((columns % 2) == 0) {
1034                for (int r = 0; r < rows; r++) {
1035                    temp[n2d2 - 1][r] = a[r][1];
1036                    //imaginary part = 0;
1037                }
1038                fftRows.realForwardFull(temp[n2d2 - 1]);
1039
1040            } else {
1041                for (int r = 0; r < rows; r++) {
1042                    int idx1 = 2 * r;
1043                    int idx2 = n2d2 - 1;
1044                    temp[idx2][idx1] = a[r][2 * idx2];
1045                    temp[idx2][idx1 + 1] = a[r][1];
1046                }
1047                fftRows.complexForward(temp[n2d2 - 1]);
1048
1049            }
1050
1051            p = rows / nthreads;
1052            for (int l = 0; l < nthreads; l++) {
1053                final int firstRow = l * p;
1054                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1055                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1056                    @Override
1057                                        public void run() {
1058                        for (int r = firstRow; r < lastRow; r++) {
1059                            int idx1 = 2 * r;
1060                            for (int c = 0; c < n2d2; c++) {
1061                                int idx2 = 2 * c;
1062                                a[r][idx2] = temp[c][idx1];
1063                                a[r][idx2 + 1] = temp[c][idx1 + 1];
1064                            }
1065                        }
1066                    }
1067                });
1068            }
1069            ConcurrencyUtils.waitForCompletion(futures);
1070
1071            for (int l = 0; l < nthreads; l++) {
1072                final int firstRow = 1 + l * p;
1073                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1074                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1075                    @Override
1076                                        public void run() {
1077                        for (int r = firstRow; r < lastRow; r++) {
1078                            int idx3 = rows - r;
1079                            for (int c = n2d2; c < columns; c++) {
1080                                int idx1 = 2 * c;
1081                                int idx2 = 2 * (columns - c);
1082                                a[0][idx1] = a[0][idx2];
1083                                a[0][idx1 + 1] = -a[0][idx2 + 1];
1084                                a[r][idx1] = a[idx3][idx2];
1085                                a[r][idx1 + 1] = -a[idx3][idx2 + 1];
1086                            }
1087                        }
1088                    }
1089                });
1090            }
1091            ConcurrencyUtils.waitForCompletion(futures);
1092
1093        } else {
1094            for (int r = 0; r < rows; r++) {
1095                fftColumns.realForward(a[r]);
1096            }
1097
1098            for (int r = 0; r < rows; r++) {
1099                temp[0][r] = a[r][0]; //first column is always real
1100            }
1101            fftRows.realForwardFull(temp[0]);
1102
1103            for (int c = 1; c < n2d2 - 1; c++) {
1104                int idx2 = 2 * c;
1105                for (int r = 0; r < rows; r++) {
1106                    int idx1 = 2 * r;
1107                    temp[c][idx1] = a[r][idx2];
1108                    temp[c][idx1 + 1] = a[r][idx2 + 1];
1109                }
1110                fftRows.complexForward(temp[c]);
1111            }
1112
1113            if ((columns % 2) == 0) {
1114                for (int r = 0; r < rows; r++) {
1115                    temp[n2d2 - 1][r] = a[r][1];
1116                    //imaginary part = 0;
1117                }
1118                fftRows.realForwardFull(temp[n2d2 - 1]);
1119
1120            } else {
1121                for (int r = 0; r < rows; r++) {
1122                    int idx1 = 2 * r;
1123                    int idx2 = n2d2 - 1;
1124                    temp[idx2][idx1] = a[r][2 * idx2];
1125                    temp[idx2][idx1 + 1] = a[r][1];
1126                }
1127                fftRows.complexForward(temp[n2d2 - 1]);
1128
1129            }
1130
1131            for (int r = 0; r < rows; r++) {
1132                int idx1 = 2 * r;
1133                for (int c = 0; c < n2d2; c++) {
1134                    int idx2 = 2 * c;
1135                    a[r][idx2] = temp[c][idx1];
1136                    a[r][idx2 + 1] = temp[c][idx1 + 1];
1137                }
1138            }
1139
1140            //fill symmetric
1141            for (int r = 1; r < rows; r++) {
1142                int idx3 = rows - r;
1143                for (int c = n2d2; c < columns; c++) {
1144                    int idx1 = 2 * c;
1145                    int idx2 = 2 * (columns - c);
1146                    a[0][idx1] = a[0][idx2];
1147                    a[0][idx1 + 1] = -a[0][idx2 + 1];
1148                    a[r][idx1] = a[idx3][idx2];
1149                    a[r][idx1 + 1] = -a[idx3][idx2 + 1];
1150                }
1151            }
1152        }
1153    }
1154
1155    private void mixedRadixRealForwardFull(final double[] a) {
1156        final int rowStride = 2 * columns;
1157        final int n2d2 = columns / 2 + 1;
1158        final double[][] temp = new double[n2d2][2 * rows];
1159
1160        int nthreads = ConcurrencyUtils.getNumberOfThreads();
1161        if ((nthreads > 1) && useThreads && (rows >= nthreads) && (n2d2 - 2 >= nthreads)) {
1162            Future<?>[] futures = new Future[nthreads];
1163            int p = rows / nthreads;
1164            for (int l = 0; l < nthreads; l++) {
1165                final int firstRow = l * p;
1166                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1167                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1168                    @Override
1169                                        public void run() {
1170                        for (int i = firstRow; i < lastRow; i++) {
1171                            fftColumns.realForward(a, i * columns);
1172                        }
1173                    }
1174                });
1175            }
1176            ConcurrencyUtils.waitForCompletion(futures);
1177
1178            for (int r = 0; r < rows; r++) {
1179                temp[0][r] = a[r * columns]; //first column is always real
1180            }
1181            fftRows.realForwardFull(temp[0]);
1182
1183            p = (n2d2 - 2) / nthreads;
1184            for (int l = 0; l < nthreads; l++) {
1185                final int firstColumn = 1 + l * p;
1186                final int lastColumn = (l == (nthreads - 1)) ? n2d2 - 1 : firstColumn + p;
1187                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1188                    @Override
1189                                        public void run() {
1190                        for (int c = firstColumn; c < lastColumn; c++) {
1191                            int idx0 = 2 * c;
1192                            for (int r = 0; r < rows; r++) {
1193                                int idx1 = 2 * r;
1194                                int idx2 = r * columns + idx0;
1195                                temp[c][idx1] = a[idx2];
1196                                temp[c][idx1 + 1] = a[idx2 + 1];
1197                            }
1198                            fftRows.complexForward(temp[c]);
1199                        }
1200                    }
1201                });
1202            }
1203            ConcurrencyUtils.waitForCompletion(futures);
1204            
1205            if ((columns % 2) == 0) {
1206                for (int r = 0; r < rows; r++) {
1207                    temp[n2d2 - 1][r] = a[r * columns + 1];
1208                    //imaginary part = 0;
1209                }
1210                fftRows.realForwardFull(temp[n2d2 - 1]);
1211
1212            } else {
1213                for (int r = 0; r < rows; r++) {
1214                    int idx1 = 2 * r;
1215                    int idx2 = r * columns;
1216                    int idx3 = n2d2 - 1;
1217                    temp[idx3][idx1] = a[idx2 + 2 * idx3];
1218                    temp[idx3][idx1 + 1] = a[idx2 + 1];
1219                }
1220                fftRows.complexForward(temp[n2d2 - 1]);
1221            }
1222
1223            p = rows / nthreads;
1224            for (int l = 0; l < nthreads; l++) {
1225                final int firstRow = l * p;
1226                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1227                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1228                    @Override
1229                                        public void run() {
1230                        for (int r = firstRow; r < lastRow; r++) {
1231                            int idx1 = 2 * r;
1232                            for (int c = 0; c < n2d2; c++) {
1233                                int idx0 = 2 * c;
1234                                int idx2 = r * rowStride + idx0;
1235                                a[idx2] = temp[c][idx1];
1236                                a[idx2 + 1] = temp[c][idx1 + 1];
1237                            }
1238                        }
1239                    }
1240                });
1241            }
1242            ConcurrencyUtils.waitForCompletion(futures);
1243            
1244            for (int l = 0; l < nthreads; l++) {
1245                final int firstRow = 1 + l * p;
1246                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1247                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1248                    @Override
1249                                        public void run() {
1250                        for (int r = firstRow; r < lastRow; r++) {
1251                            int idx5 = r * rowStride;
1252                            int idx6 = (rows - r + 1) * rowStride;
1253                            for (int c = n2d2; c < columns; c++) {
1254                                int idx1 = 2 * c;
1255                                int idx2 = 2 * (columns - c);
1256                                a[idx1] = a[idx2];
1257                                a[idx1 + 1] = -a[idx2 + 1];
1258                                int idx3 = idx5 + idx1;
1259                                int idx4 = idx6 - idx1;
1260                                a[idx3] = a[idx4];
1261                                a[idx3 + 1] = -a[idx4 + 1];
1262                            }
1263                        }
1264                    }
1265                });
1266            }
1267            ConcurrencyUtils.waitForCompletion(futures);
1268            
1269        } else {
1270            for (int r = 0; r < rows; r++) {
1271                fftColumns.realForward(a, r * columns);
1272            }
1273            for (int r = 0; r < rows; r++) {
1274                temp[0][r] = a[r * columns]; //first column is always real
1275            }
1276            fftRows.realForwardFull(temp[0]);
1277
1278            for (int c = 1; c < n2d2 - 1; c++) {
1279                int idx0 = 2 * c;
1280                for (int r = 0; r < rows; r++) {
1281                    int idx1 = 2 * r;
1282                    int idx2 = r * columns + idx0;
1283                    temp[c][idx1] = a[idx2];
1284                    temp[c][idx1 + 1] = a[idx2 + 1];
1285                }
1286                fftRows.complexForward(temp[c]);
1287            }
1288
1289            if ((columns % 2) == 0) {
1290                for (int r = 0; r < rows; r++) {
1291                    temp[n2d2 - 1][r] = a[r * columns + 1];
1292                    //imaginary part = 0;
1293                }
1294                fftRows.realForwardFull(temp[n2d2 - 1]);
1295
1296            } else {
1297                for (int r = 0; r < rows; r++) {
1298                    int idx1 = 2 * r;
1299                    int idx2 = r * columns;
1300                    int idx3 = n2d2 - 1;
1301                    temp[idx3][idx1] = a[idx2 + 2 * idx3];
1302                    temp[idx3][idx1 + 1] = a[idx2 + 1];
1303                }
1304                fftRows.complexForward(temp[n2d2 - 1]);
1305            }
1306
1307            for (int r = 0; r < rows; r++) {
1308                int idx1 = 2 * r;
1309                for (int c = 0; c < n2d2; c++) {
1310                    int idx0 = 2 * c;
1311                    int idx2 = r * rowStride + idx0;
1312                    a[idx2] = temp[c][idx1];
1313                    a[idx2 + 1] = temp[c][idx1 + 1];
1314                }
1315            }
1316
1317            //fill symmetric
1318            for (int r = 1; r < rows; r++) {
1319                int idx5 = r * rowStride;
1320                int idx6 = (rows - r + 1) * rowStride;
1321                for (int c = n2d2; c < columns; c++) {
1322                    int idx1 = 2 * c;
1323                    int idx2 = 2 * (columns - c);
1324                    a[idx1] = a[idx2];
1325                    a[idx1 + 1] = -a[idx2 + 1];
1326                    int idx3 = idx5 + idx1;
1327                    int idx4 = idx6 - idx1;
1328                    a[idx3] = a[idx4];
1329                    a[idx3 + 1] = -a[idx4 + 1];
1330                }
1331            }
1332        }
1333    }
1334
1335    private void mixedRadixRealInverseFull(final double[][] a, final boolean scale) {
1336        final int n2d2 = columns / 2 + 1;
1337        final double[][] temp = new double[n2d2][2 * rows];
1338
1339        int nthreads = ConcurrencyUtils.getNumberOfThreads();
1340        if ((nthreads > 1) && useThreads && (rows >= nthreads) && (n2d2 - 2 >= nthreads)) {
1341            Future<?>[] futures = new Future[nthreads];
1342            int p = rows / nthreads;
1343            for (int l = 0; l < nthreads; l++) {
1344                final int firstRow = l * p;
1345                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1346                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1347                    @Override
1348                                        public void run() {
1349                        for (int i = firstRow; i < lastRow; i++) {
1350                            fftColumns.realInverse2(a[i], 0, scale);
1351                        }
1352                    }
1353                });
1354            }
1355            ConcurrencyUtils.waitForCompletion(futures);
1356
1357            for (int r = 0; r < rows; r++) {
1358                temp[0][r] = a[r][0]; //first column is always real
1359            }
1360            fftRows.realInverseFull(temp[0], scale);
1361
1362            p = (n2d2 - 2) / nthreads;
1363            for (int l = 0; l < nthreads; l++) {
1364                final int firstColumn = 1 + l * p;
1365                final int lastColumn = (l == (nthreads - 1)) ? n2d2 - 1 : firstColumn + p;
1366                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1367                    @Override
1368                                        public void run() {
1369                        for (int c = firstColumn; c < lastColumn; c++) {
1370                            int idx2 = 2 * c;
1371                            for (int r = 0; r < rows; r++) {
1372                                int idx1 = 2 * r;
1373                                temp[c][idx1] = a[r][idx2];
1374                                temp[c][idx1 + 1] = a[r][idx2 + 1];
1375                            }
1376                            fftRows.complexInverse(temp[c], scale);
1377                        }
1378                    }
1379                });
1380            }
1381            ConcurrencyUtils.waitForCompletion(futures);
1382
1383            if ((columns % 2) == 0) {
1384                for (int r = 0; r < rows; r++) {
1385                    temp[n2d2 - 1][r] = a[r][1];
1386                    //imaginary part = 0;
1387                }
1388                fftRows.realInverseFull(temp[n2d2 - 1], scale);
1389
1390            } else {
1391                for (int r = 0; r < rows; r++) {
1392                    int idx1 = 2 * r;
1393                    int idx2 = n2d2 - 1;
1394                    temp[idx2][idx1] = a[r][2 * idx2];
1395                    temp[idx2][idx1 + 1] = a[r][1];
1396                }
1397                fftRows.complexInverse(temp[n2d2 - 1], scale);
1398
1399            }
1400
1401            p = rows / nthreads;
1402            for (int l = 0; l < nthreads; l++) {
1403                final int firstRow = l * p;
1404                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1405                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1406                    @Override
1407                                        public void run() {
1408                        for (int r = firstRow; r < lastRow; r++) {
1409                            int idx1 = 2 * r;
1410                            for (int c = 0; c < n2d2; c++) {
1411                                int idx2 = 2 * c;
1412                                a[r][idx2] = temp[c][idx1];
1413                                a[r][idx2 + 1] = temp[c][idx1 + 1];
1414                            }
1415                        }
1416                    }
1417                });
1418            }
1419            ConcurrencyUtils.waitForCompletion(futures);
1420
1421            for (int l = 0; l < nthreads; l++) {
1422                final int firstRow = 1 + l * p;
1423                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1424                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1425                    @Override
1426                                        public void run() {
1427                        for (int r = firstRow; r < lastRow; r++) {
1428                            int idx3 = rows - r;
1429                            for (int c = n2d2; c < columns; c++) {
1430                                int idx1 = 2 * c;
1431                                int idx2 = 2 * (columns - c);
1432                                a[0][idx1] = a[0][idx2];
1433                                a[0][idx1 + 1] = -a[0][idx2 + 1];
1434                                a[r][idx1] = a[idx3][idx2];
1435                                a[r][idx1 + 1] = -a[idx3][idx2 + 1];
1436                            }
1437                        }
1438                    }
1439                });
1440            }
1441            ConcurrencyUtils.waitForCompletion(futures);
1442
1443        } else {
1444            for (int r = 0; r < rows; r++) {
1445                fftColumns.realInverse2(a[r], 0, scale);
1446            }
1447
1448            for (int r = 0; r < rows; r++) {
1449                temp[0][r] = a[r][0]; //first column is always real
1450            }
1451            fftRows.realInverseFull(temp[0], scale);
1452
1453            for (int c = 1; c < n2d2 - 1; c++) {
1454                int idx2 = 2 * c;
1455                for (int r = 0; r < rows; r++) {
1456                    int idx1 = 2 * r;
1457                    temp[c][idx1] = a[r][idx2];
1458                    temp[c][idx1 + 1] = a[r][idx2 + 1];
1459                }
1460                fftRows.complexInverse(temp[c], scale);
1461            }
1462
1463            if ((columns % 2) == 0) {
1464                for (int r = 0; r < rows; r++) {
1465                    temp[n2d2 - 1][r] = a[r][1];
1466                    //imaginary part = 0;
1467                }
1468                fftRows.realInverseFull(temp[n2d2 - 1], scale);
1469
1470            } else {
1471                for (int r = 0; r < rows; r++) {
1472                    int idx1 = 2 * r;
1473                    int idx2 = n2d2 - 1;
1474                    temp[idx2][idx1] = a[r][2 * idx2];
1475                    temp[idx2][idx1 + 1] = a[r][1];
1476                }
1477                fftRows.complexInverse(temp[n2d2 - 1], scale);
1478
1479            }
1480
1481            for (int r = 0; r < rows; r++) {
1482                int idx1 = 2 * r;
1483                for (int c = 0; c < n2d2; c++) {
1484                    int idx2 = 2 * c;
1485                    a[r][idx2] = temp[c][idx1];
1486                    a[r][idx2 + 1] = temp[c][idx1 + 1];
1487                }
1488            }
1489
1490            //fill symmetric
1491            for (int r = 1; r < rows; r++) {
1492                int idx3 = rows - r;
1493                for (int c = n2d2; c < columns; c++) {
1494                    int idx1 = 2 * c;
1495                    int idx2 = 2 * (columns - c);
1496                    a[0][idx1] = a[0][idx2];
1497                    a[0][idx1 + 1] = -a[0][idx2 + 1];
1498                    a[r][idx1] = a[idx3][idx2];
1499                    a[r][idx1 + 1] = -a[idx3][idx2 + 1];
1500                }
1501            }
1502        }
1503    }
1504
1505    private void mixedRadixRealInverseFull(final double[] a, final boolean scale) {
1506        final int rowStride = 2 * columns;
1507        final int n2d2 = columns / 2 + 1;
1508        final double[][] temp = new double[n2d2][2 * rows];
1509
1510        int nthreads = ConcurrencyUtils.getNumberOfThreads();
1511        if ((nthreads > 1) && useThreads && (rows >= nthreads) && (n2d2 - 2 >= nthreads)) {
1512            Future<?>[] futures = new Future[nthreads];
1513            int p = rows / nthreads;
1514            for (int l = 0; l < nthreads; l++) {
1515                final int firstRow = l * p;
1516                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1517                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1518                    @Override
1519                                        public void run() {
1520                        for (int i = firstRow; i < lastRow; i++) {
1521                            fftColumns.realInverse2(a, i * columns, scale);
1522                        }
1523                    }
1524                });
1525            }
1526            ConcurrencyUtils.waitForCompletion(futures);
1527
1528            for (int r = 0; r < rows; r++) {
1529                temp[0][r] = a[r * columns]; //first column is always real
1530            }
1531            fftRows.realInverseFull(temp[0], scale);
1532
1533            p = (n2d2 - 2) / nthreads;
1534            for (int l = 0; l < nthreads; l++) {
1535                final int firstColumn = 1 + l * p;
1536                final int lastColumn = (l == (nthreads - 1)) ? n2d2 - 1 : firstColumn + p;
1537                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1538                    @Override
1539                                        public void run() {
1540                        for (int c = firstColumn; c < lastColumn; c++) {
1541                            int idx0 = 2 * c;
1542                            for (int r = 0; r < rows; r++) {
1543                                int idx1 = 2 * r;
1544                                int idx2 = r * columns + idx0;
1545                                temp[c][idx1] = a[idx2];
1546                                temp[c][idx1 + 1] = a[idx2 + 1];
1547                            }
1548                            fftRows.complexInverse(temp[c], scale);
1549                        }
1550                    }
1551                });
1552            }
1553            ConcurrencyUtils.waitForCompletion(futures);
1554
1555            if ((columns % 2) == 0) {
1556                for (int r = 0; r < rows; r++) {
1557                    temp[n2d2 - 1][r] = a[r * columns + 1];
1558                    //imaginary part = 0;
1559                }
1560                fftRows.realInverseFull(temp[n2d2 - 1], scale);
1561
1562            } else {
1563                for (int r = 0; r < rows; r++) {
1564                    int idx1 = 2 * r;
1565                    int idx2 = r * columns;
1566                    int idx3 = n2d2 - 1;
1567                    temp[idx3][idx1] = a[idx2 + 2 * idx3];
1568                    temp[idx3][idx1 + 1] = a[idx2 + 1];
1569                }
1570                fftRows.complexInverse(temp[n2d2 - 1], scale);
1571            }
1572
1573            p = rows / nthreads;
1574            for (int l = 0; l < nthreads; l++) {
1575                final int firstRow = l * p;
1576                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1577                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1578                    @Override
1579                                        public void run() {
1580                        for (int r = firstRow; r < lastRow; r++) {
1581                            int idx1 = 2 * r;
1582                            for (int c = 0; c < n2d2; c++) {
1583                                int idx0 = 2 * c;
1584                                int idx2 = r * rowStride + idx0;
1585                                a[idx2] = temp[c][idx1];
1586                                a[idx2 + 1] = temp[c][idx1 + 1];
1587                            }
1588                        }
1589                    }
1590                });
1591            }
1592            ConcurrencyUtils.waitForCompletion(futures);
1593
1594            for (int l = 0; l < nthreads; l++) {
1595                final int firstRow = 1 + l * p;
1596                final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p;
1597                futures[l] = ConcurrencyUtils.submit(new Runnable() {
1598                    @Override
1599                                        public void run() {
1600                        for (int r = firstRow; r < lastRow; r++) {
1601                            int idx5 = r * rowStride;
1602                            int idx6 = (rows - r + 1) * rowStride;
1603                            for (int c = n2d2; c < columns; c++) {
1604                                int idx1 = 2 * c;
1605                                int idx2 = 2 * (columns - c);
1606                                a[idx1] = a[idx2];
1607                                a[idx1 + 1] = -a[idx2 + 1];
1608                                int idx3 = idx5 + idx1;
1609                                int idx4 = idx6 - idx1;
1610                                a[idx3] = a[idx4];
1611                                a[idx3 + 1] = -a[idx4 + 1];
1612                            }
1613                        }
1614                    }
1615                });
1616            }
1617            ConcurrencyUtils.waitForCompletion(futures);
1618        } else {
1619            for (int r = 0; r < rows; r++) {
1620                fftColumns.realInverse2(a, r * columns, scale);
1621            }
1622            for (int r = 0; r < rows; r++) {
1623                temp[0][r] = a[r * columns]; //first column is always real
1624            }
1625            fftRows.realInverseFull(temp[0], scale);
1626
1627            for (int c = 1; c < n2d2 - 1; c++) {
1628                int idx0 = 2 * c;
1629                for (int r = 0; r < rows; r++) {
1630                    int idx1 = 2 * r;
1631                    int idx2 = r * columns + idx0;
1632                    temp[c][idx1] = a[idx2];
1633                    temp[c][idx1 + 1] = a[idx2 + 1];
1634                }
1635                fftRows.complexInverse(temp[c], scale);
1636            }
1637
1638            if ((columns % 2) == 0) {
1639                for (int r = 0; r < rows; r++) {
1640                    temp[n2d2 - 1][r] = a[r * columns + 1];
1641                    //imaginary part = 0;
1642                }
1643                fftRows.realInverseFull(temp[n2d2 - 1], scale);
1644
1645            } else {
1646                for (int r = 0; r < rows; r++) {
1647                    int idx1 = 2 * r;
1648                    int idx2 = r * columns;
1649                    int idx3 = n2d2 - 1;
1650                    temp[idx3][idx1] = a[idx2 + 2 * idx3];
1651                    temp[idx3][idx1 + 1] = a[idx2 + 1];
1652                }
1653                fftRows.complexInverse(temp[n2d2 - 1], scale);
1654            }
1655
1656            for (int r = 0; r < rows; r++) {
1657                int idx1 = 2 * r;
1658                for (int c = 0; c < n2d2; c++) {
1659                    int idx0 = 2 * c;
1660                    int idx2 = r * rowStride + idx0;
1661                    a[idx2] = temp[c][idx1];
1662                    a[idx2 + 1] = temp[c][idx1 + 1];
1663                }
1664            }
1665
1666            //fill symmetric
1667            for (int r = 1; r < rows; r++) {
1668                int idx5 = r * rowStride;
1669                int idx6 = (rows - r + 1) * rowStride;
1670                for (int c = n2d2; c < columns; c++) {
1671                    int idx1 = 2 * c;
1672                    int idx2 = 2 * (columns - c);
1673                    a[idx1] = a[idx2];
1674                    a[idx1 + 1] = -a[idx2 + 1];
1675                    int idx3 = idx5 + idx1;
1676                    int idx4 = idx6 - idx1;
1677                    a[idx3] = a[idx4];
1678                    a[idx3 + 1] = -a[idx4 + 1];
1679                }
1680            }
1681        }
1682    }
1683
1684    private void rdft2d_sub(int isgn, double[] a) {
1685        int n1h, j;
1686        double xi;
1687        int idx1, idx2;
1688
1689        n1h = rows >> 1;
1690        if (isgn < 0) {
1691            for (int i = 1; i < n1h; i++) {
1692                j = rows - i;
1693                idx1 = i * columns;
1694                idx2 = j * columns;
1695                xi = a[idx1] - a[idx2];
1696                a[idx1] += a[idx2];
1697                a[idx2] = xi;
1698                xi = a[idx2 + 1] - a[idx1 + 1];
1699                a[idx1 + 1] += a[idx2 + 1];
1700                a[idx2 + 1] = xi;
1701            }
1702        } else {
1703            for (int i = 1; i < n1h; i++) {
1704                j = rows - i;
1705                idx1 = i * columns;
1706                idx2 = j * columns;
1707                a[idx2] = 0.5f * (a[idx1] - a[idx2]);
1708                a[idx1] -= a[idx2];
1709                a[idx2 + 1] = 0.5f * (a[idx1 + 1] + a[idx2 + 1]);
1710                a[idx1 + 1] -= a[idx2 + 1];
1711            }
1712        }
1713    }
1714
1715    private void rdft2d_sub(int isgn, double[][] a) {
1716        int n1h, j;
1717        double xi;
1718
1719        n1h = rows >> 1;
1720        if (isgn < 0) {
1721            for (int i = 1; i < n1h; i++) {
1722                j = rows - i;
1723                xi = a[i][0] - a[j][0];
1724                a[i][0] += a[j][0];
1725                a[j][0] = xi;
1726                xi = a[j][1] - a[i][1];
1727                a[i][1] += a[j][1];
1728                a[j][1] = xi;
1729            }
1730        } else {
1731            for (int i = 1; i < n1h; i++) {
1732                j = rows - i;
1733                a[j][0] = 0.5f * (a[i][0] - a[j][0]);
1734                a[i][0] -= a[j][0];
1735                a[j][1] = 0.5f * (a[i][1] + a[j][1]);
1736                a[i][1] -= a[j][1];
1737            }
1738        }
1739    }
1740
1741    private void cdft2d_sub(int isgn, double[] a, boolean scale) {
1742        int idx1, idx2, idx3, idx4, idx5;
1743        if (isgn == -1) {
1744            if (columns > 4) {
1745                for (int c = 0; c < columns; c += 8) {
1746                    for (int r = 0; r < rows; r++) {
1747                        idx1 = r * columns + c;
1748                        idx2 = 2 * r;
1749                        idx3 = 2 * rows + 2 * r;
1750                        idx4 = idx3 + 2 * rows;
1751                        idx5 = idx4 + 2 * rows;
1752                        t[idx2] = a[idx1];
1753                        t[idx2 + 1] = a[idx1 + 1];
1754                        t[idx3] = a[idx1 + 2];
1755                        t[idx3 + 1] = a[idx1 + 3];
1756                        t[idx4] = a[idx1 + 4];
1757                        t[idx4 + 1] = a[idx1 + 5];
1758                        t[idx5] = a[idx1 + 6];
1759                        t[idx5 + 1] = a[idx1 + 7];
1760                    }
1761                    fftRows.complexForward(t, 0);
1762                    fftRows.complexForward(t, 2 * rows);
1763                    fftRows.complexForward(t, 4 * rows);
1764                    fftRows.complexForward(t, 6 * rows);
1765                    for (int r = 0; r < rows; r++) {
1766                        idx1 = r * columns + c;
1767                        idx2 = 2 * r;
1768                        idx3 = 2 * rows + 2 * r;
1769                        idx4 = idx3 + 2 * rows;
1770                        idx5 = idx4 + 2 * rows;
1771                        a[idx1] = t[idx2];
1772                        a[idx1 + 1] = t[idx2 + 1];
1773                        a[idx1 + 2] = t[idx3];
1774                        a[idx1 + 3] = t[idx3 + 1];
1775                        a[idx1 + 4] = t[idx4];
1776                        a[idx1 + 5] = t[idx4 + 1];
1777                        a[idx1 + 6] = t[idx5];
1778                        a[idx1 + 7] = t[idx5 + 1];
1779                    }
1780                }
1781            } else if (columns == 4) {
1782                for (int r = 0; r < rows; r++) {
1783                    idx1 = r * columns;
1784                    idx2 = 2 * r;
1785                    idx3 = 2 * rows + 2 * r;
1786                    t[idx2] = a[idx1];
1787                    t[idx2 + 1] = a[idx1 + 1];
1788                    t[idx3] = a[idx1 + 2];
1789                    t[idx3 + 1] = a[idx1 + 3];
1790                }
1791                fftRows.complexForward(t, 0);
1792                fftRows.complexForward(t, 2 * rows);
1793                for (int r = 0; r < rows; r++) {
1794                    idx1 = r * columns;
1795                    idx2 = 2 * r;
1796                    idx3 = 2 * rows + 2 * r;
1797                    a[idx1] = t[idx2];
1798                    a[idx1 + 1] = t[idx2 + 1];
1799                    a[idx1 + 2] = t[idx3];
1800                    a[idx1 + 3] = t[idx3 + 1];
1801                }
1802            } else if (columns == 2) {
1803                for (int r = 0; r < rows; r++) {
1804                    idx1 = r * columns;
1805                    idx2 = 2 * r;
1806                    t[idx2] = a[idx1];
1807                    t[idx2 + 1] = a[idx1 + 1];
1808                }
1809                fftRows.complexForward(t, 0);
1810                for (int r = 0; r < rows; r++) {
1811                    idx1 = r * columns;
1812                    idx2 = 2 * r;
1813                    a[idx1] = t[idx2];
1814                    a[idx1 + 1] = t[idx2 + 1];
1815                }
1816            }
1817        } else {
1818            if (columns > 4) {
1819                for (int c = 0; c < columns; c += 8) {
1820                    for (int r = 0; r < rows; r++) {
1821                        idx1 = r * columns + c;
1822                        idx2 = 2 * r;
1823                        idx3 = 2 * rows + 2 * r;
1824                        idx4 = idx3 + 2 * rows;
1825                        idx5 = idx4 + 2 * rows;
1826                        t[idx2] = a[idx1];
1827                        t[idx2 + 1] = a[idx1 + 1];
1828                        t[idx3] = a[idx1 + 2];
1829                        t[idx3 + 1] = a[idx1 + 3];
1830                        t[idx4] = a[idx1 + 4];
1831                        t[idx4 + 1] = a[idx1 + 5];
1832                        t[idx5] = a[idx1 + 6];
1833                        t[idx5 + 1] = a[idx1 + 7];
1834                    }
1835                    fftRows.complexInverse(t, 0, scale);
1836                    fftRows.complexInverse(t, 2 * rows, scale);
1837                    fftRows.complexInverse(t, 4 * rows, scale);
1838                    fftRows.complexInverse(t, 6 * rows, scale);
1839                    for (int r = 0; r < rows; r++) {
1840                        idx1 = r * columns + c;
1841                        idx2 = 2 * r;
1842                        idx3 = 2 * rows + 2 * r;
1843                        idx4 = idx3 + 2 * rows;
1844                        idx5 = idx4 + 2 * rows;
1845                        a[idx1] = t[idx2];
1846                        a[idx1 + 1] = t[idx2 + 1];
1847                        a[idx1 + 2] = t[idx3];
1848                        a[idx1 + 3] = t[idx3 + 1];
1849                        a[idx1 + 4] = t[idx4];
1850                        a[idx1 + 5] = t[idx4 + 1];
1851                        a[idx1 + 6] = t[idx5];
1852                        a[idx1 + 7] = t[idx5 + 1];
1853                    }
1854                }
1855            } else if (columns == 4) {
1856                for (int r = 0; r < rows; r++) {
1857                    idx1 = r * columns;
1858                    idx2 = 2 * r;
1859                    idx3 = 2 * rows + 2 * r;
1860                    t[idx2] = a[idx1];
1861                    t[idx2 + 1] = a[idx1 + 1];
1862                    t[idx3] = a[idx1 + 2];
1863                    t[idx3 + 1] = a[idx1 + 3];
1864                }
1865                fftRows.complexInverse(t, 0, scale);
1866                fftRows.complexInverse(t, 2 * rows, scale);
1867                for (int r = 0; r < rows; r++) {
1868                    idx1 = r * columns;
1869                    idx2 = 2 * r;
1870                    idx3 = 2 * rows + 2 * r;
1871                    a[idx1] = t[idx2];
1872                    a[idx1 + 1] = t[idx2 + 1];
1873                    a[idx1 + 2] = t[idx3];
1874                    a[idx1 + 3] = t[idx3 + 1];
1875                }
1876            } else if (columns == 2) {
1877                for (int r = 0; r < rows; r++) {
1878                    idx1 = r * columns;
1879                    idx2 = 2 * r;
1880                    t[idx2] = a[idx1];
1881                    t[idx2 + 1] = a[idx1 + 1];
1882                }
1883                fftRows.complexInverse(t, 0, scale);
1884                for (int r = 0; r < rows; r++) {
1885                    idx1 = r * columns;
1886                    idx2 = 2 * r;
1887                    a[idx1] = t[idx2];
1888                    a[idx1 + 1] = t[idx2 + 1];
1889                }
1890            }
1891        }
1892    }
1893
1894    private void cdft2d_sub(int isgn, double[][] a, boolean scale) {
1895        int idx2, idx3, idx4, idx5;
1896        if (isgn == -1) {
1897            if (columns > 4) {
1898                for (int c = 0; c < columns; c += 8) {
1899                    for (int r = 0; r < rows; r++) {
1900                        idx2 = 2 * r;
1901                        idx3 = 2 * rows + 2 * r;
1902                        idx4 = idx3 + 2 * rows;
1903                        idx5 = idx4 + 2 * rows;
1904                        t[idx2] = a[r][c];
1905                        t[idx2 + 1] = a[r][c + 1];
1906                        t[idx3] = a[r][c + 2];
1907                        t[idx3 + 1] = a[r][c + 3];
1908                        t[idx4] = a[r][c + 4];
1909                        t[idx4 + 1] = a[r][c + 5];
1910                        t[idx5] = a[r][c + 6];
1911                        t[idx5 + 1] = a[r][c + 7];
1912                    }
1913                    fftRows.complexForward(t, 0);
1914                    fftRows.complexForward(t, 2 * rows);
1915                    fftRows.complexForward(t, 4 * rows);
1916                    fftRows.complexForward(t, 6 * rows);
1917                    for (int r = 0; r < rows; r++) {
1918                        idx2 = 2 * r;
1919                        idx3 = 2 * rows + 2 * r;
1920                        idx4 = idx3 + 2 * rows;
1921                        idx5 = idx4 + 2 * rows;
1922                        a[r][c] = t[idx2];
1923                        a[r][c + 1] = t[idx2 + 1];
1924                        a[r][c + 2] = t[idx3];
1925                        a[r][c + 3] = t[idx3 + 1];
1926                        a[r][c + 4] = t[idx4];
1927                        a[r][c + 5] = t[idx4 + 1];
1928                        a[r][c + 6] = t[idx5];
1929                        a[r][c + 7] = t[idx5 + 1];
1930                    }
1931                }
1932            } else if (columns == 4) {
1933                for (int r = 0; r < rows; r++) {
1934                    idx2 = 2 * r;
1935                    idx3 = 2 * rows + 2 * r;
1936                    t[idx2] = a[r][0];
1937                    t[idx2 + 1] = a[r][1];
1938                    t[idx3] = a[r][2];
1939                    t[idx3 + 1] = a[r][3];
1940                }
1941                fftRows.complexForward(t, 0);
1942                fftRows.complexForward(t, 2 * rows);
1943                for (int r = 0; r < rows; r++) {
1944                    idx2 = 2 * r;
1945                    idx3 = 2 * rows + 2 * r;
1946                    a[r][0] = t[idx2];
1947                    a[r][1] = t[idx2 + 1];
1948                    a[r][2] = t[idx3];
1949                    a[r][3] = t[idx3 + 1];
1950                }
1951            } else if (columns == 2) {
1952                for (int r = 0; r < rows; r++) {
1953                    idx2 = 2 * r;
1954                    t[idx2] = a[r][0];
1955                    t[idx2 + 1] = a[r][1];
1956                }
1957                fftRows.complexForward(t, 0);
1958                for (int r = 0; r < rows; r++) {
1959                    idx2 = 2 * r;
1960                    a[r][0] = t[idx2];
1961                    a[r][1] = t[idx2 + 1];
1962                }
1963            }
1964        } else {
1965            if (columns > 4) {
1966                for (int c = 0; c < columns; c += 8) {
1967                    for (int r = 0; r < rows; r++) {
1968                        idx2 = 2 * r;
1969                        idx3 = 2 * rows + 2 * r;
1970                        idx4 = idx3 + 2 * rows;
1971                        idx5 = idx4 + 2 * rows;
1972                        t[idx2] = a[r][c];
1973                        t[idx2 + 1] = a[r][c + 1];
1974                        t[idx3] = a[r][c + 2];
1975                        t[idx3 + 1] = a[r][c + 3];
1976                        t[idx4] = a[r][c + 4];
1977                        t[idx4 + 1] = a[r][c + 5];
1978                        t[idx5] = a[r][c + 6];
1979                        t[idx5 + 1] = a[r][c + 7];
1980                    }
1981                    fftRows.complexInverse(t, 0, scale);
1982                    fftRows.complexInverse(t, 2 * rows, scale);
1983                    fftRows.complexInverse(t, 4 * rows, scale);
1984                    fftRows.complexInverse(t, 6 * rows, scale);
1985                    for (int r = 0; r < rows; r++) {
1986                        idx2 = 2 * r;
1987                        idx3 = 2 * rows + 2 * r;
1988                        idx4 = idx3 + 2 * rows;
1989                        idx5 = idx4 + 2 * rows;
1990                        a[r][c] = t[idx2];
1991                        a[r][c + 1] = t[idx2 + 1];
1992                        a[r][c + 2] = t[idx3];
1993                        a[r][c + 3] = t[idx3 + 1];
1994                        a[r][c + 4] = t[idx4];
1995                        a[r][c + 5] = t[idx4 + 1];
1996                        a[r][c + 6] = t[idx5];
1997                        a[r][c + 7] = t[idx5 + 1];
1998                    }
1999                }
2000            } else if (columns == 4) {
2001                for (int r = 0; r < rows; r++) {
2002                    idx2 = 2 * r;
2003                    idx3 = 2 * rows + 2 * r;
2004                    t[idx2] = a[r][0];
2005                    t[idx2 + 1] = a[r][1];
2006                    t[idx3] = a[r][2];
2007                    t[idx3 + 1] = a[r][3];
2008                }
2009                fftRows.complexInverse(t, 0, scale);
2010                fftRows.complexInverse(t, 2 * rows, scale);
2011                for (int r = 0; r < rows; r++) {
2012                    idx2 = 2 * r;
2013                    idx3 = 2 * rows + 2 * r;
2014                    a[r][0] = t[idx2];
2015                    a[r][1] = t[idx2 + 1];
2016                    a[r][2] = t[idx3];
2017                    a[r][3] = t[idx3 + 1];
2018                }
2019            } else if (columns == 2) {
2020                for (int r = 0; r < rows; r++) {
2021                    idx2 = 2 * r;
2022                    t[idx2] = a[r][0];
2023                    t[idx2 + 1] = a[r][1];
2024                }
2025                fftRows.complexInverse(t, 0, scale);
2026                for (int r = 0; r < rows; r++) {
2027                    idx2 = 2 * r;
2028                    a[r][0] = t[idx2];
2029                    a[r][1] = t[idx2 + 1];
2030                }
2031            }
2032        }
2033    }
2034
2035    private void xdft2d0_subth1(final int icr, final int isgn, final double[] a, final boolean scale) {
2036        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
2037
2038        Future<?>[] futures = new Future[nthreads];
2039        for (int i = 0; i < nthreads; i++) {
2040            final int n0 = i;
2041            futures[i] = ConcurrencyUtils.submit(new Runnable() {
2042                @Override
2043                                public void run() {
2044                    if (icr == 0) {
2045                        if (isgn == -1) {
2046                            for (int r = n0; r < rows; r += nthreads) {
2047                                fftColumns.complexForward(a, r * columns);
2048                            }
2049                        } else {
2050                            for (int r = n0; r < rows; r += nthreads) {
2051                                fftColumns.complexInverse(a, r * columns, scale);
2052                            }
2053                        }
2054                    } else {
2055                        if (isgn == 1) {
2056                            for (int r = n0; r < rows; r += nthreads) {
2057                                fftColumns.realForward(a, r * columns);
2058                            }
2059                        } else {
2060                            for (int r = n0; r < rows; r += nthreads) {
2061                                fftColumns.realInverse(a, r * columns, scale);
2062                            }
2063                        }
2064                    }
2065                }
2066            });
2067        }
2068        ConcurrencyUtils.waitForCompletion(futures);
2069    }
2070
2071    private void xdft2d0_subth2(final int icr, final int isgn, final double[] a, final boolean scale) {
2072        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
2073
2074        Future<?>[] futures = new Future[nthreads];
2075        for (int i = 0; i < nthreads; i++) {
2076            final int n0 = i;
2077            futures[i] = ConcurrencyUtils.submit(new Runnable() {
2078                @Override
2079                                public void run() {
2080                    if (icr == 0) {
2081                        if (isgn == -1) {
2082                            for (int r = n0; r < rows; r += nthreads) {
2083                                fftColumns.complexForward(a, r * columns);
2084                            }
2085                        } else {
2086                            for (int r = n0; r < rows; r += nthreads) {
2087                                fftColumns.complexInverse(a, r * columns, scale);
2088                            }
2089                        }
2090                    } else {
2091                        if (isgn == 1) {
2092                            for (int r = n0; r < rows; r += nthreads) {
2093                                fftColumns.realForward(a, r * columns);
2094                            }
2095                        } else {
2096                            for (int r = n0; r < rows; r += nthreads) {
2097                                fftColumns.realInverse2(a, r * columns, scale);
2098                            }
2099                        }
2100                    }
2101                }
2102            });
2103        }
2104        ConcurrencyUtils.waitForCompletion(futures);
2105    }
2106
2107    private void xdft2d0_subth1(final int icr, final int isgn, final double[][] a, final boolean scale) {
2108        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
2109
2110        Future<?>[] futures = new Future[nthreads];
2111        for (int i = 0; i < nthreads; i++) {
2112            final int n0 = i;
2113            futures[i] = ConcurrencyUtils.submit(new Runnable() {
2114                @Override
2115                                public void run() {
2116                    if (icr == 0) {
2117                        if (isgn == -1) {
2118                            for (int r = n0; r < rows; r += nthreads) {
2119                                fftColumns.complexForward(a[r]);
2120                            }
2121                        } else {
2122                            for (int r = n0; r < rows; r += nthreads) {
2123                                fftColumns.complexInverse(a[r], scale);
2124                            }
2125                        }
2126                    } else {
2127                        if (isgn == 1) {
2128                            for (int r = n0; r < rows; r += nthreads) {
2129                                fftColumns.realForward(a[r]);
2130                            }
2131                        } else {
2132                            for (int r = n0; r < rows; r += nthreads) {
2133                                fftColumns.realInverse(a[r], scale);
2134                            }
2135                        }
2136                    }
2137                }
2138            });
2139        }
2140        ConcurrencyUtils.waitForCompletion(futures);
2141    }
2142
2143    private void xdft2d0_subth2(final int icr, final int isgn, final double[][] a, final boolean scale) {
2144        final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads();
2145
2146        Future<?>[] futures = new Future[nthreads];
2147        for (int i = 0; i < nthreads; i++) {
2148            final int n0 = i;
2149            futures[i] = ConcurrencyUtils.submit(new Runnable() {
2150                @Override
2151                                public void run() {
2152                    if (icr == 0) {
2153                        if (isgn == -1) {
2154                            for (int r = n0; r < rows; r += nthreads) {
2155                                fftColumns.complexForward(a[r]);
2156                            }
2157                        } else {
2158                            for (int r = n0; r < rows; r += nthreads) {
2159                                fftColumns.complexInverse(a[r], scale);
2160                            }
2161                        }
2162                    } else {
2163                        if (isgn == 1) {
2164                            for (int r = n0; r < rows; r += nthreads) {
2165                                fftColumns.realForward(a[r]);
2166                            }
2167                        } else {
2168                            for (int r = n0; r < rows; r += nthreads) {
2169                                fftColumns.realInverse2(a[r], 0, scale);
2170                            }
2171                        }
2172                    }
2173                }
2174            });
2175        }
2176        ConcurrencyUtils.waitForCompletion(futures);
2177    }
2178
2179    private void cdft2d_subth(final int isgn, final double[] a, final boolean scale) {
2180        int nthread = ConcurrencyUtils.getNumberOfThreads();
2181        int nt = 8 * rows;
2182        if (columns == 4 * nthread) {
2183            nt >>= 1;
2184        } else if (columns < 4 * nthread) {
2185            nthread = columns >> 1;
2186            nt >>= 2;
2187        }
2188        Future<?>[] futures = new Future[nthread];
2189        final int nthreads = nthread;
2190        for (int i = 0; i < nthread; i++) {
2191            final int n0 = i;
2192            final int startt = nt * i;
2193            futures[i] = ConcurrencyUtils.submit(new Runnable() {
2194                @Override
2195                                public void run() {
2196                    int idx1, idx2, idx3, idx4, idx5;
2197                    if (isgn == -1) {
2198                        if (columns > 4 * nthreads) {
2199                            for (int c = 8 * n0; c < columns; c += 8 * nthreads) {
2200                                for (int r = 0; r < rows; r++) {
2201                                    idx1 = r * columns + c;
2202                                    idx2 = startt + 2 * r;
2203                                    idx3 = startt + 2 * rows + 2 * r;
2204                                    idx4 = idx3 + 2 * rows;
2205                                    idx5 = idx4 + 2 * rows;
2206                                    t[idx2] = a[idx1];
2207                                    t[idx2 + 1] = a[idx1 + 1];
2208                                    t[idx3] = a[idx1 + 2];
2209                                    t[idx3 + 1] = a[idx1 + 3];
2210                                    t[idx4] = a[idx1 + 4];
2211                                    t[idx4 + 1] = a[idx1 + 5];
2212                                    t[idx5] = a[idx1 + 6];
2213                                    t[idx5 + 1] = a[idx1 + 7];
2214                                }
2215                                fftRows.complexForward(t, startt);
2216                                fftRows.complexForward(t, startt + 2 * rows);
2217                                fftRows.complexForward(t, startt + 4 * rows);
2218                                fftRows.complexForward(t, startt + 6 * rows);
2219                                for (int r = 0; r < rows; r++) {
2220                                    idx1 = r * columns + c;
2221                                    idx2 = startt + 2 * r;
2222                                    idx3 = startt + 2 * rows + 2 * r;
2223                                    idx4 = idx3 + 2 * rows;
2224                                    idx5 = idx4 + 2 * rows;
2225                                    a[idx1] = t[idx2];
2226                                    a[idx1 + 1] = t[idx2 + 1];
2227                                    a[idx1 + 2] = t[idx3];
2228                                    a[idx1 + 3] = t[idx3 + 1];
2229                                    a[idx1 + 4] = t[idx4];
2230                                    a[idx1 + 5] = t[idx4 + 1];
2231                                    a[idx1 + 6] = t[idx5];
2232                                    a[idx1 + 7] = t[idx5 + 1];
2233                                }
2234                            }
2235                        } else if (columns == 4 * nthreads) {
2236                            for (int r = 0; r < rows; r++) {
2237                                idx1 = r * columns + 4 * n0;
2238                                idx2 = startt + 2 * r;
2239                                idx3 = startt + 2 * rows + 2 * r;
2240                                t[idx2] = a[idx1];
2241                                t[idx2 + 1] = a[idx1 + 1];
2242                                t[idx3] = a[idx1 + 2];
2243                                t[idx3 + 1] = a[idx1 + 3];
2244                            }
2245                            fftRows.complexForward(t, startt);
2246                            fftRows.complexForward(t, startt + 2 * rows);
2247                            for (int r = 0; r < rows; r++) {
2248                                idx1 = r * columns + 4 * n0;
2249                                idx2 = startt + 2 * r;
2250                                idx3 = startt + 2 * rows + 2 * r;
2251                                a[idx1] = t[idx2];
2252                                a[idx1 + 1] = t[idx2 + 1];
2253                                a[idx1 + 2] = t[idx3];
2254                                a[idx1 + 3] = t[idx3 + 1];
2255                            }
2256                        } else if (columns == 2 * nthreads) {
2257                            for (int r = 0; r < rows; r++) {
2258                                idx1 = r * columns + 2 * n0;
2259                                idx2 = startt + 2 * r;
2260                                t[idx2] = a[idx1];
2261                                t[idx2 + 1] = a[idx1 + 1];
2262                            }
2263                            fftRows.complexForward(t, startt);
2264                            for (int r = 0; r < rows; r++) {
2265                                idx1 = r * columns + 2 * n0;
2266                                idx2 = startt + 2 * r;
2267                                a[idx1] = t[idx2];
2268                                a[idx1 + 1] = t[idx2 + 1];
2269                            }
2270                        }
2271                    } else {
2272                        if (columns > 4 * nthreads) {
2273                            for (int c = 8 * n0; c < columns; c += 8 * nthreads) {
2274                                for (int r = 0; r < rows; r++) {
2275                                    idx1 = r * columns + c;
2276                                    idx2 = startt + 2 * r;
2277                                    idx3 = startt + 2 * rows + 2 * r;
2278                                    idx4 = idx3 + 2 * rows;
2279                                    idx5 = idx4 + 2 * rows;
2280                                    t[idx2] = a[idx1];
2281                                    t[idx2 + 1] = a[idx1 + 1];
2282                                    t[idx3] = a[idx1 + 2];
2283                                    t[idx3 + 1] = a[idx1 + 3];
2284                                    t[idx4] = a[idx1 + 4];
2285                                    t[idx4 + 1] = a[idx1 + 5];
2286                                    t[idx5] = a[idx1 + 6];
2287                                    t[idx5 + 1] = a[idx1 + 7];
2288                                }
2289                                fftRows.complexInverse(t, startt, scale);
2290                                fftRows.complexInverse(t, startt + 2 * rows, scale);
2291                                fftRows.complexInverse(t, startt + 4 * rows, scale);
2292                                fftRows.complexInverse(t, startt + 6 * rows, scale);
2293                                for (int r = 0; r < rows; r++) {
2294                                    idx1 = r * columns + c;
2295                                    idx2 = startt + 2 * r;
2296                                    idx3 = startt + 2 * rows + 2 * r;
2297                                    idx4 = idx3 + 2 * rows;
2298                                    idx5 = idx4 + 2 * rows;
2299                                    a[idx1] = t[idx2];
2300                                    a[idx1 + 1] = t[idx2 + 1];
2301                                    a[idx1 + 2] = t[idx3];
2302                                    a[idx1 + 3] = t[idx3 + 1];
2303                                    a[idx1 + 4] = t[idx4];
2304                                    a[idx1 + 5] = t[idx4 + 1];
2305                                    a[idx1 + 6] = t[idx5];
2306                                    a[idx1 + 7] = t[idx5 + 1];
2307                                }
2308                            }
2309                        } else if (columns == 4 * nthreads) {
2310                            for (int r = 0; r < rows; r++) {
2311                                idx1 = r * columns + 4 * n0;
2312                                idx2 = startt + 2 * r;
2313                                idx3 = startt + 2 * rows + 2 * r;
2314                                t[idx2] = a[idx1];
2315                                t[idx2 + 1] = a[idx1 + 1];
2316                                t[idx3] = a[idx1 + 2];
2317                                t[idx3 + 1] = a[idx1 + 3];
2318                            }
2319                            fftRows.complexInverse(t, startt, scale);
2320                            fftRows.complexInverse(t, startt + 2 * rows, scale);
2321                            for (int r = 0; r < rows; r++) {
2322                                idx1 = r * columns + 4 * n0;
2323                                idx2 = startt + 2 * r;
2324                                idx3 = startt + 2 * rows + 2 * r;
2325                                a[idx1] = t[idx2];
2326                                a[idx1 + 1] = t[idx2 + 1];
2327                                a[idx1 + 2] = t[idx3];
2328                                a[idx1 + 3] = t[idx3 + 1];
2329                            }
2330                        } else if (columns == 2 * nthreads) {
2331                            for (int r = 0; r < rows; r++) {
2332                                idx1 = r * columns + 2 * n0;
2333                                idx2 = startt + 2 * r;
2334                                t[idx2] = a[idx1];
2335                                t[idx2 + 1] = a[idx1 + 1];
2336                            }
2337                            fftRows.complexInverse(t, startt, scale);
2338                            for (int r = 0; r < rows; r++) {
2339                                idx1 = r * columns + 2 * n0;
2340                                idx2 = startt + 2 * r;
2341                                a[idx1] = t[idx2];
2342                                a[idx1 + 1] = t[idx2 + 1];
2343                            }
2344                        }
2345                    }
2346                }
2347            });
2348        }
2349        ConcurrencyUtils.waitForCompletion(futures);
2350    }
2351
2352    private void cdft2d_subth(final int isgn, final double[][] a, final boolean scale) {
2353        int nthread = ConcurrencyUtils.getNumberOfThreads();
2354        int nt = 8 * rows;
2355        if (columns == 4 * nthread) {
2356            nt >>= 1;
2357        } else if (columns < 4 * nthread) {
2358            nthread = columns >> 1;
2359            nt >>= 2;
2360        }
2361        Future<?>[] futures = new Future[nthread];
2362        final int nthreads = nthread;
2363        for (int i = 0; i < nthreads; i++) {
2364            final int n0 = i;
2365            final int startt = nt * i;
2366            futures[i] = ConcurrencyUtils.submit(new Runnable() {
2367                @Override
2368                                public void run() {
2369                    int idx2, idx3, idx4, idx5;
2370                    if (isgn == -1) {
2371                        if (columns > 4 * nthreads) {
2372                            for (int c = 8 * n0; c < columns; c += 8 * nthreads) {
2373                                for (int r = 0; r < rows; r++) {
2374                                    idx2 = startt + 2 * r;
2375                                    idx3 = startt + 2 * rows + 2 * r;
2376                                    idx4 = idx3 + 2 * rows;
2377                                    idx5 = idx4 + 2 * rows;
2378                                    t[idx2] = a[r][c];
2379                                    t[idx2 + 1] = a[r][c + 1];
2380                                    t[idx3] = a[r][c + 2];
2381                                    t[idx3 + 1] = a[r][c + 3];
2382                                    t[idx4] = a[r][c + 4];
2383                                    t[idx4 + 1] = a[r][c + 5];
2384                                    t[idx5] = a[r][c + 6];
2385                                    t[idx5 + 1] = a[r][c + 7];
2386                                }
2387                                fftRows.complexForward(t, startt);
2388                                fftRows.complexForward(t, startt + 2 * rows);
2389                                fftRows.complexForward(t, startt + 4 * rows);
2390                                fftRows.complexForward(t, startt + 6 * rows);
2391                                for (int r = 0; r < rows; r++) {
2392                                    idx2 = startt + 2 * r;
2393                                    idx3 = startt + 2 * rows + 2 * r;
2394                                    idx4 = idx3 + 2 * rows;
2395                                    idx5 = idx4 + 2 * rows;
2396                                    a[r][c] = t[idx2];
2397                                    a[r][c + 1] = t[idx2 + 1];
2398                                    a[r][c + 2] = t[idx3];
2399                                    a[r][c + 3] = t[idx3 + 1];
2400                                    a[r][c + 4] = t[idx4];
2401                                    a[r][c + 5] = t[idx4 + 1];
2402                                    a[r][c + 6] = t[idx5];
2403                                    a[r][c + 7] = t[idx5 + 1];
2404                                }
2405                            }
2406                        } else if (columns == 4 * nthreads) {
2407                            for (int r = 0; r < rows; r++) {
2408                                idx2 = startt + 2 * r;
2409                                idx3 = startt + 2 * rows + 2 * r;
2410                                t[idx2] = a[r][4 * n0];
2411                                t[idx2 + 1] = a[r][4 * n0 + 1];
2412                                t[idx3] = a[r][4 * n0 + 2];
2413                                t[idx3 + 1] = a[r][4 * n0 + 3];
2414                            }
2415                            fftRows.complexForward(t, startt);
2416                            fftRows.complexForward(t, startt + 2 * rows);
2417                            for (int r = 0; r < rows; r++) {
2418                                idx2 = startt + 2 * r;
2419                                idx3 = startt + 2 * rows + 2 * r;
2420                                a[r][4 * n0] = t[idx2];
2421                                a[r][4 * n0 + 1] = t[idx2 + 1];
2422                                a[r][4 * n0 + 2] = t[idx3];
2423                                a[r][4 * n0 + 3] = t[idx3 + 1];
2424                            }
2425                        } else if (columns == 2 * nthreads) {
2426                            for (int r = 0; r < rows; r++) {
2427                                idx2 = startt + 2 * r;
2428                                t[idx2] = a[r][2 * n0];
2429                                t[idx2 + 1] = a[r][2 * n0 + 1];
2430                            }
2431                            fftRows.complexForward(t, startt);
2432                            for (int r = 0; r < rows; r++) {
2433                                idx2 = startt + 2 * r;
2434                                a[r][2 * n0] = t[idx2];
2435                                a[r][2 * n0 + 1] = t[idx2 + 1];
2436                            }
2437                        }
2438                    } else {
2439                        if (columns > 4 * nthreads) {
2440                            for (int c = 8 * n0; c < columns; c += 8 * nthreads) {
2441                                for (int r = 0; r < rows; r++) {
2442                                    idx2 = startt + 2 * r;
2443                                    idx3 = startt + 2 * rows + 2 * r;
2444                                    idx4 = idx3 + 2 * rows;
2445                                    idx5 = idx4 + 2 * rows;
2446                                    t[idx2] = a[r][c];
2447                                    t[idx2 + 1] = a[r][c + 1];
2448                                    t[idx3] = a[r][c + 2];
2449                                    t[idx3 + 1] = a[r][c + 3];
2450                                    t[idx4] = a[r][c + 4];
2451                                    t[idx4 + 1] = a[r][c + 5];
2452                                    t[idx5] = a[r][c + 6];
2453                                    t[idx5 + 1] = a[r][c + 7];
2454                                }
2455                                fftRows.complexInverse(t, startt, scale);
2456                                fftRows.complexInverse(t, startt + 2 * rows, scale);
2457                                fftRows.complexInverse(t, startt + 4 * rows, scale);
2458                                fftRows.complexInverse(t, startt + 6 * rows, scale);
2459                                for (int r = 0; r < rows; r++) {
2460                                    idx2 = startt + 2 * r;
2461                                    idx3 = startt + 2 * rows + 2 * r;
2462                                    idx4 = idx3 + 2 * rows;
2463                                    idx5 = idx4 + 2 * rows;
2464                                    a[r][c] = t[idx2];
2465                                    a[r][c + 1] = t[idx2 + 1];
2466                                    a[r][c + 2] = t[idx3];
2467                                    a[r][c + 3] = t[idx3 + 1];
2468                                    a[r][c + 4] = t[idx4];
2469                                    a[r][c + 5] = t[idx4 + 1];
2470                                    a[r][c + 6] = t[idx5];
2471                                    a[r][c + 7] = t[idx5 + 1];
2472                                }
2473                            }
2474                        } else if (columns == 4 * nthreads) {
2475                            for (int r = 0; r < rows; r++) {
2476                                idx2 = startt + 2 * r;
2477                                idx3 = startt + 2 * rows + 2 * r;
2478                                t[idx2] = a[r][4 * n0];
2479                                t[idx2 + 1] = a[r][4 * n0 + 1];
2480                                t[idx3] = a[r][4 * n0 + 2];
2481                                t[idx3 + 1] = a[r][4 * n0 + 3];
2482                            }
2483                            fftRows.complexInverse(t, startt, scale);
2484                            fftRows.complexInverse(t, startt + 2 * rows, scale);
2485                            for (int r = 0; r < rows; r++) {
2486                                idx2 = startt + 2 * r;
2487                                idx3 = startt + 2 * rows + 2 * r;
2488                                a[r][4 * n0] = t[idx2];
2489                                a[r][4 * n0 + 1] = t[idx2 + 1];
2490                                a[r][4 * n0 + 2] = t[idx3];
2491                                a[r][4 * n0 + 3] = t[idx3 + 1];
2492                            }
2493                        } else if (columns == 2 * nthreads) {
2494                            for (int r = 0; r < rows; r++) {
2495                                idx2 = startt + 2 * r;
2496                                t[idx2] = a[r][2 * n0];
2497                                t[idx2 + 1] = a[r][2 * n0 + 1];
2498                            }
2499                            fftRows.complexInverse(t, startt, scale);
2500                            for (int r = 0; r < rows; r++) {
2501                                idx2 = startt + 2 * r;
2502                                a[r][2 * n0] = t[idx2];
2503                                a[r][2 * n0 + 1] = t[idx2 + 1];
2504                            }
2505                        }
2506                    }
2507                }
2508            });
2509        }
2510        ConcurrencyUtils.waitForCompletion(futures);
2511    }
2512
2513    private void fillSymmetric(final double[] a) {
2514        final int twon2 = 2 * columns;
2515        int idx1, idx2, idx3, idx4;
2516        int n1d2 = rows / 2;
2517
2518        for (int r = (rows - 1); r >= 1; r--) {
2519            idx1 = r * columns;
2520            idx2 = 2 * idx1;
2521            for (int c = 0; c < columns; c += 2) {
2522                a[idx2 + c] = a[idx1 + c];
2523                a[idx1 + c] = 0;
2524                a[idx2 + c + 1] = a[idx1 + c + 1];
2525                a[idx1 + c + 1] = 0;
2526            }
2527        }
2528        int nthreads = ConcurrencyUtils.getNumberOfThreads();
2529        if ((nthreads > 1) && useThreads && (n1d2 >= nthreads)) {
2530            Future<?>[] futures = new Future[nthreads];
2531            int l1k = n1d2 / nthreads;
2532            final int newn2 = 2 * columns;
2533            for (int i = 0; i < nthreads; i++) {
2534                final int l1offa, l1stopa, l2offa, l2stopa;
2535                if (i == 0)
2536                    l1offa = i * l1k + 1;
2537                else {
2538                    l1offa = i * l1k;
2539                }
2540                l1stopa = i * l1k + l1k;
2541                l2offa = i * l1k;
2542                if (i == nthreads - 1) {
2543                    l2stopa = i * l1k + l1k + 1;
2544                } else {
2545                    l2stopa = i * l1k + l1k;
2546                }
2547                futures[i] = ConcurrencyUtils.submit(new Runnable() {
2548                    @Override
2549                                        public void run() {
2550                        int idx1, idx2, idx3, idx4;
2551
2552                        for (int r = l1offa; r < l1stopa; r++) {
2553                            idx1 = r * newn2;
2554                            idx2 = (rows - r) * newn2;
2555                            idx3 = idx1 + columns;
2556                            a[idx3] = a[idx2 + 1];
2557                            a[idx3 + 1] = -a[idx2];
2558                        }
2559                        for (int r = l1offa; r < l1stopa; r++) {
2560                            idx1 = r * newn2;
2561                            idx3 = (rows - r + 1) * newn2;
2562                            for (int c = columns + 2; c < newn2; c += 2) {
2563                                idx2 = idx3 - c;
2564                                idx4 = idx1 + c;
2565                                a[idx4] = a[idx2];
2566                                a[idx4 + 1] = -a[idx2 + 1];
2567
2568                            }
2569                        }
2570                        for (int r = l2offa; r < l2stopa; r++) {
2571                            idx3 = ((rows - r) % rows) * newn2;
2572                            idx4 = r * newn2;
2573                            for (int c = 0; c < newn2; c += 2) {
2574                                idx1 = idx3 + (newn2 - c) % newn2;
2575                                idx2 = idx4 + c;
2576                                a[idx1] = a[idx2];
2577                                a[idx1 + 1] = -a[idx2 + 1];
2578                            }
2579                        }
2580                    }
2581                });
2582            }
2583            ConcurrencyUtils.waitForCompletion(futures);
2584
2585        } else {
2586
2587            for (int r = 1; r < n1d2; r++) {
2588                idx2 = r * twon2;
2589                idx3 = (rows - r) * twon2;
2590                a[idx2 + columns] = a[idx3 + 1];
2591                a[idx2 + columns + 1] = -a[idx3];
2592            }
2593
2594            for (int r = 1; r < n1d2; r++) {
2595                idx2 = r * twon2;
2596                idx3 = (rows - r + 1) * twon2;
2597                for (int c = columns + 2; c < twon2; c += 2) {
2598                    a[idx2 + c] = a[idx3 - c];
2599                    a[idx2 + c + 1] = -a[idx3 - c + 1];
2600
2601                }
2602            }
2603            for (int r = 0; r <= rows / 2; r++) {
2604                idx1 = r * twon2;
2605                idx4 = ((rows - r) % rows) * twon2;
2606                for (int c = 0; c < twon2; c += 2) {
2607                    idx2 = idx1 + c;
2608                    idx3 = idx4 + (twon2 - c) % twon2;
2609                    a[idx3] = a[idx2];
2610                    a[idx3 + 1] = -a[idx2 + 1];
2611                }
2612            }
2613        }
2614        a[columns] = -a[1];
2615        a[1] = 0;
2616        idx1 = n1d2 * twon2;
2617        a[idx1 + columns] = -a[idx1 + 1];
2618        a[idx1 + 1] = 0;
2619        a[idx1 + columns + 1] = 0;
2620    }
2621
2622    private void fillSymmetric(final double[][] a) {
2623        final int newn2 = 2 * columns;
2624        int n1d2 = rows / 2;
2625
2626        int nthreads = ConcurrencyUtils.getNumberOfThreads();
2627        if ((nthreads > 1) && useThreads && (n1d2 >= nthreads)) {
2628            Future<?>[] futures = new Future[nthreads];
2629            int l1k = n1d2 / nthreads;
2630            for (int i = 0; i < nthreads; i++) {
2631                final int l1offa, l1stopa, l2offa, l2stopa;
2632                if (i == 0)
2633                    l1offa = i * l1k + 1;
2634                else {
2635                    l1offa = i * l1k;
2636                }
2637                l1stopa = i * l1k + l1k;
2638                l2offa = i * l1k;
2639                if (i == nthreads - 1) {
2640                    l2stopa = i * l1k + l1k + 1;
2641                } else {
2642                    l2stopa = i * l1k + l1k;
2643                }
2644                futures[i] = ConcurrencyUtils.submit(new Runnable() {
2645                    @Override
2646                                        public void run() {
2647                        int idx1, idx2;
2648                        for (int r = l1offa; r < l1stopa; r++) {
2649                            idx1 = rows - r;
2650                            a[r][columns] = a[idx1][1];
2651                            a[r][columns + 1] = -a[idx1][0];
2652                        }
2653                        for (int r = l1offa; r < l1stopa; r++) {
2654                            idx1 = rows - r;
2655                            for (int c = columns + 2; c < newn2; c += 2) {
2656                                idx2 = newn2 - c;
2657                                a[r][c] = a[idx1][idx2];
2658                                a[r][c + 1] = -a[idx1][idx2 + 1];
2659
2660                            }
2661                        }
2662                        for (int r = l2offa; r < l2stopa; r++) {
2663                            idx1 = (rows - r) % rows;
2664                            for (int c = 0; c < newn2; c = c + 2) {
2665                                idx2 = (newn2 - c) % newn2;
2666                                a[idx1][idx2] = a[r][c];
2667                                a[idx1][idx2 + 1] = -a[r][c + 1];
2668                            }
2669                        }
2670                    }
2671                });
2672            }
2673            ConcurrencyUtils.waitForCompletion(futures);
2674        } else {
2675
2676            for (int r = 1; r < n1d2; r++) {
2677                int idx1 = rows - r;
2678                a[r][columns] = a[idx1][1];
2679                a[r][columns + 1] = -a[idx1][0];
2680            }
2681            for (int r = 1; r < n1d2; r++) {
2682                int idx1 = rows - r;
2683                for (int c = columns + 2; c < newn2; c += 2) {
2684                    int idx2 = newn2 - c;
2685                    a[r][c] = a[idx1][idx2];
2686                    a[r][c + 1] = -a[idx1][idx2 + 1];
2687                }
2688            }
2689            for (int r = 0; r <= rows / 2; r++) {
2690                int idx1 = (rows - r) % rows;
2691                for (int c = 0; c < newn2; c += 2) {
2692                    int idx2 = (newn2 - c) % newn2;
2693                    a[idx1][idx2] = a[r][c];
2694                    a[idx1][idx2 + 1] = -a[r][c + 1];
2695                }
2696            }
2697        }
2698        a[0][columns] = -a[0][1];
2699        a[0][1] = 0;
2700        a[n1d2][columns] = -a[n1d2][1];
2701        a[n1d2][1] = 0;
2702        a[n1d2][columns + 1] = 0;
2703    }
2704}