001/* ***** BEGIN LICENSE BLOCK ***** 002 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 003 * 004 * The contents of this file are subject to the Mozilla Public License Version 005 * 1.1 (the "License"); you may not use this file except in compliance with 006 * the License. You may obtain a copy of the License at 007 * http://www.mozilla.org/MPL/ 008 * 009 * Software distributed under the License is distributed on an "AS IS" basis, 010 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 011 * for the specific language governing rights and limitations under the 012 * License. 013 * 014 * The Original Code is JTransforms. 015 * 016 * The Initial Developer of the Original Code is 017 * Piotr Wendykier, Emory University. 018 * Portions created by the Initial Developer are Copyright (C) 2007-2009 019 * the Initial Developer. All Rights Reserved. 020 * 021 * Alternatively, the contents of this file may be used under the terms of 022 * either the GNU General Public License Version 2 or later (the "GPL"), or 023 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 024 * in which case the provisions of the GPL or the LGPL are applicable instead 025 * of those above. If you wish to allow use of your version of this file only 026 * under the terms of either the GPL or the LGPL, and not to allow others to 027 * use your version of this file under the terms of the MPL, indicate your 028 * decision by deleting the provisions above and replace them with the notice 029 * and other provisions required by the GPL or the LGPL. If you do not delete 030 * the provisions above, a recipient may use your version of this file under 031 * the terms of any one of the MPL, the GPL or the LGPL. 032 * 033 * ***** END LICENSE BLOCK ***** */ 034 035package edu.emory.mathcs.jtransforms.dst; 036 037import java.util.concurrent.Future; 038 039import edu.emory.mathcs.utils.ConcurrencyUtils; 040 041/** 042 * Computes 2D Discrete Sine Transform (DST) of single precision data. The sizes 043 * of both dimensions can be arbitrary numbers. This is a parallel 044 * implementation optimized for SMP systems.<br> 045 * <br> 046 * Part of code is derived from General Purpose FFT Package written by Takuya Ooura 047 * (http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html) 048 * 049 * @author Piotr Wendykier (piotr.wendykier@gmail.com) 050 * 051 */ 052public class FloatDST_2D { 053 054 private int rows; 055 056 private int columns; 057 058 private float[] t; 059 060 private FloatDST_1D dstColumns, dstRows; 061 062 private int oldNthreads; 063 064 private int nt; 065 066 private boolean isPowerOfTwo = false; 067 068 private boolean useThreads = false; 069 070 /** 071 * Creates new instance of FloatDST_2D. 072 * 073 * @param rows 074 * number of rows 075 * @param columns 076 * number of columns 077 */ 078 public FloatDST_2D(int rows, int columns) { 079 if (rows <= 1 || columns <= 1) { 080 throw new IllegalArgumentException("rows and columns must be greater than 1"); 081 } 082 this.rows = rows; 083 this.columns = columns; 084 if (rows * columns >= ConcurrencyUtils.getThreadsBeginN_2D()) { 085 useThreads = true; 086 } 087 if (ConcurrencyUtils.isPowerOf2(rows) && ConcurrencyUtils.isPowerOf2(columns)) { 088 isPowerOfTwo = true; 089 oldNthreads = ConcurrencyUtils.getNumberOfThreads(); 090 nt = 4 * oldNthreads * rows; 091 if (columns == 2 * oldNthreads) { 092 nt >>= 1; 093 } else if (columns < 2 * oldNthreads) { 094 nt >>= 2; 095 } 096 t = new float[nt]; 097 } 098 dstColumns = new FloatDST_1D(columns); 099 if (columns == rows) { 100 dstRows = dstColumns; 101 } else { 102 dstRows = new FloatDST_1D(rows); 103 } 104 } 105 106 /** 107 * Computes 2D forward DST (DST-II) leaving the result in <code>a</code>. 108 * The data is stored in 1D array in row-major order. 109 * 110 * @param a 111 * data to transform 112 * @param scale 113 * if true then scaling is performed 114 */ 115 public void forward(final float[] a, final boolean scale) { 116 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 117 if (isPowerOfTwo) { 118 if (nthreads != oldNthreads) { 119 nt = 4 * nthreads * rows; 120 if (columns == 2 * nthreads) { 121 nt >>= 1; 122 } else if (columns < 2 * nthreads) { 123 nt >>= 2; 124 } 125 t = new float[nt]; 126 oldNthreads = nthreads; 127 } 128 if ((nthreads > 1) && useThreads) { 129 ddxt2d_subth(-1, a, scale); 130 ddxt2d0_subth(-1, a, scale); 131 } else { 132 ddxt2d_sub(-1, a, scale); 133 for (int i = 0; i < rows; i++) { 134 dstColumns.forward(a, i * columns, scale); 135 } 136 } 137 } else { 138 if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) { 139 Future<?>[] futures = new Future[nthreads]; 140 int p = rows / nthreads; 141 for (int l = 0; l < nthreads; l++) { 142 final int firstRow = l * p; 143 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 144 futures[l] = ConcurrencyUtils.submit(new Runnable() { 145 @Override 146 public void run() { 147 for (int i = firstRow; i < lastRow; i++) { 148 dstColumns.forward(a, i * columns, scale); 149 } 150 } 151 }); 152 } 153 ConcurrencyUtils.waitForCompletion(futures); 154 p = columns / nthreads; 155 for (int l = 0; l < nthreads; l++) { 156 final int firstColumn = l * p; 157 final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p; 158 futures[l] = ConcurrencyUtils.submit(new Runnable() { 159 @Override 160 public void run() { 161 float[] temp = new float[rows]; 162 for (int c = firstColumn; c < lastColumn; c++) { 163 for (int r = 0; r < rows; r++) { 164 temp[r] = a[r * columns + c]; 165 } 166 dstRows.forward(temp, scale); 167 for (int r = 0; r < rows; r++) { 168 a[r * columns + c] = temp[r]; 169 } 170 } 171 } 172 }); 173 } 174 ConcurrencyUtils.waitForCompletion(futures); 175 } else { 176 for (int i = 0; i < rows; i++) { 177 dstColumns.forward(a, i * columns, scale); 178 } 179 float[] temp = new float[rows]; 180 for (int c = 0; c < columns; c++) { 181 for (int r = 0; r < rows; r++) { 182 temp[r] = a[r * columns + c]; 183 } 184 dstRows.forward(temp, scale); 185 for (int r = 0; r < rows; r++) { 186 a[r * columns + c] = temp[r]; 187 } 188 } 189 } 190 } 191 } 192 193 /** 194 * Computes 2D forward DST (DST-II) leaving the result in <code>a</code>. 195 * The data is stored in 2D array. 196 * 197 * @param a 198 * data to transform 199 * @param scale 200 * if true then scaling is performed 201 */ 202 public void forward(final float[][] a, final boolean scale) { 203 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 204 if (isPowerOfTwo) { 205 if (nthreads != oldNthreads) { 206 nt = 4 * nthreads * rows; 207 if (columns == 2 * nthreads) { 208 nt >>= 1; 209 } else if (columns < 2 * nthreads) { 210 nt >>= 2; 211 } 212 t = new float[nt]; 213 oldNthreads = nthreads; 214 } 215 if ((nthreads > 1) && useThreads) { 216 ddxt2d_subth(-1, a, scale); 217 ddxt2d0_subth(-1, a, scale); 218 } else { 219 ddxt2d_sub(-1, a, scale); 220 for (int i = 0; i < rows; i++) { 221 dstColumns.forward(a[i], scale); 222 } 223 } 224 } else { 225 if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) { 226 Future<?>[] futures = new Future[nthreads]; 227 int p = rows / nthreads; 228 for (int l = 0; l < nthreads; l++) { 229 final int firstRow = l * p; 230 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 231 futures[l] = ConcurrencyUtils.submit(new Runnable() { 232 @Override 233 public void run() { 234 for (int i = firstRow; i < lastRow; i++) { 235 dstColumns.forward(a[i], scale); 236 } 237 } 238 }); 239 } 240 ConcurrencyUtils.waitForCompletion(futures); 241 p = columns / nthreads; 242 for (int l = 0; l < nthreads; l++) { 243 final int firstColumn = l * p; 244 final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p; 245 futures[l] = ConcurrencyUtils.submit(new Runnable() { 246 @Override 247 public void run() { 248 float[] temp = new float[rows]; 249 for (int c = firstColumn; c < lastColumn; c++) { 250 for (int r = 0; r < rows; r++) { 251 temp[r] = a[r][c]; 252 } 253 dstRows.forward(temp, scale); 254 for (int r = 0; r < rows; r++) { 255 a[r][c] = temp[r]; 256 } 257 } 258 } 259 }); 260 } 261 ConcurrencyUtils.waitForCompletion(futures); 262 } else { 263 for (int i = 0; i < rows; i++) { 264 dstColumns.forward(a[i], scale); 265 } 266 float[] temp = new float[rows]; 267 for (int c = 0; c < columns; c++) { 268 for (int r = 0; r < rows; r++) { 269 temp[r] = a[r][c]; 270 } 271 dstRows.forward(temp, scale); 272 for (int r = 0; r < rows; r++) { 273 a[r][c] = temp[r]; 274 } 275 } 276 } 277 } 278 } 279 280 /** 281 * Computes 2D inverse DST (DST-III) leaving the result in <code>a</code>. 282 * The data is stored in 1D array in row-major order. 283 * 284 * @param a 285 * data to transform 286 * @param scale 287 * if true then scaling is performed 288 */ 289 public void inverse(final float[] a, final boolean scale) { 290 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 291 if (isPowerOfTwo) { 292 if (nthreads != oldNthreads) { 293 nt = 4 * nthreads * rows; 294 if (columns == 2 * nthreads) { 295 nt >>= 1; 296 } else if (columns < 2 * nthreads) { 297 nt >>= 2; 298 } 299 t = new float[nt]; 300 oldNthreads = nthreads; 301 } 302 if ((nthreads > 1) && useThreads) { 303 ddxt2d_subth(1, a, scale); 304 ddxt2d0_subth(1, a, scale); 305 } else { 306 ddxt2d_sub(1, a, scale); 307 for (int i = 0; i < rows; i++) { 308 dstColumns.inverse(a, i * columns, scale); 309 } 310 } 311 } else { 312 if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) { 313 Future<?>[] futures = new Future[nthreads]; 314 int p = rows / nthreads; 315 for (int l = 0; l < nthreads; l++) { 316 final int firstRow = l * p; 317 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 318 futures[l] = ConcurrencyUtils.submit(new Runnable() { 319 @Override 320 public void run() { 321 for (int i = firstRow; i < lastRow; i++) { 322 dstColumns.inverse(a, i * columns, scale); 323 } 324 } 325 }); 326 } 327 ConcurrencyUtils.waitForCompletion(futures); 328 p = columns / nthreads; 329 for (int l = 0; l < nthreads; l++) { 330 final int firstColumn = l * p; 331 final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p; 332 futures[l] = ConcurrencyUtils.submit(new Runnable() { 333 @Override 334 public void run() { 335 float[] temp = new float[rows]; 336 for (int c = firstColumn; c < lastColumn; c++) { 337 for (int r = 0; r < rows; r++) { 338 temp[r] = a[r * columns + c]; 339 } 340 dstRows.inverse(temp, scale); 341 for (int r = 0; r < rows; r++) { 342 a[r * columns + c] = temp[r]; 343 } 344 } 345 } 346 }); 347 } 348 ConcurrencyUtils.waitForCompletion(futures); 349 } else { 350 for (int i = 0; i < rows; i++) { 351 dstColumns.inverse(a, i * columns, scale); 352 } 353 float[] temp = new float[rows]; 354 for (int c = 0; c < columns; c++) { 355 for (int r = 0; r < rows; r++) { 356 temp[r] = a[r * columns + c]; 357 } 358 dstRows.inverse(temp, scale); 359 for (int r = 0; r < rows; r++) { 360 a[r * columns + c] = temp[r]; 361 } 362 } 363 } 364 } 365 } 366 367 /** 368 * Computes 2D inverse DST (DST-III) leaving the result in <code>a</code>. 369 * The data is stored in 2D array. 370 * 371 * @param a 372 * data to transform 373 * @param scale 374 * if true then scaling is performed 375 */ 376 public void inverse(final float[][] a, final boolean scale) { 377 int nthreads = ConcurrencyUtils.getNumberOfThreads(); 378 if (isPowerOfTwo) { 379 if (nthreads != oldNthreads) { 380 nt = 4 * nthreads * rows; 381 if (columns == 2 * nthreads) { 382 nt >>= 1; 383 } else if (columns < 2 * nthreads) { 384 nt >>= 2; 385 } 386 t = new float[nt]; 387 oldNthreads = nthreads; 388 } 389 if ((nthreads > 1) && useThreads) { 390 ddxt2d_subth(1, a, scale); 391 ddxt2d0_subth(1, a, scale); 392 } else { 393 ddxt2d_sub(1, a, scale); 394 for (int i = 0; i < rows; i++) { 395 dstColumns.inverse(a[i], scale); 396 } 397 } 398 } else { 399 if ((nthreads > 1) && useThreads && (rows >= nthreads) && (columns >= nthreads)) { 400 Future<?>[] futures = new Future[nthreads]; 401 int p = rows / nthreads; 402 for (int l = 0; l < nthreads; l++) { 403 final int firstRow = l * p; 404 final int lastRow = (l == (nthreads - 1)) ? rows : firstRow + p; 405 futures[l] = ConcurrencyUtils.submit(new Runnable() { 406 @Override 407 public void run() { 408 for (int i = firstRow; i < lastRow; i++) { 409 dstColumns.inverse(a[i], scale); 410 } 411 } 412 }); 413 } 414 ConcurrencyUtils.waitForCompletion(futures); 415 p = columns / nthreads; 416 for (int l = 0; l < nthreads; l++) { 417 final int firstColumn = l * p; 418 final int lastColumn = (l == (nthreads - 1)) ? columns : firstColumn + p; 419 futures[l] = ConcurrencyUtils.submit(new Runnable() { 420 @Override 421 public void run() { 422 float[] temp = new float[rows]; 423 for (int c = firstColumn; c < lastColumn; c++) { 424 for (int r = 0; r < rows; r++) { 425 temp[r] = a[r][c]; 426 } 427 dstRows.inverse(temp, scale); 428 for (int r = 0; r < rows; r++) { 429 a[r][c] = temp[r]; 430 } 431 } 432 } 433 }); 434 } 435 ConcurrencyUtils.waitForCompletion(futures); 436 } else { 437 for (int i = 0; i < rows; i++) { 438 dstColumns.inverse(a[i], scale); 439 } 440 float[] temp = new float[rows]; 441 for (int c = 0; c < columns; c++) { 442 for (int r = 0; r < rows; r++) { 443 temp[r] = a[r][c]; 444 } 445 dstRows.inverse(temp, scale); 446 for (int r = 0; r < rows; r++) { 447 a[r][c] = temp[r]; 448 } 449 } 450 } 451 } 452 } 453 454 private void ddxt2d_subth(final int isgn, final float[] a, final boolean scale) { 455 int nthread = ConcurrencyUtils.getNumberOfThreads(); 456 int nt = 4 * rows; 457 if (columns == 2 * nthread) { 458 nt >>= 1; 459 } else if (columns < 2 * nthread) { 460 nthread = columns; 461 nt >>= 2; 462 } 463 final int nthreads = nthread; 464 Future<?>[] futures = new Future[nthreads]; 465 466 for (int i = 0; i < nthreads; i++) { 467 final int n0 = i; 468 final int startt = nt * i; 469 futures[i] = ConcurrencyUtils.submit(new Runnable() { 470 @Override 471 public void run() { 472 int idx1, idx2; 473 if (columns > 2 * nthreads) { 474 if (isgn == -1) { 475 for (int c = 4 * n0; c < columns; c += 4 * nthreads) { 476 for (int r = 0; r < rows; r++) { 477 idx1 = r * columns + c; 478 idx2 = startt + rows + r; 479 t[startt + r] = a[idx1]; 480 t[idx2] = a[idx1 + 1]; 481 t[idx2 + rows] = a[idx1 + 2]; 482 t[idx2 + 2 * rows] = a[idx1 + 3]; 483 } 484 dstRows.forward(t, startt, scale); 485 dstRows.forward(t, startt + rows, scale); 486 dstRows.forward(t, startt + 2 * rows, scale); 487 dstRows.forward(t, startt + 3 * rows, scale); 488 for (int r = 0; r < rows; r++) { 489 idx1 = r * columns + c; 490 idx2 = startt + rows + r; 491 a[idx1] = t[startt + r]; 492 a[idx1 + 1] = t[idx2]; 493 a[idx1 + 2] = t[idx2 + rows]; 494 a[idx1 + 3] = t[idx2 + 2 * rows]; 495 } 496 } 497 } else { 498 for (int c = 4 * n0; c < columns; c += 4 * nthreads) { 499 for (int r = 0; r < rows; r++) { 500 idx1 = r * columns + c; 501 idx2 = startt + rows + r; 502 t[startt + r] = a[idx1]; 503 t[idx2] = a[idx1 + 1]; 504 t[idx2 + rows] = a[idx1 + 2]; 505 t[idx2 + 2 * rows] = a[idx1 + 3]; 506 } 507 dstRows.inverse(t, startt, scale); 508 dstRows.inverse(t, startt + rows, scale); 509 dstRows.inverse(t, startt + 2 * rows, scale); 510 dstRows.inverse(t, startt + 3 * rows, scale); 511 for (int r = 0; r < rows; r++) { 512 idx1 = r * columns + c; 513 idx2 = startt + rows + r; 514 a[idx1] = t[startt + r]; 515 a[idx1 + 1] = t[idx2]; 516 a[idx1 + 2] = t[idx2 + rows]; 517 a[idx1 + 3] = t[idx2 + 2 * rows]; 518 } 519 } 520 } 521 } else if (columns == 2 * nthreads) { 522 for (int r = 0; r < rows; r++) { 523 idx1 = r * columns + 2 * n0; 524 idx2 = startt + r; 525 t[idx2] = a[idx1]; 526 t[idx2 + rows] = a[idx1 + 1]; 527 } 528 if (isgn == -1) { 529 dstRows.forward(t, startt, scale); 530 dstRows.forward(t, startt + rows, scale); 531 } else { 532 dstRows.inverse(t, startt, scale); 533 dstRows.inverse(t, startt + rows, scale); 534 } 535 for (int r = 0; r < rows; r++) { 536 idx1 = r * columns + 2 * n0; 537 idx2 = startt + r; 538 a[idx1] = t[idx2]; 539 a[idx1 + 1] = t[idx2 + rows]; 540 } 541 } else if (columns == nthreads) { 542 for (int r = 0; r < rows; r++) { 543 t[startt + r] = a[r * columns + n0]; 544 } 545 if (isgn == -1) { 546 dstRows.forward(t, startt, scale); 547 } else { 548 dstRows.inverse(t, startt, scale); 549 } 550 for (int r = 0; r < rows; r++) { 551 a[r * columns + n0] = t[startt + r]; 552 } 553 } 554 } 555 }); 556 } 557 ConcurrencyUtils.waitForCompletion(futures); 558 } 559 560 private void ddxt2d_subth(final int isgn, final float[][] a, final boolean scale) { 561 int nthread = ConcurrencyUtils.getNumberOfThreads(); 562 int nt = 4 * rows; 563 if (columns == 2 * nthread) { 564 nt >>= 1; 565 } else if (columns < 2 * nthread) { 566 nthread = columns; 567 nt >>= 2; 568 } 569 final int nthreads = nthread; 570 Future<?>[] futures = new Future[nthreads]; 571 572 for (int i = 0; i < nthreads; i++) { 573 final int n0 = i; 574 final int startt = nt * i; 575 futures[i] = ConcurrencyUtils.submit(new Runnable() { 576 @Override 577 public void run() { 578 int idx2; 579 if (columns > 2 * nthreads) { 580 if (isgn == -1) { 581 for (int c = 4 * n0; c < columns; c += 4 * nthreads) { 582 for (int r = 0; r < rows; r++) { 583 idx2 = startt + rows + r; 584 t[startt + r] = a[r][c]; 585 t[idx2] = a[r][c + 1]; 586 t[idx2 + rows] = a[r][c + 2]; 587 t[idx2 + 2 * rows] = a[r][c + 3]; 588 } 589 dstRows.forward(t, startt, scale); 590 dstRows.forward(t, startt + rows, scale); 591 dstRows.forward(t, startt + 2 * rows, scale); 592 dstRows.forward(t, startt + 3 * rows, scale); 593 for (int r = 0; r < rows; r++) { 594 idx2 = startt + rows + r; 595 a[r][c] = t[startt + r]; 596 a[r][c + 1] = t[idx2]; 597 a[r][c + 2] = t[idx2 + rows]; 598 a[r][c + 3] = t[idx2 + 2 * rows]; 599 } 600 } 601 } else { 602 for (int c = 4 * n0; c < columns; c += 4 * nthreads) { 603 for (int r = 0; r < rows; r++) { 604 idx2 = startt + rows + r; 605 t[startt + r] = a[r][c]; 606 t[idx2] = a[r][c + 1]; 607 t[idx2 + rows] = a[r][c + 2]; 608 t[idx2 + 2 * rows] = a[r][c + 3]; 609 } 610 dstRows.inverse(t, startt, scale); 611 dstRows.inverse(t, startt + rows, scale); 612 dstRows.inverse(t, startt + 2 * rows, scale); 613 dstRows.inverse(t, startt + 3 * rows, scale); 614 for (int r = 0; r < rows; r++) { 615 idx2 = startt + rows + r; 616 a[r][c] = t[startt + r]; 617 a[r][c + 1] = t[idx2]; 618 a[r][c + 2] = t[idx2 + rows]; 619 a[r][c + 3] = t[idx2 + 2 * rows]; 620 } 621 } 622 } 623 } else if (columns == 2 * nthreads) { 624 for (int r = 0; r < rows; r++) { 625 idx2 = startt + r; 626 t[idx2] = a[r][2 * n0]; 627 t[idx2 + rows] = a[r][2 * n0 + 1]; 628 } 629 if (isgn == -1) { 630 dstRows.forward(t, startt, scale); 631 dstRows.forward(t, startt + rows, scale); 632 } else { 633 dstRows.inverse(t, startt, scale); 634 dstRows.inverse(t, startt + rows, scale); 635 } 636 for (int r = 0; r < rows; r++) { 637 idx2 = startt + r; 638 a[r][2 * n0] = t[idx2]; 639 a[r][2 * n0 + 1] = t[idx2 + rows]; 640 } 641 } else if (columns == nthreads) { 642 for (int r = 0; r < rows; r++) { 643 t[startt + r] = a[r][n0]; 644 } 645 if (isgn == -1) { 646 dstRows.forward(t, startt, scale); 647 } else { 648 dstRows.inverse(t, startt, scale); 649 } 650 for (int r = 0; r < rows; r++) { 651 a[r][n0] = t[startt + r]; 652 } 653 } 654 } 655 }); 656 } 657 ConcurrencyUtils.waitForCompletion(futures); 658 } 659 660 private void ddxt2d0_subth(final int isgn, final float[] a, final boolean scale) { 661 final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads(); 662 663 Future<?>[] futures = new Future[nthreads]; 664 665 for (int i = 0; i < nthreads; i++) { 666 final int n0 = i; 667 futures[i] = ConcurrencyUtils.submit(new Runnable() { 668 669 @Override 670 public void run() { 671 if (isgn == -1) { 672 for (int r = n0; r < rows; r += nthreads) { 673 dstColumns.forward(a, r * columns, scale); 674 } 675 } else { 676 for (int r = n0; r < rows; r += nthreads) { 677 dstColumns.inverse(a, r * columns, scale); 678 } 679 } 680 } 681 }); 682 } 683 ConcurrencyUtils.waitForCompletion(futures); 684 } 685 686 private void ddxt2d0_subth(final int isgn, final float[][] a, final boolean scale) { 687 final int nthreads = ConcurrencyUtils.getNumberOfThreads() > rows ? rows : ConcurrencyUtils.getNumberOfThreads(); 688 689 Future<?>[] futures = new Future[nthreads]; 690 691 for (int i = 0; i < nthreads; i++) { 692 final int n0 = i; 693 futures[i] = ConcurrencyUtils.submit(new Runnable() { 694 695 @Override 696 public void run() { 697 if (isgn == -1) { 698 for (int r = n0; r < rows; r += nthreads) { 699 dstColumns.forward(a[r], scale); 700 } 701 } else { 702 for (int r = n0; r < rows; r += nthreads) { 703 dstColumns.inverse(a[r], scale); 704 } 705 } 706 } 707 }); 708 } 709 ConcurrencyUtils.waitForCompletion(futures); 710 } 711 712 private void ddxt2d_sub(int isgn, float[] a, boolean scale) { 713 int idx1, idx2; 714 715 if (columns > 2) { 716 if (isgn == -1) { 717 for (int c = 0; c < columns; c += 4) { 718 for (int r = 0; r < rows; r++) { 719 idx1 = r * columns + c; 720 idx2 = rows + r; 721 t[r] = a[idx1]; 722 t[idx2] = a[idx1 + 1]; 723 t[idx2 + rows] = a[idx1 + 2]; 724 t[idx2 + 2 * rows] = a[idx1 + 3]; 725 } 726 dstRows.forward(t, 0, scale); 727 dstRows.forward(t, rows, scale); 728 dstRows.forward(t, 2 * rows, scale); 729 dstRows.forward(t, 3 * rows, scale); 730 for (int r = 0; r < rows; r++) { 731 idx1 = r * columns + c; 732 idx2 = rows + r; 733 a[idx1] = t[r]; 734 a[idx1 + 1] = t[idx2]; 735 a[idx1 + 2] = t[idx2 + rows]; 736 a[idx1 + 3] = t[idx2 + 2 * rows]; 737 } 738 } 739 } else { 740 for (int c = 0; c < columns; c += 4) { 741 for (int r = 0; r < rows; r++) { 742 idx1 = r * columns + c; 743 idx2 = rows + r; 744 t[r] = a[idx1]; 745 t[idx2] = a[idx1 + 1]; 746 t[idx2 + rows] = a[idx1 + 2]; 747 t[idx2 + 2 * rows] = a[idx1 + 3]; 748 } 749 dstRows.inverse(t, 0, scale); 750 dstRows.inverse(t, rows, scale); 751 dstRows.inverse(t, 2 * rows, scale); 752 dstRows.inverse(t, 3 * rows, scale); 753 for (int r = 0; r < rows; r++) { 754 idx1 = r * columns + c; 755 idx2 = rows + r; 756 a[idx1] = t[r]; 757 a[idx1 + 1] = t[idx2]; 758 a[idx1 + 2] = t[idx2 + rows]; 759 a[idx1 + 3] = t[idx2 + 2 * rows]; 760 } 761 } 762 } 763 } else if (columns == 2) { 764 for (int r = 0; r < rows; r++) { 765 idx1 = r * columns; 766 t[r] = a[idx1]; 767 t[rows + r] = a[idx1 + 1]; 768 } 769 if (isgn == -1) { 770 dstRows.forward(t, 0, scale); 771 dstRows.forward(t, rows, scale); 772 } else { 773 dstRows.inverse(t, 0, scale); 774 dstRows.inverse(t, rows, scale); 775 } 776 for (int r = 0; r < rows; r++) { 777 idx1 = r * columns; 778 a[idx1] = t[r]; 779 a[idx1 + 1] = t[rows + r]; 780 } 781 } 782 } 783 784 private void ddxt2d_sub(int isgn, float[][] a, boolean scale) { 785 int idx2; 786 787 if (columns > 2) { 788 if (isgn == -1) { 789 for (int c = 0; c < columns; c += 4) { 790 for (int r = 0; r < rows; r++) { 791 idx2 = rows + r; 792 t[r] = a[r][c]; 793 t[idx2] = a[r][c + 1]; 794 t[idx2 + rows] = a[r][c + 2]; 795 t[idx2 + 2 * rows] = a[r][c + 3]; 796 } 797 dstRows.forward(t, 0, scale); 798 dstRows.forward(t, rows, scale); 799 dstRows.forward(t, 2 * rows, scale); 800 dstRows.forward(t, 3 * rows, scale); 801 for (int r = 0; r < rows; r++) { 802 idx2 = rows + r; 803 a[r][c] = t[r]; 804 a[r][c + 1] = t[idx2]; 805 a[r][c + 2] = t[idx2 + rows]; 806 a[r][c + 3] = t[idx2 + 2 * rows]; 807 } 808 } 809 } else { 810 for (int c = 0; c < columns; c += 4) { 811 for (int r = 0; r < rows; r++) { 812 idx2 = rows + r; 813 t[r] = a[r][c]; 814 t[idx2] = a[r][c + 1]; 815 t[idx2 + rows] = a[r][c + 2]; 816 t[idx2 + 2 * rows] = a[r][c + 3]; 817 } 818 dstRows.inverse(t, 0, scale); 819 dstRows.inverse(t, rows, scale); 820 dstRows.inverse(t, 2 * rows, scale); 821 dstRows.inverse(t, 3 * rows, scale); 822 for (int r = 0; r < rows; r++) { 823 idx2 = rows + r; 824 a[r][c] = t[r]; 825 a[r][c + 1] = t[idx2]; 826 a[r][c + 2] = t[idx2 + rows]; 827 a[r][c + 3] = t[idx2 + 2 * rows]; 828 } 829 } 830 } 831 } else if (columns == 2) { 832 for (int r = 0; r < rows; r++) { 833 t[r] = a[r][0]; 834 t[rows + r] = a[r][1]; 835 } 836 if (isgn == -1) { 837 dstRows.forward(t, 0, scale); 838 dstRows.forward(t, rows, scale); 839 } else { 840 dstRows.inverse(t, 0, scale); 841 dstRows.inverse(t, rows, scale); 842 } 843 for (int r = 0; r < rows; r++) { 844 a[r][0] = t[r]; 845 a[r][1] = t[rows + r]; 846 } 847 } 848 } 849}