diff options
author | Brian Duff <bduff@google.com> | 2013-11-30 21:15:50 -0800 |
---|---|---|
committer | Brian Duff <bduff@google.com> | 2013-11-30 21:17:46 -0800 |
commit | 598e0aeda91e8ed536b4b2ed670be0055dde3289 (patch) | |
tree | 348efaaa434a28c5df76570c815fbd67eb3c7a22 | |
parent | 85ba2a83c6cd8a10c26124550736a89a2e03f14b (diff) | |
download | fft2d-598e0aeda91e8ed536b4b2ed670be0055dde3289.tar.gz |
Initial commit of fft2d.
Bug: 11946001
Change-Id: I79609cc5a1a011e25a1dfb597a7ddcec532366cb
29 files changed, 15818 insertions, 0 deletions
diff --git a/Android.mk b/Android.mk new file mode 100644 index 0000000..ea70b19 --- /dev/null +++ b/Android.mk @@ -0,0 +1,18 @@ +# Copyright 2013 Google, Inc. + +LOCAL_PATH := $(call my-dir) + +include $(CLEAR_VARS) + +LOCAL_MODULE := libfft2d_static +LOCAL_MODULE_TAGS := optional + +LOCAL_SDK_VERSION := 9 + +LOCAL_SRC_FILES := \ + src/fft2d/fft2d/fftsg.c \ + src/fft2d/fft2d/fftsg2d.c \ + src/fft2d/fft2d/alloc.c \ + src/fft2d/fft2d/shrtdct.c + +include $(BUILD_STATIC_LIBRARY) diff --git a/CleanSpec.mk b/CleanSpec.mk new file mode 100644 index 0000000..b84e1b6 --- /dev/null +++ b/CleanSpec.mk @@ -0,0 +1,49 @@ +# Copyright (C) 2007 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# If you don't need to do a full clean build but would like to touch +# a file or delete some intermediate files, add a clean step to the end +# of the list. These steps will only be run once, if they haven't been +# run before. +# +# E.g.: +# $(call add-clean-step, touch -c external/sqlite/sqlite3.h) +# $(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/STATIC_LIBRARIES/libz_intermediates) +# +# Always use "touch -c" and "rm -f" or "rm -rf" to gracefully deal with +# files that are missing or have been moved. +# +# Use $(PRODUCT_OUT) to get to the "out/target/product/blah/" directory. +# Use $(OUT_DIR) to refer to the "out" directory. +# +# If you need to re-do something that's already mentioned, just copy +# the command and add it to the bottom of the list. E.g., if a change +# that you made last week required touching a file and a change you +# made today requires touching the same file, just copy the old +# touch step and add it to the end of the list. +# +# ************************************************ +# NEWER CLEAN STEPS MUST BE AT THE END OF THE LIST +# ************************************************ + +# For example: +#$(call add-clean-step, rm -rf $(OUT_DIR)/target/common/obj/APPS/AndroidTests_intermediates) +#$(call add-clean-step, rm -rf $(OUT_DIR)/target/common/obj/JAVA_LIBRARIES/core_intermediates) +#$(call add-clean-step, find $(OUT_DIR) -type f -name "IGTalkSession*" -print0 | xargs -0 rm -f) +#$(call add-clean-step, rm -rf $(PRODUCT_OUT)/data/*) + +# ************************************************ +# NEWER CLEAN STEPS MUST BE AT THE END OF THE LIST +# ************************************************ diff --git a/MODULE_LICENSE_NOTICE b/MODULE_LICENSE_NOTICE new file mode 100644 index 0000000..2bd8550 --- /dev/null +++ b/MODULE_LICENSE_NOTICE @@ -0,0 +1,3 @@ +Copyright(C) 1997,2001 Takuya OOURA (email: ooura@kurims.kyoto-u.ac.jp). +You may use, copy, modify this code for any purpose and +without fee. You may distribute this ORIGINAL package. diff --git a/README.android b/README.android new file mode 100644 index 0000000..2575079 --- /dev/null +++ b/README.android @@ -0,0 +1,42 @@ + Third-party FFT (fast Fourier transform) code + +This directory contains the third-party one- and two-dimensional +fast Fourier transform and related sinusoidal transform code written +by Takuya Ooura. + +Depending on your needs, there may be better choices than this +package, e.g.: + +1. David Talkin has written a fast and flexible radix-2 FFT class: + google3/speech/dsp/fft.h + +2. fftw (http://www.fftw.org/) likely offers speed advantages over the +routines in this package and undoubtedly has a much larger user base. +However, it's a much bigger and more complex package. + +Package origin URL: +------------------- + http://momonga.t.u-tokyo.ac.jp/~ooura/fft.html + +Version: +-------- + "fft2d.tgz (53KB) updated: 2001/11/22" + md5sum fft2d.tgz: c02e1826a34a06ad1413209843c8e80f fft2d.tgz + +License: +-------- + + See the bottom of fft2d/readme2d.txt. + + My (popat's) reading of it is + + licenses = ["notice"], + +Downloading and unpacking history: +---------------------------------- + + Tue Jun 6 14:29:50 2006: + wget http://momonga.t.u-tokyo.ac.jp/~ooura/fft2d.tgz + tar xzf fft2d.tgz + rm fft2d.tgz + diff --git a/src/fft2d/fft.h b/src/fft2d/fft.h new file mode 100644 index 0000000..c64d31c --- /dev/null +++ b/src/fft2d/fft.h @@ -0,0 +1,24 @@ +// Copyright 2006 Google Inc. All Rights Reserved. +// Author: popat@google.com (Ashok C. Popat) +// +// Declarations for third-party 1D FFT routines in third_party/fft2d/fft2d. + +#ifndef THIRD_PARTY_FFT2D_FFT_H__ +#define THIRD_PARTY_FFT2D_FFT_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +extern void cdft(int, int, double *, int *, double *); +extern void rdft(int, int, double *, int *, double *); +extern void ddct(int, int, double *, int *, double *); +extern void ddst(int, int, double *, int *, double *); +extern void dfct(int, double *, double *, int *, double *); +extern void dfst(int, double *, double *, int *, double *); + +#ifdef __cplusplus +} +#endif + +#endif // THIRD_PARTY_FFT2D_FFT_H__ diff --git a/src/fft2d/fft2d.h b/src/fft2d/fft2d.h new file mode 100644 index 0000000..fa32a20 --- /dev/null +++ b/src/fft2d/fft2d.h @@ -0,0 +1,24 @@ +// Copyright 2006 Google Inc. All Rights Reserved. +// Author: popat@google.com (Ashok C. Popat) +// +// Declarations for third-party 2D FFT routines in third_party/fft2d/fft2d. + +#ifndef THIRD_PARTY_FFT2D_FFT2D_H__ +#define THIRD_PARTY_FFT2D_FFT2D_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +extern void cdft2d(int, int, int, double **, double *, int *, double *); +extern void rdft2d(int, int, int, double **, double *, int *, double *); +extern void ddct2d(int, int, int, double **, double *, int *, double *); +extern void ddst2d(int, int, int, double **, double *, int *, double *); +extern void ddct8x8s(int isgn, double **a); +extern void ddct16x16s(int isgn, double **a); + +#ifdef __cplusplus +} +#endif + +#endif // THIRD_PARTY_FFT2D_FFT2D_H__ diff --git a/src/fft2d/fft2d/alloc.c b/src/fft2d/fft2d/alloc.c new file mode 100644 index 0000000..7833d88 --- /dev/null +++ b/src/fft2d/fft2d/alloc.c @@ -0,0 +1,153 @@ +/* ---- memory allocation ---- */ +#include "alloc.h" + + +#define alloc_error_check(p) { \ + if ((p) == NULL) { \ + fprintf(stderr, "Allocation Failure!\n"); \ + exit(1); \ + } \ +} + + +int *alloc_1d_int(int n1) +{ + int *i; + + i = (int *) malloc(sizeof(int) * n1); + alloc_error_check(i); + return i; +} + + +void free_1d_int(int *i) +{ + free(i); +} + + +double *alloc_1d_double(int n1) +{ + double *d; + + d = (double *) malloc(sizeof(double) * n1); + alloc_error_check(d); + return d; +} + + +void free_1d_double(double *d) +{ + free(d); +} + + +int **alloc_2d_int(int n1, int n2) +{ + int **ii, *i; + int j; + + ii = (int **) malloc(sizeof(int *) * n1); + alloc_error_check(ii); + i = (int *) malloc(sizeof(int) * n1 * n2); + alloc_error_check(i); + ii[0] = i; + for (j = 1; j < n1; j++) { + ii[j] = ii[j - 1] + n2; + } + return ii; +} + + +void free_2d_int(int **ii) +{ + free(ii[0]); + free(ii); +} + + +double **alloc_2d_double(int n1, int n2) +{ + double **dd, *d; + int j; + + dd = (double **) malloc(sizeof(double *) * n1); + alloc_error_check(dd); + d = (double *) malloc(sizeof(double) * n1 * n2); + alloc_error_check(d); + dd[0] = d; + for (j = 1; j < n1; j++) { + dd[j] = dd[j - 1] + n2; + } + return dd; +} + + +void free_2d_double(double **dd) +{ + free(dd[0]); + free(dd); +} + + +int ***alloc_3d_int(int n1, int n2, int n3) +{ + int ***iii, **ii, *i; + int j; + + iii = (int ***) malloc(sizeof(int **) * n1); + alloc_error_check(iii); + ii = (int **) malloc(sizeof(int *) * n1 * n2); + alloc_error_check(ii); + iii[0] = ii; + for (j = 1; j < n1; j++) { + iii[j] = iii[j - 1] + n2; + } + i = (int *) malloc(sizeof(int) * n1 * n2 * n3); + alloc_error_check(i); + ii[0] = i; + for (j = 1; j < n1 * n2; j++) { + ii[j] = ii[j - 1] + n3; + } + return iii; +} + + +void free_3d_int(int ***iii) +{ + free(iii[0][0]); + free(iii[0]); + free(iii); +} + + +double ***alloc_3d_double(int n1, int n2, int n3) +{ + double ***ddd, **dd, *d; + int j; + + ddd = (double ***) malloc(sizeof(double **) * n1); + alloc_error_check(ddd); + dd = (double **) malloc(sizeof(double *) * n1 * n2); + alloc_error_check(dd); + ddd[0] = dd; + for (j = 1; j < n1; j++) { + ddd[j] = ddd[j - 1] + n2; + } + d = (double *) malloc(sizeof(double) * n1 * n2 * n3); + alloc_error_check(d); + dd[0] = d; + for (j = 1; j < n1 * n2; j++) { + dd[j] = dd[j - 1] + n3; + } + return ddd; +} + + +void free_3d_double(double ***ddd) +{ + free(ddd[0][0]); + free(ddd[0]); + free(ddd); +} + diff --git a/src/fft2d/fft2d/alloc.h b/src/fft2d/fft2d/alloc.h new file mode 100644 index 0000000..3467cc4 --- /dev/null +++ b/src/fft2d/fft2d/alloc.h @@ -0,0 +1,20 @@ +/* ---- memory allocation ---- */ + + +#include <stdlib.h> +#include <stdio.h> + + +int *alloc_1d_int(int n1); +void free_1d_int(int *i); +double *alloc_1d_double(int n1); +void free_1d_double(double *d); +int **alloc_2d_int(int n1, int n2); +void free_2d_int(int **ii); +double **alloc_2d_double(int n1, int n2); +void free_2d_double(double **dd); +int ***alloc_3d_int(int n1, int n2, int n3); +void free_3d_int(int ***iii); +double ***alloc_3d_double(int n1, int n2, int n3); +void free_3d_double(double ***ddd); + diff --git a/src/fft2d/fft2d/fft4f2d.c b/src/fft2d/fft2d/fft4f2d.c new file mode 100644 index 0000000..354c80e --- /dev/null +++ b/src/fft2d/fft2d/fft4f2d.c @@ -0,0 +1,1705 @@ +/* +Fast Fourier/Cosine/Sine Transform + dimension :two + data length :power of 2 + decimation :frequency + radix :4, 2, row-column + data :inplace + table :use +functions + cdft2d: Complex Discrete Fourier Transform + rdft2d: Real Discrete Fourier Transform + ddct2d: Discrete Cosine Transform + ddst2d: Discrete Sine Transform +function prototypes + void cdft2d(int, int, int, double **, int *, double *); + void rdft2d(int, int, int, double **, int *, double *); + void ddct2d(int, int, int, double **, double **, int *, double *); + void ddst2d(int, int, int, double **, double **, int *, double *); + + +-------- Complex DFT (Discrete Fourier Transform) -------- + [definition] + <case1> + X[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 x[j1][j2] * + exp(2*pi*i*j1*k1/n1) * + exp(2*pi*i*j2*k2/n2), 0<=k1<n1, 0<=k2<n2 + <case2> + X[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 x[j1][j2] * + exp(-2*pi*i*j1*k1/n1) * + exp(-2*pi*i*j2*k2/n2), 0<=k1<n1, 0<=k2<n2 + (notes: sum_j=0^n-1 is a summation from j=0 to n-1) + [usage] + <case1> + ip[0] = 0; // first time only + cdft2d(n1, 2*n2, 1, a, ip, w); + <case2> + ip[0] = 0; // first time only + cdft2d(n1, 2*n2, -1, a, ip, w); + [parameters] + n1 :data length (int) + n1 >= 1, n1 = power of 2 + 2*n2 :data length (int) + n2 >= 1, n2 = power of 2 + a[0...n1-1][0...2*n2-1] + :input/output data (double **) + input data + a[j1][2*j2] = Re(x[j1][j2]), + a[j1][2*j2+1] = Im(x[j1][j2]), + 0<=j1<n1, 0<=j2<n2 + output data + a[k1][2*k2] = Re(X[k1][k2]), + a[k1][2*k2+1] = Im(X[k1][k2]), + 0<=k1<n1, 0<=k2<n2 + ip[0...*] + :work area for bit reversal (int *) + length of ip >= 2+sqrt(n) + (n = max(n1, n2)) + ip[0],ip[1] are pointers of the cos/sin table. + w[0...*] + :cos/sin table (double *) + length of w >= max(n1/2, n2/2) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + cdft2d(n1, 2*n2, -1, a, ip, w); + is + cdft2d(n1, 2*n2, 1, a, ip, w); + for (j1 = 0; j1 <= n1 - 1; j1++) { + for (j2 = 0; j2 <= 2 * n2 - 1; j2++) { + a[j1][j2] *= 1.0 / (n1 * n2); + } + } + . + + +-------- Real DFT / Inverse of Real DFT -------- + [definition] + <case1> RDFT + R[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * + cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2), + 0<=k1<n1, 0<=k2<n2 + I[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * + sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2), + 0<=k1<n1, 0<=k2<n2 + <case2> IRDFT (excluding scale) + a[k1][k2] = (1/2) * sum_j1=0^n1-1 sum_j2=0^n2-1 + (R[j1][j2] * + cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2) + + I[j1][j2] * + sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2)), + 0<=k1<n1, 0<=k2<n2 + (notes: R[n1-k1][n2-k2] = R[k1][k2], + I[n1-k1][n2-k2] = -I[k1][k2], + R[n1-k1][0] = R[k1][0], + I[n1-k1][0] = -I[k1][0], + R[0][n2-k2] = R[0][k2], + I[0][n2-k2] = -I[0][k2], + 0<k1<n1, 0<k2<n2) + [usage] + <case1> + ip[0] = 0; // first time only + rdft2d(n1, n2, 1, a, ip, w); + <case2> + ip[0] = 0; // first time only + rdft2d(n1, n2, -1, a, ip, w); + [parameters] + n1 :data length (int) + n1 >= 2, n1 = power of 2 + n2 :data length (int) + n2 >= 2, n2 = power of 2 + a[0...n1-1][0...n2-1] + :input/output data (double **) + <case1> + output data + a[k1][2*k2] = R[k1][k2] = R[n1-k1][n2-k2], + a[k1][2*k2+1] = I[k1][k2] = -I[n1-k1][n2-k2], + 0<k1<n1, 0<k2<n2/2, + a[0][2*k2] = R[0][k2] = R[0][n2-k2], + a[0][2*k2+1] = I[0][k2] = -I[0][n2-k2], + 0<k2<n2/2, + a[k1][0] = R[k1][0] = R[n1-k1][0], + a[k1][1] = I[k1][0] = -I[n1-k1][0], + a[n1-k1][1] = R[k1][n2/2] = R[n1-k1][n2/2], + a[n1-k1][0] = -I[k1][n2/2] = I[n1-k1][n2/2], + 0<k1<n1/2, + a[0][0] = R[0][0], + a[0][1] = R[0][n2/2], + a[n1/2][0] = R[n1/2][0], + a[n1/2][1] = R[n1/2][n2/2] + <case2> + input data + a[j1][2*j2] = R[j1][j2] = R[n1-j1][n2-j2], + a[j1][2*j2+1] = I[j1][j2] = -I[n1-j1][n2-j2], + 0<j1<n1, 0<j2<n2/2, + a[0][2*j2] = R[0][j2] = R[0][n2-j2], + a[0][2*j2+1] = I[0][j2] = -I[0][n2-j2], + 0<j2<n2/2, + a[j1][0] = R[j1][0] = R[n1-j1][0], + a[j1][1] = I[j1][0] = -I[n1-j1][0], + a[n1-j1][1] = R[j1][n2/2] = R[n1-j1][n2/2], + a[n1-j1][0] = -I[j1][n2/2] = I[n1-j1][n2/2], + 0<j1<n1/2, + a[0][0] = R[0][0], + a[0][1] = R[0][n2/2], + a[n1/2][0] = R[n1/2][0], + a[n1/2][1] = R[n1/2][n2/2] + ip[0...*] + :work area for bit reversal (int *) + length of ip >= 2+sqrt(n) + (n = max(n1, n2/2)) + ip[0],ip[1] are pointers of the cos/sin table. + w[0...*] + :cos/sin table (double *) + length of w >= max(n1/2, n2/4) + n2/4 + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + rdft2d(n1, n2, 1, a, ip, w); + is + rdft2d(n1, n2, -1, a, ip, w); + for (j1 = 0; j1 <= n1 - 1; j1++) { + for (j2 = 0; j2 <= n2 - 1; j2++) { + a[j1][j2] *= 2.0 / (n1 * n2); + } + } + . + + +-------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- + [definition] + <case1> IDCT (excluding scale) + C[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * + cos(pi*j1*(k1+1/2)/n1) * + cos(pi*j2*(k2+1/2)/n2), + 0<=k1<n1, 0<=k2<n2 + <case2> DCT + C[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * + cos(pi*(j1+1/2)*k1/n1) * + cos(pi*(j2+1/2)*k2/n2), + 0<=k1<n1, 0<=k2<n2 + [usage] + <case1> + ip[0] = 0; // first time only + ddct2d(n1, n2, 1, a, t, ip, w); + <case2> + ip[0] = 0; // first time only + ddct2d(n1, n2, -1, a, t, ip, w); + [parameters] + n1 :data length (int) + n1 >= 2, n1 = power of 2 + n2 :data length (int) + n2 >= 2, n2 = power of 2 + a[0...n1-1][0...n2-1] + :input/output data (double **) + output data + a[k1][k2] = C[k1][k2], 0<=k1<n1, 0<=k2<n2 + t[0...n1-1][0...n2-1] + :work area (double **) + ip[0...*] + :work area for bit reversal (int *) + length of ip >= 2+sqrt(n) + (n = max(n1, n2/2)) + ip[0],ip[1] are pointers of the cos/sin table. + w[0...*] + :cos/sin table (double *) + length of w >= max(n1/2, n2/4) + max(n1, n2) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + ddct2d(n1, n2, -1, a, t, ip, w); + is + for (j1 = 0; j1 <= n1 - 1; j1++) { + a[j1][0] *= 0.5; + } + for (j2 = 0; j2 <= n2 - 1; j2++) { + a[0][j2] *= 0.5; + } + ddct2d(n1, n2, 1, a, t, ip, w); + for (j1 = 0; j1 <= n1 - 1; j1++) { + for (j2 = 0; j2 <= n2 - 1; j2++) { + a[j1][j2] *= 4.0 / (n1 * n2); + } + } + . + + +-------- DST (Discrete Sine Transform) / Inverse of DST -------- + [definition] + <case1> IDST (excluding scale) + S[k1][k2] = sum_j1=1^n1 sum_j2=1^n2 A[j1][j2] * + sin(pi*j1*(k1+1/2)/n1) * + sin(pi*j2*(k2+1/2)/n2), + 0<=k1<n1, 0<=k2<n2 + <case2> DST + S[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * + sin(pi*(j1+1/2)*k1/n1) * + sin(pi*(j2+1/2)*k2/n2), + 0<k1<=n1, 0<k2<=n2 + [usage] + <case1> + ip[0] = 0; // first time only + ddst2d(n1, n2, 1, a, t, ip, w); + <case2> + ip[0] = 0; // first time only + ddst2d(n1, n2, -1, a, t, ip, w); + [parameters] + n1 :data length (int) + n1 >= 2, n1 = power of 2 + n2 :data length (int) + n2 >= 2, n2 = power of 2 + a[0...n1-1][0...n2-1] + :input/output data (double **) + <case1> + input data + a[j1][j2] = A[j1][j2], 0<j1<n1, 0<j2<n2, + a[j1][0] = A[j1][n2], 0<j1<n1, + a[0][j2] = A[n1][j2], 0<j2<n2, + a[0][0] = A[n1][n2] + (i.e. A[j1][j2] = a[j1 % n1][j2 % n2]) + output data + a[k1][k2] = S[k1][k2], 0<=k1<n1, 0<=k2<n2 + <case2> + output data + a[k1][k2] = S[k1][k2], 0<k1<n1, 0<k2<n2, + a[k1][0] = S[k1][n2], 0<k1<n1, + a[0][k2] = S[n1][k2], 0<k2<n2, + a[0][0] = S[n1][n2] + (i.e. S[k1][k2] = a[k1 % n1][k2 % n2]) + t[0...n1-1][0...n2-1] + :work area (double **) + ip[0...*] + :work area for bit reversal (int *) + length of ip >= 2+sqrt(n) + (n = max(n1, n2/2)) + ip[0],ip[1] are pointers of the cos/sin table. + w[0...*] + :cos/sin table (double *) + length of w >= max(n1/2, n2/4) + max(n1, n2) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + ddst2d(n1, n2, -1, a, t, ip, w); + is + for (j1 = 0; j1 <= n1 - 1; j1++) { + a[j1][0] *= 0.5; + } + for (j2 = 0; j2 <= n2 - 1; j2++) { + a[0][j2] *= 0.5; + } + ddst2d(n1, n2, 1, a, t, ip, w); + for (j1 = 0; j1 <= n1 - 1; j1++) { + for (j2 = 0; j2 <= n2 - 1; j2++) { + a[j1][j2] *= 4.0 / (n1 * n2); + } + } + . +*/ + + +void cdft2d(int n1, int n2, int isgn, double **a, int *ip, double *w) +{ + void makewt(int nw, int *ip, double *w); + void bitrv2col(int n1, int n, int *ip, double **a); + void bitrv2row(int n, int n2, int *ip, double **a); + void cftbcol(int n1, int n, double **a, double *w); + void cftbrow(int n, int n2, double **a, double *w); + void cftfcol(int n1, int n, double **a, double *w); + void cftfrow(int n, int n2, double **a, double *w); + int n; + + n = n1 << 1; + if (n < n2) { + n = n2; + } + if (n > (ip[0] << 2)) { + makewt(n >> 2, ip, w); + } + if (n2 > 4) { + bitrv2col(n1, n2, ip + 2, a); + } + if (n1 > 2) { + bitrv2row(n1, n2, ip + 2, a); + } + if (isgn < 0) { + cftfcol(n1, n2, a, w); + cftfrow(n1, n2, a, w); + } else { + cftbcol(n1, n2, a, w); + cftbrow(n1, n2, a, w); + } +} + + +void rdft2d(int n1, int n2, int isgn, double **a, int *ip, double *w) +{ + void makewt(int nw, int *ip, double *w); + void makect(int nc, int *ip, double *c); + void bitrv2col(int n1, int n, int *ip, double **a); + void bitrv2row(int n, int n2, int *ip, double **a); + void cftbcol(int n1, int n, double **a, double *w); + void cftbrow(int n, int n2, double **a, double *w); + void cftfcol(int n1, int n, double **a, double *w); + void cftfrow(int n, int n2, double **a, double *w); + void rftbcol(int n1, int n, double **a, int nc, double *c); + void rftfcol(int n1, int n, double **a, int nc, double *c); + int n, nw, nc, n1h, i, j; + double xi; + + n = n1 << 1; + if (n < n2) { + n = n2; + } + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n2 > (nc << 2)) { + nc = n2 >> 2; + makect(nc, ip, w + nw); + } + n1h = n1 >> 1; + if (isgn < 0) { + for (i = 1; i <= n1h - 1; i++) { + j = n1 - i; + xi = a[i][0] - a[j][0]; + a[i][0] += a[j][0]; + a[j][0] = xi; + xi = a[j][1] - a[i][1]; + a[i][1] += a[j][1]; + a[j][1] = xi; + } + if (n1 > 2) { + bitrv2row(n1, n2, ip + 2, a); + } + cftfrow(n1, n2, a, w); + for (i = 0; i <= n1 - 1; i++) { + a[i][1] = 0.5 * (a[i][0] - a[i][1]); + a[i][0] -= a[i][1]; + } + if (n2 > 4) { + rftfcol(n1, n2, a, nc, w + nw); + bitrv2col(n1, n2, ip + 2, a); + } + cftfcol(n1, n2, a, w); + } else { + if (n2 > 4) { + bitrv2col(n1, n2, ip + 2, a); + } + cftbcol(n1, n2, a, w); + if (n2 > 4) { + rftbcol(n1, n2, a, nc, w + nw); + } + for (i = 0; i <= n1 - 1; i++) { + xi = a[i][0] - a[i][1]; + a[i][0] += a[i][1]; + a[i][1] = xi; + } + if (n1 > 2) { + bitrv2row(n1, n2, ip + 2, a); + } + cftbrow(n1, n2, a, w); + for (i = 1; i <= n1h - 1; i++) { + j = n1 - i; + a[j][0] = 0.5 * (a[i][0] - a[j][0]); + a[i][0] -= a[j][0]; + a[j][1] = 0.5 * (a[i][1] + a[j][1]); + a[i][1] -= a[j][1]; + } + } +} + + +void ddct2d(int n1, int n2, int isgn, double **a, double **t, + int *ip, double *w) +{ + void makewt(int nw, int *ip, double *w); + void makect(int nc, int *ip, double *c); + void bitrv2col(int n1, int n, int *ip, double **a); + void bitrv2row(int n, int n2, int *ip, double **a); + void cftbcol(int n1, int n, double **a, double *w); + void cftbrow(int n, int n2, double **a, double *w); + void cftfcol(int n1, int n, double **a, double *w); + void cftfrow(int n, int n2, double **a, double *w); + void rftbcol(int n1, int n, double **a, int nc, double *c); + void rftfcol(int n1, int n, double **a, int nc, double *c); + void dctbsub(int n1, int n2, double **a, int nc, double *c); + void dctfsub(int n1, int n2, double **a, int nc, double *c); + int n, nw, nc, n1h, n2h, i, ix, ic, j, jx, jc; + double xi; + + n = n1 << 1; + if (n < n2) { + n = n2; + } + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n1 > nc || n2 > nc) { + if (n1 > n2) { + nc = n1; + } else { + nc = n2; + } + makect(nc, ip, w + nw); + } + n1h = n1 >> 1; + n2h = n2 >> 1; + if (isgn >= 0) { + for (i = 0; i <= n1 - 1; i++) { + for (j = 1; j <= n2h - 1; j++) { + jx = j << 1; + t[i][jx] = a[i][j]; + t[i][jx + 1] = a[i][n2 - j]; + } + } + t[0][0] = a[0][0]; + t[0][1] = a[0][n2h]; + t[n1h][0] = a[n1h][0]; + t[n1h][1] = a[n1h][n2h]; + for (i = 1; i <= n1h - 1; i++) { + ic = n1 - i; + t[i][0] = a[i][0]; + t[ic][1] = a[i][n2h]; + t[i][1] = a[ic][0]; + t[ic][0] = a[ic][n2h]; + } + dctfsub(n1, n2, t, nc, w + nw); + if (n1 > 2) { + bitrv2row(n1, n2, ip + 2, t); + } + cftfrow(n1, n2, t, w); + for (i = 0; i <= n1 - 1; i++) { + t[i][1] = 0.5 * (t[i][0] - t[i][1]); + t[i][0] -= t[i][1]; + } + if (n2 > 4) { + rftfcol(n1, n2, t, nc, w + nw); + bitrv2col(n1, n2, ip + 2, t); + } + cftfcol(n1, n2, t, w); + for (i = 0; i <= n1h - 1; i++) { + ix = i << 1; + ic = n1 - 1 - i; + for (j = 0; j <= n2h - 1; j++) { + jx = j << 1; + jc = n2 - 1 - j; + a[ix][jx] = t[i][j]; + a[ix][jx + 1] = t[i][jc]; + a[ix + 1][jx] = t[ic][j]; + a[ix + 1][jx + 1] = t[ic][jc]; + } + } + } else { + for (i = 0; i <= n1h - 1; i++) { + ix = i << 1; + ic = n1 - 1 - i; + for (j = 0; j <= n2h - 1; j++) { + jx = j << 1; + jc = n2 - 1 - j; + t[i][j] = a[ix][jx]; + t[i][jc] = a[ix][jx + 1]; + t[ic][j] = a[ix + 1][jx]; + t[ic][jc] = a[ix + 1][jx + 1]; + } + } + if (n2 > 4) { + bitrv2col(n1, n2, ip + 2, t); + } + cftbcol(n1, n2, t, w); + if (n2 > 4) { + rftbcol(n1, n2, t, nc, w + nw); + } + for (i = 0; i <= n1 - 1; i++) { + xi = t[i][0] - t[i][1]; + t[i][0] += t[i][1]; + t[i][1] = xi; + } + if (n1 > 2) { + bitrv2row(n1, n2, ip + 2, t); + } + cftbrow(n1, n2, t, w); + dctbsub(n1, n2, t, nc, w + nw); + for (i = 0; i <= n1 - 1; i++) { + for (j = 1; j <= n2h - 1; j++) { + jx = j << 1; + a[i][j] = t[i][jx]; + a[i][n2 - j] = t[i][jx + 1]; + } + } + a[0][0] = t[0][0]; + a[0][n2h] = t[0][1]; + a[n1h][0] = t[n1h][0]; + a[n1h][n2h] = t[n1h][1]; + for (i = 1; i <= n1h - 1; i++) { + ic = n1 - i; + a[i][0] = t[i][0]; + a[i][n2h] = t[ic][1]; + a[ic][0] = t[i][1]; + a[ic][n2h] = t[ic][0]; + } + } +} + + +void ddst2d(int n1, int n2, int isgn, double **a, double **t, + int *ip, double *w) +{ + void makewt(int nw, int *ip, double *w); + void makect(int nc, int *ip, double *c); + void bitrv2col(int n1, int n, int *ip, double **a); + void bitrv2row(int n, int n2, int *ip, double **a); + void cftbcol(int n1, int n, double **a, double *w); + void cftbrow(int n, int n2, double **a, double *w); + void cftfcol(int n1, int n, double **a, double *w); + void cftfrow(int n, int n2, double **a, double *w); + void rftbcol(int n1, int n, double **a, int nc, double *c); + void rftfcol(int n1, int n, double **a, int nc, double *c); + void dstbsub(int n1, int n2, double **a, int nc, double *c); + void dstfsub(int n1, int n2, double **a, int nc, double *c); + int n, nw, nc, n1h, n2h, i, ix, ic, j, jx, jc; + double xi; + + n = n1 << 1; + if (n < n2) { + n = n2; + } + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n1 > nc || n2 > nc) { + if (n1 > n2) { + nc = n1; + } else { + nc = n2; + } + makect(nc, ip, w + nw); + } + n1h = n1 >> 1; + n2h = n2 >> 1; + if (isgn >= 0) { + for (i = 0; i <= n1 - 1; i++) { + for (j = 1; j <= n2h - 1; j++) { + jx = j << 1; + t[i][jx] = a[i][j]; + t[i][jx + 1] = a[i][n2 - j]; + } + } + t[0][0] = a[0][0]; + t[0][1] = a[0][n2h]; + t[n1h][0] = a[n1h][0]; + t[n1h][1] = a[n1h][n2h]; + for (i = 1; i <= n1h - 1; i++) { + ic = n1 - i; + t[i][0] = a[i][0]; + t[ic][1] = a[i][n2h]; + t[i][1] = a[ic][0]; + t[ic][0] = a[ic][n2h]; + } + dstfsub(n1, n2, t, nc, w + nw); + if (n1 > 2) { + bitrv2row(n1, n2, ip + 2, t); + } + cftfrow(n1, n2, t, w); + for (i = 0; i <= n1 - 1; i++) { + t[i][1] = 0.5 * (t[i][0] - t[i][1]); + t[i][0] -= t[i][1]; + } + if (n2 > 4) { + rftfcol(n1, n2, t, nc, w + nw); + bitrv2col(n1, n2, ip + 2, t); + } + cftfcol(n1, n2, t, w); + for (i = 0; i <= n1h - 1; i++) { + ix = i << 1; + ic = n1 - 1 - i; + for (j = 0; j <= n2h - 1; j++) { + jx = j << 1; + jc = n2 - 1 - j; + a[ix][jx] = t[i][j]; + a[ix][jx + 1] = -t[i][jc]; + a[ix + 1][jx] = -t[ic][j]; + a[ix + 1][jx + 1] = t[ic][jc]; + } + } + } else { + for (i = 0; i <= n1h - 1; i++) { + ix = i << 1; + ic = n1 - 1 - i; + for (j = 0; j <= n2h - 1; j++) { + jx = j << 1; + jc = n2 - 1 - j; + t[i][j] = a[ix][jx]; + t[i][jc] = -a[ix][jx + 1]; + t[ic][j] = -a[ix + 1][jx]; + t[ic][jc] = a[ix + 1][jx + 1]; + } + } + if (n2 > 4) { + bitrv2col(n1, n2, ip + 2, t); + } + cftbcol(n1, n2, t, w); + if (n2 > 4) { + rftbcol(n1, n2, t, nc, w + nw); + } + for (i = 0; i <= n1 - 1; i++) { + xi = t[i][0] - t[i][1]; + t[i][0] += t[i][1]; + t[i][1] = xi; + } + if (n1 > 2) { + bitrv2row(n1, n2, ip + 2, t); + } + cftbrow(n1, n2, t, w); + dstbsub(n1, n2, t, nc, w + nw); + for (i = 0; i <= n1 - 1; i++) { + for (j = 1; j <= n2h - 1; j++) { + jx = j << 1; + a[i][j] = t[i][jx]; + a[i][n2 - j] = t[i][jx + 1]; + } + } + a[0][0] = t[0][0]; + a[0][n2h] = t[0][1]; + a[n1h][0] = t[n1h][0]; + a[n1h][n2h] = t[n1h][1]; + for (i = 1; i <= n1h - 1; i++) { + ic = n1 - i; + a[i][0] = t[i][0]; + a[i][n2h] = t[ic][1]; + a[ic][0] = t[i][1]; + a[ic][n2h] = t[ic][0]; + } + } +} + + +/* -------- initializing routines -------- */ + + +#include <math.h> + +void makewt(int nw, int *ip, double *w) +{ + void bitrv2(int n, int *ip, double *a); + int nwh, j; + double delta, x, y; + + ip[0] = nw; + ip[1] = 1; + if (nw > 2) { + nwh = nw >> 1; + delta = atan(1.0) / nwh; + w[0] = 1; + w[1] = 0; + w[nwh] = cos(delta * nwh); + w[nwh + 1] = w[nwh]; + for (j = 2; j <= nwh - 2; j += 2) { + x = cos(delta * j); + y = sin(delta * j); + w[j] = x; + w[j + 1] = y; + w[nw - j] = y; + w[nw - j + 1] = x; + } + bitrv2(nw, ip + 2, w); + } +} + + +void makect(int nc, int *ip, double *c) +{ + int nch, j; + double delta; + + ip[1] = nc; + if (nc > 1) { + nch = nc >> 1; + delta = atan(1.0) / nch; + c[0] = 0.5; + c[nch] = 0.5 * cos(delta * nch); + for (j = 1; j <= nch - 1; j++) { + c[j] = 0.5 * cos(delta * j); + c[nc - j] = 0.5 * sin(delta * j); + } + } +} + + +/* -------- child routines -------- */ + + +void bitrv2(int n, int *ip, double *a) +{ + int j, j1, k, k1, l, m, m2; + double xr, xi; + + ip[0] = 0; + l = n; + m = 1; + while ((m << 2) < l) { + l >>= 1; + for (j = 0; j <= m - 1; j++) { + ip[m + j] = ip[j] + l; + } + m <<= 1; + } + if ((m << 2) > l) { + for (k = 1; k <= m - 1; k++) { + for (j = 0; j <= k - 1; j++) { + j1 = (j << 1) + ip[k]; + k1 = (k << 1) + ip[j]; + xr = a[j1]; + xi = a[j1 + 1]; + a[j1] = a[k1]; + a[j1 + 1] = a[k1 + 1]; + a[k1] = xr; + a[k1 + 1] = xi; + } + } + } else { + m2 = m << 1; + for (k = 1; k <= m - 1; k++) { + for (j = 0; j <= k - 1; j++) { + j1 = (j << 1) + ip[k]; + k1 = (k << 1) + ip[j]; + xr = a[j1]; + xi = a[j1 + 1]; + a[j1] = a[k1]; + a[j1 + 1] = a[k1 + 1]; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 += m2; + xr = a[j1]; + xi = a[j1 + 1]; + a[j1] = a[k1]; + a[j1 + 1] = a[k1 + 1]; + a[k1] = xr; + a[k1 + 1] = xi; + } + } + } +} + + +void bitrv2col(int n1, int n, int *ip, double **a) +{ + int i, j, j1, k, k1, l, m, m2; + double xr, xi; + + ip[0] = 0; + l = n; + m = 1; + while ((m << 2) < l) { + l >>= 1; + for (j = 0; j <= m - 1; j++) { + ip[m + j] = ip[j] + l; + } + m <<= 1; + } + if ((m << 2) > l) { + for (i = 0; i <= n1 - 1; i++) { + for (k = 1; k <= m - 1; k++) { + for (j = 0; j <= k - 1; j++) { + j1 = (j << 1) + ip[k]; + k1 = (k << 1) + ip[j]; + xr = a[i][j1]; + xi = a[i][j1 + 1]; + a[i][j1] = a[i][k1]; + a[i][j1 + 1] = a[i][k1 + 1]; + a[i][k1] = xr; + a[i][k1 + 1] = xi; + } + } + } + } else { + m2 = m << 1; + for (i = 0; i <= n1 - 1; i++) { + for (k = 1; k <= m - 1; k++) { + for (j = 0; j <= k - 1; j++) { + j1 = (j << 1) + ip[k]; + k1 = (k << 1) + ip[j]; + xr = a[i][j1]; + xi = a[i][j1 + 1]; + a[i][j1] = a[i][k1]; + a[i][j1 + 1] = a[i][k1 + 1]; + a[i][k1] = xr; + a[i][k1 + 1] = xi; + j1 += m2; + k1 += m2; + xr = a[i][j1]; + xi = a[i][j1 + 1]; + a[i][j1] = a[i][k1]; + a[i][j1 + 1] = a[i][k1 + 1]; + a[i][k1] = xr; + a[i][k1 + 1] = xi; + } + } + } + } +} + + +void bitrv2row(int n, int n2, int *ip, double **a) +{ + int i, j, j1, k, k1, l, m; + double xr, xi; + + ip[0] = 0; + l = n; + m = 1; + while ((m << 1) < l) { + l >>= 1; + for (j = 0; j <= m - 1; j++) { + ip[m + j] = ip[j] + l; + } + m <<= 1; + } + if ((m << 1) > l) { + for (k = 1; k <= m - 1; k++) { + for (j = 0; j <= k - 1; j++) { + j1 = j + ip[k]; + k1 = k + ip[j]; + for (i = 0; i <= n2 - 2; i += 2) { + xr = a[j1][i]; + xi = a[j1][i + 1]; + a[j1][i] = a[k1][i]; + a[j1][i + 1] = a[k1][i + 1]; + a[k1][i] = xr; + a[k1][i + 1] = xi; + } + } + } + } else { + for (k = 1; k <= m - 1; k++) { + for (j = 0; j <= k - 1; j++) { + j1 = j + ip[k]; + k1 = k + ip[j]; + for (i = 0; i <= n2 - 2; i += 2) { + xr = a[j1][i]; + xi = a[j1][i + 1]; + a[j1][i] = a[k1][i]; + a[j1][i + 1] = a[k1][i + 1]; + a[k1][i] = xr; + a[k1][i + 1] = xi; + } + j1 += m; + k1 += m; + for (i = 0; i <= n2 - 2; i += 2) { + xr = a[j1][i]; + xi = a[j1][i + 1]; + a[j1][i] = a[k1][i]; + a[j1][i + 1] = a[k1][i + 1]; + a[k1][i] = xr; + a[k1][i + 1] = xi; + } + } + } + } +} + + +void cftbcol(int n1, int n, double **a, double *w) +{ + int i, j, j1, j2, j3, k, k1, ks, l, m; + double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; + double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + for (i = 0; i <= n1 - 1; i++) { + l = 2; + while ((l << 1) < n) { + m = l << 2; + for (j = 0; j <= l - 2; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[i][j] + a[i][j1]; + x0i = a[i][j + 1] + a[i][j1 + 1]; + x1r = a[i][j] - a[i][j1]; + x1i = a[i][j + 1] - a[i][j1 + 1]; + x2r = a[i][j2] + a[i][j3]; + x2i = a[i][j2 + 1] + a[i][j3 + 1]; + x3r = a[i][j2] - a[i][j3]; + x3i = a[i][j2 + 1] - a[i][j3 + 1]; + a[i][j] = x0r + x2r; + a[i][j + 1] = x0i + x2i; + a[i][j2] = x0r - x2r; + a[i][j2 + 1] = x0i - x2i; + a[i][j1] = x1r - x3i; + a[i][j1 + 1] = x1i + x3r; + a[i][j3] = x1r + x3i; + a[i][j3 + 1] = x1i - x3r; + } + if (m < n) { + wk1r = w[2]; + for (j = m; j <= l + m - 2; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[i][j] + a[i][j1]; + x0i = a[i][j + 1] + a[i][j1 + 1]; + x1r = a[i][j] - a[i][j1]; + x1i = a[i][j + 1] - a[i][j1 + 1]; + x2r = a[i][j2] + a[i][j3]; + x2i = a[i][j2 + 1] + a[i][j3 + 1]; + x3r = a[i][j2] - a[i][j3]; + x3i = a[i][j2 + 1] - a[i][j3 + 1]; + a[i][j] = x0r + x2r; + a[i][j + 1] = x0i + x2i; + a[i][j2] = x2i - x0i; + a[i][j2 + 1] = x0r - x2r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[i][j1] = wk1r * (x0r - x0i); + a[i][j1 + 1] = wk1r * (x0r + x0i); + x0r = x3i + x1r; + x0i = x3r - x1i; + a[i][j3] = wk1r * (x0i - x0r); + a[i][j3 + 1] = wk1r * (x0i + x0r); + } + k1 = 1; + ks = -1; + for (k = (m << 1); k <= n - m; k += m) { + k1++; + ks = -ks; + wk1r = w[k1 << 1]; + wk1i = w[(k1 << 1) + 1]; + wk2r = ks * w[k1]; + wk2i = w[k1 + ks]; + wk3r = wk1r - 2 * wk2i * wk1i; + wk3i = 2 * wk2i * wk1r - wk1i; + for (j = k; j <= l + k - 2; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[i][j] + a[i][j1]; + x0i = a[i][j + 1] + a[i][j1 + 1]; + x1r = a[i][j] - a[i][j1]; + x1i = a[i][j + 1] - a[i][j1 + 1]; + x2r = a[i][j2] + a[i][j3]; + x2i = a[i][j2 + 1] + a[i][j3 + 1]; + x3r = a[i][j2] - a[i][j3]; + x3i = a[i][j2 + 1] - a[i][j3 + 1]; + a[i][j] = x0r + x2r; + a[i][j + 1] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[i][j2] = wk2r * x0r - wk2i * x0i; + a[i][j2 + 1] = wk2r * x0i + wk2i * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[i][j1] = wk1r * x0r - wk1i * x0i; + a[i][j1 + 1] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[i][j3] = wk3r * x0r - wk3i * x0i; + a[i][j3 + 1] = wk3r * x0i + wk3i * x0r; + } + } + } + l = m; + } + if (l < n) { + for (j = 0; j <= l - 2; j += 2) { + j1 = j + l; + x0r = a[i][j] - a[i][j1]; + x0i = a[i][j + 1] - a[i][j1 + 1]; + a[i][j] += a[i][j1]; + a[i][j + 1] += a[i][j1 + 1]; + a[i][j1] = x0r; + a[i][j1 + 1] = x0i; + } + } + } +} + + +void cftbrow(int n, int n2, double **a, double *w) +{ + int i, j, j1, j2, j3, k, k1, ks, l, m; + double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; + double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + l = 1; + while ((l << 1) < n) { + m = l << 2; + for (j = 0; j <= l - 1; j++) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + for (i = 0; i <= n2 - 2; i += 2) { + x0r = a[j][i] + a[j1][i]; + x0i = a[j][i + 1] + a[j1][i + 1]; + x1r = a[j][i] - a[j1][i]; + x1i = a[j][i + 1] - a[j1][i + 1]; + x2r = a[j2][i] + a[j3][i]; + x2i = a[j2][i + 1] + a[j3][i + 1]; + x3r = a[j2][i] - a[j3][i]; + x3i = a[j2][i + 1] - a[j3][i + 1]; + a[j][i] = x0r + x2r; + a[j][i + 1] = x0i + x2i; + a[j2][i] = x0r - x2r; + a[j2][i + 1] = x0i - x2i; + a[j1][i] = x1r - x3i; + a[j1][i + 1] = x1i + x3r; + a[j3][i] = x1r + x3i; + a[j3][i + 1] = x1i - x3r; + } + } + if (m < n) { + wk1r = w[2]; + for (j = m; j <= l + m - 1; j++) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + for (i = 0; i <= n2 - 2; i += 2) { + x0r = a[j][i] + a[j1][i]; + x0i = a[j][i + 1] + a[j1][i + 1]; + x1r = a[j][i] - a[j1][i]; + x1i = a[j][i + 1] - a[j1][i + 1]; + x2r = a[j2][i] + a[j3][i]; + x2i = a[j2][i + 1] + a[j3][i + 1]; + x3r = a[j2][i] - a[j3][i]; + x3i = a[j2][i + 1] - a[j3][i + 1]; + a[j][i] = x0r + x2r; + a[j][i + 1] = x0i + x2i; + a[j2][i] = x2i - x0i; + a[j2][i + 1] = x0r - x2r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j1][i] = wk1r * (x0r - x0i); + a[j1][i + 1] = wk1r * (x0r + x0i); + x0r = x3i + x1r; + x0i = x3r - x1i; + a[j3][i] = wk1r * (x0i - x0r); + a[j3][i + 1] = wk1r * (x0i + x0r); + } + } + k1 = 1; + ks = -1; + for (k = (m << 1); k <= n - m; k += m) { + k1++; + ks = -ks; + wk1r = w[k1 << 1]; + wk1i = w[(k1 << 1) + 1]; + wk2r = ks * w[k1]; + wk2i = w[k1 + ks]; + wk3r = wk1r - 2 * wk2i * wk1i; + wk3i = 2 * wk2i * wk1r - wk1i; + for (j = k; j <= l + k - 1; j++) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + for (i = 0; i <= n2 - 2; i += 2) { + x0r = a[j][i] + a[j1][i]; + x0i = a[j][i + 1] + a[j1][i + 1]; + x1r = a[j][i] - a[j1][i]; + x1i = a[j][i + 1] - a[j1][i + 1]; + x2r = a[j2][i] + a[j3][i]; + x2i = a[j2][i + 1] + a[j3][i + 1]; + x3r = a[j2][i] - a[j3][i]; + x3i = a[j2][i + 1] - a[j3][i + 1]; + a[j][i] = x0r + x2r; + a[j][i + 1] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j2][i] = wk2r * x0r - wk2i * x0i; + a[j2][i + 1] = wk2r * x0i + wk2i * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j1][i] = wk1r * x0r - wk1i * x0i; + a[j1][i + 1] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3][i] = wk3r * x0r - wk3i * x0i; + a[j3][i + 1] = wk3r * x0i + wk3i * x0r; + } + } + } + } + l = m; + } + if (l < n) { + for (j = 0; j <= l - 1; j++) { + j1 = j + l; + for (i = 0; i <= n2 - 2; i += 2) { + x0r = a[j][i] - a[j1][i]; + x0i = a[j][i + 1] - a[j1][i + 1]; + a[j][i] += a[j1][i]; + a[j][i + 1] += a[j1][i + 1]; + a[j1][i] = x0r; + a[j1][i + 1] = x0i; + } + } + } +} + + +void cftfcol(int n1, int n, double **a, double *w) +{ + int i, j, j1, j2, j3, k, k1, ks, l, m; + double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; + double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + for (i = 0; i <= n1 - 1; i++) { + l = 2; + while ((l << 1) < n) { + m = l << 2; + for (j = 0; j <= l - 2; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[i][j] + a[i][j1]; + x0i = a[i][j + 1] + a[i][j1 + 1]; + x1r = a[i][j] - a[i][j1]; + x1i = a[i][j + 1] - a[i][j1 + 1]; + x2r = a[i][j2] + a[i][j3]; + x2i = a[i][j2 + 1] + a[i][j3 + 1]; + x3r = a[i][j2] - a[i][j3]; + x3i = a[i][j2 + 1] - a[i][j3 + 1]; + a[i][j] = x0r + x2r; + a[i][j + 1] = x0i + x2i; + a[i][j2] = x0r - x2r; + a[i][j2 + 1] = x0i - x2i; + a[i][j1] = x1r + x3i; + a[i][j1 + 1] = x1i - x3r; + a[i][j3] = x1r - x3i; + a[i][j3 + 1] = x1i + x3r; + } + if (m < n) { + wk1r = w[2]; + for (j = m; j <= l + m - 2; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[i][j] + a[i][j1]; + x0i = a[i][j + 1] + a[i][j1 + 1]; + x1r = a[i][j] - a[i][j1]; + x1i = a[i][j + 1] - a[i][j1 + 1]; + x2r = a[i][j2] + a[i][j3]; + x2i = a[i][j2 + 1] + a[i][j3 + 1]; + x3r = a[i][j2] - a[i][j3]; + x3i = a[i][j2 + 1] - a[i][j3 + 1]; + a[i][j] = x0r + x2r; + a[i][j + 1] = x0i + x2i; + a[i][j2] = x0i - x2i; + a[i][j2 + 1] = x2r - x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[i][j1] = wk1r * (x0i + x0r); + a[i][j1 + 1] = wk1r * (x0i - x0r); + x0r = x3i - x1r; + x0i = x3r + x1i; + a[i][j3] = wk1r * (x0r + x0i); + a[i][j3 + 1] = wk1r * (x0r - x0i); + } + k1 = 1; + ks = -1; + for (k = (m << 1); k <= n - m; k += m) { + k1++; + ks = -ks; + wk1r = w[k1 << 1]; + wk1i = w[(k1 << 1) + 1]; + wk2r = ks * w[k1]; + wk2i = w[k1 + ks]; + wk3r = wk1r - 2 * wk2i * wk1i; + wk3i = 2 * wk2i * wk1r - wk1i; + for (j = k; j <= l + k - 2; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[i][j] + a[i][j1]; + x0i = a[i][j + 1] + a[i][j1 + 1]; + x1r = a[i][j] - a[i][j1]; + x1i = a[i][j + 1] - a[i][j1 + 1]; + x2r = a[i][j2] + a[i][j3]; + x2i = a[i][j2 + 1] + a[i][j3 + 1]; + x3r = a[i][j2] - a[i][j3]; + x3i = a[i][j2 + 1] - a[i][j3 + 1]; + a[i][j] = x0r + x2r; + a[i][j + 1] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[i][j2] = wk2r * x0r + wk2i * x0i; + a[i][j2 + 1] = wk2r * x0i - wk2i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[i][j1] = wk1r * x0r + wk1i * x0i; + a[i][j1 + 1] = wk1r * x0i - wk1i * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[i][j3] = wk3r * x0r + wk3i * x0i; + a[i][j3 + 1] = wk3r * x0i - wk3i * x0r; + } + } + } + l = m; + } + if (l < n) { + for (j = 0; j <= l - 2; j += 2) { + j1 = j + l; + x0r = a[i][j] - a[i][j1]; + x0i = a[i][j + 1] - a[i][j1 + 1]; + a[i][j] += a[i][j1]; + a[i][j + 1] += a[i][j1 + 1]; + a[i][j1] = x0r; + a[i][j1 + 1] = x0i; + } + } + } +} + + +void cftfrow(int n, int n2, double **a, double *w) +{ + int i, j, j1, j2, j3, k, k1, ks, l, m; + double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; + double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + l = 1; + while ((l << 1) < n) { + m = l << 2; + for (j = 0; j <= l - 1; j++) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + for (i = 0; i <= n2 - 2; i += 2) { + x0r = a[j][i] + a[j1][i]; + x0i = a[j][i + 1] + a[j1][i + 1]; + x1r = a[j][i] - a[j1][i]; + x1i = a[j][i + 1] - a[j1][i + 1]; + x2r = a[j2][i] + a[j3][i]; + x2i = a[j2][i + 1] + a[j3][i + 1]; + x3r = a[j2][i] - a[j3][i]; + x3i = a[j2][i + 1] - a[j3][i + 1]; + a[j][i] = x0r + x2r; + a[j][i + 1] = x0i + x2i; + a[j2][i] = x0r - x2r; + a[j2][i + 1] = x0i - x2i; + a[j1][i] = x1r + x3i; + a[j1][i + 1] = x1i - x3r; + a[j3][i] = x1r - x3i; + a[j3][i + 1] = x1i + x3r; + } + } + if (m < n) { + wk1r = w[2]; + for (j = m; j <= l + m - 1; j++) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + for (i = 0; i <= n2 - 2; i += 2) { + x0r = a[j][i] + a[j1][i]; + x0i = a[j][i + 1] + a[j1][i + 1]; + x1r = a[j][i] - a[j1][i]; + x1i = a[j][i + 1] - a[j1][i + 1]; + x2r = a[j2][i] + a[j3][i]; + x2i = a[j2][i + 1] + a[j3][i + 1]; + x3r = a[j2][i] - a[j3][i]; + x3i = a[j2][i + 1] - a[j3][i + 1]; + a[j][i] = x0r + x2r; + a[j][i + 1] = x0i + x2i; + a[j2][i] = x0i - x2i; + a[j2][i + 1] = x2r - x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j1][i] = wk1r * (x0i + x0r); + a[j1][i + 1] = wk1r * (x0i - x0r); + x0r = x3i - x1r; + x0i = x3r + x1i; + a[j3][i] = wk1r * (x0r + x0i); + a[j3][i + 1] = wk1r * (x0r - x0i); + } + } + k1 = 1; + ks = -1; + for (k = (m << 1); k <= n - m; k += m) { + k1++; + ks = -ks; + wk1r = w[k1 << 1]; + wk1i = w[(k1 << 1) + 1]; + wk2r = ks * w[k1]; + wk2i = w[k1 + ks]; + wk3r = wk1r - 2 * wk2i * wk1i; + wk3i = 2 * wk2i * wk1r - wk1i; + for (j = k; j <= l + k - 1; j++) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + for (i = 0; i <= n2 - 2; i += 2) { + x0r = a[j][i] + a[j1][i]; + x0i = a[j][i + 1] + a[j1][i + 1]; + x1r = a[j][i] - a[j1][i]; + x1i = a[j][i + 1] - a[j1][i + 1]; + x2r = a[j2][i] + a[j3][i]; + x2i = a[j2][i + 1] + a[j3][i + 1]; + x3r = a[j2][i] - a[j3][i]; + x3i = a[j2][i + 1] - a[j3][i + 1]; + a[j][i] = x0r + x2r; + a[j][i + 1] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j2][i] = wk2r * x0r + wk2i * x0i; + a[j2][i + 1] = wk2r * x0i - wk2i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j1][i] = wk1r * x0r + wk1i * x0i; + a[j1][i + 1] = wk1r * x0i - wk1i * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j3][i] = wk3r * x0r + wk3i * x0i; + a[j3][i + 1] = wk3r * x0i - wk3i * x0r; + } + } + } + } + l = m; + } + if (l < n) { + for (j = 0; j <= l - 1; j++) { + j1 = j + l; + for (i = 0; i <= n2 - 2; i += 2) { + x0r = a[j][i] - a[j1][i]; + x0i = a[j][i + 1] - a[j1][i + 1]; + a[j][i] += a[j1][i]; + a[j][i + 1] += a[j1][i + 1]; + a[j1][i] = x0r; + a[j1][i + 1] = x0i; + } + } + } +} + + +void rftbcol(int n1, int n, double **a, int nc, double *c) +{ + int i, j, k, kk, ks; + double wkr, wki, xr, xi, yr, yi; + + ks = (nc << 2) / n; + for (i = 0; i <= n1 - 1; i++) { + kk = 0; + for (k = (n >> 1) - 2; k >= 2; k -= 2) { + j = n - k; + kk += ks; + wkr = 0.5 - c[kk]; + wki = c[nc - kk]; + xr = a[i][k] - a[i][j]; + xi = a[i][k + 1] + a[i][j + 1]; + yr = wkr * xr - wki * xi; + yi = wkr * xi + wki * xr; + a[i][k] -= yr; + a[i][k + 1] -= yi; + a[i][j] += yr; + a[i][j + 1] -= yi; + } + } +} + + +void rftfcol(int n1, int n, double **a, int nc, double *c) +{ + int i, j, k, kk, ks; + double wkr, wki, xr, xi, yr, yi; + + ks = (nc << 2) / n; + for (i = 0; i <= n1 - 1; i++) { + kk = 0; + for (k = (n >> 1) - 2; k >= 2; k -= 2) { + j = n - k; + kk += ks; + wkr = 0.5 - c[kk]; + wki = c[nc - kk]; + xr = a[i][k] - a[i][j]; + xi = a[i][k + 1] + a[i][j + 1]; + yr = wkr * xr + wki * xi; + yi = wkr * xi - wki * xr; + a[i][k] -= yr; + a[i][k + 1] -= yi; + a[i][j] += yr; + a[i][j + 1] -= yi; + } + } +} + + +void dctbsub(int n1, int n2, double **a, int nc, double *c) +{ + int kk1, kk2, ks1, ks2, n1h, j1, k1, k2; + double w1r, w1i, wkr, wki, wjr, wji, x0r, x0i, x1r, x1i; + + ks1 = nc / n1; + ks2 = nc / n2; + n1h = n1 >> 1; + kk1 = ks1; + for (k1 = 1; k1 <= n1h - 1; k1++) { + j1 = n1 - k1; + w1r = 2 * c[kk1]; + w1i = 2 * c[nc - kk1]; + kk1 += ks1; + kk2 = ks2; + for (k2 = 2; k2 <= n2 - 2; k2 += 2) { + x0r = w1r * c[kk2]; + x0i = w1i * c[kk2]; + x1r = w1r * c[nc - kk2]; + x1i = w1i * c[nc - kk2]; + wkr = x0r - x1i; + wki = x0i + x1r; + wji = x0r + x1i; + wjr = x0i - x1r; + kk2 += ks2; + x0r = wkr * a[k1][k2] - wki * a[k1][k2 + 1]; + x0i = wkr * a[k1][k2 + 1] + wki * a[k1][k2]; + x1r = wjr * a[j1][k2] - wji * a[j1][k2 + 1]; + x1i = wjr * a[j1][k2 + 1] + wji * a[j1][k2]; + a[k1][k2] = x0r + x1i; + a[k1][k2 + 1] = x0i - x1r; + a[j1][k2] = x1r + x0i; + a[j1][k2 + 1] = x1i - x0r; + } + wkr = w1r * 0.5; + wki = w1i * 0.5; + wjr = w1r * c[kk2]; + wji = w1i * c[kk2]; + x0r = a[k1][0] + a[j1][0]; + x0i = a[k1][1] - a[j1][1]; + x1r = a[k1][0] - a[j1][0]; + x1i = a[k1][1] + a[j1][1]; + a[k1][0] = wkr * x0r - wki * x0i; + a[k1][1] = wkr * x0i + wki * x0r; + a[j1][0] = -wjr * x1r + wji * x1i; + a[j1][1] = wjr * x1i + wji * x1r; + } + w1r = 2 * c[kk1]; + kk2 = ks2; + for (k2 = 2; k2 <= n2 - 2; k2 += 2) { + wkr = 2 * c[kk2]; + wki = 2 * c[nc - kk2]; + wjr = w1r * wkr; + wji = w1r * wki; + kk2 += ks2; + x0i = wkr * a[0][k2 + 1] + wki * a[0][k2]; + a[0][k2] = wkr * a[0][k2] - wki * a[0][k2 + 1]; + a[0][k2 + 1] = x0i; + x0i = wjr * a[n1h][k2 + 1] + wji * a[n1h][k2]; + a[n1h][k2] = wjr * a[n1h][k2] - wji * a[n1h][k2 + 1]; + a[n1h][k2 + 1] = x0i; + } + a[0][1] *= w1r; + a[n1h][0] *= w1r; + a[n1h][1] *= 0.5; +} + + +void dctfsub(int n1, int n2, double **a, int nc, double *c) +{ + int kk1, kk2, ks1, ks2, n1h, j1, k1, k2; + double w1r, w1i, wkr, wki, wjr, wji, x0r, x0i, x1r, x1i; + + ks1 = nc / n1; + ks2 = nc / n2; + n1h = n1 >> 1; + kk1 = ks1; + for (k1 = 1; k1 <= n1h - 1; k1++) { + j1 = n1 - k1; + w1r = 2 * c[kk1]; + w1i = 2 * c[nc - kk1]; + kk1 += ks1; + kk2 = ks2; + for (k2 = 2; k2 <= n2 - 2; k2 += 2) { + x0r = w1r * c[kk2]; + x0i = w1i * c[kk2]; + x1r = w1r * c[nc - kk2]; + x1i = w1i * c[nc - kk2]; + wkr = x0r - x1i; + wki = x0i + x1r; + wji = x0r + x1i; + wjr = x0i - x1r; + kk2 += ks2; + x0r = a[k1][k2] - a[j1][k2 + 1]; + x0i = a[j1][k2] + a[k1][k2 + 1]; + x1r = a[j1][k2] - a[k1][k2 + 1]; + x1i = a[k1][k2] + a[j1][k2 + 1]; + a[k1][k2] = wkr * x0r + wki * x0i; + a[k1][k2 + 1] = wkr * x0i - wki * x0r; + a[j1][k2] = wjr * x1r + wji * x1i; + a[j1][k2 + 1] = wjr * x1i - wji * x1r; + } + x0r = 2 * c[kk2]; + wjr = x0r * w1r; + wji = x0r * w1i; + x0r = w1r * a[k1][0] + w1i * a[k1][1]; + x0i = w1r * a[k1][1] - w1i * a[k1][0]; + x1r = -wjr * a[j1][0] + wji * a[j1][1]; + x1i = wjr * a[j1][1] + wji * a[j1][0]; + a[k1][0] = x0r + x1r; + a[k1][1] = x1i + x0i; + a[j1][0] = x0r - x1r; + a[j1][1] = x1i - x0i; + } + w1r = 2 * c[kk1]; + kk2 = ks2; + for (k2 = 2; k2 <= n2 - 2; k2 += 2) { + wkr = 2 * c[kk2]; + wki = 2 * c[nc - kk2]; + wjr = w1r * wkr; + wji = w1r * wki; + kk2 += ks2; + x0i = wkr * a[0][k2 + 1] - wki * a[0][k2]; + a[0][k2] = wkr * a[0][k2] + wki * a[0][k2 + 1]; + a[0][k2 + 1] = x0i; + x0i = wjr * a[n1h][k2 + 1] - wji * a[n1h][k2]; + a[n1h][k2] = wjr * a[n1h][k2] + wji * a[n1h][k2 + 1]; + a[n1h][k2 + 1] = x0i; + } + w1r *= 2; + a[0][0] *= 2; + a[0][1] *= w1r; + a[n1h][0] *= w1r; +} + + +void dstbsub(int n1, int n2, double **a, int nc, double *c) +{ + int kk1, kk2, ks1, ks2, n1h, j1, k1, k2; + double w1r, w1i, wkr, wki, wjr, wji, x0r, x0i, x1r, x1i; + + ks1 = nc / n1; + ks2 = nc / n2; + n1h = n1 >> 1; + kk1 = ks1; + for (k1 = 1; k1 <= n1h - 1; k1++) { + j1 = n1 - k1; + w1r = 2 * c[kk1]; + w1i = 2 * c[nc - kk1]; + kk1 += ks1; + kk2 = ks2; + for (k2 = 2; k2 <= n2 - 2; k2 += 2) { + x0r = w1r * c[kk2]; + x0i = w1i * c[kk2]; + x1r = w1r * c[nc - kk2]; + x1i = w1i * c[nc - kk2]; + wkr = x0r - x1i; + wki = x0i + x1r; + wji = x0r + x1i; + wjr = x0i - x1r; + kk2 += ks2; + x0r = wkr * a[k1][k2] - wki * a[k1][k2 + 1]; + x0i = wkr * a[k1][k2 + 1] + wki * a[k1][k2]; + x1r = wjr * a[j1][k2] - wji * a[j1][k2 + 1]; + x1i = wjr * a[j1][k2 + 1] + wji * a[j1][k2]; + a[k1][k2] = x1i - x0r; + a[k1][k2 + 1] = x1r + x0i; + a[j1][k2] = x0i - x1r; + a[j1][k2 + 1] = x0r + x1i; + } + wkr = w1r * 0.5; + wki = w1i * 0.5; + wjr = w1r * c[kk2]; + wji = w1i * c[kk2]; + x0r = a[k1][0] + a[j1][0]; + x0i = a[k1][1] - a[j1][1]; + x1r = a[k1][0] - a[j1][0]; + x1i = a[k1][1] + a[j1][1]; + a[k1][1] = wkr * x0r - wki * x0i; + a[k1][0] = wkr * x0i + wki * x0r; + a[j1][1] = -wjr * x1r + wji * x1i; + a[j1][0] = wjr * x1i + wji * x1r; + } + w1r = 2 * c[kk1]; + kk2 = ks2; + for (k2 = 2; k2 <= n2 - 2; k2 += 2) { + wkr = 2 * c[kk2]; + wki = 2 * c[nc - kk2]; + wjr = w1r * wkr; + wji = w1r * wki; + kk2 += ks2; + x0i = wkr * a[0][k2 + 1] + wki * a[0][k2]; + a[0][k2 + 1] = wkr * a[0][k2] - wki * a[0][k2 + 1]; + a[0][k2] = x0i; + x0i = wjr * a[n1h][k2 + 1] + wji * a[n1h][k2]; + a[n1h][k2 + 1] = wjr * a[n1h][k2] - wji * a[n1h][k2 + 1]; + a[n1h][k2] = x0i; + } + a[0][1] *= w1r; + a[n1h][0] *= w1r; + a[n1h][1] *= 0.5; +} + + +void dstfsub(int n1, int n2, double **a, int nc, double *c) +{ + int kk1, kk2, ks1, ks2, n1h, j1, k1, k2; + double w1r, w1i, wkr, wki, wjr, wji, x0r, x0i, x1r, x1i; + + ks1 = nc / n1; + ks2 = nc / n2; + n1h = n1 >> 1; + kk1 = ks1; + for (k1 = 1; k1 <= n1h - 1; k1++) { + j1 = n1 - k1; + w1r = 2 * c[kk1]; + w1i = 2 * c[nc - kk1]; + kk1 += ks1; + kk2 = ks2; + for (k2 = 2; k2 <= n2 - 2; k2 += 2) { + x0r = w1r * c[kk2]; + x0i = w1i * c[kk2]; + x1r = w1r * c[nc - kk2]; + x1i = w1i * c[nc - kk2]; + wkr = x0r - x1i; + wki = x0i + x1r; + wji = x0r + x1i; + wjr = x0i - x1r; + kk2 += ks2; + x0r = a[j1][k2 + 1] - a[k1][k2]; + x0i = a[k1][k2 + 1] + a[j1][k2]; + x1r = a[k1][k2 + 1] - a[j1][k2]; + x1i = a[j1][k2 + 1] + a[k1][k2]; + a[k1][k2] = wkr * x0r + wki * x0i; + a[k1][k2 + 1] = wkr * x0i - wki * x0r; + a[j1][k2] = wjr * x1r + wji * x1i; + a[j1][k2 + 1] = wjr * x1i - wji * x1r; + } + x0r = 2 * c[kk2]; + wjr = x0r * w1r; + wji = x0r * w1i; + x0r = w1r * a[k1][1] + w1i * a[k1][0]; + x0i = w1r * a[k1][0] - w1i * a[k1][1]; + x1r = -wjr * a[j1][1] + wji * a[j1][0]; + x1i = wjr * a[j1][0] + wji * a[j1][1]; + a[k1][0] = x0r + x1r; + a[k1][1] = x1i + x0i; + a[j1][0] = x0r - x1r; + a[j1][1] = x1i - x0i; + } + w1r = 2 * c[kk1]; + kk2 = ks2; + for (k2 = 2; k2 <= n2 - 2; k2 += 2) { + wkr = 2 * c[kk2]; + wki = 2 * c[nc - kk2]; + wjr = w1r * wkr; + wji = w1r * wki; + kk2 += ks2; + x0i = wkr * a[0][k2] - wki * a[0][k2 + 1]; + a[0][k2] = wkr * a[0][k2 + 1] + wki * a[0][k2]; + a[0][k2 + 1] = x0i; + x0i = wjr * a[n1h][k2] - wji * a[n1h][k2 + 1]; + a[n1h][k2] = wjr * a[n1h][k2 + 1] + wji * a[n1h][k2]; + a[n1h][k2 + 1] = x0i; + } + w1r *= 2; + a[0][0] *= 2; + a[0][1] *= w1r; + a[n1h][0] *= w1r; +} + diff --git a/src/fft2d/fft2d/fft4f2d.f b/src/fft2d/fft2d/fft4f2d.f new file mode 100644 index 0000000..af529e3 --- /dev/null +++ b/src/fft2d/fft2d/fft4f2d.f @@ -0,0 +1,1591 @@ +! Fast Fourier/Cosine/Sine Transform +! dimension :two +! data length :power of 2 +! decimation :frequency +! radix :4, 2, row-column +! data :inplace +! table :use +! subroutines +! cdft2d: Complex Discrete Fourier Transform +! rdft2d: Real Discrete Fourier Transform +! ddct2d: Discrete Cosine Transform +! ddst2d: Discrete Sine Transform +! +! +! -------- Complex DFT (Discrete Fourier Transform) -------- +! [definition] +! <case1> +! X(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 x(j1,j2) * +! exp(2*pi*i*j1*k1/n1) * +! exp(2*pi*i*j2*k2/n2), +! 0<=k1<n1, 0<=k2<n2 +! <case2> +! X(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 x(j1,j2) * +! exp(-2*pi*i*j1*k1/n1) * +! exp(-2*pi*i*j2*k2/n2), +! 0<=k1<n1, 0<=k2<n2 +! (notes: sum_j=0^n-1 is a summation from j=0 to n-1) +! [usage] +! <case1> +! ip(0) = 0 ! first time only +! call cdft2d(n1max, 2*n1, n2, 1, a, ip, w) +! <case2> +! ip(0) = 0 ! first time only +! call cdft2d(n1max, 2*n1, n2, -1, a, ip, w) +! [parameters] +! n1max :row size of the 2D array (integer) +! 2*n1 :data length (integer) +! n1 >= 1, n1 = power of 2 +! n2 :data length (integer) +! n2 >= 1, n2 = power of 2 +! a(0:2*n1-1,0:n2-1) +! :input/output data (real*8) +! input data +! a(2*j1,j2) = Re(x(j1,j2)), +! a(2*j1+1,j2) = Im(x(j1,j2)), +! 0<=j1<n1, 0<=j2<n2 +! output data +! a(2*k1,k2) = Re(X(k1,k2)), +! a(2*k1+1,k2) = Im(X(k1,k2)), +! 0<=k1<n1, 0<=k2<n2 +! ip(0:*):work area for bit reversal (integer) +! length of ip >= 2+sqrt(n) +! (n = max(n1, n2)) +! ip(0),ip(1) are pointers of the cos/sin table. +! w(0:*) :cos/sin table (real*8) +! length of w >= max(n1/2, n2/2) +! w(),ip() are initialized if ip(0) = 0. +! [remark] +! Inverse of +! call cdft2d(n1max, 2*n1, n2, -1, a, ip, w) +! is +! call cdft2d(n1max, 2*n1, n2, 1, a, ip, w) +! do j2 = 0, n2 - 1 +! do j1 = 0, 2 * n1 - 1 +! a(j1, j2) = a(j1, j2) * (1.0d0 / (n1 * n2)) +! end do +! end do +! . +! +! +! -------- Real DFT / Inverse of Real DFT -------- +! [definition] +! <case1> RDFT +! R(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * +! cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2), +! 0<=k1<n1, 0<=k2<n2 +! I(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * +! sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2), +! 0<=k1<n1, 0<=k2<n2 +! <case2> IRDFT (excluding scale) +! a(k1,k2) = (1/2) * sum_j1=0^n1-1 sum_j2=0^n2-1 +! (R(j1,j2) * +! cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2) + +! I(j1,j2) * +! sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2)), +! 0<=k1<n1, 0<=k2<n2 +! (notes: R(n1-k1,n2-k2) = R(k1,k2), +! I(n1-k1,n2-k2) = -I(k1,k2), +! R(n1-k1,0) = R(k1,0), +! I(n1-k1,0) = -I(k1,0), +! R(0,n2-k2) = R(0,k2), +! I(0,n2-k2) = -I(0,k2), +! 0<k1<n1, 0<k2<n2) +! [usage] +! <case1> +! ip(0) = 0 ! first time only +! call rdft2d(n1max, n1, n2, 1, a, ip, w) +! <case2> +! ip(0) = 0 ! first time only +! call rdft2d(n1max, n1, n2, -1, a, ip, w) +! [parameters] +! n1max :row size of the 2D array (integer) +! n1 :data length (integer) +! n1 >= 2, n1 = power of 2 +! n2 :data length (integer) +! n2 >= 2, n2 = power of 2 +! a(0:n1-1,0:n2-1) +! :input/output data (real*8) +! <case1> +! output data +! a(2*k1,k2) = R(k1,k2) = R(n1-k1,n2-k2), +! a(2*k1+1,k2) = I(k1,k2) = -I(n1-k1,n2-k2), +! 0<k1<n1/2, 0<k2<n2, +! a(2*k1,0) = R(k1,0) = R(n1-k1,0), +! a(2*k1+1,0) = I(k1,0) = -I(n1-k1,0), +! 0<k1<n1/2, +! a(0,k2) = R(0,k2) = R(0,n2-k2), +! a(1,k2) = I(0,k2) = -I(0,n2-k2), +! a(1,n2-k2) = R(n1/2,k2) = R(n1/2,n2-k2), +! a(0,n2-k2) = -I(n1/2,k2) = I(n1/2,n2-k2), +! 0<k2<n2/2, +! a(0,0) = R(0,0), +! a(1,0) = R(n1/2,0), +! a(0,n2/2) = R(0,n2/2), +! a(1,n2/2) = R(n1/2,n2/2) +! <case2> +! input data +! a(2*j1,j2) = R(j1,j2) = R(n1-j1,n2-j2), +! a(2*j1+1,j2) = I(j1,j2) = -I(n1-j1,n2-j2), +! 0<j1<n1/2, 0<j2<n2, +! a(2*j1,0) = R(j1,0) = R(n1-j1,0), +! a(2*j1+1,0) = I(j1,0) = -I(n1-j1,0), +! 0<j1<n1/2, +! a(0,j2) = R(0,j2) = R(0,n2-j2), +! a(1,j2) = I(0,j2) = -I(0,n2-j2), +! a(1,n2-j2) = R(n1/2,j2) = R(n1/2,n2-j2), +! a(0,n2-j2) = -I(n1/2,j2) = I(n1/2,n2-j2), +! 0<j2<n2/2, +! a(0,0) = R(0,0), +! a(1,0) = R(n1/2,0), +! a(0,n2/2) = R(0,n2/2), +! a(1,n2/2) = R(n1/2,n2/2) +! ip(0:*):work area for bit reversal (integer) +! length of ip >= 2+sqrt(n) +! (n = max(n1/2, n2)) +! ip(0),ip(1) are pointers of the cos/sin table. +! w(0:*) :cos/sin table (real*8) +! length of w >= max(n1/4, n2/2) + n1/4 +! w(),ip() are initialized if ip(0) = 0. +! [remark] +! Inverse of +! call rdft2d(n1max, n1, n2, 1, a, ip, w) +! is +! call rdft2d(n1max, n1, n2, -1, a, ip, w) +! do j2 = 0, n2 - 1 +! do j1 = 0, n1 - 1 +! a(j1, j2) = a(j1, j2) * (2.0d0 / (n1 * n2)) +! end do +! end do +! . +! +! +! -------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- +! [definition] +! <case1> IDCT (excluding scale) +! C(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * +! cos(pi*j1*(k1+1/2)/n1) * +! cos(pi*j2*(k2+1/2)/n2), +! 0<=k1<n1, 0<=k2<n2 +! <case2> DCT +! C(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * +! cos(pi*(j1+1/2)*k1/n1) * +! cos(pi*(j2+1/2)*k2/n2), +! 0<=k1<n1, 0<=k2<n2 +! [usage] +! <case1> +! ip(0) = 0 ! first time only +! call ddct2d(n1max, n1, n2, 1, a, t, ip, w) +! <case2> +! ip(0) = 0 ! first time only +! call ddct2d(n1max, n1, n2, -1, a, t, ip, w) +! [parameters] +! n1max :row size of the 2D array (integer) +! n1 :data length (integer) +! n1 >= 2, n1 = power of 2 +! n2 :data length (integer) +! n2 >= 2, n2 = power of 2 +! a(0:n1-1,0:n2-1) +! :input/output data (real*8) +! output data +! a(k1,k2) = C(k1,k2), 0<=k1<n1, 0<=k2<n2 +! t(0:n1-1,0:n2-1) +! :work area (real*8) +! ip(0:*):work area for bit reversal (integer) +! length of ip >= 2+sqrt(n) +! (n = max(n1/2, n2)) +! ip(0),ip(1) are pointers of the cos/sin table. +! w(0:*) :cos/sin table (real*8) +! length of w >= max(n1/4, n2/2) + max(n1, n2) +! w(),ip() are initialized if ip(0) = 0. +! [remark] +! Inverse of +! call ddct2d(n1max, n1, n2, -1, a, t, ip, w) +! is +! do j1 = 0, n1 - 1 +! a(j1, 0) = a(j1, 0) * 0.5d0 +! end do +! do j2 = 0, n2 - 1 +! a(0, j2) = a(0, j2) * 0.5d0 +! end do +! call ddct2d(n1max, n1, n2, 1, a, t, ip, w) +! do j2 = 0, n2 - 1 +! do j1 = 0, n1 - 1 +! a(j1, j2) = a(j1, j2) * (4.0d0 / (n1 * n2)) +! end do +! end do +! . +! +! +! -------- DST (Discrete Sine Transform) / Inverse of DST -------- +! [definition] +! <case1> IDST (excluding scale) +! S(k1,k2) = sum_j1=1^n1 sum_j2=1^n2 A(j1,j2) * +! sin(pi*j1*(k1+1/2)/n1) * +! sin(pi*j2*(k2+1/2)/n2), +! 0<=k1<n1, 0<=k2<n2 +! <case2> DST +! S(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * +! sin(pi*(j1+1/2)*k1/n1) * +! sin(pi*(j2+1/2)*k2/n2), +! 0<k1<=n1, 0<k2<=n2 +! [usage] +! <case1> +! ip(0) = 0 ! first time only +! call ddst2d(n1max, n1, n2, 1, a, t, ip, w) +! <case2> +! ip(0) = 0 ! first time only +! call ddst2d(n1max, n1, n2, -1, a, t, ip, w) +! [parameters] +! n1max :row size of the 2D array (integer) +! n1 :data length (integer) +! n1 >= 2, n1 = power of 2 +! n2 :data length (integer) +! n2 >= 2, n2 = power of 2 +! a(0:n1-1,0:n2-1) +! :input/output data (real*8) +! <case1> +! input data +! a(j1,j2) = A(j1,j2), 0<j1<n1, 0<j2<n2, +! a(j1,0) = A(j1,n2), 0<j1<n1, +! a(0,j2) = A(n1,j2), 0<j2<n2, +! a(0,0) = A(n1,n2) +! (i.e. A(j1,j2) = a(mod(j1,n1),mod(j2,n2))) +! output data +! a(k1,k2) = S(k1,k2), 0<=k1<n1, 0<=k2<n2 +! <case2> +! output data +! a(k1,k2) = S(k1,k2), 0<k1<n1, 0<k2<n2, +! a(k1,0) = S(k1,n2), 0<k1<n1, +! a(0,k2) = S(n1,k2), 0<k2<n2, +! a(0,0) = S(n1,n2) +! (i.e. S(k1,k2) = a(mod(k1,n1),mod(k2,n2))) +! t(0:n1-1,0:n2-1) +! :work area (real*8) +! ip(0:*):work area for bit reversal (integer) +! length of ip >= 2+sqrt(n) +! (n = max(n1/2, n2)) +! ip(0),ip(1) are pointers of the cos/sin table. +! w(0:*) :cos/sin table (real*8) +! length of w >= max(n1/4, n2/2) + max(n1, n2) +! w(),ip() are initialized if ip(0) = 0. +! [remark] +! Inverse of +! call ddst2d(n1max, n1, n2, -1, a, t, ip, w) +! is +! do j1 = 0, n1 - 1 +! a(j1, 0) = a(j1, 0) * 0.5d0 +! end do +! do j2 = 0, n2 - 1 +! a(0, j2) = a(0, j2) * 0.5d0 +! end do +! call ddst2d(n1max, n1, n2, 1, a, t, ip, w) +! do j2 = 0, n2 - 1 +! do j1 = 0, n1 - 1 +! a(j1, j2) = a(j1, j2) * (4.0d0 / (n1 * n2)) +! end do +! end do +! . +! +! + subroutine cdft2d(n1max, n1, n2, isgn, a, ip, w) + integer n1max, n1, n2, isgn, ip(0 : *), n + real*8 a(0 : n1max - 1, 0 : n2 - 1), w(0 : *) + n = max(n1, 2 * n2) + if (n .gt. 4 * ip(0)) then + call makewt(n / 4, ip, w) + end if + if (n1 .gt. 4) then + call bitrv2row(n1max, n1, n2, ip(2), a) + end if + if (n2 .gt. 2) then + call bitrv2col(n1max, n1, n2, ip(2), a) + end if + if (isgn .lt. 0) then + call cftfrow(n1max, n1, n2, a, w) + call cftfcol(n1max, n1, n2, a, w) + else + call cftbrow(n1max, n1, n2, a, w) + call cftbcol(n1max, n1, n2, a, w) + end if + end +! + subroutine rdft2d(n1max, n1, n2, isgn, a, ip, w) + integer n1max, n1, n2, isgn, ip(0 : *), n, nw, nc, n2h, i, j + real*8 a(0 : n1max - 1, 0 : n2 - 1), w(0 : *), xi + n = max(n1, 2 * n2) + nw = ip(0) + if (n .gt. 4 * nw) then + nw = n / 4 + call makewt(nw, ip, w) + end if + nc = ip(1) + if (n1 .gt. 4 * nc) then + nc = n1 / 4 + call makect(nc, ip, w(nw)) + end if + n2h = n2 / 2 + if (isgn .lt. 0) then + do i = 1, n2h - 1 + j = n2 - i + xi = a(0, i) - a(0, j) + a(0, i) = a(0, i) + a(0, j) + a(0, j) = xi + xi = a(1, j) - a(1, i) + a(1, i) = a(1, i) + a(1, j) + a(1, j) = xi + end do + if (n2 .gt. 2) then + call bitrv2col(n1max, n1, n2, ip(2), a) + end if + call cftfcol(n1max, n1, n2, a, w) + do i = 0, n2 - 1 + a(1, i) = 0.5d0 * (a(0, i) - a(1, i)) + a(0, i) = a(0, i) - a(1, i) + end do + if (n1 .gt. 4) then + call rftfrow(n1max, n1, n2, a, nc, w(nw)) + call bitrv2row(n1max, n1, n2, ip(2), a) + end if + call cftfrow(n1max, n1, n2, a, w) + else + if (n1 .gt. 4) then + call bitrv2row(n1max, n1, n2, ip(2), a) + end if + call cftbrow(n1max, n1, n2, a, w) + if (n1 .gt. 4) then + call rftbrow(n1max, n1, n2, a, nc, w(nw)) + end if + do i = 0, n2 - 1 + xi = a(0, i) - a(1, i) + a(0, i) = a(0, i) + a(1, i) + a(1, i) = xi + end do + if (n2 .gt. 2) then + call bitrv2col(n1max, n1, n2, ip(2), a) + end if + call cftbcol(n1max, n1, n2, a, w) + do i = 1, n2h - 1 + j = n2 - i + a(0, j) = 0.5d0 * (a(0, i) - a(0, j)) + a(0, i) = a(0, i) - a(0, j) + a(1, j) = 0.5d0 * (a(1, i) + a(1, j)) + a(1, i) = a(1, i) - a(1, j) + end do + end if + end +! + subroutine ddct2d(n1max, n1, n2, isgn, a, t, ip, w) + integer n1max, n1, n2, isgn, ip(0 : *), n, nw, nc, n1h, n2h, + & i, ix, ic, j, jx, jc + real*8 a(0 : n1max - 1, 0 : n2 - 1), + & t(0 : n1max - 1, 0 : n2 - 1), w(0 : *), xi + n = max(n1, 2 * n2) + nw = ip(0) + if (n .gt. 4 * nw) then + nw = n / 4 + call makewt(nw, ip, w) + end if + nc = ip(1) + if (n1 .gt. nc .or. n2 .gt. nc) then + nc = max(n1, n2) + call makect(nc, ip, w(nw)) + end if + n1h = n1 / 2 + n2h = n2 / 2 + if (isgn .ge. 0) then + do i = 0, n2 - 1 + do j = 1, n1h - 1 + jx = 2 * j + t(jx, i) = a(j, i) + t(jx + 1, i) = a(n1 - j, i) + end do + end do + t(0, 0) = a(0, 0) + t(1, 0) = a(n1h, 0) + t(0, n2h) = a(0, n2h) + t(1, n2h) = a(n1h, n2h) + do i = 1, n2h - 1 + ic = n2 - i + t(0, i) = a(0, i) + t(1, ic) = a(n1h, i) + t(1, i) = a(0, ic) + t(0, ic) = a(n1h, ic) + end do + call dctfsub(n1max, n1, n2, t, nc, w(nw)) + if (n2 .gt. 2) then + call bitrv2col(n1max, n1, n2, ip(2), t) + end if + call cftfcol(n1max, n1, n2, t, w) + do i = 0, n2 - 1 + t(1, i) = 0.5d0 * (t(0, i) - t(1, i)) + t(0, i) = t(0, i) - t(1, i) + end do + if (n1 .gt. 4) then + call rftfrow(n1max, n1, n2, t, nc, w(nw)) + call bitrv2row(n1max, n1, n2, ip(2), t) + end if + call cftfrow(n1max, n1, n2, t, w) + do i = 0, n2h - 1 + ix = 2 * i + ic = n2 - 1 - i + do j = 0, n1h - 1 + jx = 2 * j + jc = n1 - 1 - j + a(jx, ix) = t(j, i) + a(jx + 1, ix) = t(jc, i) + a(jx, ix + 1) = t(j, ic) + a(jx + 1, ix + 1) = t(jc, ic) + end do + end do + else + do i = 0, n2h - 1 + ix = 2 * i + ic = n2 - 1 - i + do j = 0, n1h - 1 + jx = 2 * j + jc = n1 - 1 - j + t(j, i) = a(jx, ix) + t(jc, i) = a(jx + 1, ix) + t(j, ic) = a(jx, ix + 1) + t(jc, ic) = a(jx + 1, ix + 1) + end do + end do + if (n1 .gt. 4) then + call bitrv2row(n1max, n1, n2, ip(2), t) + end if + call cftbrow(n1max, n1, n2, t, w) + if (n1 .gt. 4) then + call rftbrow(n1max, n1, n2, t, nc, w(nw)) + end if + do i = 0, n2 - 1 + xi = t(0, i) - t(1, i) + t(0, i) = t(0, i) + t(1, i) + t(1, i) = xi + end do + if (n2 .gt. 2) then + call bitrv2col(n1max, n1, n2, ip(2), t) + end if + call cftbcol(n1max, n1, n2, t, w) + call dctbsub(n1max, n1, n2, t, nc, w(nw)) + do i = 0, n2 - 1 + do j = 1, n1h - 1 + jx = 2 * j + a(j, i) = t(jx, i) + a(n1 - j, i) = t(jx + 1, i) + end do + end do + a(0, 0) = t(0, 0) + a(n1h, 0) = t(1, 0) + a(0, n2h) = t(0, n2h) + a(n1h, n2h) = t(1, n2h) + do i = 1, n2h - 1 + ic = n2 - i + a(0, i) = t(0, i) + a(n1h, i) = t(1, ic) + a(0, ic) = t(1, i) + a(n1h, ic) = t(0, ic) + end do + end if + end +! + subroutine ddst2d(n1max, n1, n2, isgn, a, t, ip, w) + integer n1max, n1, n2, isgn, ip(0 : *), n, nw, nc, n1h, n2h, + & i, ix, ic, j, jx, jc + real*8 a(0 : n1max - 1, 0 : n2 - 1), + & t(0 : n1max - 1, 0 : n2 - 1), w(0 : *), xi + n = max(n1, 2 * n2) + nw = ip(0) + if (n .gt. 4 * nw) then + nw = n / 4 + call makewt(nw, ip, w) + end if + nc = ip(1) + if (n1 .gt. nc .or. n2 .gt. nc) then + nc = max(n1, n2) + call makect(nc, ip, w(nw)) + end if + n1h = n1 / 2 + n2h = n2 / 2 + if (isgn .ge. 0) then + do i = 0, n2 - 1 + do j = 1, n1h - 1 + jx = 2 * j + t(jx, i) = a(j, i) + t(jx + 1, i) = a(n1 - j, i) + end do + end do + t(0, 0) = a(0, 0) + t(1, 0) = a(n1h, 0) + t(0, n2h) = a(0, n2h) + t(1, n2h) = a(n1h, n2h) + do i = 1, n2h - 1 + ic = n2 - i + t(0, i) = a(0, i) + t(1, ic) = a(n1h, i) + t(1, i) = a(0, ic) + t(0, ic) = a(n1h, ic) + end do + call dstfsub(n1max, n1, n2, t, nc, w(nw)) + if (n2 .gt. 2) then + call bitrv2col(n1max, n1, n2, ip(2), t) + end if + call cftfcol(n1max, n1, n2, t, w) + do i = 0, n2 - 1 + t(1, i) = 0.5d0 * (t(0, i) - t(1, i)) + t(0, i) = t(0, i) - t(1, i) + end do + if (n1 .gt. 4) then + call rftfrow(n1max, n1, n2, t, nc, w(nw)) + call bitrv2row(n1max, n1, n2, ip(2), t) + end if + call cftfrow(n1max, n1, n2, t, w) + do i = 0, n2h - 1 + ix = 2 * i + ic = n2 - 1 - i + do j = 0, n1h - 1 + jx = 2 * j + jc = n1 - 1 - j + a(jx, ix) = t(j, i) + a(jx + 1, ix) = -t(jc, i) + a(jx, ix + 1) = -t(j, ic) + a(jx + 1, ix + 1) = t(jc, ic) + end do + end do + else + do i = 0, n2h - 1 + ix = 2 * i + ic = n2 - 1 - i + do j = 0, n1h - 1 + jx = 2 * j + jc = n1 - 1 - j + t(j, i) = a(jx, ix) + t(jc, i) = -a(jx + 1, ix) + t(j, ic) = -a(jx, ix + 1) + t(jc, ic) = a(jx + 1, ix + 1) + end do + end do + if (n1 .gt. 4) then + call bitrv2row(n1max, n1, n2, ip(2), t) + end if + call cftbrow(n1max, n1, n2, t, w) + if (n1 .gt. 4) then + call rftbrow(n1max, n1, n2, t, nc, w(nw)) + end if + do i = 0, n2 - 1 + xi = t(0, i) - t(1, i) + t(0, i) = t(0, i) + t(1, i) + t(1, i) = xi + end do + if (n2 .gt. 2) then + call bitrv2col(n1max, n1, n2, ip(2), t) + end if + call cftbcol(n1max, n1, n2, t, w) + call dstbsub(n1max, n1, n2, t, nc, w(nw)) + do i = 0, n2 - 1 + do j = 1, n1h - 1 + jx = 2 * j + a(j, i) = t(jx, i) + a(n1 - j, i) = t(jx + 1, i) + end do + end do + a(0, 0) = t(0, 0) + a(n1h, 0) = t(1, 0) + a(0, n2h) = t(0, n2h) + a(n1h, n2h) = t(1, n2h) + do i = 1, n2h - 1 + ic = n2 - i + a(0, i) = t(0, i) + a(n1h, i) = t(1, ic) + a(0, ic) = t(1, i) + a(n1h, ic) = t(0, ic) + end do + end if + end +! +! -------- initializing routines -------- +! + subroutine makewt(nw, ip, w) + integer nw, ip(0 : *), nwh, j + real*8 w(0 : nw - 1), delta, x, y + ip(0) = nw + ip(1) = 1 + if (nw .gt. 2) then + nwh = nw / 2 + delta = atan(1.0d0) / nwh + w(0) = 1 + w(1) = 0 + w(nwh) = cos(delta * nwh) + w(nwh + 1) = w(nwh) + do j = 2, nwh - 2, 2 + x = cos(delta * j) + y = sin(delta * j) + w(j) = x + w(j + 1) = y + w(nw - j) = y + w(nw - j + 1) = x + end do + call bitrv2(nw, ip(2), w) + end if + end +! + subroutine makect(nc, ip, c) + integer nc, ip(0 : *), nch, j + real*8 c(0 : nc - 1), delta + ip(1) = nc + if (nc .gt. 1) then + nch = nc / 2 + delta = atan(1.0d0) / nch + c(0) = 0.5d0 + c(nch) = 0.5d0 * cos(delta * nch) + do j = 1, nch - 1 + c(j) = 0.5d0 * cos(delta * j) + c(nc - j) = 0.5d0 * sin(delta * j) + end do + end if + end +! +! -------- child routines -------- +! + subroutine bitrv2(n, ip, a) + integer n, ip(0 : *), j, j1, k, k1, l, m, m2 + real*8 a(0 : n - 1), xr, xi + ip(0) = 0 + l = n + m = 1 + do while (4 * m .lt. l) + l = l / 2 + do j = 0, m - 1 + ip(m + j) = ip(j) + l + end do + m = m * 2 + end do + if (4 * m .gt. l) then + do k = 1, m - 1 + do j = 0, k - 1 + j1 = 2 * j + ip(k) + k1 = 2 * k + ip(j) + xr = a(j1) + xi = a(j1 + 1) + a(j1) = a(k1) + a(j1 + 1) = a(k1 + 1) + a(k1) = xr + a(k1 + 1) = xi + end do + end do + else + m2 = 2 * m + do k = 1, m - 1 + do j = 0, k - 1 + j1 = 2 * j + ip(k) + k1 = 2 * k + ip(j) + xr = a(j1) + xi = a(j1 + 1) + a(j1) = a(k1) + a(j1 + 1) = a(k1 + 1) + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + m2 + k1 = k1 + m2 + xr = a(j1) + xi = a(j1 + 1) + a(j1) = a(k1) + a(j1 + 1) = a(k1 + 1) + a(k1) = xr + a(k1 + 1) = xi + end do + end do + end if + end +! + subroutine bitrv2row(n1max, n, n2, ip, a) + integer n1max, n, n2, ip(0 : *), i, j, j1, k, k1, l, m, m2 + real*8 a(0 : n1max - 1, 0 : n2 - 1), xr, xi + ip(0) = 0 + l = n + m = 1 + do while (4 * m .lt. l) + l = l / 2 + do j = 0, m - 1 + ip(m + j) = ip(j) + l + end do + m = m * 2 + end do + if (4 * m .gt. l) then + do i = 0, n2 - 1 + do k = 1, m - 1 + do j = 0, k - 1 + j1 = 2 * j + ip(k) + k1 = 2 * k + ip(j) + xr = a(j1, i) + xi = a(j1 + 1, i) + a(j1, i) = a(k1, i) + a(j1 + 1, i) = a(k1 + 1, i) + a(k1, i) = xr + a(k1 + 1, i) = xi + end do + end do + end do + else + m2 = 2 * m + do i = 0, n2 - 1 + do k = 1, m - 1 + do j = 0, k - 1 + j1 = 2 * j + ip(k) + k1 = 2 * k + ip(j) + xr = a(j1, i) + xi = a(j1 + 1, i) + a(j1, i) = a(k1, i) + a(j1 + 1, i) = a(k1 + 1, i) + a(k1, i) = xr + a(k1 + 1, i) = xi + j1 = j1 + m2 + k1 = k1 + m2 + xr = a(j1, i) + xi = a(j1 + 1, i) + a(j1, i) = a(k1, i) + a(j1 + 1, i) = a(k1 + 1, i) + a(k1, i) = xr + a(k1 + 1, i) = xi + end do + end do + end do + end if + end +! + subroutine bitrv2col(n1max, n1, n, ip, a) + integer n1max, n1, n, ip(0 : *), i, j, j1, k, k1, l, m + real*8 a(0 : n1max - 1, 0 : n - 1), xr, xi + ip(0) = 0 + l = n + m = 1 + do while (2 * m .lt. l) + l = l / 2 + do j = 0, m - 1 + ip(m + j) = ip(j) + l + end do + m = m * 2 + end do + if (2 * m .gt. l) then + do k = 1, m - 1 + do j = 0, k - 1 + j1 = j + ip(k) + k1 = k + ip(j) + do i = 0, n1 - 2, 2 + xr = a(i, j1) + xi = a(i + 1, j1) + a(i, j1) = a(i, k1) + a(i + 1, j1) = a(i + 1, k1) + a(i, k1) = xr + a(i + 1, k1) = xi + end do + end do + end do + else + do k = 1, m - 1 + do j = 0, k - 1 + j1 = j + ip(k) + k1 = k + ip(j) + do i = 0, n1 - 2, 2 + xr = a(i, j1) + xi = a(i + 1, j1) + a(i, j1) = a(i, k1) + a(i + 1, j1) = a(i + 1, k1) + a(i, k1) = xr + a(i + 1, k1) = xi + end do + j1 = j1 + m + k1 = k1 + m + do i = 0, n1 - 2, 2 + xr = a(i, j1) + xi = a(i + 1, j1) + a(i, j1) = a(i, k1) + a(i + 1, j1) = a(i + 1, k1) + a(i, k1) = xr + a(i + 1, k1) = xi + end do + end do + end do + end if + end +! + subroutine cftbrow(n1max, n, n2, a, w) + integer n1max, n, n2, i, j, j1, j2, j3, k, k1, ks, l, m + real*8 a(0 : n1max - 1, 0 : n2 - 1), w(0 : *) + real*8 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i + real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i + do i = 0, n2 - 1 + l = 2 + do while (2 * l .lt. n) + m = 4 * l + do j = 0, l - 2, 2 + j1 = j + l + j2 = j1 + l + j3 = j2 + l + x0r = a(j, i) + a(j1, i) + x0i = a(j + 1, i) + a(j1 + 1, i) + x1r = a(j, i) - a(j1, i) + x1i = a(j + 1, i) - a(j1 + 1, i) + x2r = a(j2, i) + a(j3, i) + x2i = a(j2 + 1, i) + a(j3 + 1, i) + x3r = a(j2, i) - a(j3, i) + x3i = a(j2 + 1, i) - a(j3 + 1, i) + a(j, i) = x0r + x2r + a(j + 1, i) = x0i + x2i + a(j2, i) = x0r - x2r + a(j2 + 1, i) = x0i - x2i + a(j1, i) = x1r - x3i + a(j1 + 1, i) = x1i + x3r + a(j3, i) = x1r + x3i + a(j3 + 1, i) = x1i - x3r + end do + if (m .lt. n) then + wk1r = w(2) + do j = m, l + m - 2, 2 + j1 = j + l + j2 = j1 + l + j3 = j2 + l + x0r = a(j, i) + a(j1, i) + x0i = a(j + 1, i) + a(j1 + 1, i) + x1r = a(j, i) - a(j1, i) + x1i = a(j + 1, i) - a(j1 + 1, i) + x2r = a(j2, i) + a(j3, i) + x2i = a(j2 + 1, i) + a(j3 + 1, i) + x3r = a(j2, i) - a(j3, i) + x3i = a(j2 + 1, i) - a(j3 + 1, i) + a(j, i) = x0r + x2r + a(j + 1, i) = x0i + x2i + a(j2, i) = x2i - x0i + a(j2 + 1, i) = x0r - x2r + x0r = x1r - x3i + x0i = x1i + x3r + a(j1, i) = wk1r * (x0r - x0i) + a(j1 + 1, i) = wk1r * (x0r + x0i) + x0r = x3i + x1r + x0i = x3r - x1i + a(j3, i) = wk1r * (x0i - x0r) + a(j3 + 1, i) = wk1r * (x0i + x0r) + end do + k1 = 1 + ks = -1 + do k = 2 * m, n - m, m + k1 = k1 + 1 + ks = -ks + wk1r = w(2 * k1) + wk1i = w(2 * k1 + 1) + wk2r = ks * w(k1) + wk2i = w(k1 + ks) + wk3r = wk1r - 2 * wk2i * wk1i + wk3i = 2 * wk2i * wk1r - wk1i + do j = k, l + k - 2, 2 + j1 = j + l + j2 = j1 + l + j3 = j2 + l + x0r = a(j, i) + a(j1, i) + x0i = a(j + 1, i) + a(j1 + 1, i) + x1r = a(j, i) - a(j1, i) + x1i = a(j + 1, i) - a(j1 + 1, i) + x2r = a(j2, i) + a(j3, i) + x2i = a(j2 + 1, i) + a(j3 + 1, i) + x3r = a(j2, i) - a(j3, i) + x3i = a(j2 + 1, i) - a(j3 + 1, i) + a(j, i) = x0r + x2r + a(j + 1, i) = x0i + x2i + x0r = x0r - x2r + x0i = x0i - x2i + a(j2, i) = wk2r * x0r - wk2i * x0i + a(j2 + 1, i) = wk2r * x0i + wk2i * x0r + x0r = x1r - x3i + x0i = x1i + x3r + a(j1, i) = wk1r * x0r - wk1i * x0i + a(j1 + 1, i) = wk1r * x0i + wk1i * x0r + x0r = x1r + x3i + x0i = x1i - x3r + a(j3, i) = wk3r * x0r - wk3i * x0i + a(j3 + 1, i) = wk3r * x0i + wk3i * x0r + end do + end do + end if + l = m + end do + if (l .lt. n) then + do j = 0, l - 2, 2 + j1 = j + l + x0r = a(j, i) - a(j1, i) + x0i = a(j + 1, i) - a(j1 + 1, i) + a(j, i) = a(j, i) + a(j1, i) + a(j + 1, i) = a(j + 1, i) + a(j1 + 1, i) + a(j1, i) = x0r + a(j1 + 1, i) = x0i + end do + end if + end do + end +! + subroutine cftbcol(n1max, n1, n, a, w) + integer n1max, n1, n, i, j, j1, j2, j3, k, k1, ks, l, m + real*8 a(0 : n1max - 1, 0 : n - 1), w(0 : *) + real*8 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i + real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i + l = 1 + do while (2 * l .lt. n) + m = 4 * l + do j = 0, l - 1 + j1 = j + l + j2 = j1 + l + j3 = j2 + l + do i = 0, n1 - 2, 2 + x0r = a(i, j) + a(i, j1) + x0i = a(i + 1, j) + a(i + 1, j1) + x1r = a(i, j) - a(i, j1) + x1i = a(i + 1, j) - a(i + 1, j1) + x2r = a(i, j2) + a(i, j3) + x2i = a(i + 1, j2) + a(i + 1, j3) + x3r = a(i, j2) - a(i, j3) + x3i = a(i + 1, j2) - a(i + 1, j3) + a(i, j) = x0r + x2r + a(i + 1, j) = x0i + x2i + a(i, j2) = x0r - x2r + a(i + 1, j2) = x0i - x2i + a(i, j1) = x1r - x3i + a(i + 1, j1) = x1i + x3r + a(i, j3) = x1r + x3i + a(i + 1, j3) = x1i - x3r + end do + end do + if (m .lt. n) then + wk1r = w(2) + do j = m, l + m - 1 + j1 = j + l + j2 = j1 + l + j3 = j2 + l + do i = 0, n1 - 2, 2 + x0r = a(i, j) + a(i, j1) + x0i = a(i + 1, j) + a(i + 1, j1) + x1r = a(i, j) - a(i, j1) + x1i = a(i + 1, j) - a(i + 1, j1) + x2r = a(i, j2) + a(i, j3) + x2i = a(i + 1, j2) + a(i + 1, j3) + x3r = a(i, j2) - a(i, j3) + x3i = a(i + 1, j2) - a(i + 1, j3) + a(i, j) = x0r + x2r + a(i + 1, j) = x0i + x2i + a(i, j2) = x2i - x0i + a(i + 1, j2) = x0r - x2r + x0r = x1r - x3i + x0i = x1i + x3r + a(i, j1) = wk1r * (x0r - x0i) + a(i + 1, j1) = wk1r * (x0r + x0i) + x0r = x3i + x1r + x0i = x3r - x1i + a(i, j3) = wk1r * (x0i - x0r) + a(i + 1, j3) = wk1r * (x0i + x0r) + end do + end do + k1 = 1 + ks = -1 + do k = 2 * m, n - m, m + k1 = k1 + 1 + ks = -ks + wk1r = w(2 * k1) + wk1i = w(2 * k1 + 1) + wk2r = ks * w(k1) + wk2i = w(k1 + ks) + wk3r = wk1r - 2 * wk2i * wk1i + wk3i = 2 * wk2i * wk1r - wk1i + do j = k, l + k - 1 + j1 = j + l + j2 = j1 + l + j3 = j2 + l + do i = 0, n1 - 2, 2 + x0r = a(i, j) + a(i, j1) + x0i = a(i + 1, j) + a(i + 1, j1) + x1r = a(i, j) - a(i, j1) + x1i = a(i + 1, j) - a(i + 1, j1) + x2r = a(i, j2) + a(i, j3) + x2i = a(i + 1, j2) + a(i + 1, j3) + x3r = a(i, j2) - a(i, j3) + x3i = a(i + 1, j2) - a(i + 1, j3) + a(i, j) = x0r + x2r + a(i + 1, j) = x0i + x2i + x0r = x0r - x2r + x0i = x0i - x2i + a(i, j2) = wk2r * x0r - wk2i * x0i + a(i + 1, j2) = wk2r * x0i + wk2i * x0r + x0r = x1r - x3i + x0i = x1i + x3r + a(i, j1) = wk1r * x0r - wk1i * x0i + a(i + 1, j1) = wk1r * x0i + wk1i * x0r + x0r = x1r + x3i + x0i = x1i - x3r + a(i, j3) = wk3r * x0r - wk3i * x0i + a(i + 1, j3) = wk3r * x0i + wk3i * x0r + end do + end do + end do + end if + l = m + end do + if (l .lt. n) then + do j = 0, l - 1 + j1 = j + l + do i = 0, n1 - 2, 2 + x0r = a(i, j) - a(i, j1) + x0i = a(i + 1, j) - a(i + 1, j1) + a(i, j) = a(i, j) + a(i, j1) + a(i + 1, j) = a(i + 1, j) + a(i + 1, j1) + a(i, j1) = x0r + a(i + 1, j1) = x0i + end do + end do + end if + end +! + subroutine cftfrow(n1max, n, n2, a, w) + integer n1max, n, n2, i, j, j1, j2, j3, k, k1, ks, l, m + real*8 a(0 : n1max - 1, 0 : n2 - 1), w(0 : *) + real*8 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i + real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i + do i = 0, n2 - 1 + l = 2 + do while (2 * l .lt. n) + m = 4 * l + do j = 0, l - 2, 2 + j1 = j + l + j2 = j1 + l + j3 = j2 + l + x0r = a(j, i) + a(j1, i) + x0i = a(j + 1, i) + a(j1 + 1, i) + x1r = a(j, i) - a(j1, i) + x1i = a(j + 1, i) - a(j1 + 1, i) + x2r = a(j2, i) + a(j3, i) + x2i = a(j2 + 1, i) + a(j3 + 1, i) + x3r = a(j2, i) - a(j3, i) + x3i = a(j2 + 1, i) - a(j3 + 1, i) + a(j, i) = x0r + x2r + a(j + 1, i) = x0i + x2i + a(j2, i) = x0r - x2r + a(j2 + 1, i) = x0i - x2i + a(j1, i) = x1r + x3i + a(j1 + 1, i) = x1i - x3r + a(j3, i) = x1r - x3i + a(j3 + 1, i) = x1i + x3r + end do + if (m .lt. n) then + wk1r = w(2) + do j = m, l + m - 2, 2 + j1 = j + l + j2 = j1 + l + j3 = j2 + l + x0r = a(j, i) + a(j1, i) + x0i = a(j + 1, i) + a(j1 + 1, i) + x1r = a(j, i) - a(j1, i) + x1i = a(j + 1, i) - a(j1 + 1, i) + x2r = a(j2, i) + a(j3, i) + x2i = a(j2 + 1, i) + a(j3 + 1, i) + x3r = a(j2, i) - a(j3, i) + x3i = a(j2 + 1, i) - a(j3 + 1, i) + a(j, i) = x0r + x2r + a(j + 1, i) = x0i + x2i + a(j2, i) = x0i - x2i + a(j2 + 1, i) = x2r - x0r + x0r = x1r + x3i + x0i = x1i - x3r + a(j1, i) = wk1r * (x0i + x0r) + a(j1 + 1, i) = wk1r * (x0i - x0r) + x0r = x3i - x1r + x0i = x3r + x1i + a(j3, i) = wk1r * (x0r + x0i) + a(j3 + 1, i) = wk1r * (x0r - x0i) + end do + k1 = 1 + ks = -1 + do k = 2 * m, n - m, m + k1 = k1 + 1 + ks = -ks + wk1r = w(2 * k1) + wk1i = w(2 * k1 + 1) + wk2r = ks * w(k1) + wk2i = w(k1 + ks) + wk3r = wk1r - 2 * wk2i * wk1i + wk3i = 2 * wk2i * wk1r - wk1i + do j = k, l + k - 2, 2 + j1 = j + l + j2 = j1 + l + j3 = j2 + l + x0r = a(j, i) + a(j1, i) + x0i = a(j + 1, i) + a(j1 + 1, i) + x1r = a(j, i) - a(j1, i) + x1i = a(j + 1, i) - a(j1 + 1, i) + x2r = a(j2, i) + a(j3, i) + x2i = a(j2 + 1, i) + a(j3 + 1, i) + x3r = a(j2, i) - a(j3, i) + x3i = a(j2 + 1, i) - a(j3 + 1, i) + a(j, i) = x0r + x2r + a(j + 1, i) = x0i + x2i + x0r = x0r - x2r + x0i = x0i - x2i + a(j2, i) = wk2r * x0r + wk2i * x0i + a(j2 + 1, i) = wk2r * x0i - wk2i * x0r + x0r = x1r + x3i + x0i = x1i - x3r + a(j1, i) = wk1r * x0r + wk1i * x0i + a(j1 + 1, i) = wk1r * x0i - wk1i * x0r + x0r = x1r - x3i + x0i = x1i + x3r + a(j3, i) = wk3r * x0r + wk3i * x0i + a(j3 + 1, i) = wk3r * x0i - wk3i * x0r + end do + end do + end if + l = m + end do + if (l .lt. n) then + do j = 0, l - 2, 2 + j1 = j + l + x0r = a(j, i) - a(j1, i) + x0i = a(j + 1, i) - a(j1 + 1, i) + a(j, i) = a(j, i) + a(j1, i) + a(j + 1, i) = a(j + 1, i) + a(j1 + 1, i) + a(j1, i) = x0r + a(j1 + 1, i) = x0i + end do + end if + end do + end +! + subroutine cftfcol(n1max, n1, n, a, w) + integer n1max, n1, n, i, j, j1, j2, j3, k, k1, ks, l, m + real*8 a(0 : n1max - 1, 0 : n - 1), w(0 : *) + real*8 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i + real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i + l = 1 + do while (2 * l .lt. n) + m = 4 * l + do j = 0, l - 1 + j1 = j + l + j2 = j1 + l + j3 = j2 + l + do i = 0, n1 - 2, 2 + x0r = a(i, j) + a(i, j1) + x0i = a(i + 1, j) + a(i + 1, j1) + x1r = a(i, j) - a(i, j1) + x1i = a(i + 1, j) - a(i + 1, j1) + x2r = a(i, j2) + a(i, j3) + x2i = a(i + 1, j2) + a(i + 1, j3) + x3r = a(i, j2) - a(i, j3) + x3i = a(i + 1, j2) - a(i + 1, j3) + a(i, j) = x0r + x2r + a(i + 1, j) = x0i + x2i + a(i, j2) = x0r - x2r + a(i + 1, j2) = x0i - x2i + a(i, j1) = x1r + x3i + a(i + 1, j1) = x1i - x3r + a(i, j3) = x1r - x3i + a(i + 1, j3) = x1i + x3r + end do + end do + if (m .lt. n) then + wk1r = w(2) + do j = m, l + m - 1 + j1 = j + l + j2 = j1 + l + j3 = j2 + l + do i = 0, n1 - 2, 2 + x0r = a(i, j) + a(i, j1) + x0i = a(i + 1, j) + a(i + 1, j1) + x1r = a(i, j) - a(i, j1) + x1i = a(i + 1, j) - a(i + 1, j1) + x2r = a(i, j2) + a(i, j3) + x2i = a(i + 1, j2) + a(i + 1, j3) + x3r = a(i, j2) - a(i, j3) + x3i = a(i + 1, j2) - a(i + 1, j3) + a(i, j) = x0r + x2r + a(i + 1, j) = x0i + x2i + a(i, j2) = x0i - x2i + a(i + 1, j2) = x2r - x0r + x0r = x1r + x3i + x0i = x1i - x3r + a(i, j1) = wk1r * (x0i + x0r) + a(i + 1, j1) = wk1r * (x0i - x0r) + x0r = x3i - x1r + x0i = x3r + x1i + a(i, j3) = wk1r * (x0r + x0i) + a(i + 1, j3) = wk1r * (x0r - x0i) + end do + end do + k1 = 1 + ks = -1 + do k = 2 * m, n - m, m + k1 = k1 + 1 + ks = -ks + wk1r = w(2 * k1) + wk1i = w(2 * k1 + 1) + wk2r = ks * w(k1) + wk2i = w(k1 + ks) + wk3r = wk1r - 2 * wk2i * wk1i + wk3i = 2 * wk2i * wk1r - wk1i + do j = k, l + k - 1 + j1 = j + l + j2 = j1 + l + j3 = j2 + l + do i = 0, n1 - 2, 2 + x0r = a(i, j) + a(i, j1) + x0i = a(i + 1, j) + a(i + 1, j1) + x1r = a(i, j) - a(i, j1) + x1i = a(i + 1, j) - a(i + 1, j1) + x2r = a(i, j2) + a(i, j3) + x2i = a(i + 1, j2) + a(i + 1, j3) + x3r = a(i, j2) - a(i, j3) + x3i = a(i + 1, j2) - a(i + 1, j3) + a(i, j) = x0r + x2r + a(i + 1, j) = x0i + x2i + x0r = x0r - x2r + x0i = x0i - x2i + a(i, j2) = wk2r * x0r + wk2i * x0i + a(i + 1, j2) = wk2r * x0i - wk2i * x0r + x0r = x1r + x3i + x0i = x1i - x3r + a(i, j1) = wk1r * x0r + wk1i * x0i + a(i + 1, j1) = wk1r * x0i - wk1i * x0r + x0r = x1r - x3i + x0i = x1i + x3r + a(i, j3) = wk3r * x0r + wk3i * x0i + a(i + 1, j3) = wk3r * x0i - wk3i * x0r + end do + end do + end do + end if + l = m + end do + if (l .lt. n) then + do j = 0, l - 1 + j1 = j + l + do i = 0, n1 - 2, 2 + x0r = a(i, j) - a(i, j1) + x0i = a(i + 1, j) - a(i + 1, j1) + a(i, j) = a(i, j) + a(i, j1) + a(i + 1, j) = a(i + 1, j) + a(i + 1, j1) + a(i, j1) = x0r + a(i + 1, j1) = x0i + end do + end do + end if + end +! + subroutine rftbrow(n1max, n, n2, a, nc, c) + integer n1max, n, n2, nc, i, j, k, kk, ks + real*8 a(0 : n1max - 1, 0 : n2 - 1), c(0 : nc - 1), + & wkr, wki, xr, xi, yr, yi + ks = 4 * nc / n + do i = 0, n2 - 1 + kk = 0 + do k = n / 2 - 2, 2, -2 + j = n - k + kk = kk + ks + wkr = 0.5d0 - c(kk) + wki = c(nc - kk) + xr = a(k, i) - a(j, i) + xi = a(k + 1, i) + a(j + 1, i) + yr = wkr * xr - wki * xi + yi = wkr * xi + wki * xr + a(k, i) = a(k, i) - yr + a(k + 1, i) = a(k + 1, i) - yi + a(j, i) = a(j, i) + yr + a(j + 1, i) = a(j + 1, i) - yi + end do + end do + end +! + subroutine rftfrow(n1max, n, n2, a, nc, c) + integer n1max, n, n2, nc, i, j, k, kk, ks + real*8 a(0 : n1max - 1, 0 : n2 - 1), c(0 : nc - 1), + & wkr, wki, xr, xi, yr, yi + ks = 4 * nc / n + do i = 0, n2 - 1 + kk = 0 + do k = n / 2 - 2, 2, -2 + j = n - k + kk = kk + ks + wkr = 0.5d0 - c(kk) + wki = c(nc - kk) + xr = a(k, i) - a(j, i) + xi = a(k + 1, i) + a(j + 1, i) + yr = wkr * xr + wki * xi + yi = wkr * xi - wki * xr + a(k, i) = a(k, i) - yr + a(k + 1, i) = a(k + 1, i) - yi + a(j, i) = a(j, i) + yr + a(j + 1, i) = a(j + 1, i) - yi + end do + end do + end +! + subroutine dctbsub(n1max, n1, n2, a, nc, c) + integer n1max, n1, n2, nc, kk1, kk2, ks1, ks2, n2h, j2, + & k1, k2 + real*8 a(0 : n1max - 1, 0 : n2 - 1), c(0 : nc - 1), + & w2r, w2i, wkr, wki, wjr, wji, x0r, x0i, x1r, x1i + ks1 = nc / n1 + ks2 = nc / n2 + n2h = n2 / 2 + kk2 = ks2 + do k2 = 1, n2h - 1 + j2 = n2 - k2 + w2r = 2 * c(kk2) + w2i = 2 * c(nc - kk2) + kk2 = kk2 + ks2 + kk1 = ks1 + do k1 = 2, n1 - 2, 2 + x0r = w2r * c(kk1) + x0i = w2i * c(kk1) + x1r = w2r * c(nc - kk1) + x1i = w2i * c(nc - kk1) + wkr = x0r - x1i + wki = x0i + x1r + wji = x0r + x1i + wjr = x0i - x1r + kk1 = kk1 + ks1 + x0r = wkr * a(k1, k2) - wki * a(k1 + 1, k2) + x0i = wkr * a(k1 + 1, k2) + wki * a(k1, k2) + x1r = wjr * a(k1, j2) - wji * a(k1 + 1, j2) + x1i = wjr * a(k1 + 1, j2) + wji * a(k1, j2) + a(k1, k2) = x0r + x1i + a(k1 + 1, k2) = x0i - x1r + a(k1, j2) = x1r + x0i + a(k1 + 1, j2) = x1i - x0r + end do + wkr = w2r * 0.5d0 + wki = w2i * 0.5d0 + wjr = w2r * c(kk1) + wji = w2i * c(kk1) + x0r = a(0, k2) + a(0, j2) + x0i = a(1, k2) - a(1, j2) + x1r = a(0, k2) - a(0, j2) + x1i = a(1, k2) + a(1, j2) + a(0, k2) = wkr * x0r - wki * x0i + a(1, k2) = wkr * x0i + wki * x0r + a(0, j2) = -wjr * x1r + wji * x1i + a(1, j2) = wjr * x1i + wji * x1r + end do + w2r = 2 * c(kk2) + kk1 = ks1 + do k1 = 2, n1 - 2, 2 + wkr = 2 * c(kk1) + wki = 2 * c(nc - kk1) + wjr = w2r * wkr + wji = w2r * wki + kk1 = kk1 + ks1 + x0i = wkr * a(k1 + 1, 0) + wki * a(k1, 0) + a(k1, 0) = wkr * a(k1, 0) - wki * a(k1 + 1, 0) + a(k1 + 1, 0) = x0i + x0i = wjr * a(k1 + 1, n2h) + wji * a(k1, n2h) + a(k1, n2h) = wjr * a(k1, n2h) - wji * a(k1 + 1, n2h) + a(k1 + 1, n2h) = x0i + end do + a(1, 0) = a(1, 0) * w2r + a(0, n2h) = a(0, n2h) * w2r + a(1, n2h) = a(1, n2h) * 0.5d0 + end +! + subroutine dctfsub(n1max, n1, n2, a, nc, c) + integer n1max, n1, n2, nc, kk1, kk2, ks1, ks2, n2h, j2, + & k1, k2 + real*8 a(0 : n1max - 1, 0 : n2 - 1), c(0 : nc - 1), + & w2r, w2i, wkr, wki, wjr, wji, x0r, x0i, x1r, x1i + ks1 = nc / n1 + ks2 = nc / n2 + n2h = n2 / 2 + kk2 = ks2 + do k2 = 1, n2h - 1 + j2 = n2 - k2 + w2r = 2 * c(kk2) + w2i = 2 * c(nc - kk2) + kk2 = kk2 + ks2 + kk1 = ks1 + do k1 = 2, n1 - 2, 2 + x0r = w2r * c(kk1) + x0i = w2i * c(kk1) + x1r = w2r * c(nc - kk1) + x1i = w2i * c(nc - kk1) + wkr = x0r - x1i + wki = x0i + x1r + wji = x0r + x1i + wjr = x0i - x1r + kk1 = kk1 + ks1 + x0r = a(k1, k2) - a(k1 + 1, j2) + x0i = a(k1, j2) + a(k1 + 1, k2) + x1r = a(k1, j2) - a(k1 + 1, k2) + x1i = a(k1, k2) + a(k1 + 1, j2) + a(k1, k2) = wkr * x0r + wki * x0i + a(k1 + 1, k2) = wkr * x0i - wki * x0r + a(k1, j2) = wjr * x1r + wji * x1i + a(k1 + 1, j2) = wjr * x1i - wji * x1r + end do + x0r = 2 * c(kk1) + wjr = x0r * w2r + wji = x0r * w2i + x0r = w2r * a(0, k2) + w2i * a(1, k2) + x0i = w2r * a(1, k2) - w2i * a(0, k2) + x1r = -wjr * a(0, j2) + wji * a(1, j2) + x1i = wjr * a(1, j2) + wji * a(0, j2) + a(0, k2) = x0r + x1r + a(1, k2) = x1i + x0i + a(0, j2) = x0r - x1r + a(1, j2) = x1i - x0i + end do + w2r = 2 * c(kk2) + kk1 = ks1 + do k1 = 2, n1 - 2, 2 + wkr = 2 * c(kk1) + wki = 2 * c(nc - kk1) + wjr = w2r * wkr + wji = w2r * wki + kk1 = kk1 + ks1 + x0i = wkr * a(k1 + 1, 0) - wki * a(k1, 0) + a(k1, 0) = wkr * a(k1, 0) + wki * a(k1 + 1, 0) + a(k1 + 1, 0) = x0i + x0i = wjr * a(k1 + 1, n2h) - wji * a(k1, n2h) + a(k1, n2h) = wjr * a(k1, n2h) + wji * a(k1 + 1, n2h) + a(k1 + 1, n2h) = x0i + end do + w2r = w2r * 2 + a(0, 0) = a(0, 0) * 2 + a(1, 0) = a(1, 0) * w2r + a(0, n2h) = a(0, n2h) * w2r + end +! + subroutine dstbsub(n1max, n1, n2, a, nc, c) + integer n1max, n1, n2, nc, kk1, kk2, ks1, ks2, n2h, j2, + & k1, k2 + real*8 a(0 : n1max - 1, 0 : n2 - 1), c(0 : nc - 1), + & w2r, w2i, wkr, wki, wjr, wji, x0r, x0i, x1r, x1i + ks1 = nc / n1 + ks2 = nc / n2 + n2h = n2 / 2 + kk2 = ks2 + do k2 = 1, n2h - 1 + j2 = n2 - k2 + w2r = 2 * c(kk2) + w2i = 2 * c(nc - kk2) + kk2 = kk2 + ks2 + kk1 = ks1 + do k1 = 2, n1 - 2, 2 + x0r = w2r * c(kk1) + x0i = w2i * c(kk1) + x1r = w2r * c(nc - kk1) + x1i = w2i * c(nc - kk1) + wkr = x0r - x1i + wki = x0i + x1r + wji = x0r + x1i + wjr = x0i - x1r + kk1 = kk1 + ks1 + x0r = wkr * a(k1, k2) - wki * a(k1 + 1, k2) + x0i = wkr * a(k1 + 1, k2) + wki * a(k1, k2) + x1r = wjr * a(k1, j2) - wji * a(k1 + 1, j2) + x1i = wjr * a(k1 + 1, j2) + wji * a(k1, j2) + a(k1, k2) = x1i - x0r + a(k1 + 1, k2) = x1r + x0i + a(k1, j2) = x0i - x1r + a(k1 + 1, j2) = x0r + x1i + end do + wkr = w2r * 0.5d0 + wki = w2i * 0.5d0 + wjr = w2r * c(kk1) + wji = w2i * c(kk1) + x0r = a(0, k2) + a(0, j2) + x0i = a(1, k2) - a(1, j2) + x1r = a(0, k2) - a(0, j2) + x1i = a(1, k2) + a(1, j2) + a(1, k2) = wkr * x0r - wki * x0i + a(0, k2) = wkr * x0i + wki * x0r + a(1, j2) = -wjr * x1r + wji * x1i + a(0, j2) = wjr * x1i + wji * x1r + end do + w2r = 2 * c(kk2) + kk1 = ks1 + do k1 = 2, n1 - 2, 2 + wkr = 2 * c(kk1) + wki = 2 * c(nc - kk1) + wjr = w2r * wkr + wji = w2r * wki + kk1 = kk1 + ks1 + x0i = wkr * a(k1 + 1, 0) + wki * a(k1, 0) + a(k1 + 1, 0) = wkr * a(k1, 0) - wki * a(k1 + 1, 0) + a(k1, 0) = x0i + x0i = wjr * a(k1 + 1, n2h) + wji * a(k1, n2h) + a(k1 + 1, n2h) = wjr * a(k1, n2h) - wji * a(k1 + 1, n2h) + a(k1, n2h) = x0i + end do + a(1, 0) = a(1, 0) * w2r + a(0, n2h) = a(0, n2h) * w2r + a(1, n2h) = a(1, n2h) * 0.5d0 + end +! + subroutine dstfsub(n1max, n1, n2, a, nc, c) + integer n1max, n1, n2, nc, kk1, kk2, ks1, ks2, n2h, j2, + & k1, k2 + real*8 a(0 : n1max - 1, 0 : n2 - 1), c(0 : nc - 1), + & w2r, w2i, wkr, wki, wjr, wji, x0r, x0i, x1r, x1i + ks1 = nc / n1 + ks2 = nc / n2 + n2h = n2 / 2 + kk2 = ks2 + do k2 = 1, n2h - 1 + j2 = n2 - k2 + w2r = 2 * c(kk2) + w2i = 2 * c(nc - kk2) + kk2 = kk2 + ks2 + kk1 = ks1 + do k1 = 2, n1 - 2, 2 + x0r = w2r * c(kk1) + x0i = w2i * c(kk1) + x1r = w2r * c(nc - kk1) + x1i = w2i * c(nc - kk1) + wkr = x0r - x1i + wki = x0i + x1r + wji = x0r + x1i + wjr = x0i - x1r + kk1 = kk1 + ks1 + x0r = a(k1 + 1, j2) - a(k1, k2) + x0i = a(k1 + 1, k2) + a(k1, j2) + x1r = a(k1 + 1, k2) - a(k1, j2) + x1i = a(k1 + 1, j2) + a(k1, k2) + a(k1, k2) = wkr * x0r + wki * x0i + a(k1 + 1, k2) = wkr * x0i - wki * x0r + a(k1, j2) = wjr * x1r + wji * x1i + a(k1 + 1, j2) = wjr * x1i - wji * x1r + end do + x0r = 2 * c(kk1) + wjr = x0r * w2r + wji = x0r * w2i + x0r = w2r * a(1, k2) + w2i * a(0, k2) + x0i = w2r * a(0, k2) - w2i * a(1, k2) + x1r = -wjr * a(1, j2) + wji * a(0, j2) + x1i = wjr * a(0, j2) + wji * a(1, j2) + a(0, k2) = x0r + x1r + a(1, k2) = x1i + x0i + a(0, j2) = x0r - x1r + a(1, j2) = x1i - x0i + end do + w2r = 2 * c(kk2) + kk1 = ks1 + do k1 = 2, n1 - 2, 2 + wkr = 2 * c(kk1) + wki = 2 * c(nc - kk1) + wjr = w2r * wkr + wji = w2r * wki + kk1 = kk1 + ks1 + x0i = wkr * a(k1, 0) - wki * a(k1 + 1, 0) + a(k1, 0) = wkr * a(k1 + 1, 0) + wki * a(k1, 0) + a(k1 + 1, 0) = x0i + x0i = wjr * a(k1, n2h) - wji * a(k1 + 1, n2h) + a(k1, n2h) = wjr * a(k1 + 1, n2h) + wji * a(k1, n2h) + a(k1 + 1, n2h) = x0i + end do + w2r = w2r * 2 + a(0, 0) = a(0, 0) * 2 + a(1, 0) = a(1, 0) * w2r + a(0, n2h) = a(0, n2h) * w2r + end +! diff --git a/src/fft2d/fft2d/fftsg.c b/src/fft2d/fft2d/fftsg.c new file mode 100644 index 0000000..43d7534 --- /dev/null +++ b/src/fft2d/fft2d/fftsg.c @@ -0,0 +1,3314 @@ +/* +Fast Fourier/Cosine/Sine Transform + dimension :one + data length :power of 2 + decimation :frequency + radix :split-radix + data :inplace + table :use +functions + cdft: Complex Discrete Fourier Transform + rdft: Real Discrete Fourier Transform + ddct: Discrete Cosine Transform + ddst: Discrete Sine Transform + dfct: Cosine Transform of RDFT (Real Symmetric DFT) + dfst: Sine Transform of RDFT (Real Anti-symmetric DFT) +function prototypes + void cdft(int, int, double *, int *, double *); + void rdft(int, int, double *, int *, double *); + void ddct(int, int, double *, int *, double *); + void ddst(int, int, double *, int *, double *); + void dfct(int, double *, double *, int *, double *); + void dfst(int, double *, double *, int *, double *); +macro definitions + USE_CDFT_PTHREADS : default=not defined + CDFT_THREADS_BEGIN_N : must be >= 512, default=8192 + CDFT_4THREADS_BEGIN_N : must be >= 512, default=65536 + USE_CDFT_WINTHREADS : default=not defined + CDFT_THREADS_BEGIN_N : must be >= 512, default=32768 + CDFT_4THREADS_BEGIN_N : must be >= 512, default=524288 + + +-------- Complex DFT (Discrete Fourier Transform) -------- + [definition] + <case1> + X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n + <case2> + X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n + (notes: sum_j=0^n-1 is a summation from j=0 to n-1) + [usage] + <case1> + ip[0] = 0; // first time only + cdft(2*n, 1, a, ip, w); + <case2> + ip[0] = 0; // first time only + cdft(2*n, -1, a, ip, w); + [parameters] + 2*n :data length (int) + n >= 1, n = power of 2 + a[0...2*n-1] :input/output data (double *) + input data + a[2*j] = Re(x[j]), + a[2*j+1] = Im(x[j]), 0<=j<n + output data + a[2*k] = Re(X[k]), + a[2*k+1] = Im(X[k]), 0<=k<n + ip[0...*] :work area for bit reversal (int *) + length of ip >= 2+sqrt(n) + strictly, + length of ip >= + 2+(1<<(int)(log(n+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n/2-1] :cos/sin table (double *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + cdft(2*n, -1, a, ip, w); + is + cdft(2*n, 1, a, ip, w); + for (j = 0; j <= 2 * n - 1; j++) { + a[j] *= 1.0 / n; + } + . + + +-------- Real DFT / Inverse of Real DFT -------- + [definition] + <case1> RDFT + R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2 + I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2 + <case2> IRDFT (excluding scale) + a[k] = (R[0] + R[n/2]*cos(pi*k))/2 + + sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) + + sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n + [usage] + <case1> + ip[0] = 0; // first time only + rdft(n, 1, a, ip, w); + <case2> + ip[0] = 0; // first time only + rdft(n, -1, a, ip, w); + [parameters] + n :data length (int) + n >= 2, n = power of 2 + a[0...n-1] :input/output data (double *) + <case1> + output data + a[2*k] = R[k], 0<=k<n/2 + a[2*k+1] = I[k], 0<k<n/2 + a[1] = R[n/2] + <case2> + input data + a[2*j] = R[j], 0<=j<n/2 + a[2*j+1] = I[j], 0<j<n/2 + a[1] = R[n/2] + ip[0...*] :work area for bit reversal (int *) + length of ip >= 2+sqrt(n/2) + strictly, + length of ip >= + 2+(1<<(int)(log(n/2+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n/2-1] :cos/sin table (double *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + rdft(n, 1, a, ip, w); + is + rdft(n, -1, a, ip, w); + for (j = 0; j <= n - 1; j++) { + a[j] *= 2.0 / n; + } + . + + +-------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- + [definition] + <case1> IDCT (excluding scale) + C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n + <case2> DCT + C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n + [usage] + <case1> + ip[0] = 0; // first time only + ddct(n, 1, a, ip, w); + <case2> + ip[0] = 0; // first time only + ddct(n, -1, a, ip, w); + [parameters] + n :data length (int) + n >= 2, n = power of 2 + a[0...n-1] :input/output data (double *) + output data + a[k] = C[k], 0<=k<n + ip[0...*] :work area for bit reversal (int *) + length of ip >= 2+sqrt(n/2) + strictly, + length of ip >= + 2+(1<<(int)(log(n/2+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n*5/4-1] :cos/sin table (double *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + ddct(n, -1, a, ip, w); + is + a[0] *= 0.5; + ddct(n, 1, a, ip, w); + for (j = 0; j <= n - 1; j++) { + a[j] *= 2.0 / n; + } + . + + +-------- DST (Discrete Sine Transform) / Inverse of DST -------- + [definition] + <case1> IDST (excluding scale) + S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n + <case2> DST + S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n + [usage] + <case1> + ip[0] = 0; // first time only + ddst(n, 1, a, ip, w); + <case2> + ip[0] = 0; // first time only + ddst(n, -1, a, ip, w); + [parameters] + n :data length (int) + n >= 2, n = power of 2 + a[0...n-1] :input/output data (double *) + <case1> + input data + a[j] = A[j], 0<j<n + a[0] = A[n] + output data + a[k] = S[k], 0<=k<n + <case2> + output data + a[k] = S[k], 0<k<n + a[0] = S[n] + ip[0...*] :work area for bit reversal (int *) + length of ip >= 2+sqrt(n/2) + strictly, + length of ip >= + 2+(1<<(int)(log(n/2+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n*5/4-1] :cos/sin table (double *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + ddst(n, -1, a, ip, w); + is + a[0] *= 0.5; + ddst(n, 1, a, ip, w); + for (j = 0; j <= n - 1; j++) { + a[j] *= 2.0 / n; + } + . + + +-------- Cosine Transform of RDFT (Real Symmetric DFT) -------- + [definition] + C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n + [usage] + ip[0] = 0; // first time only + dfct(n, a, t, ip, w); + [parameters] + n :data length - 1 (int) + n >= 2, n = power of 2 + a[0...n] :input/output data (double *) + output data + a[k] = C[k], 0<=k<=n + t[0...n/2] :work area (double *) + ip[0...*] :work area for bit reversal (int *) + length of ip >= 2+sqrt(n/4) + strictly, + length of ip >= + 2+(1<<(int)(log(n/4+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n*5/8-1] :cos/sin table (double *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + a[0] *= 0.5; + a[n] *= 0.5; + dfct(n, a, t, ip, w); + is + a[0] *= 0.5; + a[n] *= 0.5; + dfct(n, a, t, ip, w); + for (j = 0; j <= n; j++) { + a[j] *= 2.0 / n; + } + . + + +-------- Sine Transform of RDFT (Real Anti-symmetric DFT) -------- + [definition] + S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n + [usage] + ip[0] = 0; // first time only + dfst(n, a, t, ip, w); + [parameters] + n :data length + 1 (int) + n >= 2, n = power of 2 + a[0...n-1] :input/output data (double *) + output data + a[k] = S[k], 0<k<n + (a[0] is used for work area) + t[0...n/2-1] :work area (double *) + ip[0...*] :work area for bit reversal (int *) + length of ip >= 2+sqrt(n/4) + strictly, + length of ip >= + 2+(1<<(int)(log(n/4+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n*5/8-1] :cos/sin table (double *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + dfst(n, a, t, ip, w); + is + dfst(n, a, t, ip, w); + for (j = 1; j <= n - 1; j++) { + a[j] *= 2.0 / n; + } + . + + +Appendix : + The cos/sin table is recalculated when the larger table required. + w[] and ip[] are compatible with all routines. +*/ + + +void cdft(int n, int isgn, double *a, int *ip, double *w) +{ + void makewt(int nw, int *ip, double *w); + void cftfsub(int n, double *a, int *ip, int nw, double *w); + void cftbsub(int n, double *a, int *ip, int nw, double *w); + int nw; + + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + if (isgn >= 0) { + cftfsub(n, a, ip, nw, w); + } else { + cftbsub(n, a, ip, nw, w); + } +} + + +void rdft(int n, int isgn, double *a, int *ip, double *w) +{ + void makewt(int nw, int *ip, double *w); + void makect(int nc, int *ip, double *c); + void cftfsub(int n, double *a, int *ip, int nw, double *w); + void cftbsub(int n, double *a, int *ip, int nw, double *w); + void rftfsub(int n, double *a, int nc, double *c); + void rftbsub(int n, double *a, int nc, double *c); + int nw, nc; + double xi; + + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > (nc << 2)) { + nc = n >> 2; + makect(nc, ip, w + nw); + } + if (isgn >= 0) { + if (n > 4) { + cftfsub(n, a, ip, nw, w); + rftfsub(n, a, nc, w + nw); + } else if (n == 4) { + cftfsub(n, a, ip, nw, w); + } + xi = a[0] - a[1]; + a[0] += a[1]; + a[1] = xi; + } else { + a[1] = 0.5 * (a[0] - a[1]); + a[0] -= a[1]; + if (n > 4) { + rftbsub(n, a, nc, w + nw); + cftbsub(n, a, ip, nw, w); + } else if (n == 4) { + cftbsub(n, a, ip, nw, w); + } + } +} + + +void ddct(int n, int isgn, double *a, int *ip, double *w) +{ + void makewt(int nw, int *ip, double *w); + void makect(int nc, int *ip, double *c); + void cftfsub(int n, double *a, int *ip, int nw, double *w); + void cftbsub(int n, double *a, int *ip, int nw, double *w); + void rftfsub(int n, double *a, int nc, double *c); + void rftbsub(int n, double *a, int nc, double *c); + void dctsub(int n, double *a, int nc, double *c); + int j, nw, nc; + double xr; + + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > nc) { + nc = n; + makect(nc, ip, w + nw); + } + if (isgn < 0) { + xr = a[n - 1]; + for (j = n - 2; j >= 2; j -= 2) { + a[j + 1] = a[j] - a[j - 1]; + a[j] += a[j - 1]; + } + a[1] = a[0] - xr; + a[0] += xr; + if (n > 4) { + rftbsub(n, a, nc, w + nw); + cftbsub(n, a, ip, nw, w); + } else if (n == 4) { + cftbsub(n, a, ip, nw, w); + } + } + dctsub(n, a, nc, w + nw); + if (isgn >= 0) { + if (n > 4) { + cftfsub(n, a, ip, nw, w); + rftfsub(n, a, nc, w + nw); + } else if (n == 4) { + cftfsub(n, a, ip, nw, w); + } + xr = a[0] - a[1]; + a[0] += a[1]; + for (j = 2; j < n; j += 2) { + a[j - 1] = a[j] - a[j + 1]; + a[j] += a[j + 1]; + } + a[n - 1] = xr; + } +} + + +void ddst(int n, int isgn, double *a, int *ip, double *w) +{ + void makewt(int nw, int *ip, double *w); + void makect(int nc, int *ip, double *c); + void cftfsub(int n, double *a, int *ip, int nw, double *w); + void cftbsub(int n, double *a, int *ip, int nw, double *w); + void rftfsub(int n, double *a, int nc, double *c); + void rftbsub(int n, double *a, int nc, double *c); + void dstsub(int n, double *a, int nc, double *c); + int j, nw, nc; + double xr; + + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > nc) { + nc = n; + makect(nc, ip, w + nw); + } + if (isgn < 0) { + xr = a[n - 1]; + for (j = n - 2; j >= 2; j -= 2) { + a[j + 1] = -a[j] - a[j - 1]; + a[j] -= a[j - 1]; + } + a[1] = a[0] + xr; + a[0] -= xr; + if (n > 4) { + rftbsub(n, a, nc, w + nw); + cftbsub(n, a, ip, nw, w); + } else if (n == 4) { + cftbsub(n, a, ip, nw, w); + } + } + dstsub(n, a, nc, w + nw); + if (isgn >= 0) { + if (n > 4) { + cftfsub(n, a, ip, nw, w); + rftfsub(n, a, nc, w + nw); + } else if (n == 4) { + cftfsub(n, a, ip, nw, w); + } + xr = a[0] - a[1]; + a[0] += a[1]; + for (j = 2; j < n; j += 2) { + a[j - 1] = -a[j] - a[j + 1]; + a[j] -= a[j + 1]; + } + a[n - 1] = -xr; + } +} + + +void dfct(int n, double *a, double *t, int *ip, double *w) +{ + void makewt(int nw, int *ip, double *w); + void makect(int nc, int *ip, double *c); + void cftfsub(int n, double *a, int *ip, int nw, double *w); + void rftfsub(int n, double *a, int nc, double *c); + void dctsub(int n, double *a, int nc, double *c); + int j, k, l, m, mh, nw, nc; + double xr, xi, yr, yi; + + nw = ip[0]; + if (n > (nw << 3)) { + nw = n >> 3; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > (nc << 1)) { + nc = n >> 1; + makect(nc, ip, w + nw); + } + m = n >> 1; + yi = a[m]; + xi = a[0] + a[n]; + a[0] -= a[n]; + t[0] = xi - yi; + t[m] = xi + yi; + if (n > 2) { + mh = m >> 1; + for (j = 1; j < mh; j++) { + k = m - j; + xr = a[j] - a[n - j]; + xi = a[j] + a[n - j]; + yr = a[k] - a[n - k]; + yi = a[k] + a[n - k]; + a[j] = xr; + a[k] = yr; + t[j] = xi - yi; + t[k] = xi + yi; + } + t[mh] = a[mh] + a[n - mh]; + a[mh] -= a[n - mh]; + dctsub(m, a, nc, w + nw); + if (m > 4) { + cftfsub(m, a, ip, nw, w); + rftfsub(m, a, nc, w + nw); + } else if (m == 4) { + cftfsub(m, a, ip, nw, w); + } + a[n - 1] = a[0] - a[1]; + a[1] = a[0] + a[1]; + for (j = m - 2; j >= 2; j -= 2) { + a[2 * j + 1] = a[j] + a[j + 1]; + a[2 * j - 1] = a[j] - a[j + 1]; + } + l = 2; + m = mh; + while (m >= 2) { + dctsub(m, t, nc, w + nw); + if (m > 4) { + cftfsub(m, t, ip, nw, w); + rftfsub(m, t, nc, w + nw); + } else if (m == 4) { + cftfsub(m, t, ip, nw, w); + } + a[n - l] = t[0] - t[1]; + a[l] = t[0] + t[1]; + k = 0; + for (j = 2; j < m; j += 2) { + k += l << 2; + a[k - l] = t[j] - t[j + 1]; + a[k + l] = t[j] + t[j + 1]; + } + l <<= 1; + mh = m >> 1; + for (j = 0; j < mh; j++) { + k = m - j; + t[j] = t[m + k] - t[m + j]; + t[k] = t[m + k] + t[m + j]; + } + t[mh] = t[m + mh]; + m = mh; + } + a[l] = t[0]; + a[n] = t[2] - t[1]; + a[0] = t[2] + t[1]; + } else { + a[1] = a[0]; + a[2] = t[0]; + a[0] = t[1]; + } +} + + +void dfst(int n, double *a, double *t, int *ip, double *w) +{ + void makewt(int nw, int *ip, double *w); + void makect(int nc, int *ip, double *c); + void cftfsub(int n, double *a, int *ip, int nw, double *w); + void rftfsub(int n, double *a, int nc, double *c); + void dstsub(int n, double *a, int nc, double *c); + int j, k, l, m, mh, nw, nc; + double xr, xi, yr, yi; + + nw = ip[0]; + if (n > (nw << 3)) { + nw = n >> 3; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > (nc << 1)) { + nc = n >> 1; + makect(nc, ip, w + nw); + } + if (n > 2) { + m = n >> 1; + mh = m >> 1; + for (j = 1; j < mh; j++) { + k = m - j; + xr = a[j] + a[n - j]; + xi = a[j] - a[n - j]; + yr = a[k] + a[n - k]; + yi = a[k] - a[n - k]; + a[j] = xr; + a[k] = yr; + t[j] = xi + yi; + t[k] = xi - yi; + } + t[0] = a[mh] - a[n - mh]; + a[mh] += a[n - mh]; + a[0] = a[m]; + dstsub(m, a, nc, w + nw); + if (m > 4) { + cftfsub(m, a, ip, nw, w); + rftfsub(m, a, nc, w + nw); + } else if (m == 4) { + cftfsub(m, a, ip, nw, w); + } + a[n - 1] = a[1] - a[0]; + a[1] = a[0] + a[1]; + for (j = m - 2; j >= 2; j -= 2) { + a[2 * j + 1] = a[j] - a[j + 1]; + a[2 * j - 1] = -a[j] - a[j + 1]; + } + l = 2; + m = mh; + while (m >= 2) { + dstsub(m, t, nc, w + nw); + if (m > 4) { + cftfsub(m, t, ip, nw, w); + rftfsub(m, t, nc, w + nw); + } else if (m == 4) { + cftfsub(m, t, ip, nw, w); + } + a[n - l] = t[1] - t[0]; + a[l] = t[0] + t[1]; + k = 0; + for (j = 2; j < m; j += 2) { + k += l << 2; + a[k - l] = -t[j] - t[j + 1]; + a[k + l] = t[j] - t[j + 1]; + } + l <<= 1; + mh = m >> 1; + for (j = 1; j < mh; j++) { + k = m - j; + t[j] = t[m + k] + t[m + j]; + t[k] = t[m + k] - t[m + j]; + } + t[0] = t[m + mh]; + m = mh; + } + a[l] = t[0]; + } + a[0] = 0; +} + + +/* -------- initializing routines -------- */ + + +#include <math.h> + +void makewt(int nw, int *ip, double *w) +{ + void makeipt(int nw, int *ip); + int j, nwh, nw0, nw1; + double delta, wn4r, wk1r, wk1i, wk3r, wk3i; + + ip[0] = nw; + ip[1] = 1; + if (nw > 2) { + nwh = nw >> 1; + delta = atan(1.0) / nwh; + wn4r = cos(delta * nwh); + w[0] = 1; + w[1] = wn4r; + if (nwh == 4) { + w[2] = cos(delta * 2); + w[3] = sin(delta * 2); + } else if (nwh > 4) { + makeipt(nw, ip); + w[2] = 0.5 / cos(delta * 2); + w[3] = 0.5 / cos(delta * 6); + for (j = 4; j < nwh; j += 4) { + w[j] = cos(delta * j); + w[j + 1] = sin(delta * j); + w[j + 2] = cos(3 * delta * j); + w[j + 3] = -sin(3 * delta * j); + } + } + nw0 = 0; + while (nwh > 2) { + nw1 = nw0 + nwh; + nwh >>= 1; + w[nw1] = 1; + w[nw1 + 1] = wn4r; + if (nwh == 4) { + wk1r = w[nw0 + 4]; + wk1i = w[nw0 + 5]; + w[nw1 + 2] = wk1r; + w[nw1 + 3] = wk1i; + } else if (nwh > 4) { + wk1r = w[nw0 + 4]; + wk3r = w[nw0 + 6]; + w[nw1 + 2] = 0.5 / wk1r; + w[nw1 + 3] = 0.5 / wk3r; + for (j = 4; j < nwh; j += 4) { + wk1r = w[nw0 + 2 * j]; + wk1i = w[nw0 + 2 * j + 1]; + wk3r = w[nw0 + 2 * j + 2]; + wk3i = w[nw0 + 2 * j + 3]; + w[nw1 + j] = wk1r; + w[nw1 + j + 1] = wk1i; + w[nw1 + j + 2] = wk3r; + w[nw1 + j + 3] = wk3i; + } + } + nw0 = nw1; + } + } +} + + +void makeipt(int nw, int *ip) +{ + int j, l, m, m2, p, q; + + ip[2] = 0; + ip[3] = 16; + m = 2; + for (l = nw; l > 32; l >>= 2) { + m2 = m << 1; + q = m2 << 3; + for (j = m; j < m2; j++) { + p = ip[j] << 2; + ip[m + j] = p; + ip[m2 + j] = p + q; + } + m = m2; + } +} + + +void makect(int nc, int *ip, double *c) +{ + int j, nch; + double delta; + + ip[1] = nc; + if (nc > 1) { + nch = nc >> 1; + delta = atan(1.0) / nch; + c[0] = cos(delta * nch); + c[nch] = 0.5 * c[0]; + for (j = 1; j < nch; j++) { + c[j] = 0.5 * cos(delta * j); + c[nc - j] = 0.5 * sin(delta * j); + } + } +} + + +/* -------- child routines -------- */ + + +#ifdef USE_CDFT_PTHREADS +#define USE_CDFT_THREADS +#ifndef CDFT_THREADS_BEGIN_N +#define CDFT_THREADS_BEGIN_N 8192 +#endif +#ifndef CDFT_4THREADS_BEGIN_N +#define CDFT_4THREADS_BEGIN_N 65536 +#endif +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#define cdft_thread_t pthread_t +#define cdft_thread_create(thp,func,argp) { \ + if (pthread_create(thp, NULL, func, (void *) argp) != 0) { \ + fprintf(stderr, "cdft thread error\n"); \ + exit(1); \ + } \ +} +#define cdft_thread_wait(th) { \ + if (pthread_join(th, NULL) != 0) { \ + fprintf(stderr, "cdft thread error\n"); \ + exit(1); \ + } \ +} +#endif /* USE_CDFT_PTHREADS */ + + +#ifdef USE_CDFT_WINTHREADS +#define USE_CDFT_THREADS +#ifndef CDFT_THREADS_BEGIN_N +#define CDFT_THREADS_BEGIN_N 32768 +#endif +#ifndef CDFT_4THREADS_BEGIN_N +#define CDFT_4THREADS_BEGIN_N 524288 +#endif +#include <windows.h> +#include <stdio.h> +#include <stdlib.h> +#define cdft_thread_t HANDLE +#define cdft_thread_create(thp,func,argp) { \ + DWORD thid; \ + *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) func, (LPVOID) argp, 0, &thid); \ + if (*(thp) == 0) { \ + fprintf(stderr, "cdft thread error\n"); \ + exit(1); \ + } \ +} +#define cdft_thread_wait(th) { \ + WaitForSingleObject(th, INFINITE); \ + CloseHandle(th); \ +} +#endif /* USE_CDFT_WINTHREADS */ + + +void cftfsub(int n, double *a, int *ip, int nw, double *w) +{ + void bitrv2(int n, int *ip, double *a); + void bitrv216(double *a); + void bitrv208(double *a); + void cftf1st(int n, double *a, double *w); + void cftrec4(int n, double *a, int nw, double *w); + void cftleaf(int n, int isplt, double *a, int nw, double *w); + void cftfx41(int n, double *a, int nw, double *w); + void cftf161(double *a, double *w); + void cftf081(double *a, double *w); + void cftf040(double *a); + void cftx020(double *a); +#ifdef USE_CDFT_THREADS + void cftrec4_th(int n, double *a, int nw, double *w); +#endif /* USE_CDFT_THREADS */ + + if (n > 8) { + if (n > 32) { + cftf1st(n, a, &w[nw - (n >> 2)]); +#ifdef USE_CDFT_THREADS + if (n > CDFT_THREADS_BEGIN_N) { + cftrec4_th(n, a, nw, w); + } else +#endif /* USE_CDFT_THREADS */ + if (n > 512) { + cftrec4(n, a, nw, w); + } else if (n > 128) { + cftleaf(n, 1, a, nw, w); + } else { + cftfx41(n, a, nw, w); + } + bitrv2(n, ip, a); + } else if (n == 32) { + cftf161(a, &w[nw - 8]); + bitrv216(a); + } else { + cftf081(a, w); + bitrv208(a); + } + } else if (n == 8) { + cftf040(a); + } else if (n == 4) { + cftx020(a); + } +} + + +void cftbsub(int n, double *a, int *ip, int nw, double *w) +{ + void bitrv2conj(int n, int *ip, double *a); + void bitrv216neg(double *a); + void bitrv208neg(double *a); + void cftb1st(int n, double *a, double *w); + void cftrec4(int n, double *a, int nw, double *w); + void cftleaf(int n, int isplt, double *a, int nw, double *w); + void cftfx41(int n, double *a, int nw, double *w); + void cftf161(double *a, double *w); + void cftf081(double *a, double *w); + void cftb040(double *a); + void cftx020(double *a); +#ifdef USE_CDFT_THREADS + void cftrec4_th(int n, double *a, int nw, double *w); +#endif /* USE_CDFT_THREADS */ + + if (n > 8) { + if (n > 32) { + cftb1st(n, a, &w[nw - (n >> 2)]); +#ifdef USE_CDFT_THREADS + if (n > CDFT_THREADS_BEGIN_N) { + cftrec4_th(n, a, nw, w); + } else +#endif /* USE_CDFT_THREADS */ + if (n > 512) { + cftrec4(n, a, nw, w); + } else if (n > 128) { + cftleaf(n, 1, a, nw, w); + } else { + cftfx41(n, a, nw, w); + } + bitrv2conj(n, ip, a); + } else if (n == 32) { + cftf161(a, &w[nw - 8]); + bitrv216neg(a); + } else { + cftf081(a, w); + bitrv208neg(a); + } + } else if (n == 8) { + cftb040(a); + } else if (n == 4) { + cftx020(a); + } +} + + +void bitrv2(int n, int *ip, double *a) +{ + int j, j1, k, k1, l, m, nh, nm; + double xr, xi, yr, yi; + + m = 1; + for (l = n >> 2; l > 8; l >>= 2) { + m <<= 1; + } + nh = n >> 1; + nm = 4 * m; + if (l == 8) { + for (k = 0; k < m; k++) { + for (j = 0; j < k; j++) { + j1 = 4 * j + 2 * ip[m + k]; + k1 = 4 * k + 2 * ip[m + j]; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += 2 * nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 -= nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += 2 * nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nh; + k1 += 2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 -= 2 * nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 += nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 -= 2 * nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += 2; + k1 += nh; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += 2 * nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 -= nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += 2 * nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nh; + k1 -= 2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 -= 2 * nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 += nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 -= 2 * nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + k1 = 4 * k + 2 * ip[m + k]; + j1 = k1 + 2; + k1 += nh; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += 2 * nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 -= nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= 2; + k1 -= nh; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nh + 2; + k1 += nh + 2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nh - nm; + k1 += 2 * nm - 2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + } else { + for (k = 0; k < m; k++) { + for (j = 0; j < k; j++) { + j1 = 4 * j + ip[m + k]; + k1 = 4 * k + ip[m + j]; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nh; + k1 += 2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 -= nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += 2; + k1 += nh; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nh; + k1 -= 2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 -= nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + k1 = 4 * k + ip[m + k]; + j1 = k1 + 2; + k1 += nh; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += nm; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + } +} + + +void bitrv2conj(int n, int *ip, double *a) +{ + int j, j1, k, k1, l, m, nh, nm; + double xr, xi, yr, yi; + + m = 1; + for (l = n >> 2; l > 8; l >>= 2) { + m <<= 1; + } + nh = n >> 1; + nm = 4 * m; + if (l == 8) { + for (k = 0; k < m; k++) { + for (j = 0; j < k; j++) { + j1 = 4 * j + 2 * ip[m + k]; + k1 = 4 * k + 2 * ip[m + j]; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += 2 * nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 -= nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += 2 * nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nh; + k1 += 2; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 -= 2 * nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 += nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 -= 2 * nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += 2; + k1 += nh; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += 2 * nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 -= nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += 2 * nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nh; + k1 -= 2; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 -= 2 * nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 += nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 -= 2 * nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + k1 = 4 * k + 2 * ip[m + k]; + j1 = k1 + 2; + k1 += nh; + a[j1 - 1] = -a[j1 - 1]; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + a[k1 + 3] = -a[k1 + 3]; + j1 += nm; + k1 += 2 * nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 -= nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= 2; + k1 -= nh; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nh + 2; + k1 += nh + 2; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nh - nm; + k1 += 2 * nm - 2; + a[j1 - 1] = -a[j1 - 1]; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + a[k1 + 3] = -a[k1 + 3]; + } + } else { + for (k = 0; k < m; k++) { + for (j = 0; j < k; j++) { + j1 = 4 * j + ip[m + k]; + k1 = 4 * k + ip[m + j]; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nh; + k1 += 2; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 -= nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += 2; + k1 += nh; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += nm; + k1 += nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nh; + k1 -= 2; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 -= nm; + k1 -= nm; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + k1 = 4 * k + ip[m + k]; + j1 = k1 + 2; + k1 += nh; + a[j1 - 1] = -a[j1 - 1]; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + a[k1 + 3] = -a[k1 + 3]; + j1 += nm; + k1 += nm; + a[j1 - 1] = -a[j1 - 1]; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + a[k1 + 3] = -a[k1 + 3]; + } + } +} + + +void bitrv216(double *a) +{ + double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, + x5r, x5i, x7r, x7i, x8r, x8i, x10r, x10i, + x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i; + + x1r = a[2]; + x1i = a[3]; + x2r = a[4]; + x2i = a[5]; + x3r = a[6]; + x3i = a[7]; + x4r = a[8]; + x4i = a[9]; + x5r = a[10]; + x5i = a[11]; + x7r = a[14]; + x7i = a[15]; + x8r = a[16]; + x8i = a[17]; + x10r = a[20]; + x10i = a[21]; + x11r = a[22]; + x11i = a[23]; + x12r = a[24]; + x12i = a[25]; + x13r = a[26]; + x13i = a[27]; + x14r = a[28]; + x14i = a[29]; + a[2] = x8r; + a[3] = x8i; + a[4] = x4r; + a[5] = x4i; + a[6] = x12r; + a[7] = x12i; + a[8] = x2r; + a[9] = x2i; + a[10] = x10r; + a[11] = x10i; + a[14] = x14r; + a[15] = x14i; + a[16] = x1r; + a[17] = x1i; + a[20] = x5r; + a[21] = x5i; + a[22] = x13r; + a[23] = x13i; + a[24] = x3r; + a[25] = x3i; + a[26] = x11r; + a[27] = x11i; + a[28] = x7r; + a[29] = x7i; +} + + +void bitrv216neg(double *a) +{ + double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, + x5r, x5i, x6r, x6i, x7r, x7i, x8r, x8i, + x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i, + x13r, x13i, x14r, x14i, x15r, x15i; + + x1r = a[2]; + x1i = a[3]; + x2r = a[4]; + x2i = a[5]; + x3r = a[6]; + x3i = a[7]; + x4r = a[8]; + x4i = a[9]; + x5r = a[10]; + x5i = a[11]; + x6r = a[12]; + x6i = a[13]; + x7r = a[14]; + x7i = a[15]; + x8r = a[16]; + x8i = a[17]; + x9r = a[18]; + x9i = a[19]; + x10r = a[20]; + x10i = a[21]; + x11r = a[22]; + x11i = a[23]; + x12r = a[24]; + x12i = a[25]; + x13r = a[26]; + x13i = a[27]; + x14r = a[28]; + x14i = a[29]; + x15r = a[30]; + x15i = a[31]; + a[2] = x15r; + a[3] = x15i; + a[4] = x7r; + a[5] = x7i; + a[6] = x11r; + a[7] = x11i; + a[8] = x3r; + a[9] = x3i; + a[10] = x13r; + a[11] = x13i; + a[12] = x5r; + a[13] = x5i; + a[14] = x9r; + a[15] = x9i; + a[16] = x1r; + a[17] = x1i; + a[18] = x14r; + a[19] = x14i; + a[20] = x6r; + a[21] = x6i; + a[22] = x10r; + a[23] = x10i; + a[24] = x2r; + a[25] = x2i; + a[26] = x12r; + a[27] = x12i; + a[28] = x4r; + a[29] = x4i; + a[30] = x8r; + a[31] = x8i; +} + + +void bitrv208(double *a) +{ + double x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i; + + x1r = a[2]; + x1i = a[3]; + x3r = a[6]; + x3i = a[7]; + x4r = a[8]; + x4i = a[9]; + x6r = a[12]; + x6i = a[13]; + a[2] = x4r; + a[3] = x4i; + a[6] = x6r; + a[7] = x6i; + a[8] = x1r; + a[9] = x1i; + a[12] = x3r; + a[13] = x3i; +} + + +void bitrv208neg(double *a) +{ + double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, + x5r, x5i, x6r, x6i, x7r, x7i; + + x1r = a[2]; + x1i = a[3]; + x2r = a[4]; + x2i = a[5]; + x3r = a[6]; + x3i = a[7]; + x4r = a[8]; + x4i = a[9]; + x5r = a[10]; + x5i = a[11]; + x6r = a[12]; + x6i = a[13]; + x7r = a[14]; + x7i = a[15]; + a[2] = x7r; + a[3] = x7i; + a[4] = x3r; + a[5] = x3i; + a[6] = x5r; + a[7] = x5i; + a[8] = x1r; + a[9] = x1i; + a[10] = x6r; + a[11] = x6i; + a[12] = x2r; + a[13] = x2i; + a[14] = x4r; + a[15] = x4i; +} + + +void cftf1st(int n, double *a, double *w) +{ + int j, j0, j1, j2, j3, k, m, mh; + double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, + wd1r, wd1i, wd3r, wd3i; + double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, + y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i; + + mh = n >> 3; + m = 2 * mh; + j1 = m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[0] + a[j2]; + x0i = a[1] + a[j2 + 1]; + x1r = a[0] - a[j2]; + x1i = a[1] - a[j2 + 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + a[0] = x0r + x2r; + a[1] = x0i + x2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i - x2i; + a[j2] = x1r - x3i; + a[j2 + 1] = x1i + x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i - x3r; + wn4r = w[1]; + csc1 = w[2]; + csc3 = w[3]; + wd1r = 1; + wd1i = 0; + wd3r = 1; + wd3i = 0; + k = 0; + for (j = 2; j < mh - 2; j += 4) { + k += 4; + wk1r = csc1 * (wd1r + w[k]); + wk1i = csc1 * (wd1i + w[k + 1]); + wk3r = csc3 * (wd3r + w[k + 2]); + wk3i = csc3 * (wd3i + w[k + 3]); + wd1r = w[k]; + wd1i = w[k + 1]; + wd3r = w[k + 2]; + wd3i = w[k + 3]; + j1 = j + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j] + a[j2]; + x0i = a[j + 1] + a[j2 + 1]; + x1r = a[j] - a[j2]; + x1i = a[j + 1] - a[j2 + 1]; + y0r = a[j + 2] + a[j2 + 2]; + y0i = a[j + 3] + a[j2 + 3]; + y1r = a[j + 2] - a[j2 + 2]; + y1i = a[j + 3] - a[j2 + 3]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + y2r = a[j1 + 2] + a[j3 + 2]; + y2i = a[j1 + 3] + a[j3 + 3]; + y3r = a[j1 + 2] - a[j3 + 2]; + y3i = a[j1 + 3] - a[j3 + 3]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + a[j + 2] = y0r + y2r; + a[j + 3] = y0i + y2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i - x2i; + a[j1 + 2] = y0r - y2r; + a[j1 + 3] = y0i - y2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j2] = wk1r * x0r - wk1i * x0i; + a[j2 + 1] = wk1r * x0i + wk1i * x0r; + x0r = y1r - y3i; + x0i = y1i + y3r; + a[j2 + 2] = wd1r * x0r - wd1i * x0i; + a[j2 + 3] = wd1r * x0i + wd1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = wk3r * x0r + wk3i * x0i; + a[j3 + 1] = wk3r * x0i - wk3i * x0r; + x0r = y1r + y3i; + x0i = y1i - y3r; + a[j3 + 2] = wd3r * x0r + wd3i * x0i; + a[j3 + 3] = wd3r * x0i - wd3i * x0r; + j0 = m - j; + j1 = j0 + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j0] + a[j2]; + x0i = a[j0 + 1] + a[j2 + 1]; + x1r = a[j0] - a[j2]; + x1i = a[j0 + 1] - a[j2 + 1]; + y0r = a[j0 - 2] + a[j2 - 2]; + y0i = a[j0 - 1] + a[j2 - 1]; + y1r = a[j0 - 2] - a[j2 - 2]; + y1i = a[j0 - 1] - a[j2 - 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + y2r = a[j1 - 2] + a[j3 - 2]; + y2i = a[j1 - 1] + a[j3 - 1]; + y3r = a[j1 - 2] - a[j3 - 2]; + y3i = a[j1 - 1] - a[j3 - 1]; + a[j0] = x0r + x2r; + a[j0 + 1] = x0i + x2i; + a[j0 - 2] = y0r + y2r; + a[j0 - 1] = y0i + y2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i - x2i; + a[j1 - 2] = y0r - y2r; + a[j1 - 1] = y0i - y2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j2] = wk1i * x0r - wk1r * x0i; + a[j2 + 1] = wk1i * x0i + wk1r * x0r; + x0r = y1r - y3i; + x0i = y1i + y3r; + a[j2 - 2] = wd1i * x0r - wd1r * x0i; + a[j2 - 1] = wd1i * x0i + wd1r * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = wk3i * x0r + wk3r * x0i; + a[j3 + 1] = wk3i * x0i - wk3r * x0r; + x0r = y1r + y3i; + x0i = y1i - y3r; + a[j3 - 2] = wd3i * x0r + wd3r * x0i; + a[j3 - 1] = wd3i * x0i - wd3r * x0r; + } + wk1r = csc1 * (wd1r + wn4r); + wk1i = csc1 * (wd1i + wn4r); + wk3r = csc3 * (wd3r - wn4r); + wk3i = csc3 * (wd3i - wn4r); + j0 = mh; + j1 = j0 + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j0 - 2] + a[j2 - 2]; + x0i = a[j0 - 1] + a[j2 - 1]; + x1r = a[j0 - 2] - a[j2 - 2]; + x1i = a[j0 - 1] - a[j2 - 1]; + x2r = a[j1 - 2] + a[j3 - 2]; + x2i = a[j1 - 1] + a[j3 - 1]; + x3r = a[j1 - 2] - a[j3 - 2]; + x3i = a[j1 - 1] - a[j3 - 1]; + a[j0 - 2] = x0r + x2r; + a[j0 - 1] = x0i + x2i; + a[j1 - 2] = x0r - x2r; + a[j1 - 1] = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j2 - 2] = wk1r * x0r - wk1i * x0i; + a[j2 - 1] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3 - 2] = wk3r * x0r + wk3i * x0i; + a[j3 - 1] = wk3r * x0i - wk3i * x0r; + x0r = a[j0] + a[j2]; + x0i = a[j0 + 1] + a[j2 + 1]; + x1r = a[j0] - a[j2]; + x1i = a[j0 + 1] - a[j2 + 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + a[j0] = x0r + x2r; + a[j0 + 1] = x0i + x2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j2] = wn4r * (x0r - x0i); + a[j2 + 1] = wn4r * (x0i + x0r); + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = -wn4r * (x0r + x0i); + a[j3 + 1] = -wn4r * (x0i - x0r); + x0r = a[j0 + 2] + a[j2 + 2]; + x0i = a[j0 + 3] + a[j2 + 3]; + x1r = a[j0 + 2] - a[j2 + 2]; + x1i = a[j0 + 3] - a[j2 + 3]; + x2r = a[j1 + 2] + a[j3 + 2]; + x2i = a[j1 + 3] + a[j3 + 3]; + x3r = a[j1 + 2] - a[j3 + 2]; + x3i = a[j1 + 3] - a[j3 + 3]; + a[j0 + 2] = x0r + x2r; + a[j0 + 3] = x0i + x2i; + a[j1 + 2] = x0r - x2r; + a[j1 + 3] = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j2 + 2] = wk1i * x0r - wk1r * x0i; + a[j2 + 3] = wk1i * x0i + wk1r * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3 + 2] = wk3i * x0r + wk3r * x0i; + a[j3 + 3] = wk3i * x0i - wk3r * x0r; +} + + +void cftb1st(int n, double *a, double *w) +{ + int j, j0, j1, j2, j3, k, m, mh; + double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, + wd1r, wd1i, wd3r, wd3i; + double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, + y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i; + + mh = n >> 3; + m = 2 * mh; + j1 = m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[0] + a[j2]; + x0i = -a[1] - a[j2 + 1]; + x1r = a[0] - a[j2]; + x1i = -a[1] + a[j2 + 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + a[0] = x0r + x2r; + a[1] = x0i - x2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i + x2i; + a[j2] = x1r + x3i; + a[j2 + 1] = x1i + x3r; + a[j3] = x1r - x3i; + a[j3 + 1] = x1i - x3r; + wn4r = w[1]; + csc1 = w[2]; + csc3 = w[3]; + wd1r = 1; + wd1i = 0; + wd3r = 1; + wd3i = 0; + k = 0; + for (j = 2; j < mh - 2; j += 4) { + k += 4; + wk1r = csc1 * (wd1r + w[k]); + wk1i = csc1 * (wd1i + w[k + 1]); + wk3r = csc3 * (wd3r + w[k + 2]); + wk3i = csc3 * (wd3i + w[k + 3]); + wd1r = w[k]; + wd1i = w[k + 1]; + wd3r = w[k + 2]; + wd3i = w[k + 3]; + j1 = j + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j] + a[j2]; + x0i = -a[j + 1] - a[j2 + 1]; + x1r = a[j] - a[j2]; + x1i = -a[j + 1] + a[j2 + 1]; + y0r = a[j + 2] + a[j2 + 2]; + y0i = -a[j + 3] - a[j2 + 3]; + y1r = a[j + 2] - a[j2 + 2]; + y1i = -a[j + 3] + a[j2 + 3]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + y2r = a[j1 + 2] + a[j3 + 2]; + y2i = a[j1 + 3] + a[j3 + 3]; + y3r = a[j1 + 2] - a[j3 + 2]; + y3i = a[j1 + 3] - a[j3 + 3]; + a[j] = x0r + x2r; + a[j + 1] = x0i - x2i; + a[j + 2] = y0r + y2r; + a[j + 3] = y0i - y2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i + x2i; + a[j1 + 2] = y0r - y2r; + a[j1 + 3] = y0i + y2i; + x0r = x1r + x3i; + x0i = x1i + x3r; + a[j2] = wk1r * x0r - wk1i * x0i; + a[j2 + 1] = wk1r * x0i + wk1i * x0r; + x0r = y1r + y3i; + x0i = y1i + y3r; + a[j2 + 2] = wd1r * x0r - wd1i * x0i; + a[j2 + 3] = wd1r * x0i + wd1i * x0r; + x0r = x1r - x3i; + x0i = x1i - x3r; + a[j3] = wk3r * x0r + wk3i * x0i; + a[j3 + 1] = wk3r * x0i - wk3i * x0r; + x0r = y1r - y3i; + x0i = y1i - y3r; + a[j3 + 2] = wd3r * x0r + wd3i * x0i; + a[j3 + 3] = wd3r * x0i - wd3i * x0r; + j0 = m - j; + j1 = j0 + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j0] + a[j2]; + x0i = -a[j0 + 1] - a[j2 + 1]; + x1r = a[j0] - a[j2]; + x1i = -a[j0 + 1] + a[j2 + 1]; + y0r = a[j0 - 2] + a[j2 - 2]; + y0i = -a[j0 - 1] - a[j2 - 1]; + y1r = a[j0 - 2] - a[j2 - 2]; + y1i = -a[j0 - 1] + a[j2 - 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + y2r = a[j1 - 2] + a[j3 - 2]; + y2i = a[j1 - 1] + a[j3 - 1]; + y3r = a[j1 - 2] - a[j3 - 2]; + y3i = a[j1 - 1] - a[j3 - 1]; + a[j0] = x0r + x2r; + a[j0 + 1] = x0i - x2i; + a[j0 - 2] = y0r + y2r; + a[j0 - 1] = y0i - y2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i + x2i; + a[j1 - 2] = y0r - y2r; + a[j1 - 1] = y0i + y2i; + x0r = x1r + x3i; + x0i = x1i + x3r; + a[j2] = wk1i * x0r - wk1r * x0i; + a[j2 + 1] = wk1i * x0i + wk1r * x0r; + x0r = y1r + y3i; + x0i = y1i + y3r; + a[j2 - 2] = wd1i * x0r - wd1r * x0i; + a[j2 - 1] = wd1i * x0i + wd1r * x0r; + x0r = x1r - x3i; + x0i = x1i - x3r; + a[j3] = wk3i * x0r + wk3r * x0i; + a[j3 + 1] = wk3i * x0i - wk3r * x0r; + x0r = y1r - y3i; + x0i = y1i - y3r; + a[j3 - 2] = wd3i * x0r + wd3r * x0i; + a[j3 - 1] = wd3i * x0i - wd3r * x0r; + } + wk1r = csc1 * (wd1r + wn4r); + wk1i = csc1 * (wd1i + wn4r); + wk3r = csc3 * (wd3r - wn4r); + wk3i = csc3 * (wd3i - wn4r); + j0 = mh; + j1 = j0 + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j0 - 2] + a[j2 - 2]; + x0i = -a[j0 - 1] - a[j2 - 1]; + x1r = a[j0 - 2] - a[j2 - 2]; + x1i = -a[j0 - 1] + a[j2 - 1]; + x2r = a[j1 - 2] + a[j3 - 2]; + x2i = a[j1 - 1] + a[j3 - 1]; + x3r = a[j1 - 2] - a[j3 - 2]; + x3i = a[j1 - 1] - a[j3 - 1]; + a[j0 - 2] = x0r + x2r; + a[j0 - 1] = x0i - x2i; + a[j1 - 2] = x0r - x2r; + a[j1 - 1] = x0i + x2i; + x0r = x1r + x3i; + x0i = x1i + x3r; + a[j2 - 2] = wk1r * x0r - wk1i * x0i; + a[j2 - 1] = wk1r * x0i + wk1i * x0r; + x0r = x1r - x3i; + x0i = x1i - x3r; + a[j3 - 2] = wk3r * x0r + wk3i * x0i; + a[j3 - 1] = wk3r * x0i - wk3i * x0r; + x0r = a[j0] + a[j2]; + x0i = -a[j0 + 1] - a[j2 + 1]; + x1r = a[j0] - a[j2]; + x1i = -a[j0 + 1] + a[j2 + 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + a[j0] = x0r + x2r; + a[j0 + 1] = x0i - x2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i + x2i; + x0r = x1r + x3i; + x0i = x1i + x3r; + a[j2] = wn4r * (x0r - x0i); + a[j2 + 1] = wn4r * (x0i + x0r); + x0r = x1r - x3i; + x0i = x1i - x3r; + a[j3] = -wn4r * (x0r + x0i); + a[j3 + 1] = -wn4r * (x0i - x0r); + x0r = a[j0 + 2] + a[j2 + 2]; + x0i = -a[j0 + 3] - a[j2 + 3]; + x1r = a[j0 + 2] - a[j2 + 2]; + x1i = -a[j0 + 3] + a[j2 + 3]; + x2r = a[j1 + 2] + a[j3 + 2]; + x2i = a[j1 + 3] + a[j3 + 3]; + x3r = a[j1 + 2] - a[j3 + 2]; + x3i = a[j1 + 3] - a[j3 + 3]; + a[j0 + 2] = x0r + x2r; + a[j0 + 3] = x0i - x2i; + a[j1 + 2] = x0r - x2r; + a[j1 + 3] = x0i + x2i; + x0r = x1r + x3i; + x0i = x1i + x3r; + a[j2 + 2] = wk1i * x0r - wk1r * x0i; + a[j2 + 3] = wk1i * x0i + wk1r * x0r; + x0r = x1r - x3i; + x0i = x1i - x3r; + a[j3 + 2] = wk3i * x0r + wk3r * x0i; + a[j3 + 3] = wk3i * x0i - wk3r * x0r; +} + + +#ifdef USE_CDFT_THREADS +struct cdft_arg_st { + int n0; + int n; + double *a; + int nw; + double *w; +}; +typedef struct cdft_arg_st cdft_arg_t; + + +void cftrec4_th(int n, double *a, int nw, double *w) +{ + void *cftrec1_th(void *p); + void *cftrec2_th(void *p); + int i, idiv4, m, nthread; + cdft_thread_t th[4]; + cdft_arg_t ag[4]; + + nthread = 2; + idiv4 = 0; + m = n >> 1; + if (n > CDFT_4THREADS_BEGIN_N) { + nthread = 4; + idiv4 = 1; + m >>= 1; + } + for (i = 0; i < nthread; i++) { + ag[i].n0 = n; + ag[i].n = m; + ag[i].a = &a[i * m]; + ag[i].nw = nw; + ag[i].w = w; + if (i != idiv4) { + cdft_thread_create(&th[i], cftrec1_th, &ag[i]); + } else { + cdft_thread_create(&th[i], cftrec2_th, &ag[i]); + } + } + for (i = 0; i < nthread; i++) { + cdft_thread_wait(th[i]); + } +} + + +void *cftrec1_th(void *p) +{ + int cfttree(int n, int j, int k, double *a, int nw, double *w); + void cftleaf(int n, int isplt, double *a, int nw, double *w); + void cftmdl1(int n, double *a, double *w); + int isplt, j, k, m, n, n0, nw; + double *a, *w; + + n0 = ((cdft_arg_t *) p)->n0; + n = ((cdft_arg_t *) p)->n; + a = ((cdft_arg_t *) p)->a; + nw = ((cdft_arg_t *) p)->nw; + w = ((cdft_arg_t *) p)->w; + m = n0; + while (m > 512) { + m >>= 2; + cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]); + } + cftleaf(m, 1, &a[n - m], nw, w); + k = 0; + for (j = n - m; j > 0; j -= m) { + k++; + isplt = cfttree(m, j, k, a, nw, w); + cftleaf(m, isplt, &a[j - m], nw, w); + } + return (void *) 0; +} + + +void *cftrec2_th(void *p) +{ + int cfttree(int n, int j, int k, double *a, int nw, double *w); + void cftleaf(int n, int isplt, double *a, int nw, double *w); + void cftmdl2(int n, double *a, double *w); + int isplt, j, k, m, n, n0, nw; + double *a, *w; + + n0 = ((cdft_arg_t *) p)->n0; + n = ((cdft_arg_t *) p)->n; + a = ((cdft_arg_t *) p)->a; + nw = ((cdft_arg_t *) p)->nw; + w = ((cdft_arg_t *) p)->w; + k = 1; + m = n0; + while (m > 512) { + m >>= 2; + k <<= 2; + cftmdl2(m, &a[n - m], &w[nw - m]); + } + cftleaf(m, 0, &a[n - m], nw, w); + k >>= 1; + for (j = n - m; j > 0; j -= m) { + k++; + isplt = cfttree(m, j, k, a, nw, w); + cftleaf(m, isplt, &a[j - m], nw, w); + } + return (void *) 0; +} +#endif /* USE_CDFT_THREADS */ + + +void cftrec4(int n, double *a, int nw, double *w) +{ + int cfttree(int n, int j, int k, double *a, int nw, double *w); + void cftleaf(int n, int isplt, double *a, int nw, double *w); + void cftmdl1(int n, double *a, double *w); + int isplt, j, k, m; + + m = n; + while (m > 512) { + m >>= 2; + cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]); + } + cftleaf(m, 1, &a[n - m], nw, w); + k = 0; + for (j = n - m; j > 0; j -= m) { + k++; + isplt = cfttree(m, j, k, a, nw, w); + cftleaf(m, isplt, &a[j - m], nw, w); + } +} + + +int cfttree(int n, int j, int k, double *a, int nw, double *w) +{ + void cftmdl1(int n, double *a, double *w); + void cftmdl2(int n, double *a, double *w); + int i, isplt, m; + + if ((k & 3) != 0) { + isplt = k & 1; + if (isplt != 0) { + cftmdl1(n, &a[j - n], &w[nw - (n >> 1)]); + } else { + cftmdl2(n, &a[j - n], &w[nw - n]); + } + } else { + m = n; + for (i = k; (i & 3) == 0; i >>= 2) { + m <<= 2; + } + isplt = i & 1; + if (isplt != 0) { + while (m > 128) { + cftmdl1(m, &a[j - m], &w[nw - (m >> 1)]); + m >>= 2; + } + } else { + while (m > 128) { + cftmdl2(m, &a[j - m], &w[nw - m]); + m >>= 2; + } + } + } + return isplt; +} + + +void cftleaf(int n, int isplt, double *a, int nw, double *w) +{ + void cftmdl1(int n, double *a, double *w); + void cftmdl2(int n, double *a, double *w); + void cftf161(double *a, double *w); + void cftf162(double *a, double *w); + void cftf081(double *a, double *w); + void cftf082(double *a, double *w); + + if (n == 512) { + cftmdl1(128, a, &w[nw - 64]); + cftf161(a, &w[nw - 8]); + cftf162(&a[32], &w[nw - 32]); + cftf161(&a[64], &w[nw - 8]); + cftf161(&a[96], &w[nw - 8]); + cftmdl2(128, &a[128], &w[nw - 128]); + cftf161(&a[128], &w[nw - 8]); + cftf162(&a[160], &w[nw - 32]); + cftf161(&a[192], &w[nw - 8]); + cftf162(&a[224], &w[nw - 32]); + cftmdl1(128, &a[256], &w[nw - 64]); + cftf161(&a[256], &w[nw - 8]); + cftf162(&a[288], &w[nw - 32]); + cftf161(&a[320], &w[nw - 8]); + cftf161(&a[352], &w[nw - 8]); + if (isplt != 0) { + cftmdl1(128, &a[384], &w[nw - 64]); + cftf161(&a[480], &w[nw - 8]); + } else { + cftmdl2(128, &a[384], &w[nw - 128]); + cftf162(&a[480], &w[nw - 32]); + } + cftf161(&a[384], &w[nw - 8]); + cftf162(&a[416], &w[nw - 32]); + cftf161(&a[448], &w[nw - 8]); + } else { + cftmdl1(64, a, &w[nw - 32]); + cftf081(a, &w[nw - 8]); + cftf082(&a[16], &w[nw - 8]); + cftf081(&a[32], &w[nw - 8]); + cftf081(&a[48], &w[nw - 8]); + cftmdl2(64, &a[64], &w[nw - 64]); + cftf081(&a[64], &w[nw - 8]); + cftf082(&a[80], &w[nw - 8]); + cftf081(&a[96], &w[nw - 8]); + cftf082(&a[112], &w[nw - 8]); + cftmdl1(64, &a[128], &w[nw - 32]); + cftf081(&a[128], &w[nw - 8]); + cftf082(&a[144], &w[nw - 8]); + cftf081(&a[160], &w[nw - 8]); + cftf081(&a[176], &w[nw - 8]); + if (isplt != 0) { + cftmdl1(64, &a[192], &w[nw - 32]); + cftf081(&a[240], &w[nw - 8]); + } else { + cftmdl2(64, &a[192], &w[nw - 64]); + cftf082(&a[240], &w[nw - 8]); + } + cftf081(&a[192], &w[nw - 8]); + cftf082(&a[208], &w[nw - 8]); + cftf081(&a[224], &w[nw - 8]); + } +} + + +void cftmdl1(int n, double *a, double *w) +{ + int j, j0, j1, j2, j3, k, m, mh; + double wn4r, wk1r, wk1i, wk3r, wk3i; + double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + mh = n >> 3; + m = 2 * mh; + j1 = m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[0] + a[j2]; + x0i = a[1] + a[j2 + 1]; + x1r = a[0] - a[j2]; + x1i = a[1] - a[j2 + 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + a[0] = x0r + x2r; + a[1] = x0i + x2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i - x2i; + a[j2] = x1r - x3i; + a[j2 + 1] = x1i + x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i - x3r; + wn4r = w[1]; + k = 0; + for (j = 2; j < mh; j += 2) { + k += 4; + wk1r = w[k]; + wk1i = w[k + 1]; + wk3r = w[k + 2]; + wk3i = w[k + 3]; + j1 = j + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j] + a[j2]; + x0i = a[j + 1] + a[j2 + 1]; + x1r = a[j] - a[j2]; + x1i = a[j + 1] - a[j2 + 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j2] = wk1r * x0r - wk1i * x0i; + a[j2 + 1] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = wk3r * x0r + wk3i * x0i; + a[j3 + 1] = wk3r * x0i - wk3i * x0r; + j0 = m - j; + j1 = j0 + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j0] + a[j2]; + x0i = a[j0 + 1] + a[j2 + 1]; + x1r = a[j0] - a[j2]; + x1i = a[j0 + 1] - a[j2 + 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + a[j0] = x0r + x2r; + a[j0 + 1] = x0i + x2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j2] = wk1i * x0r - wk1r * x0i; + a[j2 + 1] = wk1i * x0i + wk1r * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = wk3i * x0r + wk3r * x0i; + a[j3 + 1] = wk3i * x0i - wk3r * x0r; + } + j0 = mh; + j1 = j0 + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j0] + a[j2]; + x0i = a[j0 + 1] + a[j2 + 1]; + x1r = a[j0] - a[j2]; + x1i = a[j0 + 1] - a[j2 + 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + a[j0] = x0r + x2r; + a[j0 + 1] = x0i + x2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j2] = wn4r * (x0r - x0i); + a[j2 + 1] = wn4r * (x0i + x0r); + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = -wn4r * (x0r + x0i); + a[j3 + 1] = -wn4r * (x0i - x0r); +} + + +void cftmdl2(int n, double *a, double *w) +{ + int j, j0, j1, j2, j3, k, kr, m, mh; + double wn4r, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i; + double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y2r, y2i; + + mh = n >> 3; + m = 2 * mh; + wn4r = w[1]; + j1 = m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[0] - a[j2 + 1]; + x0i = a[1] + a[j2]; + x1r = a[0] + a[j2 + 1]; + x1i = a[1] - a[j2]; + x2r = a[j1] - a[j3 + 1]; + x2i = a[j1 + 1] + a[j3]; + x3r = a[j1] + a[j3 + 1]; + x3i = a[j1 + 1] - a[j3]; + y0r = wn4r * (x2r - x2i); + y0i = wn4r * (x2i + x2r); + a[0] = x0r + y0r; + a[1] = x0i + y0i; + a[j1] = x0r - y0r; + a[j1 + 1] = x0i - y0i; + y0r = wn4r * (x3r - x3i); + y0i = wn4r * (x3i + x3r); + a[j2] = x1r - y0i; + a[j2 + 1] = x1i + y0r; + a[j3] = x1r + y0i; + a[j3 + 1] = x1i - y0r; + k = 0; + kr = 2 * m; + for (j = 2; j < mh; j += 2) { + k += 4; + wk1r = w[k]; + wk1i = w[k + 1]; + wk3r = w[k + 2]; + wk3i = w[k + 3]; + kr -= 4; + wd1i = w[kr]; + wd1r = w[kr + 1]; + wd3i = w[kr + 2]; + wd3r = w[kr + 3]; + j1 = j + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j] - a[j2 + 1]; + x0i = a[j + 1] + a[j2]; + x1r = a[j] + a[j2 + 1]; + x1i = a[j + 1] - a[j2]; + x2r = a[j1] - a[j3 + 1]; + x2i = a[j1 + 1] + a[j3]; + x3r = a[j1] + a[j3 + 1]; + x3i = a[j1 + 1] - a[j3]; + y0r = wk1r * x0r - wk1i * x0i; + y0i = wk1r * x0i + wk1i * x0r; + y2r = wd1r * x2r - wd1i * x2i; + y2i = wd1r * x2i + wd1i * x2r; + a[j] = y0r + y2r; + a[j + 1] = y0i + y2i; + a[j1] = y0r - y2r; + a[j1 + 1] = y0i - y2i; + y0r = wk3r * x1r + wk3i * x1i; + y0i = wk3r * x1i - wk3i * x1r; + y2r = wd3r * x3r + wd3i * x3i; + y2i = wd3r * x3i - wd3i * x3r; + a[j2] = y0r + y2r; + a[j2 + 1] = y0i + y2i; + a[j3] = y0r - y2r; + a[j3 + 1] = y0i - y2i; + j0 = m - j; + j1 = j0 + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j0] - a[j2 + 1]; + x0i = a[j0 + 1] + a[j2]; + x1r = a[j0] + a[j2 + 1]; + x1i = a[j0 + 1] - a[j2]; + x2r = a[j1] - a[j3 + 1]; + x2i = a[j1 + 1] + a[j3]; + x3r = a[j1] + a[j3 + 1]; + x3i = a[j1 + 1] - a[j3]; + y0r = wd1i * x0r - wd1r * x0i; + y0i = wd1i * x0i + wd1r * x0r; + y2r = wk1i * x2r - wk1r * x2i; + y2i = wk1i * x2i + wk1r * x2r; + a[j0] = y0r + y2r; + a[j0 + 1] = y0i + y2i; + a[j1] = y0r - y2r; + a[j1 + 1] = y0i - y2i; + y0r = wd3i * x1r + wd3r * x1i; + y0i = wd3i * x1i - wd3r * x1r; + y2r = wk3i * x3r + wk3r * x3i; + y2i = wk3i * x3i - wk3r * x3r; + a[j2] = y0r + y2r; + a[j2 + 1] = y0i + y2i; + a[j3] = y0r - y2r; + a[j3 + 1] = y0i - y2i; + } + wk1r = w[m]; + wk1i = w[m + 1]; + j0 = mh; + j1 = j0 + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j0] - a[j2 + 1]; + x0i = a[j0 + 1] + a[j2]; + x1r = a[j0] + a[j2 + 1]; + x1i = a[j0 + 1] - a[j2]; + x2r = a[j1] - a[j3 + 1]; + x2i = a[j1 + 1] + a[j3]; + x3r = a[j1] + a[j3 + 1]; + x3i = a[j1 + 1] - a[j3]; + y0r = wk1r * x0r - wk1i * x0i; + y0i = wk1r * x0i + wk1i * x0r; + y2r = wk1i * x2r - wk1r * x2i; + y2i = wk1i * x2i + wk1r * x2r; + a[j0] = y0r + y2r; + a[j0 + 1] = y0i + y2i; + a[j1] = y0r - y2r; + a[j1 + 1] = y0i - y2i; + y0r = wk1i * x1r - wk1r * x1i; + y0i = wk1i * x1i + wk1r * x1r; + y2r = wk1r * x3r - wk1i * x3i; + y2i = wk1r * x3i + wk1i * x3r; + a[j2] = y0r - y2r; + a[j2 + 1] = y0i - y2i; + a[j3] = y0r + y2r; + a[j3 + 1] = y0i + y2i; +} + + +void cftfx41(int n, double *a, int nw, double *w) +{ + void cftf161(double *a, double *w); + void cftf162(double *a, double *w); + void cftf081(double *a, double *w); + void cftf082(double *a, double *w); + + if (n == 128) { + cftf161(a, &w[nw - 8]); + cftf162(&a[32], &w[nw - 32]); + cftf161(&a[64], &w[nw - 8]); + cftf161(&a[96], &w[nw - 8]); + } else { + cftf081(a, &w[nw - 8]); + cftf082(&a[16], &w[nw - 8]); + cftf081(&a[32], &w[nw - 8]); + cftf081(&a[48], &w[nw - 8]); + } +} + + +void cftf161(double *a, double *w) +{ + double wn4r, wk1r, wk1i, + x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, + y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, + y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i, + y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, + y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i; + + wn4r = w[1]; + wk1r = w[2]; + wk1i = w[3]; + x0r = a[0] + a[16]; + x0i = a[1] + a[17]; + x1r = a[0] - a[16]; + x1i = a[1] - a[17]; + x2r = a[8] + a[24]; + x2i = a[9] + a[25]; + x3r = a[8] - a[24]; + x3i = a[9] - a[25]; + y0r = x0r + x2r; + y0i = x0i + x2i; + y4r = x0r - x2r; + y4i = x0i - x2i; + y8r = x1r - x3i; + y8i = x1i + x3r; + y12r = x1r + x3i; + y12i = x1i - x3r; + x0r = a[2] + a[18]; + x0i = a[3] + a[19]; + x1r = a[2] - a[18]; + x1i = a[3] - a[19]; + x2r = a[10] + a[26]; + x2i = a[11] + a[27]; + x3r = a[10] - a[26]; + x3i = a[11] - a[27]; + y1r = x0r + x2r; + y1i = x0i + x2i; + y5r = x0r - x2r; + y5i = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + y9r = wk1r * x0r - wk1i * x0i; + y9i = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + y13r = wk1i * x0r - wk1r * x0i; + y13i = wk1i * x0i + wk1r * x0r; + x0r = a[4] + a[20]; + x0i = a[5] + a[21]; + x1r = a[4] - a[20]; + x1i = a[5] - a[21]; + x2r = a[12] + a[28]; + x2i = a[13] + a[29]; + x3r = a[12] - a[28]; + x3i = a[13] - a[29]; + y2r = x0r + x2r; + y2i = x0i + x2i; + y6r = x0r - x2r; + y6i = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + y10r = wn4r * (x0r - x0i); + y10i = wn4r * (x0i + x0r); + x0r = x1r + x3i; + x0i = x1i - x3r; + y14r = wn4r * (x0r + x0i); + y14i = wn4r * (x0i - x0r); + x0r = a[6] + a[22]; + x0i = a[7] + a[23]; + x1r = a[6] - a[22]; + x1i = a[7] - a[23]; + x2r = a[14] + a[30]; + x2i = a[15] + a[31]; + x3r = a[14] - a[30]; + x3i = a[15] - a[31]; + y3r = x0r + x2r; + y3i = x0i + x2i; + y7r = x0r - x2r; + y7i = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + y11r = wk1i * x0r - wk1r * x0i; + y11i = wk1i * x0i + wk1r * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + y15r = wk1r * x0r - wk1i * x0i; + y15i = wk1r * x0i + wk1i * x0r; + x0r = y12r - y14r; + x0i = y12i - y14i; + x1r = y12r + y14r; + x1i = y12i + y14i; + x2r = y13r - y15r; + x2i = y13i - y15i; + x3r = y13r + y15r; + x3i = y13i + y15i; + a[24] = x0r + x2r; + a[25] = x0i + x2i; + a[26] = x0r - x2r; + a[27] = x0i - x2i; + a[28] = x1r - x3i; + a[29] = x1i + x3r; + a[30] = x1r + x3i; + a[31] = x1i - x3r; + x0r = y8r + y10r; + x0i = y8i + y10i; + x1r = y8r - y10r; + x1i = y8i - y10i; + x2r = y9r + y11r; + x2i = y9i + y11i; + x3r = y9r - y11r; + x3i = y9i - y11i; + a[16] = x0r + x2r; + a[17] = x0i + x2i; + a[18] = x0r - x2r; + a[19] = x0i - x2i; + a[20] = x1r - x3i; + a[21] = x1i + x3r; + a[22] = x1r + x3i; + a[23] = x1i - x3r; + x0r = y5r - y7i; + x0i = y5i + y7r; + x2r = wn4r * (x0r - x0i); + x2i = wn4r * (x0i + x0r); + x0r = y5r + y7i; + x0i = y5i - y7r; + x3r = wn4r * (x0r - x0i); + x3i = wn4r * (x0i + x0r); + x0r = y4r - y6i; + x0i = y4i + y6r; + x1r = y4r + y6i; + x1i = y4i - y6r; + a[8] = x0r + x2r; + a[9] = x0i + x2i; + a[10] = x0r - x2r; + a[11] = x0i - x2i; + a[12] = x1r - x3i; + a[13] = x1i + x3r; + a[14] = x1r + x3i; + a[15] = x1i - x3r; + x0r = y0r + y2r; + x0i = y0i + y2i; + x1r = y0r - y2r; + x1i = y0i - y2i; + x2r = y1r + y3r; + x2i = y1i + y3i; + x3r = y1r - y3r; + x3i = y1i - y3i; + a[0] = x0r + x2r; + a[1] = x0i + x2i; + a[2] = x0r - x2r; + a[3] = x0i - x2i; + a[4] = x1r - x3i; + a[5] = x1i + x3r; + a[6] = x1r + x3i; + a[7] = x1i - x3r; +} + + +void cftf162(double *a, double *w) +{ + double wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i, + x0r, x0i, x1r, x1i, x2r, x2i, + y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, + y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i, + y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, + y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i; + + wn4r = w[1]; + wk1r = w[4]; + wk1i = w[5]; + wk3r = w[6]; + wk3i = -w[7]; + wk2r = w[8]; + wk2i = w[9]; + x1r = a[0] - a[17]; + x1i = a[1] + a[16]; + x0r = a[8] - a[25]; + x0i = a[9] + a[24]; + x2r = wn4r * (x0r - x0i); + x2i = wn4r * (x0i + x0r); + y0r = x1r + x2r; + y0i = x1i + x2i; + y4r = x1r - x2r; + y4i = x1i - x2i; + x1r = a[0] + a[17]; + x1i = a[1] - a[16]; + x0r = a[8] + a[25]; + x0i = a[9] - a[24]; + x2r = wn4r * (x0r - x0i); + x2i = wn4r * (x0i + x0r); + y8r = x1r - x2i; + y8i = x1i + x2r; + y12r = x1r + x2i; + y12i = x1i - x2r; + x0r = a[2] - a[19]; + x0i = a[3] + a[18]; + x1r = wk1r * x0r - wk1i * x0i; + x1i = wk1r * x0i + wk1i * x0r; + x0r = a[10] - a[27]; + x0i = a[11] + a[26]; + x2r = wk3i * x0r - wk3r * x0i; + x2i = wk3i * x0i + wk3r * x0r; + y1r = x1r + x2r; + y1i = x1i + x2i; + y5r = x1r - x2r; + y5i = x1i - x2i; + x0r = a[2] + a[19]; + x0i = a[3] - a[18]; + x1r = wk3r * x0r - wk3i * x0i; + x1i = wk3r * x0i + wk3i * x0r; + x0r = a[10] + a[27]; + x0i = a[11] - a[26]; + x2r = wk1r * x0r + wk1i * x0i; + x2i = wk1r * x0i - wk1i * x0r; + y9r = x1r - x2r; + y9i = x1i - x2i; + y13r = x1r + x2r; + y13i = x1i + x2i; + x0r = a[4] - a[21]; + x0i = a[5] + a[20]; + x1r = wk2r * x0r - wk2i * x0i; + x1i = wk2r * x0i + wk2i * x0r; + x0r = a[12] - a[29]; + x0i = a[13] + a[28]; + x2r = wk2i * x0r - wk2r * x0i; + x2i = wk2i * x0i + wk2r * x0r; + y2r = x1r + x2r; + y2i = x1i + x2i; + y6r = x1r - x2r; + y6i = x1i - x2i; + x0r = a[4] + a[21]; + x0i = a[5] - a[20]; + x1r = wk2i * x0r - wk2r * x0i; + x1i = wk2i * x0i + wk2r * x0r; + x0r = a[12] + a[29]; + x0i = a[13] - a[28]; + x2r = wk2r * x0r - wk2i * x0i; + x2i = wk2r * x0i + wk2i * x0r; + y10r = x1r - x2r; + y10i = x1i - x2i; + y14r = x1r + x2r; + y14i = x1i + x2i; + x0r = a[6] - a[23]; + x0i = a[7] + a[22]; + x1r = wk3r * x0r - wk3i * x0i; + x1i = wk3r * x0i + wk3i * x0r; + x0r = a[14] - a[31]; + x0i = a[15] + a[30]; + x2r = wk1i * x0r - wk1r * x0i; + x2i = wk1i * x0i + wk1r * x0r; + y3r = x1r + x2r; + y3i = x1i + x2i; + y7r = x1r - x2r; + y7i = x1i - x2i; + x0r = a[6] + a[23]; + x0i = a[7] - a[22]; + x1r = wk1i * x0r + wk1r * x0i; + x1i = wk1i * x0i - wk1r * x0r; + x0r = a[14] + a[31]; + x0i = a[15] - a[30]; + x2r = wk3i * x0r - wk3r * x0i; + x2i = wk3i * x0i + wk3r * x0r; + y11r = x1r + x2r; + y11i = x1i + x2i; + y15r = x1r - x2r; + y15i = x1i - x2i; + x1r = y0r + y2r; + x1i = y0i + y2i; + x2r = y1r + y3r; + x2i = y1i + y3i; + a[0] = x1r + x2r; + a[1] = x1i + x2i; + a[2] = x1r - x2r; + a[3] = x1i - x2i; + x1r = y0r - y2r; + x1i = y0i - y2i; + x2r = y1r - y3r; + x2i = y1i - y3i; + a[4] = x1r - x2i; + a[5] = x1i + x2r; + a[6] = x1r + x2i; + a[7] = x1i - x2r; + x1r = y4r - y6i; + x1i = y4i + y6r; + x0r = y5r - y7i; + x0i = y5i + y7r; + x2r = wn4r * (x0r - x0i); + x2i = wn4r * (x0i + x0r); + a[8] = x1r + x2r; + a[9] = x1i + x2i; + a[10] = x1r - x2r; + a[11] = x1i - x2i; + x1r = y4r + y6i; + x1i = y4i - y6r; + x0r = y5r + y7i; + x0i = y5i - y7r; + x2r = wn4r * (x0r - x0i); + x2i = wn4r * (x0i + x0r); + a[12] = x1r - x2i; + a[13] = x1i + x2r; + a[14] = x1r + x2i; + a[15] = x1i - x2r; + x1r = y8r + y10r; + x1i = y8i + y10i; + x2r = y9r - y11r; + x2i = y9i - y11i; + a[16] = x1r + x2r; + a[17] = x1i + x2i; + a[18] = x1r - x2r; + a[19] = x1i - x2i; + x1r = y8r - y10r; + x1i = y8i - y10i; + x2r = y9r + y11r; + x2i = y9i + y11i; + a[20] = x1r - x2i; + a[21] = x1i + x2r; + a[22] = x1r + x2i; + a[23] = x1i - x2r; + x1r = y12r - y14i; + x1i = y12i + y14r; + x0r = y13r + y15i; + x0i = y13i - y15r; + x2r = wn4r * (x0r - x0i); + x2i = wn4r * (x0i + x0r); + a[24] = x1r + x2r; + a[25] = x1i + x2i; + a[26] = x1r - x2r; + a[27] = x1i - x2i; + x1r = y12r + y14i; + x1i = y12i - y14r; + x0r = y13r - y15i; + x0i = y13i + y15r; + x2r = wn4r * (x0r - x0i); + x2i = wn4r * (x0i + x0r); + a[28] = x1r - x2i; + a[29] = x1i + x2r; + a[30] = x1r + x2i; + a[31] = x1i - x2r; +} + + +void cftf081(double *a, double *w) +{ + double wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, + y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, + y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; + + wn4r = w[1]; + x0r = a[0] + a[8]; + x0i = a[1] + a[9]; + x1r = a[0] - a[8]; + x1i = a[1] - a[9]; + x2r = a[4] + a[12]; + x2i = a[5] + a[13]; + x3r = a[4] - a[12]; + x3i = a[5] - a[13]; + y0r = x0r + x2r; + y0i = x0i + x2i; + y2r = x0r - x2r; + y2i = x0i - x2i; + y1r = x1r - x3i; + y1i = x1i + x3r; + y3r = x1r + x3i; + y3i = x1i - x3r; + x0r = a[2] + a[10]; + x0i = a[3] + a[11]; + x1r = a[2] - a[10]; + x1i = a[3] - a[11]; + x2r = a[6] + a[14]; + x2i = a[7] + a[15]; + x3r = a[6] - a[14]; + x3i = a[7] - a[15]; + y4r = x0r + x2r; + y4i = x0i + x2i; + y6r = x0r - x2r; + y6i = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + x2r = x1r + x3i; + x2i = x1i - x3r; + y5r = wn4r * (x0r - x0i); + y5i = wn4r * (x0r + x0i); + y7r = wn4r * (x2r - x2i); + y7i = wn4r * (x2r + x2i); + a[8] = y1r + y5r; + a[9] = y1i + y5i; + a[10] = y1r - y5r; + a[11] = y1i - y5i; + a[12] = y3r - y7i; + a[13] = y3i + y7r; + a[14] = y3r + y7i; + a[15] = y3i - y7r; + a[0] = y0r + y4r; + a[1] = y0i + y4i; + a[2] = y0r - y4r; + a[3] = y0i - y4i; + a[4] = y2r - y6i; + a[5] = y2i + y6r; + a[6] = y2r + y6i; + a[7] = y2i - y6r; +} + + +void cftf082(double *a, double *w) +{ + double wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, + y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, + y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; + + wn4r = w[1]; + wk1r = w[2]; + wk1i = w[3]; + y0r = a[0] - a[9]; + y0i = a[1] + a[8]; + y1r = a[0] + a[9]; + y1i = a[1] - a[8]; + x0r = a[4] - a[13]; + x0i = a[5] + a[12]; + y2r = wn4r * (x0r - x0i); + y2i = wn4r * (x0i + x0r); + x0r = a[4] + a[13]; + x0i = a[5] - a[12]; + y3r = wn4r * (x0r - x0i); + y3i = wn4r * (x0i + x0r); + x0r = a[2] - a[11]; + x0i = a[3] + a[10]; + y4r = wk1r * x0r - wk1i * x0i; + y4i = wk1r * x0i + wk1i * x0r; + x0r = a[2] + a[11]; + x0i = a[3] - a[10]; + y5r = wk1i * x0r - wk1r * x0i; + y5i = wk1i * x0i + wk1r * x0r; + x0r = a[6] - a[15]; + x0i = a[7] + a[14]; + y6r = wk1i * x0r - wk1r * x0i; + y6i = wk1i * x0i + wk1r * x0r; + x0r = a[6] + a[15]; + x0i = a[7] - a[14]; + y7r = wk1r * x0r - wk1i * x0i; + y7i = wk1r * x0i + wk1i * x0r; + x0r = y0r + y2r; + x0i = y0i + y2i; + x1r = y4r + y6r; + x1i = y4i + y6i; + a[0] = x0r + x1r; + a[1] = x0i + x1i; + a[2] = x0r - x1r; + a[3] = x0i - x1i; + x0r = y0r - y2r; + x0i = y0i - y2i; + x1r = y4r - y6r; + x1i = y4i - y6i; + a[4] = x0r - x1i; + a[5] = x0i + x1r; + a[6] = x0r + x1i; + a[7] = x0i - x1r; + x0r = y1r - y3i; + x0i = y1i + y3r; + x1r = y5r - y7r; + x1i = y5i - y7i; + a[8] = x0r + x1r; + a[9] = x0i + x1i; + a[10] = x0r - x1r; + a[11] = x0i - x1i; + x0r = y1r + y3i; + x0i = y1i - y3r; + x1r = y5r + y7r; + x1i = y5i + y7i; + a[12] = x0r - x1i; + a[13] = x0i + x1r; + a[14] = x0r + x1i; + a[15] = x0i - x1r; +} + + +void cftf040(double *a) +{ + double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + x0r = a[0] + a[4]; + x0i = a[1] + a[5]; + x1r = a[0] - a[4]; + x1i = a[1] - a[5]; + x2r = a[2] + a[6]; + x2i = a[3] + a[7]; + x3r = a[2] - a[6]; + x3i = a[3] - a[7]; + a[0] = x0r + x2r; + a[1] = x0i + x2i; + a[2] = x1r - x3i; + a[3] = x1i + x3r; + a[4] = x0r - x2r; + a[5] = x0i - x2i; + a[6] = x1r + x3i; + a[7] = x1i - x3r; +} + + +void cftb040(double *a) +{ + double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + x0r = a[0] + a[4]; + x0i = a[1] + a[5]; + x1r = a[0] - a[4]; + x1i = a[1] - a[5]; + x2r = a[2] + a[6]; + x2i = a[3] + a[7]; + x3r = a[2] - a[6]; + x3i = a[3] - a[7]; + a[0] = x0r + x2r; + a[1] = x0i + x2i; + a[2] = x1r + x3i; + a[3] = x1i - x3r; + a[4] = x0r - x2r; + a[5] = x0i - x2i; + a[6] = x1r - x3i; + a[7] = x1i + x3r; +} + + +void cftx020(double *a) +{ + double x0r, x0i; + + x0r = a[0] - a[2]; + x0i = a[1] - a[3]; + a[0] += a[2]; + a[1] += a[3]; + a[2] = x0r; + a[3] = x0i; +} + + +void rftfsub(int n, double *a, int nc, double *c) +{ + int j, k, kk, ks, m; + double wkr, wki, xr, xi, yr, yi; + + m = n >> 1; + ks = 2 * nc / m; + kk = 0; + for (j = 2; j < m; j += 2) { + k = n - j; + kk += ks; + wkr = 0.5 - c[nc - kk]; + wki = c[kk]; + xr = a[j] - a[k]; + xi = a[j + 1] + a[k + 1]; + yr = wkr * xr - wki * xi; + yi = wkr * xi + wki * xr; + a[j] -= yr; + a[j + 1] -= yi; + a[k] += yr; + a[k + 1] -= yi; + } +} + + +void rftbsub(int n, double *a, int nc, double *c) +{ + int j, k, kk, ks, m; + double wkr, wki, xr, xi, yr, yi; + + m = n >> 1; + ks = 2 * nc / m; + kk = 0; + for (j = 2; j < m; j += 2) { + k = n - j; + kk += ks; + wkr = 0.5 - c[nc - kk]; + wki = c[kk]; + xr = a[j] - a[k]; + xi = a[j + 1] + a[k + 1]; + yr = wkr * xr + wki * xi; + yi = wkr * xi - wki * xr; + a[j] -= yr; + a[j + 1] -= yi; + a[k] += yr; + a[k + 1] -= yi; + } +} + + +void dctsub(int n, double *a, int nc, double *c) +{ + int j, k, kk, ks, m; + double wkr, wki, xr; + + m = n >> 1; + ks = nc / n; + kk = 0; + for (j = 1; j < m; j++) { + k = n - j; + kk += ks; + wkr = c[kk] - c[nc - kk]; + wki = c[kk] + c[nc - kk]; + xr = wki * a[j] - wkr * a[k]; + a[j] = wkr * a[j] + wki * a[k]; + a[k] = xr; + } + a[m] *= c[0]; +} + + +void dstsub(int n, double *a, int nc, double *c) +{ + int j, k, kk, ks, m; + double wkr, wki, xr; + + m = n >> 1; + ks = nc / n; + kk = 0; + for (j = 1; j < m; j++) { + k = n - j; + kk += ks; + wkr = c[kk] - c[nc - kk]; + wki = c[kk] + c[nc - kk]; + xr = wki * a[k] - wkr * a[j]; + a[k] = wkr * a[k] + wki * a[j]; + a[j] = xr; + } + a[m] *= c[0]; +} + diff --git a/src/fft2d/fft2d/fftsg.f b/src/fft2d/fft2d/fftsg.f new file mode 100644 index 0000000..c676066 --- /dev/null +++ b/src/fft2d/fft2d/fftsg.f @@ -0,0 +1,2967 @@ +! Fast Fourier/Cosine/Sine Transform +! dimension :one +! data length :power of 2 +! decimation :frequency +! radix :split-radix +! data :inplace +! table :use +! subroutines +! cdft: Complex Discrete Fourier Transform +! rdft: Real Discrete Fourier Transform +! ddct: Discrete Cosine Transform +! ddst: Discrete Sine Transform +! dfct: Cosine Transform of RDFT (Real Symmetric DFT) +! dfst: Sine Transform of RDFT (Real Anti-symmetric DFT) +! +! +! -------- Complex DFT (Discrete Fourier Transform) -------- +! [definition] +! <case1> +! X(k) = sum_j=0^n-1 x(j)*exp(2*pi*i*j*k/n), 0<=k<n +! <case2> +! X(k) = sum_j=0^n-1 x(j)*exp(-2*pi*i*j*k/n), 0<=k<n +! (notes: sum_j=0^n-1 is a summation from j=0 to n-1) +! [usage] +! <case1> +! ip(0) = 0 ! first time only +! call cdft(2*n, 1, a, ip, w) +! <case2> +! ip(0) = 0 ! first time only +! call cdft(2*n, -1, a, ip, w) +! [parameters] +! 2*n :data length (integer) +! n >= 1, n = power of 2 +! a(0:2*n-1) :input/output data (real*8) +! input data +! a(2*j) = Re(x(j)), +! a(2*j+1) = Im(x(j)), 0<=j<n +! output data +! a(2*k) = Re(X(k)), +! a(2*k+1) = Im(X(k)), 0<=k<n +! ip(0:*) :work area for bit reversal (integer) +! length of ip >= 2+sqrt(n) +! strictly, +! length of ip >= +! 2+2**(int(log(n+0.5)/log(2.0))/2). +! ip(0),ip(1) are pointers of the cos/sin table. +! w(0:n/2-1) :cos/sin table (real*8) +! w(),ip() are initialized if ip(0) = 0. +! [remark] +! Inverse of +! call cdft(2*n, -1, a, ip, w) +! is +! call cdft(2*n, 1, a, ip, w) +! do j = 0, 2 * n - 1 +! a(j) = a(j) / n +! end do +! . +! +! +! -------- Real DFT / Inverse of Real DFT -------- +! [definition] +! <case1> RDFT +! R(k) = sum_j=0^n-1 a(j)*cos(2*pi*j*k/n), 0<=k<=n/2 +! I(k) = sum_j=0^n-1 a(j)*sin(2*pi*j*k/n), 0<k<n/2 +! <case2> IRDFT (excluding scale) +! a(k) = (R(0) + R(n/2)*cos(pi*k))/2 + +! sum_j=1^n/2-1 R(j)*cos(2*pi*j*k/n) + +! sum_j=1^n/2-1 I(j)*sin(2*pi*j*k/n), 0<=k<n +! [usage] +! <case1> +! ip(0) = 0 ! first time only +! call rdft(n, 1, a, ip, w) +! <case2> +! ip(0) = 0 ! first time only +! call rdft(n, -1, a, ip, w) +! [parameters] +! n :data length (integer) +! n >= 2, n = power of 2 +! a(0:n-1) :input/output data (real*8) +! <case1> +! output data +! a(2*k) = R(k), 0<=k<n/2 +! a(2*k+1) = I(k), 0<k<n/2 +! a(1) = R(n/2) +! <case2> +! input data +! a(2*j) = R(j), 0<=j<n/2 +! a(2*j+1) = I(j), 0<j<n/2 +! a(1) = R(n/2) +! ip(0:*) :work area for bit reversal (integer) +! length of ip >= 2+sqrt(n/2) +! strictly, +! length of ip >= +! 2+2**(int(log(n/2+0.5)/log(2.0))/2). +! ip(0),ip(1) are pointers of the cos/sin table. +! w(0:n/2-1) :cos/sin table (real*8) +! w(),ip() are initialized if ip(0) = 0. +! [remark] +! Inverse of +! call rdft(n, 1, a, ip, w) +! is +! call rdft(n, -1, a, ip, w) +! do j = 0, n - 1 +! a(j) = a(j) * 2 / n +! end do +! . +! +! +! -------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- +! [definition] +! <case1> IDCT (excluding scale) +! C(k) = sum_j=0^n-1 a(j)*cos(pi*j*(k+1/2)/n), 0<=k<n +! <case2> DCT +! C(k) = sum_j=0^n-1 a(j)*cos(pi*(j+1/2)*k/n), 0<=k<n +! [usage] +! <case1> +! ip(0) = 0 ! first time only +! call ddct(n, 1, a, ip, w) +! <case2> +! ip(0) = 0 ! first time only +! call ddct(n, -1, a, ip, w) +! [parameters] +! n :data length (integer) +! n >= 2, n = power of 2 +! a(0:n-1) :input/output data (real*8) +! output data +! a(k) = C(k), 0<=k<n +! ip(0:*) :work area for bit reversal (integer) +! length of ip >= 2+sqrt(n/2) +! strictly, +! length of ip >= +! 2+2**(int(log(n/2+0.5)/log(2.0))/2). +! ip(0),ip(1) are pointers of the cos/sin table. +! w(0:n*5/4-1) :cos/sin table (real*8) +! w(),ip() are initialized if ip(0) = 0. +! [remark] +! Inverse of +! call ddct(n, -1, a, ip, w) +! is +! a(0) = a(0) / 2 +! call ddct(n, 1, a, ip, w) +! do j = 0, n - 1 +! a(j) = a(j) * 2 / n +! end do +! . +! +! +! -------- DST (Discrete Sine Transform) / Inverse of DST -------- +! [definition] +! <case1> IDST (excluding scale) +! S(k) = sum_j=1^n A(j)*sin(pi*j*(k+1/2)/n), 0<=k<n +! <case2> DST +! S(k) = sum_j=0^n-1 a(j)*sin(pi*(j+1/2)*k/n), 0<k<=n +! [usage] +! <case1> +! ip(0) = 0 ! first time only +! call ddst(n, 1, a, ip, w) +! <case2> +! ip(0) = 0 ! first time only +! call ddst(n, -1, a, ip, w) +! [parameters] +! n :data length (integer) +! n >= 2, n = power of 2 +! a(0:n-1) :input/output data (real*8) +! <case1> +! input data +! a(j) = A(j), 0<j<n +! a(0) = A(n) +! output data +! a(k) = S(k), 0<=k<n +! <case2> +! output data +! a(k) = S(k), 0<k<n +! a(0) = S(n) +! ip(0:*) :work area for bit reversal (integer) +! length of ip >= 2+sqrt(n/2) +! strictly, +! length of ip >= +! 2+2**(int(log(n/2+0.5)/log(2.0))/2). +! ip(0),ip(1) are pointers of the cos/sin table. +! w(0:n*5/4-1) :cos/sin table (real*8) +! w(),ip() are initialized if ip(0) = 0. +! [remark] +! Inverse of +! call ddst(n, -1, a, ip, w) +! is +! a(0) = a(0) / 2 +! call ddst(n, 1, a, ip, w) +! do j = 0, n - 1 +! a(j) = a(j) * 2 / n +! end do +! . +! +! +! -------- Cosine Transform of RDFT (Real Symmetric DFT) -------- +! [definition] +! C(k) = sum_j=0^n a(j)*cos(pi*j*k/n), 0<=k<=n +! [usage] +! ip(0) = 0 ! first time only +! call dfct(n, a, t, ip, w) +! [parameters] +! n :data length - 1 (integer) +! n >= 2, n = power of 2 +! a(0:n) :input/output data (real*8) +! output data +! a(k) = C(k), 0<=k<=n +! t(0:n/2) :work area (real*8) +! ip(0:*) :work area for bit reversal (integer) +! length of ip >= 2+sqrt(n/4) +! strictly, +! length of ip >= +! 2+2**(int(log(n/4+0.5)/log(2.0))/2). +! ip(0),ip(1) are pointers of the cos/sin table. +! w(0:n*5/8-1) :cos/sin table (real*8) +! w(),ip() are initialized if ip(0) = 0. +! [remark] +! Inverse of +! a(0) = a(0) / 2 +! a(n) = a(n) / 2 +! call dfct(n, a, t, ip, w) +! is +! a(0) = a(0) / 2 +! a(n) = a(n) / 2 +! call dfct(n, a, t, ip, w) +! do j = 0, n +! a(j) = a(j) * 2 / n +! end do +! . +! +! +! -------- Sine Transform of RDFT (Real Anti-symmetric DFT) -------- +! [definition] +! S(k) = sum_j=1^n-1 a(j)*sin(pi*j*k/n), 0<k<n +! [usage] +! ip(0) = 0 ! first time only +! call dfst(n, a, t, ip, w) +! [parameters] +! n :data length + 1 (integer) +! n >= 2, n = power of 2 +! a(0:n-1) :input/output data (real*8) +! output data +! a(k) = S(k), 0<k<n +! (a(0) is used for work area) +! t(0:n/2-1) :work area (real*8) +! ip(0:*) :work area for bit reversal (integer) +! length of ip >= 2+sqrt(n/4) +! strictly, +! length of ip >= +! 2+2**(int(log(n/4+0.5)/log(2.0))/2). +! ip(0),ip(1) are pointers of the cos/sin table. +! w(0:n*5/8-1) :cos/sin table (real*8) +! w(),ip() are initialized if ip(0) = 0. +! [remark] +! Inverse of +! call dfst(n, a, t, ip, w) +! is +! call dfst(n, a, t, ip, w) +! do j = 1, n - 1 +! a(j) = a(j) * 2 / n +! end do +! . +! +! +! Appendix : +! The cos/sin table is recalculated when the larger table required. +! w() and ip() are compatible with all routines. +! +! + subroutine cdft(n, isgn, a, ip, w) + integer n, isgn, ip(0 : *), nw + real*8 a(0 : n - 1), w(0 : *) + nw = ip(0) + if (n .gt. 4 * nw) then + nw = n / 4 + call makewt(nw, ip, w) + end if + if (isgn .ge. 0) then + call cftfsub(n, a, ip, nw, w) + else + call cftbsub(n, a, ip, nw, w) + end if + end +! + subroutine rdft(n, isgn, a, ip, w) + integer n, isgn, ip(0 : *), nw, nc + real*8 a(0 : n - 1), w(0 : *), xi + nw = ip(0) + if (n .gt. 4 * nw) then + nw = n / 4 + call makewt(nw, ip, w) + end if + nc = ip(1) + if (n .gt. 4 * nc) then + nc = n / 4 + call makect(nc, ip, w(nw)) + end if + if (isgn .ge. 0) then + if (n .gt. 4) then + call cftfsub(n, a, ip, nw, w) + call rftfsub(n, a, nc, w(nw)) + else if (n .eq. 4) then + call cftfsub(n, a, ip, nw, w) + end if + xi = a(0) - a(1) + a(0) = a(0) + a(1) + a(1) = xi + else + a(1) = 0.5d0 * (a(0) - a(1)) + a(0) = a(0) - a(1) + if (n .gt. 4) then + call rftbsub(n, a, nc, w(nw)) + call cftbsub(n, a, ip, nw, w) + else if (n .eq. 4) then + call cftbsub(n, a, ip, nw, w) + end if + end if + end +! + subroutine ddct(n, isgn, a, ip, w) + integer n, isgn, ip(0 : *), j, nw, nc + real*8 a(0 : n - 1), w(0 : *), xr + nw = ip(0) + if (n .gt. 4 * nw) then + nw = n / 4 + call makewt(nw, ip, w) + end if + nc = ip(1) + if (n .gt. nc) then + nc = n + call makect(nc, ip, w(nw)) + end if + if (isgn .lt. 0) then + xr = a(n - 1) + do j = n - 2, 2, -2 + a(j + 1) = a(j) - a(j - 1) + a(j) = a(j) + a(j - 1) + end do + a(1) = a(0) - xr + a(0) = a(0) + xr + if (n .gt. 4) then + call rftbsub(n, a, nc, w(nw)) + call cftbsub(n, a, ip, nw, w) + else if (n .eq. 4) then + call cftbsub(n, a, ip, nw, w) + end if + end if + call dctsub(n, a, nc, w(nw)) + if (isgn .ge. 0) then + if (n .gt. 4) then + call cftfsub(n, a, ip, nw, w) + call rftfsub(n, a, nc, w(nw)) + else if (n .eq. 4) then + call cftfsub(n, a, ip, nw, w) + end if + xr = a(0) - a(1) + a(0) = a(0) + a(1) + do j = 2, n - 2, 2 + a(j - 1) = a(j) - a(j + 1) + a(j) = a(j) + a(j + 1) + end do + a(n - 1) = xr + end if + end +! + subroutine ddst(n, isgn, a, ip, w) + integer n, isgn, ip(0 : *), j, nw, nc + real*8 a(0 : n - 1), w(0 : *), xr + nw = ip(0) + if (n .gt. 4 * nw) then + nw = n / 4 + call makewt(nw, ip, w) + end if + nc = ip(1) + if (n .gt. nc) then + nc = n + call makect(nc, ip, w(nw)) + end if + if (isgn .lt. 0) then + xr = a(n - 1) + do j = n - 2, 2, -2 + a(j + 1) = -a(j) - a(j - 1) + a(j) = a(j) - a(j - 1) + end do + a(1) = a(0) + xr + a(0) = a(0) - xr + if (n .gt. 4) then + call rftbsub(n, a, nc, w(nw)) + call cftbsub(n, a, ip, nw, w) + else if (n .eq. 4) then + call cftbsub(n, a, ip, nw, w) + end if + end if + call dstsub(n, a, nc, w(nw)) + if (isgn .ge. 0) then + if (n .gt. 4) then + call cftfsub(n, a, ip, nw, w) + call rftfsub(n, a, nc, w(nw)) + else if (n .eq. 4) then + call cftfsub(n, a, ip, nw, w) + end if + xr = a(0) - a(1) + a(0) = a(0) + a(1) + do j = 2, n - 2, 2 + a(j - 1) = -a(j) - a(j + 1) + a(j) = a(j) - a(j + 1) + end do + a(n - 1) = -xr + end if + end +! + subroutine dfct(n, a, t, ip, w) + integer n, ip(0 : *), j, k, l, m, mh, nw, nc + real*8 a(0 : n), t(0 : n / 2), w(0 : *), xr, xi, yr, yi + nw = ip(0) + if (n .gt. 8 * nw) then + nw = n / 8 + call makewt(nw, ip, w) + end if + nc = ip(1) + if (n .gt. 2 * nc) then + nc = n / 2 + call makect(nc, ip, w(nw)) + end if + m = n / 2 + yi = a(m) + xi = a(0) + a(n) + a(0) = a(0) - a(n) + t(0) = xi - yi + t(m) = xi + yi + if (n .gt. 2) then + mh = m / 2 + do j = 1, mh - 1 + k = m - j + xr = a(j) - a(n - j) + xi = a(j) + a(n - j) + yr = a(k) - a(n - k) + yi = a(k) + a(n - k) + a(j) = xr + a(k) = yr + t(j) = xi - yi + t(k) = xi + yi + end do + t(mh) = a(mh) + a(n - mh) + a(mh) = a(mh) - a(n - mh) + call dctsub(m, a, nc, w(nw)) + if (m .gt. 4) then + call cftfsub(m, a, ip, nw, w) + call rftfsub(m, a, nc, w(nw)) + else if (m .eq. 4) then + call cftfsub(m, a, ip, nw, w) + end if + a(n - 1) = a(0) - a(1) + a(1) = a(0) + a(1) + do j = m - 2, 2, -2 + a(2 * j + 1) = a(j) + a(j + 1) + a(2 * j - 1) = a(j) - a(j + 1) + end do + l = 2 + m = mh + do while (m .ge. 2) + call dctsub(m, t, nc, w(nw)) + if (m .gt. 4) then + call cftfsub(m, t, ip, nw, w) + call rftfsub(m, t, nc, w(nw)) + else if (m .eq. 4) then + call cftfsub(m, t, ip, nw, w) + end if + a(n - l) = t(0) - t(1) + a(l) = t(0) + t(1) + k = 0 + do j = 2, m - 2, 2 + k = k + 4 * l + a(k - l) = t(j) - t(j + 1) + a(k + l) = t(j) + t(j + 1) + end do + l = 2 * l + mh = m / 2 + do j = 0, mh - 1 + k = m - j + t(j) = t(m + k) - t(m + j) + t(k) = t(m + k) + t(m + j) + end do + t(mh) = t(m + mh) + m = mh + end do + a(l) = t(0) + a(n) = t(2) - t(1) + a(0) = t(2) + t(1) + else + a(1) = a(0) + a(2) = t(0) + a(0) = t(1) + end if + end +! + subroutine dfst(n, a, t, ip, w) + integer n, ip(0 : *), j, k, l, m, mh, nw, nc + real*8 a(0 : n - 1), t(0 : n / 2 - 1), w(0 : *), xr, xi, yr, yi + nw = ip(0) + if (n .gt. 8 * nw) then + nw = n / 8 + call makewt(nw, ip, w) + end if + nc = ip(1) + if (n .gt. 2 * nc) then + nc = n / 2 + call makect(nc, ip, w(nw)) + end if + if (n .gt. 2) then + m = n / 2 + mh = m / 2 + do j = 1, mh - 1 + k = m - j + xr = a(j) + a(n - j) + xi = a(j) - a(n - j) + yr = a(k) + a(n - k) + yi = a(k) - a(n - k) + a(j) = xr + a(k) = yr + t(j) = xi + yi + t(k) = xi - yi + end do + t(0) = a(mh) - a(n - mh) + a(mh) = a(mh) + a(n - mh) + a(0) = a(m) + call dstsub(m, a, nc, w(nw)) + if (m .gt. 4) then + call cftfsub(m, a, ip, nw, w) + call rftfsub(m, a, nc, w(nw)) + else if (m .eq. 4) then + call cftfsub(m, a, ip, nw, w) + end if + a(n - 1) = a(1) - a(0) + a(1) = a(0) + a(1) + do j = m - 2, 2, -2 + a(2 * j + 1) = a(j) - a(j + 1) + a(2 * j - 1) = -a(j) - a(j + 1) + end do + l = 2 + m = mh + do while (m .ge. 2) + call dstsub(m, t, nc, w(nw)) + if (m .gt. 4) then + call cftfsub(m, t, ip, nw, w) + call rftfsub(m, t, nc, w(nw)) + else if (m .eq. 4) then + call cftfsub(m, t, ip, nw, w) + end if + a(n - l) = t(1) - t(0) + a(l) = t(0) + t(1) + k = 0 + do j = 2, m - 2, 2 + k = k + 4 * l + a(k - l) = -t(j) - t(j + 1) + a(k + l) = t(j) - t(j + 1) + end do + l = 2 * l + mh = m / 2 + do j = 1, mh - 1 + k = m - j + t(j) = t(m + k) + t(m + j) + t(k) = t(m + k) - t(m + j) + end do + t(0) = t(m + mh) + m = mh + end do + a(l) = t(0) + end if + a(0) = 0 + end +! +! -------- initializing routines -------- +! + subroutine makewt(nw, ip, w) + integer nw, ip(0 : *), j, nwh, nw0, nw1 + real*8 w(0 : nw - 1), delta, wn4r, wk1r, wk1i, wk3r, wk3i + ip(0) = nw + ip(1) = 1 + if (nw .gt. 2) then + nwh = nw / 2 + delta = atan(1.0d0) / nwh + wn4r = cos(delta * nwh) + w(0) = 1 + w(1) = wn4r + if (nwh .eq. 4) then + w(2) = cos(delta * 2) + w(3) = sin(delta * 2) + else if (nwh .gt. 4) then + call makeipt(nw, ip) + w(2) = 0.5d0 / cos(delta * 2) + w(3) = 0.5d0 / cos(delta * 6) + do j = 4, nwh - 4, 4 + w(j) = cos(delta * j) + w(j + 1) = sin(delta * j) + w(j + 2) = cos(3 * delta * j) + w(j + 3) = -sin(3 * delta * j) + end do + end if + nw0 = 0 + do while (nwh .gt. 2) + nw1 = nw0 + nwh + nwh = nwh / 2 + w(nw1) = 1 + w(nw1 + 1) = wn4r + if (nwh .eq. 4) then + wk1r = w(nw0 + 4) + wk1i = w(nw0 + 5) + w(nw1 + 2) = wk1r + w(nw1 + 3) = wk1i + else if (nwh .gt. 4) then + wk1r = w(nw0 + 4) + wk3r = w(nw0 + 6) + w(nw1 + 2) = 0.5d0 / wk1r + w(nw1 + 3) = 0.5d0 / wk3r + do j = 4, nwh - 4, 4 + wk1r = w(nw0 + 2 * j) + wk1i = w(nw0 + 2 * j + 1) + wk3r = w(nw0 + 2 * j + 2) + wk3i = w(nw0 + 2 * j + 3) + w(nw1 + j) = wk1r + w(nw1 + j + 1) = wk1i + w(nw1 + j + 2) = wk3r + w(nw1 + j + 3) = wk3i + end do + end if + nw0 = nw1 + end do + end if + end +! + subroutine makeipt(nw, ip) + integer nw, ip(0 : *), j, l, m, m2, p, q + ip(2) = 0 + ip(3) = 16 + m = 2 + l = nw + do while (l .gt. 32) + m2 = 2 * m + q = 8 * m2 + do j = m, m2 - 1 + p = 4 * ip(j) + ip(m + j) = p + ip(m2 + j) = p + q + end do + m = m2 + l = l / 4 + end do + end +! + subroutine makect(nc, ip, c) + integer nc, ip(0 : *), j, nch + real*8 c(0 : nc - 1), delta + ip(1) = nc + if (nc .gt. 1) then + nch = nc / 2 + delta = atan(1.0d0) / nch + c(0) = cos(delta * nch) + c(nch) = 0.5d0 * c(0) + do j = 1, nch - 1 + c(j) = 0.5d0 * cos(delta * j) + c(nc - j) = 0.5d0 * sin(delta * j) + end do + end if + end +! +! -------- child routines -------- +! + subroutine cftfsub(n, a, ip, nw, w) + integer n, ip(0 : *), nw + real*8 a(0 : n - 1), w(0 : nw - 1) + if (n .gt. 8) then + if (n .gt. 32) then + call cftf1st(n, a, w(nw - n / 4)) + if (n .gt. 512) then + call cftrec4(n, a, nw, w) + else if (n .gt. 128) then + call cftleaf(n, 1, a, nw, w) + else + call cftfx41(n, a, nw, w) + end if + call bitrv2(n, ip, a) + else if (n .eq. 32) then + call cftf161(a, w(nw - 8)) + call bitrv216(a) + else + call cftf081(a, w) + call bitrv208(a) + end if + else if (n .eq. 8) then + call cftf040(a) + else if (n .eq. 4) then + call cftx020(a) + end if + end +! + subroutine cftbsub(n, a, ip, nw, w) + integer n, ip(0 : *), nw + real*8 a(0 : n - 1), w(0 : nw - 1) + if (n .gt. 8) then + if (n .gt. 32) then + call cftb1st(n, a, w(nw - n / 4)) + if (n .gt. 512) then + call cftrec4(n, a, nw, w) + else if (n .gt. 128) then + call cftleaf(n, 1, a, nw, w) + else + call cftfx41(n, a, nw, w) + end if + call bitrv2conj(n, ip, a) + else if (n .eq. 32) then + call cftf161(a, w(nw - 8)) + call bitrv216neg(a) + else + call cftf081(a, w) + call bitrv208neg(a) + end if + else if (n .eq. 8) then + call cftb040(a) + else if (n .eq. 4) then + call cftx020(a) + end if + end +! + subroutine bitrv2(n, ip, a) + integer n, ip(0 : *), j, j1, k, k1, l, m, nh, nm + real*8 a(0 : n - 1), xr, xi, yr, yi + m = 1 + l = n / 4 + do while (l .gt. 8) + m = m * 2 + l = l / 4 + end do + nh = n / 2 + nm = 4 * m + if (l .eq. 8) then + do k = 0, m - 1 + do j = 0, k - 1 + j1 = 4 * j + 2 * ip(m + k) + k1 = 4 * k + 2 * ip(m + j) + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + nm + k1 = k1 + 2 * nm + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + nm + k1 = k1 - nm + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + nm + k1 = k1 + 2 * nm + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + nh + k1 = k1 + 2 + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 - nm + k1 = k1 - 2 * nm + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 - nm + k1 = k1 + nm + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 - nm + k1 = k1 - 2 * nm + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + 2 + k1 = k1 + nh + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + nm + k1 = k1 + 2 * nm + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + nm + k1 = k1 - nm + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + nm + k1 = k1 + 2 * nm + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 - nh + k1 = k1 - 2 + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 - nm + k1 = k1 - 2 * nm + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 - nm + k1 = k1 + nm + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 - nm + k1 = k1 - 2 * nm + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + end do + k1 = 4 * k + 2 * ip(m + k) + j1 = k1 + 2 + k1 = k1 + nh + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + nm + k1 = k1 + 2 * nm + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + nm + k1 = k1 - nm + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 - 2 + k1 = k1 - nh + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + nh + 2 + k1 = k1 + nh + 2 + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 - nh + nm + k1 = k1 + 2 * nm - 2 + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + end do + else + do k = 0, m - 1 + do j = 0, k - 1 + j1 = 4 * j + ip(m + k) + k1 = 4 * k + ip(m + j) + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + nm + k1 = k1 + nm + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + nh + k1 = k1 + 2 + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 - nm + k1 = k1 - nm + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + 2 + k1 = k1 + nh + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + nm + k1 = k1 + nm + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 - nh + k1 = k1 - 2 + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 - nm + k1 = k1 - nm + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + end do + k1 = 4 * k + ip(m + k) + j1 = k1 + 2 + k1 = k1 + nh + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + nm + k1 = k1 + nm + xr = a(j1) + xi = a(j1 + 1) + yr = a(k1) + yi = a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + end do + end if + end +! + subroutine bitrv2conj(n, ip, a) + integer n, ip(0 : *), j, j1, k, k1, l, m, nh, nm + real*8 a(0 : n - 1), xr, xi, yr, yi + m = 1 + l = n / 4 + do while (l .gt. 8) + m = m * 2 + l = l / 4 + end do + nh = n / 2 + nm = 4 * m + if (l .eq. 8) then + do k = 0, m - 1 + do j = 0, k - 1 + j1 = 4 * j + 2 * ip(m + k) + k1 = 4 * k + 2 * ip(m + j) + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + nm + k1 = k1 + 2 * nm + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + nm + k1 = k1 - nm + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + nm + k1 = k1 + 2 * nm + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + nh + k1 = k1 + 2 + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 - nm + k1 = k1 - 2 * nm + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 - nm + k1 = k1 + nm + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 - nm + k1 = k1 - 2 * nm + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + 2 + k1 = k1 + nh + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + nm + k1 = k1 + 2 * nm + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + nm + k1 = k1 - nm + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + nm + k1 = k1 + 2 * nm + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 - nh + k1 = k1 - 2 + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 - nm + k1 = k1 - 2 * nm + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 - nm + k1 = k1 + nm + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 - nm + k1 = k1 - 2 * nm + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + end do + k1 = 4 * k + 2 * ip(m + k) + j1 = k1 + 2 + k1 = k1 + nh + a(j1 - 1) = -a(j1 - 1) + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + a(k1 + 3) = -a(k1 + 3) + j1 = j1 + nm + k1 = k1 + 2 * nm + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + nm + k1 = k1 - nm + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 - 2 + k1 = k1 - nh + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + nh + 2 + k1 = k1 + nh + 2 + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 - nh + nm + k1 = k1 + 2 * nm - 2 + a(j1 - 1) = -a(j1 - 1) + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + a(k1 + 3) = -a(k1 + 3) + end do + else + do k = 0, m - 1 + do j = 0, k - 1 + j1 = 4 * j + ip(m + k) + k1 = 4 * k + ip(m + j) + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + nm + k1 = k1 + nm + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + nh + k1 = k1 + 2 + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 - nm + k1 = k1 - nm + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + 2 + k1 = k1 + nh + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 + nm + k1 = k1 + nm + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 - nh + k1 = k1 - 2 + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + j1 = j1 - nm + k1 = k1 - nm + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + end do + k1 = 4 * k + ip(m + k) + j1 = k1 + 2 + k1 = k1 + nh + a(j1 - 1) = -a(j1 - 1) + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + a(k1 + 3) = -a(k1 + 3) + j1 = j1 + nm + k1 = k1 + nm + a(j1 - 1) = -a(j1 - 1) + xr = a(j1) + xi = -a(j1 + 1) + yr = a(k1) + yi = -a(k1 + 1) + a(j1) = yr + a(j1 + 1) = yi + a(k1) = xr + a(k1 + 1) = xi + a(k1 + 3) = -a(k1 + 3) + end do + end if + end +! + subroutine bitrv216(a) + real*8 a(0 : 31), x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i + real*8 x5r, x5i, x7r, x7i, x8r, x8i, x10r, x10i + real*8 x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i + x1r = a(2) + x1i = a(3) + x2r = a(4) + x2i = a(5) + x3r = a(6) + x3i = a(7) + x4r = a(8) + x4i = a(9) + x5r = a(10) + x5i = a(11) + x7r = a(14) + x7i = a(15) + x8r = a(16) + x8i = a(17) + x10r = a(20) + x10i = a(21) + x11r = a(22) + x11i = a(23) + x12r = a(24) + x12i = a(25) + x13r = a(26) + x13i = a(27) + x14r = a(28) + x14i = a(29) + a(2) = x8r + a(3) = x8i + a(4) = x4r + a(5) = x4i + a(6) = x12r + a(7) = x12i + a(8) = x2r + a(9) = x2i + a(10) = x10r + a(11) = x10i + a(14) = x14r + a(15) = x14i + a(16) = x1r + a(17) = x1i + a(20) = x5r + a(21) = x5i + a(22) = x13r + a(23) = x13i + a(24) = x3r + a(25) = x3i + a(26) = x11r + a(27) = x11i + a(28) = x7r + a(29) = x7i + end +! + subroutine bitrv216neg(a) + real*8 a(0 : 31), x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i + real*8 x5r, x5i, x6r, x6i, x7r, x7i, x8r, x8i + real*8 x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i + real*8 x13r, x13i, x14r, x14i, x15r, x15i + x1r = a(2) + x1i = a(3) + x2r = a(4) + x2i = a(5) + x3r = a(6) + x3i = a(7) + x4r = a(8) + x4i = a(9) + x5r = a(10) + x5i = a(11) + x6r = a(12) + x6i = a(13) + x7r = a(14) + x7i = a(15) + x8r = a(16) + x8i = a(17) + x9r = a(18) + x9i = a(19) + x10r = a(20) + x10i = a(21) + x11r = a(22) + x11i = a(23) + x12r = a(24) + x12i = a(25) + x13r = a(26) + x13i = a(27) + x14r = a(28) + x14i = a(29) + x15r = a(30) + x15i = a(31) + a(2) = x15r + a(3) = x15i + a(4) = x7r + a(5) = x7i + a(6) = x11r + a(7) = x11i + a(8) = x3r + a(9) = x3i + a(10) = x13r + a(11) = x13i + a(12) = x5r + a(13) = x5i + a(14) = x9r + a(15) = x9i + a(16) = x1r + a(17) = x1i + a(18) = x14r + a(19) = x14i + a(20) = x6r + a(21) = x6i + a(22) = x10r + a(23) = x10i + a(24) = x2r + a(25) = x2i + a(26) = x12r + a(27) = x12i + a(28) = x4r + a(29) = x4i + a(30) = x8r + a(31) = x8i + end +! + subroutine bitrv208(a) + real*8 a(0 : 15), x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i + x1r = a(2) + x1i = a(3) + x3r = a(6) + x3i = a(7) + x4r = a(8) + x4i = a(9) + x6r = a(12) + x6i = a(13) + a(2) = x4r + a(3) = x4i + a(6) = x6r + a(7) = x6i + a(8) = x1r + a(9) = x1i + a(12) = x3r + a(13) = x3i + end +! + subroutine bitrv208neg(a) + real*8 a(0 : 15), x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i + real*8 x5r, x5i, x6r, x6i, x7r, x7i + x1r = a(2) + x1i = a(3) + x2r = a(4) + x2i = a(5) + x3r = a(6) + x3i = a(7) + x4r = a(8) + x4i = a(9) + x5r = a(10) + x5i = a(11) + x6r = a(12) + x6i = a(13) + x7r = a(14) + x7i = a(15) + a(2) = x7r + a(3) = x7i + a(4) = x3r + a(5) = x3i + a(6) = x5r + a(7) = x5i + a(8) = x1r + a(9) = x1i + a(10) = x6r + a(11) = x6i + a(12) = x2r + a(13) = x2i + a(14) = x4r + a(15) = x4i + end +! + subroutine cftf1st(n, a, w) + integer n, j, j0, j1, j2, j3, k, m, mh + real*8 a(0 : n - 1), w(0 : *) + real*8 wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i + real*8 wd1r, wd1i, wd3r, wd3i + real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i + real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i + mh = n / 8 + m = 2 * mh + j1 = m + j2 = j1 + m + j3 = j2 + m + x0r = a(0) + a(j2) + x0i = a(1) + a(j2 + 1) + x1r = a(0) - a(j2) + x1i = a(1) - a(j2 + 1) + x2r = a(j1) + a(j3) + x2i = a(j1 + 1) + a(j3 + 1) + x3r = a(j1) - a(j3) + x3i = a(j1 + 1) - a(j3 + 1) + a(0) = x0r + x2r + a(1) = x0i + x2i + a(j1) = x0r - x2r + a(j1 + 1) = x0i - x2i + a(j2) = x1r - x3i + a(j2 + 1) = x1i + x3r + a(j3) = x1r + x3i + a(j3 + 1) = x1i - x3r + wn4r = w(1) + csc1 = w(2) + csc3 = w(3) + wd1r = 1 + wd1i = 0 + wd3r = 1 + wd3i = 0 + k = 0 + do j = 2, mh - 6, 4 + k = k + 4 + wk1r = csc1 * (wd1r + w(k)) + wk1i = csc1 * (wd1i + w(k + 1)) + wk3r = csc3 * (wd3r + w(k + 2)) + wk3i = csc3 * (wd3i + w(k + 3)) + wd1r = w(k) + wd1i = w(k + 1) + wd3r = w(k + 2) + wd3i = w(k + 3) + j1 = j + m + j2 = j1 + m + j3 = j2 + m + x0r = a(j) + a(j2) + x0i = a(j + 1) + a(j2 + 1) + x1r = a(j) - a(j2) + x1i = a(j + 1) - a(j2 + 1) + y0r = a(j + 2) + a(j2 + 2) + y0i = a(j + 3) + a(j2 + 3) + y1r = a(j + 2) - a(j2 + 2) + y1i = a(j + 3) - a(j2 + 3) + x2r = a(j1) + a(j3) + x2i = a(j1 + 1) + a(j3 + 1) + x3r = a(j1) - a(j3) + x3i = a(j1 + 1) - a(j3 + 1) + y2r = a(j1 + 2) + a(j3 + 2) + y2i = a(j1 + 3) + a(j3 + 3) + y3r = a(j1 + 2) - a(j3 + 2) + y3i = a(j1 + 3) - a(j3 + 3) + a(j) = x0r + x2r + a(j + 1) = x0i + x2i + a(j + 2) = y0r + y2r + a(j + 3) = y0i + y2i + a(j1) = x0r - x2r + a(j1 + 1) = x0i - x2i + a(j1 + 2) = y0r - y2r + a(j1 + 3) = y0i - y2i + x0r = x1r - x3i + x0i = x1i + x3r + a(j2) = wk1r * x0r - wk1i * x0i + a(j2 + 1) = wk1r * x0i + wk1i * x0r + x0r = y1r - y3i + x0i = y1i + y3r + a(j2 + 2) = wd1r * x0r - wd1i * x0i + a(j2 + 3) = wd1r * x0i + wd1i * x0r + x0r = x1r + x3i + x0i = x1i - x3r + a(j3) = wk3r * x0r + wk3i * x0i + a(j3 + 1) = wk3r * x0i - wk3i * x0r + x0r = y1r + y3i + x0i = y1i - y3r + a(j3 + 2) = wd3r * x0r + wd3i * x0i + a(j3 + 3) = wd3r * x0i - wd3i * x0r + j0 = m - j + j1 = j0 + m + j2 = j1 + m + j3 = j2 + m + x0r = a(j0) + a(j2) + x0i = a(j0 + 1) + a(j2 + 1) + x1r = a(j0) - a(j2) + x1i = a(j0 + 1) - a(j2 + 1) + y0r = a(j0 - 2) + a(j2 - 2) + y0i = a(j0 - 1) + a(j2 - 1) + y1r = a(j0 - 2) - a(j2 - 2) + y1i = a(j0 - 1) - a(j2 - 1) + x2r = a(j1) + a(j3) + x2i = a(j1 + 1) + a(j3 + 1) + x3r = a(j1) - a(j3) + x3i = a(j1 + 1) - a(j3 + 1) + y2r = a(j1 - 2) + a(j3 - 2) + y2i = a(j1 - 1) + a(j3 - 1) + y3r = a(j1 - 2) - a(j3 - 2) + y3i = a(j1 - 1) - a(j3 - 1) + a(j0) = x0r + x2r + a(j0 + 1) = x0i + x2i + a(j0 - 2) = y0r + y2r + a(j0 - 1) = y0i + y2i + a(j1) = x0r - x2r + a(j1 + 1) = x0i - x2i + a(j1 - 2) = y0r - y2r + a(j1 - 1) = y0i - y2i + x0r = x1r - x3i + x0i = x1i + x3r + a(j2) = wk1i * x0r - wk1r * x0i + a(j2 + 1) = wk1i * x0i + wk1r * x0r + x0r = y1r - y3i + x0i = y1i + y3r + a(j2 - 2) = wd1i * x0r - wd1r * x0i + a(j2 - 1) = wd1i * x0i + wd1r * x0r + x0r = x1r + x3i + x0i = x1i - x3r + a(j3) = wk3i * x0r + wk3r * x0i + a(j3 + 1) = wk3i * x0i - wk3r * x0r + x0r = y1r + y3i + x0i = y1i - y3r + a(j3 - 2) = wd3i * x0r + wd3r * x0i + a(j3 - 1) = wd3i * x0i - wd3r * x0r + end do + wk1r = csc1 * (wd1r + wn4r) + wk1i = csc1 * (wd1i + wn4r) + wk3r = csc3 * (wd3r - wn4r) + wk3i = csc3 * (wd3i - wn4r) + j0 = mh + j1 = j0 + m + j2 = j1 + m + j3 = j2 + m + x0r = a(j0 - 2) + a(j2 - 2) + x0i = a(j0 - 1) + a(j2 - 1) + x1r = a(j0 - 2) - a(j2 - 2) + x1i = a(j0 - 1) - a(j2 - 1) + x2r = a(j1 - 2) + a(j3 - 2) + x2i = a(j1 - 1) + a(j3 - 1) + x3r = a(j1 - 2) - a(j3 - 2) + x3i = a(j1 - 1) - a(j3 - 1) + a(j0 - 2) = x0r + x2r + a(j0 - 1) = x0i + x2i + a(j1 - 2) = x0r - x2r + a(j1 - 1) = x0i - x2i + x0r = x1r - x3i + x0i = x1i + x3r + a(j2 - 2) = wk1r * x0r - wk1i * x0i + a(j2 - 1) = wk1r * x0i + wk1i * x0r + x0r = x1r + x3i + x0i = x1i - x3r + a(j3 - 2) = wk3r * x0r + wk3i * x0i + a(j3 - 1) = wk3r * x0i - wk3i * x0r + x0r = a(j0) + a(j2) + x0i = a(j0 + 1) + a(j2 + 1) + x1r = a(j0) - a(j2) + x1i = a(j0 + 1) - a(j2 + 1) + x2r = a(j1) + a(j3) + x2i = a(j1 + 1) + a(j3 + 1) + x3r = a(j1) - a(j3) + x3i = a(j1 + 1) - a(j3 + 1) + a(j0) = x0r + x2r + a(j0 + 1) = x0i + x2i + a(j1) = x0r - x2r + a(j1 + 1) = x0i - x2i + x0r = x1r - x3i + x0i = x1i + x3r + a(j2) = wn4r * (x0r - x0i) + a(j2 + 1) = wn4r * (x0i + x0r) + x0r = x1r + x3i + x0i = x1i - x3r + a(j3) = -wn4r * (x0r + x0i) + a(j3 + 1) = -wn4r * (x0i - x0r) + x0r = a(j0 + 2) + a(j2 + 2) + x0i = a(j0 + 3) + a(j2 + 3) + x1r = a(j0 + 2) - a(j2 + 2) + x1i = a(j0 + 3) - a(j2 + 3) + x2r = a(j1 + 2) + a(j3 + 2) + x2i = a(j1 + 3) + a(j3 + 3) + x3r = a(j1 + 2) - a(j3 + 2) + x3i = a(j1 + 3) - a(j3 + 3) + a(j0 + 2) = x0r + x2r + a(j0 + 3) = x0i + x2i + a(j1 + 2) = x0r - x2r + a(j1 + 3) = x0i - x2i + x0r = x1r - x3i + x0i = x1i + x3r + a(j2 + 2) = wk1i * x0r - wk1r * x0i + a(j2 + 3) = wk1i * x0i + wk1r * x0r + x0r = x1r + x3i + x0i = x1i - x3r + a(j3 + 2) = wk3i * x0r + wk3r * x0i + a(j3 + 3) = wk3i * x0i - wk3r * x0r + end +! + subroutine cftb1st(n, a, w) + integer n, j, j0, j1, j2, j3, k, m, mh + real*8 a(0 : n - 1), w(0 : *) + real*8 wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i + real*8 wd1r, wd1i, wd3r, wd3i + real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i + real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i + mh = n / 8 + m = 2 * mh + j1 = m + j2 = j1 + m + j3 = j2 + m + x0r = a(0) + a(j2) + x0i = -a(1) - a(j2 + 1) + x1r = a(0) - a(j2) + x1i = -a(1) + a(j2 + 1) + x2r = a(j1) + a(j3) + x2i = a(j1 + 1) + a(j3 + 1) + x3r = a(j1) - a(j3) + x3i = a(j1 + 1) - a(j3 + 1) + a(0) = x0r + x2r + a(1) = x0i - x2i + a(j1) = x0r - x2r + a(j1 + 1) = x0i + x2i + a(j2) = x1r + x3i + a(j2 + 1) = x1i + x3r + a(j3) = x1r - x3i + a(j3 + 1) = x1i - x3r + wn4r = w(1) + csc1 = w(2) + csc3 = w(3) + wd1r = 1 + wd1i = 0 + wd3r = 1 + wd3i = 0 + k = 0 + do j = 2, mh - 6, 4 + k = k + 4 + wk1r = csc1 * (wd1r + w(k)) + wk1i = csc1 * (wd1i + w(k + 1)) + wk3r = csc3 * (wd3r + w(k + 2)) + wk3i = csc3 * (wd3i + w(k + 3)) + wd1r = w(k) + wd1i = w(k + 1) + wd3r = w(k + 2) + wd3i = w(k + 3) + j1 = j + m + j2 = j1 + m + j3 = j2 + m + x0r = a(j) + a(j2) + x0i = -a(j + 1) - a(j2 + 1) + x1r = a(j) - a(j2) + x1i = -a(j + 1) + a(j2 + 1) + y0r = a(j + 2) + a(j2 + 2) + y0i = -a(j + 3) - a(j2 + 3) + y1r = a(j + 2) - a(j2 + 2) + y1i = -a(j + 3) + a(j2 + 3) + x2r = a(j1) + a(j3) + x2i = a(j1 + 1) + a(j3 + 1) + x3r = a(j1) - a(j3) + x3i = a(j1 + 1) - a(j3 + 1) + y2r = a(j1 + 2) + a(j3 + 2) + y2i = a(j1 + 3) + a(j3 + 3) + y3r = a(j1 + 2) - a(j3 + 2) + y3i = a(j1 + 3) - a(j3 + 3) + a(j) = x0r + x2r + a(j + 1) = x0i - x2i + a(j + 2) = y0r + y2r + a(j + 3) = y0i - y2i + a(j1) = x0r - x2r + a(j1 + 1) = x0i + x2i + a(j1 + 2) = y0r - y2r + a(j1 + 3) = y0i + y2i + x0r = x1r + x3i + x0i = x1i + x3r + a(j2) = wk1r * x0r - wk1i * x0i + a(j2 + 1) = wk1r * x0i + wk1i * x0r + x0r = y1r + y3i + x0i = y1i + y3r + a(j2 + 2) = wd1r * x0r - wd1i * x0i + a(j2 + 3) = wd1r * x0i + wd1i * x0r + x0r = x1r - x3i + x0i = x1i - x3r + a(j3) = wk3r * x0r + wk3i * x0i + a(j3 + 1) = wk3r * x0i - wk3i * x0r + x0r = y1r - y3i + x0i = y1i - y3r + a(j3 + 2) = wd3r * x0r + wd3i * x0i + a(j3 + 3) = wd3r * x0i - wd3i * x0r + j0 = m - j + j1 = j0 + m + j2 = j1 + m + j3 = j2 + m + x0r = a(j0) + a(j2) + x0i = -a(j0 + 1) - a(j2 + 1) + x1r = a(j0) - a(j2) + x1i = -a(j0 + 1) + a(j2 + 1) + y0r = a(j0 - 2) + a(j2 - 2) + y0i = -a(j0 - 1) - a(j2 - 1) + y1r = a(j0 - 2) - a(j2 - 2) + y1i = -a(j0 - 1) + a(j2 - 1) + x2r = a(j1) + a(j3) + x2i = a(j1 + 1) + a(j3 + 1) + x3r = a(j1) - a(j3) + x3i = a(j1 + 1) - a(j3 + 1) + y2r = a(j1 - 2) + a(j3 - 2) + y2i = a(j1 - 1) + a(j3 - 1) + y3r = a(j1 - 2) - a(j3 - 2) + y3i = a(j1 - 1) - a(j3 - 1) + a(j0) = x0r + x2r + a(j0 + 1) = x0i - x2i + a(j0 - 2) = y0r + y2r + a(j0 - 1) = y0i - y2i + a(j1) = x0r - x2r + a(j1 + 1) = x0i + x2i + a(j1 - 2) = y0r - y2r + a(j1 - 1) = y0i + y2i + x0r = x1r + x3i + x0i = x1i + x3r + a(j2) = wk1i * x0r - wk1r * x0i + a(j2 + 1) = wk1i * x0i + wk1r * x0r + x0r = y1r + y3i + x0i = y1i + y3r + a(j2 - 2) = wd1i * x0r - wd1r * x0i + a(j2 - 1) = wd1i * x0i + wd1r * x0r + x0r = x1r - x3i + x0i = x1i - x3r + a(j3) = wk3i * x0r + wk3r * x0i + a(j3 + 1) = wk3i * x0i - wk3r * x0r + x0r = y1r - y3i + x0i = y1i - y3r + a(j3 - 2) = wd3i * x0r + wd3r * x0i + a(j3 - 1) = wd3i * x0i - wd3r * x0r + end do + wk1r = csc1 * (wd1r + wn4r) + wk1i = csc1 * (wd1i + wn4r) + wk3r = csc3 * (wd3r - wn4r) + wk3i = csc3 * (wd3i - wn4r) + j0 = mh + j1 = j0 + m + j2 = j1 + m + j3 = j2 + m + x0r = a(j0 - 2) + a(j2 - 2) + x0i = -a(j0 - 1) - a(j2 - 1) + x1r = a(j0 - 2) - a(j2 - 2) + x1i = -a(j0 - 1) + a(j2 - 1) + x2r = a(j1 - 2) + a(j3 - 2) + x2i = a(j1 - 1) + a(j3 - 1) + x3r = a(j1 - 2) - a(j3 - 2) + x3i = a(j1 - 1) - a(j3 - 1) + a(j0 - 2) = x0r + x2r + a(j0 - 1) = x0i - x2i + a(j1 - 2) = x0r - x2r + a(j1 - 1) = x0i + x2i + x0r = x1r + x3i + x0i = x1i + x3r + a(j2 - 2) = wk1r * x0r - wk1i * x0i + a(j2 - 1) = wk1r * x0i + wk1i * x0r + x0r = x1r - x3i + x0i = x1i - x3r + a(j3 - 2) = wk3r * x0r + wk3i * x0i + a(j3 - 1) = wk3r * x0i - wk3i * x0r + x0r = a(j0) + a(j2) + x0i = -a(j0 + 1) - a(j2 + 1) + x1r = a(j0) - a(j2) + x1i = -a(j0 + 1) + a(j2 + 1) + x2r = a(j1) + a(j3) + x2i = a(j1 + 1) + a(j3 + 1) + x3r = a(j1) - a(j3) + x3i = a(j1 + 1) - a(j3 + 1) + a(j0) = x0r + x2r + a(j0 + 1) = x0i - x2i + a(j1) = x0r - x2r + a(j1 + 1) = x0i + x2i + x0r = x1r + x3i + x0i = x1i + x3r + a(j2) = wn4r * (x0r - x0i) + a(j2 + 1) = wn4r * (x0i + x0r) + x0r = x1r - x3i + x0i = x1i - x3r + a(j3) = -wn4r * (x0r + x0i) + a(j3 + 1) = -wn4r * (x0i - x0r) + x0r = a(j0 + 2) + a(j2 + 2) + x0i = -a(j0 + 3) - a(j2 + 3) + x1r = a(j0 + 2) - a(j2 + 2) + x1i = -a(j0 + 3) + a(j2 + 3) + x2r = a(j1 + 2) + a(j3 + 2) + x2i = a(j1 + 3) + a(j3 + 3) + x3r = a(j1 + 2) - a(j3 + 2) + x3i = a(j1 + 3) - a(j3 + 3) + a(j0 + 2) = x0r + x2r + a(j0 + 3) = x0i - x2i + a(j1 + 2) = x0r - x2r + a(j1 + 3) = x0i + x2i + x0r = x1r + x3i + x0i = x1i + x3r + a(j2 + 2) = wk1i * x0r - wk1r * x0i + a(j2 + 3) = wk1i * x0i + wk1r * x0r + x0r = x1r - x3i + x0i = x1i - x3r + a(j3 + 2) = wk3i * x0r + wk3r * x0i + a(j3 + 3) = wk3i * x0i - wk3r * x0r + end +! + subroutine cftrec4(n, a, nw, w) + integer n, nw, cfttree, isplt, j, k, m + real*8 a(0 : n - 1), w(0 : nw - 1) + m = n + do while (m .gt. 512) + m = m / 4 + call cftmdl1(m, a(n - m), w(nw - m / 2)) + end do + call cftleaf(m, 1, a(n - m), nw, w) + k = 0 + do j = n - m, m, -m + k = k + 1 + isplt = cfttree(m, j, k, a, nw, w) + call cftleaf(m, isplt, a(j - m), nw, w) + end do + end +! + integer function cfttree(n, j, k, a, nw, w) + integer n, j, k, nw, i, isplt, m + real*8 a(0 : n - 1), w(0 : nw - 1) + if (mod(k, 4) .ne. 0) then + isplt = mod(k, 2) + if (isplt .ne. 0) then + call cftmdl1(n, a(j - n), w(nw - n / 2)) + else + call cftmdl2(n, a(j - n), w(nw - n)) + end if + else + m = n + i = k + do while (mod(i, 4) .eq. 0) + m = m * 4 + i = i / 4 + end do + isplt = mod(i, 2) + if (isplt .ne. 0) then + do while (m .gt. 128) + call cftmdl1(m, a(j - m), w(nw - m / 2)) + m = m / 4 + end do + else + do while (m .gt. 128) + call cftmdl2(m, a(j - m), w(nw - m)) + m = m / 4 + end do + end if + end if + cfttree = isplt + end +! + subroutine cftleaf(n, isplt, a, nw, w) + integer n, isplt, nw + real*8 a(0 : n - 1), w(0 : nw - 1) + if (n .eq. 512) then + call cftmdl1(128, a, w(nw - 64)) + call cftf161(a, w(nw - 8)) + call cftf162(a(32), w(nw - 32)) + call cftf161(a(64), w(nw - 8)) + call cftf161(a(96), w(nw - 8)) + call cftmdl2(128, a(128), w(nw - 128)) + call cftf161(a(128), w(nw - 8)) + call cftf162(a(160), w(nw - 32)) + call cftf161(a(192), w(nw - 8)) + call cftf162(a(224), w(nw - 32)) + call cftmdl1(128, a(256), w(nw - 64)) + call cftf161(a(256), w(nw - 8)) + call cftf162(a(288), w(nw - 32)) + call cftf161(a(320), w(nw - 8)) + call cftf161(a(352), w(nw - 8)) + if (isplt .ne. 0) then + call cftmdl1(128, a(384), w(nw - 64)) + call cftf161(a(480), w(nw - 8)) + else + call cftmdl2(128, a(384), w(nw - 128)) + call cftf162(a(480), w(nw - 32)) + end if + call cftf161(a(384), w(nw - 8)) + call cftf162(a(416), w(nw - 32)) + call cftf161(a(448), w(nw - 8)) + else + call cftmdl1(64, a, w(nw - 32)) + call cftf081(a, w(nw - 8)) + call cftf082(a(16), w(nw - 8)) + call cftf081(a(32), w(nw - 8)) + call cftf081(a(48), w(nw - 8)) + call cftmdl2(64, a(64), w(nw - 64)) + call cftf081(a(64), w(nw - 8)) + call cftf082(a(80), w(nw - 8)) + call cftf081(a(96), w(nw - 8)) + call cftf082(a(112), w(nw - 8)) + call cftmdl1(64, a(128), w(nw - 32)) + call cftf081(a(128), w(nw - 8)) + call cftf082(a(144), w(nw - 8)) + call cftf081(a(160), w(nw - 8)) + call cftf081(a(176), w(nw - 8)) + if (isplt .ne. 0) then + call cftmdl1(64, a(192), w(nw - 32)) + call cftf081(a(240), w(nw - 8)) + else + call cftmdl2(64, a(192), w(nw - 64)) + call cftf082(a(240), w(nw - 8)) + end if + call cftf081(a(192), w(nw - 8)) + call cftf082(a(208), w(nw - 8)) + call cftf081(a(224), w(nw - 8)) + end if + end +! + subroutine cftmdl1(n, a, w) + integer n, j, j0, j1, j2, j3, k, m, mh + real*8 a(0 : n - 1), w(0 : *) + real*8 wn4r, wk1r, wk1i, wk3r, wk3i + real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i + mh = n / 8 + m = 2 * mh + j1 = m + j2 = j1 + m + j3 = j2 + m + x0r = a(0) + a(j2) + x0i = a(1) + a(j2 + 1) + x1r = a(0) - a(j2) + x1i = a(1) - a(j2 + 1) + x2r = a(j1) + a(j3) + x2i = a(j1 + 1) + a(j3 + 1) + x3r = a(j1) - a(j3) + x3i = a(j1 + 1) - a(j3 + 1) + a(0) = x0r + x2r + a(1) = x0i + x2i + a(j1) = x0r - x2r + a(j1 + 1) = x0i - x2i + a(j2) = x1r - x3i + a(j2 + 1) = x1i + x3r + a(j3) = x1r + x3i + a(j3 + 1) = x1i - x3r + wn4r = w(1) + k = 0 + do j = 2, mh - 2, 2 + k = k + 4 + wk1r = w(k) + wk1i = w(k + 1) + wk3r = w(k + 2) + wk3i = w(k + 3) + j1 = j + m + j2 = j1 + m + j3 = j2 + m + x0r = a(j) + a(j2) + x0i = a(j + 1) + a(j2 + 1) + x1r = a(j) - a(j2) + x1i = a(j + 1) - a(j2 + 1) + x2r = a(j1) + a(j3) + x2i = a(j1 + 1) + a(j3 + 1) + x3r = a(j1) - a(j3) + x3i = a(j1 + 1) - a(j3 + 1) + a(j) = x0r + x2r + a(j + 1) = x0i + x2i + a(j1) = x0r - x2r + a(j1 + 1) = x0i - x2i + x0r = x1r - x3i + x0i = x1i + x3r + a(j2) = wk1r * x0r - wk1i * x0i + a(j2 + 1) = wk1r * x0i + wk1i * x0r + x0r = x1r + x3i + x0i = x1i - x3r + a(j3) = wk3r * x0r + wk3i * x0i + a(j3 + 1) = wk3r * x0i - wk3i * x0r + j0 = m - j + j1 = j0 + m + j2 = j1 + m + j3 = j2 + m + x0r = a(j0) + a(j2) + x0i = a(j0 + 1) + a(j2 + 1) + x1r = a(j0) - a(j2) + x1i = a(j0 + 1) - a(j2 + 1) + x2r = a(j1) + a(j3) + x2i = a(j1 + 1) + a(j3 + 1) + x3r = a(j1) - a(j3) + x3i = a(j1 + 1) - a(j3 + 1) + a(j0) = x0r + x2r + a(j0 + 1) = x0i + x2i + a(j1) = x0r - x2r + a(j1 + 1) = x0i - x2i + x0r = x1r - x3i + x0i = x1i + x3r + a(j2) = wk1i * x0r - wk1r * x0i + a(j2 + 1) = wk1i * x0i + wk1r * x0r + x0r = x1r + x3i + x0i = x1i - x3r + a(j3) = wk3i * x0r + wk3r * x0i + a(j3 + 1) = wk3i * x0i - wk3r * x0r + end do + j0 = mh + j1 = j0 + m + j2 = j1 + m + j3 = j2 + m + x0r = a(j0) + a(j2) + x0i = a(j0 + 1) + a(j2 + 1) + x1r = a(j0) - a(j2) + x1i = a(j0 + 1) - a(j2 + 1) + x2r = a(j1) + a(j3) + x2i = a(j1 + 1) + a(j3 + 1) + x3r = a(j1) - a(j3) + x3i = a(j1 + 1) - a(j3 + 1) + a(j0) = x0r + x2r + a(j0 + 1) = x0i + x2i + a(j1) = x0r - x2r + a(j1 + 1) = x0i - x2i + x0r = x1r - x3i + x0i = x1i + x3r + a(j2) = wn4r * (x0r - x0i) + a(j2 + 1) = wn4r * (x0i + x0r) + x0r = x1r + x3i + x0i = x1i - x3r + a(j3) = -wn4r * (x0r + x0i) + a(j3 + 1) = -wn4r * (x0i - x0r) + end +! + subroutine cftmdl2(n, a, w) + integer n, j, j0, j1, j2, j3, k, kr, m, mh + real*8 a(0 : n - 1), w(0 : *) + real*8 wn4r, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i + real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i + real*8 y0r, y0i, y2r, y2i + mh = n / 8 + m = 2 * mh + wn4r = w(1) + j1 = m + j2 = j1 + m + j3 = j2 + m + x0r = a(0) - a(j2 + 1) + x0i = a(1) + a(j2) + x1r = a(0) + a(j2 + 1) + x1i = a(1) - a(j2) + x2r = a(j1) - a(j3 + 1) + x2i = a(j1 + 1) + a(j3) + x3r = a(j1) + a(j3 + 1) + x3i = a(j1 + 1) - a(j3) + y0r = wn4r * (x2r - x2i) + y0i = wn4r * (x2i + x2r) + a(0) = x0r + y0r + a(1) = x0i + y0i + a(j1) = x0r - y0r + a(j1 + 1) = x0i - y0i + y0r = wn4r * (x3r - x3i) + y0i = wn4r * (x3i + x3r) + a(j2) = x1r - y0i + a(j2 + 1) = x1i + y0r + a(j3) = x1r + y0i + a(j3 + 1) = x1i - y0r + k = 0 + kr = 2 * m + do j = 2, mh - 2, 2 + k = k + 4 + wk1r = w(k) + wk1i = w(k + 1) + wk3r = w(k + 2) + wk3i = w(k + 3) + kr = kr - 4 + wd1i = w(kr) + wd1r = w(kr + 1) + wd3i = w(kr + 2) + wd3r = w(kr + 3) + j1 = j + m + j2 = j1 + m + j3 = j2 + m + x0r = a(j) - a(j2 + 1) + x0i = a(j + 1) + a(j2) + x1r = a(j) + a(j2 + 1) + x1i = a(j + 1) - a(j2) + x2r = a(j1) - a(j3 + 1) + x2i = a(j1 + 1) + a(j3) + x3r = a(j1) + a(j3 + 1) + x3i = a(j1 + 1) - a(j3) + y0r = wk1r * x0r - wk1i * x0i + y0i = wk1r * x0i + wk1i * x0r + y2r = wd1r * x2r - wd1i * x2i + y2i = wd1r * x2i + wd1i * x2r + a(j) = y0r + y2r + a(j + 1) = y0i + y2i + a(j1) = y0r - y2r + a(j1 + 1) = y0i - y2i + y0r = wk3r * x1r + wk3i * x1i + y0i = wk3r * x1i - wk3i * x1r + y2r = wd3r * x3r + wd3i * x3i + y2i = wd3r * x3i - wd3i * x3r + a(j2) = y0r + y2r + a(j2 + 1) = y0i + y2i + a(j3) = y0r - y2r + a(j3 + 1) = y0i - y2i + j0 = m - j + j1 = j0 + m + j2 = j1 + m + j3 = j2 + m + x0r = a(j0) - a(j2 + 1) + x0i = a(j0 + 1) + a(j2) + x1r = a(j0) + a(j2 + 1) + x1i = a(j0 + 1) - a(j2) + x2r = a(j1) - a(j3 + 1) + x2i = a(j1 + 1) + a(j3) + x3r = a(j1) + a(j3 + 1) + x3i = a(j1 + 1) - a(j3) + y0r = wd1i * x0r - wd1r * x0i + y0i = wd1i * x0i + wd1r * x0r + y2r = wk1i * x2r - wk1r * x2i + y2i = wk1i * x2i + wk1r * x2r + a(j0) = y0r + y2r + a(j0 + 1) = y0i + y2i + a(j1) = y0r - y2r + a(j1 + 1) = y0i - y2i + y0r = wd3i * x1r + wd3r * x1i + y0i = wd3i * x1i - wd3r * x1r + y2r = wk3i * x3r + wk3r * x3i + y2i = wk3i * x3i - wk3r * x3r + a(j2) = y0r + y2r + a(j2 + 1) = y0i + y2i + a(j3) = y0r - y2r + a(j3 + 1) = y0i - y2i + end do + wk1r = w(m) + wk1i = w(m + 1) + j0 = mh + j1 = j0 + m + j2 = j1 + m + j3 = j2 + m + x0r = a(j0) - a(j2 + 1) + x0i = a(j0 + 1) + a(j2) + x1r = a(j0) + a(j2 + 1) + x1i = a(j0 + 1) - a(j2) + x2r = a(j1) - a(j3 + 1) + x2i = a(j1 + 1) + a(j3) + x3r = a(j1) + a(j3 + 1) + x3i = a(j1 + 1) - a(j3) + y0r = wk1r * x0r - wk1i * x0i + y0i = wk1r * x0i + wk1i * x0r + y2r = wk1i * x2r - wk1r * x2i + y2i = wk1i * x2i + wk1r * x2r + a(j0) = y0r + y2r + a(j0 + 1) = y0i + y2i + a(j1) = y0r - y2r + a(j1 + 1) = y0i - y2i + y0r = wk1i * x1r - wk1r * x1i + y0i = wk1i * x1i + wk1r * x1r + y2r = wk1r * x3r - wk1i * x3i + y2i = wk1r * x3i + wk1i * x3r + a(j2) = y0r - y2r + a(j2 + 1) = y0i - y2i + a(j3) = y0r + y2r + a(j3 + 1) = y0i + y2i + end +! + subroutine cftfx41(n, a, nw, w) + integer n, nw + real*8 a(0 : n - 1), w(0 : nw - 1) + if (n .eq. 128) then + call cftf161(a, w(nw - 8)) + call cftf162(a(32), w(nw - 32)) + call cftf161(a(64), w(nw - 8)) + call cftf161(a(96), w(nw - 8)) + else + call cftf081(a, w(nw - 8)) + call cftf082(a(16), w(nw - 8)) + call cftf081(a(32), w(nw - 8)) + call cftf081(a(48), w(nw - 8)) + end if + end +! + subroutine cftf161(a, w) + real*8 a(0 : 31), w(0 : *), wn4r, wk1r, wk1i + real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i + real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i + real*8 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i + real*8 y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i + real*8 y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i + wn4r = w(1) + wk1r = w(2) + wk1i = w(3) + x0r = a(0) + a(16) + x0i = a(1) + a(17) + x1r = a(0) - a(16) + x1i = a(1) - a(17) + x2r = a(8) + a(24) + x2i = a(9) + a(25) + x3r = a(8) - a(24) + x3i = a(9) - a(25) + y0r = x0r + x2r + y0i = x0i + x2i + y4r = x0r - x2r + y4i = x0i - x2i + y8r = x1r - x3i + y8i = x1i + x3r + y12r = x1r + x3i + y12i = x1i - x3r + x0r = a(2) + a(18) + x0i = a(3) + a(19) + x1r = a(2) - a(18) + x1i = a(3) - a(19) + x2r = a(10) + a(26) + x2i = a(11) + a(27) + x3r = a(10) - a(26) + x3i = a(11) - a(27) + y1r = x0r + x2r + y1i = x0i + x2i + y5r = x0r - x2r + y5i = x0i - x2i + x0r = x1r - x3i + x0i = x1i + x3r + y9r = wk1r * x0r - wk1i * x0i + y9i = wk1r * x0i + wk1i * x0r + x0r = x1r + x3i + x0i = x1i - x3r + y13r = wk1i * x0r - wk1r * x0i + y13i = wk1i * x0i + wk1r * x0r + x0r = a(4) + a(20) + x0i = a(5) + a(21) + x1r = a(4) - a(20) + x1i = a(5) - a(21) + x2r = a(12) + a(28) + x2i = a(13) + a(29) + x3r = a(12) - a(28) + x3i = a(13) - a(29) + y2r = x0r + x2r + y2i = x0i + x2i + y6r = x0r - x2r + y6i = x0i - x2i + x0r = x1r - x3i + x0i = x1i + x3r + y10r = wn4r * (x0r - x0i) + y10i = wn4r * (x0i + x0r) + x0r = x1r + x3i + x0i = x1i - x3r + y14r = wn4r * (x0r + x0i) + y14i = wn4r * (x0i - x0r) + x0r = a(6) + a(22) + x0i = a(7) + a(23) + x1r = a(6) - a(22) + x1i = a(7) - a(23) + x2r = a(14) + a(30) + x2i = a(15) + a(31) + x3r = a(14) - a(30) + x3i = a(15) - a(31) + y3r = x0r + x2r + y3i = x0i + x2i + y7r = x0r - x2r + y7i = x0i - x2i + x0r = x1r - x3i + x0i = x1i + x3r + y11r = wk1i * x0r - wk1r * x0i + y11i = wk1i * x0i + wk1r * x0r + x0r = x1r + x3i + x0i = x1i - x3r + y15r = wk1r * x0r - wk1i * x0i + y15i = wk1r * x0i + wk1i * x0r + x0r = y12r - y14r + x0i = y12i - y14i + x1r = y12r + y14r + x1i = y12i + y14i + x2r = y13r - y15r + x2i = y13i - y15i + x3r = y13r + y15r + x3i = y13i + y15i + a(24) = x0r + x2r + a(25) = x0i + x2i + a(26) = x0r - x2r + a(27) = x0i - x2i + a(28) = x1r - x3i + a(29) = x1i + x3r + a(30) = x1r + x3i + a(31) = x1i - x3r + x0r = y8r + y10r + x0i = y8i + y10i + x1r = y8r - y10r + x1i = y8i - y10i + x2r = y9r + y11r + x2i = y9i + y11i + x3r = y9r - y11r + x3i = y9i - y11i + a(16) = x0r + x2r + a(17) = x0i + x2i + a(18) = x0r - x2r + a(19) = x0i - x2i + a(20) = x1r - x3i + a(21) = x1i + x3r + a(22) = x1r + x3i + a(23) = x1i - x3r + x0r = y5r - y7i + x0i = y5i + y7r + x2r = wn4r * (x0r - x0i) + x2i = wn4r * (x0i + x0r) + x0r = y5r + y7i + x0i = y5i - y7r + x3r = wn4r * (x0r - x0i) + x3i = wn4r * (x0i + x0r) + x0r = y4r - y6i + x0i = y4i + y6r + x1r = y4r + y6i + x1i = y4i - y6r + a(8) = x0r + x2r + a(9) = x0i + x2i + a(10) = x0r - x2r + a(11) = x0i - x2i + a(12) = x1r - x3i + a(13) = x1i + x3r + a(14) = x1r + x3i + a(15) = x1i - x3r + x0r = y0r + y2r + x0i = y0i + y2i + x1r = y0r - y2r + x1i = y0i - y2i + x2r = y1r + y3r + x2i = y1i + y3i + x3r = y1r - y3r + x3i = y1i - y3i + a(0) = x0r + x2r + a(1) = x0i + x2i + a(2) = x0r - x2r + a(3) = x0i - x2i + a(4) = x1r - x3i + a(5) = x1i + x3r + a(6) = x1r + x3i + a(7) = x1i - x3r + end +! + subroutine cftf162(a, w) + real*8 a(0 : 31), w(0 : *) + real*8 wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i + real*8 x0r, x0i, x1r, x1i, x2r, x2i + real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i + real*8 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i + real*8 y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i + real*8 y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i + wn4r = w(1) + wk1r = w(4) + wk1i = w(5) + wk3r = w(6) + wk3i = -w(7) + wk2r = w(8) + wk2i = w(9) + x1r = a(0) - a(17) + x1i = a(1) + a(16) + x0r = a(8) - a(25) + x0i = a(9) + a(24) + x2r = wn4r * (x0r - x0i) + x2i = wn4r * (x0i + x0r) + y0r = x1r + x2r + y0i = x1i + x2i + y4r = x1r - x2r + y4i = x1i - x2i + x1r = a(0) + a(17) + x1i = a(1) - a(16) + x0r = a(8) + a(25) + x0i = a(9) - a(24) + x2r = wn4r * (x0r - x0i) + x2i = wn4r * (x0i + x0r) + y8r = x1r - x2i + y8i = x1i + x2r + y12r = x1r + x2i + y12i = x1i - x2r + x0r = a(2) - a(19) + x0i = a(3) + a(18) + x1r = wk1r * x0r - wk1i * x0i + x1i = wk1r * x0i + wk1i * x0r + x0r = a(10) - a(27) + x0i = a(11) + a(26) + x2r = wk3i * x0r - wk3r * x0i + x2i = wk3i * x0i + wk3r * x0r + y1r = x1r + x2r + y1i = x1i + x2i + y5r = x1r - x2r + y5i = x1i - x2i + x0r = a(2) + a(19) + x0i = a(3) - a(18) + x1r = wk3r * x0r - wk3i * x0i + x1i = wk3r * x0i + wk3i * x0r + x0r = a(10) + a(27) + x0i = a(11) - a(26) + x2r = wk1r * x0r + wk1i * x0i + x2i = wk1r * x0i - wk1i * x0r + y9r = x1r - x2r + y9i = x1i - x2i + y13r = x1r + x2r + y13i = x1i + x2i + x0r = a(4) - a(21) + x0i = a(5) + a(20) + x1r = wk2r * x0r - wk2i * x0i + x1i = wk2r * x0i + wk2i * x0r + x0r = a(12) - a(29) + x0i = a(13) + a(28) + x2r = wk2i * x0r - wk2r * x0i + x2i = wk2i * x0i + wk2r * x0r + y2r = x1r + x2r + y2i = x1i + x2i + y6r = x1r - x2r + y6i = x1i - x2i + x0r = a(4) + a(21) + x0i = a(5) - a(20) + x1r = wk2i * x0r - wk2r * x0i + x1i = wk2i * x0i + wk2r * x0r + x0r = a(12) + a(29) + x0i = a(13) - a(28) + x2r = wk2r * x0r - wk2i * x0i + x2i = wk2r * x0i + wk2i * x0r + y10r = x1r - x2r + y10i = x1i - x2i + y14r = x1r + x2r + y14i = x1i + x2i + x0r = a(6) - a(23) + x0i = a(7) + a(22) + x1r = wk3r * x0r - wk3i * x0i + x1i = wk3r * x0i + wk3i * x0r + x0r = a(14) - a(31) + x0i = a(15) + a(30) + x2r = wk1i * x0r - wk1r * x0i + x2i = wk1i * x0i + wk1r * x0r + y3r = x1r + x2r + y3i = x1i + x2i + y7r = x1r - x2r + y7i = x1i - x2i + x0r = a(6) + a(23) + x0i = a(7) - a(22) + x1r = wk1i * x0r + wk1r * x0i + x1i = wk1i * x0i - wk1r * x0r + x0r = a(14) + a(31) + x0i = a(15) - a(30) + x2r = wk3i * x0r - wk3r * x0i + x2i = wk3i * x0i + wk3r * x0r + y11r = x1r + x2r + y11i = x1i + x2i + y15r = x1r - x2r + y15i = x1i - x2i + x1r = y0r + y2r + x1i = y0i + y2i + x2r = y1r + y3r + x2i = y1i + y3i + a(0) = x1r + x2r + a(1) = x1i + x2i + a(2) = x1r - x2r + a(3) = x1i - x2i + x1r = y0r - y2r + x1i = y0i - y2i + x2r = y1r - y3r + x2i = y1i - y3i + a(4) = x1r - x2i + a(5) = x1i + x2r + a(6) = x1r + x2i + a(7) = x1i - x2r + x1r = y4r - y6i + x1i = y4i + y6r + x0r = y5r - y7i + x0i = y5i + y7r + x2r = wn4r * (x0r - x0i) + x2i = wn4r * (x0i + x0r) + a(8) = x1r + x2r + a(9) = x1i + x2i + a(10) = x1r - x2r + a(11) = x1i - x2i + x1r = y4r + y6i + x1i = y4i - y6r + x0r = y5r + y7i + x0i = y5i - y7r + x2r = wn4r * (x0r - x0i) + x2i = wn4r * (x0i + x0r) + a(12) = x1r - x2i + a(13) = x1i + x2r + a(14) = x1r + x2i + a(15) = x1i - x2r + x1r = y8r + y10r + x1i = y8i + y10i + x2r = y9r - y11r + x2i = y9i - y11i + a(16) = x1r + x2r + a(17) = x1i + x2i + a(18) = x1r - x2r + a(19) = x1i - x2i + x1r = y8r - y10r + x1i = y8i - y10i + x2r = y9r + y11r + x2i = y9i + y11i + a(20) = x1r - x2i + a(21) = x1i + x2r + a(22) = x1r + x2i + a(23) = x1i - x2r + x1r = y12r - y14i + x1i = y12i + y14r + x0r = y13r + y15i + x0i = y13i - y15r + x2r = wn4r * (x0r - x0i) + x2i = wn4r * (x0i + x0r) + a(24) = x1r + x2r + a(25) = x1i + x2i + a(26) = x1r - x2r + a(27) = x1i - x2i + x1r = y12r + y14i + x1i = y12i - y14r + x0r = y13r - y15i + x0i = y13i + y15r + x2r = wn4r * (x0r - x0i) + x2i = wn4r * (x0i + x0r) + a(28) = x1r - x2i + a(29) = x1i + x2r + a(30) = x1r + x2i + a(31) = x1i - x2r + end +! + subroutine cftf081(a, w) + real*8 a(0 : 15), w(0 : *) + real*8 wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i + real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i + real*8 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i + wn4r = w(1) + x0r = a(0) + a(8) + x0i = a(1) + a(9) + x1r = a(0) - a(8) + x1i = a(1) - a(9) + x2r = a(4) + a(12) + x2i = a(5) + a(13) + x3r = a(4) - a(12) + x3i = a(5) - a(13) + y0r = x0r + x2r + y0i = x0i + x2i + y2r = x0r - x2r + y2i = x0i - x2i + y1r = x1r - x3i + y1i = x1i + x3r + y3r = x1r + x3i + y3i = x1i - x3r + x0r = a(2) + a(10) + x0i = a(3) + a(11) + x1r = a(2) - a(10) + x1i = a(3) - a(11) + x2r = a(6) + a(14) + x2i = a(7) + a(15) + x3r = a(6) - a(14) + x3i = a(7) - a(15) + y4r = x0r + x2r + y4i = x0i + x2i + y6r = x0r - x2r + y6i = x0i - x2i + x0r = x1r - x3i + x0i = x1i + x3r + x2r = x1r + x3i + x2i = x1i - x3r + y5r = wn4r * (x0r - x0i) + y5i = wn4r * (x0r + x0i) + y7r = wn4r * (x2r - x2i) + y7i = wn4r * (x2r + x2i) + a(8) = y1r + y5r + a(9) = y1i + y5i + a(10) = y1r - y5r + a(11) = y1i - y5i + a(12) = y3r - y7i + a(13) = y3i + y7r + a(14) = y3r + y7i + a(15) = y3i - y7r + a(0) = y0r + y4r + a(1) = y0i + y4i + a(2) = y0r - y4r + a(3) = y0i - y4i + a(4) = y2r - y6i + a(5) = y2i + y6r + a(6) = y2r + y6i + a(7) = y2i - y6r + end +! + subroutine cftf082(a, w) + real*8 a(0 : 15), w(0 : *) + real*8 wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i + real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i + real*8 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i + wn4r = w(1) + wk1r = w(2) + wk1i = w(3) + y0r = a(0) - a(9) + y0i = a(1) + a(8) + y1r = a(0) + a(9) + y1i = a(1) - a(8) + x0r = a(4) - a(13) + x0i = a(5) + a(12) + y2r = wn4r * (x0r - x0i) + y2i = wn4r * (x0i + x0r) + x0r = a(4) + a(13) + x0i = a(5) - a(12) + y3r = wn4r * (x0r - x0i) + y3i = wn4r * (x0i + x0r) + x0r = a(2) - a(11) + x0i = a(3) + a(10) + y4r = wk1r * x0r - wk1i * x0i + y4i = wk1r * x0i + wk1i * x0r + x0r = a(2) + a(11) + x0i = a(3) - a(10) + y5r = wk1i * x0r - wk1r * x0i + y5i = wk1i * x0i + wk1r * x0r + x0r = a(6) - a(15) + x0i = a(7) + a(14) + y6r = wk1i * x0r - wk1r * x0i + y6i = wk1i * x0i + wk1r * x0r + x0r = a(6) + a(15) + x0i = a(7) - a(14) + y7r = wk1r * x0r - wk1i * x0i + y7i = wk1r * x0i + wk1i * x0r + x0r = y0r + y2r + x0i = y0i + y2i + x1r = y4r + y6r + x1i = y4i + y6i + a(0) = x0r + x1r + a(1) = x0i + x1i + a(2) = x0r - x1r + a(3) = x0i - x1i + x0r = y0r - y2r + x0i = y0i - y2i + x1r = y4r - y6r + x1i = y4i - y6i + a(4) = x0r - x1i + a(5) = x0i + x1r + a(6) = x0r + x1i + a(7) = x0i - x1r + x0r = y1r - y3i + x0i = y1i + y3r + x1r = y5r - y7r + x1i = y5i - y7i + a(8) = x0r + x1r + a(9) = x0i + x1i + a(10) = x0r - x1r + a(11) = x0i - x1i + x0r = y1r + y3i + x0i = y1i - y3r + x1r = y5r + y7r + x1i = y5i + y7i + a(12) = x0r - x1i + a(13) = x0i + x1r + a(14) = x0r + x1i + a(15) = x0i - x1r + end +! + subroutine cftf040(a) + real*8 a(0 : 7), x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i + x0r = a(0) + a(4) + x0i = a(1) + a(5) + x1r = a(0) - a(4) + x1i = a(1) - a(5) + x2r = a(2) + a(6) + x2i = a(3) + a(7) + x3r = a(2) - a(6) + x3i = a(3) - a(7) + a(0) = x0r + x2r + a(1) = x0i + x2i + a(2) = x1r - x3i + a(3) = x1i + x3r + a(4) = x0r - x2r + a(5) = x0i - x2i + a(6) = x1r + x3i + a(7) = x1i - x3r + end +! + subroutine cftb040(a) + real*8 a(0 : 7), x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i + x0r = a(0) + a(4) + x0i = a(1) + a(5) + x1r = a(0) - a(4) + x1i = a(1) - a(5) + x2r = a(2) + a(6) + x2i = a(3) + a(7) + x3r = a(2) - a(6) + x3i = a(3) - a(7) + a(0) = x0r + x2r + a(1) = x0i + x2i + a(2) = x1r + x3i + a(3) = x1i - x3r + a(4) = x0r - x2r + a(5) = x0i - x2i + a(6) = x1r - x3i + a(7) = x1i + x3r + end +! + subroutine cftx020(a) + real*8 a(0 : 3), x0r, x0i + x0r = a(0) - a(2) + x0i = a(1) - a(3) + a(0) = a(0) + a(2) + a(1) = a(1) + a(3) + a(2) = x0r + a(3) = x0i + end +! + subroutine rftfsub(n, a, nc, c) + integer n, nc, j, k, kk, ks, m + real*8 a(0 : n - 1), c(0 : nc - 1), wkr, wki, xr, xi, yr, yi + m = n / 2 + ks = 2 * nc / m + kk = 0 + do j = 2, m - 2, 2 + k = n - j + kk = kk + ks + wkr = 0.5d0 - c(nc - kk) + wki = c(kk) + xr = a(j) - a(k) + xi = a(j + 1) + a(k + 1) + yr = wkr * xr - wki * xi + yi = wkr * xi + wki * xr + a(j) = a(j) - yr + a(j + 1) = a(j + 1) - yi + a(k) = a(k) + yr + a(k + 1) = a(k + 1) - yi + end do + end +! + subroutine rftbsub(n, a, nc, c) + integer n, nc, j, k, kk, ks, m + real*8 a(0 : n - 1), c(0 : nc - 1), wkr, wki, xr, xi, yr, yi + m = n / 2 + ks = 2 * nc / m + kk = 0 + do j = 2, m - 2, 2 + k = n - j + kk = kk + ks + wkr = 0.5d0 - c(nc - kk) + wki = c(kk) + xr = a(j) - a(k) + xi = a(j + 1) + a(k + 1) + yr = wkr * xr + wki * xi + yi = wkr * xi - wki * xr + a(j) = a(j) - yr + a(j + 1) = a(j + 1) - yi + a(k) = a(k) + yr + a(k + 1) = a(k + 1) - yi + end do + end +! + subroutine dctsub(n, a, nc, c) + integer n, nc, j, k, kk, ks, m + real*8 a(0 : n - 1), c(0 : nc - 1), wkr, wki, xr + m = n / 2 + ks = nc / n + kk = 0 + do j = 1, m - 1 + k = n - j + kk = kk + ks + wkr = c(kk) - c(nc - kk) + wki = c(kk) + c(nc - kk) + xr = wki * a(j) - wkr * a(k) + a(j) = wkr * a(j) + wki * a(k) + a(k) = xr + end do + a(m) = c(0) * a(m) + end +! + subroutine dstsub(n, a, nc, c) + integer n, nc, j, k, kk, ks, m + real*8 a(0 : n - 1), c(0 : nc - 1), wkr, wki, xr + m = n / 2 + ks = nc / n + kk = 0 + do j = 1, m - 1 + k = n - j + kk = kk + ks + wkr = c(kk) - c(nc - kk) + wki = c(kk) + c(nc - kk) + xr = wki * a(k) - wkr * a(j) + a(k) = wkr * a(k) + wki * a(j) + a(j) = xr + end do + a(m) = c(0) * a(m) + end +! diff --git a/src/fft2d/fft2d/fftsg2d.c b/src/fft2d/fft2d/fftsg2d.c new file mode 100644 index 0000000..2ebfa93 --- /dev/null +++ b/src/fft2d/fft2d/fftsg2d.c @@ -0,0 +1,1190 @@ +/* +Fast Fourier/Cosine/Sine Transform + dimension :two + data length :power of 2 + decimation :frequency + radix :split-radix, row-column + data :inplace + table :use +functions + cdft2d: Complex Discrete Fourier Transform + rdft2d: Real Discrete Fourier Transform + ddct2d: Discrete Cosine Transform + ddst2d: Discrete Sine Transform +function prototypes + void cdft2d(int, int, int, double **, double *, int *, double *); + void rdft2d(int, int, int, double **, double *, int *, double *); + void rdft2dsort(int, int, int, double **); + void ddct2d(int, int, int, double **, double *, int *, double *); + void ddst2d(int, int, int, double **, double *, int *, double *); +necessary package + fftsg.c : 1D-FFT package +macro definitions + USE_FFT2D_PTHREADS : default=not defined + FFT2D_MAX_THREADS : must be 2^N, default=4 + FFT2D_THREADS_BEGIN_N : default=65536 + USE_FFT2D_WINTHREADS : default=not defined + FFT2D_MAX_THREADS : must be 2^N, default=4 + FFT2D_THREADS_BEGIN_N : default=131072 + + +-------- Complex DFT (Discrete Fourier Transform) -------- + [definition] + <case1> + X[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 x[j1][j2] * + exp(2*pi*i*j1*k1/n1) * + exp(2*pi*i*j2*k2/n2), 0<=k1<n1, 0<=k2<n2 + <case2> + X[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 x[j1][j2] * + exp(-2*pi*i*j1*k1/n1) * + exp(-2*pi*i*j2*k2/n2), 0<=k1<n1, 0<=k2<n2 + (notes: sum_j=0^n-1 is a summation from j=0 to n-1) + [usage] + <case1> + ip[0] = 0; // first time only + cdft2d(n1, 2*n2, 1, a, t, ip, w); + <case2> + ip[0] = 0; // first time only + cdft2d(n1, 2*n2, -1, a, t, ip, w); + [parameters] + n1 :data length (int) + n1 >= 1, n1 = power of 2 + 2*n2 :data length (int) + n2 >= 1, n2 = power of 2 + a[0...n1-1][0...2*n2-1] + :input/output data (double **) + input data + a[j1][2*j2] = Re(x[j1][j2]), + a[j1][2*j2+1] = Im(x[j1][j2]), + 0<=j1<n1, 0<=j2<n2 + output data + a[k1][2*k2] = Re(X[k1][k2]), + a[k1][2*k2+1] = Im(X[k1][k2]), + 0<=k1<n1, 0<=k2<n2 + t[0...*] + :work area (double *) + length of t >= 8*n1, if single thread, + length of t >= 8*n1*FFT2D_MAX_THREADS, if multi threads, + t is dynamically allocated, if t == NULL. + ip[0...*] + :work area for bit reversal (int *) + length of ip >= 2+sqrt(n) + (n = max(n1, n2)) + ip[0],ip[1] are pointers of the cos/sin table. + w[0...*] + :cos/sin table (double *) + length of w >= max(n1/2, n2/2) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + cdft2d(n1, 2*n2, -1, a, t, ip, w); + is + cdft2d(n1, 2*n2, 1, a, t, ip, w); + for (j1 = 0; j1 <= n1 - 1; j1++) { + for (j2 = 0; j2 <= 2 * n2 - 1; j2++) { + a[j1][j2] *= 1.0 / n1 / n2; + } + } + . + + +-------- Real DFT / Inverse of Real DFT -------- + [definition] + <case1> RDFT + R[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * + cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2), + 0<=k1<n1, 0<=k2<n2 + I[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * + sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2), + 0<=k1<n1, 0<=k2<n2 + <case2> IRDFT (excluding scale) + a[k1][k2] = (1/2) * sum_j1=0^n1-1 sum_j2=0^n2-1 + (R[j1][j2] * + cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2) + + I[j1][j2] * + sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2)), + 0<=k1<n1, 0<=k2<n2 + (notes: R[n1-k1][n2-k2] = R[k1][k2], + I[n1-k1][n2-k2] = -I[k1][k2], + R[n1-k1][0] = R[k1][0], + I[n1-k1][0] = -I[k1][0], + R[0][n2-k2] = R[0][k2], + I[0][n2-k2] = -I[0][k2], + 0<k1<n1, 0<k2<n2) + [usage] + <case1> + ip[0] = 0; // first time only + rdft2d(n1, n2, 1, a, t, ip, w); + <case2> + ip[0] = 0; // first time only + rdft2d(n1, n2, -1, a, t, ip, w); + [parameters] + n1 :data length (int) + n1 >= 2, n1 = power of 2 + n2 :data length (int) + n2 >= 2, n2 = power of 2 + a[0...n1-1][0...n2-1] + :input/output data (double **) + <case1> + output data + a[k1][2*k2] = R[k1][k2] = R[n1-k1][n2-k2], + a[k1][2*k2+1] = I[k1][k2] = -I[n1-k1][n2-k2], + 0<k1<n1, 0<k2<n2/2, + a[0][2*k2] = R[0][k2] = R[0][n2-k2], + a[0][2*k2+1] = I[0][k2] = -I[0][n2-k2], + 0<k2<n2/2, + a[k1][0] = R[k1][0] = R[n1-k1][0], + a[k1][1] = I[k1][0] = -I[n1-k1][0], + a[n1-k1][1] = R[k1][n2/2] = R[n1-k1][n2/2], + a[n1-k1][0] = -I[k1][n2/2] = I[n1-k1][n2/2], + 0<k1<n1/2, + a[0][0] = R[0][0], + a[0][1] = R[0][n2/2], + a[n1/2][0] = R[n1/2][0], + a[n1/2][1] = R[n1/2][n2/2] + <case2> + input data + a[j1][2*j2] = R[j1][j2] = R[n1-j1][n2-j2], + a[j1][2*j2+1] = I[j1][j2] = -I[n1-j1][n2-j2], + 0<j1<n1, 0<j2<n2/2, + a[0][2*j2] = R[0][j2] = R[0][n2-j2], + a[0][2*j2+1] = I[0][j2] = -I[0][n2-j2], + 0<j2<n2/2, + a[j1][0] = R[j1][0] = R[n1-j1][0], + a[j1][1] = I[j1][0] = -I[n1-j1][0], + a[n1-j1][1] = R[j1][n2/2] = R[n1-j1][n2/2], + a[n1-j1][0] = -I[j1][n2/2] = I[n1-j1][n2/2], + 0<j1<n1/2, + a[0][0] = R[0][0], + a[0][1] = R[0][n2/2], + a[n1/2][0] = R[n1/2][0], + a[n1/2][1] = R[n1/2][n2/2] + ---- output ordering ---- + rdft2d(n1, n2, 1, a, t, ip, w); + rdft2dsort(n1, n2, 1, a); + // stored data is a[0...n1-1][0...n2+1]: + // a[k1][2*k2] = R[k1][k2], + // a[k1][2*k2+1] = I[k1][k2], + // 0<=k1<n1, 0<=k2<=n2/2. + // the stored data is larger than the input data! + ---- input ordering ---- + rdft2dsort(n1, n2, -1, a); + rdft2d(n1, n2, -1, a, t, ip, w); + t[0...*] + :work area (double *) + length of t >= 8*n1, if single thread, + length of t >= 8*n1*FFT2D_MAX_THREADS, if multi threads, + t is dynamically allocated, if t == NULL. + ip[0...*] + :work area for bit reversal (int *) + length of ip >= 2+sqrt(n) + (n = max(n1, n2/2)) + ip[0],ip[1] are pointers of the cos/sin table. + w[0...*] + :cos/sin table (double *) + length of w >= max(n1/2, n2/4) + n2/4 + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + rdft2d(n1, n2, 1, a, t, ip, w); + is + rdft2d(n1, n2, -1, a, t, ip, w); + for (j1 = 0; j1 <= n1 - 1; j1++) { + for (j2 = 0; j2 <= n2 - 1; j2++) { + a[j1][j2] *= 2.0 / n1 / n2; + } + } + . + + +-------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- + [definition] + <case1> IDCT (excluding scale) + C[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * + cos(pi*j1*(k1+1/2)/n1) * + cos(pi*j2*(k2+1/2)/n2), + 0<=k1<n1, 0<=k2<n2 + <case2> DCT + C[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * + cos(pi*(j1+1/2)*k1/n1) * + cos(pi*(j2+1/2)*k2/n2), + 0<=k1<n1, 0<=k2<n2 + [usage] + <case1> + ip[0] = 0; // first time only + ddct2d(n1, n2, 1, a, t, ip, w); + <case2> + ip[0] = 0; // first time only + ddct2d(n1, n2, -1, a, t, ip, w); + [parameters] + n1 :data length (int) + n1 >= 2, n1 = power of 2 + n2 :data length (int) + n2 >= 2, n2 = power of 2 + a[0...n1-1][0...n2-1] + :input/output data (double **) + output data + a[k1][k2] = C[k1][k2], 0<=k1<n1, 0<=k2<n2 + t[0...*] + :work area (double *) + length of t >= 4*n1, if single thread, + length of t >= 4*n1*FFT2D_MAX_THREADS, if multi threads, + t is dynamically allocated, if t == NULL. + ip[0...*] + :work area for bit reversal (int *) + length of ip >= 2+sqrt(n) + (n = max(n1/2, n2/2)) + ip[0],ip[1] are pointers of the cos/sin table. + w[0...*] + :cos/sin table (double *) + length of w >= max(n1*3/2, n2*3/2) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + ddct2d(n1, n2, -1, a, t, ip, w); + is + for (j1 = 0; j1 <= n1 - 1; j1++) { + a[j1][0] *= 0.5; + } + for (j2 = 0; j2 <= n2 - 1; j2++) { + a[0][j2] *= 0.5; + } + ddct2d(n1, n2, 1, a, t, ip, w); + for (j1 = 0; j1 <= n1 - 1; j1++) { + for (j2 = 0; j2 <= n2 - 1; j2++) { + a[j1][j2] *= 4.0 / n1 / n2; + } + } + . + + +-------- DST (Discrete Sine Transform) / Inverse of DST -------- + [definition] + <case1> IDST (excluding scale) + S[k1][k2] = sum_j1=1^n1 sum_j2=1^n2 A[j1][j2] * + sin(pi*j1*(k1+1/2)/n1) * + sin(pi*j2*(k2+1/2)/n2), + 0<=k1<n1, 0<=k2<n2 + <case2> DST + S[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * + sin(pi*(j1+1/2)*k1/n1) * + sin(pi*(j2+1/2)*k2/n2), + 0<k1<=n1, 0<k2<=n2 + [usage] + <case1> + ip[0] = 0; // first time only + ddst2d(n1, n2, 1, a, t, ip, w); + <case2> + ip[0] = 0; // first time only + ddst2d(n1, n2, -1, a, t, ip, w); + [parameters] + n1 :data length (int) + n1 >= 2, n1 = power of 2 + n2 :data length (int) + n2 >= 2, n2 = power of 2 + a[0...n1-1][0...n2-1] + :input/output data (double **) + <case1> + input data + a[j1][j2] = A[j1][j2], 0<j1<n1, 0<j2<n2, + a[j1][0] = A[j1][n2], 0<j1<n1, + a[0][j2] = A[n1][j2], 0<j2<n2, + a[0][0] = A[n1][n2] + (i.e. A[j1][j2] = a[j1 % n1][j2 % n2]) + output data + a[k1][k2] = S[k1][k2], 0<=k1<n1, 0<=k2<n2 + <case2> + output data + a[k1][k2] = S[k1][k2], 0<k1<n1, 0<k2<n2, + a[k1][0] = S[k1][n2], 0<k1<n1, + a[0][k2] = S[n1][k2], 0<k2<n2, + a[0][0] = S[n1][n2] + (i.e. S[k1][k2] = a[k1 % n1][k2 % n2]) + t[0...*] + :work area (double *) + length of t >= 4*n1, if single thread, + length of t >= 4*n1*FFT2D_MAX_THREADS, if multi threads, + t is dynamically allocated, if t == NULL. + ip[0...*] + :work area for bit reversal (int *) + length of ip >= 2+sqrt(n) + (n = max(n1/2, n2/2)) + ip[0],ip[1] are pointers of the cos/sin table. + w[0...*] + :cos/sin table (double *) + length of w >= max(n1*3/2, n2*3/2) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + ddst2d(n1, n2, -1, a, t, ip, w); + is + for (j1 = 0; j1 <= n1 - 1; j1++) { + a[j1][0] *= 0.5; + } + for (j2 = 0; j2 <= n2 - 1; j2++) { + a[0][j2] *= 0.5; + } + ddst2d(n1, n2, 1, a, t, ip, w); + for (j1 = 0; j1 <= n1 - 1; j1++) { + for (j2 = 0; j2 <= n2 - 1; j2++) { + a[j1][j2] *= 4.0 / n1 / n2; + } + } + . +*/ + + +#include <stdio.h> +#include <stdlib.h> +#define fft2d_alloc_error_check(p) { \ + if ((p) == NULL) { \ + fprintf(stderr, "fft2d memory allocation error\n"); \ + exit(1); \ + } \ +} + + +#ifdef USE_FFT2D_PTHREADS +#define USE_FFT2D_THREADS +#ifndef FFT2D_MAX_THREADS +#define FFT2D_MAX_THREADS 4 +#endif +#ifndef FFT2D_THREADS_BEGIN_N +#define FFT2D_THREADS_BEGIN_N 65536 +#endif +#include <pthread.h> +#define fft2d_thread_t pthread_t +#define fft2d_thread_create(thp,func,argp) { \ + if (pthread_create(thp, NULL, func, (void *) (argp)) != 0) { \ + fprintf(stderr, "fft2d thread error\n"); \ + exit(1); \ + } \ +} +#define fft2d_thread_wait(th) { \ + if (pthread_join(th, NULL) != 0) { \ + fprintf(stderr, "fft2d thread error\n"); \ + exit(1); \ + } \ +} +#endif /* USE_FFT2D_PTHREADS */ + + +#ifdef USE_FFT2D_WINTHREADS +#define USE_FFT2D_THREADS +#ifndef FFT2D_MAX_THREADS +#define FFT2D_MAX_THREADS 4 +#endif +#ifndef FFT2D_THREADS_BEGIN_N +#define FFT2D_THREADS_BEGIN_N 131072 +#endif +#include <windows.h> +#define fft2d_thread_t HANDLE +#define fft2d_thread_create(thp,func,argp) { \ + DWORD thid; \ + *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) (func), (LPVOID) (argp), 0, &thid); \ + if (*(thp) == 0) { \ + fprintf(stderr, "fft2d thread error\n"); \ + exit(1); \ + } \ +} +#define fft2d_thread_wait(th) { \ + WaitForSingleObject(th, INFINITE); \ + CloseHandle(th); \ +} +#endif /* USE_FFT2D_WINTHREADS */ + + +void cdft2d(int n1, int n2, int isgn, double **a, double *t, + int *ip, double *w) +{ + void makewt(int nw, int *ip, double *w); + void cdft(int n, int isgn, double *a, int *ip, double *w); + void cdft2d_sub(int n1, int n2, int isgn, double **a, double *t, + int *ip, double *w); +#ifdef USE_FFT2D_THREADS + void xdft2d0_subth(int n1, int n2, int icr, int isgn, double **a, + int *ip, double *w); + void cdft2d_subth(int n1, int n2, int isgn, double **a, double *t, + int *ip, double *w); +#endif /* USE_FFT2D_THREADS */ + int n, itnull, nthread, nt, i; + + n = n1 << 1; + if (n < n2) { + n = n2; + } + if (n > (ip[0] << 2)) { + makewt(n >> 2, ip, w); + } + itnull = 0; + if (t == NULL) { + itnull = 1; + nthread = 1; +#ifdef USE_FFT2D_THREADS + nthread = FFT2D_MAX_THREADS; +#endif /* USE_FFT2D_THREADS */ + nt = 8 * nthread * n1; + if (n2 == 4 * nthread) { + nt >>= 1; + } else if (n2 < 4 * nthread) { + nt >>= 2; + } + t = (double *) malloc(sizeof(double) * nt); + fft2d_alloc_error_check(t); + } +#ifdef USE_FFT2D_THREADS + if ((double) n1 * n2 >= (double) FFT2D_THREADS_BEGIN_N) { + xdft2d0_subth(n1, n2, 0, isgn, a, ip, w); + cdft2d_subth(n1, n2, isgn, a, t, ip, w); + } else +#endif /* USE_FFT2D_THREADS */ + { + for (i = 0; i < n1; i++) { + cdft(n2, isgn, a[i], ip, w); + } + cdft2d_sub(n1, n2, isgn, a, t, ip, w); + } + if (itnull != 0) { + free(t); + } +} + + +void rdft2d(int n1, int n2, int isgn, double **a, double *t, + int *ip, double *w) +{ + void makewt(int nw, int *ip, double *w); + void makect(int nc, int *ip, double *c); + void rdft(int n, int isgn, double *a, int *ip, double *w); + void cdft2d_sub(int n1, int n2, int isgn, double **a, double *t, + int *ip, double *w); + void rdft2d_sub(int n1, int n2, int isgn, double **a); +#ifdef USE_FFT2D_THREADS + void xdft2d0_subth(int n1, int n2, int icr, int isgn, double **a, + int *ip, double *w); + void cdft2d_subth(int n1, int n2, int isgn, double **a, double *t, + int *ip, double *w); +#endif /* USE_FFT2D_THREADS */ + int n, nw, nc, itnull, nthread, nt, i; + + n = n1 << 1; + if (n < n2) { + n = n2; + } + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n2 > (nc << 2)) { + nc = n2 >> 2; + makect(nc, ip, w + nw); + } + itnull = 0; + if (t == NULL) { + itnull = 1; + nthread = 1; +#ifdef USE_FFT2D_THREADS + nthread = FFT2D_MAX_THREADS; +#endif /* USE_FFT2D_THREADS */ + nt = 8 * nthread * n1; + if (n2 == 4 * nthread) { + nt >>= 1; + } else if (n2 < 4 * nthread) { + nt >>= 2; + } + t = (double *) malloc(sizeof(double) * nt); + fft2d_alloc_error_check(t); + } +#ifdef USE_FFT2D_THREADS + if ((double) n1 * n2 >= (double) FFT2D_THREADS_BEGIN_N) { + if (isgn < 0) { + rdft2d_sub(n1, n2, isgn, a); + cdft2d_subth(n1, n2, isgn, a, t, ip, w); + } + xdft2d0_subth(n1, n2, 1, isgn, a, ip, w); + if (isgn >= 0) { + cdft2d_subth(n1, n2, isgn, a, t, ip, w); + rdft2d_sub(n1, n2, isgn, a); + } + } else +#endif /* USE_FFT2D_THREADS */ + { + if (isgn < 0) { + rdft2d_sub(n1, n2, isgn, a); + cdft2d_sub(n1, n2, isgn, a, t, ip, w); + } + for (i = 0; i < n1; i++) { + rdft(n2, isgn, a[i], ip, w); + } + if (isgn >= 0) { + cdft2d_sub(n1, n2, isgn, a, t, ip, w); + rdft2d_sub(n1, n2, isgn, a); + } + } + if (itnull != 0) { + free(t); + } +} + + +void rdft2dsort(int n1, int n2, int isgn, double **a) +{ + int n1h, i; + double x, y; + + n1h = n1 >> 1; + if (isgn < 0) { + for (i = n1h + 1; i < n1; i++) { + a[i][0] = a[i][n2 + 1]; + a[i][1] = a[i][n2]; + } + a[0][1] = a[0][n2]; + a[n1h][1] = a[n1h][n2]; + } else { + for (i = n1h + 1; i < n1; i++) { + y = a[i][0]; + x = a[i][1]; + a[i][n2] = x; + a[i][n2 + 1] = y; + a[n1 - i][n2] = x; + a[n1 - i][n2 + 1] = -y; + a[i][0] = a[n1 - i][0]; + a[i][1] = -a[n1 - i][1]; + } + a[0][n2] = a[0][1]; + a[0][n2 + 1] = 0; + a[0][1] = 0; + a[n1h][n2] = a[n1h][1]; + a[n1h][n2 + 1] = 0; + a[n1h][1] = 0; + } +} + + +void ddct2d(int n1, int n2, int isgn, double **a, double *t, + int *ip, double *w) +{ + void makewt(int nw, int *ip, double *w); + void makect(int nc, int *ip, double *c); + void ddct(int n, int isgn, double *a, int *ip, double *w); + void ddxt2d_sub(int n1, int n2, int ics, int isgn, double **a, + double *t, int *ip, double *w); +#ifdef USE_FFT2D_THREADS + void ddxt2d0_subth(int n1, int n2, int ics, int isgn, double **a, + int *ip, double *w); + void ddxt2d_subth(int n1, int n2, int ics, int isgn, double **a, + double *t, int *ip, double *w); +#endif /* USE_FFT2D_THREADS */ + int n, nw, nc, itnull, nthread, nt, i; + + n = n1; + if (n < n2) { + n = n2; + } + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > nc) { + nc = n; + makect(nc, ip, w + nw); + } + itnull = 0; + if (t == NULL) { + itnull = 1; + nthread = 1; +#ifdef USE_FFT2D_THREADS + nthread = FFT2D_MAX_THREADS; +#endif /* USE_FFT2D_THREADS */ + nt = 4 * nthread * n1; + if (n2 == 2 * nthread) { + nt >>= 1; + } else if (n2 < 2 * nthread) { + nt >>= 2; + } + t = (double *) malloc(sizeof(double) * nt); + fft2d_alloc_error_check(t); + } +#ifdef USE_FFT2D_THREADS + if ((double) n1 * n2 >= (double) FFT2D_THREADS_BEGIN_N) { + ddxt2d0_subth(n1, n2, 0, isgn, a, ip, w); + ddxt2d_subth(n1, n2, 0, isgn, a, t, ip, w); + } else +#endif /* USE_FFT2D_THREADS */ + { + for (i = 0; i < n1; i++) { + ddct(n2, isgn, a[i], ip, w); + } + ddxt2d_sub(n1, n2, 0, isgn, a, t, ip, w); + } + if (itnull != 0) { + free(t); + } +} + + +void ddst2d(int n1, int n2, int isgn, double **a, double *t, + int *ip, double *w) +{ + void makewt(int nw, int *ip, double *w); + void makect(int nc, int *ip, double *c); + void ddst(int n, int isgn, double *a, int *ip, double *w); + void ddxt2d_sub(int n1, int n2, int ics, int isgn, double **a, + double *t, int *ip, double *w); +#ifdef USE_FFT2D_THREADS + void ddxt2d0_subth(int n1, int n2, int ics, int isgn, double **a, + int *ip, double *w); + void ddxt2d_subth(int n1, int n2, int ics, int isgn, double **a, + double *t, int *ip, double *w); +#endif /* USE_FFT2D_THREADS */ + int n, nw, nc, itnull, nthread, nt, i; + + n = n1; + if (n < n2) { + n = n2; + } + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > nc) { + nc = n; + makect(nc, ip, w + nw); + } + itnull = 0; + if (t == NULL) { + itnull = 1; + nthread = 1; +#ifdef USE_FFT2D_THREADS + nthread = FFT2D_MAX_THREADS; +#endif /* USE_FFT2D_THREADS */ + nt = 4 * nthread * n1; + if (n2 == 2 * nthread) { + nt >>= 1; + } else if (n2 < 2 * nthread) { + nt >>= 2; + } + t = (double *) malloc(sizeof(double) * nt); + fft2d_alloc_error_check(t); + } +#ifdef USE_FFT2D_THREADS + if ((double) n1 * n2 >= (double) FFT2D_THREADS_BEGIN_N) { + ddxt2d0_subth(n1, n2, 1, isgn, a, ip, w); + ddxt2d_subth(n1, n2, 1, isgn, a, t, ip, w); + } else +#endif /* USE_FFT2D_THREADS */ + { + for (i = 0; i < n1; i++) { + ddst(n2, isgn, a[i], ip, w); + } + ddxt2d_sub(n1, n2, 1, isgn, a, t, ip, w); + } + if (itnull != 0) { + free(t); + } +} + + +/* -------- child routines -------- */ + + +void cdft2d_sub(int n1, int n2, int isgn, double **a, double *t, + int *ip, double *w) +{ + void cdft(int n, int isgn, double *a, int *ip, double *w); + int i, j; + + if (n2 > 4) { + for (j = 0; j < n2; j += 8) { + for (i = 0; i < n1; i++) { + t[2 * i] = a[i][j]; + t[2 * i + 1] = a[i][j + 1]; + t[2 * n1 + 2 * i] = a[i][j + 2]; + t[2 * n1 + 2 * i + 1] = a[i][j + 3]; + t[4 * n1 + 2 * i] = a[i][j + 4]; + t[4 * n1 + 2 * i + 1] = a[i][j + 5]; + t[6 * n1 + 2 * i] = a[i][j + 6]; + t[6 * n1 + 2 * i + 1] = a[i][j + 7]; + } + cdft(2 * n1, isgn, t, ip, w); + cdft(2 * n1, isgn, &t[2 * n1], ip, w); + cdft(2 * n1, isgn, &t[4 * n1], ip, w); + cdft(2 * n1, isgn, &t[6 * n1], ip, w); + for (i = 0; i < n1; i++) { + a[i][j] = t[2 * i]; + a[i][j + 1] = t[2 * i + 1]; + a[i][j + 2] = t[2 * n1 + 2 * i]; + a[i][j + 3] = t[2 * n1 + 2 * i + 1]; + a[i][j + 4] = t[4 * n1 + 2 * i]; + a[i][j + 5] = t[4 * n1 + 2 * i + 1]; + a[i][j + 6] = t[6 * n1 + 2 * i]; + a[i][j + 7] = t[6 * n1 + 2 * i + 1]; + } + } + } else if (n2 == 4) { + for (i = 0; i < n1; i++) { + t[2 * i] = a[i][0]; + t[2 * i + 1] = a[i][1]; + t[2 * n1 + 2 * i] = a[i][2]; + t[2 * n1 + 2 * i + 1] = a[i][3]; + } + cdft(2 * n1, isgn, t, ip, w); + cdft(2 * n1, isgn, &t[2 * n1], ip, w); + for (i = 0; i < n1; i++) { + a[i][0] = t[2 * i]; + a[i][1] = t[2 * i + 1]; + a[i][2] = t[2 * n1 + 2 * i]; + a[i][3] = t[2 * n1 + 2 * i + 1]; + } + } else if (n2 == 2) { + for (i = 0; i < n1; i++) { + t[2 * i] = a[i][0]; + t[2 * i + 1] = a[i][1]; + } + cdft(2 * n1, isgn, t, ip, w); + for (i = 0; i < n1; i++) { + a[i][0] = t[2 * i]; + a[i][1] = t[2 * i + 1]; + } + } +} + + +void rdft2d_sub(int n1, int n2, int isgn, double **a) +{ + int n1h, i, j; + double xi; + + n1h = n1 >> 1; + if (isgn < 0) { + for (i = 1; i < n1h; i++) { + j = n1 - i; + xi = a[i][0] - a[j][0]; + a[i][0] += a[j][0]; + a[j][0] = xi; + xi = a[j][1] - a[i][1]; + a[i][1] += a[j][1]; + a[j][1] = xi; + } + } else { + for (i = 1; i < n1h; i++) { + j = n1 - i; + a[j][0] = 0.5 * (a[i][0] - a[j][0]); + a[i][0] -= a[j][0]; + a[j][1] = 0.5 * (a[i][1] + a[j][1]); + a[i][1] -= a[j][1]; + } + } +} + + +void ddxt2d_sub(int n1, int n2, int ics, int isgn, double **a, + double *t, int *ip, double *w) +{ + void ddct(int n, int isgn, double *a, int *ip, double *w); + void ddst(int n, int isgn, double *a, int *ip, double *w); + int i, j; + + if (n2 > 2) { + for (j = 0; j < n2; j += 4) { + for (i = 0; i < n1; i++) { + t[i] = a[i][j]; + t[n1 + i] = a[i][j + 1]; + t[2 * n1 + i] = a[i][j + 2]; + t[3 * n1 + i] = a[i][j + 3]; + } + if (ics == 0) { + ddct(n1, isgn, t, ip, w); + ddct(n1, isgn, &t[n1], ip, w); + ddct(n1, isgn, &t[2 * n1], ip, w); + ddct(n1, isgn, &t[3 * n1], ip, w); + } else { + ddst(n1, isgn, t, ip, w); + ddst(n1, isgn, &t[n1], ip, w); + ddst(n1, isgn, &t[2 * n1], ip, w); + ddst(n1, isgn, &t[3 * n1], ip, w); + } + for (i = 0; i < n1; i++) { + a[i][j] = t[i]; + a[i][j + 1] = t[n1 + i]; + a[i][j + 2] = t[2 * n1 + i]; + a[i][j + 3] = t[3 * n1 + i]; + } + } + } else if (n2 == 2) { + for (i = 0; i < n1; i++) { + t[i] = a[i][0]; + t[n1 + i] = a[i][1]; + } + if (ics == 0) { + ddct(n1, isgn, t, ip, w); + ddct(n1, isgn, &t[n1], ip, w); + } else { + ddst(n1, isgn, t, ip, w); + ddst(n1, isgn, &t[n1], ip, w); + } + for (i = 0; i < n1; i++) { + a[i][0] = t[i]; + a[i][1] = t[n1 + i]; + } + } +} + + +#ifdef USE_FFT2D_THREADS +struct fft2d_arg_st { + int nthread; + int n0; + int n1; + int n2; + int ic; + int isgn; + double **a; + double *t; + int *ip; + double *w; +}; +typedef struct fft2d_arg_st fft2d_arg_t; + + +void xdft2d0_subth(int n1, int n2, int icr, int isgn, double **a, + int *ip, double *w) +{ + void *xdft2d0_th(void *p); + fft2d_thread_t th[FFT2D_MAX_THREADS]; + fft2d_arg_t ag[FFT2D_MAX_THREADS]; + int nthread, i; + + nthread = FFT2D_MAX_THREADS; + if (nthread > n1) { + nthread = n1; + } + for (i = 0; i < nthread; i++) { + ag[i].nthread = nthread; + ag[i].n0 = i; + ag[i].n1 = n1; + ag[i].n2 = n2; + ag[i].ic = icr; + ag[i].isgn = isgn; + ag[i].a = a; + ag[i].ip = ip; + ag[i].w = w; + fft2d_thread_create(&th[i], xdft2d0_th, &ag[i]); + } + for (i = 0; i < nthread; i++) { + fft2d_thread_wait(th[i]); + } +} + + +void cdft2d_subth(int n1, int n2, int isgn, double **a, double *t, + int *ip, double *w) +{ + void *cdft2d_th(void *p); + fft2d_thread_t th[FFT2D_MAX_THREADS]; + fft2d_arg_t ag[FFT2D_MAX_THREADS]; + int nthread, nt, i; + + nthread = FFT2D_MAX_THREADS; + nt = 8 * n1; + if (n2 == 4 * FFT2D_MAX_THREADS) { + nt >>= 1; + } else if (n2 < 4 * FFT2D_MAX_THREADS) { + nthread = n2 >> 1; + nt >>= 2; + } + for (i = 0; i < nthread; i++) { + ag[i].nthread = nthread; + ag[i].n0 = i; + ag[i].n1 = n1; + ag[i].n2 = n2; + ag[i].isgn = isgn; + ag[i].a = a; + ag[i].t = &t[nt * i]; + ag[i].ip = ip; + ag[i].w = w; + fft2d_thread_create(&th[i], cdft2d_th, &ag[i]); + } + for (i = 0; i < nthread; i++) { + fft2d_thread_wait(th[i]); + } +} + + +void ddxt2d0_subth(int n1, int n2, int ics, int isgn, double **a, + int *ip, double *w) +{ + void *ddxt2d0_th(void *p); + fft2d_thread_t th[FFT2D_MAX_THREADS]; + fft2d_arg_t ag[FFT2D_MAX_THREADS]; + int nthread, i; + + nthread = FFT2D_MAX_THREADS; + if (nthread > n1) { + nthread = n1; + } + for (i = 0; i < nthread; i++) { + ag[i].nthread = nthread; + ag[i].n0 = i; + ag[i].n1 = n1; + ag[i].n2 = n2; + ag[i].ic = ics; + ag[i].isgn = isgn; + ag[i].a = a; + ag[i].ip = ip; + ag[i].w = w; + fft2d_thread_create(&th[i], ddxt2d0_th, &ag[i]); + } + for (i = 0; i < nthread; i++) { + fft2d_thread_wait(th[i]); + } +} + + +void ddxt2d_subth(int n1, int n2, int ics, int isgn, double **a, + double *t, int *ip, double *w) +{ + void *ddxt2d_th(void *p); + fft2d_thread_t th[FFT2D_MAX_THREADS]; + fft2d_arg_t ag[FFT2D_MAX_THREADS]; + int nthread, nt, i; + + nthread = FFT2D_MAX_THREADS; + nt = 4 * n1; + if (n2 == 2 * FFT2D_MAX_THREADS) { + nt >>= 1; + } else if (n2 < 2 * FFT2D_MAX_THREADS) { + nthread = n2; + nt >>= 2; + } + for (i = 0; i < nthread; i++) { + ag[i].nthread = nthread; + ag[i].n0 = i; + ag[i].n1 = n1; + ag[i].n2 = n2; + ag[i].ic = ics; + ag[i].isgn = isgn; + ag[i].a = a; + ag[i].t = &t[nt * i]; + ag[i].ip = ip; + ag[i].w = w; + fft2d_thread_create(&th[i], ddxt2d_th, &ag[i]); + } + for (i = 0; i < nthread; i++) { + fft2d_thread_wait(th[i]); + } +} + + +void *xdft2d0_th(void *p) +{ + void cdft(int n, int isgn, double *a, int *ip, double *w); + void rdft(int n, int isgn, double *a, int *ip, double *w); + int nthread, n0, n1, n2, icr, isgn, *ip, i; + double **a, *w; + + nthread = ((fft2d_arg_t *) p)->nthread; + n0 = ((fft2d_arg_t *) p)->n0; + n1 = ((fft2d_arg_t *) p)->n1; + n2 = ((fft2d_arg_t *) p)->n2; + icr = ((fft2d_arg_t *) p)->ic; + isgn = ((fft2d_arg_t *) p)->isgn; + a = ((fft2d_arg_t *) p)->a; + ip = ((fft2d_arg_t *) p)->ip; + w = ((fft2d_arg_t *) p)->w; + if (icr == 0) { + for (i = n0; i < n1; i += nthread) { + cdft(n2, isgn, a[i], ip, w); + } + } else { + for (i = n0; i < n1; i += nthread) { + rdft(n2, isgn, a[i], ip, w); + } + } + return (void *) 0; +} + + +void *cdft2d_th(void *p) +{ + void cdft(int n, int isgn, double *a, int *ip, double *w); + int nthread, n0, n1, n2, isgn, *ip, i, j; + double **a, *t, *w; + + nthread = ((fft2d_arg_t *) p)->nthread; + n0 = ((fft2d_arg_t *) p)->n0; + n1 = ((fft2d_arg_t *) p)->n1; + n2 = ((fft2d_arg_t *) p)->n2; + isgn = ((fft2d_arg_t *) p)->isgn; + a = ((fft2d_arg_t *) p)->a; + t = ((fft2d_arg_t *) p)->t; + ip = ((fft2d_arg_t *) p)->ip; + w = ((fft2d_arg_t *) p)->w; + if (n2 > 4 * nthread) { + for (j = 8 * n0; j < n2; j += 8 * nthread) { + for (i = 0; i < n1; i++) { + t[2 * i] = a[i][j]; + t[2 * i + 1] = a[i][j + 1]; + t[2 * n1 + 2 * i] = a[i][j + 2]; + t[2 * n1 + 2 * i + 1] = a[i][j + 3]; + t[4 * n1 + 2 * i] = a[i][j + 4]; + t[4 * n1 + 2 * i + 1] = a[i][j + 5]; + t[6 * n1 + 2 * i] = a[i][j + 6]; + t[6 * n1 + 2 * i + 1] = a[i][j + 7]; + } + cdft(2 * n1, isgn, t, ip, w); + cdft(2 * n1, isgn, &t[2 * n1], ip, w); + cdft(2 * n1, isgn, &t[4 * n1], ip, w); + cdft(2 * n1, isgn, &t[6 * n1], ip, w); + for (i = 0; i < n1; i++) { + a[i][j] = t[2 * i]; + a[i][j + 1] = t[2 * i + 1]; + a[i][j + 2] = t[2 * n1 + 2 * i]; + a[i][j + 3] = t[2 * n1 + 2 * i + 1]; + a[i][j + 4] = t[4 * n1 + 2 * i]; + a[i][j + 5] = t[4 * n1 + 2 * i + 1]; + a[i][j + 6] = t[6 * n1 + 2 * i]; + a[i][j + 7] = t[6 * n1 + 2 * i + 1]; + } + } + } else if (n2 == 4 * nthread) { + for (i = 0; i < n1; i++) { + t[2 * i] = a[i][4 * n0]; + t[2 * i + 1] = a[i][4 * n0 + 1]; + t[2 * n1 + 2 * i] = a[i][4 * n0 + 2]; + t[2 * n1 + 2 * i + 1] = a[i][4 * n0 + 3]; + } + cdft(2 * n1, isgn, t, ip, w); + cdft(2 * n1, isgn, &t[2 * n1], ip, w); + for (i = 0; i < n1; i++) { + a[i][4 * n0] = t[2 * i]; + a[i][4 * n0 + 1] = t[2 * i + 1]; + a[i][4 * n0 + 2] = t[2 * n1 + 2 * i]; + a[i][4 * n0 + 3] = t[2 * n1 + 2 * i + 1]; + } + } else if (n2 == 2 * nthread) { + for (i = 0; i < n1; i++) { + t[2 * i] = a[i][2 * n0]; + t[2 * i + 1] = a[i][2 * n0 + 1]; + } + cdft(2 * n1, isgn, t, ip, w); + for (i = 0; i < n1; i++) { + a[i][2 * n0] = t[2 * i]; + a[i][2 * n0 + 1] = t[2 * i + 1]; + } + } + return (void *) 0; +} + + +void *ddxt2d0_th(void *p) +{ + void ddct(int n, int isgn, double *a, int *ip, double *w); + void ddst(int n, int isgn, double *a, int *ip, double *w); + int nthread, n0, n1, n2, ics, isgn, *ip, i; + double **a, *w; + + nthread = ((fft2d_arg_t *) p)->nthread; + n0 = ((fft2d_arg_t *) p)->n0; + n1 = ((fft2d_arg_t *) p)->n1; + n2 = ((fft2d_arg_t *) p)->n2; + ics = ((fft2d_arg_t *) p)->ic; + isgn = ((fft2d_arg_t *) p)->isgn; + a = ((fft2d_arg_t *) p)->a; + ip = ((fft2d_arg_t *) p)->ip; + w = ((fft2d_arg_t *) p)->w; + if (ics == 0) { + for (i = n0; i < n1; i += nthread) { + ddct(n2, isgn, a[i], ip, w); + } + } else { + for (i = n0; i < n1; i += nthread) { + ddst(n2, isgn, a[i], ip, w); + } + } + return (void *) 0; +} + + +void *ddxt2d_th(void *p) +{ + void ddct(int n, int isgn, double *a, int *ip, double *w); + void ddst(int n, int isgn, double *a, int *ip, double *w); + int nthread, n0, n1, n2, ics, isgn, *ip, i, j; + double **a, *t, *w; + + nthread = ((fft2d_arg_t *) p)->nthread; + n0 = ((fft2d_arg_t *) p)->n0; + n1 = ((fft2d_arg_t *) p)->n1; + n2 = ((fft2d_arg_t *) p)->n2; + ics = ((fft2d_arg_t *) p)->ic; + isgn = ((fft2d_arg_t *) p)->isgn; + a = ((fft2d_arg_t *) p)->a; + t = ((fft2d_arg_t *) p)->t; + ip = ((fft2d_arg_t *) p)->ip; + w = ((fft2d_arg_t *) p)->w; + if (n2 > 2 * nthread) { + for (j = 4 * n0; j < n2; j += 4 * nthread) { + for (i = 0; i < n1; i++) { + t[i] = a[i][j]; + t[n1 + i] = a[i][j + 1]; + t[2 * n1 + i] = a[i][j + 2]; + t[3 * n1 + i] = a[i][j + 3]; + } + if (ics == 0) { + ddct(n1, isgn, t, ip, w); + ddct(n1, isgn, &t[n1], ip, w); + ddct(n1, isgn, &t[2 * n1], ip, w); + ddct(n1, isgn, &t[3 * n1], ip, w); + } else { + ddst(n1, isgn, t, ip, w); + ddst(n1, isgn, &t[n1], ip, w); + ddst(n1, isgn, &t[2 * n1], ip, w); + ddst(n1, isgn, &t[3 * n1], ip, w); + } + for (i = 0; i < n1; i++) { + a[i][j] = t[i]; + a[i][j + 1] = t[n1 + i]; + a[i][j + 2] = t[2 * n1 + i]; + a[i][j + 3] = t[3 * n1 + i]; + } + } + } else if (n2 == 2 * nthread) { + for (i = 0; i < n1; i++) { + t[i] = a[i][2 * n0]; + t[n1 + i] = a[i][2 * n0 + 1]; + } + if (ics == 0) { + ddct(n1, isgn, t, ip, w); + ddct(n1, isgn, &t[n1], ip, w); + } else { + ddst(n1, isgn, t, ip, w); + ddst(n1, isgn, &t[n1], ip, w); + } + for (i = 0; i < n1; i++) { + a[i][2 * n0] = t[i]; + a[i][2 * n0 + 1] = t[n1 + i]; + } + } else if (n2 == nthread) { + for (i = 0; i < n1; i++) { + t[i] = a[i][n0]; + } + if (ics == 0) { + ddct(n1, isgn, t, ip, w); + } else { + ddst(n1, isgn, t, ip, w); + } + for (i = 0; i < n1; i++) { + a[i][n0] = t[i]; + } + } + return (void *) 0; +} +#endif /* USE_FFT2D_THREADS */ + diff --git a/src/fft2d/fft2d/fftsg2d.f b/src/fft2d/fft2d/fftsg2d.f new file mode 100644 index 0000000..49a9d6c --- /dev/null +++ b/src/fft2d/fft2d/fftsg2d.f @@ -0,0 +1,562 @@ +! Fast Fourier/Cosine/Sine Transform +! dimension :two +! data length :power of 2 +! decimation :frequency +! radix :split-radix, row-column +! data :inplace +! table :use +! subroutines +! cdft2d: Complex Discrete Fourier Transform +! rdft2d: Real Discrete Fourier Transform +! ddct2d: Discrete Cosine Transform +! ddst2d: Discrete Sine Transform +! necessary package +! fftsg.f : 1D-FFT package +! +! +! -------- Complex DFT (Discrete Fourier Transform) -------- +! [definition] +! <case1> +! X(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 x(j1,j2) * +! exp(2*pi*i*j1*k1/n1) * +! exp(2*pi*i*j2*k2/n2), +! 0<=k1<n1, 0<=k2<n2 +! <case2> +! X(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 x(j1,j2) * +! exp(-2*pi*i*j1*k1/n1) * +! exp(-2*pi*i*j2*k2/n2), +! 0<=k1<n1, 0<=k2<n2 +! (notes: sum_j=0^n-1 is a summation from j=0 to n-1) +! [usage] +! <case1> +! ip(0) = 0 ! first time only +! call cdft2d(n1max, 2*n1, n2, 1, a, t, ip, w) +! <case2> +! ip(0) = 0 ! first time only +! call cdft2d(n1max, 2*n1, n2, -1, a, t, ip, w) +! [parameters] +! n1max :row size of the 2D array (integer) +! 2*n1 :data length (integer) +! n1 >= 1, n1 = power of 2 +! n2 :data length (integer) +! n2 >= 1, n2 = power of 2 +! a(0:2*n1-1,0:n2-1) +! :input/output data (real*8) +! input data +! a(2*j1,j2) = Re(x(j1,j2)), +! a(2*j1+1,j2) = Im(x(j1,j2)), +! 0<=j1<n1, 0<=j2<n2 +! output data +! a(2*k1,k2) = Re(X(k1,k2)), +! a(2*k1+1,k2) = Im(X(k1,k2)), +! 0<=k1<n1, 0<=k2<n2 +! t(0:8*n2-1) +! :work area (real*8) +! length of t >= 8*n2 +! ip(0:*):work area for bit reversal (integer) +! length of ip >= 2+sqrt(n) +! (n = max(n1, n2)) +! ip(0),ip(1) are pointers of the cos/sin table. +! w(0:*) :cos/sin table (real*8) +! length of w >= max(n1/2, n2/2) +! w(),ip() are initialized if ip(0) = 0. +! [remark] +! Inverse of +! call cdft2d(n1max, 2*n1, n2, -1, a, t, ip, w) +! is +! call cdft2d(n1max, 2*n1, n2, 1, a, t, ip, w) +! do j2 = 0, n2 - 1 +! do j1 = 0, 2 * n1 - 1 +! a(j1, j2) = a(j1, j2) * (1.0d0 / n1 / n2) +! end do +! end do +! . +! +! +! -------- Real DFT / Inverse of Real DFT -------- +! [definition] +! <case1> RDFT +! R(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * +! cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2), +! 0<=k1<n1, 0<=k2<n2 +! I(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * +! sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2), +! 0<=k1<n1, 0<=k2<n2 +! <case2> IRDFT (excluding scale) +! a(k1,k2) = (1/2) * sum_j1=0^n1-1 sum_j2=0^n2-1 +! (R(j1,j2) * +! cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2) + +! I(j1,j2) * +! sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2)), +! 0<=k1<n1, 0<=k2<n2 +! (notes: R(n1-k1,n2-k2) = R(k1,k2), +! I(n1-k1,n2-k2) = -I(k1,k2), +! R(n1-k1,0) = R(k1,0), +! I(n1-k1,0) = -I(k1,0), +! R(0,n2-k2) = R(0,k2), +! I(0,n2-k2) = -I(0,k2), +! 0<k1<n1, 0<k2<n2) +! [usage] +! <case1> +! ip(0) = 0 ! first time only +! call rdft2d(n1max, n1, n2, 1, a, t, ip, w) +! <case2> +! ip(0) = 0 ! first time only +! call rdft2d(n1max, n1, n2, -1, a, t, ip, w) +! [parameters] +! n1max :row size of the 2D array (integer) +! n1 :data length (integer) +! n1 >= 2, n1 = power of 2 +! n2 :data length (integer) +! n2 >= 2, n2 = power of 2 +! a(0:n1-1,0:n2-1) +! :input/output data (real*8) +! <case1> +! output data +! a(2*k1,k2) = R(k1,k2) = R(n1-k1,n2-k2), +! a(2*k1+1,k2) = I(k1,k2) = -I(n1-k1,n2-k2), +! 0<k1<n1/2, 0<k2<n2, +! a(2*k1,0) = R(k1,0) = R(n1-k1,0), +! a(2*k1+1,0) = I(k1,0) = -I(n1-k1,0), +! 0<k1<n1/2, +! a(0,k2) = R(0,k2) = R(0,n2-k2), +! a(1,k2) = I(0,k2) = -I(0,n2-k2), +! a(1,n2-k2) = R(n1/2,k2) = R(n1/2,n2-k2), +! a(0,n2-k2) = -I(n1/2,k2) = I(n1/2,n2-k2), +! 0<k2<n2/2, +! a(0,0) = R(0,0), +! a(1,0) = R(n1/2,0), +! a(0,n2/2) = R(0,n2/2), +! a(1,n2/2) = R(n1/2,n2/2) +! <case2> +! input data +! a(2*j1,j2) = R(j1,j2) = R(n1-j1,n2-j2), +! a(2*j1+1,j2) = I(j1,j2) = -I(n1-j1,n2-j2), +! 0<j1<n1/2, 0<j2<n2, +! a(2*j1,0) = R(j1,0) = R(n1-j1,0), +! a(2*j1+1,0) = I(j1,0) = -I(n1-j1,0), +! 0<j1<n1/2, +! a(0,j2) = R(0,j2) = R(0,n2-j2), +! a(1,j2) = I(0,j2) = -I(0,n2-j2), +! a(1,n2-j2) = R(n1/2,j2) = R(n1/2,n2-j2), +! a(0,n2-j2) = -I(n1/2,j2) = I(n1/2,n2-j2), +! 0<j2<n2/2, +! a(0,0) = R(0,0), +! a(1,0) = R(n1/2,0), +! a(0,n2/2) = R(0,n2/2), +! a(1,n2/2) = R(n1/2,n2/2) +! ---- output ordering ---- +! call rdft2d(n1max, n1, n2, 1, a, t, ip, w) +! call rdft2dsort(n1max, n1, n2, 1, a) +! ! stored data is a(0:n1-1,0:n2+1): +! ! a(2*k1,k2) = R(k1,k2), +! ! a(2*k1+1,k2) = I(k1,k2), +! ! 0<=k1<=n1/2, 0<=k2<n2. +! ! the stored data is larger than the input data! +! ---- input ordering ---- +! call rdft2dsort(n1max, n1, n2, -1, a) +! call rdft2d(n1max, n1, n2, -1, a, t, ip, w) +! t(0:8*n2-1) +! :work area (real*8) +! length of t >= 8*n2 +! ip(0:*):work area for bit reversal (integer) +! length of ip >= 2+sqrt(n) +! (n = max(n1/2, n2)) +! ip(0),ip(1) are pointers of the cos/sin table. +! w(0:*) :cos/sin table (real*8) +! length of w >= max(n1/4, n2/2) + n1/4 +! w(),ip() are initialized if ip(0) = 0. +! [remark] +! Inverse of +! call rdft2d(n1max, n1, n2, 1, a, t, ip, w) +! is +! call rdft2d(n1max, n1, n2, -1, a, t, ip, w) +! do j2 = 0, n2 - 1 +! do j1 = 0, n1 - 1 +! a(j1, j2) = a(j1, j2) * (2.0d0 / n1 / n2) +! end do +! end do +! . +! +! +! -------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- +! [definition] +! <case1> IDCT (excluding scale) +! C(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * +! cos(pi*j1*(k1+1/2)/n1) * +! cos(pi*j2*(k2+1/2)/n2), +! 0<=k1<n1, 0<=k2<n2 +! <case2> DCT +! C(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * +! cos(pi*(j1+1/2)*k1/n1) * +! cos(pi*(j2+1/2)*k2/n2), +! 0<=k1<n1, 0<=k2<n2 +! [usage] +! <case1> +! ip(0) = 0 ! first time only +! call ddct2d(n1max, n1, n2, 1, a, t, ip, w) +! <case2> +! ip(0) = 0 ! first time only +! call ddct2d(n1max, n1, n2, -1, a, t, ip, w) +! [parameters] +! n1max :row size of the 2D array (integer) +! n1 :data length (integer) +! n1 >= 2, n1 = power of 2 +! n2 :data length (integer) +! n2 >= 2, n2 = power of 2 +! a(0:n1-1,0:n2-1) +! :input/output data (real*8) +! output data +! a(k1,k2) = C(k1,k2), 0<=k1<n1, 0<=k2<n2 +! t(0:4*n2-1) +! :work area (real*8) +! length of t >= 4*n2 +! ip(0:*):work area for bit reversal (integer) +! length of ip >= 2+sqrt(n) +! (n = max(n1/2, n2/2)) +! ip(0),ip(1) are pointers of the cos/sin table. +! w(0:*) :cos/sin table (real*8) +! length of w >= max(n1*3/2, n2*3/2) +! w(),ip() are initialized if ip(0) = 0. +! [remark] +! Inverse of +! call ddct2d(n1max, n1, n2, -1, a, t, ip, w) +! is +! do j1 = 0, n1 - 1 +! a(j1, 0) = a(j1, 0) * 0.5d0 +! end do +! do j2 = 0, n2 - 1 +! a(0, j2) = a(0, j2) * 0.5d0 +! end do +! call ddct2d(n1max, n1, n2, 1, a, t, ip, w) +! do j2 = 0, n2 - 1 +! do j1 = 0, n1 - 1 +! a(j1, j2) = a(j1, j2) * (4.0d0 / n1 / n2) +! end do +! end do +! . +! +! +! -------- DST (Discrete Sine Transform) / Inverse of DST -------- +! [definition] +! <case1> IDST (excluding scale) +! S(k1,k2) = sum_j1=1^n1 sum_j2=1^n2 A(j1,j2) * +! sin(pi*j1*(k1+1/2)/n1) * +! sin(pi*j2*(k2+1/2)/n2), +! 0<=k1<n1, 0<=k2<n2 +! <case2> DST +! S(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * +! sin(pi*(j1+1/2)*k1/n1) * +! sin(pi*(j2+1/2)*k2/n2), +! 0<k1<=n1, 0<k2<=n2 +! [usage] +! <case1> +! ip(0) = 0 ! first time only +! call ddst2d(n1max, n1, n2, 1, a, t, ip, w) +! <case2> +! ip(0) = 0 ! first time only +! call ddst2d(n1max, n1, n2, -1, a, t, ip, w) +! [parameters] +! n1max :row size of the 2D array (integer) +! n1 :data length (integer) +! n1 >= 2, n1 = power of 2 +! n2 :data length (integer) +! n2 >= 2, n2 = power of 2 +! a(0:n1-1,0:n2-1) +! :input/output data (real*8) +! <case1> +! input data +! a(j1,j2) = A(j1,j2), 0<j1<n1, 0<j2<n2, +! a(j1,0) = A(j1,n2), 0<j1<n1, +! a(0,j2) = A(n1,j2), 0<j2<n2, +! a(0,0) = A(n1,n2) +! (i.e. A(j1,j2) = a(mod(j1,n1),mod(j2,n2))) +! output data +! a(k1,k2) = S(k1,k2), 0<=k1<n1, 0<=k2<n2 +! <case2> +! output data +! a(k1,k2) = S(k1,k2), 0<k1<n1, 0<k2<n2, +! a(k1,0) = S(k1,n2), 0<k1<n1, +! a(0,k2) = S(n1,k2), 0<k2<n2, +! a(0,0) = S(n1,n2) +! (i.e. S(k1,k2) = a(mod(k1,n1),mod(k2,n2))) +! t(0:4*n2-1) +! :work area (real*8) +! length of t >= 4*n2 +! ip(0:*):work area for bit reversal (integer) +! length of ip >= 2+sqrt(n) +! (n = max(n1/2, n2/2)) +! ip(0),ip(1) are pointers of the cos/sin table. +! w(0:*) :cos/sin table (real*8) +! length of w >= max(n1*3/2, n2*3/2) +! w(),ip() are initialized if ip(0) = 0. +! [remark] +! Inverse of +! call ddst2d(n1max, n1, n2, -1, a, t, ip, w) +! is +! do j1 = 0, n1 - 1 +! a(j1, 0) = a(j1, 0) * 0.5d0 +! end do +! do j2 = 0, n2 - 1 +! a(0, j2) = a(0, j2) * 0.5d0 +! end do +! call ddst2d(n1max, n1, n2, 1, a, t, ip, w) +! do j2 = 0, n2 - 1 +! do j1 = 0, n1 - 1 +! a(j1, j2) = a(j1, j2) * (4.0d0 / n1 / n2) +! end do +! end do +! . +! +! + subroutine cdft2d(n1max, n1, n2, isgn, a, t, ip, w) + integer n1max, n1, n2, isgn, ip(0 : *), n, j + real*8 a(0 : n1max - 1, 0 : n2 - 1), t(0 : 8 * n2 - 1), + & w(0 : *) + n = max(n1, 2 * n2) + if (n .gt. 4 * ip(0)) then + call makewt(n / 4, ip, w) + end if + do j = 0, n2 - 1 + call cdft(n1, isgn, a(0, j), ip, w) + end do + call cdft2d_sub(n1max, n1, n2, isgn, a, t, ip, w) + end +! + subroutine rdft2d(n1max, n1, n2, isgn, a, t, ip, w) + integer n1max, n1, n2, isgn, ip(0 : *), n, nw, nc, j + real*8 a(0 : n1max - 1, 0 : n2 - 1), t(0 : 8 * n2 - 1), + & w(0 : *) + n = max(n1, 2 * n2) + nw = ip(0) + if (n .gt. 4 * nw) then + nw = n / 4 + call makewt(nw, ip, w) + end if + nc = ip(1) + if (n1 .gt. 4 * nc) then + nc = n1 / 4 + call makect(nc, ip, w(nw)) + end if + if (isgn .lt. 0) then + call rdft2d_sub(n1max, n1, n2, isgn, a) + call cdft2d_sub(n1max, n1, n2, isgn, a, t, ip, w) + end if + do j = 0, n2 - 1 + call rdft(n1, isgn, a(0, j), ip, w) + end do + if (isgn .ge. 0) then + call cdft2d_sub(n1max, n1, n2, isgn, a, t, ip, w) + call rdft2d_sub(n1max, n1, n2, isgn, a) + end if + end +! + subroutine rdft2dsort(n1max, n1, n2, isgn, a) + integer n1max, n1, n2, isgn, n2h, j + real*8 a(0 : n1max - 1, 0 : n2 - 1), x, y + n2h = n2 / 2 + if (isgn .lt. 0) then + do j = n2h + 1, n2 - 1 + a(0, j) = a(n1 + 1, j) + a(1, j) = a(n1, j) + end do + a(1, 0) = a(n1, 0) + a(1, n2h) = a(n1, n2h) + else + do j = n2h + 1, n2 - 1 + y = a(0, j) + x = a(1, j) + a(n1, j) = x + a(n1 + 1, j) = y + a(n1, n2 - j) = x + a(n1 + 1, n2 - j) = -y + a(0, j) = a(0, n2 - j) + a(1, j) = -a(1, n2 - j) + end do + a(n1, 0) = a(1, 0) + a(n1 + 1, 0) = 0 + a(1, 0) = 0 + a(n1, n2h) = a(1, n2h) + a(n1 + 1, n2h) = 0 + a(1, n2h) = 0 + end if + end +! + subroutine ddct2d(n1max, n1, n2, isgn, a, t, ip, w) + integer n1max, n1, n2, isgn, ip(0 : *), n, nw, nc, j + real*8 a(0 : n1max - 1, 0 : n2 - 1), t(0 : 4 * n2 - 1), + & w(0 : *) + n = max(n1, n2) + nw = ip(0) + if (n .gt. 4 * nw) then + nw = n / 4 + call makewt(nw, ip, w) + end if + nc = ip(1) + if (n .gt. nc) then + nc = n + call makect(nc, ip, w(nw)) + end if + do j = 0, n2 - 1 + call ddct(n1, isgn, a(0, j), ip, w) + end do + call ddxt2d_sub(n1max, n1, n2, 0, isgn, a, t, ip, w) + end +! + subroutine ddst2d(n1max, n1, n2, isgn, a, t, ip, w) + integer n1max, n1, n2, isgn, ip(0 : *), n, nw, nc, j + real*8 a(0 : n1max - 1, 0 : n2 - 1), t(0 : 4 * n2 - 1), + & w(0 : *) + n = max(n1, n2) + nw = ip(0) + if (n .gt. 4 * nw) then + nw = n / 4 + call makewt(nw, ip, w) + end if + nc = ip(1) + if (n .gt. nc) then + nc = n + call makect(nc, ip, w(nw)) + end if + do j = 0, n2 - 1 + call ddst(n1, isgn, a(0, j), ip, w) + end do + call ddxt2d_sub(n1max, n1, n2, 1, isgn, a, t, ip, w) + end +! +! -------- child routines -------- +! + subroutine cdft2d_sub(n1max, n1, n2, isgn, a, t, ip, w) + integer n1max, n1, n2, isgn, ip(0 : *), i, j + real*8 a(0 : n1max - 1, 0 : n2 - 1), t(0 : 8 * n2 - 1), + & w(0 : *) + if (n1 .gt. 4) then + do i = 0, n1 - 8, 8 + do j = 0, n2 - 1 + t(2 * j) = a(i, j) + t(2 * j + 1) = a(i + 1, j) + t(2 * n2 + 2 * j) = a(i + 2, j) + t(2 * n2 + 2 * j + 1) = a(i + 3, j) + t(4 * n2 + 2 * j) = a(i + 4, j) + t(4 * n2 + 2 * j + 1) = a(i + 5, j) + t(6 * n2 + 2 * j) = a(i + 6, j) + t(6 * n2 + 2 * j + 1) = a(i + 7, j) + end do + call cdft(2 * n2, isgn, t, ip, w) + call cdft(2 * n2, isgn, t(2 * n2), ip, w) + call cdft(2 * n2, isgn, t(4 * n2), ip, w) + call cdft(2 * n2, isgn, t(6 * n2), ip, w) + do j = 0, n2 - 1 + a(i, j) = t(2 * j) + a(i + 1, j) = t(2 * j + 1) + a(i + 2, j) = t(2 * n2 + 2 * j) + a(i + 3, j) = t(2 * n2 + 2 * j + 1) + a(i + 4, j) = t(4 * n2 + 2 * j) + a(i + 5, j) = t(4 * n2 + 2 * j + 1) + a(i + 6, j) = t(6 * n2 + 2 * j) + a(i + 7, j) = t(6 * n2 + 2 * j + 1) + end do + end do + else if (n1 .eq. 4) then + do j = 0, n2 - 1 + t(2 * j) = a(0, j) + t(2 * j + 1) = a(1, j) + t(2 * n2 + 2 * j) = a(2, j) + t(2 * n2 + 2 * j + 1) = a(3, j) + end do + call cdft(2 * n2, isgn, t, ip, w) + call cdft(2 * n2, isgn, t(2 * n2), ip, w) + do j = 0, n2 - 1 + a(0, j) = t(2 * j) + a(1, j) = t(2 * j + 1) + a(2, j) = t(2 * n2 + 2 * j) + a(3, j) = t(2 * n2 + 2 * j + 1) + end do + else if (n1 .eq. 2) then + do j = 0, n2 - 1 + t(2 * j) = a(0, j) + t(2 * j + 1) = a(1, j) + end do + call cdft(2 * n2, isgn, t, ip, w) + do j = 0, n2 - 1 + a(0, j) = t(2 * j) + a(1, j) = t(2 * j + 1) + end do + end if + end +! + subroutine rdft2d_sub(n1max, n1, n2, isgn, a) + integer n1max, n1, n2, isgn, n2h, i, j + real*8 a(0 : n1max - 1, 0 : n2 - 1), xi + n2h = n2 / 2 + if (isgn .lt. 0) then + do i = 1, n2h - 1 + j = n2 - i + xi = a(0, i) - a(0, j) + a(0, i) = a(0, i) + a(0, j) + a(0, j) = xi + xi = a(1, j) - a(1, i) + a(1, i) = a(1, i) + a(1, j) + a(1, j) = xi + end do + else + do i = 1, n2h - 1 + j = n2 - i + a(0, j) = 0.5d0 * (a(0, i) - a(0, j)) + a(0, i) = a(0, i) - a(0, j) + a(1, j) = 0.5d0 * (a(1, i) + a(1, j)) + a(1, i) = a(1, i) - a(1, j) + end do + end if + end +! + subroutine ddxt2d_sub(n1max, n1, n2, ics, isgn, a, t, + & ip, w) + integer n1max, n1, n2, ics, isgn, ip(0 : *), i, j + real*8 a(0 : n1max - 1, 0 : n2 - 1), t(0 : 4 * n2 - 1), + & w(0 : *) + if (n1 .gt. 2) then + do i = 0, n1 - 4, 4 + do j = 0, n2 - 1 + t(j) = a(i, j) + t(n2 + j) = a(i + 1, j) + t(2 * n2 + j) = a(i + 2, j) + t(3 * n2 + j) = a(i + 3, j) + end do + if (ics .eq. 0) then + call ddct(n2, isgn, t, ip, w) + call ddct(n2, isgn, t(n2), ip, w) + call ddct(n2, isgn, t(2 * n2), ip, w) + call ddct(n2, isgn, t(3 * n2), ip, w) + else + call ddst(n2, isgn, t, ip, w) + call ddst(n2, isgn, t(n2), ip, w) + call ddst(n2, isgn, t(2 * n2), ip, w) + call ddst(n2, isgn, t(3 * n2), ip, w) + end if + do j = 0, n2 - 1 + a(i, j) = t(j) + a(i + 1, j) = t(n2 + j) + a(i + 2, j) = t(2 * n2 + j) + a(i + 3, j) = t(3 * n2 + j) + end do + end do + else if (n1 .eq. 2) then + do j = 0, n2 - 1 + t(j) = a(0, j) + t(n2 + j) = a(1, j) + end do + if (ics .eq. 0) then + call ddct(n2, isgn, t, ip, w) + call ddct(n2, isgn, t(n2), ip, w) + else + call ddst(n2, isgn, t, ip, w) + call ddst(n2, isgn, t(n2), ip, w) + end if + do j = 0, n2 - 1 + a(0, j) = t(j) + a(1, j) = t(n2 + j) + end do + end if + end +! diff --git a/src/fft2d/fft2d/fftsg3d.c b/src/fft2d/fft2d/fftsg3d.c new file mode 100644 index 0000000..223c0e2 --- /dev/null +++ b/src/fft2d/fft2d/fftsg3d.c @@ -0,0 +1,1695 @@ +/* +Fast Fourier/Cosine/Sine Transform + dimension :three + data length :power of 2 + decimation :frequency + radix :split-radix, row-column + data :inplace + table :use +functions + cdft3d: Complex Discrete Fourier Transform + rdft3d: Real Discrete Fourier Transform + ddct3d: Discrete Cosine Transform + ddst3d: Discrete Sine Transform +function prototypes + void cdft3d(int, int, int, int, double ***, double *, int *, double *); + void rdft3d(int, int, int, int, double ***, double *, int *, double *); + void rdft3dsort(int, int, int, int, double ***); + void ddct3d(int, int, int, int, double ***, double *, int *, double *); + void ddst3d(int, int, int, int, double ***, double *, int *, double *); +necessary package + fftsg.c : 1D-FFT package +macro definitions + USE_FFT3D_PTHREADS : default=not defined + FFT3D_MAX_THREADS : must be 2^N, default=4 + FFT3D_THREADS_BEGIN_N : default=65536 + USE_FFT3D_WINTHREADS : default=not defined + FFT3D_MAX_THREADS : must be 2^N, default=4 + FFT3D_THREADS_BEGIN_N : default=131072 + + +-------- Complex DFT (Discrete Fourier Transform) -------- + [definition] + <case1> + X[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 + x[j1][j2][j3] * + exp(2*pi*i*j1*k1/n1) * + exp(2*pi*i*j2*k2/n2) * + exp(2*pi*i*j3*k3/n3), + 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 + <case2> + X[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 + x[j1][j2][j3] * + exp(-2*pi*i*j1*k1/n1) * + exp(-2*pi*i*j2*k2/n2) * + exp(-2*pi*i*j3*k3/n3), + 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 + (notes: sum_j=0^n-1 is a summation from j=0 to n-1) + [usage] + <case1> + ip[0] = 0; // first time only + cdft3d(n1, n2, 2*n3, 1, a, t, ip, w); + <case2> + ip[0] = 0; // first time only + cdft3d(n1, n2, 2*n3, -1, a, t, ip, w); + [parameters] + n1 :data length (int) + n1 >= 1, n1 = power of 2 + n2 :data length (int) + n2 >= 1, n2 = power of 2 + 2*n3 :data length (int) + n3 >= 1, n3 = power of 2 + a[0...n1-1][0...n2-1][0...2*n3-1] + :input/output data (double ***) + input data + a[j1][j2][2*j3] = Re(x[j1][j2][j3]), + a[j1][j2][2*j3+1] = Im(x[j1][j2][j3]), + 0<=j1<n1, 0<=j2<n2, 0<=j3<n3 + output data + a[k1][k2][2*k3] = Re(X[k1][k2][k3]), + a[k1][k2][2*k3+1] = Im(X[k1][k2][k3]), + 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 + t[0...*] + :work area (double *) + length of t >= max(8*n1, 8*n2), if single thread, + length of t >= max(8*n1, 8*n2)*FFT3D_MAX_THREADS, + if multi threads, + t is dynamically allocated, if t == NULL. + ip[0...*] + :work area for bit reversal (int *) + length of ip >= 2+sqrt(n) + (n = max(n1, n2, n3)) + ip[0],ip[1] are pointers of the cos/sin table. + w[0...*] + :cos/sin table (double *) + length of w >= max(n1/2, n2/2, n3/2) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + cdft3d(n1, n2, 2*n3, -1, a, t, ip, w); + is + cdft3d(n1, n2, 2*n3, 1, a, t, ip, w); + for (j1 = 0; j1 <= n1 - 1; j1++) { + for (j2 = 0; j2 <= n2 - 1; j2++) { + for (j3 = 0; j3 <= 2 * n3 - 1; j3++) { + a[j1][j2][j3] *= 1.0 / n1 / n2 / n3; + } + } + } + . + + +-------- Real DFT / Inverse of Real DFT -------- + [definition] + <case1> RDFT + R[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 + a[j1][j2][j3] * + cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 + + 2*pi*j3*k3/n3), + 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 + I[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 + a[j1][j2][j3] * + sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 + + 2*pi*j3*k3/n3), + 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 + <case2> IRDFT (excluding scale) + a[k1][k2][k3] = (1/2) * sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 + (R[j1][j2][j3] * + cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 + + 2*pi*j3*k3/n3) + + I[j1][j2][j3] * + sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 + + 2*pi*j3*k3/n3)), + 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 + (notes: R[(n1-k1)%n1][(n2-k2)%n2][(n3-k3)%n3] = R[k1][k2][k3], + I[(n1-k1)%n1][(n2-k2)%n2][(n3-k3)%n3] = -I[k1][k2][k3], + 0<=k1<n1, 0<=k2<n2, 0<=k3<n3) + [usage] + <case1> + ip[0] = 0; // first time only + rdft3d(n1, n2, n3, 1, a, t, ip, w); + <case2> + ip[0] = 0; // first time only + rdft3d(n1, n2, n3, -1, a, t, ip, w); + [parameters] + n1 :data length (int) + n1 >= 2, n1 = power of 2 + n2 :data length (int) + n2 >= 2, n2 = power of 2 + n3 :data length (int) + n3 >= 2, n3 = power of 2 + a[0...n1-1][0...n2-1][0...n3-1] + :input/output data (double ***) + <case1> + output data + a[k1][k2][2*k3] = R[k1][k2][k3] + = R[(n1-k1)%n1][(n2-k2)%n2][n3-k3], + a[k1][k2][2*k3+1] = I[k1][k2][k3] + = -I[(n1-k1)%n1][(n2-k2)%n2][n3-k3], + 0<=k1<n1, 0<=k2<n2, 0<k3<n3/2, + (n%m : n mod m), + a[k1][k2][0] = R[k1][k2][0] + = R[(n1-k1)%n1][n2-k2][0], + a[k1][k2][1] = I[k1][k2][0] + = -I[(n1-k1)%n1][n2-k2][0], + a[k1][n2-k2][1] = R[k1][k2][n3/2] + = R[(n1-k1)%n1][n2-k2][n3/2], + a[k1][n2-k2][0] = -I[k1][k2][n3/2] + = I[(n1-k1)%n1][n2-k2][n3/2], + 0<=k1<n1, 0<k2<n2/2, + a[k1][0][0] = R[k1][0][0] + = R[n1-k1][0][0], + a[k1][0][1] = I[k1][0][0] + = -I[n1-k1][0][0], + a[k1][n2/2][0] = R[k1][n2/2][0] + = R[n1-k1][n2/2][0], + a[k1][n2/2][1] = I[k1][n2/2][0] + = -I[n1-k1][n2/2][0], + a[n1-k1][0][1] = R[k1][0][n3/2] + = R[n1-k1][0][n3/2], + a[n1-k1][0][0] = -I[k1][0][n3/2] + = I[n1-k1][0][n3/2], + a[n1-k1][n2/2][1] = R[k1][n2/2][n3/2] + = R[n1-k1][n2/2][n3/2], + a[n1-k1][n2/2][0] = -I[k1][n2/2][n3/2] + = I[n1-k1][n2/2][n3/2], + 0<k1<n1/2, + a[0][0][0] = R[0][0][0], + a[0][0][1] = R[0][0][n3/2], + a[0][n2/2][0] = R[0][n2/2][0], + a[0][n2/2][1] = R[0][n2/2][n3/2], + a[n1/2][0][0] = R[n1/2][0][0], + a[n1/2][0][1] = R[n1/2][0][n3/2], + a[n1/2][n2/2][0] = R[n1/2][n2/2][0], + a[n1/2][n2/2][1] = R[n1/2][n2/2][n3/2] + <case2> + input data + a[j1][j2][2*j3] = R[j1][j2][j3] + = R[(n1-j1)%n1][(n2-j2)%n2][n3-j3], + a[j1][j2][2*j3+1] = I[j1][j2][j3] + = -I[(n1-j1)%n1][(n2-j2)%n2][n3-j3], + 0<=j1<n1, 0<=j2<n2, 0<j3<n3/2, + a[j1][j2][0] = R[j1][j2][0] + = R[(n1-j1)%n1][n2-j2][0], + a[j1][j2][1] = I[j1][j2][0] + = -I[(n1-j1)%n1][n2-j2][0], + a[j1][n2-j2][1] = R[j1][j2][n3/2] + = R[(n1-j1)%n1][n2-j2][n3/2], + a[j1][n2-j2][0] = -I[j1][j2][n3/2] + = I[(n1-j1)%n1][n2-j2][n3/2], + 0<=j1<n1, 0<j2<n2/2, + a[j1][0][0] = R[j1][0][0] + = R[n1-j1][0][0], + a[j1][0][1] = I[j1][0][0] + = -I[n1-j1][0][0], + a[j1][n2/2][0] = R[j1][n2/2][0] + = R[n1-j1][n2/2][0], + a[j1][n2/2][1] = I[j1][n2/2][0] + = -I[n1-j1][n2/2][0], + a[n1-j1][0][1] = R[j1][0][n3/2] + = R[n1-j1][0][n3/2], + a[n1-j1][0][0] = -I[j1][0][n3/2] + = I[n1-j1][0][n3/2], + a[n1-j1][n2/2][1] = R[j1][n2/2][n3/2] + = R[n1-j1][n2/2][n3/2], + a[n1-j1][n2/2][0] = -I[j1][n2/2][n3/2] + = I[n1-j1][n2/2][n3/2], + 0<j1<n1/2, + a[0][0][0] = R[0][0][0], + a[0][0][1] = R[0][0][n3/2], + a[0][n2/2][0] = R[0][n2/2][0], + a[0][n2/2][1] = R[0][n2/2][n3/2], + a[n1/2][0][0] = R[n1/2][0][0], + a[n1/2][0][1] = R[n1/2][0][n3/2], + a[n1/2][n2/2][0] = R[n1/2][n2/2][0], + a[n1/2][n2/2][1] = R[n1/2][n2/2][n3/2] + ---- output ordering ---- + rdft3d(n1, n2, n3, 1, a, t, ip, w); + rdft3dsort(n1, n2, n3, 1, a); + // stored data is a[0...n1-1][0...n2-1][0...n3+1]: + // a[k1][k2][2*k3] = R[k1][k2][k3], + // a[k1][k2][2*k3+1] = I[k1][k2][k3], + // 0<=k1<n1, 0<=k2<n2, 0<=k3<=n3/2. + // the stored data is larger than the input data! + ---- input ordering ---- + rdft3dsort(n1, n2, n3, -1, a); + rdft3d(n1, n2, n3, -1, a, t, ip, w); + t[0...*] + :work area (double *) + length of t >= max(8*n1, 8*n2), if single thread, + length of t >= max(8*n1, 8*n2)*FFT3D_MAX_THREADS, + if multi threads, + t is dynamically allocated, if t == NULL. + ip[0...*] + :work area for bit reversal (int *) + length of ip >= 2+sqrt(n) + (n = max(n1, n2, n3/2)) + ip[0],ip[1] are pointers of the cos/sin table. + w[0...*] + :cos/sin table (double *) + length of w >= max(n1/2, n2/2, n3/4) + n3/4 + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + rdft3d(n1, n2, n3, 1, a, t, ip, w); + is + rdft3d(n1, n2, n3, -1, a, t, ip, w); + for (j1 = 0; j1 <= n1 - 1; j1++) { + for (j2 = 0; j2 <= n2 - 1; j2++) { + for (j3 = 0; j3 <= n3 - 1; j3++) { + a[j1][j2][j3] *= 2.0 / n1 / n2 / n3; + } + } + } + . + + +-------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- + [definition] + <case1> IDCT (excluding scale) + C[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 + a[j1][j2][j3] * + cos(pi*j1*(k1+1/2)/n1) * + cos(pi*j2*(k2+1/2)/n2) * + cos(pi*j3*(k3+1/2)/n3), + 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 + <case2> DCT + C[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 + a[j1][j2][j3] * + cos(pi*(j1+1/2)*k1/n1) * + cos(pi*(j2+1/2)*k2/n2) * + cos(pi*(j3+1/2)*k3/n3), + 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 + [usage] + <case1> + ip[0] = 0; // first time only + ddct3d(n1, n2, n3, 1, a, t, ip, w); + <case2> + ip[0] = 0; // first time only + ddct3d(n1, n2, n3, -1, a, t, ip, w); + [parameters] + n1 :data length (int) + n1 >= 2, n1 = power of 2 + n2 :data length (int) + n2 >= 2, n2 = power of 2 + n3 :data length (int) + n3 >= 2, n3 = power of 2 + a[0...n1-1][0...n2-1][0...n3-1] + :input/output data (double ***) + output data + a[k1][k2][k3] = C[k1][k2][k3], + 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 + t[0...*] + :work area (double *) + length of t >= max(4*n1, 4*n2), if single thread, + length of t >= max(4*n1, 4*n2)*FFT3D_MAX_THREADS, + if multi threads, + t is dynamically allocated, if t == NULL. + ip[0...*] + :work area for bit reversal (int *) + length of ip >= 2+sqrt(n) + (n = max(n1/2, n2/2, n3/2)) + ip[0],ip[1] are pointers of the cos/sin table. + w[0...*] + :cos/sin table (double *) + length of w >= max(n1*3/2, n2*3/2, n3*3/2) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + ddct3d(n1, n2, n3, -1, a, t, ip, w); + is + for (j1 = 0; j1 <= n1 - 1; j1++) { + for (j2 = 0; j2 <= n2 - 1; j2++) { + a[j1][j2][0] *= 0.5; + } + for (j3 = 0; j3 <= n3 - 1; j3++) { + a[j1][0][j3] *= 0.5; + } + } + for (j2 = 0; j2 <= n2 - 1; j2++) { + for (j3 = 0; j3 <= n3 - 1; j3++) { + a[0][j2][j3] *= 0.5; + } + } + ddct3d(n1, n2, n3, 1, a, t, ip, w); + for (j1 = 0; j1 <= n1 - 1; j1++) { + for (j2 = 0; j2 <= n2 - 1; j2++) { + for (j3 = 0; j3 <= n3 - 1; j3++) { + a[j1][j2][j3] *= 8.0 / n1 / n2 / n3; + } + } + } + . + + +-------- DST (Discrete Sine Transform) / Inverse of DST -------- + [definition] + <case1> IDST (excluding scale) + S[k1][k2][k3] = sum_j1=1^n1 sum_j2=1^n2 sum_j3=1^n3 + A[j1][j2][j3] * + sin(pi*j1*(k1+1/2)/n1) * + sin(pi*j2*(k2+1/2)/n2) * + sin(pi*j3*(k3+1/2)/n3), + 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 + <case2> DST + S[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 + a[j1][j2][j3] * + sin(pi*(j1+1/2)*k1/n1) * + sin(pi*(j2+1/2)*k2/n2) * + sin(pi*(j3+1/2)*k3/n3), + 0<k1<=n1, 0<k2<=n2, 0<k3<=n3 + [usage] + <case1> + ip[0] = 0; // first time only + ddst3d(n1, n2, n3, 1, a, t, ip, w); + <case2> + ip[0] = 0; // first time only + ddst3d(n1, n2, n3, -1, a, t, ip, w); + [parameters] + n1 :data length (int) + n1 >= 2, n1 = power of 2 + n2 :data length (int) + n2 >= 2, n2 = power of 2 + n3 :data length (int) + n3 >= 2, n3 = power of 2 + a[0...n1-1][0...n2-1][0...n3-1] + :input/output data (double ***) + <case1> + input data + a[j1%n1][j2%n2][j3%n3] = A[j1][j2][j3], + 0<j1<=n1, 0<j2<=n2, 0<j3<=n3, + (n%m : n mod m) + output data + a[k1][k2][k3] = S[k1][k2][k3], + 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 + <case2> + output data + a[k1%n1][k2%n2][k3%n3] = S[k1][k2][k3], + 0<k1<=n1, 0<k2<=n2, 0<k3<=n3 + t[0...*] + :work area (double *) + length of t >= max(4*n1, 4*n2), if single thread, + length of t >= max(4*n1, 4*n2)*FFT3D_MAX_THREADS, + if multi threads, + t is dynamically allocated, if t == NULL. + ip[0...*] + :work area for bit reversal (int *) + length of ip >= 2+sqrt(n) + (n = max(n1/2, n2/2, n3/2)) + ip[0],ip[1] are pointers of the cos/sin table. + w[0...*] + :cos/sin table (double *) + length of w >= max(n1*3/2, n2*3/2, n3*3/2) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + ddst3d(n1, n2, n3, -1, a, t, ip, w); + is + for (j1 = 0; j1 <= n1 - 1; j1++) { + for (j2 = 0; j2 <= n2 - 1; j2++) { + a[j1][j2][0] *= 0.5; + } + for (j3 = 0; j3 <= n3 - 1; j3++) { + a[j1][0][j3] *= 0.5; + } + } + for (j2 = 0; j2 <= n2 - 1; j2++) { + for (j3 = 0; j3 <= n3 - 1; j3++) { + a[0][j2][j3] *= 0.5; + } + } + ddst3d(n1, n2, n3, 1, a, t, ip, w); + for (j1 = 0; j1 <= n1 - 1; j1++) { + for (j2 = 0; j2 <= n2 - 1; j2++) { + for (j3 = 0; j3 <= n3 - 1; j3++) { + a[j1][j2][j3] *= 8.0 / n1 / n2 / n3; + } + } + } + . +*/ + + +#include <stdio.h> +#include <stdlib.h> +#define fft3d_alloc_error_check(p) { \ + if ((p) == NULL) { \ + fprintf(stderr, "fft3d memory allocation error\n"); \ + exit(1); \ + } \ +} + + +#ifdef USE_FFT3D_PTHREADS +#define USE_FFT3D_THREADS +#ifndef FFT3D_MAX_THREADS +#define FFT3D_MAX_THREADS 4 +#endif +#ifndef FFT3D_THREADS_BEGIN_N +#define FFT3D_THREADS_BEGIN_N 65536 +#endif +#include <pthread.h> +#define fft3d_thread_t pthread_t +#define fft3d_thread_create(thp,func,argp) { \ + if (pthread_create(thp, NULL, func, (void *) (argp)) != 0) { \ + fprintf(stderr, "fft3d thread error\n"); \ + exit(1); \ + } \ +} +#define fft3d_thread_wait(th) { \ + if (pthread_join(th, NULL) != 0) { \ + fprintf(stderr, "fft3d thread error\n"); \ + exit(1); \ + } \ +} +#endif /* USE_FFT3D_PTHREADS */ + + +#ifdef USE_FFT3D_WINTHREADS +#define USE_FFT3D_THREADS +#ifndef FFT3D_MAX_THREADS +#define FFT3D_MAX_THREADS 4 +#endif +#ifndef FFT3D_THREADS_BEGIN_N +#define FFT3D_THREADS_BEGIN_N 131072 +#endif +#include <windows.h> +#define fft3d_thread_t HANDLE +#define fft3d_thread_create(thp,func,argp) { \ + DWORD thid; \ + *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) (func), (LPVOID) (argp), 0, &thid); \ + if (*(thp) == 0) { \ + fprintf(stderr, "fft3d thread error\n"); \ + exit(1); \ + } \ +} +#define fft3d_thread_wait(th) { \ + WaitForSingleObject(th, INFINITE); \ + CloseHandle(th); \ +} +#endif /* USE_FFT3D_WINTHREADS */ + + +void cdft3d(int n1, int n2, int n3, int isgn, double ***a, + double *t, int *ip, double *w) +{ + void makewt(int nw, int *ip, double *w); + void xdft3da_sub(int n1, int n2, int n3, int icr, int isgn, + double ***a, double *t, int *ip, double *w); + void cdft3db_sub(int n1, int n2, int n3, int isgn, double ***a, + double *t, int *ip, double *w); +#ifdef USE_FFT3D_THREADS + void xdft3da_subth(int n1, int n2, int n3, int icr, int isgn, + double ***a, double *t, int *ip, double *w); + void cdft3db_subth(int n1, int n2, int n3, int isgn, double ***a, + double *t, int *ip, double *w); +#endif /* USE_FFT3D_THREADS */ + int n, itnull, nt; + + n = n1; + if (n < n2) { + n = n2; + } + n <<= 1; + if (n < n3) { + n = n3; + } + if (n > (ip[0] << 2)) { + makewt(n >> 2, ip, w); + } + itnull = 0; + if (t == NULL) { + itnull = 1; + nt = n1; + if (nt < n2) { + nt = n2; + } + nt *= 8; +#ifdef USE_FFT3D_THREADS + nt *= FFT3D_MAX_THREADS; +#endif /* USE_FFT3D_THREADS */ + if (n3 == 4) { + nt >>= 1; + } else if (n3 < 4) { + nt >>= 2; + } + t = (double *) malloc(sizeof(double) * nt); + fft3d_alloc_error_check(t); + } +#ifdef USE_FFT3D_THREADS + if ((double) n1 * n2 * n3 >= (double) FFT3D_THREADS_BEGIN_N) { + xdft3da_subth(n1, n2, n3, 0, isgn, a, t, ip, w); + cdft3db_subth(n1, n2, n3, isgn, a, t, ip, w); + } else +#endif /* USE_FFT3D_THREADS */ + { + xdft3da_sub(n1, n2, n3, 0, isgn, a, t, ip, w); + cdft3db_sub(n1, n2, n3, isgn, a, t, ip, w); + } + if (itnull != 0) { + free(t); + } +} + + +void rdft3d(int n1, int n2, int n3, int isgn, double ***a, + double *t, int *ip, double *w) +{ + void makewt(int nw, int *ip, double *w); + void makect(int nc, int *ip, double *c); + void xdft3da_sub(int n1, int n2, int n3, int icr, int isgn, + double ***a, double *t, int *ip, double *w); + void cdft3db_sub(int n1, int n2, int n3, int isgn, double ***a, + double *t, int *ip, double *w); + void rdft3d_sub(int n1, int n2, int n3, int isgn, double ***a); +#ifdef USE_FFT3D_THREADS + void xdft3da_subth(int n1, int n2, int n3, int icr, int isgn, + double ***a, double *t, int *ip, double *w); + void cdft3db_subth(int n1, int n2, int n3, int isgn, double ***a, + double *t, int *ip, double *w); +#endif /* USE_FFT3D_THREADS */ + int n, nw, nc, itnull, nt; + + n = n1; + if (n < n2) { + n = n2; + } + n <<= 1; + if (n < n3) { + n = n3; + } + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n3 > (nc << 2)) { + nc = n3 >> 2; + makect(nc, ip, w + nw); + } + itnull = 0; + if (t == NULL) { + itnull = 1; + nt = n1; + if (nt < n2) { + nt = n2; + } + nt *= 8; +#ifdef USE_FFT3D_THREADS + nt *= FFT3D_MAX_THREADS; +#endif /* USE_FFT3D_THREADS */ + if (n3 == 4) { + nt >>= 1; + } else if (n3 < 4) { + nt >>= 2; + } + t = (double *) malloc(sizeof(double) * nt); + fft3d_alloc_error_check(t); + } +#ifdef USE_FFT3D_THREADS + if ((double) n1 * n2 * n3 >= (double) FFT3D_THREADS_BEGIN_N) { + if (isgn < 0) { + rdft3d_sub(n1, n2, n3, isgn, a); + cdft3db_subth(n1, n2, n3, isgn, a, t, ip, w); + } + xdft3da_subth(n1, n2, n3, 1, isgn, a, t, ip, w); + if (isgn >= 0) { + cdft3db_subth(n1, n2, n3, isgn, a, t, ip, w); + rdft3d_sub(n1, n2, n3, isgn, a); + } + } else +#endif /* USE_FFT3D_THREADS */ + { + if (isgn < 0) { + rdft3d_sub(n1, n2, n3, isgn, a); + cdft3db_sub(n1, n2, n3, isgn, a, t, ip, w); + } + xdft3da_sub(n1, n2, n3, 1, isgn, a, t, ip, w); + if (isgn >= 0) { + cdft3db_sub(n1, n2, n3, isgn, a, t, ip, w); + rdft3d_sub(n1, n2, n3, isgn, a); + } + } + if (itnull != 0) { + free(t); + } +} + + +void rdft3dsort(int n1, int n2, int n3, int isgn, double ***a) +{ + int n1h, n2h, i, j; + double x, y; + + n1h = n1 >> 1; + n2h = n2 >> 1; + if (isgn < 0) { + for (i = 0; i < n1; i++) { + for (j = n2h + 1; j < n2; j++) { + a[i][j][0] = a[i][j][n3 + 1]; + a[i][j][1] = a[i][j][n3]; + } + } + for (i = n1h + 1; i < n1; i++) { + a[i][0][0] = a[i][0][n3 + 1]; + a[i][0][1] = a[i][0][n3]; + a[i][n2h][0] = a[i][n2h][n3 + 1]; + a[i][n2h][1] = a[i][n2h][n3]; + } + a[0][0][1] = a[0][0][n3]; + a[0][n2h][1] = a[0][n2h][n3]; + a[n1h][0][1] = a[n1h][0][n3]; + a[n1h][n2h][1] = a[n1h][n2h][n3]; + } else { + for (j = n2h + 1; j < n2; j++) { + y = a[0][j][0]; + x = a[0][j][1]; + a[0][j][n3] = x; + a[0][j][n3 + 1] = y; + a[0][n2 - j][n3] = x; + a[0][n2 - j][n3 + 1] = -y; + a[0][j][0] = a[0][n2 - j][0]; + a[0][j][1] = -a[0][n2 - j][1]; + } + for (i = 1; i < n1; i++) { + for (j = n2h + 1; j < n2; j++) { + y = a[i][j][0]; + x = a[i][j][1]; + a[i][j][n3] = x; + a[i][j][n3 + 1] = y; + a[n1 - i][n2 - j][n3] = x; + a[n1 - i][n2 - j][n3 + 1] = -y; + a[i][j][0] = a[n1 - i][n2 - j][0]; + a[i][j][1] = -a[n1 - i][n2 - j][1]; + } + } + for (i = n1h + 1; i < n1; i++) { + y = a[i][0][0]; + x = a[i][0][1]; + a[i][0][n3] = x; + a[i][0][n3 + 1] = y; + a[n1 - i][0][n3] = x; + a[n1 - i][0][n3 + 1] = -y; + a[i][0][0] = a[n1 - i][0][0]; + a[i][0][1] = -a[n1 - i][0][1]; + y = a[i][n2h][0]; + x = a[i][n2h][1]; + a[i][n2h][n3] = x; + a[i][n2h][n3 + 1] = y; + a[n1 - i][n2h][n3] = x; + a[n1 - i][n2h][n3 + 1] = -y; + a[i][n2h][0] = a[n1 - i][n2h][0]; + a[i][n2h][1] = -a[n1 - i][n2h][1]; + } + a[0][0][n3] = a[0][0][1]; + a[0][0][n3 + 1] = 0; + a[0][0][1] = 0; + a[0][n2h][n3] = a[0][n2h][1]; + a[0][n2h][n3 + 1] = 0; + a[0][n2h][1] = 0; + a[n1h][0][n3] = a[n1h][0][1]; + a[n1h][0][n3 + 1] = 0; + a[n1h][0][1] = 0; + a[n1h][n2h][n3] = a[n1h][n2h][1]; + a[n1h][n2h][n3 + 1] = 0; + a[n1h][n2h][1] = 0; + } +} + + +void ddct3d(int n1, int n2, int n3, int isgn, double ***a, + double *t, int *ip, double *w) +{ + void makewt(int nw, int *ip, double *w); + void makect(int nc, int *ip, double *c); + void ddxt3da_sub(int n1, int n2, int n3, int ics, int isgn, + double ***a, double *t, int *ip, double *w); + void ddxt3db_sub(int n1, int n2, int n3, int ics, int isgn, + double ***a, double *t, int *ip, double *w); +#ifdef USE_FFT3D_THREADS + void ddxt3da_subth(int n1, int n2, int n3, int ics, int isgn, + double ***a, double *t, int *ip, double *w); + void ddxt3db_subth(int n1, int n2, int n3, int ics, int isgn, + double ***a, double *t, int *ip, double *w); +#endif /* USE_FFT3D_THREADS */ + int n, nw, nc, itnull, nt; + + n = n1; + if (n < n2) { + n = n2; + } + if (n < n3) { + n = n3; + } + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > nc) { + nc = n; + makect(nc, ip, w + nw); + } + itnull = 0; + if (t == NULL) { + itnull = 1; + nt = n1; + if (nt < n2) { + nt = n2; + } + nt *= 4; +#ifdef USE_FFT3D_THREADS + nt *= FFT3D_MAX_THREADS; +#endif /* USE_FFT3D_THREADS */ + if (n3 == 2) { + nt >>= 1; + } + t = (double *) malloc(sizeof(double) * nt); + fft3d_alloc_error_check(t); + } +#ifdef USE_FFT3D_THREADS + if ((double) n1 * n2 * n3 >= (double) FFT3D_THREADS_BEGIN_N) { + ddxt3da_subth(n1, n2, n3, 0, isgn, a, t, ip, w); + ddxt3db_subth(n1, n2, n3, 0, isgn, a, t, ip, w); + } else +#endif /* USE_FFT3D_THREADS */ + { + ddxt3da_sub(n1, n2, n3, 0, isgn, a, t, ip, w); + ddxt3db_sub(n1, n2, n3, 0, isgn, a, t, ip, w); + } + if (itnull != 0) { + free(t); + } +} + + +void ddst3d(int n1, int n2, int n3, int isgn, double ***a, + double *t, int *ip, double *w) +{ + void makewt(int nw, int *ip, double *w); + void makect(int nc, int *ip, double *c); + void ddxt3da_sub(int n1, int n2, int n3, int ics, int isgn, + double ***a, double *t, int *ip, double *w); + void ddxt3db_sub(int n1, int n2, int n3, int ics, int isgn, + double ***a, double *t, int *ip, double *w); +#ifdef USE_FFT3D_THREADS + void ddxt3da_subth(int n1, int n2, int n3, int ics, int isgn, + double ***a, double *t, int *ip, double *w); + void ddxt3db_subth(int n1, int n2, int n3, int ics, int isgn, + double ***a, double *t, int *ip, double *w); +#endif /* USE_FFT3D_THREADS */ + int n, nw, nc, itnull, nt; + + n = n1; + if (n < n2) { + n = n2; + } + if (n < n3) { + n = n3; + } + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > nc) { + nc = n; + makect(nc, ip, w + nw); + } + itnull = 0; + if (t == NULL) { + itnull = 1; + nt = n1; + if (nt < n2) { + nt = n2; + } + nt *= 4; +#ifdef USE_FFT3D_THREADS + nt *= FFT3D_MAX_THREADS; +#endif /* USE_FFT3D_THREADS */ + if (n3 == 2) { + nt >>= 1; + } + t = (double *) malloc(sizeof(double) * nt); + fft3d_alloc_error_check(t); + } +#ifdef USE_FFT3D_THREADS + if ((double) n1 * n2 * n3 >= (double) FFT3D_THREADS_BEGIN_N) { + ddxt3da_subth(n1, n2, n3, 1, isgn, a, t, ip, w); + ddxt3db_subth(n1, n2, n3, 1, isgn, a, t, ip, w); + } else +#endif /* USE_FFT3D_THREADS */ + { + ddxt3da_sub(n1, n2, n3, 1, isgn, a, t, ip, w); + ddxt3db_sub(n1, n2, n3, 1, isgn, a, t, ip, w); + } + if (itnull != 0) { + free(t); + } +} + + +/* -------- child routines -------- */ + + +void xdft3da_sub(int n1, int n2, int n3, int icr, int isgn, + double ***a, double *t, int *ip, double *w) +{ + void cdft(int n, int isgn, double *a, int *ip, double *w); + void rdft(int n, int isgn, double *a, int *ip, double *w); + int i, j, k; + + for (i = 0; i < n1; i++) { + if (icr == 0) { + for (j = 0; j < n2; j++) { + cdft(n3, isgn, a[i][j], ip, w); + } + } else if (isgn >= 0) { + for (j = 0; j < n2; j++) { + rdft(n3, isgn, a[i][j], ip, w); + } + } + if (n3 > 4) { + for (k = 0; k < n3; k += 8) { + for (j = 0; j < n2; j++) { + t[2 * j] = a[i][j][k]; + t[2 * j + 1] = a[i][j][k + 1]; + t[2 * n2 + 2 * j] = a[i][j][k + 2]; + t[2 * n2 + 2 * j + 1] = a[i][j][k + 3]; + t[4 * n2 + 2 * j] = a[i][j][k + 4]; + t[4 * n2 + 2 * j + 1] = a[i][j][k + 5]; + t[6 * n2 + 2 * j] = a[i][j][k + 6]; + t[6 * n2 + 2 * j + 1] = a[i][j][k + 7]; + } + cdft(2 * n2, isgn, t, ip, w); + cdft(2 * n2, isgn, &t[2 * n2], ip, w); + cdft(2 * n2, isgn, &t[4 * n2], ip, w); + cdft(2 * n2, isgn, &t[6 * n2], ip, w); + for (j = 0; j < n2; j++) { + a[i][j][k] = t[2 * j]; + a[i][j][k + 1] = t[2 * j + 1]; + a[i][j][k + 2] = t[2 * n2 + 2 * j]; + a[i][j][k + 3] = t[2 * n2 + 2 * j + 1]; + a[i][j][k + 4] = t[4 * n2 + 2 * j]; + a[i][j][k + 5] = t[4 * n2 + 2 * j + 1]; + a[i][j][k + 6] = t[6 * n2 + 2 * j]; + a[i][j][k + 7] = t[6 * n2 + 2 * j + 1]; + } + } + } else if (n3 == 4) { + for (j = 0; j < n2; j++) { + t[2 * j] = a[i][j][0]; + t[2 * j + 1] = a[i][j][1]; + t[2 * n2 + 2 * j] = a[i][j][2]; + t[2 * n2 + 2 * j + 1] = a[i][j][3]; + } + cdft(2 * n2, isgn, t, ip, w); + cdft(2 * n2, isgn, &t[2 * n2], ip, w); + for (j = 0; j < n2; j++) { + a[i][j][0] = t[2 * j]; + a[i][j][1] = t[2 * j + 1]; + a[i][j][2] = t[2 * n2 + 2 * j]; + a[i][j][3] = t[2 * n2 + 2 * j + 1]; + } + } else if (n3 == 2) { + for (j = 0; j < n2; j++) { + t[2 * j] = a[i][j][0]; + t[2 * j + 1] = a[i][j][1]; + } + cdft(2 * n2, isgn, t, ip, w); + for (j = 0; j < n2; j++) { + a[i][j][0] = t[2 * j]; + a[i][j][1] = t[2 * j + 1]; + } + } + if (icr != 0 && isgn < 0) { + for (j = 0; j < n2; j++) { + rdft(n3, isgn, a[i][j], ip, w); + } + } + } +} + + +void cdft3db_sub(int n1, int n2, int n3, int isgn, double ***a, + double *t, int *ip, double *w) +{ + void cdft(int n, int isgn, double *a, int *ip, double *w); + int i, j, k; + + if (n3 > 4) { + for (j = 0; j < n2; j++) { + for (k = 0; k < n3; k += 8) { + for (i = 0; i < n1; i++) { + t[2 * i] = a[i][j][k]; + t[2 * i + 1] = a[i][j][k + 1]; + t[2 * n1 + 2 * i] = a[i][j][k + 2]; + t[2 * n1 + 2 * i + 1] = a[i][j][k + 3]; + t[4 * n1 + 2 * i] = a[i][j][k + 4]; + t[4 * n1 + 2 * i + 1] = a[i][j][k + 5]; + t[6 * n1 + 2 * i] = a[i][j][k + 6]; + t[6 * n1 + 2 * i + 1] = a[i][j][k + 7]; + } + cdft(2 * n1, isgn, t, ip, w); + cdft(2 * n1, isgn, &t[2 * n1], ip, w); + cdft(2 * n1, isgn, &t[4 * n1], ip, w); + cdft(2 * n1, isgn, &t[6 * n1], ip, w); + for (i = 0; i < n1; i++) { + a[i][j][k] = t[2 * i]; + a[i][j][k + 1] = t[2 * i + 1]; + a[i][j][k + 2] = t[2 * n1 + 2 * i]; + a[i][j][k + 3] = t[2 * n1 + 2 * i + 1]; + a[i][j][k + 4] = t[4 * n1 + 2 * i]; + a[i][j][k + 5] = t[4 * n1 + 2 * i + 1]; + a[i][j][k + 6] = t[6 * n1 + 2 * i]; + a[i][j][k + 7] = t[6 * n1 + 2 * i + 1]; + } + } + } + } else if (n3 == 4) { + for (j = 0; j < n2; j++) { + for (i = 0; i < n1; i++) { + t[2 * i] = a[i][j][0]; + t[2 * i + 1] = a[i][j][1]; + t[2 * n1 + 2 * i] = a[i][j][2]; + t[2 * n1 + 2 * i + 1] = a[i][j][3]; + } + cdft(2 * n1, isgn, t, ip, w); + cdft(2 * n1, isgn, &t[2 * n1], ip, w); + for (i = 0; i < n1; i++) { + a[i][j][0] = t[2 * i]; + a[i][j][1] = t[2 * i + 1]; + a[i][j][2] = t[2 * n1 + 2 * i]; + a[i][j][3] = t[2 * n1 + 2 * i + 1]; + } + } + } else if (n3 == 2) { + for (j = 0; j < n2; j++) { + for (i = 0; i < n1; i++) { + t[2 * i] = a[i][j][0]; + t[2 * i + 1] = a[i][j][1]; + } + cdft(2 * n1, isgn, t, ip, w); + for (i = 0; i < n1; i++) { + a[i][j][0] = t[2 * i]; + a[i][j][1] = t[2 * i + 1]; + } + } + } +} + + +void rdft3d_sub(int n1, int n2, int n3, int isgn, double ***a) +{ + int n1h, n2h, i, j, k, l; + double xi; + + n1h = n1 >> 1; + n2h = n2 >> 1; + if (isgn < 0) { + for (i = 1; i < n1h; i++) { + j = n1 - i; + xi = a[i][0][0] - a[j][0][0]; + a[i][0][0] += a[j][0][0]; + a[j][0][0] = xi; + xi = a[j][0][1] - a[i][0][1]; + a[i][0][1] += a[j][0][1]; + a[j][0][1] = xi; + xi = a[i][n2h][0] - a[j][n2h][0]; + a[i][n2h][0] += a[j][n2h][0]; + a[j][n2h][0] = xi; + xi = a[j][n2h][1] - a[i][n2h][1]; + a[i][n2h][1] += a[j][n2h][1]; + a[j][n2h][1] = xi; + for (k = 1; k < n2h; k++) { + l = n2 - k; + xi = a[i][k][0] - a[j][l][0]; + a[i][k][0] += a[j][l][0]; + a[j][l][0] = xi; + xi = a[j][l][1] - a[i][k][1]; + a[i][k][1] += a[j][l][1]; + a[j][l][1] = xi; + xi = a[j][k][0] - a[i][l][0]; + a[j][k][0] += a[i][l][0]; + a[i][l][0] = xi; + xi = a[i][l][1] - a[j][k][1]; + a[j][k][1] += a[i][l][1]; + a[i][l][1] = xi; + } + } + for (k = 1; k < n2h; k++) { + l = n2 - k; + xi = a[0][k][0] - a[0][l][0]; + a[0][k][0] += a[0][l][0]; + a[0][l][0] = xi; + xi = a[0][l][1] - a[0][k][1]; + a[0][k][1] += a[0][l][1]; + a[0][l][1] = xi; + xi = a[n1h][k][0] - a[n1h][l][0]; + a[n1h][k][0] += a[n1h][l][0]; + a[n1h][l][0] = xi; + xi = a[n1h][l][1] - a[n1h][k][1]; + a[n1h][k][1] += a[n1h][l][1]; + a[n1h][l][1] = xi; + } + } else { + for (i = 1; i < n1h; i++) { + j = n1 - i; + a[j][0][0] = 0.5 * (a[i][0][0] - a[j][0][0]); + a[i][0][0] -= a[j][0][0]; + a[j][0][1] = 0.5 * (a[i][0][1] + a[j][0][1]); + a[i][0][1] -= a[j][0][1]; + a[j][n2h][0] = 0.5 * (a[i][n2h][0] - a[j][n2h][0]); + a[i][n2h][0] -= a[j][n2h][0]; + a[j][n2h][1] = 0.5 * (a[i][n2h][1] + a[j][n2h][1]); + a[i][n2h][1] -= a[j][n2h][1]; + for (k = 1; k < n2h; k++) { + l = n2 - k; + a[j][l][0] = 0.5 * (a[i][k][0] - a[j][l][0]); + a[i][k][0] -= a[j][l][0]; + a[j][l][1] = 0.5 * (a[i][k][1] + a[j][l][1]); + a[i][k][1] -= a[j][l][1]; + a[i][l][0] = 0.5 * (a[j][k][0] - a[i][l][0]); + a[j][k][0] -= a[i][l][0]; + a[i][l][1] = 0.5 * (a[j][k][1] + a[i][l][1]); + a[j][k][1] -= a[i][l][1]; + } + } + for (k = 1; k < n2h; k++) { + l = n2 - k; + a[0][l][0] = 0.5 * (a[0][k][0] - a[0][l][0]); + a[0][k][0] -= a[0][l][0]; + a[0][l][1] = 0.5 * (a[0][k][1] + a[0][l][1]); + a[0][k][1] -= a[0][l][1]; + a[n1h][l][0] = 0.5 * (a[n1h][k][0] - a[n1h][l][0]); + a[n1h][k][0] -= a[n1h][l][0]; + a[n1h][l][1] = 0.5 * (a[n1h][k][1] + a[n1h][l][1]); + a[n1h][k][1] -= a[n1h][l][1]; + } + } +} + + +void ddxt3da_sub(int n1, int n2, int n3, int ics, int isgn, + double ***a, double *t, int *ip, double *w) +{ + void ddct(int n, int isgn, double *a, int *ip, double *w); + void ddst(int n, int isgn, double *a, int *ip, double *w); + int i, j, k; + + for (i = 0; i < n1; i++) { + if (ics == 0) { + for (j = 0; j < n2; j++) { + ddct(n3, isgn, a[i][j], ip, w); + } + } else { + for (j = 0; j < n2; j++) { + ddst(n3, isgn, a[i][j], ip, w); + } + } + if (n3 > 2) { + for (k = 0; k < n3; k += 4) { + for (j = 0; j < n2; j++) { + t[j] = a[i][j][k]; + t[n2 + j] = a[i][j][k + 1]; + t[2 * n2 + j] = a[i][j][k + 2]; + t[3 * n2 + j] = a[i][j][k + 3]; + } + if (ics == 0) { + ddct(n2, isgn, t, ip, w); + ddct(n2, isgn, &t[n2], ip, w); + ddct(n2, isgn, &t[2 * n2], ip, w); + ddct(n2, isgn, &t[3 * n2], ip, w); + } else { + ddst(n2, isgn, t, ip, w); + ddst(n2, isgn, &t[n2], ip, w); + ddst(n2, isgn, &t[2 * n2], ip, w); + ddst(n2, isgn, &t[3 * n2], ip, w); + } + for (j = 0; j < n2; j++) { + a[i][j][k] = t[j]; + a[i][j][k + 1] = t[n2 + j]; + a[i][j][k + 2] = t[2 * n2 + j]; + a[i][j][k + 3] = t[3 * n2 + j]; + } + } + } else if (n3 == 2) { + for (j = 0; j < n2; j++) { + t[j] = a[i][j][0]; + t[n2 + j] = a[i][j][1]; + } + if (ics == 0) { + ddct(n2, isgn, t, ip, w); + ddct(n2, isgn, &t[n2], ip, w); + } else { + ddst(n2, isgn, t, ip, w); + ddst(n2, isgn, &t[n2], ip, w); + } + for (j = 0; j < n2; j++) { + a[i][j][0] = t[j]; + a[i][j][1] = t[n2 + j]; + } + } + } +} + + +void ddxt3db_sub(int n1, int n2, int n3, int ics, int isgn, + double ***a, double *t, int *ip, double *w) +{ + void ddct(int n, int isgn, double *a, int *ip, double *w); + void ddst(int n, int isgn, double *a, int *ip, double *w); + int i, j, k; + + if (n3 > 2) { + for (j = 0; j < n2; j++) { + for (k = 0; k < n3; k += 4) { + for (i = 0; i < n1; i++) { + t[i] = a[i][j][k]; + t[n1 + i] = a[i][j][k + 1]; + t[2 * n1 + i] = a[i][j][k + 2]; + t[3 * n1 + i] = a[i][j][k + 3]; + } + if (ics == 0) { + ddct(n1, isgn, t, ip, w); + ddct(n1, isgn, &t[n1], ip, w); + ddct(n1, isgn, &t[2 * n1], ip, w); + ddct(n1, isgn, &t[3 * n1], ip, w); + } else { + ddst(n1, isgn, t, ip, w); + ddst(n1, isgn, &t[n1], ip, w); + ddst(n1, isgn, &t[2 * n1], ip, w); + ddst(n1, isgn, &t[3 * n1], ip, w); + } + for (i = 0; i < n1; i++) { + a[i][j][k] = t[i]; + a[i][j][k + 1] = t[n1 + i]; + a[i][j][k + 2] = t[2 * n1 + i]; + a[i][j][k + 3] = t[3 * n1 + i]; + } + } + } + } else if (n3 == 2) { + for (j = 0; j < n2; j++) { + for (i = 0; i < n1; i++) { + t[i] = a[i][j][0]; + t[n1 + i] = a[i][j][1]; + } + if (ics == 0) { + ddct(n1, isgn, t, ip, w); + ddct(n1, isgn, &t[n1], ip, w); + } else { + ddst(n1, isgn, t, ip, w); + ddst(n1, isgn, &t[n1], ip, w); + } + for (i = 0; i < n1; i++) { + a[i][j][0] = t[i]; + a[i][j][1] = t[n1 + i]; + } + } + } +} + + +#ifdef USE_FFT3D_THREADS +struct fft3d_arg_st { + int nthread; + int n0; + int n1; + int n2; + int n3; + int ic; + int isgn; + double ***a; + double *t; + int *ip; + double *w; +}; +typedef struct fft3d_arg_st fft3d_arg_t; + + +void xdft3da_subth(int n1, int n2, int n3, int icr, int isgn, + double ***a, double *t, int *ip, double *w) +{ + void *xdft3da_th(void *p); + fft3d_thread_t th[FFT3D_MAX_THREADS]; + fft3d_arg_t ag[FFT3D_MAX_THREADS]; + int nthread, nt, i; + + nthread = FFT3D_MAX_THREADS; + if (nthread > n1) { + nthread = n1; + } + nt = 8 * n2; + if (n3 == 4) { + nt >>= 1; + } else if (n3 < 4) { + nt >>= 2; + } + for (i = 0; i < nthread; i++) { + ag[i].nthread = nthread; + ag[i].n0 = i; + ag[i].n1 = n1; + ag[i].n2 = n2; + ag[i].n3 = n3; + ag[i].ic = icr; + ag[i].isgn = isgn; + ag[i].a = a; + ag[i].t = &t[nt * i]; + ag[i].ip = ip; + ag[i].w = w; + fft3d_thread_create(&th[i], xdft3da_th, &ag[i]); + } + for (i = 0; i < nthread; i++) { + fft3d_thread_wait(th[i]); + } +} + + +void cdft3db_subth(int n1, int n2, int n3, int isgn, double ***a, + double *t, int *ip, double *w) +{ + void *cdft3db_th(void *p); + fft3d_thread_t th[FFT3D_MAX_THREADS]; + fft3d_arg_t ag[FFT3D_MAX_THREADS]; + int nthread, nt, i; + + nthread = FFT3D_MAX_THREADS; + if (nthread > n2) { + nthread = n2; + } + nt = 8 * n1; + if (n3 == 4) { + nt >>= 1; + } else if (n3 < 4) { + nt >>= 2; + } + for (i = 0; i < nthread; i++) { + ag[i].nthread = nthread; + ag[i].n0 = i; + ag[i].n1 = n1; + ag[i].n2 = n2; + ag[i].n3 = n3; + ag[i].isgn = isgn; + ag[i].a = a; + ag[i].t = &t[nt * i]; + ag[i].ip = ip; + ag[i].w = w; + fft3d_thread_create(&th[i], cdft3db_th, &ag[i]); + } + for (i = 0; i < nthread; i++) { + fft3d_thread_wait(th[i]); + } +} + + +void ddxt3da_subth(int n1, int n2, int n3, int ics, int isgn, + double ***a, double *t, int *ip, double *w) +{ + void *ddxt3da_th(void *p); + fft3d_thread_t th[FFT3D_MAX_THREADS]; + fft3d_arg_t ag[FFT3D_MAX_THREADS]; + int nthread, nt, i; + + nthread = FFT3D_MAX_THREADS; + if (nthread > n1) { + nthread = n1; + } + nt = 4 * n2; + if (n3 == 2) { + nt >>= 1; + } + for (i = 0; i < nthread; i++) { + ag[i].nthread = nthread; + ag[i].n0 = i; + ag[i].n1 = n1; + ag[i].n2 = n2; + ag[i].n3 = n3; + ag[i].ic = ics; + ag[i].isgn = isgn; + ag[i].a = a; + ag[i].t = &t[nt * i]; + ag[i].ip = ip; + ag[i].w = w; + fft3d_thread_create(&th[i], ddxt3da_th, &ag[i]); + } + for (i = 0; i < nthread; i++) { + fft3d_thread_wait(th[i]); + } +} + + +void ddxt3db_subth(int n1, int n2, int n3, int ics, int isgn, + double ***a, double *t, int *ip, double *w) +{ + void *ddxt3db_th(void *p); + fft3d_thread_t th[FFT3D_MAX_THREADS]; + fft3d_arg_t ag[FFT3D_MAX_THREADS]; + int nthread, nt, i; + + nthread = FFT3D_MAX_THREADS; + if (nthread > n2) { + nthread = n2; + } + nt = 4 * n1; + if (n3 == 2) { + nt >>= 1; + } + for (i = 0; i < nthread; i++) { + ag[i].nthread = nthread; + ag[i].n0 = i; + ag[i].n1 = n1; + ag[i].n2 = n2; + ag[i].n3 = n3; + ag[i].ic = ics; + ag[i].isgn = isgn; + ag[i].a = a; + ag[i].t = &t[nt * i]; + ag[i].ip = ip; + ag[i].w = w; + fft3d_thread_create(&th[i], ddxt3db_th, &ag[i]); + } + for (i = 0; i < nthread; i++) { + fft3d_thread_wait(th[i]); + } +} + + +void *xdft3da_th(void *p) +{ + void cdft(int n, int isgn, double *a, int *ip, double *w); + void rdft(int n, int isgn, double *a, int *ip, double *w); + int nthread, n0, n1, n2, n3, icr, isgn, *ip, i, j, k; + double ***a, *t, *w; + + nthread = ((fft3d_arg_t *) p)->nthread; + n0 = ((fft3d_arg_t *) p)->n0; + n1 = ((fft3d_arg_t *) p)->n1; + n2 = ((fft3d_arg_t *) p)->n2; + n3 = ((fft3d_arg_t *) p)->n3; + icr = ((fft3d_arg_t *) p)->ic; + isgn = ((fft3d_arg_t *) p)->isgn; + a = ((fft3d_arg_t *) p)->a; + t = ((fft3d_arg_t *) p)->t; + ip = ((fft3d_arg_t *) p)->ip; + w = ((fft3d_arg_t *) p)->w; + for (i = n0; i < n1; i += nthread) { + if (icr == 0) { + for (j = 0; j < n2; j++) { + cdft(n3, isgn, a[i][j], ip, w); + } + } else if (isgn >= 0) { + for (j = 0; j < n2; j++) { + rdft(n3, isgn, a[i][j], ip, w); + } + } + if (n3 > 4) { + for (k = 0; k < n3; k += 8) { + for (j = 0; j < n2; j++) { + t[2 * j] = a[i][j][k]; + t[2 * j + 1] = a[i][j][k + 1]; + t[2 * n2 + 2 * j] = a[i][j][k + 2]; + t[2 * n2 + 2 * j + 1] = a[i][j][k + 3]; + t[4 * n2 + 2 * j] = a[i][j][k + 4]; + t[4 * n2 + 2 * j + 1] = a[i][j][k + 5]; + t[6 * n2 + 2 * j] = a[i][j][k + 6]; + t[6 * n2 + 2 * j + 1] = a[i][j][k + 7]; + } + cdft(2 * n2, isgn, t, ip, w); + cdft(2 * n2, isgn, &t[2 * n2], ip, w); + cdft(2 * n2, isgn, &t[4 * n2], ip, w); + cdft(2 * n2, isgn, &t[6 * n2], ip, w); + for (j = 0; j < n2; j++) { + a[i][j][k] = t[2 * j]; + a[i][j][k + 1] = t[2 * j + 1]; + a[i][j][k + 2] = t[2 * n2 + 2 * j]; + a[i][j][k + 3] = t[2 * n2 + 2 * j + 1]; + a[i][j][k + 4] = t[4 * n2 + 2 * j]; + a[i][j][k + 5] = t[4 * n2 + 2 * j + 1]; + a[i][j][k + 6] = t[6 * n2 + 2 * j]; + a[i][j][k + 7] = t[6 * n2 + 2 * j + 1]; + } + } + } else if (n3 == 4) { + for (j = 0; j < n2; j++) { + t[2 * j] = a[i][j][0]; + t[2 * j + 1] = a[i][j][1]; + t[2 * n2 + 2 * j] = a[i][j][2]; + t[2 * n2 + 2 * j + 1] = a[i][j][3]; + } + cdft(2 * n2, isgn, t, ip, w); + cdft(2 * n2, isgn, &t[2 * n2], ip, w); + for (j = 0; j < n2; j++) { + a[i][j][0] = t[2 * j]; + a[i][j][1] = t[2 * j + 1]; + a[i][j][2] = t[2 * n2 + 2 * j]; + a[i][j][3] = t[2 * n2 + 2 * j + 1]; + } + } else if (n3 == 2) { + for (j = 0; j < n2; j++) { + t[2 * j] = a[i][j][0]; + t[2 * j + 1] = a[i][j][1]; + } + cdft(2 * n2, isgn, t, ip, w); + for (j = 0; j < n2; j++) { + a[i][j][0] = t[2 * j]; + a[i][j][1] = t[2 * j + 1]; + } + } + if (icr != 0 && isgn < 0) { + for (j = 0; j < n2; j++) { + rdft(n3, isgn, a[i][j], ip, w); + } + } + } + return (void *) 0; +} + + +void *cdft3db_th(void *p) +{ + void cdft(int n, int isgn, double *a, int *ip, double *w); + int nthread, n0, n1, n2, n3, isgn, *ip, i, j, k; + double ***a, *t, *w; + + nthread = ((fft3d_arg_t *) p)->nthread; + n0 = ((fft3d_arg_t *) p)->n0; + n1 = ((fft3d_arg_t *) p)->n1; + n2 = ((fft3d_arg_t *) p)->n2; + n3 = ((fft3d_arg_t *) p)->n3; + isgn = ((fft3d_arg_t *) p)->isgn; + a = ((fft3d_arg_t *) p)->a; + t = ((fft3d_arg_t *) p)->t; + ip = ((fft3d_arg_t *) p)->ip; + w = ((fft3d_arg_t *) p)->w; + if (n3 > 4) { + for (j = n0; j < n2; j += nthread) { + for (k = 0; k < n3; k += 8) { + for (i = 0; i < n1; i++) { + t[2 * i] = a[i][j][k]; + t[2 * i + 1] = a[i][j][k + 1]; + t[2 * n1 + 2 * i] = a[i][j][k + 2]; + t[2 * n1 + 2 * i + 1] = a[i][j][k + 3]; + t[4 * n1 + 2 * i] = a[i][j][k + 4]; + t[4 * n1 + 2 * i + 1] = a[i][j][k + 5]; + t[6 * n1 + 2 * i] = a[i][j][k + 6]; + t[6 * n1 + 2 * i + 1] = a[i][j][k + 7]; + } + cdft(2 * n1, isgn, t, ip, w); + cdft(2 * n1, isgn, &t[2 * n1], ip, w); + cdft(2 * n1, isgn, &t[4 * n1], ip, w); + cdft(2 * n1, isgn, &t[6 * n1], ip, w); + for (i = 0; i < n1; i++) { + a[i][j][k] = t[2 * i]; + a[i][j][k + 1] = t[2 * i + 1]; + a[i][j][k + 2] = t[2 * n1 + 2 * i]; + a[i][j][k + 3] = t[2 * n1 + 2 * i + 1]; + a[i][j][k + 4] = t[4 * n1 + 2 * i]; + a[i][j][k + 5] = t[4 * n1 + 2 * i + 1]; + a[i][j][k + 6] = t[6 * n1 + 2 * i]; + a[i][j][k + 7] = t[6 * n1 + 2 * i + 1]; + } + } + } + } else if (n3 == 4) { + for (j = n0; j < n2; j += nthread) { + for (i = 0; i < n1; i++) { + t[2 * i] = a[i][j][0]; + t[2 * i + 1] = a[i][j][1]; + t[2 * n1 + 2 * i] = a[i][j][2]; + t[2 * n1 + 2 * i + 1] = a[i][j][3]; + } + cdft(2 * n1, isgn, t, ip, w); + cdft(2 * n1, isgn, &t[2 * n1], ip, w); + for (i = 0; i < n1; i++) { + a[i][j][0] = t[2 * i]; + a[i][j][1] = t[2 * i + 1]; + a[i][j][2] = t[2 * n1 + 2 * i]; + a[i][j][3] = t[2 * n1 + 2 * i + 1]; + } + } + } else if (n3 == 2) { + for (j = n0; j < n2; j += nthread) { + for (i = 0; i < n1; i++) { + t[2 * i] = a[i][j][0]; + t[2 * i + 1] = a[i][j][1]; + } + cdft(2 * n1, isgn, t, ip, w); + for (i = 0; i < n1; i++) { + a[i][j][0] = t[2 * i]; + a[i][j][1] = t[2 * i + 1]; + } + } + } + return (void *) 0; +} + + +void *ddxt3da_th(void *p) +{ + void ddct(int n, int isgn, double *a, int *ip, double *w); + void ddst(int n, int isgn, double *a, int *ip, double *w); + int nthread, n0, n1, n2, n3, ics, isgn, *ip, i, j, k; + double ***a, *t, *w; + + nthread = ((fft3d_arg_t *) p)->nthread; + n0 = ((fft3d_arg_t *) p)->n0; + n1 = ((fft3d_arg_t *) p)->n1; + n2 = ((fft3d_arg_t *) p)->n2; + n3 = ((fft3d_arg_t *) p)->n3; + ics = ((fft3d_arg_t *) p)->ic; + isgn = ((fft3d_arg_t *) p)->isgn; + a = ((fft3d_arg_t *) p)->a; + t = ((fft3d_arg_t *) p)->t; + ip = ((fft3d_arg_t *) p)->ip; + w = ((fft3d_arg_t *) p)->w; + for (i = n0; i < n1; i += nthread) { + if (ics == 0) { + for (j = 0; j < n2; j++) { + ddct(n3, isgn, a[i][j], ip, w); + } + } else { + for (j = 0; j < n2; j++) { + ddst(n3, isgn, a[i][j], ip, w); + } + } + if (n3 > 2) { + for (k = 0; k < n3; k += 4) { + for (j = 0; j < n2; j++) { + t[j] = a[i][j][k]; + t[n2 + j] = a[i][j][k + 1]; + t[2 * n2 + j] = a[i][j][k + 2]; + t[3 * n2 + j] = a[i][j][k + 3]; + } + if (ics == 0) { + ddct(n2, isgn, t, ip, w); + ddct(n2, isgn, &t[n2], ip, w); + ddct(n2, isgn, &t[2 * n2], ip, w); + ddct(n2, isgn, &t[3 * n2], ip, w); + } else { + ddst(n2, isgn, t, ip, w); + ddst(n2, isgn, &t[n2], ip, w); + ddst(n2, isgn, &t[2 * n2], ip, w); + ddst(n2, isgn, &t[3 * n2], ip, w); + } + for (j = 0; j < n2; j++) { + a[i][j][k] = t[j]; + a[i][j][k + 1] = t[n2 + j]; + a[i][j][k + 2] = t[2 * n2 + j]; + a[i][j][k + 3] = t[3 * n2 + j]; + } + } + } else if (n3 == 2) { + for (j = 0; j < n2; j++) { + t[j] = a[i][j][0]; + t[n2 + j] = a[i][j][1]; + } + if (ics == 0) { + ddct(n2, isgn, t, ip, w); + ddct(n2, isgn, &t[n2], ip, w); + } else { + ddst(n2, isgn, t, ip, w); + ddst(n2, isgn, &t[n2], ip, w); + } + for (j = 0; j < n2; j++) { + a[i][j][0] = t[j]; + a[i][j][1] = t[n2 + j]; + } + } + } + return (void *) 0; +} + + +void *ddxt3db_th(void *p) +{ + void ddct(int n, int isgn, double *a, int *ip, double *w); + void ddst(int n, int isgn, double *a, int *ip, double *w); + int nthread, n0, n1, n2, n3, ics, isgn, *ip, i, j, k; + double ***a, *t, *w; + + nthread = ((fft3d_arg_t *) p)->nthread; + n0 = ((fft3d_arg_t *) p)->n0; + n1 = ((fft3d_arg_t *) p)->n1; + n2 = ((fft3d_arg_t *) p)->n2; + n3 = ((fft3d_arg_t *) p)->n3; + ics = ((fft3d_arg_t *) p)->ic; + isgn = ((fft3d_arg_t *) p)->isgn; + a = ((fft3d_arg_t *) p)->a; + t = ((fft3d_arg_t *) p)->t; + ip = ((fft3d_arg_t *) p)->ip; + w = ((fft3d_arg_t *) p)->w; + if (n3 > 2) { + for (j = n0; j < n2; j += nthread) { + for (k = 0; k < n3; k += 4) { + for (i = 0; i < n1; i++) { + t[i] = a[i][j][k]; + t[n1 + i] = a[i][j][k + 1]; + t[2 * n1 + i] = a[i][j][k + 2]; + t[3 * n1 + i] = a[i][j][k + 3]; + } + if (ics == 0) { + ddct(n1, isgn, t, ip, w); + ddct(n1, isgn, &t[n1], ip, w); + ddct(n1, isgn, &t[2 * n1], ip, w); + ddct(n1, isgn, &t[3 * n1], ip, w); + } else { + ddst(n1, isgn, t, ip, w); + ddst(n1, isgn, &t[n1], ip, w); + ddst(n1, isgn, &t[2 * n1], ip, w); + ddst(n1, isgn, &t[3 * n1], ip, w); + } + for (i = 0; i < n1; i++) { + a[i][j][k] = t[i]; + a[i][j][k + 1] = t[n1 + i]; + a[i][j][k + 2] = t[2 * n1 + i]; + a[i][j][k + 3] = t[3 * n1 + i]; + } + } + } + } else if (n3 == 2) { + for (j = n0; j < n2; j += nthread) { + for (i = 0; i < n1; i++) { + t[i] = a[i][j][0]; + t[n1 + i] = a[i][j][1]; + } + if (ics == 0) { + ddct(n1, isgn, t, ip, w); + ddct(n1, isgn, &t[n1], ip, w); + } else { + ddst(n1, isgn, t, ip, w); + ddst(n1, isgn, &t[n1], ip, w); + } + for (i = 0; i < n1; i++) { + a[i][j][0] = t[i]; + a[i][j][1] = t[n1 + i]; + } + } + } + return (void *) 0; +} +#endif /* USE_FFT3D_THREADS */ + diff --git a/src/fft2d/fft2d/fftsg3d.f b/src/fft2d/fft2d/fftsg3d.f new file mode 100644 index 0000000..350bbef --- /dev/null +++ b/src/fft2d/fft2d/fftsg3d.f @@ -0,0 +1,926 @@ +! Fast Fourier/Cosine/Sine Transform +! dimension :three +! data length :power of 2 +! decimation :frequency +! radix :split-radix, row-column +! data :inplace +! table :use +! subroutines +! cdft3d: Complex Discrete Fourier Transform +! rdft3d: Real Discrete Fourier Transform +! ddct3d: Discrete Cosine Transform +! ddst3d: Discrete Sine Transform +! necessary package +! fftsg.f : 1D-FFT package +! +! +! -------- Complex DFT (Discrete Fourier Transform) -------- +! [definition] +! <case1> +! X(k1,k2,k3) = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 +! x(j1,j2,j3) * +! exp(2*pi*i*j1*k1/n1) * +! exp(2*pi*i*j2*k2/n2) * +! exp(2*pi*i*j3*k3/n3), +! 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 +! <case2> +! X(k1,k2,k3) = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 +! x(j1,j2,j3) * +! exp(-2*pi*i*j1*k1/n1) * +! exp(-2*pi*i*j2*k2/n2) * +! exp(-2*pi*i*j3*k3/n3), +! 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 +! (notes: sum_j=0^n-1 is a summation from j=0 to n-1) +! [usage] +! <case1> +! ip(0) = 0 ! first time only +! call cdft3d(n1max, n2max, 2*n1, n2, n3, 1, a, t, ip, w) +! <case2> +! ip(0) = 0 ! first time only +! call cdft3d(n1max, n2max, 2*n1, n2, n3, -1, a, t, ip, w) +! [parameters] +! n1max :row1 size of the 3D array (integer) +! n2max :row2 size of the 3D array (integer) +! 2*n1 :data length (integer) +! n1 >= 1, n1 = power of 2 +! n2 :data length (integer) +! n2 >= 1, n2 = power of 2 +! n3 :data length (integer) +! n3 >= 1, n3 = power of 2 +! a(0:2*n1-1,0:n2-1,0:n3-1) +! :input/output data (real*8) +! input data +! a(2*j1,j2,j3) = Re(x(j1,j2,j3)), +! a(2*j1+1,j2,j3) = Im(x(j1,j2,j3)), +! 0<=j1<n1, 0<=j2<n2, 0<=j3<n3 +! output data +! a(2*k1,k2,k3) = Re(X(k1,k2,k3)), +! a(2*k1+1,k2,k3) = Im(X(k1,k2,k3)), +! 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 +! t(0:*) :work area (real*8) +! length of t >= max(8*n2, 8*n3) +! ip(0:*):work area for bit reversal (integer) +! length of ip >= 2+sqrt(n) +! (n = max(n1, n2, n3)) +! ip(0),ip(1) are pointers of the cos/sin table. +! w(0:*) :cos/sin table (real*8) +! length of w >= max(n1/2, n2/2, n3/2) +! w(),ip() are initialized if ip(0) = 0. +! [remark] +! Inverse of +! call cdft3d(n1max, n2max, 2*n1, n2, n3, -1, a, t, ip, w) +! is +! call cdft3d(n1max, n2max, 2*n1, n2, n3, 1, a, t, ip, w) +! do j3 = 0, n3 - 1 +! do j2 = 0, n2 - 1 +! do j1 = 0, 2 * n1 - 1 +! a(j1,j2,j3) = a(j1,j2,j3) * (1.0d0/n1/n2/n3) +! end do +! end do +! end do +! . +! +! +! -------- Real DFT / Inverse of Real DFT -------- +! [definition] +! <case1> RDFT +! R(k1,k2,k3) = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 +! a(j1,j2,j3) * +! cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 + +! 2*pi*j3*k3/n3), +! 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 +! I(k1,k2,k3) = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 +! a(j1,j2,j3) * +! sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 + +! 2*pi*j3*k3/n3), +! 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 +! <case2> IRDFT (excluding scale) +! a(k1,k2,k3) = (1/2) * sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 +! (R(j1,j2,j3) * +! cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 + +! 2*pi*j3*k3/n3) + +! I(j1,j2,j3) * +! sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 + +! 2*pi*j3*k3/n3)), +! 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 +! (notes: R(mod(n1-k1,n1),mod(n2-k2,n2),mod(n3-k3,n3)) = R(k1,k2,k3), +! I(mod(n1-k1,n1),mod(n2-k2,n2),mod(n3-k3,n3)) = -I(k1,k2,k3), +! 0<=k1<n1, 0<=k2<n2, 0<=k3<n3) +! [usage] +! <case1> +! ip(0) = 0 ! first time only +! call rdft3d(n1max, n2max, n1, n2, n3, 1, a, t, ip, w) +! <case2> +! ip(0) = 0 ! first time only +! call rdft3d(n1max, n2max, n1, n2, n3, -1, a, t, ip, w) +! [parameters] +! n1max :row1 size of the 3D array (integer) +! n2max :row2 size of the 3D array (integer) +! n1 :data length (integer) +! n1 >= 2, n1 = power of 2 +! n2 :data length (integer) +! n2 >= 2, n2 = power of 2 +! n3 :data length (integer) +! n3 >= 2, n3 = power of 2 +! a(0:n1-1,0:n2-1,0:n3-1) +! :input/output data (real*8) +! <case1> +! output data +! a(2*k1,k2,k3) = R(k1,k2,k3) +! = R(n1-k1,mod(n2-k2,n2),mod(n3-k3,n3)), +! a(2*k1+1,k2,k3) = I(k1,k2,k3) +! = -I(n1-k1,mod(n2-k2,n2),mod(n3-k3,n3)), +! 0<k1<n1/2, 0<=k2<n2, 0<=k3<n3, +! a(0,k2,k3) = R(0,k2,k3) +! = R(0,n2-k2,mod(n3-k3,n3)), +! a(1,k2,k3) = I(0,k2,k3) +! = -I(0,n2-k2,mod(n3-k3,n3)), +! a(1,n2-k2,k3) = R(n1/2,k2,k3) +! = R(n1/2,n2-k2,mod(n3-k3,n3)), +! a(0,n2-k2,k3) = -I(n1/2,k2,k3) +! = I(n1/2,n2-k2,mod(n3-k3,n3)), +! 0<k2<n2/2, 0<=k3<n3, +! a(0,0,k3) = R(0,0,k3) +! = R(0,0,n3-k3), +! a(1,0,k3) = I(0,0,k3) +! = -I(0,0,n3-k3), +! a(0,n2/2,k3) = R(0,n2/2,k3) +! = R(0,n2/2,n3-k3), +! a(1,n2/2,k3) = I(0,n2/2,k3) +! = -I(0,n2/2,n3-k3), +! a(1,0,n3-k3) = R(n1/2,0,k3) +! = R(n1/2,0,n3-k3), +! a(0,0,n3-k3) = -I(n1/2,0,k3) +! = I(n1/2,0,n3-k3), +! a(1,n2/2,n3-k3) = R(n1/2,n2/2,k3) +! = R(n1/2,n2/2,n3-k3), +! a(0,n2/2,n3-k3) = -I(n1/2,n2/2,k3) +! = I(n1/2,n2/2,n3-k3), +! 0<k3<n3/2, +! a(0,0,0) = R(0,0,0), +! a(1,0,0) = R(n1/2,0,0), +! a(0,0,n3/2) = R(0,0,n3/2), +! a(1,0,n3/2) = R(n1/2,0,n3/2), +! a(0,n2/2,0) = R(0,n2/2,0), +! a(1,n2/2,0) = R(n1/2,n2/2,0), +! a(0,n2/2,n3/2) = R(0,n2/2,n3/2), +! a(1,n2/2,n3/2) = R(n1/2,n2/2,n3/2) +! <case2> +! input data +! a(2*j1,j2,j3) = R(j1,j2,j3) +! = R(n1-j1,mod(n2-j2,n2),mod(n3-j3,n3)), +! a(2*j1+1,j2,j3) = I(j1,j2,j3) +! = -I(n1-j1,mod(n2-j2,n2),mod(n3-j3,n3)), +! 0<j1<n1/2, 0<=j2<n2, 0<=j3<n3, +! a(0,j2,j3) = R(0,j2,j3) +! = R(0,n2-j2,mod(n3-j3,n3)), +! a(1,j2,j3) = I(0,j2,j3) +! = -I(0,n2-j2,mod(n3-j3,n3)), +! a(1,n2-j2,j3) = R(n1/2,j2,j3) +! = R(n1/2,n2-j2,mod(n3-j3,n3)), +! a(0,n2-j2,j3) = -I(n1/2,j2,j3) +! = I(n1/2,n2-j2,mod(n3-j3,n3)), +! 0<j2<n2/2, 0<=j3<n3, +! a(0,0,j3) = R(0,0,j3) +! = R(0,0,n3-j3), +! a(1,0,j3) = I(0,0,j3) +! = -I(0,0,n3-j3), +! a(0,n2/2,j3) = R(0,n2/2,j3) +! = R(0,n2/2,n3-j3), +! a(1,n2/2,j3) = I(0,n2/2,j3) +! = -I(0,n2/2,n3-j3), +! a(1,0,n3-j3) = R(n1/2,0,j3) +! = R(n1/2,0,n3-j3), +! a(0,0,n3-j3) = -I(n1/2,0,j3) +! = I(n1/2,0,n3-j3), +! a(1,n2/2,n3-j3) = R(n1/2,n2/2,j3) +! = R(n1/2,n2/2,n3-j3), +! a(0,n2/2,n3-j3) = -I(n1/2,n2/2,j3) +! = I(n1/2,n2/2,n3-j3), +! 0<j3<n3/2, +! a(0,0,0) = R(0,0,0), +! a(1,0,0) = R(n1/2,0,0), +! a(0,0,n3/2) = R(0,0,n3/2), +! a(1,0,n3/2) = R(n1/2,0,n3/2), +! a(0,n2/2,0) = R(0,n2/2,0), +! a(1,n2/2,0) = R(n1/2,n2/2,0), +! a(0,n2/2,n3/2) = R(0,n2/2,n3/2), +! a(1,n2/2,n3/2) = R(n1/2,n2/2,n3/2) +! ---- output ordering ---- +! call rdft3d(n1max, n2max, n1, n2, n3, 1, a, t, ip, w) +! call rdft3dsort(n1max, n2max, n1, n2, n3, 1, a) +! ! stored data is a(0:n1-1,0:n2-1,0:n3+1): +! ! a(2*k1,k2,k3) = R(k1,k2,k3), +! ! a(2*k1+1,k2,k3) = I(k1,k2,k3), +! ! 0<=k1<=n1/2, 0<=k2<n2, 0<=k3<n3. +! ! the stored data is larger than the input data! +! ---- input ordering ---- +! call rdft3dsort(n1max, n2max, n1, n2, n3, -1, a) +! call rdft3d(n1max, n2max, n1, n2, n3, -1, a, t, ip, w) +! t(0:*) :work area (real*8) +! length of t >= max(8*n2, 8*n3) +! ip(0:*):work area for bit reversal (integer) +! length of ip >= 2+sqrt(n) +! (n = max(n1/2, n2, n3)) +! ip(0),ip(1) are pointers of the cos/sin table. +! w(0:*) :cos/sin table (real*8) +! length of w >= max(n1/4, n2/2, n3/2) + n1/4 +! w(),ip() are initialized if ip(0) = 0. +! [remark] +! Inverse of +! call rdft3d(n1max, n2max, n1, n2, n3, 1, a, t, ip, w) +! is +! call rdft3d(n1max, n2max, n1, n2, n3, -1, a, t, ip, w) +! do j3 = 0, n3 - 1 +! do j2 = 0, n2 - 1 +! do j1 = 0, n1 - 1 +! a(j1,j2,j3) = a(j1,j2,j3) * (2.0d0/n1/n2/n3) +! end do +! end do +! end do +! . +! +! +! -------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- +! [definition] +! <case1> IDCT (excluding scale) +! C(k1,k2,k3) = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 +! a(j1,j2,j3) * +! cos(pi*j1*(k1+1/2)/n1) * +! cos(pi*j2*(k2+1/2)/n2) * +! cos(pi*j3*(k3+1/2)/n3), +! 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 +! <case2> DCT +! C(k1,k2,k3) = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 +! a(j1,j2,j3) * +! cos(pi*(j1+1/2)*k1/n1) * +! cos(pi*(j2+1/2)*k2/n2) * +! cos(pi*(j3+1/2)*k3/n3), +! 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 +! [usage] +! <case1> +! ip(0) = 0 ! first time only +! call ddct3d(n1max, n2max, n1, n2, n3, 1, a, t, ip, w) +! <case2> +! ip(0) = 0 ! first time only +! call ddct3d(n1max, n2max, n1, n2, n3, -1, a, t, ip, w) +! [parameters] +! n1max :row1 size of the 3D array (integer) +! n2max :row2 size of the 3D array (integer) +! n1 :data length (integer) +! n1 >= 2, n1 = power of 2 +! n2 :data length (integer) +! n2 >= 2, n2 = power of 2 +! n3 :data length (integer) +! n3 >= 2, n3 = power of 2 +! a(0:n1-1,0:n2-1,0:n3-1) +! :input/output data (real*8) +! output data +! a(k1,k2,k3) = C(k1,k2,k3), +! 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 +! t(0:*) :work area (real*8) +! length of t >= max(4*n2, 4*n3) +! ip(0:*):work area for bit reversal (integer) +! length of ip >= 2+sqrt(n) +! (n = max(n1/2, n2/2, n3/2)) +! ip(0),ip(1) are pointers of the cos/sin table. +! w(0:*) :cos/sin table (real*8) +! length of w >= max(n1*3/2, n2*3/2, n3*3/2) +! w(),ip() are initialized if ip(0) = 0. +! [remark] +! Inverse of +! call ddct3d(n1max, n2max, n1, n2, n3, -1, a, t, ip, w) +! is +! do j3 = 0, n3 - 1 +! do j2 = 0, n2 - 1 +! a(0, j2, j3) = a(0, j2, j3) * 0.5d0 +! end do +! do j1 = 0, n1 - 1 +! a(j1, 0, j3) = a(j1, 0, j3) * 0.5d0 +! end do +! end do +! do j2 = 0, n2 - 1 +! do j1 = 0, n1 - 1 +! a(j1, j2, 0) = a(j1, j2, 0) * 0.5d0 +! end do +! end do +! call ddct3d(n1max, n2max, n1, n2, n3, 1, a, t, ip, w) +! do j3 = 0, n3 - 1 +! do j2 = 0, n2 - 1 +! do j1 = 0, n1 - 1 +! a(j1,j2,j3) = a(j1,j2,j3) * (8.0d0/n1/n2/n3) +! end do +! end do +! end do +! . +! +! +! -------- DST (Discrete Sine Transform) / Inverse of DST -------- +! [definition] +! <case1> IDST (excluding scale) +! S(k1,k2,k3) = sum_j1=1^n1 sum_j2=1^n2 sum_j3=1^n3 +! A(j1,j2,j3) * +! sin(pi*j1*(k1+1/2)/n1) * +! sin(pi*j2*(k2+1/2)/n2) * +! sin(pi*j3*(k3+1/2)/n3), +! 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 +! <case2> DST +! S(k1,k2,k3) = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 +! a(j1,j2,j3) * +! sin(pi*(j1+1/2)*k1/n1) * +! sin(pi*(j2+1/2)*k2/n2) * +! sin(pi*(j3+1/2)*k3/n3), +! 0<k1<=n1, 0<k2<=n2, 0<k3<=n3 +! [usage] +! <case1> +! ip(0) = 0 ! first time only +! call ddst3d(n1max, n2max, n1, n2, n3, 1, a, t, ip, w) +! <case2> +! ip(0) = 0 ! first time only +! call ddst3d(n1max, n2max, n1, n2, n3, -1, a, t, ip, w) +! [parameters] +! n1max :row1 size of the 3D array (integer) +! n2max :row2 size of the 3D array (integer) +! n1 :data length (integer) +! n1 >= 2, n1 = power of 2 +! n2 :data length (integer) +! n2 >= 2, n2 = power of 2 +! n3 :data length (integer) +! n3 >= 2, n3 = power of 2 +! a(0:n1-1,0:n2-1,0:n3-1) +! :input/output data (real*8) +! <case1> +! input data +! a(mod(j1,n1),mod(j2,n2),mod(j3,n3)) = A(j1,j2,j3), +! 0<j1<=n1, 0<j2<=n2, 0<j3<=n3 +! output data +! a(k1,k2,k3) = S(k1,k2,k3), +! 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 +! <case2> +! output data +! a(mod(k1,n1),mod(k2,n2),mod(k3,n3)) = S(k1,k2,k3), +! 0<k1<=n1, 0<k2<=n2, 0<k3<=n3 +! t(0:*) :work area (real*8) +! length of t >= max(4*n2, 4*n3) +! ip(0:*):work area for bit reversal (integer) +! length of ip >= 2+sqrt(n) +! (n = max(n1/2, n2/2, n3/2)) +! ip(0),ip(1) are pointers of the cos/sin table. +! w(0:*) :cos/sin table (real*8) +! length of w >= max(n1*3/2, n2*3/2, n3*3/2) +! w(),ip() are initialized if ip(0) = 0. +! [remark] +! Inverse of +! call ddst3d(n1max, n2max, n1, n2, n3, -1, a, t, ip, w) +! is +! do j3 = 0, n3 - 1 +! do j2 = 0, n2 - 1 +! a(0, j2, j3) = a(0, j2, j3) * 0.5d0 +! end do +! do j1 = 0, n1 - 1 +! a(j1, 0, j3) = a(j1, 0, j3) * 0.5d0 +! end do +! end do +! do j2 = 0, n2 - 1 +! do j1 = 0, n1 - 1 +! a(j1, j2, 0) = a(j1, j2, 0) * 0.5d0 +! end do +! end do +! call ddst3d(n1max, n2max, n1, n2, n3, 1, a, t, ip, w) +! do j3 = 0, n3 - 1 +! do j2 = 0, n2 - 1 +! do j1 = 0, n1 - 1 +! a(j1,j2,j3) = a(j1,j2,j3) * (8.0d0/n1/n2/n3) +! end do +! end do +! end do +! . +! +! + subroutine cdft3d(n1max, n2max, n1, n2, n3, isgn, a, + & t, ip, w) + integer n1max, n2max, n1, n2, n3, isgn, ip(0 : *), n + real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), + & t(0 : *), w(0 : *) + n = 2 * max(n2, n3) + n = max(n, n1) + if (n .gt. 4 * ip(0)) then + call makewt(n / 4, ip, w) + end if + call xdft3da_sub(n1max, n2max, n1, n2, n3, 0, + & isgn, a, t, ip, w) + call cdft3db_sub(n1max, n2max, n1, n2, n3, + & isgn, a, t, ip, w) + end +! + subroutine rdft3d(n1max, n2max, n1, n2, n3, isgn, a, + & t, ip, w) + integer n1max, n2max, n1, n2, n3, isgn, ip(0 : *), + & n, nw, nc + real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), + & t(0 : *), w(0 : *) + n = 2 * max(n2, n3) + n = max(n, n1) + nw = ip(0) + if (n .gt. 4 * nw) then + nw = n / 4 + call makewt(nw, ip, w) + end if + nc = ip(1) + if (n1 .gt. 4 * nc) then + nc = n1 / 4 + call makect(nc, ip, w(nw)) + end if + if (isgn .lt. 0) then + call rdft3d_sub(n1max, n2max, n1, n2, n3, isgn, a) + call cdft3db_sub(n1max, n2max, n1, n2, n3, + & isgn, a, t, ip, w) + call xdft3da_sub(n1max, n2max, n1, n2, n3, 1, + & isgn, a, t, ip, w) + else + call xdft3da_sub(n1max, n2max, n1, n2, n3, 1, + & isgn, a, t, ip, w) + call cdft3db_sub(n1max, n2max, n1, n2, n3, + & isgn, a, t, ip, w) + call rdft3d_sub(n1max, n2max, n1, n2, n3, isgn, a) + end if + end +! + subroutine rdft3dsort(n1max, n2max, n1, n2, n3, isgn, a) + integer n1max, n2max, n1, n2, n3, isgn, n2h, n3h, j, k + real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), x, y + n2h = n2 / 2 + n3h = n3 / 2 + if (isgn .lt. 0) then + do k = 0, n3 - 1 + do j = n2h + 1, n2 - 1 + a(0, j, k) = a(n1 + 1, j, k) + a(1, j, k) = a(n1, j, k) + end do + end do + do k = n3h + 1, n3 - 1 + a(0, 0, k) = a(n1 + 1, 0, k) + a(1, 0, k) = a(n1, 0, k) + a(0, n2h, k) = a(n1 + 1, n2h, k) + a(1, n2h, k) = a(n1, n2h, k) + end do + a(1, 0, 0) = a(n1, 0, 0) + a(1, n2h, 0) = a(n1, n2h, 0) + a(1, 0, n3h) = a(n1, 0, n3h) + a(1, n2h, n3h) = a(n1, n2h, n3h) + else + do j = n2h + 1, n2 - 1 + y = a(0, j, 0) + x = a(1, j, 0) + a(n1, j, 0) = x + a(n1 + 1, j, 0) = y + a(n1, n2 - j, 0) = x + a(n1 + 1, n2 - j, 0) = -y + a(0, j, 0) = a(0, n2 - j, 0) + a(1, j, 0) = -a(1, n2 - j, 0) + end do + do k = 1, n3 - 1 + do j = n2h + 1, n2 - 1 + y = a(0, j, k) + x = a(1, j, k) + a(n1, j, k) = x + a(n1 + 1, j, k) = y + a(n1, n2 - j, n3 - k) = x + a(n1 + 1, n2 - j, n3 - k) = -y + a(0, j, k) = a(0, n2 - j, n3 - k) + a(1, j, k) = -a(1, n2 - j, n3 - k) + end do + end do + do k = n3h + 1, n3 - 1 + y = a(0, 0, k) + x = a(1, 0, k) + a(n1, 0, k) = x + a(n1 + 1, 0, k) = y + a(n1, 0, n3 - k) = x + a(n1 + 1, 0, n3 - k) = -y + a(0, 0, k) = a(0, 0, n3 - k) + a(1, 0, k) = -a(1, 0, n3 - k) + y = a(0, n2h, k) + x = a(1, n2h, k) + a(n1, n2h, k) = x + a(n1 + 1, n2h, k) = y + a(n1, n2h, n3 - k) = x + a(n1 + 1, n2h, n3 - k) = -y + a(0, n2h, k) = a(0, n2h, n3 - k) + a(1, n2h, k) = -a(1, n2h, n3 - k) + end do + a(n1, 0, 0) = a(1, 0, 0) + a(n1 + 1, 0, 0) = 0 + a(1, 0, 0) = 0 + a(n1, n2h, 0) = a(1, n2h, 0) + a(n1 + 1, n2h, 0) = 0 + a(1, n2h, 0) = 0 + a(n1, 0, n3h) = a(1, 0, n3h) + a(n1 + 1, 0, n3h) = 0 + a(1, 0, n3h) = 0 + a(n1, n2h, n3h) = a(1, n2h, n3h) + a(n1 + 1, n2h, n3h) = 0 + a(1, n2h, n3h) = 0 + end if + end +! + subroutine ddct3d(n1max, n2max, n1, n2, n3, isgn, a, + & t, ip, w) + integer n1max, n2max, n1, n2, n3, isgn, ip(0 : *), + & n, nw, nc + real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), + & t(0 : *), w(0 : *) + n = max(n2, n3) + n = max(n, n1) + nw = ip(0) + if (n .gt. 4 * nw) then + nw = n / 4 + call makewt(nw, ip, w) + end if + nc = ip(1) + if (n .gt. nc) then + nc = n + call makect(nc, ip, w(nw)) + end if + call ddxt3da_sub(n1max, n2max, n1, n2, n3, 0, + & isgn, a, t, ip, w) + call ddxt3db_sub(n1max, n2max, n1, n2, n3, 0, + & isgn, a, t, ip, w) + end +! + subroutine ddst3d(n1max, n2max, n1, n2, n3, isgn, a, + & t, ip, w) + integer n1max, n2max, n1, n2, n3, isgn, ip(0 : *), + & n, nw, nc + real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), + & t(0 : *), w(0 : *) + n = max(n2, n3) + n = max(n, n1) + nw = ip(0) + if (n .gt. 4 * nw) then + nw = n / 4 + call makewt(nw, ip, w) + end if + nc = ip(1) + if (n .gt. nc) then + nc = n + call makect(nc, ip, w(nw)) + end if + call ddxt3da_sub(n1max, n2max, n1, n2, n3, 1, + & isgn, a, t, ip, w) + call ddxt3db_sub(n1max, n2max, n1, n2, n3, 1, + & isgn, a, t, ip, w) + end +! +! -------- child routines -------- +! + subroutine xdft3da_sub(n1max, n2max, n1, n2, n3, icr, + & isgn, a, t, ip, w) + integer n1max, n2max, n1, n2, n3, icr, isgn, + & ip(0 : *), i, j, k + real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), + & t(0 : *), w(0 : *) + do k = 0, n3 - 1 + if (icr .eq. 0) then + do j = 0, n2 - 1 + call cdft(n1, isgn, a(0, j, k), ip, w) + end do + else if (isgn .ge. 0) then + do j = 0, n2 - 1 + call rdft(n1, isgn, a(0, j, k), ip, w) + end do + end if + if (n1 .gt. 4) then + do i = 0, n1 - 8, 8 + do j = 0, n2 - 1 + t(2 * j) = a(i, j, k) + t(2 * j + 1) = a(i + 1, j, k) + t(2 * n2 + 2 * j) = a(i + 2, j, k) + t(2 * n2 + 2 * j + 1) = a(i + 3, j, k) + t(4 * n2 + 2 * j) = a(i + 4, j, k) + t(4 * n2 + 2 * j + 1) = a(i + 5, j, k) + t(6 * n2 + 2 * j) = a(i + 6, j, k) + t(6 * n2 + 2 * j + 1) = a(i + 7, j, k) + end do + call cdft(2 * n2, isgn, t, ip, w) + call cdft(2 * n2, isgn, t(2 * n2), ip, w) + call cdft(2 * n2, isgn, t(4 * n2), ip, w) + call cdft(2 * n2, isgn, t(6 * n2), ip, w) + do j = 0, n2 - 1 + a(i, j, k) = t(2 * j) + a(i + 1, j, k) = t(2 * j + 1) + a(i + 2, j, k) = t(2 * n2 + 2 * j) + a(i + 3, j, k) = t(2 * n2 + 2 * j + 1) + a(i + 4, j, k) = t(4 * n2 + 2 * j) + a(i + 5, j, k) = t(4 * n2 + 2 * j + 1) + a(i + 6, j, k) = t(6 * n2 + 2 * j) + a(i + 7, j, k) = t(6 * n2 + 2 * j + 1) + end do + end do + else if (n1 .eq. 4) then + do j = 0, n2 - 1 + t(2 * j) = a(0, j, k) + t(2 * j + 1) = a(1, j, k) + t(2 * n2 + 2 * j) = a(2, j, k) + t(2 * n2 + 2 * j + 1) = a(3, j, k) + end do + call cdft(2 * n2, isgn, t, ip, w) + call cdft(2 * n2, isgn, t(2 * n2), ip, w) + do j = 0, n2 - 1 + a(0, j, k) = t(2 * j) + a(1, j, k) = t(2 * j + 1) + a(2, j, k) = t(2 * n2 + 2 * j) + a(3, j, k) = t(2 * n2 + 2 * j + 1) + end do + else if (n1 .eq. 2) then + do j = 0, n2 - 1 + t(2 * j) = a(0, j, k) + t(2 * j + 1) = a(1, j, k) + end do + call cdft(2 * n2, isgn, t, ip, w) + do j = 0, n2 - 1 + a(0, j, k) = t(2 * j) + a(1, j, k) = t(2 * j + 1) + end do + end if + if (icr .ne. 0 .and. isgn .lt. 0) then + do j = 0, n2 - 1 + call rdft(n1, isgn, a(0, j, k), ip, w) + end do + end if + end do + end +! + subroutine cdft3db_sub(n1max, n2max, n1, n2, n3, + & isgn, a, t, ip, w) + integer n1max, n2max, n1, n2, n3, isgn, ip(0 : *), + & i, j, k + real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), + & t(0 : *), w(0 : *) + if (n1 .gt. 4) then + do j = 0, n2 - 1 + do i = 0, n1 - 8, 8 + do k = 0, n3 - 1 + t(2 * k) = a(i, j, k) + t(2 * k + 1) = a(i + 1, j, k) + t(2 * n3 + 2 * k) = a(i + 2, j, k) + t(2 * n3 + 2 * k + 1) = a(i + 3, j, k) + t(4 * n3 + 2 * k) = a(i + 4, j, k) + t(4 * n3 + 2 * k + 1) = a(i + 5, j, k) + t(6 * n3 + 2 * k) = a(i + 6, j, k) + t(6 * n3 + 2 * k + 1) = a(i + 7, j, k) + end do + call cdft(2 * n3, isgn, t, ip, w) + call cdft(2 * n3, isgn, t(2 * n3), ip, w) + call cdft(2 * n3, isgn, t(4 * n3), ip, w) + call cdft(2 * n3, isgn, t(6 * n3), ip, w) + do k = 0, n3 - 1 + a(i, j, k) = t(2 * k) + a(i + 1, j, k) = t(2 * k + 1) + a(i + 2, j, k) = t(2 * n3 + 2 * k) + a(i + 3, j, k) = t(2 * n3 + 2 * k + 1) + a(i + 4, j, k) = t(4 * n3 + 2 * k) + a(i + 5, j, k) = t(4 * n3 + 2 * k + 1) + a(i + 6, j, k) = t(6 * n3 + 2 * k) + a(i + 7, j, k) = t(6 * n3 + 2 * k + 1) + end do + end do + end do + else if (n1 .eq. 4) then + do j = 0, n2 - 1 + do k = 0, n3 - 1 + t(2 * k) = a(0, j, k) + t(2 * k + 1) = a(1, j, k) + t(2 * n3 + 2 * k) = a(2, j, k) + t(2 * n3 + 2 * k + 1) = a(3, j, k) + end do + call cdft(2 * n3, isgn, t, ip, w) + call cdft(2 * n3, isgn, t(2 * n3), ip, w) + do k = 0, n3 - 1 + a(0, j, k) = t(2 * k) + a(1, j, k) = t(2 * k + 1) + a(2, j, k) = t(2 * n3 + 2 * k) + a(3, j, k) = t(2 * n3 + 2 * k + 1) + end do + end do + else if (n1 .eq. 2) then + do j = 0, n2 - 1 + do k = 0, n3 - 1 + t(2 * k) = a(0, j, k) + t(2 * k + 1) = a(1, j, k) + end do + call cdft(2 * n3, isgn, t, ip, w) + do k = 0, n3 - 1 + a(0, j, k) = t(2 * k) + a(1, j, k) = t(2 * k + 1) + end do + end do + end if + end +! + subroutine rdft3d_sub(n1max, n2max, n1, n2, n3, isgn, a) + integer n1max, n2max, n1, n2, n3, isgn, + & n2h, n3h, i, j, k, l + real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), xi + n2h = n2 / 2 + n3h = n3 / 2 + if (isgn .lt. 0) then + do k = 1, n3h - 1 + l = n3 - k + xi = a(0, 0, k) - a(0, 0, l) + a(0, 0, k) = a(0, 0, k) + a(0, 0, l) + a(0, 0, l) = xi + xi = a(1, 0, l) - a(1, 0, k) + a(1, 0, k) = a(1, 0, k) + a(1, 0, l) + a(1, 0, l) = xi + xi = a(0, n2h, k) - a(0, n2h, l) + a(0, n2h, k) = a(0, n2h, k) + a(0, n2h, l) + a(0, n2h, l) = xi + xi = a(1, n2h, l) - a(1, n2h, k) + a(1, n2h, k) = a(1, n2h, k) + a(1, n2h, l) + a(1, n2h, l) = xi + do i = 1, n2h - 1 + j = n2 - i + xi = a(0, i, k) - a(0, j, l) + a(0, i, k) = a(0, i, k) + a(0, j, l) + a(0, j, l) = xi + xi = a(1, j, l) - a(1, i, k) + a(1, i, k) = a(1, i, k) + a(1, j, l) + a(1, j, l) = xi + xi = a(0, i, l) - a(0, j, k) + a(0, i, l) = a(0, i, l) + a(0, j, k) + a(0, j, k) = xi + xi = a(1, j, k) - a(1, i, l) + a(1, i, l) = a(1, i, l) + a(1, j, k) + a(1, j, k) = xi + end do + end do + do i = 1, n2h - 1 + j = n2 - i + xi = a(0, i, 0) - a(0, j, 0) + a(0, i, 0) = a(0, i, 0) + a(0, j, 0) + a(0, j, 0) = xi + xi = a(1, j, 0) - a(1, i, 0) + a(1, i, 0) = a(1, i, 0) + a(1, j, 0) + a(1, j, 0) = xi + xi = a(0, i, n3h) - a(0, j, n3h) + a(0, i, n3h) = a(0, i, n3h) + a(0, j, n3h) + a(0, j, n3h) = xi + xi = a(1, j, n3h) - a(1, i, n3h) + a(1, i, n3h) = a(1, i, n3h) + a(1, j, n3h) + a(1, j, n3h) = xi + end do + else + do k = 1, n3h - 1 + l = n3 - k + a(0, 0, l) = 0.5d0 * (a(0, 0, k) - a(0, 0, l)) + a(0, 0, k) = a(0, 0, k) - a(0, 0, l) + a(1, 0, l) = 0.5d0 * (a(1, 0, k) + a(1, 0, l)) + a(1, 0, k) = a(1, 0, k) - a(1, 0, l) + a(0, n2h, l) = 0.5d0 * (a(0, n2h, k) - a(0, n2h, l)) + a(0, n2h, k) = a(0, n2h, k) - a(0, n2h, l) + a(1, n2h, l) = 0.5d0 * (a(1, n2h, k) + a(1, n2h, l)) + a(1, n2h, k) = a(1, n2h, k) - a(1, n2h, l) + do i = 1, n2h - 1 + j = n2 - i + a(0, j, l) = 0.5d0 * (a(0, i, k) - a(0, j, l)) + a(0, i, k) = a(0, i, k) - a(0, j, l) + a(1, j, l) = 0.5d0 * (a(1, i, k) + a(1, j, l)) + a(1, i, k) = a(1, i, k) - a(1, j, l) + a(0, j, k) = 0.5d0 * (a(0, i, l) - a(0, j, k)) + a(0, i, l) = a(0, i, l) - a(0, j, k) + a(1, j, k) = 0.5d0 * (a(1, i, l) + a(1, j, k)) + a(1, i, l) = a(1, i, l) - a(1, j, k) + end do + end do + do i = 1, n2h - 1 + j = n2 - i + a(0, j, 0) = 0.5d0 * (a(0, i, 0) - a(0, j, 0)) + a(0, i, 0) = a(0, i, 0) - a(0, j, 0) + a(1, j, 0) = 0.5d0 * (a(1, i, 0) + a(1, j, 0)) + a(1, i, 0) = a(1, i, 0) - a(1, j, 0) + a(0, j, n3h) = 0.5d0 * (a(0, i, n3h) - a(0, j, n3h)) + a(0, i, n3h) = a(0, i, n3h) - a(0, j, n3h) + a(1, j, n3h) = 0.5d0 * (a(1, i, n3h) + a(1, j, n3h)) + a(1, i, n3h) = a(1, i, n3h) - a(1, j, n3h) + end do + end if + end +! + subroutine ddxt3da_sub(n1max, n2max, n1, n2, n3, ics, + & isgn, a, t, ip, w) + integer n1max, n2max, n1, n2, n3, ics, isgn, + & ip(0 : *), i, j, k + real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), + & t(0 : *), w(0 : *) + do k = 0, n3 - 1 + if (ics .eq. 0) then + do j = 0, n2 - 1 + call ddct(n1, isgn, a(0, j, k), ip, w) + end do + else + do j = 0, n2 - 1 + call ddst(n1, isgn, a(0, j, k), ip, w) + end do + end if + if (n1 .gt. 2) then + do i = 0, n1 - 4, 4 + do j = 0, n2 - 1 + t(j) = a(i, j, k) + t(n2 + j) = a(i + 1, j, k) + t(2 * n2 + j) = a(i + 2, j, k) + t(3 * n2 + j) = a(i + 3, j, k) + end do + if (ics .eq. 0) then + call ddct(n2, isgn, t, ip, w) + call ddct(n2, isgn, t(n2), ip, w) + call ddct(n2, isgn, t(2 * n2), ip, w) + call ddct(n2, isgn, t(3 * n2), ip, w) + else + call ddst(n2, isgn, t, ip, w) + call ddst(n2, isgn, t(n2), ip, w) + call ddst(n2, isgn, t(2 * n2), ip, w) + call ddst(n2, isgn, t(3 * n2), ip, w) + end if + do j = 0, n2 - 1 + a(i, j, k) = t(j) + a(i + 1, j, k) = t(n2 + j) + a(i + 2, j, k) = t(2 * n2 + j) + a(i + 3, j, k) = t(3 * n2 + j) + end do + end do + else if (n1 .eq. 2) then + do j = 0, n2 - 1 + t(j) = a(0, j, k) + t(n2 + j) = a(1, j, k) + end do + if (ics .eq. 0) then + call ddct(n2, isgn, t, ip, w) + call ddct(n2, isgn, t(n2), ip, w) + else + call ddst(n2, isgn, t, ip, w) + call ddst(n2, isgn, t(n2), ip, w) + end if + do j = 0, n2 - 1 + a(0, j, k) = t(j) + a(1, j, k) = t(n2 + j) + end do + end if + end do + end +! + subroutine ddxt3db_sub(n1max, n2max, n1, n2, n3, ics, + & isgn, a, t, ip, w) + integer n1max, n2max, n1, n2, n3, ics, isgn, + & ip(0 : *), i, j, k + real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), + & t(0 : *), w(0 : *) + if (n1 .gt. 2) then + do j = 0, n2 - 1 + do i = 0, n1 - 4, 4 + do k = 0, n3 - 1 + t(k) = a(i, j, k) + t(n3 + k) = a(i + 1, j, k) + t(2 * n3 + k) = a(i + 2, j, k) + t(3 * n3 + k) = a(i + 3, j, k) + end do + if (ics .eq. 0) then + call ddct(n3, isgn, t, ip, w) + call ddct(n3, isgn, t(n3), ip, w) + call ddct(n3, isgn, t(2 * n3), ip, w) + call ddct(n3, isgn, t(3 * n3), ip, w) + else + call ddst(n3, isgn, t, ip, w) + call ddst(n3, isgn, t(n3), ip, w) + call ddst(n3, isgn, t(2 * n3), ip, w) + call ddst(n3, isgn, t(3 * n3), ip, w) + end if + do k = 0, n3 - 1 + a(i, j, k) = t(k) + a(i + 1, j, k) = t(n3 + k) + a(i + 2, j, k) = t(2 * n3 + k) + a(i + 3, j, k) = t(3 * n3 + k) + end do + end do + end do + else if (n1 .eq. 2) then + do j = 0, n2 - 1 + do k = 0, n3 - 1 + t(k) = a(0, j, k) + t(n3 + k) = a(1, j, k) + end do + if (ics .eq. 0) then + call ddct(n3, isgn, t, ip, w) + call ddct(n3, isgn, t(n3), ip, w) + else + call ddst(n3, isgn, t, ip, w) + call ddst(n3, isgn, t(n3), ip, w) + end if + do k = 0, n3 - 1 + a(0, j, k) = t(k) + a(1, j, k) = t(n3 + k) + end do + end do + end if + end +! diff --git a/src/fft2d/fft2d/readme2d.txt b/src/fft2d/fft2d/readme2d.txt new file mode 100644 index 0000000..5a48e1d --- /dev/null +++ b/src/fft2d/fft2d/readme2d.txt @@ -0,0 +1,71 @@ +General Purpose 2D,3D FFT (Fast Fourier Transform) Package + +Files + alloc.c : 2D-array Allocation + alloc.h : 2D-array Allocation + fft4f2d.c : 2D FFT Package in C - Version I (radix 4, 2) + fft4f2d.f : 2D FFT Package in Fortran - Version I (radix 4, 2) + fftsg.c : 1D FFT Package in C - Fast Version (Split-Radix) + fftsg.f : 1D FFT Package in Fortran - Fast Version (Split-Radix) + fftsg2d.c : 2D FFT Package in C - Version II (Split-Radix) + fftsg2d.f : 2D FFT Package in Fortran - Version II (Split-Radix) + fftsg3d.c : 3D FFT Package in C - Version II (Split-Radix) + fftsg3d.f : 3D FFT Package in Fortran - Version II (Split-Radix) + shrtdct.c : 8x8, 16x16 DCT Package + sample2d/ + Makefile : for gcc, cc + Makefile.f77: for Fortran + Makefile.pth: Pthread version + fft4f2dt.c : Test Program for "fft4f2d.c" + fft4f2dt.f : Test Program for "fft4f2d.f" + fftsg2dt.c : Test Program for "fftsg2d.c" + fftsg2dt.f : Test Program for "fftsg2d.f" + fftsg3dt.c : Test Program for "fftsg3d.c" + fftsg3dt.f : Test Program for "fftsg3d.f" + shrtdctt.c : Test Program for "shrtdct.c" + +Difference of Files + C and Fortran versions are equal and + the same routines are in each version. + ---- Difference between "fft4f2d.*" and "fftsg2d.*" ---- + "fft4f2d.*" are optimized for the old machines that + don't have the large size CPU cache. + "fftsg2d.*", "fftsg3d.*" use 1D FFT routines in "fftsg.*". + "fftsg2d.*", "fftsg3d.*" are optimized for the machines that + have the multi-level (L1,L2,etc) cache. + +Routines in the Package + in fft4f2d.*, fftsg2d.* + cdft2d: 2-dim Complex Discrete Fourier Transform + rdft2d: 2-dim Real Discrete Fourier Transform + ddct2d: 2-dim Discrete Cosine Transform + ddst2d: 2-dim Discrete Sine Transform + rdft2dsort: rdft2d input/output ordering (fftsg2d.*) + in fftsg3d.* + cdft3d: 3-dim Complex Discrete Fourier Transform + rdft3d: 3-dim Real Discrete Fourier Transform + ddct3d: 3-dim Discrete Cosine Transform + ddst3d: 3-dim Discrete Sine Transform + rdft3dsort: rdft3d input/output ordering + in fftsg.* + cdft: 1-dim Complex Discrete Fourier Transform + rdft: 1-dim Real Discrete Fourier Transform + ddct: 1-dim Discrete Cosine Transform + ddst: 1-dim Discrete Sine Transform + dfct: 1-dim Real Symmetric DFT + dfst: 1-dim Real Anti-symmetric DFT + (these routines are called by fftsg2d.*, fftsg3d.*) + in shrtdct.c + ddct8x8s : Normalized 8x8 DCT + ddct16x16s: Normalized 16x16 DCT + (faster than ddct2d()) + +Usage + Brief explanations are in block comments of each packages. + The examples are given in the test programs. + +Copyright + Copyright(C) 1997,2001 Takuya OOURA (email: ooura@kurims.kyoto-u.ac.jp). + You may use, copy, modify this code for any purpose and + without fee. You may distribute this ORIGINAL package. + diff --git a/src/fft2d/fft2d/sample2d/Makefile b/src/fft2d/fft2d/sample2d/Makefile new file mode 100644 index 0000000..130e0a4 --- /dev/null +++ b/src/fft2d/fft2d/sample2d/Makefile @@ -0,0 +1,72 @@ +# ---- for GNU gcc ---- + +CC = gcc + +CFLAGS = -Wall + +OFLAGS = -O2 + +# ---- for SUN WS cc ---- +# +#CC = cc +# +#CFLAGS = +# +#OFLAGS = -xO2 + + + + +all: fft4f2dt fftsg2dt fftsg3dt shrtdctt + + +fft4f2dt : fft4f2dt.o fft4f2d.o alloc.o + $(CC) fft4f2dt.o fft4f2d.o alloc.o -lm -o fft4f2dt + +fftsg2dt : fftsg2dt.o fftsg2d.o fftsg.o alloc.o + $(CC) fftsg2dt.o fftsg2d.o fftsg.o alloc.o -lm -o fftsg2dt + +fftsg3dt : fftsg3dt.o fftsg3d.o fftsg.o alloc.o + $(CC) fftsg3dt.o fftsg3d.o fftsg.o alloc.o -lm -o fftsg3dt + +shrtdctt : shrtdctt.o shrtdct.o + $(CC) shrtdctt.o shrtdct.o -lm -o shrtdctt + + +fft4f2dt.o : fft4f2dt.c + $(CC) $(CFLAGS) $(OFLAGS) -c fft4f2dt.c -o fft4f2dt.o + +fftsg2dt.o : fftsg2dt.c + $(CC) $(CFLAGS) $(OFLAGS) -c fftsg2dt.c -o fftsg2dt.o + +fftsg3dt.o : fftsg3dt.c + $(CC) $(CFLAGS) $(OFLAGS) -c fftsg3dt.c -o fftsg3dt.o + +shrtdctt.o : shrtdctt.c + $(CC) $(CFLAGS) $(OFLAGS) -c shrtdctt.c -o shrtdctt.o + + +fft4f2d.o : ../fft4f2d.c + $(CC) $(CFLAGS) $(OFLAGS) -c ../fft4f2d.c -o fft4f2d.o + +fftsg2d.o : ../fftsg2d.c + $(CC) $(CFLAGS) $(OFLAGS) -c ../fftsg2d.c -o fftsg2d.o + +fftsg3d.o : ../fftsg3d.c + $(CC) $(CFLAGS) $(OFLAGS) -c ../fftsg3d.c -o fftsg3d.o + +fftsg.o : ../fftsg.c + $(CC) $(CFLAGS) $(OFLAGS) -c ../fftsg.c -o fftsg.o + +alloc.o : ../alloc.c + $(CC) $(CFLAGS) $(OFLAGS) -c ../alloc.c -o alloc.o + +shrtdct.o : ../shrtdct.c + $(CC) $(CFLAGS) $(OFLAGS) -c ../shrtdct.c -o shrtdct.o + + + + +clean: + rm -f *.o + diff --git a/src/fft2d/fft2d/sample2d/Makefile.f77 b/src/fft2d/fft2d/sample2d/Makefile.f77 new file mode 100644 index 0000000..0162fdc --- /dev/null +++ b/src/fft2d/fft2d/sample2d/Makefile.f77 @@ -0,0 +1,60 @@ +# ---- for GNU g77 ---- + +F77 = g77 + +FFLAGS = -Wall + +OFLAGS = -O2 + +# ---- for SUN WS f77 ---- +# +#F77 = f77 +# +#FFLAGS = +# +#OFLAGS = -xO2 + + + + +all: fft4f2dt_f fftsg2dt_f fftsg3dt_f + + +fft4f2dt_f : fft4f2dt_f.o fft4f2d_f.o + $(F77) fft4f2dt_f.o fft4f2d_f.o -o fft4f2dt_f + +fftsg2dt_f : fftsg2dt_f.o fftsg2d_f.o fftsg_f.o + $(F77) fftsg2dt_f.o fftsg2d_f.o fftsg_f.o -o fftsg2dt_f + +fftsg3dt_f : fftsg3dt_f.o fftsg3d_f.o fftsg_f.o + $(F77) fftsg3dt_f.o fftsg3d_f.o fftsg_f.o -o fftsg3dt_f + + +fft4f2dt_f.o : fft4f2dt.f + $(F77) $(FFLAGS) $(OFLAGS) -c fft4f2dt.f -o fft4f2dt_f.o + +fftsg2dt_f.o : fftsg2dt.f + $(F77) $(FFLAGS) $(OFLAGS) -c fftsg2dt.f -o fftsg2dt_f.o + +fftsg3dt_f.o : fftsg3dt.f + $(F77) $(FFLAGS) $(OFLAGS) -c fftsg3dt.f -o fftsg3dt_f.o + + +fft4f2d_f.o : ../fft4f2d.f + $(F77) $(FFLAGS) $(OFLAGS) -c ../fft4f2d.f -o fft4f2d_f.o + +fftsg2d_f.o : ../fftsg2d.f + $(F77) $(FFLAGS) $(OFLAGS) -c ../fftsg2d.f -o fftsg2d_f.o + +fftsg3d_f.o : ../fftsg3d.f + $(F77) $(FFLAGS) $(OFLAGS) -c ../fftsg3d.f -o fftsg3d_f.o + +fftsg_f.o : ../fftsg.f + $(F77) $(FFLAGS) $(OFLAGS) -c ../fftsg.f -o fftsg_f.o + + + + +clean: + rm -f *.o + diff --git a/src/fft2d/fft2d/sample2d/Makefile.pth b/src/fft2d/fft2d/sample2d/Makefile.pth new file mode 100644 index 0000000..d46941e --- /dev/null +++ b/src/fft2d/fft2d/sample2d/Makefile.pth @@ -0,0 +1,54 @@ +# ---- for GNU gcc ---- + +CC = gcc + +CFLAGS = -Wall -DUSE_FFT2D_PTHREADS -DUSE_FFT3D_PTHREADS + +OFLAGS = -O2 + +# ---- for SUN WS cc ---- +# +#CC = cc +# +#CFLAGS = -Wall -DUSE_FFT2D_PTHREADS -DUSE_FFT3D_PTHREADS +# +#OFLAGS = -xO2 + + + + +all: fftsg2dt_pt fftsg3dt_pt + + +fftsg2dt_pt : fftsg2dt.o fftsg2dpt.o fftsg.o alloc.o + $(CC) fftsg2dt.o fftsg2dpt.o fftsg.o alloc.o -lm -lpthread -o fftsg2dt_pt + +fftsg3dt_pt : fftsg3dt.o fftsg3dpt.o fftsg.o alloc.o + $(CC) fftsg3dt.o fftsg3dpt.o fftsg.o alloc.o -lm -lpthread -o fftsg3dt_pt + + +fftsg2dt.o : fftsg2dt.c + $(CC) $(CFLAGS) $(OFLAGS) -c fftsg2dt.c -o fftsg2dt.o + +fftsg3dt.o : fftsg3dt.c + $(CC) $(CFLAGS) $(OFLAGS) -c fftsg3dt.c -o fftsg3dt.o + + +fftsg2dpt.o : ../fftsg2d.c + $(CC) $(CFLAGS) $(OFLAGS) -c ../fftsg2d.c -o fftsg2dpt.o + +fftsg3dpt.o : ../fftsg3d.c + $(CC) $(CFLAGS) $(OFLAGS) -c ../fftsg3d.c -o fftsg3dpt.o + +fftsg.o : ../fftsg.c + $(CC) $(CFLAGS) $(OFLAGS) -c ../fftsg.c -o fftsg.o + +alloc.o : ../alloc.c + $(CC) $(CFLAGS) $(OFLAGS) -c ../alloc.c -o alloc.o + + + + +clean: + rm -f *.o + diff --git a/src/fft2d/fft2d/sample2d/alloc.h b/src/fft2d/fft2d/sample2d/alloc.h new file mode 100644 index 0000000..3467cc4 --- /dev/null +++ b/src/fft2d/fft2d/sample2d/alloc.h @@ -0,0 +1,20 @@ +/* ---- memory allocation ---- */ + + +#include <stdlib.h> +#include <stdio.h> + + +int *alloc_1d_int(int n1); +void free_1d_int(int *i); +double *alloc_1d_double(int n1); +void free_1d_double(double *d); +int **alloc_2d_int(int n1, int n2); +void free_2d_int(int **ii); +double **alloc_2d_double(int n1, int n2); +void free_2d_double(double **dd); +int ***alloc_3d_int(int n1, int n2, int n3); +void free_3d_int(int ***iii); +double ***alloc_3d_double(int n1, int n2, int n3); +void free_3d_double(double ***ddd); + diff --git a/src/fft2d/fft2d/sample2d/fft4f2dt.c b/src/fft2d/fft2d/sample2d/fft4f2dt.c new file mode 100644 index 0000000..2f300fd --- /dev/null +++ b/src/fft2d/fft2d/sample2d/fft4f2dt.c @@ -0,0 +1,109 @@ +/* test of fft4f2d.c */ + +#include <math.h> +#include <stdio.h> +#include "alloc.h" +#define MAX(x,y) ((x) > (y) ? (x) : (y)) + +/* random number generator, 0 <= RND < 1 */ +#define RND(p) ((*(p) = (*(p) * 7141 + 54773) % 259200) * (1.0 / 259200)) + + +int main() +{ + void cdft2d(int, int, int, double **, int *, double *); + void rdft2d(int, int, int, double **, int *, double *); + void ddct2d(int, int, int, double **, double **, int *, double *); + void ddst2d(int, int, int, double **, double **, int *, double *); + void putdata2d(int n1, int n2, double **a); + double errorcheck2d(int n1, int n2, double scale, double **a); + int *ip, n1, n2, n, i; + double **a, **t, *w, err; + + printf("data length n1=? (n1 = power of 2) \n"); + scanf("%d", &n1); + printf("data length n2=? (n2 = power of 2) \n"); + scanf("%d", &n2); + + a = alloc_2d_double(n1, n2); + t = alloc_2d_double(n1, n2); + n = MAX(n1, n2 / 2); + ip = alloc_1d_int(2 + (int) sqrt(n + 0.5)); + n = MAX(n1 / 2, n2 / 4) + MAX(n1, n2); + w = alloc_1d_double(n); + ip[0] = 0; + + /* check of CDFT */ + putdata2d(n1, n2, a); + cdft2d(n1, n2, 1, a, ip, w); + cdft2d(n1, n2, -1, a, ip, w); + err = errorcheck2d(n1, n2, 2.0 / n1 / n2, a); + printf("cdft2d err= %g \n", err); + + /* check of RDFT */ + putdata2d(n1, n2, a); + rdft2d(n1, n2, 1, a, ip, w); + rdft2d(n1, n2, -1, a, ip, w); + err = errorcheck2d(n1, n2, 2.0 / n1 / n2, a); + printf("rdft2d err= %g \n", err); + + /* check of DDCT */ + putdata2d(n1, n2, a); + ddct2d(n1, n2, 1, a, t, ip, w); + ddct2d(n1, n2, -1, a, t, ip, w); + for (i = 0; i <= n1 - 1; i++) { + a[i][0] *= 0.5; + } + for (i = 0; i <= n2 - 1; i++) { + a[0][i] *= 0.5; + } + err = errorcheck2d(n1, n2, 4.0 / n1 / n2, a); + printf("ddct2d err= %g \n", err); + + /* check of DDST */ + putdata2d(n1, n2, a); + ddst2d(n1, n2, 1, a, t, ip, w); + ddst2d(n1, n2, -1, a, t, ip, w); + for (i = 0; i <= n1 - 1; i++) { + a[i][0] *= 0.5; + } + for (i = 0; i <= n2 - 1; i++) { + a[0][i] *= 0.5; + } + err = errorcheck2d(n1, n2, 4.0 / n1 / n2, a); + printf("ddst2d err= %g \n", err); + + free_1d_double(w); + free_1d_int(ip); + free_2d_double(t); + free_2d_double(a); + return 0; +} + + +void putdata2d(int n1, int n2, double **a) +{ + int j1, j2, seed = 0; + + for (j1 = 0; j1 <= n1 - 1; j1++) { + for (j2 = 0; j2 <= n2 - 1; j2++) { + a[j1][j2] = RND(&seed); + } + } +} + + +double errorcheck2d(int n1, int n2, double scale, double **a) +{ + int j1, j2, seed = 0; + double err = 0, e; + + for (j1 = 0; j1 <= n1 - 1; j1++) { + for (j2 = 0; j2 <= n2 - 1; j2++) { + e = RND(&seed) - a[j1][j2] * scale; + err = MAX(err, fabs(e)); + } + } + return err; +} + diff --git a/src/fft2d/fft2d/sample2d/fft4f2dt.f b/src/fft2d/fft2d/sample2d/fft4f2dt.f new file mode 100644 index 0000000..43ac8ac --- /dev/null +++ b/src/fft2d/fft2d/sample2d/fft4f2dt.f @@ -0,0 +1,95 @@ +! test of fft4f2d.f +! + program main + integer nmax, nmaxsqrt + parameter (nmax = 1024) + parameter (nmaxsqrt = 32) + integer ip(0 : nmaxsqrt + 1), n1, n2, i + real*8 a(0 : nmax - 1, 0 : nmax - 1), + & t(0 : nmax - 1, 0 : nmax - 1), w(0 : nmax * 3 / 2 - 1), + & err, errorcheck2d +! + write (*, *) 'data length n1=? (n1 = power of 2) ' + read (*, *) n1 + write (*, *) 'data length n2=? (n2 = power of 2) ' + read (*, *) n2 + ip(0) = 0 +! +! check of CDFT + call putdata2d(nmax, n1, n2, a) + call cdft2d(nmax, n1, n2, 1, a, ip, w) + call cdft2d(nmax, n1, n2, -1, a, ip, w) + err = errorcheck2d(nmax, n1, n2, 2.0d0 / n1 / n2, a) + write (*, *) 'cdft2d err= ', err +! +! check of RDFT + call putdata2d(nmax, n1, n2, a) + call rdft2d(nmax, n1, n2, 1, a, ip, w) + call rdft2d(nmax, n1, n2, -1, a, ip, w) + err = errorcheck2d(nmax, n1, n2, 2.0d0 / n1 / n2, a) + write (*, *) 'rdft2d err= ', err +! +! check of DDCT + call putdata2d(nmax, n1, n2, a) + call ddct2d(nmax, n1, n2, 1, a, t, ip, w) + call ddct2d(nmax, n1, n2, -1, a, t, ip, w) + do i = 0, n1 - 1 + a(i, 0) = a(i, 0) * 0.5d0 + end do + do i = 0, n2 - 1 + a(0, i) = a(0, i) * 0.5d0 + end do + err = errorcheck2d(nmax, n1, n2, 4.0d0 / n1 / n2, a) + write (*, *) 'ddct2d err= ', err +! +! check of DDST + call putdata2d(nmax, n1, n2, a) + call ddst2d(nmax, n1, n2, 1, a, t, ip, w) + call ddst2d(nmax, n1, n2, -1, a, t, ip, w) + do i = 0, n1 - 1 + a(i, 0) = a(i, 0) * 0.5d0 + end do + do i = 0, n2 - 1 + a(0, i) = a(0, i) * 0.5d0 + end do + err = errorcheck2d(nmax, n1, n2, 4.0d0 / n1 / n2, a) + write (*, *) 'ddst2d err= ', err +! + end +! +! + subroutine putdata2d(n1max, n1, n2, a) + integer n1max, n1, n2, j1, j2, seed + real*8 a(0 : n1max - 1, 0 : *), drnd + seed = 0 + do j2 = 0, n2 - 1 + do j1 = 0, n1 - 1 + a(j1, j2) = drnd(seed) + end do + end do + end +! +! + function errorcheck2d(n1max, n1, n2, scale, a) + integer n1max, n1, n2, j1, j2, seed + real*8 scale, a(0 : n1max - 1, 0 : *), drnd, err, e, + & errorcheck2d + err = 0 + seed = 0 + do j2 = 0, n2 - 1 + do j1 = 0, n1 - 1 + e = drnd(seed) - a(j1, j2) * scale + err = max(err, abs(e)) + end do + end do + errorcheck2d = err + end +! +! +! random number generator, 0 <= drnd < 1 + real*8 function drnd(seed) + integer seed + seed = mod(seed * 7141 + 54773, 259200) + drnd = seed * (1.0d0 / 259200) + end +! diff --git a/src/fft2d/fft2d/sample2d/fftsg2dt.c b/src/fft2d/fft2d/sample2d/fftsg2dt.c new file mode 100644 index 0000000..c4cf935 --- /dev/null +++ b/src/fft2d/fft2d/sample2d/fftsg2dt.c @@ -0,0 +1,107 @@ +/* test of fftsg2d.c */ + +#include <math.h> +#include <stdio.h> +#include "alloc.h" +#define MAX(x,y) ((x) > (y) ? (x) : (y)) + +/* random number generator, 0 <= RND < 1 */ +#define RND(p) ((*(p) = (*(p) * 7141 + 54773) % 259200) * (1.0 / 259200)) + + +int main() +{ + void cdft2d(int, int, int, double **, double *, int *, double *); + void rdft2d(int, int, int, double **, double *, int *, double *); + void ddct2d(int, int, int, double **, double *, int *, double *); + void ddst2d(int, int, int, double **, double *, int *, double *); + void putdata2d(int n1, int n2, double **a); + double errorcheck2d(int n1, int n2, double scale, double **a); + int *ip, n1, n2, n, i; + double **a, *w, err; + + printf("data length n1=? (n1 = power of 2) \n"); + scanf("%d", &n1); + printf("data length n2=? (n2 = power of 2) \n"); + scanf("%d", &n2); + + a = alloc_2d_double(n1, n2); + n = MAX(n1, n2 / 2); + ip = alloc_1d_int(2 + (int) sqrt(n + 0.5)); + n = MAX(n1, n2) * 3 / 2; + w = alloc_1d_double(n); + ip[0] = 0; + + /* check of CDFT */ + putdata2d(n1, n2, a); + cdft2d(n1, n2, 1, a, NULL, ip, w); + cdft2d(n1, n2, -1, a, NULL, ip, w); + err = errorcheck2d(n1, n2, 2.0 / n1 / n2, a); + printf("cdft2d err= %g \n", err); + + /* check of RDFT */ + putdata2d(n1, n2, a); + rdft2d(n1, n2, 1, a, NULL, ip, w); + rdft2d(n1, n2, -1, a, NULL, ip, w); + err = errorcheck2d(n1, n2, 2.0 / n1 / n2, a); + printf("rdft2d err= %g \n", err); + + /* check of DDCT */ + putdata2d(n1, n2, a); + ddct2d(n1, n2, 1, a, NULL, ip, w); + ddct2d(n1, n2, -1, a, NULL, ip, w); + for (i = 0; i <= n1 - 1; i++) { + a[i][0] *= 0.5; + } + for (i = 0; i <= n2 - 1; i++) { + a[0][i] *= 0.5; + } + err = errorcheck2d(n1, n2, 4.0 / n1 / n2, a); + printf("ddct2d err= %g \n", err); + + /* check of DDST */ + putdata2d(n1, n2, a); + ddst2d(n1, n2, 1, a, NULL, ip, w); + ddst2d(n1, n2, -1, a, NULL, ip, w); + for (i = 0; i <= n1 - 1; i++) { + a[i][0] *= 0.5; + } + for (i = 0; i <= n2 - 1; i++) { + a[0][i] *= 0.5; + } + err = errorcheck2d(n1, n2, 4.0 / n1 / n2, a); + printf("ddst2d err= %g \n", err); + + free_1d_double(w); + free_1d_int(ip); + free_2d_double(a); + return 0; +} + + +void putdata2d(int n1, int n2, double **a) +{ + int j1, j2, seed = 0; + + for (j1 = 0; j1 <= n1 - 1; j1++) { + for (j2 = 0; j2 <= n2 - 1; j2++) { + a[j1][j2] = RND(&seed); + } + } +} + + +double errorcheck2d(int n1, int n2, double scale, double **a) +{ + int j1, j2, seed = 0; + double err = 0, e; + + for (j1 = 0; j1 <= n1 - 1; j1++) { + for (j2 = 0; j2 <= n2 - 1; j2++) { + e = RND(&seed) - a[j1][j2] * scale; + err = MAX(err, fabs(e)); + } + } + return err; +} + diff --git a/src/fft2d/fft2d/sample2d/fftsg2dt.f b/src/fft2d/fft2d/sample2d/fftsg2dt.f new file mode 100644 index 0000000..a375c05 --- /dev/null +++ b/src/fft2d/fft2d/sample2d/fftsg2dt.f @@ -0,0 +1,94 @@ +! test of fftsg2d.f +! + program main + integer nmax, nmaxsqrt + parameter (nmax = 1024) + parameter (nmaxsqrt = 32) + integer ip(0 : nmaxsqrt + 1), n1, n2, i + real*8 a(0 : nmax - 1, 0 : nmax - 1), t(0 : 8 * nmax - 1), + & w(0 : nmax * 3 / 2 - 1), err, errorcheck2d +! + write (*, *) 'data length n1=? (n1 = power of 2) ' + read (*, *) n1 + write (*, *) 'data length n2=? (n2 = power of 2) ' + read (*, *) n2 + ip(0) = 0 +! +! check of CDFT + call putdata2d(nmax, n1, n2, a) + call cdft2d(nmax, n1, n2, 1, a, t, ip, w) + call cdft2d(nmax, n1, n2, -1, a, t, ip, w) + err = errorcheck2d(nmax, n1, n2, 2.0d0 / n1 / n2, a) + write (*, *) 'cdft2d err= ', err +! +! check of RDFT + call putdata2d(nmax, n1, n2, a) + call rdft2d(nmax, n1, n2, 1, a, t, ip, w) + call rdft2d(nmax, n1, n2, -1, a, t, ip, w) + err = errorcheck2d(nmax, n1, n2, 2.0d0 / n1 / n2, a) + write (*, *) 'rdft2d err= ', err +! +! check of DDCT + call putdata2d(nmax, n1, n2, a) + call ddct2d(nmax, n1, n2, 1, a, t, ip, w) + call ddct2d(nmax, n1, n2, -1, a, t, ip, w) + do i = 0, n1 - 1 + a(i, 0) = a(i, 0) * 0.5d0 + end do + do i = 0, n2 - 1 + a(0, i) = a(0, i) * 0.5d0 + end do + err = errorcheck2d(nmax, n1, n2, 4.0d0 / n1 / n2, a) + write (*, *) 'ddct2d err= ', err +! +! check of DDST + call putdata2d(nmax, n1, n2, a) + call ddst2d(nmax, n1, n2, 1, a, t, ip, w) + call ddst2d(nmax, n1, n2, -1, a, t, ip, w) + do i = 0, n1 - 1 + a(i, 0) = a(i, 0) * 0.5d0 + end do + do i = 0, n2 - 1 + a(0, i) = a(0, i) * 0.5d0 + end do + err = errorcheck2d(nmax, n1, n2, 4.0d0 / n1 / n2, a) + write (*, *) 'ddst2d err= ', err +! + end +! +! + subroutine putdata2d(n1max, n1, n2, a) + integer n1max, n1, n2, j1, j2, seed + real*8 a(0 : n1max - 1, 0 : *), drnd + seed = 0 + do j2 = 0, n2 - 1 + do j1 = 0, n1 - 1 + a(j1, j2) = drnd(seed) + end do + end do + end +! +! + function errorcheck2d(n1max, n1, n2, scale, a) + integer n1max, n1, n2, j1, j2, seed + real*8 scale, a(0 : n1max - 1, 0 : *), drnd, err, e, + & errorcheck2d + err = 0 + seed = 0 + do j2 = 0, n2 - 1 + do j1 = 0, n1 - 1 + e = drnd(seed) - a(j1, j2) * scale + err = max(err, abs(e)) + end do + end do + errorcheck2d = err + end +! +! +! random number generator, 0 <= drnd < 1 + real*8 function drnd(seed) + integer seed + seed = mod(seed * 7141 + 54773, 259200) + drnd = seed * (1.0d0 / 259200) + end +! diff --git a/src/fft2d/fft2d/sample2d/fftsg3dt.c b/src/fft2d/fft2d/sample2d/fftsg3dt.c new file mode 100644 index 0000000..87879e6 --- /dev/null +++ b/src/fft2d/fft2d/sample2d/fftsg3dt.c @@ -0,0 +1,128 @@ +/* test of fftsg3d.c */ + +#include <math.h> +#include <stdio.h> +#include "alloc.h" +#define MAX(x,y) ((x) > (y) ? (x) : (y)) + +/* random number generator, 0 <= RND < 1 */ +#define RND(p) ((*(p) = (*(p) * 7141 + 54773) % 259200) * (1.0 / 259200)) + + +int main() +{ + void cdft3d(int, int, int, int, double ***, double *, int *, double *); + void rdft3d(int, int, int, int, double ***, double *, int *, double *); + void ddct3d(int, int, int, int, double ***, double *, int *, double *); + void ddst3d(int, int, int, int, double ***, double *, int *, double *); + void putdata3d(int n1, int n2, int n3, double ***a); + double errorcheck3d(int n1, int n2, int n3, double scale, double ***a); + int *ip, n1, n2, n3, n, nt, i, j; + double ***a, *w, err; + + printf("data length n1=? (n1 = power of 2) \n"); + scanf("%d", &n1); + printf("data length n2=? (n2 = power of 2) \n"); + scanf("%d", &n2); + printf("data length n3=? (n3 = power of 2) \n"); + scanf("%d", &n3); + + a = alloc_3d_double(n1, n2, n3); + nt = MAX(n1, n2); + n = MAX(nt, n3 / 2); + ip = alloc_1d_int(2 + (int) sqrt(n + 0.5)); + n = MAX(nt, n3) * 3 / 2; + w = alloc_1d_double(n); + ip[0] = 0; + + /* check of CDFT */ + putdata3d(n1, n2, n3, a); + cdft3d(n1, n2, n3, 1, a, NULL, ip, w); + cdft3d(n1, n2, n3, -1, a, NULL, ip, w); + err = errorcheck3d(n1, n2, n3, 2.0 / n1 / n2 / n3, a); + printf("cdft3d err= %g \n", err); + + /* check of RDFT */ + putdata3d(n1, n2, n3, a); + rdft3d(n1, n2, n3, 1, a, NULL, ip, w); + rdft3d(n1, n2, n3, -1, a, NULL, ip, w); + err = errorcheck3d(n1, n2, n3, 2.0 / n1 / n2 / n3, a); + printf("rdft3d err= %g \n", err); + + /* check of DDCT */ + putdata3d(n1, n2, n3, a); + ddct3d(n1, n2, n3, 1, a, NULL, ip, w); + ddct3d(n1, n2, n3, -1, a, NULL, ip, w); + for (i = 0; i <= n1 - 1; i++) { + for (j = 0; j <= n2 - 1; j++) { + a[i][j][0] *= 0.5; + } + for (j = 0; j <= n3 - 1; j++) { + a[i][0][j] *= 0.5; + } + } + for (i = 0; i <= n2 - 1; i++) { + for (j = 0; j <= n3 - 1; j++) { + a[0][i][j] *= 0.5; + } + } + err = errorcheck3d(n1, n2, n3, 8.0 / n1 / n2 / n3, a); + printf("ddct3d err= %g \n", err); + + /* check of DDST */ + putdata3d(n1, n2, n3, a); + ddst3d(n1, n2, n3, 1, a, NULL, ip, w); + ddst3d(n1, n2, n3, -1, a, NULL, ip, w); + for (i = 0; i <= n1 - 1; i++) { + for (j = 0; j <= n2 - 1; j++) { + a[i][j][0] *= 0.5; + } + for (j = 0; j <= n3 - 1; j++) { + a[i][0][j] *= 0.5; + } + } + for (i = 0; i <= n2 - 1; i++) { + for (j = 0; j <= n3 - 1; j++) { + a[0][i][j] *= 0.5; + } + } + err = errorcheck3d(n1, n2, n3, 8.0 / n1 / n2 / n3, a); + printf("ddst3d err= %g \n", err); + + free_1d_double(w); + free_1d_int(ip); + free_3d_double(a); + return 0; +} + + +void putdata3d(int n1, int n2, int n3, double ***a) +{ + int j1, j2, j3, seed = 0; + + for (j1 = 0; j1 <= n1 - 1; j1++) { + for (j2 = 0; j2 <= n2 - 1; j2++) { + for (j3 = 0; j3 <= n3 - 1; j3++) { + a[j1][j2][j3] = RND(&seed); + } + } + } +} + + +double errorcheck3d(int n1, int n2, int n3, double scale, double ***a) +{ + int j1, j2, j3, seed = 0; + double err = 0, e; + + for (j1 = 0; j1 <= n1 - 1; j1++) { + for (j2 = 0; j2 <= n2 - 1; j2++) { + for (j3 = 0; j3 <= n3 - 1; j3++) { + e = RND(&seed) - a[j1][j2][j3] * scale; + err = MAX(err, fabs(e)); + } + } + } + return err; +} + diff --git a/src/fft2d/fft2d/sample2d/fftsg3dt.f b/src/fft2d/fft2d/sample2d/fftsg3dt.f new file mode 100644 index 0000000..bbada0d --- /dev/null +++ b/src/fft2d/fft2d/sample2d/fftsg3dt.f @@ -0,0 +1,119 @@ +! test of fftsg3d.f +! + program main + integer nmax, nmaxsqrt + parameter (nmax = 128) + parameter (nmaxsqrt = 16) + integer ip(0 : nmaxsqrt + 1), n1, n2, n3, i, j + real*8 a(0 : nmax - 1, 0 : nmax - 1, 0 : nmax - 1), + & t(0 : 8 * nmax - 1), + & w(0 : nmax * 3 / 2 - 1), err, errorcheck3d +! + write (*, *) 'data length n1=? (n1 = power of 2) ' + read (*, *) n1 + write (*, *) 'data length n2=? (n2 = power of 2) ' + read (*, *) n2 + write (*, *) 'data length n3=? (n3 = power of 2) ' + read (*, *) n3 + ip(0) = 0 +! +! check of CDFT + call putdata3d(nmax, nmax, n1, n2, n3, a) + call cdft3d(nmax, nmax, n1, n2, n3, 1, a, t, ip, w) + call cdft3d(nmax, nmax, n1, n2, n3, -1, a, t, ip, w) + err = errorcheck3d(nmax, nmax, n1, n2, n3, + & 2.0d0 / n1 / n2 / n3, a) + write (*, *) 'cdft3d err= ', err +! +! check of RDFT + call putdata3d(nmax, nmax, n1, n2, n3, a) + call rdft3d(nmax, nmax, n1, n2, n3, 1, a, t, ip, w) + call rdft3d(nmax, nmax, n1, n2, n3, -1, a, t, ip, w) + err = errorcheck3d(nmax, nmax, n1, n2, n3, + & 2.0d0 / n1 / n2 / n3, a) + write (*, *) 'rdft3d err= ', err +! +! check of DDCT + call putdata3d(nmax, nmax, n1, n2, n3, a) + call ddct3d(nmax, nmax, n1, n2, n3, 1, a, t, ip, w) + call ddct3d(nmax, nmax, n1, n2, n3, -1, a, t, ip, w) + do j = 0, n2 - 1 + do i = 0, n1 - 1 + a(i, j, 0) = a(i, j, 0) * 0.5d0 + end do + end do + do j = 0, n3 - 1 + do i = 0, n1 - 1 + a(i, 0, j) = a(i, 0, j) * 0.5d0 + end do + do i = 0, n2 - 1 + a(0, i, j) = a(0, i, j) * 0.5d0 + end do + end do + err = errorcheck3d(nmax, nmax, n1, n2, n3, + & 8.0d0 / n1 / n2 / n3, a) + write (*, *) 'ddct3d err= ', err +! +! check of DDST + call putdata3d(nmax, nmax, n1, n2, n3, a) + call ddst3d(nmax, nmax, n1, n2, n3, 1, a, t, ip, w) + call ddst3d(nmax, nmax, n1, n2, n3, -1, a, t, ip, w) + do j = 0, n2 - 1 + do i = 0, n1 - 1 + a(i, j, 0) = a(i, j, 0) * 0.5d0 + end do + end do + do j = 0, n3 - 1 + do i = 0, n1 - 1 + a(i, 0, j) = a(i, 0, j) * 0.5d0 + end do + do i = 0, n2 - 1 + a(0, i, j) = a(0, i, j) * 0.5d0 + end do + end do + err = errorcheck3d(nmax, nmax, n1, n2, n3, + & 8.0d0 / n1 / n2 / n3, a) + write (*, *) 'ddst3d err= ', err +! + end +! +! + subroutine putdata3d(n1max, n2max, n1, n2, n3, a) + integer n1max, n2max, n1, n2, n3, j1, j2, j3, seed + real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : *), drnd + seed = 0 + do j3 = 0, n3 - 1 + do j2 = 0, n2 - 1 + do j1 = 0, n1 - 1 + a(j1, j2, j3) = drnd(seed) + end do + end do + end do + end +! +! + function errorcheck3d(n1max, n2max, n1, n2, n3, scale, a) + integer n1max, n2max, n1, n2, n3, j1, j2, j3, seed + real*8 scale, a(0 : n1max - 1, 0 : n2max - 1, 0 : *), + & drnd, err, e, errorcheck3d + err = 0 + seed = 0 + do j3 = 0, n3 - 1 + do j2 = 0, n2 - 1 + do j1 = 0, n1 - 1 + e = drnd(seed) - a(j1, j2, j3) * scale + err = max(err, abs(e)) + end do + end do + end do + errorcheck3d = err + end +! +! +! random number generator, 0 <= drnd < 1 + real*8 function drnd(seed) + integer seed + seed = mod(seed * 7141 + 54773, 259200) + drnd = seed * (1.0d0 / 259200) + end +! diff --git a/src/fft2d/fft2d/sample2d/shrtdctt.c b/src/fft2d/fft2d/sample2d/shrtdctt.c new file mode 100644 index 0000000..70d665f --- /dev/null +++ b/src/fft2d/fft2d/sample2d/shrtdctt.c @@ -0,0 +1,68 @@ +/* test of shrtdct.c */ + +#include <math.h> +#include <stdio.h> +#define MAX(x,y) ((x) > (y) ? (x) : (y)) + +/* random number generator, 0 <= RND < 1 */ +#define RND(p) ((*(p) = (*(p) * 7141 + 54773) % 259200) * (1.0 / 259200)) + +#define NMAX 16 + +int main() +{ + void ddct8x8s(int isgn, double **a); + void ddct16x16s(int isgn, double **a); + void putdata2d(int n1, int n2, double **a); + double errorcheck2d(int n1, int n2, double scale, double **a); + double err; + + int i; + double aarr[NMAX][NMAX], *a[NMAX], barr[NMAX][NMAX], *b[NMAX]; + for (i = 0; i < NMAX; i++) a[i] = aarr[i]; + for (i = 0; i < NMAX; i++) b[i] = barr[i]; + + /* check of 8x8 DCT */ + putdata2d(8, 8, a); + ddct8x8s(-1, a); + ddct8x8s(1, a); + err = errorcheck2d(8, 8, 1.0, a); + printf("ddct8x8s err= %g\n", err); + + /* check of 16x16 DCT */ + putdata2d(16, 16, a); + ddct16x16s(-1, a); + ddct16x16s(1, a); + err = errorcheck2d(16, 16, 1.0, a); + printf("ddct16x16s err= %g\n", err); + + return 0; +} + + +void putdata2d(int n1, int n2, double **a) +{ + int j1, j2, seed = 0; + + for (j1 = 0; j1 <= n1 - 1; j1++) { + for (j2 = 0; j2 <= n2 - 1; j2++) { + a[j1][j2] = RND(&seed); + } + } +} + + +double errorcheck2d(int n1, int n2, double scale, double **a) +{ + int j1, j2, seed = 0; + double err = 0, e; + + for (j1 = 0; j1 <= n1 - 1; j1++) { + for (j2 = 0; j2 <= n2 - 1; j2++) { + e = RND(&seed) - a[j1][j2] * scale; + err = MAX(err, fabs(e)); + } + } + return err; +} + diff --git a/src/fft2d/fft2d/shrtdct.c b/src/fft2d/fft2d/shrtdct.c new file mode 100644 index 0000000..455cb4c --- /dev/null +++ b/src/fft2d/fft2d/shrtdct.c @@ -0,0 +1,538 @@ +/* +Short Discrete Cosine Transform + data length :8x8, 16x16 + method :row-column, radix 4 FFT +functions + ddct8x8s : 8x8 DCT + ddct16x16s: 16x16 DCT +function prototypes + void ddct8x8s(int isgn, double **a); + void ddct16x16s(int isgn, double **a); +*/ + + +/* +-------- 8x8 DCT (Discrete Cosine Transform) / Inverse of DCT -------- + [definition] + <case1> Normalized 8x8 IDCT + C[k1][k2] = (1/4) * sum_j1=0^7 sum_j2=0^7 + a[j1][j2] * s[j1] * s[j2] * + cos(pi*j1*(k1+1/2)/8) * + cos(pi*j2*(k2+1/2)/8), 0<=k1<8, 0<=k2<8 + (s[0] = 1/sqrt(2), s[j] = 1, j > 0) + <case2> Normalized 8x8 DCT + C[k1][k2] = (1/4) * s[k1] * s[k2] * sum_j1=0^7 sum_j2=0^7 + a[j1][j2] * + cos(pi*(j1+1/2)*k1/8) * + cos(pi*(j2+1/2)*k2/8), 0<=k1<8, 0<=k2<8 + (s[0] = 1/sqrt(2), s[j] = 1, j > 0) + [usage] + <case1> + ddct8x8s(1, a); + <case2> + ddct8x8s(-1, a); + [parameters] + a[0...7][0...7] :input/output data (double **) + output data + a[k1][k2] = C[k1][k2], 0<=k1<8, 0<=k2<8 +*/ + + +/* Cn_kR = sqrt(2.0/n) * cos(pi/2*k/n) */ +/* Cn_kI = sqrt(2.0/n) * sin(pi/2*k/n) */ +/* Wn_kR = cos(pi/2*k/n) */ +/* Wn_kI = sin(pi/2*k/n) */ +#define C8_1R 0.49039264020161522456 +#define C8_1I 0.09754516100806413392 +#define C8_2R 0.46193976625564337806 +#define C8_2I 0.19134171618254488586 +#define C8_3R 0.41573480615127261854 +#define C8_3I 0.27778511650980111237 +#define C8_4R 0.35355339059327376220 +#define W8_4R 0.70710678118654752440 + + +void ddct8x8s(int isgn, double **a) +{ + int j; + double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + double xr, xi; + + if (isgn < 0) { + for (j = 0; j <= 7; j++) { + x0r = a[0][j] + a[7][j]; + x1r = a[0][j] - a[7][j]; + x0i = a[2][j] + a[5][j]; + x1i = a[2][j] - a[5][j]; + x2r = a[4][j] + a[3][j]; + x3r = a[4][j] - a[3][j]; + x2i = a[6][j] + a[1][j]; + x3i = a[6][j] - a[1][j]; + xr = x0r + x2r; + xi = x0i + x2i; + a[0][j] = C8_4R * (xr + xi); + a[4][j] = C8_4R * (xr - xi); + xr = x0r - x2r; + xi = x0i - x2i; + a[2][j] = C8_2R * xr - C8_2I * xi; + a[6][j] = C8_2R * xi + C8_2I * xr; + xr = W8_4R * (x1i - x3i); + x1i = W8_4R * (x1i + x3i); + x3i = x1i - x3r; + x1i += x3r; + x3r = x1r - xr; + x1r += xr; + a[1][j] = C8_1R * x1r - C8_1I * x1i; + a[7][j] = C8_1R * x1i + C8_1I * x1r; + a[3][j] = C8_3R * x3r - C8_3I * x3i; + a[5][j] = C8_3R * x3i + C8_3I * x3r; + } + for (j = 0; j <= 7; j++) { + x0r = a[j][0] + a[j][7]; + x1r = a[j][0] - a[j][7]; + x0i = a[j][2] + a[j][5]; + x1i = a[j][2] - a[j][5]; + x2r = a[j][4] + a[j][3]; + x3r = a[j][4] - a[j][3]; + x2i = a[j][6] + a[j][1]; + x3i = a[j][6] - a[j][1]; + xr = x0r + x2r; + xi = x0i + x2i; + a[j][0] = C8_4R * (xr + xi); + a[j][4] = C8_4R * (xr - xi); + xr = x0r - x2r; + xi = x0i - x2i; + a[j][2] = C8_2R * xr - C8_2I * xi; + a[j][6] = C8_2R * xi + C8_2I * xr; + xr = W8_4R * (x1i - x3i); + x1i = W8_4R * (x1i + x3i); + x3i = x1i - x3r; + x1i += x3r; + x3r = x1r - xr; + x1r += xr; + a[j][1] = C8_1R * x1r - C8_1I * x1i; + a[j][7] = C8_1R * x1i + C8_1I * x1r; + a[j][3] = C8_3R * x3r - C8_3I * x3i; + a[j][5] = C8_3R * x3i + C8_3I * x3r; + } + } else { + for (j = 0; j <= 7; j++) { + x1r = C8_1R * a[1][j] + C8_1I * a[7][j]; + x1i = C8_1R * a[7][j] - C8_1I * a[1][j]; + x3r = C8_3R * a[3][j] + C8_3I * a[5][j]; + x3i = C8_3R * a[5][j] - C8_3I * a[3][j]; + xr = x1r - x3r; + xi = x1i + x3i; + x1r += x3r; + x3i -= x1i; + x1i = W8_4R * (xr + xi); + x3r = W8_4R * (xr - xi); + xr = C8_2R * a[2][j] + C8_2I * a[6][j]; + xi = C8_2R * a[6][j] - C8_2I * a[2][j]; + x0r = C8_4R * (a[0][j] + a[4][j]); + x0i = C8_4R * (a[0][j] - a[4][j]); + x2r = x0r - xr; + x2i = x0i - xi; + x0r += xr; + x0i += xi; + a[0][j] = x0r + x1r; + a[7][j] = x0r - x1r; + a[2][j] = x0i + x1i; + a[5][j] = x0i - x1i; + a[4][j] = x2r - x3i; + a[3][j] = x2r + x3i; + a[6][j] = x2i - x3r; + a[1][j] = x2i + x3r; + } + for (j = 0; j <= 7; j++) { + x1r = C8_1R * a[j][1] + C8_1I * a[j][7]; + x1i = C8_1R * a[j][7] - C8_1I * a[j][1]; + x3r = C8_3R * a[j][3] + C8_3I * a[j][5]; + x3i = C8_3R * a[j][5] - C8_3I * a[j][3]; + xr = x1r - x3r; + xi = x1i + x3i; + x1r += x3r; + x3i -= x1i; + x1i = W8_4R * (xr + xi); + x3r = W8_4R * (xr - xi); + xr = C8_2R * a[j][2] + C8_2I * a[j][6]; + xi = C8_2R * a[j][6] - C8_2I * a[j][2]; + x0r = C8_4R * (a[j][0] + a[j][4]); + x0i = C8_4R * (a[j][0] - a[j][4]); + x2r = x0r - xr; + x2i = x0i - xi; + x0r += xr; + x0i += xi; + a[j][0] = x0r + x1r; + a[j][7] = x0r - x1r; + a[j][2] = x0i + x1i; + a[j][5] = x0i - x1i; + a[j][4] = x2r - x3i; + a[j][3] = x2r + x3i; + a[j][6] = x2i - x3r; + a[j][1] = x2i + x3r; + } + } +} + + + +/* +-------- 16x16 DCT (Discrete Cosine Transform) / Inverse of DCT -------- + [definition] + <case1> Normalized 16x16 IDCT + C[k1][k2] = (1/8) * sum_j1=0^15 sum_j2=0^15 + a[j1][j2] * s[j1] * s[j2] * + cos(pi*j1*(k1+1/2)/16) * + cos(pi*j2*(k2+1/2)/16), 0<=k1<16, 0<=k2<16 + (s[0] = 1/sqrt(2), s[j] = 1, j > 0) + <case2> Normalized 16x16 DCT + C[k1][k2] = (1/8) * s[k1] * s[k2] * sum_j1=0^15 sum_j2=0^15 + a[j1][j2] * + cos(pi*(j1+1/2)*k1/16) * + cos(pi*(j2+1/2)*k2/16), 0<=k1<16, 0<=k2<16 + (s[0] = 1/sqrt(2), s[j] = 1, j > 0) + [usage] + <case1> + ddct16x16s(1, a); + <case2> + ddct16x16s(-1, a); + [parameters] + a[0...15][0...15] :input/output data (double **) + output data + a[k1][k2] = C[k1][k2], 0<=k1<16, 0<=k2<16 +*/ + + +/* Cn_kR = sqrt(2.0/n) * cos(pi/2*k/n) */ +/* Cn_kI = sqrt(2.0/n) * sin(pi/2*k/n) */ +/* Wn_kR = cos(pi/2*k/n) */ +/* Wn_kI = sin(pi/2*k/n) */ +#define C16_1R 0.35185093438159561476 +#define C16_1I 0.03465429229977286565 +#define C16_2R 0.34675996133053686546 +#define C16_2I 0.06897484482073575308 +#define C16_3R 0.33832950029358816957 +#define C16_3I 0.10263113188058934529 +#define C16_4R 0.32664074121909413196 +#define C16_4I 0.13529902503654924610 +#define C16_5R 0.31180625324666780814 +#define C16_5I 0.16666391461943662432 +#define C16_6R 0.29396890060483967924 +#define C16_6I 0.19642373959677554532 +#define C16_7R 0.27330046675043937206 +#define C16_7I 0.22429189658565907106 +#define C16_8R 0.25 +#define W16_4R 0.92387953251128675613 +#define W16_4I 0.38268343236508977173 +#define W16_8R 0.70710678118654752440 + + +void ddct16x16s(int isgn, double **a) +{ + int j; + double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + double x4r, x4i, x5r, x5i, x6r, x6i, x7r, x7i; + double xr, xi; + + if (isgn < 0) { + for (j = 0; j <= 15; j++) { + x4r = a[0][j] - a[15][j]; + xr = a[0][j] + a[15][j]; + x4i = a[8][j] - a[7][j]; + xi = a[8][j] + a[7][j]; + x0r = xr + xi; + x0i = xr - xi; + x5r = a[2][j] - a[13][j]; + xr = a[2][j] + a[13][j]; + x5i = a[10][j] - a[5][j]; + xi = a[10][j] + a[5][j]; + x1r = xr + xi; + x1i = xr - xi; + x6r = a[4][j] - a[11][j]; + xr = a[4][j] + a[11][j]; + x6i = a[12][j] - a[3][j]; + xi = a[12][j] + a[3][j]; + x2r = xr + xi; + x2i = xr - xi; + x7r = a[6][j] - a[9][j]; + xr = a[6][j] + a[9][j]; + x7i = a[14][j] - a[1][j]; + xi = a[14][j] + a[1][j]; + x3r = xr + xi; + x3i = xr - xi; + xr = x0r + x2r; + xi = x1r + x3r; + a[0][j] = C16_8R * (xr + xi); + a[8][j] = C16_8R * (xr - xi); + xr = x0r - x2r; + xi = x1r - x3r; + a[4][j] = C16_4R * xr - C16_4I * xi; + a[12][j] = C16_4R * xi + C16_4I * xr; + x0r = W16_8R * (x1i - x3i); + x2r = W16_8R * (x1i + x3i); + xr = x0i + x0r; + xi = x2r + x2i; + a[2][j] = C16_2R * xr - C16_2I * xi; + a[14][j] = C16_2R * xi + C16_2I * xr; + xr = x0i - x0r; + xi = x2r - x2i; + a[6][j] = C16_6R * xr - C16_6I * xi; + a[10][j] = C16_6R * xi + C16_6I * xr; + xr = W16_8R * (x6r - x6i); + xi = W16_8R * (x6i + x6r); + x6r = x4r - xr; + x6i = x4i - xi; + x4r += xr; + x4i += xi; + xr = W16_4I * x7r - W16_4R * x7i; + xi = W16_4I * x7i + W16_4R * x7r; + x7r = W16_4R * x5r - W16_4I * x5i; + x7i = W16_4R * x5i + W16_4I * x5r; + x5r = x7r + xr; + x5i = x7i + xi; + x7r -= xr; + x7i -= xi; + xr = x4r + x5r; + xi = x5i + x4i; + a[1][j] = C16_1R * xr - C16_1I * xi; + a[15][j] = C16_1R * xi + C16_1I * xr; + xr = x4r - x5r; + xi = x5i - x4i; + a[7][j] = C16_7R * xr - C16_7I * xi; + a[9][j] = C16_7R * xi + C16_7I * xr; + xr = x6r - x7i; + xi = x7r + x6i; + a[5][j] = C16_5R * xr - C16_5I * xi; + a[11][j] = C16_5R * xi + C16_5I * xr; + xr = x6r + x7i; + xi = x7r - x6i; + a[3][j] = C16_3R * xr - C16_3I * xi; + a[13][j] = C16_3R * xi + C16_3I * xr; + } + for (j = 0; j <= 15; j++) { + x4r = a[j][0] - a[j][15]; + xr = a[j][0] + a[j][15]; + x4i = a[j][8] - a[j][7]; + xi = a[j][8] + a[j][7]; + x0r = xr + xi; + x0i = xr - xi; + x5r = a[j][2] - a[j][13]; + xr = a[j][2] + a[j][13]; + x5i = a[j][10] - a[j][5]; + xi = a[j][10] + a[j][5]; + x1r = xr + xi; + x1i = xr - xi; + x6r = a[j][4] - a[j][11]; + xr = a[j][4] + a[j][11]; + x6i = a[j][12] - a[j][3]; + xi = a[j][12] + a[j][3]; + x2r = xr + xi; + x2i = xr - xi; + x7r = a[j][6] - a[j][9]; + xr = a[j][6] + a[j][9]; + x7i = a[j][14] - a[j][1]; + xi = a[j][14] + a[j][1]; + x3r = xr + xi; + x3i = xr - xi; + xr = x0r + x2r; + xi = x1r + x3r; + a[j][0] = C16_8R * (xr + xi); + a[j][8] = C16_8R * (xr - xi); + xr = x0r - x2r; + xi = x1r - x3r; + a[j][4] = C16_4R * xr - C16_4I * xi; + a[j][12] = C16_4R * xi + C16_4I * xr; + x0r = W16_8R * (x1i - x3i); + x2r = W16_8R * (x1i + x3i); + xr = x0i + x0r; + xi = x2r + x2i; + a[j][2] = C16_2R * xr - C16_2I * xi; + a[j][14] = C16_2R * xi + C16_2I * xr; + xr = x0i - x0r; + xi = x2r - x2i; + a[j][6] = C16_6R * xr - C16_6I * xi; + a[j][10] = C16_6R * xi + C16_6I * xr; + xr = W16_8R * (x6r - x6i); + xi = W16_8R * (x6i + x6r); + x6r = x4r - xr; + x6i = x4i - xi; + x4r += xr; + x4i += xi; + xr = W16_4I * x7r - W16_4R * x7i; + xi = W16_4I * x7i + W16_4R * x7r; + x7r = W16_4R * x5r - W16_4I * x5i; + x7i = W16_4R * x5i + W16_4I * x5r; + x5r = x7r + xr; + x5i = x7i + xi; + x7r -= xr; + x7i -= xi; + xr = x4r + x5r; + xi = x5i + x4i; + a[j][1] = C16_1R * xr - C16_1I * xi; + a[j][15] = C16_1R * xi + C16_1I * xr; + xr = x4r - x5r; + xi = x5i - x4i; + a[j][7] = C16_7R * xr - C16_7I * xi; + a[j][9] = C16_7R * xi + C16_7I * xr; + xr = x6r - x7i; + xi = x7r + x6i; + a[j][5] = C16_5R * xr - C16_5I * xi; + a[j][11] = C16_5R * xi + C16_5I * xr; + xr = x6r + x7i; + xi = x7r - x6i; + a[j][3] = C16_3R * xr - C16_3I * xi; + a[j][13] = C16_3R * xi + C16_3I * xr; + } + } else { + for (j = 0; j <= 15; j++) { + x5r = C16_1R * a[1][j] + C16_1I * a[15][j]; + x5i = C16_1R * a[15][j] - C16_1I * a[1][j]; + xr = C16_7R * a[7][j] + C16_7I * a[9][j]; + xi = C16_7R * a[9][j] - C16_7I * a[7][j]; + x4r = x5r + xr; + x4i = x5i - xi; + x5r -= xr; + x5i += xi; + x7r = C16_5R * a[5][j] + C16_5I * a[11][j]; + x7i = C16_5R * a[11][j] - C16_5I * a[5][j]; + xr = C16_3R * a[3][j] + C16_3I * a[13][j]; + xi = C16_3R * a[13][j] - C16_3I * a[3][j]; + x6r = x7r + xr; + x6i = x7i - xi; + x7r -= xr; + x7i += xi; + xr = x4r - x6r; + xi = x4i - x6i; + x4r += x6r; + x4i += x6i; + x6r = W16_8R * (xi + xr); + x6i = W16_8R * (xi - xr); + xr = x5r + x7i; + xi = x5i - x7r; + x5r -= x7i; + x5i += x7r; + x7r = W16_4I * x5r + W16_4R * x5i; + x7i = W16_4I * x5i - W16_4R * x5r; + x5r = W16_4R * xr + W16_4I * xi; + x5i = W16_4R * xi - W16_4I * xr; + xr = C16_4R * a[4][j] + C16_4I * a[12][j]; + xi = C16_4R * a[12][j] - C16_4I * a[4][j]; + x2r = C16_8R * (a[0][j] + a[8][j]); + x3r = C16_8R * (a[0][j] - a[8][j]); + x0r = x2r + xr; + x1r = x3r + xi; + x2r -= xr; + x3r -= xi; + x0i = C16_2R * a[2][j] + C16_2I * a[14][j]; + x2i = C16_2R * a[14][j] - C16_2I * a[2][j]; + x1i = C16_6R * a[6][j] + C16_6I * a[10][j]; + x3i = C16_6R * a[10][j] - C16_6I * a[6][j]; + xr = x0i - x1i; + xi = x2i + x3i; + x0i += x1i; + x2i -= x3i; + x1i = W16_8R * (xi + xr); + x3i = W16_8R * (xi - xr); + xr = x0r + x0i; + xi = x0r - x0i; + a[0][j] = xr + x4r; + a[15][j] = xr - x4r; + a[8][j] = xi + x4i; + a[7][j] = xi - x4i; + xr = x1r + x1i; + xi = x1r - x1i; + a[2][j] = xr + x5r; + a[13][j] = xr - x5r; + a[10][j] = xi + x5i; + a[5][j] = xi - x5i; + xr = x2r + x2i; + xi = x2r - x2i; + a[4][j] = xr + x6r; + a[11][j] = xr - x6r; + a[12][j] = xi + x6i; + a[3][j] = xi - x6i; + xr = x3r + x3i; + xi = x3r - x3i; + a[6][j] = xr + x7r; + a[9][j] = xr - x7r; + a[14][j] = xi + x7i; + a[1][j] = xi - x7i; + } + for (j = 0; j <= 15; j++) { + x5r = C16_1R * a[j][1] + C16_1I * a[j][15]; + x5i = C16_1R * a[j][15] - C16_1I * a[j][1]; + xr = C16_7R * a[j][7] + C16_7I * a[j][9]; + xi = C16_7R * a[j][9] - C16_7I * a[j][7]; + x4r = x5r + xr; + x4i = x5i - xi; + x5r -= xr; + x5i += xi; + x7r = C16_5R * a[j][5] + C16_5I * a[j][11]; + x7i = C16_5R * a[j][11] - C16_5I * a[j][5]; + xr = C16_3R * a[j][3] + C16_3I * a[j][13]; + xi = C16_3R * a[j][13] - C16_3I * a[j][3]; + x6r = x7r + xr; + x6i = x7i - xi; + x7r -= xr; + x7i += xi; + xr = x4r - x6r; + xi = x4i - x6i; + x4r += x6r; + x4i += x6i; + x6r = W16_8R * (xi + xr); + x6i = W16_8R * (xi - xr); + xr = x5r + x7i; + xi = x5i - x7r; + x5r -= x7i; + x5i += x7r; + x7r = W16_4I * x5r + W16_4R * x5i; + x7i = W16_4I * x5i - W16_4R * x5r; + x5r = W16_4R * xr + W16_4I * xi; + x5i = W16_4R * xi - W16_4I * xr; + xr = C16_4R * a[j][4] + C16_4I * a[j][12]; + xi = C16_4R * a[j][12] - C16_4I * a[j][4]; + x2r = C16_8R * (a[j][0] + a[j][8]); + x3r = C16_8R * (a[j][0] - a[j][8]); + x0r = x2r + xr; + x1r = x3r + xi; + x2r -= xr; + x3r -= xi; + x0i = C16_2R * a[j][2] + C16_2I * a[j][14]; + x2i = C16_2R * a[j][14] - C16_2I * a[j][2]; + x1i = C16_6R * a[j][6] + C16_6I * a[j][10]; + x3i = C16_6R * a[j][10] - C16_6I * a[j][6]; + xr = x0i - x1i; + xi = x2i + x3i; + x0i += x1i; + x2i -= x3i; + x1i = W16_8R * (xi + xr); + x3i = W16_8R * (xi - xr); + xr = x0r + x0i; + xi = x0r - x0i; + a[j][0] = xr + x4r; + a[j][15] = xr - x4r; + a[j][8] = xi + x4i; + a[j][7] = xi - x4i; + xr = x1r + x1i; + xi = x1r - x1i; + a[j][2] = xr + x5r; + a[j][13] = xr - x5r; + a[j][10] = xi + x5i; + a[j][5] = xi - x5i; + xr = x2r + x2i; + xi = x2r - x2i; + a[j][4] = xr + x6r; + a[j][11] = xr - x6r; + a[j][12] = xi + x6i; + a[j][3] = xi - x6i; + xr = x3r + x3i; + xi = x3r - x3i; + a[j][6] = xr + x7r; + a[j][9] = xr - x7r; + a[j][14] = xi + x7i; + a[j][1] = xi - x7i; + } + } +} + |