casacore
Loading...
Searching...
No Matches
SCSL.h
Go to the documentation of this file.
1//# extern_fft.h: C++ wrapper functions for FORTRAN FFT code
2//# Copyright (C) 1993,1994,1995,1997,1999,2000
3//# Associated Universities, Inc. Washington DC, USA.
4//#
5//# This library is free software; you can redistribute it and/or modify it
6//# under the terms of the GNU Library General Public License as published by
7//# the Free Software Foundation; either version 2 of the License, or (at your
8//# option) any later version.
9//#
10//# This library is distributed in the hope that it will be useful, but WITHOUT
11//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13//# License for more details.
14//#
15//# You should have received a copy of the GNU Library General Public License
16//# along with this library; if not, write to the Free Software Foundation,
17//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18//#
19//# Correspondence concerning AIPS++ should be addressed as follows:
20//# Internet email: casa-feedback@nrao.edu.
21//# Postal address: AIPS++ Project Office
22//# National Radio Astronomy Observatory
23//# 520 Edgemont Road
24//# Charlottesville, VA 22903-2475 USA
25
26#ifndef SCIMATH_SCSL_H
27#define SCIMATH_SCSL_H
28
29#include <casacore/casa/aips.h>
30#include <casacore/casa/BasicSL/Complex.h>
31
32
33namespace casacore { //# NAMESPACE CASACORE - BEGIN
34
35// <summary>C++ Interface to the Sgi/Cray Scientific Library (SCSL)</summary>
36// <synopsis>
37// These are C++ wrapper functions for the transform routines in the SGI/Cray
38// Scientific Library (SCSL). The purpose of these definitions is to overload
39// the functions so that C++ users can access the functions in SCSL with
40// identical function names.
41//
42// <note role=warning>
43// Currently, the SCSL is available only on SGI machines.
44// </note>
45// </synopsis>
46
47class SCSL
48{
49public:
50// These routines compute the Fast Fourier Transform (FFT) of the complex
51// vector x, and store the result in vector y. <src>ccfft</src> does the
52// complex-to-complex transform and <src>zzfft</src> does the same for double
53// precision arrays.
54//
55// In FFT applications, it is customary to use zero-based subscripts; the
56// formulas are simpler that way. Suppose that the arrays are
57// dimensioned as follows:
58//
59// <srcblock>
60// COMPLEX X(0:N-1), Y(0:N-1)
61// </srcblock>
62//
63// The output array is the FFT of the input array, using the following
64// formula for the FFT:
65//
66// <srcblock>
67// n-1
68// Y(k) = scale * Sum [ X(j)*w**(isign*j*k) ] for k = 0, ..., n-1
69// j=0
70//
71// where:
72// w = exp(2*pi*i/n),
73// i = + sqrt(-1),
74// pi = 3.14159...,
75// isign = +1 or -1
76// </srcblock>
77//
78// Different authors use different conventions for which of the
79// transforms, isign = +1 or isign = -1, is the forward or inverse
80// transform, and what the scale factor should be in either case. You
81// can make this routine compute any of the various possible definitions,
82// however, by choosing the appropriate values for isign and scale.
83//
84// The relevant fact from FFT theory is this: If you take the FFT with
85// any particular values of isign and scale, the mathematical inverse
86// function is computed by taking the FFT with -isign and 1/(n*scale).
87// In particular, if you use isign = +1 and scale = 1.0 you can compute
88// the inverse FFT by using isign = -1 and scale = 1.0/n.
89//
90// The output array may be the same as the input array.
91//
92// <h3>Initialization</h3>
93// The table array stores the trigonometric tables used in calculation of
94// the FFT. You must initialize table by calling the routine with isign
95// = 0 prior to doing the transforms. If the value of the problem size,
96// n, does not change, table does not have to be reinitialized.
97//
98// <h3>Dimensions</h3>
99// In the preceding description, it is assumed that array subscripts were
100// zero-based, as is customary in FFT applications. Thus, the input and
101// output arrays are declared as follows:
102//
103// <srcblock>
104// COMPLEX X(0:N-1)
105// COMPLEX Y(0:N-1)
106// </srcblock>
107//
108// However, if you prefer to use the more customary FORTRAN style with
109// subscripts starting at 1 you do not have to change the calling
110// sequence, as in the following (assuming N > 0):
111//
112// <srcblock>
113// COMPLEX X(N)
114// COMPLEX Y(N)
115// </srcblock>
116//
117// <example>
118// These examples use the table and workspace sizes appropriate to the
119// Origin series.
120//
121// Example 1: Initialize the complex array table in preparation for
122// doing an FFT of size 1024. Only the isign, n, and table arguments are
123// used in this case. You can use dummy arguments or zeros for the other
124// arguments in the subroutine call.
125//
126// <srcblock>
127// REAL TABLE(30 + 2048)
128// CALL CCFFT(0, 1024, 0.0, DUMMY, DUMMY, TABLE, DUMMY, 0)
129// </srcblock>
130//
131// Example 2: x and y are complex arrays of dimension (0:1023). Take
132// the FFT of x and store the results in y. Before taking the FFT,
133// initialize the table array, as in example 1.
134//
135// <srcblock>
136// COMPLEX X(0:1023), Y(0:1023)
137// REAL TABLE(30 + 2048)
138// REAL WORK(2048)
139// CALL CCFFT(0, 1024, 1.0, X, Y, TABLE, WORK, 0)
140// CALL CCFFT(1, 1024, 1.0, X, Y, TABLE, WORK, 0)
141// </srcblock>
142//
143// Example 3: Using the same x and y as in example 2, take the inverse
144// FFT of y and store it back in x. The scale factor 1/1024 is used.
145// Assume that the table array is already initialized.
146//
147// <srcblock>
148// CALL CCFFT(-1, 1024, 1.0/1024.0, Y, X, TABLE, WORK, 0)
149// </srcblock>
150//
151// Example 4: Perform the same computation as in example 2, but assume
152// that the lower bound of each array is 1, rather than 0. No change was
153// needed in the subroutine calls.
154//
155// <srcblock>
156// COMPLEX X(1024), Y(1024)
157// CALL CCFFT(0, 1024, 1.0, X, Y, TABLE, WORK, 0)
158// CALL CCFFT(1, 1024, 1.0, X, Y, TABLE, WORK, 0)
159// </srcblock>
160//
161// Example 5: Do the same computation as in example 4, but put the
162// output back in array x to save storage space. Assume that table is
163// already initialized.
164//
165// <srcblock>
166// COMPLEX X(1024)
167// CALL CCFFT(1, 1024, 1.0, X, X, TABLE, WORK, 0)
168// </srcblock>
169// </example>
170//
171// Input parameters:
172// <dl compact>
173// <dt><b>isign</b>
174// <dd> Integer.
175// Specifies whether to initialize the table array or to do the
176// forward or inverse Fourier transform, as follows:
177//
178// If isign = 0, the routine initializes the table array and
179// returns. In this case, the only arguments used or checked
180// are isign, n, and table.
181//
182// If isign = +1 or -1, the value of isign is the sign of the
183// exponent used in the FFT formula.
184// <dt><b>n</b>
185// <dd> Integer. Size of the transform (the number of values in
186// the input array). n >= 1.
187// <dt><b>scale</b>
188// <dd> Scale factor.
189// <src>ccfft</src>: real.
190// <src>zzfft</src>: double precision.
191// Each element of the output array is multiplied by scale
192// after taking the Fourier transform, as defined by the previous
193// formula.
194// <dt><b>x</b>
195// <dd> Array of dimension (0:n-1).
196// <src>ccfft</src>: complex array.
197// <src>zzfft</src>: double complex array.
198//
199// Input array of values to be transformed.
200// <dt><b>isys</b>
201// <dd> Integer.
202// Algorithm used; value dependent on hardware system. Currently, no
203// special options are supported; therefore, you must always specify
204// an isys argument as constant 0.
205// </dl>
206// Output parameters:
207// <dl compact>
208// <dt><b>y</b>
209// <dd> Array of dimension (0:n-1).
210// <src>ccfft</src>: complex array.
211// <src>zzfft</src>: double complex array.
212// Output array of transformed values. The output array may be
213// the same as the input array. In that case, the transform is
214// done in place and the input array is overwritten with the
215// transformed values.
216// <dt><b>table</b>
217// <dd> Real array; dimension 2*n+30.
218//
219// Table of factors and trigonometric functions.
220//
221// If isign = 0, the routine initializes table (table is output
222// only).
223//
224// If isign = +1 or -1, the values in table are assumed to be
225// initialized already by a prior call with isign = 0 (table is
226// input only).
227// <dt><b>work</b>
228// <dd> Real array; dimension 2*n.
229//
230// Work array. This is a scratch array used for intermediate
231// calculations. Its address space must be different address
232// space from that of the input and output arrays.
233// </dl>
234// <group>
235static void ccfft(Int isign, Int n, Float scale, Complex* x,
236 Complex* y, Float* table, Float* work, Int isys);
237static void ccfft(Int isign, Int n, Double scale, DComplex* x,
238 DComplex* y, Double* table, Double* work, Int isys);
239static void zzfft(Int isign, Int n, Double scale, DComplex* x,
240 DComplex* y, Double* table, Double* work, Int isys);
241// </group>
242
243// <src>scfft/dzfft</src> computes the FFT of the real array x, and it stores
244// the results in the complex array y. <src>csfft/zdfft</src> computes the
245// corresponding inverse complex-to-real transform.
246//
247// It is customary in FFT applications to use zero-based subscripts; the
248// formulas are simpler that way. For these routines, suppose that the
249// arrays are dimensioned as follows:
250//
251// <srcblock>
252// REAL X(0:n-1)
253// COMPLEX Y(0:n/2)
254// </srcblock>
255//
256// Then the output array is the FFT of the input array, using the
257// following formula for the FFT:
258//
259// <srcblock>
260// n-1
261// Y(k) = scale * Sum [ X(j)*w**(isign*j*k) ] for k = 0, ..., n/2
262// j=0
263//
264// where:
265// w = exp(2*pi*i/n),
266// i = + sqrt(-1),
267// pi = 3.14159...,
268// isign = +1 or -1.
269// </srcblock>
270//
271// Different authors use different conventions for which of the
272// transforms, isign = +1 or isign = -1, is the forward or inverse
273// transform, and what the scale factor should be in either case. You
274// can make these routines compute any of the various possible
275// definitions, however, by choosing the appropriate values for isign and
276// scale.
277//
278// The relevant fact from FFT theory is this: If you call <src>scfft</src>
279// with any particular values of isign and scale, the mathematical inverse
280// function is computed by calling <src>csfft</src> with -isign and
281// 1/(n*scale). In particular, if you use isign = +1 and scale = 1.0 in
282// <src>scfft</src> for the forward FFT, you can compute the inverse FFT by
283// using <src>ccfft</src> with isign = -1 and scale = 1.0/n.
284//
285// <h3>Real-to-complex FFTs</h3>
286// Notice in the preceding formula that there are n real input values,
287// and n/2 + 1 complex output values. This property is characteristic of
288// real-to-complex FFTs.
289//
290// The mathematical definition of the Fourier transform takes a sequence
291// of n complex values and transforms it to another sequence of n complex
292// values. A complex-to-complex FFT routine, such as <src>ccfft</src>, will
293// take n complex input values, and produce n complex output values. In
294// fact, one easy way to compute a real-to-complex FFT is to store the
295// input data in a complex array, then call routine <src>ccfft</src> to
296// compute the FFT. You get the same answer when using the <src>scfft</src>
297// routine.
298//
299// The reason for having a separate real-to-complex FFT routine is
300// efficiency. Because the input data is real, you can make use of this
301// fact to save almost half of the computational work. The theory of
302// Fourier transforms tells us that for real input data, you have to
303// compute only the first n/2 + 1 complex output values, because the
304// remaining values can be computed from the first half of the values by
305// the simple formula:
306//
307// <srcblock>
308// Y(k) = conjg(Y(n-k)) for n/2 <= k <= n-1
309// </srcblock>
310//
311// where the notation conjgY represents the complex conjugate of y.
312//
313// In fact, in many applications, the second half of the complex output
314// data is never explicitly computed or stored. Likewise, as explained
315// later, only the first half of the complex data has to be supplied for
316// the complex-to-real FFT.
317//
318// Another implication of FFT theory is that, for real input data, the
319// first output value, Y(0), will always be a real number; therefore, the
320// imaginary part will always be 0. If n is an even number, Y(n/2) will
321// also be real and thus, have zero imaginary parts.
322//
323// <h3>Complex-to-real FFTs</h3>
324// Consider the complex-to-real case. The effect of the computation is
325// given by the preceding formula, but with X complex and Y real.
326//
327// Generally, the FFT transforms a complex sequence into a complex
328// sequence. However, in a certain application we may know the output
329// sequence is real. Often, this is the case because the complex input
330// sequence was the transform of a real sequence. In this case, you can
331// save about half of the computational work.
332//
333// According to the theory of Fourier transforms, for the output
334// sequence, Y, to be a real sequence, the following identity on the
335// input sequence, X, must be true:
336//
337// <srcblock>
338// X(k) = conjg(X(n-k)) for n/2 <= k <= n-1
339// </srcblock>
340//
341// And, in fact, the input values X(k) for k > n/2 need not be supplied;
342// they can be inferred from the first half of the input.
343//
344// Thus, in the complex-to-real routine, <src>csfft</src>, the arrays can be
345// dimensioned as follows:
346//
347// <srcblock>
348// COMPLEX X(0:n/2)
349// REAL Y(0:n-1)
350// </srcblock>
351//
352// There are n/2 + 1 complex input values and n real output values. Even
353// though only n/2 + 1 input values are supplied, the size of the
354// transform is still n in this case, because implicitly you are using
355// the FFT formula for a sequence of length n.
356//
357// Another implication of the theory is that X(0) must be a real number
358// (that is, it must have zero imaginary part). Also, if n is even,
359// X(n/2) must also be real. Routine <src>CSFFT</src> assumes that these
360// values are real; if you specify a nonzero imaginary part, it is ignored.
361//
362// <h3>Table Initialization</h3>
363// The table array stores the trigonometric tables used in calculation of
364// the FFT. This table must be initialized by calling the routine with
365// isign = 0 prior to doing the transforms. The table does not have to
366// be reinitialized if the value of the problem size, n, does not change.
367// Because <src>scfft</src> and <src>csfft</src> use the same format for
368// table, either can be used to initialize it (note that CCFFT uses a
369// different table format).
370//
371// <h3>Dimensions</h3>
372// In the preceding description, it is assumed that array subscripts were
373// zero-based, as is customary in FFT applications. Thus, the input and
374// output arrays are declared (assuming n > 0):
375//
376// <srcblock>
377// REAL X(0:n-1)
378// COMPLEX Y(0:n/2)
379// </srcblock>
380//
381// No change is needed in the calling sequence; however, if you prefer
382// you can use the more customary Fortran style with subscripts starting
383// at 1, as in the following:
384//
385// <srcblock>
386// REAL X(n)
387// COMPLEX Y(n/2 + 1)
388// </srcblock>
389//
390// <example>
391// These examples use the table and workspace sizes appropriate to Origin
392// series.
393//
394// Example 1: Initialize the complex array TABLE in preparation for
395// doing an FFT of size 1024. In this case only the arguments isign, n,
396// and table are used. You can use dummy arguments or zeros for the other
397// arguments in the subroutine call.
398//
399// <srcblock>
400// REAL TABLE(15 + 1024)
401// CALL SCFFT(0, 1024, 0.0, DUMMY, DUMMY, TABLE, DUMMY, 0)
402// </srcblock>
403//
404// Example 2: X is a real array of dimension (0:1023), and Y is a
405// complex array of dimension (0:512). Take the FFT of X and store the
406// results in Y. Before taking the FFT, initialize the TABLE array, as
407// in example 1.
408//
409// <srcblock>
410// REAL X(0:1023)
411// COMPLEX Y(0:512)
412// REAL TABLE(15 + 1024)
413// REAL WORK(1024)
414// CALL SCFFT(0, 1024, 1.0, X, Y, TABLE, WORK, 0)
415// CALL SCFFT(1, 1024, 1.0, X, Y, TABLE, WORK, 0)
416// </srcblock>
417//
418// Example 3: With X and Y as in example 2, take the inverse FFT of Y
419// and store it back in X. The scale factor 1/1024 is used. Assume that
420// the TABLE array is initialized already.
421//
422// <srcblock>
423// CALL CSFFT(-1, 1024, 1.0/1024.0, Y, X, TABLE, WORK, 0)
424// </srcblock>
425//
426// Example 4: Perform the same computation as in example 2, but assume
427// that the lower bound of each array is 1, rather than 0. The
428// subroutine calls are not changed.
429//
430// <srcblock>
431// REAL X(1024)
432// COMPLEX Y(513)
433// CALL SCFFT(0, 1024, 1.0, X, Y, TABLE, WORK, 0)
434// CALL SCFFT(1, 1024, 1.0, X, Y, TABLE, WORK, 0)
435// </srcblock>
436//
437// Example 5: Perform the same computation as in example 4, but
438// equivalence the input and output arrays to save storage space. Assume
439// that the TABLE array is initialized already.
440//
441// <srcblock>
442// REAL X(1024)
443// COMPLEX Y(513)
444// EQUIVALENCE ( X(1), Y(1) )
445// CALL SCFFT(1, 1024, 1.0, X, Y, TABLE, WORK, 0)
446// </srcblock>
447// </example>
448//
449// Input parameters:
450// <dl compact>
451// <dt><b>isign</b>
452// <dd> Integer.
453// Specifies whether to initialize the table array or to do the
454// forward or inverse Fourier transform, as follows:
455//
456// If isign = 0, the routine initializes the table array and
457// returns. In this case, the only arguments used or checked
458// are isign, n, and table.
459//
460// If isign = +1 or -1, the value of isign is the sign of the
461// exponent used in the FFT formula.
462// <dt><b>n</b>
463// <dd> Integer.
464// Size of transform. If n <= 2, <src>scfft/dzfft</src>
465// returns without calculating the transform.
466// <dt><b>scale</b>
467// <dd> Scale factor.
468// <src>scfft</src>: real.
469// <src>dzfft</src>: double precision.
470// <src>csfft</src>: real.
471// <src>zdfft</src>: double precision.
472// Each element of the output array is multiplied by scale
473// after taking the Fourier transform, as defined by the previous
474// formula.
475// <dt><b>x</b>
476// <dd> Input array of values to be transformed.
477// <src>scfft</src>: real array of dimension (0:n-1).
478// <src>dzfft</src>: double precision array of dimension (0:n-1).
479// <src>csfft</src>: complex array of dimension (0:n/2).
480// <src>zdfft</src>: double complex array of dimension (0:n/2).
481// <dt><b>isys</b>
482// <dd> Integer array of dimension (0:isys(0)).
483// Use isys to specify certain processor-specific parameters or
484// options. The first element of the array specifies how many
485// more elements are in the array.
486//
487// If isys(0) = 0, the default values of such parameters are
488// used. In this case, you can specify the argument value as
489// the scalar integer constant 0. If isys(0) > 0, isys(0)
490// gives the upper bound of the isys array; that is, if
491// il = isys(0), user-specified parameters are expected in
492// isys(1) through isys(il).
493// </dl>
494// Output parameters:
495// <dl compact>
496// <dt><b>y</b>
497// <dd> Output array of transformed values.
498// <src>scfft</src>: complex array of dimension (0:n/2).
499// <src>dzfft</src>: double complex array of dimension (0:n/2).
500// <src>csfft</src>: real array of dimension (0:n-1).
501// <src>zdfft</src>: double precision array of dimension (0:n-1).
502//
503// The output array, y, is the FFT of the the input array, x,
504// computed according to the preceding formula. The output
505// array may be equivalenced to the input array in the calling
506// program. Be careful when dimensioning the arrays, in this
507// case, to allow for the fact that the complex array contains
508// two (real) words more than the real array.
509// <dt><b>table</b>
510// <dd> Real array; dimension n+15.
511//
512// Table of factors and trigonometric functions.
513//
514// If isign = 0, the table array is initialized to contain
515// trigonometric tables needed to compute an FFT of size n.
516//
517// If isign = +1 or -1, the values in table are assumed to be
518// initialized already by a prior call with isign = 0.
519// <dt><b>work</b>
520// <dd> Real array; dimension n.
521//
522// Work array used for intermediate calculations. Its address
523// space must be different from that of the input and output
524// arrays.
525// </dl>
526// <group>
527static void scfft(Int isign, Int n, Float scale, Float* x,
528 Complex* y, Float* table, Float* work, Int isys);
529static void scfft(Int isign, Int n, Double scale, Double* x,
530 DComplex* y, Double* table, Double* work, Int isys);
531static void dzfft(Int isign, Int n, Double scale, Double* x,
532 DComplex* y, Double* table, Double* work, Int isys);
533static void csfft(Int isign, Int n, Float scale, Complex* x,
534 Float* y, Float* table, Float* work, Int isys);
535static void csfft(Int isign, Int n, Double scale, DComplex* x,
536 Double* y, Double* table, Double* work, Int isys);
537static void zdfft(Int isign, Int n, Double scale, DComplex* x,
538 Double* y, Double* table, Double* work, Int isys);
539// </group>
540
541// <src>ccfftm/zzfftm</src> computes the FFT of each column of the
542// complex matrix x, and stores the results in the columns of complex
543// matrix y.
544//
545// Suppose the arrays are dimensioned as follows:
546//
547// <srcblock>
548// COMPLEX X(0:ldx-1, 0:lot-1)
549// COMPLEX Y(0:ldy-1, 0:lot-1)
550//
551// where ldx >= n, ldy >= n.
552// </srcblock>
553//
554// Then column L of the output array is the FFT of column L of the
555// input array, using the following formula for the FFT:
556//
557// <srcblock>
558// n-1
559// Y(k, L) = scale * Sum [ X(j)*w**(isign*j*k) ]
560// j=0
561// for k = 0, ..., n-1
562// L = 0, ..., lot-1
563// where:
564// w = exp(2*pi*i/n),
565// i = + sqrt(-1),
566// pi = 3.14159...,
567// isign = +1 or -1
568// lot = the number of columns to transform
569// </srcblock>
570//
571// Different authors use different conventions for which of the
572// transforms, isign = +1 or isign = -1, is the forward or inverse
573// transform, and what the scale factor should be in either case. You
574// can make this routine compute any of the various possible definitions,
575// however, by choosing the appropriate values for isign and scale.
576//
577// The relevant fact from FFT theory is this: If you take the FFT with
578// any particular values of isign and scale, the mathematical inverse
579// function is computed by taking the FFT with -isign and 1/(n * scale).
580// In particular, if you use isign = +1 and scale = 1.0 for the forward
581// FFT, you can compute the inverse FFT by using the following: isign =
582// -1 and scale = 1.0/n.
583//
584// This section contains information about the algorithm for these
585// routines, the initialization of the table array, the declaration of
586// dimensions for x and y arrays, some performance tips, and some
587// implementation-dependent details.
588//
589// <h3>Algorithm</h3>
590// These routines use decimation-in-frequency type FFT. It takes the FFT
591// of the columns and vectorizes the operations along the rows of the
592// matrix. Thus, the vector length in the calculations depends on the
593// row size, and the strides for vector loads and stores are the leading
594// dimensions, ldx and ldy.
595//
596// <h3>Initialization</h3>
597// The table array stores the trigonometric tables used in calculation of
598// the FFT. You must initialize the table array by calling the routine
599// with isign = 0 prior to doing the transforms. If the value of the
600// problem size, n, does not change, table does not have to be
601// reinitialized.
602//
603// <h3>Dimensions</h3>
604// In the preceding description, it is assumed that array subscripts were
605// zero-based, as is customary in FFT applications. Thus, the input and
606// output arrays are declared as follows:
607//
608// <srcblock>
609// COMPLEX X(0:ldx-1, 0:lot-1)
610// COMPLEX Y(0:ldy-1, 0:lot-1)
611// </srcblock>
612//
613// The calling sequence does not have to change, however, if you prefer
614// to use the more customary Fortran style with subscripts starting at 1.
615// The same values of ldx and ldy would be passed to the subroutine even
616// if the input and output arrays were dimensioned as follows:
617//
618// <srcblock>
619// COMPLEX X(ldx, lot)
620// COMPLEX Y(ldy, lot)
621// </srcblock>
622//
623// <example>
624// Example 1: Initialize the TABLE array in preparation for doing an FFT
625// of size 128. Only the isign, n, and table arguments are used in this
626// case. You can use dummy arguments or zeros for the other arguments in
627// the subroutine call.
628//
629// <srcblock>
630// REAL TABLE(30 + 256)
631// CALL CCFFTM(0, 128, 0, 0., DUMMY, 1, DUMMY, 1, TABLE, DUMMY, 0)
632// </srcblock>
633//
634// Example 2: X and Y are complex arrays of dimension (0:128) by (0:55).
635// The first 128 elements of each column contain data. For performance
636// reasons, the extra element forces the leading dimension to be an odd
637// number. Take the FFT of the first 50 columns of X and store the
638// results in the first 50 columns of Y. Before taking the FFT,
639// initialize the TABLE array, as in example 1.
640//
641// <srcblock>
642// COMPLEX X(0:128, 0:55)
643// COMPLEX Y(0:128, 0:55)
644// REAL TABLE(30 + 256)
645// REAL WORK(256)
646// ...
647// CALL CCFFTM(0, 128, 50, 1.0, X, 129, Y, 129, TABLE, WORK, 0)
648// CALL CCFFTM(1, 128, 50, 1.0, X, 129, Y, 129, TABLE, WORK, 0)
649// </srcblock>
650//
651// Example 3: With X and Y as in example 2, take the inverse FFT of Y
652// and store it back in X. The scale factor 1/128 is used. Assume that
653// the TABLE array is already initialized.
654//
655// <srcblock>
656// CALL CCFFTM(-1, 128, 50, 1./128., Y, 129, X, 129, TABLE,WORK,0)
657// </srcblock>
658//
659// Example 4: Perform the same computation as in example 2, but assume
660// that the lower bound of each array is 1, rather than 0. The
661// subroutine calls are not changed.
662//
663// <srcblock>
664// COMPLEX X(129, 55)
665// COMPLEX Y(129, 55)
666// ...
667// CALL CCFFTM(0, 128, 50, 1.0, X, 129, Y, 129, TABLE, WORK, 0)
668// CALL CCFFTM(1, 128, 50, 1.0, X, 129, Y, 129, TABLE, WORK, 0)
669// </srcblock>
670//
671// Example 5: Perform the same computation as in example 4, but put the
672// output back in array X to save storage space. Assume that the TABLE
673// array is already initialized.
674//
675// <srcblock>
676// COMPLEX X(129, 55)
677// ...
678// CALL CCFFTM(1, 128, 50, 1.0, X, 129, X, 129, TABLE, WORK, 0)
679// </srcblock>
680//
681// </example>
682//
683// Input parameters:
684// <dl compact>
685// <dt><b>isign</b>
686// <dd> Integer.
687// Specifies whether to initialize the table array or to do the
688// forward or inverse Fourier transform, as follows:
689//
690// If isign = 0, the routine initializes the table array and
691// returns. In this case, the only arguments used or checked
692// are isign, n, and table.
693//
694// If isign = +1 or -1, the value of isign is the sign of the
695// exponent used in the FFT formula.
696// <dt><b>n</b>
697// <dd> Integer.
698// Size of each transform (the number of elements in each
699// column of the input and output matrix to be transformed).
700// Performance depends on the value of n, as explained above.
701// n >= 0; if n = 0, the routine returns.
702// <dt><b>lot</b>
703// <dd> Integer.
704// The number of transforms to be computed (lot size). This is
705// the number of elements in each row of the input and output
706// matrix. lot >= 0. If lot = 0, the routine returns.
707// <dt><b>scale</b>
708// <dd> Scale factor.
709// <src>ccfftm</src>: real.
710// <src>zzfftm</src>: double precision.
711// Each element of the output array is multiplied by scale
712// factor after taking the Fourier transform, as defined
713// previously.
714// <dt><b>x</b>
715// <dd> Array of dimension (0:ldx-1, 0:n2-1).
716// <src>ccfftm</src>: real array.
717// <src>zzfftm</src>: double precision array.
718// Input array of values to be transformed.
719// <dt><b>ldx</b>
720// <dd> The number of rows in the x array, as it was declared in the
721// calling program (the leading dimension of X). ldx >= MAX(n, 1).
722// <dt><b>ldy</b>
723// <dd> Integer.
724// The number of rows in the y array, as it was declared in the
725// calling program (the leading dimension of y). ldy >= MAX(n,
726// 1).
727// <dt><b>isys</b>
728// <dd> Integer array of dimension (0:isys(0)).
729// The first element of the array specifies how many more
730// elements are in the array. Use isys to specify certain
731// processor-specific parameters or options.
732//
733// If isys(0) = 0, the default values of such parameters are
734// used. In this case, you can specify the argument value as
735// the scalar integer constant 0.
736//
737// If isys(0) > 0, isys(0) gives the upper bound of the isys
738// array. Therefore, if il = isys(0), user-specified
739// parameters are expected in isys(1) through isys(il).
740// </dl>
741// Output parameters:
742// <dl compact>
743// <dt><b>y</b>
744// <dd> Array of dimension (0:ldy-1, 0:lot-1).
745// <src>ccfftm</src>: complex array.
746// <src>zzfftm</src>: double complex array.
747// Output array of transformed values. Each column of the
748// output array, y, is the FFT of the corresponding column of
749// the input array, x, computed according to the preceding
750// formula.
751//
752// The output array may be the same as the input array. In that
753// case, the transform is done in place. The input array is
754// overwritten with the transformed values. In this case, it
755// is necessary that ldx = ldy.
756// <dt><b>table</b>
757// <dd> Real array; dimension (30 + 2n).
758// Table of factors and trigonometric functions.
759//
760// If isign = 0, the routine initializes table (table is output
761// only).
762//
763// If isign = +1 or -1, the values in table are assumed to be
764// initialized already by a prior call with isign = 0 (table is
765// input only).
766// <dt><b>work</b>
767// <dd> Real array; dimension 2n.
768// Work array. This is a scratch array used for intermediate
769// calculations. Its address space must be different from that
770// of the input and output arrays.
771// </dl>
772// <group>
773static void ccfftm(Int isign, Int n, Int lot, Float scale, Complex*
774 x, Int ldx, Complex* y, Int ldy, Float* table,
775 Float* work, Int isys);
776static void zzfftm(Int isign, Int n, Int lot, Double scale, DComplex*
777 x, Int ldx, DComplex* y, Int ldy, Double* table,
778 Double* work, Int isys);
779// </group>
780
781// <src>scfftm/dzfftm</src> computes the FFT of each column of the real matrix
782// X, and it stores the results in the corresponding column of the complex
783// matrix Y. <src>csfftm/zdfftm</src> computes the corresponding inverse
784// transforms.
785//
786// In FFT applications, it is customary to use zero-based subscripts; the
787// formulas are simpler that way. First, the function of <src>scfftm</src> is
788// described. Suppose that the arrays are dimensioned as follows:
789//
790// <srcblock>
791// REAL X(0:ldx-1, 0:lot-1)
792// COMPLEX Y(0:ldy-1, 0:lot-1)
793//
794// where ldx >= n, ldy >= n/2 + 1.
795// </srcblock>
796//
797// Then column L of the output array is the FFT of column L of the input
798// array, using the following formula for the FFT:
799//
800// <srcblock>
801// n-1
802// Y(k, L) = scale * Sum [ X(j, L)*w**(isign*j*k) ]
803// j=0
804//
805// for k = 0, ..., n/2
806// L = 0, ..., lot-1 where:
807// w = exp(2*pi*i/n),
808// i = + sqrt(-1)
809// pi = 3.14159...,
810// isign = +1 or -1,
811// lot = the number of columns to transform
812// </srcblock>
813//
814// Different authors use different conventions for which transform
815// (isign = +1 or isign = -1) is used in the real-to-complex case, and
816// what the scale factor should be. Some adopt the convention that isign
817// = 1 for the real-to-complex transform, and isign = -1 for the
818// complex-to-real inverse. Others use the opposite convention. You can
819// make these routines compute any of the various possible definitions,
820// however, by choosing the appropriate values for isign and scale.
821//
822// The relevant fact from FFT theory is this: If you use <src>scfftm</src> to
823// take the real-to-complex FFT, using any particular values of isign and
824// scale, the mathematical inverse function is computed by using
825// <src>csfftm</src> with -isign and 1/ (n*scale). In particular, if you call
826// <src>scfftm</src> with isign = +1 and scale = 1.0, you can use
827// <src>csfftm</src> to compute the inverse complex-to-real FFT by using isign
828// = -1 and scale = 1.0/n.
829//
830// <h3>Real-to-complex FFTs</h3>
831// Notice in the preceding formula that there are n real input values and
832// (n/2) + 1 complex output values for each column. This property is
833// characteristic of real-to-complex FFTs.
834//
835// The mathematical definition of the Fourier transform takes a sequence
836// of n complex values and transforms it to another sequence of n complex
837// values. A complex-to-complex FFT routine, such as <src>ccfftm</src>, will
838// take n complex input values and produce n complex output values. In fact,
839// one easy way to compute a real-to-complex FFT is to store the input
840// data x in a complex array, then call routine <src>ccfftm</src> to compute
841// the FFT. You get the same answer when using the <src>scfftm</src> routine.
842//
843// A separate real-to-complex FFT routine is more efficient than the
844// equivalent complex-to-complex routine. Because the input data is
845// real, you can make use of this fact to save almost half of the
846// computational work. According to the theory of Fourier transforms,
847// for real input data, you have to compute only the first n/2 + 1
848// complex output values in each column, because the second half of the
849// FFT values in each column can be computed from the first half of the
850// values by the simple formula:
851//
852// <srcblock>
853// Y = conjgY for n/2 <= k <= n-1
854// k,L n-k, L
855//
856// where the notation conjg(z) represents the complex conjugate of z.
857// </srcblock>
858//
859// In fact, in many applications, the second half of the complex output
860// data is never explicitly computed or stored. Likewise, you must
861// supply only the first half of the complex data in each column has to
862// be supplied for the complex-to-real FFT.
863//
864// Another implication of FFT theory is that for real input data, the
865// first output value in each column, Y(0, L), will always be a real
866// number; therefore, the imaginary part will always be 0. If n is an
867// even number, Y(n/2, L) will also be real and have 0 imaginary parts.
868//
869// <h3>Complex-to-real FFTs</h3>
870// Consider the complex-to-real case. The effect of the computation is
871// given by the preceding formula, but with X complex and Y real.
872//
873// In general, the FFT transforms a complex sequence into a complex
874// sequence; however, in a certain application you may know the output
875// sequence is real, perhaps because the complex input sequence was the
876// transform of a real sequence. In this case, you can save about half
877// of the computational work.
878//
879// According to the theory of Fourier transforms, for the output
880// sequence, Y, to be a real sequence, the following identity on the
881// input sequence, X, must be true:
882//
883// <srcblock>
884// X = conjgX for n/2 <= k <= n-1
885// k,L n-k,L
886// And, in fact, the following input values
887//
888// X for k > n/2
889// k,L
890// do not have to be supplied, because they can be inferred from the
891// first half of the input.
892// </srcblock>
893//
894// Thus, in the complex-to-real routine, CSFFTM, the arrays can be
895// dimensioned as follows:
896//
897// <srcblock>
898// COMPLEX X(0:ldx-1, 0:lot-1)
899// REAL Y(0:ldy-1, 0:lot-1)
900//
901// where ldx >= n/2 + 1, ldy >= n.
902// </srcblock>
903//
904// In each column, there are (n/2) + 1 complex input values and n real
905// output values. Even though only (n/2) + 1 input values are supplied,
906// the size of the transform is still n in this case, because implicitly
907// the FFT formula for a sequence of length n is used.
908//
909// Another implication of the theory is that X(0, L) must be a real
910// number (that is, must have zero imaginary part). If n is an even
911// number, X(n/2, L) must also be real. Routine CSFFTM assumes that each
912// of these values is real; if a nonzero imaginary part is given, it is
913// ignored.
914//
915// <h3>Table Initialization</h3>
916// The table array contains the trigonometric tables used in calculation
917// of the FFT. You must initialize this table by calling the routine
918// with isign = 0 prior to doing the transforms. table does not have to
919// be reinitialized if the value of the problem size, n, does not change.
920//
921// <h3>Dimensions</h3>
922// In the preceding description, it is assumed that array subscripts were
923// zero-based, as is customary in FFT applications. Thus, the input and
924// output arrays are declared (for SCFFTM):
925//
926// <srcblock>
927// REAL X(0:ldx-1, 0:lot-1)
928// COMPLEX Y(0:ldy-1, 0:lot-1)
929// </srcblock>
930//
931// No change is made in the calling sequence, however, if you prefer to
932// use the more customary Fortran style with subscripts starting at 1.
933// The same values of ldx and ldy would be passed to the subroutine even
934// if the input and output arrays were dimensioned as follows:
935//
936// <srcblock>
937// REAL X(ldx, lot)
938// COMPLEX Y(ldy, lot)
939// </srcblock>
940
941// </example>
942// Example 1: Initialize the complex array TABLE in preparation for
943// doing an FFT of size 128. In this case only the isign, n, and table
944// arguments are used; you may use dummy arguments or zeros for the other
945// arguments in the subroutine call.
946//
947// <srcblock>
948// REAL TABLE(15 + 128)
949// CALL SCFFTM(0, 128, 1, 0.0, DUMMY, 1, DUMMY, 1,
950// & TABLE, DUMMY, 0)
951// </srcblock>
952//
953// Example 2: X is a real array of dimension (0:128, 0:55), and Y is a
954// complex array of dimension (0:64, 0:55). The first 128 elements in
955// each column of X contain data; the extra element forces an odd leading
956// dimension. Take the FFT of the first 50 columns of X and store the
957// results in the first 50 columns of Y. Before taking the FFT,
958// initialize the TABLE array, as in example 1.
959//
960// <srcblock>
961// REAL X(0:128, 0:55)
962// COMPLEX Y(0:64, 0:55)
963// REAL TABLE(15 + 128)
964// REAL WORK((128)
965// ...
966// CALL SCFFTM(0, 128, 50, 1.0, X, 129, Y, 65, TABLE, WORK, 0)
967// CALL SCFFTM(1, 128, 50, 1.0, X, 129, Y, 65, TABLE, WORK, 0)
968// </srcblock>
969//
970// Example 3: With X and Y as in example 2, take the inverse FFT of Y
971// and store it back in X. The scale factor 1/128 is used. Assume that
972// the TABLE array is initialized already.
973//
974// <srcblock>
975// CALL CSFFTM(-1, 128, 50, 1.0/128.0, Y, 65, X, 129,
976// & TABLE, WORK, 0)
977// </srcblock>
978//
979// Example 4: Perform the same computation as in example 2, but assume
980// that the lower bound of each array is 1, rather than 0. No change is
981// made in the subroutine calls.
982//
983// <srcblock>
984// REAL X(129, 56)
985// COMPLEX Y(65, 56)
986// ...
987// CALL SCFFTM(0, 128, 50, 1.0, X, 129, Y, 65, TABLE, WORK, 0)
988// CALL SCFFTM(1, 128, 50, 1.0, X, 129, Y, 65, TABLE, WORK, 0)
989// </srcblock>
990//
991// Example 5: Perform the same computation as in example 4, but
992// equivalence the input and output arrays to save storage space. In
993// this case, a row must be added to X, because it is equivalenced to a
994// complex array. The leading dimension of X is two times an odd number;
995// therefore, memory bank conflicts are minimal. Assume that TABLE is
996// initialized already.
997//
998// <srcblock>
999// REAL X(130, 56)
1000// COMPLEX Y(65, 56)
1001// EQUIVALENCE ( X(1, 1), Y(1, 1) )
1002// ...
1003// CALL SCFFTM(1, 128, 50, 1.0, X, 130, Y, 65, TABLE, WORK, 0)
1004// </srcblock>
1005// </example>
1006//
1007// Input parameters:
1008// <dl compact>
1009// <dt><b>isign</b>
1010// <dd> Integer.
1011// Specifies whether to initialize the table array or to do the
1012// forward or inverse Fourier transform, as follows:
1013//
1014// If isign = 0, the routine initializes the table array and
1015// returns. In this case, the only arguments used or checked
1016// are isign, n, and table.
1017//
1018// If isign = +1 or -1, the value of isign is the sign of the
1019// exponent used in the FFT formula.
1020// <dt><b>n</b>
1021// <dd> Integer.
1022// Size of the transforms (the number of elements in each
1023// column of the input and output matrix to be transformed).
1024// If n is not positive, <src>scfftm</src> or <src>csfftm</src> returns
1025// without computing a transforms.
1026// <dt><b>lot</b>
1027// <dd> Integer.
1028// The number of transforms to be computed (or "lot size").
1029// This is the number of elements in each row of the input and
1030// output matrix. If lot is not positive, <src>csfftm</src> or
1031// <src>scfftm</src> returns without computing a transforms.
1032// <dt><b>scale</b>
1033// <dd> Scale factor.
1034// <src>scfftm</src>: real.
1035// <src>dzfftm</src>: double precision.
1036// <src>csfftm</src>: real.
1037// <src>zdfftm</src>: double precision.
1038// Each element of the output array is multiplied by scale
1039// after taking the transform, as defined in the preceding
1040// formula.
1041// <dt><b>x</b>
1042// <dd> Input array of values to be transformed. Dimension (0:ldx-1,
1043// 0:lot-1).
1044// <src>scfftm</src>: real array.
1045// <src>dzfftm</src>: double precision array.
1046// <src>csfftm</src>: complex array.
1047// <src>zdfftm</src>: double complex array.
1048// <dt><b>ldx</b>
1049// <dd> Integer.
1050// The number of rows in the x array, as it was declared in the
1051// calling program. That is, the leading dimension of x.
1052// <src>scfftm, dzfftm</src>: ldx >= MAX(n, 1).
1053// <src>csfftm, zdfftm</src>: ldx >= MAX(n/2 + 1, 1).
1054// <dt><b>ldy</b>
1055// <dd> Integer.
1056// The number of rows in the y array, as it was declared in the
1057// calling program (the leading dimension of y).
1058// <src>scfftm, dzfftm</src>: ldy >= MAX(n/2 + 1, 1).
1059// <src>csfftm, zdfftm</src>: ldy >= MAX(n, 1).
1060// <dt><b>isys</b>
1061// <dd> Integer array of dimension (0:isys(0)).
1062// The first element of the array specifies how many more
1063// elements are in the array. Use isys to specify certain
1064// processor-specific parameters or options.
1065//
1066// If isys(0) = 0, the default values of such parameters are
1067// used. In this case, you can specify the argument value as
1068// the scalar integer constant 0.
1069//
1070// If isys(0) > 0, isys(0) gives the upper bound of the isys
1071// array. Therefore, if il = isys(0), user-specified
1072// parameters are expected in isys(1) through isys(il).
1073// </dl>
1074// Output parameters:
1075// <dl compact>
1076// <dt><b>y</b>
1077// <dd> Output array of transformed values. Dimension (0:ldy-1,
1078// 0:lot-1).
1079// <src>scfftm</src>: complex array.
1080// <src>dzfftm</src>: double complex array.
1081// <src>csfftm</src>: real array.
1082// <src>zdfftm</src>: double precision array.
1083//
1084// Each column of the output array, y, is the FFT of the
1085// corresponding column of the input array, x, computed
1086// according to the preceding formula. The output array may be
1087// equivalenced to the input array. In that case, the transform
1088// is done in place and the input array is overwritten with the
1089// transformed values. In this case, the following conditions
1090// on the leading dimensions must hold:
1091//
1092// <src>scfftm, dzfftm</src>: ldx = 2ldy.
1093// <src>csfftm, zdfftm</src>: ldy = 2ldx.
1094// <dt><b>table</b>
1095// <dd> Real array; dimension (15 + n).
1096// Table of factors and trigonometric functions.
1097// This array must be initialized by a call to <src>scfftm</src> (or
1098// <src>csfftm</src>) with isign = 0.
1099//
1100// If isign = 0, table is initialized to contain trigonometric
1101// tables needed to compute an FFT of length n.
1102// <dt><b>work</b>
1103// <dd> Real array; dimension n.
1104// Work array used for intermediate calculations. Its address
1105// space must be different from that of the input and output
1106// arrays.
1107// </dl>
1108// <group>
1109static void scfftm(Int isign, Int n, Int lot, Float scale, Float*
1110 x, Int ldx, Complex* y, Int ldy, Float* table,
1111 Float* work, Int isys);
1112static void dzfftm(Int isign, Int n, Int lot, Double scale, Double*
1113 x, Int ldx, DComplex* y, Int ldy, Double* table,
1114 Double* work, Int isys);
1115static void csfftm(Int isign, Int n, Int lot, Float scale, Complex*
1116 x, Int ldx, Float* y, Int ldy, Float* table,
1117 Float* work, Int isys);
1118static void zdfftm(Int isign, Int n, Int lot, Double scale, DComplex*
1119 x, Int ldx, Double* y, Int ldy, Double* table,
1120 Double* work, Int isys);
1121// </group>
1122
1123// These routines compute the two-dimensional complex Fast Fourier
1124// Transform (FFT) of the complex matrix x, and store the results in the
1125// complex matrix y. <src>ccfft2d</src> does the complex-to-complex
1126// transform and <src>zzfft</src> does the same for double
1127// precision arrays.
1128//
1129// In FFT applications, it is customary to use zero-based subscripts; the
1130// formulas are simpler that way. Suppose that the arrays are
1131// dimensioned as follows:
1132//
1133// <srcblock>
1134// COMPLEX X(0:n1-1, 0:n2-1)
1135// COMPLEX Y(0:n1-1, 0:n2-1)
1136// </srcblock>
1137//
1138// These routines compute the formula:
1139//
1140// <srcblock>
1141// n2-1 n1-1
1142// Y(k1, k2) = scale * Sum Sum [ X(j1, j2)*w1**(j1*k1)*w2**(j2*k2) ]
1143// j2=0 j1=0
1144//
1145// for k1 = 0, ..., n1-1
1146// k2 = 0, ..., n2-1
1147//
1148// where:
1149// w1 = exp(isign*2*pi*i/n1)
1150// w2 = exp(isign*2*pi*i/n2)
1151// i = + sqrt(-1)
1152// pi = 3.14159...,
1153// isign = +1 or -1
1154// </srcblock>
1155//
1156// Different authors use different conventions for which of the
1157// transforms, isign = +1 or isign = -1, is the forward or inverse
1158// transform, and what the scale factor should be in either case. You
1159// can make this routine compute any of the various possible definitions,
1160// however, by choosing the appropriate values for isign and scale.
1161//
1162// The relevant fact from FFT theory is this: If you take the FFT with
1163// any particular values of isign and scale, the mathematical inverse
1164// function is computed by taking the FFT with -isign and
1165// 1/(n1*n2*scale). In particular, if you use isign = +1 and scale = 1.0
1166// for the forward FFT, you can compute the inverse FFT by using isign =
1167// -1 and scale = 1.0/(n1*n2).
1168//
1169// <h3>Algorithm</h3>
1170// These routines use a routine very much like <src>ccfftm/zzfftm</src> to do
1171// multiple FFTs first on all columns in an input matrix and then on all
1172// of the rows.
1173//
1174// <h3>Initialization</h3>
1175// The table array stores factors of n1 and n2 and also trigonometric
1176// tables that are used in calculation of the FFT. This table must be
1177// initialized by calling the routine with isign = 0. If the values of
1178// the problem sizes, n1 and n2, do not change, the table does not have
1179// to be reinitialized.
1180//
1181// <h3>Dimensions</h3>
1182// In the preceding description, it is assumed that array subscripts were
1183// zero-based, as is customary in FFT applications. Thus, the input and
1184// output arrays are declared as follows:
1185//
1186// <srcblock>
1187// COMPLEX X(0:ldx-1, 0:n2-1)
1188// COMPLEX Y(0:ldy-1, 0:n2-1)
1189// </srcblock>
1190//
1191// However, the calling sequence does not change if you prefer to use the
1192// more customary Fortran style with subscripts starting at 1. The same
1193// values of ldx and ldy would be passed to the subroutine even if the
1194// input and output arrays were dimensioned as follows:
1195//
1196// <srcblock>
1197// COMPLEX X(ldx, n2)
1198// COMPLEX Y(ldy, n2)
1199// </srcblock>
1200//
1201// <example>
1202// All examples here are for Origin series only.
1203//
1204// Example 1: Initialize the TABLE array in preparation for doing a
1205// two-dimensional FFT of size 128 by 256. In this case only the isign,
1206// n1, n2, and table arguments are used; you can use dummy arguments or
1207// zeros for other arguments.
1208//
1209// <srcblock>
1210// REAL TABLE ((30 + 256) + (30 + 512))
1211// CALL CCFFT2D (0, 128, 256, 0.0, DUMMY, 1, DUMMY, 1,
1212// & TABLE, DUMMY, 0)
1213// </srcblock>
1214//
1215// Example 2: X and Y are complex arrays of dimension (0:128, 0:255).
1216// The first 128 elements of each column contain data. For performance
1217// reasons, the extra element forces the leading dimension to be an odd
1218// number. Take the two-dimensional FFT of X and store it in Y.
1219// Initialize the TABLE array, as in example 1.
1220//
1221// <srcblock>
1222// COMPLEX X(0:128, 0:255)
1223// COMPLEX Y(0:128, 0:255)
1224// REAL TABLE((30 + 256) + (30 + 512))
1225// REAL WORK(2*128*256)
1226// ...
1227// CALL CCFFT2D(0, 128, 256, 1.0, X, 129, Y, 129, TABLE, WORK, 0)
1228// CALL CCFFT2D(1, 128, 256, 1.0, X, 129, Y, 129, TABLE, WORK, 0)
1229// </srcblock>
1230//
1231// Example 3: With X and Y as in example 2, take the inverse FFT of Y
1232// and store it back in X. The scale factor 1/(128*256) is used. Assume
1233// that the TABLE array is already initialized.
1234//
1235// <srcblock>
1236// CALL CCFFT2D(-1, 128, 256, 1.0/(128.0*256.0), Y, 129,
1237// & X, 129, TABLE, WORK, 0)
1238// </srcblock>
1239//
1240// Example 4: Perform the same computation as in example 2, but assume
1241// that the lower bound of each array is 1, rather than 0. The
1242// subroutine calls are not changed.
1243//
1244// <srcblock>
1245// COMPLEX X(129, 256)
1246// COMPLEX Y(129, 256)
1247// ...
1248// CALL CCFFT2D(0, 128, 256, 1.0, X, 129, Y, 129, TABLE, WORK, 0)
1249// CALL CCFFT2D(1, 128, 256, 1.0, X, 129, Y, 129, TABLE, WORK, 0)
1250// </srcblock>
1251//
1252// Example 5: Perform the same computation as in example 4, but put the
1253// output back in array X to save storage space. Assume that the TABLE
1254// array is already initialized.
1255//
1256// <srcblock>
1257// COMPLEX X(129, 256)
1258// ...
1259// CALL CCFFT2D(1, 128, 256, 1.0, X, 129, X, 129, TABLE, WORK, 0)
1260// </srcblock>
1261// </example>
1262//
1263// Input parameters:
1264// <dl compact>
1265// <dt><b>isign</b>
1266// <dd> Integer.
1267// Specifies whether to initialize the table array or to do the
1268// forward or inverse transform as follows:
1269//
1270// If isign = 0, the routine initializes the table array and
1271// returns. In this case, the only arguments used or checked
1272// are isign, n1, n2, table.
1273//
1274// If isign = +1 or -1, the value of isign is the sign of the
1275// exponent used in the FFT formula.
1276// <dt><b>n1</b>
1277// <dd> Integer.
1278// Transform size in the first dimension. If n1 is not
1279// positive, the routine returns without performing a
1280// transform.
1281// <dt><b>n2</b>
1282// <dd> Integer.
1283// Transform size in the second dimension. If n2 is not
1284// positive, the routine returns without performing a
1285// transform.
1286// <dt><b>scale</b>
1287// <dd> Scale factor.
1288// ccfft2d: real.
1289// zzfft2d: double precision.
1290// Each element of the output array is multiplied by scale
1291// factor after taking the Fourier transform, as defined
1292// previously.
1293// <dt><b>x</b>
1294// <dd> Array of dimension (0:ldx-1, 0:n2-1).
1295// ccfft2d: complex array.
1296// zzfft2d: double complex array.
1297// Input array of values to be transformed.
1298// <dt><b>ldx</b>
1299// <dd> Integer.
1300// The number of rows in the x array, as it was declared in the
1301// calling program (the leading dimension of x). ldx >=
1302// MAX(n1, 1).
1303// <dt><b>ldy</b>
1304// <dd> Integer.
1305//
1306// The number of rows in the y array, as it was declared in the
1307// calling program (the leading dimension of y). ldy >=
1308// MAX(n1, 1).
1309// <dt><b>isys</b>
1310// <dd> Algorithm used; value dependent on hardware system. Currently, no
1311// special options are supported; therefore, you must always specify
1312// an isys argument as constant 0.
1313// </dl>
1314// Output parameters:
1315// <dl compact>
1316// <dt><b>y</b>
1317// <dd> Array of dimension (0:ldy-1, 0:n2-1).
1318// ccfft2d: complex array.
1319// zzfft2d: double complex array.
1320// Output array of transformed values. The output array may be
1321// the same as the input array, in which case, the transform is
1322// done in place (the input array is overwritten with the
1323// transformed values). In this case, it is necessary that
1324// ldx = ldy.
1325// <dt><b>table</b>
1326// <dd> Real array; dimension (30+ 2 * n1) + (30 + 2 * n2).
1327//
1328// Table of factors and trigonometric functions.
1329//
1330// If isign = 0, the routine initializes table (table is output
1331// only).
1332//
1333// If isign = +1 or -1, the values in table are assumed to be
1334// initialized already by a prior call with isign = 0 (table is
1335// input only).
1336// <dt><b>work</b>
1337// <dd> Real array; dimension 2 * (n1*n2).
1338//
1339// Work array. This is a scratch array used for intermediate
1340// calculations. Its address space must be different from that
1341// of the input and output arrays.
1342// </dl>
1343// <group>
1344static void ccfft2d(Int isign, Int n1, Int n2, Float scale, Complex*
1345 x, Int ldx, Complex* y, Int ldy, Float* table,
1346 Float* work, Int isys);
1347static void zzfft2d(Int isign, Int n1, Int n2, Double scale, DComplex*
1348 x, Int ldx, DComplex* y, Int ldy, Double* table,
1349 Double* work, Int isys);
1350// </group>
1351
1352// <src>scfft2d/dzfft2d</src> computes the two-dimensional Fast Fourier
1353// Transform (FFT) of the real matrix x, and it stores the results in the
1354// complex matrix y. <src>csfft2d/zdfft2d</src> computes the corresponding
1355// inverse transform.
1356//
1357// In FFT applications, it is customary to use zero-based subscripts; the
1358// formulas are simpler that way. First the function of <src>scfft2d</src> is
1359// described. Suppose the arrays are dimensioned as follows:
1360//
1361// <srcblock>
1362// REAL X(0:ldx-1, 0:n2-1)
1363// COMPLEX Y(0:ldy-1, 0:n2-1)
1364//
1365// where ldx >= n1 ldy >= (n1/2) + 1.
1366// </srcblock>
1367//
1368// <src>scfft2d</src> computes the formula:
1369//
1370// <srcblock>
1371// n2-1 n1-1
1372// Y(k1, k2) = scale * Sum Sum [ X(j1, j2)*w1**(j1*k1)*w2**(j2*k2) ]
1373// j2=0 j1=0
1374//
1375// for k1 = 0, ..., n1/2 + 1
1376// k2 = 0, ..., n2-1
1377//
1378// where:
1379// w1 = exp(isign*2*pi*i/n1)
1380// w2 = exp(isign*2*pi*i/n2)
1381// i = + sqrt(-1)
1382// pi = 3.14159...,
1383// isign = +1 or -1
1384// </srcblock>
1385//
1386// Different authors use different conventions for which of the
1387// transforms, isign = +1 or isign = -1, is the forward or inverse
1388// transform, and what the scale factor should be in either case. You
1389// can make these routines compute any of the various possible
1390// definitions, however, by choosing the appropriate values for isign and
1391// scale.
1392//
1393// The relevant fact from FFT theory is this: If you take the FFT with
1394// any particular values of isign and scale, the mathematical inverse
1395// function is computed by taking the FFT with -isign and 1/(n1 * n2 *
1396// scale). In particular, if you use isign = +1 and scale = 1.0 for the
1397// forward FFT, you can compute the inverse FFT by using isign = -1 and
1398// scale = 1.0/(n1 . n2).
1399//
1400// <src>scfft2d</src> is very similar in function to <src>ccfft2d</src>, but
1401// it takes the real-to-complex transform in the first dimension, followed by
1402// the complex-to-complex transform in the second dimension.
1403//
1404// <src>csfft2d</src> does the reverse. It takes the complex-to-complex FFT
1405// in the second dimension, followed by the complex-to-real FFT in the first
1406// dimension.
1407//
1408// See the <src>scfft</src> man page for more information about real-to-complex
1409// and complex-to-real FFTs. The two-dimensional analog of the conjugate
1410// formula is as follows:
1411//
1412// <srcblock>
1413// Y = conjg Y
1414// k , k n1 - k , n2 - k
1415// 1 2 1 2
1416//
1417// for n1/2 < k <= n1 - 1
1418// 1
1419//
1420// 0 <= k <= n2 - 1
1421// 2
1422// where the notation conjg(z) represents the complex conjugate of z.
1423// </srcblock>
1424//
1425// Thus, you have to compute only (slightly more than) half of the output
1426// values, namely:
1427//
1428// <srcblock>
1429// Y for 0 <= k <= n1/2 0 <= k <= n2 - 1
1430// k , k 1 2
1431// 1 2
1432// </srcblock>
1433//
1434// <h3>Algorithm</h3>
1435// <src>scfft2d</src> uses a routine similar to <src>scfftm</src> to do a
1436// real-to-complex FFT on the columns, then uses a routine similar to
1437// <src>ccfftm</src> to do a complex-to-complex FFT on the rows.
1438//
1439// <src>csfft2d</src> uses a routine similar to <src>ccfftm</src> to do a
1440// complex-to-complex FFT on the rows, then uses a routine similar to
1441// <src>csfftm</src> to do a complex-to-real FFT on the columns.
1442//
1443// <h3>Table Initialization</h3>
1444// The table array stores factors of n1 and n2, and trigonometric tables
1445// that are used in calculation of the FFT. table must be initialized by
1446// calling the routine with isign = 0. table does not have to be
1447// reinitialized if the values of the problem sizes, n1 and n2, do not
1448// change.
1449//
1450// <h3>Dimensions</h3>
1451// In the preceding description, it is assumed that array subscripts were
1452// zero-based, as is customary in FFT applications. Thus, the input and
1453// output arrays are declared:
1454//
1455// <srcblock>
1456// REAL X(0:ldx-1, 0:n2-1)
1457// COMPLEX Y(0:ldy-1, 0:n2-1)
1458// </srcblock>
1459//
1460// No change is made in the calling sequence, however, if you prefer to
1461// use the more customary Fortran style with subscripts starting at 1.
1462// The same values of ldx and ldy would be passed to the subroutine even
1463// if the input and output arrays were dimensioned as follows:
1464//
1465// <srcblock>
1466// REAL X(ldx, n2)
1467// COMPLEX Y(ldy, n2)
1468// </srcblock>
1469//
1470// <example>
1471// The following examples are for Origin series only.
1472//
1473// Example 1: Initialize the TABLE array in preparation for doing a
1474// two-dimensional FFT of size 128 by 256. In this case, only the isign,
1475// n1, n2, and table arguments are used; you can use dummy arguments or
1476// zeros for other arguments.
1477//
1478// <srcblock>
1479// REAL TABLE ((15 + 128) + 2(15 + 256))
1480// CALL SCFFT2D (0, 128, 256, 0.0, DUMMY, 1, DUMMY, 1,
1481// & TABLE, DUMMY, 0)
1482// </srcblock>
1483//
1484// Example 2: X is a real array of size (0:128, 0: 255), and Y is a
1485// complex array of dimension (0:64, 0:255). The first 128 elements of
1486// each column of X contain data; for performance reasons, the extra
1487// element forces the leading dimension to be an odd number. Take the
1488// two-dimensional FFT of X and store it in Y. Initialize the TABLE
1489// array, as in example 1.
1490//
1491// <srcblock>
1492// REAL X(0:128, 0:255)
1493// COMPLEX Y(0:64, 0:255)
1494// REAL TABLE ((15 + 128) + 2(15 + 256))
1495// REAL WORK(128*256)
1496// ...
1497// CALL SCFFT2D(0, 128, 256, 1.0, X, 129, Y, 65, TABLE, WORK, 0)
1498// CALL SCFFT2D(1, 128, 256, 1.0, X, 129, Y, 65, TABLE, WORK, 0)
1499// </srcblock>
1500//
1501// Example 3: With X and Y as in example 2, take the inverse FFT of Y
1502// and store it back in X. The scale factor 1/(128*256) is used. Assume
1503// that the TABLE array is initialized already.
1504//
1505// <srcblock>
1506// CALL CSFFT2D(-1, 128, 256, 1.0/(128.0*256.0), Y, 65,
1507// & X, 130, TABLE, WORK, 0)
1508// </srcblock>
1509//
1510// Example 4: Perform the same computation as in example 2, but assume
1511// that the lower bound of each array is 1, rather than 0. No change is
1512// needed in the subroutine calls.
1513//
1514// <srcblock>
1515// REAL X(129, 256)
1516// COMPLEX Y(65, 256)
1517// ...
1518// CALL SCFFT2D(0, 128, 256, 1.0, X, 129, Y, 65, TABLE, WORK, 0)
1519// CALL SCFFT2D(1, 128, 256, 1.0, X, 129, Y, 65, TABLE, WORK, 0)
1520// </srcblock>
1521//
1522// Example 5: Perform the same computation as in example 4, but
1523// equivalence the input and output arrays to save storage space. In
1524// this case, a row must be added to X, because it is equivalenced to a
1525// complex array. Assume that TABLE is already initialized.
1526//
1527// <srcblock>
1528// REAL X(130, 256)
1529// COMPLEX Y(65, 256)
1530// EQUIVALENCE ( X(1, 1), Y(1, 1) )
1531// ...
1532// CALL SCFFT2D(1, 128, 256, 1.0, X, 130, Y, 65, TABLE, WORK, 0)
1533// </srcblock>
1534// </example>
1535//
1536// Input parameters:
1537// <dl compact>
1538// <dt><b>isign</b>
1539// <dd> Integer.
1540// Specifies whether to initialize the table array or to do the
1541// forward or inverse Fourier transform, as follows:
1542//
1543// If isign = 0, the routine initializes the table array and
1544// returns. In this case, the only arguments used or checked
1545// are isign, n, and table.
1546//
1547// If isign = +1 or -1, the value of isign is the sign of the
1548// exponent used in the FFT formula.
1549// <dt><b>n1</b>
1550// <dd> Integer.
1551// Transform size in the first dimension. If n1 is not
1552// positive, <src>scfft2d</src> returns without calculating a
1553// transform.
1554// <dt><b>n2</b>
1555// <dd> Integer.
1556// Transform size in the second dimension. If n2 is not
1557// positive, <src>scfft2d</src> returns without calculating a
1558// transform.
1559// <dt><b>scale</b>
1560// <dd> Scale factor.
1561// <src>scfft2d</src>: real.
1562// <src>dzfft2d</src>: double precision.
1563// <src>csfft2d</src>: real.
1564// <src>zdfft2d</src>: double precision.
1565// Each element of the output array is multiplied by scale
1566// factor after taking the Fourier transform, as defined
1567// previously.
1568// <dt><b>x</b>
1569// <dd> Array of dimension (0:ldx-1, 0:n2-1).
1570// <src>scfft2d</src>: real array.
1571// <src>dzfft2d</src>: double precision array.
1572// <src>csfft2d</src>: complex array.
1573// <src>zdfft2d</src>: double complex array.
1574//
1575// Array of values to be transformed.
1576// <dt><b>ldx</b>
1577// <dd> Integer.
1578// The number of rows in the x array, as it was declared in the
1579// calling program. That is, the leading dimension of x.
1580// <src>scfft2d, dzfft2d</src>: ldx >= MAX(n1, 1).
1581// <src>csfft2d, zdfft2d</src>: ldx >= MAX(n1/2 + 1, 1).
1582// <dt><b>ldy</b>
1583// <dd> Integer.
1584//
1585// The number of rows in the y array, as it was declared in the
1586// calling program (the leading dimension of y).
1587//
1588// <src>scfft2d, dzfft2d</src>: ldy >= MAX(n1/2 + 1, 1).
1589// <src>csfft2d, zdfft2d</src>: ldy >= MAX(n1 + 2, 1).
1590//
1591// In the complex-to-real routine, two extra elements are in
1592// the first dimension (ldy >= n1 + 2, rather than just ldy >=
1593// n1). These elements are needed for intermediate storage
1594// during the computation. On exit, their value is undefined.
1595// <dt><b>isys</b>
1596// <dd> Integer array of dimension (0:isys(0)).
1597// The first element of the array specifies how many more
1598// elements are in the array. Use isys to specify certain
1599// processor-specific parameters or options.
1600//
1601// If isys(0) = 0, the default values of such parameters are
1602// used. In this case, you can specify the argument value as
1603// the scalar integer constant 0.
1604//
1605// If isys(0) > 0, isys(0) gives the upper bound of the isys
1606// array. Therefore, if il = isys(0), user-specified
1607// parameters are expected in isys(1) through isys(il).
1608// <dt><b>isys</b>
1609// <dd> Algorithm used; value dependent on hardware system. Currently, no
1610// special options are supported; therefore, you must always specify
1611// an isys argument as constant 0.
1612// </dl>
1613// Output parameters:
1614// <dl compact>
1615// <dt><b>y</b>
1616// <dd> <src>scfft2d</src>: complex array.
1617// <src>dzfft2d</src>: double complex array.
1618// <src>csfft2d</src>: real array.
1619// <src>zdfft2d</src>: double precision array.
1620//
1621// Output array of transformed values. The output array can be
1622// the same as the input array, in which case, the transform is
1623// done in place and the input array is overwritten with the
1624// transformed values. In this case, it is necessary that the
1625// following equalities hold:
1626//
1627// <src>scfft2d, dzfft2d</src>: ldx = 2 * ldy.
1628// <src>csfft2d, zdfft2d</src>: ldy = 2 * ldx.
1629// <dt><b>table</b>
1630// <dd> Real array; dimension (15 + n1) + 2(15 + n2).
1631//
1632// Table of factors and trigonometric functions.
1633//
1634// If isign = 0, the routine initializes table (table is output
1635// only).
1636//
1637// If isign = +1 or -1, the values in table are assumed to be
1638// initialized already by a prior call with isign = 0 (table is
1639// input only).
1640// <dt><b>work</b>
1641// <dd> Real array; dimension (n1 * n2).
1642//
1643// Work array. This is a scratch array used for intermediate
1644// calculations. Its address space must be different from that
1645// of the input and output arrays.
1646// </dl>
1647// <group>
1648static void scfft2d(Int isign, Int n1, Int n2, Float scale, Float*
1649 x, Int ldx, Complex* y, Int ldy, Float* table,
1650 Float* work, Int isys);
1651static void dzfft2d(Int isign, Int n1, Int n2, Double scale, Double*
1652 x, Int ldx, DComplex* y, Int ldy, Double* table,
1653 Double* work, Int isys);
1654static void csfft2d(Int isign, Int n1, Int n2, Float scale, Complex*
1655 x, Int ldx, Float* y, Int ldy, Float* table,
1656 Float* work, Int isys);
1657static void zdfft2d(Int isign, Int n1, Int n2, Double scale, DComplex*
1658 x, Int ldx, Double* y, Int ldy, Double* table,
1659 Double* work, Int isys);
1660// </group>
1661
1662// These routines compute the three-dimensional complex FFT of the
1663// complex matrix X, and store the results in the complex matrix Y.
1664//
1665// In FFT applications, it is customary to use zero-based subscripts; the
1666// formulas are simpler that way. So suppose the arrays are dimensioned
1667// as follows:
1668//
1669// <srcblock>
1670// COMPLEX X(0:n1-1, 0:n2-1, 0:n3-1)
1671// COMPLEX Y(0:n1-1, 0:n2-1, 0:n3-1)
1672// </srcblock>
1673//
1674// These routines compute the formula:
1675//
1676// <srcblock>
1677// Y(k1,k2,k3) =
1678// n1-1 n2-1 n3-1
1679// scale * Sum Sum Sum [X(j1,j2,j3)*w1**(j1*k1)*w2**(j2*k2)*w3**(j3*k3)]
1680// j1=0 j2=0 j3=0
1681//
1682// for k1 = 0, ..., n1 - 1,
1683// k2 = 0, ..., n2 - 1,
1684// k3 = 0, ..., n3 - 1,
1685//
1686// where:
1687// w1 = exp(isign*2*pi*i/n1),
1688// w2 = exp(isign*2*pi*i/n2),
1689// w3 = exp(isign*2*pi*i/n3),
1690// i = + sqrt(-1)
1691// pi = 3.14159...
1692// isign = +1 or -1
1693// </srcblock>
1694//
1695// Different authors use different conventions for which of the
1696// transforms, isign = +1 or isign = -1, is the forward or inverse
1697// transform, and what the scale factor should be in either case. You
1698// can make this routine compute any of the various possible definitions,
1699// however, by choosing the appropriate values for isign and scale.
1700//
1701// The relevant fact from FFT theory is this: If you take the FFT with
1702// any particular values of isign and scale, the mathematical inverse
1703// function is computed by taking the FFT with -isign and 1/(n1 * n2 * n3
1704// * scale). In particular, if you use isign = +1 and scale = 1.0 for
1705// the forward FFT, you can compute the inverse FFT by using isign = -1
1706// and scale = 1/(n1 . n2 . n3).
1707//
1708// <example>
1709// The following examples are for Origin series only.
1710//
1711// Example 1: Initialize the TABLE array in preparation for doing a
1712// three-dimensional FFT of size 128 by 128 by 128. In this case, only
1713// the isign, n1, n2, n3, and table arguments are used; you can use dummy
1714// arguments or zeros for other arguments.
1715//
1716// <srcblock>
1717// REAL TABLE ((30 + 256) + (30 + 256) + (30 + 256))
1718// CALL CCFFT3D (0, 128, 128, 128, 0.0, DUMMY, 1, 1, DUMMY, 1, 1,
1719// & TABLE, DUMMY, 0)
1720// </srcblock>
1721//
1722// Example 2: X and Y are complex arrays of dimension (0:128, 0:128,
1723// 0:128). The first 128 elements of each dimension contain data; for
1724// performance reasons, the extra element forces the leading dimensions
1725// to be odd numbers. Take the three-dimensional FFT of X and store it
1726// in Y. Initialize the TABLE array, as in example 1.
1727//
1728// <srcblock>
1729// COMPLEX X(0:128, 0:128, 0:128)
1730// COMPLEX Y(0:128, 0:128, 0:128)
1731// REAL TABLE ((30+256) + (30 + 256) + (30 + 256))
1732// REAL WORK 2(128*128*128)
1733// ...
1734// CALL CCFFT3D(0, 128, 128, 128, 1.0, DUMMY, 1, 1,
1735// & DUMMY, 1, 1, TABLE, WORK, 0)
1736// CALL CCFFT3D(1, 128, 128, 128, 1.0, X, 129, 129,
1737// & Y, 129, 129, TABLE, WORK, 0)
1738// </srcblock>
1739//
1740// Example 3: With X and Y as in example 2, take the inverse FFT of Y
1741// and store it back in X. The scale factor 1.0/(128.0**3) is used.
1742// Assume that the TABLE array is already initialized.
1743//
1744// <srcblock>
1745// CALL CCFFT3D(-1, 128, 128, 128, 1.0/(128.0**3), Y, 129, 129,
1746// & X, 129, 129, TABLE, WORK, 0)
1747// </srcblock>
1748//
1749// Example 4: Perform the same computation as in example 2, but assume
1750// that the lower bound of each array is 1, rather than 0. The
1751// subroutine calls do not change.
1752//
1753// <srcblock>
1754// COMPLEX X(129, 129, 129)
1755// COMPLEX Y(129, 129, 129)
1756// ...
1757// CALL CCFFT3D(0, 128, 128, 128, 1.0, DUMMY, 1, 1,
1758// & DUMMY, 1, 1, TABLE, WORK, 0)
1759// CALL CCFFT3D(1, 128, 128, 128, 1.0, X, 129, 129,
1760// & Y, 129, 129, TABLE, WORK, 0)
1761// </srcblock>
1762//
1763// Example 5: Perform the same computation as in example 4, but put the
1764// output back in the array X to save storage space. Assume that the
1765// TABLE array is already initialized.
1766//
1767// <srcblock>
1768// COMPLEX X(129, 129, 129)
1769// ...
1770// CALL CCFFT3D(1, 128, 128, 128, 1.0, X, 129, 129,
1771// & X, 129, 129, TABLE, WORK, 0)
1772// </srcblock>
1773// </example>
1774//
1775// Input parameters:
1776// <dl compact>
1777// <dt><b>isign</b>
1778// <dd> Integer.
1779// Specifies whether to initialize the table array or to do the
1780// forward or inverse Fourier transform, as follows:
1781//
1782// If isign = 0, the routine initializes the table array and
1783// returns. In this case, the only arguments used or checked
1784// are isign, n1, n2, n3, and table.
1785//
1786// If isign = +1 or -1, the value of isign is the sign of the
1787// exponent used in the FFT formula.
1788//
1789// <dt><b>n1</b>
1790// <dd> Integer.
1791// Transform size in the first dimension. If n1 is not
1792// positive, the routine returns without computing a transform.
1793//
1794// <dt><b>n2</b>
1795// <dd> Integer.
1796// Transform size in the second dimension. If n2 is not
1797// positive, the routine returns without computing a transform.
1798//
1799// <dt><b>n3</b>
1800// <dd> Integer.
1801// Transform size in the third dimension. If n3 is not
1802// positive, the routine returns without computing a transform.
1803//
1804// <dt><b>scale</b>
1805// <dd> Scale factor.
1806// <src>ccfft3d</src>: real.
1807// <src>zzfft3d</src>: double precision.
1808//
1809// Each element of the output array is multiplied by scale
1810// after taking the Fourier transform, as defined previously.
1811//
1812// <dt><b>x</b>
1813// <dd> Array of dimension (0:ldx-1, 0:ldx2-1, 0:n3-1).
1814// <src>ccfft3d</src>: complex array.
1815// <src>zzfft3d</src>: double complex array.
1816//
1817// Input array of values to be transformed.
1818//
1819// <dt><b>ldx</b>
1820// <dd> Integer.
1821// The first dimension of x, as it was declared in the calling
1822// program (the leading dimension of x). ldx >= MAX(n1, 1).
1823//
1824// <dt><b>ldx2</b>
1825// <dd> Integer.
1826// The second dimension of x, as it was declared in the calling
1827// program. ldx2 >= MAX(n2, 1).
1828//
1829// <dt><b>ldy</b>
1830// <dd> Integer.
1831// The first dimension of y, as it was declared in the calling
1832// program (the leading dimension of y). ldy >= MAX(n1, 1).
1833//
1834// <dt><b>ldy2</b>
1835// <dd> Integer.
1836// The second dimension of y, as it was declared in the calling
1837// program. ldy2 >= MAX(n2, 1).
1838//
1839// <dt><b>isys</b>
1840// <dd> Algorithm used; value dependent on hardware system. Currently, no
1841// special options are supported; therefore, you must always specify
1842// an isys argument as constant 0.
1843//
1844// isys = 0 or 1 depending on the amount of workspace the user
1845// can provide to the routine.
1846// </dl>
1847// Output parameters:
1848// <dl compact>
1849// <dt><b>y</b>
1850// <dd> Array of dimension (0:ldy-1, 0:ldy2-1, 0:n3-1).
1851// <src>ccfft3d</src>: complex array.
1852// <src>zzfft3d</src>: double complex array.
1853//
1854// Output array of transformed values. The output array may be
1855// the same as the input array, in which case, the transform is
1856// done in place; that is, the input array is overwritten with
1857// the transformed values. In this case, it is necessary that
1858// ldx = ldy, and ldx2 = ldy2.
1859//
1860// <dt><b>table</b>
1861// <dd> Real array; dimension 30 + 2 * n1) + (30 + 2 * n2) + (30 + 2 * n3).
1862//
1863// Table of factors and trigonometric functions. If isign = 0,
1864// the routine initializes table (table is output only). If
1865// isign = +1 or -1, the values in table are assumed to be
1866// initialized already by a prior call with isign = 0 (table is
1867// input only).
1868//
1869// <dt><b>work</b>
1870// <dd> Real array; dimension (n1 * n2 * n3).
1871//
1872// Work array. This is a scratch array used for intermediate
1873// calculations. Its address space must be different from that
1874// of the input and output arrays.
1875//
1876// </dl>
1877// <group>
1878static void ccfft3d(Int isign, Int n1, Int n2, Int n3, Float scale,
1879 Complex* x, Int ldx, Int ldx2, Complex* y, Int ldy,
1880 Int ldy2, Float* table, Float* work, Int isys);
1881static void zzfft3d(Int isign, Int n1, Int n2, Int n3, Double scale,
1882 DComplex* x, Int ldx, Int ldx2, DComplex* y, Int
1883 ldy, Int ldy2, Double* table, Double* work, Int
1884 isys);
1885// </group>
1886
1887// These are C++ wrapper functions for the 3D real-to-complex and
1888// complex-to-real transform routines in the SGI/Cray Scientific Library
1889// (SCSL). The purpose of these definitions is to overload the functions so
1890// that C++ users can access the functions in SCSL with identical function
1891// names.
1892//
1893// <note role=warning>
1894// Currently, the SCSL is available only on SGI machines.
1895// </note>
1896//
1897// <src>scfft3d/dzfft3d</src> computes the three-dimensional Fast Fourier
1898// Transform (FFT) of the real matrix X, and it stores the results in the
1899// complex matrix Y. <src>csfft3d/zdfft3d</src> computes the corresponding
1900// inverse transform.
1901//
1902// In FFT applications, it is customary to use zero-based subscripts; the
1903// formulas are simpler that way. First, the function of <src>SCFFT3D</src> is
1904// described. Suppose the arrays are dimensioned as follows:
1905//
1906// <srcblock>
1907// REAL X(0:ldx-1, 0:ldx2-1, 0:n3-1)
1908// COMPLEX Y(0:ldy-1, 0:ldy2-1, 0:n3-1)
1909// </srcblock>
1910//
1911// <src>scfft3d</src> computes the formula:
1912//
1913// <srcblock>
1914// Y(k1,k2,k3) =
1915// n1-1 n2-1 n3-1
1916// scale * Sum Sum Sum [X(j1,j2,j3)*w1**(j1*k1)*w2**(j2*k2)*w3**(j3*k3)]
1917// j1=0 j2=0 j3=0
1918//
1919// for k1 = 0, ..., n1/2,
1920// k2 = 0, ..., n2 - 1,
1921// k3 = 0, ..., n3 - 1,
1922//
1923// where:
1924// w1 = exp(isign*2*pi*i/n1),
1925// w2 = exp(isign*2*pi*i/n2),
1926// w3 = exp(isign*2*pi*i/n3),
1927// i = + sqrt(-1)
1928// pi = 3.14159...
1929// isign = +1 or -1
1930// </srcblock>
1931//
1932// Different authors use different conventions for which of the
1933// transforms, isign = +1 or isign = -1, is the forward or inverse
1934// transform, and what the scale factor should be in either case. You
1935// can make these routines compute any of the various possible
1936// definitions, however, by choosing the appropriate values for isign and
1937// scale.
1938//
1939// The relevant fact from FFT theory is this: If you take the FFT with
1940// any particular values of isign and scale, the mathematical inverse
1941// function is computed by taking the FFT with -isign and 1/(n1 * n2 * n3
1942// * scale). In particular, if you use isign = +1 and scale = 1.0 for
1943// the forward FFT, you can compute the inverse FFT by isign = -1 and
1944//
1945// <srcblock>
1946// scale = 1.0/(n1*n2*n3).
1947// </srcblock>
1948//
1949// <src>scfft3d</src> is very similar in function to <src>ccfft3d</src>, but
1950// it takes the real-to-complex transform in the first dimension, followed by
1951// the complex-to-complex transform in the second and third dimensions.
1952//
1953// <src>csfft3d</src> does the reverse. It takes the complex-to-complex FFT
1954// in the third and second dimensions, followed by the complex-to-real FFT in
1955// the first dimension.
1956//
1957// See the <src>scfftm</src> man page for more information about
1958// real-to-complex and complex-to-real FFTs. The three dimensional analog of
1959// the conjugate formula is as follows:
1960//
1961// <srcblock>
1962// Y = conjg Y
1963// k ,k ,k n1 - k , n2 - k , n3 - k
1964// 1 2 3 1 2 3
1965//
1966// for n1/2 < k <= n1 - 1
1967// 1
1968//
1969// 0 <= k <= n2 - 1
1970// 2
1971//
1972// 0 <= k <= n3 - 1
1973// 3
1974// where the notation conjg(z) represents the complex conjugate of z.
1975// </srcblock>
1976//
1977// Thus, you have to compute only (slightly more than) half out the
1978// output values, namely:
1979//
1980// <srcblock>
1981// Y
1982// k ,k ,k
1983// 1 2 3
1984//
1985// for 0 <= k <= n1/2
1986// 1
1987//
1988// 0 <= k <= n2 - 1
1989// 2
1990//
1991// 0 <= k <= n3 - 1
1992// </srcblock>
1993//
1994// <h3>Algorithm</h3>
1995// <src>scfft3d</src> uses a routine similar to <src>scfftm</src> to do
1996// multiple FFTs first on all columns of the input matrix, then uses a routine
1997// similar to <src>ccfftm</src> on all rows of the result, and then on all
1998// planes of that result. See <src>scfftm</src> and <src>ccfftm</src> for
1999// more information about the algorithms used.
2000//
2001// </example>
2002// The following examples are for Origin series only.
2003//
2004// Example 1: Initialize the TABLE array in preparation for doing a
2005// three-dimensional FFT of size 128 by 128 by 128. In this case only
2006// the isign, n1, n2, n3, and table arguments are used; you can use dummy
2007// arguments or zeros for other arguments.
2008//
2009// <srcblock>
2010// REAL TABLE ((15 + 128) + 2(15+128) + 2( 15 + 128))
2011// CALL SCFFT3D (0, 128, 128, 128, 0.0, DUMMY, 1, 1, DUMMY, 1, 1,
2012// & TABLE, DUMMY, 0)
2013// </srcblock>
2014//
2015// Example 2: X is a real array of size (0:128, 0:128, 0:128). The
2016// first 128 elements of each dimension contain data; for performance
2017// reasons, the extra element forces the leading dimensions to be odd
2018// numbers. Y is a complex array of dimension (0:64, 0:128, 0:128).
2019// Take the three-dimensional FFT of X and store it in Y. Initialize the
2020// TABLE array, as in example 1.
2021//
2022// <srcblock>
2023// REAL X(0:128, 0:128, 0:128)
2024// COMPLEX Y(0:64, 0:128, 0:128)
2025// REAL TABLE ((15+128) + 2(15 + 128) + 2(15 + 128))
2026// REAL WORK(128*128*128)
2027// ...
2028// CALL SCFFT3D(0, 128, 128, 128, 1.0, X, 129, 129,
2029// & Y, 65, 129, TABLE, WORK, 0)
2030// CALL SCFFT3D(1, 128, 128, 128, 1.0, X, 129, 129,
2031// & Y, 65, 129, TABLE, WORK, 0)
2032// </srcblock>
2033//
2034// Example 3: With X and Y as in example 2, take the inverse FFT of Y
2035// and store it back in X. The scale factor 1/(128**3) is used. Assume
2036// that the TABLE array is initialized already.
2037//
2038// <srcblock>
2039// CALL CSFFT3D(-1, 128, 128, 128, 1.0/128.0**3, Y, 65, 129,
2040// & X, 130, 129, TABLE, WORK, 0)
2041// </srcblock>
2042//
2043// Example 4: Perform the same computation as in example 2, but assume
2044// that the lower bound of each array is 1, rather than 0. No change is
2045// made in the subroutine calls.
2046//
2047// <srcblock>
2048// REAL X(129, 129, 129)
2049// COMPLEX Y(65, 129, 129)
2050// REAL TABLE ((15+128) + 2(15 + 128) + 2(15 + 128))
2051// REAL WORK(128*128*128)
2052// ...
2053// CALL SCFFT3D(0, 128, 128, 128, 1.0, X, 129, 129,
2054// & Y, 65, 129, TABLE, WORK, 0)
2055// CALL SCFFT3D(1, 128, 128, 128, 1.0, X, 129, 129,
2056// & X, 129, 129, TABLE, WORK, 0)
2057// </srcblock>
2058//
2059// Example 5: Perform the same computation as in example 4, but
2060// equivalence the input and output arrays to save storage space. Assume
2061// that the TABLE array is initialized already.
2062//
2063// <srcblock>
2064// REAL X(130, 129, 129)
2065// COMPLEX Y(65, 129, 129)
2066// EQUIVALENCE (X(1, 1, 1), Y(1, 1, 1))
2067// ...
2068// CALL SCFFT3D(1, 128, 128, 128, 1.0, X, 130, 129,
2069// & Y, 65, 129, TABLE, WORK, 0)
2070// </srcblock>
2071// </example>
2072//
2073// Input parameters:
2074// <dl compact>
2075// <dt><b>isign</b>
2076// <dd> Integer.
2077// Specifies whether to initialize the table array or to do the
2078// forward or inverse Fourier transform, as follows:
2079//
2080// If isign = 0, the routine initializes the table array and
2081// returns. In this case, the only arguments used or checked
2082// are isign, n1, n2, n3, and table.
2083//
2084// If isign = +1 or -1, the value of isign is the sign of the
2085// exponent used in the FFT formula.
2086//
2087// <dt><b>n1</b>
2088// <dd> Integer.
2089// Transform size in the first dimension. If n1 is not
2090// positive, <src>scfft3d</src> returns without computing a transform.
2091//
2092// <dt><b>n2</b>
2093// <dd> Integer.
2094// Transform size in the second dimension. If n2 is not
2095// positive, <src>scfft3d</src> returns without computing a transform.
2096//
2097// <dt><b>n3</b>
2098// <dd> Integer.
2099// Transform size in the third dimension. If n3 is not
2100// positive, <src>scfft3d</src> returns without computing a transform.
2101//
2102// <dt><b>scale</b>
2103// <dd> Scale factor.
2104// <src>scfft3d</src>: real.
2105// <src>dzfft3d</src>: double precision.
2106// <src>csfft3d</src>: real.
2107// <src>zdfft3d</src>: double precision.
2108// Each element of the output array is multiplied by scale
2109// after taking the Fourier transform, as defined previously.
2110//
2111// <dt><b>x</b>
2112// <dd> Array of dimension (0:ldx-1, 0:ldx2-1, 0:n3-1).
2113// <src>scfft3d</src>: real array.
2114// <src>dzfft3d</src>: double precision array.
2115// <src>csfft3d</src>: complex array.
2116// <src>zdfft3d</src>: double complex array.
2117//
2118// Array of values to be transformed.
2119//
2120// <dt><b>ldx</b>
2121// <dd> Integer.
2122// The first dimension of x, as it was declared in the calling
2123// program (the leading dimension of x).
2124//
2125// <src>scfft3d, dzfft3d</src>: ldx >= MAX(n1, 1).
2126// <src>csfft3d, zdfft3d</src>: ldx >= MAX(n1/2 + 1, 1).
2127//
2128// <dt><b>ldx2</b>
2129// <dd> Integer.
2130// The second dimension of x, as it was declared in the calling
2131// program. ldx2 >= MAX(n2, 1).
2132//
2133// <dt><b>ldy</b>
2134// <dd> Integer.
2135// The first dimension of y, as it was declared in the calling
2136// program; that is, the leading dimension of y.
2137//
2138// <src>scfft3d, dzfft3d</src>: ldy >= MAX(n1/2 + 1, 1).
2139// <src>csfft3d, zdfft3d</src>: ldy >= MAX(n1 + 2, 1).
2140//
2141// In the complex-to-real routine, two extra elements are in
2142// the first dimension (that is, ldy >= n1 + 2, rather than
2143// just ldy >= n1). These elements are needed for intermediate
2144// storage during the computation. On exit, their value is
2145// undefined.
2146//
2147// <dt><b>ldy2</b>
2148// <dd> Integer.
2149// The second dimension of y, as it was declared in the calling
2150// program. ldy2 >= MAX(n2, 1).
2151//
2152// <dt><b>isys</b>
2153// <dd> Algorithm used; value dependent on hardware system. Currently, no
2154// special options are supported; therefore, you must always specify
2155// an isys argument as constant 0.
2156//
2157// isys = 0 or 1 depending on the amount of workspace the user
2158// can provide to the routine.
2159// </dl>
2160// Output parameters:
2161// <dl compact>
2162// <dt><b>y</b>
2163// <dd> Array of dimension (0:ldy-1, 0:ldy2-1, 0:n3-1).
2164// <src>scfft3d</src>: complex array.
2165// <src>dzfft3d</src>: double complex array.
2166// <src>csfft3d</src>: real array.
2167// <src>zdfft3d</src>: double precision array.
2168//
2169// Output array of transformed values. The output array can be
2170// the same as the input array, in which case, the transform is
2171// done in place; that is, the input array is overwritten with
2172// the transformed values. In this case, it is necessary that
2173// the following equalities hold:
2174//
2175// <src>scfft3d, dzfft3d</src>: ldx = 2 * ldy, and ldx2 = ldy2.
2176// <src>csfft3d, zdfft3d</src>: ldy = 2 * ldx, and ldx2 = ldy2.
2177//
2178// <dt><b>table</b>
2179// <dd> Real array; dimension (15 + n1) + 2(15 + n2) + 2(15 + n3).
2180//
2181// Table of factors and trigonometric functions.
2182//
2183// This array must be initialized by a call to <src>scfft3d</src> or
2184// <src>csfft3d</src> with isign = 0.
2185//
2186// If isign = 0, table is initialized to contain trigonometric
2187// tables needed to compute a three-dimensional FFT of size n1
2188// by n2 by n3. If isign = +1 or -1, the values in table are
2189// assumed to be initialized already by a prior call with isign
2190// = 0.
2191//
2192// <dt><b>work</b>
2193// <dd> Real array; dimension n1 * n2 * n3.
2194//
2195// Work array. This is a scratch array used for intermediate
2196// calculations. Its address space must be different from that
2197// of the input and output arrays.
2198//
2199// </dl>
2200// <group>
2201static void scfft3d(Int isign, Int n1, Int n2, Int n3, Float scale,
2202 Float* x, Int ldx, Int ldx2, Complex* y, Int ldy,
2203 Int ldy2, Float* table, Float* work, Int isys);
2204static void dzfft3d(Int isign, Int n1, Int n2, Int n3, Double scale,
2205 Double* x, Int ldx, Int ldx2, DComplex* y, Int
2206 ldy, Int ldy2, Double* table, Double* work, Int
2207 isys);
2208static void csfft3d(Int isign, Int n1, Int n2, Int n3, Float scale,
2209 Complex* x, Int ldx, Int ldx2, Float* y, Int ldy,
2210 Int ldy2, Float* table, Float* work, Int isys);
2211static void zdfft3d(Int isign, Int n1, Int n2, Int n3, Double scale,
2212 DComplex* x, Int ldx, Int ldx2, Double* y, Int
2213 ldy, Int ldy2, Double* table, Double* work, Int
2214 isys);
2215// </group>
2216};
2217
2218} //# NAMESPACE CASACORE - END
2219
2220#endif
static void dzfft(Int isign, Int n, Double scale, Double *x, DComplex *y, Double *table, Double *work, Int isys)
static void zzfftm(Int isign, Int n, Int lot, Double scale, DComplex *x, Int ldx, DComplex *y, Int ldy, Double *table, Double *work, Int isys)
static void dzfft3d(Int isign, Int n1, Int n2, Int n3, Double scale, Double *x, Int ldx, Int ldx2, DComplex *y, Int ldy, Int ldy2, Double *table, Double *work, Int isys)
static void scfft(Int isign, Int n, Double scale, Double *x, DComplex *y, Double *table, Double *work, Int isys)
static void zdfft(Int isign, Int n, Double scale, DComplex *x, Double *y, Double *table, Double *work, Int isys)
static void ccfft2d(Int isign, Int n1, Int n2, Float scale, Complex *x, Int ldx, Complex *y, Int ldy, Float *table, Float *work, Int isys)
These routines compute the two-dimensional complex Fast Fourier Transform (FFT) of the complex matrix...
static void zzfft3d(Int isign, Int n1, Int n2, Int n3, Double scale, DComplex *x, Int ldx, Int ldx2, DComplex *y, Int ldy, Int ldy2, Double *table, Double *work, Int isys)
static void zdfft3d(Int isign, Int n1, Int n2, Int n3, Double scale, DComplex *x, Int ldx, Int ldx2, Double *y, Int ldy, Int ldy2, Double *table, Double *work, Int isys)
static void scfft3d(Int isign, Int n1, Int n2, Int n3, Float scale, Float *x, Int ldx, Int ldx2, Complex *y, Int ldy, Int ldy2, Float *table, Float *work, Int isys)
These are C++ wrapper functions for the 3D real-to-complex and complex-to-real transform routines in ...
static void dzfft2d(Int isign, Int n1, Int n2, Double scale, Double *x, Int ldx, DComplex *y, Int ldy, Double *table, Double *work, Int isys)
static void csfftm(Int isign, Int n, Int lot, Float scale, Complex *x, Int ldx, Float *y, Int ldy, Float *table, Float *work, Int isys)
static void scfftm(Int isign, Int n, Int lot, Float scale, Float *x, Int ldx, Complex *y, Int ldy, Float *table, Float *work, Int isys)
scfftm/dzfftm computes the FFT of each column of the real matrix X, and it stores the results in the ...
static void ccfft(Int isign, Int n, Float scale, Complex *x, Complex *y, Float *table, Float *work, Int isys)
These routines compute the Fast Fourier Transform (FFT) of the complex vector x, and store the result...
static void ccfft3d(Int isign, Int n1, Int n2, Int n3, Float scale, Complex *x, Int ldx, Int ldx2, Complex *y, Int ldy, Int ldy2, Float *table, Float *work, Int isys)
These routines compute the three-dimensional complex FFT of the complex matrix X, and store the resul...
static void zdfftm(Int isign, Int n, Int lot, Double scale, DComplex *x, Int ldx, Double *y, Int ldy, Double *table, Double *work, Int isys)
static void zzfft(Int isign, Int n, Double scale, DComplex *x, DComplex *y, Double *table, Double *work, Int isys)
static void scfft2d(Int isign, Int n1, Int n2, Float scale, Float *x, Int ldx, Complex *y, Int ldy, Float *table, Float *work, Int isys)
scfft2d/dzfft2d computes the two-dimensional Fast Fourier Transform (FFT) of the real matrix x,...
static void scfft(Int isign, Int n, Float scale, Float *x, Complex *y, Float *table, Float *work, Int isys)
scfft/dzfft computes the FFT of the real array x, and it stores the results in the complex array y.
static void csfft2d(Int isign, Int n1, Int n2, Float scale, Complex *x, Int ldx, Float *y, Int ldy, Float *table, Float *work, Int isys)
static void zzfft2d(Int isign, Int n1, Int n2, Double scale, DComplex *x, Int ldx, DComplex *y, Int ldy, Double *table, Double *work, Int isys)
static void ccfftm(Int isign, Int n, Int lot, Float scale, Complex *x, Int ldx, Complex *y, Int ldy, Float *table, Float *work, Int isys)
ccfftm/zzfftm computes the FFT of each column of the complex matrix x, and stores the results in the ...
static void csfft3d(Int isign, Int n1, Int n2, Int n3, Float scale, Complex *x, Int ldx, Int ldx2, Float *y, Int ldy, Int ldy2, Float *table, Float *work, Int isys)
static void dzfftm(Int isign, Int n, Int lot, Double scale, Double *x, Int ldx, DComplex *y, Int ldy, Double *table, Double *work, Int isys)
static void ccfft(Int isign, Int n, Double scale, DComplex *x, DComplex *y, Double *table, Double *work, Int isys)
static void zdfft2d(Int isign, Int n1, Int n2, Double scale, DComplex *x, Int ldx, Double *y, Int ldy, Double *table, Double *work, Int isys)
static void csfft(Int isign, Int n, Double scale, DComplex *x, Double *y, Double *table, Double *work, Int isys)
static void csfft(Int isign, Int n, Float scale, Complex *x, Float *y, Float *table, Float *work, Int isys)
this file contains all the compiler specific defines
Definition mainpage.dox:28
float Float
Definition aipstype.h:52
int Int
Definition aipstype.h:48
double Double
Definition aipstype.h:53