NFFT  3.3.1
fpt.c
Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2002, 2016 Jens Keiner, Stefan Kunis, Daniel Potts
00003  *
00004  * This program is free software; you can redistribute it and/or modify it under
00005  * the terms of the GNU General Public License as published by the Free Software
00006  * Foundation; either version 2 of the License, or (at your option) any later
00007  * version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but WITHOUT
00010  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
00012  * details.
00013  *
00014  * You should have received a copy of the GNU General Public License along with
00015  * this program; if not, write to the Free Software Foundation, Inc., 51
00016  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 
00025 #include "config.h"
00026 
00027 #include <stdlib.h>
00028 #include <string.h>
00029 #include <stdbool.h>
00030 #include <math.h>
00031 #ifdef HAVE_COMPLEX_H
00032 #include <complex.h>
00033 #endif
00034 
00035 #include "nfft3.h"
00036 #include "infft.h"
00037 
00038 /* Macros for index calculation. */
00039 
00041 #define K_START_TILDE(x,y) (MAX(MIN(x,y-2),0))
00042 
00044 #define K_END_TILDE(x,y) MIN(x,y-1)
00045 
00047 #define FIRST_L(x,y) (LRINT(floor((x)/(double)y)))
00048 
00050 #define LAST_L(x,y) (LRINT(ceil(((x)+1)/(double)y))-1)
00051 
00052 #define N_TILDE(y) (y-1)
00053 
00054 #define IS_SYMMETRIC(x,y,z) (x >= ((y-1.0)/z))
00055 
00056 #define FPT_BREAK_EVEN 4
00057 
00061 typedef struct fpt_step_
00062 {
00063   bool stable;                            
00066   int Ns;                                 
00067   int ts;                                 
00068   double **a11,**a12,**a21,**a22;         
00069   double *g;                              
00070 } fpt_step;
00071 
00075 typedef struct fpt_data_
00076 {
00077   fpt_step **steps;                       
00078   int k_start;                            
00079   double *alphaN;                         
00080   double *betaN;                          
00081   double *gammaN;                         
00082   double alpha_0;                         
00083   double beta_0;                          
00084   double gamma_m1;                        
00085   /* Data for direct transform. */        
00086   double *_alpha;                          
00087   double *_beta;                           
00088   double *_gamma;                          
00089 } fpt_data;
00090 
00094 typedef struct fpt_set_s_
00095 {
00096   int flags;                              
00097   int M;                                  
00098   int N;                                  
00100   int t;                                  
00101   fpt_data *dpt;                          
00102   double **xcvecs;                        
00105   double *xc;                             
00106   double _Complex *temp;                          
00107   double _Complex *work;                          
00108   double _Complex *result;                        
00109   double _Complex *vec3;
00110   double _Complex *vec4;
00111   double _Complex *z;
00112   fftw_plan *plans_dct3;                  
00114   fftw_plan *plans_dct2;                  
00116   fftw_r2r_kind *kinds;                   
00118   fftw_r2r_kind *kindsr;                  
00121   int *lengths; 
00123   /* Data for slow transforms. */
00124   double *xc_slow;
00125 } fpt_set_s;
00126 
00127 static inline void abuvxpwy(double a, double b, double _Complex* u, double _Complex* x,
00128   double* v, double _Complex* y, double* w, int n)
00129 {
00130   int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00131   double *v_ptr = v, *w_ptr = w;
00132   for (l = 0; l < n; l++)
00133     *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00134 }
00135 
00136 #define ABUVXPWY_SYMMETRIC(NAME,S1,S2) \
00137 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00138   double* v, double _Complex* y, double* w, int n) \
00139 { \
00140   const int n2 = n>>1; \
00141   int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00142   double *v_ptr = v, *w_ptr = w; \
00143   for (l = 0; l < n2; l++) \
00144     *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
00145   v_ptr--; w_ptr--; \
00146   for (l = 0; l < n2; l++) \
00147     *u_ptr++ = a * (b * S1 * (*v_ptr--) * (*x_ptr++) + S2 * (*w_ptr--) * (*y_ptr++)); \
00148 }
00149 
00150 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric1,1.0,-1.0)
00151 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric2,-1.0,1.0)
00152 
00153 #define ABUVXPWY_SYMMETRIC_1(NAME,S1) \
00154 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00155   double* v, double _Complex* y, int n, double *xx) \
00156 { \
00157   const int n2 = n>>1; \
00158   int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00159   double *v_ptr = v, *xx_ptr = xx; \
00160   for (l = 0; l < n2; l++, v_ptr++) \
00161     *u_ptr++ = a * (b * (*v_ptr) * (*x_ptr++) + ((*v_ptr)*(1.0+*xx_ptr++)) * (*y_ptr++)); \
00162   v_ptr--; \
00163   for (l = 0; l < n2; l++, v_ptr--) \
00164     *u_ptr++ = a * (b * S1 * (*v_ptr) * (*x_ptr++) + (S1 * (*v_ptr) * (1.0+*xx_ptr++)) * (*y_ptr++)); \
00165 }
00166 
00167 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_1,1.0)
00168 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_2,-1.0)
00169 
00170 #define ABUVXPWY_SYMMETRIC_2(NAME,S1) \
00171 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00172   double _Complex* y, double* w, int n, double *xx) \
00173 { \
00174   const int n2 = n>>1; \
00175   int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00176   double *w_ptr = w, *xx_ptr = xx; \
00177   for (l = 0; l < n2; l++, w_ptr++) \
00178     *u_ptr++ = a * (b * (*w_ptr/(1.0+*xx_ptr++)) * (*x_ptr++) + (*w_ptr) * (*y_ptr++)); \
00179   w_ptr--; \
00180   for (l = 0; l < n2; l++, w_ptr--) \
00181     *u_ptr++ = a * (b * (S1 * (*w_ptr)/(1.0+*xx_ptr++) ) * (*x_ptr++) + S1 * (*w_ptr) * (*y_ptr++)); \
00182 }
00183 
00184 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_1,1.0)
00185 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_2,-1.0)
00186 
00187 static inline void auvxpwy(double a, double _Complex* u, double _Complex* x, double* v,
00188   double _Complex* y, double* w, int n)
00189 {
00190   int l;
00191   double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00192   double *v_ptr = v, *w_ptr = w;
00193   for (l = n; l > 0; l--)
00194     *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00195 }
00196 
00197 static inline void auvxpwy_symmetric(double a, double _Complex* u, double _Complex* x,
00198   double* v, double _Complex* y, double* w, int n)
00199 {
00200   const int n2 = n>>1; \
00201   int l;
00202   double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00203   double *v_ptr = v, *w_ptr = w;
00204   for (l = n2; l > 0; l--)
00205     *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00206   v_ptr--; w_ptr--;
00207   for (l = n2; l > 0; l--)
00208     *u_ptr++ = a * ((*v_ptr--) * (*x_ptr++) - (*w_ptr--) * (*y_ptr++));
00209 }
00210 
00211 static inline void auvxpwy_symmetric_1(double a, double _Complex* u, double _Complex* x,
00212   double* v, double _Complex* y, double* w, int n, double *xx)
00213 {
00214   const int n2 = n>>1; \
00215   int l;
00216   double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00217   double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
00218   for (l = n2; l > 0; l--, xx_ptr++)
00219     *u_ptr++ = a * (((*v_ptr++)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)*(1.0+*xx_ptr)) * (*y_ptr++));
00220   v_ptr--; w_ptr--;
00221   for (l = n2; l > 0; l--, xx_ptr++)
00222     *u_ptr++ = a * (-((*v_ptr--)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)*(1.0+*xx_ptr)) * (*y_ptr++));
00223 }
00224 
00225 static inline void auvxpwy_symmetric_2(double a, double _Complex* u, double _Complex* x,
00226   double* v, double _Complex* y, double* w, int n, double *xx)
00227 {
00228   const int n2 = n>>1; \
00229   int l;
00230   double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00231   double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
00232   for (l = n2; l > 0; l--, xx_ptr++)
00233     *u_ptr++ = a * (((*v_ptr++)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)/(1.0+*xx_ptr)) * (*y_ptr++));
00234   v_ptr--; w_ptr--;
00235   for (l = n2; l > 0; l--, xx_ptr++)
00236     *u_ptr++ = a * (-((*v_ptr--)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)/(1.0+*xx_ptr)) * (*y_ptr++));
00237 }
00238 
00239 #define FPT_DO_STEP(NAME,M1_FUNCTION,M2_FUNCTION) \
00240 static inline void NAME(double _Complex  *a, double _Complex *b, double *a11, double *a12, \
00241   double *a21, double *a22, double g, int tau, fpt_set set) \
00242 { \
00243  \
00244   int length = 1<<(tau+1); \
00245  \
00246   double norm = 1.0/(length<<1); \
00247   \
00248   /* Compensate for factors introduced by a raw DCT-III. */ \
00249   a[0] *= 2.0; \
00250   b[0] *= 2.0; \
00251   \
00252   /* Compute function values from Chebyshev-coefficients using a DCT-III. */ \
00253   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
00254   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
00255   \
00256   /*for (k = 0; k < length; k++)*/ \
00257   /*{*/ \
00258     /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/ \
00259     /*  a11[k],a12[k],a21[k],a22[k]);*/ \
00260   /*}*/ \
00261   \
00262   /* Check, if gamma is zero. */ \
00263   if (g == 0.0) \
00264   { \
00265     /*fprintf(stderr,"gamma == 0!\n");*/ \
00266     /* Perform multiplication only for second row. */ \
00267     M2_FUNCTION(norm,b,b,a22,a,a21,length); \
00268   } \
00269   else \
00270   { \
00271     /*fprintf(stderr,"gamma != 0!\n");*/ \
00272     /* Perform multiplication for both rows. */ \
00273     M2_FUNCTION(norm,set->z,b,a22,a,a21,length); \
00274     M1_FUNCTION(norm*g,a,a,a11,b,a12,length); \
00275     memcpy(b,set->z,length*sizeof(double _Complex)); \
00276     /* Compute Chebyshev-coefficients using a DCT-II. */ \
00277     fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
00278     /* Compensate for factors introduced by a raw DCT-II. */ \
00279     a[0] *= 0.5; \
00280   } \
00281   \
00282   /* Compute Chebyshev-coefficients using a DCT-II. */ \
00283   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
00284   /* Compensate for factors introduced by a raw DCT-II. */ \
00285   b[0] *= 0.5; \
00286 }
00287 
00288 FPT_DO_STEP(fpt_do_step,auvxpwy,auvxpwy)
00289 FPT_DO_STEP(fpt_do_step_symmetric,auvxpwy_symmetric,auvxpwy_symmetric)
00290 /*FPT_DO_STEP(fpt_do_step_symmetric_u,auvxpwy_symmetric,auvxpwy)
00291 FPT_DO_STEP(fpt_do_step_symmetric_l,auvxpwy,auvxpwy_symmetric)*/
00292 
00293 static inline void fpt_do_step_symmetric_u(double _Complex *a, double _Complex *b,
00294   double *a11, double *a12, double *a21, double *a22, double *x,
00295   double gam, int tau, fpt_set set)
00296 {
00298   int length = 1<<(tau+1);
00300   double norm = 1.0/(length<<1);
00301 
00302   UNUSED(a21); UNUSED(a22);
00303 
00304   /* Compensate for factors introduced by a raw DCT-III. */
00305   a[0] *= 2.0;
00306   b[0] *= 2.0;
00307 
00308   /* Compute function values from Chebyshev-coefficients using a DCT-III. */
00309   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00310   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00311 
00312   /*for (k = 0; k < length; k++)*/
00313   /*{*/
00314     /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/
00315     /*  a11[k],a12[k],a21[k],a22[k]);*/
00316   /*}*/
00317 
00318   /* Check, if gamma is zero. */
00319   if (gam == 0.0)
00320   {
00321     /*fprintf(stderr,"gamma == 0!\n");*/
00322     /* Perform multiplication only for second row. */
00323     auvxpwy_symmetric_1(norm,b,b,a12,a,a11,length,x);
00324   }
00325   else
00326   {
00327     /*fprintf(stderr,"gamma != 0!\n");*/
00328     /* Perform multiplication for both rows. */
00329     auvxpwy_symmetric_1(norm,set->z,b,a12,a,a11,length,x);
00330     auvxpwy_symmetric(norm*gam,a,a,a11,b,a12,length);
00331     memcpy(b,set->z,length*sizeof(double _Complex));
00332     /* Compute Chebyshev-coefficients using a DCT-II. */
00333     fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00334     /* Compensate for factors introduced by a raw DCT-II. */
00335     a[0] *= 0.5;
00336   }
00337 
00338   /* Compute Chebyshev-coefficients using a DCT-II. */
00339   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00340   /* Compensate for factors introduced by a raw DCT-II. */
00341   b[0] *= 0.5;
00342 }
00343 
00344 static inline void fpt_do_step_symmetric_l(double _Complex  *a, double _Complex *b,
00345   double *a11, double *a12, double *a21, double *a22, double *x, double gam, int tau, fpt_set set)
00346 {
00348   int length = 1<<(tau+1);
00350   double norm = 1.0/(length<<1);
00351 
00352   /* Compensate for factors introduced by a raw DCT-III. */
00353   a[0] *= 2.0;
00354   b[0] *= 2.0;
00355 
00356   UNUSED(a11); UNUSED(a12);
00357 
00358   /* Compute function values from Chebyshev-coefficients using a DCT-III. */
00359   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00360   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00361 
00362   /*for (k = 0; k < length; k++)*/
00363   /*{*/
00364     /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/
00365     /*  a11[k],a12[k],a21[k],a22[k]);*/
00366   /*}*/
00367 
00368   /* Check, if gamma is zero. */
00369   if (gam == 0.0)
00370   {
00371     /*fprintf(stderr,"gamma == 0!\n");*/
00372     /* Perform multiplication only for second row. */
00373     auvxpwy_symmetric(norm,b,b,a22,a,a21,length);
00374   }
00375   else
00376   {
00377     /*fprintf(stderr,"gamma != 0!\n");*/
00378     /* Perform multiplication for both rows. */
00379     auvxpwy_symmetric(norm,set->z,b,a22,a,a21,length);
00380     auvxpwy_symmetric_2(norm*gam,a,a,a21,b,a22,length,x);
00381     memcpy(b,set->z,length*sizeof(double _Complex));
00382     /* Compute Chebyshev-coefficients using a DCT-II. */
00383     fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00384     /* Compensate for factors introduced by a raw DCT-II. */
00385     a[0] *= 0.5;
00386   }
00387 
00388   /* Compute Chebyshev-coefficients using a DCT-II. */
00389   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00390   /* Compensate for factors introduced by a raw DCT-II. */
00391   b[0] *= 0.5;
00392 }
00393 
00394 #define FPT_DO_STEP_TRANSPOSED(NAME,M1_FUNCTION,M2_FUNCTION) \
00395 static inline void NAME(double _Complex  *a, double _Complex *b, double *a11, \
00396   double *a12, double *a21, double *a22, double g, int tau, fpt_set set) \
00397 { \
00398  \
00399   int length = 1<<(tau+1); \
00400  \
00401   double norm = 1.0/(length<<1); \
00402   \
00403   /* Compute function values from Chebyshev-coefficients using a DCT-III. */ \
00404   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
00405   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
00406   \
00407   /* Perform matrix multiplication. */ \
00408   M1_FUNCTION(norm,g,set->z,a,a11,b,a21,length); \
00409   M2_FUNCTION(norm,g,b,a,a12,b,a22,length); \
00410   memcpy(a,set->z,length*sizeof(double _Complex)); \
00411   \
00412   /* Compute Chebyshev-coefficients using a DCT-II. */ \
00413   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
00414   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
00415 }
00416 
00417 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t,abuvxpwy,abuvxpwy)
00418 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric,abuvxpwy_symmetric1,abuvxpwy_symmetric2)
00419 /*FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric_u,abuvxpwy_symmetric1_1,abuvxpwy_symmetric1_2)*/
00420 /*FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric_l,abuvxpwy_symmetric2_2,abuvxpwy_symmetric2_1)*/
00421 
00422 
00423 static inline void fpt_do_step_t_symmetric_u(double _Complex  *a,
00424   double _Complex *b, double *a11, double *a12, double *x,
00425   double gam, int tau, fpt_set set)
00426 {
00428   int length = 1<<(tau+1);
00430   double norm = 1.0/(length<<1);
00431 
00432   /* Compute function values from Chebyshev-coefficients using a DCT-III. */
00433   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00434   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00435 
00436   /* Perform matrix multiplication. */
00437   abuvxpwy_symmetric1_1(norm,gam,set->z,a,a11,b,length,x);
00438   abuvxpwy_symmetric1_2(norm,gam,b,a,a12,b,length,x);
00439   memcpy(a,set->z,length*sizeof(double _Complex));
00440 
00441   /* Compute Chebyshev-coefficients using a DCT-II. */
00442   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00443   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00444 }
00445 
00446 static inline void fpt_do_step_t_symmetric_l(double _Complex  *a,
00447   double _Complex *b, double *a21, double *a22,
00448   double *x, double gam, int tau, fpt_set set)
00449 {
00451   int length = 1<<(tau+1);
00453   double norm = 1.0/(length<<1);
00454 
00455   /* Compute function values from Chebyshev-coefficients using a DCT-III. */
00456   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00457   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00458 
00459   /* Perform matrix multiplication. */
00460   abuvxpwy_symmetric2_2(norm,gam,set->z,a,b,a21,length,x);
00461   abuvxpwy_symmetric2_1(norm,gam,b,a,b,a22,length,x);
00462   memcpy(a,set->z,length*sizeof(double _Complex));
00463 
00464   /* Compute Chebyshev-coefficients using a DCT-II. */
00465   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00466   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00467 }
00468 
00469 
00470 static void eval_clenshaw(const double *x, double *y, int size, int k, const double *alpha,
00471   const double *beta, const double *gam)
00472 {
00473   /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
00474    * of knots  x[0], ..., x[size-1] by the Clenshaw algorithm
00475    */
00476   int i,j;
00477   double a,b,x_val_act,a_old;
00478   const double *x_act;
00479   double *y_act;
00480   const double *alpha_act, *beta_act, *gamma_act;
00481 
00482   /* Traverse all nodes. */
00483   x_act = x;
00484   y_act = y;
00485   for (i = 0; i < size; i++)
00486   {
00487     a = 1.0;
00488     b = 0.0;
00489     x_val_act = *x_act;
00490 
00491     if (k == 0)
00492     {
00493       *y_act = 1.0;
00494     }
00495     else
00496     {
00497       alpha_act = &(alpha[k]);
00498       beta_act = &(beta[k]);
00499       gamma_act = &(gam[k]);
00500       for (j = k; j > 1; j--)
00501       {
00502         a_old = a;
00503         a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00504          b = a_old*(*gamma_act);
00505         alpha_act--;
00506         beta_act--;
00507         gamma_act--;
00508       }
00509       *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00510     }
00511     x_act++;
00512     y_act++;
00513   }
00514 }
00515 
00516 static void eval_clenshaw2(const double *x, double *z, double *y, int size1, int size, int k, const double *alpha,
00517   const double *beta, const double *gam)
00518 {
00519   /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
00520    * of knots  x[0], ..., x[size-1] by the Clenshaw algorithm
00521    */
00522   int i,j;
00523   double a,b,x_val_act,a_old;
00524   const double *x_act;
00525   double *y_act, *z_act;
00526   const double *alpha_act, *beta_act, *gamma_act;
00527 
00528   /* Traverse all nodes. */
00529   x_act = x;
00530   y_act = y;
00531   z_act = z;
00532   for (i = 0; i < size; i++)
00533   {
00534     a = 1.0;
00535     b = 0.0;
00536     x_val_act = *x_act;
00537 
00538     if (k == 0)
00539     {
00540       *y_act = 1.0;
00541       *z_act = 0.0;
00542     }
00543     else
00544     {
00545       alpha_act = &(alpha[k]);
00546       beta_act = &(beta[k]);
00547       gamma_act = &(gam[k]);
00548       for (j = k; j > 1; j--)
00549       {
00550         a_old = a;
00551         a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00552         b = a_old*(*gamma_act);
00553         alpha_act--;
00554         beta_act--;
00555         gamma_act--;
00556       }
00557       if (i < size1)
00558         *z_act = a;
00559       *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00560     }
00561 
00562     x_act++;
00563     y_act++;
00564     z_act++;
00565   }
00566   /*gamma_act++;
00567   FILE *f = fopen("/Users/keiner/Desktop/nfsft_debug.txt","a");
00568   fprintf(f,"size1: %10d, size: %10d\n",size1,size);
00569   fclose(f);*/
00570 }
00571 
00572 static int eval_clenshaw_thresh2(const double *x, double *z, double *y, int size, int k,
00573   const double *alpha, const double *beta, const double *gam, const
00574   double threshold)
00575 {
00576   /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
00577    * of knots  x[0], ..., x[size-1] by the Clenshaw algorithm
00578    */
00579   int i,j;
00580   double a,b,x_val_act,a_old;
00581   const double *x_act;
00582   double *y_act, *z_act;
00583   const double *alpha_act, *beta_act, *gamma_act;
00584   R max = -nfft_float_property(NFFT_R_MAX);
00585   const R t = LOG10(FABS(threshold));
00586 
00587   /* Traverse all nodes. */
00588   x_act = x;
00589   y_act = y;
00590   z_act = z;
00591   for (i = 0; i < size; i++)
00592   {
00593     a = 1.0;
00594     b = 0.0;
00595     x_val_act = *x_act;
00596 
00597     if (k == 0)
00598     {
00599      *y_act = 1.0;
00600      *z_act = 0.0;
00601     }
00602     else
00603     {
00604       alpha_act = &(alpha[k]);
00605       beta_act = &(beta[k]);
00606       gamma_act = &(gam[k]);
00607       for (j = k; j > 1; j--)
00608       {
00609         a_old = a;
00610         a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00611          b = a_old*(*gamma_act);
00612         alpha_act--;
00613         beta_act--;
00614         gamma_act--;
00615       }
00616       *z_act = a;
00617       *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00618       max = FMAX(max,LOG10(FABS(*y_act)));
00619       if (max > t)
00620         return 1;
00621     }
00622     x_act++;
00623     y_act++;
00624     z_act++;
00625   }
00626   return 0;
00627 }
00628 
00629 static inline void eval_sum_clenshaw_fast(const int N, const int M,
00630   const double _Complex *a, const double *x, double _Complex *y,
00631   const double *alpha, const double *beta, const double *gam,
00632   const double lambda)
00633 {
00634   int j,k;
00635   double _Complex tmp1, tmp2, tmp3;
00636   double xc;
00637   
00638   /*fprintf(stderr, "Executing eval_sum_clenshaw_fast.\n");  
00639   fprintf(stderr, "Before transform:\n");  
00640   for (j = 0; j < N; j++)
00641     fprintf(stderr, "a[%4d] = %e.\n", j, a[j]);  
00642   for (j = 0; j <= M; j++)
00643     fprintf(stderr, "x[%4d] = %e, y[%4d] = %e.\n", j, x[j], j, y[j]);*/
00644 
00645   if (N == 0)
00646     for (j = 0; j <= M; j++)
00647       y[j] = a[0];
00648   else
00649   {
00650     for (j = 0; j <= M; j++)
00651     {
00652 #if 0
00653       xc = x[j];
00654       tmp2 = a[N];
00655       tmp1 = a[N-1] + (alpha[N-1] * xc + beta[N-1])*tmp2;
00656       for (k = N-1; k > 0; k--)
00657       {
00658         tmp3 =   a[k-1]
00659                + (alpha[k-1] * xc + beta[k-1]) * tmp1
00660                + gam[k] * tmp2;
00661         tmp2 = tmp1;
00662         tmp1 = tmp3;
00663       }
00664       y[j] = lambda * tmp1;
00665 #else
00666       xc = x[j];
00667       tmp1 = a[N-1];
00668       tmp2 = a[N];
00669       for (k = N-1; k > 0; k--)
00670       {
00671         tmp3 = a[k-1] + tmp2 * gam[k];
00672         tmp2 *= (alpha[k] * xc + beta[k]);
00673         tmp2 += tmp1;
00674         tmp1 = tmp3;
00675         /*if (j == 1515) 
00676         {
00677           fprintf(stderr, "k = %d, tmp1 = %e, tmp2 = %e.\n", k, tmp1, tmp2);  
00678         }*/
00679       }
00680       tmp2 *= (alpha[0] * xc + beta[0]);
00681         //fprintf(stderr, "alpha[0] = %e, beta[0] = %e.\n", alpha[0], beta[0]);  
00682       y[j] = lambda * (tmp2 + tmp1);
00683         //fprintf(stderr, "lambda = %e.\n", lambda);  
00684 #endif
00685     }
00686   }
00687   /*fprintf(stderr, "Before transform:\n");  
00688   for (j = 0; j < N; j++)
00689     fprintf(stderr, "a[%4d] = %e.\n", j, a[j]);  
00690   for (j = 0; j <= M; j++)
00691     fprintf(stderr, "x[%4d] = %e, y[%4d] = %e.\n", j, x[j], j, y[j]);  */
00692 }
00693 
00712 static void eval_sum_clenshaw_transposed(int N, int M, double _Complex* a, double *x,
00713   double _Complex *y, double _Complex *temp, double *alpha, double *beta, double *gam,
00714   double lambda)
00715 {
00716   int j,k;
00717   double _Complex* it1 = temp;
00718   double _Complex* it2 = y;
00719   double _Complex aux;
00720 
00721   /* Compute final result by multiplying with the constant lambda */
00722   a[0] = 0.0;
00723   for (j = 0; j <= M; j++)
00724   {
00725     it2[j] = lambda * y[j];
00726     a[0] += it2[j];
00727   }
00728 
00729   if (N > 0)
00730   {
00731     /* Compute final step. */
00732     a[1] = 0.0;
00733     for (j = 0; j <= M; j++)
00734     {
00735       it1[j] = it2[j];
00736       it2[j] = it2[j] * (alpha[0] * x[j] + beta[0]);
00737       a[1] += it2[j];
00738     }
00739 
00740     for (k = 2; k <= N; k++)
00741     {
00742       a[k] = 0.0;
00743       for (j = 0; j <= M; j++)
00744       {
00745         aux = it1[j];
00746         it1[j] = it2[j];
00747         it2[j] = it2[j]*(alpha[k-1] * x[j] + beta[k-1]) + gam[k-1] * aux;
00748         a[k] += it2[j];
00749       }
00750     }
00751   }
00752 }
00753 
00754 
00755 fpt_set fpt_init(const int M, const int t, const unsigned int flags)
00756 {
00758   int plength;
00760   int tau;
00762   int m;
00763   int k;
00764 #ifdef _OPENMP
00765   int nthreads = X(get_num_threads)();
00766 #endif
00767 
00768   /* Allocate memory for new DPT set. */
00769   fpt_set_s *set = (fpt_set_s*)nfft_malloc(sizeof(fpt_set_s));
00770 
00771   /* Save parameters in structure. */
00772   set->flags = flags;
00773 
00774   set->M = M;
00775   set->t = t;
00776   set->N = 1<<t;
00777 
00778   /* Allocate memory for M transforms. */
00779   set->dpt = (fpt_data*) nfft_malloc(M*sizeof(fpt_data));
00780 
00781   /* Initialize with NULL pointer. */
00782   for (m = 0; m < set->M; m++)
00783     set->dpt[m].steps = 0;
00784 
00785   /* Create arrays with Chebyshev nodes. */
00786 
00787   /* Initialize array with Chebyshev coefficients for the polynomial x. This
00788    * would be trivially an array containing a 1 as second entry with all other
00789    * coefficients set to zero. In order to compensate for the multiplicative
00790    * factor 2 introduced by the DCT-III, we set this coefficient to 0.5 here. */
00791 
00792   /* Allocate memory for array of pointers to node arrays. */
00793   set->xcvecs = (double**) nfft_malloc((set->t)*sizeof(double*));
00794   /* For each polynomial length starting with 4, compute the Chebyshev nodes
00795    * using a DCT-III. */
00796   plength = 4;
00797   for (tau = 1; tau < t+1; tau++)
00798   {
00799     /* Allocate memory for current array. */
00800     set->xcvecs[tau-1] = (double*) nfft_malloc(plength*sizeof(double));
00801     for (k = 0; k < plength; k++)
00802     {
00803       set->xcvecs[tau-1][k] = cos(((k+0.5)*KPI)/plength);
00804     }
00805     plength = plength << 1;
00806   }
00807 
00809   set->work = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex));
00810   set->result = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex));
00811 
00812   set->lengths = (int*) nfft_malloc((set->t/*-1*/)*sizeof(int));
00813   set->plans_dct2 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t/*-1*/));
00814   set->kindsr     = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind));
00815   set->kindsr[0]  = FFTW_REDFT10;
00816   set->kindsr[1]  = FFTW_REDFT10;
00817   for (tau = 0, plength = 4; tau < set->t/*-1*/; tau++, plength<<=1)
00818   {
00819     set->lengths[tau] = plength;
00820 #ifdef _OPENMP
00821 #pragma omp critical (nfft_omp_critical_fftw_plan)
00822 {
00823     fftw_plan_with_nthreads(nthreads);
00824 #endif
00825     set->plans_dct2[tau] =
00826       fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
00827                          2, 1, (double*)set->result, NULL, 2, 1,set->kindsr,
00828                          0);
00829 #ifdef _OPENMP
00830 }
00831 #endif
00832   }
00833 
00834   /* Check if fast transform is activated. */
00835   if (!(set->flags & FPT_NO_FAST_ALGORITHM))
00836   {
00838     set->vec3 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00839     set->vec4 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00840     set->z = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00841 
00843     set->plans_dct3 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t/*-1*/));
00844     set->kinds      = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind));
00845     set->kinds[0]   = FFTW_REDFT01;
00846     set->kinds[1]   = FFTW_REDFT01;
00847     for (tau = 0, plength = 4; tau < set->t/*-1*/; tau++, plength<<=1)
00848     {
00849       set->lengths[tau] = plength;
00850 #ifdef _OPENMP
00851 #pragma omp critical (nfft_omp_critical_fftw_plan)
00852 {
00853     fftw_plan_with_nthreads(nthreads);
00854 #endif
00855       set->plans_dct3[tau] =
00856         fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
00857                            2, 1, (double*)set->result, NULL, 2, 1, set->kinds,
00858                            0);
00859 #ifdef _OPENMP
00860 }
00861 #endif
00862     }
00863     nfft_free(set->lengths);
00864     nfft_free(set->kinds);
00865     nfft_free(set->kindsr);
00866     set->lengths = NULL;
00867     set->kinds = NULL;
00868     set->kindsr = NULL;
00869   }
00870 
00871   if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
00872   {
00873     set->xc_slow = (double*) nfft_malloc((set->N+1)*sizeof(double));
00874     set->temp = (double _Complex*) nfft_malloc((set->N+1)*sizeof(double _Complex));
00875   }
00876 
00877   /* Return the newly created DPT set. */
00878   return set;
00879 }
00880 
00881 void fpt_precompute(fpt_set set, const int m, double *alpha, double *beta,
00882   double *gam, int k_start, const double threshold)
00883 {
00884 
00885   int tau;          
00886   int l;            
00887   int plength;      
00889   int degree;       
00891   int firstl;       
00892   int lastl;        
00893   int plength_stab; 
00895   int degree_stab;  
00897   double *a11;      
00899   double *a12;      
00901   double *a21;      
00903   double *a22;      
00905   const double *calpha;
00906   const double *cbeta;
00907   const double *cgamma;
00908   int needstab = 0; 
00909   int k_start_tilde;
00910   int N_tilde;
00911   int clength;
00912   int clength_1;
00913   int clength_2;
00914   int t_stab, N_stab;
00915   fpt_data *data;
00916 
00917   /* Get pointer to DPT data. */
00918   data = &(set->dpt[m]);
00919 
00920   /* Check, if already precomputed. */
00921   if (data->steps != NULL)
00922     return;
00923 
00924   /* Save k_start. */
00925   data->k_start = k_start;
00926 
00927   data->gamma_m1 = gam[0];
00928 
00929   /* Check if fast transform is activated. */
00930   if (!(set->flags & FPT_NO_FAST_ALGORITHM))
00931   {
00932     /* Save recursion coefficients. */
00933     data->alphaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00934     data->betaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00935     data->gammaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00936 
00937     for (tau = 2; tau <= set->t; tau++)
00938     {
00939 
00940       data->alphaN[tau-2] = alpha[1<<tau];
00941       data->betaN[tau-2] = beta[1<<tau];
00942       data->gammaN[tau-2] = gam[1<<tau];
00943     }
00944 
00945     data->alpha_0 = alpha[1];
00946     data->beta_0 = beta[1];
00947 
00948     k_start_tilde = K_START_TILDE(data->k_start,X(next_power_of_2)(data->k_start)
00949       /*set->N*/);
00950     N_tilde = N_TILDE(set->N);
00951 
00952     /* Allocate memory for the cascade with t = log_2(N) many levels. */
00953     data->steps = (fpt_step**) nfft_malloc(sizeof(fpt_step*)*set->t);
00954 
00955     /* For tau = 1,...t compute the matrices U_{n,tau,l}. */
00956     plength = 4;
00957     for (tau = 1; tau < set->t; tau++)
00958     {
00959       /* Compute auxilliary values. */
00960       degree = plength>>1;
00961       /* Compute first l. */
00962       firstl = FIRST_L(k_start_tilde,plength);
00963       /* Compute last l. */
00964       lastl = LAST_L(N_tilde,plength);
00965 
00966       /* Allocate memory for current level. This level will contain 2^{t-tau-1}
00967        * many matrices. */
00968       data->steps[tau] = (fpt_step*) nfft_malloc(sizeof(fpt_step)
00969                          * (lastl+1));
00970 
00971       /* For l = 0,...2^{t-tau-1}-1 compute the matrices U_{n,tau,l}. */
00972       for (l = firstl; l <= lastl; l++)
00973       {
00974         if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
00975         {
00976           //fprintf(stderr,"fpt_precompute(%d): symmetric step\n",m);
00977           //fflush(stderr);
00978           clength = plength/2;
00979         }
00980         else
00981         {
00982           clength = plength;
00983         }
00984 
00985         /* Allocate memory for the components of U_{n,tau,l}. */
00986         a11 = (double*) nfft_malloc(sizeof(double)*clength);
00987         a12 = (double*) nfft_malloc(sizeof(double)*clength);
00988         a21 = (double*) nfft_malloc(sizeof(double)*clength);
00989         a22 = (double*) nfft_malloc(sizeof(double)*clength);
00990 
00991         /* Evaluate the associated polynomials at the 2^{tau+1} Chebyshev
00992          * nodes. */
00993 
00994         /* Get the pointers to the three-term recurrence coeffcients. */
00995         calpha = &(alpha[plength*l+1+1]);
00996         cbeta = &(beta[plength*l+1+1]);
00997         cgamma = &(gam[plength*l+1+1]);
00998 
00999         if (set->flags & FPT_NO_STABILIZATION)
01000         {
01001           /* Evaluate P_{2^{tau}-2}^n(\cdot,2^{tau+1}l+2). */
01002           calpha--;
01003           cbeta--;
01004           cgamma--;
01005           eval_clenshaw2(set->xcvecs[tau-1], a11, a21, clength, clength, degree-1, calpha, cbeta,
01006             cgamma);
01007           eval_clenshaw2(set->xcvecs[tau-1], a12, a22, clength, clength, degree, calpha, cbeta,
01008             cgamma);
01009           needstab = 0;
01010         }
01011         else
01012         {
01013           calpha--;
01014           cbeta--;
01015           cgamma--;
01016           /* Evaluate P_{2^{tau}-1}^n(\cdot,2^{tau+1}l+1). */
01017           needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a11, a21, clength,
01018             degree-1, calpha, cbeta, cgamma, threshold);
01019           if (needstab == 0)
01020           {
01021             /* Evaluate P_{2^{tau}}^n(\cdot,2^{tau+1}l+1). */
01022             needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a12, a22, clength,
01023               degree, calpha, cbeta, cgamma, threshold);
01024           }
01025         }
01026         
01027 
01028         /* Check if stabilization needed. */
01029         if (needstab == 0)
01030         {
01031           data->steps[tau][l].a11 = (double**) nfft_malloc(sizeof(double*));
01032           data->steps[tau][l].a12 = (double**) nfft_malloc(sizeof(double*));
01033           data->steps[tau][l].a21 = (double**) nfft_malloc(sizeof(double*));
01034           data->steps[tau][l].a22 = (double**) nfft_malloc(sizeof(double*));
01035           data->steps[tau][l].g = (double*) nfft_malloc(sizeof(double));
01036           /* No stabilization needed. */
01037           data->steps[tau][l].a11[0] = a11;
01038           data->steps[tau][l].a12[0] = a12;
01039           data->steps[tau][l].a21[0] = a21;
01040           data->steps[tau][l].a22[0] = a22;
01041           data->steps[tau][l].g[0] = gam[plength*l+1+1];
01042           data->steps[tau][l].stable = true;
01043         }
01044         else
01045         {
01046           /* Stabilize. */
01047           degree_stab = degree*(2*l+1);
01048           X(next_power_of_2_exp_int)((l+1)*(1<<(tau+1)),&N_stab,&t_stab);
01049 
01050           /* Old arrays are to small. */
01051           nfft_free(a11);
01052           nfft_free(a12);
01053           nfft_free(a21);
01054           nfft_free(a22);
01055 
01056           data->steps[tau][l].a11 = (double**) nfft_malloc(sizeof(double*));
01057           data->steps[tau][l].a12 = (double**)nfft_malloc(sizeof(double*));
01058           data->steps[tau][l].a21 = (double**) nfft_malloc(sizeof(double*));
01059           data->steps[tau][l].a22 = (double**) nfft_malloc(sizeof(double*));
01060           data->steps[tau][l].g = (double*) nfft_malloc(sizeof(double));
01061 
01062           plength_stab = N_stab;
01063 
01064           if (set->flags & FPT_AL_SYMMETRY)
01065           {
01066             if (m <= 1)
01067             {
01068               /* This should never be executed */
01069               clength_1 = plength_stab;
01070               clength_2 = plength_stab;
01071               /* Allocate memory for arrays. */
01072               a11 = (double*) nfft_malloc(sizeof(double)*clength_1);
01073               a12 = (double*) nfft_malloc(sizeof(double)*clength_1);
01074               a21 = (double*) nfft_malloc(sizeof(double)*clength_2);
01075               a22 = (double*) nfft_malloc(sizeof(double)*clength_2);
01076               /* Get the pointers to the three-term recurrence coeffcients. */
01077               calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
01078               eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1,
01079                 clength_2, degree_stab-1, calpha, cbeta, cgamma);
01080               eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1,
01081                 clength_2, degree_stab+0, calpha, cbeta, cgamma);
01082             }
01083             else
01084             {
01085               clength = plength_stab/2;
01086               if (m%2 == 0)
01087               {
01088                 a11 = (double*) nfft_malloc(sizeof(double)*clength);
01089                 a12 = (double*) nfft_malloc(sizeof(double)*clength);
01090                 a21 = 0;
01091                 a22 = 0;
01092                 calpha = &(alpha[2]); cbeta = &(beta[2]); cgamma = &(gam[2]);
01093                 eval_clenshaw(set->xcvecs[t_stab-2], a11, clength,
01094                   degree_stab-2, calpha, cbeta, cgamma);
01095                 eval_clenshaw(set->xcvecs[t_stab-2], a12, clength,
01096                   degree_stab-1, calpha, cbeta, cgamma);
01097               }
01098               else
01099               {
01100                 a11 = 0;
01101                 a12 = 0;
01102                 a21 = (double*) nfft_malloc(sizeof(double)*clength);
01103                 a22 = (double*) nfft_malloc(sizeof(double)*clength);
01104                 calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
01105                 eval_clenshaw(set->xcvecs[t_stab-2], a21, clength,
01106                   degree_stab-1,calpha, cbeta, cgamma);
01107                 eval_clenshaw(set->xcvecs[t_stab-2], a22, clength,
01108                   degree_stab+0, calpha, cbeta, cgamma);
01109               }
01110             }
01111           }
01112           else
01113           {
01114             clength_1 = plength_stab;
01115             clength_2 = plength_stab;
01116             a11 = (double*) nfft_malloc(sizeof(double)*clength_1);
01117             a12 = (double*) nfft_malloc(sizeof(double)*clength_1);
01118             a21 = (double*) nfft_malloc(sizeof(double)*clength_2);
01119             a22 = (double*) nfft_malloc(sizeof(double)*clength_2);
01120             calpha = &(alpha[2]);
01121             cbeta = &(beta[2]);
01122             cgamma = &(gam[2]);
01123             calpha--;
01124             cbeta--;
01125             cgamma--;
01126             eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1, clength_2, degree_stab-1,
01127               calpha, cbeta, cgamma);
01128             eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1, clength_2, degree_stab+0,
01129               calpha, cbeta, cgamma);
01130 
01131           }
01132           data->steps[tau][l].a11[0] = a11;
01133           data->steps[tau][l].a12[0] = a12;
01134           data->steps[tau][l].a21[0] = a21;
01135           data->steps[tau][l].a22[0] = a22;
01136 
01137           data->steps[tau][l].g[0] =  gam[1+1];
01138           data->steps[tau][l].stable = false;
01139           data->steps[tau][l].ts = t_stab;
01140           data->steps[tau][l].Ns = N_stab;
01141         }
01142       }
01144       plength = plength << 1;
01145     }
01146   }
01147 
01148   if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01149   {
01150     /* Check, if recurrence coefficients must be copied. */
01151     if (set->flags & FPT_PERSISTENT_DATA)
01152     {
01153       data->_alpha = (double*) alpha;
01154       data->_beta = (double*) beta;
01155       data->_gamma = (double*) gam;
01156     }
01157     else
01158     {
01159       data->_alpha = (double*) nfft_malloc((set->N+1)*sizeof(double));
01160       data->_beta = (double*) nfft_malloc((set->N+1)*sizeof(double));
01161       data->_gamma = (double*) nfft_malloc((set->N+1)*sizeof(double));
01162       memcpy(data->_alpha,alpha,(set->N+1)*sizeof(double));
01163       memcpy(data->_beta,beta,(set->N+1)*sizeof(double));
01164       memcpy(data->_gamma,gam,(set->N+1)*sizeof(double));
01165     }
01166   }
01167 }
01168 
01169 void fpt_trafo_direct(fpt_set set, const int m, const double _Complex *x, double _Complex *y,
01170   const int k_end, const unsigned int flags)
01171 {
01172   int j;
01173   fpt_data *data = &(set->dpt[m]);
01174   int Nk;
01175   int tk;
01176   double norm;
01177   
01178     //fprintf(stderr, "Executing dpt.\n");  
01179 
01180   X(next_power_of_2_exp_int)(k_end+1,&Nk,&tk);
01181   norm = 2.0/(Nk<<1);
01182 
01183     //fprintf(stderr, "Norm = %e.\n", norm);  
01184 
01185   if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01186   {
01187     return;
01188   }
01189 
01190   if (flags & FPT_FUNCTION_VALUES)
01191   {
01192     /* Fill array with Chebyshev nodes. */
01193     for (j = 0; j <= k_end; j++)
01194     {
01195       set->xc_slow[j] = cos((KPI*(j+0.5))/(k_end+1));
01196         //fprintf(stderr, "x[%4d] = %e.\n", j, set->xc_slow[j]);  
01197     }
01198 
01199     memset(set->result,0U,data->k_start*sizeof(double _Complex));
01200     memcpy(&set->result[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex));
01201 
01202     /*eval_sum_clenshaw(k_end, k_end, set->result, set->xc_slow,
01203       y, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01204       data->gamma_m1);*/
01205     eval_sum_clenshaw_fast(k_end, k_end, set->result, set->xc_slow,
01206       y, &data->_alpha[1], &data->_beta[1], &data->_gamma[1], data->gamma_m1);
01207   }
01208   else
01209   {
01210     memset(set->temp,0U,data->k_start*sizeof(double _Complex));
01211     memcpy(&set->temp[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex));
01212 
01213     eval_sum_clenshaw_fast(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
01214       set->result, &data->_alpha[1], &data->_beta[1], &data->_gamma[1],
01215       data->gamma_m1);
01216 
01217     fftw_execute_r2r(set->plans_dct2[tk-2],(double*)set->result,
01218       (double*)set->result);
01219 
01220     set->result[0] *= 0.5;
01221     for (j = 0; j < Nk; j++)
01222     {
01223       set->result[j] *= norm;
01224     }
01225 
01226     memcpy(y,set->result,(k_end+1)*sizeof(double _Complex));
01227   }
01228 }
01229 
01230 void fpt_trafo(fpt_set set, const int m, const double _Complex *x, double _Complex *y,
01231   const int k_end, const unsigned int flags)
01232 {
01233   /* Get transformation data. */
01234   fpt_data *data = &(set->dpt[m]);
01236   int Nk;
01238   int tk;
01240   int k_start_tilde;
01242   int k_end_tilde;
01243 
01245   int tau;
01247   int firstl;
01249   int lastl;
01251   int l;
01253   int plength;
01255   int plength_stab;
01256   int t_stab;
01258   fpt_step *step;
01260   fftw_plan plan = 0;
01261   int length = k_end+1;
01262   fftw_r2r_kind kinds[2] = {FFTW_REDFT01,FFTW_REDFT01};
01263 
01265   int k;
01266 
01267   double _Complex *work_ptr;
01268   const double _Complex *x_ptr;
01269 
01270   /* Check, if slow transformation should be used due to small bandwidth. */
01271   if (k_end < FPT_BREAK_EVEN)
01272   {
01273     /* Use NDSFT. */
01274     fpt_trafo_direct(set, m, x, y, k_end, flags);
01275     return;
01276   }
01277 
01278   X(next_power_of_2_exp_int)(k_end,&Nk,&tk);
01279   k_start_tilde = K_START_TILDE(data->k_start,Nk);
01280   k_end_tilde = K_END_TILDE(k_end,Nk);
01281 
01282   /* Check if fast transform is activated. */
01283   if (set->flags & FPT_NO_FAST_ALGORITHM)
01284     return;
01285 
01286   if (flags & FPT_FUNCTION_VALUES)
01287   {
01288 #ifdef _OPENMP
01289     int nthreads = X(get_num_threads)();
01290 #pragma omp critical (nfft_omp_critical_fftw_plan)
01291 {
01292     fftw_plan_with_nthreads(nthreads);
01293 #endif
01294     plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
01295       (double*)set->work, NULL, 2, 1, kinds, 0U);
01296 #ifdef _OPENMP
01297 }
01298 #endif
01299   }
01300 
01301   /* Initialize working arrays. */
01302   memset(set->result,0U,2*Nk*sizeof(double _Complex));
01303 
01304   /* The first step. */
01305 
01306   /* Set the first 2*data->k_start coefficients to zero. */
01307   memset(set->work,0U,2*data->k_start*sizeof(double _Complex));
01308 
01309   work_ptr = &set->work[2*data->k_start];
01310   x_ptr = x;
01311 
01312   for (k = 0; k <= k_end_tilde-data->k_start; k++)
01313   {
01314     *work_ptr++ = *x_ptr++;
01315     *work_ptr++ = K(0.0);
01316   }
01317 
01318   /* Set the last 2*(set->N-1-k_end_tilde) coefficients to zero. */
01319   memset(&set->work[2*(k_end_tilde+1)],0U,2*(Nk-1-k_end_tilde)*sizeof(double _Complex));
01320 
01321   /* If k_end == Nk, use three-term recurrence to map last coefficient x_{Nk} to
01322    * x_{Nk-1} and x_{Nk-2}. */
01323   if (k_end == Nk)
01324   {
01325     set->work[2*(Nk-2)] += data->gammaN[tk-2]*x[Nk-data->k_start];
01326     set->work[2*(Nk-1)] += data->betaN[tk-2]*x[Nk-data->k_start];
01327     set->work[2*(Nk-1)+1] = data->alphaN[tk-2]*x[Nk-data->k_start];
01328   }
01329 
01330   /* Compute the remaining steps. */
01331   plength = 4;
01332   for (tau = 1; tau < tk; tau++)
01333   {
01334     /* Compute first l. */
01335     firstl = FIRST_L(k_start_tilde,plength);
01336     /* Compute last l. */
01337     lastl = LAST_L(k_end_tilde,plength);
01338 
01339     /* Compute the multiplication steps. */
01340     for (l = firstl; l <= lastl; l++)
01341     {
01342       /* Copy vectors to multiply into working arrays zero-padded to twice the length. */
01343       memcpy(set->vec3,&(set->work[(plength/2)*(4*l+2)]),(plength/2)*sizeof(double _Complex));
01344       memcpy(set->vec4,&(set->work[(plength/2)*(4*l+3)]),(plength/2)*sizeof(double _Complex));
01345       memset(&set->vec3[plength/2],0U,(plength/2)*sizeof(double _Complex));
01346       memset(&set->vec4[plength/2],0U,(plength/2)*sizeof(double _Complex));
01347 
01348       /* Copy coefficients into first half. */
01349       memcpy(&(set->work[(plength/2)*(4*l+2)]),&(set->work[(plength/2)*(4*l+1)]),(plength/2)*sizeof(double _Complex));
01350       memset(&(set->work[(plength/2)*(4*l+1)]),0U,(plength/2)*sizeof(double _Complex));
01351       memset(&(set->work[(plength/2)*(4*l+3)]),0U,(plength/2)*sizeof(double _Complex));
01352 
01353       /* Get matrix U_{n,tau,l} */
01354       step = &(data->steps[tau][l]);
01355 
01356       /* Check if step is stable. */
01357       if (step->stable)
01358       {
01359         /* Check, if we should do a symmetrizised step. */
01360         if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
01361         {
01362           /*for (k = 0; k < plength; k++)
01363           {
01364             fprintf(stderr,"fpt_trafo: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",
01365               step->a11[0][k],step->a12[0][k],step->a21[0][k],step->a22[0][k]);
01366           }*/
01367           /* Multiply third and fourth polynomial with matrix U. */
01368           //fprintf(stderr,"\nhallo\n");
01369           fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0],
01370             step->a12[0], step->a21[0], step->a22[0], step->g[0], tau, set);
01371         }
01372         else
01373         {
01374           /* Multiply third and fourth polynomial with matrix U. */
01375           fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01376             step->a21[0], step->a22[0], step->g[0], tau, set);
01377         }
01378 
01379         if (step->g[0] != 0.0)
01380         {
01381           for (k = 0; k < plength; k++)
01382           {
01383             set->work[plength*2*l+k] += set->vec3[k];
01384           }
01385         }
01386         for (k = 0; k < plength; k++)
01387         {
01388           set->work[plength*(2*l+1)+k] += set->vec4[k];
01389         }
01390       }
01391       else
01392       {
01393         /* Stabilize. */
01394 
01395         /* The lengh of the polynomials */
01396         plength_stab = step->Ns;
01397         t_stab = step->ts;
01398 
01399         /*---------*/
01400         /*fprintf(stderr,"\nfpt_trafo: stabilizing at tau = %d, l = %d.\n",tau,l);
01401         fprintf(stderr,"\nfpt_trafo: plength_stab = %d.\n",plength_stab);
01402         fprintf(stderr,"\nfpt_trafo: tk = %d.\n",tk);
01403         fprintf(stderr,"\nfpt_trafo: index = %d.\n",tk-tau-1);*/
01404         /*---------*/
01405 
01406         /* Set rest of vectors explicitely to zero */
01407         /*fprintf(stderr,"fpt_trafo: stabilizing: plength = %d, plength_stab = %d\n",
01408           plength, plength_stab);*/
01409         memset(&set->vec3[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex));
01410         memset(&set->vec4[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex));
01411 
01412         /* Multiply third and fourth polynomial with matrix U. */
01413         /* Check for symmetry. */
01414         if (set->flags & FPT_AL_SYMMETRY)
01415         {
01416           if (m <= 1)
01417           {
01418             fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01419               step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01420           }
01421           else if (m%2 == 0)
01422           {
01423             /*fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01424               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
01425             fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01426               step->a21[0], step->a22[0],
01427               set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01428             /*fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01429               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
01430           }
01431           else
01432           {
01433             /*fpt_do_step_symmetric_l(set->vec3, set->vec4, step->a11[0], step->a12[0],
01434               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
01435             fpt_do_step_symmetric_l(set->vec3, set->vec4,
01436               step->a11[0], step->a12[0],
01437               step->a21[0],
01438               step->a22[0], set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01439             /*fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01440               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
01441           }
01442         }
01443         else
01444         {
01445             fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01446               step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01447         }
01448 
01449         if (step->g[0] != 0.0)
01450         {
01451           for (k = 0; k < plength_stab; k++)
01452           {
01453             set->result[k] += set->vec3[k];
01454           }
01455         }
01456 
01457         for (k = 0; k < plength_stab; k++)
01458         {
01459           set->result[Nk+k] += set->vec4[k];
01460         }
01461       }
01462     }
01463     /* Double length of polynomials. */
01464     plength = plength<<1;
01465 
01466     /*--------*/
01467     /*for (k = 0; k < 2*Nk; k++)
01468     {
01469       fprintf(stderr,"work[%2d] = %le + I*%le\tresult[%2d] = %le + I*%le\n",
01470         k,creal(set->work[k]),cimag(set->work[k]),k,creal(set->result[k]),
01471         cimag(set->result[k]));
01472     }*/
01473     /*--------*/
01474   }
01475 
01476   /* Add the resulting cascade coeffcients to the coeffcients accumulated from
01477    * the stabilization steps. */
01478   for (k = 0; k < 2*Nk; k++)
01479   {
01480     set->result[k] += set->work[k];
01481   }
01482 
01483   /* The last step. Compute the Chebyshev coeffcients c_k^n from the
01484    * polynomials in front of P_0^n and P_1^n. */
01485   y[0] = data->gamma_m1*(set->result[0] + data->beta_0*set->result[Nk] +
01486     data->alpha_0*set->result[Nk+1]*0.5);
01487   y[1] = data->gamma_m1*(set->result[1] + data->beta_0*set->result[Nk+1]+
01488     data->alpha_0*(set->result[Nk]+set->result[Nk+2]*0.5));
01489   y[k_end-1] = data->gamma_m1*(set->result[k_end-1] +
01490     data->beta_0*set->result[Nk+k_end-1] +
01491     data->alpha_0*set->result[Nk+k_end-2]*0.5);
01492   y[k_end] = data->gamma_m1*(0.5*data->alpha_0*set->result[Nk+k_end-1]);
01493   for (k = 2; k <= k_end-2; k++)
01494   {
01495     y[k] = data->gamma_m1*(set->result[k] + data->beta_0*set->result[Nk+k] +
01496       data->alpha_0*0.5*(set->result[Nk+k-1]+set->result[Nk+k+1]));
01497   }
01498 
01499   if (flags & FPT_FUNCTION_VALUES)
01500   {
01501     y[0] *= 2.0;
01502     fftw_execute_r2r(plan,(double*)y,(double*)y);
01503 #ifdef _OPENMP
01504     #pragma omp critical (nfft_omp_critical_fftw_plan)
01505 #endif
01506     fftw_destroy_plan(plan);
01507     for (k = 0; k <= k_end; k++)
01508     {
01509       y[k] *= 0.5;
01510     }
01511   }
01512 }
01513 
01514 void fpt_transposed_direct(fpt_set set, const int m, double _Complex *x,
01515   double _Complex *y, const int k_end, const unsigned int flags)
01516 {
01517   int j;
01518   fpt_data *data = &(set->dpt[m]);
01519   int Nk;
01520   int tk;
01521   double norm;
01522 
01523   X(next_power_of_2_exp_int)(k_end+1,&Nk,&tk);
01524   norm = 2.0/(Nk<<1);
01525 
01526   if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01527   {
01528     return;
01529   }
01530 
01531   if (flags & FPT_FUNCTION_VALUES)
01532   {
01533     for (j = 0; j <= k_end; j++)
01534     {
01535       set->xc_slow[j] = cos((KPI*(j+0.5))/(k_end+1));
01536     }
01537 
01538     eval_sum_clenshaw_transposed(k_end, k_end, set->result, set->xc_slow,
01539       y, set->work, &data->_alpha[1], &data->_beta[1], &data->_gamma[1],
01540       data->gamma_m1);
01541 
01542     memcpy(x,&set->result[data->k_start],(k_end-data->k_start+1)*
01543       sizeof(double _Complex));
01544   }
01545   else
01546   {
01547     memcpy(set->result,y,(k_end+1)*sizeof(double _Complex));
01548     memset(&set->result[k_end+1],0U,(Nk-k_end-1)*sizeof(double _Complex));
01549 
01550     for (j = 0; j < Nk; j++)
01551     {
01552       set->result[j] *= norm;
01553     }
01554 
01555     fftw_execute_r2r(set->plans_dct3[tk-2],(double*)set->result,
01556       (double*)set->result);
01557 
01558     eval_sum_clenshaw_transposed(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
01559       set->result, set->work, &data->_alpha[1], &data->_beta[1], &data->_gamma[1],
01560       data->gamma_m1);
01561 
01562     memcpy(x,&set->temp[data->k_start],(k_end-data->k_start+1)*sizeof(double _Complex));
01563   }
01564 }
01565 
01566 void fpt_transposed(fpt_set set, const int m, double _Complex *x,
01567   double _Complex *y, const int k_end, const unsigned int flags)
01568 {
01569   /* Get transformation data. */
01570   fpt_data *data = &(set->dpt[m]);
01572   int Nk;
01574   int tk;
01576   int k_start_tilde;
01578   int k_end_tilde;
01579 
01581   int tau;
01583   int firstl;
01585   int lastl;
01587   int l;
01589   int plength;
01591   int plength_stab;
01593   fpt_step *step;
01595   fftw_plan plan;
01596   int length = k_end+1;
01597   fftw_r2r_kind kinds[2] = {FFTW_REDFT10,FFTW_REDFT10};
01599   int k;
01600   int t_stab;
01601 
01602   /* Check, if slow transformation should be used due to small bandwidth. */
01603   if (k_end < FPT_BREAK_EVEN)
01604   {
01605     /* Use NDSFT. */
01606     fpt_transposed_direct(set, m, x, y, k_end, flags);
01607     return;
01608   }
01609 
01610   X(next_power_of_2_exp_int)(k_end,&Nk,&tk);
01611   k_start_tilde = K_START_TILDE(data->k_start,Nk);
01612   k_end_tilde = K_END_TILDE(k_end,Nk);
01613 
01614   /* Check if fast transform is activated. */
01615   if (set->flags & FPT_NO_FAST_ALGORITHM)
01616   {
01617     return;
01618   }
01619 
01620   if (flags & FPT_FUNCTION_VALUES)
01621   {
01622 #ifdef _OPENMP
01623     int nthreads = X(get_num_threads)();
01624 #pragma omp critical (nfft_omp_critical_fftw_plan)
01625 {
01626     fftw_plan_with_nthreads(nthreads);
01627 #endif
01628     plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
01629       (double*)set->work, NULL, 2, 1, kinds, 0U);
01630 #ifdef _OPENMP
01631 }
01632 #endif
01633     fftw_execute_r2r(plan,(double*)y,(double*)set->result);
01634 #ifdef _OPENMP
01635     #pragma omp critical (nfft_omp_critical_fftw_plan)
01636 #endif
01637     fftw_destroy_plan(plan);
01638     for (k = 0; k <= k_end; k++)
01639     {
01640       set->result[k] *= 0.5;
01641     }
01642   }
01643   else
01644   {
01645     memcpy(set->result,y,(k_end+1)*sizeof(double _Complex));
01646   }
01647 
01648   /* Initialize working arrays. */
01649   memset(set->work,0U,2*Nk*sizeof(double _Complex));
01650 
01651   /* The last step is now the first step. */
01652   for (k = 0; k <= k_end; k++)
01653   {
01654     set->work[k] = data->gamma_m1*set->result[k];
01655   }
01656   //memset(&set->work[k_end+1],0U,(Nk+1-k_end)*sizeof(double _Complex));
01657 
01658   set->work[Nk] = data->gamma_m1*(data->beta_0*set->result[0] +
01659     data->alpha_0*set->result[1]);
01660   for (k = 1; k < k_end; k++)
01661   {
01662     set->work[Nk+k] = data->gamma_m1*(data->beta_0*set->result[k] +
01663       data->alpha_0*0.5*(set->result[k-1]+set->result[k+1]));
01664   }
01665   if (k_end<Nk)
01666   {
01667     memset(&set->work[k_end],0U,(Nk-k_end)*sizeof(double _Complex));
01668   }
01669 
01671   memcpy(set->result,set->work,2*Nk*sizeof(double _Complex));
01672 
01673   /* Compute the remaining steps. */
01674   plength = Nk;
01675   for (tau = tk-1; tau >= 1; tau--)
01676   {
01677     /* Compute first l. */
01678     firstl = FIRST_L(k_start_tilde,plength);
01679     /* Compute last l. */
01680     lastl = LAST_L(k_end_tilde,plength);
01681 
01682     /* Compute the multiplication steps. */
01683     for (l = firstl; l <= lastl; l++)
01684     {
01685       /* Initialize second half of coefficient arrays with zeros. */
01686       memcpy(set->vec3,&(set->work[(plength/2)*(4*l+0)]),plength*sizeof(double _Complex));
01687       memcpy(set->vec4,&(set->work[(plength/2)*(4*l+2)]),plength*sizeof(double _Complex));
01688 
01689       memcpy(&set->work[(plength/2)*(4*l+1)],&(set->work[(plength/2)*(4*l+2)]),
01690         (plength/2)*sizeof(double _Complex));
01691 
01692       /* Get matrix U_{n,tau,l} */
01693       step = &(data->steps[tau][l]);
01694 
01695       /* Check if step is stable. */
01696       if (step->stable)
01697       {
01698         if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
01699         {
01700           /* Multiply third and fourth polynomial with matrix U. */
01701           fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01702             step->a21[0], step->a22[0], step->g[0], tau, set);
01703         }
01704         else
01705         {
01706           /* Multiply third and fourth polynomial with matrix U. */
01707           fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
01708             step->a21[0], step->a22[0], step->g[0], tau, set);
01709         }
01710         memcpy(&(set->vec3[plength/2]), set->vec4,(plength/2)*sizeof(double _Complex));
01711 
01712         for (k = 0; k < plength; k++)
01713         {
01714           set->work[plength*(4*l+2)/2+k] = set->vec3[k];
01715         }
01716       }
01717       else
01718       {
01719         /* Stabilize. */
01720         plength_stab = step->Ns;
01721         t_stab = step->ts;
01722 
01723         memcpy(set->vec3,set->result,plength_stab*sizeof(double _Complex));
01724         memcpy(set->vec4,&(set->result[Nk]),plength_stab*sizeof(double _Complex));
01725 
01726         /* Multiply third and fourth polynomial with matrix U. */
01727         if (set->flags & FPT_AL_SYMMETRY)
01728         {
01729           if (m <= 1)
01730           {
01731             fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01732               step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01733           }
01734           else if (m%2 == 0)
01735           {
01736             fpt_do_step_t_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01737               set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01738           }
01739           else
01740           {
01741             fpt_do_step_t_symmetric_l(set->vec3, set->vec4,
01742               step->a21[0], step->a22[0], set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01743           }
01744         }
01745         else
01746         {
01747             fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
01748               step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01749         }
01750 
01751         memcpy(&(set->vec3[plength/2]),set->vec4,(plength/2)*sizeof(double _Complex));
01752 
01753         for (k = 0; k < plength; k++)
01754         {
01755           set->work[(plength/2)*(4*l+2)+k] = set->vec3[k];
01756         }
01757        }
01758     }
01759     /* Half the length of polynomial arrays. */
01760     plength = plength>>1;
01761   }
01762 
01763   /* First step */
01764   for (k = 0; k <= k_end_tilde-data->k_start; k++)
01765   {
01766     x[k] = set->work[2*(data->k_start+k)];
01767   }
01768   if (k_end == Nk)
01769   {
01770     x[Nk-data->k_start] =
01771         data->gammaN[tk-2]*set->work[2*(Nk-2)]
01772       + data->betaN[tk-2] *set->work[2*(Nk-1)]
01773       + data->alphaN[tk-2]*set->work[2*(Nk-1)+1];
01774   }
01775 }
01776 
01777 void fpt_finalize(fpt_set set)
01778 {
01779   int tau;
01780   int l;
01781   int m;
01782   fpt_data *data;
01783   int k_start_tilde;
01784   int N_tilde;
01785   int firstl, lastl;
01786   int plength;
01787   const int M = set->M;
01788 
01789   /* TODO Clean up DPT transform data structures. */
01790   for (m = 0; m < M; m++)
01791   {
01792     /* Check if precomputed. */
01793     data = &set->dpt[m];
01794     if (data->steps != (fpt_step**)NULL)
01795     {
01796       nfft_free(data->alphaN);
01797       nfft_free(data->betaN);
01798       nfft_free(data->gammaN);
01799       data->alphaN = NULL;
01800       data->betaN = NULL;
01801       data->gammaN = NULL;
01802 
01803       /* Free precomputed data. */
01804       k_start_tilde = K_START_TILDE(data->k_start,X(next_power_of_2)(data->k_start)
01805         /*set->N*/);
01806       N_tilde = N_TILDE(set->N);
01807       plength = 4;
01808       for (tau = 1; tau < set->t; tau++)
01809       {
01810         /* Compute first l. */
01811         firstl = FIRST_L(k_start_tilde,plength);
01812         /* Compute last l. */
01813         lastl = LAST_L(N_tilde,plength);
01814 
01815         /* For l = 0,...2^{t-tau-1}-1 compute the matrices U_{n,tau,l}. */
01816         for (l = firstl; l <= lastl; l++)
01817         {
01818           /* Free components. */
01819           nfft_free(data->steps[tau][l].a11[0]);
01820           nfft_free(data->steps[tau][l].a12[0]);
01821           nfft_free(data->steps[tau][l].a21[0]);
01822           nfft_free(data->steps[tau][l].a22[0]);
01823           data->steps[tau][l].a11[0] = NULL;
01824           data->steps[tau][l].a12[0] = NULL;
01825           data->steps[tau][l].a21[0] = NULL;
01826           data->steps[tau][l].a22[0] = NULL;
01827           /* Free components. */
01828           nfft_free(data->steps[tau][l].a11);
01829           nfft_free(data->steps[tau][l].a12);
01830           nfft_free(data->steps[tau][l].a21);
01831           nfft_free(data->steps[tau][l].a22);
01832           nfft_free(data->steps[tau][l].g);
01833           data->steps[tau][l].a11 = NULL;
01834           data->steps[tau][l].a12 = NULL;
01835           data->steps[tau][l].a21 = NULL;
01836           data->steps[tau][l].a22 = NULL;
01837           data->steps[tau][l].g = NULL;
01838         }
01839         /* Free pointers for current level. */
01840         nfft_free(data->steps[tau]);
01841         data->steps[tau] = NULL;
01842         /* Double length of polynomials. */
01843         plength = plength<<1;
01844       }
01845       /* Free steps. */
01846       nfft_free(data->steps);
01847       data->steps = NULL;
01848     }
01849 
01850     if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01851     {
01852       /* Check, if recurrence coefficients must be copied. */
01853       //fprintf(stderr,"\nfpt_finalize: %d\n",set->flags & FPT_PERSISTENT_DATA);
01854       if (!(set->flags & FPT_PERSISTENT_DATA))
01855       {
01856         nfft_free(data->_alpha);
01857         nfft_free(data->_beta);
01858         nfft_free(data->_gamma);
01859       }
01860       data->_alpha = NULL;
01861       data->_beta = NULL;
01862       data->_gamma = NULL;
01863     }
01864   }
01865 
01866   /* Delete array of DPT transform data. */
01867   nfft_free(set->dpt);
01868   set->dpt = NULL;
01869 
01870   for (tau = 1; tau < set->t+1; tau++)
01871   {
01872     nfft_free(set->xcvecs[tau-1]);
01873     set->xcvecs[tau-1] = NULL;
01874   }
01875   nfft_free(set->xcvecs);
01876   set->xcvecs = NULL;
01877 
01878   /* Free auxilliary arrays. */
01879   nfft_free(set->work);
01880   nfft_free(set->result);
01881 
01882   /* Check if fast transform is activated. */
01883   if (!(set->flags & FPT_NO_FAST_ALGORITHM))
01884   {
01885     /* Free auxilliary arrays. */
01886     nfft_free(set->vec3);
01887     nfft_free(set->vec4);
01888     nfft_free(set->z);
01889     set->work = NULL;
01890     set->result = NULL;
01891     set->vec3 = NULL;
01892     set->vec4 = NULL;
01893     set->z = NULL;
01894 
01895     /* Free FFTW plans. */
01896     for(tau = 0; tau < set->t/*-1*/; tau++)
01897     {
01898 #ifdef _OPENMP
01899 #pragma omp critical (nfft_omp_critical_fftw_plan)
01900 #endif
01901 {
01902       fftw_destroy_plan(set->plans_dct3[tau]);
01903       fftw_destroy_plan(set->plans_dct2[tau]);
01904 }
01905       set->plans_dct3[tau] = NULL;
01906       set->plans_dct2[tau] = NULL;
01907     }
01908 
01909     nfft_free(set->plans_dct3);
01910     nfft_free(set->plans_dct2);
01911     set->plans_dct3 = NULL;
01912     set->plans_dct2 = NULL;
01913   }
01914 
01915   if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01916   {
01917     /* Delete arrays of Chebyshev nodes. */
01918     nfft_free(set->xc_slow);
01919     set->xc_slow = NULL;
01920     nfft_free(set->temp);
01921     set->temp = NULL;
01922   }
01923 
01924   /* Free DPT set structure. */
01925   nfft_free(set);
01926 }