// fd_grid.hh -- grid with finite differencing operations // $Header$ // // *** Implementation Notes -- Overview *** // *** Implementation Notes -- Techniques using C++ Templates *** // *** Implementation Notes -- Techniques using the C/C++ Preprocessor *** // *** Implementation Notes -- Run-Time Choice of Molecules *** // *** finite difference molecules *** // ***** fd_grid - grid with finite differencing operations ***** // // // prerequisites: // // // // "stdc.h" // "config.hh" // "../jtutil/util.hh" // "../jtutil/array.hh" // "../jtutil/linear_map.hh" // "coords.hh" // "grid.hh" // // everything in this file is inside this namespace namespace AHFinderDirect { //****************************************************************************** // // *** Implementation Notes -- Overview *** // // // The key design problem for our finite differencing is how to // implement an entire family of 5(9) finite difference operations in // 2D(3D) // // partial_rho partial_sigma // partial_{rho,rho} partial_{rho,sigma} // partial_{sigma,sigma} // // partial_x partial_y partial_z // partial_xx partial_xy partial_xz // partial_yy partial_yz // partial_zz // // without having to write out the finite differencing molecules multiple // times, and while still preserving maximum inline-function efficiency. // In particular, mixed 2nd-order derivative operations like partial_xy // should be automatically composed from the two individual 1st derivative // operations (partial_x and partial_y). // // // Our basic approach is to define each finite difference molecule in // a generic 1-dimensional form using an abstract "data(m)" interface. // Here we use the terminology that a finite difference molecule is // defined as // out[k] = sum(m) c[m] * in[k+m] // where c[] is the vector/matrix of molecule coefficients, and m is // the (integer) relative grid coordinate within a molecule. // // That is, for example, we define the usual 2nd order centered 1st // derivative operator as // diff = 0.5*inv_delta_x*(data(+1) - data(-1)) // leaving unspecified just what the data source is. We then use this // with an appropriate data source (indexing along that gridfn array axis) // for each directional derivative operation, and we compose two of // these, using the first along x as the data source for the second // along y, for the mixed 2nd-order derivative operation. // //****************************************************************************** // // *** Implementation Notes -- Techniques using C++ Templates *** // // // There are two plausible ways to use C++ templates // [C++ templates are described in detail in chapter 13 of // Stroustrup's "The C++ Programming Language" (3rd Edition), // hereinafter "C++PL", and chapter 15 of Stroustrup's // "The Design and Evolution of C++", hereinafter "D&EC++".] // to write the sort of generic-at-compile-time code we want: // - Template specializations for each axis, as discussed in D&EC++ // section 15.10.3. // - Overloaded functions for each axis, with an argument type // (possibly that of an extra unused argument) selecting the // appropriate axis and hence the appropriate function. This // technique is discussed in D&EC++ section 15.6.3.1. // // Quoting from D&EC++ (section 15.6.3.1), // // The fundamental observation is that every property // of a type or an algorithm can be represented by a // type (possibly defined specificaly to do exactly // that). That done, such a type can be used to guide // the overload resolution to select a function that // depends on the desired property. [...] // // Please note that thanks to inlining this resolution // is done at compile-time, so the appropriate [...] // function will be called directly without any run-time // overhead. // // Quoting from C++PL3 (section 13.4), // // Passing [...] operations as a template parameter has two // significant benefits compared to alternatives such as // passing pointers to functions. Several operations can // be passed as a single argument with no run-time cost. // In addition, the [...] operators [passed this way] are // trivial to inline, whereas inlininkg a call through a // pointer to function requires exceptional attention from // a compiler. // // // In my opinion the template-specialization design is cleaner, and it // clearly has no run-time cost (whereas the overloaded-function design // may have a run-time cost for constructing and passing unused objects), // so we use it here. // // There are, however, two (non-fatal) problema with this approach: // - Unfortunately, it appears C++ (or at least gcc 2.95.1) forbids // template specialization within a class, so some of the functions // which whould logically be class members, must instead be defined // outside any class. We use the namespace fd_stuff:: to hide // these from the outside world. // - C++PL3, section C.13.3, states that // Only class templates can be template arguments. // so we have to use dummy classes around some of our template // functions. To avoid extra constructor/destructor overhead, we // make these template functions static. // //****************************************************************************** // // *** Implementation Notes -- Techniques using the C/C++ Preprocessor *** // // // The fundamental problem with the template approaches is portability: // Although the C++ standard describes powerful template facilities, not // all C++ compilers yet fully support these. As an alternative, we can // use the C/C++ preprocessor. This is ugly and dangerous (global names!), // but is probably simpler than any of the template approaches. It can // provide the same finite differencing functionality and efficiency as // the template-based approaches. // // Because of its greater portability, we use the preprocessor-based // approach here. // //****************************************************************************** // // *** Implementation Notes -- Run-Time Choice of Molecules *** // // *If* we want to allow the finite differencing scheme to be changed // at run-time (e.g. from a parameter file), there are three plausible // ways to do this: // - Using switch(molecule_type) , as is standard in C. This is // simple, and for this particular application quite well-structured // and maintainable (there are only a few different molecule types, // all centralized in this file). // - Using virtual functions, with molecule a virtual base class // and individual molecules derived from it. This is elegant, but // may have some performance problems (below). It also requires some // sort of switch-based "object factory" to interface with with the // molecule-choice parameters. // - Write all the finite differencing code multiple times, once for // each finite differencing scheme. // // The typical use of these functions will be from within a loop over // a whole grid. In both cases we can expect excellent accuracy from // modern hardware branch prediction (and thus minimal performance loss // from the branching). It's reasonable to expect a compiler to fully // inline the switch-based code, exposing all the gridfn array subscriptings // to strength reduction etc, but this is much trickier for the // virtual-function--based code. For this reason, the switch-based // design seems superior to the virtual-function--based one. // // However, at present we don't implement any run-time selection: we // "just" fix the finite differencing scheme at compile time via the // preprocessor. // //****************************************************************************** // // *** finite difference molecules *** // //************************************** // // define the actual molecules // // In the following macros, we first define all the distinct floating- // -point numbers appearing in a molecules as "K" constants (all > 0), // then define the actual derivative and its molecule coefficients // using +/- the "K" constants, with multiplies by 1.0 elided and 0 // terms skipped in computing the derivative. This (hopefully) gives // maximum efficiency by avoiding the generated code loading the same // constants multiple times. // // // The molecule macros all take the following arguments: // inv_delta_x_ = inverse of grid spacing in the finite differencing // direction // data_= a data-fetching function or macro: data_(ghosted_gfn, irho, isigma) // is the data to be finite differenced // irho_plus_m_ = a function or macro: irho_plus_m_(irho,m) returns the // rho coordinate to be passed to data_() for the [m] // molecule coefficient // isigma_plus_m_ = same thing, for the sigma coordinate // // n.b. We grab the variables ghosted_gfn, irho, and isigma from the calling // environment, and we define assorted local variables as needed! // //************************************** // // 2nd order // #define FD_GRID__ORDER2__MOL_RADIUS 1 #define FD_GRID__ORDER2__MOL_DIAMETER 3 #define FD_GRID__ORDER2__DX__KPM1 0.5 #define FD_GRID__ORDER2__DX(inv_delta_x_, data_, \ irho_plus_m_, isigma_plus_m_) \ const fp data_p1 = data_(ghosted_gfn, \ irho_plus_m_(irho ,+1), \ isigma_plus_m_(isigma,+1)); \ const fp data_m1 = data_(ghosted_gfn, \ irho_plus_m_(irho ,-1), \ isigma_plus_m_(isigma,-1)); \ const fp sum = FD_GRID__ORDER2__DX__KPM1 * (data_p1 - data_m1); \ return inv_delta_x_ * sum; /* end macro */ #define FD_GRID__ORDER2__DX__COEFF_M1 (-FD_GRID__ORDER2__DX__KPM1) #define FD_GRID__ORDER2__DX__COEFF_0 0.0 #define FD_GRID__ORDER2__DX__COEFF_P1 (+FD_GRID__ORDER2__DX__KPM1) #define FD_GRID__ORDER2__DXX__K0 2.0 #define FD_GRID__ORDER2__DXX(inv_delta_x_, data_, \ irho_plus_m_, isigma_plus_m_) \ const fp data_p1 = data_(ghosted_gfn, \ irho_plus_m_(irho ,+1), \ isigma_plus_m_(isigma,+1)); \ const fp data_0 = data_(ghosted_gfn, \ irho_plus_m_(irho , 0), \ isigma_plus_m_(isigma, 0)); \ const fp data_m1 = data_(ghosted_gfn, \ irho_plus_m_(irho ,-1), \ isigma_plus_m_(isigma,-1)); \ const fp sum \ = data_m1 - FD_GRID__ORDER2__DXX__K0*data_0 + data_p1; \ return jtutil::pow2(inv_delta_x_) * sum; /* end macro */ #define FD_GRID__ORDER2__DXX__COEFF_M1 1.0 #define FD_GRID__ORDER2__DXX__COEFF_0 (-FD_GRID__ORDER2__DXX__K0) #define FD_GRID__ORDER2__DXX__COEFF_P1 1.0 //************************************** // // 4th order // #define FD_GRID__ORDER4__MOL_RADIUS 2 #define FD_GRID__ORDER4__MOL_DIAMETER 5 #define FD_GRID__ORDER4__DX__KPM2 (1.0/12.0) #define FD_GRID__ORDER4__DX__KPM1 (8.0/12.0) #define FD_GRID__ORDER4__DX(inv_delta_x_, data_, \ irho_plus_m_, isigma_plus_m_) \ const fp data_p2 = data_(ghosted_gfn, \ irho_plus_m_(irho ,+2), \ isigma_plus_m_(isigma,+2)); \ const fp data_p1 = data_(ghosted_gfn, \ irho_plus_m_(irho ,+1), \ isigma_plus_m_(isigma,+1)); \ const fp data_m1 = data_(ghosted_gfn, \ irho_plus_m_(irho ,-1), \ isigma_plus_m_(isigma,-1)); \ const fp data_m2 = data_(ghosted_gfn, \ irho_plus_m_(irho ,-2), \ isigma_plus_m_(isigma,-2)); \ const fp sum \ = FD_GRID__ORDER4__DX__KPM1 * (data_p1 - data_m1) \ + FD_GRID__ORDER4__DX__KPM2 * (data_m2 - data_p2); \ return inv_delta_x_ * sum; /* end macro */ #define FD_GRID__ORDER4__DX__COEFF_M2 (+FD_GRID__ORDER4__DX__KPM2) #define FD_GRID__ORDER4__DX__COEFF_M1 (-FD_GRID__ORDER4__DX__KPM1) #define FD_GRID__ORDER4__DX__COEFF_0 0.0 #define FD_GRID__ORDER4__DX__COEFF_P1 (+FD_GRID__ORDER4__DX__KPM1) #define FD_GRID__ORDER4__DX__COEFF_P2 (-FD_GRID__ORDER4__DX__KPM2) //************************************** #define FD_GRID__ORDER4__DXX__KPM2 ( 1.0/12.0) #define FD_GRID__ORDER4__DXX__KPM1 (16.0/12.0) #define FD_GRID__ORDER4__DXX__K0 (30.0/12.0) #define FD_GRID__ORDER4__DXX(inv_delta_x_, data_, \ irho_plus_m_, isigma_plus_m_) \ const fp data_p2 = data_(ghosted_gfn, \ irho_plus_m_(irho ,+2), \ isigma_plus_m_(isigma,+2)); \ const fp data_p1 = data_(ghosted_gfn, \ irho_plus_m_(irho ,+1), \ isigma_plus_m_(isigma,+1)); \ const fp data_0 = data_(ghosted_gfn, \ irho_plus_m_(irho , 0), \ isigma_plus_m_(isigma, 0)); \ const fp data_m1 = data_(ghosted_gfn, \ irho_plus_m_(irho ,-1), \ isigma_plus_m_(isigma,-1)); \ const fp data_m2 = data_(ghosted_gfn, \ irho_plus_m_(irho ,-2), \ isigma_plus_m_(isigma,-2)); \ const fp sum \ = - FD_GRID__ORDER4__DXX__K0 * data_0 \ + FD_GRID__ORDER4__DXX__KPM1 * (data_m1 + data_p1) \ - FD_GRID__ORDER4__DXX__KPM2 * (data_m2 + data_p2); \ return jtutil::pow2(inv_delta_x_) * sum; /* end macro */ #define FD_GRID__ORDER4__DXX__COEFF_M2 (-FD_GRID__ORDER4__DXX__KPM2) #define FD_GRID__ORDER4__DXX__COEFF_M1 (+FD_GRID__ORDER4__DXX__KPM1) #define FD_GRID__ORDER4__DXX__COEFF_0 (-FD_GRID__ORDER4__DXX__K0 ) #define FD_GRID__ORDER4__DXX__COEFF_P1 (+FD_GRID__ORDER4__DXX__KPM1) #define FD_GRID__ORDER4__DXX__COEFF_P2 (-FD_GRID__ORDER4__DXX__KPM2) //****************************************************************************** // // choose finite differencing order via preprocessor symbol FINITE_DIFF_ORDER // #ifndef FINITE_DIFF_ORDER #error "must define FINITE_DIFF_ORDER!" #endif #if (FINITE_DIFF_ORDER == 2) #define FD_GRID__MOL_RADIUS FD_GRID__ORDER2__MOL_RADIUS #define FD_GRID__MOL_DIAMETER FD_GRID__ORDER2__MOL_DIAMETER #define FD_GRID__DX FD_GRID__ORDER2__DX #define FD_GRID__DXX FD_GRID__ORDER2__DXX #elif (FINITE_DIFF_ORDER == 4) #define FD_GRID__MOL_RADIUS FD_GRID__ORDER4__MOL_RADIUS #define FD_GRID__MOL_DIAMETER FD_GRID__ORDER4__MOL_DIAMETER #define FD_GRID__DX FD_GRID__ORDER4__DX #define FD_GRID__DXX FD_GRID__ORDER4__DXX #else #error "unknown value " FINITE_DIFF_ORDER " for FINITE_DIFF_ORDER!" #endif #define FD_GRID__MOL_AREA (FD_GRID__MOL_DIAMETER * FD_GRID__MOL_DIAMETER) //****************************************************************************** // // ***** fd_grid - grid with finite differencing operations ***** // // An fd_grid is identical to a grid except that it also defines // (rho,sigma)-coordinate finite differencing operations on gridfns. // class fd_grid : public grid { // // molecule sizes // public: // n.b. this interface implicitly assumes that all molecules // are centered and are the same order and size static int finite_diff_order() { return FINITE_DIFF_ORDER; } static int molecule_radius() { return FD_GRID__MOL_RADIUS; } static int molecule_diameter() { return FD_GRID__MOL_DIAMETER; } static int molecule_min_m() { return -FD_GRID__MOL_RADIUS; } static int molecule_max_m() { return FD_GRID__MOL_RADIUS; } // // helper functions to compute (irho,isigma) + [m] // along each axis // private: static int rho_axis__irho_plus_m(int irho , int m) { return irho +m; } static int rho_axis__isigma_plus_m(int isigma, int m) { return isigma ; } static int sigma_axis__irho_plus_m(int irho , int m) { return irho ; } static int sigma_axis__isigma_plus_m(int isigma, int m) { return isigma+m; } // // ***** finite differencing ***** // public: // 1st derivatives fp partial_rho(int ghosted_gfn, int irho, int isigma) const { FD_GRID__DX(inverse_delta_rho(), ghosted_gridfn, rho_axis__irho_plus_m, rho_axis__isigma_plus_m); } fp partial_sigma(int ghosted_gfn, int irho, int isigma) const { FD_GRID__DX(inverse_delta_sigma(), ghosted_gridfn, sigma_axis__irho_plus_m, sigma_axis__isigma_plus_m); } // "pure" 2nd derivatives fp partial_rho_rho(int ghosted_gfn, int irho, int isigma) const { FD_GRID__DXX(inverse_delta_rho(), ghosted_gridfn, rho_axis__irho_plus_m, rho_axis__isigma_plus_m); } fp partial_sigma_sigma(int ghosted_gfn, int irho, int isigma) const { FD_GRID__DXX(inverse_delta_sigma(), ghosted_gridfn, sigma_axis__irho_plus_m, sigma_axis__isigma_plus_m); } // mixed 2nd partial derivative fp partial_rho_sigma(int ghosted_gfn, int irho, int isigma) const { FD_GRID__DX(inverse_delta_rho(), partial_sigma, rho_axis__irho_plus_m, rho_axis__isigma_plus_m); } // // ***** molecule coefficients ***** // public: // molecule coefficients // n.b. this interface implicitly assumes that all molecules // are position-independent fp partial_rho_coeff (int m) const { return inverse_delta_rho() * dx_coeff(m); } fp partial_sigma_coeff (int m) const { return inverse_delta_sigma() * dx_coeff(m); } fp partial_rho_rho_coeff (int m) const { return jtutil::pow2(inverse_delta_rho()) * dxx_coeff(m); } fp partial_sigma_sigma_coeff(int m) const { return jtutil::pow2(inverse_delta_sigma()) * dxx_coeff(m); } fp partial_rho_sigma_coeff(int m_rho, int m_sigma) const { return partial_rho_coeff(m_rho) * partial_sigma_coeff(m_sigma); } // worker functions: molecule coefficients for unit grid spacing private: static fp dx_coeff(int m); static fp dxx_coeff(int m); // // ***** constructor, destructor ***** // public: // constructor: pass through to grid:: constructor fd_grid(const grid_array_pars& grid_array_pars_in, const grid_pars& grid_pars_in) : grid(grid_array_pars_in, grid_pars_in) { } // compiler-generated default destructor is ok private: // we forbid copying and passing by value // by declaring the copy constructor and assignment operator // private, but never defining them fd_grid(const fd_grid& rhs); fd_grid& operator=(const fd_grid& rhs); }; //****************************************************************************** } // namespace AHFinderDirect