src/patch/fd_grid.hh


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391

// fd_grid.hh -- grid with finite differencing operations
// $Id$
//
// *** Implementation Notes -- Overview ***
// *** Implementation Notes -- Techniques using C++ Templates ***
// *** Implementation Notes -- Techniques using the C/C++ Preprocessor ***
// *** Implementation Notes -- Run-Time Choice of Molecules ***
// *** finite difference molecules ***
// ***** fd_grid - grid with finite differencing operations *****
//

//
// prerequisites:
//    <stdio.h>
//    <assert.h>
//    <math.h>		// for M_PI (used by degree/radian conversions)
//    "jt/util++.hh"	// jtutil:: stuff:
//			//    how_many_in_range(),
//			//    degrees_of_radians(), radians_of_degrees(),
//    "jt/array.hh"
//    "jt/linear_map.hh"
//    fp.hh
//    coords.hh
//    grid.hh
//

//******************************************************************************

//
// *** Implementation Notes -- Overview ***
//

//
// The key design problem for our finite differencing is how to
// implement an entire family of 5(9) finite difference operations in
// 2D(3D)
//
//	partial_rho		partial_sigma
//	partial_{rho,rho}	partial_{rho,sigma}
//				partial_{sigma,sigma}
//
//	partial_x		partial_y		partial_z
//	partial_xx		partial_xy		partial_xz
//				partial_yy		partial_yz
//							partial_zz
//
// without having to write out the finite differencing molecules multiple
// times, and while still preserving maximum inline-function efficiency.
// In particular, mixed 2nd-order derivative operations like partial_xy
// should be automatically composed from the two individual 1st derivative
// operations (partial_x and partial_y).
//

//
// Our basic approach is to define each finite difference molecule in
// a generic 1-dimensional form using an abstract "data(m)" interface.
// Here we use the terminology that a finite difference molecule is
// defined as
//	out[k] = sum(m) c[m] * in[k+m]
// where c[] is the vector/matrix of molecule coefficients, and m is
// the (integer) relative grid coordinate within a molecule.
//
// That is, for example, we define the usual 2nd order centered 1st
// derivative operator as
//	diff = 0.5*inv_delta_x*(data(+1) - data(-1))
// leaving unspecified just what the date source is.  We then use this
// with an appropriate data source (indexing along that gridfn array axis)
// for each directional derivative operation, and we compose two of
// these, using the first along x as the data source for the second
// along y, for the mixed 2nd-order derivative operation.
//

//******************************************************************************

//
// *** Implementation Notes -- Techniques using C++ Templates ***
// 

//
// There are two plausible ways to use C++ templates
//	[C++ templates are described in detail in chapter 13 of
//	Stroustrup's "The C++ Programming Language" (3rd Edition),
//	hereinafter "C++PL", and chapter 15 of Stroustrup's
//	"The Design and Evolution of C++", hereinafter "D&EC++".]
// to write the sort of generic-at-compile-time code we want:
// - Template specializations for each axis, as discussed in D&EC++
//   section 15.10.3.
// - Overloaded functions for each axis, with an argument type
//   (possibly that of an extra unused argument) selecting the
//   appropriate axis and hence the appropriate function.  This
//   technique is discussed in D&EC++ section 15.6.3.1.
//
// Quoting from D&EC++ (section 15.6.3.1),
//
//	The fundamental observation is that every property
//	of a type or an algorithm can be represented by a
//	type (possibly defined specificaly to do exactly
//	that).  That done, such a type can be used to guide
//	the overload resolution to select a function that
//	depends on the desired property.  [...]
//
//	Please note that thanks to inlining this resolution
//	is done at compile-time, so the appropriate [...]
//	function will be called directly without any run-time
//	overhead.
//
// Quoting from C++PL3 (section 13.4),
//
//	Passing [...] operations as a template parameter has two
//	significant benefits compared to alternatives such as
//	passing pointers to functions.  Several operations can
//	be passed as a single argument with no run-time cost.
//	In addition, the [...] operators [passed this way] are
//	trivial to inline, whereas inlininkg a call through a
//	pointer to function requires exceptional attention from
//	a compiler.
//

//
// In my opinion the template-specialization design is cleaner, and it
// clearly has no run-time cost (whereas the overloaded-function design
// may have a run-time cost for constructing and passing unused objects),
// so we use it here.
//
// There are, however, two (non-fatal) problema with this approach:
// - Unfortunately, it appears C++ (or at least gcc 2.95.1) forbids
//   template specialization within a class, so some of the functions
//   which whould logically be class members, must instead be defined
//   outside any class.  We use the namespace  fd_stuff::  to hide
//   these from the outside world.
// - C++PL3, section C.13.3, states that
//	Only class templates can be template arguments.
//   so we have to use dummy classes around some of our template
//   functions.  To avoid extra constructor/destructor overhead, we
//   make these template functions static.
//

//******************************************************************************

//
// *** Implementation Notes -- Techniques using the C/C++ Preprocessor ***
//

//
// The fundamental problem with the template approaches is portability:
// Although the C++ standard describes powerful template facilities, not
// all C++ compilers yet fully support these.  As an alternative, we can
// use the C/C++ preprocessor.  This is ugly and dangerous (global names!),
// but with care, can provide the same finite differencing functionality
// and efficiency as the template-based approaches.
//
// Because of its greater portability, we use the preprocessor-based
// approach here.
//

//******************************************************************************


//
// *** Implementation Notes -- Run-Time Choice of Molecules ***
//
// *If* we want to allow the finite differencing scheme to be changed
// at run-time (e.g. from a parameter file), there are two plausible
// ways to do this:
// - Using  switch(molecule_type) , as is standard in C.  This is
//   simple, and for this particular application quite well-structured
//   and maintainable (there are only a few different molecule types,
//   all centralized in this file).
// - Using virtual functions, with  molecule  a virtual base class
//   and individual molecules derived from it.  This is elegant, but
//   may have some performance problems (below).  It also requires some
//   sort of switch-based "object factory" to interface with with the
//   molecule-choice parameters.
//
// The typical use of these functions will be from within a loop over
// a whole grid.  In both cases we can expect excellent accuracy from
// modern hardware branch prediction (and thus minimal performance loss
// from the branching).  It's reasonable to expect a compiler to fully
// inline the switch-based code, exposing all the gridfn array subscriptings
// to strength reduction etc, but this is much trickier for the
// virtual-function--based code.
//
// For this reason, the switch-based design seems superior.
// However, for the present, we don't even implement this -- we fix
// the finite differencing scheme at compile time.
//

//******************************************************************************

//
// *** finite difference molecules ***
//

//**************************************

//
// define the actual molecules
//

//
// these  all take the following arguments:
// inv_delta_x_ = inverse of grid spacing in the finite differencing
//		  direction
// data_= a data-fetching function or macro: data_(gfn, irho, isigma)
//	  is the data to be finite differenced
// irho_plus_m_ = a function or macro: irho_plus_m_(irho,m) returns the
//		  rho coordinate to be passed to data_() for the [m]
//		  molecule coefficient
// isigma_plus_m_ = same thing, for the sigma coordinate
//
// n.b. We grab the variables gfn, irho, and isigma from the calling
//      environment, and we define assorted local variables as needed!
//

// 2nd order
#define FD_GRID__DX_ORDER2(inv_delta_x_, data_,				\
			   irho_plus_m_, isigma_plus_m_)		\
	const fp data_p1 = data_(gfn,   irho_plus_m_(irho  ,+1),	\
				      isigma_plus_m_(isigma,+1));	\
	const fp data_m1 = data_(gfn,   irho_plus_m_(irho  ,-1),	\
				      isigma_plus_m_(isigma,-1));	\
	const fp sum = 0.5 * (data_p1 - data_m1);			\
	return inv_delta_x_ * sum;					\
								/* end macro */
#define FD_GRID__DXX_ORDER2(inv_delta_x_, data_,			\
			    irho_plus_m_, isigma_plus_m_)		\
	const fp data_p1 = data_(gfn,   irho_plus_m_(irho  ,+1),	\
				      isigma_plus_m_(isigma,+1));	\
	const fp data_0  = data_(gfn,   irho_plus_m_(irho  , 0),	\
				      isigma_plus_m_(isigma, 0));	\
	const fp data_m1 = data_(gfn,   irho_plus_m_(irho  ,-1),	\
				      isigma_plus_m_(isigma,-1));	\
	const fp sum = data_m1 - 2.0*data_0 + data_p1;			\
	return jtutil::pow2(inv_delta_x_) * sum;			\
								/* end macro */

// 4th order
#define FD_GRID__DX_ORDER4(inv_delta_x_, data_,				\
			   irho_plus_m_, isigma_plus_m_)		\
	const fp data_p2 = data_(gfn,   irho_plus_m_(irho  ,+2),	\
				      isigma_plus_m_(isigma,+2));	\
	const fp data_p1 = data_(gfn,   irho_plus_m_(irho  ,+1),	\
				      isigma_plus_m_(isigma,+1));	\
	const fp data_m1 = data_(gfn,   irho_plus_m_(irho  ,-1),	\
				      isigma_plus_m_(isigma,-1));	\
	const fp data_m2 = data_(gfn,   irho_plus_m_(irho  ,-2),	\
				      isigma_plus_m_(isigma,-2));	\
	const fp sum = + (8.0/12.0) * (data_p1 - data_m1)		\
		       + (1.0/12.0) * (data_m2 - data_p2);		\
	return inv_delta_x_ * sum;					\
								/* end macro */
#define FD_GRID__DXX_ORDER4(inv_delta_x_, data_,			\
			    irho_plus_m_, isigma_plus_m_)		\
	const fp data_p2 = data_(gfn,   irho_plus_m_(irho  ,+2),	\
				      isigma_plus_m_(isigma,+2));	\
	const fp data_p1 = data_(gfn,   irho_plus_m_(irho  ,+1),	\
				      isigma_plus_m_(isigma,+1));	\
	const fp data_0  = data_(gfn,   irho_plus_m_(irho  , 0),	\
				      isigma_plus_m_(isigma, 0));	\
	const fp data_m1 = data_(gfn,   irho_plus_m_(irho  ,-1),	\
				      isigma_plus_m_(isigma,-1));	\
	const fp data_m2 = data_(gfn,   irho_plus_m_(irho  ,-2),	\
				      isigma_plus_m_(isigma,-2));	\
	const fp sum = - (30.0/12.0) * data_0				\
		       + (16.0/12.0) * (data_m1 + data_p1)		\
		       - ( 1.0/12.0) * (data_m2 + data_p2);		\
	return jtutil::pow2(inv_delta_x_) * sum;			\
								/* end macro */

//**************************************

//
// choose finite differencing order via preprocessor symbol FINITE_DIFF_ORDER
//
#ifndef FINITE_DIFF_ORDER
  #error "must define FINITE_DIFF_ORDER!"
#endif

#if   (FINITE_DIFF_ORDER == 2)
  #define FD_GRID__DX	FD_GRID__DX_ORDER2
  #define FD_GRID__DXX	FD_GRID__DXX_ORDER2
#elif (FINITE_DIFF_ORDER == 4)
  #define FD_GRID__DX	FD_GRID__DX_ORDER4
  #define FD_GRID__DXX	FD_GRID__DXX_ORDER4
#else
  #error "unknown value " FINITE_DIFF_ORDER " for FINITE_DIFF_ORDER!"
#endif

//******************************************************************************

//
// ***** fd_grid - grid with finite differencing operations *****
//
// An  fd_grid  is identical to a  grid  except that it also defines
// (rho,sigma)-coordinate finite differencing operations on gridfns.
//

class	fd_grid
	: public grid
	{
private:
	//
	// helper functions to compute (irho,isigma) + [m]
	// along each axis
	//
	static
	  int     rho_axis__irho_plus_m(int irho  , int m) { return irho  +m; }
	static
	  int   rho_axis__isigma_plus_m(int isigma, int m) { return isigma  ; }
	static
	  int   sigma_axis__irho_plus_m(int irho  , int m) { return irho    ; }
	static
	  int sigma_axis__isigma_plus_m(int isigma, int m) { return isigma+m; }


public:
	//
	// ***** user functions for finite differencing *****
	//

	// 1st derivatives
	fp partial_rho(int gfn,  int irho, int isigma)
		const
		{
		FD_GRID__DX(inverse_delta_rho(),
			    gridfn,
			    rho_axis__irho_plus_m,
			    rho_axis__isigma_plus_m);
		}
	fp partial_sigma(int gfn,  int irho, int isigma)
		const
		{
		FD_GRID__DX(inverse_delta_sigma(),
			    gridfn,
			    sigma_axis__irho_plus_m,
			    sigma_axis__isigma_plus_m);
		}

	// mixed 2nd partial derivative
	fp partial_rho_sigma(int gfn,   int irho, int isigma)
		const
		{
		FD_GRID__DX(inverse_delta_rho(),
			    partial_sigma,
			    rho_axis__irho_plus_m,
			    rho_axis__isigma_plus_m);
		}

	// "pure" 2nd derivatives
	fp partial_rho_rho(int gfn,  int irho, int isigma)
		const
		{
		FD_GRID__DXX(inverse_delta_rho(),
			     gridfn,
			     rho_axis__irho_plus_m,
			     rho_axis__isigma_plus_m);
		}
	fp partial_sigma_sigma(int gfn,  int irho, int isigma)
		const
		{
		FD_GRID__DXX(inverse_delta_sigma(),
			     gridfn,
			     sigma_axis__irho_plus_m,
			     sigma_axis__isigma_plus_m);
		}


public:
	//
	// ***** constructor, destructor *****
	//

	// constructor: pass through to grid:: constructor
	fd_grid(const grid_arrays::array_pars& grid_array_pars_in,
		const grid::grid_pars& grid_pars_in,
		int N_gridfns_in,
		bool verbose_flag = false)
		: grid(grid_array_pars_in,
		       grid_pars_in,
		       N_gridfns_in,
		       verbose_flag)
		{ }
	// compiler-generated default destructor is ok

private:
        // we forbid copying and passing by value
        // by declaring the copy constructor and assignment operator
        // private, but never defining them
	fd_grid(const fd_grid& rhs);
	fd_grid& operator=(const fd_grid& rhs);
	};