src/patch/fd_grid.hh


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513

// fd_grid.hh -- grid with finite differencing operations
// $Header$
//
// *** Implementation Notes -- Overview ***
// *** Implementation Notes -- Techniques using C++ Templates ***
// *** Implementation Notes -- Techniques using the C/C++ Preprocessor ***
// *** Implementation Notes -- Run-Time Choice of Molecules ***
// *** finite difference molecules ***
// ***** fd_grid - grid with finite differencing operations *****
//

//
// prerequisites:
//    <stdio.h>
//    <assert.h>
//    <math.h>
//    "stdc.h"
//    "config.hh"
//    "../jtutil/util.hh"
//    "../jtutil/array.hh"
//    "../jtutil/linear_map.hh"
//    "coords.hh"
//    "grid.hh"
//

// everything in this file is inside this namespace
namespace AHFinderDirect
	  {

//******************************************************************************

//
// *** Implementation Notes -- Overview ***
//

//
// The key design problem for our finite differencing is how to
// implement an entire family of 5(9) finite difference operations in
// 2D(3D)
//
//	partial_rho		partial_sigma
//	partial_{rho,rho}	partial_{rho,sigma}
//				partial_{sigma,sigma}
//
//	partial_x		partial_y		partial_z
//	partial_xx		partial_xy		partial_xz
//				partial_yy		partial_yz
//							partial_zz
//
// without having to write out the finite differencing molecules multiple
// times, and while still preserving maximum inline-function efficiency.
// In particular, mixed 2nd-order derivative operations like partial_xy
// should be automatically composed from the two individual 1st derivative
// operations (partial_x and partial_y).
//

//
// Our basic approach is to define each finite difference molecule in
// a generic 1-dimensional form using an abstract "data(m)" interface.
// Here we use the terminology that a finite difference molecule is
// defined as
//	out[k] = sum(m) c[m] * in[k+m]
// where c[] is the vector/matrix of molecule coefficients, and m is
// the (integer) relative grid coordinate within a molecule.
//
// That is, for example, we define the usual 2nd order centered 1st
// derivative operator as
//	diff = 0.5*inv_delta_x*(data(+1) - data(-1))
// leaving unspecified just what the data source is.  We then use this
// with an appropriate data source (indexing along that gridfn array axis)
// for each directional derivative operation, and we compose two of
// these, using the first along x as the data source for the second
// along y, for the mixed 2nd-order derivative operation.
//

//******************************************************************************

//
// *** Implementation Notes -- Techniques using C++ Templates ***
// 

//
// There are two plausible ways to use C++ templates
//	[C++ templates are described in detail in chapter 13 of
//	Stroustrup's "The C++ Programming Language" (3rd Edition),
//	hereinafter "C++PL", and chapter 15 of Stroustrup's
//	"The Design and Evolution of C++", hereinafter "D&EC++".]
// to write the sort of generic-at-compile-time code we want:
// - Template specializations for each axis, as discussed in D&EC++
//   section 15.10.3.
// - Overloaded functions for each axis, with an argument type
//   (possibly that of an extra unused argument) selecting the
//   appropriate axis and hence the appropriate function.  This
//   technique is discussed in D&EC++ section 15.6.3.1.
//
// Quoting from D&EC++ (section 15.6.3.1),
//
//	The fundamental observation is that every property
//	of a type or an algorithm can be represented by a
//	type (possibly defined specificaly to do exactly
//	that).  That done, such a type can be used to guide
//	the overload resolution to select a function that
//	depends on the desired property.  [...]
//
//	Please note that thanks to inlining this resolution
//	is done at compile-time, so the appropriate [...]
//	function will be called directly without any run-time
//	overhead.
//
// Quoting from C++PL3 (section 13.4),
//
//	Passing [...] operations as a template parameter has two
//	significant benefits compared to alternatives such as
//	passing pointers to functions.  Several operations can
//	be passed as a single argument with no run-time cost.
//	In addition, the [...] operators [passed this way] are
//	trivial to inline, whereas inlininkg a call through a
//	pointer to function requires exceptional attention from
//	a compiler.
//

//
// In my opinion the template-specialization design is cleaner, and it
// clearly has no run-time cost (whereas the overloaded-function design
// may have a run-time cost for constructing and passing unused objects),
// so we use it here.
//
// There are, however, two (non-fatal) problema with this approach:
// - Unfortunately, it appears C++ (or at least gcc 2.95.1) forbids
//   template specialization within a class, so some of the functions
//   which whould logically be class members, must instead be defined
//   outside any class.  We use the namespace  fd_stuff::  to hide
//   these from the outside world.
// - C++PL3, section C.13.3, states that
//	Only class templates can be template arguments.
//   so we have to use dummy classes around some of our template
//   functions.  To avoid extra constructor/destructor overhead, we
//   make these template functions static.
//

//******************************************************************************

//
// *** Implementation Notes -- Techniques using the C/C++ Preprocessor ***
//

//
// The fundamental problem with the template approaches is portability:
// Although the C++ standard describes powerful template facilities, not
// all C++ compilers yet fully support these.  As an alternative, we can
// use the C/C++ preprocessor.  This is ugly and dangerous (global names!),
// but is probably simpler than any of the template approaches.  It can
// provide the same finite differencing functionality and efficiency as
// the template-based approaches.
//
// Because of its greater portability, we use the preprocessor-based
// approach here.
//

//******************************************************************************


//
// *** Implementation Notes -- Run-Time Choice of Molecules ***
//
// *If* we want to allow the finite differencing scheme to be changed
// at run-time (e.g. from a parameter file), there are three plausible
// ways to do this:
// - Using  switch(molecule_type) , as is standard in C.  This is
//   simple, and for this particular application quite well-structured
//   and maintainable (there are only a few different molecule types,
//   all centralized in this file).
// - Using virtual functions, with  molecule  a virtual base class
//   and individual molecules derived from it.  This is elegant, but
//   may have some performance problems (below).  It also requires some
//   sort of switch-based "object factory" to interface with with the
//   molecule-choice parameters.
// - Write all the finite differencing code multiple times, once for
//   each finite differencing scheme.
//
// The typical use of these functions will be from within a loop over
// a whole grid.  In both cases we can expect excellent accuracy from
// modern hardware branch prediction (and thus minimal performance loss
// from the branching).  It's reasonable to expect a compiler to fully
// inline the switch-based code, exposing all the gridfn array subscriptings
// to strength reduction etc, but this is much trickier for the
// virtual-function--based code.  For this reason, the switch-based
// design seems superior to the virtual-function--based one.
//
// However, at present we don't implement any run-time selection: we
// "just" fix the finite differencing scheme at compile time via the
// preprocessor.
//

//******************************************************************************

//
// *** finite difference molecules ***
//

//**************************************

//
// define the actual molecules
//
// In the following macros, we first define all the distinct floating-
// -point numbers appearing in a molecules as "K" constants (all > 0),
// then define the actual derivative and its molecule coefficients
// using +/- the "K" constants, with multiplies by 1.0 elided and 0
// terms skipped in computing the derivative.  This (hopefully) gives
// maximum efficiency by avoiding the generated code loading the same
// constants multiple times.
// 

//
// The molecule macros all take the following arguments:
// inv_delta_x_ = inverse of grid spacing in the finite differencing
//		  direction
// data_= a data-fetching function or macro: data_(ghosted_gfn, irho, isigma)
//	  is the data to be finite differenced
// irho_plus_m_ = a function or macro: irho_plus_m_(irho,m) returns the
//		  rho coordinate to be passed to data_() for the [m]
//		  molecule coefficient
// isigma_plus_m_ = same thing, for the sigma coordinate
//
// n.b. We grab the variables ghosted_gfn, irho, and isigma from the calling
//      environment, and we define assorted local variables as needed!
//

//**************************************

//
// 2nd order
//

#define FD_GRID__ORDER2__MOL_RADIUS	1
#define FD_GRID__ORDER2__MOL_DIAMETER	3

#define FD_GRID__ORDER2__DX__KPM1	0.5
#define FD_GRID__ORDER2__DX(inv_delta_x_, data_,			\
			    irho_plus_m_, isigma_plus_m_)		\
	const fp data_p1 = data_(ghosted_gfn,				\
				   irho_plus_m_(irho  ,+1),		\
				 isigma_plus_m_(isigma,+1));		\
	const fp data_m1 = data_(ghosted_gfn,				\
				   irho_plus_m_(irho  ,-1),		\
				 isigma_plus_m_(isigma,-1));		\
	const fp sum = FD_GRID__ORDER2__DX__KPM1 * (data_p1 - data_m1);	\
	return inv_delta_x_ * sum;				/* end macro */
#define FD_GRID__ORDER2__DX__COEFF_M1	(-FD_GRID__ORDER2__DX__KPM1)
#define FD_GRID__ORDER2__DX__COEFF_0	0.0
#define FD_GRID__ORDER2__DX__COEFF_P1	(+FD_GRID__ORDER2__DX__KPM1)

#define FD_GRID__ORDER2__DXX__K0		2.0
#define FD_GRID__ORDER2__DXX(inv_delta_x_, data_,			\
			     irho_plus_m_, isigma_plus_m_)		\
	const fp data_p1 = data_(ghosted_gfn,				\
				   irho_plus_m_(irho  ,+1),		\
				 isigma_plus_m_(isigma,+1));		\
	const fp data_0  = data_(ghosted_gfn,				\
				   irho_plus_m_(irho  , 0),		\
				 isigma_plus_m_(isigma, 0));		\
	const fp data_m1 = data_(ghosted_gfn,				\
				   irho_plus_m_(irho  ,-1),		\
				 isigma_plus_m_(isigma,-1));		\
	const fp sum							\
		= data_m1 - FD_GRID__ORDER2__DXX__K0*data_0 + data_p1;	\
	return jtutil::pow2(inv_delta_x_) * sum;		/* end macro */
#define FD_GRID__ORDER2__DXX__COEFF_M1	1.0
#define FD_GRID__ORDER2__DXX__COEFF_0	(-FD_GRID__ORDER2__DXX__K0)
#define FD_GRID__ORDER2__DXX__COEFF_P1	1.0

//**************************************

//
// 4th order
//

#define FD_GRID__ORDER4__MOL_RADIUS	2
#define FD_GRID__ORDER4__MOL_DIAMETER	5

#define FD_GRID__ORDER4__DX__KPM2	(1.0/12.0)
#define FD_GRID__ORDER4__DX__KPM1	(8.0/12.0)
#define FD_GRID__ORDER4__DX(inv_delta_x_, data_,			\
			    irho_plus_m_, isigma_plus_m_)		\
	const fp data_p2 = data_(ghosted_gfn,				\
				   irho_plus_m_(irho  ,+2),		\
				 isigma_plus_m_(isigma,+2));		\
	const fp data_p1 = data_(ghosted_gfn,				\
				   irho_plus_m_(irho  ,+1),		\
				 isigma_plus_m_(isigma,+1));		\
	const fp data_m1 = data_(ghosted_gfn,				\
				   irho_plus_m_(irho  ,-1),		\
				 isigma_plus_m_(isigma,-1));		\
	const fp data_m2 = data_(ghosted_gfn,				\
				   irho_plus_m_(irho  ,-2),		\
				 isigma_plus_m_(isigma,-2));		\
	const fp sum							\
		=   FD_GRID__ORDER4__DX__KPM1 * (data_p1 - data_m1)	\
		  + FD_GRID__ORDER4__DX__KPM2 * (data_m2 - data_p2);	\
	return inv_delta_x_ * sum;				/* end macro */
#define FD_GRID__ORDER4__DX__COEFF_M2	(+FD_GRID__ORDER4__DX__KPM2)
#define FD_GRID__ORDER4__DX__COEFF_M1	(-FD_GRID__ORDER4__DX__KPM1)
#define FD_GRID__ORDER4__DX__COEFF_0	0.0
#define FD_GRID__ORDER4__DX__COEFF_P1	(+FD_GRID__ORDER4__DX__KPM1)
#define FD_GRID__ORDER4__DX__COEFF_P2	(-FD_GRID__ORDER4__DX__KPM2)

//**************************************

#define FD_GRID__ORDER4__DXX__KPM2	( 1.0/12.0)
#define FD_GRID__ORDER4__DXX__KPM1	(16.0/12.0)
#define FD_GRID__ORDER4__DXX__K0		(30.0/12.0)
#define FD_GRID__ORDER4__DXX(inv_delta_x_, data_,			\
			     irho_plus_m_, isigma_plus_m_)		\
	const fp data_p2 = data_(ghosted_gfn,				\
				   irho_plus_m_(irho  ,+2),		\
				 isigma_plus_m_(isigma,+2));		\
	const fp data_p1 = data_(ghosted_gfn,				\
				   irho_plus_m_(irho  ,+1),		\
				 isigma_plus_m_(isigma,+1));		\
	const fp data_0  = data_(ghosted_gfn,				\
				   irho_plus_m_(irho  , 0),		\
				 isigma_plus_m_(isigma, 0));		\
	const fp data_m1 = data_(ghosted_gfn,				\
				   irho_plus_m_(irho  ,-1),		\
				 isigma_plus_m_(isigma,-1));		\
	const fp data_m2 = data_(ghosted_gfn,				\
				   irho_plus_m_(irho  ,-2),		\
				 isigma_plus_m_(isigma,-2));		\
	const fp sum							\
		= - FD_GRID__ORDER4__DXX__K0 * data_0			\
		  + FD_GRID__ORDER4__DXX__KPM1 * (data_m1 + data_p1)	\
		  - FD_GRID__ORDER4__DXX__KPM2 * (data_m2 + data_p2);	\
	return jtutil::pow2(inv_delta_x_) * sum;		/* end macro */
#define FD_GRID__ORDER4__DXX__COEFF_M2	(-FD_GRID__ORDER4__DXX__KPM2)
#define FD_GRID__ORDER4__DXX__COEFF_M1	(+FD_GRID__ORDER4__DXX__KPM1)
#define FD_GRID__ORDER4__DXX__COEFF_0	(-FD_GRID__ORDER4__DXX__K0  )
#define FD_GRID__ORDER4__DXX__COEFF_P1	(+FD_GRID__ORDER4__DXX__KPM1)
#define FD_GRID__ORDER4__DXX__COEFF_P2	(-FD_GRID__ORDER4__DXX__KPM2)

//******************************************************************************

//
// choose finite differencing order via preprocessor symbol FINITE_DIFF_ORDER
//
#ifndef FINITE_DIFF_ORDER
  #error "must define FINITE_DIFF_ORDER!"
#endif

#if   (FINITE_DIFF_ORDER == 2)
  #define FD_GRID__MOL_RADIUS	FD_GRID__ORDER2__MOL_RADIUS
  #define FD_GRID__MOL_DIAMETER	FD_GRID__ORDER2__MOL_DIAMETER
  #define FD_GRID__DX		FD_GRID__ORDER2__DX
  #define FD_GRID__DXX		FD_GRID__ORDER2__DXX
#elif (FINITE_DIFF_ORDER == 4)
  #define FD_GRID__MOL_RADIUS	FD_GRID__ORDER4__MOL_RADIUS
  #define FD_GRID__MOL_DIAMETER	FD_GRID__ORDER4__MOL_DIAMETER
  #define FD_GRID__DX		FD_GRID__ORDER4__DX
  #define FD_GRID__DXX		FD_GRID__ORDER4__DXX
#else
  #error "unknown value " FINITE_DIFF_ORDER " for FINITE_DIFF_ORDER!"
#endif

#define FD_GRID__MOL_AREA	(FD_GRID__MOL_DIAMETER * FD_GRID__MOL_DIAMETER)

//******************************************************************************

//
// ***** fd_grid - grid with finite differencing operations *****
//
// An  fd_grid  is identical to a  grid  except that it also defines
// (rho,sigma)-coordinate finite differencing operations on gridfns.
//

class	fd_grid
	: public grid
	{
	//
	// molecule sizes
	//
public:
	// n.b. this interface implicitly assumes that all molecules
	//      are centered and are the same order and size
	static
	  int finite_diff_order() { return FINITE_DIFF_ORDER; }
	static
	  int molecule_radius()   { return FD_GRID__MOL_RADIUS; }
	static
	  int molecule_diameter() { return FD_GRID__MOL_DIAMETER; }
	static
	  int molecule_min_m() { return -FD_GRID__MOL_RADIUS; }
	static
	  int molecule_max_m() { return  FD_GRID__MOL_RADIUS; }

	//
	// helper functions to compute (irho,isigma) + [m]
	// along each axis
	//
private:
	static
	  int     rho_axis__irho_plus_m(int irho  , int m) { return irho  +m; }
	static
	  int   rho_axis__isigma_plus_m(int isigma, int m) { return isigma  ; }
	static
	  int   sigma_axis__irho_plus_m(int irho  , int m) { return irho    ; }
	static
	  int sigma_axis__isigma_plus_m(int isigma, int m) { return isigma+m; }


	//
	// ***** finite differencing *****
	//
public:

	// 1st derivatives
	fp partial_rho(int ghosted_gfn,  int irho, int isigma)
		const
		{
		FD_GRID__DX(inverse_delta_rho(),
			    ghosted_gridfn,
			    rho_axis__irho_plus_m,
			    rho_axis__isigma_plus_m);
		}
	fp partial_sigma(int ghosted_gfn,  int irho, int isigma)
		const
		{
		FD_GRID__DX(inverse_delta_sigma(),
			    ghosted_gridfn,
			    sigma_axis__irho_plus_m,
			    sigma_axis__isigma_plus_m);
		}

	// "pure" 2nd derivatives
	fp partial_rho_rho(int ghosted_gfn,  int irho, int isigma)
		const
		{
		FD_GRID__DXX(inverse_delta_rho(),
			     ghosted_gridfn,
			     rho_axis__irho_plus_m,
			     rho_axis__isigma_plus_m);
		}
	fp partial_sigma_sigma(int ghosted_gfn,  int irho, int isigma)
		const
		{
		FD_GRID__DXX(inverse_delta_sigma(),
			     ghosted_gridfn,
			     sigma_axis__irho_plus_m,
			     sigma_axis__isigma_plus_m);
		}

	// mixed 2nd partial derivative
	fp partial_rho_sigma(int ghosted_gfn,   int irho, int isigma)
		const
		{
		FD_GRID__DX(inverse_delta_rho(),
			    partial_sigma,
			    rho_axis__irho_plus_m,
			    rho_axis__isigma_plus_m);
		}


	//
	// ***** molecule coefficients *****
	//
public:
	// molecule coefficients
	// n.b. this interface implicitly assumes that all molecules
	//      are position-independent
	fp partial_rho_coeff        (int m) const
		{ return inverse_delta_rho()   * dx_coeff(m); }
	fp partial_sigma_coeff      (int m) const
		{ return inverse_delta_sigma() * dx_coeff(m); }
	fp partial_rho_rho_coeff    (int m) const
		{ return jtutil::pow2(inverse_delta_rho()) * dxx_coeff(m); }
	fp partial_sigma_sigma_coeff(int m) const
		{ return jtutil::pow2(inverse_delta_sigma()) * dxx_coeff(m); }
	fp partial_rho_sigma_coeff(int m_rho, int m_sigma) const
		{
		return partial_rho_coeff(m_rho) * partial_sigma_coeff(m_sigma);
		}

	// worker functions: molecule coefficients for unit grid spacing
private:
	static
	  fp dx_coeff(int m);
	static
	  fp dxx_coeff(int m);


	//
	// ***** constructor, destructor *****
	//
public:

	// constructor: pass through to grid:: constructor
	fd_grid(const grid_array_pars& grid_array_pars_in,
		const grid_pars& grid_pars_in)
		: grid(grid_array_pars_in, grid_pars_in)
		{ }
	// compiler-generated default destructor is ok


private:
        // we forbid copying and passing by value
        // by declaring the copy constructor and assignment operator
        // private, but never defining them
	fd_grid(const fd_grid& rhs);
	fd_grid& operator=(const fd_grid& rhs);
	};

//******************************************************************************

	  }	// namespace AHFinderDirect