aboutsummaryrefslogtreecommitdiff
path: root/src/nuc_eos_cxx/helpers.hh
diff options
context:
space:
mode:
Diffstat (limited to 'src/nuc_eos_cxx/helpers.hh')
-rw-r--r--src/nuc_eos_cxx/helpers.hh101
1 files changed, 49 insertions, 52 deletions
diff --git a/src/nuc_eos_cxx/helpers.hh b/src/nuc_eos_cxx/helpers.hh
index f8537b4..c68ce7d 100644
--- a/src/nuc_eos_cxx/helpers.hh
+++ b/src/nuc_eos_cxx/helpers.hh
@@ -18,24 +18,24 @@ int checkbounds(const double xrho,
// 105 -- rho too high
// 106 -- rho too low
- if(xrho > eos_rhomax) {
+ if(CCTK_BUILTIN_EXPECT(xrho > eos_rhomax, false)) {
return 105;
}
- if(xrho < eos_rhomin) {
+ if(CCTK_BUILTIN_EXPECT(xrho < eos_rhomin, false)) {
return 106;
}
- if(xye > eos_yemax) {
+ if(CCTK_BUILTIN_EXPECT(xye > eos_yemax, false)) {
return 101;
}
- if(xye < eos_yemin) {
+ if(CCTK_BUILTIN_EXPECT(xye < eos_yemin, false)) {
// this is probably not pure and should be removed
fprintf(stderr,"xye: %15.6E eos_yemin: %15.6E\n",xye,eos_yemin);
return 102;
}
- if(xtemp > eos_tempmax) {
+ if(CCTK_BUILTIN_EXPECT(xtemp > eos_tempmax, false)) {
return 103;
}
- if(xtemp < eos_tempmin) {
+ if(CCTK_BUILTIN_EXPECT(xtemp < eos_tempmin, false)) {
return 104;
}
return 0;
@@ -53,16 +53,16 @@ int checkbounds_kt0_noTcheck(const double xrho,
// 105 -- rho too high
// 106 -- rho too low
- if(xrho > eos_rhomax) {
+ if(CCTK_BUILTIN_EXPECT(xrho > eos_rhomax, false)) {
return 105;
}
- if(xrho < eos_rhomin) {
+ if(CCTK_BUILTIN_EXPECT(xrho < eos_rhomin, false)) {
return 106;
}
- if(xye > eos_yemax) {
+ if(CCTK_BUILTIN_EXPECT(xye > eos_yemax, false)) {
return 101;
}
- if(xye < eos_yemin) {
+ if(CCTK_BUILTIN_EXPECT(xye < eos_yemin, false)) {
return 102;
}
return 0;
@@ -331,10 +331,12 @@ double linterp2D(const double *restrict xs,
// then interpolate in y
// assume rectangular grid
- double t1 = (fs[1]-fs[0])/(xs[1]-xs[0]) * (x - xs[0]) + fs[0];
- double t2 = (fs[3]-fs[2])/(xs[1]-xs[0]) * (x - xs[0]) + fs[2];
+ double dxi = 1./(xs[1]-xs[0]);
+ double dyi = 1./(ys[1]-ys[0]); // x*1./y uses faster instructions than x/y
+ double t1 = (fs[1]-fs[0])*dxi * (x - xs[0]) + fs[0];
+ double t2 = (fs[3]-fs[2])*dxi * (x - xs[0]) + fs[2];
- return (t2 - t1)/(ys[1]-ys[0]) * (y-ys[0]) + t1;
+ return (t2 - t1)*dyi * (y-ys[0]) + t1;
}
static inline __attribute__((always_inline))
@@ -359,6 +361,8 @@ void bisection(const double lr,
const double dltp = log(1.2);
const double dltm = log(0.8);
+ const double leps0_prec = leps0*prec;
+
// temporary local vars
double lt, lt1, lt2;
double ltmin = logtemp[0];
@@ -404,7 +408,7 @@ void bisection(const double lr,
#endif
bcount++;
- if(bcount >= maxbcount) {
+ if(CCTK_BUILTIN_EXPECT(bcount >= maxbcount, false)) {
*keyerrt = 667;
return;
}
@@ -432,22 +436,20 @@ void bisection(const double lr,
nuc_eos_C_linterp_one(idx,delx,dely,delz,&f2a,iv);
fmid=f2a-leps0;
- if(fmid <= 0.0) lt=ltmid;
+ if(CCTK_BUILTIN_EXPECT(fmid <= 0.0, false)) lt=ltmid;
#if DEBUG
fprintf(stderr,"bisection step 2 it %d, fmid: %15.6E f2a: %15.6E lt: %15.6E ltmid: %15.6E dlt: %15.6E\n",
it,fmid,f2a,lt,ltmid,dlt);
#endif
- if(fabs(1.0-f2a/leps0) <= prec) {
+ if(CCTK_BUILTIN_EXPECT(fabs(leps0-f2a) <= leps0_prec, false)) {
*ltout = ltmid;
return;
}
} // for it = 0
- if(it >= itmax-1) {
- *keyerrt = 667;
- return;
- }
+ *keyerrt = 667;
+ return;
} // bisection
@@ -465,7 +467,7 @@ void nuc_eos_findtemp(const double lr,
// local variables
const int itmax = 200; // use at most 10 iterations, then go to bisection
- double dlepsdlt; // derivative dlogeps/dlogT
+ double dlepsdlti; // 1 / derivative dlogeps/dlogT
double ldt;
double leps,leps0; // temp vars for eps
double ltn, lt; // temp vars for temperature
@@ -489,6 +491,7 @@ void nuc_eos_findtemp(const double lr,
fprintf(stderr,"it: %d t: %15.6E leps: %15.6E eps0: %15.6E del: %15.6E\n",
it,lt, leps,leps0,fabs(leps-leps0)/(fabs(leps0)));
#endif
+ // TODO: profile this to see which outcome is more likely
if(fabs(leps-leps0) < prec*fabs(leps0)) {
*ltout = lt0;
return;
@@ -548,8 +551,7 @@ void nuc_eos_findtemp(const double lr,
// Check if we are already bracketing the input internal
// energy. If so, interpolate for new T.
- if( (leps0 >= epst1 && leps0 <= epst2)
- || (leps0 <= epst1 && leps0 >=epst2)) {
+ if(CCTK_BUILTIN_EXPECT((leps0 - epst1) * (leps0 - epst2) <= 0., false)) {
*ltout = (logtemp[itemp]-logtemp[itemp-1]) / (epst2 - epst1) *
(leps0 - epst1) + logtemp[itemp-1];
@@ -559,8 +561,8 @@ void nuc_eos_findtemp(const double lr,
// well, then do a Newton-Raphson step
// first, guess the derivative
- dlepsdlt = (epst2-epst1)/(logtemp[itemp]-logtemp[itemp-1]);
- ldt = -(leps - leps0) / dlepsdlt * fac;
+ dlepsdlti = (logtemp[itemp]-logtemp[itemp-1])/(epst2-epst1);
+ ldt = -(leps - leps0) * dlepsdlti * fac;
ltn = MIN(MAX(lt + ldt,ltmin),ltmax);
lt = ltn;
@@ -578,7 +580,7 @@ void nuc_eos_findtemp(const double lr,
if(oerr < err) fac *= 0.9;
oerr = err;
- if(err < prec*fabs(leps0)) {
+ if(CCTK_BUILTIN_EXPECT(err < prec*fabs(leps0), false)) {
*ltout = lt;
return;
}
@@ -587,20 +589,18 @@ void nuc_eos_findtemp(const double lr,
} // while(it < itmax)
- if(it >= itmax - 1) {
- // try bisection
- // bisection(lr, lt0, ye, leps0, ltout, 1, prec, keyerrt);
+ // try bisection
+ // bisection(lr, lt0, ye, leps0, ltout, 1, prec, keyerrt);
#if DEBUG
- fprintf(stderr, "Failed to converge. This is bad. Trying bisection!\n");
+ fprintf(stderr, "Failed to converge. This is bad. Trying bisection!\n");
#endif
- bisection(lr,lt0,ye,leps0,prec,ltout,1,keyerrt);
+ bisection(lr,lt0,ye,leps0,prec,ltout,1,keyerrt);
#if DEBUG
- if(*keyerrt==667) {
- fprintf(stderr,"This is worse. Bisection failed!\n");
- abort();
- }
+ if(*keyerrt==667) {
+ fprintf(stderr,"This is worse. Bisection failed!\n");
+ abort();
+ }
#endif
- }
return;
@@ -620,7 +620,7 @@ void nuc_eos_findtemp(const double lr,
// local variables
const int itmax = 200; // use at most 10 iterations, then go to bisection
- double dentdlt; // derivative dentropy/dlogT
+ double dentdlti; // 1 / derivative dentropy/dlogT
double ldt;
double ent,ent0; // temp vars for eps
double ltn, lt; // temp vars for temperature
@@ -644,7 +644,7 @@ void nuc_eos_findtemp(const double lr,
fprintf(stderr,"it: %d t: %15.6E leps: %15.6E eps0: %15.6E del: %15.6E\n",
it,lt,ent,ent0,fabs(ent-ent0)/(fabs(ent0)));
#endif
- if(fabs(ent-ent0) < prec*fabs(ent0)) {
+ if(CCTK_BUILTIN_EXPECT(fabs(ent-ent0) < prec*fabs(ent0), false)) {
*ltout = lt0;
return;
}
@@ -703,8 +703,7 @@ void nuc_eos_findtemp(const double lr,
// Check if we are already bracketing the input internal
// energy. If so, interpolate for new T.
- if( (ent0 >= ent1 && ent0 <= ent2)
- || (ent0 <= ent1 && ent0 >= ent2)) {
+ if(CCTK_BUILTIN_EXPECT((ent0 - ent1) * (ent0 - ent2) <= 0., false)) {
*ltout = (logtemp[itemp]-logtemp[itemp-1]) / (ent2 - ent1) *
(ent0 - ent1) + logtemp[itemp-1];
@@ -714,8 +713,8 @@ void nuc_eos_findtemp(const double lr,
// well, then do a Newton-Raphson step
// first, guess the derivative
- dentdlt = (ent2-ent1)/(logtemp[itemp]-logtemp[itemp-1]);
- ldt = -(ent - ent0) / dentdlt * fac;
+ dentdlti = (logtemp[itemp]-logtemp[itemp-1])/(ent2-ent1);
+ ldt = -(ent - ent0) * dentdlti * fac;
ltn = MIN(MAX(lt + ldt,ltmin),ltmax);
lt = ltn;
@@ -733,7 +732,7 @@ void nuc_eos_findtemp(const double lr,
if(oerr < err) fac *= 0.9;
oerr = err;
- if(err < prec*fabs(ent0)) {
+ if(CCTK_BUILTIN_EXPECT(err < prec*fabs(ent0), false)) {
*ltout = lt;
return;
}
@@ -742,19 +741,17 @@ void nuc_eos_findtemp(const double lr,
} // while(it < itmax)
- if(it >= itmax - 1) {
- // try bisection
+ // try bisection
#if DEBUG
- fprintf(stderr, "Failed to converge. This is bad. Trying bisection!\n");
+ fprintf(stderr, "Failed to converge. This is bad. Trying bisection!\n");
#endif
- bisection(lr,lt0,ye,ent0,prec,ltout,2,keyerrt);
+ bisection(lr,lt0,ye,ent0,prec,ltout,2,keyerrt);
#if DEBUG
- if(*keyerrt==667) {
- fprintf(stderr,"This is worse. Bisection failed!\n");
- abort();
- }
+ if(*keyerrt==667) {
+ fprintf(stderr,"This is worse. Bisection failed!\n");
+ abort();
+ }
#endif
- }
return;