diff options
author | rhaas <rhaas@8e189c6b-2ab8-4400-aa02-70a9cfce18b9> | 2014-03-13 03:01:55 +0000 |
---|---|---|
committer | rhaas <rhaas@8e189c6b-2ab8-4400-aa02-70a9cfce18b9> | 2014-03-13 03:01:55 +0000 |
commit | 830dee6e364e7ea4b47e7187e838aadce351cfce (patch) | |
tree | c7068f2e44c9db178967e4065daf144ee66e3dce | |
parent | 22a8c86befc2b993d1d67b41759d9e24ec996754 (diff) |
EOS_Omni: low-level optimizations
* use log-rules to transform base 10 logs to natural ones
faster and hopefully more accurate
* optimize away a division when computing cs2
* optimize bracketing test to (a-a1)*(a-a2)<0
* replace divisions by multiplication by inverse
* use CCTK_BUILTIN_EXPECT to indicate likely outcome
this should help branch prediction since the compiler can put the the
less likely branch into the unfavorable location.
* remove superfluous if statements
whenever we leave a itmax loop regularly we iterated for too long
* explicitly replace x/y by x*1./y
From: Roland Haas <rhaas@tapir.caltech.edu>
git-svn-id: http://svn.einsteintoolkit.org/cactus/EinsteinEOS/EOS_Omni/trunk@101 8e189c6b-2ab8-4400-aa02-70a9cfce18b9
-rw-r--r-- | src/nuc_eos_cxx/helpers.hh | 101 | ||||
-rw-r--r-- | src/nuc_eos_cxx/nuc_eos_dpdrhoe_dpderho.cc | 6 | ||||
-rw-r--r-- | src/nuc_eos_cxx/nuc_eos_full.cc | 12 | ||||
-rw-r--r-- | src/nuc_eos_cxx/nuc_eos_press.cc | 8 | ||||
-rw-r--r-- | src/nuc_eos_cxx/nuc_eos_press_cs2.cc | 12 | ||||
-rw-r--r-- | src/nuc_eos_cxx/nuc_eos_short.cc | 18 | ||||
-rw-r--r-- | src/nuc_eos_cxx/readtable.cc | 12 |
7 files changed, 85 insertions, 84 deletions
diff --git a/src/nuc_eos_cxx/helpers.hh b/src/nuc_eos_cxx/helpers.hh index f8537b4..c68ce7d 100644 --- a/src/nuc_eos_cxx/helpers.hh +++ b/src/nuc_eos_cxx/helpers.hh @@ -18,24 +18,24 @@ int checkbounds(const double xrho, // 105 -- rho too high // 106 -- rho too low - if(xrho > eos_rhomax) { + if(CCTK_BUILTIN_EXPECT(xrho > eos_rhomax, false)) { return 105; } - if(xrho < eos_rhomin) { + if(CCTK_BUILTIN_EXPECT(xrho < eos_rhomin, false)) { return 106; } - if(xye > eos_yemax) { + if(CCTK_BUILTIN_EXPECT(xye > eos_yemax, false)) { return 101; } - if(xye < eos_yemin) { + if(CCTK_BUILTIN_EXPECT(xye < eos_yemin, false)) { // this is probably not pure and should be removed fprintf(stderr,"xye: %15.6E eos_yemin: %15.6E\n",xye,eos_yemin); return 102; } - if(xtemp > eos_tempmax) { + if(CCTK_BUILTIN_EXPECT(xtemp > eos_tempmax, false)) { return 103; } - if(xtemp < eos_tempmin) { + if(CCTK_BUILTIN_EXPECT(xtemp < eos_tempmin, false)) { return 104; } return 0; @@ -53,16 +53,16 @@ int checkbounds_kt0_noTcheck(const double xrho, // 105 -- rho too high // 106 -- rho too low - if(xrho > eos_rhomax) { + if(CCTK_BUILTIN_EXPECT(xrho > eos_rhomax, false)) { return 105; } - if(xrho < eos_rhomin) { + if(CCTK_BUILTIN_EXPECT(xrho < eos_rhomin, false)) { return 106; } - if(xye > eos_yemax) { + if(CCTK_BUILTIN_EXPECT(xye > eos_yemax, false)) { return 101; } - if(xye < eos_yemin) { + if(CCTK_BUILTIN_EXPECT(xye < eos_yemin, false)) { return 102; } return 0; @@ -331,10 +331,12 @@ double linterp2D(const double *restrict xs, // then interpolate in y // assume rectangular grid - double t1 = (fs[1]-fs[0])/(xs[1]-xs[0]) * (x - xs[0]) + fs[0]; - double t2 = (fs[3]-fs[2])/(xs[1]-xs[0]) * (x - xs[0]) + fs[2]; + double dxi = 1./(xs[1]-xs[0]); + double dyi = 1./(ys[1]-ys[0]); // x*1./y uses faster instructions than x/y + double t1 = (fs[1]-fs[0])*dxi * (x - xs[0]) + fs[0]; + double t2 = (fs[3]-fs[2])*dxi * (x - xs[0]) + fs[2]; - return (t2 - t1)/(ys[1]-ys[0]) * (y-ys[0]) + t1; + return (t2 - t1)*dyi * (y-ys[0]) + t1; } static inline __attribute__((always_inline)) @@ -359,6 +361,8 @@ void bisection(const double lr, const double dltp = log(1.2); const double dltm = log(0.8); + const double leps0_prec = leps0*prec; + // temporary local vars double lt, lt1, lt2; double ltmin = logtemp[0]; @@ -404,7 +408,7 @@ void bisection(const double lr, #endif bcount++; - if(bcount >= maxbcount) { + if(CCTK_BUILTIN_EXPECT(bcount >= maxbcount, false)) { *keyerrt = 667; return; } @@ -432,22 +436,20 @@ void bisection(const double lr, nuc_eos_C_linterp_one(idx,delx,dely,delz,&f2a,iv); fmid=f2a-leps0; - if(fmid <= 0.0) lt=ltmid; + if(CCTK_BUILTIN_EXPECT(fmid <= 0.0, false)) lt=ltmid; #if DEBUG fprintf(stderr,"bisection step 2 it %d, fmid: %15.6E f2a: %15.6E lt: %15.6E ltmid: %15.6E dlt: %15.6E\n", it,fmid,f2a,lt,ltmid,dlt); #endif - if(fabs(1.0-f2a/leps0) <= prec) { + if(CCTK_BUILTIN_EXPECT(fabs(leps0-f2a) <= leps0_prec, false)) { *ltout = ltmid; return; } } // for it = 0 - if(it >= itmax-1) { - *keyerrt = 667; - return; - } + *keyerrt = 667; + return; } // bisection @@ -465,7 +467,7 @@ void nuc_eos_findtemp(const double lr, // local variables const int itmax = 200; // use at most 10 iterations, then go to bisection - double dlepsdlt; // derivative dlogeps/dlogT + double dlepsdlti; // 1 / derivative dlogeps/dlogT double ldt; double leps,leps0; // temp vars for eps double ltn, lt; // temp vars for temperature @@ -489,6 +491,7 @@ void nuc_eos_findtemp(const double lr, fprintf(stderr,"it: %d t: %15.6E leps: %15.6E eps0: %15.6E del: %15.6E\n", it,lt, leps,leps0,fabs(leps-leps0)/(fabs(leps0))); #endif + // TODO: profile this to see which outcome is more likely if(fabs(leps-leps0) < prec*fabs(leps0)) { *ltout = lt0; return; @@ -548,8 +551,7 @@ void nuc_eos_findtemp(const double lr, // Check if we are already bracketing the input internal // energy. If so, interpolate for new T. - if( (leps0 >= epst1 && leps0 <= epst2) - || (leps0 <= epst1 && leps0 >=epst2)) { + if(CCTK_BUILTIN_EXPECT((leps0 - epst1) * (leps0 - epst2) <= 0., false)) { *ltout = (logtemp[itemp]-logtemp[itemp-1]) / (epst2 - epst1) * (leps0 - epst1) + logtemp[itemp-1]; @@ -559,8 +561,8 @@ void nuc_eos_findtemp(const double lr, // well, then do a Newton-Raphson step // first, guess the derivative - dlepsdlt = (epst2-epst1)/(logtemp[itemp]-logtemp[itemp-1]); - ldt = -(leps - leps0) / dlepsdlt * fac; + dlepsdlti = (logtemp[itemp]-logtemp[itemp-1])/(epst2-epst1); + ldt = -(leps - leps0) * dlepsdlti * fac; ltn = MIN(MAX(lt + ldt,ltmin),ltmax); lt = ltn; @@ -578,7 +580,7 @@ void nuc_eos_findtemp(const double lr, if(oerr < err) fac *= 0.9; oerr = err; - if(err < prec*fabs(leps0)) { + if(CCTK_BUILTIN_EXPECT(err < prec*fabs(leps0), false)) { *ltout = lt; return; } @@ -587,20 +589,18 @@ void nuc_eos_findtemp(const double lr, } // while(it < itmax) - if(it >= itmax - 1) { - // try bisection - // bisection(lr, lt0, ye, leps0, ltout, 1, prec, keyerrt); + // try bisection + // bisection(lr, lt0, ye, leps0, ltout, 1, prec, keyerrt); #if DEBUG - fprintf(stderr, "Failed to converge. This is bad. Trying bisection!\n"); + fprintf(stderr, "Failed to converge. This is bad. Trying bisection!\n"); #endif - bisection(lr,lt0,ye,leps0,prec,ltout,1,keyerrt); + bisection(lr,lt0,ye,leps0,prec,ltout,1,keyerrt); #if DEBUG - if(*keyerrt==667) { - fprintf(stderr,"This is worse. Bisection failed!\n"); - abort(); - } + if(*keyerrt==667) { + fprintf(stderr,"This is worse. Bisection failed!\n"); + abort(); + } #endif - } return; @@ -620,7 +620,7 @@ void nuc_eos_findtemp(const double lr, // local variables const int itmax = 200; // use at most 10 iterations, then go to bisection - double dentdlt; // derivative dentropy/dlogT + double dentdlti; // 1 / derivative dentropy/dlogT double ldt; double ent,ent0; // temp vars for eps double ltn, lt; // temp vars for temperature @@ -644,7 +644,7 @@ void nuc_eos_findtemp(const double lr, fprintf(stderr,"it: %d t: %15.6E leps: %15.6E eps0: %15.6E del: %15.6E\n", it,lt,ent,ent0,fabs(ent-ent0)/(fabs(ent0))); #endif - if(fabs(ent-ent0) < prec*fabs(ent0)) { + if(CCTK_BUILTIN_EXPECT(fabs(ent-ent0) < prec*fabs(ent0), false)) { *ltout = lt0; return; } @@ -703,8 +703,7 @@ void nuc_eos_findtemp(const double lr, // Check if we are already bracketing the input internal // energy. If so, interpolate for new T. - if( (ent0 >= ent1 && ent0 <= ent2) - || (ent0 <= ent1 && ent0 >= ent2)) { + if(CCTK_BUILTIN_EXPECT((ent0 - ent1) * (ent0 - ent2) <= 0., false)) { *ltout = (logtemp[itemp]-logtemp[itemp-1]) / (ent2 - ent1) * (ent0 - ent1) + logtemp[itemp-1]; @@ -714,8 +713,8 @@ void nuc_eos_findtemp(const double lr, // well, then do a Newton-Raphson step // first, guess the derivative - dentdlt = (ent2-ent1)/(logtemp[itemp]-logtemp[itemp-1]); - ldt = -(ent - ent0) / dentdlt * fac; + dentdlti = (logtemp[itemp]-logtemp[itemp-1])/(ent2-ent1); + ldt = -(ent - ent0) * dentdlti * fac; ltn = MIN(MAX(lt + ldt,ltmin),ltmax); lt = ltn; @@ -733,7 +732,7 @@ void nuc_eos_findtemp(const double lr, if(oerr < err) fac *= 0.9; oerr = err; - if(err < prec*fabs(ent0)) { + if(CCTK_BUILTIN_EXPECT(err < prec*fabs(ent0), false)) { *ltout = lt; return; } @@ -742,19 +741,17 @@ void nuc_eos_findtemp(const double lr, } // while(it < itmax) - if(it >= itmax - 1) { - // try bisection + // try bisection #if DEBUG - fprintf(stderr, "Failed to converge. This is bad. Trying bisection!\n"); + fprintf(stderr, "Failed to converge. This is bad. Trying bisection!\n"); #endif - bisection(lr,lt0,ye,ent0,prec,ltout,2,keyerrt); + bisection(lr,lt0,ye,ent0,prec,ltout,2,keyerrt); #if DEBUG - if(*keyerrt==667) { - fprintf(stderr,"This is worse. Bisection failed!\n"); - abort(); - } + if(*keyerrt==667) { + fprintf(stderr,"This is worse. Bisection failed!\n"); + abort(); + } #endif - } return; diff --git a/src/nuc_eos_cxx/nuc_eos_dpdrhoe_dpderho.cc b/src/nuc_eos_cxx/nuc_eos_dpdrhoe_dpderho.cc index 1ff15eb..c4f4c1f 100644 --- a/src/nuc_eos_cxx/nuc_eos_dpdrhoe_dpderho.cc +++ b/src/nuc_eos_cxx/nuc_eos_dpdrhoe_dpderho.cc @@ -30,7 +30,7 @@ void CCTK_FNAME(nuc_eos_m_kt0_dpdrhoe_dpderho)(const int *restrict n_in, // Note that this code now requires that the // temperature guess be within the table bounds keyerr[i] = checkbounds_kt0_noTcheck(rho[i], ye[i]); - if(keyerr[i] != 0) { + if(CCTK_BUILTIN_EXPECT(keyerr[i] != 0, false)) { *anyerr = 1; } } @@ -45,7 +45,7 @@ void CCTK_FNAME(nuc_eos_m_kt0_dpdrhoe_dpderho)(const int *restrict n_in, const double lt = log(MIN(MAX(temp[i],eos_tempmin),eos_tempmax)); double ltout; const double epstot = eps[i]+energy_shift; - if(epstot>0.0e0) { + if(CCTK_BUILTIN_EXPECT(epstot>0.0e0, true)) { // this is the standard scenario; eps is larger than zero // and we can operate with logarithmic tables const double lxeps = log(epstot); @@ -61,7 +61,7 @@ void CCTK_FNAME(nuc_eos_m_kt0_dpdrhoe_dpderho)(const int *restrict n_in, keyerr[i] = 667; } // epstot > 0.0 - if(keyerr[i]) { + if(CCTK_BUILTIN_EXPECT(keyerr[i] != 0, false)) { // now try negative temperature treatment double eps0, eps1; int idx[8]; diff --git a/src/nuc_eos_cxx/nuc_eos_full.cc b/src/nuc_eos_cxx/nuc_eos_full.cc index 57f0776..cc0d4ec 100644 --- a/src/nuc_eos_cxx/nuc_eos_full.cc +++ b/src/nuc_eos_cxx/nuc_eos_full.cc @@ -39,7 +39,7 @@ void CCTK_FNAME(nuc_eos_m_kt1_full)(const int *restrict n_in, for(int i=0;i<n;i++) { // check if we are fine keyerr[i] = checkbounds(rho[i], temp[i], ye[i]); - if(keyerr[i] != 0) { + if(CCTK_BUILTIN_EXPECT(keyerr[i] != 0, false)) { *anyerr = 1; } } @@ -148,7 +148,7 @@ void CCTK_FNAME(nuc_eos_m_kt1_full)(const int *restrict n_in, prs[i] = exp(prs[i]); eps[i] = exp(eps[i]) - energy_shift; #if HAVEGR - cs2[i] = cs2[i] / (1.0 + eps[i] + prs[i]/rho[i]); + cs2[i] = rho[i] * cs2[i] / (rho[i] + rho[i] * eps[i] + prs[i]); #endif } @@ -194,7 +194,7 @@ void CCTK_FNAME(nuc_eos_m_kt0_full)(const int *restrict n_in, // Note that this code now requires that the // temperature guess be within the table bounds keyerr[i] = checkbounds_kt0_noTcheck(rho[i], ye[i]); - if(keyerr[i] != 0) { + if(CCTK_BUILTIN_EXPECT(keyerr[i] != 0, false)) { *anyerr = 1; } } @@ -209,7 +209,7 @@ void CCTK_FNAME(nuc_eos_m_kt0_full)(const int *restrict n_in, const double lt = log(MIN(MAX(temp[i],eos_tempmin),eos_tempmax)); double ltout; const double epstot = eps[i]+energy_shift; - if(epstot>0.0e0) { + if(CCTK_BUILTIN_EXPECT(epstot>0.0e0, true)) { // this is the standard scenario; eps is larger than zero // and we can operate with logarithmic tables const double lxeps = log(epstot); @@ -225,7 +225,7 @@ void CCTK_FNAME(nuc_eos_m_kt0_full)(const int *restrict n_in, } // epstot > 0.0 - if(keyerr[i]) { + if(CCTK_BUILTIN_EXPECT(keyerr[i] != 0, false)) { *anyerr=1; } else { temp[i] = exp(ltout); @@ -318,7 +318,7 @@ void CCTK_FNAME(nuc_eos_m_kt0_full)(const int *restrict n_in, for(int i=0;i<n;i++) { prs[i] = exp(prs[i]); #if HAVEGR - cs2[i] = cs2[i] / (1.0 + eps[i] + prs[i]/rho[i]); + cs2[i] = rho[i]*cs2[i] / (rho[i] + rho[i]*eps[i] + prs[i]); #endif } diff --git a/src/nuc_eos_cxx/nuc_eos_press.cc b/src/nuc_eos_cxx/nuc_eos_press.cc index 52c3168..9a2f5a0 100644 --- a/src/nuc_eos_cxx/nuc_eos_press.cc +++ b/src/nuc_eos_cxx/nuc_eos_press.cc @@ -25,7 +25,7 @@ void CCTK_FNAME(nuc_eos_m_kt1_press_eps)(const int *restrict n_in, for(int i=0;i<n;i++) { // check if we are fine keyerr[i] = checkbounds(rho[i], temp[i], ye[i]); - if(keyerr[i] != 0) { + if(CCTK_BUILTIN_EXPECT(keyerr[i] != 0, false)) { *anyerr = 1; } } @@ -87,7 +87,7 @@ void CCTK_FNAME(nuc_eos_m_kt0_press)(const int *restrict n_in, // Note that this code now requires that the // temperature guess be within the table bounds keyerr[i] = checkbounds_kt0_noTcheck(rho[i], ye[i]); - if(keyerr[i] != 0) { + if(CCTK_BUILTIN_EXPECT(keyerr[i] != 0, false)) { *anyerr = 1; } } @@ -103,7 +103,7 @@ void CCTK_FNAME(nuc_eos_m_kt0_press)(const int *restrict n_in, const double lt = log(MIN(MAX(temp[i],eos_tempmin),eos_tempmax)); double ltout; const double epstot = eps[i]+energy_shift; - if(epstot>0.0e0) { + if(CCTK_BUILTIN_EXPECT(epstot>0.0e0, true)) { // this is the standard scenario; eps is larger than zero // and we can operate with logarithmic tables const double lxeps = log(epstot); @@ -119,7 +119,7 @@ void CCTK_FNAME(nuc_eos_m_kt0_press)(const int *restrict n_in, keyerr[i] = 667; } // epstot > 0.0 - if(keyerr[i]) { + if(CCTK_BUILTIN_EXPECT(keyerr[i] != 0, false)) { // now try negative temperature treatment double eps0, eps1; int idx[8]; diff --git a/src/nuc_eos_cxx/nuc_eos_press_cs2.cc b/src/nuc_eos_cxx/nuc_eos_press_cs2.cc index 2652ac5..35de771 100644 --- a/src/nuc_eos_cxx/nuc_eos_press_cs2.cc +++ b/src/nuc_eos_cxx/nuc_eos_press_cs2.cc @@ -26,7 +26,7 @@ void CCTK_FNAME(nuc_eos_m_kt1_press_eps_cs2)(const int *restrict n_in, for(int i=0;i<n;i++) { // check if we are fine keyerr[i] = checkbounds(rho[i], temp[i], ye[i]); - if(keyerr[i] != 0) { + if(CCTK_BUILTIN_EXPECT(keyerr[i] != 0, false)) { *anyerr = 1; } } @@ -63,7 +63,7 @@ void CCTK_FNAME(nuc_eos_m_kt1_press_eps_cs2)(const int *restrict n_in, prs[i] = exp(prs[i]); eps[i] = exp(eps[i]) - energy_shift; #if HAVEGR - cs2[i] = cs2[i] / (1.0 + eps[i] + prs[i]/rho[i]); + cs2[i] = rho[i] * cs2[i] / (rho[i] + rho[i] * eps[i] + prs[i]); #endif } @@ -96,7 +96,7 @@ void CCTK_FNAME(nuc_eos_m_kt0_press_cs2)(const int *restrict n_in, // Note that this code now requires that the // temperature guess be within the table bounds keyerr[i] = checkbounds_kt0_noTcheck(rho[i], ye[i]); - if(keyerr[i] != 0) { + if(CCTK_BUILTIN_EXPECT(keyerr[i] != 0, false)) { *anyerr = 1; } } @@ -112,7 +112,7 @@ void CCTK_FNAME(nuc_eos_m_kt0_press_cs2)(const int *restrict n_in, const double lt = log(MIN(MAX(temp[i],eos_tempmin),eos_tempmax)); double ltout; const double epstot = eps[i]+energy_shift; - if(epstot>0.0e0) { + if(CCTK_BUILTIN_EXPECT(epstot>0.0e0, true)) { // this is the standard scenario; eps is larger than zero // and we can operate with logarithmic tables const double lxeps = log(epstot); @@ -124,7 +124,7 @@ void CCTK_FNAME(nuc_eos_m_kt0_press_cs2)(const int *restrict n_in, keyerr[i] = 667; } // epstot > 0.0 - if(keyerr[i]) { + if(CCTK_BUILTIN_EXPECT(keyerr[i] != 0, false)) { // now try negative temperature treatment double eps0, eps1; int idx[8]; @@ -175,7 +175,7 @@ void CCTK_FNAME(nuc_eos_m_kt0_press_cs2)(const int *restrict n_in, for(int i=0;i<n;i++) { prs[i] = exp(prs[i]); #if HAVEGR - cs2[i] = cs2[i] / (1.0 + eps[i] + prs[i]/rho[i]); + cs2[i] = rho[i] * cs2[i] / (rho[i] + rho[i]*eps[i] + prs[i]); #endif } diff --git a/src/nuc_eos_cxx/nuc_eos_short.cc b/src/nuc_eos_cxx/nuc_eos_short.cc index 0e6347e..89444a2 100644 --- a/src/nuc_eos_cxx/nuc_eos_short.cc +++ b/src/nuc_eos_cxx/nuc_eos_short.cc @@ -31,7 +31,7 @@ void CCTK_FNAME(nuc_eos_m_kt1_short)(const int *restrict n_in, for(int i=0;i<n;i++) { // check if we are fine keyerr[i] = checkbounds(rho[i], temp[i], ye[i]); - if(keyerr[i] != 0) { + if(CCTK_BUILTIN_EXPECT(keyerr[i] != 0, false)) { *anyerr = 1; } } @@ -94,7 +94,7 @@ void CCTK_FNAME(nuc_eos_m_kt1_short)(const int *restrict n_in, prs[i] = exp(prs[i]); eps[i] = exp(eps[i]) - energy_shift; #if HAVEGR - cs2[i] = cs2[i] / (1.0 + eps[i] + prs[i]/rho[i]); + cs2[i] = rho[i] * cs2[i] / (rho[i] + rho[i]*eps[i] + prs[i]); #endif } @@ -131,7 +131,7 @@ extern "C" // Note that this code now requires that the // temperature guess be within the table bounds keyerr[i] = checkbounds_kt0_noTcheck(rho[i], ye[i]); - if(keyerr[i] != 0) { + if(CCTK_BUILTIN_EXPECT(keyerr[i] != 0, false)) { *anyerr = 1; } } @@ -146,7 +146,7 @@ extern "C" const double lt = log(MIN(MAX(temp[i],eos_tempmin),eos_tempmax)); double ltout; const double epstot = eps[i]+energy_shift; - if(epstot>0.0e0) { + if(CCTK_BUILTIN_EXPECT(epstot>0.0e0, true)) { // this is the standard scenario; eps is larger than zero // and we can operate with logarithmic tables const double lxeps = log(epstot); @@ -163,7 +163,7 @@ extern "C" } // epstot > 0.0 - if(keyerr[i]) { + if(CCTK_BUILTIN_EXPECT(keyerr[i] != 0, false)) { *anyerr = 1; } else { temp[i] = exp(ltout); @@ -211,7 +211,7 @@ extern "C" for(int i=0;i<n;i++) { prs[i] = exp(prs[i]); #if HAVEGR - cs2[i] = cs2[i] / (1.0 + eps[i] + prs[i]/rho[i]); + cs2[i] = rho[i] * cs2[i] / (rho[i] + rho[i] * eps[i] + prs[i]); #endif } @@ -247,7 +247,7 @@ extern "C" // Note that this code now requires that the // temperature guess be within the table bounds keyerr[i] = checkbounds_kt0_noTcheck(rho[i], ye[i]); - if(keyerr[i] != 0) { + if(CCTK_BUILTIN_EXPECT(keyerr[i] != 0, false)) { *anyerr = 1; } } @@ -264,7 +264,7 @@ extern "C" nuc_eos_findtemp_entropy(lr,lt,ye[i],ent[i],*prec, (double *restrict)(<out),&keyerr[i]); - if(keyerr[i]) { + if(CCTK_BUILTIN_EXPECT(keyerr[i] != 0, false)) { *anyerr = 1; } else { temp[i] = exp(ltout); @@ -313,7 +313,7 @@ extern "C" prs[i] = exp(prs[i]); eps[i] = exp(eps[i]) - energy_shift; #if HAVEGR - cs2[i] = cs2[i] / (1.0 + eps[i] + prs[i]/rho[i]); + cs2[i] = rho[i] * cs2[i] / (rho[i] + rho[i] * eps[i] + prs[i]); #endif } diff --git a/src/nuc_eos_cxx/readtable.cc b/src/nuc_eos_cxx/readtable.cc index 9798fd3..9c21ea7 100644 --- a/src/nuc_eos_cxx/readtable.cc +++ b/src/nuc_eos_cxx/readtable.cc @@ -194,11 +194,15 @@ void nuc_eos_C_ReadTable(char* nuceos_table_name) // pressure energy_shift = energy_shift * EPSGF; for(int i=0;i<nrho;i++) { - logrho[i] = log(pow(10.0,logrho[i]) * RHOGF); + // rewrite: + //logrho[i] = log(pow(10.0,logrho[i]) * RHOGF); + // by using log(a^b*c) = b*log(a)+log(c) + logrho[i] = logrho[i] * log(10.) + log(RHOGF); } for(int i=0;i<ntemp;i++) { - logtemp[i] = log(pow(10.0,logtemp[i])); + //logtemp[i] = log(pow(10.0,logtemp[i])); + logtemp[i] = logtemp[i]*log(10.0); } // allocate epstable; a linear-scale eps table @@ -214,12 +218,12 @@ void nuc_eos_C_ReadTable(char* nuceos_table_name) { // pressure int idx = 0 + NTABLES*i; - alltables[idx] = log(pow(10.0,alltables[idx])*PRESSGF); + alltables[idx] = alltables[idx] * log(10.0) + log(PRESSGF); } { // eps int idx = 1 + NTABLES*i; - alltables[idx] = log(pow(10.0,alltables[idx])*EPSGF); + alltables[idx] = alltables[idx] * log(10.0) + log(EPSGF); epstable[i] = exp(alltables[idx]); } |