diff options
Diffstat (limited to 'ML_BSSN/src/ML_BSSN_convertToADMBaseFakeDtLapseShift.cc')
-rw-r--r-- | ML_BSSN/src/ML_BSSN_convertToADMBaseFakeDtLapseShift.cc | 181 |
1 files changed, 110 insertions, 71 deletions
diff --git a/ML_BSSN/src/ML_BSSN_convertToADMBaseFakeDtLapseShift.cc b/ML_BSSN/src/ML_BSSN_convertToADMBaseFakeDtLapseShift.cc index 5190707..36bd7fa 100644 --- a/ML_BSSN/src/ML_BSSN_convertToADMBaseFakeDtLapseShift.cc +++ b/ML_BSSN/src/ML_BSSN_convertToADMBaseFakeDtLapseShift.cc @@ -13,13 +13,14 @@ #include "GenericFD.h" #include "Differencing.h" #include "loopcontrol.h" +#include "vectors.h" /* Define macros used in calculations */ #define INITVALUE (42) #define QAD(x) (SQR(SQR(x))) -#define INV(x) ((1.0) / (x)) -#define SQR(x) ((x) * (x)) -#define CUB(x) ((x) * (x) * (x)) +#define INV(x) (kdiv(ToReal(1.0),x)) +#define SQR(x) (kmul(x,x)) +#define CUB(x) (kmul(x,SQR(x))) static void ML_BSSN_convertToADMBaseFakeDtLapseShift_Body(cGH const * restrict const cctkGH, int const dir, int const face, CCTK_REAL const normal[3], CCTK_REAL const tangentA[3], CCTK_REAL const tangentB[3], int const min[3], int const max[3], int const n_subblock_gfs, CCTK_REAL * restrict const subblock_gfs[]) { @@ -52,65 +53,66 @@ static void ML_BSSN_convertToADMBaseFakeDtLapseShift_Body(cGH const * restrict c ptrdiff_t const cdi = sizeof(CCTK_REAL) * di; ptrdiff_t const cdj = sizeof(CCTK_REAL) * dj; ptrdiff_t const cdk = sizeof(CCTK_REAL) * dk; - CCTK_REAL const dx = ToReal(CCTK_DELTA_SPACE(0)); - CCTK_REAL const dy = ToReal(CCTK_DELTA_SPACE(1)); - CCTK_REAL const dz = ToReal(CCTK_DELTA_SPACE(2)); - CCTK_REAL const dt = ToReal(CCTK_DELTA_TIME); - CCTK_REAL const dxi = INV(dx); - CCTK_REAL const dyi = INV(dy); - CCTK_REAL const dzi = INV(dz); - CCTK_REAL const khalf = 0.5; - CCTK_REAL const kthird = 1/3.0; - CCTK_REAL const ktwothird = 2.0/3.0; - CCTK_REAL const kfourthird = 4.0/3.0; - CCTK_REAL const keightthird = 8.0/3.0; - CCTK_REAL const hdxi = 0.5 * dxi; - CCTK_REAL const hdyi = 0.5 * dyi; - CCTK_REAL const hdzi = 0.5 * dzi; + CCTK_REAL_VEC const dx = ToReal(CCTK_DELTA_SPACE(0)); + CCTK_REAL_VEC const dy = ToReal(CCTK_DELTA_SPACE(1)); + CCTK_REAL_VEC const dz = ToReal(CCTK_DELTA_SPACE(2)); + CCTK_REAL_VEC const dt = ToReal(CCTK_DELTA_TIME); + CCTK_REAL_VEC const dxi = INV(dx); + CCTK_REAL_VEC const dyi = INV(dy); + CCTK_REAL_VEC const dzi = INV(dz); + CCTK_REAL_VEC const khalf = ToReal(0.5); + CCTK_REAL_VEC const kthird = ToReal(1.0/3.0); + CCTK_REAL_VEC const ktwothird = ToReal(2.0/3.0); + CCTK_REAL_VEC const kfourthird = ToReal(4.0/3.0); + CCTK_REAL_VEC const keightthird = ToReal(8.0/3.0); + CCTK_REAL_VEC const hdxi = kmul(ToReal(0.5), dxi); + CCTK_REAL_VEC const hdyi = kmul(ToReal(0.5), dyi); + CCTK_REAL_VEC const hdzi = kmul(ToReal(0.5), dzi); /* Initialize predefined quantities */ - CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx); - CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy); - CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz); - CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy); - CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz); - CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz); - CCTK_REAL const p1o24dx = 0.0416666666666666666666666666667*INV(dx); - CCTK_REAL const p1o24dy = 0.0416666666666666666666666666667*INV(dy); - CCTK_REAL const p1o24dz = 0.0416666666666666666666666666667*INV(dz); - CCTK_REAL const p1o64dx = 0.015625*INV(dx); - CCTK_REAL const p1o64dy = 0.015625*INV(dy); - CCTK_REAL const p1o64dz = 0.015625*INV(dz); - CCTK_REAL const p1odx = INV(dx); - CCTK_REAL const p1ody = INV(dy); - CCTK_REAL const p1odz = INV(dz); - CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx)); - CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy)); - CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz)); + CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); + CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); + CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o24dx = kmul(INV(dx),ToReal(0.0416666666666666666666666666667)); + CCTK_REAL_VEC const p1o24dy = kmul(INV(dy),ToReal(0.0416666666666666666666666666667)); + CCTK_REAL_VEC const p1o24dz = kmul(INV(dz),ToReal(0.0416666666666666666666666666667)); + CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); + CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); + CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); + CCTK_REAL_VEC const p1odx = INV(dx); + CCTK_REAL_VEC const p1ody = INV(dy); + CCTK_REAL_VEC const p1odz = INV(dz); + CCTK_REAL_VEC const pm1o12dx2 = kmul(INV(SQR(dx)),ToReal(-0.0833333333333333333333333333333)); + CCTK_REAL_VEC const pm1o12dy2 = kmul(INV(SQR(dy)),ToReal(-0.0833333333333333333333333333333)); + CCTK_REAL_VEC const pm1o12dz2 = kmul(INV(SQR(dz)),ToReal(-0.0833333333333333333333333333333)); /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3 (ML_BSSN_convertToADMBaseFakeDtLapseShift, + LC_LOOP3VEC (ML_BSSN_convertToADMBaseFakeDtLapseShift, i,j,k, min[0],min[1],min[2], max[0],max[1],max[2], - cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) + cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], + CCTK_REAL_VEC_SIZE) { ptrdiff_t const index = di*i + dj*j + dk*k; /* Assign local copies of grid functions */ - CCTK_REAL AL = A[index]; - CCTK_REAL alphaL = alpha[index]; - CCTK_REAL B1L = B1[index]; - CCTK_REAL B2L = B2[index]; - CCTK_REAL B3L = B3[index]; - CCTK_REAL beta1L = beta1[index]; - CCTK_REAL beta2L = beta2[index]; - CCTK_REAL beta3L = beta3[index]; - CCTK_REAL rL = r[index]; - CCTK_REAL trKL = trK[index]; - CCTK_REAL Xt1L = Xt1[index]; - CCTK_REAL Xt2L = Xt2[index]; - CCTK_REAL Xt3L = Xt3[index]; + CCTK_REAL_VEC AL = vec_load(A[index]); + CCTK_REAL_VEC alphaL = vec_load(alpha[index]); + CCTK_REAL_VEC B1L = vec_load(B1[index]); + CCTK_REAL_VEC B2L = vec_load(B2[index]); + CCTK_REAL_VEC B3L = vec_load(B3[index]); + CCTK_REAL_VEC beta1L = vec_load(beta1[index]); + CCTK_REAL_VEC beta2L = vec_load(beta2[index]); + CCTK_REAL_VEC beta3L = vec_load(beta3[index]); + CCTK_REAL_VEC rL = vec_load(r[index]); + CCTK_REAL_VEC trKL = vec_load(trK[index]); + CCTK_REAL_VEC Xt1L = vec_load(Xt1[index]); + CCTK_REAL_VEC Xt2L = vec_load(Xt2[index]); + CCTK_REAL_VEC Xt3L = vec_load(Xt3[index]); /* Include user supplied include files */ @@ -118,34 +120,71 @@ static void ML_BSSN_convertToADMBaseFakeDtLapseShift_Body(cGH const * restrict c /* Precompute derivatives */ /* Calculate temporaries and grid functions */ - CCTK_REAL eta = fmin(1,INV(rL)*ToReal(SpatialBetaDriverRadius)); + CCTK_REAL_VEC eta = + kfmin(ToReal(1),kmul(INV(rL),ToReal(SpatialBetaDriverRadius))); - CCTK_REAL theta = fmin(1,exp(1 - - rL*INV(ToReal(SpatialShiftGammaCoeffRadius)))); + CCTK_REAL_VEC theta = + kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1)))); - CCTK_REAL dtalpL = - -(pow(alphaL,ToReal(harmonicN))*ToReal(harmonicF)*(trKL + (AL - - trKL)*ToReal(LapseACoeff))); + CCTK_REAL_VEC dtalpL = + kneg(kmul(kpow(alphaL,harmonicN),kmul(ToReal(harmonicF),kmadd(ksub(AL,trKL),ToReal(LapseACoeff),trKL)))); - CCTK_REAL dtbetaxL = theta*(Xt1L + beta1L*eta*ToReal(BetaDriver)*(-1 + - ToReal(ShiftBCoeff)) + (B1L - - Xt1L)*ToReal(ShiftBCoeff))*ToReal(ShiftGammaCoeff); + CCTK_REAL_VEC dtbetaxL = + kmul(theta,kmul(kadd(Xt1L,kmadd(beta1L,kmul(eta,ToReal(BetaDriver*(-1 + + ShiftBCoeff))),kmul(ksub(B1L,Xt1L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); - CCTK_REAL dtbetayL = theta*(Xt2L + beta2L*eta*ToReal(BetaDriver)*(-1 + - ToReal(ShiftBCoeff)) + (B2L - - Xt2L)*ToReal(ShiftBCoeff))*ToReal(ShiftGammaCoeff); + CCTK_REAL_VEC dtbetayL = + kmul(theta,kmul(kadd(Xt2L,kmadd(beta2L,kmul(eta,ToReal(BetaDriver*(-1 + + ShiftBCoeff))),kmul(ksub(B2L,Xt2L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); - CCTK_REAL dtbetazL = theta*(Xt3L + beta3L*eta*ToReal(BetaDriver)*(-1 + - ToReal(ShiftBCoeff)) + (B3L - - Xt3L)*ToReal(ShiftBCoeff))*ToReal(ShiftGammaCoeff); + CCTK_REAL_VEC dtbetazL = + kmul(theta,kmul(kadd(Xt3L,kmadd(beta3L,kmul(eta,ToReal(BetaDriver*(-1 + + ShiftBCoeff))),kmul(ksub(B3L,Xt3L),ToReal(ShiftBCoeff)))),ToReal(ShiftGammaCoeff))); + + /* If necessary, store only partial vectors after the first iteration */ + + if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) + { + ptrdiff_t const elt_count_lo = lc_imin-i; + ptrdiff_t const elt_count_hi = lc_imax-i; + vec_store_nta_partial_mid(dtalp[index],dtalpL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(dtbetax[index],dtbetaxL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(dtbetay[index],dtbetayL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(dtbetaz[index],dtbetazL,elt_count_lo,elt_count_hi); + break; + } + + /* If necessary, store only partial vectors after the first iteration */ + + if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) + { + ptrdiff_t const elt_count = lc_imin-i; + vec_store_nta_partial_hi(dtalp[index],dtalpL,elt_count); + vec_store_nta_partial_hi(dtbetax[index],dtbetaxL,elt_count); + vec_store_nta_partial_hi(dtbetay[index],dtbetayL,elt_count); + vec_store_nta_partial_hi(dtbetaz[index],dtbetazL,elt_count); + continue; + } + + /* If necessary, store only partial vectors after the last iteration */ + + if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) + { + ptrdiff_t const elt_count = lc_imax-i; + vec_store_nta_partial_lo(dtalp[index],dtalpL,elt_count); + vec_store_nta_partial_lo(dtbetax[index],dtbetaxL,elt_count); + vec_store_nta_partial_lo(dtbetay[index],dtbetayL,elt_count); + vec_store_nta_partial_lo(dtbetaz[index],dtbetazL,elt_count); + break; + } /* Copy local copies back to grid functions */ - dtalp[index] = dtalpL; - dtbetax[index] = dtbetaxL; - dtbetay[index] = dtbetayL; - dtbetaz[index] = dtbetazL; + vec_store_nta(dtalp[index],dtalpL); + vec_store_nta(dtbetax[index],dtbetaxL); + vec_store_nta(dtbetay[index],dtbetayL); + vec_store_nta(dtbetaz[index],dtbetazL); } - LC_ENDLOOP3 (ML_BSSN_convertToADMBaseFakeDtLapseShift); + LC_ENDLOOP3VEC (ML_BSSN_convertToADMBaseFakeDtLapseShift); } extern "C" void ML_BSSN_convertToADMBaseFakeDtLapseShift(CCTK_ARGUMENTS) |