diff options
Diffstat (limited to 'ML_BSSN/src/ML_BSSN_convertToADMBase.cc')
-rw-r--r-- | ML_BSSN/src/ML_BSSN_convertToADMBase.cc | 280 |
1 files changed, 178 insertions, 102 deletions
diff --git a/ML_BSSN/src/ML_BSSN_convertToADMBase.cc b/ML_BSSN/src/ML_BSSN_convertToADMBase.cc index 589ccc2..5219ef7 100644 --- a/ML_BSSN/src/ML_BSSN_convertToADMBase.cc +++ b/ML_BSSN/src/ML_BSSN_convertToADMBase.cc @@ -13,13 +13,14 @@ #include "GenericFD.h" #include "Differencing.h" #include "loopcontrol.h" +#include "vectors.h" /* Define macros used in calculations */ #define INITVALUE (42) #define QAD(x) (SQR(SQR(x))) -#define INV(x) ((1.0) / (x)) -#define SQR(x) ((x) * (x)) -#define CUB(x) ((x) * (x) * (x)) +#define INV(x) (kdiv(ToReal(1.0),x)) +#define SQR(x) (kmul(x,x)) +#define CUB(x) (kmul(x,SQR(x))) static void ML_BSSN_convertToADMBase_Body(cGH const * restrict const cctkGH, int const dir, int const face, CCTK_REAL const normal[3], CCTK_REAL const tangentA[3], CCTK_REAL const tangentB[3], int const min[3], int const max[3], int const n_subblock_gfs, CCTK_REAL * restrict const subblock_gfs[]) { @@ -52,76 +53,77 @@ static void ML_BSSN_convertToADMBase_Body(cGH const * restrict const cctkGH, int ptrdiff_t const cdi = sizeof(CCTK_REAL) * di; ptrdiff_t const cdj = sizeof(CCTK_REAL) * dj; ptrdiff_t const cdk = sizeof(CCTK_REAL) * dk; - CCTK_REAL const dx = ToReal(CCTK_DELTA_SPACE(0)); - CCTK_REAL const dy = ToReal(CCTK_DELTA_SPACE(1)); - CCTK_REAL const dz = ToReal(CCTK_DELTA_SPACE(2)); - CCTK_REAL const dt = ToReal(CCTK_DELTA_TIME); - CCTK_REAL const dxi = INV(dx); - CCTK_REAL const dyi = INV(dy); - CCTK_REAL const dzi = INV(dz); - CCTK_REAL const khalf = 0.5; - CCTK_REAL const kthird = 1/3.0; - CCTK_REAL const ktwothird = 2.0/3.0; - CCTK_REAL const kfourthird = 4.0/3.0; - CCTK_REAL const keightthird = 8.0/3.0; - CCTK_REAL const hdxi = 0.5 * dxi; - CCTK_REAL const hdyi = 0.5 * dyi; - CCTK_REAL const hdzi = 0.5 * dzi; + CCTK_REAL_VEC const dx = ToReal(CCTK_DELTA_SPACE(0)); + CCTK_REAL_VEC const dy = ToReal(CCTK_DELTA_SPACE(1)); + CCTK_REAL_VEC const dz = ToReal(CCTK_DELTA_SPACE(2)); + CCTK_REAL_VEC const dt = ToReal(CCTK_DELTA_TIME); + CCTK_REAL_VEC const dxi = INV(dx); + CCTK_REAL_VEC const dyi = INV(dy); + CCTK_REAL_VEC const dzi = INV(dz); + CCTK_REAL_VEC const khalf = ToReal(0.5); + CCTK_REAL_VEC const kthird = ToReal(1.0/3.0); + CCTK_REAL_VEC const ktwothird = ToReal(2.0/3.0); + CCTK_REAL_VEC const kfourthird = ToReal(4.0/3.0); + CCTK_REAL_VEC const keightthird = ToReal(8.0/3.0); + CCTK_REAL_VEC const hdxi = kmul(ToReal(0.5), dxi); + CCTK_REAL_VEC const hdyi = kmul(ToReal(0.5), dyi); + CCTK_REAL_VEC const hdzi = kmul(ToReal(0.5), dzi); /* Initialize predefined quantities */ - CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx); - CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy); - CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz); - CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy); - CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz); - CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz); - CCTK_REAL const p1o24dx = 0.0416666666666666666666666666667*INV(dx); - CCTK_REAL const p1o24dy = 0.0416666666666666666666666666667*INV(dy); - CCTK_REAL const p1o24dz = 0.0416666666666666666666666666667*INV(dz); - CCTK_REAL const p1o64dx = 0.015625*INV(dx); - CCTK_REAL const p1o64dy = 0.015625*INV(dy); - CCTK_REAL const p1o64dz = 0.015625*INV(dz); - CCTK_REAL const p1odx = INV(dx); - CCTK_REAL const p1ody = INV(dy); - CCTK_REAL const p1odz = INV(dz); - CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx)); - CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy)); - CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz)); + CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); + CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); + CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o24dx = kmul(INV(dx),ToReal(0.0416666666666666666666666666667)); + CCTK_REAL_VEC const p1o24dy = kmul(INV(dy),ToReal(0.0416666666666666666666666666667)); + CCTK_REAL_VEC const p1o24dz = kmul(INV(dz),ToReal(0.0416666666666666666666666666667)); + CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); + CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); + CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); + CCTK_REAL_VEC const p1odx = INV(dx); + CCTK_REAL_VEC const p1ody = INV(dy); + CCTK_REAL_VEC const p1odz = INV(dz); + CCTK_REAL_VEC const pm1o12dx2 = kmul(INV(SQR(dx)),ToReal(-0.0833333333333333333333333333333)); + CCTK_REAL_VEC const pm1o12dy2 = kmul(INV(SQR(dy)),ToReal(-0.0833333333333333333333333333333)); + CCTK_REAL_VEC const pm1o12dz2 = kmul(INV(SQR(dz)),ToReal(-0.0833333333333333333333333333333)); /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3 (ML_BSSN_convertToADMBase, + LC_LOOP3VEC (ML_BSSN_convertToADMBase, i,j,k, min[0],min[1],min[2], max[0],max[1],max[2], - cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) + cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], + CCTK_REAL_VEC_SIZE) { ptrdiff_t const index = di*i + dj*j + dk*k; /* Assign local copies of grid functions */ - CCTK_REAL alphaL = alpha[index]; - CCTK_REAL At11L = At11[index]; - CCTK_REAL At12L = At12[index]; - CCTK_REAL At13L = At13[index]; - CCTK_REAL At22L = At22[index]; - CCTK_REAL At23L = At23[index]; - CCTK_REAL At33L = At33[index]; - CCTK_REAL beta1L = beta1[index]; - CCTK_REAL beta2L = beta2[index]; - CCTK_REAL beta3L = beta3[index]; - CCTK_REAL gt11L = gt11[index]; - CCTK_REAL gt12L = gt12[index]; - CCTK_REAL gt13L = gt13[index]; - CCTK_REAL gt22L = gt22[index]; - CCTK_REAL gt23L = gt23[index]; - CCTK_REAL gt33L = gt33[index]; - CCTK_REAL gxxL = gxx[index]; - CCTK_REAL gxyL = gxy[index]; - CCTK_REAL gxzL = gxz[index]; - CCTK_REAL gyyL = gyy[index]; - CCTK_REAL gyzL = gyz[index]; - CCTK_REAL gzzL = gzz[index]; - CCTK_REAL phiL = phi[index]; - CCTK_REAL trKL = trK[index]; + CCTK_REAL_VEC alphaL = vec_load(alpha[index]); + CCTK_REAL_VEC At11L = vec_load(At11[index]); + CCTK_REAL_VEC At12L = vec_load(At12[index]); + CCTK_REAL_VEC At13L = vec_load(At13[index]); + CCTK_REAL_VEC At22L = vec_load(At22[index]); + CCTK_REAL_VEC At23L = vec_load(At23[index]); + CCTK_REAL_VEC At33L = vec_load(At33[index]); + CCTK_REAL_VEC beta1L = vec_load(beta1[index]); + CCTK_REAL_VEC beta2L = vec_load(beta2[index]); + CCTK_REAL_VEC beta3L = vec_load(beta3[index]); + CCTK_REAL_VEC gt11L = vec_load(gt11[index]); + CCTK_REAL_VEC gt12L = vec_load(gt12[index]); + CCTK_REAL_VEC gt13L = vec_load(gt13[index]); + CCTK_REAL_VEC gt22L = vec_load(gt22[index]); + CCTK_REAL_VEC gt23L = vec_load(gt23[index]); + CCTK_REAL_VEC gt33L = vec_load(gt33[index]); + CCTK_REAL_VEC gxxL = vec_load(gxx[index]); + CCTK_REAL_VEC gxyL = vec_load(gxy[index]); + CCTK_REAL_VEC gxzL = vec_load(gxz[index]); + CCTK_REAL_VEC gyyL = vec_load(gyy[index]); + CCTK_REAL_VEC gyzL = vec_load(gyz[index]); + CCTK_REAL_VEC gzzL = vec_load(gzz[index]); + CCTK_REAL_VEC phiL = vec_load(phi[index]); + CCTK_REAL_VEC trKL = vec_load(trK[index]); /* Include user supplied include files */ @@ -129,65 +131,139 @@ static void ML_BSSN_convertToADMBase_Body(cGH const * restrict const cctkGH, int /* Precompute derivatives */ /* Calculate temporaries and grid functions */ - CCTK_REAL e4phi = IfThen(conformalMethod,INV(SQR(phiL)),exp(4*phiL)); + CCTK_REAL_VEC e4phi = + IfThen(conformalMethod,INV(SQR(phiL)),kexp(kmul(phiL,ToReal(4)))); - gxxL = e4phi*gt11L; + gxxL = kmul(e4phi,gt11L); - gxyL = e4phi*gt12L; + gxyL = kmul(e4phi,gt12L); - gxzL = e4phi*gt13L; + gxzL = kmul(e4phi,gt13L); - gyyL = e4phi*gt22L; + gyyL = kmul(e4phi,gt22L); - gyzL = e4phi*gt23L; + gyzL = kmul(e4phi,gt23L); - gzzL = e4phi*gt33L; + gzzL = kmul(e4phi,gt33L); - CCTK_REAL kxxL = At11L*e4phi + - 0.333333333333333333333333333333*gxxL*trKL; + CCTK_REAL_VEC kxxL = + kmadd(At11L,e4phi,kmul(gxxL,kmul(trKL,ToReal(0.333333333333333333333333333333)))); - CCTK_REAL kxyL = At12L*e4phi + - 0.333333333333333333333333333333*gxyL*trKL; + CCTK_REAL_VEC kxyL = + kmadd(At12L,e4phi,kmul(gxyL,kmul(trKL,ToReal(0.333333333333333333333333333333)))); - CCTK_REAL kxzL = At13L*e4phi + - 0.333333333333333333333333333333*gxzL*trKL; + CCTK_REAL_VEC kxzL = + kmadd(At13L,e4phi,kmul(gxzL,kmul(trKL,ToReal(0.333333333333333333333333333333)))); - CCTK_REAL kyyL = At22L*e4phi + - 0.333333333333333333333333333333*gyyL*trKL; + CCTK_REAL_VEC kyyL = + kmadd(At22L,e4phi,kmul(gyyL,kmul(trKL,ToReal(0.333333333333333333333333333333)))); - CCTK_REAL kyzL = At23L*e4phi + - 0.333333333333333333333333333333*gyzL*trKL; + CCTK_REAL_VEC kyzL = + kmadd(At23L,e4phi,kmul(gyzL,kmul(trKL,ToReal(0.333333333333333333333333333333)))); - CCTK_REAL kzzL = At33L*e4phi + - 0.333333333333333333333333333333*gzzL*trKL; + CCTK_REAL_VEC kzzL = + kmadd(At33L,e4phi,kmul(gzzL,kmul(trKL,ToReal(0.333333333333333333333333333333)))); - CCTK_REAL alpL = alphaL; + CCTK_REAL_VEC alpL = alphaL; - CCTK_REAL betaxL = beta1L; + CCTK_REAL_VEC betaxL = beta1L; - CCTK_REAL betayL = beta2L; + CCTK_REAL_VEC betayL = beta2L; - CCTK_REAL betazL = beta3L; + CCTK_REAL_VEC betazL = beta3L; + + /* If necessary, store only partial vectors after the first iteration */ + + if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) + { + ptrdiff_t const elt_count_lo = lc_imin-i; + ptrdiff_t const elt_count_hi = lc_imax-i; + vec_store_nta_partial_mid(alp[index],alpL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(betax[index],betaxL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(betay[index],betayL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(betaz[index],betazL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(gxx[index],gxxL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(gxy[index],gxyL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(gxz[index],gxzL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(gyy[index],gyyL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(gyz[index],gyzL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(gzz[index],gzzL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(kxx[index],kxxL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(kxy[index],kxyL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(kxz[index],kxzL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(kyy[index],kyyL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(kyz[index],kyzL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(kzz[index],kzzL,elt_count_lo,elt_count_hi); + break; + } + + /* If necessary, store only partial vectors after the first iteration */ + + if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) + { + ptrdiff_t const elt_count = lc_imin-i; + vec_store_nta_partial_hi(alp[index],alpL,elt_count); + vec_store_nta_partial_hi(betax[index],betaxL,elt_count); + vec_store_nta_partial_hi(betay[index],betayL,elt_count); + vec_store_nta_partial_hi(betaz[index],betazL,elt_count); + vec_store_nta_partial_hi(gxx[index],gxxL,elt_count); + vec_store_nta_partial_hi(gxy[index],gxyL,elt_count); + vec_store_nta_partial_hi(gxz[index],gxzL,elt_count); + vec_store_nta_partial_hi(gyy[index],gyyL,elt_count); + vec_store_nta_partial_hi(gyz[index],gyzL,elt_count); + vec_store_nta_partial_hi(gzz[index],gzzL,elt_count); + vec_store_nta_partial_hi(kxx[index],kxxL,elt_count); + vec_store_nta_partial_hi(kxy[index],kxyL,elt_count); + vec_store_nta_partial_hi(kxz[index],kxzL,elt_count); + vec_store_nta_partial_hi(kyy[index],kyyL,elt_count); + vec_store_nta_partial_hi(kyz[index],kyzL,elt_count); + vec_store_nta_partial_hi(kzz[index],kzzL,elt_count); + continue; + } + + /* If necessary, store only partial vectors after the last iteration */ + + if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) + { + ptrdiff_t const elt_count = lc_imax-i; + vec_store_nta_partial_lo(alp[index],alpL,elt_count); + vec_store_nta_partial_lo(betax[index],betaxL,elt_count); + vec_store_nta_partial_lo(betay[index],betayL,elt_count); + vec_store_nta_partial_lo(betaz[index],betazL,elt_count); + vec_store_nta_partial_lo(gxx[index],gxxL,elt_count); + vec_store_nta_partial_lo(gxy[index],gxyL,elt_count); + vec_store_nta_partial_lo(gxz[index],gxzL,elt_count); + vec_store_nta_partial_lo(gyy[index],gyyL,elt_count); + vec_store_nta_partial_lo(gyz[index],gyzL,elt_count); + vec_store_nta_partial_lo(gzz[index],gzzL,elt_count); + vec_store_nta_partial_lo(kxx[index],kxxL,elt_count); + vec_store_nta_partial_lo(kxy[index],kxyL,elt_count); + vec_store_nta_partial_lo(kxz[index],kxzL,elt_count); + vec_store_nta_partial_lo(kyy[index],kyyL,elt_count); + vec_store_nta_partial_lo(kyz[index],kyzL,elt_count); + vec_store_nta_partial_lo(kzz[index],kzzL,elt_count); + break; + } /* Copy local copies back to grid functions */ - alp[index] = alpL; - betax[index] = betaxL; - betay[index] = betayL; - betaz[index] = betazL; - gxx[index] = gxxL; - gxy[index] = gxyL; - gxz[index] = gxzL; - gyy[index] = gyyL; - gyz[index] = gyzL; - gzz[index] = gzzL; - kxx[index] = kxxL; - kxy[index] = kxyL; - kxz[index] = kxzL; - kyy[index] = kyyL; - kyz[index] = kyzL; - kzz[index] = kzzL; + vec_store_nta(alp[index],alpL); + vec_store_nta(betax[index],betaxL); + vec_store_nta(betay[index],betayL); + vec_store_nta(betaz[index],betazL); + vec_store_nta(gxx[index],gxxL); + vec_store_nta(gxy[index],gxyL); + vec_store_nta(gxz[index],gxzL); + vec_store_nta(gyy[index],gyyL); + vec_store_nta(gyz[index],gyzL); + vec_store_nta(gzz[index],gzzL); + vec_store_nta(kxx[index],kxxL); + vec_store_nta(kxy[index],kxyL); + vec_store_nta(kxz[index],kxzL); + vec_store_nta(kyy[index],kyyL); + vec_store_nta(kyz[index],kyzL); + vec_store_nta(kzz[index],kzzL); } - LC_ENDLOOP3 (ML_BSSN_convertToADMBase); + LC_ENDLOOP3VEC (ML_BSSN_convertToADMBase); } extern "C" void ML_BSSN_convertToADMBase(CCTK_ARGUMENTS) |