diff options
Diffstat (limited to 'ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc')
-rw-r--r-- | ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc | 280 |
1 file changed, 191 insertions, 89 deletions
diff --git a/ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc b/ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc index d5809e7..6d1c90d 100644 --- a/ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc +++ b/ML_BSSN/src/ML_BSSN_RHSStaticBoundary.cc @@ -13,13 +13,14 @@ #include "GenericFD.h" #include "Differencing.h" #include "loopcontrol.h" +#include "vectors.h" /* Define macros used in calculations */ #define INITVALUE (42) #define QAD(x) (SQR(SQR(x))) -#define INV(x) ((1.0) / (x)) -#define SQR(x) ((x) * (x)) -#define CUB(x) ((x) * (x) * (x)) +#define INV(x) (kdiv(ToReal(1.0),x)) +#define SQR(x) (kmul(x,x)) +#define CUB(x) (kmul(x,SQR(x))) extern "C" void ML_BSSN_RHSStaticBoundary_SelectBCs(CCTK_ARGUMENTS) { @@ -88,47 +89,48 @@ static void ML_BSSN_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, in ptrdiff_t const cdi = sizeof(CCTK_REAL) * di; ptrdiff_t const cdj = sizeof(CCTK_REAL) * dj; ptrdiff_t const cdk = sizeof(CCTK_REAL) * dk; - CCTK_REAL const dx = ToReal(CCTK_DELTA_SPACE(0)); - CCTK_REAL const dy = ToReal(CCTK_DELTA_SPACE(1)); - CCTK_REAL const dz = ToReal(CCTK_DELTA_SPACE(2)); - CCTK_REAL const dt = ToReal(CCTK_DELTA_TIME); - CCTK_REAL const dxi = INV(dx); - CCTK_REAL const dyi = INV(dy); - CCTK_REAL const dzi = INV(dz); - CCTK_REAL const khalf = 0.5; - CCTK_REAL const kthird = 1/3.0; - CCTK_REAL const ktwothird = 2.0/3.0; - CCTK_REAL const kfourthird = 4.0/3.0; - CCTK_REAL const keightthird = 8.0/3.0; - CCTK_REAL const hdxi = 0.5 * dxi; - CCTK_REAL const hdyi = 0.5 * dyi; - CCTK_REAL const hdzi = 0.5 * dzi; + CCTK_REAL_VEC const dx = ToReal(CCTK_DELTA_SPACE(0)); + CCTK_REAL_VEC const dy = ToReal(CCTK_DELTA_SPACE(1)); + CCTK_REAL_VEC const dz = ToReal(CCTK_DELTA_SPACE(2)); + CCTK_REAL_VEC const dt = ToReal(CCTK_DELTA_TIME); + CCTK_REAL_VEC const dxi = INV(dx); + CCTK_REAL_VEC const dyi = INV(dy); + CCTK_REAL_VEC const dzi = INV(dz); + CCTK_REAL_VEC const khalf = ToReal(0.5); + CCTK_REAL_VEC const kthird = ToReal(1.0/3.0); + CCTK_REAL_VEC const ktwothird = ToReal(2.0/3.0); 
+ CCTK_REAL_VEC const kfourthird = ToReal(4.0/3.0); + CCTK_REAL_VEC const keightthird = ToReal(8.0/3.0); + CCTK_REAL_VEC const hdxi = kmul(ToReal(0.5), dxi); + CCTK_REAL_VEC const hdyi = kmul(ToReal(0.5), dyi); + CCTK_REAL_VEC const hdzi = kmul(ToReal(0.5), dzi); /* Initialize predefined quantities */ - CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx); - CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy); - CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz); - CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy); - CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz); - CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz); - CCTK_REAL const p1o24dx = 0.0416666666666666666666666666667*INV(dx); - CCTK_REAL const p1o24dy = 0.0416666666666666666666666666667*INV(dy); - CCTK_REAL const p1o24dz = 0.0416666666666666666666666666667*INV(dz); - CCTK_REAL const p1o64dx = 0.015625*INV(dx); - CCTK_REAL const p1o64dy = 0.015625*INV(dy); - CCTK_REAL const p1o64dz = 0.015625*INV(dz); - CCTK_REAL const p1odx = INV(dx); - CCTK_REAL const p1ody = INV(dy); - CCTK_REAL const p1odz = INV(dz); - CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx)); - CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy)); - CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz)); + CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); + CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); + CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dydz = 
kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o24dx = kmul(INV(dx),ToReal(0.0416666666666666666666666666667)); + CCTK_REAL_VEC const p1o24dy = kmul(INV(dy),ToReal(0.0416666666666666666666666666667)); + CCTK_REAL_VEC const p1o24dz = kmul(INV(dz),ToReal(0.0416666666666666666666666666667)); + CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); + CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); + CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); + CCTK_REAL_VEC const p1odx = INV(dx); + CCTK_REAL_VEC const p1ody = INV(dy); + CCTK_REAL_VEC const p1odz = INV(dz); + CCTK_REAL_VEC const pm1o12dx2 = kmul(INV(SQR(dx)),ToReal(-0.0833333333333333333333333333333)); + CCTK_REAL_VEC const pm1o12dy2 = kmul(INV(SQR(dy)),ToReal(-0.0833333333333333333333333333333)); + CCTK_REAL_VEC const pm1o12dz2 = kmul(INV(SQR(dz)),ToReal(-0.0833333333333333333333333333333)); /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3 (ML_BSSN_RHSStaticBoundary, + LC_LOOP3VEC (ML_BSSN_RHSStaticBoundary, i,j,k, min[0],min[1],min[2], max[0],max[1],max[2], - cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) + cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], + CCTK_REAL_VEC_SIZE) { ptrdiff_t const index = di*i + dj*j + dk*k; @@ -141,84 +143,184 @@ static void ML_BSSN_RHSStaticBoundary_Body(cGH const * restrict const cctkGH, in /* Precompute derivatives */ /* Calculate temporaries and grid functions */ - CCTK_REAL phirhsL = 0; + CCTK_REAL_VEC phirhsL = ToReal(0); - CCTK_REAL gt11rhsL = 0; + CCTK_REAL_VEC gt11rhsL = ToReal(0); - CCTK_REAL gt12rhsL = 0; + CCTK_REAL_VEC gt12rhsL = ToReal(0); - CCTK_REAL gt13rhsL = 0; + CCTK_REAL_VEC gt13rhsL = ToReal(0); - CCTK_REAL gt22rhsL = 0; + CCTK_REAL_VEC gt22rhsL = ToReal(0); - CCTK_REAL gt23rhsL = 0; + CCTK_REAL_VEC gt23rhsL = ToReal(0); - CCTK_REAL gt33rhsL = 0; + CCTK_REAL_VEC gt33rhsL = ToReal(0); - CCTK_REAL trKrhsL = 0; + CCTK_REAL_VEC trKrhsL = ToReal(0); - CCTK_REAL At11rhsL = 0; + CCTK_REAL_VEC 
At11rhsL = ToReal(0); - CCTK_REAL At12rhsL = 0; + CCTK_REAL_VEC At12rhsL = ToReal(0); - CCTK_REAL At13rhsL = 0; + CCTK_REAL_VEC At13rhsL = ToReal(0); - CCTK_REAL At22rhsL = 0; + CCTK_REAL_VEC At22rhsL = ToReal(0); - CCTK_REAL At23rhsL = 0; + CCTK_REAL_VEC At23rhsL = ToReal(0); - CCTK_REAL At33rhsL = 0; + CCTK_REAL_VEC At33rhsL = ToReal(0); - CCTK_REAL Xt1rhsL = 0; + CCTK_REAL_VEC Xt1rhsL = ToReal(0); - CCTK_REAL Xt2rhsL = 0; + CCTK_REAL_VEC Xt2rhsL = ToReal(0); - CCTK_REAL Xt3rhsL = 0; + CCTK_REAL_VEC Xt3rhsL = ToReal(0); - CCTK_REAL alpharhsL = 0; + CCTK_REAL_VEC alpharhsL = ToReal(0); - CCTK_REAL ArhsL = 0; + CCTK_REAL_VEC ArhsL = ToReal(0); - CCTK_REAL beta1rhsL = 0; + CCTK_REAL_VEC beta1rhsL = ToReal(0); - CCTK_REAL beta2rhsL = 0; + CCTK_REAL_VEC beta2rhsL = ToReal(0); - CCTK_REAL beta3rhsL = 0; + CCTK_REAL_VEC beta3rhsL = ToReal(0); - CCTK_REAL B1rhsL = 0; + CCTK_REAL_VEC B1rhsL = ToReal(0); - CCTK_REAL B2rhsL = 0; + CCTK_REAL_VEC B2rhsL = ToReal(0); - CCTK_REAL B3rhsL = 0; + CCTK_REAL_VEC B3rhsL = ToReal(0); + + /* If necessary, store only partial vectors after the first iteration */ + + if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) + { + ptrdiff_t const elt_count_lo = lc_imin-i; + ptrdiff_t const elt_count_hi = lc_imax-i; + vec_store_nta_partial_mid(alpharhs[index],alpharhsL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(Arhs[index],ArhsL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(At11rhs[index],At11rhsL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(At12rhs[index],At12rhsL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(At13rhs[index],At13rhsL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(At22rhs[index],At22rhsL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(At23rhs[index],At23rhsL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(At33rhs[index],At33rhsL,elt_count_lo,elt_count_hi); + 
vec_store_nta_partial_mid(B1rhs[index],B1rhsL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(B2rhs[index],B2rhsL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(B3rhs[index],B3rhsL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(beta1rhs[index],beta1rhsL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(beta2rhs[index],beta2rhsL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(beta3rhs[index],beta3rhsL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(gt11rhs[index],gt11rhsL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(gt12rhs[index],gt12rhsL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(gt13rhs[index],gt13rhsL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(gt22rhs[index],gt22rhsL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(gt23rhs[index],gt23rhsL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(gt33rhs[index],gt33rhsL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(phirhs[index],phirhsL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(trKrhs[index],trKrhsL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(Xt1rhs[index],Xt1rhsL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(Xt2rhs[index],Xt2rhsL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(Xt3rhs[index],Xt3rhsL,elt_count_lo,elt_count_hi); + break; + } + + /* If necessary, store only partial vectors after the first iteration */ + + if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) + { + ptrdiff_t const elt_count = lc_imin-i; + vec_store_nta_partial_hi(alpharhs[index],alpharhsL,elt_count); + vec_store_nta_partial_hi(Arhs[index],ArhsL,elt_count); + vec_store_nta_partial_hi(At11rhs[index],At11rhsL,elt_count); + vec_store_nta_partial_hi(At12rhs[index],At12rhsL,elt_count); + vec_store_nta_partial_hi(At13rhs[index],At13rhsL,elt_count); + vec_store_nta_partial_hi(At22rhs[index],At22rhsL,elt_count); + vec_store_nta_partial_hi(At23rhs[index],At23rhsL,elt_count); + 
vec_store_nta_partial_hi(At33rhs[index],At33rhsL,elt_count); + vec_store_nta_partial_hi(B1rhs[index],B1rhsL,elt_count); + vec_store_nta_partial_hi(B2rhs[index],B2rhsL,elt_count); + vec_store_nta_partial_hi(B3rhs[index],B3rhsL,elt_count); + vec_store_nta_partial_hi(beta1rhs[index],beta1rhsL,elt_count); + vec_store_nta_partial_hi(beta2rhs[index],beta2rhsL,elt_count); + vec_store_nta_partial_hi(beta3rhs[index],beta3rhsL,elt_count); + vec_store_nta_partial_hi(gt11rhs[index],gt11rhsL,elt_count); + vec_store_nta_partial_hi(gt12rhs[index],gt12rhsL,elt_count); + vec_store_nta_partial_hi(gt13rhs[index],gt13rhsL,elt_count); + vec_store_nta_partial_hi(gt22rhs[index],gt22rhsL,elt_count); + vec_store_nta_partial_hi(gt23rhs[index],gt23rhsL,elt_count); + vec_store_nta_partial_hi(gt33rhs[index],gt33rhsL,elt_count); + vec_store_nta_partial_hi(phirhs[index],phirhsL,elt_count); + vec_store_nta_partial_hi(trKrhs[index],trKrhsL,elt_count); + vec_store_nta_partial_hi(Xt1rhs[index],Xt1rhsL,elt_count); + vec_store_nta_partial_hi(Xt2rhs[index],Xt2rhsL,elt_count); + vec_store_nta_partial_hi(Xt3rhs[index],Xt3rhsL,elt_count); + continue; + } + + /* If necessary, store only partial vectors after the last iteration */ + + if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) + { + ptrdiff_t const elt_count = lc_imax-i; + vec_store_nta_partial_lo(alpharhs[index],alpharhsL,elt_count); + vec_store_nta_partial_lo(Arhs[index],ArhsL,elt_count); + vec_store_nta_partial_lo(At11rhs[index],At11rhsL,elt_count); + vec_store_nta_partial_lo(At12rhs[index],At12rhsL,elt_count); + vec_store_nta_partial_lo(At13rhs[index],At13rhsL,elt_count); + vec_store_nta_partial_lo(At22rhs[index],At22rhsL,elt_count); + vec_store_nta_partial_lo(At23rhs[index],At23rhsL,elt_count); + vec_store_nta_partial_lo(At33rhs[index],At33rhsL,elt_count); + vec_store_nta_partial_lo(B1rhs[index],B1rhsL,elt_count); + vec_store_nta_partial_lo(B2rhs[index],B2rhsL,elt_count); + 
vec_store_nta_partial_lo(B3rhs[index],B3rhsL,elt_count); + vec_store_nta_partial_lo(beta1rhs[index],beta1rhsL,elt_count); + vec_store_nta_partial_lo(beta2rhs[index],beta2rhsL,elt_count); + vec_store_nta_partial_lo(beta3rhs[index],beta3rhsL,elt_count); + vec_store_nta_partial_lo(gt11rhs[index],gt11rhsL,elt_count); + vec_store_nta_partial_lo(gt12rhs[index],gt12rhsL,elt_count); + vec_store_nta_partial_lo(gt13rhs[index],gt13rhsL,elt_count); + vec_store_nta_partial_lo(gt22rhs[index],gt22rhsL,elt_count); + vec_store_nta_partial_lo(gt23rhs[index],gt23rhsL,elt_count); + vec_store_nta_partial_lo(gt33rhs[index],gt33rhsL,elt_count); + vec_store_nta_partial_lo(phirhs[index],phirhsL,elt_count); + vec_store_nta_partial_lo(trKrhs[index],trKrhsL,elt_count); + vec_store_nta_partial_lo(Xt1rhs[index],Xt1rhsL,elt_count); + vec_store_nta_partial_lo(Xt2rhs[index],Xt2rhsL,elt_count); + vec_store_nta_partial_lo(Xt3rhs[index],Xt3rhsL,elt_count); + break; + } /* Copy local copies back to grid functions */ - alpharhs[index] = alpharhsL; - Arhs[index] = ArhsL; - At11rhs[index] = At11rhsL; - At12rhs[index] = At12rhsL; - At13rhs[index] = At13rhsL; - At22rhs[index] = At22rhsL; - At23rhs[index] = At23rhsL; - At33rhs[index] = At33rhsL; - B1rhs[index] = B1rhsL; - B2rhs[index] = B2rhsL; - B3rhs[index] = B3rhsL; - beta1rhs[index] = beta1rhsL; - beta2rhs[index] = beta2rhsL; - beta3rhs[index] = beta3rhsL; - gt11rhs[index] = gt11rhsL; - gt12rhs[index] = gt12rhsL; - gt13rhs[index] = gt13rhsL; - gt22rhs[index] = gt22rhsL; - gt23rhs[index] = gt23rhsL; - gt33rhs[index] = gt33rhsL; - phirhs[index] = phirhsL; - trKrhs[index] = trKrhsL; - Xt1rhs[index] = Xt1rhsL; - Xt2rhs[index] = Xt2rhsL; - Xt3rhs[index] = Xt3rhsL; + vec_store_nta(alpharhs[index],alpharhsL); + vec_store_nta(Arhs[index],ArhsL); + vec_store_nta(At11rhs[index],At11rhsL); + vec_store_nta(At12rhs[index],At12rhsL); + vec_store_nta(At13rhs[index],At13rhsL); + vec_store_nta(At22rhs[index],At22rhsL); + vec_store_nta(At23rhs[index],At23rhsL); + 
vec_store_nta(At33rhs[index],At33rhsL); + vec_store_nta(B1rhs[index],B1rhsL); + vec_store_nta(B2rhs[index],B2rhsL); + vec_store_nta(B3rhs[index],B3rhsL); + vec_store_nta(beta1rhs[index],beta1rhsL); + vec_store_nta(beta2rhs[index],beta2rhsL); + vec_store_nta(beta3rhs[index],beta3rhsL); + vec_store_nta(gt11rhs[index],gt11rhsL); + vec_store_nta(gt12rhs[index],gt12rhsL); + vec_store_nta(gt13rhs[index],gt13rhsL); + vec_store_nta(gt22rhs[index],gt22rhsL); + vec_store_nta(gt23rhs[index],gt23rhsL); + vec_store_nta(gt33rhs[index],gt33rhsL); + vec_store_nta(phirhs[index],phirhsL); + vec_store_nta(trKrhs[index],trKrhsL); + vec_store_nta(Xt1rhs[index],Xt1rhsL); + vec_store_nta(Xt2rhs[index],Xt2rhsL); + vec_store_nta(Xt3rhs[index],Xt3rhsL); } - LC_ENDLOOP3 (ML_BSSN_RHSStaticBoundary); + LC_ENDLOOP3VEC (ML_BSSN_RHSStaticBoundary); } extern "C" void ML_BSSN_RHSStaticBoundary(CCTK_ARGUMENTS) |