diff options
Diffstat (limited to 'ML_BSSN/src/ML_BSSN_boundary.cc')
-rw-r--r-- | ML_BSSN/src/ML_BSSN_boundary.cc | 280 |
1 files changed, 191 insertions, 89 deletions
diff --git a/ML_BSSN/src/ML_BSSN_boundary.cc b/ML_BSSN/src/ML_BSSN_boundary.cc index ac1e90f..b00443d 100644 --- a/ML_BSSN/src/ML_BSSN_boundary.cc +++ b/ML_BSSN/src/ML_BSSN_boundary.cc @@ -13,13 +13,14 @@ #include "GenericFD.h" #include "Differencing.h" #include "loopcontrol.h" +#include "vectors.h" /* Define macros used in calculations */ #define INITVALUE (42) #define QAD(x) (SQR(SQR(x))) -#define INV(x) ((1.0) / (x)) -#define SQR(x) ((x) * (x)) -#define CUB(x) ((x) * (x) * (x)) +#define INV(x) (kdiv(ToReal(1.0),x)) +#define SQR(x) (kmul(x,x)) +#define CUB(x) (kmul(x,SQR(x))) extern "C" void ML_BSSN_boundary_SelectBCs(CCTK_ARGUMENTS) { @@ -88,47 +89,48 @@ static void ML_BSSN_boundary_Body(cGH const * restrict const cctkGH, int const d ptrdiff_t const cdi = sizeof(CCTK_REAL) * di; ptrdiff_t const cdj = sizeof(CCTK_REAL) * dj; ptrdiff_t const cdk = sizeof(CCTK_REAL) * dk; - CCTK_REAL const dx = ToReal(CCTK_DELTA_SPACE(0)); - CCTK_REAL const dy = ToReal(CCTK_DELTA_SPACE(1)); - CCTK_REAL const dz = ToReal(CCTK_DELTA_SPACE(2)); - CCTK_REAL const dt = ToReal(CCTK_DELTA_TIME); - CCTK_REAL const dxi = INV(dx); - CCTK_REAL const dyi = INV(dy); - CCTK_REAL const dzi = INV(dz); - CCTK_REAL const khalf = 0.5; - CCTK_REAL const kthird = 1/3.0; - CCTK_REAL const ktwothird = 2.0/3.0; - CCTK_REAL const kfourthird = 4.0/3.0; - CCTK_REAL const keightthird = 8.0/3.0; - CCTK_REAL const hdxi = 0.5 * dxi; - CCTK_REAL const hdyi = 0.5 * dyi; - CCTK_REAL const hdzi = 0.5 * dzi; + CCTK_REAL_VEC const dx = ToReal(CCTK_DELTA_SPACE(0)); + CCTK_REAL_VEC const dy = ToReal(CCTK_DELTA_SPACE(1)); + CCTK_REAL_VEC const dz = ToReal(CCTK_DELTA_SPACE(2)); + CCTK_REAL_VEC const dt = ToReal(CCTK_DELTA_TIME); + CCTK_REAL_VEC const dxi = INV(dx); + CCTK_REAL_VEC const dyi = INV(dy); + CCTK_REAL_VEC const dzi = INV(dz); + CCTK_REAL_VEC const khalf = ToReal(0.5); + CCTK_REAL_VEC const kthird = ToReal(1.0/3.0); + CCTK_REAL_VEC const ktwothird = ToReal(2.0/3.0); + CCTK_REAL_VEC const kfourthird = ToReal(4.0/3.0); + CCTK_REAL_VEC const keightthird = ToReal(8.0/3.0); + CCTK_REAL_VEC const hdxi = kmul(ToReal(0.5), dxi); + CCTK_REAL_VEC const hdyi = kmul(ToReal(0.5), dyi); + CCTK_REAL_VEC const hdzi = kmul(ToReal(0.5), dzi); /* Initialize predefined quantities */ - CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx); - CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy); - CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz); - CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy); - CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz); - CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz); - CCTK_REAL const p1o24dx = 0.0416666666666666666666666666667*INV(dx); - CCTK_REAL const p1o24dy = 0.0416666666666666666666666666667*INV(dy); - CCTK_REAL const p1o24dz = 0.0416666666666666666666666666667*INV(dz); - CCTK_REAL const p1o64dx = 0.015625*INV(dx); - CCTK_REAL const p1o64dy = 0.015625*INV(dy); - CCTK_REAL const p1o64dz = 0.015625*INV(dz); - CCTK_REAL const p1odx = INV(dx); - CCTK_REAL const p1ody = INV(dy); - CCTK_REAL const p1odz = INV(dz); - CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx)); - CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy)); - CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz)); + CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); + CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); + CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o24dx = kmul(INV(dx),ToReal(0.0416666666666666666666666666667)); + CCTK_REAL_VEC const p1o24dy = kmul(INV(dy),ToReal(0.0416666666666666666666666666667)); + CCTK_REAL_VEC const p1o24dz = kmul(INV(dz),ToReal(0.0416666666666666666666666666667)); + CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); + CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); + CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); + CCTK_REAL_VEC const p1odx = INV(dx); + CCTK_REAL_VEC const p1ody = INV(dy); + CCTK_REAL_VEC const p1odz = INV(dz); + CCTK_REAL_VEC const pm1o12dx2 = kmul(INV(SQR(dx)),ToReal(-0.0833333333333333333333333333333)); + CCTK_REAL_VEC const pm1o12dy2 = kmul(INV(SQR(dy)),ToReal(-0.0833333333333333333333333333333)); + CCTK_REAL_VEC const pm1o12dz2 = kmul(INV(SQR(dz)),ToReal(-0.0833333333333333333333333333333)); /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3 (ML_BSSN_boundary, + LC_LOOP3VEC (ML_BSSN_boundary, i,j,k, min[0],min[1],min[2], max[0],max[1],max[2], - cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) + cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], + CCTK_REAL_VEC_SIZE) { ptrdiff_t const index = di*i + dj*j + dk*k; @@ -141,84 +143,184 @@ static void ML_BSSN_boundary_Body(cGH const * restrict const cctkGH, int const d /* Precompute derivatives */ /* Calculate temporaries and grid functions */ - CCTK_REAL phiL = IfThen(conformalMethod,1,0); + CCTK_REAL_VEC phiL = IfThen(conformalMethod,ToReal(1),ToReal(0)); - CCTK_REAL gt11L = 1; + CCTK_REAL_VEC gt11L = ToReal(1); - CCTK_REAL gt12L = 0; + CCTK_REAL_VEC gt12L = ToReal(0); - CCTK_REAL gt13L = 0; + CCTK_REAL_VEC gt13L = ToReal(0); - CCTK_REAL gt22L = 1; + CCTK_REAL_VEC gt22L = ToReal(1); - CCTK_REAL gt23L = 0; + CCTK_REAL_VEC gt23L = ToReal(0); - CCTK_REAL gt33L = 1; + CCTK_REAL_VEC gt33L = ToReal(1); - CCTK_REAL trKL = 0; + CCTK_REAL_VEC trKL = ToReal(0); - CCTK_REAL At11L = 0; + CCTK_REAL_VEC At11L = ToReal(0); - CCTK_REAL At12L = 0; + CCTK_REAL_VEC At12L = ToReal(0); - CCTK_REAL At13L = 0; + CCTK_REAL_VEC At13L = ToReal(0); - CCTK_REAL At22L = 0; + CCTK_REAL_VEC At22L = ToReal(0); - CCTK_REAL At23L = 0; + CCTK_REAL_VEC At23L = ToReal(0); - CCTK_REAL At33L = 0; + CCTK_REAL_VEC At33L = ToReal(0); - CCTK_REAL Xt1L = 0; + CCTK_REAL_VEC Xt1L = ToReal(0); - CCTK_REAL Xt2L = 0; + CCTK_REAL_VEC Xt2L = ToReal(0); - CCTK_REAL Xt3L = 0; + CCTK_REAL_VEC Xt3L = ToReal(0); - CCTK_REAL alphaL = 1; + CCTK_REAL_VEC alphaL = ToReal(1); - CCTK_REAL AL = 0; + CCTK_REAL_VEC AL = ToReal(0); - CCTK_REAL beta1L = 0; + CCTK_REAL_VEC beta1L = ToReal(0); - CCTK_REAL beta2L = 0; + CCTK_REAL_VEC beta2L = ToReal(0); - CCTK_REAL beta3L = 0; + CCTK_REAL_VEC beta3L = ToReal(0); - CCTK_REAL B1L = 0; + CCTK_REAL_VEC B1L = ToReal(0); - CCTK_REAL B2L = 0; + CCTK_REAL_VEC B2L = ToReal(0); - CCTK_REAL B3L = 0; + CCTK_REAL_VEC B3L = ToReal(0); + + /* If necessary, store only partial vectors after the first iteration */ + + if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) + { + ptrdiff_t const elt_count_lo = lc_imin-i; + ptrdiff_t const elt_count_hi = lc_imax-i; + vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(alpha[index],alphaL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(At11[index],At11L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(At12[index],At12L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(At13[index],At13L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(At22[index],At22L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(At23[index],At23L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(At33[index],At33L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(beta1[index],beta1L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(beta2[index],beta2L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(beta3[index],beta3L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(gt11[index],gt11L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(gt12[index],gt12L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(gt13[index],gt13L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(gt22[index],gt22L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(gt23[index],gt23L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(gt33[index],gt33L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(phi[index],phiL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(trK[index],trKL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); + break; + } + + /* If necessary, store only partial vectors after the first iteration */ + + if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) + { + ptrdiff_t const elt_count = lc_imin-i; + vec_store_nta_partial_hi(A[index],AL,elt_count); + vec_store_nta_partial_hi(alpha[index],alphaL,elt_count); + vec_store_nta_partial_hi(At11[index],At11L,elt_count); + vec_store_nta_partial_hi(At12[index],At12L,elt_count); + vec_store_nta_partial_hi(At13[index],At13L,elt_count); + vec_store_nta_partial_hi(At22[index],At22L,elt_count); + vec_store_nta_partial_hi(At23[index],At23L,elt_count); + vec_store_nta_partial_hi(At33[index],At33L,elt_count); + vec_store_nta_partial_hi(B1[index],B1L,elt_count); + vec_store_nta_partial_hi(B2[index],B2L,elt_count); + vec_store_nta_partial_hi(B3[index],B3L,elt_count); + vec_store_nta_partial_hi(beta1[index],beta1L,elt_count); + vec_store_nta_partial_hi(beta2[index],beta2L,elt_count); + vec_store_nta_partial_hi(beta3[index],beta3L,elt_count); + vec_store_nta_partial_hi(gt11[index],gt11L,elt_count); + vec_store_nta_partial_hi(gt12[index],gt12L,elt_count); + vec_store_nta_partial_hi(gt13[index],gt13L,elt_count); + vec_store_nta_partial_hi(gt22[index],gt22L,elt_count); + vec_store_nta_partial_hi(gt23[index],gt23L,elt_count); + vec_store_nta_partial_hi(gt33[index],gt33L,elt_count); + vec_store_nta_partial_hi(phi[index],phiL,elt_count); + vec_store_nta_partial_hi(trK[index],trKL,elt_count); + vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); + vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); + vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); + continue; + } + + /* If necessary, store only partial vectors after the last iteration */ + + if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) + { + ptrdiff_t const elt_count = lc_imax-i; + vec_store_nta_partial_lo(A[index],AL,elt_count); + vec_store_nta_partial_lo(alpha[index],alphaL,elt_count); + vec_store_nta_partial_lo(At11[index],At11L,elt_count); + vec_store_nta_partial_lo(At12[index],At12L,elt_count); + vec_store_nta_partial_lo(At13[index],At13L,elt_count); + vec_store_nta_partial_lo(At22[index],At22L,elt_count); + vec_store_nta_partial_lo(At23[index],At23L,elt_count); + vec_store_nta_partial_lo(At33[index],At33L,elt_count); + vec_store_nta_partial_lo(B1[index],B1L,elt_count); + vec_store_nta_partial_lo(B2[index],B2L,elt_count); + vec_store_nta_partial_lo(B3[index],B3L,elt_count); + vec_store_nta_partial_lo(beta1[index],beta1L,elt_count); + vec_store_nta_partial_lo(beta2[index],beta2L,elt_count); + vec_store_nta_partial_lo(beta3[index],beta3L,elt_count); + vec_store_nta_partial_lo(gt11[index],gt11L,elt_count); + vec_store_nta_partial_lo(gt12[index],gt12L,elt_count); + vec_store_nta_partial_lo(gt13[index],gt13L,elt_count); + vec_store_nta_partial_lo(gt22[index],gt22L,elt_count); + vec_store_nta_partial_lo(gt23[index],gt23L,elt_count); + vec_store_nta_partial_lo(gt33[index],gt33L,elt_count); + vec_store_nta_partial_lo(phi[index],phiL,elt_count); + vec_store_nta_partial_lo(trK[index],trKL,elt_count); + vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); + vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); + vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); + break; + } /* Copy local copies back to grid functions */ - A[index] = AL; - alpha[index] = alphaL; - At11[index] = At11L; - At12[index] = At12L; - At13[index] = At13L; - At22[index] = At22L; - At23[index] = At23L; - At33[index] = At33L; - B1[index] = B1L; - B2[index] = B2L; - B3[index] = B3L; - beta1[index] = beta1L; - beta2[index] = beta2L; - beta3[index] = beta3L; - gt11[index] = gt11L; - gt12[index] = gt12L; - gt13[index] = gt13L; - gt22[index] = gt22L; - gt23[index] = gt23L; - gt33[index] = gt33L; - phi[index] = phiL; - trK[index] = trKL; - Xt1[index] = Xt1L; - Xt2[index] = Xt2L; - Xt3[index] = Xt3L; + vec_store_nta(A[index],AL); + vec_store_nta(alpha[index],alphaL); + vec_store_nta(At11[index],At11L); + vec_store_nta(At12[index],At12L); + vec_store_nta(At13[index],At13L); + vec_store_nta(At22[index],At22L); + vec_store_nta(At23[index],At23L); + vec_store_nta(At33[index],At33L); + vec_store_nta(B1[index],B1L); + vec_store_nta(B2[index],B2L); + vec_store_nta(B3[index],B3L); + vec_store_nta(beta1[index],beta1L); + vec_store_nta(beta2[index],beta2L); + vec_store_nta(beta3[index],beta3L); + vec_store_nta(gt11[index],gt11L); + vec_store_nta(gt12[index],gt12L); + vec_store_nta(gt13[index],gt13L); + vec_store_nta(gt22[index],gt22L); + vec_store_nta(gt23[index],gt23L); + vec_store_nta(gt33[index],gt33L); + vec_store_nta(phi[index],phiL); + vec_store_nta(trK[index],trKL); + vec_store_nta(Xt1[index],Xt1L); + vec_store_nta(Xt2[index],Xt2L); + vec_store_nta(Xt3[index],Xt3L); } - LC_ENDLOOP3 (ML_BSSN_boundary); + LC_ENDLOOP3VEC (ML_BSSN_boundary); } extern "C" void ML_BSSN_boundary(CCTK_ARGUMENTS) |