diff options
Diffstat (limited to 'ML_BSSN/src/ML_BSSN_convertFromADMBaseGamma.cc')
-rw-r--r-- | ML_BSSN/src/ML_BSSN_convertFromADMBaseGamma.cc | 418 |
1 files changed, 219 insertions, 199 deletions
diff --git a/ML_BSSN/src/ML_BSSN_convertFromADMBaseGamma.cc b/ML_BSSN/src/ML_BSSN_convertFromADMBaseGamma.cc index 2eebecc..4d543bd 100644 --- a/ML_BSSN/src/ML_BSSN_convertFromADMBaseGamma.cc +++ b/ML_BSSN/src/ML_BSSN_convertFromADMBaseGamma.cc @@ -13,13 +13,14 @@ #include "GenericFD.h" #include "Differencing.h" #include "loopcontrol.h" +#include "vectors.h" /* Define macros used in calculations */ #define INITVALUE (42) #define QAD(x) (SQR(SQR(x))) -#define INV(x) ((1.0) / (x)) -#define SQR(x) ((x) * (x)) -#define CUB(x) ((x) * (x) * (x)) +#define INV(x) (kdiv(ToReal(1.0),x)) +#define SQR(x) (kmul(x,x)) +#define CUB(x) (kmul(x,SQR(x))) extern "C" void ML_BSSN_convertFromADMBaseGamma_SelectBCs(CCTK_ARGUMENTS) { @@ -71,114 +72,115 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk ptrdiff_t const cdi = sizeof(CCTK_REAL) * di; ptrdiff_t const cdj = sizeof(CCTK_REAL) * dj; ptrdiff_t const cdk = sizeof(CCTK_REAL) * dk; - CCTK_REAL const dx = ToReal(CCTK_DELTA_SPACE(0)); - CCTK_REAL const dy = ToReal(CCTK_DELTA_SPACE(1)); - CCTK_REAL const dz = ToReal(CCTK_DELTA_SPACE(2)); - CCTK_REAL const dt = ToReal(CCTK_DELTA_TIME); - CCTK_REAL const dxi = INV(dx); - CCTK_REAL const dyi = INV(dy); - CCTK_REAL const dzi = INV(dz); - CCTK_REAL const khalf = 0.5; - CCTK_REAL const kthird = 1/3.0; - CCTK_REAL const ktwothird = 2.0/3.0; - CCTK_REAL const kfourthird = 4.0/3.0; - CCTK_REAL const keightthird = 8.0/3.0; - CCTK_REAL const hdxi = 0.5 * dxi; - CCTK_REAL const hdyi = 0.5 * dyi; - CCTK_REAL const hdzi = 0.5 * dzi; + CCTK_REAL_VEC const dx = ToReal(CCTK_DELTA_SPACE(0)); + CCTK_REAL_VEC const dy = ToReal(CCTK_DELTA_SPACE(1)); + CCTK_REAL_VEC const dz = ToReal(CCTK_DELTA_SPACE(2)); + CCTK_REAL_VEC const dt = ToReal(CCTK_DELTA_TIME); + CCTK_REAL_VEC const dxi = INV(dx); + CCTK_REAL_VEC const dyi = INV(dy); + CCTK_REAL_VEC const dzi = INV(dz); + CCTK_REAL_VEC const khalf = ToReal(0.5); + CCTK_REAL_VEC const kthird = ToReal(1.0/3.0); + CCTK_REAL_VEC 
const ktwothird = ToReal(2.0/3.0); + CCTK_REAL_VEC const kfourthird = ToReal(4.0/3.0); + CCTK_REAL_VEC const keightthird = ToReal(8.0/3.0); + CCTK_REAL_VEC const hdxi = kmul(ToReal(0.5), dxi); + CCTK_REAL_VEC const hdyi = kmul(ToReal(0.5), dyi); + CCTK_REAL_VEC const hdzi = kmul(ToReal(0.5), dzi); /* Initialize predefined quantities */ - CCTK_REAL const p1o12dx = 0.0833333333333333333333333333333*INV(dx); - CCTK_REAL const p1o12dy = 0.0833333333333333333333333333333*INV(dy); - CCTK_REAL const p1o12dz = 0.0833333333333333333333333333333*INV(dz); - CCTK_REAL const p1o144dxdy = 0.00694444444444444444444444444444*INV(dx)*INV(dy); - CCTK_REAL const p1o144dxdz = 0.00694444444444444444444444444444*INV(dx)*INV(dz); - CCTK_REAL const p1o144dydz = 0.00694444444444444444444444444444*INV(dy)*INV(dz); - CCTK_REAL const p1o24dx = 0.0416666666666666666666666666667*INV(dx); - CCTK_REAL const p1o24dy = 0.0416666666666666666666666666667*INV(dy); - CCTK_REAL const p1o24dz = 0.0416666666666666666666666666667*INV(dz); - CCTK_REAL const p1o64dx = 0.015625*INV(dx); - CCTK_REAL const p1o64dy = 0.015625*INV(dy); - CCTK_REAL const p1o64dz = 0.015625*INV(dz); - CCTK_REAL const p1odx = INV(dx); - CCTK_REAL const p1ody = INV(dy); - CCTK_REAL const p1odz = INV(dz); - CCTK_REAL const pm1o12dx2 = -0.0833333333333333333333333333333*INV(SQR(dx)); - CCTK_REAL const pm1o12dy2 = -0.0833333333333333333333333333333*INV(SQR(dy)); - CCTK_REAL const pm1o12dz2 = -0.0833333333333333333333333333333*INV(SQR(dz)); + CCTK_REAL_VEC const p1o12dx = kmul(INV(dx),ToReal(0.0833333333333333333333333333333)); + CCTK_REAL_VEC const p1o12dy = kmul(INV(dy),ToReal(0.0833333333333333333333333333333)); + CCTK_REAL_VEC const p1o12dz = kmul(INV(dz),ToReal(0.0833333333333333333333333333333)); + CCTK_REAL_VEC const p1o144dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o144dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const 
p1o144dydz = kmul(INV(dy),kmul(INV(dz),ToReal(0.00694444444444444444444444444444))); + CCTK_REAL_VEC const p1o24dx = kmul(INV(dx),ToReal(0.0416666666666666666666666666667)); + CCTK_REAL_VEC const p1o24dy = kmul(INV(dy),ToReal(0.0416666666666666666666666666667)); + CCTK_REAL_VEC const p1o24dz = kmul(INV(dz),ToReal(0.0416666666666666666666666666667)); + CCTK_REAL_VEC const p1o64dx = kmul(INV(dx),ToReal(0.015625)); + CCTK_REAL_VEC const p1o64dy = kmul(INV(dy),ToReal(0.015625)); + CCTK_REAL_VEC const p1o64dz = kmul(INV(dz),ToReal(0.015625)); + CCTK_REAL_VEC const p1odx = INV(dx); + CCTK_REAL_VEC const p1ody = INV(dy); + CCTK_REAL_VEC const p1odz = INV(dz); + CCTK_REAL_VEC const pm1o12dx2 = kmul(INV(SQR(dx)),ToReal(-0.0833333333333333333333333333333)); + CCTK_REAL_VEC const pm1o12dy2 = kmul(INV(SQR(dy)),ToReal(-0.0833333333333333333333333333333)); + CCTK_REAL_VEC const pm1o12dz2 = kmul(INV(SQR(dz)),ToReal(-0.0833333333333333333333333333333)); /* Loop over the grid points */ #pragma omp parallel - LC_LOOP3 (ML_BSSN_convertFromADMBaseGamma, + LC_LOOP3VEC (ML_BSSN_convertFromADMBaseGamma, i,j,k, min[0],min[1],min[2], max[0],max[1],max[2], - cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) + cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], + CCTK_REAL_VEC_SIZE) { ptrdiff_t const index = di*i + dj*j + dk*k; /* Assign local copies of grid functions */ - CCTK_REAL alphaL = alpha[index]; - CCTK_REAL beta1L = beta1[index]; - CCTK_REAL beta2L = beta2[index]; - CCTK_REAL beta3L = beta3[index]; - CCTK_REAL dtalpL = dtalp[index]; - CCTK_REAL dtbetaxL = dtbetax[index]; - CCTK_REAL dtbetayL = dtbetay[index]; - CCTK_REAL dtbetazL = dtbetaz[index]; - CCTK_REAL gt11L = gt11[index]; - CCTK_REAL gt12L = gt12[index]; - CCTK_REAL gt13L = gt13[index]; - CCTK_REAL gt22L = gt22[index]; - CCTK_REAL gt23L = gt23[index]; - CCTK_REAL gt33L = gt33[index]; - CCTK_REAL rL = r[index]; + CCTK_REAL_VEC alphaL = vec_load(alpha[index]); + CCTK_REAL_VEC beta1L = vec_load(beta1[index]); + CCTK_REAL_VEC beta2L = 
vec_load(beta2[index]); + CCTK_REAL_VEC beta3L = vec_load(beta3[index]); + CCTK_REAL_VEC dtalpL = vec_load(dtalp[index]); + CCTK_REAL_VEC dtbetaxL = vec_load(dtbetax[index]); + CCTK_REAL_VEC dtbetayL = vec_load(dtbetay[index]); + CCTK_REAL_VEC dtbetazL = vec_load(dtbetaz[index]); + CCTK_REAL_VEC gt11L = vec_load(gt11[index]); + CCTK_REAL_VEC gt12L = vec_load(gt12[index]); + CCTK_REAL_VEC gt13L = vec_load(gt13[index]); + CCTK_REAL_VEC gt22L = vec_load(gt22[index]); + CCTK_REAL_VEC gt23L = vec_load(gt23[index]); + CCTK_REAL_VEC gt33L = vec_load(gt33[index]); + CCTK_REAL_VEC rL = vec_load(r[index]); /* Include user supplied include files */ /* Precompute derivatives */ - CCTK_REAL const PDupwindNthAnti1alpha = PDupwindNthAnti1(&alpha[index]); - CCTK_REAL const PDupwindNthSymm1alpha = PDupwindNthSymm1(&alpha[index]); - CCTK_REAL const PDupwindNthAnti2alpha = PDupwindNthAnti2(&alpha[index]); - CCTK_REAL const PDupwindNthSymm2alpha = PDupwindNthSymm2(&alpha[index]); - CCTK_REAL const PDupwindNthAnti3alpha = PDupwindNthAnti3(&alpha[index]); - CCTK_REAL const PDupwindNthSymm3alpha = PDupwindNthSymm3(&alpha[index]); - CCTK_REAL const PDupwindNthAnti1beta1 = PDupwindNthAnti1(&beta1[index]); - CCTK_REAL const PDupwindNthSymm1beta1 = PDupwindNthSymm1(&beta1[index]); - CCTK_REAL const PDupwindNthAnti2beta1 = PDupwindNthAnti2(&beta1[index]); - CCTK_REAL const PDupwindNthSymm2beta1 = PDupwindNthSymm2(&beta1[index]); - CCTK_REAL const PDupwindNthAnti3beta1 = PDupwindNthAnti3(&beta1[index]); - CCTK_REAL const PDupwindNthSymm3beta1 = PDupwindNthSymm3(&beta1[index]); - CCTK_REAL const PDupwindNthAnti1beta2 = PDupwindNthAnti1(&beta2[index]); - CCTK_REAL const PDupwindNthSymm1beta2 = PDupwindNthSymm1(&beta2[index]); - CCTK_REAL const PDupwindNthAnti2beta2 = PDupwindNthAnti2(&beta2[index]); - CCTK_REAL const PDupwindNthSymm2beta2 = PDupwindNthSymm2(&beta2[index]); - CCTK_REAL const PDupwindNthAnti3beta2 = PDupwindNthAnti3(&beta2[index]); - CCTK_REAL const PDupwindNthSymm3beta2 = 
PDupwindNthSymm3(&beta2[index]); - CCTK_REAL const PDupwindNthAnti1beta3 = PDupwindNthAnti1(&beta3[index]); - CCTK_REAL const PDupwindNthSymm1beta3 = PDupwindNthSymm1(&beta3[index]); - CCTK_REAL const PDupwindNthAnti2beta3 = PDupwindNthAnti2(&beta3[index]); - CCTK_REAL const PDupwindNthSymm2beta3 = PDupwindNthSymm2(&beta3[index]); - CCTK_REAL const PDupwindNthAnti3beta3 = PDupwindNthAnti3(&beta3[index]); - CCTK_REAL const PDupwindNthSymm3beta3 = PDupwindNthSymm3(&beta3[index]); - CCTK_REAL const PDstandardNth1gt11 = PDstandardNth1(&gt11[index]); - CCTK_REAL const PDstandardNth2gt11 = PDstandardNth2(&gt11[index]); - CCTK_REAL const PDstandardNth3gt11 = PDstandardNth3(&gt11[index]); - CCTK_REAL const PDstandardNth1gt12 = PDstandardNth1(&gt12[index]); - CCTK_REAL const PDstandardNth2gt12 = PDstandardNth2(&gt12[index]); - CCTK_REAL const PDstandardNth3gt12 = PDstandardNth3(&gt12[index]); - CCTK_REAL const PDstandardNth1gt13 = PDstandardNth1(&gt13[index]); - CCTK_REAL const PDstandardNth2gt13 = PDstandardNth2(&gt13[index]); - CCTK_REAL const PDstandardNth3gt13 = PDstandardNth3(&gt13[index]); - CCTK_REAL const PDstandardNth1gt22 = PDstandardNth1(&gt22[index]); - CCTK_REAL const PDstandardNth2gt22 = PDstandardNth2(&gt22[index]); - CCTK_REAL const PDstandardNth3gt22 = PDstandardNth3(&gt22[index]); - CCTK_REAL const PDstandardNth1gt23 = PDstandardNth1(&gt23[index]); - CCTK_REAL const PDstandardNth2gt23 = PDstandardNth2(&gt23[index]); - CCTK_REAL const PDstandardNth3gt23 = PDstandardNth3(&gt23[index]); - CCTK_REAL const PDstandardNth1gt33 = PDstandardNth1(&gt33[index]); - CCTK_REAL const PDstandardNth2gt33 = PDstandardNth2(&gt33[index]); - CCTK_REAL const PDstandardNth3gt33 = PDstandardNth3(&gt33[index]); + CCTK_REAL_VEC const PDupwindNthAnti1alpha = PDupwindNthAnti1(&alpha[index]); + CCTK_REAL_VEC const PDupwindNthSymm1alpha = PDupwindNthSymm1(&alpha[index]); + CCTK_REAL_VEC const PDupwindNthAnti2alpha = PDupwindNthAnti2(&alpha[index]); + CCTK_REAL_VEC const PDupwindNthSymm2alpha = 
PDupwindNthSymm2(&alpha[index]); + CCTK_REAL_VEC const PDupwindNthAnti3alpha = PDupwindNthAnti3(&alpha[index]); + CCTK_REAL_VEC const PDupwindNthSymm3alpha = PDupwindNthSymm3(&alpha[index]); + CCTK_REAL_VEC const PDupwindNthAnti1beta1 = PDupwindNthAnti1(&beta1[index]); + CCTK_REAL_VEC const PDupwindNthSymm1beta1 = PDupwindNthSymm1(&beta1[index]); + CCTK_REAL_VEC const PDupwindNthAnti2beta1 = PDupwindNthAnti2(&beta1[index]); + CCTK_REAL_VEC const PDupwindNthSymm2beta1 = PDupwindNthSymm2(&beta1[index]); + CCTK_REAL_VEC const PDupwindNthAnti3beta1 = PDupwindNthAnti3(&beta1[index]); + CCTK_REAL_VEC const PDupwindNthSymm3beta1 = PDupwindNthSymm3(&beta1[index]); + CCTK_REAL_VEC const PDupwindNthAnti1beta2 = PDupwindNthAnti1(&beta2[index]); + CCTK_REAL_VEC const PDupwindNthSymm1beta2 = PDupwindNthSymm1(&beta2[index]); + CCTK_REAL_VEC const PDupwindNthAnti2beta2 = PDupwindNthAnti2(&beta2[index]); + CCTK_REAL_VEC const PDupwindNthSymm2beta2 = PDupwindNthSymm2(&beta2[index]); + CCTK_REAL_VEC const PDupwindNthAnti3beta2 = PDupwindNthAnti3(&beta2[index]); + CCTK_REAL_VEC const PDupwindNthSymm3beta2 = PDupwindNthSymm3(&beta2[index]); + CCTK_REAL_VEC const PDupwindNthAnti1beta3 = PDupwindNthAnti1(&beta3[index]); + CCTK_REAL_VEC const PDupwindNthSymm1beta3 = PDupwindNthSymm1(&beta3[index]); + CCTK_REAL_VEC const PDupwindNthAnti2beta3 = PDupwindNthAnti2(&beta3[index]); + CCTK_REAL_VEC const PDupwindNthSymm2beta3 = PDupwindNthSymm2(&beta3[index]); + CCTK_REAL_VEC const PDupwindNthAnti3beta3 = PDupwindNthAnti3(&beta3[index]); + CCTK_REAL_VEC const PDupwindNthSymm3beta3 = PDupwindNthSymm3(&beta3[index]); + CCTK_REAL_VEC const PDstandardNth1gt11 = PDstandardNth1(&gt11[index]); + CCTK_REAL_VEC const PDstandardNth2gt11 = PDstandardNth2(&gt11[index]); + CCTK_REAL_VEC const PDstandardNth3gt11 = PDstandardNth3(&gt11[index]); + CCTK_REAL_VEC const PDstandardNth1gt12 = PDstandardNth1(&gt12[index]); + CCTK_REAL_VEC const PDstandardNth2gt12 = PDstandardNth2(&gt12[index]); + CCTK_REAL_VEC const 
PDstandardNth3gt12 = PDstandardNth3(&gt12[index]); + CCTK_REAL_VEC const PDstandardNth1gt13 = PDstandardNth1(&gt13[index]); + CCTK_REAL_VEC const PDstandardNth2gt13 = PDstandardNth2(&gt13[index]); + CCTK_REAL_VEC const PDstandardNth3gt13 = PDstandardNth3(&gt13[index]); + CCTK_REAL_VEC const PDstandardNth1gt22 = PDstandardNth1(&gt22[index]); + CCTK_REAL_VEC const PDstandardNth2gt22 = PDstandardNth2(&gt22[index]); + CCTK_REAL_VEC const PDstandardNth3gt22 = PDstandardNth3(&gt22[index]); + CCTK_REAL_VEC const PDstandardNth1gt23 = PDstandardNth1(&gt23[index]); + CCTK_REAL_VEC const PDstandardNth2gt23 = PDstandardNth2(&gt23[index]); + CCTK_REAL_VEC const PDstandardNth3gt23 = PDstandardNth3(&gt23[index]); + CCTK_REAL_VEC const PDstandardNth1gt33 = PDstandardNth1(&gt33[index]); + CCTK_REAL_VEC const PDstandardNth2gt33 = PDstandardNth2(&gt33[index]); + CCTK_REAL_VEC const PDstandardNth3gt33 = PDstandardNth3(&gt33[index]); /* Calculate temporaries and grid functions */ ptrdiff_t dir1 = Sign(beta1L); @@ -187,154 +189,172 @@ static void ML_BSSN_convertFromADMBaseGamma_Body(cGH const * restrict const cctk ptrdiff_t dir3 = Sign(beta3L); - CCTK_REAL detgt = 1; + CCTK_REAL_VEC detgt = ToReal(1); - CCTK_REAL gtu11 = INV(detgt)*(gt22L*gt33L - SQR(gt23L)); + CCTK_REAL_VEC gtu11 = kmul(INV(detgt),kmsub(gt22L,gt33L,SQR(gt23L))); - CCTK_REAL gtu12 = (gt13L*gt23L - gt12L*gt33L)*INV(detgt); + CCTK_REAL_VEC gtu12 = + kmul(INV(detgt),kmsub(gt13L,gt23L,kmul(gt12L,gt33L))); - CCTK_REAL gtu13 = (-(gt13L*gt22L) + gt12L*gt23L)*INV(detgt); + CCTK_REAL_VEC gtu13 = + kmul(INV(detgt),kmsub(gt12L,gt23L,kmul(gt13L,gt22L))); - CCTK_REAL gtu22 = INV(detgt)*(gt11L*gt33L - SQR(gt13L)); + CCTK_REAL_VEC gtu22 = kmul(INV(detgt),kmsub(gt11L,gt33L,SQR(gt13L))); - CCTK_REAL gtu23 = (gt12L*gt13L - gt11L*gt23L)*INV(detgt); + CCTK_REAL_VEC gtu23 = + kmul(INV(detgt),kmsub(gt12L,gt13L,kmul(gt11L,gt23L))); - CCTK_REAL gtu33 = INV(detgt)*(gt11L*gt22L - SQR(gt12L)); + CCTK_REAL_VEC gtu33 = kmul(INV(detgt),kmsub(gt11L,gt22L,SQR(gt12L))); - 
CCTK_REAL Gt111 = 0.5*(gtu11*PDstandardNth1gt11 + - 2*(gtu12*PDstandardNth1gt12 + gtu13*PDstandardNth1gt13) - - gtu12*PDstandardNth2gt11 - gtu13*PDstandardNth3gt11); + CCTK_REAL_VEC Gt111 = + kmul(ToReal(0.5),kmadd(gtu11,PDstandardNth1gt11,knmsub(gtu12,PDstandardNth2gt11,kmsub(kmadd(gtu12,PDstandardNth1gt12,kmul(gtu13,PDstandardNth1gt13)),ToReal(2),kmul(gtu13,PDstandardNth3gt11))))); - CCTK_REAL Gt211 = 0.5*(gtu12*PDstandardNth1gt11 + - 2*(gtu22*PDstandardNth1gt12 + gtu23*PDstandardNth1gt13) - - gtu22*PDstandardNth2gt11 - gtu23*PDstandardNth3gt11); + CCTK_REAL_VEC Gt211 = + kmul(ToReal(0.5),kmadd(gtu12,PDstandardNth1gt11,knmsub(gtu22,PDstandardNth2gt11,kmsub(kmadd(gtu22,PDstandardNth1gt12,kmul(gtu23,PDstandardNth1gt13)),ToReal(2),kmul(gtu23,PDstandardNth3gt11))))); - CCTK_REAL Gt311 = 0.5*(gtu13*PDstandardNth1gt11 + - 2*(gtu23*PDstandardNth1gt12 + gtu33*PDstandardNth1gt13) - - gtu23*PDstandardNth2gt11 - gtu33*PDstandardNth3gt11); + CCTK_REAL_VEC Gt311 = + kmul(ToReal(0.5),kmadd(gtu13,PDstandardNth1gt11,knmsub(gtu23,PDstandardNth2gt11,kmsub(kmadd(gtu23,PDstandardNth1gt12,kmul(gtu33,PDstandardNth1gt13)),ToReal(2),kmul(gtu33,PDstandardNth3gt11))))); - CCTK_REAL Gt112 = 0.5*(gtu12*PDstandardNth1gt22 + - gtu11*PDstandardNth2gt11 + gtu13*(PDstandardNth1gt23 + - PDstandardNth2gt13 - PDstandardNth3gt12)); + CCTK_REAL_VEC Gt112 = + kmul(kmadd(gtu12,PDstandardNth1gt22,kmadd(gtu11,PDstandardNth2gt11,kmul(gtu13,kadd(PDstandardNth1gt23,ksub(PDstandardNth2gt13,PDstandardNth3gt12))))),ToReal(0.5)); - CCTK_REAL Gt212 = 0.5*(gtu22*PDstandardNth1gt22 + - gtu12*PDstandardNth2gt11 + gtu23*(PDstandardNth1gt23 + - PDstandardNth2gt13 - PDstandardNth3gt12)); + CCTK_REAL_VEC Gt212 = + kmul(kmadd(gtu22,PDstandardNth1gt22,kmadd(gtu12,PDstandardNth2gt11,kmul(gtu23,kadd(PDstandardNth1gt23,ksub(PDstandardNth2gt13,PDstandardNth3gt12))))),ToReal(0.5)); - CCTK_REAL Gt312 = 0.5*(gtu23*PDstandardNth1gt22 + - gtu13*PDstandardNth2gt11 + gtu33*(PDstandardNth1gt23 + - PDstandardNth2gt13 - 
PDstandardNth3gt12)); + CCTK_REAL_VEC Gt312 = + kmul(kmadd(gtu23,PDstandardNth1gt22,kmadd(gtu13,PDstandardNth2gt11,kmul(gtu33,kadd(PDstandardNth1gt23,ksub(PDstandardNth2gt13,PDstandardNth3gt12))))),ToReal(0.5)); - CCTK_REAL Gt113 = 0.5*(gtu13*PDstandardNth1gt33 + - gtu11*PDstandardNth3gt11 + gtu12*(PDstandardNth1gt23 - - PDstandardNth2gt13 + PDstandardNth3gt12)); + CCTK_REAL_VEC Gt113 = + kmul(kmadd(gtu13,PDstandardNth1gt33,kmadd(gtu11,PDstandardNth3gt11,kmul(gtu12,kadd(PDstandardNth1gt23,ksub(PDstandardNth3gt12,PDstandardNth2gt13))))),ToReal(0.5)); - CCTK_REAL Gt213 = 0.5*(gtu23*PDstandardNth1gt33 + - gtu12*PDstandardNth3gt11 + gtu22*(PDstandardNth1gt23 - - PDstandardNth2gt13 + PDstandardNth3gt12)); + CCTK_REAL_VEC Gt213 = + kmul(kmadd(gtu23,PDstandardNth1gt33,kmadd(gtu12,PDstandardNth3gt11,kmul(gtu22,kadd(PDstandardNth1gt23,ksub(PDstandardNth3gt12,PDstandardNth2gt13))))),ToReal(0.5)); - CCTK_REAL Gt313 = 0.5*(gtu33*PDstandardNth1gt33 + - gtu13*PDstandardNth3gt11 + gtu23*(PDstandardNth1gt23 - - PDstandardNth2gt13 + PDstandardNth3gt12)); + CCTK_REAL_VEC Gt313 = + kmul(kmadd(gtu33,PDstandardNth1gt33,kmadd(gtu13,PDstandardNth3gt11,kmul(gtu23,kadd(PDstandardNth1gt23,ksub(PDstandardNth3gt12,PDstandardNth2gt13))))),ToReal(0.5)); - CCTK_REAL Gt122 = 0.5*(gtu11*(-PDstandardNth1gt22 + - 2*PDstandardNth2gt12) + gtu12*PDstandardNth2gt22 + - gtu13*(2*PDstandardNth2gt23 - PDstandardNth3gt22)); + CCTK_REAL_VEC Gt122 = + kmul(ToReal(0.5),kmadd(gtu12,PDstandardNth2gt22,kmadd(gtu11,kmsub(PDstandardNth2gt12,ToReal(2),PDstandardNth1gt22),kmul(gtu13,kmsub(PDstandardNth2gt23,ToReal(2),PDstandardNth3gt22))))); - CCTK_REAL Gt222 = 0.5*(gtu12*(-PDstandardNth1gt22 + - 2*PDstandardNth2gt12) + gtu22*PDstandardNth2gt22 + - gtu23*(2*PDstandardNth2gt23 - PDstandardNth3gt22)); + CCTK_REAL_VEC Gt222 = + kmul(ToReal(0.5),kmadd(gtu22,PDstandardNth2gt22,kmadd(gtu12,kmsub(PDstandardNth2gt12,ToReal(2),PDstandardNth1gt22),kmul(gtu23,kmsub(PDstandardNth2gt23,ToReal(2),PDstandardNth3gt22))))); - 
CCTK_REAL Gt322 = 0.5*(gtu13*(-PDstandardNth1gt22 + - 2*PDstandardNth2gt12) + gtu23*PDstandardNth2gt22 + - gtu33*(2*PDstandardNth2gt23 - PDstandardNth3gt22)); + CCTK_REAL_VEC Gt322 = + kmul(ToReal(0.5),kmadd(gtu23,PDstandardNth2gt22,kmadd(gtu13,kmsub(PDstandardNth2gt12,ToReal(2),PDstandardNth1gt22),kmul(gtu33,kmsub(PDstandardNth2gt23,ToReal(2),PDstandardNth3gt22))))); - CCTK_REAL Gt123 = 0.5*(gtu13*PDstandardNth2gt33 + - gtu11*(-PDstandardNth1gt23 + PDstandardNth2gt13 + PDstandardNth3gt12) + - gtu12*PDstandardNth3gt22); + CCTK_REAL_VEC Gt123 = + kmul(kmadd(gtu13,PDstandardNth2gt33,kmadd(gtu12,PDstandardNth3gt22,kmul(gtu11,kadd(PDstandardNth2gt13,ksub(PDstandardNth3gt12,PDstandardNth1gt23))))),ToReal(0.5)); - CCTK_REAL Gt223 = 0.5*(gtu23*PDstandardNth2gt33 + - gtu12*(-PDstandardNth1gt23 + PDstandardNth2gt13 + PDstandardNth3gt12) + - gtu22*PDstandardNth3gt22); + CCTK_REAL_VEC Gt223 = + kmul(kmadd(gtu23,PDstandardNth2gt33,kmadd(gtu22,PDstandardNth3gt22,kmul(gtu12,kadd(PDstandardNth2gt13,ksub(PDstandardNth3gt12,PDstandardNth1gt23))))),ToReal(0.5)); - CCTK_REAL Gt323 = 0.5*(gtu33*PDstandardNth2gt33 + - gtu13*(-PDstandardNth1gt23 + PDstandardNth2gt13 + PDstandardNth3gt12) + - gtu23*PDstandardNth3gt22); + CCTK_REAL_VEC Gt323 = + kmul(kmadd(gtu33,PDstandardNth2gt33,kmadd(gtu23,PDstandardNth3gt22,kmul(gtu13,kadd(PDstandardNth2gt13,ksub(PDstandardNth3gt12,PDstandardNth1gt23))))),ToReal(0.5)); - CCTK_REAL Gt133 = 0.5*(gtu11*(-PDstandardNth1gt33 + - 2*PDstandardNth3gt13) + gtu12*(-PDstandardNth2gt33 + - 2*PDstandardNth3gt23) + gtu13*PDstandardNth3gt33); + CCTK_REAL_VEC Gt133 = + kmul(ToReal(0.5),kmadd(gtu13,PDstandardNth3gt33,kmadd(gtu11,kmsub(PDstandardNth3gt13,ToReal(2),PDstandardNth1gt33),kmul(gtu12,kmsub(PDstandardNth3gt23,ToReal(2),PDstandardNth2gt33))))); - CCTK_REAL Gt233 = 0.5*(gtu12*(-PDstandardNth1gt33 + - 2*PDstandardNth3gt13) + gtu22*(-PDstandardNth2gt33 + - 2*PDstandardNth3gt23) + gtu23*PDstandardNth3gt33); + CCTK_REAL_VEC Gt233 = + 
kmul(ToReal(0.5),kmadd(gtu23,PDstandardNth3gt33,kmadd(gtu12,kmsub(PDstandardNth3gt13,ToReal(2),PDstandardNth1gt33),kmul(gtu22,kmsub(PDstandardNth3gt23,ToReal(2),PDstandardNth2gt33))))); - CCTK_REAL Gt333 = 0.5*(gtu13*(-PDstandardNth1gt33 + - 2*PDstandardNth3gt13) + gtu23*(-PDstandardNth2gt33 + - 2*PDstandardNth3gt23) + gtu33*PDstandardNth3gt33); + CCTK_REAL_VEC Gt333 = + kmul(ToReal(0.5),kmadd(gtu33,PDstandardNth3gt33,kmadd(gtu13,kmsub(PDstandardNth3gt13,ToReal(2),PDstandardNth1gt33),kmul(gtu23,kmsub(PDstandardNth3gt23,ToReal(2),PDstandardNth2gt33))))); - CCTK_REAL Xt1L = Gt111*gtu11 + Gt122*gtu22 + 2*(Gt112*gtu12 + - Gt113*gtu13 + Gt123*gtu23) + Gt133*gtu33; + CCTK_REAL_VEC Xt1L = + kmadd(Gt111,gtu11,kmadd(Gt122,gtu22,kmadd(Gt133,gtu33,kmul(kmadd(Gt112,gtu12,kmadd(Gt113,gtu13,kmul(Gt123,gtu23))),ToReal(2))))); - CCTK_REAL Xt2L = Gt211*gtu11 + Gt222*gtu22 + 2*(Gt212*gtu12 + - Gt213*gtu13 + Gt223*gtu23) + Gt233*gtu33; + CCTK_REAL_VEC Xt2L = + kmadd(Gt211,gtu11,kmadd(Gt222,gtu22,kmadd(Gt233,gtu33,kmul(kmadd(Gt212,gtu12,kmadd(Gt213,gtu13,kmul(Gt223,gtu23))),ToReal(2))))); - CCTK_REAL Xt3L = Gt311*gtu11 + Gt322*gtu22 + 2*(Gt312*gtu12 + - Gt313*gtu13 + Gt323*gtu23) + Gt333*gtu33; + CCTK_REAL_VEC Xt3L = + kmadd(Gt311,gtu11,kmadd(Gt322,gtu22,kmadd(Gt333,gtu33,kmul(kmadd(Gt312,gtu12,kmadd(Gt313,gtu13,kmul(Gt323,gtu23))),ToReal(2))))); - CCTK_REAL AL = IfThen(LapseACoeff != - 0,-(INV(ToReal(harmonicF))*pow(alphaL,-ToReal(harmonicN))*(dtalpL - - (beta1L*PDupwindNthAnti1alpha + beta2L*PDupwindNthAnti2alpha + - beta3L*PDupwindNthAnti3alpha + PDupwindNthSymm1alpha*Abs(beta1L) + - PDupwindNthSymm2alpha*Abs(beta2L) + - PDupwindNthSymm3alpha*Abs(beta3L))*ToReal(LapseAdvectionCoeff))),0); + CCTK_REAL_VEC AL = IfThen(LapseACoeff != + 
0,kneg(kmul(INV(ToReal(harmonicF)),kmul(kpow(alphaL,-harmonicN),knmsub(kmadd(beta1L,PDupwindNthAnti1alpha,kmadd(beta2L,PDupwindNthAnti2alpha,kmadd(beta3L,PDupwindNthAnti3alpha,kmadd(PDupwindNthSymm1alpha,kfabs(beta1L),kmadd(PDupwindNthSymm2alpha,kfabs(beta2L),kmul(PDupwindNthSymm3alpha,kfabs(beta3L))))))),ToReal(LapseAdvectionCoeff),dtalpL)))),ToReal(0)); - CCTK_REAL theta = fmin(1,exp(1 - - rL*INV(ToReal(SpatialShiftGammaCoeffRadius)))); + CCTK_REAL_VEC theta = + kfmin(ToReal(1),kexp(knmsub(rL,INV(ToReal(SpatialShiftGammaCoeffRadius)),ToReal(1)))); - CCTK_REAL B1L; - CCTK_REAL B2L; - CCTK_REAL B3L; + CCTK_REAL_VEC B1L; + CCTK_REAL_VEC B2L; + CCTK_REAL_VEC B3L; if (ShiftBCoeff*ShiftGammaCoeff != 0) { - B1L = INV(theta)*INV(ToReal(ShiftGammaCoeff))*(dtbetaxL - - (beta1L*PDupwindNthAnti1beta1 + beta2L*PDupwindNthAnti2beta1 + - beta3L*PDupwindNthAnti3beta1 + PDupwindNthSymm1beta1*Abs(beta1L) + - PDupwindNthSymm2beta1*Abs(beta2L) + - PDupwindNthSymm3beta1*Abs(beta3L))*ToReal(ShiftAdvectionCoeff)); + B1L = + kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,PDupwindNthAnti1beta1,kmadd(beta2L,PDupwindNthAnti2beta1,kmadd(beta3L,PDupwindNthAnti3beta1,kmadd(PDupwindNthSymm1beta1,kfabs(beta1L),kmadd(PDupwindNthSymm2beta1,kfabs(beta2L),kmul(PDupwindNthSymm3beta1,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetaxL))); - B2L = INV(theta)*INV(ToReal(ShiftGammaCoeff))*(dtbetayL - - (beta1L*PDupwindNthAnti1beta2 + beta2L*PDupwindNthAnti2beta2 + - beta3L*PDupwindNthAnti3beta2 + PDupwindNthSymm1beta2*Abs(beta1L) + - PDupwindNthSymm2beta2*Abs(beta2L) + - PDupwindNthSymm3beta2*Abs(beta3L))*ToReal(ShiftAdvectionCoeff)); + B2L = + kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,PDupwindNthAnti1beta2,kmadd(beta2L,PDupwindNthAnti2beta2,kmadd(beta3L,PDupwindNthAnti3beta2,kmadd(PDupwindNthSymm1beta2,kfabs(beta1L),kmadd(PDupwindNthSymm2beta2,kfabs(beta2L),kmul(PDupwindNthSymm3beta2,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetayL))); - 
B3L = INV(theta)*INV(ToReal(ShiftGammaCoeff))*(dtbetazL - - (beta1L*PDupwindNthAnti1beta3 + beta2L*PDupwindNthAnti2beta3 + - beta3L*PDupwindNthAnti3beta3 + PDupwindNthSymm1beta3*Abs(beta1L) + - PDupwindNthSymm2beta3*Abs(beta2L) + - PDupwindNthSymm3beta3*Abs(beta3L))*ToReal(ShiftAdvectionCoeff)); + B3L = + kmul(INV(theta),kmul(INV(ToReal(ShiftGammaCoeff)),knmsub(kmadd(beta1L,PDupwindNthAnti1beta3,kmadd(beta2L,PDupwindNthAnti2beta3,kmadd(beta3L,PDupwindNthAnti3beta3,kmadd(PDupwindNthSymm1beta3,kfabs(beta1L),kmadd(PDupwindNthSymm2beta3,kfabs(beta2L),kmul(PDupwindNthSymm3beta3,kfabs(beta3L))))))),ToReal(ShiftAdvectionCoeff),dtbetazL))); } else { - B1L = 0; + B1L = ToReal(0); - B2L = 0; + B2L = ToReal(0); - B3L = 0; + B3L = ToReal(0); + } + + /* If necessary, store only partial vectors after the first iteration */ + + if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) + { + ptrdiff_t const elt_count_lo = lc_imin-i; + ptrdiff_t const elt_count_hi = lc_imax-i; + vec_store_nta_partial_mid(A[index],AL,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(B1[index],B1L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(B2[index],B2L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(B3[index],B3L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); + break; + } + + /* If necessary, store only partial vectors after the first iteration */ + + if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) + { + ptrdiff_t const elt_count = lc_imin-i; + vec_store_nta_partial_hi(A[index],AL,elt_count); + vec_store_nta_partial_hi(B1[index],B1L,elt_count); + vec_store_nta_partial_hi(B2[index],B2L,elt_count); + vec_store_nta_partial_hi(B3[index],B3L,elt_count); + vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); + 
vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); + vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); + continue; + } + + /* If necessary, store only partial vectors after the last iteration */ + + if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) + { + ptrdiff_t const elt_count = lc_imax-i; + vec_store_nta_partial_lo(A[index],AL,elt_count); + vec_store_nta_partial_lo(B1[index],B1L,elt_count); + vec_store_nta_partial_lo(B2[index],B2L,elt_count); + vec_store_nta_partial_lo(B3[index],B3L,elt_count); + vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); + vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); + vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); + break; } /* Copy local copies back to grid functions */ - A[index] = AL; - B1[index] = B1L; - B2[index] = B2L; - B3[index] = B3L; - Xt1[index] = Xt1L; - Xt2[index] = Xt2L; - Xt3[index] = Xt3L; + vec_store_nta(A[index],AL); + vec_store_nta(B1[index],B1L); + vec_store_nta(B2[index],B2L); + vec_store_nta(B3[index],B3L); + vec_store_nta(Xt1[index],Xt1L); + vec_store_nta(Xt2[index],Xt2L); + vec_store_nta(Xt3[index],Xt3L); } - LC_ENDLOOP3 (ML_BSSN_convertFromADMBaseGamma); + LC_ENDLOOP3VEC (ML_BSSN_convertFromADMBaseGamma); } extern "C" void ML_BSSN_convertFromADMBaseGamma(CCTK_ARGUMENTS) |