diff options
author | Barry Wardell <barry.wardell@gmail.com> | 2011-08-18 00:12:46 +0200 |
---|---|---|
committer | Barry Wardell <barry.wardell@gmail.com> | 2011-08-25 21:07:02 +0200 |
commit | 16c7cc3c66cab59041cfb2203953eaa21cd7812d (patch) | |
tree | c17f1b4606986e34a48d67e726ff2b8f3b132cdb /ML_BSSN_O8/src/ML_BSSN_O8_InitGamma.cc | |
parent | 2888d221261da25e7802b76c46ee3e10ca578418 (diff) |
Regenerate code with Vectorisation enabled.
Note that this also applies the changes from commit 3ba8a55ae2578cb6dc06f0ec8b81f86b3a2654ac to ML_BSSN_MP, ML_BSSN_MP_Helper, ML_BSSN_MP_O8, ML_BSSN_MP_O8_Helper, ML_BSSN_O2, ML_BSSN_O2_Helper, ML_BSSN_O8, ML_BSSN_O8_Helper, ML_BSSN_UPW and ML_BSSN_UPW_Helper, which were accidentally missed in that commit.
Diffstat (limited to 'ML_BSSN_O8/src/ML_BSSN_O8_InitGamma.cc')
-rw-r--r-- | ML_BSSN_O8/src/ML_BSSN_O8_InitGamma.cc | 135 |
1 file changed, 84 insertions, 51 deletions
diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_InitGamma.cc b/ML_BSSN_O8/src/ML_BSSN_O8_InitGamma.cc index 148022e..da223e3 100644 --- a/ML_BSSN_O8/src/ML_BSSN_O8_InitGamma.cc +++ b/ML_BSSN_O8/src/ML_BSSN_O8_InitGamma.cc @@ -13,13 +13,14 @@ #include "GenericFD.h" #include "Differencing.h" #include "loopcontrol.h" +#include "vectors.h" /* Define macros used in calculations */ #define INITVALUE (42) #define QAD(x) (SQR(SQR(x))) -#define INV(x) ((1.0) / (x)) -#define SQR(x) ((x) * (x)) -#define CUB(x) ((x) * (x) * (x)) +#define INV(x) (kdiv(ToReal(1.0),x)) +#define SQR(x) (kmul(x,x)) +#define CUB(x) (kmul(x,SQR(x))) static void ML_BSSN_O8_InitGamma_Body(cGH const * restrict const cctkGH, int const dir, int const face, CCTK_REAL const normal[3], CCTK_REAL const tangentA[3], CCTK_REAL const tangentB[3], int const min[3], int const max[3], int const n_subblock_gfs, CCTK_REAL * restrict const subblock_gfs[]) { @@ -52,53 +53,51 @@ static void ML_BSSN_O8_InitGamma_Body(cGH const * restrict const cctkGH, int con ptrdiff_t const cdi = sizeof(CCTK_REAL) * di; ptrdiff_t const cdj = sizeof(CCTK_REAL) * dj; ptrdiff_t const cdk = sizeof(CCTK_REAL) * dk; - CCTK_REAL const dx = ToReal(CCTK_DELTA_SPACE(0)); - CCTK_REAL const dy = ToReal(CCTK_DELTA_SPACE(1)); - CCTK_REAL const dz = ToReal(CCTK_DELTA_SPACE(2)); - CCTK_REAL const dt = ToReal(CCTK_DELTA_TIME); - CCTK_REAL const dxi = INV(dx); - CCTK_REAL const dyi = INV(dy); - CCTK_REAL const dzi = INV(dz); - CCTK_REAL const khalf = 0.5; - CCTK_REAL const kthird = 1/3.0; - CCTK_REAL const ktwothird = 2.0/3.0; - CCTK_REAL const kfourthird = 4.0/3.0; - CCTK_REAL const keightthird = 8.0/3.0; - CCTK_REAL const hdxi = 0.5 * dxi; - CCTK_REAL const hdyi = 0.5 * dyi; - CCTK_REAL const hdzi = 0.5 * dzi; + CCTK_REAL_VEC const dx = ToReal(CCTK_DELTA_SPACE(0)); + CCTK_REAL_VEC const dy = ToReal(CCTK_DELTA_SPACE(1)); + CCTK_REAL_VEC const dz = ToReal(CCTK_DELTA_SPACE(2)); + CCTK_REAL_VEC const dt = ToReal(CCTK_DELTA_TIME); + CCTK_REAL_VEC const dxi = 
INV(dx); + CCTK_REAL_VEC const dyi = INV(dy); + CCTK_REAL_VEC const dzi = INV(dz); + CCTK_REAL_VEC const khalf = ToReal(0.5); + CCTK_REAL_VEC const kthird = ToReal(1.0/3.0); + CCTK_REAL_VEC const ktwothird = ToReal(2.0/3.0); + CCTK_REAL_VEC const kfourthird = ToReal(4.0/3.0); + CCTK_REAL_VEC const keightthird = ToReal(8.0/3.0); + CCTK_REAL_VEC const hdxi = kmul(ToReal(0.5), dxi); + CCTK_REAL_VEC const hdyi = kmul(ToReal(0.5), dyi); + CCTK_REAL_VEC const hdzi = kmul(ToReal(0.5), dzi); /* Initialize predefined quantities */ - CCTK_REAL const p1o1024dx = 0.0009765625*INV(dx); - CCTK_REAL const p1o1024dy = 0.0009765625*INV(dy); - CCTK_REAL const p1o1024dz = 0.0009765625*INV(dz); - CCTK_REAL const p1o1680dx = 0.000595238095238095238095238095238*INV(dx); - CCTK_REAL const p1o1680dy = 0.000595238095238095238095238095238*INV(dy); - CCTK_REAL const p1o1680dz = 0.000595238095238095238095238095238*INV(dz); - CCTK_REAL const p1o5040dx2 = 0.000198412698412698412698412698413*INV(SQR(dx)); - CCTK_REAL const p1o5040dy2 = 0.000198412698412698412698412698413*INV(SQR(dy)); - CCTK_REAL const p1o5040dz2 = 0.000198412698412698412698412698413*INV(SQR(dz)); - CCTK_REAL const p1o560dx = 0.00178571428571428571428571428571*INV(dx); - CCTK_REAL const p1o560dy = 0.00178571428571428571428571428571*INV(dy); - CCTK_REAL const p1o560dz = 0.00178571428571428571428571428571*INV(dz); - CCTK_REAL const p1o705600dxdy = 1.41723356009070294784580498866e-6*INV(dx)*INV(dy); - CCTK_REAL const p1o705600dxdz = 1.41723356009070294784580498866e-6*INV(dx)*INV(dz); - CCTK_REAL const p1o705600dydz = 1.41723356009070294784580498866e-6*INV(dy)*INV(dz); - CCTK_REAL const p1o840dx = 0.00119047619047619047619047619048*INV(dx); - CCTK_REAL const p1o840dy = 0.00119047619047619047619047619048*INV(dy); - CCTK_REAL const p1o840dz = 0.00119047619047619047619047619048*INV(dz); - CCTK_REAL const p1odx = INV(dx); - CCTK_REAL const p1ody = INV(dy); - CCTK_REAL const p1odz = INV(dz); - CCTK_REAL const pm1o840dx = 
-0.00119047619047619047619047619048*INV(dx); - CCTK_REAL const pm1o840dy = -0.00119047619047619047619047619048*INV(dy); - CCTK_REAL const pm1o840dz = -0.00119047619047619047619047619048*INV(dz); + CCTK_REAL_VEC const p1o1024dx = kmul(INV(dx),ToReal(0.0009765625)); + CCTK_REAL_VEC const p1o1024dy = kmul(INV(dy),ToReal(0.0009765625)); + CCTK_REAL_VEC const p1o1024dz = kmul(INV(dz),ToReal(0.0009765625)); + CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238)); + CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238)); + CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238)); + CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413)); + CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413)); + CCTK_REAL_VEC const p1o5040dz2 = kmul(INV(SQR(dz)),ToReal(0.000198412698412698412698412698413)); + CCTK_REAL_VEC const p1o560dx = kmul(INV(dx),ToReal(0.00178571428571428571428571428571)); + CCTK_REAL_VEC const p1o560dy = kmul(INV(dy),ToReal(0.00178571428571428571428571428571)); + CCTK_REAL_VEC const p1o560dz = kmul(INV(dz),ToReal(0.00178571428571428571428571428571)); + CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6))); + CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048)); + CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048)); + CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048)); + CCTK_REAL_VEC const p1odx = INV(dx); + CCTK_REAL_VEC const p1ody = INV(dy); + CCTK_REAL_VEC const p1odz = INV(dz); /* Loop over the grid 
points */ #pragma omp parallel - LC_LOOP3 (ML_BSSN_O8_InitGamma, + LC_LOOP3VEC (ML_BSSN_O8_InitGamma, i,j,k, min[0],min[1],min[2], max[0],max[1],max[2], - cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]) + cctk_lsh[0],cctk_lsh[1],cctk_lsh[2], + CCTK_REAL_VEC_SIZE) { ptrdiff_t const index = di*i + dj*j + dk*k; @@ -111,18 +110,52 @@ static void ML_BSSN_O8_InitGamma_Body(cGH const * restrict const cctkGH, int con /* Precompute derivatives */ /* Calculate temporaries and grid functions */ - CCTK_REAL Xt1L = 0; + CCTK_REAL_VEC Xt1L = ToReal(0); - CCTK_REAL Xt2L = 0; + CCTK_REAL_VEC Xt2L = ToReal(0); - CCTK_REAL Xt3L = 0; + CCTK_REAL_VEC Xt3L = ToReal(0); + + /* If necessary, store only partial vectors after the first iteration */ + + if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) + { + ptrdiff_t const elt_count_lo = lc_imin-i; + ptrdiff_t const elt_count_hi = lc_imax-i; + vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi); + vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi); + break; + } + + /* If necessary, store only partial vectors after the first iteration */ + + if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0)) + { + ptrdiff_t const elt_count = lc_imin-i; + vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count); + vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count); + vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count); + continue; + } + + /* If necessary, store only partial vectors after the last iteration */ + + if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0)) + { + ptrdiff_t const elt_count = lc_imax-i; + vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count); + vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count); + vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count); + break; + } /* Copy local copies back to grid functions */ - Xt1[index] = Xt1L; - Xt2[index] = 
Xt2L; - Xt3[index] = Xt3L; + vec_store_nta(Xt1[index],Xt1L); + vec_store_nta(Xt2[index],Xt2L); + vec_store_nta(Xt3[index],Xt3L); } - LC_ENDLOOP3 (ML_BSSN_O8_InitGamma); + LC_ENDLOOP3VEC (ML_BSSN_O8_InitGamma); } extern "C" void ML_BSSN_O8_InitGamma(CCTK_ARGUMENTS) |