about | summary | refs | log | tree | commit | diff
path: root/ML_BSSN_O8/src/ML_BSSN_O8_InitGamma.cc
diff options
context:
space:
mode:
author Barry Wardell <barry.wardell@gmail.com> 2011-08-18 00:12:46 +0200
committer Barry Wardell <barry.wardell@gmail.com> 2011-08-25 21:07:02 +0200
commit 16c7cc3c66cab59041cfb2203953eaa21cd7812d (patch)
tree c17f1b4606986e34a48d67e726ff2b8f3b132cdb /ML_BSSN_O8/src/ML_BSSN_O8_InitGamma.cc
parent 2888d221261da25e7802b76c46ee3e10ca578418 (diff)
Regenerate code with Vectorisation enabled.
Note that this also applies the changes from commit 3ba8a55ae2578cb6dc06f0ec8b81f86b3a2654ac to ML_BSSN_MP, ML_BSSN_MP_Helper, ML_BSSN_MP_O8, ML_BSSN_MP_O8_Helper, ML_BSSN_O2, ML_BSSN_O2_Helper, ML_BSSN_O8, ML_BSSN_O8_Helper, ML_BSSN_UPW and ML_BSSN_UPW_Helper, which were accidentally missed in that commit.
Diffstat (limited to 'ML_BSSN_O8/src/ML_BSSN_O8_InitGamma.cc')
-rw-r--r-- ML_BSSN_O8/src/ML_BSSN_O8_InitGamma.cc 135
1 files changed, 84 insertions, 51 deletions
diff --git a/ML_BSSN_O8/src/ML_BSSN_O8_InitGamma.cc b/ML_BSSN_O8/src/ML_BSSN_O8_InitGamma.cc
index 148022e..da223e3 100644
--- a/ML_BSSN_O8/src/ML_BSSN_O8_InitGamma.cc
+++ b/ML_BSSN_O8/src/ML_BSSN_O8_InitGamma.cc
@@ -13,13 +13,14 @@
#include "GenericFD.h"
#include "Differencing.h"
#include "loopcontrol.h"
+#include "vectors.h"
/* Define macros used in calculations */
#define INITVALUE (42)
#define QAD(x) (SQR(SQR(x)))
-#define INV(x) ((1.0) / (x))
-#define SQR(x) ((x) * (x))
-#define CUB(x) ((x) * (x) * (x))
+#define INV(x) (kdiv(ToReal(1.0),x))
+#define SQR(x) (kmul(x,x))
+#define CUB(x) (kmul(x,SQR(x)))
static void ML_BSSN_O8_InitGamma_Body(cGH const * restrict const cctkGH, int const dir, int const face, CCTK_REAL const normal[3], CCTK_REAL const tangentA[3], CCTK_REAL const tangentB[3], int const min[3], int const max[3], int const n_subblock_gfs, CCTK_REAL * restrict const subblock_gfs[])
{
@@ -52,53 +53,51 @@ static void ML_BSSN_O8_InitGamma_Body(cGH const * restrict const cctkGH, int con
ptrdiff_t const cdi = sizeof(CCTK_REAL) * di;
ptrdiff_t const cdj = sizeof(CCTK_REAL) * dj;
ptrdiff_t const cdk = sizeof(CCTK_REAL) * dk;
- CCTK_REAL const dx = ToReal(CCTK_DELTA_SPACE(0));
- CCTK_REAL const dy = ToReal(CCTK_DELTA_SPACE(1));
- CCTK_REAL const dz = ToReal(CCTK_DELTA_SPACE(2));
- CCTK_REAL const dt = ToReal(CCTK_DELTA_TIME);
- CCTK_REAL const dxi = INV(dx);
- CCTK_REAL const dyi = INV(dy);
- CCTK_REAL const dzi = INV(dz);
- CCTK_REAL const khalf = 0.5;
- CCTK_REAL const kthird = 1/3.0;
- CCTK_REAL const ktwothird = 2.0/3.0;
- CCTK_REAL const kfourthird = 4.0/3.0;
- CCTK_REAL const keightthird = 8.0/3.0;
- CCTK_REAL const hdxi = 0.5 * dxi;
- CCTK_REAL const hdyi = 0.5 * dyi;
- CCTK_REAL const hdzi = 0.5 * dzi;
+ CCTK_REAL_VEC const dx = ToReal(CCTK_DELTA_SPACE(0));
+ CCTK_REAL_VEC const dy = ToReal(CCTK_DELTA_SPACE(1));
+ CCTK_REAL_VEC const dz = ToReal(CCTK_DELTA_SPACE(2));
+ CCTK_REAL_VEC const dt = ToReal(CCTK_DELTA_TIME);
+ CCTK_REAL_VEC const dxi = INV(dx);
+ CCTK_REAL_VEC const dyi = INV(dy);
+ CCTK_REAL_VEC const dzi = INV(dz);
+ CCTK_REAL_VEC const khalf = ToReal(0.5);
+ CCTK_REAL_VEC const kthird = ToReal(1.0/3.0);
+ CCTK_REAL_VEC const ktwothird = ToReal(2.0/3.0);
+ CCTK_REAL_VEC const kfourthird = ToReal(4.0/3.0);
+ CCTK_REAL_VEC const keightthird = ToReal(8.0/3.0);
+ CCTK_REAL_VEC const hdxi = kmul(ToReal(0.5), dxi);
+ CCTK_REAL_VEC const hdyi = kmul(ToReal(0.5), dyi);
+ CCTK_REAL_VEC const hdzi = kmul(ToReal(0.5), dzi);
/* Initialize predefined quantities */
- CCTK_REAL const p1o1024dx = 0.0009765625*INV(dx);
- CCTK_REAL const p1o1024dy = 0.0009765625*INV(dy);
- CCTK_REAL const p1o1024dz = 0.0009765625*INV(dz);
- CCTK_REAL const p1o1680dx = 0.000595238095238095238095238095238*INV(dx);
- CCTK_REAL const p1o1680dy = 0.000595238095238095238095238095238*INV(dy);
- CCTK_REAL const p1o1680dz = 0.000595238095238095238095238095238*INV(dz);
- CCTK_REAL const p1o5040dx2 = 0.000198412698412698412698412698413*INV(SQR(dx));
- CCTK_REAL const p1o5040dy2 = 0.000198412698412698412698412698413*INV(SQR(dy));
- CCTK_REAL const p1o5040dz2 = 0.000198412698412698412698412698413*INV(SQR(dz));
- CCTK_REAL const p1o560dx = 0.00178571428571428571428571428571*INV(dx);
- CCTK_REAL const p1o560dy = 0.00178571428571428571428571428571*INV(dy);
- CCTK_REAL const p1o560dz = 0.00178571428571428571428571428571*INV(dz);
- CCTK_REAL const p1o705600dxdy = 1.41723356009070294784580498866e-6*INV(dx)*INV(dy);
- CCTK_REAL const p1o705600dxdz = 1.41723356009070294784580498866e-6*INV(dx)*INV(dz);
- CCTK_REAL const p1o705600dydz = 1.41723356009070294784580498866e-6*INV(dy)*INV(dz);
- CCTK_REAL const p1o840dx = 0.00119047619047619047619047619048*INV(dx);
- CCTK_REAL const p1o840dy = 0.00119047619047619047619047619048*INV(dy);
- CCTK_REAL const p1o840dz = 0.00119047619047619047619047619048*INV(dz);
- CCTK_REAL const p1odx = INV(dx);
- CCTK_REAL const p1ody = INV(dy);
- CCTK_REAL const p1odz = INV(dz);
- CCTK_REAL const pm1o840dx = -0.00119047619047619047619047619048*INV(dx);
- CCTK_REAL const pm1o840dy = -0.00119047619047619047619047619048*INV(dy);
- CCTK_REAL const pm1o840dz = -0.00119047619047619047619047619048*INV(dz);
+ CCTK_REAL_VEC const p1o1024dx = kmul(INV(dx),ToReal(0.0009765625));
+ CCTK_REAL_VEC const p1o1024dy = kmul(INV(dy),ToReal(0.0009765625));
+ CCTK_REAL_VEC const p1o1024dz = kmul(INV(dz),ToReal(0.0009765625));
+ CCTK_REAL_VEC const p1o1680dx = kmul(INV(dx),ToReal(0.000595238095238095238095238095238));
+ CCTK_REAL_VEC const p1o1680dy = kmul(INV(dy),ToReal(0.000595238095238095238095238095238));
+ CCTK_REAL_VEC const p1o1680dz = kmul(INV(dz),ToReal(0.000595238095238095238095238095238));
+ CCTK_REAL_VEC const p1o5040dx2 = kmul(INV(SQR(dx)),ToReal(0.000198412698412698412698412698413));
+ CCTK_REAL_VEC const p1o5040dy2 = kmul(INV(SQR(dy)),ToReal(0.000198412698412698412698412698413));
+ CCTK_REAL_VEC const p1o5040dz2 = kmul(INV(SQR(dz)),ToReal(0.000198412698412698412698412698413));
+ CCTK_REAL_VEC const p1o560dx = kmul(INV(dx),ToReal(0.00178571428571428571428571428571));
+ CCTK_REAL_VEC const p1o560dy = kmul(INV(dy),ToReal(0.00178571428571428571428571428571));
+ CCTK_REAL_VEC const p1o560dz = kmul(INV(dz),ToReal(0.00178571428571428571428571428571));
+ CCTK_REAL_VEC const p1o705600dxdy = kmul(INV(dx),kmul(INV(dy),ToReal(1.41723356009070294784580498866e-6)));
+ CCTK_REAL_VEC const p1o705600dxdz = kmul(INV(dx),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6)));
+ CCTK_REAL_VEC const p1o705600dydz = kmul(INV(dy),kmul(INV(dz),ToReal(1.41723356009070294784580498866e-6)));
+ CCTK_REAL_VEC const p1o840dx = kmul(INV(dx),ToReal(0.00119047619047619047619047619048));
+ CCTK_REAL_VEC const p1o840dy = kmul(INV(dy),ToReal(0.00119047619047619047619047619048));
+ CCTK_REAL_VEC const p1o840dz = kmul(INV(dz),ToReal(0.00119047619047619047619047619048));
+ CCTK_REAL_VEC const p1odx = INV(dx);
+ CCTK_REAL_VEC const p1ody = INV(dy);
+ CCTK_REAL_VEC const p1odz = INV(dz);
/* Loop over the grid points */
#pragma omp parallel
- LC_LOOP3 (ML_BSSN_O8_InitGamma,
+ LC_LOOP3VEC (ML_BSSN_O8_InitGamma,
i,j,k, min[0],min[1],min[2], max[0],max[1],max[2],
- cctk_lsh[0],cctk_lsh[1],cctk_lsh[2])
+ cctk_lsh[0],cctk_lsh[1],cctk_lsh[2],
+ CCTK_REAL_VEC_SIZE)
{
ptrdiff_t const index = di*i + dj*j + dk*k;
@@ -111,18 +110,52 @@ static void ML_BSSN_O8_InitGamma_Body(cGH const * restrict const cctkGH, int con
/* Precompute derivatives */
/* Calculate temporaries and grid functions */
- CCTK_REAL Xt1L = 0;
+ CCTK_REAL_VEC Xt1L = ToReal(0);
- CCTK_REAL Xt2L = 0;
+ CCTK_REAL_VEC Xt2L = ToReal(0);
- CCTK_REAL Xt3L = 0;
+ CCTK_REAL_VEC Xt3L = ToReal(0);
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 2 && CCTK_BUILTIN_EXPECT(i < lc_imin && i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count_lo = lc_imin-i;
+ ptrdiff_t const elt_count_hi = lc_imax-i;
+ vec_store_nta_partial_mid(Xt1[index],Xt1L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt2[index],Xt2L,elt_count_lo,elt_count_hi);
+ vec_store_nta_partial_mid(Xt3[index],Xt3L,elt_count_lo,elt_count_hi);
+ break;
+ }
+
+ /* If necessary, store only partial vectors after the first iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i < lc_imin, 0))
+ {
+ ptrdiff_t const elt_count = lc_imin-i;
+ vec_store_nta_partial_hi(Xt1[index],Xt1L,elt_count);
+ vec_store_nta_partial_hi(Xt2[index],Xt2L,elt_count);
+ vec_store_nta_partial_hi(Xt3[index],Xt3L,elt_count);
+ continue;
+ }
+
+ /* If necessary, store only partial vectors after the last iteration */
+
+ if (CCTK_REAL_VEC_SIZE > 1 && CCTK_BUILTIN_EXPECT(i+CCTK_REAL_VEC_SIZE > lc_imax, 0))
+ {
+ ptrdiff_t const elt_count = lc_imax-i;
+ vec_store_nta_partial_lo(Xt1[index],Xt1L,elt_count);
+ vec_store_nta_partial_lo(Xt2[index],Xt2L,elt_count);
+ vec_store_nta_partial_lo(Xt3[index],Xt3L,elt_count);
+ break;
+ }
/* Copy local copies back to grid functions */
- Xt1[index] = Xt1L;
- Xt2[index] = Xt2L;
- Xt3[index] = Xt3L;
+ vec_store_nta(Xt1[index],Xt1L);
+ vec_store_nta(Xt2[index],Xt2L);
+ vec_store_nta(Xt3[index],Xt3L);
}
- LC_ENDLOOP3 (ML_BSSN_O8_InitGamma);
+ LC_ENDLOOP3VEC (ML_BSSN_O8_InitGamma);
}
extern "C" void ML_BSSN_O8_InitGamma(CCTK_ARGUMENTS)