From 688e7f30c2e3bf33c96d30277863713dd9865f9e Mon Sep 17 00:00:00 2001 From: Ian Hinder Date: Sat, 2 Oct 2010 02:08:16 +0200 Subject: IOASCII.m: Add a small Mathematica package for plotting 1D unigrid data for use in the tutorial --- Auxiliary/Cactus/IOASCII.m | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 Auxiliary/Cactus/IOASCII.m (limited to 'Auxiliary') diff --git a/Auxiliary/Cactus/IOASCII.m b/Auxiliary/Cactus/IOASCII.m new file mode 100644 index 0000000..85de605 --- /dev/null +++ b/Auxiliary/Cactus/IOASCII.m @@ -0,0 +1,22 @@ + +BeginPackage["IOASCII`"]; + +ReadIOASCII::usage = "ReadIOASCII[file] reads the IOASCII file and parses it into list format"; +MGraph::usage = "MGraph[file] plots the IOASCII file"; + +Begin["`Private`"]; + +ReadIOASCII[file_] := + Module[{data1, data2, data3}, + data1 = Import[file, "Table"]; + data2 = Select[SplitBy[data1, Length[#] == 0 &], #[[1]] != {} &]; + data3 = Map[{First[#][[3]], Drop[#, 1]} &, data2]]; + +MGraph[file_] := + Module[{data = ReadIOASCII[file]}, + Manipulate[ + ListLinePlot[data[[it, 2]], PlotLabel -> data[[it, 1]]], {{it,1,"it"}, 1, Length[data], 1}]]; + +End[]; + +EndPackage[]; -- cgit v1.2.3 From b851fa7a46d47fd8d66a74dedd1308eef03be902 Mon Sep 17 00:00:00 2001 From: Ian Hinder Date: Mon, 4 Oct 2010 14:42:51 +0200 Subject: IOASCII.m: Make functions flexible so they can accept either data or filename --- Auxiliary/Cactus/IOASCII.m | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'Auxiliary') diff --git a/Auxiliary/Cactus/IOASCII.m b/Auxiliary/Cactus/IOASCII.m index 85de605..118db8d 100644 --- a/Auxiliary/Cactus/IOASCII.m +++ b/Auxiliary/Cactus/IOASCII.m @@ -12,8 +12,12 @@ ReadIOASCII[file_] := data2 = Select[SplitBy[data1, Length[#] == 0 &], #[[1]] != {} &]; data3 = Map[{First[#][[3]], Drop[#, 1]} &, data2]]; -MGraph[file_] := +MGraph[file_String] := Module[{data = ReadIOASCII[file]}, + MGraph[data]]; + +MGraph[data_List] := + Module[{}, Manipulate[ 
ListLinePlot[data[[it, 2]], PlotLabel -> data[[it, 1]]], {{it,1,"it"}, 1, Length[data], 1}]]; -- cgit v1.2.3 From 96aabdc245e13ecf9fe116cb7e0323adf06cf7c0 Mon Sep 17 00:00:00 2001 From: Ian Hinder Date: Thu, 28 Oct 2010 14:19:09 +0200 Subject: Omit error when cannot find current map in GenericFD_GetBoundaryWidth --- Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'Auxiliary') diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c index 4e1e774..ba83b89 100644 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c @@ -69,8 +69,12 @@ int GenericFD_GetBoundaryWidth(cGH const * restrict const cctkGH) if (CCTK_IsFunctionAliased ("MultiPatch_GetBoundarySpecification")) { int const map = MultiPatch_GetMap (cctkGH); + /* This doesn't make sense in level mode */ if (map < 0) - CCTK_WARN(0, "Could not determine current map"); + { +// CCTK_WARN(1, "Could not determine current map"); + return 0; + } ierr = MultiPatch_GetBoundarySpecification (map, 6, nboundaryzones, is_internal, is_staggered, shiftout); if (ierr != 0) -- cgit v1.2.3 From c9431558d69ee1b8769c918044a45bdd50520b46 Mon Sep 17 00:00:00 2001 From: Ian Hinder Date: Tue, 23 Nov 2010 00:18:24 +0100 Subject: Add support for vectorisation of generated code. Vectorisation needs to be explicitly enabled by setting -DKRANC_VECTORS at build time. 
--- .../KrancNumericalTools/GenericFD/interface.ccl | 1 + .../KrancNumericalTools/GenericFD/src/GenericFD.h | 48 ++--- .../KrancNumericalTools/GenericFD/src/Vectors.hh | 236 +++++++++++++++++++++ Tools/CodeGen/CalculationFunction.m | 8 +- Tools/CodeGen/CodeGen.m | 34 +-- Tools/CodeGen/Differencing.m | 8 +- Tools/CodeGen/KrancThorn.m | 2 +- Tools/CodeGen/Thorn.m | 2 +- 8 files changed, 282 insertions(+), 57 deletions(-) create mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors.hh (limited to 'Auxiliary') diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/interface.ccl b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/interface.ccl index ccc3785..efbdaa6 100644 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/interface.ccl +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/interface.ccl @@ -7,6 +7,7 @@ implements: GenericFD INCLUDE HEADER: GenericFD.h in GenericFD.h +INCLUDE HEADER: Vectors.hh in Vectors.hh INCLUDE HEADER: sbp_calc_coeffs.h in sbp_calc_coeffs.h USES INCLUDE: Boundary.h diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h index 041347d..568c70f 100644 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h @@ -27,7 +27,11 @@ along with Kranc; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - + +#ifdef __cplusplus +extern "C" { +#endif + #ifndef NOPRECOMPUTE #define PRECOMPUTE #endif @@ -677,6 +681,14 @@ int GenericFD_GetBoundaryWidth(cGH const * restrict const cctkGH); /* int GenericFD_BoundaryWidthTable(cGH const * restrict const cctkGH); */ +#ifdef __cplusplus +// Define the restrict qualifier +# ifdef CCTK_CXX_RESTRICT +# undef restrict +# define restrict CCTK_CXX_RESTRICT +# endif +#endif + void GenericFD_GetBoundaryInfo(cGH const * restrict cctkGH, int const * restrict cctk_lsh, int 
const * restrict cctk_lssh, @@ -792,38 +804,8 @@ void GenericFD_LoopOverInterior(cGH const * restrict cctkGH, Kranc_Calculation c -/* Vectorisation of memory accesses */ - -#include -#include - -#if defined(__SSE2__) && defined(CCTK_REAL_PRECISION_8) - -#include - -/* A vector type corresponding to CCTK_REAL */ -typedef __m128d CCTK_REAL_VEC; - -/* Really only SSE is required, but there doesn't seem to be a - preprocessing flag to check for this */ -#elif defined(__SSE2__) && defined(CCTK_REAL_PRECISION_4) - -#include - -/* A vector type corresponding to CCTK_REAL */ -typedef __m128 CCTK_REAL_VEC; - -#else - -/* There is no vector type corresponding to CCTK_REAL */ -typedef CCTK_REAL CCTK_REAL_VEC; - +#ifdef __cplusplus +} /* extern "C" */ #endif -/* The number of vector elements in a CCTK_REAL_VEC */ -static -size_t const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); - - - #endif diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors.hh new file mode 100644 index 0000000..3fb77e1 --- /dev/null +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors.hh @@ -0,0 +1,236 @@ +#ifndef VECTORS_HH +#define VECTORS_HH + + + +// Vectorisation + +#include +#include +#include + + + +// I: i,j: integer +// R: a,b: real +// V: x,y: vector (of real) +// P: p,q: pointer (i.e. const reference) to something +// L: l,m: L-value (i.e. 
non-const reference) to something + +#define DEFINE_FUNCTION_PR_V(name,expr) \ +inline \ +CCTK_REAL_VEC name (CCTK_REAL const& p) \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_PRV(name,expr) \ +inline \ +void name (CCTK_REAL& p, CCTK_REAL_VEC const& x) \ +{ \ + expr; \ +} + +#define DEFINE_FUNCTION_PVR(name,expr) \ +inline \ +void name (CCTK_REAL_VEC& p, CCTK_REAL const& a) \ +{ \ + expr; \ +} + +#define DEFINE_FUNCTION_V_V(name,expr) \ +inline \ +CCTK_REAL_VEC name (CCTK_REAL_VEC const& x) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return CCTK_REAL_VEC(expr); \ +} + +#define DEFINE_FUNCTION_V_R(name,expr) \ +inline \ +CCTK_REAL name (CCTK_REAL_VEC const& x) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_R_V(name,expr) \ +inline \ +CCTK_REAL_VEC name (CCTK_REAL const& a) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_VV_V(name,expr) \ +inline \ +CCTK_REAL_VEC name (CCTK_REAL_VEC const& x, CCTK_REAL_VEC const& y) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_VR_V(name,expr) \ +inline \ +CCTK_REAL_VEC name (CCTK_REAL_VEC const& x, CCTK_REAL const& a) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_RV_V(name,expr) \ +inline \ +CCTK_REAL_VEC name (CCTK_REAL const& a, CCTK_REAL_VEC const& x) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_RR_V(name,expr) \ +inline \ +CCTK_REAL_VEC name (CCTK_REAL const& a, CCTK_REAL const& b) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + + + +// Intel, double +#if defined(KRANC_VECTORS) && defined(__SSE2__) && defined(CCTK_REAL_PRECISION_8) + +#include + +// Vector type corresponding to CCTK_REAL +struct CCTK_REAL_VEC { + // The underlying scalar and vector types + typedef double S; + typedef __m128d V; + static int const n = sizeof(V)/sizeof(S); + V v; + + // Set a vector from scalars + CCTK_REAL_VEC(S const& a, S const& b): v(_mm_set_pd(a,b)) { }; + + // Get a scalar from the 
vector + S elt0() const { return _mm_cvtsd_f64(v); /* this is a no-op */ } + S elt1() const { return _mm_cvtsd_f64(_mm_shuffle_pd(v,v,_MM_SHUFFLE2(1,1))); } + + // Set a vector from a scalar, replicating the scalar + CCTK_REAL_VEC(S const& a): v(_mm_set1_pd(a)) { }; + + // Convert from and to the underlying vector type + CCTK_REAL_VEC(V const& v_): v(v_) { }; + operator V const() const { return v; } + + CCTK_REAL_VEC() { }; + + // Copy constructor + CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { }; +}; + +// Load a vector from memory (aligned and unaligned); this loads from +// a reference to a scalar +DEFINE_FUNCTION_PR_V(vec_load,_mm_load_pd(&p)); +DEFINE_FUNCTION_PR_V(vec_loadu,_mm_loadu_pd(&p)); + +// Store a vector to memory (aligned and non-temporal); this stores to +// a reference to a scalar +DEFINE_FUNCTION_PRV(vec_store,_mm_store_pd(&p,x)) +DEFINE_FUNCTION_PRV(vec_store_nta,_mm_stream_pd(&p,x)) + +// Double-argument operators, both vectors +DEFINE_FUNCTION_VV_V(operator+,_mm_add_pd(x,y)) +DEFINE_FUNCTION_VV_V(operator-,_mm_sub_pd(x,y)) +DEFINE_FUNCTION_VV_V(operator*,_mm_mul_pd(x,y)) +DEFINE_FUNCTION_VV_V(operator/,_mm_div_pd(x,y)) + +// Double-argument operators, vector and scalar +DEFINE_FUNCTION_VR_V(operator+,x+CCTK_REAL_VEC(a)) +DEFINE_FUNCTION_VR_V(operator-,x-CCTK_REAL_VEC(a)) +DEFINE_FUNCTION_VR_V(operator*,x*CCTK_REAL_VEC(a)) +DEFINE_FUNCTION_VR_V(operator/,x/CCTK_REAL_VEC(a)) + +// Double-argument operators, scalar and vector +DEFINE_FUNCTION_RV_V(operator+,CCTK_REAL_VEC(a)+x) +DEFINE_FUNCTION_RV_V(operator-,CCTK_REAL_VEC(a)-x) +DEFINE_FUNCTION_RV_V(operator*,CCTK_REAL_VEC(a)*x) +DEFINE_FUNCTION_RV_V(operator/,CCTK_REAL_VEC(a)/x) + +// Single-argument operators +DEFINE_FUNCTION_V_V(operator+,x) +DEFINE_FUNCTION_V_V(operator-,0.0-x) + +// Cheap functions +static union { + unsigned long long const bits[2]; + CCTK_REAL_VEC::V v; +} const fabs_mask = + { { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL } }; 
+DEFINE_FUNCTION_V_V(fabs,_mm_and_pd(x,fabs_mask.v)) +DEFINE_FUNCTION_VV_V(fmax,_mm_max_pd(x,y)) +DEFINE_FUNCTION_VV_V(fmin,_mm_min_pd(x,y)) +DEFINE_FUNCTION_V_V(sqrt,_mm_sqrt_pd(x)) + +// Expensive functions +DEFINE_FUNCTION_V_V(exp,CCTK_REAL_VEC(exp(x.elt0()),exp(x.elt1()))) +DEFINE_FUNCTION_V_V(log,CCTK_REAL_VEC(log(x.elt0()),log(x.elt1()))) +DEFINE_FUNCTION_VR_V(pow,CCTK_REAL_VEC(pow(x.elt0(),a),pow(x.elt1(),a))) + +// Un-implemented functions +DEFINE_FUNCTION_V_R(signbit,0) + + + +#if 0 +// Intel, float +#elif defined(KRANC_VECTORS) && defined(__SSE__) && defined(CCTK_REAL_PRECISION_4) + +#include + +// A vector type corresponding to CCTK_REAL +typedef __m128 CCTK_REAL_VEC; +#endif + + + +// Fallback: no vectorisation +#else + +// There is no vector type corresponding to CCTK_REAL +typedef CCTK_REAL CCTK_REAL_VEC; + + + +DEFINE_FUNCTION_PR_V(vec_load,p) +DEFINE_FUNCTION_PR_V(vec_loadu,p) + +DEFINE_FUNCTION_PRV(vec_store,p=x) +DEFINE_FUNCTION_PRV(vec_store_nta,p=x) + +DEFINE_FUNCTION_V_R(signbit,x<0) + + + +#endif + + + +#undef DEFINE_FUNCTION_PR_V +#undef DEFINE_FUNCTION_PRV +#undef DEFINE_FUNCTION_V_V +#undef DEFINE_FUNCTION_R_V +#undef DEFINE_FUNCTION_VV_V +#undef DEFINE_FUNCTION_VR_V +#undef DEFINE_FUNCTION_RV_V +#undef DEFINE_FUNCTION_RR_V + + + +// Number of vector elements in a CCTK_REAL_VEC +static +size_t const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); + + + +#endif // #ifndef VECTORS_HH diff --git a/Tools/CodeGen/CalculationFunction.m b/Tools/CodeGen/CalculationFunction.m index d73adb6..3d86503 100644 --- a/Tools/CodeGen/CalculationFunction.m +++ b/Tools/CodeGen/CalculationFunction.m @@ -229,7 +229,7 @@ simpCollect[collectList_, eqrhs_, localvar_, debug_] := assignVariableFromExpression[dest_, expr_, declare_] := Module[{tSym, type, cleanExpr, code}, tSym = Unique[]; - type = If[StringMatchQ[ToString[dest], "dir*"], "int", "CCTK_REAL"]; + type = If[StringMatchQ[ToString[dest], "dir*"], "int", "CCTK_REAL_VEC"]; cleanExpr = 
ReplacePowers[expr] /. Kranc`t -> tSym; If[SOURCELANGUAGE == "C", @@ -400,7 +400,7 @@ CreateCalculationFunction[calc_, debug_, useCSE_, opts:OptionsPattern[]] := "Calculation is:", cleancalc]]; { - DefineFunction[bodyFunctionName, "void", "cGH const * restrict const cctkGH, int const dir, int const face, CCTK_REAL const normal[3], CCTK_REAL const tangentA[3], CCTK_REAL const tangentB[3], int const min[3], int const max[3], int const n_subblock_gfs, CCTK_REAL * restrict const subblock_gfs[]", + DefineFunction[bodyFunctionName, "static void", "cGH const * restrict const cctkGH, int const dir, int const face, CCTK_REAL const normal[3], CCTK_REAL const tangentA[3], CCTK_REAL const tangentB[3], int const min[3], int const max[3], int const n_subblock_gfs, CCTK_REAL * restrict const subblock_gfs[]", { "DECLARE_CCTK_ARGUMENTS;\n", "DECLARE_CCTK_PARAMETERS;\n\n", @@ -541,7 +541,7 @@ equationLoop[eqs_, cleancalc_, gfs_, shorts_, incs_, groups_, pddefs_, CommentedBlock["Assign local copies of grid functions", Map[DeclareMaybeAssignVariableInLoop[ - "CCTK_REAL", localName[#], GridName[#], + "CCTK_REAL_VEC", localName[#], GridName[#], StringMatchQ[ToString[GridName[#]], "eT" ~~ _ ~~ _ ~~ "[" ~~ __ ~~ "]"], "*stress_energy_state"] &, gfsInRHS]], @@ -559,7 +559,7 @@ equationLoop[eqs_, cleancalc_, gfs_, shorts_, incs_, groups_, pddefs_, ""], CommentedBlock["Copy local copies back to grid functions", - Map[AssignVariableInLoop[GridName[#], localName[#]] &, + Map[StoreVariableInLoop[GridName[#], localName[#]] &, gfsInLHS]], If[debugInLoop, Map[InfoVariable[GridName[#]] &, gfsInLHS], ""]}, opts]]; diff --git a/Tools/CodeGen/CodeGen.m b/Tools/CodeGen/CodeGen.m index 7cae5fb..de8dbd2 100644 --- a/Tools/CodeGen/CodeGen.m +++ b/Tools/CodeGen/CodeGen.m @@ -60,14 +60,16 @@ DeclareAssignVariable::usage = "DeclareAssignVariable[type_, dest_, src_] return "that declares and sets a constant variable of given name and type."; AssignVariableInLoop::usage = "AssignVariableInLoop[dest_, src_] 
returns a block of code " <> "that assigns 'src' to 'dest'."; +StoreVariableInLoop::usage = "StoreVariableInLoop[dest_, src_] returns a block of code " <> + "that assigns 'src' to 'dest'."; DeclareAssignVariableInLoop::usage = "DeclareAssignVariableInLoop[type_, dest_, src_] returns a block of code " <> "that assigns 'src' to 'dest'."; MaybeAssignVariableInLoop::usage = "MaybeAssignVariableInLoop[dest_, src_, cond_] returns a block of code " <> "that assigns 'src' to 'dest'."; DeclareMaybeAssignVariableInLoop::usage = "DeclareMaybeAssignVariableInLoop[type_, dest_, src_, cond_] returns a block of code " <> "that assigns 'src' to 'dest'."; -DeclareVariablesInLoopVectorised::usage = ""; -AssignVariablesInLoopVectorised::usage = ""; +UNUSEDDeclareVariablesInLoopVectorised::usage = ""; +UNUSEDAssignVariablesInLoopVectorised::usage = ""; TestForNaN::usage = "TestForNaN[expr_] returns a block of code " <> "that tests 'expr' for nan."; CommentedBlock::usage = "CommentedBlock[comment, block] returns a block consisting " <> @@ -129,11 +131,11 @@ Quote::usage = "Quote[x] returns x surrounded by quotes"; Begin["`Private`"]; SOURCELANGUAGE = "C"; -SOURCESUFFIX = ".c"; +SOURCESUFFIX = ".cc"; setSourceSuffix[lang_] := If[ (lang == "C"), - SOURCESUFFIX = ".c"; + SOURCESUFFIX = ".cc"; , SOURCESUFFIX = ".F90"; ]; @@ -146,7 +148,7 @@ If[ (lang == "C" || lang == "Fortran"), InfoMessage[Terse, "User set source language to " <> lang], SOURCELANGUAGE = "C"; - setSourceSuffix[".c"]; + setSourceSuffix[".cc"]; InfoMessage[Terse, "Setting Source Language to C"]; ]; @@ -280,21 +282,24 @@ AssignVariableInLoop[dest_, src_] := TestForNaN[dest]}; *) +StoreVariableInLoop[dest_, src_] := + {"vec_store_nta(", dest, ",", src, ")", EOL[]}; + DeclareAssignVariableInLoop[type_, dest_, src_] := - {type, " const ", dest, " = ", src, EOL[]}; + {type, " const ", dest, " = vec_load(", src, ")", EOL[]}; MaybeAssignVariableInLoop[dest_, src_, cond_] := If [cond, - {dest, " = useMatter ? 
", src, " : 0.0", EOL[]}, - {dest, " = ", src, EOL[]}]; + {dest, " = useMatter ? vec_load(", src, ") : 0.0", EOL[]}, + {dest, " = vec_load(", src, ")", EOL[]}]; DeclareMaybeAssignVariableInLoop[type_, dest_, src_, mmaCond_, codeCond_] := If [mmaCond, - {type, " ", dest, " = (", codeCond, ") ? (", src, ") : 0.0", EOL[]}, - {type, " ", dest, " = ", src, EOL[]}]; + {type, " ", dest, " = (", codeCond, ") ? vec_load(", src, ") : 0.0", EOL[]}, + {type, " ", dest, " = vec_load(", src, ")", EOL[]}]; (* TODO: move these into OpenMP loop *) -DeclareVariablesInLoopVectorised[dests_, temps_, srcs_] := +UNUSEDDeclareVariablesInLoopVectorised[dests_, temps_, srcs_] := { {"#undef LC_PRELOOP_STATEMENTS", "\n"}, {"#define LC_PRELOOP_STATEMENTS", " \\\n"}, @@ -307,7 +312,7 @@ DeclareVariablesInLoopVectorised[dests_, temps_, srcs_] := {"\n"} }; -AssignVariablesInLoopVectorised[dests_, temps_, srcs_] := +UNUSEDAssignVariablesInLoopVectorised[dests_, temps_, srcs_] := { {"{\n"}, {" if (i < GFD_imin || i >= GFD_imax) {\n"}, @@ -334,7 +339,7 @@ AssignVariablesInLoopVectorised[dests_, temps_, srcs_] := {"}\n"} }; -AssignVariableInLoopsVectorised[dest_, temp_, src_] := +UNUSEDAssignVariableInLoopsVectorised[dest_, temp_, src_] := {"GFD_save_and_store(", dest, ",", "index", ",", "&", temp, ",", src, ")", EOL[]}; TestForNaN[expr_] := @@ -406,7 +411,7 @@ defineSubroutine[name_, args_, contents_] := defineSubroutineC[name_, args_, contents_] := SeparatedBlock[ - {"void ", name, "(", args, ")", "\n", + {"extern \"C\" void ", name, "(", args, ")", "\n", CBlock[contents]}]; defineSubroutineF[name_, args_, contents_] := @@ -663,6 +668,7 @@ GenericGridLoopUsingLoopControl[functionName_, block_] := block } ], + "i += CCTK_REAL_VEC_SIZE-1;\n", "}\n", "LC_ENDLOOP3 (", functionName, ");\n" } diff --git a/Tools/CodeGen/Differencing.m b/Tools/CodeGen/Differencing.m index d6b9d26..609aa9d 100644 --- a/Tools/CodeGen/Differencing.m +++ b/Tools/CodeGen/Differencing.m @@ -208,7 +208,7 @@ 
ReplaceDerivatives[derivOps_, expr_, precompute_] := PrecomputeDerivative[d:pd_[gf_, inds___]] := Module[{}, - DeclareAssignVariable["CCTK_REAL", GridFunctionDerivativeName[d], evaluateDerivative[d]]]; + DeclareAssignVariable["CCTK_REAL_VEC", GridFunctionDerivativeName[d], evaluateDerivative[d]]]; evaluateDerivative[d:pd_[gf_, inds___]] := Module[{macroname}, @@ -216,7 +216,7 @@ evaluateDerivative[d:pd_[gf_, inds___]] := Return[ToString[macroName] <> "(" <> ToString[gf] <> ", i, j, k)"]]; DeclareDerivative[d:pd_[gf_, inds___]] := - DeclareVariable[GridFunctionDerivativeName[d], "// CCTK_REAL"]; + DeclareVariable[GridFunctionDerivativeName[d], "// CCTK_REAL_VEC"]; (*************************************************************) @@ -404,10 +404,10 @@ DifferenceGFTerm[op_, i_, j_, k_] := "(int)(" <> ToString[CFormHideStrings[j+ny]] <> ")," <> "(int)(" <> ToString[CFormHideStrings[k+nz]] <> "))]", *) - remaining "(u)[index" <> + remaining "vec_loadu((u)[index" <> "+di*(" <> ToString[CFormHideStrings[nx]] <> ")" <> "+dj*(" <> ToString[CFormHideStrings[ny]] <> ")" <> - "+dk*(" <> ToString[CFormHideStrings[nz]] <> ")]", + "+dk*(" <> ToString[CFormHideStrings[nz]] <> ")])", (* remaining "(u)[CCTK_GFINDEX3D(cctkGH,floor((" <> ToString[CFormHideStrings[i+nx]] <> ")+0.5),floor((" <> diff --git a/Tools/CodeGen/KrancThorn.m b/Tools/CodeGen/KrancThorn.m index 7527f5e..0281b3d 100644 --- a/Tools/CodeGen/KrancThorn.m +++ b/Tools/CodeGen/KrancThorn.m @@ -127,7 +127,7 @@ CreateKrancThorn[groupsOrig_, parentDirectory_, thornName_, opts:OptionsPattern[ coordGroup = {"grid::coordinates", {Kranc`x,Kranc`y,Kranc`z,Kranc`r}}; groups = Join[groupsOrig, {coordGroup}]; - includeFiles = Join[includeFiles, {"GenericFD.h", "Symmetry.h", "sbp_calc_coeffs.h"}]; + includeFiles = Join[includeFiles, {"GenericFD.h", "Symmetry.h", "Vectors.hh", "sbp_calc_coeffs.h"}]; inheritedImplementations = Join[inheritedImplementations, {"Grid", "GenericFD"}, CactusBoundary`GetInheritedImplementations[]]; diff 
--git a/Tools/CodeGen/Thorn.m b/Tools/CodeGen/Thorn.m index 73f7e99..353be9c 100644 --- a/Tools/CodeGen/Thorn.m +++ b/Tools/CodeGen/Thorn.m @@ -508,7 +508,7 @@ CreateSetterSource[calcs_, debug_, useCSE_, include_, imp_, ], Map[IncludeFile, Join[{"cctk.h", "cctk_Arguments.h", "cctk_Parameters.h", - (*"precomputations.h",*) "GenericFD.h", "Differencing.h"}, include, + (*"precomputations.h",*) "GenericFD.h", "Differencing.h", "Vectors.hh"}, include, If[OptionValue[UseLoopControl], {"loopcontrol.h"}, {}]]], calculationMacros[], -- cgit v1.2.3 From d6c4d4c2131107ef3a4004692823e2041394acd6 Mon Sep 17 00:00:00 2001 From: Ian Hinder Date: Mon, 22 Nov 2010 11:20:48 +0100 Subject: Implement vectorisation This is Erik's vectorisation working tree from 13-Oct-2010 --- .../GenericFD/src/MathematicaCompat.h | 59 +- .../GenericFD/src/Vectors-SSE2-direct.hh | 135 +++++ .../GenericFD/src/Vectors-SSE2.hh | 194 +++++++ .../GenericFD/src/Vectors-VSX-direct.hh | 111 ++++ .../GenericFD/src/Vectors-VSX.hh | 212 ++++++++ .../GenericFD/src/Vectors-default.hh | 31 ++ .../GenericFD/src/Vectors-define.hh | 104 ++++ .../GenericFD/src/Vectors-outdated.hh | 591 +++++++++++++++++++++ .../GenericFD/src/Vectors-pseudo.hh | 72 +++ .../GenericFD/src/Vectors-undefine.hh | 14 + .../KrancNumericalTools/GenericFD/src/Vectors.hh | 238 +-------- Tools/CodeGen/CalculationFunction.m | 28 +- Tools/CodeGen/CodeGen.m | 229 ++++---- Tools/CodeGen/Differencing.m | 120 ++--- Tools/CodeGen/Kranc.m | 6 +- Tools/CodeGen/Thorn.m | 29 +- 16 files changed, 1767 insertions(+), 406 deletions(-) create mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-SSE2-direct.hh create mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-SSE2.hh create mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-VSX-direct.hh create mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-VSX.hh create mode 100644 
Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-default.hh create mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-define.hh create mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-outdated.hh create mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-pseudo.hh create mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-undefine.hh (limited to 'Auxiliary') diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/MathematicaCompat.h b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/MathematicaCompat.h index 29c89d1..ee6a3b7 100644 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/MathematicaCompat.h +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/MathematicaCompat.h @@ -1,17 +1,17 @@ -#define Power(x, y) (pow((x), (y))) +#define Power(x, y) (pow(x,y)) #define Sqrt(x) (sqrt(x)) #ifdef KRANC_C -#define Abs(x) (fabs(x)) -#define Min(x, y) (fmin((x), (y))) -#define Max(x, y) (fmax((x), (y))) -#define IfThen(x,y,z) ((x) ? (y) : (z)) +# define Abs(x) (fabs(x)) +# define Min(x, y) (fmin(x,y)) +# define Max(x, y) (fmax(x,y)) +# define IfThen(x,y,z) ((x) ? (y) : (z)) #else -#define Abs(x) (abs(x)) -#define Min(x, y) (min((x), (y))) -#define Max(x, y) (max((x), (y))) +# define Abs(x) (abs(x)) +# define Min(x, y) (min(x,y)) +# define Max(x, y) (max(x,y)) /* IfThen cannot be expressed in Fortran */ #endif @@ -31,17 +31,46 @@ #define Tanh(x) (tanh(x)) #ifdef KRANC_C -#define Sign(x) (signbit(x)?-1:+1) +# define Sign(x) ((x)<0?-1:+1) +# define ToReal(x) ((CCTK_REAL)(x)) #else -#define Sign(x) (sgn(x)) +# define Sign(x) (sgn(x)) +# define ToReal(x) (real((x),kind(khalf))) #endif +/* TODO: use fma(x,y,z) to implement fmadd and friends? Note that fma + may be unsupported, or may be slow. 
*/ + +/* #define fmadd(x,y,z) ((x)*(y)+(z)) */ +/* #define fmsub(x,y,z) ((x)*(y)-(z)) */ +/* #define fnmadd(x,y,z) (-(z)-(x)*(y)) */ +/* #define fnmsub(x,y,z) (+(z)-(x)*(y)) */ + +#define fneg(x) (-(x)) +#define fmul(x,y) ((x)*(y)) +#define fdiv(x,y) ((x)/(y)) +#define fadd(x,y) ((x)+(y)) +#define fsub(x,y) ((x)-(y)) + +#define fmadd(x,y,z) (fadd(fmul(x,y),z)) +#define fmsub(x,y,z) (fsub(fmul(x,y),z)) +#define fnmadd(x,y,z) (fsub(fneg(z),fmul(x,y))) +#define fnmsub(x,y,z) (fsub(z,fmul(x,y))) + +#define kexp(x) (exp(x)) +#define kfabs(x) (fabs(x)) +#define kfmax(x,y) (fmax(x,y)) +#define kfmin(x,y) (fmin(x,y)) +#define klog(x) (log(x)) +#define kpow(x,y) (pow(x,y)) +#define ksqrt(x) (sqrt(x)) + #ifdef KRANC_C -#define E M_E -#define Pi M_PI +# define E M_E +# define Pi M_PI #else -#define E 2.71828182845904523536029d0 -#define Pi 3.14159265358979323846264d0 +# define E 2.71828182845904523536029d0 +# define Pi 3.14159265358979323846264d0 #endif -#define UnitStep(x) ( (x) > 0 ? 1 : 0 ) +#define UnitStep(x) ((x)>0) diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-SSE2-direct.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-SSE2-direct.hh new file mode 100644 index 0000000..12cd6e8 --- /dev/null +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-SSE2-direct.hh @@ -0,0 +1,135 @@ +// Vectorise using Intel's or AMD's SSE2 + +// Use the type __m128d directly, without introducing a wrapper class +// Use macros instead of inline functions + + + +#include <emmintrin.h> + +// Vector type corresponding to CCTK_REAL +typedef __m128d CCTK_REAL_VEC; + +// Number of vector elements in a CCTK_REAL_VEC +static +int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); + + + +// Create vectors, extract vector elements + +#define vec_set1(a) (_mm_set1_pd(a)) +#define vec_set(a,b) (_mm_set_pd(b,a)) + +// Get a scalar from the vector +#if defined(__PGI) && defined (__amd64__) +// _mm_cvtsd_f64 does not exist on PGI compilers +// #
define vec_elt0(x) (*(CCTK_REAL const*)&(x)) +# define vec_elt0(x) ({ CCTK_REAL a_elt0; asm ("" : "=x" (a_elt0) : "0" (x)); a_elt0; }) +#else +// this is a no-op +# define vec_elt0(x) (_mm_cvtsd_f64(x)) +#endif +#define vec_elt1(x_) ({ CCTK_REAL_VEC const x_elt1=(x_); vec_elt0(_mm_unpackhi_pd(x_elt1,x_elt1)); }) + + + +// Load and store vectors + +// Load a vector from memory (aligned and unaligned); this loads from +// a reference to a scalar +#define vec_load(p) (_mm_load_pd(&(p))) +#define vec_loadu(p) (_mm_loadu_pd(&(p))) + +// Load a vector from memory that may or may not be aligned, as +// decided by the offset off and the vector size +// Implementation: Always use unaligned load +#define vec_loadu_maybe(off,p) (vec_loadu(p)) +#define vec_loadu_maybe3(off1,off2,off3,p) (vec_loadu(p)) +#if 0 +#define vec_loadu_maybe(off,p) \ + (!((off)&(CCTK_REAL_VEC_SIZE-1)) ? \ + vec_load(p) : vec_loadu(p)) +#define vec_loadu_maybe3(off1,off2,off3,p) \ + (!((off1)&(CCTK_REAL_VEC_SIZE-1)) && \ + !((off2)&(CCTK_REAL_VEC_SIZE-1)) && \ + !((off3)&(CCTK_REAL_VEC_SIZE-1)) ? 
\ + vec_load(p) : vec_loadu(p)) +#endif + +// Store a vector to memory (aligned and non-temporal); this stores to +// a reference to a scalar +#define vec_store(p,x) (_mm_store_pd(&(p),x)) +#define vec_storeu(p,x) (_mm_storeu_pd(&(p),x)) +#if defined(KRANC_CACHE) +# define vec_store_nta(p,x) (_mm_stream_pd(&(p),x)) +#else +# define vec_store_nta(p,x) (_mm_store_pd(&(p),x)) +#endif + +// Store a lower or higher partial vector (aligned and non-temporal); +// the non-temporal hint is probably ignored +#define vec_store_nta_partial_lo(p,x,n) (_mm_storel_pd(&(p),x)) +#define vec_store_nta_partial_hi(p,x,n) (_mm_storeh_pd((&(p))+1,x)) + + + +// Functions and operators + +// Operators +#undef fneg +#undef fmul +#undef fdiv +#undef fadd +#undef fsub +#if defined(__PGI) && defined (__amd64__) +// The PGI compiler does not understand __m128d literals +static union { + unsigned long long s[CCTK_REAL_VEC_SIZE]; + CCTK_REAL_VEC v; +} vec_neg_mask_impl = {0x8000000000000000ULL, 0x8000000000000000ULL}; +# define vec_neg_mask (vec_neg_mask_impl.v) +#else +# define vec_neg_mask ((CCTK_REAL_VEC)(__m128i){0x8000000000000000ULL, 0x8000000000000000ULL}) +#endif +#define fneg(x) (_mm_xor_pd(x,vec_neg_mask)) +#define fmul(x,y) (_mm_mul_pd(x,y)) +#define fdiv(x,y) (_mm_div_pd(x,y)) +#define fadd(x,y) (_mm_add_pd(x,y)) +#define fsub(x,y) (_mm_sub_pd(x,y)) + +// Cheap functions +#undef kfabs +#undef kfmax +#undef kfmin +#undef ksqrt +#if defined(__PGI) && defined (__amd64__) +// The PGI compiler does not understand __m128d literals +static union { + unsigned long long s[CCTK_REAL_VEC_SIZE]; + CCTK_REAL_VEC v; +} vec_fabs_mask_impl = {0x7fffffffffffffffULL, 0x7fffffffffffffffULL}; +# define vec_fabs_mask (vec_fabs_mask_impl.v) +#else +# define vec_fabs_mask ((CCTK_REAL_VEC)(__m128i){0x7fffffffffffffffULL, 0x7fffffffffffffffULL}) +#endif +#define kfabs(x) (_mm_and_pd(x,vec_fabs_mask)) +#define kfmax(x,y) (_mm_max_pd(x,y)) +#define kfmin(x,y) (_mm_min_pd(x,y)) +#define ksqrt(x) 
(_mm_sqrt_pd(x)) + +// Expensive functions +#undef kexp +#undef klog +#undef kpow +#define kexp(x_) ({ CCTK_REAL_VEC const x_exp=(x_); vec_set(exp(vec_elt0(x_exp)),exp(vec_elt1(x_exp))); }) +#define klog(x_) ({ CCTK_REAL_VEC const x_log=(x_); vec_set(log(vec_elt0(x_log)),log(vec_elt1(x_log))); }) +#define kpow(x_,a_) ({ CCTK_REAL_VEC const x_pow=(x_); CCTK_REAL const a_pow=(a_); vec_set(pow(vec_elt0(x_pow),a_pow),pow(vec_elt1(x_pow),a_pow)); }) + + + +#undef Sign +#define Sign(x) (42) + +#undef ToReal +#define ToReal(x) (vec_set1(x)) diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-SSE2.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-SSE2.hh new file mode 100644 index 0000000..4a4eea6 --- /dev/null +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-SSE2.hh @@ -0,0 +1,194 @@ +// Vectorise using Intel's or AMD's SSE2 + + + +#include <emmintrin.h> + +// Vector type corresponding to CCTK_REAL +struct CCTK_REAL_VEC { + // The underlying scalar and vector types + typedef double S; + typedef __m128d V; + V v; + + // Convert from and to the underlying vector type + inline CCTK_REAL_VEC(V const v_): v(v_) { } + inline operator V const() const { return v; } + + inline CCTK_REAL_VEC() { } + + // Copy constructor + inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { } +}; + +// Number of vector elements in a CCTK_REAL_VEC +static +int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); + + + +// Create vectors, extract vector elements + +DEFINE_FUNCTION_R_V(vec_set1,_mm_set1_pd(a)) +DEFINE_FUNCTION_RR_V(vec_set,_mm_set_pd(b,a)) + +// Get a scalar from the vector +#if defined(__PGI) && defined (__amd64__) +// _mm_cvtsd_f64 does not exist on PGI compilers +// DEFINE_FUNCTION_V_R(vec_elt0,({ CCTK_REAL a; _mm_store_sd(&a,x); a; })) +// DEFINE_FUNCTION_V_R(vec_elt0,(*(CCTK_REAL const*)&x)) +// This generates the fastest code with PGI compilers +DEFINE_FUNCTION_V_R(vec_elt0,({ CCTK_REAL a; asm ("" : "=x" (a) : "0" (x)); a;
})) +#else +DEFINE_FUNCTION_V_R(vec_elt0,_mm_cvtsd_f64(x)) // this is a no-op +#endif +DEFINE_FUNCTION_V_R(vec_elt1,vec_elt0(_mm_unpackhi_pd(x,x))) + + + +// Load and store vectors + +// Load a vector from memory (aligned and unaligned); this loads from +// a reference to a scalar +DEFINE_FUNCTION_PR_V(vec_load,_mm_load_pd(&p)) +DEFINE_FUNCTION_PR_V(vec_loadu,_mm_loadu_pd(&p)) + +// Load a vector from memory that may or may not be aligned, as +// decided by the offset off and the vector size +// Implementation: default to unaligned load +template<int mod> +DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl,vec_loadu(p)) +template<int mod1, int mod2, int mod3> +DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl3,vec_loadu(p)) +// Implementation: load aligned if the modulus is zero +template<> +inline +CCTK_REAL_VEC vec_loadu_maybe_impl<0> (CCTK_REAL const& p) +{ + return vec_load(p); +} +template<> +inline +CCTK_REAL_VEC vec_loadu_maybe_impl3<0,0,0> (CCTK_REAL const& p) +{ + return vec_load(p); +} +// Call the implementation with the modulus +#define vec_loadu_maybe(off,p) \ + (vec_loadu_maybe_impl<(off)&(CCTK_REAL_VEC_SIZE-1)>(p)) +#define vec_loadu_maybe3(off1,off2,off3,p) \ + (vec_loadu_maybe_impl3<(off1)&(CCTK_REAL_VEC_SIZE-1), \ + (off2)&(CCTK_REAL_VEC_SIZE-1), \ + (off3)&(CCTK_REAL_VEC_SIZE-1)>(p)) + +// Store a vector to memory (aligned and non-temporal); this stores to +// a reference to a scalar +DEFINE_FUNCTION_PRV(vec_store,_mm_store_pd(&p,x)) +DEFINE_FUNCTION_PRV(vec_storeu,_mm_storeu_pd(&p,x)) +#if defined(KRANC_CACHE) +DEFINE_FUNCTION_PRV(vec_store_nta,_mm_stream_pd(&p,x)) +#else +DEFINE_FUNCTION_PRV(vec_store_nta,_mm_store_pd(&p,x)) +#endif + +// Store a lower or higher partial vector (aligned and non-temporal); +// the non-temporal hint is probably ignored +static inline +void vec_store_nta_partial_lo (CCTK_REAL& p, CCTK_REAL_VEC const x, int const n) +{ + switch (n) { + case 1: _mm_storel_pd(&p,x); break; + default: assert(0); + } +} +static inline +void vec_store_nta_partial_hi (CCTK_REAL& p,
CCTK_REAL_VEC const x, int const n) +{ + switch (n) { + case 1: _mm_storeh_pd((&p)+1,x); break; + default: assert(0); + } +} + + + +// Functions and operators + +// Single-argument operators +#if 0 +DEFINE_FUNCTION_V_V(operator+,x) +static CCTK_REAL_VEC const vec_neg_mask = + (CCTK_REAL_VEC::V)(__m128i) { 0x8000000000000000ULL, 0x8000000000000000ULL }; +DEFINE_FUNCTION_V_V(operator-,_mm_xor_pd(x,vec_neg_mask)) +#endif +DEFINE_FUNCTION_V_V(operator+,+x.v) +DEFINE_FUNCTION_V_V(operator-,-x.v) + +// Double-argument operators, both vectors +#if 0 +DEFINE_FUNCTION_VV_V(operator+,_mm_add_pd(x,y)) +DEFINE_FUNCTION_VV_V(operator-,_mm_sub_pd(x,y)) +DEFINE_FUNCTION_VV_V(operator*,_mm_mul_pd(x,y)) +DEFINE_FUNCTION_VV_V(operator/,_mm_div_pd(x,y)) +#endif +DEFINE_FUNCTION_VV_V(operator+,x.v+y.v) +DEFINE_FUNCTION_VV_V(operator-,x.v-y.v) +DEFINE_FUNCTION_VV_V(operator*,x.v*y.v) +DEFINE_FUNCTION_VV_V(operator/,x.v/y.v) + +// Double-argument operators, vector and scalar +DEFINE_FUNCTION_VR_V(operator+,x+vec_set1(a)) +DEFINE_FUNCTION_VR_V(operator-,x-vec_set1(a)) +DEFINE_FUNCTION_VR_V(operator*,x*vec_set1(a)) +DEFINE_FUNCTION_VR_V(operator/,x/vec_set1(a)) + +// Double-argument operators, scalar and vector +DEFINE_FUNCTION_RV_V(operator+,vec_set1(a)+x) +DEFINE_FUNCTION_RV_V(operator-,vec_set1(a)-x) +DEFINE_FUNCTION_RV_V(operator*,vec_set1(a)*x) +DEFINE_FUNCTION_RV_V(operator/,vec_set1(a)/x) + +// Cheap functions +#if defined(__PGI) && defined (__amd64__) +// The PGI compiler does not understand __m128d literals +static union { + CCTK_REAL_VEC::S s[CCTK_REAL_VEC_SIZE]; + CCTK_REAL_VEC::V v; +} vec_fabs_mask_impl = { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL }; +# define vec_fabs_mask (vec_fabs_mask_impl.v) +#else +static CCTK_REAL_VEC const vec_fabs_mask = + (CCTK_REAL_VEC::V)(__m128i) { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL }; +#endif +DEFINE_FUNCTION_V_V(fabs,_mm_and_pd(x,vec_fabs_mask)) +DEFINE_FUNCTION_VV_V(fmax,_mm_max_pd(x,y)) 
+DEFINE_FUNCTION_VV_V(fmin,_mm_min_pd(x,y)) +DEFINE_FUNCTION_V_V(sqrt,_mm_sqrt_pd(x)) + +// Expensive functions +DEFINE_FUNCTION_V_V(exp,vec_set(exp(vec_elt0(x)),exp(vec_elt1(x)))) +DEFINE_FUNCTION_V_V(log,vec_set(log(vec_elt0(x)),log(vec_elt1(x)))) +DEFINE_FUNCTION_VR_V(pow,vec_set(pow(vec_elt0(x),a),pow(vec_elt1(x),a))) + + + +#undef Sign +#define Sign(x) (42) + +#undef ToReal +#define ToReal(x) vec_set1(x) + +#if defined(__PGI) && defined (__amd64__) +// Special case for PGI 9.0.4 to avoid an internal compiler error +#undef IfThen +static inline +CCTK_REAL_VEC IfThen (bool const cond, CCTK_REAL_VEC const x, CCTK_REAL_VEC const y) +{ + union { + __m128i vi; + CCTK_REAL_VEC::V v; + } mask; + mask.vi = _mm_set1_epi64x(-(long long)cond); + return _mm_or_pd(_mm_and_pd(x.v, mask.v), _mm_andnot_pd(mask.v, y.v)); +} +#endif diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-VSX-direct.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-VSX-direct.hh new file mode 100644 index 0000000..7e06017 --- /dev/null +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-VSX-direct.hh @@ -0,0 +1,111 @@ +// Vectorise using IBM's Altivec + +// Use the type vector double directly, without introducing a wrapper class +// Use macros instead of inline functions + + + +#include + +// Vector type corresponding to CCTK_REAL +typedef vector double CCTK_REAL_VEC; + +// Number of vector elements in a CCTK_REAL_VEC +static +int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); + + + +// Create vectors, extract vector elements + +#define vec_set1(a) (vec_splats(a)) +#if defined(__GNUC__) +// GNU doesn't support array indices on vectors +union vec_mask { + double elts[2]; + vector double v; +}; +# define vec_set(a,b) ({ vec_mask x_set; x_set.elts[0]=(a); x_set.elts[1]=(b); x_set.v; }) +#else +# define vec_set(a,b) ({ CCTK_REAL_VEC x_set; x_set[0]=(a); x_set[1]=(b); x_set; }) +#endif + +// Get a scalar from the vector +#if 
defined(__GNUC__) +// GNU doesn't support array indices on vectors +# define vec_elt0(x) ({ vec_mask x_elt0; x_elt0.v=(x); x_elt0.elts[0]; }) +# define vec_elt1(x) ({ vec_mask x_elt1; x_elt1.v=(x); x_elt1.elts[1]; }) +#else +# define vec_elt0(x) ((x)[0]) +# define vec_elt1(x) ((x)[1]) +#endif + + + +// Load and store vectors + +// Load a vector from memory (aligned and unaligned); this loads from +// a reference to a scalar +#define vec_load(p) (*(CCTK_REAL_VEC const*)&(p)) +#define vec_loadu(p) (vec_load(p)) + +// Load a vector from memory that may or may not be aligned, as +// decided by the offset off and the vector size +#define vec_loadu_maybe(off,p) (vec_load(p)) +#define vec_loadu_maybe3(off1,off2,off3,p) (vec_load(p)) + +// Store a vector to memory (aligned and non-temporal); this stores to +// a reference to a scalar +#define vec_store(p,x) (*(CCTK_REAL_VEC*)&(p)=(x)) +#define vec_storeu(p,x) (*(CCTK_REAL_VEC*)&(p)=(x)) +// TODO: Use stvxl instruction? +#define vec_store_nta(p,x) vec_store(p,x) + +// Store a lower or higher partial vector (aligned and non-temporal); +// the non-temporal hint is probably ignored +#define vec_store_nta_partial_lo(p,x,n) ((p)=vec_elt0(x)) +#define vec_store_nta_partial_hi(p,x,n) ((&(p))[1]=vec_elt1(x)) + + + +// Functions and operators + +// Other Altivec functions are: +// nabs: -abs a +// madd msub nmadd nmsub: [+-]a*b[+-]c + +// Triple-argument operators, all vectors +#undef fmadd +#undef fmsub +#undef fnmadd +#undef fnmsub +#define fmadd(x,y,z) (vec_madd(x,y,z)) +#define fmsub(x,y,z) (vec_msub(x,y,z)) +#define fnmadd(x,y,z) (vec_nmadd(x,y,z)) +#define fnmsub(x,y,z) (vec_nmsub(x,y,z)) + +// Cheap functions +#undef kfabs +#undef kfmax +#undef kfmin +#define kfabs(x) (vec_abs(x)) +#define kfmax(x,y) (vec_max(x,y)) +#define kfmin(x,y) (vec_min(x,y)) + +// Expensive functions +#undef kexp +#undef klog +#undef kpow +#undef ksqrt +#define kexp(x_) ({ CCTK_REAL_VEC const x_exp=(x_); 
vec_set(exp(vec_elt0(x_exp)),exp(vec_elt1(x_exp))); }) +#define klog(x_) ({ CCTK_REAL_VEC const x_log=(x_); vec_set(log(vec_elt0(x_log)),log(vec_elt1(x_log))); }) +#define kpow(x_,a_) ({ CCTK_REAL_VEC const x_pow=(x_); CCTK_REAL const a_pow=(a_); vec_set(pow(vec_elt0(x_pow),a_pow),pow(vec_elt1(x_pow),a_pow)); }) +#define ksqrt(x_) ({ CCTK_REAL_VEC const x_sqrt=(x_); vec_set(sqrt(vec_elt0(x_sqrt)),sqrt(vec_elt1(x_sqrt))); }) + + + +#undef Sign +#define Sign(x) (42) + +#undef ToReal +#define ToReal(x) (vec_set1((CCTK_REAL)(x))) diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-VSX.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-VSX.hh new file mode 100644 index 0000000..f591647 --- /dev/null +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-VSX.hh @@ -0,0 +1,212 @@ +// Vectorise using IBM's Altivec + + + +#include + +// Vector type corresponding to CCTK_REAL +struct CCTK_REAL_VEC { + // The underlying scalar and vector types + typedef double S; + typedef vector double V; + V v; + union vec_mask { + S elts[2]; + V v; + }; + + // Set a vector from scalars +#if 0 + // IBM + inline CCTK_REAL_VEC(S const a, S const b) { v[0]=a; v[1]=b; } +#endif +#if 0 + inline CCTK_REAL_VEC(S const a, S const b): + v(vec_mergel(vec_splats(a), vec_splats(b))) { } +#endif + inline CCTK_REAL_VEC(S const a, S const b) + { + vec_mask x; + x.elts[0] = a; + x.elts[1] = b; + v = x.v; + } + + // Set a vector from a scalar, replicating the scalar + // Note: Could also use vec_xlds instead + inline CCTK_REAL_VEC(S const a): v(vec_splats(a)) { } + + // Convert from and to the underlying vector type + inline CCTK_REAL_VEC(V const v_): v(v_) { } + inline operator V const() const { return v; } + + inline CCTK_REAL_VEC() { } + + // Copy constructor + inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { } +}; + +// Number of vector elements in a CCTK_REAL_VEC +static +int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); + + + +// 
Create vectors, extract vector elements +DEFINE_FUNCTION_R_V(vec_set1,CCTK_REAL_VEC(a)) +DEFINE_FUNCTION_RR_V(vec_set,CCTK_REAL_VEC(a,b)) + +// Get a scalar from the vector +#if 0 +// IBM +DEFINE_FUNCTION_V_R(vec_elt0,x.v[0]) +DEFINE_FUNCTION_V_R(vec_elt1,x.v[1]) +#endif +static inline CCTK_REAL vec_elt0(CCTK_REAL_VEC const x) +{ + CCTK_REAL_VEC::vec_mask x1; + x1.v = x; + return x1.elts[0]; +} +static inline CCTK_REAL vec_elt1(CCTK_REAL_VEC const x) +{ + CCTK_REAL_VEC::vec_mask x1; + x1.v = x; + return x1.elts[1]; +} + + + +// Load and store vectors + +// Load a vector from memory (aligned and unaligned); this loads from +// a reference to a scalar +DEFINE_FUNCTION_PR_V(vec_load,p) +#if 0 +// IBM +DEFINE_FUNCTION_PR_V(vec_loadu,vec_xld2(0,const_cast(&p))) +#endif +DEFINE_FUNCTION_PR_V(vec_loadu,p) + +// Load a vector from memory that may or may not be aligned, as +// decided by the offset off and the vector size +// Implementation: default to unaligned load +template +DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl,vec_loadu(p)) +template +DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl3,vec_loadu(p)) +// Implementation: load aligned if the modulus is zero +template<> +inline +CCTK_REAL_VEC vec_loadu_maybe_impl<0> (CCTK_REAL const& p) +{ + return vec_load(p); +} +template<> +inline +CCTK_REAL_VEC vec_loadu_maybe_impl3<0,0,0> (CCTK_REAL const& p) +{ + return vec_load(p); +} +// Call the implementation with the modulus +#define vec_loadu_maybe(off,p) \ + (vec_loadu_maybe_impl<(off)&(CCTK_REAL_VEC_SIZE-1>(p))) +#define vec_loadu_maybe3(off1,off2,off3,p) \ + (vec_loadu_maybe_impl3<(off1)&(CCTK_REAL_VEC_SIZE-1), \ + (off2)&(CCTK_REAL_VEC_SIZE-1), \ + (off3)&(CCTK_REAL_VEC_SIZE-1)>(p)) + +// Store a vector to memory (aligned and non-temporal); this stores to +// a reference to a scalar +DEFINE_FUNCTION_PRV(vec_store,*(CCTK_REAL_VEC::V*)&p=x) +DEFINE_FUNCTION_PRV(vec_storeu,*(CCTK_REAL_VEC::V*)&p=x) +// TODO: Use stvxl instruction? 
+DEFINE_FUNCTION_PRV(vec_store_nta,*(CCTK_REAL_VEC::V*)&p=x) + +// Store a lower or higher partial vector (aligned and non-temporal); +// the non-temporal hint is probably ignored +static inline +void vec_store_nta_partial_lo (CCTK_REAL& p, CCTK_REAL_VEC const x, int const n) +{ + switch (n) { + case 1: p=vec_elt0(x); break; + default: assert(0); + } +} +static inline +void vec_store_nta_partial_hi (CCTK_REAL& p, CCTK_REAL_VEC const x, int const n) +{ + switch (n) { + case 1: (&p)[1]=vec_elt1(x); break; + default: assert(0); + } +} + + + +// Functions and operators + +// Other Altivec functions are: +// nabs: -abs a +// madd msub nmadd nmsub: [+-]a*b[+-]c + +// Single-argument operators +#if 0 +DEFINE_FUNCTION_V_V(operator+,x) +DEFINE_FUNCTION_V_V(operator-,vec_neg(x)) +#endif +DEFINE_FUNCTION_V_V(operator+,+x.v) +DEFINE_FUNCTION_V_V(operator-,-x.v) + +// Double-argument operators, both vectors +#if 0 +DEFINE_FUNCTION_VV_V(operator+,vec_add(x,y)) +DEFINE_FUNCTION_VV_V(operator-,vec_sub(x,y)) +DEFINE_FUNCTION_VV_V(operator*,vec_mul(x,y)) +DEFINE_FUNCTION_VV_V(operator/,vec_div(x,y)) +#endif +DEFINE_FUNCTION_VV_V(operator+,x.v+y.v) +DEFINE_FUNCTION_VV_V(operator-,x.v-y.v) +DEFINE_FUNCTION_VV_V(operator*,x.v*y.v) +DEFINE_FUNCTION_VV_V(operator/,x.v/y.v) + +// Double-argument operators, vector and scalar +DEFINE_FUNCTION_VR_V(operator+,x+vec_set1(a)) +DEFINE_FUNCTION_VR_V(operator-,x-vec_set1(a)) +DEFINE_FUNCTION_VR_V(operator*,x*vec_set1(a)) +DEFINE_FUNCTION_VR_V(operator/,x/vec_set1(a)) + +// Double-argument operators, scalar and vector +DEFINE_FUNCTION_RV_V(operator+,vec_set1(a)+x) +DEFINE_FUNCTION_RV_V(operator-,vec_set1(a)-x) +DEFINE_FUNCTION_RV_V(operator*,vec_set1(a)*x) +DEFINE_FUNCTION_RV_V(operator/,vec_set1(a)/x) + +// Triple-argument operators, all vectors +#undef fmadd +#undef fmsub +#undef fnmadd +#undef fnmsub +DEFINE_FUNCTION_VVV_V(fmadd,vec_madd(x.v,y.v,z.v)) +DEFINE_FUNCTION_VVV_V(fmsub,vec_msub(x.v,y.v,z.v)) 
+DEFINE_FUNCTION_VVV_V(fnmadd,vec_nmadd(x.v,y.v,z.v)) +DEFINE_FUNCTION_VVV_V(fnmsub,vec_nmsub(x.v,y.v,z.v)) + +// Cheap functions +DEFINE_FUNCTION_V_V(fabs,vec_abs(x.v)) +DEFINE_FUNCTION_VV_V(fmax,vec_max(x.v,y.v)) +DEFINE_FUNCTION_VV_V(fmin,vec_min(x.v,y.v)) + +// Expensive functions +DEFINE_FUNCTION_V_V(exp,vec_set(exp(vec_elt0(x)),exp(vec_elt1(x)))) +DEFINE_FUNCTION_V_V(log,vec_set(log(vec_elt0(x)),log(vec_elt1(x)))) +DEFINE_FUNCTION_VR_V(pow,vec_set(pow(vec_elt0(x),a),pow(vec_elt1(x),a))) +DEFINE_FUNCTION_V_V(sqrt,vec_set(sqrt(vec_elt0(x)),sqrt(vec_elt1(x)))) + + + +#undef Sign +#define Sign(x) (42) + +#undef ToReal +#define ToReal(x) (vec_set1(x)) diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-default.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-default.hh new file mode 100644 index 0000000..f928ed8 --- /dev/null +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-default.hh @@ -0,0 +1,31 @@ +// Fallback vectorisation implementation: Do not vectorise + + + +// Use CCTK_REAL +typedef CCTK_REAL CCTK_REAL_VEC; + +// Number of vector elements in a CCTK_REAL_VEC +static int const CCTK_REAL_VEC_SIZE = 1; + + + +// We use macros here, so that we are not surprised by compilers which +// don't like to inline functions (e.g. PGI). This should also make +// debug builds (which may not inline) more efficient. 
+ +#define vec_load(p) (p) +#define vec_loadu(p) (p) + +// Load a vector from memory that may or may not be aligned, as +// decided by the offset off and the vector size +#define vec_loadu_maybe(off,p) (p) +#define vec_loadu_maybe3(off1,off2,off3,p) (p) + +#define vec_store(p,x) ((p)=(x)) +#define vec_store_nta(p,x) ((p)=(x)) + +// Store a lower or higher partial vector (aligned and non-temporal); +// the non-temporal hint is probably ignored +#define vec_store_nta_partial_lo(p,x,n) (assert(0)) +#define vec_store_nta_partial_hi(p,x,n) (assert(0)) diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-define.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-define.hh new file mode 100644 index 0000000..f5c0b22 --- /dev/null +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-define.hh @@ -0,0 +1,104 @@ +// Define some macros that simplify defining short function that are +// supposed to be inlined + + + +// Letters defining the prototype (argument and return value types): +// I: i,j: integer +// R: a,b: real +// V: x,y: vector (of real) +// P: p,q: pointer (i.e. const reference) to something +// L: l,m: L-value (i.e. 
non-const reference) to something + + + +// Load and store + +#define DEFINE_FUNCTION_PR_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL const& p) \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_PRV(name,expr) \ +static inline \ +void name (CCTK_REAL& p, CCTK_REAL_VEC const x) \ +{ \ + expr; \ +} + +#define DEFINE_FUNCTION_PVR(name,expr) \ +static inline \ +void name (CCTK_REAL_VEC& p, CCTK_REAL const a) \ +{ \ + expr; \ +} + + + +// Functions and operators + +#define DEFINE_FUNCTION_V_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL_VEC const x) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_V_R(name,expr) \ +static inline \ +CCTK_REAL name (CCTK_REAL_VEC const x) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_R_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL const a) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_VV_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL_VEC const x, CCTK_REAL_VEC const y) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_VR_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL_VEC const x, CCTK_REAL const a) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_RV_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL const a, CCTK_REAL_VEC const x) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_RR_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL const a, CCTK_REAL const b) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_VVV_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL_VEC const x, CCTK_REAL_VEC const y, CCTK_REAL_VEC const z) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-outdated.hh 
b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-outdated.hh new file mode 100644 index 0000000..df83b3a --- /dev/null +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-outdated.hh @@ -0,0 +1,591 @@ +#ifndef VECTORS_HH +#define VECTORS_HH + + + +// Vectorisation + +#include +#include +#include + +#include + + + +// I: i,j: integer +// R: a,b: real +// V: x,y: vector (of real) +// P: p,q: pointer (i.e. const reference) to something +// L: l,m: L-value (i.e. non-const reference) to something + +#define DEFINE_FUNCTION_PR_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL const& p) \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_PRV(name,expr) \ +static inline \ +void name (CCTK_REAL& p, CCTK_REAL_VEC const& x) \ +{ \ + expr; \ +} + +#define DEFINE_FUNCTION_PVR(name,expr) \ +static inline \ +void name (CCTK_REAL_VEC& p, CCTK_REAL const& a) \ +{ \ + expr; \ +} + +#define DEFINE_FUNCTION_V_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL_VEC const& x) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_V_R(name,expr) \ +static inline \ +CCTK_REAL name (CCTK_REAL_VEC const& x) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_R_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL const& a) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_VV_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL_VEC const& x, CCTK_REAL_VEC const& y) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_VR_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL_VEC const& x, CCTK_REAL const& a) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_RV_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL const& a, CCTK_REAL_VEC const& x) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_RR_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name 
(CCTK_REAL const& a, CCTK_REAL const& b) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + + + +// Intel, double +#if defined(KRANC_VECTORS) && defined(__SSE2__) && defined(CCTK_REAL_PRECISION_8) + +#include + +// Vector type corresponding to CCTK_REAL +struct CCTK_REAL_VEC { + // The underlying scalar and vector types + typedef double S; + typedef __m128d V; + V v; + + // Set a vector from scalars + inline CCTK_REAL_VEC(S const& a, S const& b): v(_mm_set_pd(b,a)) { } + + // Set a vector from a scalar, replicating the scalar + inline CCTK_REAL_VEC(S const& a): v(_mm_set1_pd(a)) { } + + // Convert from and to the underlying vector type + inline CCTK_REAL_VEC(V const& v_): v(v_) { } + inline operator V const() const { return v; } + + inline CCTK_REAL_VEC() { } + + // Copy constructor + inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { } +}; + +union vec_mask { + unsigned long long bits[2]; + CCTK_REAL_VEC::V v; +}; + +DEFINE_FUNCTION_R_V(vec_set1,_mm_set1_pd(a)) +DEFINE_FUNCTION_RR_V(vec_set,_mm_set_pd(b,a)) + +// Get a scalar from the vector +#if defined(__PGI) && defined (__amd64__) +// _mm_cvtsd_f64 does not exist on PGI compilers +static inline +CCTK_REAL vec_elt0 (CCTK_REAL_VEC const& x) +{ + CCTK_REAL a; _mm_store_sd(&a,x); return a; +} +#else +DEFINE_FUNCTION_V_R(vec_elt0,_mm_cvtsd_f64(x)) //this is a no-op +#endif + +#if 0 +DEFINE_FUNCTION_V_R(vec_elt1,vec_elt0(_mm_shuffle_pd(x,x,_MM_SHUFFLE2(1,1)))) +#endif +static inline +CCTK_REAL vec_elt1 (CCTK_REAL_VEC const& x) +{ + CCTK_REAL a; _mm_storeh_pd(&a,x); return a; +} + +// Load a vector from memory (aligned and unaligned); this loads from +// a reference to a scalar +DEFINE_FUNCTION_PR_V(vec_load,_mm_load_pd(&p)) +DEFINE_FUNCTION_PR_V(vec_loadu,_mm_loadu_pd(&p)) + +#if 0 +// Load a partial vector (duplicating the last loaded element to fill +// the remaining elements) +// TODO: Should this be aligned or unaligned? 
+static inline +CCTK_REAL_VEC vec_load_partial (CCTK_REAL const& p, int const n) +{ + switch (n) { + case 1: return _mm_load1_pd(p); + default: assert(0); + } +} +#endif + +// Load a vector from memory that may or may not be aligned, as +// decided by the offset off and the vector size +// Implementation: default to unaligned load +template +DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl,vec_loadu(p)) +template +DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl3,vec_loadu(p)) +// Implementation: load aligned if the modulus is zero +template<> +inline +CCTK_REAL_VEC vec_loadu_maybe_impl<0> (CCTK_REAL const& p) +{ + return vec_load(p); +} +template<> +inline +CCTK_REAL_VEC vec_loadu_maybe_impl3<0,0,0> (CCTK_REAL const& p) +{ + return vec_load(p); +} +// Call the implementation with the modulus +template +static inline +CCTK_REAL_VEC vec_loadu_maybe (CCTK_REAL const& p) +{ + return vec_loadu_maybe_impl(p); +} +template +static inline +CCTK_REAL_VEC vec_loadu_maybe3 (CCTK_REAL const& p) +{ + return vec_loadu_maybe_impl3(p); +} + +// Store a vector to memory (aligned and non-temporal); this stores to +// a reference to a scalar +DEFINE_FUNCTION_PRV(vec_store,_mm_store_pd(&p,x)) +DEFINE_FUNCTION_PRV(vec_store_nta,_mm_stream_pd(&p,x)) + +// Store a lower or higher partial vector (aligned and non-temporal); +// the non-temporal hint is probably ignored +static inline +void vec_storel_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n) +{ + switch (n) { + case 1: _mm_storel_pd(&p,x); break; + default: assert(0); + } +} +static inline +void vec_storeh_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n) +{ + switch (n) { + case 1: _mm_storeh_pd((&p)+1,x); break; + default: assert(0); + } +} + +// Double-argument operators, both vectors +DEFINE_FUNCTION_VV_V(operator+,_mm_add_pd(x,y)) +DEFINE_FUNCTION_VV_V(operator-,_mm_sub_pd(x,y)) +DEFINE_FUNCTION_VV_V(operator*,_mm_mul_pd(x,y)) +DEFINE_FUNCTION_VV_V(operator/,_mm_div_pd(x,y)) + +// Double-argument operators, vector and 
scalar +DEFINE_FUNCTION_VR_V(operator+,x+vec_set1(a)) +DEFINE_FUNCTION_VR_V(operator-,x-vec_set1(a)) +DEFINE_FUNCTION_VR_V(operator*,x*vec_set1(a)) +DEFINE_FUNCTION_VR_V(operator/,x/vec_set1(a)) + +// Double-argument operators, scalar and vector +DEFINE_FUNCTION_RV_V(operator+,vec_set1(a)+x) +DEFINE_FUNCTION_RV_V(operator-,vec_set1(a)-x) +DEFINE_FUNCTION_RV_V(operator*,vec_set1(a)*x) +DEFINE_FUNCTION_RV_V(operator/,vec_set1(a)/x) + +// Single-argument operators +DEFINE_FUNCTION_V_V(operator+,x) +#if 0 +DEFINE_FUNCTION_V_V(operator-,vec_set(0.0,0.0)-x) +#endif +static vec_mask const vec_neg_mask = +{ { 0x8000000000000000ULL, 0x8000000000000000ULL } }; +DEFINE_FUNCTION_V_V(operator-,_mm_xor_pd(x,vec_neg_mask.v)) + +// Cheap functions +static vec_mask const vec_fabs_mask = +{ { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL } }; +DEFINE_FUNCTION_V_V(fabs,_mm_and_pd(x,vec_fabs_mask.v)) +DEFINE_FUNCTION_VV_V(fmax,_mm_max_pd(x,y)) +DEFINE_FUNCTION_VV_V(fmin,_mm_min_pd(x,y)) +DEFINE_FUNCTION_V_V(sqrt,_mm_sqrt_pd(x)) + +// Expensive functions +DEFINE_FUNCTION_V_V(exp,vec_set(exp(vec_elt0(x)),exp(vec_elt1(x)))) +DEFINE_FUNCTION_V_V(log,vec_set(log(vec_elt0(x)),log(vec_elt1(x)))) +DEFINE_FUNCTION_VR_V(pow,vec_set(pow(vec_elt0(x),a),pow(vec_elt1(x),a))) + +// Special case for PGI to avoid internal compiler error +#if defined(__PGI) && defined (__amd64__) +#undef IfThen +CCTK_REAL_VEC IfThen (bool const cond, CCTK_REAL_VEC const& x, CCTK_REAL_VEC co\ +nst& y) +{ + return cond*x + (not cond)*y; +} +#endif + + + +#if 0 +// Try to use the __m128d type directly. + +// This does not really work, because it is not possible to define +// automatic conversion operators from double to __m128d, so that +// explicit conversions are required. This makes the code look more +// clumsy. 
+ +// Vector type corresponding to CCTK_REAL +typedef __m128d CCTK_REAL_VEC; + +DEFINE_FUNCTION_R_V(vec_set1,_mm_set1_pd(a)) +DEFINE_FUNCTION_RR_V(vec_set,_mm_set_pd(b,a)) + +// Get a scalar from the vector +static inline +CCTK_REAL vec_elt0 (CCTK_REAL_VEC const& x) +{ +#if 0 + // _mm_cvtsd_f64 does not exist on PGI compilers + return _mm_cvtsd_f64(x); // this is a no-op +#endif + CCTK_REAL a; _mm_store_sd(&a,x); return a; +} + +DEFINE_FUNCTION_V_R(vec_elt1,vec_elt0(_mm_shuffle_pd(x,x,_MM_SHUFFLE2(1,1)))) + +// Load a vector from memory (aligned and unaligned); this loads from +// a reference to a scalar +DEFINE_FUNCTION_PR_V(vec_load,_mm_load_pd(&p)) +DEFINE_FUNCTION_PR_V(vec_loadu,_mm_loadu_pd(&p)) + +// Store a vector to memory (aligned and non-temporal); this stores to +// a reference to a scalar +DEFINE_FUNCTION_PRV(vec_store,_mm_store_pd(&p,x)) +DEFINE_FUNCTION_PRV(vec_store_nta,_mm_stream_pd(&p,x)) + +// Cheap functions +static vec_mask const vec_fabs_mask = +{ { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL } }; +DEFINE_FUNCTION_V_V(fabs,_mm_and_pd(x,vec_fabs_mask.v)) +DEFINE_FUNCTION_VV_V(fmax,_mm_max_pd(x,y)) +DEFINE_FUNCTION_VV_V(fmin,_mm_min_pd(x,y)) +DEFINE_FUNCTION_V_V(sqrt,_mm_sqrt_pd(x)) + +// Expensive functions +DEFINE_FUNCTION_V_V(exp,set(exp(vec_elt0(x)),exp(vec_elt1(x)))) +DEFINE_FUNCTION_V_V(log,set(log(vec_elt0(x)),log(vec_elt1(x)))) +DEFINE_FUNCTION_VR_V(pow,set(pow(vec_elt0(x),a),pow(vec_elt1(x),a))) + +#endif + + + +// Intel, float +#elif defined(KRANC_VECTORS) && defined(__SSE__) && defined(CCTK_REAL_PRECISION_4) + +#include + +// A vector type corresponding to CCTK_REAL +typedef __m128 CCTK_REAL_VEC; + + + +// Power, double +#elif defined(KRANC_VECTORS) && defined(__ALTIVEC__) && defined(_ARCH_PWR7) && defined(CCTK_REAL_PRECISION_8) + +#include + +// Vector type corresponding to CCTK_REAL +struct CCTK_REAL_VEC { + // The underlying scalar and vector types + typedef double S; + typedef vector double V; + V v; + + // vec_insert, 
vec_extract, vec_splat + + // Set a vector from scalars + inline CCTK_REAL_VEC(S const& a, S const& b) { v[0]=a; v[1]=b; } + + // Set a vector from a scalar, replicating the scalar + inline CCTK_REAL_VEC(S const& a): v(vec_splats(a)) { } + + // Convert from and to the underlying vector type + inline CCTK_REAL_VEC(V const& v_): v(v_) { } + inline operator V const() const { return v; } + + inline CCTK_REAL_VEC() { } + + // Copy constructor + inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { } +}; + +DEFINE_FUNCTION_R_V(vec_set1,CCTK_REAL_VEC(a)) +DEFINE_FUNCTION_RR_V(vec_set,CCTK_REAL_VEC(a,b)) + +// Get a scalar from the vector +DEFINE_FUNCTION_V_R(vec_elt0,x.v[0]) +DEFINE_FUNCTION_V_R(vec_elt1,x.v[1]) + +// Load a vector from memory (aligned and unaligned); this loads from +// a reference to a scalar +DEFINE_FUNCTION_PR_V(vec_load,p) +DEFINE_FUNCTION_PR_V(vec_loadu,vec_xld2(0,const_cast(&p))) +// vec_xlds + +// Load a vector from memory that may or may not be aligned, as +// decided by the offset off and the vector size +// Implementation: default to unaligned load +template +DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl,vec_loadu(p)) +// Implementation: load aligned if the modulus is zero +#define static +template<> +DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl<0>,vec_load(p)) +#undef static +// Call the implementation with the modulus +template +DEFINE_FUNCTION_PR_V(vec_loadu_maybe,vec_loadu_maybe_impl(p)) + +// Store a vector to memory (aligned and non-temporal); this stores to +// a reference to a scalar +DEFINE_FUNCTION_PRV(vec_store,*(CCTK_REAL_VEC::V*)&p=x) +DEFINE_FUNCTION_PRV(vec_store_nta,*(CCTK_REAL_VEC::V*)&p=x) + +// Store a lower or higher partial vector (aligned and non-temporal); +// the non-temporal hint is probably ignored +static inline +void vec_storel_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n) +{ + switch (n) { + case 1: p=x.v[0]; break; + default: assert(0); + } +} +static inline +void vec_storeh_partial (CCTK_REAL& p, 
CCTK_REAL_VEC const& x, int const n) +{ + switch (n) { + case 1: (&p)[1]=x.v[1]; break; + default: assert(0); + } +} + +// Double-argument operators, both vectors +DEFINE_FUNCTION_VV_V(operator+,vec_add(x,y)) +DEFINE_FUNCTION_VV_V(operator-,vec_sub(x,y)) +DEFINE_FUNCTION_VV_V(operator*,vec_mul(x,y)) +DEFINE_FUNCTION_VV_V(operator/,vec_div(x,y)) + +// Double-argument operators, vector and scalar +DEFINE_FUNCTION_VR_V(operator+,x+vec_set1(a)) +DEFINE_FUNCTION_VR_V(operator-,x-vec_set1(a)) +DEFINE_FUNCTION_VR_V(operator*,x*vec_set1(a)) +DEFINE_FUNCTION_VR_V(operator/,x/vec_set1(a)) + +// Double-argument operators, scalar and vector +DEFINE_FUNCTION_RV_V(operator+,vec_set1(a)+x) +DEFINE_FUNCTION_RV_V(operator-,vec_set1(a)-x) +DEFINE_FUNCTION_RV_V(operator*,vec_set1(a)*x) +DEFINE_FUNCTION_RV_V(operator/,vec_set1(a)/x) + +// Single-argument operators +DEFINE_FUNCTION_V_V(operator+,x) +DEFINE_FUNCTION_V_V(operator-,vec_neg(x)) + +// Cheap functions +DEFINE_FUNCTION_V_V(fabs,vec_abs(x)) +DEFINE_FUNCTION_VV_V(fmax,vec_max(x,y)) +DEFINE_FUNCTION_VV_V(fmin,vec_min(x,y)) + +// Expensive functions +DEFINE_FUNCTION_V_V(exp,vec_set(exp(vec_elt0(x)),exp(vec_elt1(x)))) +DEFINE_FUNCTION_V_V(log,vec_set(log(vec_elt0(x)),log(vec_elt1(x)))) +DEFINE_FUNCTION_VR_V(pow,vec_set(pow(vec_elt0(x),a),pow(vec_elt1(x),a))) +DEFINE_FUNCTION_V_V(sqrt,vec_set(sqrt(vec_elt0(x)),sqrt(vec_elt1(x)))) + + + +// Fallback: pseudo-vectorisation +#elif 0 + +// There is no vector type corresponding to CCTK_REAL +struct CCTK_REAL_VEC { + // The underlying scalar and vector types + CCTK_REAL v, w; + + // Set a vector from scalars + inline CCTK_REAL_VEC(CCTK_REAL const& a, CCTK_REAL const& b): v(a), w(b) { } + + // Set a vector from a scalar, replicating the scalar + inline CCTK_REAL_VEC(CCTK_REAL const& a): v(a), w(a) { } + + inline CCTK_REAL_VEC() { } + + // Copy constructor + inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x.v), w(x.w) { } +}; + + + +DEFINE_FUNCTION_PR_V(vec_load,*(CCTK_REAL_VEC const* 
restrict)&p) +DEFINE_FUNCTION_PR_V(vec_loadu,vec_load(p)) +// Load a vector from memory that may or may not be aligned, as +// decided by the offset off and the vector size +template +DEFINE_FUNCTION_PR_V(vec_loadm,vec_load(p)) + +DEFINE_FUNCTION_PRV(vec_store,*(CCTK_REAL_VEC* restrict)&p=x) +DEFINE_FUNCTION_PRV(vec_store_nta,vec_store(p,x)) + +// Double-argument operators, both vectors +DEFINE_FUNCTION_VV_V(operator+,CCTK_REAL_VEC(x.v+y.v,x.w+y.w)) +DEFINE_FUNCTION_VV_V(operator-,CCTK_REAL_VEC(x.v-y.v,x.w-y.w)) +DEFINE_FUNCTION_VV_V(operator*,CCTK_REAL_VEC(x.v*y.v,x.w*y.w)) +DEFINE_FUNCTION_VV_V(operator/,CCTK_REAL_VEC(x.v/y.v,x.w/y.w)) + +// Double-argument operators, vector and scalar +DEFINE_FUNCTION_VR_V(operator+,CCTK_REAL_VEC(x.v+a,x.w+a)) +DEFINE_FUNCTION_VR_V(operator-,CCTK_REAL_VEC(x.v-a,x.w-a)) +DEFINE_FUNCTION_VR_V(operator*,CCTK_REAL_VEC(x.v*a,x.w*a)) +DEFINE_FUNCTION_VR_V(operator/,CCTK_REAL_VEC(x.v/a,x.w/a)) + +// Double-argument operators, scalar and vector +DEFINE_FUNCTION_RV_V(operator+,CCTK_REAL_VEC(a+x.v,a+x.w)) +DEFINE_FUNCTION_RV_V(operator-,CCTK_REAL_VEC(a-x.v,a-x.w)) +DEFINE_FUNCTION_RV_V(operator*,CCTK_REAL_VEC(a*x.v,a*x.w)) +DEFINE_FUNCTION_RV_V(operator/,CCTK_REAL_VEC(a/x.v,a/x.w)) + +// Single-argument operators +DEFINE_FUNCTION_V_V(operator+,x) +DEFINE_FUNCTION_V_V(operator-,CCTK_REAL_VEC(-x.v,-x.w)) + +// Cheap functions +DEFINE_FUNCTION_V_V(fabs,CCTK_REAL_VEC(fabs(x.v),fabs(x.w))) +DEFINE_FUNCTION_VV_V(fmax,CCTK_REAL_VEC(fmax(x.v,y.v),fmax(x.w,y.w))) +DEFINE_FUNCTION_VV_V(fmin,CCTK_REAL_VEC(fmin(x.v,y.v),fmin(x.w,y.w))) +DEFINE_FUNCTION_V_V(sqrt,CCTK_REAL_VEC(sqrt(x.v),sqrt(x.w))) + +// Expensive functions +DEFINE_FUNCTION_V_V(exp,CCTK_REAL_VEC(exp(x.v),exp(x.w))) +DEFINE_FUNCTION_V_V(log,CCTK_REAL_VEC(log(x.v),log(x.w))) +DEFINE_FUNCTION_VR_V(pow,CCTK_REAL_VEC(pow(x.v,a),pow(x.w,a))) + + + +// Fallback: no vectorisation +#else + +// There is no vector type corresponding to CCTK_REAL +typedef CCTK_REAL CCTK_REAL_VEC; + + + 
+DEFINE_FUNCTION_PR_V(vec_load,p) +DEFINE_FUNCTION_PR_V(vec_loadu,p) +// Load a vector from memory that may or may not be aligned, as +// decided by the offset off and the vector size +// Implementation: default to unaligned load +template +DEFINE_FUNCTION_PR_V(vec_loadu_maybe,p) +template +DEFINE_FUNCTION_PR_V(vec_loadu_maybe3,p) + +DEFINE_FUNCTION_PRV(vec_store,p=x) +DEFINE_FUNCTION_PRV(vec_store_nta,p=x) + +// Store a lower or higher partial vector (aligned and non-temporal); +// the non-temporal hint is probably ignored +static inline +void vec_storel_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n) +{ + assert(0); +} +static inline +void vec_storeh_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n) +{ + assert(0); +} + + + +#endif + + + +#undef DEFINE_FUNCTION_PR_V +#undef DEFINE_FUNCTION_PRV +#undef DEFINE_FUNCTION_V_V +#undef DEFINE_FUNCTION_R_V +#undef DEFINE_FUNCTION_VV_V +#undef DEFINE_FUNCTION_VR_V +#undef DEFINE_FUNCTION_RV_V +#undef DEFINE_FUNCTION_RR_V + + + +// Number of vector elements in a CCTK_REAL_VEC +static +int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); + + + +#endif // #ifndef VECTORS_HH diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-pseudo.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-pseudo.hh new file mode 100644 index 0000000..f439c9b --- /dev/null +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-pseudo.hh @@ -0,0 +1,72 @@ +// Pseudo vectorisation using scalar operations + + + +// Number of vector elements in a CCTK_REAL_VEC +static int const CCTK_REAL_VEC_SIZE = 2; + +// There is no vector type corresponding to CCTK_REAL +struct CCTK_REAL_VEC { + // The underlying scalar and vector types + CCTK_REAL v[CCTK_REAL_VEC_SIZE]; + + // Set a vector from scalars + inline CCTK_REAL_VEC(CCTK_REAL const& a, CCTK_REAL const& b): v(a), w(b) { } + + // Set a vector from a scalar, replicating the scalar + inline CCTK_REAL_VEC(CCTK_REAL const& a): 
v(a), w(a) { } + + inline CCTK_REAL_VEC() { } + + // Copy constructor + inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x) { v[0]=x.v[0]; v[1]=x.v[1]; } +}; + + + +// Load and store vectors + +DEFINE_FUNCTION_PR_V(vec_load,*(CCTK_REAL_VEC const* restrict)&p) +DEFINE_FUNCTION_PR_V(vec_loadu,vec_load(p)) +// Load a vector from memory that may or may not be aligned, as +// decided by the offset off and the vector size +#define vec_loadu_maybe(off,p) (vec_load(p)) +#define vec_loadu_maybe3(off1,off2,off3,p) (vec_load(p)) + +DEFINE_FUNCTION_PRV(vec_store,*(CCTK_REAL_VEC* restrict)&p=x) +DEFINE_FUNCTION_PRV(vec_store_nta,vec_store(p,x)) + + + +// Functions and operators + +// Double-argument operators, both vectors +DEFINE_FUNCTION_VV_V(operator+,CCTK_REAL_VEC(x.v[0]+y.v[0],x.v[1]+y.v[1])) +DEFINE_FUNCTION_VV_V(operator-,CCTK_REAL_VEC(x.v[0]-y.v[0],x.v[1]-y.v[1])) +DEFINE_FUNCTION_VV_V(operator*,CCTK_REAL_VEC(x.v[0]*y.v[0],x.v[1]*y.v[1])) +DEFINE_FUNCTION_VV_V(operator/,CCTK_REAL_VEC(x.v[0]/y.v[0],x.v[1]/y.v[1])) + +// Double-argument operators, vector and scalar +DEFINE_FUNCTION_VR_V(operator+,CCTK_REAL_VEC(x.v[0]+a,x.v[1]+a)) +DEFINE_FUNCTION_VR_V(operator-,CCTK_REAL_VEC(x.v[0]-a,x.v[1]-a)) +DEFINE_FUNCTION_VR_V(operator*,CCTK_REAL_VEC(x.v[0]*a,x.v[1]*a)) +DEFINE_FUNCTION_VR_V(operator/,CCTK_REAL_VEC(x.v[0]/a,x.v[1]/a)) + +// Double-argument operators, scalar and vector +DEFINE_FUNCTION_RV_V(operator+,CCTK_REAL_VEC(a+x.v[0],a+x.v[1])) +DEFINE_FUNCTION_RV_V(operator-,CCTK_REAL_VEC(a-x.v[0],a-x.v[1])) +DEFINE_FUNCTION_RV_V(operator*,CCTK_REAL_VEC(a*x.v[0],a*x.v[1])) +DEFINE_FUNCTION_RV_V(operator/,CCTK_REAL_VEC(a/x.v[0],a/x.v[1])) + +// Single-argument operators +DEFINE_FUNCTION_V_V(operator+,x) +DEFINE_FUNCTION_V_V(operator-,CCTK_REAL_VEC(-x.v[0],-x.v[1])) + +// Functions +DEFINE_FUNCTION_V_V(exp,CCTK_REAL_VEC(exp(x.v[0]),exp(x.v[1]))) +DEFINE_FUNCTION_V_V(fabs,CCTK_REAL_VEC(fabs(x.v[0]),fabs(x.v[1]))) 
+DEFINE_FUNCTION_VV_V(fmax,CCTK_REAL_VEC(fmax(x.v[0],y.v[0]),fmax(x.v[1],y.v[1]))) +DEFINE_FUNCTION_VV_V(fmin,CCTK_REAL_VEC(fmin(x.v[0],y.v[0]),fmin(x.v[1],y.v[1]))) +DEFINE_FUNCTION_V_V(log,CCTK_REAL_VEC(log(x.v[0]),log(x.v[1]))) +DEFINE_FUNCTION_VR_V(pow,CCTK_REAL_VEC(pow(x.v[0],a),pow(x.v[1],a))) +DEFINE_FUNCTION_V_V(sqrt,CCTK_REAL_VEC(sqrt(x.v[0]),sqrt(x.v[1]))) diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-undefine.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-undefine.hh new file mode 100644 index 0000000..0d950c7 --- /dev/null +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-undefine.hh @@ -0,0 +1,14 @@ +// Undefine all macros defined in "Vectors-define.hh", so that we +// leave a clean namespace + + + +#undef DEFINE_FUNCTION_PR_V +#undef DEFINE_FUNCTION_PRV +#undef DEFINE_FUNCTION_V_V +#undef DEFINE_FUNCTION_R_V +#undef DEFINE_FUNCTION_VV_V +#undef DEFINE_FUNCTION_VR_V +#undef DEFINE_FUNCTION_RV_V +#undef DEFINE_FUNCTION_RR_V +#undef DEFINE_FUNCTION_VVV_V diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors.hh index 3fb77e1..d32afb2 100644 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors.hh +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors.hh @@ -5,231 +5,47 @@ // Vectorisation +#include #include #include -#include - - - -// I: i,j: integer -// R: a,b: real -// V: x,y: vector (of real) -// P: p,q: pointer (i.e. const reference) to something -// L: l,m: L-value (i.e. 
non-const reference) to something - -#define DEFINE_FUNCTION_PR_V(name,expr) \ -inline \ -CCTK_REAL_VEC name (CCTK_REAL const& p) \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_PRV(name,expr) \ -inline \ -void name (CCTK_REAL& p, CCTK_REAL_VEC const& x) \ -{ \ - expr; \ -} - -#define DEFINE_FUNCTION_PVR(name,expr) \ -inline \ -void name (CCTK_REAL_VEC& p, CCTK_REAL const& a) \ -{ \ - expr; \ -} - -#define DEFINE_FUNCTION_V_V(name,expr) \ -inline \ -CCTK_REAL_VEC name (CCTK_REAL_VEC const& x) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return CCTK_REAL_VEC(expr); \ -} - -#define DEFINE_FUNCTION_V_R(name,expr) \ -inline \ -CCTK_REAL name (CCTK_REAL_VEC const& x) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_R_V(name,expr) \ -inline \ -CCTK_REAL_VEC name (CCTK_REAL const& a) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_VV_V(name,expr) \ -inline \ -CCTK_REAL_VEC name (CCTK_REAL_VEC const& x, CCTK_REAL_VEC const& y) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_VR_V(name,expr) \ -inline \ -CCTK_REAL_VEC name (CCTK_REAL_VEC const& x, CCTK_REAL const& a) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_RV_V(name,expr) \ -inline \ -CCTK_REAL_VEC name (CCTK_REAL const& a, CCTK_REAL_VEC const& x) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_RR_V(name,expr) \ -inline \ -CCTK_REAL_VEC name (CCTK_REAL const& a, CCTK_REAL const& b) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - - - -// Intel, double -#if defined(KRANC_VECTORS) && defined(__SSE2__) && defined(CCTK_REAL_PRECISION_8) - -#include - -// Vector type corresponding to CCTK_REAL -struct CCTK_REAL_VEC { - // The underlying scalar and vector types - typedef double S; - typedef __m128d V; - static int const n = sizeof(V)/sizeof(S); - V v; - - // Set a vector from scalars - CCTK_REAL_VEC(S const& a, S const& b): v(_mm_set_pd(a,b)) { }; - - // Get a scalar from the 
vector - S elt0() const { return _mm_cvtsd_f64(v); /* this is a no-op */ } - S elt1() const { return _mm_cvtsd_f64(_mm_shuffle_pd(v,v,_MM_SHUFFLE2(1,1))); } - - // Set a vector from a scalar, replicating the scalar - CCTK_REAL_VEC(S const& a): v(_mm_set1_pd(a)) { }; - - // Convert from and to the underlying vector type - CCTK_REAL_VEC(V const& v_): v(v_) { }; - operator V const() const { return v; } - - CCTK_REAL_VEC() { }; - - // Copy constructor - CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { }; -}; - -// Load a vector from memory (aligned and unaligned); this loads from -// a reference to a scalar -DEFINE_FUNCTION_PR_V(vec_load,_mm_load_pd(&p)); -DEFINE_FUNCTION_PR_V(vec_loadu,_mm_loadu_pd(&p)); - -// Store a vector to memory (aligned and non-temporal); this stores to -// a reference to a scalar -DEFINE_FUNCTION_PRV(vec_store,_mm_store_pd(&p,x)) -DEFINE_FUNCTION_PRV(vec_store_nta,_mm_stream_pd(&p,x)) - -// Double-argument operators, both vectors -DEFINE_FUNCTION_VV_V(operator+,_mm_add_pd(x,y)) -DEFINE_FUNCTION_VV_V(operator-,_mm_sub_pd(x,y)) -DEFINE_FUNCTION_VV_V(operator*,_mm_mul_pd(x,y)) -DEFINE_FUNCTION_VV_V(operator/,_mm_div_pd(x,y)) - -// Double-argument operators, vector and scalar -DEFINE_FUNCTION_VR_V(operator+,x+CCTK_REAL_VEC(a)) -DEFINE_FUNCTION_VR_V(operator-,x-CCTK_REAL_VEC(a)) -DEFINE_FUNCTION_VR_V(operator*,x*CCTK_REAL_VEC(a)) -DEFINE_FUNCTION_VR_V(operator/,x/CCTK_REAL_VEC(a)) - -// Double-argument operators, scalar and vector -DEFINE_FUNCTION_RV_V(operator+,CCTK_REAL_VEC(a)+x) -DEFINE_FUNCTION_RV_V(operator-,CCTK_REAL_VEC(a)-x) -DEFINE_FUNCTION_RV_V(operator*,CCTK_REAL_VEC(a)*x) -DEFINE_FUNCTION_RV_V(operator/,CCTK_REAL_VEC(a)/x) - -// Single-argument operators -DEFINE_FUNCTION_V_V(operator+,x) -DEFINE_FUNCTION_V_V(operator-,0.0-x) - -// Cheap functions -static union { - unsigned long long const bits[2]; - CCTK_REAL_VEC::V v; -} const fabs_mask = - { { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL } }; 
-DEFINE_FUNCTION_V_V(fabs,_mm_and_pd(x,fabs_mask.v)) -DEFINE_FUNCTION_VV_V(fmax,_mm_max_pd(x,y)) -DEFINE_FUNCTION_VV_V(fmin,_mm_min_pd(x,y)) -DEFINE_FUNCTION_V_V(sqrt,_mm_sqrt_pd(x)) - -// Expensive functions -DEFINE_FUNCTION_V_V(exp,CCTK_REAL_VEC(exp(x.elt0()),exp(x.elt1()))) -DEFINE_FUNCTION_V_V(log,CCTK_REAL_VEC(log(x.elt0()),log(x.elt1()))) -DEFINE_FUNCTION_VR_V(pow,CCTK_REAL_VEC(pow(x.elt0(),a),pow(x.elt1(),a))) - -// Un-implemented functions -DEFINE_FUNCTION_V_R(signbit,0) - - - -#if 0 -// Intel, float -#elif defined(KRANC_VECTORS) && defined(__SSE__) && defined(CCTK_REAL_PRECISION_4) - -#include - -// A vector type corresponding to CCTK_REAL -typedef __m128 CCTK_REAL_VEC; -#endif - +#include -// Fallback: no vectorisation -#else - -// There is no vector type corresponding to CCTK_REAL -typedef CCTK_REAL CCTK_REAL_VEC; +#include "Vectors-define.hh" -DEFINE_FUNCTION_PR_V(vec_load,p) -DEFINE_FUNCTION_PR_V(vec_loadu,p) +#if defined(KRANC_VECTORS) +// Vectorise -DEFINE_FUNCTION_PRV(vec_store,p=x) -DEFINE_FUNCTION_PRV(vec_store_nta,p=x) +# if ! 
defined(CCTK_REAL_PRECISION_8) +# error "Vectorisation is currently only supported for double precision" +# endif -DEFINE_FUNCTION_V_R(signbit,x<0) +# if defined(__SSE2__) // SSE2 (Intel) +# if defined(KRANC_DIRECT) +# include "Vectors-SSE2-direct.hh" +# else +# include "Vectors-SSE2.hh" +# endif +# elif defined(__ALTIVEC__) && defined(_ARCH_PWR7) // Altivec (Power) +# if defined(KRANC_DIRECT) +# include "Vectors-VSX-direct.hh" +# else +# include "Vectors-VSX.hh" +# endif +# else +# include "Vectors-pseudo.hh" +# endif +#else +// Don't vectorise +# include "Vectors-default.hh" #endif - - -#undef DEFINE_FUNCTION_PR_V -#undef DEFINE_FUNCTION_PRV -#undef DEFINE_FUNCTION_V_V -#undef DEFINE_FUNCTION_R_V -#undef DEFINE_FUNCTION_VV_V -#undef DEFINE_FUNCTION_VR_V -#undef DEFINE_FUNCTION_RV_V -#undef DEFINE_FUNCTION_RR_V - - - -// Number of vector elements in a CCTK_REAL_VEC -static -size_t const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); +#include "Vectors-undefine.hh" diff --git a/Tools/CodeGen/CalculationFunction.m b/Tools/CodeGen/CalculationFunction.m index 3d86503..df79553 100644 --- a/Tools/CodeGen/CalculationFunction.m +++ b/Tools/CodeGen/CalculationFunction.m @@ -178,7 +178,7 @@ localName[x_] := definePreDefinitions[pDefs_] := CommentedBlock["Initialize predefined quantities", - Map[DeclareAssignVariable["CCTK_REAL", #[[1]], #[[2]]] &, pDefs]]; + Map[DeclareAssignVariable["CCTK_REAL_VEC", #[[1]], #[[2]]] &, pDefs]]; (* -------------------------------------------------------------------------- Equations @@ -326,7 +326,7 @@ Options[CreateCalculationFunction] = ThornOptions; CreateCalculationFunction[calc_, debug_, useCSE_, opts:OptionsPattern[]] := Module[{gfs, allSymbols, knownSymbols, - shorts, eqs, parameters, + shorts, eqs, parameters, parameterRules, functionName, dsUsed, groups, pddefs, cleancalc, eqLoop, where, addToStencilWidth, pDefs, haveCondTextuals, condTextuals}, @@ -366,10 +366,14 @@ CreateCalculationFunction[calc_, debug_, useCSE_, 
opts:OptionsPattern[]] := If[!lookupDefault[cleancalc, NoSimplify, False], InfoMessage[InfoFull, "Simplifying equations", eqs]; - eqs = Simplify[eqs, {r>0}]]]; + eqs = Simplify[eqs, {r>=0}]]]; InfoMessage[InfoFull, "Equations:"]; + (* Wrap parameters with ToReal *) + parameterRules = Map[(#->ToReal[#])&, parameters]; + eqs = eqs /. parameterRules; + Map[printEq, eqs]; (* Check all the function names *) @@ -537,7 +541,7 @@ equationLoop[eqs_, cleancalc_, gfs_, shorts_, incs_, groups_, pddefs_, GenericGridLoop[functionName, { - DeclareDerivatives[defsWithoutShorts, eqsOrdered], + (* DeclareDerivatives[defsWithoutShorts, eqsOrdered], *) CommentedBlock["Assign local copies of grid functions", Map[DeclareMaybeAssignVariableInLoop[ @@ -558,6 +562,22 @@ equationLoop[eqs_, cleancalc_, gfs_, shorts_, incs_, groups_, pddefs_, Map[InfoVariable[#[[1]]] &, (eqs2 /. localMap)], ""], + CommentedBlock["If necessary, store only partial vectors after the first iteration", + ConditionalOnParameterTextual["CCTK_REAL_VEC_SIZE > 1 && i 1 && i+CCTK_REAL_VEC_SIZE > lc_imax", + { + DeclareAssignVariable["int", "elt_count", "lc_imax-i"], + Map[StoreLowPartialVariableInLoop[GridName[#], localName[#], "elt_count"] &, + gfsInLHS], + "break;\n" + }]], CommentedBlock["Copy local copies back to grid functions", Map[StoreVariableInLoop[GridName[#], localName[#]] &, gfsInLHS]], diff --git a/Tools/CodeGen/CodeGen.m b/Tools/CodeGen/CodeGen.m index de8dbd2..09e7fe1 100644 --- a/Tools/CodeGen/CodeGen.m +++ b/Tools/CodeGen/CodeGen.m @@ -62,14 +62,16 @@ AssignVariableInLoop::usage = "AssignVariableInLoop[dest_, src_] returns a block "that assigns 'src' to 'dest'."; StoreVariableInLoop::usage = "StoreVariableInLoop[dest_, src_] returns a block of code " <> "that assigns 'src' to 'dest'."; +StoreLowPartialVariableInLoop::usage = "StoreLowPartialVariableInLoop[dest_, src_, count_] returns a block of code " <> + "that assigns 'src' to 'dest'."; +StoreHighPartialVariableInLoop::usage = 
"StoreHighPartialVariableInLoop[dest_, src_, count_] returns a block of code " <> + "that assigns 'src' to 'dest'."; DeclareAssignVariableInLoop::usage = "DeclareAssignVariableInLoop[type_, dest_, src_] returns a block of code " <> "that assigns 'src' to 'dest'."; MaybeAssignVariableInLoop::usage = "MaybeAssignVariableInLoop[dest_, src_, cond_] returns a block of code " <> "that assigns 'src' to 'dest'."; DeclareMaybeAssignVariableInLoop::usage = "DeclareMaybeAssignVariableInLoop[type_, dest_, src_, cond_] returns a block of code " <> "that assigns 'src' to 'dest'."; -UNUSEDDeclareVariablesInLoopVectorised::usage = ""; -UNUSEDAssignVariablesInLoopVectorised::usage = ""; TestForNaN::usage = "TestForNaN[expr_] returns a block of code " <> "that tests 'expr' for nan."; CommentedBlock::usage = "CommentedBlock[comment, block] returns a block consisting " <> @@ -285,62 +287,24 @@ AssignVariableInLoop[dest_, src_] := StoreVariableInLoop[dest_, src_] := {"vec_store_nta(", dest, ",", src, ")", EOL[]}; +StoreLowPartialVariableInLoop[dest_, src_, count_] := + {"vec_store_nta_partial_lo(", dest, ",", src, ",", count, ")", EOL[]}; + +StoreHighPartialVariableInLoop[dest_, src_, count_] := + {"vec_store_nta_partial_hi(", dest, ",", src, ",", count, ")", EOL[]}; + DeclareAssignVariableInLoop[type_, dest_, src_] := {type, " const ", dest, " = vec_load(", src, ")", EOL[]}; MaybeAssignVariableInLoop[dest_, src_, cond_] := If [cond, - {dest, " = useMatter ? vec_load(", src, ") : 0.0", EOL[]}, + {dest, " = useMatter ? vec_load(", src, ") : ToReal(0.0)", EOL[]}, {dest, " = vec_load(", src, ")", EOL[]}]; DeclareMaybeAssignVariableInLoop[type_, dest_, src_, mmaCond_, codeCond_] := If [mmaCond, - {type, " ", dest, " = (", codeCond, ") ? 
vec_load(", src, ") : 0.0", EOL[]}, - {type, " ", dest, " = vec_load(", src, ")", EOL[]}]; - -(* TODO: move these into OpenMP loop *) -UNUSEDDeclareVariablesInLoopVectorised[dests_, temps_, srcs_] := - { - {"#undef LC_PRELOOP_STATEMENTS", "\n"}, - {"#define LC_PRELOOP_STATEMENTS", " \\\n"}, - {"int const GFD_imin = lc_imin + ((lc_imin + cctk_lsh[0] * (j + cctk_lsh[1] * k)) & (CCTK_REAL_VEC_SIZE-1))", "; \\\n"}, - {"int const GFD_imax = lc_imax + ((lc_imax + cctk_lsh[0] * (j + cctk_lsh[1] * k)) & (CCTK_REAL_VEC_SIZE-1)) - CCTK_REAL_VEC_SIZE", "; \\\n"}, - Map[Function[x, Module[{dest, temp, src}, - {dest, temp, src} = x; - {"CCTK_REAL_VEC ", temp, "; \\\n"}]], - Transpose[{dests, temps, srcs}]], - {"\n"} - }; - -UNUSEDAssignVariablesInLoopVectorised[dests_, temps_, srcs_] := - { - {"{\n"}, - {" if (i < GFD_imin || i >= GFD_imax) {\n"}, - Map[Function[x, Module[{dest, temp, src}, - {dest, temp, src} = x; - {" ", dest, "[index] = ", src, EOL[]}]], - Transpose[{dests, temps, srcs}]], - {" } else {\n"}, - {" size_t const index0 = index & (CCTK_REAL_VEC_SIZE-1)", EOL[]}, - Map[Function[x, Module[{dest, temp, src}, - {dest, temp, src} = x; - {" ((CCTK_REAL*)&", temp, ")[index0] = ", - src, EOL[]}]], - Transpose[{dests, temps, srcs}]], - {" if (index0 == CCTK_REAL_VEC_SIZE-1) {\n"}, - {" size_t const index1 = index - (CCTK_REAL_VEC_SIZE-1)", EOL[]}, - Map[Function[x, Module[{dest, temp, src}, - {dest, temp, src} = x; - {" _mm_stream_pd (&", dest, "[index1], ", - temp, ")", EOL[]}]], - Transpose[{dests, temps, srcs}]], - {" }\n"}, - {" }\n"}, - {"}\n"} - }; - -UNUSEDAssignVariableInLoopsVectorised[dest_, temp_, src_] := - {"GFD_save_and_store(", dest, ",", "index", ",", "&", temp, ",", src, ")", EOL[]}; + {type, " ", dest, " = (", codeCond, ") ? 
vec_load(", src, ") : ToReal(0.0)", EOL[]}, + {type, " ", dest, " = vec_load(", src, ")", EOL[]}]; TestForNaN[expr_] := {"if (isnan(", expr, ")) {\n", @@ -463,13 +427,13 @@ DeclareFDVariables[] := InitialiseFDSpacingVariablesC[] := { - DeclareAssignVariable["CCTK_REAL", "dx", "CCTK_DELTA_SPACE(0)"], - DeclareAssignVariable["CCTK_REAL", "dy", "CCTK_DELTA_SPACE(1)"], - DeclareAssignVariable["CCTK_REAL", "dz", "CCTK_DELTA_SPACE(2)"], (* DeclareAssignVariable["int", "di", "CCTK_GFINDEX3D(cctkGH,1,0,0) - CCTK_GFINDEX3D(cctkGH,0,0,0)"], *) DeclareAssignVariable["int", "di", "1"], DeclareAssignVariable["int", "dj", "CCTK_GFINDEX3D(cctkGH,0,1,0) - CCTK_GFINDEX3D(cctkGH,0,0,0)"], - DeclareAssignVariable["int", "dk", "CCTK_GFINDEX3D(cctkGH,0,0,1) - CCTK_GFINDEX3D(cctkGH,0,0,0)"] + DeclareAssignVariable["int", "dk", "CCTK_GFINDEX3D(cctkGH,0,0,1) - CCTK_GFINDEX3D(cctkGH,0,0,0)"], + DeclareAssignVariable["CCTK_REAL_VEC", "dx", "ToReal(CCTK_DELTA_SPACE(0))"], + DeclareAssignVariable["CCTK_REAL_VEC", "dy", "ToReal(CCTK_DELTA_SPACE(1))"], + DeclareAssignVariable["CCTK_REAL_VEC", "dz", "ToReal(CCTK_DELTA_SPACE(2))"] }; InitialiseFDSpacingVariablesFortran[] := @@ -486,17 +450,17 @@ InitialiseFDVariables[] := InitialiseFDSpacingVariablesFortran[], InitialiseFDSpacingVariablesC[]], - DeclareAssignVariable["CCTK_REAL", "dxi", "1.0 / dx"], - DeclareAssignVariable["CCTK_REAL", "dyi", "1.0 / dy"], - DeclareAssignVariable["CCTK_REAL", "dzi", "1.0 / dz"], - DeclareAssignVariable["CCTK_REAL", "khalf", "0.5"], - DeclareAssignVariable["CCTK_REAL", "kthird", "1/3.0"], - DeclareAssignVariable["CCTK_REAL", "ktwothird", "2.0/3.0"], - DeclareAssignVariable["CCTK_REAL", "kfourthird", "4.0/3.0"], - DeclareAssignVariable["CCTK_REAL", "keightthird", "8.0/3.0"], - DeclareAssignVariable["CCTK_REAL", "hdxi", "0.5 * dxi"], - DeclareAssignVariable["CCTK_REAL", "hdyi", "0.5 * dyi"], - DeclareAssignVariable["CCTK_REAL", "hdzi", "0.5 * dzi"]}]; + DeclareAssignVariable["CCTK_REAL_VEC", "dxi", "INV(dx)"], + 
DeclareAssignVariable["CCTK_REAL_VEC", "dyi", "INV(dy)"], + DeclareAssignVariable["CCTK_REAL_VEC", "dzi", "INV(dz)"], + DeclareAssignVariable["CCTK_REAL_VEC", "khalf", "ToReal(0.5)"], + DeclareAssignVariable["CCTK_REAL_VEC", "kthird", "ToReal(1.0/3.0)"], + DeclareAssignVariable["CCTK_REAL_VEC", "ktwothird", "ToReal(2.0/3.0)"], + DeclareAssignVariable["CCTK_REAL_VEC", "kfourthird", "ToReal(4.0/3.0)"], + DeclareAssignVariable["CCTK_REAL_VEC", "keightthird", "ToReal(8.0/3.0)"], + DeclareAssignVariable["CCTK_REAL_VEC", "hdxi", "fmul(ToReal(0.5), dxi)"], + DeclareAssignVariable["CCTK_REAL_VEC", "hdyi", "fmul(ToReal(0.5), dyi)"], + DeclareAssignVariable["CCTK_REAL_VEC", "hdzi", "fmul(ToReal(0.5), dzi)"]}]; GridName[x_] := If[SOURCELANGUAGE == "C", ToExpression[ToString[x] <> "[index]"], @@ -657,20 +621,21 @@ GenericGridLoopUsingLoopControl[functionName_, block_] := CommentedBlock["Loop over the grid points", { "#pragma omp parallel\n", - "LC_LOOP3 (", functionName, ",\n", - " i,j,k, min[0],min[1],min[2], max[0],max[1],max[2],\n", - " cctk_lsh[0],cctk_lsh[1],cctk_lsh[2])\n", + "LC_LOOP3VEC (", functionName, ",\n", + " i,j,k, min[0],min[1],min[2], max[0],max[1],max[2],\n", + " cctk_lsh[0],cctk_lsh[1],cctk_lsh[2],\n", + " CCTK_REAL_VEC_SIZE)\n", "{\n", indentBlock[ { - DeclareVariable["index", "// int"], - DeclareAssignVariable["int", "index", "CCTK_GFINDEX3D(cctkGH,i,j,k)"], + (* DeclareVariable["index", "// int"], *) + (* DeclareAssignVariable["int", "index", "CCTK_GFINDEX3D(cctkGH,i,j,k)"], *) + DeclareAssignVariable["int", "index", "di*i + dj*j + dk*k"], block } ], - "i += CCTK_REAL_VEC_SIZE-1;\n", "}\n", - "LC_ENDLOOP3 (", functionName, ");\n" + "LC_ENDLOOP3VEC (", functionName, ");\n" } ], "" @@ -797,45 +762,117 @@ insertFile[name_] := (* Take an expression x and replace occurrences of Powers with the C macros SQR, CUB, QAD *) -ReplacePowers[x_] := - Module[{rhs}, - rhs = x /. 
Power[xx_, -1] -> INV[xx]; +ReplacePowers[expr_] := + Module[{rhs, fmaRules, arithRules}, + rhs = expr /. Power[xx_, -1] -> INV[xx]; If[SOURCELANGUAGE == "C", Module[{}, - rhs = rhs //. Power[xx_, 2] -> SQR[xx]; - rhs = rhs //. Power[xx_, 3] -> CUB[xx]; - rhs = rhs //. Power[xx_, 4] -> QAD[xx]; + rhs = rhs /. Power[xx_, 2 ] -> SQR[xx]; + rhs = rhs /. Power[xx_, 3 ] -> CUB[xx]; + rhs = rhs /. Power[xx_, 4 ] -> QAD[xx]; + rhs = rhs /. Power[xx_, -2 ] -> INV[SQR[xx]]; + rhs = rhs /. Power[xx_, 1/2] -> sqrt[xx]; + rhs = rhs /. Power[xx_, -1/2] -> INV[sqrt[xx]]; + rhs = rhs /. Power[xx_, 0.5] -> sqrt[xx]; + rhs = rhs /. Power[xx_, -0.5] -> INV[sqrt[xx]]; - rhs = rhs //. xx_/2 -> khalf xx; - rhs = rhs //. (-1/2) -> -khalf; + (* + rhs = rhs /. 1/2 -> khalf + rhs = rhs /. -1/2 -> -khalf; - rhs = rhs //. xx_/3 -> kthird xx; - rhs = rhs //. (-1/3) -> -kthird; + rhs = rhs /. 1/3 -> kthird; + rhs = rhs /. -1/3 -> -kthird; - rhs = rhs //. 2/3 -> ktwothird; - rhs = rhs //. (-2/3) -> -ktwothird; + rhs = rhs /. 2/3 -> ktwothird; + rhs = rhs /. -2/3 -> -ktwothird; - rhs = rhs //. 4/3 -> kfourthird; - rhs = rhs //. (-4/3) -> -kfourthird; + rhs = rhs /. 4/3 -> kfourthird; + rhs = rhs /. -4/3 -> -kfourthird; - rhs = rhs //. 8/3 -> keightthird; - rhs = rhs //. (-8/3) -> -keightthird; + rhs = rhs /. 8/3 -> keightthird; + rhs = rhs /. -8/3 -> -keightthird; + *) - rhs = rhs //. xx_ y_ + xx_ z_ -> xx(y+z); + (* Avoid rational numbers *) + rhs = rhs /. Rational[xx_,yy_] :> N[xx/yy, 30]; - rhs = rhs //. Power[E, power_] -> exp[power]; - rhs = rhs //. Power[xx_, 0.5] -> sqrt[xx]; + rhs = rhs //. IfThen[cond1_,xx1_,yy1_] + IfThen[cond2_,xx2_,yy2_] /; cond1==cond2 :> IfThen[cond1, Simplify[ xx1 + xx2], Simplify[ yy1 + yy2]]; + rhs = rhs //. ff1_ IfThen[cond1_,xx1_,yy1_] + IfThen[cond2_,xx2_,yy2_] /; cond1==cond2 :> IfThen[cond1, Simplify[ff1 xx1 + xx2], Simplify[ff1 yy1 + yy2]]; + rhs = rhs //. 
IfThen[cond1_,xx1_,yy1_] + ff2_ IfThen[cond2_,xx2_,yy2_] /; cond1==cond2 :> IfThen[cond1, Simplify[ xx1 + ff2 xx2], Simplify[ yy1 + ff2 yy2]]; + rhs = rhs //. ff1_ IfThen[cond1_,xx1_,yy1_] + ff2_ IfThen[cond2_,xx2_,yy2_] /; cond1==cond2 :> IfThen[cond1, Simplify[ff1 xx1 + ff2 xx2], Simplify[ff1 yy1 + ff2 yy2]]; + + (* Is this still a good idea when FMA instructions are used? *) + rhs = rhs //. xx_ yy_ + xx_ zz_ -> xx (yy+zz); + rhs = rhs //. xx_ yy_ - xx_ zz_ -> xx (yy-zz); + + rhs = rhs /. Power[E, power_] -> exp[power]; (* there have been some problems doing the Max/Min replacement via the preprocessor for C, so we do it here *) - rhs = rhs //. Max[xx_, yy_] -> fmax[xx, yy]; - rhs = rhs //. Min[xx_, yy_] -> fmin[xx, yy]; - - rhs = rhs //. Power[xx_, power_] -> pow[xx, power]], - - rhs = rhs //. Power[xx_, power_] -> xx^power + rhs = rhs /. Max[xx_, yy_] -> fmax[xx, yy]; + rhs = rhs /. Min[xx_, yy_] -> fmin[xx, yy]; + + rhs = rhs /. Power[xx_, power_] -> pow[xx, power]; + + (* FMA (fused multiply-add) instructions *) + (* Note that -x is represented as Times[-1, x] *) + isNotMinusOneQ[n_] := ! (IntegerQ[n] && n == -1); + isNotTimesMinusOneQ[n_] := ! MatchQ[n,- _]; + fmaRules = { + + (xx_? isNotMinusOneQ) (yy_? isNotMinusOneQ) + (zz_? isNotTimesMinusOneQ) :> fmadd [xx,yy,zz], + + (xx_? isNotMinusOneQ) (yy_? isNotMinusOneQ) - (zz_? isNotTimesMinusOneQ) :> fmsub [xx,yy,zz], + - (xx_? isNotMinusOneQ) (yy_? isNotMinusOneQ) + (zz_? isNotTimesMinusOneQ) :> fnmadd[xx,yy,zz], + - (xx_? isNotMinusOneQ) (yy_? isNotMinusOneQ) - (zz_? isNotTimesMinusOneQ) :> fnmsub[xx,yy,zz], + + (xx_? isNotMinusOneQ) (yy_ + 1) -> fmadd [xx, yy, xx], + + (xx_? isNotMinusOneQ) (yy_ - 1) -> fmsub [xx, yy, xx], + - (xx_? isNotMinusOneQ) (yy_ + 1) -> fnmadd[xx, yy, xx], + - (xx_? isNotMinusOneQ) (yy_ - 1) -> fnmsub[xx, yy, xx], + fmadd[xx_, - yy_, zz_] -> fnmsub[xx,yy,zz], + fmsub[xx_, - yy_, zz_] -> fnmadd[xx,yy,zz] + }; + rhs = rhs //. fmaRules; + + (* Constants *) + rhs = rhs /. 
xx_Integer/; xx!=-1 :> ToReal[xx]; + rhs = rhs /. xx_Real -> ToReal[xx]; + rhs = rhs /. - ToReal[xx_] -> ToReal[- xx]; + rhs = rhs /. ToReal[xx_] + ToReal[yy_] -> ToReal[xx + yy]; + rhs = rhs /. ToReal[xx_] * ToReal[yy_] -> ToReal[xx * yy]; + rhs = rhs /. pow[xx_, ToReal[power_]] -> pow[xx, power]; + rhs = rhs /. IfThen[ToReal[xx_], yy_, zz_] -> IfThen[xx, yy, zz]; + + (* Replace all operators and functions *) + (* fadd, fsub, fmul, fdiv, fneg *) + isNotFneg[n_] := ! MatchQ[n,fneg[_]]; + arithRules = { + - xx_ -> fneg[xx], + xx_ * yy_ -> fmul[xx,yy], + xx_ / yy_ -> fdiv[xx,yy], + xx_ + yy_ -> fadd[xx,yy], + xx_ - yy_ -> fsub[xx,yy], + fmul[-1,xx_] -> fneg[xx], + fadd[xx_,fneg[yy_]] -> fsub[xx,yy], + fadd[fneg[xx_],(yy_? isNotFneg)] :> fsub[yy,xx], + Abs[xx_] -> kfabs[xx], + Log[xx_] -> klog[xx], + fabs[xx_] -> kfabs[xx], + fmax[xx_,yy_] -> kfmax[xx,yy], + fmin[xx_,yy_] -> kfmin[xx,yy], + sqrt[xx_] -> ksqrt[xx], + exp[xx_] -> kexp[xx], + log[xx_] -> klog[xx], + pow[xx_,yy_] -> kpow[xx,yy] + }; + rhs = rhs //. arithRules; + rhs = rhs /. IfThen[fmul[xx_, yy_], aa_, bb_] -> IfThen[xx*yy, aa, bb]; + rhs = rhs /. ToReal[fneg[xx_]] -> ToReal[-xx]; + rhs = rhs /. ToReal[fmul[xx_, yy_]] -> ToReal[xx*yy]; + rhs = rhs /. kpow[xx_, fneg[power_]] -> kpow[xx, -power]; + ], + + rhs = rhs /. 
Power[xx_, power_] -> xx^power ]; (* Print[rhs//FullForm];*) rhs diff --git a/Tools/CodeGen/Differencing.m b/Tools/CodeGen/Differencing.m index 609aa9d..1ac5ba8 100644 --- a/Tools/CodeGen/Differencing.m +++ b/Tools/CodeGen/Differencing.m @@ -168,8 +168,6 @@ CreateDifferencingHeader[derivOps_, zeroDims_] := pDefs = Union[Flatten[Map[First, mDefPairs]]]; expressions = Flatten[Map[#[[2]]&, mDefPairs]]; -(* expressions = Flatten[Map[ComponentDerivativeOperatorInlineDefinition, dupsRemoved]];*) - {pDefs,Map[{#, "\n"} &, expressions]}]; ordergfds[_[v1_,___], _[v2_,___]] := @@ -213,7 +211,10 @@ PrecomputeDerivative[d:pd_[gf_, inds___]] := evaluateDerivative[d:pd_[gf_, inds___]] := Module[{macroname}, macroName = ComponentDerivativeOperatorMacroName[pd[inds] -> expr]; - Return[ToString[macroName] <> "(" <> ToString[gf] <> ", i, j, k)"]]; + (* Return[ToString[macroName] <> "(" <> ToString[gf] <> ", i, j, k)"] *) + (* Return[ToString[macroName] <> "(" <> ToString[gf] <> ")"] *) + Return[ToString[macroName] <> "(&" <> ToString[gf] <> "[index])"] + ]; DeclareDerivative[d:pd_[gf_, inds___]] := DeclareVariable[GridFunctionDerivativeName[d], "// CCTK_REAL_VEC"]; @@ -248,7 +249,7 @@ sbpMacroDefinition[macroName_, d_] := <> "(i,j,k,sbp_" <> l <> "min,sbp_" <> l <> "max,d" <> ds <> ",u,q" <> ds <> ",cctkGH))"}] ]; ComponentDerivativeOperatorMacroDefinition[componentDerivOp:(name_[inds___] -> expr_)] := - Module[{macroName, rhs, rhs2, i = "i", j = "j", k = "k", spacings, spacings2, pat, ss, num, den, newnum, signModifier, quotient, liName, rhs3, rhs4}, + Module[{macroName, rhs, i = "i", j = "j", k = "k", spacings, spacings2, pat, ss, num, den, newnum, signModifier, quotient, liName}, macroName = ComponentDerivativeOperatorMacroName[componentDerivOp]; @@ -262,22 +263,23 @@ ComponentDerivativeOperatorMacroDefinition[componentDerivOp:(name_[inds___] -> e Return[sbpMacroDefinition[macroName, 3]]]; rhs = DifferenceGF[expr, i, j, k]; +(* Print["rhs1 == ", FullForm[rhs]];*) spacings = 
{spacing[1] -> 1/"dxi", spacing[2] -> 1/"dyi", spacing[3] -> 1/"dzi"}; spacings2 = {spacing[1] -> "dx", spacing[2] -> "dy", spacing[3] -> "dz"}; - rhs2 = FullSimplify[rhs]; + rhs = FullSimplify[rhs]; -(* Print["rhs2 == ", FullForm[rhs2]];*) +(* Print["rhs2 == ", FullForm[rhs]];*) pat = Times[spInExpr:(Power[spacing[_],_]..), (Rational[x_,y_])..., rest__]; (* Print["pat == ", pat//FullForm];*) - If[MatchQ[rhs2, pat], + If[MatchQ[rhs, pat], (* Print["matches!"];*) - ss = Times[rhs2 /. pat -> spInExpr]; + ss = Times[rhs /. pat -> spInExpr]; (* Print["ss == ", ss];*) - num = rhs2 /. pat -> x; - den = rhs2 /. pat -> y; + num = rhs /. pat -> x; + den = rhs /. pat -> y; (* Print["num == ", num]; Print["den == ", den];*) If[{num, 1, 2} === {1, 2},(* Print["SEQ!"]; *) newnum = 1; den=1; signModifier = "", @@ -303,39 +305,35 @@ ComponentDerivativeOperatorMacroDefinition[componentDerivOp:(name_[inds___] -> e liName = "p" <> signModifier <> quotient <> ToString[Apply[SequenceForm,Simplify[1/(ss /. spacings2)],{0,Infinity}]]; (* Print["liName == ", liName];*) - rhs3 = rhs2 /. pat -> Times[liName, rest], + (* rhs = rhs /. pat -> Times[liName, rest], *) + rhs = (rhs /. pat -> Times[liName, rest]) / liName, (* Print["!!!!!!!!DOES NOT MATCH!!!!!!!!!"];*) - rhs3 = rhs2]; + rhs = rhs]; -(* Print["rhs3 == ", rhs3];*) +(* Print["rhs3 == ", FullForm[rhs]];*) pDefs = {{liName -> CFormHideStrings[ReplacePowers[num / den ss /. spacings2]]}}; -(* rhs4 = Factor[rhs3];*) - - rhs4 = rhs3 //. (x_ a_ + x_ b_) -> x(a+b); - rhs5 = rhs4 //. (x_ a_ - x_ b_) -> x(a-b); +(* rhs = Factor[rhs];*) + rhs = rhs //. (x_ a_ + x_ b_) -> x (a+b); + rhs = rhs //. (x_ a_ - x_ b_) -> x (a-b); (* Print[componentDerivOp, ": "]; - Print[FullForm[rhs5]]; + Print[FullForm[rhs]]; Print[""];*) - rhs6 = CFormHideStrings[ReplacePowers[rhs5 /. 
spacings]]; - {pDefs, FlattenBlock[{"#define ", macroName, "(u,i,j,k) ", "(", rhs6, ")"}]}]; - -ComponentDerivativeOperatorInlineDefinition[componentDerivOp:(name_[inds___] -> expr_)] := - Module[{inlineName, rhs, rhs2, i = "i", j = "j", k = "k", spacings}, - - inlineName = ComponentDerivativeOperatorMacroName[componentDerivOp]; - - rhs = DifferenceGF[expr, i, j, k]; -(* rhs = DifferenceGFInline[expr, i, j, k];*) - spacings = {spacing[1] -> 1/"dxi", spacing[2] -> 1/"dyi", spacing[3] -> 1/"dzi"}; - rhs2 = CFormHideStrings[FullSimplify[ReplacePowers[rhs /. spacings]]]; - - DefineFunction[inlineName, "static inline CCTK_REAL", - "CCTK_REAL *u, int i, int j, int k", - {"return ", rhs2, ";\n"}]]; + rhs = CFormHideStrings[ReplacePowers[rhs /. spacings]]; + (* {pDefs, FlattenBlock[{"#define ", macroName, "(u,i,j,k) ", "(", rhs, ")"}]} *) + {pDefs, FlattenBlock[{ + "#ifndef KRANC_DIFF_FUNCTIONS\n", + (* default, differencing operators are macros *) + "# define ", macroName, "(u) ", "(fmul(", liName, ",", rhs, "))\n", + "#else\n", + (* new, differencing operators are static functions *) + "# define ", macroName, "(u) ", "(", liName, "*", macroName, "_impl((u),dj,dk))\n", + "static CCTK_REAL_VEC ", macroName, "_impl(CCTK_REAL const* restrict const u, int const dj, int const dk) ", "{ return ", rhs, "; }\n", + "#endif\n" + }]}]; ComponentDerivativeOperatorMacroName[componentDerivOp:(name_[inds___] -> expr_)] := Module[{stringName}, @@ -368,14 +366,6 @@ DifferenceGF[op_, i_, j_, k_] := Apply[Plus, Map[DifferenceGFTerm[#, i, j, k] &, expanded]], DifferenceGFTerm[expanded, i, j, k]]]; -DifferenceGFInline[op_, i_, j_, k_] := - Module[{expanded}, - expanded = Expand[op]; - - If[Head[expanded] === Plus, - Apply[Plus, Map[DifferenceGFTermInline[#, i, j, k] &, expanded]], - DifferenceGFTerm[expanded, i, j, k]]]; - (* Return the fragment of a macro definition for defining a derivative operator *) @@ -404,10 +394,31 @@ DifferenceGFTerm[op_, i_, j_, k_] := "(int)(" <> 
ToString[CFormHideStrings[j+ny]] <> ")," <> "(int)(" <> ToString[CFormHideStrings[k+nz]] <> "))]", *) - remaining "vec_loadu((u)[index" <> - "+di*(" <> ToString[CFormHideStrings[nx]] <> ")" <> +(* + remaining "vec_loadu_maybe(" <> ToString[CFormHideStrings[nx]] <> "," <> + "(u)[index" <> + "+di*(" <> ToString[CFormHideStrings[nx]] <> ")" <> + "+dj*(" <> ToString[CFormHideStrings[ny]] <> ")" <> + "+dk*(" <> ToString[CFormHideStrings[nz]] <> ")])", +*) +(* + remaining "vec_loadu_maybe(" <> ToString[CFormHideStrings[nx]] <> "," <> + "(u)[(" <> ToString[CFormHideStrings[nx]] <> ")" <> "+dj*(" <> ToString[CFormHideStrings[ny]] <> ")" <> "+dk*(" <> ToString[CFormHideStrings[nz]] <> ")])", +*) + remaining "vec_loadu_maybe3" <> + "(" <> ToString[CFormHideStrings[nx /. {dir1->1, dir2->1, dir3->1}]] <> "," <> + ToString[CFormHideStrings[ny /. {dir1->1, dir2->1, dir3->1}]] <> "," <> + ToString[CFormHideStrings[nz /. {dir1->1, dir2->1, dir3->1}]] <> "," <> + "(u)[(" <> ToString[CFormHideStrings[nx]] <> ")" <> + "+dj*(" <> ToString[CFormHideStrings[ny]] <> ")" <> + "+dk*(" <> ToString[CFormHideStrings[nz]] <> ")])", +(* + remaining "vec_loadu(u[(" <> ToString[CFormHideStrings[nx]] <> ")" <> + "+dj*(" <> ToString[CFormHideStrings[ny]] <> ")" <> + "+dk*(" <> ToString[CFormHideStrings[nz]] <> ")])", +*) (* remaining "(u)[CCTK_GFINDEX3D(cctkGH,floor((" <> ToString[CFormHideStrings[i+nx]] <> ")+0.5),floor((" <> @@ -417,27 +428,6 @@ DifferenceGFTerm[op_, i_, j_, k_] := remaining "u(" <> ToString[FortranForm[i+nx]] <> "," <> ToString[FortranForm[j+ny]] <> "," <> ToString[FortranForm[k+nz]] <> ")"] ]; -(* Return the fragment of a function definition for defining a derivative - operator *) -DifferenceGFTermInline[op_, i_, j_, k_] := - Module[{nx, ny, nz, remaining}, - - If[op === 0, - Return[0]]; - - nx = Exponent[op, shift[1]]; - ny = Exponent[op, shift[2]]; - nz = Exponent[op, shift[3]]; - - remaining = op / (shift[1]^nx) / (shift[2]^ny) / (shift[3]^nz); - - If[Cases[{remaining}, 
shift[_], Infinity] != {}, - ThrowError["Could not parse difference operator:", op]]; - - remaining "(u)[CCTK_GFINDEX3D(cctkGH," <> ToString[CFormHideStrings[i+nx]] <> "," <> - ToString[CFormHideStrings[j+ny]] <> "," <> ToString[CFormHideStrings[k+nz]] <> ")]" - ]; - DerivativeOperatorGFDs[gf_]; diff --git a/Tools/CodeGen/Kranc.m b/Tools/CodeGen/Kranc.m index fd07c53..21acd21 100644 --- a/Tools/CodeGen/Kranc.m +++ b/Tools/CodeGen/Kranc.m @@ -22,7 +22,11 @@ BeginPackage["Kranc`"]; (* CodeGen.m *) -{INV, SQR, CUB, QAD, exp, pow, fmax, fmin, dx, dy, dz, khalf, kthird, ktwothird, kfourthird, keightthird}; +{INV, SQR, CUB, QAD, IfThen, ToReal, sqrt, exp, pow, fmax, fmin, + fmadd, fmsub, fnmadd, fnmsub, fneg, fadd, fsub, fmul, fdiv, + kfabs, kfmax, kfmin, ksqrt, kexp, klog, kpow, + dir1, dir2, dir3, dx, dy, dz, + khalf, kthird, ktwothird, kfourthird, keightthird}; (* Helpers.m *) diff --git a/Tools/CodeGen/Thorn.m b/Tools/CodeGen/Thorn.m index 353be9c..4ada28a 100644 --- a/Tools/CodeGen/Thorn.m +++ b/Tools/CodeGen/Thorn.m @@ -476,11 +476,12 @@ CreateSchedule[globalStorageGroups_, scheduledGroups_, scheduledFunctions_] := calculationMacros[] := CommentedBlock["Define macros used in calculations", Map[{"#define ", #, "\n"} &, - {"INITVALUE (42)", - "INV(x) ((1.0) / (x))" , - "SQR(x) ((x) * (x))" , - "CUB(x) ((x) * (x) * (x))" , - "QAD(x) ((x) * (x) * (x) * (x))"}]]; + {"INITVALUE (42)", + "INV(x) (fdiv(ToReal(1.0),x))", + "SQR(x) (fmul(x,x))", + "CUB(x) (x*SQR(x))", + "QAD(x) (SQR(SQR(x)))" + }]]; (* Given a list of Calculation structures as defined above, create a CodeGen representation of a source file that defines a function for @@ -508,7 +509,7 @@ CreateSetterSource[calcs_, debug_, useCSE_, include_, imp_, ], Map[IncludeFile, Join[{"cctk.h", "cctk_Arguments.h", "cctk_Parameters.h", - (*"precomputations.h",*) "GenericFD.h", "Differencing.h", "Vectors.hh"}, include, + (*"precomputations.h",*) "GenericFD.h", "Vectors.hh", "Differencing.h"}, include, 
If[OptionValue[UseLoopControl], {"loopcontrol.h"}, {}]]], calculationMacros[], @@ -738,10 +739,10 @@ CreateMoLBoundariesSource[spec_] := "if (CCTK_EQUALS(" <> boundpar <> ", \"none\" ) ||\n", " CCTK_EQUALS(" <> boundpar <> ", \"static\") ||\n", " CCTK_EQUALS(" <> boundpar <> ", \"flat\" ) ||\n", - " CCTK_EQUALS(" <> boundpar <> ", \"zero\" ) ) \n", + " CCTK_EQUALS(" <> boundpar <> ", \"zero\" ) )\n", "{\n", - " ierr = Boundary_SelectGroupForBC(cctkGH, CCTK_ALL_FACES, 1, -1, \n", + " ierr = Boundary_SelectGroupForBC(cctkGH, CCTK_ALL_FACES, boundary_width, -1,\n", " \"" <> fullgroupname <> "\", " <> boundpar <> ");\n", " if (ierr < 0)\n", @@ -760,10 +761,10 @@ CreateMoLBoundariesSource[spec_] := "if (CCTK_EQUALS(" <> boundpar <> ", \"none\" ) ||\n", " CCTK_EQUALS(" <> boundpar <> ", \"static\") ||\n", " CCTK_EQUALS(" <> boundpar <> ", \"flat\" ) ||\n", - " CCTK_EQUALS(" <> boundpar <> ", \"zero\" ) ) \n", + " CCTK_EQUALS(" <> boundpar <> ", \"zero\" ) )\n", "{\n", - " ierr = Boundary_SelectVarForBC(cctkGH, CCTK_ALL_FACES, 1, -1, \n", + " ierr = Boundary_SelectVarForBC(cctkGH, CCTK_ALL_FACES, boundary_width, -1,\n", " \"" <> fullgfname <> "\", " <> boundpar <> ");\n", " if (ierr < 0)\n", @@ -796,7 +797,7 @@ CreateMoLBoundariesSource[spec_] := " CCTK_WARN(0, \"could not set SPEED value in table!\");\n", "\n", - " ierr = Boundary_SelectGroupForBC(cctkGH, CCTK_ALL_FACES, 1, "<>myhandle<>", \n", + " ierr = Boundary_SelectGroupForBC(cctkGH, CCTK_ALL_FACES, boundary_width, "<>myhandle<>", \n", " \"" <> fullgroupname <> "\", \"Radiation\");\n\n", " if (ierr < 0)\n", @@ -830,7 +831,7 @@ CreateMoLBoundariesSource[spec_] := " CCTK_WARN(0, \"could not set SPEED value in table!\");\n", "\n", - " ierr = Boundary_SelectVarForBC(cctkGH, CCTK_ALL_FACES, 1, "<>myhandle<>", \n", + " ierr = Boundary_SelectVarForBC(cctkGH, CCTK_ALL_FACES, boundary_width, "<>myhandle<>", \n", " \"" <> fullgfname <> "\", \"Radiation\");\n\n", " if (ierr < 0)\n", @@ -859,7 +860,7 @@ 
CreateMoLBoundariesSource[spec_] := " CCTK_WARN(0, \"could not set SCALAR value in table!\");\n", "\n", - " ierr = Boundary_SelectGroupForBC(cctkGH, CCTK_ALL_FACES, 1, "<>myhandle<>", \n", + " ierr = Boundary_SelectGroupForBC(cctkGH, CCTK_ALL_FACES, boundary_width, "<>myhandle<>", \n", " \"" <> fullgroupname <> "\", \"scalar\");\n\n", " if (ierr < 0)\n", @@ -889,7 +890,7 @@ CreateMoLBoundariesSource[spec_] := " CCTK_WARN(0, \"could not set SCALAR value in table!\");\n", "\n", - " ierr = Boundary_SelectVarForBC(cctkGH, CCTK_ALL_FACES, 1, "<>myhandle<>", \n", + " ierr = Boundary_SelectVarForBC(cctkGH, CCTK_ALL_FACES, boundary_width, "<>myhandle<>", \n", " \"" <> fullgfname <> "\", \"scalar\");\n\n", " if (ierr < 0)\n", -- cgit v1.2.3 From 0101aa58b9485742a3b688ccfdc1863a9409779f Mon Sep 17 00:00:00 2001 From: Ian Hinder Date: Tue, 30 Nov 2010 22:45:57 +0100 Subject: Check group storage --- .../KrancNumericalTools/GenericFD/src/GenericFD.c | 19 +++++++++++++++++++ .../KrancNumericalTools/GenericFD/src/GenericFD.h | 3 +++ Tools/CodeGen/CalculationFunction.m | 21 ++++++++++++++++++++- Tools/CodeGen/Thorn.m | 2 +- 4 files changed, 43 insertions(+), 2 deletions(-) (limited to 'Auxiliary') diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c index ba83b89..9084f65 100644 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c @@ -503,3 +503,22 @@ void GenericFD_PenaltyPrim2Char(cGH const * restrict const cctkGH, int const dir return; } + +void GenericFD_AssertGroupStorage(cGH const * restrict const cctkGH, const char *calc, + int ngroups, const char *group_names[ngroups]) +{ + for (int i = 0; i < ngroups; i++) + { + int result = CCTK_QueryGroupStorage(cctkGH, group_names[i]); + if (result == 0) + { + CCTK_VWarn(CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING, + "Error in %s: Group \"%s\" does not 
have storage", calc, group_names[i]); + } + else if (result < 0) + { + CCTK_VWarn(CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING, + "Error in %s: Invalid group name \"%s\"", calc, group_names[i]); + } + } +} diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h index 041347d..1e4faff 100644 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h @@ -688,6 +688,9 @@ void GenericFD_GetBoundaryInfo(cGH const * restrict cctkGH, int * restrict is_physbnd, int * restrict is_ipbnd); +void GenericFD_AssertGroupStorage(cGH const * restrict const cctkGH, const char *calc, + int ngroups, const char *group_names[]); + #if 0 /* Finite differencing near boundaries */ diff --git a/Tools/CodeGen/CalculationFunction.m b/Tools/CodeGen/CalculationFunction.m index d73adb6..4c4cd83 100644 --- a/Tools/CodeGen/CalculationFunction.m +++ b/Tools/CodeGen/CalculationFunction.m @@ -157,6 +157,23 @@ removeUnusedShorthands[calc_] := removeUnusedShorthands[newCalc], newCalc]]; +(* Return all the groups that are used in a given calculation *) +groupsInCalculation[calc_, imp_] := + Module[{groups,gfs,eqs,gfsUsed, groupNames}, + groups = lookup[calc, Groups]; + gfs = allGroupVariables[groups]; + eqs = lookup[calc, Equations]; + gfsUsed = Union[Cases[eqs, _ ? 
(MemberQ[gfs,#] &), Infinity]]; + groupNames = containingGroups[gfsUsed, groups]; + Map[qualifyGroupName[#, imp] &, groupNames]]; + +CheckGroupStorage[groupNames_, calcName_] := + Module[{}, + {"\nconst char *groups[] = {", + Riffle[Map[Quote,groupNames], ","], + "};\n", + "GenericFD_AssertGroupStorage(cctkGH, ", Quote[calcName],", ", Length[groupNames], ", groups);\n"}]; + (* -------------------------------------------------------------------------- Variables -------------------------------------------------------------------------- *) @@ -324,7 +341,7 @@ pdCanonicalOrdering[name_[inds___] -> x_] := Options[CreateCalculationFunction] = ThornOptions; -CreateCalculationFunction[calc_, debug_, useCSE_, opts:OptionsPattern[]] := +CreateCalculationFunction[calc_, debug_, useCSE_, imp_, opts:OptionsPattern[]] := Module[{gfs, allSymbols, knownSymbols, shorts, eqs, parameters, functionName, dsUsed, groups, pddefs, cleancalc, eqLoop, where, @@ -413,6 +430,8 @@ CreateCalculationFunction[calc_, debug_, useCSE_, opts:OptionsPattern[]] := ConditionalOnParameterTextual["cctk_iteration % " <> functionName <> "_calc_every != " <> functionName <> "_calc_offset", "return;\n"], + CheckGroupStorage[groupsInCalculation[cleancalc, imp], functionName], + If[haveCondTextuals, Map[ConditionalOnParameterTextual["!(" <> # <> ")", "return;\n"] &,condTextuals], {}], CommentedBlock["Include user-supplied include files", diff --git a/Tools/CodeGen/Thorn.m b/Tools/CodeGen/Thorn.m index 73f7e99..29a2307 100644 --- a/Tools/CodeGen/Thorn.m +++ b/Tools/CodeGen/Thorn.m @@ -517,7 +517,7 @@ CreateSetterSource[calcs_, debug_, useCSE_, include_, imp_, CalculationBoundariesFunction[First[calcs], imp], - Map[CreateCalculationFunction[# , debug, useCSE, opts] &, + Map[CreateCalculationFunction[# , debug, useCSE, imp, opts] &, calcs]}]; -- cgit v1.2.3 From a41bfbe853bf083bdc27f46a88e2a09252d0925d Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Mon, 6 Dec 2010 08:06:35 -0600 Subject: Rewrite vectorisation 
infrastructure: Use "k" prefix more consistently in arithmetic macros. Improve vector code generation patterns. Move all vectorisation run-time out of Kranc and into a new thorn LSUThorns/Vectors, so that non-Kranc thorns can also use it. --- .../KrancNumericalTools/GenericFD/interface.ccl | 1 - .../KrancNumericalTools/GenericFD/src/GenericFD.h | 8 + .../GenericFD/src/MathematicaCompat.h | 7 +- .../GenericFD/src/Vectors-SSE2-direct.hh | 135 ----- .../GenericFD/src/Vectors-SSE2.hh | 194 ------- .../GenericFD/src/Vectors-VSX-direct.hh | 111 ---- .../GenericFD/src/Vectors-VSX.hh | 212 -------- .../GenericFD/src/Vectors-default.hh | 31 -- .../GenericFD/src/Vectors-define.hh | 104 ---- .../GenericFD/src/Vectors-outdated.hh | 591 --------------------- .../GenericFD/src/Vectors-pseudo.hh | 72 --- .../GenericFD/src/Vectors-undefine.hh | 14 - .../KrancNumericalTools/GenericFD/src/Vectors.hh | 52 -- .../GenericFD/src/old/Vectors-SSE2-direct.hh | 135 +++++ .../GenericFD/src/old/Vectors-SSE2.hh | 201 +++++++ .../GenericFD/src/old/Vectors-VSX-direct.hh | 111 ++++ .../GenericFD/src/old/Vectors-VSX.hh | 212 ++++++++ .../GenericFD/src/old/Vectors-default.hh | 31 ++ .../GenericFD/src/old/Vectors-define.hh | 104 ++++ .../GenericFD/src/old/Vectors-outdated.hh | 591 +++++++++++++++++++++ .../GenericFD/src/old/Vectors-pseudo.hh | 72 +++ .../GenericFD/src/old/Vectors-undefine.hh | 14 + .../GenericFD/src/old/Vectors.hh | 52 ++ Tools/CodeGen/CodeGen.m | 69 ++- Tools/CodeGen/Differencing.m | 20 +- Tools/CodeGen/Interface.m | 3 +- Tools/CodeGen/Kranc.m | 3 +- Tools/CodeGen/KrancThorn.m | 2 +- Tools/CodeGen/Thorn.m | 23 +- 29 files changed, 1607 insertions(+), 1568 deletions(-) delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-SSE2-direct.hh delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-SSE2.hh delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-VSX-direct.hh delete mode 100644 
Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-VSX.hh delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-default.hh delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-define.hh delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-outdated.hh delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-pseudo.hh delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-undefine.hh delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors.hh create mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-SSE2-direct.hh create mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-SSE2.hh create mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-VSX-direct.hh create mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-VSX.hh create mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-default.hh create mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-define.hh create mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-outdated.hh create mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-pseudo.hh create mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-undefine.hh create mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors.hh (limited to 'Auxiliary') diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/interface.ccl b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/interface.ccl index efbdaa6..ccc3785 100644 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/interface.ccl +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/interface.ccl @@ -7,7 +7,6 @@ implements: GenericFD INCLUDE HEADER: GenericFD.h in GenericFD.h -INCLUDE HEADER: Vectors.hh in Vectors.hh INCLUDE HEADER: sbp_calc_coeffs.h in 
sbp_calc_coeffs.h USES INCLUDE: Boundary.h diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h index 568c70f..7d8a13d 100644 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h @@ -28,10 +28,18 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include "cctk.h" + #ifdef __cplusplus extern "C" { #endif +#ifdef __cplusplus +# ifdef CCTK_CXX_RESTRICT +# define restrict CCTK_CXX_RESTRICT +# endif +#endif + #ifndef NOPRECOMPUTE #define PRECOMPUTE #endif diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/MathematicaCompat.h b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/MathematicaCompat.h index ee6a3b7..2f00da9 100644 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/MathematicaCompat.h +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/MathematicaCompat.h @@ -31,13 +31,15 @@ #define Tanh(x) (tanh(x)) #ifdef KRANC_C -# define Sign(x) ((x)<0?-1:+1) +# define Sign(x) (copysign(1.0,(x))) # define ToReal(x) ((CCTK_REAL)(x)) #else # define Sign(x) (sgn(x)) # define ToReal(x) (real((x),kind(khalf))) #endif +#if 0 + /* TODO: use fma(x,y,z) to implement fmadd and friends? Note that fma may be unsupported, or may be slow. 
*/ @@ -46,6 +48,7 @@ /* #define fnmadd(x,y,z) (-(z)-(x)*(y)) */ /* #define fnmsub(x,y,z) (+(z)-(x)*(y)) */ +#define fpos(x) (+(x)) #define fneg(x) (-(x)) #define fmul(x,y) ((x)*(y)) #define fdiv(x,y) ((x)/(y)) @@ -65,6 +68,8 @@ #define kpow(x,y) (pow(x,y)) #define ksqrt(x) (sqrt(x)) +#endif + #ifdef KRANC_C # define E M_E # define Pi M_PI diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-SSE2-direct.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-SSE2-direct.hh deleted file mode 100644 index 12cd6e8..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-SSE2-direct.hh +++ /dev/null @@ -1,135 +0,0 @@ -// Vectorise using Intel's or AMD's SSE2 - -// Use the type __m128d directly, without introducing a wrapper class -// Use macros instead of inline functions - - - -#include - -// Vector type corresponding to CCTK_REAL -typedef __m128d CCTK_REAL_VEC; - -// Number of vector elements in a CCTK_REAL_VEC -static -int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); - - - -// Create vectors, extract vector elements - -#define vec_set1(a) (_mm_set1_pd(a)) -#define vec_set(a,b) (_mm_set_pd(b,a)) - -// Get a scalar from the vector -#if defined(__PGI) && defined (__amd64__) -// _mm_cvtsd_f64 does not exist on PGI compilers -// # define vec_elt0(x) (*(CCTK_REAL const*)&(x)) -# define vec_elt0(x) ({ CCTK_REAL a_elt0; asm ("" : "=x" (a_elt0) : "0" (x)); a_elt0; }) -#else -// this is a no-op -# define vec_elt0(x) (_mm_cvtsd_f64(x)) -#endif -#define vec_elt1(x_) ({ CCTK_REAL_VEC const x_elt1=(x_); vec_elt0(_mm_unpackhi_pd(x_elt1,x_elt1)); }) - - - -// Load and store vectors - -// Load a vector from memory (aligned and unaligned); this loads from -// a reference to a scalar -#define vec_load(p) (_mm_load_pd(&(p))) -#define vec_loadu(p) (_mm_loadu_pd(&(p))) - -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -// Implementation: Always use 
unaligned load -#define vec_loadu_maybe(off,p) (vec_loadu(p)) -#define vec_loadu_maybe3(off1,off2,off3,p) (vec_loadu(p)) -#if 0 -#define vec_loadu_maybe(off,p) \ - (!((off)&(CCTK_REAL_VEC_SIZE-1)) ? \ - vec_load(p) : vec_loadu(p)) -#define vec_loadu_maybe3(off1,off2,off3,p) \ - (!((off1)&(CCTK_REAL_VEC_SIZE-1)) && \ - !((off2)&(CCTK_REAL_VEC_SIZE-1)) && \ - !((off3)&(CCTK_REAL_VEC_SIZE-1)) ? \ - vec_load(p) : vec_loadu(p)) -#endif - -// Store a vector to memory (aligned and non-temporal); this stores to -// a reference to a scalar -#define vec_store(p,x) (_mm_store_pd(&(p),x)) -#define vec_storeu(p,x) (_mm_storeu_pd(&(p),x)) -#if defined(KRANC_CACHE) -# define vec_store_nta(p,x) (_mm_stream_pd(&(p),x)) -#else -# define vec_store_nta(p,x) (_mm_store_pd(&(p),x)) -#endif - -// Store a lower or higher partial vector (aligned and non-temporal); -// the non-temporal hint is probably ignored -#define vec_store_nta_partial_lo(p,x,n) (_mm_storel_pd(&(p),x)) -#define vec_store_nta_partial_hi(p,x,n) (_mm_storeh_pd((&(p))+1,x)) - - - -// Functions and operators - -// Operators -#undef fneg -#undef fmul -#undef fdiv -#undef fadd -#undef fsub -#if defined(__PGI) && defined (__amd64__) -// The PGI compiler does not understand __m128d literals -static union { - unsigned long long s[CCTK_REAL_VEC_SIZE]; - CCTK_REAL_VEC v; -} vec_neg_mask_impl = {0x8000000000000000ULL, 0x8000000000000000ULL}; -# define vec_neg_mask (vec_neg_mask_impl.v) -#else -# define vec_neg_mask ((CCTK_REAL_VEC)(__m128i){0x8000000000000000ULL, 0x8000000000000000ULL}) -#endif -#define fneg(x) (_mm_xor_pd(x,vec_neg_mask)) -#define fmul(x,y) (_mm_mul_pd(x,y)) -#define fdiv(x,y) (_mm_div_pd(x,y)) -#define fadd(x,y) (_mm_add_pd(x,y)) -#define fsub(x,y) (_mm_sub_pd(x,y)) - -// Cheap functions -#undef kfabs -#undef kfmax -#undef kfmin -#undef ksqrt -#if defined(__PGI) && defined (__amd64__) -// The PGI compiler does not understand __m128d literals -static union { - unsigned long long s[CCTK_REAL_VEC_SIZE]; - 
CCTK_REAL_VEC v; -} vec_fabs_mask_impl = {0x7fffffffffffffffULL, 0x7fffffffffffffffULL}; -# define vec_fabs_mask (vec_fabs_mask_impl.v) -#else -# define vec_fabs_mask ((CCTK_REAL_VEC)(__m128i){0x7fffffffffffffffULL, 0x7fffffffffffffffULL}) -#endif -#define kfabs(x) (_mm_and_pd(x,vec_fabs_mask)) -#define kfmax(x,y) (_mm_max_pd(x,y)) -#define kfmin(x,y) (_mm_min_pd(x,y)) -#define ksqrt(x) (_mm_sqrt_pd(x)) - -// Expensive functions -#undef kexp -#undef klog -#undef kpow -#define kexp(x_) ({ CCTK_REAL_VEC const x_exp=(x_); vec_set(exp(vec_elt0(x_exp)),exp(vec_elt1(x_exp))); }) -#define klog(x_) ({ CCTK_REAL_VEC const x_log=(x_); vec_set(log(vec_elt0(x_log)),log(vec_elt1(x_log))); }) -#define kpow(x_,a_) ({ CCTK_REAL_VEC const x_pow=(x_); CCTK_REAL const a_pow=(a_); vec_set(pow(vec_elt0(x_pow),a_pow),pow(vec_elt1(x_pow),a_pow)); }) - - - -#undef Sign -#define Sign(x) (42) - -#undef ToReal -#define ToReal(x) (vec_set1(x)) diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-SSE2.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-SSE2.hh deleted file mode 100644 index 4a4eea6..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-SSE2.hh +++ /dev/null @@ -1,194 +0,0 @@ -// Vectorise using Intel's or AMD's SSE2 - - - -#include - -// Vector type corresponding to CCTK_REAL -struct CCTK_REAL_VEC { - // The underlying scalar and vector types - typedef double S; - typedef __m128d V; - V v; - - // Convert from and to the underlying vector type - inline CCTK_REAL_VEC(V const v_): v(v_) { } - inline operator V const() const { return v; } - - inline CCTK_REAL_VEC() { } - - // Copy constructor - inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { } -}; - -// Number of vector elements in a CCTK_REAL_VEC -static -int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); - - - -// Create vectors, extract vector elements - -DEFINE_FUNCTION_R_V(vec_set1,_mm_set1_pd(a)) -DEFINE_FUNCTION_RR_V(vec_set,_mm_set_pd(b,a)) - -// 
Get a scalar from the vector -#if defined(__PGI) && defined (__amd64__) -// _mm_cvtsd_f64 does not exist on PGI compilers -// DEFINE_FUNCTION_V_R(vec_elt0,({ CCTK_REAL a; _mm_store_sd(&a,x); a; })) -// DEFINE_FUNCTION_V_R(vec_elt0,(*(CCTK_REAL const*)&x)) -// This generates the fastest code with PGI compilers -DEFINE_FUNCTION_V_R(vec_elt0,({ CCTK_REAL a; asm ("" : "=x" (a) : "0" (x)); a; })) -#else -DEFINE_FUNCTION_V_R(vec_elt0,_mm_cvtsd_f64(x)) // this is a no-op -#endif -DEFINE_FUNCTION_V_R(vec_elt1,vec_elt0(_mm_unpackhi_pd(x,x))) - - - -// Load and store vectors - -// Load a vector from memory (aligned and unaligned); this loads from -// a reference to a scalar -DEFINE_FUNCTION_PR_V(vec_load,_mm_load_pd(&p)) -DEFINE_FUNCTION_PR_V(vec_loadu,_mm_loadu_pd(&p)) - -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -// Implementation: default to unaligned load -template -DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl,vec_loadu(p)) -template -DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl3,vec_loadu(p)) -// Implementation: load aligned if the modulus is zero -template<> -inline -CCTK_REAL_VEC vec_loadu_maybe_impl<0> (CCTK_REAL const& p) -{ - return vec_load(p); -} -template<> -inline -CCTK_REAL_VEC vec_loadu_maybe_impl3<0,0,0> (CCTK_REAL const& p) -{ - return vec_load(p); -} -// Call the implementation with the modulus -#define vec_loadu_maybe(off,p) \ - (vec_loadu_maybe_impl<(off)&(CCTK_REAL_VEC_SIZE-1>(p))) -#define vec_loadu_maybe3(off1,off2,off3,p) \ - (vec_loadu_maybe_impl3<(off1)&(CCTK_REAL_VEC_SIZE-1), \ - (off2)&(CCTK_REAL_VEC_SIZE-1), \ - (off3)&(CCTK_REAL_VEC_SIZE-1)>(p)) - -// Store a vector to memory (aligned and non-temporal); this stores to -// a reference to a scalar -DEFINE_FUNCTION_PRV(vec_store,_mm_store_pd(&p,x)) -DEFINE_FUNCTION_PRV(vec_storeu,_mm_storeu_pd(&p,x)) -#if defined(KRANC_CACHE) -DEFINE_FUNCTION_PRV(vec_store_nta,_mm_stream_pd(&p,x)) -#else 
-DEFINE_FUNCTION_PRV(vec_store_nta,_mm_store_pd(&p,x)) -#endif - -// Store a lower or higher partial vector (aligned and non-temporal); -// the non-temporal hint is probably ignored -static inline -void vec_store_nta_partial_lo (CCTK_REAL& p, CCTK_REAL_VEC const x, int const n) -{ - switch (n) { - case 1: _mm_storel_pd(&p,x); break; - default: assert(0); - } -} -static inline -void vec_store_nta_partial_hi (CCTK_REAL& p, CCTK_REAL_VEC const x, int const n) -{ - switch (n) { - case 1: _mm_storeh_pd((&p)+1,x); break; - default: assert(0); - } -} - - - -// Functions and operators - -// Single-argument operators -#if 0 -DEFINE_FUNCTION_V_V(operator+,x) -static CCTK_REAL_VEC const vec_neg_mask = - (CCTK_REAL_VEC::V)(__m128i) { 0x8000000000000000ULL, 0x8000000000000000ULL }; -DEFINE_FUNCTION_V_V(operator-,_mm_xor_pd(x,vec_neg_mask)) -#endif -DEFINE_FUNCTION_V_V(operator+,+x.v) -DEFINE_FUNCTION_V_V(operator-,-x.v) - -// Double-argument operators, both vectors -#if 0 -DEFINE_FUNCTION_VV_V(operator+,_mm_add_pd(x,y)) -DEFINE_FUNCTION_VV_V(operator-,_mm_sub_pd(x,y)) -DEFINE_FUNCTION_VV_V(operator*,_mm_mul_pd(x,y)) -DEFINE_FUNCTION_VV_V(operator/,_mm_div_pd(x,y)) -#endif -DEFINE_FUNCTION_VV_V(operator+,x.v+y.v) -DEFINE_FUNCTION_VV_V(operator-,x.v-y.v) -DEFINE_FUNCTION_VV_V(operator*,x.v*y.v) -DEFINE_FUNCTION_VV_V(operator/,x.v/y.v) - -// Double-argument operators, vector and scalar -DEFINE_FUNCTION_VR_V(operator+,x+vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator-,x-vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator*,x*vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator/,x/vec_set1(a)) - -// Double-argument operators, scalar and vector -DEFINE_FUNCTION_RV_V(operator+,vec_set1(a)+x) -DEFINE_FUNCTION_RV_V(operator-,vec_set1(a)-x) -DEFINE_FUNCTION_RV_V(operator*,vec_set1(a)*x) -DEFINE_FUNCTION_RV_V(operator/,vec_set1(a)/x) - -// Cheap functions -#if defined(__PGI) && defined (__amd64__) -// The PGI compiler does not understand __m128d literals -static union { - CCTK_REAL_VEC::S 
s[CCTK_REAL_VEC_SIZE]; - CCTK_REAL_VEC::V v; -} vec_fabs_mask_impl = { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL }; -# define vec_fabs_mask (vec_fabs_mask_impl.v) -#else -static CCTK_REAL_VEC const vec_fabs_mask = - (CCTK_REAL_VEC::V)(__m128i) { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL }; -#endif -DEFINE_FUNCTION_V_V(fabs,_mm_and_pd(x,vec_fabs_mask)) -DEFINE_FUNCTION_VV_V(fmax,_mm_max_pd(x,y)) -DEFINE_FUNCTION_VV_V(fmin,_mm_min_pd(x,y)) -DEFINE_FUNCTION_V_V(sqrt,_mm_sqrt_pd(x)) - -// Expensive functions -DEFINE_FUNCTION_V_V(exp,vec_set(exp(vec_elt0(x)),exp(vec_elt1(x)))) -DEFINE_FUNCTION_V_V(log,vec_set(log(vec_elt0(x)),log(vec_elt1(x)))) -DEFINE_FUNCTION_VR_V(pow,vec_set(pow(vec_elt0(x),a),pow(vec_elt1(x),a))) - - - -#undef Sign -#define Sign(x) (42) - -#undef ToReal -#define ToReal(x) vec_set1(x) - -#if defined(__PGI) && defined (__amd64__) -// Special case for PGI 9.0.4 to avoid an internal compiler error -#undef IfThen -static inline -CCTK_REAL_VEC IfThen (bool const cond, CCTK_REAL_VEC const x, CCTK_REAL_VEC const y) -{ - union { - __m128i vi; - CCTK_REAL_VEC::V v; - } mask; - mask.vi = _mm_set1_epi64x(-(long long)cond); - return _mm_or_pd(_mm_and_pd(x.v, mask.v), _mm_andnot_pd(mask.v, y.v)); -} -#endif diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-VSX-direct.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-VSX-direct.hh deleted file mode 100644 index 7e06017..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-VSX-direct.hh +++ /dev/null @@ -1,111 +0,0 @@ -// Vectorise using IBM's Altivec - -// Use the type vector double directly, without introducing a wrapper class -// Use macros instead of inline functions - - - -#include - -// Vector type corresponding to CCTK_REAL -typedef vector double CCTK_REAL_VEC; - -// Number of vector elements in a CCTK_REAL_VEC -static -int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); - - - -// Create vectors, extract vector elements - 
-#define vec_set1(a) (vec_splats(a)) -#if defined(__GNUC__) -// GNU doesn't support array indices on vectors -union vec_mask { - double elts[2]; - vector double v; -}; -# define vec_set(a,b) ({ vec_mask x_set; x_set.elts[0]=(a); x_set.elts[1]=(b); x_set.v; }) -#else -# define vec_set(a,b) ({ CCTK_REAL_VEC x_set; x_set[0]=(a); x_set[1]=(b); x_set; }) -#endif - -// Get a scalar from the vector -#if defined(__GNUC__) -// GNU doesn't support array indices on vectors -# define vec_elt0(x) ({ vec_mask x_elt0; x_elt0.v=(x); x_elt0.elts[0]; }) -# define vec_elt1(x) ({ vec_mask x_elt1; x_elt1.v=(x); x_elt1.elts[1]; }) -#else -# define vec_elt0(x) ((x)[0]) -# define vec_elt1(x) ((x)[1]) -#endif - - - -// Load and store vectors - -// Load a vector from memory (aligned and unaligned); this loads from -// a reference to a scalar -#define vec_load(p) (*(CCTK_REAL_VEC const*)&(p)) -#define vec_loadu(p) (vec_load(p)) - -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -#define vec_loadu_maybe(off,p) (vec_load(p)) -#define vec_loadu_maybe3(off1,off2,off3,p) (vec_load(p)) - -// Store a vector to memory (aligned and non-temporal); this stores to -// a reference to a scalar -#define vec_store(p,x) (*(CCTK_REAL_VEC*)&(p)=(x)) -#define vec_storeu(p,x) (*(CCTK_REAL_VEC*)&(p)=(x)) -// TODO: Use stvxl instruction? 
-#define vec_store_nta(p,x) vec_store(p,x) - -// Store a lower or higher partial vector (aligned and non-temporal); -// the non-temporal hint is probably ignored -#define vec_store_nta_partial_lo(p,x,n) ((p)=vec_elt0(x)) -#define vec_store_nta_partial_hi(p,x,n) ((&(p))[1]=vec_elt1(x)) - - - -// Functions and operators - -// Other Altivec functions are: -// nabs: -abs a -// madd msub nmadd nmsub: [+-]a*b[+-]c - -// Triple-argument operators, all vectors -#undef fmadd -#undef fmsub -#undef fnmadd -#undef fnmsub -#define fmadd(x,y,z) (vec_madd(x,y,z)) -#define fmsub(x,y,z) (vec_msub(x,y,z)) -#define fnmadd(x,y,z) (vec_nmadd(x,y,z)) -#define fnmsub(x,y,z) (vec_nmsub(x,y,z)) - -// Cheap functions -#undef kfabs -#undef kfmax -#undef kfmin -#define kfabs(x) (vec_abs(x)) -#define kfmax(x,y) (vec_max(x,y)) -#define kfmin(x,y) (vec_min(x,y)) - -// Expensive functions -#undef kexp -#undef klog -#undef kpow -#undef ksqrt -#define kexp(x_) ({ CCTK_REAL_VEC const x_exp=(x_); vec_set(exp(vec_elt0(x_exp)),exp(vec_elt1(x_exp))); }) -#define klog(x_) ({ CCTK_REAL_VEC const x_log=(x_); vec_set(log(vec_elt0(x_log)),log(vec_elt1(x_log))); }) -#define kpow(x_,a_) ({ CCTK_REAL_VEC const x_pow=(x_); CCTK_REAL const a_pow=(a_); vec_set(pow(vec_elt0(x_pow),a_pow),pow(vec_elt1(x_pow),a_pow)); }) -#define ksqrt(x_) ({ CCTK_REAL_VEC const x_sqrt=(x_); vec_set(sqrt(vec_elt0(x_sqrt)),sqrt(vec_elt1(x_sqrt))); }) - - - -#undef Sign -#define Sign(x) (42) - -#undef ToReal -#define ToReal(x) (vec_set1((CCTK_REAL)(x))) diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-VSX.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-VSX.hh deleted file mode 100644 index f591647..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-VSX.hh +++ /dev/null @@ -1,212 +0,0 @@ -// Vectorise using IBM's Altivec - - - -#include - -// Vector type corresponding to CCTK_REAL -struct CCTK_REAL_VEC { - // The underlying scalar and vector types - typedef double S; - typedef 
vector double V; - V v; - union vec_mask { - S elts[2]; - V v; - }; - - // Set a vector from scalars -#if 0 - // IBM - inline CCTK_REAL_VEC(S const a, S const b) { v[0]=a; v[1]=b; } -#endif -#if 0 - inline CCTK_REAL_VEC(S const a, S const b): - v(vec_mergel(vec_splats(a), vec_splats(b))) { } -#endif - inline CCTK_REAL_VEC(S const a, S const b) - { - vec_mask x; - x.elts[0] = a; - x.elts[1] = b; - v = x.v; - } - - // Set a vector from a scalar, replicating the scalar - // Note: Could also use vec_xlds instead - inline CCTK_REAL_VEC(S const a): v(vec_splats(a)) { } - - // Convert from and to the underlying vector type - inline CCTK_REAL_VEC(V const v_): v(v_) { } - inline operator V const() const { return v; } - - inline CCTK_REAL_VEC() { } - - // Copy constructor - inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { } -}; - -// Number of vector elements in a CCTK_REAL_VEC -static -int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); - - - -// Create vectors, extract vector elements -DEFINE_FUNCTION_R_V(vec_set1,CCTK_REAL_VEC(a)) -DEFINE_FUNCTION_RR_V(vec_set,CCTK_REAL_VEC(a,b)) - -// Get a scalar from the vector -#if 0 -// IBM -DEFINE_FUNCTION_V_R(vec_elt0,x.v[0]) -DEFINE_FUNCTION_V_R(vec_elt1,x.v[1]) -#endif -static inline CCTK_REAL vec_elt0(CCTK_REAL_VEC const x) -{ - CCTK_REAL_VEC::vec_mask x1; - x1.v = x; - return x1.elts[0]; -} -static inline CCTK_REAL vec_elt1(CCTK_REAL_VEC const x) -{ - CCTK_REAL_VEC::vec_mask x1; - x1.v = x; - return x1.elts[1]; -} - - - -// Load and store vectors - -// Load a vector from memory (aligned and unaligned); this loads from -// a reference to a scalar -DEFINE_FUNCTION_PR_V(vec_load,p) -#if 0 -// IBM -DEFINE_FUNCTION_PR_V(vec_loadu,vec_xld2(0,const_cast(&p))) -#endif -DEFINE_FUNCTION_PR_V(vec_loadu,p) - -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -// Implementation: default to unaligned load -template 
-DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl,vec_loadu(p)) -template -DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl3,vec_loadu(p)) -// Implementation: load aligned if the modulus is zero -template<> -inline -CCTK_REAL_VEC vec_loadu_maybe_impl<0> (CCTK_REAL const& p) -{ - return vec_load(p); -} -template<> -inline -CCTK_REAL_VEC vec_loadu_maybe_impl3<0,0,0> (CCTK_REAL const& p) -{ - return vec_load(p); -} -// Call the implementation with the modulus -#define vec_loadu_maybe(off,p) \ - (vec_loadu_maybe_impl<(off)&(CCTK_REAL_VEC_SIZE-1>(p))) -#define vec_loadu_maybe3(off1,off2,off3,p) \ - (vec_loadu_maybe_impl3<(off1)&(CCTK_REAL_VEC_SIZE-1), \ - (off2)&(CCTK_REAL_VEC_SIZE-1), \ - (off3)&(CCTK_REAL_VEC_SIZE-1)>(p)) - -// Store a vector to memory (aligned and non-temporal); this stores to -// a reference to a scalar -DEFINE_FUNCTION_PRV(vec_store,*(CCTK_REAL_VEC::V*)&p=x) -DEFINE_FUNCTION_PRV(vec_storeu,*(CCTK_REAL_VEC::V*)&p=x) -// TODO: Use stvxl instruction? -DEFINE_FUNCTION_PRV(vec_store_nta,*(CCTK_REAL_VEC::V*)&p=x) - -// Store a lower or higher partial vector (aligned and non-temporal); -// the non-temporal hint is probably ignored -static inline -void vec_store_nta_partial_lo (CCTK_REAL& p, CCTK_REAL_VEC const x, int const n) -{ - switch (n) { - case 1: p=vec_elt0(x); break; - default: assert(0); - } -} -static inline -void vec_store_nta_partial_hi (CCTK_REAL& p, CCTK_REAL_VEC const x, int const n) -{ - switch (n) { - case 1: (&p)[1]=vec_elt1(x); break; - default: assert(0); - } -} - - - -// Functions and operators - -// Other Altivec functions are: -// nabs: -abs a -// madd msub nmadd nmsub: [+-]a*b[+-]c - -// Single-argument operators -#if 0 -DEFINE_FUNCTION_V_V(operator+,x) -DEFINE_FUNCTION_V_V(operator-,vec_neg(x)) -#endif -DEFINE_FUNCTION_V_V(operator+,+x.v) -DEFINE_FUNCTION_V_V(operator-,-x.v) - -// Double-argument operators, both vectors -#if 0 -DEFINE_FUNCTION_VV_V(operator+,vec_add(x,y)) -DEFINE_FUNCTION_VV_V(operator-,vec_sub(x,y)) 
-DEFINE_FUNCTION_VV_V(operator*,vec_mul(x,y)) -DEFINE_FUNCTION_VV_V(operator/,vec_div(x,y)) -#endif -DEFINE_FUNCTION_VV_V(operator+,x.v+y.v) -DEFINE_FUNCTION_VV_V(operator-,x.v-y.v) -DEFINE_FUNCTION_VV_V(operator*,x.v*y.v) -DEFINE_FUNCTION_VV_V(operator/,x.v/y.v) - -// Double-argument operators, vector and scalar -DEFINE_FUNCTION_VR_V(operator+,x+vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator-,x-vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator*,x*vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator/,x/vec_set1(a)) - -// Double-argument operators, scalar and vector -DEFINE_FUNCTION_RV_V(operator+,vec_set1(a)+x) -DEFINE_FUNCTION_RV_V(operator-,vec_set1(a)-x) -DEFINE_FUNCTION_RV_V(operator*,vec_set1(a)*x) -DEFINE_FUNCTION_RV_V(operator/,vec_set1(a)/x) - -// Triple-argument operators, all vectors -#undef fmadd -#undef fmsub -#undef fnmadd -#undef fnmsub -DEFINE_FUNCTION_VVV_V(fmadd,vec_madd(x.v,y.v,z.v)) -DEFINE_FUNCTION_VVV_V(fmsub,vec_msub(x.v,y.v,z.v)) -DEFINE_FUNCTION_VVV_V(fnmadd,vec_nmadd(x.v,y.v,z.v)) -DEFINE_FUNCTION_VVV_V(fnmsub,vec_nmsub(x.v,y.v,z.v)) - -// Cheap functions -DEFINE_FUNCTION_V_V(fabs,vec_abs(x.v)) -DEFINE_FUNCTION_VV_V(fmax,vec_max(x.v,y.v)) -DEFINE_FUNCTION_VV_V(fmin,vec_min(x.v,y.v)) - -// Expensive functions -DEFINE_FUNCTION_V_V(exp,vec_set(exp(vec_elt0(x)),exp(vec_elt1(x)))) -DEFINE_FUNCTION_V_V(log,vec_set(log(vec_elt0(x)),log(vec_elt1(x)))) -DEFINE_FUNCTION_VR_V(pow,vec_set(pow(vec_elt0(x),a),pow(vec_elt1(x),a))) -DEFINE_FUNCTION_V_V(sqrt,vec_set(sqrt(vec_elt0(x)),sqrt(vec_elt1(x)))) - - - -#undef Sign -#define Sign(x) (42) - -#undef ToReal -#define ToReal(x) (vec_set1(x)) diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-default.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-default.hh deleted file mode 100644 index f928ed8..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-default.hh +++ /dev/null @@ -1,31 +0,0 @@ -// Fallback vectorisation implementation: Do not vectorise - - - -// Use 
CCTK_REAL -typedef CCTK_REAL CCTK_REAL_VEC; - -// Number of vector elements in a CCTK_REAL_VEC -static int const CCTK_REAL_VEC_SIZE = 1; - - - -// We use macros here, so that we are not surprised by compilers which -// don't like to inline functions (e.g. PGI). This should also make -// debug builds (which may not inline) more efficient. - -#define vec_load(p) (p) -#define vec_loadu(p) (p) - -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -#define vec_loadu_maybe(off,p) (p) -#define vec_loadu_maybe3(off1,off2,off3,p) (p) - -#define vec_store(p,x) ((p)=(x)) -#define vec_store_nta(p,x) ((p)=(x)) - -// Store a lower or higher partial vector (aligned and non-temporal); -// the non-temporal hint is probably ignored -#define vec_store_nta_partial_lo(p,x,n) (assert(0)) -#define vec_store_nta_partial_hi(p,x,n) (assert(0)) diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-define.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-define.hh deleted file mode 100644 index f5c0b22..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-define.hh +++ /dev/null @@ -1,104 +0,0 @@ -// Define some macros that simplify defining short function that are -// supposed to be inlined - - - -// Letters defining the prototype (argument and return value types): -// I: i,j: integer -// R: a,b: real -// V: x,y: vector (of real) -// P: p,q: pointer (i.e. const reference) to something -// L: l,m: L-value (i.e. 
non-const reference) to something - - - -// Load and store - -#define DEFINE_FUNCTION_PR_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL const& p) \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_PRV(name,expr) \ -static inline \ -void name (CCTK_REAL& p, CCTK_REAL_VEC const x) \ -{ \ - expr; \ -} - -#define DEFINE_FUNCTION_PVR(name,expr) \ -static inline \ -void name (CCTK_REAL_VEC& p, CCTK_REAL const a) \ -{ \ - expr; \ -} - - - -// Functions and operators - -#define DEFINE_FUNCTION_V_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL_VEC const x) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_V_R(name,expr) \ -static inline \ -CCTK_REAL name (CCTK_REAL_VEC const x) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_R_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL const a) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_VV_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL_VEC const x, CCTK_REAL_VEC const y) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_VR_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL_VEC const x, CCTK_REAL const a) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_RV_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL const a, CCTK_REAL_VEC const x) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_RR_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL const a, CCTK_REAL const b) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_VVV_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL_VEC const x, CCTK_REAL_VEC const y, CCTK_REAL_VEC const z) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-outdated.hh 
b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-outdated.hh deleted file mode 100644 index df83b3a..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-outdated.hh +++ /dev/null @@ -1,591 +0,0 @@ -#ifndef VECTORS_HH -#define VECTORS_HH - - - -// Vectorisation - -#include -#include -#include - -#include - - - -// I: i,j: integer -// R: a,b: real -// V: x,y: vector (of real) -// P: p,q: pointer (i.e. const reference) to something -// L: l,m: L-value (i.e. non-const reference) to something - -#define DEFINE_FUNCTION_PR_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL const& p) \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_PRV(name,expr) \ -static inline \ -void name (CCTK_REAL& p, CCTK_REAL_VEC const& x) \ -{ \ - expr; \ -} - -#define DEFINE_FUNCTION_PVR(name,expr) \ -static inline \ -void name (CCTK_REAL_VEC& p, CCTK_REAL const& a) \ -{ \ - expr; \ -} - -#define DEFINE_FUNCTION_V_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL_VEC const& x) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_V_R(name,expr) \ -static inline \ -CCTK_REAL name (CCTK_REAL_VEC const& x) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_R_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL const& a) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_VV_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL_VEC const& x, CCTK_REAL_VEC const& y) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_VR_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL_VEC const& x, CCTK_REAL const& a) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_RV_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL const& a, CCTK_REAL_VEC const& x) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_RR_V(name,expr) \ -static inline \ -CCTK_REAL_VEC 
name (CCTK_REAL const& a, CCTK_REAL const& b) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - - - -// Intel, double -#if defined(KRANC_VECTORS) && defined(__SSE2__) && defined(CCTK_REAL_PRECISION_8) - -#include - -// Vector type corresponding to CCTK_REAL -struct CCTK_REAL_VEC { - // The underlying scalar and vector types - typedef double S; - typedef __m128d V; - V v; - - // Set a vector from scalars - inline CCTK_REAL_VEC(S const& a, S const& b): v(_mm_set_pd(b,a)) { } - - // Set a vector from a scalar, replicating the scalar - inline CCTK_REAL_VEC(S const& a): v(_mm_set1_pd(a)) { } - - // Convert from and to the underlying vector type - inline CCTK_REAL_VEC(V const& v_): v(v_) { } - inline operator V const() const { return v; } - - inline CCTK_REAL_VEC() { } - - // Copy constructor - inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { } -}; - -union vec_mask { - unsigned long long bits[2]; - CCTK_REAL_VEC::V v; -}; - -DEFINE_FUNCTION_R_V(vec_set1,_mm_set1_pd(a)) -DEFINE_FUNCTION_RR_V(vec_set,_mm_set_pd(b,a)) - -// Get a scalar from the vector -#if defined(__PGI) && defined (__amd64__) -// _mm_cvtsd_f64 does not exist on PGI compilers -static inline -CCTK_REAL vec_elt0 (CCTK_REAL_VEC const& x) -{ - CCTK_REAL a; _mm_store_sd(&a,x); return a; -} -#else -DEFINE_FUNCTION_V_R(vec_elt0,_mm_cvtsd_f64(x)) //this is a no-op -#endif - -#if 0 -DEFINE_FUNCTION_V_R(vec_elt1,vec_elt0(_mm_shuffle_pd(x,x,_MM_SHUFFLE2(1,1)))) -#endif -static inline -CCTK_REAL vec_elt1 (CCTK_REAL_VEC const& x) -{ - CCTK_REAL a; _mm_storeh_pd(&a,x); return a; -} - -// Load a vector from memory (aligned and unaligned); this loads from -// a reference to a scalar -DEFINE_FUNCTION_PR_V(vec_load,_mm_load_pd(&p)) -DEFINE_FUNCTION_PR_V(vec_loadu,_mm_loadu_pd(&p)) - -#if 0 -// Load a partial vector (duplicating the last loaded element to fill -// the remaining elements) -// TODO: Should this be aligned or unaligned? 
-static inline -CCTK_REAL_VEC vec_load_partial (CCTK_REAL const& p, int const n) -{ - switch (n) { - case 1: return _mm_load1_pd(p); - default: assert(0); - } -} -#endif - -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -// Implementation: default to unaligned load -template -DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl,vec_loadu(p)) -template -DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl3,vec_loadu(p)) -// Implementation: load aligned if the modulus is zero -template<> -inline -CCTK_REAL_VEC vec_loadu_maybe_impl<0> (CCTK_REAL const& p) -{ - return vec_load(p); -} -template<> -inline -CCTK_REAL_VEC vec_loadu_maybe_impl3<0,0,0> (CCTK_REAL const& p) -{ - return vec_load(p); -} -// Call the implementation with the modulus -template -static inline -CCTK_REAL_VEC vec_loadu_maybe (CCTK_REAL const& p) -{ - return vec_loadu_maybe_impl(p); -} -template -static inline -CCTK_REAL_VEC vec_loadu_maybe3 (CCTK_REAL const& p) -{ - return vec_loadu_maybe_impl3(p); -} - -// Store a vector to memory (aligned and non-temporal); this stores to -// a reference to a scalar -DEFINE_FUNCTION_PRV(vec_store,_mm_store_pd(&p,x)) -DEFINE_FUNCTION_PRV(vec_store_nta,_mm_stream_pd(&p,x)) - -// Store a lower or higher partial vector (aligned and non-temporal); -// the non-temporal hint is probably ignored -static inline -void vec_storel_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n) -{ - switch (n) { - case 1: _mm_storel_pd(&p,x); break; - default: assert(0); - } -} -static inline -void vec_storeh_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n) -{ - switch (n) { - case 1: _mm_storeh_pd((&p)+1,x); break; - default: assert(0); - } -} - -// Double-argument operators, both vectors -DEFINE_FUNCTION_VV_V(operator+,_mm_add_pd(x,y)) -DEFINE_FUNCTION_VV_V(operator-,_mm_sub_pd(x,y)) -DEFINE_FUNCTION_VV_V(operator*,_mm_mul_pd(x,y)) -DEFINE_FUNCTION_VV_V(operator/,_mm_div_pd(x,y)) - -// Double-argument operators, vector and 
scalar -DEFINE_FUNCTION_VR_V(operator+,x+vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator-,x-vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator*,x*vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator/,x/vec_set1(a)) - -// Double-argument operators, scalar and vector -DEFINE_FUNCTION_RV_V(operator+,vec_set1(a)+x) -DEFINE_FUNCTION_RV_V(operator-,vec_set1(a)-x) -DEFINE_FUNCTION_RV_V(operator*,vec_set1(a)*x) -DEFINE_FUNCTION_RV_V(operator/,vec_set1(a)/x) - -// Single-argument operators -DEFINE_FUNCTION_V_V(operator+,x) -#if 0 -DEFINE_FUNCTION_V_V(operator-,vec_set(0.0,0.0)-x) -#endif -static vec_mask const vec_neg_mask = -{ { 0x8000000000000000ULL, 0x8000000000000000ULL } }; -DEFINE_FUNCTION_V_V(operator-,_mm_xor_pd(x,vec_neg_mask.v)) - -// Cheap functions -static vec_mask const vec_fabs_mask = -{ { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL } }; -DEFINE_FUNCTION_V_V(fabs,_mm_and_pd(x,vec_fabs_mask.v)) -DEFINE_FUNCTION_VV_V(fmax,_mm_max_pd(x,y)) -DEFINE_FUNCTION_VV_V(fmin,_mm_min_pd(x,y)) -DEFINE_FUNCTION_V_V(sqrt,_mm_sqrt_pd(x)) - -// Expensive functions -DEFINE_FUNCTION_V_V(exp,vec_set(exp(vec_elt0(x)),exp(vec_elt1(x)))) -DEFINE_FUNCTION_V_V(log,vec_set(log(vec_elt0(x)),log(vec_elt1(x)))) -DEFINE_FUNCTION_VR_V(pow,vec_set(pow(vec_elt0(x),a),pow(vec_elt1(x),a))) - -// Special case for PGI to avoid internal compiler error -#if defined(__PGI) && defined (__amd64__) -#undef IfThen -CCTK_REAL_VEC IfThen (bool const cond, CCTK_REAL_VEC const& x, CCTK_REAL_VEC co\ -nst& y) -{ - return cond*x + (not cond)*y; -} -#endif - - - -#if 0 -// Try to use the __m128d type directly. - -// This does not really work, because it is not possible to define -// automatic conversion operators from double to __m128d, so that -// explicit conversions are required. This makes the code look more -// clumsy. 
- -// Vector type corresponding to CCTK_REAL -typedef __m128d CCTK_REAL_VEC; - -DEFINE_FUNCTION_R_V(vec_set1,_mm_set1_pd(a)) -DEFINE_FUNCTION_RR_V(vec_set,_mm_set_pd(b,a)) - -// Get a scalar from the vector -static inline -CCTK_REAL vec_elt0 (CCTK_REAL_VEC const& x) -{ -#if 0 - // _mm_cvtsd_f64 does not exist on PGI compilers - return _mm_cvtsd_f64(x); // this is a no-op -#endif - CCTK_REAL a; _mm_store_sd(&a,x); return a; -} - -DEFINE_FUNCTION_V_R(vec_elt1,vec_elt0(_mm_shuffle_pd(x,x,_MM_SHUFFLE2(1,1)))) - -// Load a vector from memory (aligned and unaligned); this loads from -// a reference to a scalar -DEFINE_FUNCTION_PR_V(vec_load,_mm_load_pd(&p)) -DEFINE_FUNCTION_PR_V(vec_loadu,_mm_loadu_pd(&p)) - -// Store a vector to memory (aligned and non-temporal); this stores to -// a reference to a scalar -DEFINE_FUNCTION_PRV(vec_store,_mm_store_pd(&p,x)) -DEFINE_FUNCTION_PRV(vec_store_nta,_mm_stream_pd(&p,x)) - -// Cheap functions -static vec_mask const vec_fabs_mask = -{ { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL } }; -DEFINE_FUNCTION_V_V(fabs,_mm_and_pd(x,vec_fabs_mask.v)) -DEFINE_FUNCTION_VV_V(fmax,_mm_max_pd(x,y)) -DEFINE_FUNCTION_VV_V(fmin,_mm_min_pd(x,y)) -DEFINE_FUNCTION_V_V(sqrt,_mm_sqrt_pd(x)) - -// Expensive functions -DEFINE_FUNCTION_V_V(exp,set(exp(vec_elt0(x)),exp(vec_elt1(x)))) -DEFINE_FUNCTION_V_V(log,set(log(vec_elt0(x)),log(vec_elt1(x)))) -DEFINE_FUNCTION_VR_V(pow,set(pow(vec_elt0(x),a),pow(vec_elt1(x),a))) - -#endif - - - -// Intel, float -#elif defined(KRANC_VECTORS) && defined(__SSE__) && defined(CCTK_REAL_PRECISION_4) - -#include - -// A vector type corresponding to CCTK_REAL -typedef __m128 CCTK_REAL_VEC; - - - -// Power, double -#elif defined(KRANC_VECTORS) && defined(__ALTIVEC__) && defined(_ARCH_PWR7) && defined(CCTK_REAL_PRECISION_8) - -#include - -// Vector type corresponding to CCTK_REAL -struct CCTK_REAL_VEC { - // The underlying scalar and vector types - typedef double S; - typedef vector double V; - V v; - - // vec_insert, 
vec_extract, vec_splat - - // Set a vector from scalars - inline CCTK_REAL_VEC(S const& a, S const& b) { v[0]=a; v[1]=b; } - - // Set a vector from a scalar, replicating the scalar - inline CCTK_REAL_VEC(S const& a): v(vec_splats(a)) { } - - // Convert from and to the underlying vector type - inline CCTK_REAL_VEC(V const& v_): v(v_) { } - inline operator V const() const { return v; } - - inline CCTK_REAL_VEC() { } - - // Copy constructor - inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { } -}; - -DEFINE_FUNCTION_R_V(vec_set1,CCTK_REAL_VEC(a)) -DEFINE_FUNCTION_RR_V(vec_set,CCTK_REAL_VEC(a,b)) - -// Get a scalar from the vector -DEFINE_FUNCTION_V_R(vec_elt0,x.v[0]) -DEFINE_FUNCTION_V_R(vec_elt1,x.v[1]) - -// Load a vector from memory (aligned and unaligned); this loads from -// a reference to a scalar -DEFINE_FUNCTION_PR_V(vec_load,p) -DEFINE_FUNCTION_PR_V(vec_loadu,vec_xld2(0,const_cast(&p))) -// vec_xlds - -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -// Implementation: default to unaligned load -template -DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl,vec_loadu(p)) -// Implementation: load aligned if the modulus is zero -#define static -template<> -DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl<0>,vec_load(p)) -#undef static -// Call the implementation with the modulus -template -DEFINE_FUNCTION_PR_V(vec_loadu_maybe,vec_loadu_maybe_impl(p)) - -// Store a vector to memory (aligned and non-temporal); this stores to -// a reference to a scalar -DEFINE_FUNCTION_PRV(vec_store,*(CCTK_REAL_VEC::V*)&p=x) -DEFINE_FUNCTION_PRV(vec_store_nta,*(CCTK_REAL_VEC::V*)&p=x) - -// Store a lower or higher partial vector (aligned and non-temporal); -// the non-temporal hint is probably ignored -static inline -void vec_storel_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n) -{ - switch (n) { - case 1: p=x.v[0]; break; - default: assert(0); - } -} -static inline -void vec_storeh_partial (CCTK_REAL& p, 
CCTK_REAL_VEC const& x, int const n) -{ - switch (n) { - case 1: (&p)[1]=x.v[1]; break; - default: assert(0); - } -} - -// Double-argument operators, both vectors -DEFINE_FUNCTION_VV_V(operator+,vec_add(x,y)) -DEFINE_FUNCTION_VV_V(operator-,vec_sub(x,y)) -DEFINE_FUNCTION_VV_V(operator*,vec_mul(x,y)) -DEFINE_FUNCTION_VV_V(operator/,vec_div(x,y)) - -// Double-argument operators, vector and scalar -DEFINE_FUNCTION_VR_V(operator+,x+vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator-,x-vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator*,x*vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator/,x/vec_set1(a)) - -// Double-argument operators, scalar and vector -DEFINE_FUNCTION_RV_V(operator+,vec_set1(a)+x) -DEFINE_FUNCTION_RV_V(operator-,vec_set1(a)-x) -DEFINE_FUNCTION_RV_V(operator*,vec_set1(a)*x) -DEFINE_FUNCTION_RV_V(operator/,vec_set1(a)/x) - -// Single-argument operators -DEFINE_FUNCTION_V_V(operator+,x) -DEFINE_FUNCTION_V_V(operator-,vec_neg(x)) - -// Cheap functions -DEFINE_FUNCTION_V_V(fabs,vec_abs(x)) -DEFINE_FUNCTION_VV_V(fmax,vec_max(x,y)) -DEFINE_FUNCTION_VV_V(fmin,vec_min(x,y)) - -// Expensive functions -DEFINE_FUNCTION_V_V(exp,vec_set(exp(vec_elt0(x)),exp(vec_elt1(x)))) -DEFINE_FUNCTION_V_V(log,vec_set(log(vec_elt0(x)),log(vec_elt1(x)))) -DEFINE_FUNCTION_VR_V(pow,vec_set(pow(vec_elt0(x),a),pow(vec_elt1(x),a))) -DEFINE_FUNCTION_V_V(sqrt,vec_set(sqrt(vec_elt0(x)),sqrt(vec_elt1(x)))) - - - -// Fallback: pseudo-vectorisation -#elif 0 - -// There is no vector type corresponding to CCTK_REAL -struct CCTK_REAL_VEC { - // The underlying scalar and vector types - CCTK_REAL v, w; - - // Set a vector from scalars - inline CCTK_REAL_VEC(CCTK_REAL const& a, CCTK_REAL const& b): v(a), w(b) { } - - // Set a vector from a scalar, replicating the scalar - inline CCTK_REAL_VEC(CCTK_REAL const& a): v(a), w(a) { } - - inline CCTK_REAL_VEC() { } - - // Copy constructor - inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x.v), w(x.w) { } -}; - - - -DEFINE_FUNCTION_PR_V(vec_load,*(CCTK_REAL_VEC const* 
restrict)&p) -DEFINE_FUNCTION_PR_V(vec_loadu,vec_load(p)) -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -template -DEFINE_FUNCTION_PR_V(vec_loadm,vec_load(p)) - -DEFINE_FUNCTION_PRV(vec_store,*(CCTK_REAL_VEC* restrict)&p=x) -DEFINE_FUNCTION_PRV(vec_store_nta,vec_store(p,x)) - -// Double-argument operators, both vectors -DEFINE_FUNCTION_VV_V(operator+,CCTK_REAL_VEC(x.v+y.v,x.w+y.w)) -DEFINE_FUNCTION_VV_V(operator-,CCTK_REAL_VEC(x.v-y.v,x.w-y.w)) -DEFINE_FUNCTION_VV_V(operator*,CCTK_REAL_VEC(x.v*y.v,x.w*y.w)) -DEFINE_FUNCTION_VV_V(operator/,CCTK_REAL_VEC(x.v/y.v,x.w/y.w)) - -// Double-argument operators, vector and scalar -DEFINE_FUNCTION_VR_V(operator+,CCTK_REAL_VEC(x.v+a,x.w+a)) -DEFINE_FUNCTION_VR_V(operator-,CCTK_REAL_VEC(x.v-a,x.w-a)) -DEFINE_FUNCTION_VR_V(operator*,CCTK_REAL_VEC(x.v*a,x.w*a)) -DEFINE_FUNCTION_VR_V(operator/,CCTK_REAL_VEC(x.v/a,x.w/a)) - -// Double-argument operators, scalar and vector -DEFINE_FUNCTION_RV_V(operator+,CCTK_REAL_VEC(a+x.v,a+x.w)) -DEFINE_FUNCTION_RV_V(operator-,CCTK_REAL_VEC(a-x.v,a-x.w)) -DEFINE_FUNCTION_RV_V(operator*,CCTK_REAL_VEC(a*x.v,a*x.w)) -DEFINE_FUNCTION_RV_V(operator/,CCTK_REAL_VEC(a/x.v,a/x.w)) - -// Single-argument operators -DEFINE_FUNCTION_V_V(operator+,x) -DEFINE_FUNCTION_V_V(operator-,CCTK_REAL_VEC(-x.v,-x.w)) - -// Cheap functions -DEFINE_FUNCTION_V_V(fabs,CCTK_REAL_VEC(fabs(x.v),fabs(x.w))) -DEFINE_FUNCTION_VV_V(fmax,CCTK_REAL_VEC(fmax(x.v,y.v),fmax(x.w,y.w))) -DEFINE_FUNCTION_VV_V(fmin,CCTK_REAL_VEC(fmin(x.v,y.v),fmin(x.w,y.w))) -DEFINE_FUNCTION_V_V(sqrt,CCTK_REAL_VEC(sqrt(x.v),sqrt(x.w))) - -// Expensive functions -DEFINE_FUNCTION_V_V(exp,CCTK_REAL_VEC(exp(x.v),exp(x.w))) -DEFINE_FUNCTION_V_V(log,CCTK_REAL_VEC(log(x.v),log(x.w))) -DEFINE_FUNCTION_VR_V(pow,CCTK_REAL_VEC(pow(x.v,a),pow(x.w,a))) - - - -// Fallback: no vectorisation -#else - -// There is no vector type corresponding to CCTK_REAL -typedef CCTK_REAL CCTK_REAL_VEC; - - - 
-DEFINE_FUNCTION_PR_V(vec_load,p) -DEFINE_FUNCTION_PR_V(vec_loadu,p) -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -// Implementation: default to unaligned load -template -DEFINE_FUNCTION_PR_V(vec_loadu_maybe,p) -template -DEFINE_FUNCTION_PR_V(vec_loadu_maybe3,p) - -DEFINE_FUNCTION_PRV(vec_store,p=x) -DEFINE_FUNCTION_PRV(vec_store_nta,p=x) - -// Store a lower or higher partial vector (aligned and non-temporal); -// the non-temporal hint is probably ignored -static inline -void vec_storel_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n) -{ - assert(0); -} -static inline -void vec_storeh_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n) -{ - assert(0); -} - - - -#endif - - - -#undef DEFINE_FUNCTION_PR_V -#undef DEFINE_FUNCTION_PRV -#undef DEFINE_FUNCTION_V_V -#undef DEFINE_FUNCTION_R_V -#undef DEFINE_FUNCTION_VV_V -#undef DEFINE_FUNCTION_VR_V -#undef DEFINE_FUNCTION_RV_V -#undef DEFINE_FUNCTION_RR_V - - - -// Number of vector elements in a CCTK_REAL_VEC -static -int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); - - - -#endif // #ifndef VECTORS_HH diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-pseudo.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-pseudo.hh deleted file mode 100644 index f439c9b..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-pseudo.hh +++ /dev/null @@ -1,72 +0,0 @@ -// Pseudo vectorisation using scalar operations - - - -// Number of vector elements in a CCTK_REAL_VEC -static int const CCTK_REAL_VEC_SIZE = 2; - -// There is no vector type corresponding to CCTK_REAL -struct CCTK_REAL_VEC { - // The underlying scalar and vector types - CCTK_REAL v[CCTK_REAL_VEC_SIZE]; - - // Set a vector from scalars - inline CCTK_REAL_VEC(CCTK_REAL const& a, CCTK_REAL const& b): v(a), w(b) { } - - // Set a vector from a scalar, replicating the scalar - inline CCTK_REAL_VEC(CCTK_REAL const& 
a): v(a), w(a) { } - - inline CCTK_REAL_VEC() { } - - // Copy constructor - inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x) { v[0]=x.v[0]; v[1]=x.v[1]; } -}; - - - -// Load and store vectors - -DEFINE_FUNCTION_PR_V(vec_load,*(CCTK_REAL_VEC const* restrict)&p) -DEFINE_FUNCTION_PR_V(vec_loadu,vec_load(p)) -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -#define vec_loadu_maybe(off,p) (vec_load(p)) -#define vec_loadu_maybe3(off1,off2,off3,p) (vec_load(p)) - -DEFINE_FUNCTION_PRV(vec_store,*(CCTK_REAL_VEC* restrict)&p=x) -DEFINE_FUNCTION_PRV(vec_store_nta,vec_store(p,x)) - - - -// Functions and operators - -// Double-argument operators, both vectors -DEFINE_FUNCTION_VV_V(operator+,CCTK_REAL_VEC(x.v[0]+y.v[0],x.v[1]+y.v[1])) -DEFINE_FUNCTION_VV_V(operator-,CCTK_REAL_VEC(x.v[0]-y.v[0],x.v[1]-y.v[1])) -DEFINE_FUNCTION_VV_V(operator*,CCTK_REAL_VEC(x.v[0]*y.v[0],x.v[1]*y.v[1])) -DEFINE_FUNCTION_VV_V(operator/,CCTK_REAL_VEC(x.v[0]/y.v[0],x.v[1]/y.v[1])) - -// Double-argument operators, vector and scalar -DEFINE_FUNCTION_VR_V(operator+,CCTK_REAL_VEC(x.v[0]+a,x.v[1]+a)) -DEFINE_FUNCTION_VR_V(operator-,CCTK_REAL_VEC(x.v[0]-a,x.v[1]-a)) -DEFINE_FUNCTION_VR_V(operator*,CCTK_REAL_VEC(x.v[0]*a,x.v[1]*a)) -DEFINE_FUNCTION_VR_V(operator/,CCTK_REAL_VEC(x.v[0]/a,x.v[1]/a)) - -// Double-argument operators, scalar and vector -DEFINE_FUNCTION_RV_V(operator+,CCTK_REAL_VEC(a+x.v[0],a+x.v[1])) -DEFINE_FUNCTION_RV_V(operator-,CCTK_REAL_VEC(a-x.v[0],a-x.v[1])) -DEFINE_FUNCTION_RV_V(operator*,CCTK_REAL_VEC(a*x.v[0],a*x.v[1])) -DEFINE_FUNCTION_RV_V(operator/,CCTK_REAL_VEC(a/x.v[0],a/x.v[1])) - -// Single-argument operators -DEFINE_FUNCTION_V_V(operator+,x) -DEFINE_FUNCTION_V_V(operator-,CCTK_REAL_VEC(-x.v[0],-x.v[1])) - -// Functions -DEFINE_FUNCTION_V_V(exp,CCTK_REAL_VEC(exp(x.v[0]),exp(x.v[1]))) -DEFINE_FUNCTION_V_V(fabs,CCTK_REAL_VEC(fabs(x.v[0]),fabs(x.v[1]))) 
-DEFINE_FUNCTION_VV_V(fmax,CCTK_REAL_VEC(fmax(x.v[0],y.v[0]),fmax(x.v[1],y.v[1]))) -DEFINE_FUNCTION_VV_V(fmin,CCTK_REAL_VEC(fmin(x.v[0],y.v[0]),fmin(x.v[1],y.v[1]))) -DEFINE_FUNCTION_V_V(log,CCTK_REAL_VEC(log(x.v[0]),log(x.v[1]))) -DEFINE_FUNCTION_VR_V(pow,CCTK_REAL_VEC(pow(x.v[0],a),pow(x.v[1],a))) -DEFINE_FUNCTION_V_V(sqrt,CCTK_REAL_VEC(sqrt(x.v[0]),sqrt(x.v[1]))) diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-undefine.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-undefine.hh deleted file mode 100644 index 0d950c7..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors-undefine.hh +++ /dev/null @@ -1,14 +0,0 @@ -// Undefine all macros defined in "Vectors-define.hh", so that we -// leave a clean namespace - - - -#undef DEFINE_FUNCTION_PR_V -#undef DEFINE_FUNCTION_PRV -#undef DEFINE_FUNCTION_V_V -#undef DEFINE_FUNCTION_R_V -#undef DEFINE_FUNCTION_VV_V -#undef DEFINE_FUNCTION_VR_V -#undef DEFINE_FUNCTION_RV_V -#undef DEFINE_FUNCTION_RR_V -#undef DEFINE_FUNCTION_VVV_V diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors.hh deleted file mode 100644 index d32afb2..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Vectors.hh +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef VECTORS_HH -#define VECTORS_HH - - - -// Vectorisation - -#include -#include -#include - -#include - - - -#include "Vectors-define.hh" - -#if defined(KRANC_VECTORS) -// Vectorise - -# if ! 
defined(CCTK_REAL_PRECISION_8) -# error "Vectorisation is currently only supported for double precision" -# endif - -# if defined(__SSE2__) // SSE2 (Intel) -# if defined(KRANC_DIRECT) -# include "Vectors-SSE2-direct.hh" -# else -# include "Vectors-SSE2.hh" -# endif -# elif defined(__ALTIVEC__) && defined(_ARCH_PWR7) // Altivec (Power) -# if defined(KRANC_DIRECT) -# include "Vectors-VSX-direct.hh" -# else -# include "Vectors-VSX.hh" -# endif -# else -# include "Vectors-pseudo.hh" -# endif - -#else -// Don't vectorise - -# include "Vectors-default.hh" - -#endif - -#include "Vectors-undefine.hh" - - - -#endif // #ifndef VECTORS_HH diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-SSE2-direct.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-SSE2-direct.hh new file mode 100644 index 0000000..12cd6e8 --- /dev/null +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-SSE2-direct.hh @@ -0,0 +1,135 @@ +// Vectorise using Intel's or AMD's SSE2 + +// Use the type __m128d directly, without introducing a wrapper class +// Use macros instead of inline functions + + + +#include + +// Vector type corresponding to CCTK_REAL +typedef __m128d CCTK_REAL_VEC; + +// Number of vector elements in a CCTK_REAL_VEC +static +int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); + + + +// Create vectors, extract vector elements + +#define vec_set1(a) (_mm_set1_pd(a)) +#define vec_set(a,b) (_mm_set_pd(b,a)) + +// Get a scalar from the vector +#if defined(__PGI) && defined (__amd64__) +// _mm_cvtsd_f64 does not exist on PGI compilers +// # define vec_elt0(x) (*(CCTK_REAL const*)&(x)) +# define vec_elt0(x) ({ CCTK_REAL a_elt0; asm ("" : "=x" (a_elt0) : "0" (x)); a_elt0; }) +#else +// this is a no-op +# define vec_elt0(x) (_mm_cvtsd_f64(x)) +#endif +#define vec_elt1(x_) ({ CCTK_REAL_VEC const x_elt1=(x_); vec_elt0(_mm_unpackhi_pd(x_elt1,x_elt1)); }) + + + +// Load and store vectors + +// Load a vector from memory 
(aligned and unaligned); this loads from +// a reference to a scalar +#define vec_load(p) (_mm_load_pd(&(p))) +#define vec_loadu(p) (_mm_loadu_pd(&(p))) + +// Load a vector from memory that may or may not be aligned, as +// decided by the offset off and the vector size +// Implementation: Always use unaligned load +#define vec_loadu_maybe(off,p) (vec_loadu(p)) +#define vec_loadu_maybe3(off1,off2,off3,p) (vec_loadu(p)) +#if 0 +#define vec_loadu_maybe(off,p) \ + (!((off)&(CCTK_REAL_VEC_SIZE-1)) ? \ + vec_load(p) : vec_loadu(p)) +#define vec_loadu_maybe3(off1,off2,off3,p) \ + (!((off1)&(CCTK_REAL_VEC_SIZE-1)) && \ + !((off2)&(CCTK_REAL_VEC_SIZE-1)) && \ + !((off3)&(CCTK_REAL_VEC_SIZE-1)) ? \ + vec_load(p) : vec_loadu(p)) +#endif + +// Store a vector to memory (aligned and non-temporal); this stores to +// a reference to a scalar +#define vec_store(p,x) (_mm_store_pd(&(p),x)) +#define vec_storeu(p,x) (_mm_storeu_pd(&(p),x)) +#if defined(KRANC_CACHE) +# define vec_store_nta(p,x) (_mm_stream_pd(&(p),x)) +#else +# define vec_store_nta(p,x) (_mm_store_pd(&(p),x)) +#endif + +// Store a lower or higher partial vector (aligned and non-temporal); +// the non-temporal hint is probably ignored +#define vec_store_nta_partial_lo(p,x,n) (_mm_storel_pd(&(p),x)) +#define vec_store_nta_partial_hi(p,x,n) (_mm_storeh_pd((&(p))+1,x)) + + + +// Functions and operators + +// Operators +#undef fneg +#undef fmul +#undef fdiv +#undef fadd +#undef fsub +#if defined(__PGI) && defined (__amd64__) +// The PGI compiler does not understand __m128d literals +static union { + unsigned long long s[CCTK_REAL_VEC_SIZE]; + CCTK_REAL_VEC v; +} vec_neg_mask_impl = {0x8000000000000000ULL, 0x8000000000000000ULL}; +# define vec_neg_mask (vec_neg_mask_impl.v) +#else +# define vec_neg_mask ((CCTK_REAL_VEC)(__m128i){0x8000000000000000ULL, 0x8000000000000000ULL}) +#endif +#define fneg(x) (_mm_xor_pd(x,vec_neg_mask)) +#define fmul(x,y) (_mm_mul_pd(x,y)) +#define fdiv(x,y) (_mm_div_pd(x,y)) +#define fadd(x,y) 
(_mm_add_pd(x,y)) +#define fsub(x,y) (_mm_sub_pd(x,y)) + +// Cheap functions +#undef kfabs +#undef kfmax +#undef kfmin +#undef ksqrt +#if defined(__PGI) && defined (__amd64__) +// The PGI compiler does not understand __m128d literals +static union { + unsigned long long s[CCTK_REAL_VEC_SIZE]; + CCTK_REAL_VEC v; +} vec_fabs_mask_impl = {0x7fffffffffffffffULL, 0x7fffffffffffffffULL}; +# define vec_fabs_mask (vec_fabs_mask_impl.v) +#else +# define vec_fabs_mask ((CCTK_REAL_VEC)(__m128i){0x7fffffffffffffffULL, 0x7fffffffffffffffULL}) +#endif +#define kfabs(x) (_mm_and_pd(x,vec_fabs_mask)) +#define kfmax(x,y) (_mm_max_pd(x,y)) +#define kfmin(x,y) (_mm_min_pd(x,y)) +#define ksqrt(x) (_mm_sqrt_pd(x)) + +// Expensive functions +#undef kexp +#undef klog +#undef kpow +#define kexp(x_) ({ CCTK_REAL_VEC const x_exp=(x_); vec_set(exp(vec_elt0(x_exp)),exp(vec_elt1(x_exp))); }) +#define klog(x_) ({ CCTK_REAL_VEC const x_log=(x_); vec_set(log(vec_elt0(x_log)),log(vec_elt1(x_log))); }) +#define kpow(x_,a_) ({ CCTK_REAL_VEC const x_pow=(x_); CCTK_REAL const a_pow=(a_); vec_set(pow(vec_elt0(x_pow),a_pow),pow(vec_elt1(x_pow),a_pow)); }) + + + +#undef Sign +#define Sign(x) (42) + +#undef ToReal +#define ToReal(x) (vec_set1(x)) diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-SSE2.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-SSE2.hh new file mode 100644 index 0000000..b74fac0 --- /dev/null +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-SSE2.hh @@ -0,0 +1,201 @@ +// Vectorise using Intel's or AMD's SSE2 + + + +#include + +// Vector type corresponding to CCTK_REAL +struct CCTK_REAL_VEC { + // Underlying scalar and vector types + typedef double S; + typedef __m128d V; + + // Payload + V v; + + // Empty constructur + inline CCTK_REAL_VEC() { } + + // Convert from and to the underlying vector type + inline CCTK_REAL_VEC(V const v_): v(v_) { } + inline operator V const() const { return v; } + + // Convert from the 
underlying scalar type + inline CCTK_REAL_VEC(S const& a): v(_mm_set1_pd(a)) { } + inline CCTK_REAL_VEC(int const& a): v(_mm_set1_pd(S(a))) { } + + // Copy constructor + inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { } +}; + +// Number of vector elements in a CCTK_REAL_VEC +static +int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); + + + +// Create vectors, extract vector elements + +DEFINE_FUNCTION_R_V(vec_set1,_mm_set1_pd(a)) +DEFINE_FUNCTION_RR_V(vec_set,_mm_set_pd(b,a)) + +// Get a scalar from the vector +#if defined(__PGI) && defined (__amd64__) +// _mm_cvtsd_f64 does not exist on PGI compilers +// DEFINE_FUNCTION_V_R(vec_elt0,({ CCTK_REAL a; _mm_store_sd(&a,x); a; })) +// DEFINE_FUNCTION_V_R(vec_elt0,(*(CCTK_REAL const*)&x)) +// This generates the fastest code with PGI compilers +DEFINE_FUNCTION_V_R(vec_elt0,({ CCTK_REAL a; asm ("" : "=x" (a) : "0" (x)); a; })) +#else +DEFINE_FUNCTION_V_R(vec_elt0,_mm_cvtsd_f64(x)) // this is a no-op +#endif +DEFINE_FUNCTION_V_R(vec_elt1,vec_elt0(_mm_unpackhi_pd(x,x))) + + + +// Load and store vectors + +// Load a vector from memory (aligned and unaligned); this loads from +// a reference to a scalar +DEFINE_FUNCTION_PR_V(vec_load,_mm_load_pd(&p)) +DEFINE_FUNCTION_PR_V(vec_loadu,_mm_loadu_pd(&p)) + +// Load a vector from memory that may or may not be aligned, as +// decided by the offset off and the vector size +// Implementation: default to unaligned load +template<int mod> +DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl,vec_loadu(p)) +template<int mod1, int mod2, int mod3> +DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl3,vec_loadu(p)) +// Implementation: load aligned if the modulus is zero +template<> +inline +CCTK_REAL_VEC vec_loadu_maybe_impl<0> (CCTK_REAL const& p) +{ + return vec_load(p); +} +template<> +inline +CCTK_REAL_VEC vec_loadu_maybe_impl3<0,0,0> (CCTK_REAL const& p) +{ + return vec_load(p); +} +// Call the implementation with the offset modulo the vector size +#define vec_loadu_maybe(off,p) \ + (vec_loadu_maybe_impl<(off)&(CCTK_REAL_VEC_SIZE-1)>(p))
+#define vec_loadu_maybe3(off1,off2,off3,p) \ + (vec_loadu_maybe_impl3<(off1)&(CCTK_REAL_VEC_SIZE-1), \ + (off2)&(CCTK_REAL_VEC_SIZE-1), \ + (off3)&(CCTK_REAL_VEC_SIZE-1)>(p)) + +// Store a vector to memory (aligned and non-temporal); this stores to +// a reference to a scalar +DEFINE_FUNCTION_PRV(vec_store,_mm_store_pd(&p,x)) +DEFINE_FUNCTION_PRV(vec_storeu,_mm_storeu_pd(&p,x)) +#if defined(KRANC_CACHE) +DEFINE_FUNCTION_PRV(vec_store_nta,_mm_stream_pd(&p,x)) +#else +DEFINE_FUNCTION_PRV(vec_store_nta,_mm_store_pd(&p,x)) +#endif + +// Store a lower or higher partial vector (aligned and non-temporal); +// the non-temporal hint is probably ignored +static inline +void vec_store_nta_partial_lo (CCTK_REAL& p, CCTK_REAL_VEC const x, int const n) +{ + switch (n) { + case 1: _mm_storel_pd(&p,x); break; + default: assert(0); + } +} +static inline +void vec_store_nta_partial_hi (CCTK_REAL& p, CCTK_REAL_VEC const x, int const n) +{ + switch (n) { + case 1: _mm_storeh_pd((&p)+1,x); break; + default: assert(0); + } +} + + + +// Functions and operators + +// Single-argument operators +#if 0 +DEFINE_FUNCTION_V_V(operator+,x) +static CCTK_REAL_VEC const vec_neg_mask = + (CCTK_REAL_VEC::V)(__m128i) { 0x8000000000000000ULL, 0x8000000000000000ULL }; +DEFINE_FUNCTION_V_V(operator-,_mm_xor_pd(x,vec_neg_mask)) +#endif +DEFINE_FUNCTION_V_V(operator+,+x.v) +DEFINE_FUNCTION_V_V(operator-,-x.v) + +// Double-argument operators, both vectors +#if 0 +DEFINE_FUNCTION_VV_V(operator+,_mm_add_pd(x,y)) +DEFINE_FUNCTION_VV_V(operator-,_mm_sub_pd(x,y)) +DEFINE_FUNCTION_VV_V(operator*,_mm_mul_pd(x,y)) +DEFINE_FUNCTION_VV_V(operator/,_mm_div_pd(x,y)) +#endif +DEFINE_FUNCTION_VV_V(operator+,x.v+y.v) +DEFINE_FUNCTION_VV_V(operator-,x.v-y.v) +DEFINE_FUNCTION_VV_V(operator*,x.v*y.v) +DEFINE_FUNCTION_VV_V(operator/,x.v/y.v) + +// Double-argument operators, vector and scalar +DEFINE_FUNCTION_VR_V(operator+,x+vec_set1(a)) +DEFINE_FUNCTION_VR_V(operator-,x-vec_set1(a)) 
+DEFINE_FUNCTION_VR_V(operator*,x*vec_set1(a)) +DEFINE_FUNCTION_VR_V(operator/,x/vec_set1(a)) + +// Double-argument operators, scalar and vector +DEFINE_FUNCTION_RV_V(operator+,vec_set1(a)+x) +DEFINE_FUNCTION_RV_V(operator-,vec_set1(a)-x) +DEFINE_FUNCTION_RV_V(operator*,vec_set1(a)*x) +DEFINE_FUNCTION_RV_V(operator/,vec_set1(a)/x) + +// Cheap functions +#if defined(__PGI) && defined (__amd64__) +// The PGI compiler does not understand __m128d literals +static union { + CCTK_REAL_VEC::S s[CCTK_REAL_VEC_SIZE]; + CCTK_REAL_VEC::V v; +} vec_fabs_mask_impl = { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL }; +# define vec_fabs_mask (vec_fabs_mask_impl.v) +#else +static CCTK_REAL_VEC const vec_fabs_mask = + (CCTK_REAL_VEC::V)(__m128i) { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL }; +#endif +DEFINE_FUNCTION_V_V(fabs,_mm_and_pd(x,vec_fabs_mask)) +DEFINE_FUNCTION_VV_V(fmax,_mm_max_pd(x,y)) +DEFINE_FUNCTION_VV_V(fmin,_mm_min_pd(x,y)) +DEFINE_FUNCTION_V_V(sqrt,_mm_sqrt_pd(x)) + +// Expensive functions +DEFINE_FUNCTION_V_V(exp,vec_set(exp(vec_elt0(x)),exp(vec_elt1(x)))) +DEFINE_FUNCTION_V_V(log,vec_set(log(vec_elt0(x)),log(vec_elt1(x)))) +DEFINE_FUNCTION_VR_V(pow,vec_set(pow(vec_elt0(x),a),pow(vec_elt1(x),a))) + + + +#undef Sign +#define Sign(x) (42) + +// #undef ToReal +// #define ToReal(x) vec_set1(x) + +#if defined(__PGI) && defined (__amd64__) +// Special case for PGI 9.0.4 to avoid an internal compiler error +#undef IfThen +static inline +CCTK_REAL_VEC IfThen (bool const cond, CCTK_REAL_VEC const x, CCTK_REAL_VEC const y) +{ + union { + __m128i vi; + CCTK_REAL_VEC::V v; + } mask; + mask.vi = _mm_set1_epi64x(-(long long)cond); + return _mm_or_pd(_mm_and_pd(x.v, mask.v), _mm_andnot_pd(mask.v, y.v)); +} +#endif diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-VSX-direct.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-VSX-direct.hh new file mode 100644 index 0000000..7e06017 --- /dev/null +++ 
b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-VSX-direct.hh @@ -0,0 +1,111 @@ +// Vectorise using IBM's Altivec + +// Use the type vector double directly, without introducing a wrapper class +// Use macros instead of inline functions + + + +#include <altivec.h> + +// Vector type corresponding to CCTK_REAL +typedef vector double CCTK_REAL_VEC; + +// Number of vector elements in a CCTK_REAL_VEC +static +int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); + + + +// Create vectors, extract vector elements + +#define vec_set1(a) (vec_splats(a)) +#if defined(__GNUC__) +// GNU doesn't support array indices on vectors +union vec_mask { + double elts[2]; + vector double v; +}; +# define vec_set(a,b) ({ vec_mask x_set; x_set.elts[0]=(a); x_set.elts[1]=(b); x_set.v; }) +#else +# define vec_set(a,b) ({ CCTK_REAL_VEC x_set; x_set[0]=(a); x_set[1]=(b); x_set; }) +#endif + +// Get a scalar from the vector +#if defined(__GNUC__) +// GNU doesn't support array indices on vectors +# define vec_elt0(x) ({ vec_mask x_elt0; x_elt0.v=(x); x_elt0.elts[0]; }) +# define vec_elt1(x) ({ vec_mask x_elt1; x_elt1.v=(x); x_elt1.elts[1]; }) +#else +# define vec_elt0(x) ((x)[0]) +# define vec_elt1(x) ((x)[1]) +#endif + + + +// Load and store vectors + +// Load a vector from memory (aligned and unaligned); this loads from +// a reference to a scalar +#define vec_load(p) (*(CCTK_REAL_VEC const*)&(p)) +#define vec_loadu(p) (vec_load(p)) + +// Load a vector from memory that may or may not be aligned, as +// decided by the offset off and the vector size +#define vec_loadu_maybe(off,p) (vec_load(p)) +#define vec_loadu_maybe3(off1,off2,off3,p) (vec_load(p)) + +// Store a vector to memory (aligned and non-temporal); this stores to +// a reference to a scalar +#define vec_store(p,x) (*(CCTK_REAL_VEC*)&(p)=(x)) +#define vec_storeu(p,x) (*(CCTK_REAL_VEC*)&(p)=(x)) +// TODO: Use stvxl instruction?
+#define vec_store_nta(p,x) (vec_store(p,x)) + +// Store a lower or higher partial vector (aligned and non-temporal); +// the non-temporal hint is probably ignored +#define vec_store_nta_partial_lo(p,x,n) ((p)=vec_elt0(x)) +#define vec_store_nta_partial_hi(p,x,n) ((&(p))[1]=vec_elt1(x)) + + + +// Functions and operators + +// Other Altivec functions are: +// nabs: -abs a +// madd msub nmadd nmsub: [+-]a*b[+-]c + +// Triple-argument operators, all vectors +#undef fmadd +#undef fmsub +#undef fnmadd +#undef fnmsub +#define fmadd(x,y,z) (vec_madd(x,y,z)) +#define fmsub(x,y,z) (vec_msub(x,y,z)) +#define fnmadd(x,y,z) (vec_nmadd(x,y,z)) +#define fnmsub(x,y,z) (vec_nmsub(x,y,z)) + +// Cheap functions +#undef kfabs +#undef kfmax +#undef kfmin +#define kfabs(x) (vec_abs(x)) +#define kfmax(x,y) (vec_max(x,y)) +#define kfmin(x,y) (vec_min(x,y)) + +// Expensive functions +#undef kexp +#undef klog +#undef kpow +#undef ksqrt +#define kexp(x_) ({ CCTK_REAL_VEC const x_exp=(x_); vec_set(exp(vec_elt0(x_exp)),exp(vec_elt1(x_exp))); }) +#define klog(x_) ({ CCTK_REAL_VEC const x_log=(x_); vec_set(log(vec_elt0(x_log)),log(vec_elt1(x_log))); }) +#define kpow(x_,a_) ({ CCTK_REAL_VEC const x_pow=(x_); CCTK_REAL const a_pow=(a_); vec_set(pow(vec_elt0(x_pow),a_pow),pow(vec_elt1(x_pow),a_pow)); }) +#define ksqrt(x_) ({ CCTK_REAL_VEC const x_sqrt=(x_); vec_set(sqrt(vec_elt0(x_sqrt)),sqrt(vec_elt1(x_sqrt))); }) + + + +#undef Sign +#define Sign(x) (42) + +#undef ToReal +#define ToReal(x) (vec_set1((CCTK_REAL)(x))) diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-VSX.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-VSX.hh new file mode 100644 index 0000000..3fc97f6 --- /dev/null +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-VSX.hh @@ -0,0 +1,212 @@ +// Vectorise using IBM's Altivec + + + +#include <altivec.h> + +// Vector type corresponding to CCTK_REAL +struct CCTK_REAL_VEC { + // The underlying scalar and vector types + typedef double S; +
typedef vector double V; + V v; + union vec_mask { + S elts[2]; + V v; + }; + + // Set a vector from scalars +#if 0 + // IBM + inline CCTK_REAL_VEC(S const a, S const b) { v[0]=a; v[1]=b; } +#endif +#if 0 + inline CCTK_REAL_VEC(S const a, S const b): + v(vec_mergel(vec_splats(a), vec_splats(b))) { } +#endif + inline CCTK_REAL_VEC(S const a, S const b) + { + vec_mask x; + x.elts[0] = a; + x.elts[1] = b; + v = x.v; + } + + // Set a vector from a scalar, replicating the scalar + // Note: Could also use vec_xlds instead + inline CCTK_REAL_VEC(S const a): v(vec_splats(a)) { } + + // Convert from and to the underlying vector type + inline CCTK_REAL_VEC(V const v_): v(v_) { } + inline operator V const() const { return v; } + + inline CCTK_REAL_VEC() { } + + // Copy constructor + inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { } +}; + +// Number of vector elements in a CCTK_REAL_VEC +static +int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); + + + +// Create vectors, extract vector elements +DEFINE_FUNCTION_R_V(vec_set1,CCTK_REAL_VEC(a)) +DEFINE_FUNCTION_RR_V(vec_set,CCTK_REAL_VEC(a,b)) + +// Get a scalar from the vector +#if 0 +// IBM +DEFINE_FUNCTION_V_R(vec_elt0,x.v[0]) +DEFINE_FUNCTION_V_R(vec_elt1,x.v[1]) +#endif +static inline CCTK_REAL vec_elt0(CCTK_REAL_VEC const x) +{ + CCTK_REAL_VEC::vec_mask x1; + x1.v = x; + return x1.elts[0]; +} +static inline CCTK_REAL vec_elt1(CCTK_REAL_VEC const x) +{ + CCTK_REAL_VEC::vec_mask x1; + x1.v = x; + return x1.elts[1]; +} + + + +// Load and store vectors + +// Load a whole vector starting at the referenced scalar (not a splat of that scalar); this loads from +// a reference to a scalar +DEFINE_FUNCTION_PR_V(vec_load,*(CCTK_REAL_VEC::V const*)&p) +#if 0 +// IBM +DEFINE_FUNCTION_PR_V(vec_loadu,vec_xld2(0,const_cast<CCTK_REAL*>(&p))) +#endif +DEFINE_FUNCTION_PR_V(vec_loadu,vec_load(p)) + +// Load a vector from memory that may or may not be aligned, as +// decided by the offset off and the vector size +// Implementation: default to unaligned load +template<int mod>
+DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl,vec_loadu(p)) +template<int mod1, int mod2, int mod3> +DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl3,vec_loadu(p)) +// Implementation: load aligned if the modulus is zero +template<> +inline +CCTK_REAL_VEC vec_loadu_maybe_impl<0> (CCTK_REAL const& p) +{ + return vec_load(p); +} +template<> +inline +CCTK_REAL_VEC vec_loadu_maybe_impl3<0,0,0> (CCTK_REAL const& p) +{ + return vec_load(p); +} +// Call the implementation with the offset modulo the vector size +#define vec_loadu_maybe(off,p) \ + (vec_loadu_maybe_impl<(off)&(CCTK_REAL_VEC_SIZE-1)>(p)) +#define vec_loadu_maybe3(off1,off2,off3,p) \ + (vec_loadu_maybe_impl3<(off1)&(CCTK_REAL_VEC_SIZE-1), \ + (off2)&(CCTK_REAL_VEC_SIZE-1), \ + (off3)&(CCTK_REAL_VEC_SIZE-1)>(p)) + +// Store a vector to memory (aligned and non-temporal); this stores to +// a reference to a scalar +DEFINE_FUNCTION_PRV(vec_store,*(CCTK_REAL_VEC::V*)&p=x) +DEFINE_FUNCTION_PRV(vec_storeu,*(CCTK_REAL_VEC::V*)&p=x) +// TODO: Use stvxl instruction? +DEFINE_FUNCTION_PRV(vec_store_nta,*(CCTK_REAL_VEC::V*)&p=x) + +// Store a lower or higher partial vector (aligned and non-temporal); +// the non-temporal hint is probably ignored +static inline +void vec_store_nta_partial_lo (CCTK_REAL& p, CCTK_REAL_VEC const x, int const n) +{ + switch (n) { + case 1: p=vec_elt0(x); break; + default: assert(0); + } +} +static inline +void vec_store_nta_partial_hi (CCTK_REAL& p, CCTK_REAL_VEC const x, int const n) +{ + switch (n) { + case 1: (&p)[1]=vec_elt1(x); break; + default: assert(0); + } +} + + + +// Functions and operators + +// Other Altivec functions are: +// nabs: -abs a +// madd msub nmadd nmsub: [+-]a*b[+-]c + +// Single-argument operators +#if 0 +DEFINE_FUNCTION_V_V(operator+,x) +DEFINE_FUNCTION_V_V(operator-,vec_neg(x)) +#endif +DEFINE_FUNCTION_V_V(operator+,+x.v) +DEFINE_FUNCTION_V_V(operator-,-x.v) + +// Double-argument operators, both vectors +#if 0 +DEFINE_FUNCTION_VV_V(operator+,vec_add(x,y)) +DEFINE_FUNCTION_VV_V(operator-,vec_sub(x,y))
+DEFINE_FUNCTION_VV_V(operator*,vec_mul(x,y)) +DEFINE_FUNCTION_VV_V(operator/,vec_div(x,y)) +#endif +DEFINE_FUNCTION_VV_V(operator+,x.v+y.v) +DEFINE_FUNCTION_VV_V(operator-,x.v-y.v) +DEFINE_FUNCTION_VV_V(operator*,x.v*y.v) +DEFINE_FUNCTION_VV_V(operator/,x.v/y.v) + +// Double-argument operators, vector and scalar +DEFINE_FUNCTION_VR_V(operator+,x+vec_set1(a)) +DEFINE_FUNCTION_VR_V(operator-,x-vec_set1(a)) +DEFINE_FUNCTION_VR_V(operator*,x*vec_set1(a)) +DEFINE_FUNCTION_VR_V(operator/,x/vec_set1(a)) + +// Double-argument operators, scalar and vector +DEFINE_FUNCTION_RV_V(operator+,vec_set1(a)+x) +DEFINE_FUNCTION_RV_V(operator-,vec_set1(a)-x) +DEFINE_FUNCTION_RV_V(operator*,vec_set1(a)*x) +DEFINE_FUNCTION_RV_V(operator/,vec_set1(a)/x) + +// Triple-argument operators, all vectors +#undef fmadd +#undef fmsub +#undef fnmadd +#undef fnmsub +DEFINE_FUNCTION_VVV_V(fmadd,vec_madd(x.v,y.v,z.v)) +DEFINE_FUNCTION_VVV_V(fmsub,vec_msub(x.v,y.v,z.v)) +DEFINE_FUNCTION_VVV_V(fnmadd,vec_nmadd(x.v,y.v,z.v)) +DEFINE_FUNCTION_VVV_V(fnmsub,vec_nmsub(x.v,y.v,z.v)) + +// Cheap functions +DEFINE_FUNCTION_V_V(fabs,vec_abs(x.v)) +DEFINE_FUNCTION_VV_V(fmax,vec_max(x.v,y.v)) +DEFINE_FUNCTION_VV_V(fmin,vec_min(x.v,y.v)) + +// Expensive functions +DEFINE_FUNCTION_V_V(exp,vec_set(exp(vec_elt0(x)),exp(vec_elt1(x)))) +DEFINE_FUNCTION_V_V(log,vec_set(log(vec_elt0(x)),log(vec_elt1(x)))) +DEFINE_FUNCTION_VR_V(pow,vec_set(pow(vec_elt0(x),a),pow(vec_elt1(x),a))) +DEFINE_FUNCTION_V_V(sqrt,vec_set(sqrt(vec_elt0(x)),sqrt(vec_elt1(x)))) + + + +#undef Sign +#define Sign(x) (42) + +// #undef ToReal +// #define ToReal(x) (vec_set1(x)) diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-default.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-default.hh new file mode 100644 index 0000000..f928ed8 --- /dev/null +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-default.hh @@ -0,0 +1,31 @@ +// Fallback vectorisation implementation: Do not vectorise + + + 
+// Use CCTK_REAL +typedef CCTK_REAL CCTK_REAL_VEC; + +// Number of vector elements in a CCTK_REAL_VEC +static int const CCTK_REAL_VEC_SIZE = 1; + + + +// We use macros here, so that we are not surprised by compilers which +// don't like to inline functions (e.g. PGI). This should also make +// debug builds (which may not inline) more efficient. + +#define vec_load(p) (p) +#define vec_loadu(p) (p) + +// Load a vector from memory that may or may not be aligned, as +// decided by the offset off and the vector size +#define vec_loadu_maybe(off,p) (p) +#define vec_loadu_maybe3(off1,off2,off3,p) (p) + +#define vec_store(p,x) ((p)=(x)) +#define vec_store_nta(p,x) ((p)=(x)) + +// Store a lower or higher partial vector (aligned and non-temporal); +// the non-temporal hint is probably ignored +#define vec_store_nta_partial_lo(p,x,n) (assert(0)) +#define vec_store_nta_partial_hi(p,x,n) (assert(0)) diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-define.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-define.hh new file mode 100644 index 0000000..f5c0b22 --- /dev/null +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-define.hh @@ -0,0 +1,104 @@ +// Define some macros that simplify defining short function that are +// supposed to be inlined + + + +// Letters defining the prototype (argument and return value types): +// I: i,j: integer +// R: a,b: real +// V: x,y: vector (of real) +// P: p,q: pointer (i.e. const reference) to something +// L: l,m: L-value (i.e. 
non-const reference) to something + + + +// Load and store + +#define DEFINE_FUNCTION_PR_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL const& p) \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_PRV(name,expr) \ +static inline \ +void name (CCTK_REAL& p, CCTK_REAL_VEC const x) \ +{ \ + expr; \ +} + +#define DEFINE_FUNCTION_PVR(name,expr) \ +static inline \ +void name (CCTK_REAL_VEC& p, CCTK_REAL const a) \ +{ \ + expr; \ +} + + + +// Functions and operators + +#define DEFINE_FUNCTION_V_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL_VEC const x) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_V_R(name,expr) \ +static inline \ +CCTK_REAL name (CCTK_REAL_VEC const x) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_R_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL const a) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_VV_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL_VEC const x, CCTK_REAL_VEC const y) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_VR_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL_VEC const x, CCTK_REAL const a) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_RV_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL const a, CCTK_REAL_VEC const x) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_RR_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL const a, CCTK_REAL const b) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_VVV_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL_VEC const x, CCTK_REAL_VEC const y, CCTK_REAL_VEC const z) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-outdated.hh 
b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-outdated.hh new file mode 100644 index 0000000..df83b3a --- /dev/null +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-outdated.hh @@ -0,0 +1,591 @@ +#ifndef VECTORS_HH +#define VECTORS_HH + + + +// Vectorisation + +#include +#include +#include + +#include + + + +// I: i,j: integer +// R: a,b: real +// V: x,y: vector (of real) +// P: p,q: pointer (i.e. const reference) to something +// L: l,m: L-value (i.e. non-const reference) to something + +#define DEFINE_FUNCTION_PR_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL const& p) \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_PRV(name,expr) \ +static inline \ +void name (CCTK_REAL& p, CCTK_REAL_VEC const& x) \ +{ \ + expr; \ +} + +#define DEFINE_FUNCTION_PVR(name,expr) \ +static inline \ +void name (CCTK_REAL_VEC& p, CCTK_REAL const& a) \ +{ \ + expr; \ +} + +#define DEFINE_FUNCTION_V_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL_VEC const& x) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_V_R(name,expr) \ +static inline \ +CCTK_REAL name (CCTK_REAL_VEC const& x) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_R_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL const& a) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_VV_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL_VEC const& x, CCTK_REAL_VEC const& y) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_VR_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL_VEC const& x, CCTK_REAL const& a) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_RV_V(name,expr) \ +static inline \ +CCTK_REAL_VEC name (CCTK_REAL const& a, CCTK_REAL_VEC const& x) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + +#define DEFINE_FUNCTION_RR_V(name,expr) \ +static inline \ +CCTK_REAL_VEC 
name (CCTK_REAL const& a, CCTK_REAL const& b) \ + CCTK_ATTRIBUTE_PURE \ +{ \ + return expr; \ +} + + + +// Intel, double +#if defined(KRANC_VECTORS) && defined(__SSE2__) && defined(CCTK_REAL_PRECISION_8) + +#include + +// Vector type corresponding to CCTK_REAL +struct CCTK_REAL_VEC { + // The underlying scalar and vector types + typedef double S; + typedef __m128d V; + V v; + + // Set a vector from scalars + inline CCTK_REAL_VEC(S const& a, S const& b): v(_mm_set_pd(b,a)) { } + + // Set a vector from a scalar, replicating the scalar + inline CCTK_REAL_VEC(S const& a): v(_mm_set1_pd(a)) { } + + // Convert from and to the underlying vector type + inline CCTK_REAL_VEC(V const& v_): v(v_) { } + inline operator V const() const { return v; } + + inline CCTK_REAL_VEC() { } + + // Copy constructor + inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { } +}; + +union vec_mask { + unsigned long long bits[2]; + CCTK_REAL_VEC::V v; +}; + +DEFINE_FUNCTION_R_V(vec_set1,_mm_set1_pd(a)) +DEFINE_FUNCTION_RR_V(vec_set,_mm_set_pd(b,a)) + +// Get a scalar from the vector +#if defined(__PGI) && defined (__amd64__) +// _mm_cvtsd_f64 does not exist on PGI compilers +static inline +CCTK_REAL vec_elt0 (CCTK_REAL_VEC const& x) +{ + CCTK_REAL a; _mm_store_sd(&a,x); return a; +} +#else +DEFINE_FUNCTION_V_R(vec_elt0,_mm_cvtsd_f64(x)) //this is a no-op +#endif + +#if 0 +DEFINE_FUNCTION_V_R(vec_elt1,vec_elt0(_mm_shuffle_pd(x,x,_MM_SHUFFLE2(1,1)))) +#endif +static inline +CCTK_REAL vec_elt1 (CCTK_REAL_VEC const& x) +{ + CCTK_REAL a; _mm_storeh_pd(&a,x); return a; +} + +// Load a vector from memory (aligned and unaligned); this loads from +// a reference to a scalar +DEFINE_FUNCTION_PR_V(vec_load,_mm_load_pd(&p)) +DEFINE_FUNCTION_PR_V(vec_loadu,_mm_loadu_pd(&p)) + +#if 0 +// Load a partial vector (duplicating the last loaded element to fill +// the remaining elements) +// TODO: Should this be aligned or unaligned? 
+static inline +CCTK_REAL_VEC vec_load_partial (CCTK_REAL const& p, int const n) +{ + switch (n) { + case 1: return _mm_load1_pd(p); + default: assert(0); + } +} +#endif + +// Load a vector from memory that may or may not be aligned, as +// decided by the offset off and the vector size +// Implementation: default to unaligned load +template +DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl,vec_loadu(p)) +template +DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl3,vec_loadu(p)) +// Implementation: load aligned if the modulus is zero +template<> +inline +CCTK_REAL_VEC vec_loadu_maybe_impl<0> (CCTK_REAL const& p) +{ + return vec_load(p); +} +template<> +inline +CCTK_REAL_VEC vec_loadu_maybe_impl3<0,0,0> (CCTK_REAL const& p) +{ + return vec_load(p); +} +// Call the implementation with the modulus +template +static inline +CCTK_REAL_VEC vec_loadu_maybe (CCTK_REAL const& p) +{ + return vec_loadu_maybe_impl(p); +} +template +static inline +CCTK_REAL_VEC vec_loadu_maybe3 (CCTK_REAL const& p) +{ + return vec_loadu_maybe_impl3(p); +} + +// Store a vector to memory (aligned and non-temporal); this stores to +// a reference to a scalar +DEFINE_FUNCTION_PRV(vec_store,_mm_store_pd(&p,x)) +DEFINE_FUNCTION_PRV(vec_store_nta,_mm_stream_pd(&p,x)) + +// Store a lower or higher partial vector (aligned and non-temporal); +// the non-temporal hint is probably ignored +static inline +void vec_storel_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n) +{ + switch (n) { + case 1: _mm_storel_pd(&p,x); break; + default: assert(0); + } +} +static inline +void vec_storeh_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n) +{ + switch (n) { + case 1: _mm_storeh_pd((&p)+1,x); break; + default: assert(0); + } +} + +// Double-argument operators, both vectors +DEFINE_FUNCTION_VV_V(operator+,_mm_add_pd(x,y)) +DEFINE_FUNCTION_VV_V(operator-,_mm_sub_pd(x,y)) +DEFINE_FUNCTION_VV_V(operator*,_mm_mul_pd(x,y)) +DEFINE_FUNCTION_VV_V(operator/,_mm_div_pd(x,y)) + +// Double-argument operators, vector and 
scalar +DEFINE_FUNCTION_VR_V(operator+,x+vec_set1(a)) +DEFINE_FUNCTION_VR_V(operator-,x-vec_set1(a)) +DEFINE_FUNCTION_VR_V(operator*,x*vec_set1(a)) +DEFINE_FUNCTION_VR_V(operator/,x/vec_set1(a)) + +// Double-argument operators, scalar and vector +DEFINE_FUNCTION_RV_V(operator+,vec_set1(a)+x) +DEFINE_FUNCTION_RV_V(operator-,vec_set1(a)-x) +DEFINE_FUNCTION_RV_V(operator*,vec_set1(a)*x) +DEFINE_FUNCTION_RV_V(operator/,vec_set1(a)/x) + +// Single-argument operators +DEFINE_FUNCTION_V_V(operator+,x) +#if 0 +DEFINE_FUNCTION_V_V(operator-,vec_set(0.0,0.0)-x) +#endif +static vec_mask const vec_neg_mask = +{ { 0x8000000000000000ULL, 0x8000000000000000ULL } }; +DEFINE_FUNCTION_V_V(operator-,_mm_xor_pd(x,vec_neg_mask.v)) + +// Cheap functions +static vec_mask const vec_fabs_mask = +{ { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL } }; +DEFINE_FUNCTION_V_V(fabs,_mm_and_pd(x,vec_fabs_mask.v)) +DEFINE_FUNCTION_VV_V(fmax,_mm_max_pd(x,y)) +DEFINE_FUNCTION_VV_V(fmin,_mm_min_pd(x,y)) +DEFINE_FUNCTION_V_V(sqrt,_mm_sqrt_pd(x)) + +// Expensive functions +DEFINE_FUNCTION_V_V(exp,vec_set(exp(vec_elt0(x)),exp(vec_elt1(x)))) +DEFINE_FUNCTION_V_V(log,vec_set(log(vec_elt0(x)),log(vec_elt1(x)))) +DEFINE_FUNCTION_VR_V(pow,vec_set(pow(vec_elt0(x),a),pow(vec_elt1(x),a))) + +// Special case for PGI to avoid internal compiler error +#if defined(__PGI) && defined (__amd64__) +#undef IfThen +CCTK_REAL_VEC IfThen (bool const cond, CCTK_REAL_VEC const& x, CCTK_REAL_VEC co\ +nst& y) +{ + return cond*x + (not cond)*y; +} +#endif + + + +#if 0 +// Try to use the __m128d type directly. + +// This does not really work, because it is not possible to define +// automatic conversion operators from double to __m128d, so that +// explicit conversions are required. This makes the code look more +// clumsy. 
+ +// Vector type corresponding to CCTK_REAL +typedef __m128d CCTK_REAL_VEC; + +DEFINE_FUNCTION_R_V(vec_set1,_mm_set1_pd(a)) +DEFINE_FUNCTION_RR_V(vec_set,_mm_set_pd(b,a)) + +// Get a scalar from the vector +static inline +CCTK_REAL vec_elt0 (CCTK_REAL_VEC const& x) +{ +#if 0 + // _mm_cvtsd_f64 does not exist on PGI compilers + return _mm_cvtsd_f64(x); // this is a no-op +#endif + CCTK_REAL a; _mm_store_sd(&a,x); return a; +} + +DEFINE_FUNCTION_V_R(vec_elt1,vec_elt0(_mm_shuffle_pd(x,x,_MM_SHUFFLE2(1,1)))) + +// Load a vector from memory (aligned and unaligned); this loads from +// a reference to a scalar +DEFINE_FUNCTION_PR_V(vec_load,_mm_load_pd(&p)) +DEFINE_FUNCTION_PR_V(vec_loadu,_mm_loadu_pd(&p)) + +// Store a vector to memory (aligned and non-temporal); this stores to +// a reference to a scalar +DEFINE_FUNCTION_PRV(vec_store,_mm_store_pd(&p,x)) +DEFINE_FUNCTION_PRV(vec_store_nta,_mm_stream_pd(&p,x)) + +// Cheap functions +static vec_mask const vec_fabs_mask = +{ { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL } }; +DEFINE_FUNCTION_V_V(fabs,_mm_and_pd(x,vec_fabs_mask.v)) +DEFINE_FUNCTION_VV_V(fmax,_mm_max_pd(x,y)) +DEFINE_FUNCTION_VV_V(fmin,_mm_min_pd(x,y)) +DEFINE_FUNCTION_V_V(sqrt,_mm_sqrt_pd(x)) + +// Expensive functions +DEFINE_FUNCTION_V_V(exp,set(exp(vec_elt0(x)),exp(vec_elt1(x)))) +DEFINE_FUNCTION_V_V(log,set(log(vec_elt0(x)),log(vec_elt1(x)))) +DEFINE_FUNCTION_VR_V(pow,set(pow(vec_elt0(x),a),pow(vec_elt1(x),a))) + +#endif + + + +// Intel, float +#elif defined(KRANC_VECTORS) && defined(__SSE__) && defined(CCTK_REAL_PRECISION_4) + +#include + +// A vector type corresponding to CCTK_REAL +typedef __m128 CCTK_REAL_VEC; + + + +// Power, double +#elif defined(KRANC_VECTORS) && defined(__ALTIVEC__) && defined(_ARCH_PWR7) && defined(CCTK_REAL_PRECISION_8) + +#include + +// Vector type corresponding to CCTK_REAL +struct CCTK_REAL_VEC { + // The underlying scalar and vector types + typedef double S; + typedef vector double V; + V v; + + // vec_insert, 
vec_extract, vec_splat + + // Set a vector from scalars + inline CCTK_REAL_VEC(S const& a, S const& b) { v[0]=a; v[1]=b; } + + // Set a vector from a scalar, replicating the scalar + inline CCTK_REAL_VEC(S const& a): v(vec_splats(a)) { } + + // Convert from and to the underlying vector type + inline CCTK_REAL_VEC(V const& v_): v(v_) { } + inline operator V const() const { return v; } + + inline CCTK_REAL_VEC() { } + + // Copy constructor + inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { } +}; + +DEFINE_FUNCTION_R_V(vec_set1,CCTK_REAL_VEC(a)) +DEFINE_FUNCTION_RR_V(vec_set,CCTK_REAL_VEC(a,b)) + +// Get a scalar from the vector +DEFINE_FUNCTION_V_R(vec_elt0,x.v[0]) +DEFINE_FUNCTION_V_R(vec_elt1,x.v[1]) + +// Load a vector from memory (aligned and unaligned); this loads from +// a reference to a scalar +DEFINE_FUNCTION_PR_V(vec_load,p) +DEFINE_FUNCTION_PR_V(vec_loadu,vec_xld2(0,const_cast(&p))) +// vec_xlds + +// Load a vector from memory that may or may not be aligned, as +// decided by the offset off and the vector size +// Implementation: default to unaligned load +template +DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl,vec_loadu(p)) +// Implementation: load aligned if the modulus is zero +#define static +template<> +DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl<0>,vec_load(p)) +#undef static +// Call the implementation with the modulus +template +DEFINE_FUNCTION_PR_V(vec_loadu_maybe,vec_loadu_maybe_impl(p)) + +// Store a vector to memory (aligned and non-temporal); this stores to +// a reference to a scalar +DEFINE_FUNCTION_PRV(vec_store,*(CCTK_REAL_VEC::V*)&p=x) +DEFINE_FUNCTION_PRV(vec_store_nta,*(CCTK_REAL_VEC::V*)&p=x) + +// Store a lower or higher partial vector (aligned and non-temporal); +// the non-temporal hint is probably ignored +static inline +void vec_storel_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n) +{ + switch (n) { + case 1: p=x.v[0]; break; + default: assert(0); + } +} +static inline +void vec_storeh_partial (CCTK_REAL& p, 
CCTK_REAL_VEC const& x, int const n) +{ + switch (n) { + case 1: (&p)[1]=x.v[1]; break; + default: assert(0); + } +} + +// Double-argument operators, both vectors +DEFINE_FUNCTION_VV_V(operator+,vec_add(x,y)) +DEFINE_FUNCTION_VV_V(operator-,vec_sub(x,y)) +DEFINE_FUNCTION_VV_V(operator*,vec_mul(x,y)) +DEFINE_FUNCTION_VV_V(operator/,vec_div(x,y)) + +// Double-argument operators, vector and scalar +DEFINE_FUNCTION_VR_V(operator+,x+vec_set1(a)) +DEFINE_FUNCTION_VR_V(operator-,x-vec_set1(a)) +DEFINE_FUNCTION_VR_V(operator*,x*vec_set1(a)) +DEFINE_FUNCTION_VR_V(operator/,x/vec_set1(a)) + +// Double-argument operators, scalar and vector +DEFINE_FUNCTION_RV_V(operator+,vec_set1(a)+x) +DEFINE_FUNCTION_RV_V(operator-,vec_set1(a)-x) +DEFINE_FUNCTION_RV_V(operator*,vec_set1(a)*x) +DEFINE_FUNCTION_RV_V(operator/,vec_set1(a)/x) + +// Single-argument operators +DEFINE_FUNCTION_V_V(operator+,x) +DEFINE_FUNCTION_V_V(operator-,vec_neg(x)) + +// Cheap functions +DEFINE_FUNCTION_V_V(fabs,vec_abs(x)) +DEFINE_FUNCTION_VV_V(fmax,vec_max(x,y)) +DEFINE_FUNCTION_VV_V(fmin,vec_min(x,y)) + +// Expensive functions +DEFINE_FUNCTION_V_V(exp,vec_set(exp(vec_elt0(x)),exp(vec_elt1(x)))) +DEFINE_FUNCTION_V_V(log,vec_set(log(vec_elt0(x)),log(vec_elt1(x)))) +DEFINE_FUNCTION_VR_V(pow,vec_set(pow(vec_elt0(x),a),pow(vec_elt1(x),a))) +DEFINE_FUNCTION_V_V(sqrt,vec_set(sqrt(vec_elt0(x)),sqrt(vec_elt1(x)))) + + + +// Fallback: pseudo-vectorisation +#elif 0 + +// There is no vector type corresponding to CCTK_REAL +struct CCTK_REAL_VEC { + // The underlying scalar and vector types + CCTK_REAL v, w; + + // Set a vector from scalars + inline CCTK_REAL_VEC(CCTK_REAL const& a, CCTK_REAL const& b): v(a), w(b) { } + + // Set a vector from a scalar, replicating the scalar + inline CCTK_REAL_VEC(CCTK_REAL const& a): v(a), w(a) { } + + inline CCTK_REAL_VEC() { } + + // Copy constructor + inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x.v), w(x.w) { } +}; + + + +DEFINE_FUNCTION_PR_V(vec_load,*(CCTK_REAL_VEC const* 
restrict)&p) +DEFINE_FUNCTION_PR_V(vec_loadu,vec_load(p)) +// Load a vector from memory that may or may not be aligned, as +// decided by the offset off and the vector size +template +DEFINE_FUNCTION_PR_V(vec_loadm,vec_load(p)) + +DEFINE_FUNCTION_PRV(vec_store,*(CCTK_REAL_VEC* restrict)&p=x) +DEFINE_FUNCTION_PRV(vec_store_nta,vec_store(p,x)) + +// Double-argument operators, both vectors +DEFINE_FUNCTION_VV_V(operator+,CCTK_REAL_VEC(x.v+y.v,x.w+y.w)) +DEFINE_FUNCTION_VV_V(operator-,CCTK_REAL_VEC(x.v-y.v,x.w-y.w)) +DEFINE_FUNCTION_VV_V(operator*,CCTK_REAL_VEC(x.v*y.v,x.w*y.w)) +DEFINE_FUNCTION_VV_V(operator/,CCTK_REAL_VEC(x.v/y.v,x.w/y.w)) + +// Double-argument operators, vector and scalar +DEFINE_FUNCTION_VR_V(operator+,CCTK_REAL_VEC(x.v+a,x.w+a)) +DEFINE_FUNCTION_VR_V(operator-,CCTK_REAL_VEC(x.v-a,x.w-a)) +DEFINE_FUNCTION_VR_V(operator*,CCTK_REAL_VEC(x.v*a,x.w*a)) +DEFINE_FUNCTION_VR_V(operator/,CCTK_REAL_VEC(x.v/a,x.w/a)) + +// Double-argument operators, scalar and vector +DEFINE_FUNCTION_RV_V(operator+,CCTK_REAL_VEC(a+x.v,a+x.w)) +DEFINE_FUNCTION_RV_V(operator-,CCTK_REAL_VEC(a-x.v,a-x.w)) +DEFINE_FUNCTION_RV_V(operator*,CCTK_REAL_VEC(a*x.v,a*x.w)) +DEFINE_FUNCTION_RV_V(operator/,CCTK_REAL_VEC(a/x.v,a/x.w)) + +// Single-argument operators +DEFINE_FUNCTION_V_V(operator+,x) +DEFINE_FUNCTION_V_V(operator-,CCTK_REAL_VEC(-x.v,-x.w)) + +// Cheap functions +DEFINE_FUNCTION_V_V(fabs,CCTK_REAL_VEC(fabs(x.v),fabs(x.w))) +DEFINE_FUNCTION_VV_V(fmax,CCTK_REAL_VEC(fmax(x.v,y.v),fmax(x.w,y.w))) +DEFINE_FUNCTION_VV_V(fmin,CCTK_REAL_VEC(fmin(x.v,y.v),fmin(x.w,y.w))) +DEFINE_FUNCTION_V_V(sqrt,CCTK_REAL_VEC(sqrt(x.v),sqrt(x.w))) + +// Expensive functions +DEFINE_FUNCTION_V_V(exp,CCTK_REAL_VEC(exp(x.v),exp(x.w))) +DEFINE_FUNCTION_V_V(log,CCTK_REAL_VEC(log(x.v),log(x.w))) +DEFINE_FUNCTION_VR_V(pow,CCTK_REAL_VEC(pow(x.v,a),pow(x.w,a))) + + + +// Fallback: no vectorisation +#else + +// There is no vector type corresponding to CCTK_REAL +typedef CCTK_REAL CCTK_REAL_VEC; + + + 
+DEFINE_FUNCTION_PR_V(vec_load,p) +DEFINE_FUNCTION_PR_V(vec_loadu,p) +// Load a vector from memory that may or may not be aligned, as +// decided by the offset off and the vector size +// Implementation: default to unaligned load +template +DEFINE_FUNCTION_PR_V(vec_loadu_maybe,p) +template +DEFINE_FUNCTION_PR_V(vec_loadu_maybe3,p) + +DEFINE_FUNCTION_PRV(vec_store,p=x) +DEFINE_FUNCTION_PRV(vec_store_nta,p=x) + +// Store a lower or higher partial vector (aligned and non-temporal); +// the non-temporal hint is probably ignored +static inline +void vec_storel_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n) +{ + assert(0); +} +static inline +void vec_storeh_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n) +{ + assert(0); +} + + + +#endif + + + +#undef DEFINE_FUNCTION_PR_V +#undef DEFINE_FUNCTION_PRV +#undef DEFINE_FUNCTION_V_V +#undef DEFINE_FUNCTION_R_V +#undef DEFINE_FUNCTION_VV_V +#undef DEFINE_FUNCTION_VR_V +#undef DEFINE_FUNCTION_RV_V +#undef DEFINE_FUNCTION_RR_V + + + +// Number of vector elements in a CCTK_REAL_VEC +static +int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); + + + +#endif // #ifndef VECTORS_HH diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-pseudo.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-pseudo.hh new file mode 100644 index 0000000..f439c9b --- /dev/null +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-pseudo.hh @@ -0,0 +1,72 @@ +// Pseudo vectorisation using scalar operations + + + +// Number of vector elements in a CCTK_REAL_VEC +static int const CCTK_REAL_VEC_SIZE = 2; + +// There is no vector type corresponding to CCTK_REAL +struct CCTK_REAL_VEC { + // The underlying scalar and vector types + CCTK_REAL v[CCTK_REAL_VEC_SIZE]; + + // Set a vector from scalars + inline CCTK_REAL_VEC(CCTK_REAL const& a, CCTK_REAL const& b): v(a), w(b) { } + + // Set a vector from a scalar, replicating the scalar + inline CCTK_REAL_VEC(CCTK_REAL 
const& a): v(a), w(a) { } + + inline CCTK_REAL_VEC() { } + + // Copy constructor + inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x) { v[0]=x.v[0]; v[1]=x.v[1]; } +}; + + + +// Load and store vectors + +DEFINE_FUNCTION_PR_V(vec_load,*(CCTK_REAL_VEC const* restrict)&p) +DEFINE_FUNCTION_PR_V(vec_loadu,vec_load(p)) +// Load a vector from memory that may or may not be aligned, as +// decided by the offset off and the vector size +#define vec_loadu_maybe(off,p) (vec_load(p)) +#define vec_loadu_maybe3(off1,off2,off3,p) (vec_load(p)) + +DEFINE_FUNCTION_PRV(vec_store,*(CCTK_REAL_VEC* restrict)&p=x) +DEFINE_FUNCTION_PRV(vec_store_nta,vec_store(p,x)) + + + +// Functions and operators + +// Double-argument operators, both vectors +DEFINE_FUNCTION_VV_V(operator+,CCTK_REAL_VEC(x.v[0]+y.v[0],x.v[1]+y.v[1])) +DEFINE_FUNCTION_VV_V(operator-,CCTK_REAL_VEC(x.v[0]-y.v[0],x.v[1]-y.v[1])) +DEFINE_FUNCTION_VV_V(operator*,CCTK_REAL_VEC(x.v[0]*y.v[0],x.v[1]*y.v[1])) +DEFINE_FUNCTION_VV_V(operator/,CCTK_REAL_VEC(x.v[0]/y.v[0],x.v[1]/y.v[1])) + +// Double-argument operators, vector and scalar +DEFINE_FUNCTION_VR_V(operator+,CCTK_REAL_VEC(x.v[0]+a,x.v[1]+a)) +DEFINE_FUNCTION_VR_V(operator-,CCTK_REAL_VEC(x.v[0]-a,x.v[1]-a)) +DEFINE_FUNCTION_VR_V(operator*,CCTK_REAL_VEC(x.v[0]*a,x.v[1]*a)) +DEFINE_FUNCTION_VR_V(operator/,CCTK_REAL_VEC(x.v[0]/a,x.v[1]/a)) + +// Double-argument operators, scalar and vector +DEFINE_FUNCTION_RV_V(operator+,CCTK_REAL_VEC(a+x.v[0],a+x.v[1])) +DEFINE_FUNCTION_RV_V(operator-,CCTK_REAL_VEC(a-x.v[0],a-x.v[1])) +DEFINE_FUNCTION_RV_V(operator*,CCTK_REAL_VEC(a*x.v[0],a*x.v[1])) +DEFINE_FUNCTION_RV_V(operator/,CCTK_REAL_VEC(a/x.v[0],a/x.v[1])) + +// Single-argument operators +DEFINE_FUNCTION_V_V(operator+,x) +DEFINE_FUNCTION_V_V(operator-,CCTK_REAL_VEC(-x.v[0],-x.v[1])) + +// Functions +DEFINE_FUNCTION_V_V(exp,CCTK_REAL_VEC(exp(x.v[0]),exp(x.v[1]))) +DEFINE_FUNCTION_V_V(fabs,CCTK_REAL_VEC(fabs(x.v[0]),fabs(x.v[1]))) 
+DEFINE_FUNCTION_VV_V(fmax,CCTK_REAL_VEC(fmax(x.v[0],y.v[0]),fmax(x.v[1],y.v[1]))) +DEFINE_FUNCTION_VV_V(fmin,CCTK_REAL_VEC(fmin(x.v[0],y.v[0]),fmin(x.v[1],y.v[1]))) +DEFINE_FUNCTION_V_V(log,CCTK_REAL_VEC(log(x.v[0]),log(x.v[1]))) +DEFINE_FUNCTION_VR_V(pow,CCTK_REAL_VEC(pow(x.v[0],a),pow(x.v[1],a))) +DEFINE_FUNCTION_V_V(sqrt,CCTK_REAL_VEC(sqrt(x.v[0]),sqrt(x.v[1]))) diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-undefine.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-undefine.hh new file mode 100644 index 0000000..0d950c7 --- /dev/null +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-undefine.hh @@ -0,0 +1,14 @@ +// Undefine all macros defined in "Vectors-define.hh", so that we +// leave a clean namespace + + + +#undef DEFINE_FUNCTION_PR_V +#undef DEFINE_FUNCTION_PRV +#undef DEFINE_FUNCTION_V_V +#undef DEFINE_FUNCTION_R_V +#undef DEFINE_FUNCTION_VV_V +#undef DEFINE_FUNCTION_VR_V +#undef DEFINE_FUNCTION_RV_V +#undef DEFINE_FUNCTION_RR_V +#undef DEFINE_FUNCTION_VVV_V diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors.hh new file mode 100644 index 0000000..d32afb2 --- /dev/null +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors.hh @@ -0,0 +1,52 @@ +#ifndef VECTORS_HH +#define VECTORS_HH + + + +// Vectorisation + +#include +#include +#include + +#include + + + +#include "Vectors-define.hh" + +#if defined(KRANC_VECTORS) +// Vectorise + +# if ! 
defined(CCTK_REAL_PRECISION_8) +# error "Vectorisation is currently only supported for double precision" +# endif + +# if defined(__SSE2__) // SSE2 (Intel) +# if defined(KRANC_DIRECT) +# include "Vectors-SSE2-direct.hh" +# else +# include "Vectors-SSE2.hh" +# endif +# elif defined(__ALTIVEC__) && defined(_ARCH_PWR7) // Altivec (Power) +# if defined(KRANC_DIRECT) +# include "Vectors-VSX-direct.hh" +# else +# include "Vectors-VSX.hh" +# endif +# else +# include "Vectors-pseudo.hh" +# endif + +#else +// Don't vectorise + +# include "Vectors-default.hh" + +#endif + +#include "Vectors-undefine.hh" + + + +#endif // #ifndef VECTORS_HH diff --git a/Tools/CodeGen/CodeGen.m b/Tools/CodeGen/CodeGen.m index 09e7fe1..4743f2d 100644 --- a/Tools/CodeGen/CodeGen.m +++ b/Tools/CodeGen/CodeGen.m @@ -458,9 +458,9 @@ InitialiseFDVariables[] := DeclareAssignVariable["CCTK_REAL_VEC", "ktwothird", "ToReal(2.0/3.0)"], DeclareAssignVariable["CCTK_REAL_VEC", "kfourthird", "ToReal(4.0/3.0)"], DeclareAssignVariable["CCTK_REAL_VEC", "keightthird", "ToReal(8.0/3.0)"], - DeclareAssignVariable["CCTK_REAL_VEC", "hdxi", "fmul(ToReal(0.5), dxi)"], - DeclareAssignVariable["CCTK_REAL_VEC", "hdyi", "fmul(ToReal(0.5), dyi)"], - DeclareAssignVariable["CCTK_REAL_VEC", "hdzi", "fmul(ToReal(0.5), dzi)"]}]; + DeclareAssignVariable["CCTK_REAL_VEC", "hdxi", "kmul(ToReal(0.5), dxi)"], + DeclareAssignVariable["CCTK_REAL_VEC", "hdyi", "kmul(ToReal(0.5), dyi)"], + DeclareAssignVariable["CCTK_REAL_VEC", "hdzi", "kmul(ToReal(0.5), dzi)"]}]; GridName[x_] := If[SOURCELANGUAGE == "C", ToExpression[ToString[x] <> "[index]"], @@ -821,16 +821,16 @@ ReplacePowers[expr_] := isNotMinusOneQ[n_] := ! (IntegerQ[n] && n == -1); isNotTimesMinusOneQ[n_] := ! MatchQ[n,- _]; fmaRules = { - + (xx_? isNotMinusOneQ) (yy_? isNotMinusOneQ) + (zz_? isNotTimesMinusOneQ) :> fmadd [xx,yy,zz], - + (xx_? isNotMinusOneQ) (yy_? isNotMinusOneQ) - (zz_? isNotTimesMinusOneQ) :> fmsub [xx,yy,zz], - - (xx_? isNotMinusOneQ) (yy_? 
isNotMinusOneQ) + (zz_? isNotTimesMinusOneQ) :> fnmadd[xx,yy,zz], - - (xx_? isNotMinusOneQ) (yy_? isNotMinusOneQ) - (zz_? isNotTimesMinusOneQ) :> fnmsub[xx,yy,zz], - + (xx_? isNotMinusOneQ) (yy_ + 1) -> fmadd [xx, yy, xx], - + (xx_? isNotMinusOneQ) (yy_ - 1) -> fmsub [xx, yy, xx], - - (xx_? isNotMinusOneQ) (yy_ + 1) -> fnmadd[xx, yy, xx], - - (xx_? isNotMinusOneQ) (yy_ - 1) -> fnmsub[xx, yy, xx], - fmadd[xx_, - yy_, zz_] -> fnmsub[xx,yy,zz], - fmsub[xx_, - yy_, zz_] -> fnmadd[xx,yy,zz] + + (xx_? isNotMinusOneQ) (yy_? isNotMinusOneQ) + (zz_? isNotTimesMinusOneQ) :> kmadd [xx,yy,zz], + + (xx_? isNotMinusOneQ) (yy_? isNotMinusOneQ) - (zz_? isNotTimesMinusOneQ) :> kmsub [xx,yy,zz], + - (xx_? isNotMinusOneQ) (yy_? isNotMinusOneQ) + (zz_? isNotTimesMinusOneQ) :> knmadd[xx,yy,zz], + - (xx_? isNotMinusOneQ) (yy_? isNotMinusOneQ) - (zz_? isNotTimesMinusOneQ) :> knmsub[xx,yy,zz], + + (xx_? isNotMinusOneQ) (yy_ + 1) -> kmadd [xx, yy, xx], + + (xx_? isNotMinusOneQ) (yy_ - 1) -> kmsub [xx, yy, xx], + - (xx_? isNotMinusOneQ) (yy_ + 1) -> knmadd[xx, yy, xx], + - (xx_? isNotMinusOneQ) (yy_ - 1) -> knmsub[xx, yy, xx], + kmadd[xx_, - yy_, zz_] -> knmsub[xx,yy,zz], + kmsub[xx_, - yy_, zz_] -> knmadd[xx,yy,zz] }; rhs = rhs //. fmaRules; @@ -841,20 +841,24 @@ ReplacePowers[expr_] := rhs = rhs /. ToReal[xx_] + ToReal[yy_] -> ToReal[xx + yy]; rhs = rhs /. ToReal[xx_] * ToReal[yy_] -> ToReal[xx * yy]; rhs = rhs /. pow[xx_, ToReal[power_]] -> pow[xx, power]; + rhs = rhs /. ToReal[xx_] == ToReal[yy_] -> ToReal[xx == yy]; + rhs = rhs /. ToReal[xx_] != ToReal[yy_] -> ToReal[xx != yy]; + (* keep the conditional expression a scalar *) rhs = rhs /. IfThen[ToReal[xx_], yy_, zz_] -> IfThen[xx, yy, zz]; (* Replace all operators and functions *) - (* fadd, fsub, fmul, fdiv, fneg *) - isNotFneg[n_] := ! MatchQ[n,fneg[_]]; + (* kneg, kadd, ksub, kmul, kdiv *) + (* TODO: optimise fabs etc. with regard to fmadd etc. as well *) + isNotKneg[n_] := ! 
MatchQ[n,kneg[_]]; arithRules = { - - xx_ -> fneg[xx], - xx_ * yy_ -> fmul[xx,yy], - xx_ / yy_ -> fdiv[xx,yy], - xx_ + yy_ -> fadd[xx,yy], - xx_ - yy_ -> fsub[xx,yy], - fmul[-1,xx_] -> fneg[xx], - fadd[xx_,fneg[yy_]] -> fsub[xx,yy], - fadd[fneg[xx_],(yy_? isNotFneg)] :> fsub[yy,xx], + - xx_ -> kneg[xx], + xx_ * yy_ -> kmul[xx,yy], + xx_ / yy_ -> kdiv[xx,yy], + xx_ + yy_ -> kadd[xx,yy], + xx_ - yy_ -> ksub[xx,yy], + kmul[-1,xx_] -> kneg[xx], + kadd[xx_,kneg[yy_]] -> ksub[xx,yy], + kadd[kneg[xx_],(yy_? isNotKneg)] :> ksub[yy,xx], Abs[xx_] -> kfabs[xx], Log[xx_] -> klog[xx], fabs[xx_] -> kfabs[xx], @@ -863,13 +867,22 @@ ReplacePowers[expr_] := sqrt[xx_] -> ksqrt[xx], exp[xx_] -> kexp[xx], log[xx_] -> klog[xx], - pow[xx_,yy_] -> kpow[xx,yy] + pow[xx_,yy_] -> kpow[xx,yy], + kfabs[kneg[xx_]] -> kfabs[xx], + kfnabs[kneg[xx_]] -> kfnabs[xx], + kneg[kfabs[xx_]] -> kfnabs[xx] }; rhs = rhs //. arithRules; - rhs = rhs /. IfThen[fmul[xx_, yy_], aa_, bb_] -> IfThen[xx*yy, aa, bb]; - rhs = rhs /. ToReal[fneg[xx_]] -> ToReal[-xx]; - rhs = rhs /. ToReal[fmul[xx_, yy_]] -> ToReal[xx*yy]; - rhs = rhs /. kpow[xx_, fneg[power_]] -> kpow[xx, -power]; + + (* Undo some transformations *) + undoRules = { + IfThen[kmul[xx_, yy_], aa_, bb_] -> IfThen[xx*yy, aa, bb], + IfThen[kmul[xx_, yy_] != zz_, aa_, bb_] -> IfThen[xx*yy!=zz, aa, bb], + ToReal[kneg[xx_]] -> ToReal[-xx], + ToReal[kmul[xx_, yy_]] -> ToReal[xx*yy], + kpow[xx_, kneg[power_]] -> kpow[xx, -power] + }; + rhs = rhs //. undoRules; ], rhs = rhs /. 
Power[xx_, power_] -> xx^power diff --git a/Tools/CodeGen/Differencing.m b/Tools/CodeGen/Differencing.m index 1ac5ba8..da90db0 100644 --- a/Tools/CodeGen/Differencing.m +++ b/Tools/CodeGen/Differencing.m @@ -327,7 +327,7 @@ ComponentDerivativeOperatorMacroDefinition[componentDerivOp:(name_[inds___] -> e {pDefs, FlattenBlock[{ "#ifndef KRANC_DIFF_FUNCTIONS\n", (* default, differencing operators are macros *) - "# define ", macroName, "(u) ", "(fmul(", liName, ",", rhs, "))\n", + "# define ", macroName, "(u) ", "(kmul(", liName, ",", rhs, "))\n", "#else\n", (* new, differencing operators are static functions *) "# define ", macroName, "(u) ", "(", liName, "*", macroName, "_impl((u),dj,dk))\n", @@ -395,11 +395,12 @@ DifferenceGFTerm[op_, i_, j_, k_] := "(int)(" <> ToString[CFormHideStrings[k+nz]] <> "))]", *) (* - remaining "vec_loadu_maybe(" <> ToString[CFormHideStrings[nx]] <> "," <> - "(u)[index" <> - "+di*(" <> ToString[CFormHideStrings[nx]] <> ")" <> - "+dj*(" <> ToString[CFormHideStrings[ny]] <> ")" <> - "+dk*(" <> ToString[CFormHideStrings[nz]] <> ")])", + remaining "vec_loadu_maybe" <> + "(" <> ToString[CFormHideStrings[nx]] <> "," <> + "(u)[index" <> + "+di*(" <> ToString[CFormHideStrings[nx]] <> ")" <> + "+dj*(" <> ToString[CFormHideStrings[ny]] <> ")" <> + "+dk*(" <> ToString[CFormHideStrings[nz]] <> ")])", *) (* remaining "vec_loadu_maybe(" <> ToString[CFormHideStrings[nx]] <> "," <> @@ -415,9 +416,10 @@ DifferenceGFTerm[op_, i_, j_, k_] := "+dj*(" <> ToString[CFormHideStrings[ny]] <> ")" <> "+dk*(" <> ToString[CFormHideStrings[nz]] <> ")])", (* - remaining "vec_loadu(u[(" <> ToString[CFormHideStrings[nx]] <> ")" <> - "+dj*(" <> ToString[CFormHideStrings[ny]] <> ")" <> - "+dk*(" <> ToString[CFormHideStrings[nz]] <> ")])", + remaining "vec_loadu" <> + "(u[(" <> ToString[CFormHideStrings[nx]] <> ")" <> + "+dj*(" <> ToString[CFormHideStrings[ny]] <> ")" <> + "+dk*(" <> ToString[CFormHideStrings[nz]] <> ")])", *) (* remaining 
"(u)[CCTK_GFINDEX3D(cctkGH,floor((" <> diff --git a/Tools/CodeGen/Interface.m b/Tools/CodeGen/Interface.m index 29c198f..c0a15e1 100644 --- a/Tools/CodeGen/Interface.m +++ b/Tools/CodeGen/Interface.m @@ -127,7 +127,8 @@ CreateKrancInterface[nonevolvedGroups_, evolvedGroups_, rhsGroups_, groups_, interface = CreateInterface[implementation, inheritedImplementations, Join[includeFiles, {CactusBoundary`GetIncludeFiles[]}, - If[OptionValue[UseLoopControl], {"loopcontrol.h"}, {}]], + If[OptionValue[UseLoopControl], {"loopcontrol.h"}, {}], + If[OptionValue[UseVectors], {"vectors.h"}, {}]], groupStructures, UsesFunctions -> Join[{registerEvolved, (*registerConstrained,*) diffCoeff}, diff --git a/Tools/CodeGen/Kranc.m b/Tools/CodeGen/Kranc.m index 21acd21..87324cd 100644 --- a/Tools/CodeGen/Kranc.m +++ b/Tools/CodeGen/Kranc.m @@ -23,7 +23,7 @@ BeginPackage["Kranc`"]; (* CodeGen.m *) {INV, SQR, CUB, QAD, IfThen, ToReal, sqrt, exp, pow, fmax, fmin, - fmadd, fmsub, fnmadd, fnmsub, fneg, fadd, fsub, fmul, fdiv, + kmadd, kmsub, knmadd, knmsub, kpos, kneg, kadd, ksub, kmul, kdiv, kfabs, kfmax, kfmin, ksqrt, kexp, klog, kpow, dir1, dir2, dir3, dx, dy, dz, khalf, kthird, ktwothird, kfourthird, keightthird}; @@ -70,6 +70,7 @@ ThornOptions = ReflectionSymmetries -> {}, ZeroDimensions -> {}, UseLoopControl -> False, + UseVectors -> False, UseCSE -> False, ProhibitAssignmentToGridFunctionsRead -> False, IncludeFiles -> {}}; diff --git a/Tools/CodeGen/KrancThorn.m b/Tools/CodeGen/KrancThorn.m index 0281b3d..7527f5e 100644 --- a/Tools/CodeGen/KrancThorn.m +++ b/Tools/CodeGen/KrancThorn.m @@ -127,7 +127,7 @@ CreateKrancThorn[groupsOrig_, parentDirectory_, thornName_, opts:OptionsPattern[ coordGroup = {"grid::coordinates", {Kranc`x,Kranc`y,Kranc`z,Kranc`r}}; groups = Join[groupsOrig, {coordGroup}]; - includeFiles = Join[includeFiles, {"GenericFD.h", "Symmetry.h", "Vectors.hh", "sbp_calc_coeffs.h"}]; + includeFiles = Join[includeFiles, {"GenericFD.h", "Symmetry.h", "sbp_calc_coeffs.h"}]; 
inheritedImplementations = Join[inheritedImplementations, {"Grid", "GenericFD"}, CactusBoundary`GetInheritedImplementations[]]; diff --git a/Tools/CodeGen/Thorn.m b/Tools/CodeGen/Thorn.m index 4ada28a..08ba38f 100644 --- a/Tools/CodeGen/Thorn.m +++ b/Tools/CodeGen/Thorn.m @@ -220,7 +220,8 @@ Options[CreateConfiguration] = ThornOptions; CreateConfiguration[opts:OptionsPattern[]] := {whoWhen["CCL"], "REQUIRES GenericFD\n", - If[OptionValue[UseLoopControl], "REQUIRES LoopControl\n", {}] + If[OptionValue[UseLoopControl], "REQUIRES LoopControl\n", {}], + If[OptionValue[UseVectors], "REQUIRES Vectors\n", {}] }; (* ------------------------------------------------------------------------ @@ -475,13 +476,13 @@ CreateSchedule[globalStorageGroups_, scheduledGroups_, scheduledFunctions_] := calculationMacros[] := CommentedBlock["Define macros used in calculations", - Map[{"#define ", #, "\n"} &, - {"INITVALUE (42)", - "INV(x) (fdiv(ToReal(1.0),x))", - "SQR(x) (fmul(x,x))", - "CUB(x) (x*SQR(x))", - "QAD(x) (SQR(SQR(x)))" - }]]; + Map[{"#define ", #, "\n"} &, + {"INITVALUE (42)", + "INV(x) (kdiv(ToReal(1.0),x))", + "SQR(x) (kmul(x,x))", + "CUB(x) (x*SQR(x))", + "QAD(x) (SQR(SQR(x)))" + }]]; (* Given a list of Calculation structures as defined above, create a CodeGen representation of a source file that defines a function for @@ -509,8 +510,10 @@ CreateSetterSource[calcs_, debug_, useCSE_, include_, imp_, ], Map[IncludeFile, Join[{"cctk.h", "cctk_Arguments.h", "cctk_Parameters.h", - (*"precomputations.h",*) "GenericFD.h", "Vectors.hh", "Differencing.h"}, include, - If[OptionValue[UseLoopControl], {"loopcontrol.h"}, {}]]], + (*"precomputations.h",*) "GenericFD.h", "Differencing.h"}, + include, + If[OptionValue[UseLoopControl], {"loopcontrol.h"}, {}], + If[OptionValue[UseVectors], {"vectors.h"}, {}]]], calculationMacros[], (* For each function structure passed, create the function and -- cgit v1.2.3 From 6b4cccc33c949f8b84023d349f4973040e45660e Mon Sep 17 00:00:00 2001 From: Ian 
Hinder Date: Tue, 7 Dec 2010 16:55:18 +0100 Subject: Remove old "Ceiling" and "Perturb" thorns which are unrelated to Kranc --- .../KrancNumericalTools/Ceiling/interface.ccl | 8 - .../Cactus/KrancNumericalTools/Ceiling/param.ccl | 32 -- .../KrancNumericalTools/Ceiling/schedule.ccl | 21 -- .../KrancNumericalTools/Ceiling/src/Ceiling.F90 | 68 ----- .../KrancNumericalTools/Ceiling/src/Startup.c | 15 - .../KrancNumericalTools/Ceiling/src/make.code.defn | 8 - .../KrancNumericalTools/Ceiling/src/selectGFs.c | 94 ------ .../Cactus/KrancNumericalTools/Perturb/README | 5 - .../KrancNumericalTools/Perturb/interface.ccl | 7 - .../Cactus/KrancNumericalTools/Perturb/param.ccl | 45 --- .../KrancNumericalTools/Perturb/schedule.ccl | 21 -- .../KrancNumericalTools/Perturb/src/bc_perturb.c | 322 --------------------- .../KrancNumericalTools/Perturb/src/id_perturb.c | 49 ---- .../KrancNumericalTools/Perturb/src/make.code.defn | 8 - .../KrancNumericalTools/Perturb/src/perturb.h | 45 --- 15 files changed, 748 deletions(-) delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/Ceiling/interface.ccl delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/Ceiling/param.ccl delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/Ceiling/schedule.ccl delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/Ceiling/src/Ceiling.F90 delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/Ceiling/src/Startup.c delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/Ceiling/src/make.code.defn delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/Ceiling/src/selectGFs.c delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/Perturb/README delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/Perturb/interface.ccl delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/Perturb/param.ccl delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/Perturb/schedule.ccl delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/Perturb/src/bc_perturb.c delete mode 100644 
Auxiliary/Cactus/KrancNumericalTools/Perturb/src/id_perturb.c delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/Perturb/src/make.code.defn delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/Perturb/src/perturb.h (limited to 'Auxiliary') diff --git a/Auxiliary/Cactus/KrancNumericalTools/Ceiling/interface.ccl b/Auxiliary/Cactus/KrancNumericalTools/Ceiling/interface.ccl deleted file mode 100644 index 8a9294b..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/Ceiling/interface.ccl +++ /dev/null @@ -1,8 +0,0 @@ -# file produced by user shusa, 31/3/2004 - -# $Id$ - -implements: Ceiling - -inherits: Grid - diff --git a/Auxiliary/Cactus/KrancNumericalTools/Ceiling/param.ccl b/Auxiliary/Cactus/KrancNumericalTools/Ceiling/param.ccl deleted file mode 100644 index a6dc073..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/Ceiling/param.ccl +++ /dev/null @@ -1,32 +0,0 @@ -# file produced by user shusa, 31/3/2004 -# Produced with Mathematica Version 5.0 for Linux (June 9, 2003) - -# Mathematica script written by Ian Hinder and Sascha Husa - -# $Id$ - -private: -BOOLEAN check_active "whether to check ceiling values" -{ -} "false" - -REAL ceiling_value "what value we use for the ceiling" -{ -*:* :: "with negative values no cutoff is set" -} -1 - -KEYWORD type "what type of checking to apply" -{ - "absolute" :: "check for absolute value of GF" - "differential" :: "check for difference between min & max" -} "absolute" - -STRING vars "List of evolved grid functions that should have dissipation added" STEERABLE=always -{ - .* :: "must be a valid list of grid functions" -} "" - -BOOLEAN verbose "produce log output" STEERABLE=always -{ -} "no" - diff --git a/Auxiliary/Cactus/KrancNumericalTools/Ceiling/schedule.ccl b/Auxiliary/Cactus/KrancNumericalTools/Ceiling/schedule.ccl deleted file mode 100644 index b9d167c..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/Ceiling/schedule.ccl +++ /dev/null @@ -1,21 +0,0 @@ -# file produced by user shusa, 31/3/2004 -# Produced with 
Mathematica Version 5.0 for Linux (June 9, 2003) - -# Mathematica script written by Ian Hinder and Sascha Husa - -# $Id$ - - -schedule Ceiling_Startup at startup -{ -LANG: C -} "ceiling startup message" - -if (check_active) -{ - schedule check_ceiling at PostStep - { - LANG: C - - } "check ceiling" -} diff --git a/Auxiliary/Cactus/KrancNumericalTools/Ceiling/src/Ceiling.F90 b/Auxiliary/Cactus/KrancNumericalTools/Ceiling/src/Ceiling.F90 deleted file mode 100644 index 9e1d2c1..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/Ceiling/src/Ceiling.F90 +++ /dev/null @@ -1,68 +0,0 @@ -! file written by s. husa, 5/6/2004 - -! $Id$ - -#include "cctk.h" - -subroutine apply_check_abs(var, ni, nj, nk, ceiling_value) - -implicit none - -CCTK_INT, intent(in) :: ni, nj, nk -CCTK_REAL, dimension (ni, nj, nk), intent(in) :: var(ni, nj, nk) -CCTK_REAL, intent(in) :: ceiling_value - -CCTK_REAL :: criterion -CCTK_REAL, save :: initial_value - -CCTK_INT, save :: counter - -counter = counter + 1 -criterion = maxval(abs(var) + epsilon(1.0d0)) - -if (counter == 1) then - initial_value = criterion - write (*,*) "<<<<<< using ceiling initial value", initial_value -else - criterion = criterion / initial_value - if ((ceiling_value > 0).AND.(criterion > ceiling_value)) then - - call CCTK_INFO("Ceiling thorn terminates evolution") - call CCTK_TerminateNext(var) - endif -endif - -end subroutine apply_check_abs - - -subroutine apply_check_diff(var, ni, nj, nk, ceiling_value) - -implicit none - -CCTK_INT, intent(in) :: ni, nj, nk -CCTK_REAL, dimension (ni, nj, nk), intent(in) :: var(ni, nj, nk) -CCTK_REAL, intent(in) :: ceiling_value - -CCTK_REAL :: criterion -CCTK_REAL, save :: initial_value - -CCTK_INT, save :: counter - -counter = counter + 1 - -criterion = maxval(var) - minval(var) - -if (counter == 1) then - initial_value = criterion - write (*,*) "<<<<<< using ceiling initial value", initial_value -else - criterion = criterion / initial_value - if ((ceiling_value > 0).AND.(criterion > 
ceiling_value)) then - - call CCTK_INFO("Ceiling thorn terminates evolution.") - call CCTK_TerminateNext (var) - endif -endif - -end subroutine apply_check_diff - diff --git a/Auxiliary/Cactus/KrancNumericalTools/Ceiling/src/Startup.c b/Auxiliary/Cactus/KrancNumericalTools/Ceiling/src/Startup.c deleted file mode 100644 index b42b2a1..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/Ceiling/src/Startup.c +++ /dev/null @@ -1,15 +0,0 @@ -/* file produced by user shusa, 31/3/2004 */ -/* Produced with Mathematica Version 5.0 for Linux (June 9, 2003) */ - -/* Mathematica script written by Ian Hinder and Sascha Husa */ - -/* $Id$ */ - -#include "cctk.h" - -int Ceiling_Startup(void) -{ - const char * banner = "Ceiling: abort when solution grows through the roof"; - CCTK_RegisterBanner(banner); - return 0; -} diff --git a/Auxiliary/Cactus/KrancNumericalTools/Ceiling/src/make.code.defn b/Auxiliary/Cactus/KrancNumericalTools/Ceiling/src/make.code.defn deleted file mode 100644 index dd1377a..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/Ceiling/src/make.code.defn +++ /dev/null @@ -1,8 +0,0 @@ -# file produced by user shusa, 31/3/2004 -# Produced with Mathematica Version 5.0 for Linux (June 9, 2003) - -# Mathematica script written by Ian Hinder and Sascha Husa - -# $Id$ - -SRCS = Startup.c Ceiling.F90 selectGFs.c diff --git a/Auxiliary/Cactus/KrancNumericalTools/Ceiling/src/selectGFs.c b/Auxiliary/Cactus/KrancNumericalTools/Ceiling/src/selectGFs.c deleted file mode 100644 index 86e1196..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/Ceiling/src/selectGFs.c +++ /dev/null @@ -1,94 +0,0 @@ -/* $Header$ */ - -/* this code is based on Erik Schnetter's dissipation thorn */ - -#include -#include - -#include "cctk.h" -#include "cctk_Arguments.h" -#include "cctk_Parameters.h" - -void CCTK_FCALL -CCTK_FNAME(apply_check_abs) (CCTK_REAL const * const var, - int const * const ni, - int const * const nj, - int const * const nk, - CCTK_REAL const * const ceiling_value); - - -void 
CCTK_FCALL -CCTK_FNAME(apply_check_diff) (CCTK_REAL const * const var, - int const * const ni, - int const * const nj, - int const * const nk, - CCTK_REAL const * const ceiling_value); - -static void -call_apply_check (int const varindex, char const * const optstring, void * const arg); - -void -check_ceiling (CCTK_ARGUMENTS) -{ - DECLARE_CCTK_ARGUMENTS; - DECLARE_CCTK_PARAMETERS; - - CCTK_TraverseString (vars, call_apply_check, cctkGH, CCTK_GROUP_OR_VAR); -} - - -void -call_apply_check (int const varindex, char const * const optstring, void * const arg) -{ - cGH const * const cctkGH = (cGH const *) arg; - DECLARE_CCTK_ARGUMENTS; - DECLARE_CCTK_PARAMETERS; - - int vargroup; - cGroup vardata; - - CCTK_REAL const * varptr; - int ierr /* , terminate */ ; - - assert (varindex >= 0); - - if (verbose) { - char * const fullvarname = CCTK_FullName (varindex); - assert (fullvarname); - CCTK_VInfo (CCTK_THORNSTRING, - "Applying ceiling check to \"%s\" ", - fullvarname); - free (fullvarname); - } - - vargroup = CCTK_GroupIndexFromVarI (varindex); - assert (vargroup >= 0); - - ierr = CCTK_GroupData (vargroup, &vardata); - assert (!ierr); - - assert (vardata.grouptype == CCTK_GF); - assert (vardata.vartype == CCTK_VARIABLE_REAL); - assert (vardata.dim == cctk_dim); - - varptr = CCTK_VarDataPtrI (cctkGH, 0, varindex); - assert (varptr); - - if (CCTK_Equals (type, "absolute")) - { - CCTK_FNAME(apply_check_abs) - (varptr, &cctk_lsh[0], &cctk_lsh[1], &cctk_lsh[2], &ceiling_value); - } - else if (CCTK_Equals (type, "differential")) - { - CCTK_FNAME(apply_check_diff) - (varptr, &cctk_lsh[0], &cctk_lsh[1], &cctk_lsh[2], &ceiling_value); - } - else - { - CCTK_INFO("keyword ceiling::type only allows values 'absolute' and 'differential'"); - } - - /* if (terminate > 0) {CCTK_TerminateNext (cctkGH);} */ -} - diff --git a/Auxiliary/Cactus/KrancNumericalTools/Perturb/README b/Auxiliary/Cactus/KrancNumericalTools/Perturb/README deleted file mode 100644 index 899015f..0000000 --- 
a/Auxiliary/Cactus/KrancNumericalTools/Perturb/README +++ /dev/null @@ -1,5 +0,0 @@ - -Purpose of the thorn: - -This thorn adds a perturbation to grid variables at initial data and/or -boundaries, in the style of Denis' noise thorn. diff --git a/Auxiliary/Cactus/KrancNumericalTools/Perturb/interface.ccl b/Auxiliary/Cactus/KrancNumericalTools/Perturb/interface.ccl deleted file mode 100644 index a745e5c..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/Perturb/interface.ccl +++ /dev/null @@ -1,7 +0,0 @@ -# Interface definition for thorn Perturb -# $Header$ - -IMPLEMENTS: perturb - -INHERITS: grid - diff --git a/Auxiliary/Cactus/KrancNumericalTools/Perturb/param.ccl b/Auxiliary/Cactus/KrancNumericalTools/Perturb/param.ccl deleted file mode 100644 index 8e87dc6..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/Perturb/param.ccl +++ /dev/null @@ -1,45 +0,0 @@ -# Parameter definitions for thorn Perturb -# $Header$ - -#------------------------------------------------------------------------------ -# Private: -#------------------------------------------------------------------------------ -private: - -BOOLEAN apply_id_perturb "Add perturbation to initial data" -{ -} "no" - -BOOLEAN apply_bc_perturb "Add perturbation to boundary data" -{ -} "no" - -STRING id_vars "Initial data variables to modify with perturbation" -{ - .* :: "A regex which matches everything" -} "" - -STRING bc_vars "Variables to modify with perturbation at boundary" -{ - .* :: "A regex which matches everything" -} "" - -BOOLEAN perturb_boundaries[6] "At which boundaries to apply perturbation" -{ -} "yes" - -INT perturb_stencil[3] "Number of boundary points" -{ - 0:* :: "0:*" -} 1 - -REAL amplitude "Amplitude of perturbation data" -{ - 0: :: "Positive number" -} 0.000001 - -REAL period "period of perturbation data" -{ - 0: :: "Positive number" -} 1.0 - diff --git a/Auxiliary/Cactus/KrancNumericalTools/Perturb/schedule.ccl b/Auxiliary/Cactus/KrancNumericalTools/Perturb/schedule.ccl deleted file mode 
100644 index 268f81d..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/Perturb/schedule.ccl +++ /dev/null @@ -1,21 +0,0 @@ -# Schedule definitions for thorn Perturb -# $Header$ - -if (apply_id_perturb) { - SCHEDULE id_perturb AT CCTK_POSTINITIAL - { - LANG: C - } "Add perturb to initial data" -} - -if (apply_bc_perturb) { - SCHEDULE bc_perturb AT CCTK_POSTSTEP - { - LANG: C - } "Add perturb to boundary condition" - - SCHEDULE bc_perturb AT CCTK_POSTRESTRICT - { - LANG: C - } "Add perturb to boundary condition" -} diff --git a/Auxiliary/Cactus/KrancNumericalTools/Perturb/src/bc_perturb.c b/Auxiliary/Cactus/KrancNumericalTools/Perturb/src/bc_perturb.c deleted file mode 100644 index bf56039..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/Perturb/src/bc_perturb.c +++ /dev/null @@ -1,322 +0,0 @@ -/* $Header$ */ - -#include -#include -#include -#include - - - -#include "perturb.h" - -#include "cctk.h" -#include "cctk_Parameters.h" -#include "cctk_Arguments.h" -#include "cctk_FortranString.h" - -#include "Symmetry.h" - - - -/* #define DEBUG_BOUNDARY 1 */ - -static int ApplyBndperturb (const cGH *GH, - int stencil_dir, - const int *stencil, - int dir, - int first_var, - int num_vars); - - - -int -BndperturbVI (const cGH *GH, const int *stencil, int vi) -{ - int retval; - retval = ApplyBndperturb (GH, -1, stencil, 0, vi, 1); - return retval; -} - -void -CCTK_FCALL CCTK_FNAME (BndperturbVI) (int *ierr, const cGH **GH, - const int *stencil, const int *vi) -{ - *ierr = BndperturbVI (*GH, stencil, *vi); -} - -int -BndperturbVN (const cGH *GH, const int *stencil, const char *vn) -{ - int vi, retval; - vi = CCTK_VarIndex(vn); - retval = BndperturbVI (GH, stencil, vi); - return retval; -} - -void -CCTK_FCALL CCTK_FNAME (BndperturbVN) (int *ierr, const cGH **GH, - const int *stencil, ONE_FORTSTRING_ARG) -{ - ONE_FORTSTRING_CREATE (vn); - *ierr = BndperturbVN (*GH, stencil, vn); - free (vn); -} - - - -int -BndperturbGI (const cGH *GH, const int *stencil, int gi) -{ - int 
first_vi, retval; - - first_vi = CCTK_FirstVarIndexI (gi); - if (first_vi >= 0) - { - retval = ApplyBndperturb (GH, -1, stencil, 0, first_vi, - CCTK_NumVarsInGroupI (gi)); - } - else - { - CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING, - "Invalid group index %d in BndperturbGI", gi); - retval = -1; - } - - return (retval); -} - - - -void -CCTK_FCALL CCTK_FNAME (BndperturbGI) (int *ierr, const cGH **GH, - const int *stencil, const int *gi) -{ - *ierr = BndperturbGI (*GH, stencil, *gi); -} - - -int -BndperturbGN (const cGH *GH, const int *stencil, const char *gn) -{ - int gi, retval; - - gi = CCTK_GroupIndex (gn); - if (gi >= 0) - { - retval = BndperturbGI (GH, stencil, gi); - } - else - { - CCTK_VWarn (2, __LINE__, __FILE__, CCTK_THORNSTRING, - "Invalid group name '%s' in BndperturbGN", gn); - retval = -1; - } - - return (retval); -} - -void CCTK_FCALL CCTK_FNAME (BndperturbGN) - (int *ierr, - const cGH **GH, - const int *stencil, - ONE_FORTSTRING_ARG) -{ - ONE_FORTSTRING_CREATE (gn) - *ierr = BndperturbGN (*GH, stencil, gn); - free (gn); -} - - - - - -static int ApplyBndperturb (const cGH *GH, - int stencil_dir, - const int *stencil, - int dir, - int first_var, - int num_vars) -{ - DECLARE_CCTK_PARAMETERS; - int i, j, k; - int var, vtypesize, gindex, gdim, timelvl; - int doBC[2*MAXDIM], dstag[MAXDIM], lsh[MAXDIM], lssh[MAXDIM]; - SymmetryGHex *sGHex; - int type; - - /* This argument is unused an undocumented; better make sure people - don't try to use it for something. 
*/ - assert (stencil_dir == -1); - - /* get the group index of the variables */ - gindex = CCTK_GroupIndexFromVarI (first_var); - - /* get the number of dimensions and the size of the variables' type */ - gdim = CCTK_GroupDimI (gindex); - vtypesize = CCTK_VarTypeSize (CCTK_VarTypeI (first_var)); - - /* make sure we can deal with this number of dimensions */ - if (gdim > MAXDIM) - { - CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, - "ApplyBndperturb: Variable dimension of %d not supported", gdim); - return (-1); - } - - /* check the direction parameter */ - if (abs (dir) > gdim) - { - CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, - "ApplyBndperturb: direction %d greater than dimension %d", - dir, gdim); - return (-2); - } - - /* initialize arrays for variables with less dimensions than MAXDIM - so that we can use the INDEX_3D macro later on */ - for (i = gdim; i < MAXDIM; i++) - { - lsh[i] = 1; - lssh[i] = 1; - } - - /* get the directional staggering of the group */ - CCTK_GroupStaggerDirArrayGI (dstag, gdim, gindex); - - /* get the current timelevel */ - timelvl = 0; - - /* see if we have a symmetry array */ - sGHex = (SymmetryGHex *) CCTK_GHExtension (GH, "Symmetry"); - - - /* now loop over all variables */ - for (var = first_var; var < first_var + num_vars; var++) - { - /* Apply condition if: - + boundary is not a symmetry boundary (no symmetry or unset(=unsed)) - + boundary is a physical boundary - + have enough grid points - */ - memset (doBC, 1, sizeof (doBC)); - if (sGHex) - { - for (i = 0; i < 2 * gdim; i++) - { - doBC[i] = sGHex->GFSym[var][i] == GFSYM_NOSYM || - sGHex->GFSym[var][i] == GFSYM_UNSET; - } - } - for (i = 0; i < gdim; i++) - { - lsh[i] = GH->cctk_lsh[i]; - lssh[i] = GH->cctk_lssh[CCTK_LSSH_IDX (dstag[i], i)]; - doBC[i*2] &= GH->cctk_lsh[i] > 1 && GH->cctk_bbox[i*2]; - doBC[i*2+1] &= GH->cctk_lsh[i] > 1 && GH->cctk_bbox[i*2+1]; - if (dir != 0) - { - doBC[i*2] &= i+1 == -dir; - doBC[i*2+1] &= i+1 == dir; - } - } - - /* now apply the 
boundaries face by face */ - if (gdim > 0) - { -#ifdef DEBUG_BOUNDARY - if (doBC[0]) - { - printf("Boundary: Applying lower x perturb to boundary\n"); - } - if (doBC[1]) - { - printf("Boundary: Applying upper x perturb to boundary\n"); - } -#endif /* DEBUG_BOUNDARY */ - /* lower x */ - BOUNDARY_PERTURB (doBC[0], stencil[0], lssh[1], lssh[2], - i, j, k); - /* upper x */ - BOUNDARY_PERTURB (doBC[1], stencil[0], lssh[1], lssh[2], - lssh[0]-i-1, j, k); - - } - if (gdim > 1) - - { -#ifdef DEBUG_BOUNDARY - if (doBC[2]) - { - printf("Boundary: Applying lower y perturb to boundary\n"); - } - if (doBC[3]) - { - printf("Boundary: Applying upper y perturb to boundary\n"); - } -#endif /* DEBUG_BOUNDARY */ - /* lower y */ - BOUNDARY_PERTURB (doBC[2], lssh[0], stencil[1], lssh[2], - i, j, k); - /* upper y */ - BOUNDARY_PERTURB (doBC[3], lssh[0], stencil[1], lssh[2], - i, lssh[1]-j-1, k); - } - if (gdim > 2) - { -#ifdef DEBUG_BOUNDARY - if (doBC[4]) - { - printf("Boundary: Applying lower z perturb to boundary\n"); - } - if (doBC[5]) - { - printf("Boundary: Applying upper z perturb to boundary\n"); - } -#endif /* DEBUG_BOUNDARY */ - /* lower z */ - BOUNDARY_PERTURB (doBC[4], lssh[0], lssh[1], stencil[2], - i, j, k); - /* upper z */ - BOUNDARY_PERTURB (doBC[5], lssh[0], lssh[1], stencil[2], - i, j, lssh[2]-k-1); - } - } - - return(0); -} - - -static void -add_bc_perturb_to_var (int idx, const char* optstring, void* cctkGH) -{ - DECLARE_CCTK_PARAMETERS; - cGH* GH = cctkGH; - int sw[3]; - - /* Change type from CCTK_INT to int */ - sw[0] = perturb_stencil[0]; - sw[1] = perturb_stencil[1]; - sw[2] = perturb_stencil[2]; - - if (perturb_boundaries[0]) ApplyBndperturb (GH, -1, sw, -1, idx, 1); - if (perturb_boundaries[1]) ApplyBndperturb (GH, -1, sw, +1, idx, 1); - if (perturb_boundaries[2]) ApplyBndperturb (GH, -1, sw, -2, idx, 1); - if (perturb_boundaries[3]) ApplyBndperturb (GH, -1, sw, +2, idx, 1); - if (perturb_boundaries[4]) ApplyBndperturb (GH, -1, sw, -3, idx, 1); - if 
(perturb_boundaries[5]) ApplyBndperturb (GH, -1, sw, +3, idx, 1); -} - - -void -bc_perturb(CCTK_ARGUMENTS) -{ - DECLARE_CCTK_ARGUMENTS - DECLARE_CCTK_PARAMETERS - -/* Boundary_MakeSureThatTheSelectionIsEmpty(); */ - if (CCTK_TraverseString(bc_vars, add_bc_perturb_to_var, cctkGH, - CCTK_GROUP_OR_VAR) < 0) - { - CCTK_WARN (1, "Failed to parse 'perturb::bc_vars' parameter"); - } -} diff --git a/Auxiliary/Cactus/KrancNumericalTools/Perturb/src/id_perturb.c b/Auxiliary/Cactus/KrancNumericalTools/Perturb/src/id_perturb.c deleted file mode 100644 index 07bd106..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/Perturb/src/id_perturb.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - $Header$ -*/ - -#include - -#include "perturb.h" - -#include "cctk_Arguments.h" -#include "cctk_Parameters.h" - -void -add_perturb_to_var (int idx, const char* optstring, void* cctkGH) -{ - DECLARE_CCTK_PARAMETERS; - int i, j, k, ijk; - CCTK_REAL* data; - cGH* GH = cctkGH; - int type; - - data = (CCTK_REAL*) CCTK_VarDataPtrI(GH, 0, idx); - - for (k=1; k< GH->cctk_lsh[2]-1; ++k) - { - for (j=1; j< GH->cctk_lsh[1]-1; ++j) - { - for (i=1; i< GH->cctk_lsh[0]-1; ++i) - { - ijk = CCTK_GFINDEX3D(GH, i, j, k); - - data[ijk] += amplitude * pow(-1, i/period) * pow(-1, j/period) * pow(-1, k/period); - } - } - } -} - - -void -id_perturb(CCTK_ARGUMENTS) -{ - DECLARE_CCTK_ARGUMENTS - DECLARE_CCTK_PARAMETERS - - if (CCTK_TraverseString(id_vars, add_perturb_to_var, cctkGH, - CCTK_GROUP_OR_VAR) < 0) - { - CCTK_WARN (1, "Failed to parse 'IDRandom::noisy_id_vars' parameter"); - } -} diff --git a/Auxiliary/Cactus/KrancNumericalTools/Perturb/src/make.code.defn b/Auxiliary/Cactus/KrancNumericalTools/Perturb/src/make.code.defn deleted file mode 100644 index 795dd51..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/Perturb/src/make.code.defn +++ /dev/null @@ -1,8 +0,0 @@ -# Main make.code.defn file for thorn perturb -# $Header$ - -# Source files in this directory -SRCS = id_perturb.c bc_perturb.c - -# Subdirectories 
containing source files -SUBDIRS = diff --git a/Auxiliary/Cactus/KrancNumericalTools/Perturb/src/perturb.h b/Auxiliary/Cactus/KrancNumericalTools/Perturb/src/perturb.h deleted file mode 100644 index e7da060..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/Perturb/src/perturb.h +++ /dev/null @@ -1,45 +0,0 @@ -/* $Header$ */ - -#ifndef PERTURB_H -#define PERTURB_H - -#include -#include -#include -#include -#include "cctk.h" - - -/* constants */ - -#define MAXDIM 3 - - -/* macros */ - -#define RAND_VAL ((random()*(1.0/RAND_MAX)-0.5)*amplitude) - -#define BOUNDARY_PERTURB(doBC, \ - iend, jend, kend, \ - ii, jj, kk) \ -{ \ - if (doBC) \ - { \ - CCTK_REAL* v= CCTK_VarDataPtrI(GH, timelvl, var); \ - for (k = 0; k < kend; k++) \ - { \ - for (j = 0; j < jend; j++) \ - { \ - for (i = 0; i < iend; i++) \ - { \ - const int _index = CCTK_GFINDEX3D(GH, (ii), (jj), (kk)); \ - v[_index] += RAND_VAL; \ - } \ - } \ - } \ - } \ -} - - - -#endif /* !define(PERTURB_H) */ -- cgit v1.2.3 From 486369b20142c236b27dfc1d49a4d1c42a62c179 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Tue, 7 Dec 2010 10:06:29 -0600 Subject: Remove outdated vectorisation implementation. An equivalent functionality is now available in LSUThorns/Vectors. 
--- .../GenericFD/src/old/Vectors-SSE2-direct.hh | 135 ----- .../GenericFD/src/old/Vectors-SSE2.hh | 201 ------- .../GenericFD/src/old/Vectors-VSX-direct.hh | 111 ---- .../GenericFD/src/old/Vectors-VSX.hh | 212 -------- .../GenericFD/src/old/Vectors-default.hh | 31 -- .../GenericFD/src/old/Vectors-define.hh | 104 ---- .../GenericFD/src/old/Vectors-outdated.hh | 591 --------------------- .../GenericFD/src/old/Vectors-pseudo.hh | 72 --- .../GenericFD/src/old/Vectors-undefine.hh | 14 - .../GenericFD/src/old/Vectors.hh | 52 -- 10 files changed, 1523 deletions(-) delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-SSE2-direct.hh delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-SSE2.hh delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-VSX-direct.hh delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-VSX.hh delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-default.hh delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-define.hh delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-outdated.hh delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-pseudo.hh delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-undefine.hh delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors.hh (limited to 'Auxiliary') diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-SSE2-direct.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-SSE2-direct.hh deleted file mode 100644 index 12cd6e8..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-SSE2-direct.hh +++ /dev/null @@ -1,135 +0,0 @@ -// Vectorise using Intel's or AMD's SSE2 - -// Use the type __m128d directly, without introducing a wrapper class -// Use macros instead of inline 
functions - - - -#include - -// Vector type corresponding to CCTK_REAL -typedef __m128d CCTK_REAL_VEC; - -// Number of vector elements in a CCTK_REAL_VEC -static -int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); - - - -// Create vectors, extract vector elements - -#define vec_set1(a) (_mm_set1_pd(a)) -#define vec_set(a,b) (_mm_set_pd(b,a)) - -// Get a scalar from the vector -#if defined(__PGI) && defined (__amd64__) -// _mm_cvtsd_f64 does not exist on PGI compilers -// # define vec_elt0(x) (*(CCTK_REAL const*)&(x)) -# define vec_elt0(x) ({ CCTK_REAL a_elt0; asm ("" : "=x" (a_elt0) : "0" (x)); a_elt0; }) -#else -// this is a no-op -# define vec_elt0(x) (_mm_cvtsd_f64(x)) -#endif -#define vec_elt1(x_) ({ CCTK_REAL_VEC const x_elt1=(x_); vec_elt0(_mm_unpackhi_pd(x_elt1,x_elt1)); }) - - - -// Load and store vectors - -// Load a vector from memory (aligned and unaligned); this loads from -// a reference to a scalar -#define vec_load(p) (_mm_load_pd(&(p))) -#define vec_loadu(p) (_mm_loadu_pd(&(p))) - -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -// Implementation: Always use unaligned load -#define vec_loadu_maybe(off,p) (vec_loadu(p)) -#define vec_loadu_maybe3(off1,off2,off3,p) (vec_loadu(p)) -#if 0 -#define vec_loadu_maybe(off,p) \ - (!((off)&(CCTK_REAL_VEC_SIZE-1)) ? \ - vec_load(p) : vec_loadu(p)) -#define vec_loadu_maybe3(off1,off2,off3,p) \ - (!((off1)&(CCTK_REAL_VEC_SIZE-1)) && \ - !((off2)&(CCTK_REAL_VEC_SIZE-1)) && \ - !((off3)&(CCTK_REAL_VEC_SIZE-1)) ? 
\ - vec_load(p) : vec_loadu(p)) -#endif - -// Store a vector to memory (aligned and non-temporal); this stores to -// a reference to a scalar -#define vec_store(p,x) (_mm_store_pd(&(p),x)) -#define vec_storeu(p,x) (_mm_storeu_pd(&(p),x)) -#if defined(KRANC_CACHE) -# define vec_store_nta(p,x) (_mm_stream_pd(&(p),x)) -#else -# define vec_store_nta(p,x) (_mm_store_pd(&(p),x)) -#endif - -// Store a lower or higher partial vector (aligned and non-temporal); -// the non-temporal hint is probably ignored -#define vec_store_nta_partial_lo(p,x,n) (_mm_storel_pd(&(p),x)) -#define vec_store_nta_partial_hi(p,x,n) (_mm_storeh_pd((&(p))+1,x)) - - - -// Functions and operators - -// Operators -#undef fneg -#undef fmul -#undef fdiv -#undef fadd -#undef fsub -#if defined(__PGI) && defined (__amd64__) -// The PGI compiler does not understand __m128d literals -static union { - unsigned long long s[CCTK_REAL_VEC_SIZE]; - CCTK_REAL_VEC v; -} vec_neg_mask_impl = {0x8000000000000000ULL, 0x8000000000000000ULL}; -# define vec_neg_mask (vec_neg_mask_impl.v) -#else -# define vec_neg_mask ((CCTK_REAL_VEC)(__m128i){0x8000000000000000ULL, 0x8000000000000000ULL}) -#endif -#define fneg(x) (_mm_xor_pd(x,vec_neg_mask)) -#define fmul(x,y) (_mm_mul_pd(x,y)) -#define fdiv(x,y) (_mm_div_pd(x,y)) -#define fadd(x,y) (_mm_add_pd(x,y)) -#define fsub(x,y) (_mm_sub_pd(x,y)) - -// Cheap functions -#undef kfabs -#undef kfmax -#undef kfmin -#undef ksqrt -#if defined(__PGI) && defined (__amd64__) -// The PGI compiler does not understand __m128d literals -static union { - unsigned long long s[CCTK_REAL_VEC_SIZE]; - CCTK_REAL_VEC v; -} vec_fabs_mask_impl = {0x7fffffffffffffffULL, 0x7fffffffffffffffULL}; -# define vec_fabs_mask (vec_fabs_mask_impl.v) -#else -# define vec_fabs_mask ((CCTK_REAL_VEC)(__m128i){0x7fffffffffffffffULL, 0x7fffffffffffffffULL}) -#endif -#define kfabs(x) (_mm_and_pd(x,vec_fabs_mask)) -#define kfmax(x,y) (_mm_max_pd(x,y)) -#define kfmin(x,y) (_mm_min_pd(x,y)) -#define ksqrt(x) 
(_mm_sqrt_pd(x)) - -// Expensive functions -#undef kexp -#undef klog -#undef kpow -#define kexp(x_) ({ CCTK_REAL_VEC const x_exp=(x_); vec_set(exp(vec_elt0(x_exp)),exp(vec_elt1(x_exp))); }) -#define klog(x_) ({ CCTK_REAL_VEC const x_log=(x_); vec_set(log(vec_elt0(x_log)),log(vec_elt1(x_log))); }) -#define kpow(x_,a_) ({ CCTK_REAL_VEC const x_pow=(x_); CCTK_REAL const a_pow=(a_); vec_set(pow(vec_elt0(x_pow),a_pow),pow(vec_elt1(x_pow),a_pow)); }) - - - -#undef Sign -#define Sign(x) (42) - -#undef ToReal -#define ToReal(x) (vec_set1(x)) diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-SSE2.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-SSE2.hh deleted file mode 100644 index b74fac0..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-SSE2.hh +++ /dev/null @@ -1,201 +0,0 @@ -// Vectorise using Intel's or AMD's SSE2 - - - -#include - -// Vector type corresponding to CCTK_REAL -struct CCTK_REAL_VEC { - // Underlying scalar and vector types - typedef double S; - typedef __m128d V; - - // Payload - V v; - - // Empty constructur - inline CCTK_REAL_VEC() { } - - // Convert from and to the underlying vector type - inline CCTK_REAL_VEC(V const v_): v(v_) { } - inline operator V const() const { return v; } - - // Convert from the underlying scalar type - inline CCTK_REAL_VEC(S const& a): v(_mm_set1_pd(a)) { } - inline CCTK_REAL_VEC(int const& a): v(_mm_set1_pd(S(a))) { } - - // Copy constructor - inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { } -}; - -// Number of vector elements in a CCTK_REAL_VEC -static -int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); - - - -// Create vectors, extract vector elements - -DEFINE_FUNCTION_R_V(vec_set1,_mm_set1_pd(a)) -DEFINE_FUNCTION_RR_V(vec_set,_mm_set_pd(b,a)) - -// Get a scalar from the vector -#if defined(__PGI) && defined (__amd64__) -// _mm_cvtsd_f64 does not exist on PGI compilers -// DEFINE_FUNCTION_V_R(vec_elt0,({ CCTK_REAL a; 
_mm_store_sd(&a,x); a; })) -// DEFINE_FUNCTION_V_R(vec_elt0,(*(CCTK_REAL const*)&x)) -// This generates the fastest code with PGI compilers -DEFINE_FUNCTION_V_R(vec_elt0,({ CCTK_REAL a; asm ("" : "=x" (a) : "0" (x)); a; })) -#else -DEFINE_FUNCTION_V_R(vec_elt0,_mm_cvtsd_f64(x)) // this is a no-op -#endif -DEFINE_FUNCTION_V_R(vec_elt1,vec_elt0(_mm_unpackhi_pd(x,x))) - - - -// Load and store vectors - -// Load a vector from memory (aligned and unaligned); this loads from -// a reference to a scalar -DEFINE_FUNCTION_PR_V(vec_load,_mm_load_pd(&p)) -DEFINE_FUNCTION_PR_V(vec_loadu,_mm_loadu_pd(&p)) - -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -// Implementation: default to unaligned load -template -DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl,vec_loadu(p)) -template -DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl3,vec_loadu(p)) -// Implementation: load aligned if the modulus is zero -template<> -inline -CCTK_REAL_VEC vec_loadu_maybe_impl<0> (CCTK_REAL const& p) -{ - return vec_load(p); -} -template<> -inline -CCTK_REAL_VEC vec_loadu_maybe_impl3<0,0,0> (CCTK_REAL const& p) -{ - return vec_load(p); -} -// Call the implementation with the modulus -#define vec_loadu_maybe(off,p) \ - (vec_loadu_maybe_impl<(off)&(CCTK_REAL_VEC_SIZE-1>(p))) -#define vec_loadu_maybe3(off1,off2,off3,p) \ - (vec_loadu_maybe_impl3<(off1)&(CCTK_REAL_VEC_SIZE-1), \ - (off2)&(CCTK_REAL_VEC_SIZE-1), \ - (off3)&(CCTK_REAL_VEC_SIZE-1)>(p)) - -// Store a vector to memory (aligned and non-temporal); this stores to -// a reference to a scalar -DEFINE_FUNCTION_PRV(vec_store,_mm_store_pd(&p,x)) -DEFINE_FUNCTION_PRV(vec_storeu,_mm_storeu_pd(&p,x)) -#if defined(KRANC_CACHE) -DEFINE_FUNCTION_PRV(vec_store_nta,_mm_stream_pd(&p,x)) -#else -DEFINE_FUNCTION_PRV(vec_store_nta,_mm_store_pd(&p,x)) -#endif - -// Store a lower or higher partial vector (aligned and non-temporal); -// the non-temporal hint is probably ignored -static inline -void 
vec_store_nta_partial_lo (CCTK_REAL& p, CCTK_REAL_VEC const x, int const n) -{ - switch (n) { - case 1: _mm_storel_pd(&p,x); break; - default: assert(0); - } -} -static inline -void vec_store_nta_partial_hi (CCTK_REAL& p, CCTK_REAL_VEC const x, int const n) -{ - switch (n) { - case 1: _mm_storeh_pd((&p)+1,x); break; - default: assert(0); - } -} - - - -// Functions and operators - -// Single-argument operators -#if 0 -DEFINE_FUNCTION_V_V(operator+,x) -static CCTK_REAL_VEC const vec_neg_mask = - (CCTK_REAL_VEC::V)(__m128i) { 0x8000000000000000ULL, 0x8000000000000000ULL }; -DEFINE_FUNCTION_V_V(operator-,_mm_xor_pd(x,vec_neg_mask)) -#endif -DEFINE_FUNCTION_V_V(operator+,+x.v) -DEFINE_FUNCTION_V_V(operator-,-x.v) - -// Double-argument operators, both vectors -#if 0 -DEFINE_FUNCTION_VV_V(operator+,_mm_add_pd(x,y)) -DEFINE_FUNCTION_VV_V(operator-,_mm_sub_pd(x,y)) -DEFINE_FUNCTION_VV_V(operator*,_mm_mul_pd(x,y)) -DEFINE_FUNCTION_VV_V(operator/,_mm_div_pd(x,y)) -#endif -DEFINE_FUNCTION_VV_V(operator+,x.v+y.v) -DEFINE_FUNCTION_VV_V(operator-,x.v-y.v) -DEFINE_FUNCTION_VV_V(operator*,x.v*y.v) -DEFINE_FUNCTION_VV_V(operator/,x.v/y.v) - -// Double-argument operators, vector and scalar -DEFINE_FUNCTION_VR_V(operator+,x+vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator-,x-vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator*,x*vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator/,x/vec_set1(a)) - -// Double-argument operators, scalar and vector -DEFINE_FUNCTION_RV_V(operator+,vec_set1(a)+x) -DEFINE_FUNCTION_RV_V(operator-,vec_set1(a)-x) -DEFINE_FUNCTION_RV_V(operator*,vec_set1(a)*x) -DEFINE_FUNCTION_RV_V(operator/,vec_set1(a)/x) - -// Cheap functions -#if defined(__PGI) && defined (__amd64__) -// The PGI compiler does not understand __m128d literals -static union { - CCTK_REAL_VEC::S s[CCTK_REAL_VEC_SIZE]; - CCTK_REAL_VEC::V v; -} vec_fabs_mask_impl = { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL }; -# define vec_fabs_mask (vec_fabs_mask_impl.v) -#else -static CCTK_REAL_VEC const vec_fabs_mask = - 
(CCTK_REAL_VEC::V)(__m128i) { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL }; -#endif -DEFINE_FUNCTION_V_V(fabs,_mm_and_pd(x,vec_fabs_mask)) -DEFINE_FUNCTION_VV_V(fmax,_mm_max_pd(x,y)) -DEFINE_FUNCTION_VV_V(fmin,_mm_min_pd(x,y)) -DEFINE_FUNCTION_V_V(sqrt,_mm_sqrt_pd(x)) - -// Expensive functions -DEFINE_FUNCTION_V_V(exp,vec_set(exp(vec_elt0(x)),exp(vec_elt1(x)))) -DEFINE_FUNCTION_V_V(log,vec_set(log(vec_elt0(x)),log(vec_elt1(x)))) -DEFINE_FUNCTION_VR_V(pow,vec_set(pow(vec_elt0(x),a),pow(vec_elt1(x),a))) - - - -#undef Sign -#define Sign(x) (42) - -// #undef ToReal -// #define ToReal(x) vec_set1(x) - -#if defined(__PGI) && defined (__amd64__) -// Special case for PGI 9.0.4 to avoid an internal compiler error -#undef IfThen -static inline -CCTK_REAL_VEC IfThen (bool const cond, CCTK_REAL_VEC const x, CCTK_REAL_VEC const y) -{ - union { - __m128i vi; - CCTK_REAL_VEC::V v; - } mask; - mask.vi = _mm_set1_epi64x(-(long long)cond); - return _mm_or_pd(_mm_and_pd(x.v, mask.v), _mm_andnot_pd(mask.v, y.v)); -} -#endif diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-VSX-direct.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-VSX-direct.hh deleted file mode 100644 index 7e06017..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-VSX-direct.hh +++ /dev/null @@ -1,111 +0,0 @@ -// Vectorise using IBM's Altivec - -// Use the type vector double directly, without introducing a wrapper class -// Use macros instead of inline functions - - - -#include - -// Vector type corresponding to CCTK_REAL -typedef vector double CCTK_REAL_VEC; - -// Number of vector elements in a CCTK_REAL_VEC -static -int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); - - - -// Create vectors, extract vector elements - -#define vec_set1(a) (vec_splats(a)) -#if defined(__GNUC__) -// GNU doesn't support array indices on vectors -union vec_mask { - double elts[2]; - vector double v; -}; -# define vec_set(a,b) ({ vec_mask 
x_set; x_set.elts[0]=(a); x_set.elts[1]=(b); x_set.v; }) -#else -# define vec_set(a,b) ({ CCTK_REAL_VEC x_set; x_set[0]=(a); x_set[1]=(b); x_set; }) -#endif - -// Get a scalar from the vector -#if defined(__GNUC__) -// GNU doesn't support array indices on vectors -# define vec_elt0(x) ({ vec_mask x_elt0; x_elt0.v=(x); x_elt0.elts[0]; }) -# define vec_elt1(x) ({ vec_mask x_elt1; x_elt1.v=(x); x_elt1.elts[1]; }) -#else -# define vec_elt0(x) ((x)[0]) -# define vec_elt1(x) ((x)[1]) -#endif - - - -// Load and store vectors - -// Load a vector from memory (aligned and unaligned); this loads from -// a reference to a scalar -#define vec_load(p) (*(CCTK_REAL_VEC const*)&(p)) -#define vec_loadu(p) (vec_load(p)) - -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -#define vec_loadu_maybe(off,p) (vec_load(p)) -#define vec_loadu_maybe3(off1,off2,off3,p) (vec_load(p)) - -// Store a vector to memory (aligned and non-temporal); this stores to -// a reference to a scalar -#define vec_store(p,x) (*(CCTK_REAL_VEC*)&(p)=(x)) -#define vec_storeu(p,x) (*(CCTK_REAL_VEC*)&(p)=(x)) -// TODO: Use stvxl instruction? 
-#define vec_store_nta(p,x) vec_store(p,x) - -// Store a lower or higher partial vector (aligned and non-temporal); -// the non-temporal hint is probably ignored -#define vec_store_nta_partial_lo(p,x,n) ((p)=vec_elt0(x)) -#define vec_store_nta_partial_hi(p,x,n) ((&(p))[1]=vec_elt1(x)) - - - -// Functions and operators - -// Other Altivec functions are: -// nabs: -abs a -// madd msub nmadd nmsub: [+-]a*b[+-]c - -// Triple-argument operators, all vectors -#undef fmadd -#undef fmsub -#undef fnmadd -#undef fnmsub -#define fmadd(x,y,z) (vec_madd(x,y,z)) -#define fmsub(x,y,z) (vec_msub(x,y,z)) -#define fnmadd(x,y,z) (vec_nmadd(x,y,z)) -#define fnmsub(x,y,z) (vec_nmsub(x,y,z)) - -// Cheap functions -#undef kfabs -#undef kfmax -#undef kfmin -#define kfabs(x) (vec_abs(x)) -#define kfmax(x,y) (vec_max(x,y)) -#define kfmin(x,y) (vec_min(x,y)) - -// Expensive functions -#undef kexp -#undef klog -#undef kpow -#undef ksqrt -#define kexp(x_) ({ CCTK_REAL_VEC const x_exp=(x_); vec_set(exp(vec_elt0(x_exp)),exp(vec_elt1(x_exp))); }) -#define klog(x_) ({ CCTK_REAL_VEC const x_log=(x_); vec_set(log(vec_elt0(x_log)),log(vec_elt1(x_log))); }) -#define kpow(x_,a_) ({ CCTK_REAL_VEC const x_pow=(x_); CCTK_REAL const a_pow=(a_); vec_set(pow(vec_elt0(x_pow),a_pow),pow(vec_elt1(x_pow),a_pow)); }) -#define ksqrt(x_) ({ CCTK_REAL_VEC const x_sqrt=(x_); vec_set(sqrt(vec_elt0(x_sqrt)),sqrt(vec_elt1(x_sqrt))); }) - - - -#undef Sign -#define Sign(x) (42) - -#undef ToReal -#define ToReal(x) (vec_set1((CCTK_REAL)(x))) diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-VSX.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-VSX.hh deleted file mode 100644 index 3fc97f6..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-VSX.hh +++ /dev/null @@ -1,212 +0,0 @@ -// Vectorise using IBM's Altivec - - - -#include - -// Vector type corresponding to CCTK_REAL -struct CCTK_REAL_VEC { - // The underlying scalar and vector types - typedef double 
S; - typedef vector double V; - V v; - union vec_mask { - S elts[2]; - V v; - }; - - // Set a vector from scalars -#if 0 - // IBM - inline CCTK_REAL_VEC(S const a, S const b) { v[0]=a; v[1]=b; } -#endif -#if 0 - inline CCTK_REAL_VEC(S const a, S const b): - v(vec_mergel(vec_splats(a), vec_splats(b))) { } -#endif - inline CCTK_REAL_VEC(S const a, S const b) - { - vec_mask x; - x.elts[0] = a; - x.elts[1] = b; - v = x.v; - } - - // Set a vector from a scalar, replicating the scalar - // Note: Could also use vec_xlds instead - inline CCTK_REAL_VEC(S const a): v(vec_splats(a)) { } - - // Convert from and to the underlying vector type - inline CCTK_REAL_VEC(V const v_): v(v_) { } - inline operator V const() const { return v; } - - inline CCTK_REAL_VEC() { } - - // Copy constructor - inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { } -}; - -// Number of vector elements in a CCTK_REAL_VEC -static -int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); - - - -// Create vectors, extract vector elements -DEFINE_FUNCTION_R_V(vec_set1,CCTK_REAL_VEC(a)) -DEFINE_FUNCTION_RR_V(vec_set,CCTK_REAL_VEC(a,b)) - -// Get a scalar from the vector -#if 0 -// IBM -DEFINE_FUNCTION_V_R(vec_elt0,x.v[0]) -DEFINE_FUNCTION_V_R(vec_elt1,x.v[1]) -#endif -static inline CCTK_REAL vec_elt0(CCTK_REAL_VEC const x) -{ - CCTK_REAL_VEC::vec_mask x1; - x1.v = x; - return x1.elts[0]; -} -static inline CCTK_REAL vec_elt1(CCTK_REAL_VEC const x) -{ - CCTK_REAL_VEC::vec_mask x1; - x1.v = x; - return x1.elts[1]; -} - - - -// Load and store vectors - -// Load a vector from memory (aligned and unaligned); this loads from -// a reference to a scalar -DEFINE_FUNCTION_PR_V(vec_load,p) -#if 0 -// IBM -DEFINE_FUNCTION_PR_V(vec_loadu,vec_xld2(0,const_cast(&p))) -#endif -DEFINE_FUNCTION_PR_V(vec_loadu,p) - -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -// Implementation: default to unaligned load -template 
-DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl,vec_loadu(p)) -template -DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl3,vec_loadu(p)) -// Implementation: load aligned if the modulus is zero -template<> -inline -CCTK_REAL_VEC vec_loadu_maybe_impl<0> (CCTK_REAL const& p) -{ - return vec_load(p); -} -template<> -inline -CCTK_REAL_VEC vec_loadu_maybe_impl3<0,0,0> (CCTK_REAL const& p) -{ - return vec_load(p); -} -// Call the implementation with the modulus -#define vec_loadu_maybe(off,p) \ - (vec_loadu_maybe_impl<(off)&(CCTK_REAL_VEC_SIZE-1>(p))) -#define vec_loadu_maybe3(off1,off2,off3,p) \ - (vec_loadu_maybe_impl3<(off1)&(CCTK_REAL_VEC_SIZE-1), \ - (off2)&(CCTK_REAL_VEC_SIZE-1), \ - (off3)&(CCTK_REAL_VEC_SIZE-1)>(p)) - -// Store a vector to memory (aligned and non-temporal); this stores to -// a reference to a scalar -DEFINE_FUNCTION_PRV(vec_store,*(CCTK_REAL_VEC::V*)&p=x) -DEFINE_FUNCTION_PRV(vec_storeu,*(CCTK_REAL_VEC::V*)&p=x) -// TODO: Use stvxl instruction? -DEFINE_FUNCTION_PRV(vec_store_nta,*(CCTK_REAL_VEC::V*)&p=x) - -// Store a lower or higher partial vector (aligned and non-temporal); -// the non-temporal hint is probably ignored -static inline -void vec_store_nta_partial_lo (CCTK_REAL& p, CCTK_REAL_VEC const x, int const n) -{ - switch (n) { - case 1: p=vec_elt0(x); break; - default: assert(0); - } -} -static inline -void vec_store_nta_partial_hi (CCTK_REAL& p, CCTK_REAL_VEC const x, int const n) -{ - switch (n) { - case 1: (&p)[1]=vec_elt1(x); break; - default: assert(0); - } -} - - - -// Functions and operators - -// Other Altivec functions are: -// nabs: -abs a -// madd msub nmadd nmsub: [+-]a*b[+-]c - -// Single-argument operators -#if 0 -DEFINE_FUNCTION_V_V(operator+,x) -DEFINE_FUNCTION_V_V(operator-,vec_neg(x)) -#endif -DEFINE_FUNCTION_V_V(operator+,+x.v) -DEFINE_FUNCTION_V_V(operator-,-x.v) - -// Double-argument operators, both vectors -#if 0 -DEFINE_FUNCTION_VV_V(operator+,vec_add(x,y)) -DEFINE_FUNCTION_VV_V(operator-,vec_sub(x,y)) 
-DEFINE_FUNCTION_VV_V(operator*,vec_mul(x,y)) -DEFINE_FUNCTION_VV_V(operator/,vec_div(x,y)) -#endif -DEFINE_FUNCTION_VV_V(operator+,x.v+y.v) -DEFINE_FUNCTION_VV_V(operator-,x.v-y.v) -DEFINE_FUNCTION_VV_V(operator*,x.v*y.v) -DEFINE_FUNCTION_VV_V(operator/,x.v/y.v) - -// Double-argument operators, vector and scalar -DEFINE_FUNCTION_VR_V(operator+,x+vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator-,x-vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator*,x*vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator/,x/vec_set1(a)) - -// Double-argument operators, scalar and vector -DEFINE_FUNCTION_RV_V(operator+,vec_set1(a)+x) -DEFINE_FUNCTION_RV_V(operator-,vec_set1(a)-x) -DEFINE_FUNCTION_RV_V(operator*,vec_set1(a)*x) -DEFINE_FUNCTION_RV_V(operator/,vec_set1(a)/x) - -// Triple-argument operators, all vectors -#undef fmadd -#undef fmsub -#undef fnmadd -#undef fnmsub -DEFINE_FUNCTION_VVV_V(fmadd,vec_madd(x.v,y.v,z.v)) -DEFINE_FUNCTION_VVV_V(fmsub,vec_msub(x.v,y.v,z.v)) -DEFINE_FUNCTION_VVV_V(fnmadd,vec_nmadd(x.v,y.v,z.v)) -DEFINE_FUNCTION_VVV_V(fnmsub,vec_nmsub(x.v,y.v,z.v)) - -// Cheap functions -DEFINE_FUNCTION_V_V(fabs,vec_abs(x.v)) -DEFINE_FUNCTION_VV_V(fmax,vec_max(x.v,y.v)) -DEFINE_FUNCTION_VV_V(fmin,vec_min(x.v,y.v)) - -// Expensive functions -DEFINE_FUNCTION_V_V(exp,vec_set(exp(vec_elt0(x)),exp(vec_elt1(x)))) -DEFINE_FUNCTION_V_V(log,vec_set(log(vec_elt0(x)),log(vec_elt1(x)))) -DEFINE_FUNCTION_VR_V(pow,vec_set(pow(vec_elt0(x),a),pow(vec_elt1(x),a))) -DEFINE_FUNCTION_V_V(sqrt,vec_set(sqrt(vec_elt0(x)),sqrt(vec_elt1(x)))) - - - -#undef Sign -#define Sign(x) (42) - -// #undef ToReal -// #define ToReal(x) (vec_set1(x)) diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-default.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-default.hh deleted file mode 100644 index f928ed8..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-default.hh +++ /dev/null @@ -1,31 +0,0 @@ -// Fallback vectorisation implementation: Do not vectorise - 
- - -// Use CCTK_REAL -typedef CCTK_REAL CCTK_REAL_VEC; - -// Number of vector elements in a CCTK_REAL_VEC -static int const CCTK_REAL_VEC_SIZE = 1; - - - -// We use macros here, so that we are not surprised by compilers which -// don't like to inline functions (e.g. PGI). This should also make -// debug builds (which may not inline) more efficient. - -#define vec_load(p) (p) -#define vec_loadu(p) (p) - -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -#define vec_loadu_maybe(off,p) (p) -#define vec_loadu_maybe3(off1,off2,off3,p) (p) - -#define vec_store(p,x) ((p)=(x)) -#define vec_store_nta(p,x) ((p)=(x)) - -// Store a lower or higher partial vector (aligned and non-temporal); -// the non-temporal hint is probably ignored -#define vec_store_nta_partial_lo(p,x,n) (assert(0)) -#define vec_store_nta_partial_hi(p,x,n) (assert(0)) diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-define.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-define.hh deleted file mode 100644 index f5c0b22..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-define.hh +++ /dev/null @@ -1,104 +0,0 @@ -// Define some macros that simplify defining short function that are -// supposed to be inlined - - - -// Letters defining the prototype (argument and return value types): -// I: i,j: integer -// R: a,b: real -// V: x,y: vector (of real) -// P: p,q: pointer (i.e. const reference) to something -// L: l,m: L-value (i.e. 
non-const reference) to something - - - -// Load and store - -#define DEFINE_FUNCTION_PR_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL const& p) \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_PRV(name,expr) \ -static inline \ -void name (CCTK_REAL& p, CCTK_REAL_VEC const x) \ -{ \ - expr; \ -} - -#define DEFINE_FUNCTION_PVR(name,expr) \ -static inline \ -void name (CCTK_REAL_VEC& p, CCTK_REAL const a) \ -{ \ - expr; \ -} - - - -// Functions and operators - -#define DEFINE_FUNCTION_V_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL_VEC const x) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_V_R(name,expr) \ -static inline \ -CCTK_REAL name (CCTK_REAL_VEC const x) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_R_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL const a) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_VV_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL_VEC const x, CCTK_REAL_VEC const y) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_VR_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL_VEC const x, CCTK_REAL const a) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_RV_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL const a, CCTK_REAL_VEC const x) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_RR_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL const a, CCTK_REAL const b) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_VVV_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL_VEC const x, CCTK_REAL_VEC const y, CCTK_REAL_VEC const z) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-outdated.hh 
b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-outdated.hh deleted file mode 100644 index df83b3a..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-outdated.hh +++ /dev/null @@ -1,591 +0,0 @@ -#ifndef VECTORS_HH -#define VECTORS_HH - - - -// Vectorisation - -#include -#include -#include - -#include - - - -// I: i,j: integer -// R: a,b: real -// V: x,y: vector (of real) -// P: p,q: pointer (i.e. const reference) to something -// L: l,m: L-value (i.e. non-const reference) to something - -#define DEFINE_FUNCTION_PR_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL const& p) \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_PRV(name,expr) \ -static inline \ -void name (CCTK_REAL& p, CCTK_REAL_VEC const& x) \ -{ \ - expr; \ -} - -#define DEFINE_FUNCTION_PVR(name,expr) \ -static inline \ -void name (CCTK_REAL_VEC& p, CCTK_REAL const& a) \ -{ \ - expr; \ -} - -#define DEFINE_FUNCTION_V_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL_VEC const& x) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_V_R(name,expr) \ -static inline \ -CCTK_REAL name (CCTK_REAL_VEC const& x) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_R_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL const& a) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_VV_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL_VEC const& x, CCTK_REAL_VEC const& y) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_VR_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL_VEC const& x, CCTK_REAL const& a) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_RV_V(name,expr) \ -static inline \ -CCTK_REAL_VEC name (CCTK_REAL const& a, CCTK_REAL_VEC const& x) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - -#define DEFINE_FUNCTION_RR_V(name,expr) \ -static inline \ 
-CCTK_REAL_VEC name (CCTK_REAL const& a, CCTK_REAL const& b) \ - CCTK_ATTRIBUTE_PURE \ -{ \ - return expr; \ -} - - - -// Intel, double -#if defined(KRANC_VECTORS) && defined(__SSE2__) && defined(CCTK_REAL_PRECISION_8) - -#include - -// Vector type corresponding to CCTK_REAL -struct CCTK_REAL_VEC { - // The underlying scalar and vector types - typedef double S; - typedef __m128d V; - V v; - - // Set a vector from scalars - inline CCTK_REAL_VEC(S const& a, S const& b): v(_mm_set_pd(b,a)) { } - - // Set a vector from a scalar, replicating the scalar - inline CCTK_REAL_VEC(S const& a): v(_mm_set1_pd(a)) { } - - // Convert from and to the underlying vector type - inline CCTK_REAL_VEC(V const& v_): v(v_) { } - inline operator V const() const { return v; } - - inline CCTK_REAL_VEC() { } - - // Copy constructor - inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { } -}; - -union vec_mask { - unsigned long long bits[2]; - CCTK_REAL_VEC::V v; -}; - -DEFINE_FUNCTION_R_V(vec_set1,_mm_set1_pd(a)) -DEFINE_FUNCTION_RR_V(vec_set,_mm_set_pd(b,a)) - -// Get a scalar from the vector -#if defined(__PGI) && defined (__amd64__) -// _mm_cvtsd_f64 does not exist on PGI compilers -static inline -CCTK_REAL vec_elt0 (CCTK_REAL_VEC const& x) -{ - CCTK_REAL a; _mm_store_sd(&a,x); return a; -} -#else -DEFINE_FUNCTION_V_R(vec_elt0,_mm_cvtsd_f64(x)) //this is a no-op -#endif - -#if 0 -DEFINE_FUNCTION_V_R(vec_elt1,vec_elt0(_mm_shuffle_pd(x,x,_MM_SHUFFLE2(1,1)))) -#endif -static inline -CCTK_REAL vec_elt1 (CCTK_REAL_VEC const& x) -{ - CCTK_REAL a; _mm_storeh_pd(&a,x); return a; -} - -// Load a vector from memory (aligned and unaligned); this loads from -// a reference to a scalar -DEFINE_FUNCTION_PR_V(vec_load,_mm_load_pd(&p)) -DEFINE_FUNCTION_PR_V(vec_loadu,_mm_loadu_pd(&p)) - -#if 0 -// Load a partial vector (duplicating the last loaded element to fill -// the remaining elements) -// TODO: Should this be aligned or unaligned? 
-static inline -CCTK_REAL_VEC vec_load_partial (CCTK_REAL const& p, int const n) -{ - switch (n) { - case 1: return _mm_load1_pd(p); - default: assert(0); - } -} -#endif - -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -// Implementation: default to unaligned load -template -DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl,vec_loadu(p)) -template -DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl3,vec_loadu(p)) -// Implementation: load aligned if the modulus is zero -template<> -inline -CCTK_REAL_VEC vec_loadu_maybe_impl<0> (CCTK_REAL const& p) -{ - return vec_load(p); -} -template<> -inline -CCTK_REAL_VEC vec_loadu_maybe_impl3<0,0,0> (CCTK_REAL const& p) -{ - return vec_load(p); -} -// Call the implementation with the modulus -template -static inline -CCTK_REAL_VEC vec_loadu_maybe (CCTK_REAL const& p) -{ - return vec_loadu_maybe_impl(p); -} -template -static inline -CCTK_REAL_VEC vec_loadu_maybe3 (CCTK_REAL const& p) -{ - return vec_loadu_maybe_impl3(p); -} - -// Store a vector to memory (aligned and non-temporal); this stores to -// a reference to a scalar -DEFINE_FUNCTION_PRV(vec_store,_mm_store_pd(&p,x)) -DEFINE_FUNCTION_PRV(vec_store_nta,_mm_stream_pd(&p,x)) - -// Store a lower or higher partial vector (aligned and non-temporal); -// the non-temporal hint is probably ignored -static inline -void vec_storel_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n) -{ - switch (n) { - case 1: _mm_storel_pd(&p,x); break; - default: assert(0); - } -} -static inline -void vec_storeh_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n) -{ - switch (n) { - case 1: _mm_storeh_pd((&p)+1,x); break; - default: assert(0); - } -} - -// Double-argument operators, both vectors -DEFINE_FUNCTION_VV_V(operator+,_mm_add_pd(x,y)) -DEFINE_FUNCTION_VV_V(operator-,_mm_sub_pd(x,y)) -DEFINE_FUNCTION_VV_V(operator*,_mm_mul_pd(x,y)) -DEFINE_FUNCTION_VV_V(operator/,_mm_div_pd(x,y)) - -// Double-argument operators, vector and 
scalar -DEFINE_FUNCTION_VR_V(operator+,x+vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator-,x-vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator*,x*vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator/,x/vec_set1(a)) - -// Double-argument operators, scalar and vector -DEFINE_FUNCTION_RV_V(operator+,vec_set1(a)+x) -DEFINE_FUNCTION_RV_V(operator-,vec_set1(a)-x) -DEFINE_FUNCTION_RV_V(operator*,vec_set1(a)*x) -DEFINE_FUNCTION_RV_V(operator/,vec_set1(a)/x) - -// Single-argument operators -DEFINE_FUNCTION_V_V(operator+,x) -#if 0 -DEFINE_FUNCTION_V_V(operator-,vec_set(0.0,0.0)-x) -#endif -static vec_mask const vec_neg_mask = -{ { 0x8000000000000000ULL, 0x8000000000000000ULL } }; -DEFINE_FUNCTION_V_V(operator-,_mm_xor_pd(x,vec_neg_mask.v)) - -// Cheap functions -static vec_mask const vec_fabs_mask = -{ { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL } }; -DEFINE_FUNCTION_V_V(fabs,_mm_and_pd(x,vec_fabs_mask.v)) -DEFINE_FUNCTION_VV_V(fmax,_mm_max_pd(x,y)) -DEFINE_FUNCTION_VV_V(fmin,_mm_min_pd(x,y)) -DEFINE_FUNCTION_V_V(sqrt,_mm_sqrt_pd(x)) - -// Expensive functions -DEFINE_FUNCTION_V_V(exp,vec_set(exp(vec_elt0(x)),exp(vec_elt1(x)))) -DEFINE_FUNCTION_V_V(log,vec_set(log(vec_elt0(x)),log(vec_elt1(x)))) -DEFINE_FUNCTION_VR_V(pow,vec_set(pow(vec_elt0(x),a),pow(vec_elt1(x),a))) - -// Special case for PGI to avoid internal compiler error -#if defined(__PGI) && defined (__amd64__) -#undef IfThen -CCTK_REAL_VEC IfThen (bool const cond, CCTK_REAL_VEC const& x, CCTK_REAL_VEC co\ -nst& y) -{ - return cond*x + (not cond)*y; -} -#endif - - - -#if 0 -// Try to use the __m128d type directly. - -// This does not really work, because it is not possible to define -// automatic conversion operators from double to __m128d, so that -// explicit conversions are required. This makes the code look more -// clumsy. 
- -// Vector type corresponding to CCTK_REAL -typedef __m128d CCTK_REAL_VEC; - -DEFINE_FUNCTION_R_V(vec_set1,_mm_set1_pd(a)) -DEFINE_FUNCTION_RR_V(vec_set,_mm_set_pd(b,a)) - -// Get a scalar from the vector -static inline -CCTK_REAL vec_elt0 (CCTK_REAL_VEC const& x) -{ -#if 0 - // _mm_cvtsd_f64 does not exist on PGI compilers - return _mm_cvtsd_f64(x); // this is a no-op -#endif - CCTK_REAL a; _mm_store_sd(&a,x); return a; -} - -DEFINE_FUNCTION_V_R(vec_elt1,vec_elt0(_mm_shuffle_pd(x,x,_MM_SHUFFLE2(1,1)))) - -// Load a vector from memory (aligned and unaligned); this loads from -// a reference to a scalar -DEFINE_FUNCTION_PR_V(vec_load,_mm_load_pd(&p)) -DEFINE_FUNCTION_PR_V(vec_loadu,_mm_loadu_pd(&p)) - -// Store a vector to memory (aligned and non-temporal); this stores to -// a reference to a scalar -DEFINE_FUNCTION_PRV(vec_store,_mm_store_pd(&p,x)) -DEFINE_FUNCTION_PRV(vec_store_nta,_mm_stream_pd(&p,x)) - -// Cheap functions -static vec_mask const vec_fabs_mask = -{ { 0x7fffffffffffffffULL, 0x7fffffffffffffffULL } }; -DEFINE_FUNCTION_V_V(fabs,_mm_and_pd(x,vec_fabs_mask.v)) -DEFINE_FUNCTION_VV_V(fmax,_mm_max_pd(x,y)) -DEFINE_FUNCTION_VV_V(fmin,_mm_min_pd(x,y)) -DEFINE_FUNCTION_V_V(sqrt,_mm_sqrt_pd(x)) - -// Expensive functions -DEFINE_FUNCTION_V_V(exp,set(exp(vec_elt0(x)),exp(vec_elt1(x)))) -DEFINE_FUNCTION_V_V(log,set(log(vec_elt0(x)),log(vec_elt1(x)))) -DEFINE_FUNCTION_VR_V(pow,set(pow(vec_elt0(x),a),pow(vec_elt1(x),a))) - -#endif - - - -// Intel, float -#elif defined(KRANC_VECTORS) && defined(__SSE__) && defined(CCTK_REAL_PRECISION_4) - -#include - -// A vector type corresponding to CCTK_REAL -typedef __m128 CCTK_REAL_VEC; - - - -// Power, double -#elif defined(KRANC_VECTORS) && defined(__ALTIVEC__) && defined(_ARCH_PWR7) && defined(CCTK_REAL_PRECISION_8) - -#include - -// Vector type corresponding to CCTK_REAL -struct CCTK_REAL_VEC { - // The underlying scalar and vector types - typedef double S; - typedef vector double V; - V v; - - // vec_insert, 
vec_extract, vec_splat - - // Set a vector from scalars - inline CCTK_REAL_VEC(S const& a, S const& b) { v[0]=a; v[1]=b; } - - // Set a vector from a scalar, replicating the scalar - inline CCTK_REAL_VEC(S const& a): v(vec_splats(a)) { } - - // Convert from and to the underlying vector type - inline CCTK_REAL_VEC(V const& v_): v(v_) { } - inline operator V const() const { return v; } - - inline CCTK_REAL_VEC() { } - - // Copy constructor - inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x) { } -}; - -DEFINE_FUNCTION_R_V(vec_set1,CCTK_REAL_VEC(a)) -DEFINE_FUNCTION_RR_V(vec_set,CCTK_REAL_VEC(a,b)) - -// Get a scalar from the vector -DEFINE_FUNCTION_V_R(vec_elt0,x.v[0]) -DEFINE_FUNCTION_V_R(vec_elt1,x.v[1]) - -// Load a vector from memory (aligned and unaligned); this loads from -// a reference to a scalar -DEFINE_FUNCTION_PR_V(vec_load,p) -DEFINE_FUNCTION_PR_V(vec_loadu,vec_xld2(0,const_cast(&p))) -// vec_xlds - -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -// Implementation: default to unaligned load -template -DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl,vec_loadu(p)) -// Implementation: load aligned if the modulus is zero -#define static -template<> -DEFINE_FUNCTION_PR_V(vec_loadu_maybe_impl<0>,vec_load(p)) -#undef static -// Call the implementation with the modulus -template -DEFINE_FUNCTION_PR_V(vec_loadu_maybe,vec_loadu_maybe_impl(p)) - -// Store a vector to memory (aligned and non-temporal); this stores to -// a reference to a scalar -DEFINE_FUNCTION_PRV(vec_store,*(CCTK_REAL_VEC::V*)&p=x) -DEFINE_FUNCTION_PRV(vec_store_nta,*(CCTK_REAL_VEC::V*)&p=x) - -// Store a lower or higher partial vector (aligned and non-temporal); -// the non-temporal hint is probably ignored -static inline -void vec_storel_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n) -{ - switch (n) { - case 1: p=x.v[0]; break; - default: assert(0); - } -} -static inline -void vec_storeh_partial (CCTK_REAL& p, 
CCTK_REAL_VEC const& x, int const n) -{ - switch (n) { - case 1: (&p)[1]=x.v[1]; break; - default: assert(0); - } -} - -// Double-argument operators, both vectors -DEFINE_FUNCTION_VV_V(operator+,vec_add(x,y)) -DEFINE_FUNCTION_VV_V(operator-,vec_sub(x,y)) -DEFINE_FUNCTION_VV_V(operator*,vec_mul(x,y)) -DEFINE_FUNCTION_VV_V(operator/,vec_div(x,y)) - -// Double-argument operators, vector and scalar -DEFINE_FUNCTION_VR_V(operator+,x+vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator-,x-vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator*,x*vec_set1(a)) -DEFINE_FUNCTION_VR_V(operator/,x/vec_set1(a)) - -// Double-argument operators, scalar and vector -DEFINE_FUNCTION_RV_V(operator+,vec_set1(a)+x) -DEFINE_FUNCTION_RV_V(operator-,vec_set1(a)-x) -DEFINE_FUNCTION_RV_V(operator*,vec_set1(a)*x) -DEFINE_FUNCTION_RV_V(operator/,vec_set1(a)/x) - -// Single-argument operators -DEFINE_FUNCTION_V_V(operator+,x) -DEFINE_FUNCTION_V_V(operator-,vec_neg(x)) - -// Cheap functions -DEFINE_FUNCTION_V_V(fabs,vec_abs(x)) -DEFINE_FUNCTION_VV_V(fmax,vec_max(x,y)) -DEFINE_FUNCTION_VV_V(fmin,vec_min(x,y)) - -// Expensive functions -DEFINE_FUNCTION_V_V(exp,vec_set(exp(vec_elt0(x)),exp(vec_elt1(x)))) -DEFINE_FUNCTION_V_V(log,vec_set(log(vec_elt0(x)),log(vec_elt1(x)))) -DEFINE_FUNCTION_VR_V(pow,vec_set(pow(vec_elt0(x),a),pow(vec_elt1(x),a))) -DEFINE_FUNCTION_V_V(sqrt,vec_set(sqrt(vec_elt0(x)),sqrt(vec_elt1(x)))) - - - -// Fallback: pseudo-vectorisation -#elif 0 - -// There is no vector type corresponding to CCTK_REAL -struct CCTK_REAL_VEC { - // The underlying scalar and vector types - CCTK_REAL v, w; - - // Set a vector from scalars - inline CCTK_REAL_VEC(CCTK_REAL const& a, CCTK_REAL const& b): v(a), w(b) { } - - // Set a vector from a scalar, replicating the scalar - inline CCTK_REAL_VEC(CCTK_REAL const& a): v(a), w(a) { } - - inline CCTK_REAL_VEC() { } - - // Copy constructor - inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x): v(x.v), w(x.w) { } -}; - - - -DEFINE_FUNCTION_PR_V(vec_load,*(CCTK_REAL_VEC const* 
restrict)&p) -DEFINE_FUNCTION_PR_V(vec_loadu,vec_load(p)) -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -template -DEFINE_FUNCTION_PR_V(vec_loadm,vec_load(p)) - -DEFINE_FUNCTION_PRV(vec_store,*(CCTK_REAL_VEC* restrict)&p=x) -DEFINE_FUNCTION_PRV(vec_store_nta,vec_store(p,x)) - -// Double-argument operators, both vectors -DEFINE_FUNCTION_VV_V(operator+,CCTK_REAL_VEC(x.v+y.v,x.w+y.w)) -DEFINE_FUNCTION_VV_V(operator-,CCTK_REAL_VEC(x.v-y.v,x.w-y.w)) -DEFINE_FUNCTION_VV_V(operator*,CCTK_REAL_VEC(x.v*y.v,x.w*y.w)) -DEFINE_FUNCTION_VV_V(operator/,CCTK_REAL_VEC(x.v/y.v,x.w/y.w)) - -// Double-argument operators, vector and scalar -DEFINE_FUNCTION_VR_V(operator+,CCTK_REAL_VEC(x.v+a,x.w+a)) -DEFINE_FUNCTION_VR_V(operator-,CCTK_REAL_VEC(x.v-a,x.w-a)) -DEFINE_FUNCTION_VR_V(operator*,CCTK_REAL_VEC(x.v*a,x.w*a)) -DEFINE_FUNCTION_VR_V(operator/,CCTK_REAL_VEC(x.v/a,x.w/a)) - -// Double-argument operators, scalar and vector -DEFINE_FUNCTION_RV_V(operator+,CCTK_REAL_VEC(a+x.v,a+x.w)) -DEFINE_FUNCTION_RV_V(operator-,CCTK_REAL_VEC(a-x.v,a-x.w)) -DEFINE_FUNCTION_RV_V(operator*,CCTK_REAL_VEC(a*x.v,a*x.w)) -DEFINE_FUNCTION_RV_V(operator/,CCTK_REAL_VEC(a/x.v,a/x.w)) - -// Single-argument operators -DEFINE_FUNCTION_V_V(operator+,x) -DEFINE_FUNCTION_V_V(operator-,CCTK_REAL_VEC(-x.v,-x.w)) - -// Cheap functions -DEFINE_FUNCTION_V_V(fabs,CCTK_REAL_VEC(fabs(x.v),fabs(x.w))) -DEFINE_FUNCTION_VV_V(fmax,CCTK_REAL_VEC(fmax(x.v,y.v),fmax(x.w,y.w))) -DEFINE_FUNCTION_VV_V(fmin,CCTK_REAL_VEC(fmin(x.v,y.v),fmin(x.w,y.w))) -DEFINE_FUNCTION_V_V(sqrt,CCTK_REAL_VEC(sqrt(x.v),sqrt(x.w))) - -// Expensive functions -DEFINE_FUNCTION_V_V(exp,CCTK_REAL_VEC(exp(x.v),exp(x.w))) -DEFINE_FUNCTION_V_V(log,CCTK_REAL_VEC(log(x.v),log(x.w))) -DEFINE_FUNCTION_VR_V(pow,CCTK_REAL_VEC(pow(x.v,a),pow(x.w,a))) - - - -// Fallback: no vectorisation -#else - -// There is no vector type corresponding to CCTK_REAL -typedef CCTK_REAL CCTK_REAL_VEC; - - - 
-DEFINE_FUNCTION_PR_V(vec_load,p) -DEFINE_FUNCTION_PR_V(vec_loadu,p) -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -// Implementation: default to unaligned load -template -DEFINE_FUNCTION_PR_V(vec_loadu_maybe,p) -template -DEFINE_FUNCTION_PR_V(vec_loadu_maybe3,p) - -DEFINE_FUNCTION_PRV(vec_store,p=x) -DEFINE_FUNCTION_PRV(vec_store_nta,p=x) - -// Store a lower or higher partial vector (aligned and non-temporal); -// the non-temporal hint is probably ignored -static inline -void vec_storel_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n) -{ - assert(0); -} -static inline -void vec_storeh_partial (CCTK_REAL& p, CCTK_REAL_VEC const& x, int const n) -{ - assert(0); -} - - - -#endif - - - -#undef DEFINE_FUNCTION_PR_V -#undef DEFINE_FUNCTION_PRV -#undef DEFINE_FUNCTION_V_V -#undef DEFINE_FUNCTION_R_V -#undef DEFINE_FUNCTION_VV_V -#undef DEFINE_FUNCTION_VR_V -#undef DEFINE_FUNCTION_RV_V -#undef DEFINE_FUNCTION_RR_V - - - -// Number of vector elements in a CCTK_REAL_VEC -static -int const CCTK_REAL_VEC_SIZE = sizeof(CCTK_REAL_VEC) / sizeof(CCTK_REAL); - - - -#endif // #ifndef VECTORS_HH diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-pseudo.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-pseudo.hh deleted file mode 100644 index f439c9b..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-pseudo.hh +++ /dev/null @@ -1,72 +0,0 @@ -// Pseudo vectorisation using scalar operations - - - -// Number of vector elements in a CCTK_REAL_VEC -static int const CCTK_REAL_VEC_SIZE = 2; - -// There is no vector type corresponding to CCTK_REAL -struct CCTK_REAL_VEC { - // The underlying scalar and vector types - CCTK_REAL v[CCTK_REAL_VEC_SIZE]; - - // Set a vector from scalars - inline CCTK_REAL_VEC(CCTK_REAL const& a, CCTK_REAL const& b): v(a), w(b) { } - - // Set a vector from a scalar, replicating the scalar - inline 
CCTK_REAL_VEC(CCTK_REAL const& a): v(a), w(a) { } - - inline CCTK_REAL_VEC() { } - - // Copy constructor - inline CCTK_REAL_VEC(CCTK_REAL_VEC const& x) { v[0]=x.v[0]; v[1]=x.v[1]; } -}; - - - -// Load and store vectors - -DEFINE_FUNCTION_PR_V(vec_load,*(CCTK_REAL_VEC const* restrict)&p) -DEFINE_FUNCTION_PR_V(vec_loadu,vec_load(p)) -// Load a vector from memory that may or may not be aligned, as -// decided by the offset off and the vector size -#define vec_loadu_maybe(off,p) (vec_load(p)) -#define vec_loadu_maybe3(off1,off2,off3,p) (vec_load(p)) - -DEFINE_FUNCTION_PRV(vec_store,*(CCTK_REAL_VEC* restrict)&p=x) -DEFINE_FUNCTION_PRV(vec_store_nta,vec_store(p,x)) - - - -// Functions and operators - -// Double-argument operators, both vectors -DEFINE_FUNCTION_VV_V(operator+,CCTK_REAL_VEC(x.v[0]+y.v[0],x.v[1]+y.v[1])) -DEFINE_FUNCTION_VV_V(operator-,CCTK_REAL_VEC(x.v[0]-y.v[0],x.v[1]-y.v[1])) -DEFINE_FUNCTION_VV_V(operator*,CCTK_REAL_VEC(x.v[0]*y.v[0],x.v[1]*y.v[1])) -DEFINE_FUNCTION_VV_V(operator/,CCTK_REAL_VEC(x.v[0]/y.v[0],x.v[1]/y.v[1])) - -// Double-argument operators, vector and scalar -DEFINE_FUNCTION_VR_V(operator+,CCTK_REAL_VEC(x.v[0]+a,x.v[1]+a)) -DEFINE_FUNCTION_VR_V(operator-,CCTK_REAL_VEC(x.v[0]-a,x.v[1]-a)) -DEFINE_FUNCTION_VR_V(operator*,CCTK_REAL_VEC(x.v[0]*a,x.v[1]*a)) -DEFINE_FUNCTION_VR_V(operator/,CCTK_REAL_VEC(x.v[0]/a,x.v[1]/a)) - -// Double-argument operators, scalar and vector -DEFINE_FUNCTION_RV_V(operator+,CCTK_REAL_VEC(a+x.v[0],a+x.v[1])) -DEFINE_FUNCTION_RV_V(operator-,CCTK_REAL_VEC(a-x.v[0],a-x.v[1])) -DEFINE_FUNCTION_RV_V(operator*,CCTK_REAL_VEC(a*x.v[0],a*x.v[1])) -DEFINE_FUNCTION_RV_V(operator/,CCTK_REAL_VEC(a/x.v[0],a/x.v[1])) - -// Single-argument operators -DEFINE_FUNCTION_V_V(operator+,x) -DEFINE_FUNCTION_V_V(operator-,CCTK_REAL_VEC(-x.v[0],-x.v[1])) - -// Functions -DEFINE_FUNCTION_V_V(exp,CCTK_REAL_VEC(exp(x.v[0]),exp(x.v[1]))) -DEFINE_FUNCTION_V_V(fabs,CCTK_REAL_VEC(fabs(x.v[0]),fabs(x.v[1]))) 
-DEFINE_FUNCTION_VV_V(fmax,CCTK_REAL_VEC(fmax(x.v[0],y.v[0]),fmax(x.v[1],y.v[1]))) -DEFINE_FUNCTION_VV_V(fmin,CCTK_REAL_VEC(fmin(x.v[0],y.v[0]),fmin(x.v[1],y.v[1]))) -DEFINE_FUNCTION_V_V(log,CCTK_REAL_VEC(log(x.v[0]),log(x.v[1]))) -DEFINE_FUNCTION_VR_V(pow,CCTK_REAL_VEC(pow(x.v[0],a),pow(x.v[1],a))) -DEFINE_FUNCTION_V_V(sqrt,CCTK_REAL_VEC(sqrt(x.v[0]),sqrt(x.v[1]))) diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-undefine.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-undefine.hh deleted file mode 100644 index 0d950c7..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors-undefine.hh +++ /dev/null @@ -1,14 +0,0 @@ -// Undefine all macros defined in "Vectors-define.hh", so that we -// leave a clean namespace - - - -#undef DEFINE_FUNCTION_PR_V -#undef DEFINE_FUNCTION_PRV -#undef DEFINE_FUNCTION_V_V -#undef DEFINE_FUNCTION_R_V -#undef DEFINE_FUNCTION_VV_V -#undef DEFINE_FUNCTION_VR_V -#undef DEFINE_FUNCTION_RV_V -#undef DEFINE_FUNCTION_RR_V -#undef DEFINE_FUNCTION_VVV_V diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors.hh b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors.hh deleted file mode 100644 index d32afb2..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/old/Vectors.hh +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef VECTORS_HH -#define VECTORS_HH - - - -// Vectorisation - -#include -#include -#include - -#include - - - -#include "Vectors-define.hh" - -#if defined(KRANC_VECTORS) -// Vectorise - -# if ! 
defined(CCTK_REAL_PRECISION_8) -# error "Vectorisation is currently only supported for double precision" -# endif - -# if defined(__SSE2__) // SSE2 (Intel) -# if defined(KRANC_DIRECT) -# include "Vectors-SSE2-direct.hh" -# else -# include "Vectors-SSE2.hh" -# endif -# elif defined(__ALTIVEC__) && defined(_ARCH_PWR7) // Altivec (Power) -# if defined(KRANC_DIRECT) -# include "Vectors-VSX-direct.hh" -# else -# include "Vectors-VSX.hh" -# endif -# else -# include "Vectors-pseudo.hh" -# endif - -#else -// Don't vectorise - -# include "Vectors-default.hh" - -#endif - -#include "Vectors-undefine.hh" - - - -#endif // #ifndef VECTORS_HH -- cgit v1.2.3 From 5bdd5ad6f48700cba0e6463a7cd24900beec219e Mon Sep 17 00:00:00 2001 From: Ian Hinder Date: Tue, 7 Dec 2010 17:04:34 +0100 Subject: Remove ancient unused code --- .../Cactus/KrancNumericalTools/GenericFD/param.ccl | 19 +- .../KrancNumericalTools/GenericFD/schedule.ccl | 13 - .../KrancNumericalTools/GenericFD/src/GenericFD.c | 14 - .../KrancNumericalTools/GenericFD/src/GenericFD.h | 660 --------------------- .../GenericFD/src/ParamCheck.F90 | 96 --- .../KrancNumericalTools/GenericFD/src/Startup.c | 60 -- .../GenericFD/src/make.code.defn | 3 +- .../KrancNumericalTools/GenericFD/src/testmacros.c | 23 - Tools/CodeGen/Param.m | 6 +- 9 files changed, 7 insertions(+), 887 deletions(-) delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/ParamCheck.F90 delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Startup.c delete mode 100644 Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/testmacros.c (limited to 'Auxiliary') diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/param.ccl b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/param.ccl index 430c2f2..91075c9 100644 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/param.ccl +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/param.ccl @@ -4,38 +4,29 @@ # $Header$ -private: - -KEYWORD FDscheme "Type of finite differencing to use" -{ - 
"2nd order centered macro" :: "centered 2nd order implemented with macros" - "4th order centered macro" :: "centered 4th order implemented with macros" -} "2nd order centered macro" - restricted: -CCTK_INT stencil_width "stencil width used near boundary" +CCTK_INT stencil_width "stencil width used near boundary DEPRECATED" { -1:* :: "outgoing characteristic speed > 0, default of -1 is intentionally invalid" } -1 -CCTK_INT stencil_width_x "stencil width used near boundary" +CCTK_INT stencil_width_x "stencil width used near boundary DEPRECATED" { -1:* :: "outgoing characteristic speed > 0, default of -1 is intentionally invalid" } -1 -CCTK_INT stencil_width_y "stencil width used near boundary" +CCTK_INT stencil_width_y "stencil width used near boundary DEPRECATED" { -1:* :: "outgoing characteristic speed > 0, default of -1 is intentionally invalid" } -1 -CCTK_INT stencil_width_z "stencil width used near boundary" +CCTK_INT stencil_width_z "stencil width used near boundary DEPRECATED" { -1:* :: "outgoing characteristic speed > 0, default of -1 is intentionally invalid" } -1 -CCTK_INT boundary_width "width of boundary (fix later to use Cactus boundary calls)" +CCTK_INT boundary_width "width of boundary (fix later to use Cactus boundary calls) DEPRECATED" { -1:* :: "Any integer" } 1 - diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/schedule.ccl b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/schedule.ccl index a013b00..05ca0e1 100644 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/schedule.ccl +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/schedule.ccl @@ -3,16 +3,3 @@ # author: S. 
Husa # $Header$ - - - -schedule GenericFD_Startup at STARTUP -{ - LANG: C -} "Register Banner" - -# schedule GenericFD_ParamCheck at ParamCheck -# { -# LANG: Fortran -# } "check stencil width parameters for consistency" - diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c index 9084f65..f23f8dc 100644 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c @@ -97,20 +97,6 @@ int GenericFD_GetBoundaryWidth(cGH const * restrict const cctkGH) return bw; } -/* int GenericFD_BoundaryWidthTable(cGH const * restrict const cctkGH) */ -/* { */ -/* int nboundaryzones[6]; */ -/* GenericFD_GetBoundaryWidth(cctkGH, nboundaryzones); */ - -/* int table = Util_TableCreate(0); */ -/* if (table < 0) CCTK_WARN(0, "Could not create table"); */ - -/* if (Util_TableSetIntArray(table, 6, nboundaryzones, "BOUNDARY_WIDTH") < 0) */ -/* CCTK_WARN(0, "Could not set table"); */ -/* return table; */ -/* } */ - - /* Return the array indices in imin and imax for looping over the interior of the grid. imin is the index of the first grid point. imax is the index of the last grid point plus 1. 
So a loop over diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h index 1e4faff..fc5a375 100644 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h @@ -28,655 +28,14 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifndef NOPRECOMPUTE -#define PRECOMPUTE -#endif - -#if defined(FD_C0) || defined(FD_C2) || defined(FD_C4) || defined(FD_C2C4) -#define FD_SET_BY_USER -#endif -#ifndef FD_SET_BY_USER -#define FD_C2 -#endif - - -#if defined(FD_C0) -#define FD_METHOD_DESC "FD method: replace derivatives by zero" -#endif - -#if defined(FD_C2) -#define FD_METHOD_DESC "FD method: second order centered finite differences" -#endif - -#if defined(FD_C4) -#define FD_METHOD_DESC "FD method: fourth order centered finite differences" -#endif - -#if defined(FD_C2C4) -#define FD_METHOD_DESC "FD method: weighted 2nd/4th order centered finite differences" -#endif - - - -/* utility functions */ -#if defined(KRANC_C) -#define string(d,f) d ## f -#else -#define string(d,f) d/**/f -#endif - -#if defined(KRANC_C) -#define IMAX(int1, int2) ((int1) > (int2) ? 
(int1) : (int2)) -#endif - #include "MathematicaCompat.h" -/* finite differencing macros */ - -/* */ -/* add method argument to shorthands */ -/* */ - -/* third derivatives */ -#define D111x(gf) string(D111,gf) -#define D211x(gf) string(D211,gf) -#define D311x(gf) string(D311,gf) -#define D221x(gf) string(D221,gf) -#define D321x(gf) string(D321,gf) -#define D331x(gf) string(D331,gf) -#define D222x(gf) string(D222,gf) -#define D322x(gf) string(D322,gf) -#define D332x(gf) string(D332,gf) -#define D333x(gf) string(D333,gf) - -/* second derivatives */ -#define D11x(gf) string(D11,gf) -#define D22x(gf) string(D22,gf) -#define D33x(gf) string(D33,gf) -#define D21x(gf) string(D21,gf) -#define D32x(gf) string(D32,gf) -#define D31x(gf) string(D31,gf) - -/* first derivatives */ -#define D1x(gf) string(D1,gf) -#define D2x(gf) string(D2,gf) -#define D3x(gf) string(D3,gf) - -#ifdef PRECOMPUTE - -/* third derivatives */ -#define D111(gf) string(D111,gf) -#define D211(gf) string(D211,gf) -#define D311(gf) string(D311,gf) -#define D221(gf) string(D221,gf) -#define D321(gf) string(D321,gf) -#define D331(gf) string(D331,gf) -#define D222(gf) string(D222,gf) -#define D322(gf) string(D322,gf) -#define D332(gf) string(D332,gf) -#define D333(gf) string(D333,gf) - -/* second derivatives */ -#define D11(gf,i,j,k) string(D11,gf) -#define D22(gf,i,j,k) string(D22,gf) -#define D33(gf,i,j,k) string(D33,gf) -#define D21(gf,i,j,k) string(D21,gf) -#define D32(gf,i,j,k) string(D32,gf) -#define D31(gf,i,j,k) string(D31,gf) - -/* first derivatives */ -#define D1(gf,i,j,k) string(D1,gf) -#define D2(gf,i,j,k) string(D2,gf) -#define D3(gf,i,j,k) string(D3,gf) - -#else - -/* third derivatives */ -#define D111(gf) D111gf(gf,i,j,k) -#define D211(gf) D211gf(gf,i,j,k) -#define D311(gf) D311gf(gf,i,j,k) -#define D221(gf) D221gf(gf,i,j,k) -#define D321(gf) D321gf(gf,i,j,k) -#define D331(gf) D331gf(gf,i,j,k) -#define D222(gf) D222gf(gf,i,j,k) -#define D322(gf) D322gf(gf,i,j,k) -#define D332(gf) 
D332gf(gf,i,j,k) -#define D333(gf) D333gf(gf,i,j,k) - -/* second derivatives */ -#define D11(gf,i,j,k) D11gf(gf,i,j,k) -#define D22(gf,i,j,k) D22gf(gf,i,j,k) -#define D33(gf,i,j,k) D33gf(gf,i,j,k) -#define D21(gf,i,j,k) D21gf(gf,i,j,k) -#define D32(gf,i,j,k) D32gf(gf,i,j,k) -#define D31(gf,i,j,k) D31gf(gf,i,j,k) - -/* first derivatives */ -#define D1(gf,i,j,k) D1gf(gf, i,j,k) -#define D2(gf,i,j,k) D2gf(gf, i,j,k) -#define D3(gf,i,j,k) D3gf(gf, i,j,k) - -#endif - - -#ifdef FD_C0 -/* third derivatives */ -#define D111gf(gf,i,j,k) D111_c0(gf,i,j,k) -#define D211gf(gf,i,j,k) D211_c0(gf,i,j,k) -#define D311gf(gf,i,j,k) D311_c0(gf,i,j,k) -#define D221gf(gf,i,j,k) D221_c0(gf,i,j,k) -#define D321gf(gf,i,j,k) D321_c0(gf,i,j,k) -#define D331gf(gf,i,j,k) D331_c0(gf,i,j,k) -#define D222gf(gf,i,j,k) D222_c0(gf,i,j,k) -#define D322gf(gf,i,j,k) D322_c0(gf,i,j,k) -#define D332gf(gf,i,j,k) D332_c0(gf,i,j,k) -#define D333gf(gf,i,j,k) D333_c0(gf,i,j,k) - -/* second derivatives */ -#define D11gf(gf,i,j,k) D11_c0(gf,i,j,k) -#define D22gf(gf,i,j,k) D22_c0(gf,i,j,k) -#define D33gf(gf,i,j,k) D33_c0(gf,i,j,k) -#define D21gf(gf,i,j,k) D21_c0(gf,i,j,k) -#define D32gf(gf,i,j,k) D32_c0(gf,i,j,k) -#define D31gf(gf,i,j,k) D31_c0(gf,i,j,k) - -/* first derivatives */ -#define D1gf(gf,i,j,k) D1_c0(gf, i,j,k) -#define D2gf(gf,i,j,k) D2_c0(gf, i,j,k) -#define D3gf(gf,i,j,k) D3_c0(gf, i,j,k) -#endif - - - -#ifdef FD_C2 -/* third derivatives */ -#define D111gf(gf,i,j,k) D111_c2(gf,i,j,k) -#define D211gf(gf,i,j,k) D211_c2(gf,i,j,k) -#define D311gf(gf,i,j,k) D311_c2(gf,i,j,k) -#define D221gf(gf,i,j,k) D221_c2(gf,i,j,k) -#define D321gf(gf,i,j,k) D321_c2(gf,i,j,k) -#define D331gf(gf,i,j,k) D331_c2(gf,i,j,k) -#define D222gf(gf,i,j,k) D222_c2(gf,i,j,k) -#define D322gf(gf,i,j,k) D322_c2(gf,i,j,k) -#define D332gf(gf,i,j,k) D332_c2(gf,i,j,k) -#define D333gf(gf,i,j,k) D333_c2(gf,i,j,k) - -/* second derivatives */ -#define D11gf(gf,i,j,k) D11_c2(gf,i,j,k) -#define D22gf(gf,i,j,k) D22_c2(gf,i,j,k) -#define 
D33gf(gf,i,j,k) D33_c2(gf,i,j,k) -#define D21gf(gf,i,j,k) D21_c2(gf,i,j,k) -#define D32gf(gf,i,j,k) D32_c2(gf,i,j,k) -#define D31gf(gf,i,j,k) D31_c2(gf,i,j,k) - -/* first derivatives */ -#define D1gf(gf,i,j,k) D1_c2(gf, i,j,k) -#define D2gf(gf,i,j,k) D2_c2(gf, i,j,k) -#define D3gf(gf,i,j,k) D3_c2(gf, i,j,k) -#endif - - - -#ifdef FD_C4 -/* third derivatives */ -#define D111gf(gf,i,j,k) D111_c4(gf,i,j,k) -#define D211gf(gf,i,j,k) D211_c4(gf,i,j,k) -#define D311gf(gf,i,j,k) D311_c4(gf,i,j,k) -#define D221gf(gf,i,j,k) D221_c4(gf,i,j,k) -#define D321gf(gf,i,j,k) D321_c4(gf,i,j,k) -#define D331gf(gf,i,j,k) D331_c4(gf,i,j,k) -#define D222gf(gf,i,j,k) D222_c4(gf,i,j,k) -#define D322gf(gf,i,j,k) D322_c4(gf,i,j,k) -#define D332gf(gf,i,j,k) D332_c4(gf,i,j,k) -#define D333gf(gf,i,j,k) D333_c4(gf,i,j,k) - -/* second derivatives */ -#define D11gf(gf,i,j,k) D11_c4(gf,i,j,k) -#define D22gf(gf,i,j,k) D22_c4(gf,i,j,k) -#define D33gf(gf,i,j,k) D33_c4(gf,i,j,k) -#define D21gf(gf,i,j,k) D21_c4(gf,i,j,k) -#define D32gf(gf,i,j,k) D32_c4(gf,i,j,k) -#define D31gf(gf,i,j,k) D31_c4(gf,i,j,k) - -/* first derivatives */ -#define D1gf(gf,i,j,k) D1_c4(gf, i,j,k) -#define D2gf(gf,i,j,k) D2_c4(gf, i,j,k) -#define D3gf(gf,i,j,k) D3_c4(gf, i,j,k) -#endif - - -#ifdef FD_C2C4 -/* third derivatives */ -#define D111gf(gf,i,j,k) D111_c2c4(gf,i,j,k) -#define D211gf(gf,i,j,k) D211_c2c4(gf,i,j,k) -#define D311gf(gf,i,j,k) D311_c2c4(gf,i,j,k) -#define D221gf(gf,i,j,k) D221_c2c4(gf,i,j,k) -#define D321gf(gf,i,j,k) D321_c2c4(gf,i,j,k) -#define D331gf(gf,i,j,k) D331_c2c4(gf,i,j,k) -#define D222gf(gf,i,j,k) D222_c2c4(gf,i,j,k) -#define D322gf(gf,i,j,k) D322_c2c4(gf,i,j,k) -#define D332gf(gf,i,j,k) D332_c2c4(gf,i,j,k) -#define D333gf(gf,i,j,k) D333_c2c4(gf,i,j,k) - -/* second derivatives */ -#define D11gf(gf,i,j,k) D11_c2c4(gf,i,j,k) -#define D22gf(gf,i,j,k) D22_c2c4(gf,i,j,k) -#define D33gf(gf,i,j,k) D33_c2c4(gf,i,j,k) -#define D21gf(gf,i,j,k) D21_c2c4(gf,i,j,k) -#define D32gf(gf,i,j,k) D32_c2c4(gf,i,j,k) 
-#define D31gf(gf,i,j,k) D31_c2c4(gf,i,j,k) - -/* first derivatives */ -#define D1gf(gf,i,j,k) D1_c2c4(gf, i,j,k) -#define D2gf(gf,i,j,k) D2_c2c4(gf, i,j,k) -#define D3gf(gf,i,j,k) D3_c2c4(gf, i,j,k) -#endif - - - - -/*****************************************************/ -/* */ -/* METHODS */ -/* */ -/*****************************************************/ - -/* c0 */ - -/* set all derivatives = 0 */ -/* for debugging and benchmarking */ - -/* third derivatives */ - -#define D111_c0(gf,i,j,k) 0. -#define D211_c0(gf,i,j,k) 0. -#define D311_c0(gf,i,j,k) 0. -#define D221_c0(gf,i,j,k) 0. -#define D321_c0(gf,i,j,k) 0. -#define D331_c0(gf,i,j,k) 0. -#define D222_c0(gf,i,j,k) 0. -#define D322_c0(gf,i,j,k) 0. -#define D332_c0(gf,i,j,k) 0. -#define D333_c0(gf,i,j,k) 0. - -/* second derivatives */ - -#define D11_c0(gf,i,j,k) 0. -#define D22_c0(gf,i,j,k) 0. -#define D33_c0(gf,i,j,k) 0. -#define D21_c0(gf,i,j,k) 0. -#define D32_c0(gf,i,j,k) 0. -#define D31_c0(gf,i,j,k) 0. - -/* first derivatives */ - -#define D1_c0(gf,i,j,k) 0. -#define D2_c0(gf,i,j,k) 0. -#define D3_c0(gf,i,j,k) 0. 
- - - -#ifndef KRANC_C - -/* c2 */ -/* */ -/* 2nd order centered */ -/* */ - -/* third derivatives, centered, 2nd order */ - -#define D111_c2(gf,i,j,k) ((- gf(i+2,j,k) + 2*gf(i+1,j,k) - 2*gf(i-1,j,k) + gf(i-2,j,k)) * dxi*dxi*dxi * (1.0/2.0)) -#define D211_c2(gf,i,j,k) ((gf(i+1,j+1,k) - 2*gf(i,j+1,k) + gf(i-1,j+1,k) - gf(i+1,j-1,k) + 2*gf(i,j-1,k) - gf(i-1,j-1,k)) * dxi*dxi*dyi * (1.0/2.0)) -#define D311_c2(gf,i,j,k) ((gf(i+1,j,k+1) - 2*gf(i,j,k+1) + gf(i-1,j,k+1) - gf(i+1,j,k-1) + 2*gf(i,j,k-1) - gf(i-1,j,k-1)) * dxi*dxi*dzi * (1.0/2.0)) -#define D221_c2(gf,i,j,k) ((gf(i+1,j+1,k) - 2*gf(i+1,j,k) + gf(i+1,j-1,k) - gf(i-1,j+1,k) + 2*gf(i-1,j,k) - gf(i-1,j-1,k)) * dxi*dyi*dyi * (1.0/2.0)) -#define D321_c2(gf,i,j,k) ((gf(i+1,j+1,k+1) - gf(i-1,j+1,k+1) - gf(i+1,j-1,k+1) + gf(i-1,j-1,k+1) - gf(i+1,j+1,k-1) + gf(i-1,j+1,k-1) + gf(i+1,j-1,k-1) - gf(i-1,j-1,k-1)) * dxi*dyi*dzi * (1.0/8.0)) -#define D331_c2(gf,i,j,k) ((gf(i+1,j,k+1) - 2*gf(i+1,j,k) + gf(i+1,j,k-1) - gf(i-1,j,k+1) + 2*gf(i-1,j,k) - gf(i-1,j,k-1)) * dxi*dzi*dzi * (1.0/2.0)) -#define D222_c2(gf,i,j,k) ((- gf(i,j+2,k) + 2*gf(i,j+1,k) - 2*gf(i,j-1,k) + gf(i,j-2,k)) * dyi*dyi*dyi * (1.0/2.0)) -#define D322_c2(gf,i,j,k) ((gf(i,j+1,k+1) - 2*gf(i,j,k+1) + gf(i,j-1,k+1) - gf(i,j+1,k-1) + 2*gf(i,j,k-1) - gf(i,j-1,k-1)) * dyi*dyi*dzi * (1.0/2.0)) -#define D332_c2(gf,i,j,k) ((gf(i,j+1,k+1) - 2*gf(i,j+1,k) + gf(i,j+1,k-1) - gf(i,j-1,k+1) + 2*gf(i,j-1,k) - gf(i,j-1,k-1)) * dyi*dzi*dzi * (1.0/2.0)) -#define D333_c2(gf,i,j,k) ((- gf(i,j,k+2) + 2*gf(i,j,k+1) - 2*gf(i,j,k-1) + gf(i,j,k-2)) * dzi*dzi*dzi * (1.0/2.0)) - -/* second derivatives, centered, 2nd order */ - -#define D11_c2(gf,i,j,k) \ - (( gf(i+1,j,k) \ - - 2.*gf(i, j,k) \ - + gf(i-1,j,k)) * dxi * dxi ) - -#define D22_c2(gf,i,j,k) \ - (( gf(i,j+1,k) \ - - 2.*gf(i,j, k) \ - + gf(i,j-1,k)) * dyi * dyi ) - -#define D33_c2(gf,i,j,k) \ - (( gf(i,j,k+1) \ - - 2.*gf(i,j,k ) \ - + gf(i,j,k-1)) * dzi * dzi ) - -#define D21_c2(gf,i,j,k) \ - ((gf(i+1,j+1,k) \ - + gf(i-1,j-1,k) \ 
- - gf(i+1,j-1,k) \ - - gf(i-1,j+1,k)) * hdxi * hdyi ) - -#define D32_c2(gf,i,j,k) \ - ((gf(i,j+1,k+1) \ - + gf(i,j-1,k-1) \ - - gf(i,j+1,k-1) \ - - gf(i,j-1,k+1)) * hdyi * hdzi ) - -#define D31_c2(gf,i,j,k) \ - ((gf(i+1,j,k+1) \ - + gf(i-1,j,k-1) \ - - gf(i+1,j,k-1) \ - - gf(i-1,j,k+1)) * hdxi * hdzi ) - -/* first derivatives, centered, 2nd order */ - -#define D1_c2(gf,i,j,k) \ - ((gf(i+1,j,k) \ - - gf(i-1,j,k)) * hdxi) - -#define D2_c2(gf,i,j,k) \ - ((gf(i,j+1,k) \ - - gf(i,j-1,k)) * hdyi) - -#define D3_c2(gf,i,j,k) \ - ((gf(i,j,k+1) \ - - gf(i,j,k-1)) * hdzi) - -#else - - -#define D11_c2(gf,i,j,k) \ - (( gf[CCTK_GFINDEX3D(cctkGH,i+1,j,k)] \ - - 2.*gf[CCTK_GFINDEX3D(cctkGH,i, j,k)] \ - + gf[CCTK_GFINDEX3D(cctkGH,i-1,j,k)]) * dxi * dxi ) - -#define D22_c2(gf,i,j,k) \ - (( gf[CCTK_GFINDEX3D(cctkGH,i,j+1,k)] \ - - 2.*gf[CCTK_GFINDEX3D(cctkGH,i,j, k)] \ - + gf[CCTK_GFINDEX3D(cctkGH,i,j-1,k)]) * dyi * dyi ) - -#define D33_c2(gf,i,j,k) \ - (( gf[CCTK_GFINDEX3D(cctkGH,i,j,k+1)] \ - - 2.*gf[CCTK_GFINDEX3D(cctkGH,i,j,k )] \ - + gf[CCTK_GFINDEX3D(cctkGH,i,j,k-1)]) * dzi * dzi ) - -#define D21_c2(gf,i,j,k) \ - ((gf[CCTK_GFINDEX3D(cctkGH,i+1,j+1,k)] \ - + gf[CCTK_GFINDEX3D(cctkGH,i-1,j-1,k)] \ - - gf[CCTK_GFINDEX3D(cctkGH,i+1,j-1,k)] \ - - gf[CCTK_GFINDEX3D(cctkGH,i-1,j+1,k)]) * hdxi * hdyi ) - -#define D32_c2(gf,i,j,k) \ - ((gf[CCTK_GFINDEX3D(cctkGH,i,j+1,k+1)] \ - + gf[CCTK_GFINDEX3D(cctkGH,i,j-1,k-1)] \ - - gf[CCTK_GFINDEX3D(cctkGH,i,j+1,k-1)] \ - - gf[CCTK_GFINDEX3D(cctkGH,i,j-1,k+1)]) * hdyi * hdzi ) - -#define D31_c2(gf,i,j,k) \ - ((gf[CCTK_GFINDEX3D(cctkGH,i+1,j,k+1)] \ - + gf[CCTK_GFINDEX3D(cctkGH,i-1,j,k-1)] \ - - gf[CCTK_GFINDEX3D(cctkGH,i+1,j,k-1)] \ - - gf[CCTK_GFINDEX3D(cctkGH,i-1,j,k+1)]) * hdxi * hdzi ) - -/* first derivatives, centered, 2nd order */ - -#define D1_c2(gf,i,j,k) \ - ((gf[CCTK_GFINDEX3D(cctkGH,i+1,j,k)] \ - - gf[CCTK_GFINDEX3D(cctkGH,i-1,j,k)]) * hdxi) - -#define D2_c2(gf,i,j,k) \ - ((gf[CCTK_GFINDEX3D(cctkGH,i,j+1,k)] \ - - 
gf[CCTK_GFINDEX3D(cctkGH,i,j-1,k)]) * hdyi) - -#define D3_c2(gf,i,j,k) \ - ((gf[CCTK_GFINDEX3D(cctkGH,i,j,k+1)] \ - - gf[CCTK_GFINDEX3D(cctkGH,i,j,k-1)]) * hdzi) - -#endif - - -/* c4 */ -/* */ -/* 4th order centered */ -/* */ - -/* second derivatives, centered, 4th order */ - -#ifndef KRANC_C - -#define D11_c4(gf,i,j,k) \ - ((- gf(i+2,j,k) \ - + 16. * gf(i+1,j,k) \ - - 30. * gf(i, j,k) \ - + 16. * gf(i-1,j,k) \ - - gf(i-2,j,k)) * dxi * dxi / 12.) - - -#define D22_c4(gf,i,j,k) \ - ((- gf(i,j+2,k) \ - + 16. * gf(i,j+1,k) \ - - 30. * gf(i,j, k) \ - + 16. * gf(i,j-1,k) \ - - gf(i,j-2,k)) * dyi * dyi / 12.) - - -#define D33_c4(gf,i,j,k) \ - ((- gf(i,j,k+2) \ - + 16. * gf(i,j,k+1) \ - - 30. * gf(i,j,k ) \ - + 16. * gf(i,j,k-1) \ - - gf(i,j,k-2)) * dzi * dzi / 12.) - -#define D21_c4(gf,i,j,k) \ - ((- gf(i+2,j+2,k) \ - + gf(i+2,j-2,k) \ - + gf(i-2,j+2,k) \ - - gf(i-2,j-2,k) \ - + 16. * gf(i+1,j+1,k) \ - - 16. * gf(i+1,j-1,k) \ - - 16. * gf(i-1,j+1,k) \ - + 16. * gf(i-1,j-1,k)) * dxi * dyi / 48.) - -#define D31_c4(gf,i,j,k) \ - ((- gf(i+2,j,k+2) \ - + gf(i+2,j,k-2) \ - + gf(i-2,j,k+2) \ - - gf(i-2,j,k-2) \ - + 16. * gf(i+1,j,k+1) \ - - 16. * gf(i+1,j,k-1) \ - - 16. * gf(i-1,j,k+1) \ - + 16. * gf(i-1,j,k-1)) * dxi * dzi / 48.) - - -#define D32_c4(gf,i,j,k) \ - ((- gf(i,j+2,k+2) \ - + gf(i,j+2,k-2) \ - + gf(i,j-2,k+2) \ - - gf(i,j-2,k-2) \ - + 16. * gf(i,j+1,k+1) \ - - 16. * gf(i,j+1,k-1) \ - - 16. * gf(i,j-1,k+1) \ - + 16. * gf(i,j-1,k-1)) * dzi * dyi / 48.) - - -/* first derivatives, centered, 4th order */ - -#define D1_c4(gf,i,j,k) \ - ((- gf(i+2,j,k) \ - + 8. * gf(i+1,j,k) \ - - 8. * gf(i-1,j,k) \ - + gf(i-2,j,k)) * (dxi / 12.)) - -#define D2_c4(gf,i,j,k) \ - ((- gf(i,j+2,k) \ - + 8. * gf(i,j+1,k) \ - - 8. * gf(i,j-1,k) \ - + gf(i,j-2,k)) * (dyi / 12.)) - -#define D3_c4(gf,i,j,k) \ - ((- gf(i,j,k+2) \ - + 8. * gf(i,j,k+1) \ - - 8. * gf(i,j,k-1) \ - + gf(i,j,k-2)) * (dzi / 12.)) - - -#else - -#define D11_c4(gf,i,j,k) \ - ((- gf[CCTK_GFINDEX3D(cctkGH,i+2,j,k)] \ - + 16. 
* gf[CCTK_GFINDEX3D(cctkGH,i+1,j,k)] \ - - 30. * gf[CCTK_GFINDEX3D(cctkGH,i, j,k)] \ - + 16. * gf[CCTK_GFINDEX3D(cctkGH,i-1,j,k)] \ - - gf[CCTK_GFINDEX3D(cctkGH,i-2,j,k)]) * dxi * dxi / 12.) - - -#define D22_c4(gf,i,j,k) \ - ((- gf[CCTK_GFINDEX3D(cctkGH,i,j+2,k)] \ - + 16. * gf[CCTK_GFINDEX3D(cctkGH,i,j+1,k)] \ - - 30. * gf[CCTK_GFINDEX3D(cctkGH,i,j, k)] \ - + 16. * gf[CCTK_GFINDEX3D(cctkGH,i,j-1,k)] \ - - gf[CCTK_GFINDEX3D(cctkGH,i,j-2,k)]) * dyi * dyi / 12.) - - -#define D33_c4(gf,i,j,k) \ - ((- gf[CCTK_GFINDEX3D(cctkGH,i,j,k+2)] \ - + 16. * gf[CCTK_GFINDEX3D(cctkGH,i,j,k+1)] \ - - 30. * gf[CCTK_GFINDEX3D(cctkGH,i,j,k )] \ - + 16. * gf[CCTK_GFINDEX3D(cctkGH,i,j,k-1)] \ - - gf[CCTK_GFINDEX3D(cctkGH,i,j,k-2)]) * dzi * dzi / 12.) - -#define D21_c4(gf,i,j,k) \ - ((- gf[CCTK_GFINDEX3D(cctkGH,i+2,j+2,k)] \ - + gf[CCTK_GFINDEX3D(cctkGH,i+2,j-2,k)] \ - + gf[CCTK_GFINDEX3D(cctkGH,i-2,j+2,k)] \ - - gf[CCTK_GFINDEX3D(cctkGH,i-2,j-2,k)] \ - + 16. * gf[CCTK_GFINDEX3D(cctkGH,i+1,j+1,k)] \ - - 16. * gf[CCTK_GFINDEX3D(cctkGH,i+1,j-1,k)] \ - - 16. * gf[CCTK_GFINDEX3D(cctkGH,i-1,j+1,k)] \ - + 16. * gf[CCTK_GFINDEX3D(cctkGH,i-1,j-1,k)]) * dxi * dyi / 48.) - -#define D31_c4(gf,i,j,k) \ - ((- gf[CCTK_GFINDEX3D(cctkGH,i+2,j,k+2)] \ - + gf[CCTK_GFINDEX3D(cctkGH,i+2,j,k-2)] \ - + gf[CCTK_GFINDEX3D(cctkGH,i-2,j,k+2)] \ - - gf[CCTK_GFINDEX3D(cctkGH,i-2,j,k-2)] \ - + 16. * gf[CCTK_GFINDEX3D(cctkGH,i+1,j,k+1)] \ - - 16. * gf[CCTK_GFINDEX3D(cctkGH,i+1,j,k-1)] \ - - 16. * gf[CCTK_GFINDEX3D(cctkGH,i-1,j,k+1)] \ - + 16. * gf[CCTK_GFINDEX3D(cctkGH,i-1,j,k-1)]) * dxi * dzi / 48.) - - -#define D32_c4(gf,i,j,k) \ - ((- gf[CCTK_GFINDEX3D(cctkGH,i,j+2,k+2)] \ - + gf[CCTK_GFINDEX3D(cctkGH,i,j+2,k-2)] \ - + gf[CCTK_GFINDEX3D(cctkGH,i,j-2,k+2)] \ - - gf[CCTK_GFINDEX3D(cctkGH,i,j-2,k-2)] \ - + 16. * gf[CCTK_GFINDEX3D(cctkGH,i,j+1,k+1)] \ - - 16. * gf[CCTK_GFINDEX3D(cctkGH,i,j+1,k-1)] \ - - 16. * gf[CCTK_GFINDEX3D(cctkGH,i,j-1,k+1)] \ - + 16. * gf[CCTK_GFINDEX3D(cctkGH,i,j-1,k-1)]) * dzi * dyi / 48.) 
- - -/* first derivatives, centered, 4th order */ - -#define D1_c4(gf,i,j,k) \ - ((- gf[CCTK_GFINDEX3D(cctkGH,i+2,j,k)] \ - + 8. * gf[CCTK_GFINDEX3D(cctkGH,i+1,j,k)] \ - - 8. * gf[CCTK_GFINDEX3D(cctkGH,i-1,j,k)] \ - + gf[CCTK_GFINDEX3D(cctkGH,i-2,j,k)]) * (dxi / 12.)) - -#define D2_c4(gf,i,j,k) \ - ((- gf[CCTK_GFINDEX3D(cctkGH,i,j+2,k)] \ - + 8. * gf[CCTK_GFINDEX3D(cctkGH,i,j+1,k)] \ - - 8. * gf[CCTK_GFINDEX3D(cctkGH,i,j-1,k)] \ - + gf[CCTK_GFINDEX3D(cctkGH,i,j-2,k)]) * (dyi / 12.)) - -#define D3_c4(gf,i,j,k) \ - ((- gf[CCTK_GFINDEX3D(cctkGH,i,j,k+2)] \ - + 8. * gf[CCTK_GFINDEX3D(cctkGH,i,j,k+1)] \ - - 8. * gf[CCTK_GFINDEX3D(cctkGH,i,j,k-1)] \ - + gf[CCTK_GFINDEX3D(cctkGH,i,j,k-2)]) * (dzi / 12.)) - -#endif - -/*****************************************************/ -/* */ -/* DERIVED METHODS */ -/* */ -/******************************************************/ - - -/* blend c2 and c4 */ -/* second derivatives */ -#define D11_c2c4(gf,i,j,k) (fdweight_c2*D11_c2(gf,i,j,k) + fdweight_c4*D11_c4(gf,i,j,k)) -#define D22_c2c4(gf,i,j,k) (fdweight_c2*D22_c2(gf,i,j,k) + fdweight_c4*D22_c4(gf,i,j,k)) -#define D33_c2c4(gf,i,j,k) (fdweight_c2*D33_c2(gf,i,j,k) + fdweight_c4*D33_c4(gf,i,j,k)) -#define D21_c2c4(gf,i,j,k) (fdweight_c2*D21_c2(gf,i,j,k) + fdweight_c4*D21_c4(gf,i,j,k)) -#define D32_c2c4(gf,i,j,k) (fdweight_c2*D32_c2(gf,i,j,k) + fdweight_c4*D32_c4(gf,i,j,k)) -#define D31_c2c4(gf,i,j,k) (fdweight_c2*D31_c2(gf,i,j,k) + fdweight_c4*D31_c4(gf,i,j,k)) - -/* first derivatives */ -#define D1_c2c4(gf,i,j,k) (fdweight_c2*D1_c2(gf, i,j,k) + fdweight_c4*D1_c4(gf,i,j,k)) -#define D2_c2c4(gf,i,j,k) (fdweight_c2*D2_c2(gf, i,j,k) + fdweight_c4*D2_c4(gf,i,j,k)) -#define D3_c2c4(gf,i,j,k) (fdweight_c2*D3_c2(gf, i,j,k) + fdweight_c4*D3_c4(gf,i,j,k)) - - -/*****************************************************/ -/* */ -/* Poor man's one-sided derivatives */ -/* */ -/******************************************************/ - - -#define Dplus1(gf,i,j,k) string(Dplus1,gf) -#define 
Dplus2(gf,i,j,k) string(Dplus2,gf) -#define Dplus3(gf,i,j,k) string(Dplus3,gf) - - -#define Dplus1gf(gf,i,j,k) Dplus1x(gf, i,j,k) -#define Dplus2gf(gf,i,j,k) Dplus2x(gf, i,j,k) -#define Dplus3gf(gf,i,j,k) Dplus3x(gf, i,j,k) - -#ifdef KRANC_C - -#define Dplus1x(gf,i,j,k) \ - ((gf[CCTK_GFINDEX3D(cctkGH,i+1,j,k)] \ - - gf[CCTK_GFINDEX3D(cctkGH,i,j,k)]) * dxi) - -#define Dplus2x(gf,i,j,k) \ - ((gf[CCTK_GFINDEX3D(cctkGH,i,j+1,k)] \ - - gf[CCTK_GFINDEX3D(cctkGH,i,j,k)]) * dyi) - -#define Dplus3x(gf,i,j,k) \ - ((gf[CCTK_GFINDEX3D(cctkGH,i,j,k+1)] \ - - gf[CCTK_GFINDEX3D(cctkGH,i,j,k)]) * dzi) - -#else - -#define Dplus1x(gf,i,j,k) ( ( gf(i+1, j, k) - gf(i,j,k) ) * dxi ) -#define Dplus2x(gf,i,j,k) ( ( gf(i, j + 1, k) - gf(i,j,k) ) * dxi ) -#define Dplus3x(gf,i,j,k) ( ( gf(i, j, k + 1) - gf(i,j,k) ) * dxi ) - -#endif - #ifdef KRANC_C int sgn(CCTK_REAL x); -#define Dupwind1(gf,dir,i,j,k) ((dir * gf[CCTK_GFINDEX3D(cctkGH,i+dir,j,k)] \ - - dir * gf[CCTK_GFINDEX3D(cctkGH,i,j,k)]) * dxi) -#define Dupwind2(gf,dir,i,j,k) ((dir * gf[CCTK_GFINDEX3D(cctkGH,i,j+dir,k)] \ - - dir * gf[CCTK_GFINDEX3D(cctkGH,i,j,k)]) * dxi) -#define Dupwind3(gf,dir,i,j,k) ((dir * gf[CCTK_GFINDEX3D(cctkGH,i,j,k+dir)] \ - - dir * gf[CCTK_GFINDEX3D(cctkGH,i,j,k)]) * dxi) - int GenericFD_GetBoundaryWidth(cGH const * restrict const cctkGH); -/* int GenericFD_BoundaryWidthTable(cGH const * restrict const cctkGH); */ - void GenericFD_GetBoundaryInfo(cGH const * restrict cctkGH, int const * restrict cctk_lsh, int const * restrict cctk_lssh, @@ -691,25 +50,6 @@ void GenericFD_GetBoundaryInfo(cGH const * restrict cctkGH, void GenericFD_AssertGroupStorage(cGH const * restrict const cctkGH, const char *calc, int ngroups, const char *group_names[]); -#if 0 -/* Finite differencing near boundaries */ - -/* The array var is to be accessed at the location - [i+ioff,j+joff,k+koff]. idir,jdir,kdir specify whether there is a - lower (dir<0), upper (dir>0), or no boundary nearby. 
If a boundary - is in the way, the value 0 is returned instead of the array - content. */ -#define CCTK_GFACCESS3D(cctkGH, var, i,j,k, ioff,joff,koff, idir,jdir,kdir) \ - (((idir)<0 && (ioff)<0) || \ - ((jdir)<0 && (joff)<0) || \ - ((kdir)<0 && (koff)<0) || \ - ((idir)>0 && (ioff)>0) || \ - ((jdir)>0 && (joff)>0) || \ - ((kdir)>0 && (koff)>0) || \ - ? 0 \ - : (var)[CCTK_GFINDEX3D((cctkGH), (i)+(ioff),(j)+(joff),(k)+(koff))]) -#endif - /* Summation by parts */ static inline CCTK_REAL sbp_deriv_x(int i, int j, int k, diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/ParamCheck.F90 b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/ParamCheck.F90 deleted file mode 100644 index c2baf82..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/ParamCheck.F90 +++ /dev/null @@ -1,96 +0,0 @@ -/*@@ - @file GenericFD/src/ParamCheck.F90 - @date October 20 2004 - @author S. Husa - @desc - Check consistency of parameters associated with stencil widths - - $Header$ - - @enddesc - @@*/ - -/* Copyright 2004 Sascha Husa, Ian Hinder, Christiane Lechner - - This file is part of Kranc. - - Kranc is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - Kranc is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with Kranc; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -#define KRANC_FORTRAN - -#include "cctk.h" -#include "cctk_Arguments.h" -#include "cctk_Parameters.h" -#include "GenericFD.h" - -/*@@ - @routine GenericFD_ParamCheck - @date October 20 2004 - @author S. 
Husa - @desc - Check consistency of parameters associated with stencil widths - - @enddesc - @calls - @calledby - @history - @endhistory - @@*/ - - - -SUBROUTINE GenericFD_ParamCheck(CCTK_ARGUMENTS) - -implicit none - -DECLARE_CCTK_ARGUMENTS -DECLARE_CCTK_PARAMETERS - -if (stencil_width < 0) then - call CCTK_WARN(0, "stencil_width < 0 - set GenericFD::stencil_width > 0 in par file!") -endif - -if ((stencil_width_x < 0).AND.(stencil_width < 0)) then - call CCTK_WARN(0, "stencil_width_x < 0, set GenericFD::stencil_width_x (or stencil_width) > 0!") -endif - -if ((stencil_width_y < 0).AND.(stencil_width < 0)) then - call CCTK_WARN(0, "stencil_width_y < 0, set GenericFD::stencil_width_x (or stencil_width) > 0!") -endif - -if ((stencil_width_z < 0).AND.(stencil_width < 0)) then - call CCTK_WARN(0, "stencil_width_z < 0, set GenericFD::stencil_width_x (or stencil_width) > 0!") -endif - - -if (stencil_width > maxval(cctk_nghostzones)) then - call CCTK_WARN(0, "stencil_width is larger than max(cctk_nghostzones)!") -endif - -if (stencil_width_x > cctk_nghostzones(1)) then - call CCTK_WARN(0, "stencil_width is smaller than cctk_nghostzones(1)!") -endif - -if (stencil_width_y > cctk_nghostzones(2)) then - call CCTK_WARN(0, "stencil_width is smaller than cctk_nghostzones(2)!") -endif - -if (stencil_width_z > cctk_nghostzones(3)) then - call CCTK_WARN(0, "stencil_width is smaller than cctk_nghostzones(3)!") -endif - - -END SUBROUTINE GenericFD_ParamCheck diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Startup.c b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Startup.c deleted file mode 100644 index 78e9915..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/Startup.c +++ /dev/null @@ -1,60 +0,0 @@ -/*@@ - @file GenericFD/src/Startup.c - @date June 16 2002 - @author S. 
Husa - @desc - Register Banner - straight copy of WaveToy - - $Header$ - - @enddesc - @@*/ - -/* Copyright 2004 Sascha Husa, Ian Hinder, Christiane Lechner - - This file is part of Kranc. - - Kranc is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - Kranc is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with Kranc; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -#include "cctk.h" -#include "GenericFD.h" - -int GenericFD_Startup(void); - -/*@@ - @routine GenericFD_Startup - @date June 16 2002 - @author S. 
Husa - @desc - - @enddesc - @calls - @calledby - @history - @endhistory - @@*/ - - - -int GenericFD_Startup(void) -{ - const char *banner = "GenericFD: generic finite differencing"; - CCTK_RegisterBanner(banner); - - - CCTK_INFO(FD_METHOD_DESC); - return 0; - } diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/make.code.defn b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/make.code.defn index fde3275..c9ef2f1 100644 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/make.code.defn +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/make.code.defn @@ -2,8 +2,7 @@ # $Header$ # Source files in this directory -SRCS = Startup.c GenericFD.c -#ParamCheck.F90 +SRCS = GenericFD.c # Subdirectories containing source files SUBDIRS = diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/testmacros.c b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/testmacros.c deleted file mode 100644 index 3208ea0..0000000 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/testmacros.c +++ /dev/null @@ -1,23 +0,0 @@ -/* process me with something like - * cpp -DFD_C2 testmacros.c - */ - - -#include "GenericFD.h" - - -dummy = D11loc(testfunc,i,j,k); - -dummy = D11gf(testfunc,i,j,k); - -dummy = D11_c2c4(testfunc,i,j,k); - -dummy = D11(testfunc,i,j,k); - -dummy = D31(testfunc,i,j,k); - -dummy = D1(testfunc,i,j,k); - - -DECLAREVARS - diff --git a/Tools/CodeGen/Param.m b/Tools/CodeGen/Param.m index 7821f40..179cfb2 100644 --- a/Tools/CodeGen/Param.m +++ b/Tools/CodeGen/Param.m @@ -189,11 +189,7 @@ CreateKrancParam[evolvedGroups_, nonevolvedGroups_, groups_, thornName_, { Name -> "GenericFD", UsedParameters -> - {{Name -> "stencil_width", Type -> "CCTK_INT"}, - {Name -> "stencil_width_x", Type -> "CCTK_INT"}, - {Name -> "stencil_width_y", Type -> "CCTK_INT"}, - {Name -> "stencil_width_z", Type -> "CCTK_INT"}, - {Name -> "boundary_width", Type -> "CCTK_INT"}} + {} }; realStructs = Map[krancParamStruct[#, "CCTK_REAL", False] &, reals]; -- cgit v1.2.3 
From e0be5e82adfb8140cb5854e744ccf31bbf5f5c8e Mon Sep 17 00:00:00 2001 From: Ian Hinder Date: Tue, 1 Mar 2011 13:43:31 +0100 Subject: Add support for a Jacobian to be applied to all derivatives --- .../Cactus/KrancNumericalTools/GenericFD/param.ccl | 12 ++ .../KrancNumericalTools/GenericFD/src/GenericFD.c | 36 ++++++ .../KrancNumericalTools/GenericFD/src/GenericFD.h | 2 + Doc/KrancDoc.tex | 97 ++++++++++++++++ Tools/CodeGen/CalculationFunction.m | 14 ++- Tools/CodeGen/Jacobian.m | 124 +++++++++++++++++++++ Tools/CodeGen/Kranc.m | 3 +- Tools/CodeGen/KrancThorn.m | 2 +- Tools/CodeGen/Param.m | 7 +- 9 files changed, 288 insertions(+), 9 deletions(-) create mode 100644 Tools/CodeGen/Jacobian.m (limited to 'Auxiliary') diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/param.ccl b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/param.ccl index 91075c9..85de1f9 100644 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/param.ccl +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/param.ccl @@ -30,3 +30,15 @@ CCTK_INT boundary_width "width of boundary (fix later to use Cactus boundary ca { -1:* :: "Any integer" } 1 + +restricted: +CCTK_STRING jacobian_group "Name of group containing Jacobian" STEERABLE=RECOVER +{ + "" :: "String of the form <implementation>::<groupname>" +} "" + +restricted: +CCTK_STRING jacobian_derivative_group "Name of group containing Jacobian derivative" STEERABLE=RECOVER +{ + "" :: "String of the form <implementation>::<groupname>" +} "" diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c index f23f8dc..1663231 100644 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c @@ -508,3 +508,39 @@ void GenericFD_AssertGroupStorage(cGH const * restrict const cctkGH, const char } } } + +/* Return a list of pointers to the members of a named group */ +void GenericFD_GroupDataPointers(cGH const * restrict const cctkGH, const char
*group_name, + int nvars, CCTK_REAL **ptrs) +{ + int group_index, status; + cGroup group_info; + + group_index = CCTK_GroupIndex(group_name); + if (group_index < 0) + CCTK_VWarn(CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING, + "Error return %d trying to get group index for group \'%s\'", + group_index, + group_name); + + status = CCTK_GroupData(group_index, &group_info); + if (status < 0) + CCTK_VWarn(CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING, + "Error return %d trying to get info for group \'%s\'", + status, + group_name); + + if (group_info.numvars != nvars) + { + CCTK_VWarn(CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING, + "Group \'%s\' has %d variables but %d were expected", + group_name, group_info.numvars, nvars); + } + + int v1 = CCTK_FirstVarIndex(group_name); + + for (int v = 0; v < nvars; v++) + { + ptrs[v] = (CCTK_REAL *) CCTK_VarDataPtrI(cctkGH, 0 /* timelevel */, v1+v); + } +} diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h index b7a4239..5974e63 100644 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h @@ -152,6 +152,8 @@ void GenericFD_LoopOverBoundary(cGH const * restrict cctkGH, Kranc_Calculation c void GenericFD_LoopOverBoundaryWithGhosts(cGH const * restrict cctkGH, Kranc_Calculation calc); void GenericFD_LoopOverInterior(cGH const * restrict cctkGH, Kranc_Calculation calc); +void GenericFD_GroupDataPointers(cGH const * restrict const cctkGH, const char *group_name, + int nvars, CCTK_REAL **ptrs); #ifdef __cplusplus diff --git a/Doc/KrancDoc.tex b/Doc/KrancDoc.tex index e357de5..131f5c2 100644 --- a/Doc/KrancDoc.tex +++ b/Doc/KrancDoc.tex @@ -3,6 +3,7 @@ \usepackage{tabularx} \usepackage{graphicx} \usepackage{alltt} +\usepackage{hyperref} \addtolength{\oddsidemargin}{-0.25in} \addtolength{\textwidth}{1in} @@ -243,6 +244,7 @@ by the Kranc 
system, such as a name for the calculation. %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \chapter{Using Kranc} +\label{chp:usingkranc} %% \section{Types of arguments} @@ -925,6 +927,101 @@ same arguments, but they can be tensorial in nature. \end{tabularx} \end{center} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\chapter{Additional Features} + +In addition to the basic functionality described in Chapter +\ref{chp:usingkranc}, Kranc provides a number of additional features. +Each of these features can be used independently, and is typically +enabled by an option of the form \verb|Use* -> True| in the call to +\verb|CreateThorn|. + +\section{Jacobians} + +Kranc allows the user to relate finite differencing operators to +partial derivatives via an arbitrary user-defined Jacobian +transformation provided in a grid function. This feature is enabled +by setting \verb|UseJacobian -> True| in the options to \verb|CreateThorn| or +\verb|CreateThornTT|. + +Kranc does not provide the Jacobian grid function; it might be +provided by an external infrastructure (for example for multi-block +schemes), or could be provided easily in the user's thorn, or another +Kranc-generated thorn. Wherever the Jacobian is provided, it must +adhere to the following conventions. There should be one Cactus group +for the components of the Jacobian matrix and another for the +components of its first spatial derivative (this is necessary for +systems containing second spatial derivatives).
The components should +be real-valued grid functions declared in a similar manner to the +following: + +\begin{verbatim} +CCTK_REAL jac type=GF timelevels=1 +{ + J11, J12, J13, J21, J22, J23, J31, J32, J33 +} "Jacobian of the coordinate transformation" + +CCTK_REALd djac type=GF timelevels=1 +{ + dJ111, dJ112, dJ113, dJ122, dJ123, dJ133, + dJ211, dJ212, dJ213, dJ222, dJ223, dJ233, + dJ311, dJ312, dJ313, dJ322, dJ323, dJ333, +} "Derivative of the Jacobian" +\end{verbatim} + +The names of the groups and variables are not important, but the order +of the variables within the groups is critical. + +The GenericFD thorn provides two parameters, \verb|jacobian_group| and +\verb|jacobian_derivative_group| which should be set by the user in their +parameter file to the names of the Jacobian and Jacobian derivative +groups. With the above Jacobian definition, provided by a thorn with +implementation \verb|MyCoordTransform| the user would set + +\begin{verbatim} +GenericFD::jacobian = "MyCoordTransform::jac" +GenericFD::jacobian_derivative = "MyCoordTransform::djac" +\end{verbatim} + +The partial derivatives, associated with certain finite difference +operators, specified in the user's calculation, will then be +multiplied by the Jacobian. If the user specifies the following, + +\begin{center} +\begin{minipage}{0.8 \textwidth} +\begin{verbatim} +derivs = { + PDstandard2nd[i_] -> DZero[i]} + +... + +dot[v] -> PDstandard2nd[v,1] +\end{verbatim} +\end{minipage} +\end{center} + +the code that will actually be generated will be + +\begin{center} +\begin{minipage}{0.8 \textwidth} +\begin{verbatim} +dot[v] -> J11 PDstandard2nd[v,1] + J21 PDstandard2nd[v,2] + + J31 PDstandard2nd[v,3] +\end{verbatim} +\end{minipage} +\end{center} + +Note: + +\begin{itemize} +\item The Jacobian multiplication introduces an additional performance + cost to the simulation, so it should not be enabled unless + necessary. 
+\item It is currently not possible to generate a thorn which applies + the Jacobians optionally based on a run-time parameter or depending + on the current Carpet map. This is planned for the future. +\end{itemize} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/Tools/CodeGen/CalculationFunction.m b/Tools/CodeGen/CalculationFunction.m index 99d4343..1ca9513 100644 --- a/Tools/CodeGen/CalculationFunction.m +++ b/Tools/CodeGen/CalculationFunction.m @@ -20,7 +20,7 @@ BeginPackage["CalculationFunction`", {"CodeGen`", "MapLookup`", "KrancGroups`", "Differencing`", "Errors`", - "Helpers`", "Kranc`", "Optimize`"}]; + "Helpers`", "Kranc`", "Optimize`", "Jacobian`"}]; CreateCalculationFunction::usage = ""; VerifyCalculation::usage = ""; @@ -344,11 +344,13 @@ pdCanonicalOrdering[name_[inds___] -> x_] := Options[CreateCalculationFunction] = ThornOptions; -CreateCalculationFunction[calc_, debug_, imp_, opts:OptionsPattern[]] := +CreateCalculationFunction[calcp_, debug_, imp_, opts:OptionsPattern[]] := Module[{gfs, allSymbols, knownSymbols, shorts, eqs, parameters, parameterRules, functionName, dsUsed, groups, pddefs, cleancalc, eqLoop, where, - addToStencilWidth, pDefs, haveCondTextuals, condTextuals}, + addToStencilWidth, pDefs, haveCondTextuals, condTextuals, calc}, + + calc = If[OptionValue[UseJacobian], InsertJacobian[calcp, opts], calcp]; cleancalc = removeUnusedShorthands[calc]; If[OptionValue[CSE], @@ -359,6 +361,7 @@ CreateCalculationFunction[calc_, debug_, imp_, opts:OptionsPattern[]] := eqs = lookup[cleancalc, Equations]; parameters = lookupDefault[cleancalc, Parameters, {}]; groups = lookup[cleancalc, Groups]; + If[OptionValue[UseJacobian], groups = Join[groups, JacobianGroups[]]]; pddefs = lookupDefault[cleancalc, PartialDerivatives, {}]; where = lookupDefault[cleancalc, Where, Everywhere]; addToStencilWidth = lookupDefault[cleancalc, AddToStencilWidth, 0]; @@ -420,7 +423,8 @@ CreateCalculationFunction[calc_, debug_, imp_, 
opts:OptionsPattern[]] := allSymbols = calculationSymbols[cleancalc]; knownSymbols = Join[lookupDefault[cleancalc, AllowedSymbols, {}], gfs, shorts, parameters, {dx,dy,dz,idx,idy,idz,t, Pi, E, Symbol["i"], Symbol["j"], Symbol["k"], normal1, normal2, - normal3, tangentA1, tangentA2, tangentA3, tangentB1, tangentB2, tangentB3}]; + normal3, tangentA1, tangentA2, tangentA3, tangentB1, tangentB2, tangentB3}, + If[OptionValue[UseJacobian], JacobianSymbols[], {}]]; unknownSymbols = Complement[allSymbols, knownSymbols]; @@ -452,6 +456,8 @@ CreateCalculationFunction[calc_, debug_, imp_, opts:OptionsPattern[]] := InitialiseFDVariables[OptionValue[UseVectors]], definePreDefinitions[pDefs], + If[OptionValue[UseJacobian], CreateJacobianVariables[], {}], + If[Cases[{pddefs}, SBPDerivative[_], Infinity] != {}, CommentedBlock["Compute Summation By Parts derivatives", IncludeFile["sbp_calc_coeffs.h"]], diff --git a/Tools/CodeGen/Jacobian.m b/Tools/CodeGen/Jacobian.m new file mode 100644 index 0000000..59a57f9 --- /dev/null +++ b/Tools/CodeGen/Jacobian.m @@ -0,0 +1,124 @@ + +(* Copyright 2011 Ian Hinder + + This file is part of Kranc. + + Kranc is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + Kranc is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with Kranc; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*) + +BeginPackage["Jacobian`", {"Errors`", "Helpers`", "Kranc`", "Differencing`", "MapLookup`", "CodeGen`"}]; + +JacobianQ; +InsertJacobian; +CreateJacobianVariables; +JacobianGenericFDParameters; +JacobianSymbols; +JacobianGroups; + +Begin["`Private`"]; + +Options[JacobianQ] = ThornOptions; +JacobianQ[opts:OptionsPattern[]] := + Length[OptionValue[Jacobian]] > 0; + +(* Assign a shorthand containing the Jacobian multiplied by the passed + 1st derivative *) +jacobianShorthand[d:(deriv_[var_, i_])] := + Module[{}, + derivToJacDeriv[d] -> + Sum[Symbol["J"<>ToString[j]<>ToString[i]] deriv[var, j], {j, 1 3}] + ]; + +(* Assign a shorthand containing the Jacobian multiplied by the passed + 2nd derivative *) +jacobianShorthand[d:(deriv_[var_, i_,j_])] := + Module[{ip,jp}, + {ip,jp} = Sort[{i,j}]; (* dJ is symmetric in the last two indices *) + derivToJacDeriv[d] -> + Sum[Symbol["dJ"<>ToString[a]<>ToString[ip]<>ToString[jp]] deriv[var, a], {a, 1 3}] + + Sum[Symbol["J"<>ToString[a]<>ToString[i]] Symbol["J"<>ToString[b]<>ToString[j]] deriv[var, a, b], {a, 1 3}, {b, 1, 3}] + ]; + +(* Convert a 1st derivative to a Jacobian-multiplied derivative *) +derivToJacDeriv[deriv_[var_, i_]] := + Symbol["Global`Jac"<>ToString[deriv]<>ToString[i]<>ToString[var]]; + +(* Convert a 2nd derivative to a Jacobian-multiplied derivative *) +derivToJacDeriv[deriv_[var_, i_, j_]] := + Symbol["Global`Jac"<>ToString[deriv]<>ToString[i]<>ToString[j]<>ToString[var]]; + +(* Given a calculation containing partial derivatives, return a + version of the calculation with all the partial derivatives multiplied + by the Jacobian *) +Options[InsertJacobian] = ThornOptions; +InsertJacobian[calc_List, opts:OptionsPattern[]] := + Module[{pdDefs, derivs, newShortDefs, newShorts, combinedShorts, combinedEqs, 
combinedCalc, eqs, newEqs}, + pdDefs = OptionValue[PartialDerivatives]; + derivs = GridFunctionDerivativesInExpression[pdDefs, lookup[calc, Equations]]; + If[Length[derivs] === 0, Return[calc]]; + newShortDefs = Map[jacobianShorthand, derivs]; + newShorts = Map[First, newShortDefs]; + combinedShorts = Join[lookupDefault[calc, Shorthands, {}], newShorts]; + eqs = lookup[calc, Equations]; + newEqs = eqs /. (x_?(MemberQ[derivs, #] &) :> derivToJacDeriv[x]); + combinedEqs = Join[newShortDefs, newEqs]; + combinedCalc = mapReplace[mapReplace[mapEnsureKey[calc, Shorthands, {}], Shorthands, combinedShorts], Equations, combinedEqs]; + combinedCalc]; + +(* Define local pointers to the members of the Jacobian and Jacobian + derivatives groups *) +CreateJacobianVariables[] := +CommentedBlock["Jacobian variable pointers", + {"if (strlen(jacobian_group) == 0 || strlen(jacobian_derivative_group) == 0)\n", + "{\n", + " CCTK_WARN (1, \"GenericFD::jacobian_group and GenericFD::jacobian_derivative_group must both be set to valid group names\");\n", + "}\n\n", + "CCTK_REAL *jacobian_ptrs[9];\n", + "GenericFD_GroupDataPointers(cctkGH, jacobian_group,\n", + " 9, jacobian_ptrs);\n", + "\n", + Table[{"CCTK_REAL *J",i,j," = jacobian_ptrs[",(i-1)*3+j-1,"];\n"},{i,1,3},{j,1,3}], + "\n", + "CCTK_REAL *jacobian_derivative_ptrs[18];\n", + "GenericFD_GroupDataPointers(cctkGH, jacobian_derivative_group,\n", + " 18, jacobian_derivative_ptrs);\n", + "\n", + Module[{syms = Flatten[Table[{"dJ",i,j,k},{i,1,3},{j,1,3},{k,j,3}],2]}, + MapIndexed[{"CCTK_REAL *", #1, " = jacobian_derivative_ptrs[", #2-1, "];\n"} &, syms]]}]; + +(* List of symbols which should be allowed in a calculation *) +JacobianSymbols[] := + Map[Symbol, Join[Flatten[Table[FlattenBlock[{"dJ",i,j,k}],{i,1,3},{j,1,3},{k,j,3}],2], + Flatten[Table[FlattenBlock[{"J",i,j}],{i,1,3},{j,1,3}],1]]]; + +(* Parameters to inherit from GenericFD *) +JacobianGenericFDParameters[] := + {{Name -> "jacobian_group", Type -> "CCTK_STRING"}, + {Name -> 
"jacobian_derivative_group", Type -> "CCTK_STRING"}}; + +(* The symbols which are used for the Jacobian variables in the + generated source code. These do not have to coincide with the + actual variable names, as the variable pointers are read using + CCTK_VarDataPtr. *) +JacobianGroups[] := + {{"unknown::unknown", {Global`J11, Global`J12, Global`J13, Global`J21, Global`J22, Global`J23, Global`J31, Global`J32, Global`J33}}, + {"unknown::unknown", {Global`dJ111, Global`dJ112, Global`dJ113, Global`dJ122, Global`dJ123, Global`dJ133, + Global`dJ211, Global`dJ212, Global`dJ213, Global`dJ222, Global`dJ223, Global`dJ233, + Global`dJ311, Global`dJ312, Global`dJ313, Global`dJ322, Global`dJ323, Global`dJ333}}}; + + +End[]; + +EndPackage[]; diff --git a/Tools/CodeGen/Kranc.m b/Tools/CodeGen/Kranc.m index bd2c9c7..1077305 100644 --- a/Tools/CodeGen/Kranc.m +++ b/Tools/CodeGen/Kranc.m @@ -73,7 +73,8 @@ ThornOptions = UseVectors -> False, ProhibitAssignmentToGridFunctionsRead -> False, IncludeFiles -> {}, - CSE -> False}; + CSE -> False, + UseJacobian -> False}; (* Thorn.m *) diff --git a/Tools/CodeGen/KrancThorn.m b/Tools/CodeGen/KrancThorn.m index 6463345..bb1b115 100644 --- a/Tools/CodeGen/KrancThorn.m +++ b/Tools/CodeGen/KrancThorn.m @@ -179,7 +179,7 @@ CreateKrancThorn[groupsOrig_, parentDirectory_, thornName_, opts:OptionsPattern[ inheritedRealParams, inheritedIntParams, inheritedKeywordParams, extendedRealParams, extendedIntParams, extendedKeywordParams, evolutionTimelevels, defaultEvolutionTimelevels, - calcs]; + calcs, opts]; (* Construct the schedule file *) InfoMessage[Terse, "Creating schedule file"]; diff --git a/Tools/CodeGen/Param.m b/Tools/CodeGen/Param.m index 90cbb7d..1b4ad37 100644 --- a/Tools/CodeGen/Param.m +++ b/Tools/CodeGen/Param.m @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *) -BeginPackage["Param`", {"Thorn`", "Errors`", "Helpers`", "MapLookup`", "KrancGroups`", "Kranc`"}]; +BeginPackage["Param`", {"Thorn`", 
"Errors`", "Helpers`", "MapLookup`", "KrancGroups`", "Kranc`", "Jacobian`"}]; CreateKrancParam; MakeFullParamDefs; @@ -118,12 +118,13 @@ extendParameters[imp_, reals_, ints_, keywords_] := Return[{Name -> imp, ExtendedParameters -> Join[realStructs, intStructs, keywordStructs]}], Return[{}]]]; +Options[CreateKrancParam] = ThornOptions; CreateKrancParam[evolvedGroups_, nonevolvedGroups_, groups_, thornName_, reals_, ints_, keywords_, inheritedReals_, inheritedInts_, inheritedKeywords_, extendedReals_, extendedInts_, extendedKeywords_, evolutionTimelevels_, defaultEvolutionTimelevels_, - calcs_] := + calcs_, opts:OptionsPattern[]] := Module[{nEvolved, evolvedMoLParam, evolvedGFs, (*constrainedMoLParam,*) genericfdStruct, realStructs, intStructs, allInherited, allExtended, implementationNames, molImplementation, @@ -194,7 +195,7 @@ CreateKrancParam[evolvedGroups_, nonevolvedGroups_, groups_, thornName_, { Name -> "GenericFD", UsedParameters -> - {} + If[OptionValue[UseJacobian], JacobianGenericFDParameters[], {}] }; realStructs = Map[krancParamStruct[#, "CCTK_REAL", False] &, reals]; -- cgit v1.2.3 From b56e3c5fcf0ff63f1b59890fe843482feba4964c Mon Sep 17 00:00:00 2001 From: Ian Hinder Date: Tue, 24 May 2011 19:37:47 +0200 Subject: Make multiplication by Jacobian selectable at run-time Use of the Jacobian is determined by the user setting the jacobian_group parameter. Also provide a parameter jacobian_identity_map which the user can set to a Carpet map which does not require the Jacobian, in which case the Jacobian will not be applied. 
--- Auxiliary/Cactus/KrancNumericalTools/GenericFD/param.ccl | 5 +++++ Tools/CodeGen/Interface.m | 11 +++++++++-- Tools/CodeGen/Jacobian.m | 13 ++++++++----- Tools/CodeGen/Thorn.m | 3 ++- 4 files changed, 24 insertions(+), 8 deletions(-) (limited to 'Auxiliary') diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/param.ccl b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/param.ccl index 85de1f9..8e410f7 100644 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/param.ccl +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/param.ccl @@ -42,3 +42,8 @@ CCTK_STRING jacobian_derivative_group "Name of group containing Jacobian derivat { "" :: "String of the form ::" } "" + +CCTK_INT jacobian_identity_map "Map number on which the Jacobian should not be applied" +{ + -1:* :: "Any integer" +} -1 diff --git a/Tools/CodeGen/Interface.m b/Tools/CodeGen/Interface.m index c0a15e1..5f8f613 100644 --- a/Tools/CodeGen/Interface.m +++ b/Tools/CodeGen/Interface.m @@ -73,7 +73,7 @@ CreateKrancInterface[nonevolvedGroups_, evolvedGroups_, rhsGroups_, groups_, Module[{registerEvolved, (*registerConstrained,*) nonevolvedGroupStructures, evolvedGroupStructures, rhsGroupStructures, - groupStructures, interface}, + groupStructures, interface, getMap}, VerifyGroupNames[nonevolvedGroups]; VerifyGroupNames[evolvedGroups]; VerifyGroupNames[rhsGroups]; @@ -105,6 +105,13 @@ CreateKrancInterface[nonevolvedGroups_, evolvedGroups_, rhsGroups_, groups_, ArgString -> "CCTK_POINTER_TO_CONST IN cctkGH, CCTK_INT IN dir, CCTK_INT IN nsize, CCTK_INT OUT ARRAY imin, CCTK_INT OUT ARRAY imax, CCTK_REAL OUT ARRAY q, CCTK_INT IN table_handle" }; + getMap = + { + Name -> "MultiPatch_GetMap", + Type -> "CCTK_INT", + ArgString -> "CCTK_POINTER_TO_CONST IN cctkGH" + }; + (* For each group declared in this thorn, we need an entry in the interface file. 
Each evolved group needs an associated rhs @@ -131,7 +138,7 @@ CreateKrancInterface[nonevolvedGroups_, evolvedGroups_, rhsGroups_, groups_, If[OptionValue[UseVectors], {"vectors.h"}, {}]], groupStructures, UsesFunctions -> - Join[{registerEvolved, (*registerConstrained,*) diffCoeff}, + Join[{registerEvolved, (*registerConstrained,*) diffCoeff, getMap}, CactusBoundary`GetUsedFunctions[]]]; Return[interface]]; diff --git a/Tools/CodeGen/Jacobian.m b/Tools/CodeGen/Jacobian.m index 59a57f9..8ebd243 100644 --- a/Tools/CodeGen/Jacobian.m +++ b/Tools/CodeGen/Jacobian.m @@ -38,7 +38,7 @@ JacobianQ[opts:OptionsPattern[]] := jacobianShorthand[d:(deriv_[var_, i_])] := Module[{}, derivToJacDeriv[d] -> - Sum[Symbol["J"<>ToString[j]<>ToString[i]] deriv[var, j], {j, 1 3}] + IfThen["use_jacobian", Sum[Symbol["J"<>ToString[j]<>ToString[i]] deriv[var, j], {j, 1 3}], deriv[var, i]] ]; (* Assign a shorthand containing the Jacobian multiplied by the passed @@ -47,8 +47,9 @@ jacobianShorthand[d:(deriv_[var_, i_,j_])] := Module[{ip,jp}, {ip,jp} = Sort[{i,j}]; (* dJ is symmetric in the last two indices *) derivToJacDeriv[d] -> - Sum[Symbol["dJ"<>ToString[a]<>ToString[ip]<>ToString[jp]] deriv[var, a], {a, 1 3}] + - Sum[Symbol["J"<>ToString[a]<>ToString[i]] Symbol["J"<>ToString[b]<>ToString[j]] deriv[var, a, b], {a, 1 3}, {b, 1, 3}] + IfThen["use_jacobian", Sum[Symbol["dJ"<>ToString[a]<>ToString[ip]<>ToString[jp]] deriv[var, a], {a, 1 3}] + + Sum[Symbol["J"<>ToString[a]<>ToString[i]] Symbol["J"<>ToString[b]<>ToString[j]] deriv[var, a, b], {a, 1 3}, {b, 1, 3}], + deriv[var, i, j]] ]; (* Convert a 1st derivative to a Jacobian-multiplied derivative *) @@ -81,7 +82,8 @@ InsertJacobian[calc_List, opts:OptionsPattern[]] := derivatives groups *) CreateJacobianVariables[] := CommentedBlock["Jacobian variable pointers", - {"if (strlen(jacobian_group) == 0 || strlen(jacobian_derivative_group) == 0)\n", + {"bool use_jacobian = (!CCTK_IsFunctionAliased(\"MultiPatch_GetMap\") || 
MultiPatch_GetMap(cctkGH) != jacobian_identity_map)\n && strlen(jacobian_group) > 0;\n", + "if (use_jacobian && strlen(jacobian_derivative_group) == 0)\n", "{\n", " CCTK_WARN (1, \"GenericFD::jacobian_group and GenericFD::jacobian_derivative_group must both be set to valid group names\");\n", "}\n\n", @@ -106,7 +108,8 @@ JacobianSymbols[] := (* Parameters to inherit from GenericFD *) JacobianGenericFDParameters[] := {{Name -> "jacobian_group", Type -> "CCTK_STRING"}, - {Name -> "jacobian_derivative_group", Type -> "CCTK_STRING"}}; + {Name -> "jacobian_derivative_group", Type -> "CCTK_STRING"}, + {Name -> "jacobian_identity_map", Type -> "CCTK_INT"}}; (* The symbols which are used for the Jacobian variables in the generated source code. These do not have to coincide with the diff --git a/Tools/CodeGen/Thorn.m b/Tools/CodeGen/Thorn.m index a454f87..22b9f77 100644 --- a/Tools/CodeGen/Thorn.m +++ b/Tools/CodeGen/Thorn.m @@ -509,7 +509,8 @@ CreateSetterSource[calcs_, debug_, include_, imp_, {IncludeSystemFile["assert.h"], IncludeSystemFile["math.h"], IncludeSystemFile["stdio.h"], - IncludeSystemFile["stdlib.h"]}, + IncludeSystemFile["stdlib.h"], + IncludeSystemFile["string.h"]}, {"\n"} ], -- cgit v1.2.3 From c8c94cc24a88764febc27707632f29c785aced73 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 27 May 2011 15:13:31 +0200 Subject: Make IfThen available in Fortran --- Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/MathematicaCompat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Auxiliary') diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/MathematicaCompat.h b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/MathematicaCompat.h index 2f00da9..1823cd8 100644 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/MathematicaCompat.h +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/MathematicaCompat.h @@ -12,7 +12,7 @@ # define Abs(x) (abs(x)) # define Min(x, y) (min(x,y)) # define Max(x, y) (max(x,y)) -/* IfThen 
cannot be expressed in Fortran */ +# define IfThen(x,y,z) ((x)*(y) + (1-(x))*(z)) #endif #define Exp(x) (exp(x)) -- cgit v1.2.3 From 184f5947e9948128d0a309e1a4264cf0c209c4ad Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 27 May 2011 15:23:07 +0200 Subject: Tidy up grid indexing using CCTK_LSSH macro Clean up the code that uses lssh. Note: Although most people don't know about lssh, it's defined by the flesh, and e.g. all standard Cactus boundary conditions use it. However, Carpet currently always sets lssh to lsh, so the difference is "hidden". --- .../KrancNumericalTools/GenericFD/src/GenericFD.c | 10 ++++---- Tools/CodeGen/CodeGen.m | 30 +++++++++++----------- Tools/CodeGen/Thorn.m | 2 +- 3 files changed, 21 insertions(+), 21 deletions(-) (limited to 'Auxiliary') diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c index 1663231..ba9423c 100644 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c @@ -209,7 +209,7 @@ void GenericFD_GetBoundaryInfo(cGH const * restrict const cctkGH, imin[dir] = npoints; break; case 1: /* Upper boundary */ - imax[dir] = cctk_lssh[CCTK_LSSH_IDX(0,dir)] - npoints; + imax[dir] = CCTK_LSSH(0,dir) - npoints; break; default: CCTK_WARN(0, "internal error"); @@ -229,7 +229,7 @@ void GenericFD_LoopOverEverything(cGH const * restrict const cctkGH, Kranc_Calcu CCTK_REAL tangentA[] = {0,0,0}; CCTK_REAL tangentB[] = {0,0,0}; int bmin[] = {0,0,0}; - int bmax[] = {cctk_lssh[CCTK_LSSH_IDX(0,0)],cctk_lssh[CCTK_LSSH_IDX(0,1)],cctk_lssh[CCTK_LSSH_IDX(0,2)]}; + int bmax[] = {CCTK_LSSH(0,0),CCTK_LSSH(0,1),CCTK_LSSH(0,2)}; calc(cctkGH, dir, face, normal, tangentA, tangentB, bmin, bmax, 0, NULL); return; @@ -291,7 +291,7 @@ void GenericFD_LoopOverBoundary(cGH const * restrict const cctkGH, Kranc_Calcula break; case +1: bmin[d] = imax[d]; - bmax[d] = 
cctk_lssh[CCTK_LSSH_IDX(0,d)]; + bmax[d] = CCTK_LSSH(0,d); have_bnd = 1; all_physbnd = all_physbnd && is_physbnd[2*d+1]; break; @@ -384,7 +384,7 @@ void GenericFD_LoopOverBoundaryWithGhosts(cGH const * restrict const cctkGH, Kra break; case +1: bmin[d] = imax[d]; - bmax[d] = cctk_lssh[CCTK_LSSH_IDX(0,d)]; + bmax[d] = CCTK_LSSH(0,d); have_bnd = 1; have_physbnd = have_physbnd || is_physbnd[2*d+1]; break; @@ -464,7 +464,7 @@ void GenericFD_PenaltyPrim2Char(cGH const * restrict const cctkGH, int const dir CCTK_REAL tangentA[] = {0,0,0}; CCTK_REAL tangentB[] = {0,0,0}; int bmin[] = {0,0,0}; - int bmax[] = {cctk_lssh[CCTK_LSSH_IDX(0,0)],cctk_lssh[CCTK_LSSH_IDX(0,1)],cctk_lssh[CCTK_LSSH_IDX(0,2)]}; + int bmax[] = {cctk_lsh[0],cctk_lsh[1],cctk_lsh[2]}; CCTK_REAL **all_vars; int i = 0; diff --git a/Tools/CodeGen/CodeGen.m b/Tools/CodeGen/CodeGen.m index 44d4262..f4edee8 100644 --- a/Tools/CodeGen/CodeGen.m +++ b/Tools/CodeGen/CodeGen.m @@ -335,7 +335,7 @@ TestForNaN[expr_] := " CCTK_VInfo(CCTK_THORNSTRING, \"ipos: %d %d %d\", i, j, k);\n", " CCTK_VInfo(CCTK_THORNSTRING, \"lbnd: %d %d %d\", cctk_lbnd[0], cctk_lbnd[1], cctk_lbnd[2]);\n", " CCTK_VInfo(CCTK_THORNSTRING, \"lsh: %d %d %d\", cctk_lsh[0], cctk_lsh[1], cctk_lsh[2]);\n", - " CCTK_VInfo(CCTK_THORNSTRING, \"lssh: %d %d %d\", cctk_lssh[CCTK_LSSH_IDX(0,0)], cctk_lssh[CCTK_LSSH_IDX(0,1)], cctk_lssh[CCTK_LSSH_IDX(0,2)]);\n", + " CCTK_VInfo(CCTK_THORNSTRING, \"LSSH: %d %d %d\", CCTK_LSSH(0,0), CCTK_LSSH(0,1), CCTK_LSSH(0,2));\n", " CCTK_VInfo(CCTK_THORNSTRING, \"", expr, ": %.17g\", (double)", expr, ");\n", "}\n"}; @@ -553,9 +553,9 @@ InitialiseGridLoopVariables[derivativesUsedSwitch_, addToStencilWidth_] := AssignVariable["kstart", arrayIndex["index_offset_z"]], "\n", - AssignVariable["iend", {arrayElement["cctk_lssh", "CCTK_LSSH_IDX(0,0)"], " - index_offset_x"}], - AssignVariable["jend", {arrayElement["cctk_lssh", "CCTK_LSSH_IDX(0,1)"], " - index_offset_y"}], - AssignVariable["kend", {arrayElement["cctk_lssh", 
"CCTK_LSSH_IDX(0,2)"], " - index_offset_z"}] + AssignVariable["iend", {"CCTK_LSSH(0,0) - index_offset_x"}], + AssignVariable["jend", {"CCTK_LSSH(0,1) - index_offset_y"}], + AssignVariable["kend", {"CCTK_LSSH(0,2) - index_offset_z"}] }, { @@ -564,9 +564,9 @@ InitialiseGridLoopVariables[derivativesUsedSwitch_, addToStencilWidth_] := AssignVariable["kstart", arrayIndex[0]], "\n", - AssignVariable["iend", arrayElement["cctk_lssh", "CCTK_LSSH_IDX(0,0)"]], - AssignVariable["jend", arrayElement["cctk_lssh", "CCTK_LSSH_IDX(0,1)"]], - AssignVariable["kend", arrayElement["cctk_lssh", "CCTK_LSSH_IDX(0,2)"]] + AssignVariable["iend", "CCTK_LSSH(0,0)"], + AssignVariable["jend", "CCTK_LSSH(0,1)"], + AssignVariable["kend", "CCTK_LSSH(0,2)"] }] ]; @@ -703,9 +703,9 @@ BoundaryLoop[block_] := AssignVariable[arrayElement["bmin", 0], "is_physbnd[0*2+0] ? 0 : imin[0]"], AssignVariable[arrayElement["bmin", 1], "is_physbnd[1*2+0] ? 0 : imin[1]"], AssignVariable[arrayElement["bmin", 2], "is_physbnd[2*2+0] ? 0 : imin[2]"], - AssignVariable[arrayElement["bmax", 0], "is_physbnd[0*2+1] ? cctk_from[CCTK_LSSH_IDX(0,0)] : imax[0]"], - AssignVariable[arrayElement["bmax", 1], "is_physbnd[1*2+1] ? cctk_from[CCTK_LSSH_IDX(0,1)] : imax[1]"], - AssignVariable[arrayElement["bmax", 2], "is_physbnd[2*2+1] ? cctk_from[CCTK_LSSH_IDX(0,2)] : imax[2]"]}], + AssignVariable[arrayElement["bmax", 0], "is_physbnd[0*2+1] ? CCTK_LSSH(0,0) : imax[0]"], + AssignVariable[arrayElement["bmax", 1], "is_physbnd[1*2+1] ? CCTK_LSSH(0,1) : imax[1]"], + AssignVariable[arrayElement["bmax", 2], "is_physbnd[2*2+1] ? 
CCTK_LSSH(0,2) : imax[2]"]}], CommentedBlock["Loop over all faces", loopOverInteger["dir", "0", "3", @@ -716,7 +716,7 @@ BoundaryLoop[block_] := {0, {AssignVariable[arrayElement["bmax", "dir"], {arrayElement["imin", "dir"], ""}], AssignVariable[arrayElement["bmin", "dir"], {0, ""}]}}, {1, {AssignVariable[arrayElement["bmin", "dir"], {arrayElement["imax", "dir"], "" }], - AssignVariable[arrayElement["bmax", "dir"], {"cctk_lssh[CCTK_LSSH_IDX(0,dir)]", ""}]}}]], + AssignVariable[arrayElement["bmax", "dir"], {"CCTK_LSSH(0,dir)", ""}]}}]], conditional[arrayElement["is_physbnd", "dir * 2 + face"], loopOverInteger["k", arrayElement["bmin",2], arrayElement["bmax",2], loopOverInteger["j", arrayElement["bmin",1], arrayElement["bmax",1], @@ -740,9 +740,9 @@ BoundaryWithGhostsLoop[block_] := AssignVariable[arrayElement["bmin", 0], "0"], AssignVariable[arrayElement["bmin", 1], "0"], AssignVariable[arrayElement["bmin", 2], "0"], - AssignVariable[arrayElement["bmax", 0], "cctk_lssh[CCTK_LSSH_IDX(0,0)]"], - AssignVariable[arrayElement["bmax", 1], "cctk_lssh[CCTK_LSSH_IDX(0,1)]"], - AssignVariable[arrayElement["bmax", 2], "cctk_lssh[CCTK_LSSH_IDX(0,2)]"]}], + AssignVariable[arrayElement["bmax", 0], "CCTK_LSSH(0,0)"], + AssignVariable[arrayElement["bmax", 1], "CCTK_LSSH(0,1)"], + AssignVariable[arrayElement["bmax", 2], "CCTK_LSSH(0,2)"]}], CommentedBlock["Loop over all faces", loopOverInteger["dir", "0", "3", @@ -753,7 +753,7 @@ BoundaryWithGhostsLoop[block_] := {0, {AssignVariable[arrayElement["bmax", "dir"], {arrayElement["imin", "dir"], ""}], AssignVariable[arrayElement["bmin", "dir"], {0, ""}]}}, {1, {AssignVariable[arrayElement["bmin", "dir"], {arrayElement["imax", "dir"], "" }], - AssignVariable[arrayElement["bmax", "dir"], {"cctk_lssh[CCTK_LSSH_IDX(0,dir)]", ""}]}}]], + AssignVariable[arrayElement["bmax", "dir"], {"CCTK_LSSH(0,dir)]", ""}]}}]], conditional[arrayElement["is_physbnd", "dir * 2 + face"], loopOverInteger["k", arrayElement["bmin",2], arrayElement["bmax",2], 
loopOverInteger["j", arrayElement["bmin",1], arrayElement["bmax",1], diff --git a/Tools/CodeGen/Thorn.m b/Tools/CodeGen/Thorn.m index 22b9f77..b8155d6 100644 --- a/Tools/CodeGen/Thorn.m +++ b/Tools/CodeGen/Thorn.m @@ -1176,7 +1176,7 @@ headerComment2, " assert (len);\n\n", " for (d = 0; d < 3; ++d) {\n", -" assert (off[d] >= 0 && len[d] >= 0 && off[d] + len[d] <= cctk_lssh[CCTK_LSSH_IDX(0,d)]);\n", +" assert (off[d] >= 0 && len[d] >= 0 && off[d] + len[d] <= CCTK_LSSH(0,d));\n", " }\n\n", " assert (modes);\n", -- cgit v1.2.3 From ce3f98a21a00360edd3e0714e283abb2b321448b Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 27 May 2011 15:34:34 +0200 Subject: Add restrict and const qualifiers to the Jacobian pointers --- Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c | 4 ++-- Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h | 2 +- Tools/CodeGen/Jacobian.m | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'Auxiliary') diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c index ba9423c..e418cc6 100644 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c @@ -511,7 +511,7 @@ void GenericFD_AssertGroupStorage(cGH const * restrict const cctkGH, const char /* Return a list of pointers to the members of a named group */ void GenericFD_GroupDataPointers(cGH const * restrict const cctkGH, const char *group_name, - int nvars, CCTK_REAL **ptrs) + int nvars, CCTK_REAL const *restrict *ptrs) { int group_index, status; cGroup group_info; @@ -541,6 +541,6 @@ void GenericFD_GroupDataPointers(cGH const * restrict const cctkGH, const char * for (int v = 0; v < nvars; v++) { - ptrs[v] = (CCTK_REAL *) CCTK_VarDataPtrI(cctkGH, 0 /* timelevel */, v1+v); + ptrs[v] = (CCTK_REAL const *) CCTK_VarDataPtrI(cctkGH, 0 /* timelevel */, v1+v); } } diff --git 
a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h index 5974e63..ee35b05 100644 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h @@ -153,7 +153,7 @@ void GenericFD_LoopOverBoundaryWithGhosts(cGH const * restrict cctkGH, Kranc_Cal void GenericFD_LoopOverInterior(cGH const * restrict cctkGH, Kranc_Calculation calc); void GenericFD_GroupDataPointers(cGH const * restrict const cctkGH, const char *group_name, - int nvars, CCTK_REAL **ptrs); + int nvars, CCTK_REAL const *restrict *ptrs); #ifdef __cplusplus diff --git a/Tools/CodeGen/Jacobian.m b/Tools/CodeGen/Jacobian.m index 19147c0..31f6b67 100644 --- a/Tools/CodeGen/Jacobian.m +++ b/Tools/CodeGen/Jacobian.m @@ -84,23 +84,23 @@ InsertJacobian[calc_List, opts:OptionsPattern[]] := derivatives groups *) CreateJacobianVariables[] := CommentedBlock["Jacobian variable pointers", - {"bool use_jacobian = (!CCTK_IsFunctionAliased(\"MultiPatch_GetMap\") || MultiPatch_GetMap(cctkGH) != jacobian_identity_map)\n && strlen(jacobian_group) > 0;\n", + {"bool const use_jacobian = (!CCTK_IsFunctionAliased(\"MultiPatch_GetMap\") || MultiPatch_GetMap(cctkGH) != jacobian_identity_map)\n && strlen(jacobian_group) > 0;\n", "if (use_jacobian && strlen(jacobian_derivative_group) == 0)\n", "{\n", " CCTK_WARN (1, \"GenericFD::jacobian_group and GenericFD::jacobian_derivative_group must both be set to valid group names\");\n", "}\n\n", - "CCTK_REAL *jacobian_ptrs[9];\n", + "CCTK_REAL const *restrict jacobian_ptrs[9];\n", "if (use_jacobian) GenericFD_GroupDataPointers(cctkGH, jacobian_group,\n", " 9, jacobian_ptrs);\n", "\n", - Table[{"CCTK_REAL *J",i,j," = use_jacobian ? jacobian_ptrs[",(i-1)*3+j-1,"] : 0;\n"},{i,1,3},{j,1,3}], + Table[{"CCTK_REAL const *restrict const J",i,j," = use_jacobian ? 
jacobian_ptrs[",(i-1)*3+j-1,"] : 0;\n"},{i,1,3},{j,1,3}], "\n", - "CCTK_REAL *jacobian_derivative_ptrs[18];\n", + "CCTK_REAL const *restrict jacobian_derivative_ptrs[18];\n", "if (use_jacobian) GenericFD_GroupDataPointers(cctkGH, jacobian_derivative_group,\n", " 18, jacobian_derivative_ptrs);\n", "\n", Module[{syms = Flatten[Table[{"dJ",i,j,k},{i,1,3},{j,1,3},{k,j,3}],2]}, - MapIndexed[{"CCTK_REAL *", #1, " = use_jacobian ? jacobian_derivative_ptrs[", #2-1, "] : 0;\n"} &, syms]]}]; + MapIndexed[{"CCTK_REAL const *restrict const ", #1, " = use_jacobian ? jacobian_derivative_ptrs[", #2-1, "] : 0;\n"} &, syms]]}]; (* List of symbols which should be allowed in a calculation *) JacobianSymbols[] := -- cgit v1.2.3 From 4ced9076a98db22e333f2c0c7acb81d48673f64f Mon Sep 17 00:00:00 2001 From: Ian Hinder Date: Fri, 27 May 2011 11:20:05 +0200 Subject: GenericFD.c: Add GenericFD_GetBoundaryWidths Based on the existing GetBoundaryWidth but returning all the widths. GetBoundaryWidth now calls the new function. 
--- .../KrancNumericalTools/GenericFD/src/GenericFD.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'Auxiliary') diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c index e418cc6..3f34e54 100644 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c @@ -59,11 +59,10 @@ int sgn(CCTK_REAL x) return 0; } -int GenericFD_GetBoundaryWidth(cGH const * restrict const cctkGH) +void GenericFD_GetBoundaryWidths(cGH const * restrict const cctkGH, int nboundaryzones[6]) { int is_internal[6]; int is_staggered[6]; - int nboundaryzones[6]; int shiftout[6]; int ierr = -1; @@ -72,8 +71,15 @@ int GenericFD_GetBoundaryWidth(cGH const * restrict const cctkGH) /* This doesn't make sense in level mode */ if (map < 0) { -// CCTK_WARN(1, "Could not determine current map"); - return 0; + static int have_warned = 0; + if (!have_warned) + { + CCTK_WARN(1, "GenericFD_GetBoundaryWidths: Could not determine current map (can be caused by calling in LEVEL mode)"); + have_warned = 1; + } + for (int i = 0; i < 6; i++) + nboundaryzones[i] = 0; + return; } ierr = MultiPatch_GetBoundarySpecification (map, 6, nboundaryzones, is_internal, is_staggered, shiftout); @@ -87,6 +93,12 @@ int GenericFD_GetBoundaryWidth(cGH const * restrict const cctkGH) } else { CCTK_WARN(0, "Could not obtain boundary specification"); } +} + +int GenericFD_GetBoundaryWidth(cGH const * restrict const cctkGH) +{ + int nboundaryzones[6]; + GenericFD_GetBoundaryWidths(cctkGH, nboundaryzones); int bw = nboundaryzones[0]; -- cgit v1.2.3 From c47a4e5376c935baa2a4ebf787911dbee90a59bc Mon Sep 17 00:00:00 2001 From: Ian Hinder Date: Fri, 27 May 2011 11:20:55 +0200 Subject: GenericFD: Add GenericFD_EnsureStencilFits This function checks that there are enough ghost and boundary points for the stencil widths (ni, nj, nk) passed to 
it. --- .../KrancNumericalTools/GenericFD/src/GenericFD.c | 42 ++++++++++++++++++++++ .../KrancNumericalTools/GenericFD/src/GenericFD.h | 1 + 2 files changed, 43 insertions(+) (limited to 'Auxiliary') diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c index 3f34e54..8c06940 100644 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c +++ b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.c @@ -556,3 +556,45 @@ void GenericFD_GroupDataPointers(cGH const * restrict const cctkGH, const char * ptrs[v] = (CCTK_REAL const *) CCTK_VarDataPtrI(cctkGH, 0 /* timelevel */, v1+v); } } + +void GenericFD_EnsureStencilFits(cGH const * restrict const cctkGH, const char *calc, int ni, int nj, int nk) +{ + DECLARE_CCTK_ARGUMENTS + + int bws[6]; + GenericFD_GetBoundaryWidths(cctkGH, bws); + + int ns[] = {ni, nj, nk}; + const char *dirs[] = {"x", "y", "z"}; + const char *faces[] = {"lower", "upper"}; + int abort = 0; + + for (int dir = 0; dir < 3; dir++) + { + for (int face = 0; face < 2; face++) + { + int bw = bws[2*dir+face]; + if (bw < ns[dir]) + { + CCTK_VInfo(CCTK_THORNSTRING, + "The stencil for %s requires %d points, but the %s %s boundary has only %d points.", + calc, ns[dir], faces[face], dirs[dir], bw); + abort = 1; + } + } + int gz = cctk_nghostzones[dir]; + if (gz < ns[dir]) + { + CCTK_VInfo(CCTK_THORNSTRING, + "The stencil for %s requires %d points, but there are only %d ghost zones in the %s direction.", + calc, ns[dir], gz, dirs[dir]); + abort = 1; + } + } + + if (abort) + { + CCTK_VWarn(CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING, + "Insufficient ghost or boundary points for %s", calc); + } +} diff --git a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h index ee35b05..401b4e0 100644 --- a/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h +++ 
b/Auxiliary/Cactus/KrancNumericalTools/GenericFD/src/GenericFD.h @@ -154,6 +154,7 @@ void GenericFD_LoopOverInterior(cGH const * restrict cctkGH, Kranc_Calculation c void GenericFD_GroupDataPointers(cGH const * restrict const cctkGH, const char *group_name, int nvars, CCTK_REAL const *restrict *ptrs); +void GenericFD_EnsureStencilFits(cGH const * restrict const cctkGH, const char *calc, int ni, int nj, int nk); #ifdef __cplusplus -- cgit v1.2.3