From 0c77e02f695962169f1f8d35f6b2bb6cbcb7ffa9 Mon Sep 17 00:00:00 2001 From: schnetter Date: Tue, 23 Sep 2008 15:46:30 +0000 Subject: Parallelise loops with OpenMP. git-svn-id: http://svn.cactuscode.org/arrangements/CactusNumerical/MoL/trunk@129 578cdeb0-5ea1-4b81-8215-5a3b8777ee0b --- src/GenericRK.c | 9 +++++++++ src/ICN.c | 6 ++++++ src/InitialCopy.c | 3 +++ src/RK2.c | 6 ++++++ src/RK3.c | 9 +++++++++ src/RK4.c | 8 ++++++++ src/RK45.c | 5 +++++ src/RK65.c | 5 +++++ src/RK87.c | 5 +++++ 9 files changed, 56 insertions(+) diff --git a/src/GenericRK.c b/src/GenericRK.c index f9622c7..6fc2dc9 100644 --- a/src/GenericRK.c +++ b/src/GenericRK.c @@ -140,6 +140,7 @@ void MoL_GenericRKAdd(CCTK_ARGUMENTS) beta); #endif +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateVar[index] = (*Original_Delta_Time) / cctkGH->cctk_timefac * beta * RHSVar[index]; @@ -203,6 +204,7 @@ void MoL_GenericRKAdd(CCTK_ARGUMENTS) if ( (alpha > MoL_Tiny)||(alpha < -MoL_Tiny) ) { +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateVar[index] += alpha * ScratchVar[index]; @@ -248,6 +250,7 @@ void MoL_GenericRKAdd(CCTK_ARGUMENTS) MoL_Intermediate_Steps - (*MoL_Intermediate_Step)) * totalsize, totalsize); #endif +#pragma omp parallel for for (index = 0; index < totalsize; index++) { ScratchVar[index] = UpdateVar[index]; @@ -278,6 +281,7 @@ void MoL_GenericRKAdd(CCTK_ARGUMENTS) RHSComplexVar = (CCTK_COMPLEX const *)CCTK_VarDataPtrI(cctkGH, 0, RHSVariableIndex[var]); +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateComplexVar[index] = CCTK_CmplxMul(Complex_Delta_Time, @@ -310,6 +314,7 @@ void MoL_GenericRKAdd(CCTK_ARGUMENTS) if ( (alpha > MoL_Tiny)||(alpha < -MoL_Tiny) ) { +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateComplexVar[index] = @@ -333,6 +338,7 @@ void MoL_GenericRKAdd(CCTK_ARGUMENTS) MoL_Intermediate_Steps - (*MoL_Intermediate_Step)) * totalsize]; +#pragma omp parallel for for (index = 0; index < totalsize; index++) { ScratchComplexVar[index] = UpdateComplexVar[index]; @@ -365,6 +371,7 @@ void MoL_GenericRKAdd(CCTK_ARGUMENTS) arraytotalsize = ArrayScratchSizes[var]; +#pragma omp parallel for for (index = 0; index < arraytotalsize; index++) { UpdateVar[index] = (*Original_Delta_Time) / cctkGH->cctk_timefac * beta * RHSVar[index]; @@ -393,6 +400,7 @@ void MoL_GenericRKAdd(CCTK_ARGUMENTS) if ( (alpha > MoL_Tiny)||(alpha < -MoL_Tiny) ) { +#pragma omp parallel for for (index = 0; index < arraytotalsize; index++) { UpdateVar[index] += alpha * ScratchVar[index]; @@ -441,6 +449,7 @@ void MoL_GenericRKAdd(CCTK_ARGUMENTS) CurrentArrayScratchSize + arrayscratchlocation); #endif +#pragma omp parallel for for (index = 0; index < arraytotalsize; index++) { ScratchVar[index] = UpdateVar[index]; diff --git a/src/ICN.c b/src/ICN.c index 7f75a35..c9a6114 100644 --- a/src/ICN.c +++ b/src/ICN.c @@ -120,6 +120,7 @@ void MoL_ICNAdd(CCTK_ARGUMENTS) RHSVar = (CCTK_REAL*)CCTK_VarDataPtrI(cctkGH, 0, RHSVariableIndex[var]); +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateVar[index] = OldVar[index] + CCTK_DELTA_TIME * RHSVar[index]; @@ -155,6 +156,7 @@ void MoL_ICNAdd(CCTK_ARGUMENTS) /* "This proc array total size is %d.", */ /* arraytotalsize); */ +#pragma omp parallel for for (index = 0; index < arraytotalsize; index++) { UpdateVar[index] = OldVar[index] + CCTK_DELTA_TIME * RHSVar[index]; @@ -174,6 +176,7 @@ void MoL_ICNAdd(CCTK_ARGUMENTS) RHSComplexVar = (CCTK_COMPLEX*)CCTK_VarDataPtrI(cctkGH, 0, RHSComplexVariableIndex[var]); +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateComplexVar[index] = CCTK_CmplxAdd(OldComplexVar[index], @@ -269,6 +272,7 @@ void MoL_ICNAverage(CCTK_ARGUMENTS) RHSVar = (CCTK_REAL*)CCTK_VarDataPtrI(cctkGH, 0, RHSVariableIndex[var]); +#pragma omp parallel for for (index = 0; index < totalsize; index++) { /* UpdateVar[index] = 0.5 * (UpdateVar[index] + OldVar[index]); */ @@ -302,6 +306,7 @@ void MoL_ICNAverage(CCTK_ARGUMENTS) arraytotalsize *= arraydata.lsh[arraydim]; } +#pragma omp parallel for for (index = 0; index < arraytotalsize; index++) { /* UpdateVar[index] = 0.5 * (UpdateVar[index] + OldVar[index]); */ @@ -323,6 +328,7 @@ void MoL_ICNAverage(CCTK_ARGUMENTS) RHSComplexVar = (CCTK_COMPLEX*)CCTK_VarDataPtrI(cctkGH, 0, RHSComplexVariableIndex[var]); +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateComplexVar[index] = CCTK_CmplxMul(Complex_Half, CCTK_CmplxAdd(UpdateComplexVar[index], OldComplexVar[index])); diff --git a/src/InitialCopy.c b/src/InitialCopy.c index 5b72393..b421908 100644 --- a/src/InitialCopy.c +++ b/src/InitialCopy.c @@ -556,6 +556,7 @@ void MoL_InitRHS(CCTK_ARGUMENTS) RHSVariableIndex[var]); if (RHSVar) { +#pragma omp parallel for for (index = 0; index < totalsize; index++) { RHSVar[index] = 0; @@ -594,6 +595,7 @@ void MoL_InitRHS(CCTK_ARGUMENTS) { if (RHSVar) { +#pragma omp parallel for for (index = 0; index < arraytotalsize; index++) { RHSVar[index] = 0; @@ -636,6 +638,7 @@ void MoL_InitRHS(CCTK_ARGUMENTS) RHSComplexVariableIndex[var]); if (RHSVar) { +#pragma omp parallel for for (index = 0; index < totalsize; index++) { RHSVar[index] = 0; diff --git a/src/RK2.c b/src/RK2.c index 1301e26..58dd03f 100644 --- a/src/RK2.c +++ b/src/RK2.c @@ -118,6 +118,7 @@ void MoL_RK2Add(CCTK_ARGUMENTS) RHSVar = (CCTK_REAL const*)CCTK_VarDataPtrI(cctkGH, 0, RHSVariableIndex[var]); +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateVar[index] += CCTK_DELTA_TIME * RHSVar[index]; @@ -146,6 +147,7 @@ void MoL_RK2Add(CCTK_ARGUMENTS) arraytotalsize *= arraydata.lsh[arraydim]; } +#pragma omp parallel for for (index = 0; index < arraytotalsize; index++) { UpdateVar[index] += CCTK_DELTA_TIME * RHSVar[index]; @@ -163,6 +165,7 @@ void MoL_RK2Add(CCTK_ARGUMENTS) RHSComplexVar = (CCTK_COMPLEX const*)CCTK_VarDataPtrI(cctkGH, 0, RHSComplexVariableIndex[var]); +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateComplexVar[index] = CCTK_CmplxAdd(UpdateComplexVar[index], @@ -186,6 +189,7 @@ void MoL_RK2Add(CCTK_ARGUMENTS) RHSVar = (CCTK_REAL const*)CCTK_VarDataPtrI(cctkGH, 0, RHSVariableIndex[var]); +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateVar[index] = 0.5 * (OldVar[index] + UpdateVar[index]) + @@ -218,6 +222,7 @@ void MoL_RK2Add(CCTK_ARGUMENTS) arraytotalsize *= arraydata.lsh[arraydim]; } +#pragma omp parallel for for (index = 0; index < arraytotalsize; index++) { UpdateVar[index] = 0.5 * (OldVar[index] + UpdateVar[index]) + @@ -238,6 +243,7 @@ void MoL_RK2Add(CCTK_ARGUMENTS) RHSComplexVar = (CCTK_COMPLEX const*)CCTK_VarDataPtrI(cctkGH, 0, RHSComplexVariableIndex[var]); +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateComplexVar[index] = diff --git a/src/RK3.c b/src/RK3.c index 68329a2..f3b5e9c 100644 --- a/src/RK3.c +++ b/src/RK3.c @@ -121,6 +121,7 @@ void MoL_RK3Add(CCTK_ARGUMENTS) RHSVar = (CCTK_REAL*)CCTK_VarDataPtrI(cctkGH, 0, RHSVariableIndex[var]); +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateVar[index] += CCTK_DELTA_TIME * RHSVar[index]; @@ -149,6 +150,7 @@ void MoL_RK3Add(CCTK_ARGUMENTS) arraytotalsize *= arraydata.lsh[arraydim]; } +#pragma omp parallel for for (index = 0; index < arraytotalsize; index++) { UpdateVar[index] += CCTK_DELTA_TIME * RHSVar[index]; @@ -166,6 +168,7 @@ void MoL_RK3Add(CCTK_ARGUMENTS) RHSComplexVar = (CCTK_COMPLEX*)CCTK_VarDataPtrI(cctkGH, 0, RHSComplexVariableIndex[var]); +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateComplexVar[index] = CCTK_CmplxAdd(UpdateComplexVar[index], @@ -190,6 +193,7 @@ void MoL_RK3Add(CCTK_ARGUMENTS) RHSVar = (CCTK_REAL const*)CCTK_VarDataPtrI(cctkGH, 0, RHSVariableIndex[var]); +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateVar[index] = 0.25 * (3*OldVar[index] + @@ -222,6 +226,7 @@ void MoL_RK3Add(CCTK_ARGUMENTS) arraytotalsize *= arraydata.lsh[arraydim]; } +#pragma omp parallel for for (index = 0; index < arraytotalsize; index++) { UpdateVar[index] = 0.25*(3*OldVar[index] + UpdateVar[index]) + @@ -242,6 +247,7 @@ void MoL_RK3Add(CCTK_ARGUMENTS) RHSComplexVar = (CCTK_COMPLEX const*)CCTK_VarDataPtrI(cctkGH, 0, RHSComplexVariableIndex[var]); +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateComplexVar[index] = @@ -271,6 +277,7 @@ void MoL_RK3Add(CCTK_ARGUMENTS) RHSVar = (CCTK_REAL const*)CCTK_VarDataPtrI(cctkGH, 0, RHSVariableIndex[var]); +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateVar[index] = (OldVar[index] + 2*UpdateVar[index]) * one_third @@ -303,6 +310,7 @@ void MoL_RK3Add(CCTK_ARGUMENTS) arraytotalsize *= arraydata.lsh[arraydim]; } +#pragma omp parallel for for (index = 0; index < arraytotalsize; index++) { UpdateVar[index] = (OldVar[index] + 2*UpdateVar[index]) * one_third @@ -323,6 +331,7 @@ void MoL_RK3Add(CCTK_ARGUMENTS) RHSComplexVar = (CCTK_COMPLEX const*)CCTK_VarDataPtrI(cctkGH, 0, RHSComplexVariableIndex[var]); +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateComplexVar[index] = diff --git a/src/RK4.c b/src/RK4.c index c96377a..250c847 100644 --- a/src/RK4.c +++ b/src/RK4.c @@ -156,6 +156,7 @@ CCTK_WARN(0, "not implemented"); beta); #endif +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateVar[index] = OldVar[index] + @@ -176,6 +177,7 @@ CCTK_WARN(0, "not implemented"); /* scratch storage */ if ((*MoL_Intermediate_Step) == MoL_Intermediate_Steps) { +#pragma omp parallel for for (index = 0; index < totalsize; index++) { ScratchVar[index] = 0; @@ -184,6 +186,7 @@ CCTK_WARN(0, "not implemented"); if ((*MoL_Intermediate_Step)>1) { +#pragma omp parallel for for (index = 0; index < totalsize; index++) { ScratchVar[index] += alpha * UpdateVar[index]; @@ -191,6 +194,7 @@ CCTK_WARN(0, "not implemented"); } else { +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateVar[index] += ScratchVar[index] - 4.0 / 3.0 * OldVar[index]; @@ -240,6 +244,7 @@ CCTK_WARN(0, "not done"); ScratchVar = &ArrayScratchSpace[arrayscratchlocation]; +#pragma omp parallel for for (index = 0; index < arraytotalsize; index++) { UpdateVar[index] = OldVar[index] + @@ -248,6 +253,7 @@ CCTK_WARN(0, "not done"); if ((*MoL_Intermediate_Step) == MoL_Intermediate_Steps) { +#pragma omp parallel for for (index = 0; index < arraytotalsize; index++) { ScratchVar[index] = 0; @@ -256,6 +262,7 @@ CCTK_WARN(0, "not done"); if ((*MoL_Intermediate_Step)>1) { +#pragma omp parallel for for (index = 0; index < arraytotalsize; index++) { ScratchVar[index] += alpha * UpdateVar[index]; @@ -263,6 +270,7 @@ CCTK_WARN(0, "not done"); } else { +#pragma omp parallel for for (index = 0; index < arraytotalsize; index++) { UpdateVar[index] += ScratchVar[index] - 4.0 / 3.0 * OldVar[index]; diff --git a/src/RK45.c b/src/RK45.c index 3ca2456..6173d77 100644 --- a/src/RK45.c +++ b/src/RK45.c @@ -181,6 +181,7 @@ void MoL_RK45Add(CCTK_ARGUMENTS) + MoL_Num_Evolved_Vars * (MoL_Intermediate_Steps - (*MoL_Intermediate_Step))); +#pragma omp parallel for for (index = 0; index < totalsize; index++) { ScratchVar[index] = tmp * RHSVar[index]; @@ -207,6 +208,7 @@ void MoL_RK45Add(CCTK_ARGUMENTS) if (*MoL_Intermediate_Step - 1) { +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateVar[index] = OldVar[index]; @@ -226,6 +228,7 @@ void MoL_RK45Add(CCTK_ARGUMENTS) if ( (beta > MoL_Tiny)||(beta < -MoL_Tiny) ) { +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateVar[index] += beta * ScratchVar[index]; @@ -238,6 +241,7 @@ void MoL_RK45Add(CCTK_ARGUMENTS) else { +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateVar[index] = OldVar[index]; @@ -257,6 +261,7 @@ void MoL_RK45Add(CCTK_ARGUMENTS) if ( (gamma > MoL_Tiny)||(gamma < -MoL_Tiny) ) { +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateVar[index] += gamma * ScratchVar[index]; diff --git a/src/RK65.c b/src/RK65.c index 22bd26b..e98f37a 100644 --- a/src/RK65.c +++ b/src/RK65.c @@ -140,6 +140,7 @@ void MoL_RK65Add(CCTK_ARGUMENTS) + MoL_Num_Evolved_Vars * (MoL_Intermediate_Steps - (*MoL_Intermediate_Step))); +#pragma omp parallel for for (index = 0; index < totalsize; index++) { ScratchVar[index] = tmp * RHSVar[index]; @@ -166,6 +167,7 @@ void MoL_RK65Add(CCTK_ARGUMENTS) if (*MoL_Intermediate_Step - 1) { +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateVar[index] = OldVar[index]; @@ -185,6 +187,7 @@ void MoL_RK65Add(CCTK_ARGUMENTS) if ( (beta > MoL_Tiny)||(beta < -MoL_Tiny) ) { +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateVar[index] += beta * ScratchVar[index]; @@ -197,6 +200,7 @@ void MoL_RK65Add(CCTK_ARGUMENTS) else { +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateVar[index] = OldVar[index]; @@ -216,6 +220,7 @@ void MoL_RK65Add(CCTK_ARGUMENTS) if ( (gamma > MoL_Tiny)||(gamma < -MoL_Tiny) ) { +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateVar[index] += gamma * ScratchVar[index]; diff --git a/src/RK87.c b/src/RK87.c index 3ce19b6..f806edd 100644 --- a/src/RK87.c +++ b/src/RK87.c @@ -161,6 +161,7 @@ void MoL_RK87Add(CCTK_ARGUMENTS) + MoL_Num_Evolved_Vars * (MoL_Intermediate_Steps - (*MoL_Intermediate_Step))); +#pragma omp parallel for for (index = 0; index < totalsize; index++) { ScratchVar[index] = tmp * RHSVar[index]; @@ -188,6 +189,7 @@ void MoL_RK87Add(CCTK_ARGUMENTS) if (*MoL_Intermediate_Step - 1) { +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateVar[index] = OldVar[index]; @@ -207,6 +209,7 @@ void MoL_RK87Add(CCTK_ARGUMENTS) if ( (beta > MoL_Tiny)||(beta < -MoL_Tiny) ) { +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateVar[index] += beta * ScratchVar[index]; @@ -219,6 +222,7 @@ void MoL_RK87Add(CCTK_ARGUMENTS) else { +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateVar[index] = OldVar[index]; @@ -238,6 +242,7 @@ void MoL_RK87Add(CCTK_ARGUMENTS) if ( (gamma > MoL_Tiny)||(gamma < -MoL_Tiny) ) { +#pragma omp parallel for for (index = 0; index < totalsize; index++) { UpdateVar[index] += gamma * ScratchVar[index]; -- cgit v1.2.3