From 2f28654849b8a440e8560d9a5fe44774ffaa1ec1 Mon Sep 17 00:00:00 2001 From: schnetter Date: Tue, 8 Apr 2008 23:14:58 +0000 Subject: Determine index range correctly: Initialise imax to -1 instead of 0, so that empty domains are handled correctly. Begin to add some OpenMP parallelisation statements. git-svn-id: https://svn.cct.lsu.edu/repos/numrel/LSUThorns/SummationByParts/trunk@98 f69c4107-0314-4c4f-9ad4-17e986b73f4a --- src/Coefficients2_2_1.F90 | 2 +- src/Coefficients2_4_2.F90 | 2 +- src/Coefficients2_4_2_min_err_coeff.F90 | 2 +- src/Coefficients2_6_3.F90 | 2 +- src/Coefficients2_8_4.F90 | 2 +- src/Coefficients_2_1.F90 | 5 +++-- src/Coefficients_4_2.F90 | 5 +++-- src/Coefficients_6_3.F90 | 2 +- src/Coefficients_6_3_min_err_coeff.F90 | 2 +- src/Coefficients_8_4.F90 | 2 +- src/Coefficients_8_4_min_err_coeff.F90 | 4 ++-- src/Derivatives_2_1.F90 | 12 ++++++++++++ src/Derivatives_4_2.F90 | 12 ++++++++++++ src/Dissipation_2_1.F90 | 7 ++++++- 14 files changed, 46 insertions(+), 15 deletions(-) diff --git a/src/Coefficients2_2_1.F90 b/src/Coefficients2_2_1.F90 index 686df7b..e155477 100644 --- a/src/Coefficients2_2_1.F90 +++ b/src/Coefficients2_2_1.F90 @@ -31,7 +31,7 @@ subroutine set_coeff2_2_1 ( nsize, loc_order, bb, gsize, imin, imax, dd ) dd = zero imin = 0 - imax = 0 + imax = -1 if ( bb(1) == 0 ) then il = 1 + gsize diff --git a/src/Coefficients2_4_2.F90 b/src/Coefficients2_4_2.F90 index 5883b02..3da8fff 100644 --- a/src/Coefficients2_4_2.F90 +++ b/src/Coefficients2_4_2.F90 @@ -31,7 +31,7 @@ subroutine set_coeff2_4_2 ( nsize, loc_order, bb, gsize, imin, imax, dd ) dd = zero imin = 0 - imax = 0 + imax = -1 if ( bb(1) == 0 ) then il = 1 + gsize diff --git a/src/Coefficients2_4_2_min_err_coeff.F90 b/src/Coefficients2_4_2_min_err_coeff.F90 index 9720d05..184a242 100644 --- a/src/Coefficients2_4_2_min_err_coeff.F90 +++ b/src/Coefficients2_4_2_min_err_coeff.F90 @@ -31,7 +31,7 @@ subroutine set_coeff2_4_2_opt ( nsize, loc_order, bb, gsize, imin, imax, dd ) dd = zero imin = 0 - imax = 0 + imax = -1 if ( bb(1) == 0 ) then il = 1 + gsize diff --git a/src/Coefficients2_6_3.F90 b/src/Coefficients2_6_3.F90 index 3563c48..ce7f97e 100644 --- a/src/Coefficients2_6_3.F90 +++ b/src/Coefficients2_6_3.F90 @@ -31,7 +31,7 @@ subroutine set_coeff2_6_3 ( nsize, loc_order, bb, gsize, imin, imax, dd ) dd = zero imin = 0 - imax = 0 + imax = -1 if ( bb(1) == 0 ) then il = 1 + gsize diff --git a/src/Coefficients2_8_4.F90 b/src/Coefficients2_8_4.F90 index b4c26dc..eeae118 100644 --- a/src/Coefficients2_8_4.F90 +++ b/src/Coefficients2_8_4.F90 @@ -31,7 +31,7 @@ subroutine set_coeff2_8_4 ( nsize, loc_order, bb, gsize, imin, imax, dd ) dd = zero imin = 0 - imax = 0 + imax = -1 if ( bb(1) == 0 ) then il = 1 + gsize diff --git a/src/Coefficients_2_1.F90 b/src/Coefficients_2_1.F90 index 35b261b..99f21c7 100644 --- a/src/Coefficients_2_1.F90 +++ b/src/Coefficients_2_1.F90 @@ -31,7 +31,7 @@ subroutine set_coeff_2_1 ( nsize, loc_order, bb, gsize, imin, imax, dd ) dd = zero imin = 0 - imax = 0 + imax = -1 if ( bb(1) == 0 ) then il = 1 + gsize @@ -51,6 +51,7 @@ subroutine set_coeff_2_1 ( nsize, loc_order, bb, gsize, imin, imax, dd ) imin(nsize) = nsize-1; imax(nsize) = nsize ir = nsize - 2 end if +!$omp parallel do do i = il, ir dd(i-1,i) = -a(1); dd(i+1,i) = a(1) imin(i) = i-1; imax(i) = i+1 @@ -91,7 +92,7 @@ subroutine set_coeff_up_2_1 ( nsize, dir, loc_order, bb, gsize, imin, imax, dd ) dd = zero imin = 0 - imax = 0 + imax = -1 if ( bb(1) == 0 ) then il = 1 + gsize diff --git a/src/Coefficients_4_2.F90 b/src/Coefficients_4_2.F90 index 0d0c49a..099c5b6 100644 --- a/src/Coefficients_4_2.F90 +++ b/src/Coefficients_4_2.F90 @@ -31,7 +31,7 @@ subroutine set_coeff_4_2 ( nsize, loc_order, bb, gsize, imin, imax, dd ) dd = zero imin = 0 - imax = 0 + imax = -1 if ( bb(1) == 0 ) then il = 1 + gsize @@ -66,6 +66,7 @@ subroutine set_coeff_4_2 ( nsize, loc_order, bb, gsize, imin, imax, dd ) imin(nsize) = nsize-3; imax(nsize) = nsize ir = nsize - 4 end if +!$omp parallel do do i = il, ir dd(i-2:i-1,i) = -a(2:1:-1); dd(i+1:i+2,i) = a(1:2) imin(i) = i-2; imax(i) = i+2 @@ -105,7 +106,7 @@ subroutine set_coeff_up_4_2 ( nsize, dir, loc_order, bb, gsize, imin, imax, dd ) dd = zero imin = 0 - imax = 0 + imax = -1 if ( bb(1) == 0 ) then il = 1 + gsize diff --git a/src/Coefficients_6_3.F90 b/src/Coefficients_6_3.F90 index 498723a..465d4ce 100644 --- a/src/Coefficients_6_3.F90 +++ b/src/Coefficients_6_3.F90 @@ -31,7 +31,7 @@ subroutine set_coeff_6_3 ( nsize, loc_order, bb, gsize, imin, imax, dd ) dd = zero imin = 0 - imax = 0 + imax = -1 if ( bb(1) == 0 ) then il = 1 + gsize diff --git a/src/Coefficients_6_3_min_err_coeff.F90 b/src/Coefficients_6_3_min_err_coeff.F90 index ffad15f..30054c9 100644 --- a/src/Coefficients_6_3_min_err_coeff.F90 +++ b/src/Coefficients_6_3_min_err_coeff.F90 @@ -118,7 +118,7 @@ subroutine set_coeff_up_6_3_opt ( nsize, dir, loc_order, bb, gsize, imin, imax, dd = zero imin = 0 - imax = 0 + imax = -1 if ( bb(1) == 0 ) then il = 1 + gsize diff --git a/src/Coefficients_8_4.F90 b/src/Coefficients_8_4.F90 index d2461b5..240730f 100644 --- a/src/Coefficients_8_4.F90 +++ b/src/Coefficients_8_4.F90 @@ -31,7 +31,7 @@ subroutine set_coeff_8_4 ( nsize, loc_order, bb, gsize, imin, imax, dd ) dd = zero imin = 0 - imax = 0 + imax = -1 if ( bb(1) == 0 ) then il = 1 + gsize diff --git a/src/Coefficients_8_4_min_err_coeff.F90 b/src/Coefficients_8_4_min_err_coeff.F90 index 836feec..5bae502 100644 --- a/src/Coefficients_8_4_min_err_coeff.F90 +++ b/src/Coefficients_8_4_min_err_coeff.F90 @@ -31,7 +31,7 @@ subroutine set_coeff_8_4_opt ( nsize, loc_order, bb, gsize, imin, imax, dd ) dd = zero imin = 0 - imax = 0 + imax = -1 if ( bb(1) == 0 ) then il = 1 + gsize @@ -130,7 +130,7 @@ subroutine set_coeff_up_8_4_opt ( nsize, dir, loc_order, bb, gsize, imin, imax, dd = zero imin = 0 - imax = 0 + imax = -1 if ( bb(1) == 0 ) then il = 1 + gsize diff --git a/src/Derivatives_2_1.F90 b/src/Derivatives_2_1.F90 index 4ea8593..62a12a5 100644 --- a/src/Derivatives_2_1.F90 +++ b/src/Derivatives_2_1.F90 @@ -55,7 +55,9 @@ subroutine deriv_gf_2_1 ( var, ni, nj, nk, dir, bb, gsize, delta, dvar ) ir = ni - 2 end if if (il > ir+1) call CCTK_WARN (0, "domain too small") +!$omp parallel workshare dvar(il:ir,:,:) = a(1) * ( var(il+1:ir+1,:,:) - var(il-1:ir-1,:,:) ) * idel +!$omp end parallel workshare case (1) direction if ( zero_derivs_y /= 0 ) then dvar = zero @@ -77,8 +79,10 @@ subroutine deriv_gf_2_1 ( var, ni, nj, nk, dir, bb, gsize, delta, dvar ) jr = nj - 2 end if if (jl > jr+1) call CCTK_WARN (0, "domain too small") +!$omp parallel workshare dvar(:,jl:jr,:) = a(1) * ( var(:,jl+1:jr+1,:) - & var(:,jl-1:jr-1,:) ) * idel +!$omp end parallel workshare end if case (2) direction if ( zero_derivs_z /= 0 ) then @@ -101,8 +105,10 @@ subroutine deriv_gf_2_1 ( var, ni, nj, nk, dir, bb, gsize, delta, dvar ) kr = nk - 2 end if if (kl > kr+1) call CCTK_WARN (0, "domain too small") +!$omp parallel workshare dvar(:,:,kl:kr) = a(1) * ( var(:,:,kl+1:kr+1) - & var(:,:,kl-1:kr-1) ) * idel +!$omp end parallel workshare end if end select direction end subroutine deriv_gf_2_1 @@ -183,6 +189,7 @@ subroutine up_deriv_gf_2_1 ( var, ni, nj, nk, dir, bb, gsize, delta, up, dvar ) ir = ni - 2 end if if (il > ir+1) call CCTK_WARN (0, "domain too small") +!$omp parallel workshare where ( up(il:ir,:,:) < zero ) dvar(il:ir,:,:) = ( a1(-1) * var(il-1:ir-1,:,:) + & a1(0) * var(il:ir,:,:) + & @@ -192,6 +199,7 @@ subroutine up_deriv_gf_2_1 ( var, ni, nj, nk, dir, bb, gsize, delta, up, dvar ) a2(0) * var(il:ir,:,:) + & a2(1) * var(il+1:ir+1,:,:) ) * idel end where +!$omp end parallel workshare case (1) direction if ( zero_derivs_y /= 0 ) then dvar = zero @@ -235,6 +243,7 @@ subroutine up_deriv_gf_2_1 ( var, ni, nj, nk, dir, bb, gsize, delta, up, dvar ) jr = nj - 2 end if if (jl > jr+1) call CCTK_WARN (0, "domain too small") +!$omp parallel workshare where ( up(:,jl:jr,:) < zero ) dvar(:,jl:jr,:) = ( a1(-1) * var(:,jl-1:jr-1,:) + & a1(0) * var(:,jl:jr,:) + & @@ -244,6 +253,7 @@ subroutine up_deriv_gf_2_1 ( var, ni, nj, nk, dir, bb, gsize, delta, up, dvar ) a2(0) * var(:,jl:jr,:) + & a2(1) * var(:,jl+1:jr+1,:) ) * idel end where +!$omp end parallel workshare end if case (2) direction if ( zero_derivs_z /= 0 ) then @@ -288,6 +298,7 @@ subroutine up_deriv_gf_2_1 ( var, ni, nj, nk, dir, bb, gsize, delta, up, dvar ) kr = nk - 2 end if if (kl > kr+1) call CCTK_WARN (0, "domain too small") +!$omp parallel workshare where ( up(:,:,kl:kr) < zero ) dvar(:,:,kl:kr) = ( a1(-1) * var(:,:,kl-1:kr-1) + & a1(0) * var(:,:,kl:kr) + & @@ -297,6 +308,7 @@ subroutine up_deriv_gf_2_1 ( var, ni, nj, nk, dir, bb, gsize, delta, up, dvar ) a2(0) * var(:,:,kl:kr) + & a2(1) * var(:,:,kl+1:kr+1) ) * idel end where +!$omp end parallel workshare end if end select direction end subroutine up_deriv_gf_2_1 diff --git a/src/Derivatives_4_2.F90 b/src/Derivatives_4_2.F90 index 5478ffd..cc16ebd 100644 --- a/src/Derivatives_4_2.F90 +++ b/src/Derivatives_4_2.F90 @@ -70,10 +70,12 @@ subroutine deriv_gf_4_2 ( var, ni, nj, nk, dir, bb, gsize, delta, dvar ) ir = ni - 4 end if if (il > ir+1) call CCTK_WARN (0, "domain too small") +!$omp parallel workshare dvar(il:ir,:,:) = ( a(1) * ( var(il+1:ir+1,:,:) - & var(il-1:ir-1,:,:) ) + & a(2) * ( var(il+2:ir+2,:,:) - & var(il-2:ir-2,:,:) ) ) * idel +!$omp end parallel workshare case (1) direction if ( zero_derivs_y /= 0 ) then dvar = zero @@ -110,10 +112,12 @@ subroutine deriv_gf_4_2 ( var, ni, nj, nk, dir, bb, gsize, delta, dvar ) jr = nj - 4 end if if (jl > jr+1) call CCTK_WARN (0, "domain too small") +!$omp parallel workshare dvar(:,jl:jr,:) = ( a(1) * ( var(:,jl+1:jr+1,:) - & var(:,jl-1:jr-1,:) ) + & a(2) * ( var(:,jl+2:jr+2,:) - & var(:,jl-2:jr-2,:) ) ) * idel +!$omp end parallel workshare end if case (2) direction if ( zero_derivs_z /= 0 ) then @@ -151,10 +155,12 @@ subroutine deriv_gf_4_2 ( var, ni, nj, nk, dir, bb, gsize, delta, dvar ) kr = nk - 4 end if if (kl > kr+1) call CCTK_WARN (0, "domain too small") +!$omp parallel workshare dvar(:,:,kl:kr) = ( a(1) * ( var(:,:,kl+1:kr+1) - & var(:,:,kl-1:kr-1) ) + & a(2) * ( var(:,:,kl+2:kr+2) - & var(:,:,kl-2:kr-2) ) ) * idel +!$omp end parallel workshare end if end select direction end subroutine deriv_gf_4_2 @@ -289,6 +295,7 @@ subroutine up_deriv_gf_4_2 ( var, ni, nj, nk, dir, bb, gsize, delta, up, dvar ) ir = ni - 4 end if if (il > ir+1) call CCTK_WARN (0, "domain too small") +!$omp parallel workshare where ( up(il:ir,:,:) < zero ) dvar(il:ir,:,:) = ( a1(-2) * var(il-2:ir-2,:,:) + & a1(-1) * var(il-1:ir-1,:,:) + & @@ -302,6 +309,7 @@ subroutine up_deriv_gf_4_2 ( var, ni, nj, nk, dir, bb, gsize, delta, up, dvar ) a2(1) * var(il+1:ir+1,:,:) + & a2(2) * var(il+2:ir+2,:,:) ) * idel end where +!$omp end parallel workshare case (1) direction if ( zero_derivs_y /= 0 ) then dvar = zero @@ -399,6 +407,7 @@ subroutine up_deriv_gf_4_2 ( var, ni, nj, nk, dir, bb, gsize, delta, up, dvar ) jr = nj - 4 end if if (jl > jr+1) call CCTK_WARN (0, "domain too small") +!$omp parallel workshare where ( up(:,jl:jr,:) < zero ) dvar(:,jl:jr,:) = ( a1(-2) * var(:,jl-2:jr-2,:) + & a1(-1) * var(:,jl-1:jr-1,:) + & @@ -412,6 +421,7 @@ subroutine up_deriv_gf_4_2 ( var, ni, nj, nk, dir, bb, gsize, delta, up, dvar ) a2(1) * var(:,jl+1:jr+1,:) + & a2(2) * var(:,jl+2:jr+2,:) ) * idel end where +!$omp end parallel workshare end if case (2) direction if ( zero_derivs_z /= 0 ) then @@ -510,6 +520,7 @@ subroutine up_deriv_gf_4_2 ( var, ni, nj, nk, dir, bb, gsize, delta, up, dvar ) kr = nk - 4 end if if (kl > kr+1) call CCTK_WARN (0, "domain too small") +!$omp parallel workshare where ( up(:,:,kl:kr) < zero ) dvar(:,:,kl:kr) = ( a1(-2) * var(:,:,kl-2:kr-2) + & a1(-1) * var(:,:,kl-1:kr-1) + & @@ -523,6 +534,7 @@ subroutine up_deriv_gf_4_2 ( var, ni, nj, nk, dir, bb, gsize, delta, up, dvar ) a2(1) * var(:,:,kl+1:kr+1) + & a2(2) * var(:,:,kl+2:kr+2) ) * idel end where +!$omp end parallel workshare end if end select direction end subroutine up_deriv_gf_4_2 diff --git a/src/Dissipation_2_1.F90 b/src/Dissipation_2_1.F90 index 0b625f4..d6a3d5e 100644 --- a/src/Dissipation_2_1.F90 +++ b/src/Dissipation_2_1.F90 @@ -19,7 +19,6 @@ subroutine dissipation_2_1 (var, ni, nj, nk, bb, gsize, delta, epsilon, rhs) CCTK_REAL :: zero = 0.0 integer, parameter :: wp = kind(zero) - integer :: i, j, k CCTK_REAL, dimension(3,2) :: a CCTK_REAL :: idel @@ -55,11 +54,13 @@ subroutine dissipation_2_1 (var, ni, nj, nk, bb, gsize, delta, epsilon, rhs) ir = ni - 2 end if +!$omp parallel workshare rhs(il:ir,:,:) = rhs(il:ir,:,:) + & ( -2.0_wp * var(il:ir,:,:) + & ( var(il-1:ir-1,:,:) + & var(il+1:ir+1,:,:) ) ) * idel +!$omp end parallel workshare if ( zero_derivs_y == 0 ) then call set_coeff ( a ) @@ -91,10 +92,12 @@ subroutine dissipation_2_1 (var, ni, nj, nk, bb, gsize, delta, epsilon, rhs) jr = nj - 2 end if +!$omp parallel workshare rhs(:,jl:jr,:) = rhs(:,jl:jr,:) + & ( -2.0_wp * var(:,jl:jr,:) + & ( var(:,jl-1:jr-1,:) + & var(:,jl+1:jr+1,:) ) ) * idel +!$omp end parallel workshare end if if ( zero_derivs_z == 0 ) then @@ -128,10 +131,12 @@ subroutine dissipation_2_1 (var, ni, nj, nk, bb, gsize, delta, epsilon, rhs) kr = nk - 2 end if +!$omp parallel workshare rhs(:,:,kl:kr) = rhs(:,:,kl:kr) + & ( -2.0_wp * var(:,:,kl:kr) + & ( var(:,:,kl-1:kr-1) + & var(:,:,kl+1:kr+1) ) ) * idel +!$omp end parallel workshare end if contains -- cgit v1.2.3