diff options
author | diener <diener@f69c4107-0314-4c4f-9ad4-17e986b73f4a> | 2012-05-14 19:17:30 +0000 |
---|---|---|
committer | diener <diener@f69c4107-0314-4c4f-9ad4-17e986b73f4a> | 2012-05-14 19:17:30 +0000 |
commit | e91fc0efd260b43f20cc92e46c72e64243670d98 (patch) | |
tree | 99bc6292e1c24f77bf4ec6995ede4cde0ced980d | |
parent | cc09b8e3fa152c7c47a05c39fe4e39f0baa55e5e (diff) |
Rewrite the interior part of the 8-4 Kreiss-Oliger dissipation operator
to use explicit loops instead of Fortran 90 array statements, since
the OpenMP workshare directive is not parallelized by the Intel
compilers. This is based on an original patch by Christian Reisswig.
This is only done for this operator, since it is the one most often used
in BBH simulations.
git-svn-id: https://svn.cct.lsu.edu/repos/numrel/LSUThorns/SummationByParts/trunk@132 f69c4107-0314-4c4f-9ad4-17e986b73f4a
-rw-r--r-- | src/Dissipation_8_4_alt.F90 | 110 |
1 files changed, 67 insertions, 43 deletions
diff --git a/src/Dissipation_8_4_alt.F90 b/src/Dissipation_8_4_alt.F90 index 993600f..e67c703 100644 --- a/src/Dissipation_8_4_alt.F90 +++ b/src/Dissipation_8_4_alt.F90 @@ -23,7 +23,7 @@ subroutine dissipation_8_4_alt (var, ni, nj, nk, bb, gsize, offset, delta, epsil CCTK_REAL, dimension(13,8) :: a CCTK_REAL :: idel - CCTK_INT :: il, ir, jl, jr, kl, kr, ol, or + CCTK_INT :: il, ir, jl, jr, kl, kr, ol, or, i, j, k call set_coeff ( a ) @@ -144,20 +144,28 @@ subroutine dissipation_8_4_alt (var, ni, nj, nk, bb, gsize, offset, delta, epsil ir = or - 8 end if -!$OMP PARALLEL WORKSHARE - rhs(il:ir,:,:) = rhs(il:ir,:,:) + & - ( -252.0_wp * var(il:ir,:,:) + & - 210.0_wp * ( var(il-1:ir-1,:,:) + & - var(il+1:ir+1,:,:) ) - & - 120.0_wp * ( var(il-2:ir-2,:,:) + & - var(il+2:ir+2,:,:) ) + & - 45.0_wp * ( var(il-3:ir-3,:,:) + & - var(il+3:ir+3,:,:) ) - & - 10.0_wp * ( var(il-4:ir-4,:,:) + & - var(il+4:ir+4,:,:) ) + & - ( var(il-5:ir-5,:,:) + & - var(il+5:ir+5,:,:) ) ) * idel -!$OMP END PARALLEL WORKSHARE +!! !$OMP PARALLEL WORKSHARE +!$OMP PARALLEL DO DEFAULT(none) PRIVATE(i,j,k) SHARED(var,rhs,il,ir,nj,nk,idel) + do k = 1, nk + do j = 1, nj + do i = il, ir + rhs(i,j,k) = rhs(i,j,k) + & + ( -252.0_wp * var(i,j,k) + & + 210.0_wp * ( var(i-1,j,k) + & + var(i+1,j,k) ) - & + 120.0_wp * ( var(i-2,j,k) + & + var(i+2,j,k) ) + & + 45.0_wp * ( var(i-3,j,k) + & + var(i+3,j,k) ) - & + 10.0_wp * ( var(i-4,j,k) + & + var(i+4,j,k) ) + & + ( var(i-5,j,k) + & + var(i+5,j,k) ) ) * idel + end do + end do + end do +!$OMP END PARALLEL DO +!! 
!$OMP END PARALLEL WORKSHARE if ( zero_derivs_y == 0 ) then call set_coeff ( a ) @@ -279,20 +287,28 @@ subroutine dissipation_8_4_alt (var, ni, nj, nk, bb, gsize, offset, delta, epsil jr = or - 8 end if -!$OMP PARALLEL WORKSHARE - rhs(:,jl:jr,:) = rhs(:,jl:jr,:) + & - ( -252.0_wp * var(:,jl:jr,:) + & - 210.0_wp * ( var(:,jl-1:jr-1,:) + & - var(:,jl+1:jr+1,:) ) - & - 120.0_wp * ( var(:,jl-2:jr-2,:) + & - var(:,jl+2:jr+2,:) ) + & - 45.0_wp * ( var(:,jl-3:jr-3,:) + & - var(:,jl+3:jr+3,:) ) - & - 10.0_wp * ( var(:,jl-4:jr-4,:) + & - var(:,jl+4:jr+4,:) ) + & - ( var(:,jl-5:jr-5,:) + & - var(:,jl+5:jr+5,:) ) ) * idel -!$OMP END PARALLEL WORKSHARE +!! !$OMP PARALLEL WORKSHARE +!$OMP PARALLEL DO DEFAULT(none) PRIVATE(i,j,k) SHARED(var,rhs,ni,jl,jr,nk,idel) + do k = 1, nk + do j = jl, jr + do i = 1, ni + rhs(i,j,k) = rhs(i,j,k) + & + ( -252.0_wp * var(i,j,k) + & + 210.0_wp * ( var(i,j-1,k) + & + var(i,j+1,k) ) - & + 120.0_wp * ( var(i,j-2,k) + & + var(i,j+2,k) ) + & + 45.0_wp * ( var(i,j-3,k) + & + var(i,j+3,k) ) - & + 10.0_wp * ( var(i,j-4,k) + & + var(i,j+4,k) ) + & + ( var(i,j-5,k) + & + var(i,j+5,k) ) ) * idel + end do + end do + end do +!$OMP END PARALLEL DO +!! !$OMP END PARALLEL WORKSHARE end if if ( zero_derivs_z == 0 ) then @@ -415,20 +431,28 @@ subroutine dissipation_8_4_alt (var, ni, nj, nk, bb, gsize, offset, delta, epsil kr = or - 8 end if -!$OMP PARALLEL WORKSHARE - rhs(:,:,kl:kr) = rhs(:,:,kl:kr) + & - ( -252.0_wp * var(:,:,kl:kr) + & - 210.0_wp * ( var(:,:,kl-1:kr-1) + & - var(:,:,kl+1:kr+1) ) - & - 120.0_wp * ( var(:,:,kl-2:kr-2) + & - var(:,:,kl+2:kr+2) ) + & - 45.0_wp * ( var(:,:,kl-3:kr-3) + & - var(:,:,kl+3:kr+3) ) - & - 10.0_wp * ( var(:,:,kl-4:kr-4) + & - var(:,:,kl+4:kr+4) ) + & - ( var(:,:,kl-5:kr-5) + & - var(:,:,kl+5:kr+5) ) ) * idel -!$OMP END PARALLEL WORKSHARE +!! 
!$OMP PARALLEL WORKSHARE +!$OMP PARALLEL DO DEFAULT(none) PRIVATE(i,j,k) SHARED(var,rhs,ni,nj,kl,kr,idel) + do k = kl, kr + do j = 1, nj + do i = 1, ni + rhs(i,j,k) = rhs(i,j,k) + & + ( -252.0_wp * var(i,j,k) + & + 210.0_wp * ( var(i,j,k-1) + & + var(i,j,k+1) ) - & + 120.0_wp * ( var(i,j,k-2) + & + var(i,j,k+2) ) + & + 45.0_wp * ( var(i,j,k-3) + & + var(i,j,k+3) ) - & + 10.0_wp * ( var(i,j,k-4) + & + var(i,j,k+4) ) + & + ( var(i,j,k-5) + & + var(i,j,k+5) ) ) * idel + end do + end do + end do +!$OMP END PARALLEL DO +!! !$OMP END PARALLEL WORKSHARE end if contains |