From e91fc0efd260b43f20cc92e46c72e64243670d98 Mon Sep 17 00:00:00 2001 From: diener Date: Mon, 14 May 2012 19:17:30 +0000 Subject: Rewrite the interior part of the 8-4 Kreiss-Oliger dissipation operator to use an explicit loop instead of Fortran 90 array statements since the OpenMP workshare directive is not parallelized by the Intel compilers. This is based on an original patch by Christian Reisswig. This is only done for this operator since it is the one most often used in BBH simulations. git-svn-id: https://svn.cct.lsu.edu/repos/numrel/LSUThorns/SummationByParts/trunk@132 f69c4107-0314-4c4f-9ad4-17e986b73f4a --- src/Dissipation_8_4_alt.F90 | 110 +++++++++++++++++++++++++++----------------- 1 file changed, 67 insertions(+), 43 deletions(-) diff --git a/src/Dissipation_8_4_alt.F90 b/src/Dissipation_8_4_alt.F90 index 993600f..e67c703 100644 --- a/src/Dissipation_8_4_alt.F90 +++ b/src/Dissipation_8_4_alt.F90 @@ -23,7 +23,7 @@ subroutine dissipation_8_4_alt (var, ni, nj, nk, bb, gsize, offset, delta, epsil CCTK_REAL, dimension(13,8) :: a CCTK_REAL :: idel - CCTK_INT :: il, ir, jl, jr, kl, kr, ol, or + CCTK_INT :: il, ir, jl, jr, kl, kr, ol, or, i, j, k call set_coeff ( a ) @@ -144,20 +144,28 @@ subroutine dissipation_8_4_alt (var, ni, nj, nk, bb, gsize, offset, delta, epsil ir = or - 8 end if -!$OMP PARALLEL WORKSHARE - rhs(il:ir,:,:) = rhs(il:ir,:,:) + & - ( -252.0_wp * var(il:ir,:,:) + & - 210.0_wp * ( var(il-1:ir-1,:,:) + & - var(il+1:ir+1,:,:) ) - & - 120.0_wp * ( var(il-2:ir-2,:,:) + & - var(il+2:ir+2,:,:) ) + & - 45.0_wp * ( var(il-3:ir-3,:,:) + & - var(il+3:ir+3,:,:) ) - & - 10.0_wp * ( var(il-4:ir-4,:,:) + & - var(il+4:ir+4,:,:) ) + & - ( var(il-5:ir-5,:,:) + & - var(il+5:ir+5,:,:) ) ) * idel -!$OMP END PARALLEL WORKSHARE +!! 
!$OMP PARALLEL WORKSHARE +!$OMP PARALLEL DO DEFAULT(none) PRIVATE(i,j,k) SHARED(var,rhs,il,ir,nj,nk,idel) + do k = 1, nk + do j = 1, nj + do i = il, ir + rhs(i,j,k) = rhs(i,j,k) + & + ( -252.0_wp * var(i,j,k) + & + 210.0_wp * ( var(i-1,j,k) + & + var(i+1,j,k) ) - & + 120.0_wp * ( var(i-2,j,k) + & + var(i+2,j,k) ) + & + 45.0_wp * ( var(i-3,j,k) + & + var(i+3,j,k) ) - & + 10.0_wp * ( var(i-4,j,k) + & + var(i+4,j,k) ) + & + ( var(i-5,j,k) + & + var(i+5,j,k) ) ) * idel + end do + end do + end do +!$OMP END PARALLEL DO +!! !$OMP END PARALLEL WORKSHARE if ( zero_derivs_y == 0 ) then call set_coeff ( a ) @@ -279,20 +287,28 @@ subroutine dissipation_8_4_alt (var, ni, nj, nk, bb, gsize, offset, delta, epsil jr = or - 8 end if -!$OMP PARALLEL WORKSHARE - rhs(:,jl:jr,:) = rhs(:,jl:jr,:) + & - ( -252.0_wp * var(:,jl:jr,:) + & - 210.0_wp * ( var(:,jl-1:jr-1,:) + & - var(:,jl+1:jr+1,:) ) - & - 120.0_wp * ( var(:,jl-2:jr-2,:) + & - var(:,jl+2:jr+2,:) ) + & - 45.0_wp * ( var(:,jl-3:jr-3,:) + & - var(:,jl+3:jr+3,:) ) - & - 10.0_wp * ( var(:,jl-4:jr-4,:) + & - var(:,jl+4:jr+4,:) ) + & - ( var(:,jl-5:jr-5,:) + & - var(:,jl+5:jr+5,:) ) ) * idel -!$OMP END PARALLEL WORKSHARE +!! !$OMP PARALLEL WORKSHARE +!$OMP PARALLEL DO DEFAULT(none) PRIVATE(i,j,k) SHARED(var,rhs,ni,jl,jr,nk,idel) + do k = 1, nk + do j = jl, jr + do i = 1, ni + rhs(i,j,k) = rhs(i,j,k) + & + ( -252.0_wp * var(i,j,k) + & + 210.0_wp * ( var(i,j-1,k) + & + var(i,j+1,k) ) - & + 120.0_wp * ( var(i,j-2,k) + & + var(i,j+2,k) ) + & + 45.0_wp * ( var(i,j-3,k) + & + var(i,j+3,k) ) - & + 10.0_wp * ( var(i,j-4,k) + & + var(i,j+4,k) ) + & + ( var(i,j-5,k) + & + var(i,j+5,k) ) ) * idel + end do + end do + end do +!$OMP END PARALLEL DO +!! 
!$OMP END PARALLEL WORKSHARE end if if ( zero_derivs_z == 0 ) then @@ -415,20 +431,28 @@ subroutine dissipation_8_4_alt (var, ni, nj, nk, bb, gsize, offset, delta, epsil kr = or - 8 end if -!$OMP PARALLEL WORKSHARE - rhs(:,:,kl:kr) = rhs(:,:,kl:kr) + & - ( -252.0_wp * var(:,:,kl:kr) + & - 210.0_wp * ( var(:,:,kl-1:kr-1) + & - var(:,:,kl+1:kr+1) ) - & - 120.0_wp * ( var(:,:,kl-2:kr-2) + & - var(:,:,kl+2:kr+2) ) + & - 45.0_wp * ( var(:,:,kl-3:kr-3) + & - var(:,:,kl+3:kr+3) ) - & - 10.0_wp * ( var(:,:,kl-4:kr-4) + & - var(:,:,kl+4:kr+4) ) + & - ( var(:,:,kl-5:kr-5) + & - var(:,:,kl+5:kr+5) ) ) * idel -!$OMP END PARALLEL WORKSHARE +!! !$OMP PARALLEL WORKSHARE +!$OMP PARALLEL DO DEFAULT(none) PRIVATE(i,j,k) SHARED(var,rhs,ni,nj,kl,kr,idel) + do k = kl, kr + do j = 1, nj + do i = 1, ni + rhs(i,j,k) = rhs(i,j,k) + & + ( -252.0_wp * var(i,j,k) + & + 210.0_wp * ( var(i,j,k-1) + & + var(i,j,k+1) ) - & + 120.0_wp * ( var(i,j,k-2) + & + var(i,j,k+2) ) + & + 45.0_wp * ( var(i,j,k-3) + & + var(i,j,k+3) ) - & + 10.0_wp * ( var(i,j,k-4) + & + var(i,j,k+4) ) + & + ( var(i,j,k-5) + & + var(i,j,k+5) ) ) * idel + end do + end do + end do +!$OMP END PARALLEL DO +!! !$OMP END PARALLEL WORKSHARE end if contains -- cgit v1.2.3