diff options
author | diener <diener@89daf98e-ef62-4674-b946-b8ff9de2216c> | 2001-04-11 17:19:17 +0000 |
---|---|---|
committer | diener <diener@89daf98e-ef62-4674-b946-b8ff9de2216c> | 2001-04-11 17:19:17 +0000 |
commit | 39686b2ce2d0957174905a0313a00d671c718bd2 (patch) | |
tree | 7b24bbaff505d657b491fe20cf2523299279ea3d /src/AHFinder_fun.F | |
parent | 76a39318182dca83e21896ce8b6099e0704e3451 (diff) |
Optimized AHFinder_fun.F. Manually inlined the legendre polynomials. Got rid
of lots of unnecessary sines and cosines, by using recursion relations.
Moved factors, used in recursion relations for the associated Legendre
Polynomials, depending only on L and M into local two-dimensional arrays,
to avoid repeating their calculations. Ditto for normalization factors.
Should give speedups of a factor of 4 on most architectures for reasonable
values of lmax. The speedup should be larger for larger values of lmax.
For some reason the speedup on the Hitachi is more like 16 for lmax=6.
git-svn-id: http://svn.einsteintoolkit.org/cactus/EinsteinAnalysis/AHFinder/trunk@193 89daf98e-ef62-4674-b946-b8ff9de2216c
Diffstat (limited to 'src/AHFinder_fun.F')
-rw-r--r-- | src/AHFinder_fun.F | 249 |
1 files changed, 197 insertions, 52 deletions
diff --git a/src/AHFinder_fun.F b/src/AHFinder_fun.F index a6a426e..8279ae2 100644 --- a/src/AHFinder_fun.F +++ b/src/AHFinder_fun.F @@ -22,16 +22,23 @@ DECLARE_CCTK_FUNCTIONS integer i,j,k - integer l,m + integer l,m,ll CCTK_REAL LEGEN CCTK_REAL xp,yp,zp,rp CCTK_REAL phi,cost,cosa,sina CCTK_REAL zero,half,one,two CCTK_REAL pi,halfpi,twopi - CCTK_REAL aux1,aux2 + CCTK_REAL aux1,aux2,auxtmp CCTK_REAL red_tmp - + CCTK_REAL local_legen, local_legen_old, local_legen_old_old + CCTK_REAL factor, aux3, aux4 + CCTK_REAL aux + CCTK_REAL cc_cosa(lmax,lmax), cs_sina(lmax,lmax), dsqrt_factor(lmax,lmax) + CCTK_REAL dsqrt_factor2(lmax), lm_factor1(0:lmax,0:lmax) + CCTK_REAL lm_factor2(0:lmax,0:lmax), lm_factor3(lmax) + CCTK_REAL sinphi,cosphi,sinphi1,cosphi1,sinphi2,cosphi2 + CCTK_REAL legen_base ! ************************** ! *** DEFINE NUMBERS *** @@ -46,76 +53,214 @@ halfpi = half*pi twopi = two*pi +! +! compute the normalization factors only once for all (l,m) +! +! For M=0 I use the normalization factor: +! +! sqrt(2*L + 1) +! +! For M non-zero I use the normalization factor: +! +! sqrt( 2 (2*L + 1) (L-M)! / (L+M)! ) +! +! The extra factor of sqrt(2) is there because I +! use a basis of sines and cosines and not the +! standard complex exponential. +! +! With this normalization, I ensure that my basis +! functions f _lm are such that: +! +! / +! | f_lm f_l;mp sin(theta) dtheta dphi = 4 pi delta_mm' delta_ll' +! / +! +! Notice the extra factor of 4 pi. This is there because +! for a sphere, I want the (0,0) coefficient to correspond +! to the radius. +! +! also compute the multiplying factors in the recursion relations +! + do l=1,lmax + dsqrt_factor2(l) = c0(l)*dsqrt(2*dble(l) + 1) + lm_factor1(l,0)=dble(2*l-1)/dble(l) + lm_factor2(l,0)=dble(l-1)/dble(l) + lm_factor3(l)=dble(2*l-1) + end do + + if (nonaxi) then + do l=1,lmax + dsqrt_factor(l,1) = dsqrt(2*dble(2*l+1)/dble(l*(l+1))) + lm_factor1(l,1)=dble(2*l-1)/dble(l-1) + lm_factor2(l,1)=dble(l)/dble(l-1) + do m=2,l + dsqrt_factor(l,m)=dsqrt_factor(l,m-1)/dsqrt(dble((l-m+1)*(l+m))) + lm_factor1(l,m)=dble(2*l-1)/dble(l-m) + lm_factor2(l,m)=dble(l+m-1)/dble(l-m) + end do + end do + end if ! ********************************* ! *** FIND HORIZON FUNCTION *** ! ********************************* - do k=1,nz - do j=1,ny - do i=1,nx - - xp = x(i,j,k) - xc - yp = y(i,j,k) - yc - zp = z(i,j,k) - zc - - rp = dsqrt(xp**2 + yp**2 + zp**2) - -! Find phi. - - if (dabs(xp).gt.dabs(yp)) then - phi = atan(dabs(yp/xp)) - else if (dabs(xp).lt.dabs(yp)) then - phi = halfpi - atan(dabs(xp/yp)) - else - phi = 0.25D0*pi + do j=1,ny + yp = y(1,j,1) - yc + do i=1,nx + xp = x(i,1,1) - xc +! +! Find phi. +! + if ((xp.ne.zero).and.(yp.ne.zero)) then + phi = atan2(yp,xp) + else + phi = zero + end if +! +! compute the expensive sines and cosines only once for all (l,m) +! using the recursion relations +! cos(m*phi) = 2*cos((m-1)*phi)*cos(phi)-cos((m-2)*phi) +! sin(m*phi) = 2*sin((m-1)*phi)*cos(phi)-sin((m-2)*phi) +! + if (.not.refy) then + sinphi = sin(phi) + sinphi1 = zero + sinphi2 = sinphi + end if + + cosphi = cos(phi) + cosphi1 = one + cosphi2 = cosphi + + do l=1,lmax + cc_cosa(l,1) = cc(l,1) * cosphi * dsqrt_factor(l,1) + if (.not.refy) then + cs_sina(l,1) = cs(l,1) * sinphi * dsqrt_factor(l,1) end if + end do - if ((xp.eq.zero).and.(yp.eq.zero)) then - phi = zero - else if ((xp.le.zero).and.(yp.ge.zero)) then - phi = pi - phi - else if ((xp.le.zero).and.(yp.le.zero)) then - phi = pi + phi - else if ((xp.ge.zero).and.(yp.le.zero)) then - phi = twopi - phi + do m=2,lmax + cosa = 2*cosphi2*cosphi-cosphi1 + cosphi1 = cosphi2 + cosphi2 = cosa + do l=m,lmax + cc_cosa(l,m) = cc(l,m) * cosa * dsqrt_factor(l,m) + end do + if (.not.refy) then + sina = 2*sinphi2*cosphi-sinphi1 + sinphi1 = sinphi2 + sinphi2 = sina + do l=m,lmax + cs_sina(l,m) = cs(l,m) * sina * dsqrt_factor(l,m) + end do end if + end do -! Monopole term. + do k=1,nz - aux1 = c0(0) + zp = z(i,j,k) - zc + rp = dsqrt(xp**2 + yp**2 + zp**2) +! +! Monopole term. +! + aux1 = c0(0) +! ! Axisymmetric terms. - +! if (rp.ne.zero) then cost = zp/rp else cost = one end if +! +! compute the contribution from M=0, L=1,Lmax +! using the recursion relations from Numerical Recipes +! + if ( lmax .gt. 0 ) then + legen_base = one + local_legen_old_old = one + local_legen_old = cost + aux1 = aux1 + dsqrt_factor2(1)*local_legen_old ! L=1 + + do l=2,lmax ! L=2,lmax + local_legen = cost*lm_factor1(l,0)*local_legen_old - + . lm_factor2(l,0)*local_legen_old_old + local_legen_old_old = local_legen_old + local_legen_old = local_legen + aux1 = aux1 + dsqrt_factor2(l)*local_legen + end do +! +! Non-axisymmetric terms. Notice how the sum over M is first. +! This will allow me to use the recursion relations to avoid +! having to start from scratch every time. Also, I sum over +! all l even if I do not want some terms. This is because +! in order to use the recursion relations I need all +! polynomials. +! + if (nonaxi) then + aux3 = dsqrt((one-cost) * (one+cost)) +! +! This general loop can only be used until l=lmax-2 +! since the recursion relations requires 2 starting values +! + do m=1,lmax-2 + legen_base = -legen_base*aux3*lm_factor3(m) ! L=M + aux1 = aux1 + legen_base*cc_cosa(m,m) + if (.not.refy) then + aux1 = aux1 + legen_base*cs_sina(m,m) + end if + local_legen_old_old = legen_base - do l=1+stepz,lmax,1+stepz - aux1 = aux1 + c0(l)*LEGEN(l,0,cost) - end do - -! Non-axisymmetric terms. Notice how the sum over M is first. -! This will allow me to use the recursion relations to avoid -! having to start from scratch every time. Also, I sum over -! all l even if I do not want some terms. This is because -! in order to use the recursion relations I need all polynomials. - - if (nonaxi) then - do m=1,lmax - aux2 = dble(m)*phi - sina = sin(aux2) - cosa = cos(aux2) - do l=m,lmax - aux2 = LEGEN(l,m,cost) - aux1 = aux1 + aux2*cc(l,m)*cosa + local_legen_old = cost*lm_factor3(m+1)* ! L=M+1 + . legen_base + aux1 = aux1 + local_legen_old*cc_cosa(m+1,m) if (.not.refy) then - aux1 = aux1 + aux2*cs(l,m)*sina + aux1 = aux1 + local_legen_old*cs_sina(m+1,m) end if + + do l=m+2,lmax ! L=M+2,lmax + local_legen = cost*lm_factor1(l,m)*local_legen_old - + . lm_factor2(l,m)*local_legen_old_old + local_legen_old_old = local_legen_old + local_legen_old = local_legen + + aux1 = aux1 + local_legen*cc_cosa(l,m) + if (.not.refy) then + aux1 = aux1 + local_legen*cs_sina(l,m) + end if + end do end do - end do +! +! then do M=lmax-1 +! + if ( lmax .gt. 1 ) then + m=lmax-1 ! L=lmax-1 + legen_base = -legen_base*aux3*lm_factor3(m) + aux1 = aux1 + legen_base*cc_cosa(m,m) + if (.not.refy) then + aux1 = aux1 + legen_base*cs_sina(m,m) + end if + + local_legen = cost*lm_factor3(lmax)* ! L=lmax + . legen_base + aux1 = aux1 + local_legen*cc_cosa(lmax,m) + if (.not.refy) then + aux1 = aux1 + local_legen*cs_sina(lmax,m) + end if + end if + end if +! +! and finally do M=lmax +! + m=lmax ! L=lmax + legen_base = -legen_base*aux3*lm_factor3(m) + aux1 = aux1 + legen_base*cc_cosa(m,m) + if (.not.refy) then + aux1 = aux1 + legen_base*cs_sina(m,m) + end if + end if ahfgrid(i,j,k) = rp - aux1 |