aboutsummaryrefslogtreecommitdiff
path: root/src/AHFinder_fun.F
diff options
context:
space:
mode:
authordiener <diener@89daf98e-ef62-4674-b946-b8ff9de2216c>2001-04-11 17:19:17 +0000
committerdiener <diener@89daf98e-ef62-4674-b946-b8ff9de2216c>2001-04-11 17:19:17 +0000
commit39686b2ce2d0957174905a0313a00d671c718bd2 (patch)
tree7b24bbaff505d657b491fe20cf2523299279ea3d /src/AHFinder_fun.F
parent76a39318182dca83e21896ce8b6099e0704e3451 (diff)
Optimized AHFinder_fun.F. Manually inlined the legendre polynomials. Got rid
of lots of unnecessary sines and cosines, by using recursion relations. Moved factors, used in recursion relations for the associated Legendre Polynomials, depending only on L and M into local two-dimensional arrays, to avoid repeating their calculations. Ditto for normalization factors. Should give speedups of a factor of 4 on most architectures for reasonable values of lmax. The speedup should be larger for larger values of lmax. For some reason the speedup on the Hitachi is more like 16 for lmax=6. git-svn-id: http://svn.einsteintoolkit.org/cactus/EinsteinAnalysis/AHFinder/trunk@193 89daf98e-ef62-4674-b946-b8ff9de2216c
Diffstat (limited to 'src/AHFinder_fun.F')
-rw-r--r--src/AHFinder_fun.F249
1 files changed, 197 insertions, 52 deletions
diff --git a/src/AHFinder_fun.F b/src/AHFinder_fun.F
index a6a426e..8279ae2 100644
--- a/src/AHFinder_fun.F
+++ b/src/AHFinder_fun.F
@@ -22,16 +22,23 @@
DECLARE_CCTK_FUNCTIONS
integer i,j,k
- integer l,m
+ integer l,m,ll
CCTK_REAL LEGEN
CCTK_REAL xp,yp,zp,rp
CCTK_REAL phi,cost,cosa,sina
CCTK_REAL zero,half,one,two
CCTK_REAL pi,halfpi,twopi
- CCTK_REAL aux1,aux2
+ CCTK_REAL aux1,aux2,auxtmp
CCTK_REAL red_tmp
-
+ CCTK_REAL local_legen, local_legen_old, local_legen_old_old
+ CCTK_REAL factor, aux3, aux4
+ CCTK_REAL aux
+ CCTK_REAL cc_cosa(lmax,lmax), cs_sina(lmax,lmax), dsqrt_factor(lmax,lmax)
+ CCTK_REAL dsqrt_factor2(lmax), lm_factor1(0:lmax,0:lmax)
+ CCTK_REAL lm_factor2(0:lmax,0:lmax), lm_factor3(lmax)
+ CCTK_REAL sinphi,cosphi,sinphi1,cosphi1,sinphi2,cosphi2
+ CCTK_REAL legen_base
! **************************
! *** DEFINE NUMBERS ***
@@ -46,76 +53,214 @@
halfpi = half*pi
twopi = two*pi
+!
+! compute the normalization factors only once for all (l,m)
+!
+! For M=0 I use the normalization factor:
+!
+! sqrt(2*L + 1)
+!
+! For M non-zero I use the normalization factor:
+!
+! sqrt( 2 (2*L + 1) (L-M)! / (L+M)! )
+!
+! The extra factor of sqrt(2) is there because I
+! use a basis of sines and cosines and not the
+! standard complex exponential.
+!
+! With this normalization, I ensure that my basis
+! functions f _lm are such that:
+!
+! /
+! | f_lm f_l;mp sin(theta) dtheta dphi = 4 pi delta_mm' delta_ll'
+! /
+!
+! Notice the extra factor of 4 pi. This is there because
+! for a sphere, I want the (0,0) coefficient to correspond
+! to the radius.
+!
+! also compute the multiplying factors in the recursion relations
+!
+ do l=1,lmax
+ dsqrt_factor2(l) = c0(l)*dsqrt(2*dble(l) + 1)
+ lm_factor1(l,0)=dble(2*l-1)/dble(l)
+ lm_factor2(l,0)=dble(l-1)/dble(l)
+ lm_factor3(l)=dble(2*l-1)
+ end do
+
+ if (nonaxi) then
+ do l=1,lmax
+ dsqrt_factor(l,1) = dsqrt(2*dble(2*l+1)/dble(l*(l+1)))
+ lm_factor1(l,1)=dble(2*l-1)/dble(l-1)
+ lm_factor2(l,1)=dble(l)/dble(l-1)
+ do m=2,l
+ dsqrt_factor(l,m)=dsqrt_factor(l,m-1)/dsqrt(dble((l-m+1)*(l+m)))
+ lm_factor1(l,m)=dble(2*l-1)/dble(l-m)
+ lm_factor2(l,m)=dble(l+m-1)/dble(l-m)
+ end do
+ end do
+ end if
! *********************************
! *** FIND HORIZON FUNCTION ***
! *********************************
- do k=1,nz
- do j=1,ny
- do i=1,nx
-
- xp = x(i,j,k) - xc
- yp = y(i,j,k) - yc
- zp = z(i,j,k) - zc
-
- rp = dsqrt(xp**2 + yp**2 + zp**2)
-
-! Find phi.
-
- if (dabs(xp).gt.dabs(yp)) then
- phi = atan(dabs(yp/xp))
- else if (dabs(xp).lt.dabs(yp)) then
- phi = halfpi - atan(dabs(xp/yp))
- else
- phi = 0.25D0*pi
+ do j=1,ny
+ yp = y(1,j,1) - yc
+ do i=1,nx
+ xp = x(i,1,1) - xc
+!
+! Find phi.
+!
+ if ((xp.ne.zero).and.(yp.ne.zero)) then
+ phi = atan2(yp,xp)
+ else
+ phi = zero
+ end if
+!
+! compute the expensive sines and cosines only once for all (l,m)
+! using the recursion relations
+! cos(m*phi) = 2*cos((m-1)*phi)*cos(phi)-cos((m-2)*phi)
+! sin(m*phi) = 2*sin((m-1)*phi)*cos(phi)-sin((m-2)*phi)
+!
+ if (.not.refy) then
+ sinphi = sin(phi)
+ sinphi1 = zero
+ sinphi2 = sinphi
+ end if
+
+ cosphi = cos(phi)
+ cosphi1 = one
+ cosphi2 = cosphi
+
+ do l=1,lmax
+ cc_cosa(l,1) = cc(l,1) * cosphi * dsqrt_factor(l,1)
+ if (.not.refy) then
+ cs_sina(l,1) = cs(l,1) * sinphi * dsqrt_factor(l,1)
end if
+ end do
- if ((xp.eq.zero).and.(yp.eq.zero)) then
- phi = zero
- else if ((xp.le.zero).and.(yp.ge.zero)) then
- phi = pi - phi
- else if ((xp.le.zero).and.(yp.le.zero)) then
- phi = pi + phi
- else if ((xp.ge.zero).and.(yp.le.zero)) then
- phi = twopi - phi
+ do m=2,lmax
+ cosa = 2*cosphi2*cosphi-cosphi1
+ cosphi1 = cosphi2
+ cosphi2 = cosa
+ do l=m,lmax
+ cc_cosa(l,m) = cc(l,m) * cosa * dsqrt_factor(l,m)
+ end do
+ if (.not.refy) then
+ sina = 2*sinphi2*cosphi-sinphi1
+ sinphi1 = sinphi2
+ sinphi2 = sina
+ do l=m,lmax
+ cs_sina(l,m) = cs(l,m) * sina * dsqrt_factor(l,m)
+ end do
end if
+ end do
-! Monopole term.
+ do k=1,nz
- aux1 = c0(0)
+ zp = z(i,j,k) - zc
+ rp = dsqrt(xp**2 + yp**2 + zp**2)
+!
+! Monopole term.
+!
+ aux1 = c0(0)
+!
! Axisymmetric terms.
-
+!
if (rp.ne.zero) then
cost = zp/rp
else
cost = one
end if
+!
+! compute the contribution from M=0, L=1,Lmax
+! using the recursion relations from Numerical Recipes
+!
+ if ( lmax .gt. 0 ) then
+ legen_base = one
+ local_legen_old_old = one
+ local_legen_old = cost
+ aux1 = aux1 + dsqrt_factor2(1)*local_legen_old ! L=1
+
+ do l=2,lmax ! L=2,lmax
+ local_legen = cost*lm_factor1(l,0)*local_legen_old -
+ . lm_factor2(l,0)*local_legen_old_old
+ local_legen_old_old = local_legen_old
+ local_legen_old = local_legen
+ aux1 = aux1 + dsqrt_factor2(l)*local_legen
+ end do
+!
+! Non-axisymmetric terms. Notice how the sum over M is first.
+! This will allow me to use the recursion relations to avoid
+! having to start from scratch every time. Also, I sum over
+! all l even if I do not want some terms. This is because
+! in order to use the recursion relations I need all
+! polynomials.
+!
+ if (nonaxi) then
+ aux3 = dsqrt((one-cost) * (one+cost))
+!
+! This general loop can only be used until l=lmax-2
+! since the recursion relations requires 2 starting values
+!
+ do m=1,lmax-2
+ legen_base = -legen_base*aux3*lm_factor3(m) ! L=M
+ aux1 = aux1 + legen_base*cc_cosa(m,m)
+ if (.not.refy) then
+ aux1 = aux1 + legen_base*cs_sina(m,m)
+ end if
+ local_legen_old_old = legen_base
- do l=1+stepz,lmax,1+stepz
- aux1 = aux1 + c0(l)*LEGEN(l,0,cost)
- end do
-
-! Non-axisymmetric terms. Notice how the sum over M is first.
-! This will allow me to use the recursion relations to avoid
-! having to start from scratch every time. Also, I sum over
-! all l even if I do not want some terms. This is because
-! in order to use the recursion relations I need all polynomials.
-
- if (nonaxi) then
- do m=1,lmax
- aux2 = dble(m)*phi
- sina = sin(aux2)
- cosa = cos(aux2)
- do l=m,lmax
- aux2 = LEGEN(l,m,cost)
- aux1 = aux1 + aux2*cc(l,m)*cosa
+ local_legen_old = cost*lm_factor3(m+1)* ! L=M+1
+ . legen_base
+ aux1 = aux1 + local_legen_old*cc_cosa(m+1,m)
if (.not.refy) then
- aux1 = aux1 + aux2*cs(l,m)*sina
+ aux1 = aux1 + local_legen_old*cs_sina(m+1,m)
end if
+
+ do l=m+2,lmax ! L=M+2,lmax
+ local_legen = cost*lm_factor1(l,m)*local_legen_old -
+ . lm_factor2(l,m)*local_legen_old_old
+ local_legen_old_old = local_legen_old
+ local_legen_old = local_legen
+
+ aux1 = aux1 + local_legen*cc_cosa(l,m)
+ if (.not.refy) then
+ aux1 = aux1 + local_legen*cs_sina(l,m)
+ end if
+ end do
end do
- end do
+!
+! then do M=lmax-1
+!
+ if ( lmax .gt. 1 ) then
+ m=lmax-1 ! L=lmax-1
+ legen_base = -legen_base*aux3*lm_factor3(m)
+ aux1 = aux1 + legen_base*cc_cosa(m,m)
+ if (.not.refy) then
+ aux1 = aux1 + legen_base*cs_sina(m,m)
+ end if
+
+ local_legen = cost*lm_factor3(lmax)* ! L=lmax
+ . legen_base
+ aux1 = aux1 + local_legen*cc_cosa(lmax,m)
+ if (.not.refy) then
+ aux1 = aux1 + local_legen*cs_sina(lmax,m)
+ end if
+ end if
+ end if
+!
+! and finally do M=lmax
+!
+ m=lmax ! L=lmax
+ legen_base = -legen_base*aux3*lm_factor3(m)
+ aux1 = aux1 + legen_base*cc_cosa(m,m)
+ if (.not.refy) then
+ aux1 = aux1 + legen_base*cs_sina(m,m)
+ end if
+
end if
ahfgrid(i,j,k) = rp - aux1