From 61c1c7243f4d6e401deb99fb93730afdcae4b9e6 Mon Sep 17 00:00:00 2001 From: schnetter Date: Sat, 29 May 2004 19:16:55 +0000 Subject: Provide more optimised versions for common cases. git-svn-id: http://svn.cactuscode.org/arrangements/CactusNumerical/Slab/trunk@34 2e825fa2-fb71-486d-8b7f-a5ff3f0f6cb8 --- src/slab.c | 253 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 241 insertions(+), 12 deletions(-) diff --git a/src/slab.c b/src/slab.c index f4450e8..e1b4ada 100644 --- a/src/slab.c +++ b/src/slab.c @@ -996,7 +996,7 @@ int Slab_Transfer (cGH const * restrict const cctkGH, if (info[0].xpose==0 && info[1].xpose==1 && info[2].xpose==2 && srcdetail[n*SLAB_MAXDIM ].str==1 && srcdetail[n*SLAB_MAXDIM+1].str==1 && srcdetail[n*SLAB_MAXDIM+2].str==1 && srctype == CCTK_VARIABLE_REAL) { - /* Optimised version for a special case */ + /* Optimised version for a special case: no transposing */ int const srcoffi = info[0].src.local.off; int const srcoffj = info[1].src.local.off; @@ -1014,9 +1014,9 @@ int Slab_Transfer (cGH const * restrict const cctkGH, int const srcdetaillenj = srcdetail[n*SLAB_MAXDIM+1].len; int const srcdetaillenk = srcdetail[n*SLAB_MAXDIM+2].len; -if (n==0) assert (srcoffset[n]==0); -if (n=0 && srcindi=0 && srcindj=0 && srcindk= info[c].src.local.off + && srcipos[c] < info[c].src.local.off + info[c].src.local.len); + assert (srcipos[c] >= allinfo[n*SLAB_MAXDIM+c].src.slab.off + && srcipos[c] <= allinfo[n*SLAB_MAXDIM+c].src.slab.off + (allinfo[n*SLAB_MAXDIM+c].src.slab.len - 1)); + bufipos[d] = ipos[d]; + assert (bufipos[d] >= 0 && bufipos[d] < srcdetail[n*SLAB_MAXDIM+c].len); + } + srcind = 0; + bufind = 0; + for (d=SLAB_MAXDIM-1; d>=0; --d) { + int const c = info[d].xpose; + srcind = srcind * info[d].src.local.len + srcipos[d] - info[d].src.local.off; + bufind = bufind * srcdetail[n*SLAB_MAXDIM+c].len + bufipos[d]; + } + assert (srcind < srclentot); + assert (bufind < (size_t)srccount[n]); + ((CCTK_REAL*)srcdata)[srcoffset[n] + bufind] + = ((const CCTK_REAL*)srcptr)[srcind]; + } + } + } + } else { /* Generic, unoptimised version */ @@ -1052,9 +1132,6 @@ if (n= info[c].src.local.off && srcipos[c] < info[c].src.local.off + info[c].src.local.len); - if (! (srcipos[c] >= allinfo[n*SLAB_MAXDIM+c].src.slab.off - && srcipos[c] <= allinfo[n*SLAB_MAXDIM+c].src.slab.off + (allinfo[n*SLAB_MAXDIM+c].src.slab.len - 1) * allinfo[n*SLAB_MAXDIM+c].src.slab.str)) { - } assert (srcipos[c] >= allinfo[n*SLAB_MAXDIM+c].src.slab.off && srcipos[c] <= allinfo[n*SLAB_MAXDIM+c].src.slab.off + (allinfo[n*SLAB_MAXDIM+c].src.slab.len - 1) * allinfo[n*SLAB_MAXDIM+c].src.slab.str); assert ((srcipos[c] - allinfo[n*SLAB_MAXDIM+c].src.slab.off) % allinfo[n*SLAB_MAXDIM+c].src.slab.str == 0); @@ -1120,11 +1197,10 @@ if (n=0 && dstindj=0 && dstindk=0 && dstindi=0 && dstindj=0 && dstindk=0 && dstindi=0 && dstindj=0 && dstindk=0 && dstindi=0 && dstindj=0 && dstindk= 0 && bufipos[d] < dstdetail[n*SLAB_MAXDIM+d].len); + dstipos[d] = dstdetail[n*SLAB_MAXDIM+d].off + ipos[d]; + ifcheck assert (dstipos[d] >= info[d].dst.local.off + && dstipos[d] < info[d].dst.local.off + info[d].dst.local.len); + ifcheck assert (dstipos[d] >= info[d].dst.slab.off + && dstipos[d] <= info[d].dst.slab.off + info[d].dst.slab.len - 1); + } + bufind = 0; + dstind = 0; + for (d=SLAB_MAXDIM-1; d>=0; --d) { + bufind = bufind * dstdetail[n*SLAB_MAXDIM+d].len + bufipos[d]; + dstind = dstind * info[d].dst.local.len + dstipos[d] - info[d].dst.local.off; + } + ifcheck assert (bufind < (size_t)dstcount[n]); + ifcheck assert (dstind < dstlentot); + ((CCTK_REAL*)dstptr)[dstind] + = ((const CCTK_REAL*)dstdata)[dstoffset[n] + bufind]; + } + } } - } } else { /* Generic, unoptimised version */ -- cgit v1.2.3