diff options
author | schnetter <schnetter@2e825fa2-fb71-486d-8b7f-a5ff3f0f6cb8> | 2004-05-29 19:16:55 +0000 |
---|---|---|
committer | schnetter <schnetter@2e825fa2-fb71-486d-8b7f-a5ff3f0f6cb8> | 2004-05-29 19:16:55 +0000 |
commit | 61c1c7243f4d6e401deb99fb93730afdcae4b9e6 (patch) | |
tree | ebce31c42fd2d407504a80429dd1d01d0b51b7d5 | |
parent | 72f3f7e410e01b4e052b507d64c688430fb3ddb5 (diff) |
Provide more optimised versions for common cases.
git-svn-id: http://svn.cactuscode.org/arrangements/CactusNumerical/Slab/trunk@34 2e825fa2-fb71-486d-8b7f-a5ff3f0f6cb8
-rw-r--r-- | src/slab.c | 253 |
1 files changed, 241 insertions, 12 deletions
@@ -996,7 +996,7 @@ int Slab_Transfer (cGH const * restrict const cctkGH, if (info[0].xpose==0 && info[1].xpose==1 && info[2].xpose==2 && srcdetail[n*SLAB_MAXDIM ].str==1 && srcdetail[n*SLAB_MAXDIM+1].str==1 && srcdetail[n*SLAB_MAXDIM+2].str==1 && srctype == CCTK_VARIABLE_REAL) { - /* Optimised version for a special case */ + /* Optimised version for a special case: no transposing */ int const srcoffi = info[0].src.local.off; int const srcoffj = info[1].src.local.off; @@ -1014,9 +1014,9 @@ int Slab_Transfer (cGH const * restrict const cctkGH, int const srcdetaillenj = srcdetail[n*SLAB_MAXDIM+1].len; int const srcdetaillenk = srcdetail[n*SLAB_MAXDIM+2].len; -if (n==0) assert (srcoffset[n]==0); -if (n<size-1) assert (srcoffset[n+1]==srcoffset[n]+srcdetailleni*srcdetaillenj*srcdetaillenk); - + if (n==0) assert (srcoffset[n]==0); + if (n<size-1) assert (srcoffset[n+1]==srcoffset[n]+srcdetailleni*srcdetaillenj*srcdetaillenk); + for (k = 0; k < srcdetaillenk; ++k) { for (j = 0; j < srcdetaillenj; ++j) { for (i = 0; i < srcdetailleni; ++i) { @@ -1033,6 +1033,86 @@ if (n<size-1) assert (srcoffset[n+1]==srcoffset[n]+srcdetailleni*srcdetaillenj*s } } + } else if (info[0].xpose==1 && info[1].xpose==0 && info[2].xpose==2 + && srcdetail[n*SLAB_MAXDIM ].str==1 && srcdetail[n*SLAB_MAXDIM+1].str==1 && srcdetail[n*SLAB_MAXDIM+2].str==1 + && srctype == CCTK_VARIABLE_REAL) { + /* Optimised version for a special case: transpose x and y */ + + int const srcoffi = info[0].src.local.off; + int const srcoffj = info[1].src.local.off; + int const srcoffk = info[2].src.local.off; + + int const srcleni = info[0].src.local.len; + int const srclenj = info[1].src.local.len; + int const srclenk = info[2].src.local.len; + + int const srcdetailoffi = srcdetail[n*SLAB_MAXDIM+0].off; + int const srcdetailoffj = srcdetail[n*SLAB_MAXDIM+1].off; + int const srcdetailoffk = srcdetail[n*SLAB_MAXDIM+2].off; + + int const srcdetailleni = srcdetail[n*SLAB_MAXDIM+0].len; + int const srcdetaillenj = 
srcdetail[n*SLAB_MAXDIM+1].len; + int const srcdetaillenk = srcdetail[n*SLAB_MAXDIM+2].len; + + if (n==0) assert (srcoffset[n]==0); + if (n<size-1) assert (srcoffset[n+1]==srcoffset[n]+srcdetailleni*srcdetaillenj*srcdetaillenk); + + for (k = 0; k < srcdetaillenk; ++k) { + for (j = 0; j < srcdetaillenj; ++j) { + for (i = 0; i < srcdetailleni; ++i) { + int const srcindi = srcdetailoffi + i - srcoffi; + int const srcindj = srcdetailoffj + j - srcoffj; + int const srcindk = srcdetailoffk + k - srcoffk; + size_t const srcind = srcindi + srcleni * (srcindj + srclenj * srcindk); + size_t const bufind = j + srcdetaillenj * (i + srcdetailleni * k); + ifcheck assert (srcindi>=0 && srcindi<srcleni); + ifcheck assert (srcindj>=0 && srcindj<srclenj); + ifcheck assert (srcindk>=0 && srcindk<srclenk); + ((CCTK_REAL*)srcdata)[srcoffset[n] + bufind] = ((const CCTK_REAL*)srcptr)[srcind]; + } + } + } + + } else if (srcdetail[n*SLAB_MAXDIM ].str==1 && srcdetail[n*SLAB_MAXDIM+1].str==1 && srcdetail[n*SLAB_MAXDIM+2].str==1 + && srctype == CCTK_VARIABLE_REAL) { + /* Optimised version for CCTK_REAL and stride 1 */ + + for (k = 0; k < srcdetail[n*SLAB_MAXDIM+info[2].xpose].len; ++k) { + for (j = 0; j < srcdetail[n*SLAB_MAXDIM+info[1].xpose].len; ++j) { + for (i = 0; i < srcdetail[n*SLAB_MAXDIM+info[0].xpose].len; ++i) { + int ipos[SLAB_MAXDIM]; + int srcipos[SLAB_MAXDIM]; + int bufipos[SLAB_MAXDIM]; + size_t srcind; + size_t bufind; + ipos[0] = i; + ipos[1] = j; + ipos[2] = k; + for (d=0; d<SLAB_MAXDIM; ++d) { + int const c = info[d].xpose; + srcipos[c] = srcdetail[n*SLAB_MAXDIM+c].off + ipos[d]; + assert (srcipos[c] >= info[c].src.local.off + && srcipos[c] < info[c].src.local.off + info[c].src.local.len); + assert (srcipos[c] >= allinfo[n*SLAB_MAXDIM+c].src.slab.off + && srcipos[c] <= allinfo[n*SLAB_MAXDIM+c].src.slab.off + (allinfo[n*SLAB_MAXDIM+c].src.slab.len - 1)); + bufipos[d] = ipos[d]; + assert (bufipos[d] >= 0 && bufipos[d] < srcdetail[n*SLAB_MAXDIM+c].len); + } + srcind = 0; + 
bufind = 0; + for (d=SLAB_MAXDIM-1; d>=0; --d) { + int const c = info[d].xpose; + srcind = srcind * info[d].src.local.len + srcipos[d] - info[d].src.local.off; + bufind = bufind * srcdetail[n*SLAB_MAXDIM+c].len + bufipos[d]; + } + assert (srcind < srclentot); + assert (bufind < (size_t)srccount[n]); + ((CCTK_REAL*)srcdata)[srcoffset[n] + bufind] + = ((const CCTK_REAL*)srcptr)[srcind]; + } + } + } + } else { /* Generic, unoptimised version */ @@ -1052,9 +1132,6 @@ if (n<size-1) assert (srcoffset[n+1]==srcoffset[n]+srcdetailleni*srcdetaillenj*s srcipos[c] = srcdetail[n*SLAB_MAXDIM+c].off + ipos[d] * srcdetail[n*SLAB_MAXDIM+c].str; assert (srcipos[c] >= info[c].src.local.off && srcipos[c] < info[c].src.local.off + info[c].src.local.len); - if (! (srcipos[c] >= allinfo[n*SLAB_MAXDIM+c].src.slab.off - && srcipos[c] <= allinfo[n*SLAB_MAXDIM+c].src.slab.off + (allinfo[n*SLAB_MAXDIM+c].src.slab.len - 1) * allinfo[n*SLAB_MAXDIM+c].src.slab.str)) { - } assert (srcipos[c] >= allinfo[n*SLAB_MAXDIM+c].src.slab.off && srcipos[c] <= allinfo[n*SLAB_MAXDIM+c].src.slab.off + (allinfo[n*SLAB_MAXDIM+c].src.slab.len - 1) * allinfo[n*SLAB_MAXDIM+c].src.slab.str); assert ((srcipos[c] - allinfo[n*SLAB_MAXDIM+c].src.slab.off) % allinfo[n*SLAB_MAXDIM+c].src.slab.str == 0); @@ -1120,11 +1197,10 @@ if (n<size-1) assert (srcoffset[n+1]==srcoffset[n]+srcdetailleni*srcdetaillenj*s for (n = 0; n < size; ++n) { assert (SLAB_MAXDIM == 3); - if (info[0].xpose==0 && info[1].xpose==1 && info[2].xpose==2 - && info[0].flip==0 && info[1].flip==0 && info[2].flip==0 + if (info[0].flip==0 && info[1].flip==0 && info[2].flip==0 && dstdetail[n*SLAB_MAXDIM ].str==1 && dstdetail[n*SLAB_MAXDIM+1].str==1 && dstdetail[n*SLAB_MAXDIM+2].str==1 && dsttype == CCTK_VARIABLE_REAL) { - /* Optimised version for a special case */ + /* Optimised version for a special case: no flipping */ int const dstoffi = info[0].dst.local.off; int const dstoffj = info[1].dst.local.off; @@ -1154,9 +1230,162 @@ if (n<size-1) assert 
(srcoffset[n+1]==srcoffset[n]+srcdetailleni*srcdetaillenj*s ifcheck assert (dstindj>=0 && dstindj<dstlenj); ifcheck assert (dstindk>=0 && dstindk<dstlenk); ((CCTK_REAL*)dstptr)[dstind] = ((const CCTK_REAL*)dstdata)[dstoffset[n] + bufind]; - } + } + } + } + + } else if (info[0].flip==1 && info[1].flip==0 && info[2].flip==0 + && dstdetail[n*SLAB_MAXDIM ].str==1 && dstdetail[n*SLAB_MAXDIM+1].str==1 && dstdetail[n*SLAB_MAXDIM+2].str==1 + && dsttype == CCTK_VARIABLE_REAL) { + /* Optimised version for a special case: flip in x direction */ + + int const dstoffi = info[0].dst.local.off; + int const dstoffj = info[1].dst.local.off; + int const dstoffk = info[2].dst.local.off; + + int const dstleni = info[0].dst.local.len; + int const dstlenj = info[1].dst.local.len; + int const dstlenk = info[2].dst.local.len; + + int const dstdetailoffi = dstdetail[n*SLAB_MAXDIM+0].off; + int const dstdetailoffj = dstdetail[n*SLAB_MAXDIM+1].off; + int const dstdetailoffk = dstdetail[n*SLAB_MAXDIM+2].off; + + int const dstdetailleni = dstdetail[n*SLAB_MAXDIM+0].len; + int const dstdetaillenj = dstdetail[n*SLAB_MAXDIM+1].len; + int const dstdetaillenk = dstdetail[n*SLAB_MAXDIM+2].len; + + for (k = 0; k < dstdetaillenk; ++k) { + for (j = 0; j < dstdetaillenj; ++j) { + for (i = 0; i < dstdetailleni; ++i) { + size_t const bufind = (dstdetailleni - 1 - i) + dstdetailleni * (j + dstdetaillenj * k); + int const dstindi = dstdetailoffi + i - dstoffi; + int const dstindj = dstdetailoffj + j - dstoffj; + int const dstindk = dstdetailoffk + k - dstoffk; + size_t const dstind = dstindi + dstleni * (dstindj + dstlenj * dstindk); + ifcheck assert (dstindi>=0 && dstindi<dstleni); + ifcheck assert (dstindj>=0 && dstindj<dstlenj); + ifcheck assert (dstindk>=0 && dstindk<dstlenk); + ((CCTK_REAL*)dstptr)[dstind] = ((const CCTK_REAL*)dstdata)[dstoffset[n] + bufind]; + } + } + } + + } else if (info[0].flip==0 && info[1].flip==1 && info[2].flip==0 + && dstdetail[n*SLAB_MAXDIM ].str==1 && 
dstdetail[n*SLAB_MAXDIM+1].str==1 && dstdetail[n*SLAB_MAXDIM+2].str==1 + && dsttype == CCTK_VARIABLE_REAL) { + /* Optimised version for a special case: flip in y direction */ + + int const dstoffi = info[0].dst.local.off; + int const dstoffj = info[1].dst.local.off; + int const dstoffk = info[2].dst.local.off; + + int const dstleni = info[0].dst.local.len; + int const dstlenj = info[1].dst.local.len; + int const dstlenk = info[2].dst.local.len; + + int const dstdetailoffi = dstdetail[n*SLAB_MAXDIM+0].off; + int const dstdetailoffj = dstdetail[n*SLAB_MAXDIM+1].off; + int const dstdetailoffk = dstdetail[n*SLAB_MAXDIM+2].off; + + int const dstdetailleni = dstdetail[n*SLAB_MAXDIM+0].len; + int const dstdetaillenj = dstdetail[n*SLAB_MAXDIM+1].len; + int const dstdetaillenk = dstdetail[n*SLAB_MAXDIM+2].len; + + for (k = 0; k < dstdetaillenk; ++k) { + for (j = 0; j < dstdetaillenj; ++j) { + for (i = 0; i < dstdetailleni; ++i) { + size_t const bufind = i + dstdetailleni * ((dstdetaillenj - 1 - j) + dstdetaillenj * k); + int const dstindi = dstdetailoffi + i - dstoffi; + int const dstindj = dstdetailoffj + j - dstoffj; + int const dstindk = dstdetailoffk + k - dstoffk; + size_t const dstind = dstindi + dstleni * (dstindj + dstlenj * dstindk); + ifcheck assert (dstindi>=0 && dstindi<dstleni); + ifcheck assert (dstindj>=0 && dstindj<dstlenj); + ifcheck assert (dstindk>=0 && dstindk<dstlenk); + ((CCTK_REAL*)dstptr)[dstind] = ((const CCTK_REAL*)dstdata)[dstoffset[n] + bufind]; + } + } + } + + } else if (info[0].flip==1 && info[1].flip==1 && info[2].flip==0 + && dstdetail[n*SLAB_MAXDIM ].str==1 && dstdetail[n*SLAB_MAXDIM+1].str==1 && dstdetail[n*SLAB_MAXDIM+2].str==1 + && dsttype == CCTK_VARIABLE_REAL) { + /* Optimised version for a special case: flip in x and y directions */ + + int const dstoffi = info[0].dst.local.off; + int const dstoffj = info[1].dst.local.off; + int const dstoffk = info[2].dst.local.off; + + int const dstleni = info[0].dst.local.len; + int const dstlenj = 
info[1].dst.local.len; + int const dstlenk = info[2].dst.local.len; + + int const dstdetailoffi = dstdetail[n*SLAB_MAXDIM+0].off; + int const dstdetailoffj = dstdetail[n*SLAB_MAXDIM+1].off; + int const dstdetailoffk = dstdetail[n*SLAB_MAXDIM+2].off; + + int const dstdetailleni = dstdetail[n*SLAB_MAXDIM+0].len; + int const dstdetaillenj = dstdetail[n*SLAB_MAXDIM+1].len; + int const dstdetaillenk = dstdetail[n*SLAB_MAXDIM+2].len; + + for (k = 0; k < dstdetaillenk; ++k) { + for (j = 0; j < dstdetaillenj; ++j) { + for (i = 0; i < dstdetailleni; ++i) { + size_t const bufind = (dstdetailleni - 1 - i) + dstdetailleni * ((dstdetaillenj - 1 - j) + dstdetaillenj * k); + int const dstindi = dstdetailoffi + i - dstoffi; + int const dstindj = dstdetailoffj + j - dstoffj; + int const dstindk = dstdetailoffk + k - dstoffk; + size_t const dstind = dstindi + dstleni * (dstindj + dstlenj * dstindk); + ifcheck assert (dstindi>=0 && dstindi<dstleni); + ifcheck assert (dstindj>=0 && dstindj<dstlenj); + ifcheck assert (dstindk>=0 && dstindk<dstlenk); + ((CCTK_REAL*)dstptr)[dstind] = ((const CCTK_REAL*)dstdata)[dstoffset[n] + bufind]; + } + } + } + + } else if (dstdetail[n*SLAB_MAXDIM ].str==1 && dstdetail[n*SLAB_MAXDIM+1].str==1 && dstdetail[n*SLAB_MAXDIM+2].str==1 + && dsttype == CCTK_VARIABLE_REAL) { + /* Optimised version for CCTK_REAL and stride 1 */ + + for (k = 0; k < dstdetail[n*SLAB_MAXDIM+2].len; ++k) { + for (j = 0; j < dstdetail[n*SLAB_MAXDIM+1].len; ++j) { + for (i = 0; i < dstdetail[n*SLAB_MAXDIM+0].len; ++i) { + int ipos[SLAB_MAXDIM]; + int bufipos[SLAB_MAXDIM]; + int dstipos[SLAB_MAXDIM]; + size_t bufind; + size_t dstind; + ipos[0] = i; + ipos[1] = j; + ipos[2] = k; + for (d=0; d<SLAB_MAXDIM; ++d) { + if (! 
info[d].flip) { + bufipos[d] = ipos[d]; + } else { + bufipos[d] = dstdetail[n*SLAB_MAXDIM+d].len - 1 - ipos[d]; + } + ifcheck assert (bufipos[d] >= 0 && bufipos[d] < dstdetail[n*SLAB_MAXDIM+d].len); + dstipos[d] = dstdetail[n*SLAB_MAXDIM+d].off + ipos[d]; + ifcheck assert (dstipos[d] >= info[d].dst.local.off + && dstipos[d] < info[d].dst.local.off + info[d].dst.local.len); + ifcheck assert (dstipos[d] >= info[d].dst.slab.off + && dstipos[d] <= info[d].dst.slab.off + info[d].dst.slab.len - 1); + } + bufind = 0; + dstind = 0; + for (d=SLAB_MAXDIM-1; d>=0; --d) { + bufind = bufind * dstdetail[n*SLAB_MAXDIM+d].len + bufipos[d]; + dstind = dstind * info[d].dst.local.len + dstipos[d] - info[d].dst.local.off; + } + ifcheck assert (bufind < (size_t)dstcount[n]); + ifcheck assert (dstind < dstlentot); + ((CCTK_REAL*)dstptr)[dstind] + = ((const CCTK_REAL*)dstdata)[dstoffset[n] + bufind]; + } + } } - } } else { /* Generic, unoptimised version */ |