aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author schnetter <schnetter@2e825fa2-fb71-486d-8b7f-a5ff3f0f6cb8> 2004-05-29 19:16:55 +0000
committer schnetter <schnetter@2e825fa2-fb71-486d-8b7f-a5ff3f0f6cb8> 2004-05-29 19:16:55 +0000
commit 61c1c7243f4d6e401deb99fb93730afdcae4b9e6 (patch)
tree ebce31c42fd2d407504a80429dd1d01d0b51b7d5
parent 72f3f7e410e01b4e052b507d64c688430fb3ddb5 (diff)
Provide more optimised versions for common cases.
git-svn-id: http://svn.cactuscode.org/arrangements/CactusNumerical/Slab/trunk@34 2e825fa2-fb71-486d-8b7f-a5ff3f0f6cb8
-rw-r--r-- src/slab.c 253
1 files changed, 241 insertions, 12 deletions
diff --git a/src/slab.c b/src/slab.c
index f4450e8..e1b4ada 100644
--- a/src/slab.c
+++ b/src/slab.c
@@ -996,7 +996,7 @@ int Slab_Transfer (cGH const * restrict const cctkGH,
if (info[0].xpose==0 && info[1].xpose==1 && info[2].xpose==2
&& srcdetail[n*SLAB_MAXDIM ].str==1 && srcdetail[n*SLAB_MAXDIM+1].str==1 && srcdetail[n*SLAB_MAXDIM+2].str==1
&& srctype == CCTK_VARIABLE_REAL) {
- /* Optimised version for a special case */
+ /* Optimised version for a special case: no transposing */
int const srcoffi = info[0].src.local.off;
int const srcoffj = info[1].src.local.off;
@@ -1014,9 +1014,9 @@ int Slab_Transfer (cGH const * restrict const cctkGH,
int const srcdetaillenj = srcdetail[n*SLAB_MAXDIM+1].len;
int const srcdetaillenk = srcdetail[n*SLAB_MAXDIM+2].len;
-if (n==0) assert (srcoffset[n]==0);
-if (n<size-1) assert (srcoffset[n+1]==srcoffset[n]+srcdetailleni*srcdetaillenj*srcdetaillenk);
-
+ if (n==0) assert (srcoffset[n]==0);
+ if (n<size-1) assert (srcoffset[n+1]==srcoffset[n]+srcdetailleni*srcdetaillenj*srcdetaillenk);
+
for (k = 0; k < srcdetaillenk; ++k) {
for (j = 0; j < srcdetaillenj; ++j) {
for (i = 0; i < srcdetailleni; ++i) {
@@ -1033,6 +1033,86 @@ if (n<size-1) assert (srcoffset[n+1]==srcoffset[n]+srcdetailleni*srcdetaillenj*s
}
}
+ } else if (info[0].xpose==1 && info[1].xpose==0 && info[2].xpose==2
+ && srcdetail[n*SLAB_MAXDIM ].str==1 && srcdetail[n*SLAB_MAXDIM+1].str==1 && srcdetail[n*SLAB_MAXDIM+2].str==1
+ && srctype == CCTK_VARIABLE_REAL) {
+ /* Optimised version for a special case: transpose x and y */
+
+ int const srcoffi = info[0].src.local.off;
+ int const srcoffj = info[1].src.local.off;
+ int const srcoffk = info[2].src.local.off;
+
+ int const srcleni = info[0].src.local.len;
+ int const srclenj = info[1].src.local.len;
+ int const srclenk = info[2].src.local.len;
+
+ int const srcdetailoffi = srcdetail[n*SLAB_MAXDIM+0].off;
+ int const srcdetailoffj = srcdetail[n*SLAB_MAXDIM+1].off;
+ int const srcdetailoffk = srcdetail[n*SLAB_MAXDIM+2].off;
+
+ int const srcdetailleni = srcdetail[n*SLAB_MAXDIM+0].len;
+ int const srcdetaillenj = srcdetail[n*SLAB_MAXDIM+1].len;
+ int const srcdetaillenk = srcdetail[n*SLAB_MAXDIM+2].len;
+
+ if (n==0) assert (srcoffset[n]==0);
+ if (n<size-1) assert (srcoffset[n+1]==srcoffset[n]+srcdetailleni*srcdetaillenj*srcdetaillenk);
+
+ for (k = 0; k < srcdetaillenk; ++k) {
+ for (j = 0; j < srcdetaillenj; ++j) {
+ for (i = 0; i < srcdetailleni; ++i) {
+ int const srcindi = srcdetailoffi + i - srcoffi;
+ int const srcindj = srcdetailoffj + j - srcoffj;
+ int const srcindk = srcdetailoffk + k - srcoffk;
+ size_t const srcind = srcindi + srcleni * (srcindj + srclenj * srcindk);
+ size_t const bufind = j + srcdetaillenj * (i + srcdetailleni * k);
+ ifcheck assert (srcindi>=0 && srcindi<srcleni);
+ ifcheck assert (srcindj>=0 && srcindj<srclenj);
+ ifcheck assert (srcindk>=0 && srcindk<srclenk);
+ ((CCTK_REAL*)srcdata)[srcoffset[n] + bufind] = ((const CCTK_REAL*)srcptr)[srcind];
+ }
+ }
+ }
+
+ } else if (srcdetail[n*SLAB_MAXDIM ].str==1 && srcdetail[n*SLAB_MAXDIM+1].str==1 && srcdetail[n*SLAB_MAXDIM+2].str==1
+ && srctype == CCTK_VARIABLE_REAL) {
+ /* Optimised version for CCTK_REAL and stride 1 */
+
+ for (k = 0; k < srcdetail[n*SLAB_MAXDIM+info[2].xpose].len; ++k) {
+ for (j = 0; j < srcdetail[n*SLAB_MAXDIM+info[1].xpose].len; ++j) {
+ for (i = 0; i < srcdetail[n*SLAB_MAXDIM+info[0].xpose].len; ++i) {
+ int ipos[SLAB_MAXDIM];
+ int srcipos[SLAB_MAXDIM];
+ int bufipos[SLAB_MAXDIM];
+ size_t srcind;
+ size_t bufind;
+ ipos[0] = i;
+ ipos[1] = j;
+ ipos[2] = k;
+ for (d=0; d<SLAB_MAXDIM; ++d) {
+ int const c = info[d].xpose;
+ srcipos[c] = srcdetail[n*SLAB_MAXDIM+c].off + ipos[d];
+ assert (srcipos[c] >= info[c].src.local.off
+ && srcipos[c] < info[c].src.local.off + info[c].src.local.len);
+ assert (srcipos[c] >= allinfo[n*SLAB_MAXDIM+c].src.slab.off
+ && srcipos[c] <= allinfo[n*SLAB_MAXDIM+c].src.slab.off + (allinfo[n*SLAB_MAXDIM+c].src.slab.len - 1));
+ bufipos[d] = ipos[d];
+ assert (bufipos[d] >= 0 && bufipos[d] < srcdetail[n*SLAB_MAXDIM+c].len);
+ }
+ srcind = 0;
+ bufind = 0;
+ for (d=SLAB_MAXDIM-1; d>=0; --d) {
+ int const c = info[d].xpose;
+ srcind = srcind * info[d].src.local.len + srcipos[d] - info[d].src.local.off;
+ bufind = bufind * srcdetail[n*SLAB_MAXDIM+c].len + bufipos[d];
+ }
+ assert (srcind < srclentot);
+ assert (bufind < (size_t)srccount[n]);
+ ((CCTK_REAL*)srcdata)[srcoffset[n] + bufind]
+ = ((const CCTK_REAL*)srcptr)[srcind];
+ }
+ }
+ }
+
} else {
/* Generic, unoptimised version */
@@ -1052,9 +1132,6 @@ if (n<size-1) assert (srcoffset[n+1]==srcoffset[n]+srcdetailleni*srcdetaillenj*s
srcipos[c] = srcdetail[n*SLAB_MAXDIM+c].off + ipos[d] * srcdetail[n*SLAB_MAXDIM+c].str;
assert (srcipos[c] >= info[c].src.local.off
&& srcipos[c] < info[c].src.local.off + info[c].src.local.len);
- if (! (srcipos[c] >= allinfo[n*SLAB_MAXDIM+c].src.slab.off
- && srcipos[c] <= allinfo[n*SLAB_MAXDIM+c].src.slab.off + (allinfo[n*SLAB_MAXDIM+c].src.slab.len - 1) * allinfo[n*SLAB_MAXDIM+c].src.slab.str)) {
- }
assert (srcipos[c] >= allinfo[n*SLAB_MAXDIM+c].src.slab.off
&& srcipos[c] <= allinfo[n*SLAB_MAXDIM+c].src.slab.off + (allinfo[n*SLAB_MAXDIM+c].src.slab.len - 1) * allinfo[n*SLAB_MAXDIM+c].src.slab.str);
assert ((srcipos[c] - allinfo[n*SLAB_MAXDIM+c].src.slab.off) % allinfo[n*SLAB_MAXDIM+c].src.slab.str == 0);
@@ -1120,11 +1197,10 @@ if (n<size-1) assert (srcoffset[n+1]==srcoffset[n]+srcdetailleni*srcdetaillenj*s
for (n = 0; n < size; ++n) {
assert (SLAB_MAXDIM == 3);
- if (info[0].xpose==0 && info[1].xpose==1 && info[2].xpose==2
- && info[0].flip==0 && info[1].flip==0 && info[2].flip==0
+ if (info[0].flip==0 && info[1].flip==0 && info[2].flip==0
&& dstdetail[n*SLAB_MAXDIM ].str==1 && dstdetail[n*SLAB_MAXDIM+1].str==1 && dstdetail[n*SLAB_MAXDIM+2].str==1
&& dsttype == CCTK_VARIABLE_REAL) {
- /* Optimised version for a special case */
+ /* Optimised version for a special case: no flipping */
int const dstoffi = info[0].dst.local.off;
int const dstoffj = info[1].dst.local.off;
@@ -1154,9 +1230,162 @@ if (n<size-1) assert (srcoffset[n+1]==srcoffset[n]+srcdetailleni*srcdetaillenj*s
ifcheck assert (dstindj>=0 && dstindj<dstlenj);
ifcheck assert (dstindk>=0 && dstindk<dstlenk);
((CCTK_REAL*)dstptr)[dstind] = ((const CCTK_REAL*)dstdata)[dstoffset[n] + bufind];
- }
+ }
+ }
+ }
+
+ } else if (info[0].flip==1 && info[1].flip==0 && info[2].flip==0
+ && dstdetail[n*SLAB_MAXDIM ].str==1 && dstdetail[n*SLAB_MAXDIM+1].str==1 && dstdetail[n*SLAB_MAXDIM+2].str==1
+ && dsttype == CCTK_VARIABLE_REAL) {
+ /* Optimised version for a special case: flip in x direction */
+
+ int const dstoffi = info[0].dst.local.off;
+ int const dstoffj = info[1].dst.local.off;
+ int const dstoffk = info[2].dst.local.off;
+
+ int const dstleni = info[0].dst.local.len;
+ int const dstlenj = info[1].dst.local.len;
+ int const dstlenk = info[2].dst.local.len;
+
+ int const dstdetailoffi = dstdetail[n*SLAB_MAXDIM+0].off;
+ int const dstdetailoffj = dstdetail[n*SLAB_MAXDIM+1].off;
+ int const dstdetailoffk = dstdetail[n*SLAB_MAXDIM+2].off;
+
+ int const dstdetailleni = dstdetail[n*SLAB_MAXDIM+0].len;
+ int const dstdetaillenj = dstdetail[n*SLAB_MAXDIM+1].len;
+ int const dstdetaillenk = dstdetail[n*SLAB_MAXDIM+2].len;
+
+ for (k = 0; k < dstdetaillenk; ++k) {
+ for (j = 0; j < dstdetaillenj; ++j) {
+ for (i = 0; i < dstdetailleni; ++i) {
+ size_t const bufind = (dstdetailleni - 1 - i) + dstdetailleni * (j + dstdetaillenj * k);
+ int const dstindi = dstdetailoffi + i - dstoffi;
+ int const dstindj = dstdetailoffj + j - dstoffj;
+ int const dstindk = dstdetailoffk + k - dstoffk;
+ size_t const dstind = dstindi + dstleni * (dstindj + dstlenj * dstindk);
+ ifcheck assert (dstindi>=0 && dstindi<dstleni);
+ ifcheck assert (dstindj>=0 && dstindj<dstlenj);
+ ifcheck assert (dstindk>=0 && dstindk<dstlenk);
+ ((CCTK_REAL*)dstptr)[dstind] = ((const CCTK_REAL*)dstdata)[dstoffset[n] + bufind];
+ }
+ }
+ }
+
+ } else if (info[0].flip==0 && info[1].flip==1 && info[2].flip==0
+ && dstdetail[n*SLAB_MAXDIM ].str==1 && dstdetail[n*SLAB_MAXDIM+1].str==1 && dstdetail[n*SLAB_MAXDIM+2].str==1
+ && dsttype == CCTK_VARIABLE_REAL) {
+ /* Optimised version for a special case: flip in y direction */
+
+ int const dstoffi = info[0].dst.local.off;
+ int const dstoffj = info[1].dst.local.off;
+ int const dstoffk = info[2].dst.local.off;
+
+ int const dstleni = info[0].dst.local.len;
+ int const dstlenj = info[1].dst.local.len;
+ int const dstlenk = info[2].dst.local.len;
+
+ int const dstdetailoffi = dstdetail[n*SLAB_MAXDIM+0].off;
+ int const dstdetailoffj = dstdetail[n*SLAB_MAXDIM+1].off;
+ int const dstdetailoffk = dstdetail[n*SLAB_MAXDIM+2].off;
+
+ int const dstdetailleni = dstdetail[n*SLAB_MAXDIM+0].len;
+ int const dstdetaillenj = dstdetail[n*SLAB_MAXDIM+1].len;
+ int const dstdetaillenk = dstdetail[n*SLAB_MAXDIM+2].len;
+
+ for (k = 0; k < dstdetaillenk; ++k) {
+ for (j = 0; j < dstdetaillenj; ++j) {
+ for (i = 0; i < dstdetailleni; ++i) {
+ size_t const bufind = i + dstdetailleni * ((dstdetaillenj - 1 - j) + dstdetaillenj * k);
+ int const dstindi = dstdetailoffi + i - dstoffi;
+ int const dstindj = dstdetailoffj + j - dstoffj;
+ int const dstindk = dstdetailoffk + k - dstoffk;
+ size_t const dstind = dstindi + dstleni * (dstindj + dstlenj * dstindk);
+ ifcheck assert (dstindi>=0 && dstindi<dstleni);
+ ifcheck assert (dstindj>=0 && dstindj<dstlenj);
+ ifcheck assert (dstindk>=0 && dstindk<dstlenk);
+ ((CCTK_REAL*)dstptr)[dstind] = ((const CCTK_REAL*)dstdata)[dstoffset[n] + bufind];
+ }
+ }
+ }
+
+ } else if (info[0].flip==1 && info[1].flip==1 && info[2].flip==0
+ && dstdetail[n*SLAB_MAXDIM ].str==1 && dstdetail[n*SLAB_MAXDIM+1].str==1 && dstdetail[n*SLAB_MAXDIM+2].str==1
+ && dsttype == CCTK_VARIABLE_REAL) {
+ /* Optimised version for a special case: flip in x and y directions */
+
+ int const dstoffi = info[0].dst.local.off;
+ int const dstoffj = info[1].dst.local.off;
+ int const dstoffk = info[2].dst.local.off;
+
+ int const dstleni = info[0].dst.local.len;
+ int const dstlenj = info[1].dst.local.len;
+ int const dstlenk = info[2].dst.local.len;
+
+ int const dstdetailoffi = dstdetail[n*SLAB_MAXDIM+0].off;
+ int const dstdetailoffj = dstdetail[n*SLAB_MAXDIM+1].off;
+ int const dstdetailoffk = dstdetail[n*SLAB_MAXDIM+2].off;
+
+ int const dstdetailleni = dstdetail[n*SLAB_MAXDIM+0].len;
+ int const dstdetaillenj = dstdetail[n*SLAB_MAXDIM+1].len;
+ int const dstdetaillenk = dstdetail[n*SLAB_MAXDIM+2].len;
+
+ for (k = 0; k < dstdetaillenk; ++k) {
+ for (j = 0; j < dstdetaillenj; ++j) {
+ for (i = 0; i < dstdetailleni; ++i) {
+ size_t const bufind = (dstdetailleni - 1 - i) + dstdetailleni * ((dstdetaillenj - 1 - j) + dstdetaillenj * k);
+ int const dstindi = dstdetailoffi + i - dstoffi;
+ int const dstindj = dstdetailoffj + j - dstoffj;
+ int const dstindk = dstdetailoffk + k - dstoffk;
+ size_t const dstind = dstindi + dstleni * (dstindj + dstlenj * dstindk);
+ ifcheck assert (dstindi>=0 && dstindi<dstleni);
+ ifcheck assert (dstindj>=0 && dstindj<dstlenj);
+ ifcheck assert (dstindk>=0 && dstindk<dstlenk);
+ ((CCTK_REAL*)dstptr)[dstind] = ((const CCTK_REAL*)dstdata)[dstoffset[n] + bufind];
+ }
+ }
+ }
+
+ } else if (dstdetail[n*SLAB_MAXDIM ].str==1 && dstdetail[n*SLAB_MAXDIM+1].str==1 && dstdetail[n*SLAB_MAXDIM+2].str==1
+ && dsttype == CCTK_VARIABLE_REAL) {
+ /* Optimised version for CCTK_REAL and stride 1 */
+
+ for (k = 0; k < dstdetail[n*SLAB_MAXDIM+2].len; ++k) {
+ for (j = 0; j < dstdetail[n*SLAB_MAXDIM+1].len; ++j) {
+ for (i = 0; i < dstdetail[n*SLAB_MAXDIM+0].len; ++i) {
+ int ipos[SLAB_MAXDIM];
+ int bufipos[SLAB_MAXDIM];
+ int dstipos[SLAB_MAXDIM];
+ size_t bufind;
+ size_t dstind;
+ ipos[0] = i;
+ ipos[1] = j;
+ ipos[2] = k;
+ for (d=0; d<SLAB_MAXDIM; ++d) {
+ if (! info[d].flip) {
+ bufipos[d] = ipos[d];
+ } else {
+ bufipos[d] = dstdetail[n*SLAB_MAXDIM+d].len - 1 - ipos[d];
+ }
+ ifcheck assert (bufipos[d] >= 0 && bufipos[d] < dstdetail[n*SLAB_MAXDIM+d].len);
+ dstipos[d] = dstdetail[n*SLAB_MAXDIM+d].off + ipos[d];
+ ifcheck assert (dstipos[d] >= info[d].dst.local.off
+ && dstipos[d] < info[d].dst.local.off + info[d].dst.local.len);
+ ifcheck assert (dstipos[d] >= info[d].dst.slab.off
+ && dstipos[d] <= info[d].dst.slab.off + info[d].dst.slab.len - 1);
+ }
+ bufind = 0;
+ dstind = 0;
+ for (d=SLAB_MAXDIM-1; d>=0; --d) {
+ bufind = bufind * dstdetail[n*SLAB_MAXDIM+d].len + bufipos[d];
+ dstind = dstind * info[d].dst.local.len + dstipos[d] - info[d].dst.local.off;
+ }
+ ifcheck assert (bufind < (size_t)dstcount[n]);
+ ifcheck assert (dstind < dstlentot);
+ ((CCTK_REAL*)dstptr)[dstind]
+ = ((const CCTK_REAL*)dstdata)[dstoffset[n] + bufind];
+ }
+ }
}
- }
} else {
/* Generic, unoptimised version */