diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/slab.c | 353 |
1 files changed, 208 insertions, 145 deletions
@@ -102,11 +102,12 @@ MPI_Allgather (void * sendbuf, int sendcnt, int sendtype, void * recvbuf, int recvcnt, int recvtype, MPI_Comm comm) { + int recvsize; assert (sendbuf); assert (recvbuf); assert (sendcnt == recvcnt); assert (sendtype == recvtype); - int const recvsize = CCTK_VarTypeSize (recvtype); + recvsize = CCTK_VarTypeSize (recvtype); assert (size > 0); memcpy (recvbuf, sendbuf, recvcnt * recvsize); return 0; @@ -117,11 +118,12 @@ MPI_Alltoall (void * sendbuf, int sendcnt, int sendtype, void * recvbuf, int recvcnt, int recvtype, MPI_Comm comm) { + int recvsize; assert (sendbuf); assert (recvbuf); assert (sendcnt == recvcnt); assert (sendtype == recvtype); - int const recvsize = CCTK_VarTypeSize (recvtype); + recvsize = CCTK_VarTypeSize (recvtype); assert (size > 0); memcpy (recvbuf, sendbuf, recvcnt * recvsize); return 0; @@ -132,6 +134,7 @@ MPI_Alltoallv (void * sendbuf, int * sendcnt, int * sendoff, int sendtype, void * recvbuf, int * recvcnt, int * recvoff, int recvtype, MPI_Comm comm) { + int recvsize; assert (sendbuf); assert (recvbuf); assert (sendcnt); @@ -142,7 +145,7 @@ MPI_Alltoallv (void * sendbuf, int * sendcnt, int * sendoff, int sendtype, assert (*sendoff == 0); assert (*recvoff == 0); assert (sendtype == recvtype); - int const recvsize = CCTK_VarTypeSize (recvtype); + recvsize = CCTK_VarTypeSize (recvtype); assert (size > 0); memcpy (recvbuf, sendbuf, *recvcnt * recvsize); return 0; @@ -232,6 +235,8 @@ static void active2bbox (struct slabinfo const * restrict const slab, struct bbox * restrict const bbox, int const useghosts) { + int nlghostzones; + int nughostzones; assert (slab); assert (bbox); assert (useghosts == 0 || useghosts == 1); @@ -241,8 +246,8 @@ static void active2bbox (struct slabinfo const * restrict const slab, assert (slab->lbbox == 0 || slab->lbbox == 1); assert (slab->ubbox == 0 || slab->ubbox == 1); assert (slab->nghostzones >= 0); - int const nlghostzones = slab->lbbox || useghosts ? 0 : slab->nghostzones; - int const nughostzones = slab->ubbox || useghosts ? 0 : slab->nghostzones; + nlghostzones = slab->lbbox || useghosts ? 0 : slab->nghostzones; + nughostzones = slab->ubbox || useghosts ? 0 : slab->nghostzones; bbox->off = slab->lbnd + nlghostzones; bbox->len = slab->lsh - nlghostzones - nughostzones; bbox->str = 1; @@ -263,20 +268,24 @@ static void slab2bbox (struct slabinfo const * restrict const slab, static int bbox_iscontained (struct bbox const * restrict const inner, struct bbox const * restrict const outer) { + int inner_last; + int outer_last; bbox_check (inner); bbox_check (outer); - int const inner_last = inner->off + (inner->len - 1) * inner->str; - int const outer_last = outer->off + (outer->len - 1) * outer->str; + inner_last = inner->off + (inner->len - 1) * inner->str; + outer_last = outer->off + (outer->len - 1) * outer->str; return inner->off >= outer->off && inner_last <= outer_last; } static void bbox_clip (struct bbox * restrict const inner, struct bbox const * restrict const outer) { + int inner_last; + int outer_last; bbox_check (inner); bbox_check (outer); - int inner_last = inner->off + (inner->len - 1) * inner->str; - int const outer_last = outer->off + (outer->len - 1) * outer->str; + inner_last = inner->off + (inner->len - 1) * inner->str; + outer_last = outer->off + (outer->len - 1) * outer->str; if (inner->off < outer->off) { inner->off += roundup (outer->off - inner->off, inner->str); } @@ -298,17 +307,20 @@ static void bbox_xform (struct bbox * restrict const ydst, struct bbox const * restrict const xsrc, int const flip) { + int xsrc_last; + int xdst_last; + int ysrc_last; + int ydst_last; assert (ydst); bbox_check (ysrc); bbox_check (xdst); bbox_check (xsrc); assert (ysrc->str == xsrc->str); - int const xsrc_last = xsrc->off + (xsrc->len - 1) * xsrc->str; - int const xdst_last = xdst->off + (xdst->len - 1) * xdst->str; - int const ysrc_last = ysrc->off + (ysrc->len - 1) * ysrc->str; + xsrc_last = xsrc->off + (xsrc->len - 1) * xsrc->str; + xdst_last = xdst->off + (xdst->len - 1) * xdst->str; + ysrc_last = ysrc->off + (ysrc->len - 1) * ysrc->str; ydst->str = xdst->str; assert ((ysrc->off - xsrc->off) % ysrc->str == 0); - int ydst_last; ydst->off = xdst->off + (ysrc->off - xsrc->off) / ysrc->str * ydst->str; ydst_last = xdst->off + (ysrc_last - xsrc->off) / ysrc->str * ydst->str; if (flip) { @@ -333,6 +345,29 @@ int Slab_Transfer (cGH * const cctkGH, int const dsttype, void * const dstptr) { + struct info * restrict info; + size_t srclentot, dstlentot; + + struct info * restrict allinfo; + struct bbox * restrict srcdetail; + struct bbox * restrict dstdetail; + + int * restrict srccount; + int * restrict srcoffset; + int * restrict dstcount; + int * restrict dstoffset; + + void * restrict srcdata; + void * restrict dstdata; + + MPI_Comm comm; + int size, rank; + MPI_Datatype srcdatatype, dstdatatype; + + int i, j, k; + int n; + int d; + /* Check arguments */ assert (cctkGH); assert (dim >= 0); @@ -342,8 +377,9 @@ int Slab_Transfer (cGH * const cctkGH, assert (dsttype == CCTK_VARIABLE_REAL); assert (dstptr); - struct info info[dim]; - for (int d=0; d<dim; ++d) { + info = malloc (dim * sizeof *info); + assert (info); + for (d=0; d<dim; ++d) { global2bbox (&xferinfo[d].src, &info[d].src.global); local2bbox (&xferinfo[d].src, &info[d].src.local); active2bbox (&xferinfo[d].src, &info[d].src.active, 0); @@ -366,32 +402,30 @@ int Slab_Transfer (cGH * const cctkGH, { int iflag[dim]; - for (int d=0; d<dim; ++d) { + for (d=0; d<dim; ++d) { iflag[d] = 0; } - for (int d=0; d<dim; ++d) { + for (d=0; d<dim; ++d) { assert (! iflag[info[d].xpose]); iflag[info[d].xpose] = 1; } - for (int d=0; d<dim; ++d) { + for (d=0; d<dim; ++d) { assert (iflag[d]); } - for (int d=0; d<dim; ++d) { + for (d=0; d<dim; ++d) { assert (info[info[d].xpose].src.slab.len == info[d].dst.slab.len); } } - size_t srclentot, dstlentot; srclentot = 1; dstlentot = 1; - for (int d=0; d<dim; ++d) { + for (d=0; d<dim; ++d) { srclentot *= info[d].src.local.len; dstlentot *= info[d].dst.local.len; } - MPI_Comm comm; #ifdef CCTK_MPI # if defined CARPET_CARPET comm = CarpetMPIComm (); @@ -409,209 +443,226 @@ int Slab_Transfer (cGH * const cctkGH, MPI_Barrier (comm); } - int size, rank; MPI_Comm_size (comm, &size); MPI_Comm_rank (comm, &rank); assert (sizeof(CCTK_REAL) == sizeof(double)); - MPI_Datatype srcdatatype, dstdatatype; srcdatatype = MPI_DOUBLE; dstdatatype = MPI_DOUBLE; - struct info allinfo[size][dim]; - int const info_nints = sizeof(struct info) / sizeof(int); - ifdebug fflush (stdout); - MPI_Allgather - (info, dim * info_nints, MPI_INT, - allinfo, dim * info_nints, MPI_INT, comm); - - for (int n = 0; n < size; ++n) { - for (int d=0; d<dim; ++d) { - assert (allinfo[n][d].src.global.off == info[d].src.global.off); - assert (allinfo[n][d].src.global.len == info[d].src.global.len); - assert (allinfo[n][d].src.global.str == info[d].src.global.str); - assert (allinfo[n][d].dst.global.off == info[d].dst.global.off); - assert (allinfo[n][d].dst.global.len == info[d].dst.global.len); - assert (allinfo[n][d].dst.global.str == info[d].dst.global.str); - assert (allinfo[n][d].src.local.str == info[d].src.local.str); - assert (allinfo[n][d].dst.local.str == info[d].dst.local.str); - assert (allinfo[n][d].src.active.str == info[d].src.active.str); - assert (allinfo[n][d].dst.active.str == info[d].dst.active.str); - assert (allinfo[n][d].src.slab.str == info[d].src.slab.str); - assert (allinfo[n][d].dst.slab.str == info[d].dst.slab.str); - assert (allinfo[n][d].xpose == info[d].xpose); - assert (allinfo[n][d].flip == info[d].flip); + allinfo = malloc (size * dim * sizeof *allinfo); + assert (allinfo); + { + int const info_nints = sizeof(struct info) / sizeof(int); + ifdebug fflush (stdout); + MPI_Allgather + (info, dim * info_nints, MPI_INT, + allinfo, dim * info_nints, MPI_INT, comm); + } + + for (n = 0; n < size; ++n) { + for (d=0; d<dim; ++d) { + assert (allinfo[n*dim+d].src.global.off == info[d].src.global.off); + assert (allinfo[n*dim+d].src.global.len == info[d].src.global.len); + assert (allinfo[n*dim+d].src.global.str == info[d].src.global.str); + assert (allinfo[n*dim+d].dst.global.off == info[d].dst.global.off); + assert (allinfo[n*dim+d].dst.global.len == info[d].dst.global.len); + assert (allinfo[n*dim+d].dst.global.str == info[d].dst.global.str); + assert (allinfo[n*dim+d].src.local.str == info[d].src.local.str); + assert (allinfo[n*dim+d].dst.local.str == info[d].dst.local.str); + assert (allinfo[n*dim+d].src.active.str == info[d].src.active.str); + assert (allinfo[n*dim+d].dst.active.str == info[d].dst.active.str); + assert (allinfo[n*dim+d].src.slab.str == info[d].src.slab.str); + assert (allinfo[n*dim+d].dst.slab.str == info[d].dst.slab.str); + assert (allinfo[n*dim+d].xpose == info[d].xpose); + assert (allinfo[n*dim+d].flip == info[d].flip); } } - struct bbox srcdetail[size][dim]; - for (int n = 0; n < size; ++n) { + srcdetail = malloc (size * dim * sizeof *srcdetail); + assert (srcdetail); + for (n = 0; n < size; ++n) { ifdebug printf ("srcdetail n=%d:\n", n); - for (int d=0; d<dim; ++d) { - srcdetail[n][d] = allinfo[n][d].src.slab; + for (d=0; d<dim; ++d) { + srcdetail[n*dim+d] = allinfo[n*dim+d].src.slab; ifdebug printf (" src.slab d=%d ", d); - ifdebug bbox_print (&srcdetail[n][d]); + ifdebug bbox_print (&srcdetail[n*dim+d]); ifdebug printf ("\n"); - bbox_clip (&srcdetail[n][d], &info[d].src.active); + bbox_clip (&srcdetail[n*dim+d], &info[d].src.active); ifdebug printf (" clipped with src.active d=%d ", d); - ifdebug bbox_print (&srcdetail[n][d]); + ifdebug bbox_print (&srcdetail[n*dim+d]); ifdebug printf ("\n"); } - for (int d=0; d<dim; ++d) { - struct bbox whereto = allinfo[n][d].dst.slab; - ifdebug printf (" dst.slab d=%d ", info[d].xpose); + for (d=0; d<dim; ++d) { + struct bbox whereto; + struct bbox wherefrom; + whereto = allinfo[n*dim+d].dst.slab; + ifdebug printf (" dst.slab d=%d ", info[d].xpose); ifdebug bbox_print (&whereto); ifdebug printf ("\n"); - bbox_clip (&whereto, &allinfo[n][d].dst.active); - ifdebug printf (" whereto d=%d ", info[d].xpose); + bbox_clip (&whereto, &allinfo[n*dim+d].dst.active); + ifdebug printf (" whereto d=%d ", info[d].xpose); ifdebug bbox_print (&whereto); ifdebug printf ("\n"); - struct bbox wherefrom; bbox_xform (&wherefrom, &whereto, - &allinfo[n][info[d].xpose].src.slab, &allinfo[n][d].dst.slab, + &allinfo[n*dim+info[d].xpose].src.slab, &allinfo[n*dim+d].dst.slab, info[d].flip); - ifdebug printf (" wherefrom d=%d ", info[d].xpose); + ifdebug printf (" wherefrom d=%d ", info[d].xpose); ifdebug bbox_print (&wherefrom); ifdebug printf ("\n"); - bbox_clip (&srcdetail[n][info[d].xpose], &wherefrom); - ifdebug printf (" clipped with wherefrom d=%d ", info[d].xpose); - ifdebug bbox_print (&srcdetail[n][info[d].xpose]); + bbox_clip (&srcdetail[n*dim+info[d].xpose], &wherefrom); + ifdebug printf (" clipped with wherefrom d=%d ", info[d].xpose); + ifdebug bbox_print (&srcdetail[n*dim+info[d].xpose]); ifdebug printf ("\n"); } } - int srccount[size]; - int srcoffset[size+1]; + srccount = malloc (size * sizeof *srccount); + assert (srccount); + srcoffset = malloc ((size + 1) * sizeof *srcoffset); + assert (srcoffset); srcoffset[0] = 0; - for (int n = 0; n < size; ++n) { + for (n = 0; n < size; ++n) { srccount[n] = 1; - for (int d=0; d<dim; ++d) { - srccount[n] *= srcdetail[n][d].len; + for (d=0; d<dim; ++d) { + srccount[n] *= srcdetail[n*dim+d].len; } ifdebug printf ("srccnt n=%d offset=%d count=%d\n", n, srcoffset[n], srccount[n]); srcoffset[n+1] = srcoffset[n] + srccount[n]; } - void * restrict srcdata = malloc (srcoffset[size] * sizeof(CCTK_REAL)); + srcdata = malloc (srcoffset[size] * sizeof(CCTK_REAL)); assert (srcoffset[size] == 0 || srcdata); ifcheck { + CCTK_REAL * restrict const srcptr = srcdata; CCTK_REAL marker; memset (&marker, -1, sizeof marker); - CCTK_REAL * restrict const srcptr = srcdata; - for (size_t i = 0; i < srcoffset[size]; ++i) { + for (i = 0; i < srcoffset[size]; ++i) { memcpy (&srcptr[i], &marker, sizeof marker); } } - struct bbox dstdetail[size][dim]; - for (int n = 0; n < size; ++n) { + dstdetail = malloc (size * dim * sizeof *dstdetail); + assert (dstdetail); + for (n = 0; n < size; ++n) { ifdebug printf ("dstdetail n=%d:\n", n); - for (int d=0; d<dim; ++d) { - dstdetail[n][d] = allinfo[n][d].dst.slab; - ifdebug printf (" dst.slab d=%d ", d); - ifdebug bbox_print (&dstdetail[n][d]); + for (d=0; d<dim; ++d) { + struct bbox wherefrom; + struct bbox whereto; + dstdetail[n*dim+d] = allinfo[n*dim+d].dst.slab; + ifdebug printf (" dst.slab d=%d ", d); + ifdebug bbox_print (&dstdetail[n*dim+d]); ifdebug printf ("\n"); - bbox_clip (&dstdetail[n][d], &info[d].dst.active); + bbox_clip (&dstdetail[n*dim+d], &info[d].dst.active); ifdebug printf (" clipped with dst.active d=%d ", d); - ifdebug bbox_print (&dstdetail[n][d]); + ifdebug bbox_print (&dstdetail[n*dim+d]); ifdebug printf ("\n"); - struct bbox wherefrom = allinfo[n][info[d].xpose].src.slab; - ifdebug printf (" src.slab d=%d ", d); - ifdebug bbox_print (&dstdetail[n][d]); + wherefrom = allinfo[n*dim+info[d].xpose].src.slab; + ifdebug printf (" src.slab d=%d ", d); + ifdebug bbox_print (&dstdetail[n*dim+d]); ifdebug printf ("\n"); - bbox_clip (&wherefrom, &allinfo[n][info[d].xpose].src.active); - ifdebug printf (" wherefrom d=%d ", d); - ifdebug bbox_print (&dstdetail[n][d]); + bbox_clip (&wherefrom, &allinfo[n*dim+info[d].xpose].src.active); + ifdebug printf (" wherefrom d=%d ", d); + ifdebug bbox_print (&dstdetail[n*dim+d]); ifdebug printf ("\n"); - struct bbox whereto; bbox_xform (&whereto, &wherefrom, - &allinfo[n][d].dst.slab, &allinfo[n][info[d].xpose].src.slab, + &allinfo[n*dim+d].dst.slab, &allinfo[n*dim+info[d].xpose].src.slab, info[d].flip); - ifdebug printf (" whereto d=%d ", d); - ifdebug bbox_print (&dstdetail[n][d]); + ifdebug printf (" whereto d=%d ", d); + ifdebug bbox_print (&dstdetail[n*dim+d]); ifdebug printf ("\n"); - bbox_clip (&dstdetail[n][d], &whereto); - ifdebug printf (" clipped with whereto d=%d ", d); - ifdebug bbox_print (&dstdetail[n][d]); + bbox_clip (&dstdetail[n*dim+d], &whereto); + ifdebug printf (" clipped with whereto d=%d ", d); + ifdebug bbox_print (&dstdetail[n*dim+d]); ifdebug printf ("\n"); } } - int dstcount[size]; - int dstoffset[size+1]; + dstcount = malloc (size * sizeof *dstcount); + assert (dstcount); + dstoffset = malloc ((size + 1) * sizeof *dstoffset); + assert (dstoffset); dstoffset[0] = 0; - for (int n = 0; n < size; ++n) { + for (n = 0; n < size; ++n) { dstcount[n] = 1; - for (int d=0; d<dim; ++d) { - dstcount[n] *= dstdetail[n][d].len; + for (d=0; d<dim; ++d) { + dstcount[n] *= dstdetail[n*dim+d].len; } ifdebug printf ("dstcnt n=%d offset=%d count=%d\n", n, dstoffset[n], dstcount[n]); dstoffset[n+1] = dstoffset[n] + dstcount[n]; } - void * restrict dstdata = malloc (dstoffset[size] * sizeof(CCTK_REAL)); + dstdata = malloc (dstoffset[size] * sizeof(CCTK_REAL)); assert (dstoffset[size] == 0 || dstdata); ifcheck { + CCTK_REAL * restrict const dstptr = dstdata; CCTK_REAL marker; memset (&marker, -1, sizeof marker); - CCTK_REAL * restrict const dstptr = dstdata; - for (size_t i = 0; i < dstoffset[size]; ++i) { + for (i = 0; i < dstoffset[size]; ++i) { memcpy (&dstptr[i], &marker, sizeof marker); } } ifcheck { - int src2count[size]; - int dst2count[size]; + int * restrict src2count; + int * restrict dst2count; + src2count = malloc (size * sizeof *src2count); + assert (src2count); + dst2count = malloc (size * sizeof *dst2count); + assert (dst2count); ifdebug fflush (stdout); MPI_Alltoall (srccount, 1, MPI_INT, src2count, 1, MPI_INT, comm); MPI_Alltoall (dstcount, 1, MPI_INT, dst2count, 1, MPI_INT, comm); - for (int n = 0; n < size; ++n) { + for (n = 0; n < size; ++n) { assert (src2count[n] == dstcount[n]); assert (dst2count[n] == srccount[n]); } + free (src2count); + free (dst2count); } - for (int n = 0; n < size; ++n) { + for (n = 0; n < size; ++n) { assert (dim == 3); - for (int k = 0; k < srcdetail[n][info[2].xpose].len; ++k) { - for (int j = 0; j < srcdetail[n][info[1].xpose].len; ++j) { - for (int i = 0; i < srcdetail[n][info[0].xpose].len; ++i) { - int ipos[dim]; + for (k = 0; k < srcdetail[n*dim+info[2].xpose].len; ++k) { + for (j = 0; j < srcdetail[n*dim+info[1].xpose].len; ++j) { + for (i = 0; i < srcdetail[n*dim+info[0].xpose].len; ++i) { + int ipos[3]; + int srcipos[3]; + int bufipos[3]; + size_t srcind; + size_t bufind; ipos[0] = i; ipos[1] = j; ipos[2] = k; - int srcipos[dim]; - int bufipos[dim]; - for (int d=0; d<dim; ++d) { + for (d=0; d<dim; ++d) { int const c = info[d].xpose; - srcipos[c] = srcdetail[n][c].off + ipos[d] * srcdetail[n][c].str; + srcipos[c] = srcdetail[n*dim+c].off + ipos[d] * srcdetail[n*dim+c].str; assert (srcipos[c] >= info[c].src.local.off && srcipos[c] < info[c].src.local.off + info[c].src.local.len); - if (! (srcipos[c] >= allinfo[n][c].src.slab.off - && srcipos[c] <= allinfo[n][c].src.slab.off + (allinfo[n][c].src.slab.len - 1) * allinfo[n][c].src.slab.str)) { - printf ("ssc n=%d ipos=[%d,%d,%d] d=%d srcipos=%d slab.off=%d slab.len=%d\n", n, i, j, k, d, srcipos[c], allinfo[n][c].src.slab.off, allinfo[n][c].src.slab.len); + if (! (srcipos[c] >= allinfo[n*dim+c].src.slab.off + && srcipos[c] <= allinfo[n*dim+c].src.slab.off + (allinfo[n*dim+c].src.slab.len - 1) * allinfo[n*dim+c].src.slab.str)) { + printf ("ssc n=%d ipos=[%d,%d,%d] d=%d srcipos=%d slab.off=%d slab.len=%d\n", n, i, j, k, d, srcipos[c], allinfo[n*dim+c].src.slab.off, allinfo[n*dim+c].src.slab.len); } - assert (srcipos[c] >= allinfo[n][c].src.slab.off - && srcipos[c] <= allinfo[n][c].src.slab.off + (allinfo[n][c].src.slab.len - 1) * allinfo[n][c].src.slab.str); - assert ((srcipos[c] - allinfo[n][c].src.slab.off) % allinfo[n][c].src.slab.str == 0); + assert (srcipos[c] >= allinfo[n*dim+c].src.slab.off + && srcipos[c] <= allinfo[n*dim+c].src.slab.off + (allinfo[n*dim+c].src.slab.len - 1) * allinfo[n*dim+c].src.slab.str); + assert ((srcipos[c] - allinfo[n*dim+c].src.slab.off) % allinfo[n*dim+c].src.slab.str == 0); bufipos[d] = ipos[d]; - assert (bufipos[d] >= 0 && bufipos[d] < srcdetail[n][c].len); + assert (bufipos[d] >= 0 && bufipos[d] < srcdetail[n*dim+c].len); } - size_t srcind = 0; - size_t bufind = 0; - for (int d=dim-1; d>=0; --d) { + srcind = 0; + bufind = 0; + for (d=dim-1; d>=0; --d) { int const c = info[d].xpose; srcind = srcind * info[d].src.local.len + srcipos[d] - info[d].src.local.off; - bufind = bufind * srcdetail[n][c].len + bufipos[d]; + bufind = bufind * srcdetail[n*dim+c].len + bufipos[d]; } assert (srcind < srclentot); assert (bufind < srccount[n]); @@ -623,10 +674,10 @@ int Slab_Transfer (cGH * const cctkGH, } ifcheck { + const CCTK_REAL * restrict const srcptr = srcdata; CCTK_REAL marker; memset (&marker, -1, sizeof marker); - const CCTK_REAL * restrict const srcptr = srcdata; - for (size_t i = 0; i < srcoffset[size]; ++i) { + for (i = 0; i < srcoffset[size]; ++i) { assert (memcmp(&srcptr[i], &marker, sizeof marker) != 0); } } @@ -637,43 +688,45 @@ int Slab_Transfer (cGH * const cctkGH, dstdata, dstcount, dstoffset, dstdatatype, comm); ifcheck { + const CCTK_REAL * restrict const dstptr = dstdata; CCTK_REAL marker; memset (&marker, -1, sizeof marker); - const CCTK_REAL * restrict const dstptr = dstdata; - for (size_t i = 0; i < dstoffset[size]; ++i) { + for (i = 0; i < dstoffset[size]; ++i) { assert (memcmp(&dstptr[i], &marker, sizeof marker) != 0); } } - for (int n = 0; n < size; ++n) { + for (n = 0; n < size; ++n) { assert (dim == 3); - for (int k = 0; k < dstdetail[n][2].len; ++k) { - for (int j = 0; j < dstdetail[n][1].len; ++j) { - for (int i = 0; i < dstdetail[n][0].len; ++i) { - int ipos[dim]; + for (k = 0; k < dstdetail[n*dim+2].len; ++k) { + for (j = 0; j < dstdetail[n*dim+1].len; ++j) { + for (i = 0; i < dstdetail[n*dim+0].len; ++i) { + int ipos[3]; + int bufipos[3]; + int dstipos[3]; + size_t bufind; + size_t dstind; ipos[0] = i; ipos[1] = j; ipos[2] = k; - int bufipos[dim]; - int dstipos[dim]; - for (int d=0; d<dim; ++d) { + for (d=0; d<dim; ++d) { if (! info[d].flip) { bufipos[d] = ipos[d]; } else { - bufipos[d] = dstdetail[n][d].len - 1 - ipos[d]; + bufipos[d] = dstdetail[n*dim+d].len - 1 - ipos[d]; } - assert (bufipos[d] >= 0 && bufipos[d] < dstdetail[n][d].len); - dstipos[d] = dstdetail[n][d].off + ipos[d] * info[d].dst.slab.str; + assert (bufipos[d] >= 0 && bufipos[d] < dstdetail[n*dim+d].len); + dstipos[d] = dstdetail[n*dim+d].off + ipos[d] * info[d].dst.slab.str; assert (dstipos[d] >= info[d].dst.local.off && dstipos[d] < info[d].dst.local.off + info[d].dst.local.len); assert (dstipos[d] >= info[d].dst.slab.off && dstipos[d] <= info[d].dst.slab.off + (info[d].dst.slab.len - 1) * info[d].dst.slab.str); assert ((dstipos[d] - info[d].dst.slab.off) % info[d].dst.slab.str == 0); } - size_t bufind = 0; - size_t dstind = 0; - for (int d=dim-1; d>=0; --d) { - bufind = bufind * dstdetail[n][d].len + bufipos[d]; + bufind = 0; + dstind = 0; + for (d=dim-1; d>=0; --d) { + bufind = bufind * dstdetail[n*dim+d].len + bufipos[d]; dstind = dstind * info[d].dst.local.len + dstipos[d] - info[d].dst.local.off; } assert (bufind < dstcount[n]); @@ -687,8 +740,18 @@ int Slab_Transfer (cGH * const cctkGH, - free (srcdata); free (dstdata); + free (dstcount); + free (dstoffset); + free (dstdetail); + + free (srcdata); + free (srccount); + free (srcoffset); + free (srcdetail); + + free (allinfo); + free (info); |