diff options
Diffstat (limited to 'src/Panda/Chunk.C')
-rw-r--r-- | src/Panda/Chunk.C | 692 |
1 files changed, 692 insertions, 0 deletions
diff --git a/src/Panda/Chunk.C b/src/Panda/Chunk.C new file mode 100644 index 0000000..d6fd028 --- /dev/null +++ b/src/Panda/Chunk.C @@ -0,0 +1,692 @@ +#include "definitions.h" +#include "Chunk.h" +#include "Array.h" +#include <malloc.h> + + +Chunk::Chunk() +{ + base_ = stride_ = size_ = NULL; + array_ = NULL; + chunk_ = NULL; + data_ptr_ = NULL; + stencil_width_ = 0; +} + + +/* This constructor is used to create a chunk given array information */ +Chunk::Chunk(Array *array, int chunk_id, int node_type, DataStatus data_status) +{ + do_init(array, chunk_id, node_type, data_status); +} + +/* Re-initialize an already created chunk object */ +void Chunk::init(Array *array, int chunk_id, int node_type, DataStatus data_status) +{ + clear(); + do_init(array, chunk_id, node_type, data_status); +} + +void Chunk::do_init(Array *array, int chunk_id, int node_type, + DataStatus data_status) +{ + int *stride, *base; + + /* Initialize the instance variables */ + array_ = array; + chunk_ = NULL; + chunk_id_ = chunk_id; + am_subchunk_ = NO; + element_size_ = array->element_size(); + + stride = (int *) malloc(sizeof(int)*array->rank()); + base = (int *) malloc(sizeof(int)*array->rank()); + for(int i=0; i < array->rank(); i++){ stride[i] = 1; base[i] = 0; } + + RegularDistribution *layout=(RegularDistribution *)(array->layout(node_type)); + calculate_base_size_stride(array->rank(), base, array->size(), stride, + layout->layout(), layout->distribution(), + layout->block_dist(), chunk_id); + + /* check if we have to allocate the data space */ + switch(data_status) { + case ALLOC: + data_ptr_ = (char *)malloc(total_size_in_bytes()); + data_status_ = data_status; + stencil_width_ = 0; + break; + + case NO_ALLOC: + data_ptr_ = NULL; + data_status_ = data_status; + stencil_width_ = 0; + break; + + default: + printf("Unsupported \n"); + break; + } +} + +/* This creates a subchunk , given the chunk and subchunk_id */ +Chunk::Chunk(Chunk* mega_chunk, int sub_chunkid, DataStatus data_status) +{ + do_init(mega_chunk, sub_chunkid, data_status); +} + +/* Re-initialize an already created subchunk obj */ +void Chunk::init(Chunk* mega_chunk, int sub_chunkid, DataStatus data_status) +{ + clear(); + do_init(mega_chunk, sub_chunkid, data_status); +} + + +void Chunk::do_init(Chunk* mega_chunk, int sub_chunkid, DataStatus data_status) +{ + chunk_id_ = sub_chunkid; + element_size_ = mega_chunk->element_size(); + array_ = mega_chunk->array(); + chunk_ = mega_chunk; + am_subchunk_ = YES; + + RegularDistribution *layout=(RegularDistribution *)(array_->layout(SUB_CHUNK)); + calculate_base_size_stride(mega_chunk->rank(), mega_chunk->base(), + mega_chunk->size(), mega_chunk->stride(), + layout->layout(), layout->distribution(), + layout->block_dist(), sub_chunkid); + /* check if we have to allocate the data space */ + switch(data_status) { + case ALLOC: + data_ptr_ = (char *)malloc(total_size_in_bytes()); + data_status_ = data_status; + stencil_width_ = 0; + break; + + case NO_ALLOC: + data_ptr_ = NULL; + data_status_ = data_status; + stencil_width_ = 0; + break; + + default: + data_ptr_ = NULL; + printf("Unsupported \n"); + break; + } +} + +Chunk::~Chunk() +{ + if (base_) delete base_; + if (stride_) delete stride_; + + /* Delete the data buffer only if we allocated it in the first place */ + if ((data_status_ == ALLOC) && data_ptr_) delete data_ptr_; +} + + +void Chunk::clear() +{ + if (base_) free (base_); + if (stride_) free (stride_); + if (data_ptr_) free( data_ptr_); + if (size_) free(size_); + base_ = size_ = stride_ = NULL; + data_ptr_ = NULL; +} + +/* This function takes as input the information about the global + * Array and returns the overlapping compute node chunk indices + * via a singly linked list. + * + * Currently this function can only handle BLOCK,* arrays (Needs + * to be extended for the CYCLIC case) + */ +void Chunk::chunk_overlaps(Array *global_array, int* num_overlaps, + int *ret_list, int node_type) +{ + RegularDistribution *layout1 = + (RegularDistribution *)global_array->layout(node_type); + ArrayLayout *layout= layout1->layout(); + int layout_rank = layout->rank(); + int *overlap_base = (int *)malloc(sizeof(int)*layout_rank); + int *overlap_size = (int *)malloc(sizeof(int)*layout_rank); + + /* Find out the list of possible overlaps */ + compute_first_last_chunk(global_array->rank(), global_array->size(), + layout, layout1->distribution(), layout1->block_dist(), + overlap_base, overlap_size); +#ifdef DEBUG + printf("In chunk_overlaps\n"); + for(int i=0;i<layout_rank;i++) + printf("base[%d] = %d size[%d] = %d\n", i, overlap_base[i], i, overlap_size[i]); +#endif + layout->indices_list(overlap_base, overlap_size, num_overlaps, ret_list); + free(overlap_base); + free(overlap_size); +} + + +/* This function isn't general enough. It implicitly assumes that the I/O + * chunks are distributed using only BLOCK.* distributions. Also the + * compute node chunks are assumed to be distributed using only + * BLOCK,* (can be extended to support CYLCIC later) + * + * Function assumes that the memory for the return paramters + * overlap_base and overlap_size have been allocated + */ +void Chunk::compute_first_last_chunk(int array_rank, int *array_size, + ArrayLayout *layout, Distribution *dist, + Block_Distribution block_dist, + int *overlap_base, int *overlap_size) +{ + /* Validation of input data */ + if (!(layout->valid_distribution(array_rank, dist))) + { + printf("Invalid distribution in compute_first_last_chunk\n"); + exit(1); + } + + /* Verify to see if we are dealing with BLOCK,* case only */ + for(int i=0;i<layout->rank();i++) + { + if (dist[i] == CYCLIC) + { + printf("Cyclic schema not yet supported\n"); + exit(2); + } + } + + for(i=0; i<array_rank;i++) + { + if (stride_[i] != 1) + { + printf("Cyclic schema not yet supported\n"); + exit(2); + } + } + + + /* Now we can get down to business */ + int *overlap_last = (int*)malloc(sizeof(int)*layout->rank()); + int layout_idx=0, array_idx; + int def_chunk_size,rem,tmp,last; + + for(array_idx=0;array_idx < array_rank; array_idx++) + { + switch(dist[array_idx]) + { + case NONE: + break; + + case CYCLIC: + printf("Cyclic schema not yet supported\n"); + exit(3); + break; + + /* Need to verify this stuff - especially the NAS stuff */ + case BLOCK: + switch(block_dist) + { + case HPF: + def_chunk_size = (array_size[array_idx]+layout->size(layout_idx)-1) + / (layout->size(layout_idx)); + overlap_base[layout_idx] = base_[array_idx] + / def_chunk_size; + overlap_last[layout_idx] = (base_[array_idx]+size_[array_idx] -1) + / def_chunk_size; + break; + + case NAS: + def_chunk_size = array_size[array_idx] + / layout->size(layout_idx); + rem = array_size[array_idx] + % layout->size(layout_idx); + if (rem == 0) + { + /* perfect distribution */ + overlap_base[layout_idx] = base_[array_idx] + / def_chunk_size; + overlap_last[layout_idx] = (base_[array_idx] + + size_[array_idx] -1) + / def_chunk_size; + } + else + { + /* first "rem" blocks have "def_chunk+1" elements */ + tmp = (def_chunk_size+1)*rem; + if (base_[array_idx] < tmp) + { + overlap_base[layout_idx] = base_[array_idx] + / (def_chunk_size + 1); + } + else + { + overlap_base[layout_idx] = ((base_[array_idx] - tmp) + / def_chunk_size) + rem; + } + + last = base_[array_idx] + size_[array_idx] -1; + if (last < tmp) + { + overlap_last[layout_idx] = last / (def_chunk_size+1); + + } + else + { + overlap_last[layout_idx] = ((last - tmp) + / def_chunk_size) + rem; + } + } + break; + + default: + printf("Unsupported block distribution\n"); + exit(2); + break; + } + overlap_size[layout_idx] = overlap_last[layout_idx] + - overlap_base[layout_idx] + 1; + layout_idx++; + break; + + default: + printf("Unsupported distribution\n"); + exit(3); + break; + } + + } + + free(overlap_last); + return; +} + + + + +int Chunk::total_size_in_bytes() +{ + return (total_size_in_elements()*element_size_); +} + + + +int Chunk::total_size_in_elements() +{ + return total_elements(); +} + + +int Chunk::chunk_id(){return chunk_id_;} + + +void * Chunk::data_ptr(){return data_ptr_;} + + + +/* This is not a method. It is an generalized inline function to + * calculate the overlap between two chunks. The input parameters + * are rank,base,stride,size of the two arrays and the pointers to + * the base,strides and sizes of the resultant chunk. The functions + * assumes that the rank of the input arrays are equal + * + * This function also assumes that the memory for the return values + * r_base, r_stride, rsize have already been allocated. + */ +inline void determine_overlap(int rank, int *c1_base, int* c1_size, + int* c1_stride, + int* c2_base, int* c2_size, int* c2_stride, + int* r_base, int* r_size, int* r_stride) +{ + + int tmp_base,tmp_size,n; + + for(int i=0; i< rank;i++) + { + /* Compute overlap in each dimension */ + if ((c1_stride[i] == 1) && (c2_stride[i] == 1)) + { + /* Simplest case + * r_base = max(c1_base, c2_base) + * r_size = max( min(c1_base+c1_size, c2_base+c2_size)-r_base, 0); + */ + r_base[i] = max(c1_base[i], c2_base[i]); + r_size[i] = max((min(c1_base[i]+c1_size[i], c2_base[i]+c2_size[i]) + - r_base[i]), 0); + r_stride[i] = 1; + } + else if (c1_stride[i] == 1) + { + /* Not so simple - this needs to be verified + * tmp_B = max(c1_base,c2_base) + * B = tmp_B + (N - ((tmp_B - c2_base)%N))%N + * U = min(c1_base+(c1_size-1), c2_base+(c2_size-1)*N) - B + * if (U < 0) the no overlap else r_size = U/N + 1 + */ + n = c2_stride[i]; + tmp_base = max(c1_base[i], c2_base[i]); + r_base[i] = tmp_base + (n -((tmp_base - c2_base[i])%n))%n; + tmp_size = min(c1_base[i]+(c1_size[i]-1), c2_base[i]+(c2_size[i]-1)*n); + if (tmp_size < 0) + { + /* no overlap */ + r_size[i] = 0; + r_stride[i] = 1; + } + else + { + r_size[i] = tmp_size / n + 1; + r_stride[i] = n; + } + } + else if (c2_stride[i] == 1) + { + /* Similar to the previous case */ + n = c1_stride[i]; + tmp_base = max(c1_base[i], c2_base[i]); + r_base[i] = tmp_base + (n -((tmp_base - c1_base[i])%n))%n; + tmp_size = min(c1_base[i]+(c1_size[i]-1)*n, c2_base[i]+(c2_size[i]-1)); + if (tmp_size < 0) + { + /* no overlap */ + r_size[i] = 0; + r_stride[i] = 1; + } + else + { + r_size[i] = tmp_size / n + 1; + r_stride[i] = n; + } + } + else if (c1_stride[i] = c2_stride[i]) + { + /* Can do this one later */ + } + else + { + /* I give up */ + } + } +#ifdef DEBUG + /* Debugging output */ + printf ("In determine overlap rank= %d\n", rank); + int k; + for(k=0;k<rank;k++) + printf("%d %d %d %d %d %d %d %d %d\n", c1_base[k], c1_size[k], c1_stride[k], + c2_base[k], c2_size[k], c2_stride[k], + r_base[k], r_size[k], r_stride[k]); +#endif + return; +} + + +void Chunk::compute_overlap(Chunk *compute_chunk, int *overlap_base, + int *overlap_size, int *overlap_stride) +{ + determine_overlap(rank_, base_, size_, stride_, + compute_chunk->base(), + compute_chunk->size(), + compute_chunk->stride(), + overlap_base, + overlap_size, + overlap_stride); +} + + +int* Chunk::base(){return base_;} +int* Chunk::size(){return size_;} +int* Chunk::stride(){return stride_;} + +int Chunk::element_size() { return element_size_; } +/* This function needs to be verified when the stride is not 1 */ +void Chunk::base_offset(int *base, void **ptr) +{ + int base_offset = 0; + int offset=1; + + for(int i=rank_ - 1; i>= 0; i--) + { + base_offset += ((base[i]-base_[i]) / stride_[i])*offset; + offset *= size_[i]; + } + base_offset *= element_size_; + *ptr = (char *)data_ptr_ + base_offset; +} + +void Chunk::convert_from_number_to_index(int num, int *result) +{ + int i,j, product=1; + + for(i=0;i<rank_;i++) + { + product=1; + for(j=i+1; j< rank_;j++) product *= size_[j]; + result[i] = num / product; + num -= num/product * product; + } +} + + +/* This method calculates the rank, base, stride of the chunk * + * (subchunk), given the dimensions of the array (chunk) and its * + * layout, distribution and the chunk (subchunk index) */ +void Chunk::calculate_base_size_stride(int rank, int* old_base, + int* old_size, int* old_stride, + ArrayLayout *layout, Distribution *dist, + Block_Distribution block_dist, int id) +{ + int *chunk_index=NULL; + int idx=0, layout_idx=0; + int default_size, rem; + + chunk_index = layout->convert_from_number_to_index(id); + rank_ = rank; + size_ = (int *) malloc(sizeof(int)*rank); + base_ = (int *) malloc(sizeof(int)*rank); + stride_ = (int *) malloc(sizeof(int)*rank); + + + /* Verify if it is possible to distribute the array (subchunk) */ + if (!(layout->valid_index(chunk_index))) + { + printf("Invalid chunk index %d in compute_base_size_stride\n", id); + exit(1); + } + if (!(layout->valid_distribution(rank, dist))) + { + printf("Unable to distribute array in compute_base_size_stride\n"); + exit(2); + } + + for(idx=0; idx < rank; idx++) + { + switch(dist[idx]) + { + case NONE: + base_[idx] = old_base[idx]; + size_[idx] = old_size[idx]; + stride_[idx] = old_stride[idx]*1; + break; + + case CYCLIC: + base_[idx] = old_base[idx] + chunk_index[layout_idx]*old_stride[idx]; + size_[idx] = (old_size[idx] - chunk_index[layout_idx] + + layout->size(layout_idx)-1)/ layout->size(layout_idx); + stride_[idx] = layout->size(layout_idx) * old_stride[idx]; + layout_idx++; + break; + + case BLOCK: + switch(block_dist) + { + case HPF: + default_size = (old_size[idx] + layout->size(layout_idx)-1) + /layout->size(layout_idx); + base_[idx] = old_base[idx] + default_size * + chunk_index[layout_idx] *old_stride[idx]; + size_[idx] = default_size; + stride_[idx] = old_stride[idx]*1; + /* The last chunk may be smaller */ + if (chunk_index[layout_idx] ==(layout->size(layout_idx)-1)) + { + size_[idx] = old_size[idx] - + (default_size * chunk_index[layout_idx]); + } + break; + + case NAS: + default_size = old_size[idx] / layout->size(layout_idx); + rem = old_size[idx] % layout->size(layout_idx); + if (chunk_index[layout_idx] < rem) + { + base_[idx] = old_base[idx] + (chunk_index[layout_idx] + + chunk_index[layout_idx]*default_size) + *old_stride[idx]; + size_[idx] = default_size + 1; + } + else + { + base_[idx] = old_base[idx] + (rem + + chunk_index[layout_idx]*default_size) + *old_stride[idx]; + size_[idx] = default_size; + } + stride_[idx] = old_stride[idx] * 1; + break; + + + default: + printf("Unsupported Block Distribution specified\n"); + exit(3); + break; + } + layout_idx++; + break; + + default: + printf("Unsupported Distribution specified\n"); + exit(3); + break; + } + } + + free(chunk_index); + return; +} + +Array* Chunk::array(){return array_;} + +Boolean Chunk::am_subchunk(){return am_subchunk_;} + +void Chunk::copy_base_size_stride(int *base, int *size, int *stride) +{ + for(int i=0; i< rank_; i++){ + base[i] = base_[i]; + size[i] = size_[i]; + stride[i] = stride_[i]; + } + } + + + +/* This assumes that all the strides are 1 - i.e no cyclic */ +void Chunk::make_datatype(int *overlap_base, int *overlap_size, + int *overlap_stride, void **ptr, + MPI_Datatype *return_data_type) +{ + + MPI_Datatype *tmp_types = (MPI_Datatype *) malloc(sizeof(MPI_Datatype) * rank_); + int i,j , offset = 1; + int base_offset = 0; + int *size, *base; + Boolean allocate; + + // If there is a ghost region + int *array_size = array_->size(); + int bound; + if (stencil_width_ > 0) { + size = (int *)malloc(sizeof(int) * rank_); + base = (int *)malloc(sizeof(int) * rank_); + for (i=0; i<rank_; i++) { + bound = base_[i] + size_[i]; + base[i] = max(base_[i] - stencil_width_, 0); + bound = min(bound + stencil_width_, array_size[i]); + size[i] = bound - base[i]; + } + allocate = YES; + //printf("##### stencil %d base %d %d %d size %d %d %d\n", stencil_width_, base[0], base[1], base[2], size[0], size[1], size[2]); + } else { + size = size_; + base = base_; + allocate = NO; + } + + MPI_Type_contiguous(element_size_, MPI_CHAR, &tmp_types[rank_-1]); + if (overlap_stride[rank_ -1] != 1) + { + printf("error - stride is %d", overlap_stride[rank_ -1]); + exit(10); + } + MPI_Type_vector(overlap_size[rank_-1], 1, 1, tmp_types[rank_-1], &tmp_types[rank_-2]); + for(i=rank_-1; i > 0; i--) + { + offset=1; + for(j=i;j <rank_; j++) offset *= size[j]; + if (overlap_stride[i-1] != 1) + { + printf("error - stride is %d\n", overlap_stride[i-1]); + exit(10); + } + if (i != 1){ + + MPI_Type_hvector(overlap_size[i-1],1,offset*element_size_, + tmp_types[i-1], + &tmp_types[i-2]); + } + else + MPI_Type_hvector(overlap_size[i-1],1,offset*element_size_, + tmp_types[i-1], + return_data_type); + } + MPI_Type_commit(return_data_type); + offset=1; + for(i=rank_-1;i >= 0; i--) + { + base_offset += (overlap_base[i] - base[i])*offset; + offset *= size[i]; + } + + *ptr = data_ptr_ + base_offset*element_size_; + free (tmp_types); + if (allocate) { + free(size); + free(base); + } +} + + +/* Old data buffer should be freed by someother function */ +void Chunk::set_data_ptr(char *data_ptr){ + data_ptr_ = data_ptr; +} + +void Chunk::set_stencil_width(int stencil_width){ + stencil_width_ = stencil_width; +} + +Chunk::Chunk(Array *array, int *base, int *size) +{ + array_ = array; + rank_ = array->rank(); + element_size_ = array->element_size(); + chunk_id_ = 0; + am_subchunk_ = NO; + + base_ = copy_int_list(rank_, base); + size_ = copy_int_list(rank_, size); + stride_ = (int *)malloc(sizeof(int) * rank_); + for (int i=0; i<rank_; i++) stride_[i] = 1; + data_status_ = NO_ALLOC; data_ptr_ = NULL; +} |