#include "definitions.h"
#include "Chunk.h"
#include "Array.h"

/* Default constructor: builds an empty chunk that owns no memory.
 *
 * Every scalar member gets a defined value so that the destructor and
 * clear() never read an indeterminate data_status_ or pointer on an
 * object that was never passed through init().
 */
Chunk::Chunk()
{
  base_ = stride_ = size_ = NULL;
  array_ = NULL;
  chunk_ = NULL;
  data_ptr_ = NULL;
  data_status_ = NO_ALLOC;
  stencil_width_ = 0;
  rank_ = 0;
  chunk_id_ = 0;
  element_size_ = 0;
  am_subchunk_ = NO;
}

/* This constructor is used to create a chunk given array information */
Chunk::Chunk(Array *array, int chunk_id, int node_type,
             DataStatus data_status)
{
  do_init(array, chunk_id, node_type, data_status);
}

/* Re-initialize an already created chunk object.  The previous index
 * arrays and data buffer are released by clear() first. */
void Chunk::init(Array *array, int chunk_id, int node_type,
                 DataStatus data_status)
{
  clear();
  do_init(array, chunk_id, node_type, data_status);
}

/* Shared set-up for the (array, chunk_id) constructor and init().
 *
 *   array       - array this chunk is a piece of; supplies the rank, the
 *                 size, the element size and the layout for node_type
 *   chunk_id    - which chunk of that layout this object describes
 *   node_type   - selects the layout through array->layout(node_type)
 *   data_status - ALLOC:    malloc a buffer of total_size_in_bytes()
 *                 NO_ALLOC: leave data_ptr_ NULL
 *                 other:    report "Unsupported" and own no buffer
 */
void Chunk::do_init(Array *array, int chunk_id, int node_type,
                    DataStatus data_status)
{
  int rank = array->rank();

  /* Initialize the instance variables */
  array_ = array;
  chunk_ = NULL;
  chunk_id_ = chunk_id;
  am_subchunk_ = NO;
  element_size_ = array->element_size();

  /* The whole array starts at index 0 with unit stride in every dimension */
  int *stride = (int *) malloc(sizeof(int)*rank);
  int *base = (int *) malloc(sizeof(int)*rank);
  for (int i = 0; i < rank; i++) {
    stride[i] = 1;
    base[i] = 0;
  }

  RegularDistribution *layout =
      (RegularDistribution *)(array->layout(node_type));
  calculate_base_size_stride(rank, base, array->size(), stride,
                             layout->layout(), layout->distribution(),
                             layout->block_dist(), chunk_id);

  /* calculate_base_size_stride() fills freshly allocated base_/size_/stride_
   * from these inputs and keeps no reference to them, so the temporaries
   * must be released here (they used to leak on every call). */
  free(stride);
  free(base);

  /* check if we have to allocate the data space */
  stencil_width_ = 0;
  switch (data_status) {
  case ALLOC:
    data_ptr_ = (char *)malloc(total_size_in_bytes());
    data_status_ = data_status;
    break;

  case NO_ALLOC:
    data_ptr_ = NULL;
    data_status_ = data_status;
    break;

  default:
    /* Leave the object in a defined, non-owning state */
    data_ptr_ = NULL;
    data_status_ = NO_ALLOC;
    printf("Unsupported \n");
    break;
  }
}

/* This creates a subchunk , given the chunk and subchunk_id */
Chunk::Chunk(Chunk* mega_chunk, int sub_chunkid, DataStatus data_status)
{
  do_init(mega_chunk, sub_chunkid, data_status);
}

/* Re-initialize an already created subchunk obj.  The previous index
 * arrays and data buffer are released by clear() first. */
void Chunk::init(Chunk* mega_chunk, int sub_chunkid, DataStatus data_status)
{
  clear();
  do_init(mega_chunk, sub_chunkid, data_status);
}

/* Shared set-up for the subchunk constructor and init().
 *
 *   mega_chunk  - parent chunk; supplies the rank, base, size, stride,
 *                 element size and owning array
 *   sub_chunkid - which subchunk of the array's SUB_CHUNK layout this is
 *   data_status - same meaning as in the Array overload of do_init()
 *
 * The parent's base/size/stride arrays are only read, never taken over.
 */
void Chunk::do_init(Chunk* mega_chunk, int sub_chunkid,
                    DataStatus data_status)
{
  chunk_id_ = sub_chunkid;
  element_size_ = mega_chunk->element_size();
  array_ = mega_chunk->array();
  chunk_ = mega_chunk;
  am_subchunk_ = YES;

  RegularDistribution *layout =
      (RegularDistribution *)(array_->layout(SUB_CHUNK));
  calculate_base_size_stride(mega_chunk->rank(), mega_chunk->base(),
                             mega_chunk->size(), mega_chunk->stride(),
                             layout->layout(), layout->distribution(),
                             layout->block_dist(), sub_chunkid);

  /* check if we have to allocate the data space */
  stencil_width_ = 0;
  switch (data_status) {
  case ALLOC:
    data_ptr_ = (char *)malloc(total_size_in_bytes());
    data_status_ = data_status;
    break;

  case NO_ALLOC:
    data_ptr_ = NULL;
    data_status_ = data_status;
    break;

  default:
    /* Leave the object in a defined, non-owning state */
    data_ptr_ = NULL;
    data_status_ = NO_ALLOC;
    printf("Unsupported \n");
    break;
  }
}

/* Release everything this chunk owns.
 *
 * base_, size_ and stride_ are obtained with malloc() in this file, so
 * they are returned with free() (the same call clear() uses) rather than
 * delete; size_ used to be leaked here.  free(NULL) is a no-op, so no
 * NULL tests are needed.
 *
 * NOTE(review): the (array, base, size) constructor fills base_ and size_
 * through copy_int_list(), which is not visible here; this assumes it
 * allocates with malloc() as clear() already does - please confirm.
 */
Chunk::~Chunk()
{
  free(base_);
  free(stride_);
  free(size_);

  /* Delete the data buffer only if we allocated it in the first place */
  if ((data_status_ == ALLOC) && data_ptr_) free(data_ptr_);
}

/* Free the index arrays and the data buffer and reset the pointers so
 * the object can be initialized again.
 *
 * NOTE(review): unlike the destructor, the data buffer is freed here
 * whatever data_status_ says, so a buffer installed with set_data_ptr()
 * on a NO_ALLOC chunk is freed too.  Behaviour is left unchanged; please
 * confirm whether that is intended.
 */
void Chunk::clear()
{
  if (base_) free(base_);
  if (stride_) free(stride_);
  if (data_ptr_) free(data_ptr_);
  if (size_) free(size_);
  base_ = size_ = stride_ = NULL;
  data_ptr_ = NULL;
}

/* This function takes as input the information about the global
 * Array and returns the overlapping compute node chunk indices
 * via a singly linked list.
* * Currently this function can only handle BLOCK,* arrays (Needs * to be extended for the CYCLIC case) * * It allocates two scratch int arrays of layout->rank() entries, lets * compute_first_last_chunk() fill them with the first overlapping chunk * index and the number of overlapping chunks in each layout dimension, * forwards them together with num_overlaps and ret_list to an * indices_list() call, and frees the scratch arrays. * * NOTE(review): in this copy of the file the DEBUG loop and the object * on which indices_list() is invoked are missing (the text between a * '<' and the following '>' appears to have been stripped, and the line * breaks are gone); restore them from a pristine copy before building. */ void Chunk::chunk_overlaps(Array *global_array, int* num_overlaps, int *ret_list, int node_type) { RegularDistribution *layout1 = (RegularDistribution *)global_array->layout(node_type); ArrayLayout *layout= layout1->layout(); int layout_rank = layout->rank(); int *overlap_base = (int *)malloc(sizeof(int)*layout_rank); int *overlap_size = (int *)malloc(sizeof(int)*layout_rank); /* Find out the list of possible overlaps (first chunk index and chunk count per layout dimension) */ compute_first_last_chunk(global_array->rank(), global_array->size(), layout, layout1->distribution(), layout1->block_dist(), overlap_base, overlap_size); #ifdef DEBUG printf("In chunk_overlaps\n"); for(int i=0;iindices_list(overlap_base, overlap_size, num_overlaps, ret_list); free(overlap_base); free(overlap_size); } /* This function isn't general enough. It implicitly assumes that the I/O * chunks are distributed using only BLOCK.* distributions. Also the * compute node chunks are assumed to be distributed using only * BLOCK,* (can be extended to support CYCLIC later) * * Function assumes that the memory for the return parameters * overlap_base and overlap_size have been allocated * * For every array dimension whose distribution is BLOCK it writes, at * the next layout index, overlap_base = index of the first layout chunk * touched by this chunk's range [base_, base_+size_-1] and overlap_size * = number of layout chunks touched. Dimensions with distribution NONE * are skipped. The process exits if the distribution is invalid, * CYCLIC, or otherwise unsupported. * * NOTE(review): two loops near the top of the body are incomplete in * this copy of the file, and the statement that declares and allocates * overlap_last is only partly present; restore from a pristine copy. */ void Chunk::compute_first_last_chunk(int array_rank, int *array_size, ArrayLayout *layout, Distribution *dist, Block_Distribution block_dist, int *overlap_base, int *overlap_size) { int i; /* Validation of input data */ if (!(layout->valid_distribution(array_rank, dist))) { printf("Invalid distribution in compute_first_last_chunk\n"); exit(1); } /* Verify to see if we are dealing with BLOCK,* case only */ for(i=0;irank();i++) { if (dist[i] == CYCLIC) { printf("Cyclic schema not yet supported\n"); exit(2); } } for(i=0; irank()); int layout_idx=0, array_idx; int def_chunk_size,rem,tmp,last; for(array_idx=0;array_idx < array_rank; array_idx++) { switch(dist[array_idx]) { case NONE: break; case CYCLIC: printf("Cyclic schema not yet supported\n"); exit(3); break; /* 
Need to verify this stuff - especially the NAS stuff. * HPF: every layout chunk holds ceil(array_size / layout size) elements, * so a position maps to its chunk by a single division. * NAS: chunks hold floor(array_size / layout size) elements and the * first "rem" chunks hold one more, so positions below * (def_chunk_size+1)*rem are divided by the larger size and the others * are offset by rem. */ case BLOCK: switch(block_dist) { case HPF: def_chunk_size = (array_size[array_idx]+layout->size(layout_idx)-1) / (layout->size(layout_idx)); overlap_base[layout_idx] = base_[array_idx] / def_chunk_size; overlap_last[layout_idx] = (base_[array_idx]+size_[array_idx] -1) / def_chunk_size; break; case NAS: def_chunk_size = array_size[array_idx] / layout->size(layout_idx); rem = array_size[array_idx] % layout->size(layout_idx); if (rem == 0) { /* perfect distribution */ overlap_base[layout_idx] = base_[array_idx] / def_chunk_size; overlap_last[layout_idx] = (base_[array_idx] + size_[array_idx] -1) / def_chunk_size; } else { /* first "rem" blocks have "def_chunk+1" elements */ tmp = (def_chunk_size+1)*rem; if (base_[array_idx] < tmp) { overlap_base[layout_idx] = base_[array_idx] / (def_chunk_size + 1); } else { overlap_base[layout_idx] = ((base_[array_idx] - tmp) / def_chunk_size) + rem; } last = base_[array_idx] + size_[array_idx] -1; if (last < tmp) { overlap_last[layout_idx] = last / (def_chunk_size+1); } else { overlap_last[layout_idx] = ((last - tmp) / def_chunk_size) + rem; } } break; default: printf("Unsupported block distribution\n"); exit(2); break; } overlap_size[layout_idx] = overlap_last[layout_idx] - overlap_base[layout_idx] + 1; layout_idx++; break; default: printf("Unsupported distribution\n"); exit(3); break; } } free(overlap_last); return; } /* Size of this chunk's data in bytes: element count times element_size_ */ int Chunk::total_size_in_bytes() { return (total_size_in_elements()*element_size_); } /* Element count of this chunk, delegated to total_elements() */ int Chunk::total_size_in_elements() { return total_elements(); } /* Plain accessors for the chunk id and the data buffer pointer */ int Chunk::chunk_id(){return chunk_id_;} void * Chunk::data_ptr(){return data_ptr_;} /* This is not a method. It is a generalized inline function to * calculate the overlap between two chunks. The input parameters * are rank,base,size,stride of the two arrays and the pointers to * the base,sizes and strides of the resultant chunk. 
The function * assumes that the ranks of the input arrays are equal * * This function also assumes that the memory for the return values * r_base, r_stride, r_size have already been allocated. * * Each dimension i is handled independently: * - both strides 1: plain interval intersection, r_stride = 1; * - exactly one stride 1: r_base is max(c1_base, c2_base) rounded up * to the next position of the strided chunk, r_stride is that stride; * - both strides != 1: nothing is written to r_base[i], r_size[i] or * r_stride[i] (unimplemented), so they keep whatever they held. * * NOTE(review): in the two mixed-stride branches the code tests and * divides tmp_size without first subtracting r_base[i], although the * formula in the comment below says U = min(...) - B; please confirm * which of the two is intended before relying on strided overlaps. */ inline void determine_overlap(int rank, int *c1_base, int* c1_size, int* c1_stride, int* c2_base, int* c2_size, int* c2_stride, int* r_base, int* r_size, int* r_stride) { int tmp_base,tmp_size,n; for(int i=0; i< rank;i++) { /* Compute overlap in each dimension */ if ((c1_stride[i] == 1) && (c2_stride[i] == 1)) { /* Simplest case * r_base = max(c1_base, c2_base) * r_size = max( min(c1_base+c1_size, c2_base+c2_size)-r_base, 0); */ r_base[i] = max(c1_base[i], c2_base[i]); r_size[i] = max((min(c1_base[i]+c1_size[i], c2_base[i]+c2_size[i]) - r_base[i]), 0); r_stride[i] = 1; } else if (c1_stride[i] == 1) { /* Not so simple - this needs to be verified * tmp_B = max(c1_base,c2_base) * B = tmp_B + (N - ((tmp_B - c2_base)%N))%N * U = min(c1_base+(c1_size-1), c2_base+(c2_size-1)*N) - B * if (U < 0) then no overlap else r_size = U/N + 1 */ n = c2_stride[i]; tmp_base = max(c1_base[i], c2_base[i]); r_base[i] = tmp_base + (n -((tmp_base - c2_base[i])%n))%n; tmp_size = min(c1_base[i]+(c1_size[i]-1), c2_base[i]+(c2_size[i]-1)*n); if (tmp_size < 0) { /* no overlap */ r_size[i] = 0; r_stride[i] = 1; } else { r_size[i] = tmp_size / n + 1; r_stride[i] = n; } } else if (c2_stride[i] == 1) { /* Similar to the previous case, with the roles of c1 and c2 swapped */ n = c1_stride[i]; tmp_base = max(c1_base[i], c2_base[i]); r_base[i] = tmp_base + (n -((tmp_base - c1_base[i])%n))%n; tmp_size = min(c1_base[i]+(c1_size[i]-1)*n, c2_base[i]+(c2_size[i]-1)); if (tmp_size < 0) { /* no overlap */ r_size[i] = 0; r_stride[i] = 1; } else { r_size[i] = tmp_size / n + 1; r_stride[i] = n; } } else if (c1_stride[i] == c2_stride[i]) { /* Can do this one later (equal non-unit strides: outputs are left untouched) */ } else { /* I give up (different non-unit strides: outputs are left untouched) */ } } #ifdef DEBUG /* Debugging output. NOTE(review): the rest of this DEBUG section, what appears to be the end of determine_overlap() and the start of the following function are missing from this copy of the file; the text resumes in the middle of a call that passes compute_chunk->base(), size() and stride() followed by overlap_base, overlap_size and overlap_stride. */ printf ("In determine overlap rank= %d\n", rank); int k; for(k=0;kbase(), compute_chunk->size(), compute_chunk->stride(), 
overlap_base, overlap_size, overlap_stride); } /* Plain accessors. base(), size() and stride() return this object's own arrays, not copies. */ int* Chunk::base(){return base_;} int* Chunk::size(){return size_;} int* Chunk::stride(){return stride_;} int Chunk::element_size() { return element_size_; } /* This function needs to be verified when the stride is not 1. * It stores in *ptr the address inside data_ptr_ of the element whose * index is given by base[0..rank_-1]. The per-dimension offsets * (base[i]-base_[i])/stride_[i] are combined with the last dimension * varying fastest (each earlier dimension is weighted by the product of * the later size_ entries) and the result is scaled by element_size_. */ void Chunk::base_offset(int *base, void **ptr) { int base_offset = 0; int offset=1; for(int i=rank_ - 1; i>= 0; i--) { base_offset += ((base[i]-base_[i]) / stride_[i])*offset; offset *= size_[i]; } base_offset *= element_size_; *ptr = (char *)data_ptr_ + base_offset; } /* NOTE(review): the body of convert_from_number_to_index() and the header of the function that follows it are missing from this copy of the file. Everything from "rank_ = rank;" onwards appears to belong to that second function (its messages call it compute_base_size_stride), which allocates base_, size_ and stride_ with malloc() and fills them per dimension from old_base/old_size/old_stride and this chunk's index in the layout. */ void Chunk::convert_from_number_to_index(int num, int *result) { int i,j, product=1; for(i=0;iconvert_from_number_to_index(id); rank_ = rank; size_ = (int *) malloc(sizeof(int)*rank); base_ = (int *) malloc(sizeof(int)*rank); stride_ = (int *) malloc(sizeof(int)*rank); /* Verify if it is possible to distribute the array (subchunk); exits with status 1 on an invalid chunk index and 2 on an invalid distribution */ if (!(layout->valid_index(chunk_index))) { printf("Invalid chunk index %d in compute_base_size_stride\n", id); exit(1); } if (!(layout->valid_distribution(rank, dist))) { printf("Unable to distribute array in compute_base_size_stride\n"); exit(2); } for(idx=0; idx < rank; idx++) { switch(dist[idx]) { case NONE: /* not distributed: inherit the parent's extent unchanged */ base_[idx] = old_base[idx]; size_[idx] = old_size[idx]; stride_[idx] = old_stride[idx]*1; break; case CYCLIC: /* start at this chunk's index and step by the number of chunks in the layout dimension */ base_[idx] = old_base[idx] + chunk_index[layout_idx]*old_stride[idx]; size_[idx] = (old_size[idx] - chunk_index[layout_idx] + layout->size(layout_idx)-1)/ layout->size(layout_idx); stride_[idx] = layout->size(layout_idx) * old_stride[idx]; layout_idx++; break; case BLOCK: switch(block_dist) { case HPF: /* ceil(old_size / number of chunks) elements per chunk */ default_size = (old_size[idx] + layout->size(layout_idx)-1) /layout->size(layout_idx); base_[idx] = old_base[idx] + default_size * chunk_index[layout_idx] *old_stride[idx]; size_[idx] = default_size; stride_[idx] = old_stride[idx]*1; /* The last chunk may be smaller */ if (chunk_index[layout_idx] ==(layout->size(layout_idx)-1)) { size_[idx] = old_size[idx] - (default_size * 
chunk_index[layout_idx]); } break; case NAS: /* floor(old_size / number of chunks) elements per chunk; the first rem chunks get one extra */ default_size = old_size[idx] / layout->size(layout_idx); rem = old_size[idx] % layout->size(layout_idx); if (chunk_index[layout_idx] < rem) { base_[idx] = old_base[idx] + (chunk_index[layout_idx] + chunk_index[layout_idx]*default_size) *old_stride[idx]; size_[idx] = default_size + 1; } else { base_[idx] = old_base[idx] + (rem + chunk_index[layout_idx]*default_size) *old_stride[idx]; size_[idx] = default_size; } stride_[idx] = old_stride[idx] * 1; break; default: printf("Unsupported Block Distribution specified\n"); exit(3); break; } layout_idx++; break; default: printf("Unsupported Distribution specified\n"); exit(3); break; } } free(chunk_index); return; } /* Plain accessors for the owning array and the subchunk flag */ Array* Chunk::array(){return array_;} Boolean Chunk::am_subchunk(){return am_subchunk_;} /* Copy this chunk's base, size and stride (rank_ entries each) into arrays supplied by the caller */ void Chunk::copy_base_size_stride(int *base, int *size, int *stride) { for(int i=0; i< rank_; i++){ base[i] = base_[i]; size[i] = size_[i]; stride[i] = stride_[i]; } } /* This assumes that all the strides are 1 - i.e no cyclic. * The visible tail of the function stores in *ptr the address inside * data_ptr_ of the element at overlap_base, using the local base/size * arrays with the last dimension varying fastest, and frees its scratch * arrays. * * NOTE(review): the middle of this function (including where "allocate" * is assigned and where return_data_type is built) is missing from this * copy of the file. Also, because the line breaks are gone, the "//" * comment below hides the rest of this physical line from the compiler. * Restore the function from a pristine copy. */ void Chunk::make_datatype(int *overlap_base, int *overlap_size, int *overlap_stride, void **ptr, MPI_Datatype *return_data_type) { MPI_Datatype *tmp_types = (MPI_Datatype *) malloc(sizeof(MPI_Datatype) * rank_); int i,j , offset = 1; int base_offset = 0; int *size, *base; Boolean allocate; // If there is a ghost region int *array_size = array_->size(); int bound; if (stencil_width_ > 0) { size = (int *)malloc(sizeof(int) * rank_); base = (int *)malloc(sizeof(int) * rank_); for (i=0; i 0; i--) { offset=1; for(j=i;j = 0; i--) { base_offset += (overlap_base[i] - base[i])*offset; offset *= size[i]; } *ptr = data_ptr_ + base_offset*element_size_; free (tmp_types); if (allocate) { free(size); free(base); } } /* Old data buffer should be freed by someother function */ void Chunk::set_data_ptr(char *data_ptr){ data_ptr_ = data_ptr; } void Chunk::set_stencil_width(int stencil_width){ stencil_width_ = stencil_width; } Chunk::Chunk(Array *array, int *base, int 
*size) { array_ = array; rank_ = array->rank(); element_size_ = array->element_size(); chunk_id_ = 0; am_subchunk_ = NO; base_ = copy_int_list(rank_, base); size_ = copy_int_list(rank_, size); stride_ = (int *)malloc(sizeof(int) * rank_); for (int i=0; i