Diffstat (limited to 'src/Panda/Chunk.C')
-rw-r--r--   src/Panda/Chunk.C   692
1 file changed, 692 insertions, 0 deletions
diff --git a/src/Panda/Chunk.C b/src/Panda/Chunk.C
new file mode 100644
index 0000000..d6fd028
--- /dev/null
+++ b/src/Panda/Chunk.C
@@ -0,0 +1,692 @@
+#include "definitions.h"
+#include "Chunk.h"
+#include "Array.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <malloc.h>
+
+
+Chunk::Chunk()
+{
+ base_ = stride_ = size_ = NULL;
+ array_ = NULL;
+ chunk_ = NULL;
+ data_ptr_ = NULL;
+ stencil_width_ = 0;
+}
+
+
+/* This constructor is used to create a chunk given array information */
+Chunk::Chunk(Array *array, int chunk_id, int node_type, DataStatus data_status)
+{
+ do_init(array, chunk_id, node_type, data_status);
+}
+
+/* Re-initialize an already created chunk object */
+void Chunk::init(Array *array, int chunk_id, int node_type, DataStatus data_status)
+{
+ clear();
+ do_init(array, chunk_id, node_type, data_status);
+}
+
+void Chunk::do_init(Array *array, int chunk_id, int node_type,
+ DataStatus data_status)
+{
+ int *stride, *base;
+
+ /* Initialize the instance variables */
+ array_ = array;
+ chunk_ = NULL;
+ chunk_id_ = chunk_id;
+ am_subchunk_ = NO;
+ element_size_ = array->element_size();
+
+ stride = (int *) malloc(sizeof(int)*array->rank());
+ base = (int *) malloc(sizeof(int)*array->rank());
+ for(int i=0; i < array->rank(); i++){ stride[i] = 1; base[i] = 0; }
+
+ RegularDistribution *layout=(RegularDistribution *)(array->layout(node_type));
+ calculate_base_size_stride(array->rank(), base, array->size(), stride,
+ layout->layout(), layout->distribution(),
+ layout->block_dist(), chunk_id);
+
+ /* check if we have to allocate the data space */
+ switch(data_status) {
+ case ALLOC:
+ data_ptr_ = (char *)malloc(total_size_in_bytes());
+ data_status_ = data_status;
+ stencil_width_ = 0;
+ break;
+
+ case NO_ALLOC:
+ data_ptr_ = NULL;
+ data_status_ = data_status;
+ stencil_width_ = 0;
+ break;
+
+ default:
+ printf("Unsupported \n");
+ break;
+ }
+}
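+
+/* Usage sketch (illustrative only - IO_NODE is an assumed node-type value;
+ * only SUB_CHUNK appears explicitly in this file): an I/O node that owns
+ * chunk my_id of global_array and needs local storage would do roughly
+ *
+ *   Chunk *c = new Chunk(global_array, my_id, IO_NODE, ALLOC);
+ *
+ * while a node that only needs the chunk geometry would pass NO_ALLOC and
+ * attach a buffer later with set_data_ptr().
+ */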
+
+/* This creates a subchunk, given the parent chunk and the subchunk id */
+Chunk::Chunk(Chunk* mega_chunk, int sub_chunkid, DataStatus data_status)
+{
+ do_init(mega_chunk, sub_chunkid, data_status);
+}
+
+/* Re-initialize an already created subchunk object */
+void Chunk::init(Chunk* mega_chunk, int sub_chunkid, DataStatus data_status)
+{
+ clear();
+ do_init(mega_chunk, sub_chunkid, data_status);
+}
+
+
+void Chunk::do_init(Chunk* mega_chunk, int sub_chunkid, DataStatus data_status)
+{
+ chunk_id_ = sub_chunkid;
+ element_size_ = mega_chunk->element_size();
+ array_ = mega_chunk->array();
+ chunk_ = mega_chunk;
+ am_subchunk_ = YES;
+
+ RegularDistribution *layout=(RegularDistribution *)(array_->layout(SUB_CHUNK));
+ calculate_base_size_stride(mega_chunk->rank(), mega_chunk->base(),
+ mega_chunk->size(), mega_chunk->stride(),
+ layout->layout(), layout->distribution(),
+ layout->block_dist(), sub_chunkid);
+ /* check if we have to allocate the data space */
+ switch(data_status) {
+ case ALLOC:
+ data_ptr_ = (char *)malloc(total_size_in_bytes());
+ data_status_ = data_status;
+ stencil_width_ = 0;
+ break;
+
+ case NO_ALLOC:
+ data_ptr_ = NULL;
+ data_status_ = data_status;
+ stencil_width_ = 0;
+ break;
+
+ default:
+ data_ptr_ = NULL;
+ printf("Unsupported \n");
+ break;
+ }
+}
+
+Chunk::~Chunk()
+{
+ if (base_) free(base_);
+ if (stride_) free(stride_);
+ if (size_) free(size_);
+
+ /* Free the data buffer only if we allocated it in the first place */
+ if ((data_status_ == ALLOC) && data_ptr_) free(data_ptr_);
+}
+
+
+void Chunk::clear()
+{
+ if (base_) free(base_);
+ if (stride_) free(stride_);
+ if (size_) free(size_);
+ /* Free the data buffer only if we allocated it - a buffer attached with
+  * set_data_ptr() is the caller's responsibility */
+ if ((data_status_ == ALLOC) && data_ptr_) free(data_ptr_);
+ base_ = size_ = stride_ = NULL;
+ data_ptr_ = NULL;
+}
+
+/* This function takes as input the information about the global
+ * Array and returns the indices of the overlapping compute node
+ * chunks in the caller-supplied array ret_list, with the count
+ * returned through num_overlaps.
+ *
+ * Currently this function can only handle BLOCK,* arrays (needs
+ * to be extended for the CYCLIC case).
+ */
+void Chunk::chunk_overlaps(Array *global_array, int* num_overlaps,
+ int *ret_list, int node_type)
+{
+ RegularDistribution *layout1 =
+ (RegularDistribution *)global_array->layout(node_type);
+ ArrayLayout *layout= layout1->layout();
+ int layout_rank = layout->rank();
+ int *overlap_base = (int *)malloc(sizeof(int)*layout_rank);
+ int *overlap_size = (int *)malloc(sizeof(int)*layout_rank);
+
+ /* Find out the list of possible overlaps */
+ compute_first_last_chunk(global_array->rank(), global_array->size(),
+ layout, layout1->distribution(), layout1->block_dist(),
+ overlap_base, overlap_size);
+#ifdef DEBUG
+ printf("In chunk_overlaps\n");
+ for(int i=0;i<layout_rank;i++)
+ printf("base[%d] = %d size[%d] = %d\n", i, overlap_base[i], i, overlap_size[i]);
+#endif
+ layout->indices_list(overlap_base, overlap_size, num_overlaps, ret_list);
+ free(overlap_base);
+ free(overlap_size);
+}
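+
+/* Usage sketch (illustrative only - COMPUTE_NODE and max_chunks are assumed
+ * caller-side names, not defined in this file): an I/O chunk can ask which
+ * compute-node chunks it intersects roughly as follows:
+ *
+ *   int num_overlaps;
+ *   int *overlap_ids = (int *) malloc(sizeof(int) * max_chunks);
+ *   io_chunk->chunk_overlaps(global_array, &num_overlaps, overlap_ids, COMPUTE_NODE);
+ *   for (int i = 0; i < num_overlaps; i++)
+ *     ...  // overlap_ids[i] is the index of an overlapping compute chunk
+ *   free(overlap_ids);
+ */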
+
+
+/* This function isn't general enough. It implicitly assumes that the I/O
+ * chunks are distributed using only BLOCK,* distributions. Also the
+ * compute node chunks are assumed to be distributed using only
+ * BLOCK,* (can be extended to support CYCLIC later).
+ *
+ * The function assumes that the memory for the return parameters
+ * overlap_base and overlap_size has already been allocated.
+ */
+void Chunk::compute_first_last_chunk(int array_rank, int *array_size,
+ ArrayLayout *layout, Distribution *dist,
+ Block_Distribution block_dist,
+ int *overlap_base, int *overlap_size)
+{
+ /* Validation of input data */
+ if (!(layout->valid_distribution(array_rank, dist)))
+ {
+ printf("Invalid distribution in compute_first_last_chunk\n");
+ exit(1);
+ }
+
+ /* Verify that we are dealing with the BLOCK,* case only */
+ for(int i=0;i<layout->rank();i++)
+ {
+ if (dist[i] == CYCLIC)
+ {
+ printf("Cyclic schema not yet supported\n");
+ exit(2);
+ }
+ }
+
+ for(int i=0; i<array_rank; i++)
+ {
+ if (stride_[i] != 1)
+ {
+ printf("Strided (cyclic) chunks not yet supported\n");
+ exit(2);
+ }
+ }
+
+
+ /* Now we can get down to business */
+ int *overlap_last = (int*)malloc(sizeof(int)*layout->rank());
+ int layout_idx=0, array_idx;
+ int def_chunk_size,rem,tmp,last;
+
+ for(array_idx=0;array_idx < array_rank; array_idx++)
+ {
+ switch(dist[array_idx])
+ {
+ case NONE:
+ break;
+
+ case CYCLIC:
+ printf("Cyclic schema not yet supported\n");
+ exit(3);
+ break;
+
+ /* Need to verify this stuff - especially the NAS stuff */
+ case BLOCK:
+ switch(block_dist)
+ {
+ case HPF:
+ def_chunk_size = (array_size[array_idx]+layout->size(layout_idx)-1)
+ / (layout->size(layout_idx));
+ overlap_base[layout_idx] = base_[array_idx]
+ / def_chunk_size;
+ overlap_last[layout_idx] = (base_[array_idx]+size_[array_idx] -1)
+ / def_chunk_size;
+ break;
+
+ case NAS:
+ def_chunk_size = array_size[array_idx]
+ / layout->size(layout_idx);
+ rem = array_size[array_idx]
+ % layout->size(layout_idx);
+ if (rem == 0)
+ {
+ /* perfect distribution */
+ overlap_base[layout_idx] = base_[array_idx]
+ / def_chunk_size;
+ overlap_last[layout_idx] = (base_[array_idx]
+ + size_[array_idx] -1)
+ / def_chunk_size;
+ }
+ else
+ {
+ /* first "rem" blocks have "def_chunk+1" elements */
+ tmp = (def_chunk_size+1)*rem;
+ if (base_[array_idx] < tmp)
+ {
+ overlap_base[layout_idx] = base_[array_idx]
+ / (def_chunk_size + 1);
+ }
+ else
+ {
+ overlap_base[layout_idx] = ((base_[array_idx] - tmp)
+ / def_chunk_size) + rem;
+ }
+
+ last = base_[array_idx] + size_[array_idx] -1;
+ if (last < tmp)
+ {
+ overlap_last[layout_idx] = last / (def_chunk_size+1);
+
+ }
+ else
+ {
+ overlap_last[layout_idx] = ((last - tmp)
+ / def_chunk_size) + rem;
+ }
+ }
+ break;
+
+ default:
+ printf("Unsupported block distribution\n");
+ exit(2);
+ break;
+ }
+ overlap_size[layout_idx] = overlap_last[layout_idx]
+ - overlap_base[layout_idx] + 1;
+ layout_idx++;
+ break;
+
+ default:
+ printf("Unsupported distribution\n");
+ exit(3);
+ break;
+ }
+
+ }
+
+ free(overlap_last);
+ return;
+}
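+
+/* Worked example of the two block distributions handled above (illustrative
+ * only): 10 array elements split over a layout of size 3 in one dimension.
+ *   HPF: def_chunk_size = ceil(10/3) = 4, so the layout slots cover
+ *        [0,4), [4,8), [8,10) - the last slot is smaller.
+ *   NAS: def_chunk_size = 10/3 = 3, rem = 1, so the first slot gets 4
+ *        elements and the rest get 3: [0,4), [4,7), [7,10).
+ * A chunk whose base is 6 therefore maps to layout slot 1 under both
+ * schemes, but the boundary arithmetic used to find it differs.
+ */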
+
+
+
+
+int Chunk::total_size_in_bytes()
+{
+ return (total_size_in_elements()*element_size_);
+}
+
+
+
+int Chunk::total_size_in_elements()
+{
+ return total_elements();
+}
+
+
+int Chunk::chunk_id(){return chunk_id_;}
+
+
+void * Chunk::data_ptr(){return data_ptr_;}
+
+
+
+/* This is not a method. It is a generalized inline function to
+ * calculate the overlap between two chunks. The input parameters
+ * are the rank, base, stride and size of the two chunks and pointers to
+ * the base, stride and size of the resultant chunk. The function
+ * assumes that the ranks of the input chunks are equal.
+ *
+ * This function also assumes that the memory for the return values
+ * r_base, r_stride, r_size has already been allocated.
+ */
+inline void determine_overlap(int rank, int *c1_base, int* c1_size,
+ int* c1_stride,
+ int* c2_base, int* c2_size, int* c2_stride,
+ int* r_base, int* r_size, int* r_stride)
+{
+
+ int tmp_base,tmp_size,n;
+
+ for(int i=0; i< rank;i++)
+ {
+ /* Compute overlap in each dimension */
+ if ((c1_stride[i] == 1) && (c2_stride[i] == 1))
+ {
+ /* Simplest case
+ * r_base = max(c1_base, c2_base)
+ * r_size = max( min(c1_base+c1_size, c2_base+c2_size)-r_base, 0);
+ */
+ r_base[i] = max(c1_base[i], c2_base[i]);
+ r_size[i] = max((min(c1_base[i]+c1_size[i], c2_base[i]+c2_size[i])
+ - r_base[i]), 0);
+ r_stride[i] = 1;
+ }
+ else if (c1_stride[i] == 1)
+ {
+ /* Not so simple - this needs to be verified
+ * tmp_B = max(c1_base, c2_base)
+ * B = tmp_B + (N - ((tmp_B - c2_base)%N))%N
+ * U = min(c1_base+(c1_size-1), c2_base+(c2_size-1)*N) - B
+ * if (U < 0) then no overlap else r_size = U/N + 1
+ */
+ n = c2_stride[i];
+ tmp_base = max(c1_base[i], c2_base[i]);
+ r_base[i] = tmp_base + (n -((tmp_base - c2_base[i])%n))%n;
+ tmp_size = min(c1_base[i]+(c1_size[i]-1), c2_base[i]+(c2_size[i]-1)*n)
+            - r_base[i]; /* U = min(...) - B, per the formula above */
+ if (tmp_size < 0)
+ {
+ /* no overlap */
+ r_size[i] = 0;
+ r_stride[i] = 1;
+ }
+ else
+ {
+ r_size[i] = tmp_size / n + 1;
+ r_stride[i] = n;
+ }
+ }
+ else if (c2_stride[i] == 1)
+ {
+ /* Similar to the previous case */
+ n = c1_stride[i];
+ tmp_base = max(c1_base[i], c2_base[i]);
+ r_base[i] = tmp_base + (n -((tmp_base - c1_base[i])%n))%n;
+ tmp_size = min(c1_base[i]+(c1_size[i]-1)*n, c2_base[i]+(c2_size[i]-1))
+            - r_base[i]; /* U = min(...) - B, as in the previous case */
+ if (tmp_size < 0)
+ {
+ /* no overlap */
+ r_size[i] = 0;
+ r_stride[i] = 1;
+ }
+ else
+ {
+ r_size[i] = tmp_size / n + 1;
+ r_stride[i] = n;
+ }
+ }
+ else if (c1_stride[i] == c2_stride[i])
+ {
+ /* Can do this one later - bail out rather than leave the result uninitialized */
+ printf("Equal non-unit strides not yet supported in determine_overlap\n");
+ exit(4);
+ }
+ else
+ {
+ /* I give up - unequal non-unit strides are not supported */
+ printf("Unequal non-unit strides not yet supported in determine_overlap\n");
+ exit(4);
+ }
+ }
+#ifdef DEBUG
+ /* Debugging output */
+ printf ("In determine overlap rank= %d\n", rank);
+ int k;
+ for(k=0;k<rank;k++)
+ printf("%d %d %d %d %d %d %d %d %d\n", c1_base[k], c1_size[k], c1_stride[k],
+ c2_base[k], c2_size[k], c2_stride[k],
+ r_base[k], r_size[k], r_stride[k]);
+#endif
+ return;
+}
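+
+/* Worked example of the strided case above, following the formula in the
+ * comment (illustrative only): c1 = {base 0, size 10, stride 1} covers
+ * indices 0..9 and c2 = {base 1, size 4, stride 3} covers 1,4,7,10. Then
+ * n = 3, tmp_base = 1, r_base = 1 + (3 - 0)%3 = 1,
+ * U = min(9, 10) - 1 = 8 >= 0, so r_size = 8/3 + 1 = 3 and r_stride = 3:
+ * the overlap is the elements 1, 4 and 7.
+ */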
+
+
+void Chunk::compute_overlap(Chunk *compute_chunk, int *overlap_base,
+ int *overlap_size, int *overlap_stride)
+{
+ determine_overlap(rank_, base_, size_, stride_,
+ compute_chunk->base(),
+ compute_chunk->size(),
+ compute_chunk->stride(),
+ overlap_base,
+ overlap_size,
+ overlap_stride);
+}
+
+
+int* Chunk::base(){return base_;}
+int* Chunk::size(){return size_;}
+int* Chunk::stride(){return stride_;}
+
+int Chunk::element_size() { return element_size_; }
+/* This function needs to be verified when the stride is not 1 */
+void Chunk::base_offset(int *base, void **ptr)
+{
+ int base_offset = 0;
+ int offset=1;
+
+ for(int i=rank_ - 1; i>= 0; i--)
+ {
+ base_offset += ((base[i]-base_[i]) / stride_[i])*offset;
+ offset *= size_[i];
+ }
+ base_offset *= element_size_;
+ *ptr = (char *)data_ptr_ + base_offset;
+}
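+
+/* Worked example (illustrative only): for a rank-2 chunk with
+ * base_ = {0,0}, size_ = {10,10}, stride_ = {1,1} and element_size_ = 8,
+ * a request for base = {2,3} gives base_offset = 3*1 + 2*10 = 23 elements,
+ * i.e. *ptr = data_ptr_ + 184 bytes.
+ */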
+
+void Chunk::convert_from_number_to_index(int num, int *result)
+{
+ int i,j, product=1;
+
+ for(i=0;i<rank_;i++)
+ {
+ product=1;
+ for(j=i+1; j< rank_;j++) product *= size_[j];
+ result[i] = num / product;
+ num -= num/product * product;
+ }
+}
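+
+/* Worked example (illustrative only): with rank_ = 3 and size_ = {4,5,6},
+ * num = 37 gives result[0] = 37/30 = 1 (remainder 7), result[1] = 7/6 = 1
+ * (remainder 1), result[2] = 1, i.e. linear element 37 is at index (1,1,1).
+ */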
+
+
+/* This method calculates the rank, base, stride of the chunk *
+ * (subchunk), given the dimensions of the array (chunk) and its *
+ * layout, distribution and the chunk (subchunk index) */
+void Chunk::calculate_base_size_stride(int rank, int* old_base,
+ int* old_size, int* old_stride,
+ ArrayLayout *layout, Distribution *dist,
+ Block_Distribution block_dist, int id)
+{
+ int *chunk_index=NULL;
+ int idx=0, layout_idx=0;
+ int default_size, rem;
+
+ chunk_index = layout->convert_from_number_to_index(id);
+ rank_ = rank;
+ size_ = (int *) malloc(sizeof(int)*rank);
+ base_ = (int *) malloc(sizeof(int)*rank);
+ stride_ = (int *) malloc(sizeof(int)*rank);
+
+
+ /* Verify if it is possible to distribute the array (subchunk) */
+ if (!(layout->valid_index(chunk_index)))
+ {
+ printf("Invalid chunk index %d in compute_base_size_stride\n", id);
+ exit(1);
+ }
+ if (!(layout->valid_distribution(rank, dist)))
+ {
+ printf("Unable to distribute array in compute_base_size_stride\n");
+ exit(2);
+ }
+
+ for(idx=0; idx < rank; idx++)
+ {
+ switch(dist[idx])
+ {
+ case NONE:
+ base_[idx] = old_base[idx];
+ size_[idx] = old_size[idx];
+ stride_[idx] = old_stride[idx]*1;
+ break;
+
+ case CYCLIC:
+ base_[idx] = old_base[idx] + chunk_index[layout_idx]*old_stride[idx];
+ size_[idx] = (old_size[idx] - chunk_index[layout_idx]
+ + layout->size(layout_idx)-1)/ layout->size(layout_idx);
+ stride_[idx] = layout->size(layout_idx) * old_stride[idx];
+ layout_idx++;
+ break;
+
+ case BLOCK:
+ switch(block_dist)
+ {
+ case HPF:
+ default_size = (old_size[idx] + layout->size(layout_idx)-1)
+ /layout->size(layout_idx);
+ base_[idx] = old_base[idx] + default_size *
+ chunk_index[layout_idx] *old_stride[idx];
+ size_[idx] = default_size;
+ stride_[idx] = old_stride[idx]*1;
+ /* The last chunk may be smaller */
+ if (chunk_index[layout_idx] ==(layout->size(layout_idx)-1))
+ {
+ size_[idx] = old_size[idx] -
+ (default_size * chunk_index[layout_idx]);
+ }
+ break;
+
+ case NAS:
+ default_size = old_size[idx] / layout->size(layout_idx);
+ rem = old_size[idx] % layout->size(layout_idx);
+ if (chunk_index[layout_idx] < rem)
+ {
+ base_[idx] = old_base[idx] + (chunk_index[layout_idx] +
+ chunk_index[layout_idx]*default_size)
+ *old_stride[idx];
+ size_[idx] = default_size + 1;
+ }
+ else
+ {
+ base_[idx] = old_base[idx] + (rem +
+ chunk_index[layout_idx]*default_size)
+ *old_stride[idx];
+ size_[idx] = default_size;
+ }
+ stride_[idx] = old_stride[idx] * 1;
+ break;
+
+
+ default:
+ printf("Unsupported Block Distribution specified\n");
+ exit(3);
+ break;
+ }
+ layout_idx++;
+ break;
+
+ default:
+ printf("Unsupported Distribution specified\n");
+ exit(3);
+ break;
+ }
+ }
+
+ free(chunk_index);
+ return;
+}
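+
+/* Worked example (illustrative only): distributing old_size = 10 elements
+ * (old_base = 0, old_stride = 1) over a layout of size 3, for chunk_index 1:
+ *   BLOCK/HPF: default_size = ceil(10/3) = 4, so base_ = 4, size_ = 4,
+ *              stride_ = 1 (elements 4..7); chunk_index 2 would get size 2.
+ *   CYCLIC:    base_ = 1, size_ = (10 - 1 + 2)/3 = 3, stride_ = 3
+ *              (elements 1, 4 and 7).
+ */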
+
+Array* Chunk::array(){return array_;}
+
+Boolean Chunk::am_subchunk(){return am_subchunk_;}
+
+void Chunk::copy_base_size_stride(int *base, int *size, int *stride)
+{
+ for(int i=0; i< rank_; i++){
+ base[i] = base_[i];
+ size[i] = size_[i];
+ stride[i] = stride_[i];
+ }
+ }
+
+
+
+/* This assumes that all the strides are 1, i.e. no cyclic distribution */
+void Chunk::make_datatype(int *overlap_base, int *overlap_size,
+ int *overlap_stride, void **ptr,
+ MPI_Datatype *return_data_type)
+{
+
+ MPI_Datatype *tmp_types = (MPI_Datatype *) malloc(sizeof(MPI_Datatype) * rank_);
+ int i,j , offset = 1;
+ int base_offset = 0;
+ int *size, *base;
+ Boolean allocate;
+
+ // If there is a ghost region
+ int *array_size = array_->size();
+ int bound;
+ if (stencil_width_ > 0) {
+ size = (int *)malloc(sizeof(int) * rank_);
+ base = (int *)malloc(sizeof(int) * rank_);
+ for (i=0; i<rank_; i++) {
+ bound = base_[i] + size_[i];
+ base[i] = max(base_[i] - stencil_width_, 0);
+ bound = min(bound + stencil_width_, array_size[i]);
+ size[i] = bound - base[i];
+ }
+ allocate = YES;
+ //printf("##### stencil %d base %d %d %d size %d %d %d\n", stencil_width_, base[0], base[1], base[2], size[0], size[1], size[2]);
+ } else {
+ size = size_;
+ base = base_;
+ allocate = NO;
+ }
+
+ /* Build the innermost types; note that this (and the loop below) assumes
+  * rank_ >= 2, since it writes into tmp_types[rank_-2] */
+ MPI_Type_contiguous(element_size_, MPI_CHAR, &tmp_types[rank_-1]);
+ if (overlap_stride[rank_ -1] != 1)
+ {
+ printf("error - stride is %d\n", overlap_stride[rank_ -1]);
+ exit(10);
+ }
+ MPI_Type_vector(overlap_size[rank_-1], 1, 1, tmp_types[rank_-1], &tmp_types[rank_-2]);
+ for(i=rank_-1; i > 0; i--)
+ {
+ offset=1;
+ for(j=i;j <rank_; j++) offset *= size[j];
+ if (overlap_stride[i-1] != 1)
+ {
+ printf("error - stride is %d\n", overlap_stride[i-1]);
+ exit(10);
+ }
+ if (i != 1){
+
+ MPI_Type_hvector(overlap_size[i-1],1,offset*element_size_,
+ tmp_types[i-1],
+ &tmp_types[i-2]);
+ }
+ else
+ MPI_Type_hvector(overlap_size[i-1],1,offset*element_size_,
+ tmp_types[i-1],
+ return_data_type);
+ }
+ MPI_Type_commit(return_data_type);
+ offset=1;
+ for(i=rank_-1;i >= 0; i--)
+ {
+ base_offset += (overlap_base[i] - base[i])*offset;
+ offset *= size[i];
+ }
+
+ *ptr = data_ptr_ + base_offset*element_size_;
+ free (tmp_types);
+ if (allocate) {
+ free(size);
+ free(base);
+ }
+}
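+
+/* Usage sketch (illustrative only - MAX_RANK, compute_chunk, dest and tag
+ * are assumed caller-side names): a typical sender pairs compute_overlap()
+ * with make_datatype(), e.g.
+ *
+ *   int obase[MAX_RANK], osize[MAX_RANK], ostride[MAX_RANK];
+ *   void *buf;
+ *   MPI_Datatype dtype;
+ *   io_chunk->compute_overlap(compute_chunk, obase, osize, ostride);
+ *   io_chunk->make_datatype(obase, osize, ostride, &buf, &dtype);
+ *   MPI_Send(buf, 1, dtype, dest, tag, MPI_COMM_WORLD);
+ *   MPI_Type_free(&dtype);
+ */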
+
+
+/* The old data buffer should be freed by some other function */
+void Chunk::set_data_ptr(char *data_ptr){
+ data_ptr_ = data_ptr;
+}
+
+void Chunk::set_stencil_width(int stencil_width){
+ stencil_width_ = stencil_width;
+}
+
+Chunk::Chunk(Array *array, int *base, int *size)
+{
+ array_ = array;
+ rank_ = array->rank();
+ element_size_ = array->element_size();
+ chunk_id_ = 0;
+ am_subchunk_ = NO;
+
+ base_ = copy_int_list(rank_, base);
+ size_ = copy_int_list(rank_, size);
+ stride_ = (int *)malloc(sizeof(int) * rank_);
+ for (int i=0; i<rank_; i++) stride_[i] = 1;
+ chunk_ = NULL;
+ data_status_ = NO_ALLOC; data_ptr_ = NULL;
+ stencil_width_ = 0;
+}