#include "definitions.h"
#include "MPIFS.h"
#include "Array.h"

#include "CactusExternal/FlexIO/src/Arch.h"

/* C-linkage routines called from this file. Presumably they are provided
 * by the FlexIO library whose Arch.h is included above - TODO confirm. */
extern "C" {
  int IOsizeOf(int);
  int IOreadAttributeInfo(IOFile, char *,int *, int *);
  int IOreadAttribute(IOFile,int,void*);
}

/* Globals defined elsewhere and read by the Array member functions below:
 *   global_system_type_ - compared against MPI_SYSTEM / UNIX_SYSTEM
 *   MPIFS_global_obj    - queried for rank and node type, runs the io loops
 *   SUBCHUNK_SIZE       - byte threshold used by make_sub_chunks()        */
extern int global_system_type_;
extern MPIFS* MPIFS_global_obj;
extern int SUBCHUNK_SIZE;

/***************************************************************************
 * Class: Array
 * Description: This is a user-visible class. This is used to describe the
 *              global array. It also stores pointers to local chunks of 
 *              data.
 *
 * Instance-variables:
 *        name_  - name of the array
 *        rank_  - rank of the array (inherited variable)
 *        size_  - size of the array (elements) in the various dimensions
 *        element_size_ - size of each array element (in bytes)
 *        compute_node_layout_ - layout of the compute nodes 
 *        io_node_layout_ - layout of the io nodes
 *        subchunk_layout_ - layout of the subchunks
 *        natural_chunked_ - whether the array is naturally chunked
 *        compute_node_alloc_policy - chunk dist policy on compute nodes
 *        io_node_alloc_policy - chunk dist policy on the io nodes
 ****************************************************************************
 */

/* Default constructor: creates an empty array description with no name,
 * no layouts and the simple io strategy.
 *
 * Every pointer member that the destructor releases (name_ and the three
 * layouts) is set to NULL here, so that destroying a default-constructed
 * object never frees or deletes an indeterminate pointer.               */
Array::Array() : Template()
{
  name_ = NULL;
  compute_node_layout_ = NULL;
  io_node_layout_ = NULL;
  subchunk_layout_ = NULL;
  element_size_ = 0;
  ieee_size_ = 0;
  natural_chunked_ = NO;
  sub_chunked_ = NO;
  overlap_ = NO;
  io_strategy_ = SIMPLE_IO;
}

/* Compute-node constructor, no user-specified subchunking.          *
 * The compute-node layout uses the REGULAR chunk policy (one chunk  *
 * per compute node) and the io-node layout uses ROUND_ROBIN. The    *
 * chunks are created through allocate_chunks(COMPUTE_NODE), i.e.    *
 * without a user-supplied data pointer.                             */
Array::Array(char *name, int rank, int *sizearray, int elt_size,
             ArrayLayout *mem_layout, Distribution *mem_dist,
             ArrayLayout *io_layout, Distribution *io_dist)
  : Template(rank, sizearray)
{
  /* The two NULLs mean: no subchunk layout, no subchunk distribution */
  do_init(name, rank, sizearray, elt_size,
          mem_layout, mem_dist,
          io_layout, io_dist,
          NULL, NULL,
          REGULAR, ROUND_ROBIN, HPF);

  /* The chunk list is only built when the array extents were given */
  if (sizearray != NULL) {
    allocate_chunks(COMPUTE_NODE);
  }

  overlap_ = NO;
}

/* Compute-node constructor, no user-specified subchunking, with a   *
 * caller-provided data pointer.                                     *
 * The compute-node layout uses the REGULAR chunk policy (one chunk  *
 * per compute node) and the io-node layout uses ROUND_ROBIN. The    *
 * single pointer in data_ptr is handed to allocate_chunks() with a  *
 * stencil width of 0.                                               */
Array::Array(char *name, int rank, int *sizearray, int elt_size,
             ArrayLayout *mem_layout, Distribution *mem_dist,
             ArrayLayout *io_layout, Distribution *io_dist,
             char *data_ptr)
  : Template(rank, sizearray)
{
  /* allocate_chunks() expects an array of pointers; wrap the single one */
  char *user_buffer = data_ptr;

  /* The two NULLs mean: no subchunk layout, no subchunk distribution */
  do_init(name, rank, sizearray, elt_size,
          mem_layout, mem_dist,
          io_layout, io_dist,
          NULL, NULL,
          REGULAR, ROUND_ROBIN, HPF);

  /* The chunk list is only built when the array extents were given */
  if (sizearray != NULL) {
    allocate_chunks(COMPUTE_NODE, 1, &user_buffer, 0);
  }

  overlap_ = NO;
}

/* Compute-node constructor, no user-specified subchunking, with a   *
 * caller-provided data pointer and a stencil width.                 *
 * The compute-node layout uses the REGULAR chunk policy (one chunk  *
 * per compute node) and the io-node layout uses ROUND_ROBIN. The    *
 * array is flagged as overlapped exactly when stencil_width > 0.    */
Array::Array(char *name, int rank, int *sizearray, int elt_size,
             ArrayLayout *mem_layout, Distribution *mem_dist,
             ArrayLayout *io_layout, Distribution *io_dist,
             char *data_ptr, int stencil_width)
  : Template(rank, sizearray)
{
  /* allocate_chunks() expects an array of pointers; wrap the single one */
  char *user_buffer = data_ptr;

  /* The two NULLs mean: no subchunk layout, no subchunk distribution */
  do_init(name, rank, sizearray, elt_size,
          mem_layout, mem_dist,
          io_layout, io_dist,
          NULL, NULL,
          REGULAR, ROUND_ROBIN, HPF);

  /* The chunk list is only built when the array extents were given */
  if (sizearray != NULL) {
    allocate_chunks(COMPUTE_NODE, 1, &user_buffer, stencil_width);
  }

  if (stencil_width > 0) {
    overlap_ = YES;
  } else {
    overlap_ = NO;
  }
}

/* Compute-node constructor with user-specified subchunking.         *
 * The compute-node layout uses the REGULAR chunk policy (one chunk  *
 * per compute node) and the io-node layout uses ROUND_ROBIN. The    *
 * chunks are created through allocate_chunks(COMPUTE_NODE), i.e.    *
 * without a user-supplied data pointer.                             */
Array::Array(char *name, int rank, int *sizearray, int elt_size,
             ArrayLayout *mem_layout, Distribution *mem_dist,
             ArrayLayout *io_layout, Distribution *io_dist,
             ArrayLayout *sub_layout, Distribution* sub_dist)
  : Template(rank, sizearray)
{
  do_init(name, rank, sizearray, elt_size,
          mem_layout, mem_dist,
          io_layout, io_dist,
          sub_layout, sub_dist,
          REGULAR, ROUND_ROBIN, HPF);

  /* The chunk list is only built when the array extents were given */
  if (sizearray != NULL) {
    allocate_chunks(COMPUTE_NODE);
  }

  overlap_ = NO;
}

/* Compute-node constructor with user-specified subchunking and a    *
 * caller-provided data pointer.                                     *
 * The compute-node layout uses the REGULAR chunk policy (one chunk  *
 * per compute node) and the io-node layout uses ROUND_ROBIN. The    *
 * single pointer in data_ptr is handed to allocate_chunks() with a  *
 * stencil width of 0.                                               */
Array::Array(char *name, int rank, int *sizearray, int elt_size,
             ArrayLayout *mem_layout, Distribution *mem_dist,
             ArrayLayout *io_layout, Distribution *io_dist,
             ArrayLayout *sub_layout, Distribution* sub_dist,
             char *data_ptr)
  : Template(rank, sizearray)
{
  /* allocate_chunks() expects an array of pointers; wrap the single one */
  char *user_buffer = data_ptr;

  do_init(name, rank, sizearray, elt_size,
          mem_layout, mem_dist,
          io_layout, io_dist,
          sub_layout, sub_dist,
          REGULAR, ROUND_ROBIN, HPF);

  /* The chunk list is only built when the array extents were given */
  if (sizearray != NULL) {
    allocate_chunks(COMPUTE_NODE, 1, &user_buffer, 0);
  }

  overlap_ = NO;
}

/* Initializes the state of the array object; the chunks themselves are *
 * allocated by a separate function (allocate_chunks).                  *
 *                                                                      *
 *   name             - array name; copied. A NULL name is stored as    *
 *                      the empty string.                               *
 *   rank             - rank passed on to every layout that is built    *
 *   sizearray        - not used by this function                       *
 *   elt_size         - stored as ieee_size_; element_size_ is derived  *
 *                      from it via IOsizeOf()                          *
 *   mem_layout/dist  - describe the compute-node layout (always built) *
 *   io_layout/dist   - describe the io-node layout; io_node_layout_ is *
 *                      NULL when io_layout is NULL                     *
 *   subchunk_layout/dist - describe the subchunk layout; when          *
 *                      subchunk_layout is NULL the array is marked as  *
 *                      not sub-chunked                                 *
 *   comp_node_policy, io_node_policy - chunk allocation policies       *
 *   block_dist       - block distribution passed to every layout       *
 *                                                                      *
 * Terminates the program if the name buffer cannot be allocated.       */
void Array::do_init(char *name,  int rank,  int *sizearray,  int elt_size, 
	    ArrayLayout *mem_layout,       Distribution *mem_dist,
	    ArrayLayout *io_layout,        Distribution *io_dist,
	    ArrayLayout *subchunk_layout,  Distribution *subchunk_dist,
            ChunkAllocPolicy comp_node_policy, ChunkAllocPolicy io_node_policy,
	    Block_Distribution block_dist)
{
  /* Guard against a NULL name instead of handing it to strlen()/strcpy() */
  const char *src_name = (name != NULL) ? name : "";

  io_strategy_ = SIMPLE_IO;

  /* The 5 spare bytes beyond the terminator are kept from the original  *
   * code; their purpose is not visible here.                            */
  name_ = (char *) malloc(sizeof(char)*(strlen(src_name)+5));
  if (name_ == NULL) {
    printf("Error: Unable to allocate memory for the array name\n");
    exit(1);
  }
  strcpy(name_, src_name);
  ieee_size_ = elt_size;
  element_size_ = IOsizeOf(ieee_size_);
 
  compute_node_layout_ = new RegularDistribution(rank, mem_layout, mem_dist,
				         comp_node_policy, block_dist);
  if (io_layout) 
    io_node_layout_ = new RegularDistribution(rank, io_layout, io_dist,
                                              io_node_policy, block_dist);
  else io_node_layout_ = NULL;
  if (subchunk_layout) 
    subchunk_layout_ = new RegularDistribution(rank, subchunk_layout,
                                              subchunk_dist, ROUND_ROBIN,
                                              block_dist);
  else subchunk_layout_ = NULL;

  /* Check if there is any sub-chunking */
  if (subchunk_layout_) sub_chunked_ = YES;
  else sub_chunked_ = NO;

  /* Check if there is any natural chunking, i.e. the compute-node and   *
   * io-node layouts compare equal.                                      *
   * NOTE(review): io_node_layout_ may be NULL here; this relies on      *
   * equal() coping with a NULL argument - confirm.                      */
  if (compute_node_layout_->equal(io_node_layout_)) natural_chunked_ = YES;
  else natural_chunked_ = NO;
}
	 
/* Constructor used on the io-node side: rebuilds the array object  *
 * from an integer schema buffer laid out as written by pack().     *
 * On return *schema_buf points just past the consumed integers.    */
Array::Array(int **schema_buf)
{
   int *cursor = *schema_buf;

   io_strategy_ = *cursor++;
   op_type_     = *cursor++;

   /* Name: a length followed by one int per character */
   int name_len = *cursor++;
   name_ = (char *) malloc(name_len+1);
   int k = 0;
   while (k < name_len) {
     name_[k] = (char) *cursor++;
     k++;
   }
   name_[name_len] = '\0';

   rank_ = *cursor++;

   /* A positive flag means rank_ extents follow */
   int has_sizes = *cursor++;
   if (has_sizes > 0) {
     size_ = (int *) malloc(sizeof(int) * rank_);
     for (int d = 0; d < rank_; d++) {
       size_[d] = *cursor++;
     }
   } else {
     size_ = NULL;
   }

   element_size_    = *cursor++;
   ieee_size_       = *cursor++;
   natural_chunked_ = (Boolean) *cursor++;
   sub_chunked_     = (Boolean) *cursor++;
   overlap_         = (Boolean) *cursor++;

   /* Layouts: compute-node, io-node, and (only if sub-chunked) subchunk */
   compute_node_layout_ = unpack_layout(&cursor);
   io_node_layout_      = unpack_layout(&cursor);

   if (sub_chunked_) {
     subchunk_layout_ = unpack_layout(&cursor);
   } else {
     subchunk_layout_ = NULL;
   }

   *schema_buf = cursor;
}

/* Reads one layout description from the schema buffer.                 *
 * The first integer is the distribution type:                          *
 *   UNSET     - no layout was packed; returns NULL                     *
 *   Regular   - a RegularDistribution is built from the following ints *
 *   Irregular - not supported; prints a message and terminates the     *
 *               program with a failure status                          *
 *   other     - returns NULL                                           *
 * On return *schema_buf points just past the consumed integers.        */
ArrayDistribution *Array::unpack_layout(int **schema_buf)
{
  int *ptr = *schema_buf;
  int type = *ptr++;
  ArrayDistribution *tmp;

  if (type == UNSET) tmp = NULL;
  else if (type == Regular) tmp = new RegularDistribution(&ptr);
  else if (type == Irregular) {
    /* Fatal error: exit with a non-zero status like the other fatal  *
     * paths in this file, so the failure is visible to the caller of *
     * the program.                                                    */
    printf("Irregular is not supported\n");
    exit(1);
  }
  else tmp = NULL;

  *schema_buf = ptr;
  return tmp;
}

/* Allocate chunks - Currently only used on the compute node side */
void Array::allocate_chunks(int node_type)
{
  int my_rank;
  Chunk *new_chunk;

  if (node_type == COMPUTE_NODE) {
     /* First find out what kind of system we have (MPI or sequential) */
     if (global_system_type_ ==  MPI_SYSTEM) {
       /* Allocate a single chunk with index=compute_node_rank */
       my_rank = MPIFS_global_obj->my_rank(COMPUTE_NODE);
       new_chunk = new Chunk(this, my_rank, COMPUTE_NODE, ALLOC);
       compute_node_layout_->add_last(new_chunk);
     } else if (global_system_type_ == UNIX_SYSTEM) {
       /* There is only one kind of Allocation policy */
       int num = compute_node_layout_->total_elements();
       for (my_rank=0; my_rank<num; my_rank++) {
	 new_chunk = new Chunk(this, my_rank, COMPUTE_NODE, ALLOC);
         compute_node_layout_->add_last(new_chunk);
       }
     } else printf("Unsupported filesystem\n");
  } else if (node_type == IO_NODE) {
     printf("Will have to do this later\n");
  } else {
     printf("Error: Don't know the node type\n");
  }
}

/* Allocate chunks with user-specified data pointers. This function
 * currently supports only the REGULAR distribution of chunks and
 * should be called only on the compute node side.
 *
 *   node_type     - only COMPUTE_NODE is implemented
 *   num_ptrs      - number of valid entries in data_ptr
 *   data_ptr      - array of user buffers, one per chunk
 *   stencil_width - stencil width set on every chunk that is created
 *
 * MPI_SYSTEM:  one chunk (index = compute-node rank) using data_ptr[0].
 * UNIX_SYSTEM: one chunk per layout element, chunk i using data_ptr[i].
 *
 * data_ptr is never indexed at or beyond num_ptrs. A chunk for which no
 * pointer was supplied is still created and added to the layout, but
 * its data pointer is left as the Chunk constructor set it and an error
 * is printed.
 */
void Array::allocate_chunks(int node_type, int num_ptrs,
                             char **data_ptr, int stencil_width)
{
  int my_rank;
  Chunk *new_chunk;

  if (node_type == COMPUTE_NODE) {
     /* First find out what kind of system we have (MPI or sequential) */
     if (global_system_type_ ==  MPI_SYSTEM) {
       /* Allocate a single chunk with index=compute_node_rank */
       my_rank = MPIFS_global_obj->my_rank(COMPUTE_NODE);
       new_chunk = new Chunk(this, my_rank, COMPUTE_NODE, NO_ALLOC);
       if (data_ptr != NULL && num_ptrs > 0)
         new_chunk->set_data_ptr(data_ptr[0]);
       else
         printf("Error: No data pointer supplied for chunk %d\n", my_rank);
       new_chunk->set_stencil_width(stencil_width);
       compute_node_layout_->add_last(new_chunk);
     } else if (global_system_type_ == UNIX_SYSTEM) {
       /* There is only one kind of Allocation policy */
       int num = compute_node_layout_->total_elements();
       for (my_rank=0; my_rank<num; my_rank++) {
         new_chunk = new Chunk(this, my_rank, COMPUTE_NODE, NO_ALLOC);
         /* Only read entries the caller actually provided */
         if (data_ptr != NULL && my_rank < num_ptrs)
           new_chunk->set_data_ptr(data_ptr[my_rank]);
         else
           printf("Error: No data pointer supplied for chunk %d\n", my_rank);
         new_chunk->set_stencil_width(stencil_width);
         compute_node_layout_->add_last(new_chunk);
       }
     } else printf("Unsupported filesystem\n");
  } else if (node_type == IO_NODE) {
     printf("Will have to do this later\n");
  } else {
     printf("Error: Don't know the node type\n");
  }
}

/* Releases the name buffer and the three layout objects, then resets *
 * the pointers to NULL.                                              */
Array::~Array()
{
    /* free() and delete are both no-ops on NULL, so no guards needed */
    free(name_);
    name_ = NULL;

    delete compute_node_layout_;
    delete io_node_layout_;
    delete subchunk_layout_;

    compute_node_layout_ = NULL;
    io_node_layout_ = NULL;
    subchunk_layout_ = NULL;
}

/* Serializes the array description (not the chunk information) into a *
 * newly malloc'ed integer buffer.                                     *
 *                                                                     *
 *   schema_buf  - receives the start of the buffer                    *
 *   schema_size - receives the number of integers written             *
 *                                                                     *
 * Layout of the buffer (mirrors Array::Array(int **)):                *
 *   io_strategy, op_type, name length, name (one int per char), rank, *
 *   size flag (1/0) [+ rank sizes], element_size, ieee_size,          *
 *   natural_chunked, sub_chunked, overlap, compute-node layout,       *
 *   io-node layout, and - only if sub_chunked - the subchunk layout.  *
 * A missing layout is written as the single integer UNSET.            *
 *                                                                     *
 * The buffer is sized from the name length and rank plus 100 ints of  *
 * head-room, instead of a flat 100 ints, so a long name or high rank  *
 * can no longer eat into the space left for the layouts.              *
 * NOTE(review): the packed size of a layout is not visible here; the  *
 * head-room is assumed to be sufficient - confirm against             *
 * ArrayDistribution::pack().                                          *
 *                                                                     *
 * Terminates the program if the buffer cannot be allocated.           */
void Array::pack(int** schema_buf, int *schema_size)
{
  int *ptr, *head;
  int i, len, capacity;

  /* A NULL name (default-constructed array) is packed as length 0 */
  len = name_ ? (int) strlen(name_) : 0;

  capacity = 100 + len + ((size_ && rank_ > 0) ? rank_ : 0);
  head = (int *) malloc(sizeof(int)*capacity);
  if (head == NULL) {
    printf("Error: Unable to allocate memory for the array schema\n");
    exit(1);
  }
  ptr = head;
  
  *ptr++ = io_strategy_;
  *ptr++ = op_type_;
  *ptr++ = len;
  for(i=0; i<len;i++) *ptr++ = (int) name_[i]; 
  *ptr++ = rank_;
  if (size_) { *ptr++ = 1; for(i=0; i < rank_;i++) *ptr++ = size_[i]; }
  else *ptr++ = 0;
  *ptr++ = element_size_;
  *ptr++ = ieee_size_;
  *ptr++ = (int)natural_chunked_;
  *ptr++ = (int)sub_chunked_;
  *ptr++ = (int)overlap_;
  
  if (compute_node_layout_) compute_node_layout_->pack(&ptr);
  else *ptr++ = (int)UNSET;
  if (io_node_layout_) io_node_layout_->pack(&ptr);
  else *ptr++ = (int)UNSET;
  if (sub_chunked_) {
    /* The reader consumes a layout whenever sub_chunked is set, so a *
     * missing subchunk layout is written as UNSET like the others    */
    if (subchunk_layout_) subchunk_layout_->pack(&ptr);
    else *ptr++ = (int)UNSET;
  }

  *schema_size = (int)(ptr - head);
  *schema_buf = head;
}

/* Returns the layout selected by layout_type (COMPUTE_NODE, IO_NODE  *
 * or SUB_CHUNK). Any other value prints a message and yields NULL.   */
ArrayDistribution* Array::layout(int layout_type)
{
  if (layout_type == COMPUTE_NODE) {
    return compute_node_layout_;
  }
  if (layout_type == IO_NODE) {
    return io_node_layout_;
  }
  if (layout_type == SUB_CHUNK) {
    return subchunk_layout_;
  }

  printf("Invalid type\n");
  return NULL;
}

/* The following two functions are used for regular layouts (HPF-style) only */
/* Given a chunk index and a node type, returns the relative node  *
 * number on which the chunk resides. Only COMPUTE_NODE with the   *
 * REGULAR allocation policy is supported (the node number is the  *
 * chunk index); every other combination prints a message and      *
 * terminates the program.                                         */
int Array::which_node(int chunk_id, int node_type)
{
  if (node_type == COMPUTE_NODE) {
    if (compute_node_layout_->alloc_policy() == REGULAR) {
      return chunk_id;
    }
    printf("Unsupported chunk alloc  type\n");
    exit(1);
  }

  if (node_type == IO_NODE) {
    printf("Currently this is unsupported\n");
  } else {
    printf("Unsupported node type\n");
  }
  exit(1);

  return -1;   /* not reached */
}

/* Given a chunk index, a node type and the number of io nodes,       *
 * returns the relative node number on which the chunk resides:       *
 *   IO_NODE      with ROUND_ROBIN policy - chunk_id % num_io_nodes   *
 *   COMPUTE_NODE with REGULAR policy     - chunk_id                  *
 * Every other combination prints a message and terminates the        *
 * program.                                                           */
int Array::which_node(int chunk_id, int node_type, int num_io_nodes)
{
  if (node_type == IO_NODE) {
    if (io_node_layout_->alloc_policy() == ROUND_ROBIN) {
      return (chunk_id % num_io_nodes);
    }
    printf("Error in which_node(int,int,int).. Invalid distribution type\n");
    exit(1);
  }

  if (node_type == COMPUTE_NODE) {
    if (compute_node_layout_->alloc_policy() == REGULAR) {
      return chunk_id;
    }
    printf("Error in which_node(int,int,int)... Invalid distribution type\n");
    exit(1);
  }

  printf("Error in which_node(int,int,int)... Invalid node type\n");
  exit(1);

  return -1;   /* not reached */
}

/* Forwards to the compute-node layout and returns whatever chunk its *
 * get_next_chunk() hands back.                                       */
Chunk* Array::get_next_chunk()
{
  Chunk *next_chunk = compute_node_layout_->get_next_chunk();
  return next_chunk;
}

/* The following seven functions are called by compute nodes only */
/* Searches the compute-node chunk list for the chunk whose id equals *
 * the given index. Returns NULL when the list is missing or no chunk *
 * matches.                                                           */
Chunk* Array::find_chunk(int id)
{
  List *chunks = compute_node_layout_->chunk_list();
  if (chunks == NULL) {
    return NULL;
  }

  for (Cell *cell = chunks->head_; cell; cell = cell->next()) {
    Chunk *candidate = (Chunk *)cell->item();
    if (candidate->chunk_id() == id) {
      return candidate;
    }
  }
  return NULL;
}

/* Size of one array element as computed by IOsizeOf() */
int Array::element_size()
{
  return element_size_;
}

/* The element-size value the array was created with (ieee_size_) */
int Array::ieee_size()
{
  return ieee_size_;
}

/* Whether the array is flagged as naturally chunked */
Boolean Array::nat_chunked()
{
  return natural_chunked_;
}

/* Whether the array is flagged as sub-chunked */
Boolean Array::sub_chunked()
{
  return sub_chunked_;
}

/* Derives a subchunk layout for the array from the extents of the      *
 * given chunk, aiming at subchunks of about SUBCHUNK_SIZE bytes, and   *
 * marks the array as sub-chunked. Terminates the program if the array  *
 * is already sub-chunked or memory cannot be allocated.                *
 *                                                                      *
 * If the whole chunk is smaller than SUBCHUNK_SIZE it becomes a single *
 * subchunk (layout size 1 in every dimension). Otherwise the           *
 * dimensions are scanned from the last towards the first to find the   *
 * split dimension `dim`, the first one at which a full slab over       *
 * dimensions dim..rank_-1 reaches SUBCHUNK_SIZE bytes:                 *
 *   - dimensions before dim are split into one piece per index         *
 *   - dimension dim is split into pieces of val_dim indices            *
 *   - dimensions after dim are not split                               *
 *                                                                      *
 * The scan never leaves [0, rank_-1] and val_dim is clamped to at      *
 * least 1. This covers the cases where a single element is already     *
 * SUBCHUNK_SIZE bytes or more, and where the reported total size is    *
 * larger than element_size_ times the product of the extents.          */
void Array::make_sub_chunks(Chunk *chunk)
{
  Distribution *subchunk_dist;
  int  *subchunk_layout_sizes;
  int i, dim, val_dim, slab_bytes;
  int *chunk_size = chunk->size();

  if (sub_chunked_) {
    printf("Error: Array already subchunked\n");
    exit(1);
  }

  subchunk_dist = (Distribution *) malloc(sizeof(Distribution)*rank_);
  subchunk_layout_sizes = (int*) malloc(sizeof(int)*rank_);
  if (rank_ > 0 && (subchunk_dist == NULL || subchunk_layout_sizes == NULL)) {
    printf("Error: Unable to allocate memory for the subchunk layout\n");
    exit(1);
  }

  /* Start from "one subchunk": BLOCK everywhere, layout size 1 */
  for(i=0;i<rank_;i++){
    subchunk_dist[i] = BLOCK;
    subchunk_layout_sizes[i] = 1;
  }

  if (rank_ > 0 && chunk->total_size_in_bytes() >= SUBCHUNK_SIZE) {
    /* slab_bytes = bytes covered by one index of dimension dim, i.e.  *
     * element_size_ times the extents of all dimensions after dim     */
    dim = rank_ - 1;
    slab_bytes = element_size_;
    while (dim > 0 &&
           (long long)slab_bytes * chunk_size[dim] < SUBCHUNK_SIZE) {
      slab_bytes *= chunk_size[dim];
      dim--;
    }

    /* Number of indices of dimension dim that fit into one subchunk */
    val_dim = (slab_bytes > 0) ? SUBCHUNK_SIZE / slab_bytes : 1;
    if (val_dim < 1) val_dim = 1;

    for(i=0;i<dim;i++){
      subchunk_layout_sizes[i] = chunk_size[i];
    }
    subchunk_layout_sizes[dim] = (chunk_size[dim] + val_dim -1)/val_dim;
  }

  ArrayLayout *tmp_layout = new ArrayLayout(rank_, subchunk_layout_sizes);
  subchunk_layout_ = new RegularDistribution(rank_, tmp_layout,
                                      subchunk_dist, ROUND_ROBIN, HPF);
  sub_chunked_ = YES;
  free(subchunk_layout_sizes);
  free(subchunk_dist);
}

/* Returns the sum of total_size_in_bytes() over all chunks in the    *
 * compute-node chunk list; 0 when the list is missing or empty.      */
int Array::array_info()
{
  List *list = compute_node_layout_->chunk_list();
  /* Same NULL-list guard as find_chunk() */
  Cell *list_ptr = list != NULL ? list->head_ : NULL;
  Chunk *chunk_ptr;
  int ret =0;

  while(list_ptr) {
    chunk_ptr = (Chunk *)list_ptr->item();
    ret += chunk_ptr->total_size_in_bytes();
    list_ptr = list_ptr->next();
  }
  return ret;
}

/* Called only on the I/O node side.                                   *
 * Asks the io-node layout for the next chunk index and returns it.    *
 * For a Regular layout, and only when the index is below max, the     *
 * chunk object is (re)initialized for that index as an IO_NODE chunk  *
 * with NO_ALLOC.                                                      */
int Array::get_next_index(Chunk *&chunk, int old_val, int io_node_num, 
			  int num_io_nodes, int max)
{
  int next_index = io_node_layout_->get_next_index(chunk, old_val,
                                                   io_node_num,
                                                   num_io_nodes, max);

  bool regular_layout = (io_node_layout_->distribution_type() == Regular);
  if (regular_layout && next_index < max) {
    chunk->init(this, next_index, IO_NODE, NO_ALLOC);
  }
  return next_index;
}

/* This function should be called only on the compute node side and *
 * make sense only for the regular distribution of chunks,          */
void Array::set_data_ptr(char *data_ptr)
{
  List *list = compute_node_layout_->chunk_list();
  Chunk *chunk_ptr;

  if (list && list->head_){
    chunk_ptr = (Chunk *) list->head_->item();
    chunk_ptr->set_data_ptr(data_ptr);
  } else {
    printf("Error: No chunks present - cannot set data ptr\n");
  }
}


/* This function should be called only on the compute node side and *
 * makes sense only for the regular distribution of chunks.         *
 * Returns the data pointer of the first chunk in the compute-node  *
 * chunk list, or NULL (after printing an error) when there is no   *
 * chunk.                                                           */
char* Array::get_data_ptr()
{
  List *list = compute_node_layout_->chunk_list();
  Chunk *chunk_ptr;

  if (list && list->head_){
    chunk_ptr = (Chunk *) list->head_->item();
    return ((char *)chunk_ptr->data_ptr());
  } else {
    /* This is the getter: report "get", not the setter's "set" text */
    printf("Error: No chunks present - cannot get data ptr\n");
    return NULL;
  }
}

/* Returns the overlap flag of the array. Within this file the flag is *
 * set to YES only by the constructor that receives a positive stencil *
 * width, or when unpacked as such from a schema buffer.               */
Boolean Array::overlaped()
{
  Boolean has_overlap = overlap_;
  return has_overlap;
}

/* Builds the io-node layout as an IrregularDistribution holding a     *
 * single chunk described by the attributes of the given file.         *
 *                                                                     *
 * If the file has a "chunk_origin" attribute it must also have a      *
 * "chunk_size" attribute; the chunk is created from these two. If     *
 * "chunk_origin" is absent, the chunk starts at the origin (all       *
 * zeros) and uses the array's own size_.                              *
 *                                                                     *
 * Terminates the program with a failure status if "chunk_size" is     *
 * missing although "chunk_origin" exists, or if the temporary buffers *
 * cannot be allocated.                                                *
 * NOTE(review): the attribute values are read into buffers of rank_   *
 * ints; this assumes each attribute holds at most rank_ ints -        *
 * confirm against the file format.                                    */
void Array::read_schema_file(IOFile file_ptr)
{
  /* IOreadAttributeInfo() takes a non-const char*, so the names are   *
   * kept in writable buffers instead of passing string literals       */
  char origin_attr[] = "chunk_origin";
  char size_attr[]   = "chunk_size";

  int *base = (int *)malloc(sizeof(int) * rank_);
  int *size = (int *)malloc(sizeof(int) * rank_);
  int index, length, datatype;
  Chunk *new_chunk;

  if (rank_ > 0 && (base == NULL || size == NULL)) {
    printf("Error: Unable to allocate memory for the chunk schema\n");
    exit(1);
  }

  index = IOreadAttributeInfo(file_ptr, origin_attr, &datatype, &length);
  if (index >=0 ) { // the attribute exists
    IOreadAttribute(file_ptr, index, base);
    index = IOreadAttributeInfo(file_ptr, size_attr, &datatype, &length);
    /* Fatal: report failure through a non-zero exit status */
    if (index < 0) { printf("Error in reading attributes\n"); exit(1); }
    IOreadAttribute(file_ptr, index, size);
    new_chunk = new Chunk(this, base, size);
  } else {
    for (int j=0; j<rank_; j++) base[j] = 0;
    new_chunk = new Chunk(this, base, size_);
  }
  io_node_layout_ = new IrregularDistribution(1, &new_chunk);
  free(base);
  free(size);
}

/* The collective io operation to write out the array as a timestep.   *
 * Sets op_type_ to TIMESTEP, then acts according to the node type     *
 * reported by the global MPIFS object:                                *
 *   COMPUTE_NODE      - send the array schema, run the compute-node   *
 *                       io loop                                       *
 *   PART_TIME_COMPUTE - run the compute-node io loop only             *
 *   anything else     - pack the schema and run the part-time         *
 *                       io-node loop with it                          */
void Array::timestep()
{
   int role = MPIFS_global_obj->node_type();

   op_type_ = TIMESTEP;

   if (role == COMPUTE_NODE) {
     MPIFS_global_obj->send_array_schema(this);
     MPIFS_global_obj->compute_node_io_loop(this);
     return;
   }

   if (role == PART_TIME_COMPUTE) {
     MPIFS_global_obj->compute_node_io_loop(this);
     return;
   }

   int *packed_schema;
   int packed_len;
   pack(&packed_schema, &packed_len);
   MPIFS_global_obj->part_time_io_node_loop(packed_schema, packed_len, this);
}
 
/* The collective io operation to write out the array as a checkpoint. *
 * Sets op_type_ to CHECKPOINT, then acts according to the node type   *
 * reported by the global MPIFS object:                                *
 *   COMPUTE_NODE      - send the array schema, run the compute-node   *
 *                       io loop                                       *
 *   PART_TIME_COMPUTE - run the compute-node io loop only             *
 *   anything else     - pack the schema and run the part-time         *
 *                       io-node loop with it                          */
void Array::checkpoint()
{
   int role = MPIFS_global_obj->node_type();

   op_type_ = CHECKPOINT;

   if (role == COMPUTE_NODE) {
     MPIFS_global_obj->send_array_schema(this);
     MPIFS_global_obj->compute_node_io_loop(this);
     return;
   }

   if (role == PART_TIME_COMPUTE) {
     MPIFS_global_obj->compute_node_io_loop(this);
     return;
   }

   int *packed_schema;
   int packed_len;
   pack(&packed_schema, &packed_len);
   MPIFS_global_obj->part_time_io_node_loop(packed_schema, packed_len, this);
}
 
/* The collective io operation to read in the array from a checkpoint  *
 * file. (The original note here says that, for testing purposes, the  *
 * read does not currently happen.)                                    *
 * Sets op_type_ to RESTART, then acts according to the node type      *
 * reported by the global MPIFS object:                                *
 *   COMPUTE_NODE      - send the array schema, run the compute-node   *
 *                       io loop                                       *
 *   PART_TIME_COMPUTE - run the compute-node io loop only             *
 *   anything else     - pack the schema and run the part-time         *
 *                       io-node loop with it                          */
void Array::restart()
{
  int role = MPIFS_global_obj->node_type();

  op_type_ = RESTART;

  if (role == COMPUTE_NODE) {
     MPIFS_global_obj->send_array_schema(this);
     MPIFS_global_obj->compute_node_io_loop(this);
     return;
  }

  if (role == PART_TIME_COMPUTE) {
     MPIFS_global_obj->compute_node_io_loop(this);
     return;
  }

  int *packed_schema;
  int packed_len;
  pack(&packed_schema, &packed_len);
  MPIFS_global_obj->part_time_io_node_loop(packed_schema, packed_len, this);
}
 
/* Collective io operation with op_type_ set to READ_TIMESTEP. Acts    *
 * according to the node type reported by the global MPIFS object:     *
 *   COMPUTE_NODE      - send the array schema, run the compute-node   *
 *                       io loop                                       *
 *   PART_TIME_COMPUTE - run the compute-node io loop only             *
 *   anything else     - pack the schema and run the part-time         *
 *                       io-node loop with it                          */
void Array::read_timestep()
{
   int role = MPIFS_global_obj->node_type();

   op_type_ = READ_TIMESTEP;

   if (role == COMPUTE_NODE) {
     MPIFS_global_obj->send_array_schema(this);
     MPIFS_global_obj->compute_node_io_loop(this);
     return;
   }

   if (role == PART_TIME_COMPUTE) {
     MPIFS_global_obj->compute_node_io_loop(this);
     return;
   }

   int *packed_schema;
   int packed_len;
   pack(&packed_schema, &packed_len);
   MPIFS_global_obj->part_time_io_node_loop(packed_schema, packed_len, this);
}

/* The operation type last stored in op_type_ (set by timestep(),   *
 * checkpoint(), restart(), read_timestep() or the schema reader)   */
int Array::op_type()
{
  return op_type_;
}

/* The io strategy stored in io_strategy_ */
int Array::io_strategy()
{
  return io_strategy_;
}

void Array::init(int rank, int ieee_size, int *size, int node_type)
{
  rank_ = rank;
  ieee_size_ = ieee_size;
  element_size_ = IOsizeOf(ieee_size_);
  size_ = size;
  if (node_type == COMPUTE_NODE) {
    int my_rank = MPIFS_global_obj->my_rank(COMPUTE_NODE);
    Chunk *new_chunk = new Chunk(this, my_rank, COMPUTE_NODE, ALLOC);
    compute_node_layout_->add_last(new_chunk);
  }
}