aboutsummaryrefslogtreecommitdiff
path: root/src/chunkSpeed.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/chunkSpeed.cc')
-rw-r--r--src/chunkSpeed.cc179
1 files changed, 179 insertions, 0 deletions
diff --git a/src/chunkSpeed.cc b/src/chunkSpeed.cc
new file mode 100644
index 0000000..757c7e1
--- /dev/null
+++ b/src/chunkSpeed.cc
@@ -0,0 +1,179 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include "IEEEIO.hh"
+#include "vatoi.hh"
+
+// Wall-clock timer used to bracket the write loop in this serial benchmark.
+// Returns the current time of day in seconds (with microsecond resolution)
+// as reported by gettimeofday(); only differences between two calls are
+// meaningful to the caller.
+double MPI_Wtime(){
+  timeval tv;
+  // gettimeofday() takes two arguments; the timezone argument is obsolete
+  // and must be passed as NULL.
+  gettimeofday(&tv,NULL);
+  return tv.tv_sec + tv.tv_usec/1.0e6;
+}
+// Brute-force search for a 3D processor layout.
+//
+// Visits every ordered triple (i,j,k) with i*j*k == np and keeps the one
+// with the smallest i+j+k (minimizing this sum weakly minimizes the surface
+// area between processor domains).  Ties are resolved in favour of the
+// first triple met when i, then j, are scanned in ascending order.
+//
+//   np      number of processors to factor
+//   decomp  output: the selected layout {i,j,k}
+//   returns the i+j+k metric of the selected layout
+//
+// For np < 1 no factorization exists: decomp is set to {0,0,0} and 0 is
+// returned.
+int StupidProcLayoutSeive(int np,int decomp[3]){
+  if(np<1){
+    decomp[0]=decomp[1]=decomp[2]=0;
+    return 0;
+  }
+  // The largest possible metric is np+1+1 (layout 1,1,np), so np+3 is a
+  // safe "worse than anything" start value.  (np*np*np is NOT: it equals 1
+  // for np==1, which rejected the only candidate, and it overflows int for
+  // large np.)
+  int minarea = np+3;
+  for(int i=1;i<=np;i++){
+    if(np%i) continue; // i must divide np
+    for(int j=1;i*j<=np;j++){
+      if(np%(i*j)) continue; // no integer k completes this pair
+      int k = np/(i*j); // the single k with i*j*k==np
+      int area = i+j+k;
+      if(area<minarea) {
+        minarea=area;
+        decomp[0]=i;
+        decomp[1]=j;
+        decomp[2]=k;
+      }
+    }
+  }
+  return minarea;
+}
+
+// Holds the benchmark's command-line options and parses them from argv.
+//
+// Recognized flags (each introduced by a single '-'):
+//   -np N            number of virtual processors (default 1)
+//   -v...            any flag starting with 'v' turns verbose on
+//   -dimtype T       "global" or "Global" selects Global, otherwise Local
+//   -dims X,Y,Z      dataset dimensions (default 20,20,20)
+//   -buffer SIZE     buffer size, converted with vatoi() (default 0)
+//   -h...            any flag starting with 'h' prints the usage text
+// Arguments that do not start with '-' and unknown flags are ignored.
+class CommandLine {
+public:
+  enum DimType {Local,Global};
+  int verbose,dims[3],dimtype,np;
+  char *programname;
+  int bufsize;
+  // Reset every option to its built-in default.
+  void setDefaults(){
+    verbose=0;
+    dims[0]=dims[1]=dims[2]=20;
+    dimtype = Local;
+    np=1;
+    bufsize=0;
+  }
+  // Apply the defaults, then override them from the command line.
+  CommandLine(int argc,char *argv[]){
+    setDefaults();
+    parse(argc,argv);
+  }
+  // Scan argv[1..argc-1] for flags.  A flag that takes a value consumes the
+  // following argument; if the flag is the last argument it is ignored.
+  void parse(int argc,char *argv[]){
+    programname=argv[0];
+    for(int i=1;i<argc;i++){
+      char *flag = argv[i];
+      if(*flag!='-') continue; // ignore non-flags
+      flag++;
+      // Only look at the next argument when there really is one.
+      char *val = (i+1<argc) ? argv[i+1] : NULL;
+      if(!strcmp(flag,"np")) {
+        if(val) { np = atoi(val); i++; }
+      }
+      else if(*flag=='v') {verbose=1;}
+      else if(!strcmp(flag,"dimtype")){
+        if(val) {
+          // the value is consumed even when it is not "global"
+          if(!strcmp(val,"global") || !strcmp(val,"Global"))
+            dimtype=Global;
+          i++;
+        }
+      }
+      else if(!strcmp(flag,"dims")){
+        // dims are ints, so the conversions must be %d (not %u);
+        // components missing from the value keep their previous setting
+        if(val) { sscanf(val,"%d,%d,%d",dims,dims+1,dims+2); i++; }
+      }
+      else if(!strcmp(flag,"buffer")) {
+        if(val) { bufsize = vatoi(val); i++; }
+      }
+      else if(*flag=='h') usage(); // prints help; parsing continues
+    }
+    if(np<1){
+      // A non-positive processor count cannot be decomposed or allocated
+      // for; fall back to the default instead of failing later.
+      fprintf(stderr,"%s: invalid -np value %d, using 1\n",programname,np);
+      np=1;
+    }
+  }
+  // Print the help text to stdout.
+  void usage(){
+    printf("Usage: %s -np int <-verbose> <-dims int,int,int> <-dimtype global|local> <-buffer size>\n",
+           programname);
+    printf("\tnp: Number of virtual processors (pretends to be parallel)\n");
+    printf("\tverbose: Opens file for reading after complete and prints all values.\n");
+    printf("\tdims: Set the dimensions of the dataset. By default this is per-cpu.\n");
+    printf("\t\tThis can be Global dimensions if you use -dimtype flag.\n");
+    printf("\t\tDefault is 20,20,20.\n");
+    printf("\tdimtype: \'global\' makes dimensions apply to the overall dataset\n");
+    printf("\t\tregardless of number of processors and \'local\' is per-processor.\n");
+    printf("\t\tThe default is per-processor.\n");
+    printf("\tbuffer: Size of the write buffer. Buffering is off unless this is > 0.\n");
+  }
+  // Print the current option values on one line to f (stdout by default).
+  void printStatus(FILE *f=stdout){
+    fprintf(f,"%s: np=%d verbose=%d dims=%d,%d,%d dimtype=%s\n",
+            programname,np,verbose,
+            dims[0],dims[1],dims[2],
+            (dimtype==Local)?"Local":"Global");
+  }
+};
+
+// Serial benchmark for chunked writes through IEEEIO.
+//
+// Pretends to be cmdln.np processes: computes a processor layout, gives
+// every virtual process an origin and local dimensions, then writes each
+// process's chunk of a 3D Float32 dataset to "chunkdata.raw".  The dataset
+// is written three times and the elapsed time and throughput are printed.
+int main(int argc,char *argv[]){
+  CommandLine cmdln(argc,argv);
+
+  typedef int Array[3];
+  Array *vorigin,*vdims; // per-process origin and local dims
+  int *origin,*dims;     // shorthand for the current process's entries
+  int proclayout[3],area;
+  area=StupidProcLayoutSeive(cmdln.np,proclayout);
+  cmdln.printStatus();
+  printf("Proclayout = %d : %d,%d,%d\n",area,
+         proclayout[0],proclayout[1],proclayout[2]);
+  vorigin = new Array[cmdln.np];
+  vdims = new Array[cmdln.np];
+  int i;
+  // Start every process with the requested dims; for Global dims that is
+  // the (truncated) even share of the dataset along each axis.
+  for(int p=0;p<cmdln.np;p++){
+    dims=vdims[p];
+    for(i=0;i<3;i++) dims[i]=cmdln.dims[i];
+    if(cmdln.dimtype==CommandLine::Global)
+      for(i=0;i<3;i++) dims[i]/=proclayout[i]; // create dims for layout computation
+  }
+  // Walk the layout with the first axis varying fastest; p is the process rank.
+  for(int p=0,k=0;k<proclayout[2];k++){
+    for(int j=0;j<proclayout[1];j++){
+      for(i=0;i<proclayout[0];i++,p++){
+        origin = vorigin[p];
+        dims = vdims[p];
+        // assumes same dims per process
+        origin[0]=dims[0]*i;
+        origin[1]=dims[1]*j;
+        origin[2]=dims[2]*k;
+        if(cmdln.dimtype==CommandLine::Global){
+          // the last process along an axis absorbs the division remainder
+          if(k==(proclayout[2]-1)) dims[2]=cmdln.dims[2]-dims[2]*(k);
+          if(j==(proclayout[1]-1)) dims[1]=cmdln.dims[1]-dims[1]*(j);
+          if(i==(proclayout[0]-1)) dims[0]=cmdln.dims[0]-dims[0]*(i);
+        }
+      }
+    }
+  }
+  // Global dims are the bounding box of all the local chunks.
+  int globaldims[3]={0,0,0};
+  for(int p=0;p<cmdln.np;p++){
+    dims=vdims[p];
+    origin=vorigin[p];
+    printf("PE(%d) origin{%d,%d,%d} localdims{%d,%d,%d}\n",
+           p,origin[0],origin[1],origin[2],
+           dims[0],dims[1],dims[2]);
+    for(i=0;i<3;i++)
+      if(globaldims[i]<origin[i]+dims[i])
+        globaldims[i]=origin[i]+dims[i];
+  }
+  printf("Globaldims=%d,%d,%d\n",globaldims[0],globaldims[1],globaldims[2]);
+  // One data buffer, sized for the largest chunk, is reused for every write.
+  int size,maxsize=0;
+  for(int p=0;p<cmdln.np;p++){
+    size = IObase::nElements(3,vdims[p]);
+    if(size>maxsize) maxsize=size;
+  }
+  size = maxsize; // for convenience
+  float *data = new float[size];
+  for(i=0;i<size;i++) data[i] = (float)i;
+  IEEEIO *file = new IEEEIO("chunkdata.raw",IObase::Write);
+  if(cmdln.bufsize>0)
+    file->bufferOn(cmdln.bufsize);
+  double stime = MPI_Wtime();
+  for(i=0;i<3;i++){ // do this 3 times
+    file->reserveChunk(IObase::Float32,3,globaldims);
+    for(int p=0;p<cmdln.np;p++){
+      dims=vdims[p];
+      origin=vorigin[p];
+      file->writeChunk(origin,dims,data); // should be same as MPIO file contents
+    }
+  }
+  double etime = MPI_Wtime();
+  printf("Elapsed time to write=%lf\n",etime-stime);
+  printf("IO performance = %f Megabytes/sec\n",
+         IObase::nBytes(IObase::Float32,3,globaldims)/(1024*1024*(etime-stime)));
+  delete file;
+  // Everything below came from new[], so it must be released with delete[].
+  delete[] data;
+  delete[] vorigin;
+  delete[] vdims;
+  return 0;
+}