diff options
Diffstat (limited to 'src/MPIOspeed.cc')
-rw-r--r-- | src/MPIOspeed.cc | 201 |
1 files changed, 201 insertions, 0 deletions
diff --git a/src/MPIOspeed.cc b/src/MPIOspeed.cc new file mode 100644 index 0000000..8ad0019 --- /dev/null +++ b/src/MPIOspeed.cc @@ -0,0 +1,201 @@ +#include <stdio.h> +#include "vatoi.hh" +#include "IEEEIO.hh" +#include "MPIutils.hh" +#include "MPIO.hh" +#include "Timer.hh" + +int StupidProcLayoutSeive(int np,int decomp[3]){ + // Brute-Force method + // stupidly attempts all decomposition combinations + // and filters out unusable combinations. + // trys to select minimum surface area processor arrangement + int minarea = np*np*np; // impossibly high value + for(int i=1;i<=np;i++){ + for(int j=1;i*j<=np;j++){ + for(int k=1;i*j*k<=np;k++){ + if(i*j*k!=np) continue; + int area = i+j+k; // area metric (optimizing this weakly minimizes surface area) + if(area<minarea) { + minarea=area; + decomp[0]=i; + decomp[1]=j; + decomp[2]=k; + } + } + } + } + return minarea; +} + +class CommandLine { +public: + enum DimType {Local,Global}; + int verbose,dims[3],dimtype; + char *programname; + int bufsize; + int ntrials; + int syncsends; + void setDefaults(){ + verbose=0; + ntrials=40; + dims[0]=dims[1]=dims[2]=20; + dimtype = Local; + bufsize=0; + syncsends=1; // default for now because it does so much better + } + CommandLine(int argc,char *argv[],MPIcomm *comm){ + setDefaults(); + parse(argc,argv,comm); + } + void parse(int argc,char *argv[],MPIcomm *comm){ + programname=argv[0]; + for(int i=1;i<argc;i++){ + char *flag = argv[i],*val=argv[i+1]; + if(*flag=='-') flag++; + else continue; // ignore non-flags + if(*flag=='v') {verbose=1;} + else if(!strcmp(flag,"dimtype")){ + if((++i<argc) && (!strcmp(val,"global") || !strcmp(val,"Global"))) + dimtype=Global; + } + else if(!strcmp(flag,"dims")){ + if(++i<argc) sscanf(val,"%u,%u,%u",dims,dims+1,dims+2); + } + else if(*flag=='n'){ + ntrials = atoi(val); + i++; + } + else if(*flag=='s'){ + syncsends=1; + } + else if(!strcmp(flag,"buffer")) { + if(++i<argc) + bufsize = vatoi(val); + } + else if(*flag=='h' && !comm->rank()) usage(); + } + } + void usage(){ + printf("Usage: %s <-verbose> <-dims int,int,int> <-dimtype global|local>", + programname); + printf("\tverbose: Opens file for reading after complete and prints all values.\n"); + printf("\tdims: Set the dimensions of the dataset. By default this is per-cpu.\n"); + printf("\t\tThis can be Global dimensions if you use -dimtype flag.\n"); + printf("\t\tDefault is 20,20,20.\n"); + printf("\tdimtype: \'global\' makes dimensions apply to the overall dataset\n"); + printf("\t\tregardless of number of processors and \'local\' is per-processor.\n"); + printf("\t\tThe default is per-processor.\n"); + } + void printStatus(FILE *f=stdout){ + fprintf(f,"%s: verbose=%u dims=%u,%u,%u dimtype=%s\n", + programname,verbose, + dims[0],dims[1],dims[2], + (dimtype==Local)?"Local":"Global"); + fprintf(f,"\t Buffer Cache is at %u bytes\n",bufsize); + } +}; + +int main(int argc,char *argv[]){ + MPIenv env(argc,argv); // start MPI threads + MPIcomm *comm = env.getComm(); + CommandLine cmdln(argc,argv,comm); + + int origin[3],dims[3]; + int proclayout[3],area; + int nghost=1; + area=StupidProcLayoutSeive(comm->numProcs(),proclayout); + // if(!comm->rank()) { + cmdln.printStatus(); + fprintf(stderr,"Proclayout = %u : %u,%u,%u\n",area, + proclayout[0],proclayout[1],proclayout[2]); + //} + for(int i=0;i<3;i++) dims[i]=cmdln.dims[i]; + if(cmdln.dimtype==CommandLine::Global) + for(int i=0;i<3;i++) dims[i]/=proclayout[i]; // create dims for layout computation + for(int p=0,k=0;k<proclayout[2];k++){ + for(int j=0;j<proclayout[1];j++){ + for(int i=0;i<proclayout[0];i++,p++){ + if(p==comm->rank()){ + // assumes same dims per process + origin[0]=dims[0]*i; + origin[1]=dims[1]*j; + origin[2]=dims[2]*k; + for(int n=0;n<3;n++){ + if(origin[n]-nghost>0) {origin[n]-=nghost; dims[n]+=(nghost);} + dims[n]+=nghost; + } + if(cmdln.dimtype==CommandLine::Global){ + // compute remainder for globaldims + if(k==(proclayout[2]-1)) + dims[2]=cmdln.dims[2]-origin[2]; + if(j==(proclayout[1]-1)) + dims[1]=cmdln.dims[1]-origin[1]; + if(i==(proclayout[0]-1)) + dims[0]=cmdln.dims[0]-origin[0]; + } + } + } + } + } + fprintf(stderr,"PE(%u) origin{%u,%u,%u} localdims{%u,%u,%u} nghost=%u\n", + comm->rank(),origin[0],origin[1],origin[2], + dims[0],dims[1],dims[2],nghost); + int size=1; + for(int i=0;i<3;i++) size*=dims[i]; + // Everybody allocates their own local data + float *data = new float[size]; + for(int i=0;i<size;i++) data[i] = (float)i; + IEEEIO *file=0; + if(!comm->rank()){ // only PE0 needs to open a file + IEEEIO *iofile; + file = iofile = new IEEEIO("mpdata.raw",IObase::Write); + if(cmdln.bufsize>0) + iofile->bufferOn(cmdln.bufsize); + } + else file=0; // else it is null + + MPIO *pfile = new MPIO(file,*comm); // but everyone creates the pfile + pfile->setLocalDims(3,origin,dims); + // if(cmdln.syncsends) pfile->useSyncrhonousSend(); + // puts("Write"); + comm->barrier(); + double stime = MPI_Wtime(); // start timing + //Timer t; + // t.start(); + for(int i=0;i<cmdln.ntrials;i++){ + // if(comm->rank()==0) fprintf(stderr,"Trial %4u\n",i); + comm->barrier(); + pfile->write(data); + } + comm->barrier(); // synchronize completion + double etime = MPI_Wtime(); + // t.end(); + if(!comm->rank()){ + printf("\nElapsed time to write=%lf\n",etime-stime); + } + puts("done writing"); + delete pfile; + if(file) delete file; + delete data; + if(!comm->rank()){ + file = new IEEEIO("mpdata.raw",IObase::Read); + int ndims[3],nrank; + IObase::DataType dt; + file->readInfo(dt,nrank,ndims); + printf("IO performance = %f Megabytes/sec\n", + (float)(cmdln.ntrials)*(float)(IObase::nBytes(dt,nrank,ndims))/(1024*1024*(etime-stime))); + printf("Read mpdata.raw: rank=%u dims=%u,%u,%u\n",nrank,ndims[0],ndims[1],ndims[2]); + if(cmdln.verbose){ // dangerous on T3E + data = new float[IObase::nElements(nrank,ndims)]; + file->read(data); + for(int k=0,p=0;k<ndims[2];k++) + for(int j=0;j<ndims[1];j++) + for(int i=0;i<ndims[0];i++,p++) + printf("data[%u:%u:%u](%u) = %f\n",i,j,k,p,data[p]); + delete data; + } + delete file; + } + return 0; +} |