diff options
Diffstat (limited to 'src/chunkSpeed.cc')
-rw-r--r-- | src/chunkSpeed.cc | 179 |
1 files changed, 179 insertions, 0 deletions
diff --git a/src/chunkSpeed.cc b/src/chunkSpeed.cc new file mode 100644 index 0000000..757c7e1 --- /dev/null +++ b/src/chunkSpeed.cc @@ -0,0 +1,179 @@ +#include <stdio.h> +#include <stdlib.h> +#include "IEEEIO.hh" +#include "vatoi.hh" +#include <sys/time.h> + +double MPI_Wtime(){ + timeval tv; + gettimeofday(&tv); + return tv.tv_sec + tv.tv_usec/1000000.0l; +} +int StupidProcLayoutSeive(int np,int decomp[3]){ + // Brute-Force method + // stupidly attempts all decomposition combinations + // and filters out unusable combinations. + // trys to select minimum surface area processor arrangement + int minarea = np*np*np; // impossibly high value + for(int i=1;i<=np;i++){ + for(int j=1;i*j<=np;j++){ + for(int k=1;i*j*k<=np;k++){ + if(i*j*k!=np) continue; + int area = i+j+k; // area metric (optimizing this weakly minimizes surface area) + if(area<minarea) { + minarea=area; + decomp[0]=i; + decomp[1]=j; + decomp[2]=k; + } + } + } + } + return minarea; +} + +class CommandLine { +public: + enum DimType {Local,Global}; + int verbose,dims[3],dimtype,np; + char *programname; + int bufsize; + void setDefaults(){ + verbose=0; + dims[0]=dims[1]=dims[2]=20; + dimtype = Local; + np=1; + bufsize=0; + } + CommandLine(int argc,char *argv[]){ + setDefaults(); + parse(argc,argv); + } + void parse(int argc,char *argv[]){ + programname=argv[0]; + for(int i=1;i<argc;i++){ + char *flag = argv[i],*val=argv[i+1]; + if(*flag=='-') flag++; + else continue; // ignore non-flags + if(!strcmp(flag,"np")) { + if(++i<argc) + np = atoi(val); + } + if(*flag=='v') {verbose=1;} + else if(!strcmp(flag,"dimtype")){ + if((++i<argc) && (!strcmp(val,"global") || !strcmp(val,"Global"))) + dimtype=Global; + } + else if(!strcmp(flag,"dims")){ + if(++i<argc) sscanf(val,"%u,%u,%u",dims,dims+1,dims+2); + } + else if(!strcmp(flag,"buffer")) { + if(++i<argc) + bufsize = vatoi(val); + } + else if(*flag=='h') usage(); + } + } + void usage(){ + printf("Usage: %s -np int <-verbose> <-dims int,int,int> <-dimtype global|local>", + programname); + printf("\tnp: Number of virtual processors (pretends to be parallel)\n"); + printf("\tverbose: Opens file for reading after complete and prints all values.\n"); + printf("\tdims: Set the dimensions of the dataset. By default this is per-cpu.\n"); + printf("\t\tThis can be Global dimensions if you use -dimtype flag.\n"); + printf("\t\tDefault is 20,20,20.\n"); + printf("\tdimtype: \'global\' makes dimensions apply to the overall dataset\n"); + printf("\t\tregardless of number of processors and \'local\' is per-processor.\n"); + printf("\t\tThe default is per-processor.\n"); + } + void printStatus(FILE *f=stdout){ + fprintf(f,"%s: np=%u verbose=%u dims=%u,%u,%u dimtype=%s\n", + programname,np,verbose, + dims[0],dims[1],dims[2], + (dimtype==Local)?"Local":"Global"); + } +}; + +int main(int argc,char *argv[]){ + CommandLine cmdln(argc,argv); + + typedef int Array[3]; + //int origin[3],dims[3]; + Array *vorigin,*vdims; + int *origin,*dims; + int proclayout[3],area; + area=StupidProcLayoutSeive(cmdln.np,proclayout); + cmdln.printStatus(); + printf("Proclayout = %u : %u,%u,%u\n",area, + proclayout[0],proclayout[1],proclayout[2]); + vorigin = new Array[cmdln.np]; + vdims = new Array[cmdln.np]; + int i; + for(int p=0;p<cmdln.np;p++){ + dims=vdims[p]; + for(i=0;i<3;i++) dims[i]=cmdln.dims[i]; + if(cmdln.dimtype==CommandLine::Global) + for(i=0;i<3;i++) dims[i]/=proclayout[i]; // create dims for layout computation + } + for(int p=0,k=0;k<proclayout[2];k++){ + for(int j=0;j<proclayout[1];j++){ + for(i=0;i<proclayout[0];i++,p++){ + origin = vorigin[p]; + dims = vdims[p]; + // assumes same dims per process + origin[0]=dims[0]*i; + origin[1]=dims[1]*j; + origin[2]=dims[2]*k; + if(cmdln.dimtype==CommandLine::Global){ + // compute remainder for globaldims + if(k==(proclayout[2]-1)) dims[2]=cmdln.dims[2]-dims[2]*(k); + if(j==(proclayout[1]-1)) dims[1]=cmdln.dims[1]-dims[1]*(j); + if(i==(proclayout[0]-1)) dims[0]=cmdln.dims[0]-dims[0]*(i); + } + } + } + } + int globaldims[3]={0,0,0}; + for(int p=0;p<cmdln.np;p++){ + dims=vdims[p]; + origin=vorigin[p]; + printf("PE(%u) origin{%u,%u,%u} localdims{%u,%u,%u}\n", + p,origin[0],origin[1],origin[2], + dims[0],dims[1],dims[2]); + for(i=0;i<3;i++) + if(globaldims[i]<origin[i]+dims[i]) + globaldims[i]=origin[i]+dims[i]; + } + printf("Globaldims=%u,%u,%u\n",globaldims[0],globaldims[1],globaldims[2]); + int size,maxsize; + // find maxsize + { + int p; + for(p=0,maxsize=0;p<cmdln.np;p++){ + size = IObase::nElements(3,vdims[p]); + // Everybody allocates their own local data + if(size>maxsize) maxsize=size; + } + size = maxsize; // for convenience + } + float *data = new float[size]; + for(i=0;i<size;i++) data[i] = (float)i; + IEEEIO *file = new IEEEIO("chunkdata.raw",IObase::Write); + if(cmdln.bufsize>0) + file->bufferOn(cmdln.bufsize); + double stime = MPI_Wtime(); + for(i=0;i<3;i++){ // do this 3 times + file->reserveChunk(IObase::Float32,3,globaldims); + for(int p=0;p<cmdln.np;p++){ + dims=vdims[p]; + origin=vorigin[p]; + file->writeChunk(origin,dims,data); // should be same as MPIO file contents + } + } + double etime = MPI_Wtime(); + printf("Elapsed time to write=%lf\n",etime-stime); + printf("IO performance = %f Megabytes/sec\n", + IObase::nBytes(IObase::Float32,3,globaldims)/(1024*1024*(etime-stime))); + delete file; + delete data; +} |