/* Written by Holguer A Becerra UBC Student Master of Biomedical Engineering Course: ECE 328 Prof. Guy Lemiuex. Year 2015-Winter Term 1 */ #include #include #include #include #include #define INT 0 #define FLOAT 1 #define DOUBLE 1 #define TYPE_VAR INT #if TYPE_VAR == INT int * matrix1; int * matrix2; int * matrix3; int sum; #elif TYPE_VAR == FLOAT float * matrix1; float * matrix2; float * matrix3; float sum; #elif TYPE_VAR == DOUBLE double * matrix1; double * matrix2; double * matrix3; double sum; #endif typedef unsigned long long int64; unsigned int measurement=0,ut,mt; void init_timer(); static __inline__ int64 getticks(void); int main(int argc, char *argv[]) { int64 tick,tick1; int N,rows,columns; int c,d,k; //initializing timer variables and timer init_timer(); if(argc<=1){ printf("too few arguments"); return 0; } // the first argument has the number of rows, in this case the number of columns are the same as the number of rows. sscanf(argv[1],"%d",&N); /* initialize random seed: */ srand (time(NULL)); /*Get Timer*/ tick = getticks(); // Initialize Matrices #if TYPE_VAR == INT matrix1=(int *)malloc(N*N*sizeof(int)); matrix2=(int *)malloc(N*N*sizeof(int)); matrix3=(int *)malloc(N*N*sizeof(int)); #elif TYPE_VAR == FLOAT matrix1=(float *)malloc(N*N*sizeof(float)); matrix2=(float *)malloc(N*N*sizeof(float)); matrix3=(float *)malloc(N*N*sizeof(float)); #elif TYPE_VAR == DOUBLE matrix1=(double *)malloc(N*N*sizeof(double)); matrix2=(double *)malloc(N*N*sizeof(double)); matrix3=(double *)malloc(N*N*sizeof(double)); #endif printf("Initializing matrices\n"); for (rows= 0; rows < N*N; rows++){ #if TYPE_VAR == INT *(matrix1+rows)=rand() % 1000 + 1; *(matrix2+rows)=rand() % 1000 + 7; //printf("1[%d,%d]=%d\n",rows/N,rows,*(matrix1+rows)); //printf("2[%d,%d]=%d\n",rows/N,rows,*(matrix2+rows)); #elif TYPE_VAR == FLOAT *(matrix1+rows)=rand() % 1000 + 1.5; *(matrix2+rows)=rand() % 1000 + 7.07; //printf("1[%d,%d]=%f\n",rows/N,rows,*(matrix1+rows)); //printf("2[%d,%d]=%f\n",rows/N,rows,*(matrix2+rows)); #elif TYPE_VAR == DOUBLE *(matrix1+rows)=rand() % 1000 + 1.5; *(matrix2+rows)=rand() % 1000 + 7.07; //printf("1[%d,%d]=%f\n",rows/N,rows,*(matrix1+rows)); //printf("2[%d,%d]=%f\n",rows/N,rows,*(matrix2+rows)); #endif } // Performance Finish and Measure /*Get Timer*/ tick1 = getticks(); measurement = (unsigned)((tick1-tick)/ut); printf("\n Initialization time: %d us\n",measurement); printf("Initialization Done, moving over Multiplication\n"); /*Get Timer*/ tick = getticks(); // Improved int ho=0; for (c = 0; c < N; c++) { ho=c*N; for (k = 0; k < N; k++) { for (d = 0; d < N; d++) { *(matrix3+ho+d) += (*(matrix1+ho+k))*(*(matrix2+k*N+d)); //printf("C(%d,%d)=A(%d,%d)*B(%d,%d);\n",ho,d,ho,k,k*N,d); } } } // Performance Finish and Measure /*Get Timer*/ tick1 = getticks(); measurement = (unsigned)((tick1-tick)/ut); printf("\n nMutiplication time: %d us\n",measurement); // free memory free(matrix1); free(matrix2); return 0; } static __inline__ int64 getticks(void) { unsigned a, d; asm volatile("rdtsc" : "=a" (a), "=d" (d)); return (((int64)a) | (((int64)d) << 32)); } void init_timer(){ int64 tick,tick1; // ut is the divisor to give microseconds // mt gives milliseconds FILE *pf; int i,r,l,n=0; char s[100]; // time how long it takes to get the divisors, as a test tick = getticks(); // get the divisors - todo: for max performance this can // output a new binary or library with these values hardcoded // for the relevant CPU - a kind-of ludicrous notion considering // that this will only work on x86 compatible cpus anyways where // performance is the least of your issues... // ... curse of the assembly coder ;-) pf = fopen("/proc/cpuinfo","r"); do { r=fscanf(pf,"%s",&s[0]); if (r<0) { n=5; break; } else if (n==0) { if (strcmp("MHz",s)==0) n=1; } else if (n==1) { if (strcmp(":",s)==0) n=2; } else if (n==2) { n=3; }; } while (n<3); fclose(pf); l=strlen(s); s[l-4]=s[l-3]; s[l-3]=s[l-2]; s[l-2]=s[l-1]; s[l-1]=(char)0; mt=atoi(s); s[l-4]=(char)0; ut=atoi(s); //printf("%s Mhz - ut = %u, mt = %u // hardcode these for your a CPU-specific binary ;-)\n",s,ut,mt); }