/************************************************************************ C-DAC Tech Workshop : hyPACK-2013 October 15-18, 2013 Example : CUBlasSMatMatMult.cu Objective : Write a CUDA Program for Matrix Matrix multiplication using CUBLAS3 library function calls. Input : None Output : Execution time in seconds , Gflops achieved Created : August-2013 E-mail : hpcfte@cdac.in *************************************************************************/ /* NOTE(review): the first five include directives below carry no header name in this copy, and the file is collapsed onto very long lines; restore the original source before building. */ #include #include #include #include #include #include "cublas.h" /* SIZE is the initial value of the global size; EPS is the tolerance used when host and device results are compared. */ #define SIZE 128 #define EPS 1.0e-15 /* File-scope state: CUDA events (start, stop), last CUDA and CUBLAS status codes, device properties, host and device matrix buffers, matrix dimensions and timing values. */ cudaEvent_t start,stop; cudaError_t ret; cublasStatus status; cudaDeviceProp deviceProp; double *host_MatA,*host_MatB,*host_Res,*cpu_Res; double *device_MatA,*device_MatB,*device_Res; int RowA,ColA,ColB; float Tsec; float elapsedTime; int size=SIZE; /* CUBLAS_SAFE_CALL: stores the result of call in the global status; if it is not CUBLAS_STATUS_SUCCESS, prints a message and exits with -1. NOTE(review): expands to two statements without a do { } while (0) wrapper, so it is unsafe as the body of an unbraced if or else. */ #define CUBLAS_SAFE_CALL(call) \ status=call; \ if(status != CUBLAS_STATUS_SUCCESS) \ { printf(" Error in CUBLAS call.Program terminating\n");\ exit(-1); \ } /* CUDA_SAFE_CALL: checks the cudaError_t returned by a CUDA runtime call. cudaSuccess falls through silently; any other value prints the numeric code and cudaGetErrorString(ret), then exits with -1. NOTE(review): __LINE__ is expanded inside this function, so the reported line is that of the printf here, not of the caller. */ void CUDA_SAFE_CALL(cudaError_t call) { cudaError_t ret = call; //printf("RETURN FROM THE CUDA CALL:%d\t:",ret); switch(ret) { case cudaSuccess: // printf("Success\n"); break; /* case cudaErrorInvalidValue: { printf("ERROR: InvalidValue:%i.\n",__LINE__); exit(-1); break; } case cudaErrorInvalidDevicePointer: { printf("ERROR:Invalid Device pointeri:%i.\n",__LINE__); exit(-1); break; } case cudaErrorInvalidMemcpyDirection: { printf("ERROR:Invalid memcpy direction:%i.\n",__LINE__); exit(-1); break; } */ default: { printf(" ERROR at line :%i.%d' ' %s\n",__LINE__,ret,cudaGetErrorString(ret)); exit(-1); break; } } } /* get_DeviceCount: returns the device count written by cudaGetDeviceCount. NOTE(review): the return code of cudaGetDeviceCount is not checked, so count may be returned uninitialized if the call fails. */ int get_DeviceCount() { int count; cudaGetDeviceCount(&count); return count; } /* fill_dp_vector: fills vec with double precision values. NOTE(review): from the loop header below onward, text is missing in this copy (the loop condition, the rest of this function and what appear to be intervening definitions); the statements that follow belong to a result-comparison routine that continues on the next line. */ void fill_dp_vector(double* vec,int size) { int ind; for(ind=0;ind fabs(dRes[i])) relativeError = fabs((hRes[i] - dRes[i]) / hRes[i]); else 
relativeError = fabs((dRes[i] - hRes[i]) / dRes[i]); /* else branch of the comparison begun on the previous line: here the error is taken relative to dRes[i] */ /* Keep the largest relative error that exceeds EPS in errorNorm and set flag to mark a failure. The test against 0.0 is implied by the test against EPS, since EPS is positive. */ if (relativeError > EPS && relativeError != 0.0e+00 ) { if(errorNorm < relativeError) { errorNorm = relativeError; flag=1; } } } /* Report: on failure print EPS and the largest relative error found; otherwise print success. */ if( flag == 1) { printf(" \n Results verfication : Failed"); printf(" \n Considered machine precision : %e", EPS); printf(" \n Relative Error : %e\n", errorNorm); } else printf("\n Results verfication : Success\n"); } /* CPU_MatMat: sequential matrix-matrix multiplication on the host. Allocates the global cpu_Res as RowA*ColB doubles and calls mem_error if malloc returns NULL. NOTE(review): the mem_error call passes the name host_Res although the buffer allocated here is cpu_Res. NOTE(review): the loop nest is cut off at the end of this copy, so the multiplication itself is not visible. */ void CPU_MatMat() { cpu_Res = (double *)malloc(RowA*ColB*sizeof(double)); if(cpu_Res==NULL) mem_error("host_Res","matmatmul",RowA*ColB,"double"); int i,j; for(i=0;i