/* ******************************************************************
 *
 *  C-DAC Tech Workshop : hyPACK-2013
 *                        October 15-18, 2013
 *
 *  Example   : vect-vect-addition-sequential-native.c
 *
 *  Objective : Sustained performance for Vector-Vector Addition.
 *              Vector-Vector Addition (sequential implementation),
 *              formulation - second type.
 *              Execute on an Intel Xeon Phi co-processor, measure the
 *              performance and quantify it.
 *
 *  Input     : a) Size of Vector A and Vector B (FLOPS_ARRAY_SIZE)
 *              b) Iterations (MAXFLOPS_ITERS, LOOP_COUNT)
 *              Both are compile-time constants in this file.
 *
 *  Output    : Prints the number of giga floating-point operations
 *              performed, the time elapsed in seconds, and the
 *              resulting GFLOP/s.
 *
 *  Single    : Peak perf of Xeon Phi :
 *  Precision   1.091 GHz x 61 cores x 16 lanes x 2 = 2129.6 GigaFlops/s
 *              Peak perf of a single core = 34.90164 GigaFlops/s
 *
 *  Double    : Peak perf :
 *  Precision   1.091 GHz x 61 cores x 8 lanes x 2 = 1064.8 GigaFlops/s
 *
 *  Created   : August-2013
 *
 *  E-mail    : hpcfte@cdac.in
 *
 * ******************************************************************/

/*
 * A simple example to try to get lots of Flops on Intel Xeon Phi
 * co-processors.
 */

/*
 * NOTE(review): the original file had four #include directives whose header
 * names were lost. <stdio.h> (printf) and <sys/time.h> (gettimeofday,
 * struct timeval) are required by the code below; <stdlib.h> and <string.h>
 * are presumed to be the other two -- confirm against the original source.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

/*
 * dtime -- utility routine returning the current wall-clock time.
 *
 * Returns the time reported by gettimeofday() as seconds (with a
 * microsecond fractional part), or 0.0 if gettimeofday() fails.
 */
double dtime(void)
{
    struct timeval now;

    if (gettimeofday(&now, NULL) != 0) {
        return 0.0;
    }

    return (double)now.tv_sec + (double)now.tv_usec * 1.0e-6;
}

#define FLOPS_ARRAY_SIZE (1024 * 1024)
#define MAXFLOPS_ITERS   100000000
#define LOOP_COUNT       128

/* Floating-point operations per inner loop iteration. */
#define FLOPSPERCALC     2

/*
 * Compute arrays, 64-byte aligned for fast cache access.
 * `aligned` is the attribute spelling GCC and Clang recognise; the `align`
 * spelling is ignored by them (with a warning), leaving default alignment.
 */
float Vector_A[FLOPS_ARRAY_SIZE] __attribute__((aligned(64)));
float Vector_B[FLOPS_ARRAY_SIZE] __attribute__((aligned(64)));

/*
 * Main program: times a scaled vector-vector addition and reports GFLOP/s.
 *
 * argc/argv are accepted but not used; problem sizes come from the macros
 * above. Always returns 0.
 */
int main(int argc, char *argv[])
{
    int i, j, k;
    double tstart, tstop, ttime;
    double gflops = 0.0;
    float a = 1.1f;

    (void)argc;
    (void)argv;

    /* Initialize the compute arrays. */
    printf("Initializing \r\n ");

    /*
     * NOTE(review): everything from this loop's condition up to the final
     * `ttime > 0` test was lost from the original text. The code in between
     * is reconstructed from the surviving declarations (i, j, k, a, tstart,
     * tstop, ttime, gflops) and macros (FLOPS_ARRAY_SIZE, MAXFLOPS_ITERS,
     * LOOP_COUNT, FLOPSPERCALC). The initial values and the exact kernel
     * statement are assumptions -- confirm against the original source.
     */
    for (i = 0; i < FLOPS_ARRAY_SIZE; i++) {
        Vector_A[i] = (float)i + 0.1f;
        Vector_B[i] = (float)i + 0.2f;
    }

    tstart = dtime();

    /* Repeat the short kernel many times to accumulate measurable work. */
    for (j = 0; j < MAXFLOPS_ITERS; j++) {
        /* One multiply and one add per element: FLOPSPERCALC == 2. */
        for (k = 0; k < LOOP_COUNT; k++) {
            Vector_A[k] = a * Vector_A[k] + Vector_B[k];
        }
    }

    tstop = dtime();

    /*
     * Giga floating-point operations performed. Evaluated in double on
     * purpose: LOOP_COUNT * MAXFLOPS_ITERS * FLOPSPERCALC does not fit in
     * a 32-bit int.
     */
    gflops = 1.0e-9 * (double)LOOP_COUNT * (double)MAXFLOPS_ITERS
             * (double)FLOPSPERCALC;

    /* Elapsed wall-clock time in seconds. */
    ttime = tstop - tstart;

    /* Print the results; skip when no time elapsed, to avoid dividing by zero. */
    if (ttime > 0.0) {
        printf(" GFLOPS = %10.3f, Secs = %10.3f, GFLOPS per sec = %10.3f \r \n ",
               gflops, ttime, gflops / ttime);
    }

    return 0;
}