/* ****************************************************************** C-DAC Tech Workshop : hyPACK-2013 October 15-18, 2013 Example : openmp4x-vect-vect-addition-perf.c Objective : Sustained Performance for Vector-Vector Addition Vector-Vector Addition (Sequential Implementation) Formulation - Second Type Execute on Intel Xeon-Phi Co-proc. & Measure Performance Quantify the performance Input : a) size of Vector (Size of Vector A and Vector B) b) Iterations Output : Print the Gflop/s and output Matrix C Time Elapsed and GFLOPS Single : Peak Perf of Xeon-Phi : Precision 1.091 GHz X 61 cores X 16 lanes X 2 = 2129.6 GigaFlops/s Peak Perf of Single Core = 34.90164 GigaFlop/s Double : Peak Perf : Precision 1.091 GHz X 61 Cores X 8 lanes X 2 = 1064.8 GigaFlops/s Created : August-2013 E-mail : hpcfte@cdac.in *******************************************************************/ // // // A simple example to try to get lots of Flops on Intel Xeon // Phi Co-processors. // #include #include #include #include #define SIZE 1000 #pragma omp declare target #define FLOPS_ARRAY_SIZE (1024*1024) #define MAXFLOPS_ITERS 1000 #define LOOP_COUNT 128 //Floating pt ops per inner loop iteration #define FLOPSPERCALC 2 // //dtime (Wall Clock time ....) // //utility routine to return //the current wall clock time // int My_saxpy(float *Vector_A,float *Vector_B) { float a=1.1; #pragma omp target map(Vector_A[0:FLOPS_ARRAY_SIZE]) map(Vector_B[0:FLOPS_ARRAY_SIZE]) map(a) { for(int i=0;i 0.0f) { printf(" GFLOPS = %10.5f, Secs = %10.5f, GFLOPS per sec = %10.5f \r \n ", gflops, ttime, gflops/ttime); } return( 0 ); }