c
c********************************************************************
c
c               C-DAC Tech Workshop : hyPACK-2013
c                     October 15-18, 2013
c
c  Example 5.2       : vv_mult_blkstp_nonunf.f
c
c  Objective         : Vector-Vector Multiplication
c                      (using block-striped non-uniform data
c                      partitioning)
c
c  Input             : Read files vdata1.inp for Vector_A and
c                      vdata2.inp for Vector_B
c
c  Output            : Process with Rank 0 prints the Vector-Vector
c                      Multiplication result
c
c  Necessary Condition : Number of Processes should be
c                        less than or equal to 8
c
c  Created           : August-2013
c
c  E-mail            : hpcfte@cdac.in
c
c********************************************************************
c
      program main

      include 'mpif.h'

      integer DATA_SIZE, PROC_SIZE
      real Epsilon
      parameter (Epsilon = 1.0E-10, DATA_SIZE = 16)
      parameter (PROC_SIZE = 8)

      integer Numprocs, MyRank, ierror
      integer VectorSize
      integer VectorSize_A, VectorSize_B
      integer i, index, iproc
      integer Root, DataSize
      integer Destination, Destination_tag
      integer Source, Source_tag
      integer Distribute_Cols, Remaining_Cols
      integer Displacement(PROC_SIZE)
      integer RecvCount, SendCount(PROC_SIZE)
      real Mybuffer_A(DATA_SIZE), Mybuffer_B(DATA_SIZE)
      real MyFinalVector, FinalAnswer
      real Vector_A(DATA_SIZE), Vector_B(DATA_SIZE)
      integer status(MPI_STATUS_SIZE)

C ........ MPI Initialisation ........

      call MPI_INIT(ierror)
      call MPI_COMM_RANK(MPI_COMM_WORLD, MyRank, ierror)
      call MPI_COMM_SIZE(MPI_COMM_WORLD, Numprocs, ierror)

      Root = 0

C ........ Enforce the necessary condition stated above:
C          SendCount and Displacement hold PROC_SIZE entries ........

      if(Numprocs .gt. PROC_SIZE) then
         if(MyRank .eq. Root) then
            print *, 'Number of processes should not exceed', PROC_SIZE
         endif
         goto 100
      endif

C ........ Read data for Vector_A and Vector_B on the Root ........

      if(MyRank .eq. Root) then
         open(unit=12, file = './data/vdata1.inp')
         read(12,*) VectorSize_A
         read(12,*) (Vector_A(i), i=1,VectorSize_A)
         close(12)

         open(unit=12, file = './data/vdata2.inp')
         read(12,*) VectorSize_B
         read(12,*) (Vector_B(i), i=1,VectorSize_B)
         close(12)
      endif

C ........ Broadcast the vector sizes to all processes ........

      call MPI_BCAST(VectorSize_A, 1, MPI_INTEGER, Root,
     $     MPI_COMM_WORLD, ierror)
      call MPI_BCAST(VectorSize_B, 1, MPI_INTEGER, Root,
     $     MPI_COMM_WORLD, ierror)

      if(VectorSize_A .ne. VectorSize_B) then
         if(MyRank .eq. Root) then
            print *, 'Vectors of different sizes cannot be multiplied'
         endif
         goto 100
      else
         VectorSize = VectorSize_A
      endif

      if(VectorSize .lt. Numprocs) then
         if(MyRank .eq. Root) then
            print *, 'VectorSize should be at least Numprocs'
         endif
         goto 100
      endif

C ....... Initial arrangement for the Scatterv operation:
C         each process gets VectorSize/Numprocs elements, and the
C         first Remaining_Cols processes get one extra element .......

      if(MyRank .eq. Root) then
         Distribute_Cols = VectorSize/Numprocs
         Remaining_Cols = mod(VectorSize, Numprocs)

         do iproc = 1, Numprocs
            SendCount(iproc) = Distribute_Cols
         enddo

         do iproc = 1, Remaining_Cols
            SendCount(iproc) = SendCount(iproc) + 1
         enddo

C ....... Zero-based displacements as a prefix sum of SendCount ......

         Displacement(1) = 0
         do iproc = 2, Numprocs
            Displacement(iproc) = Displacement(iproc-1) +
     $           SendCount(iproc-1)
         enddo
      endif

C ....... Send each process its RecvCount ........

      Destination_tag = 0
      Source_tag = 0

      if(MyRank .eq. Root) then
         do iproc = 1, Numprocs-1
            Destination = iproc
            DataSize = 1
            RecvCount = SendCount(iproc+1)
            call MPI_SEND(RecvCount, DataSize, MPI_INTEGER,
     $           Destination, Destination_tag, MPI_COMM_WORLD, ierror)
         enddo
         RecvCount = SendCount(1)
      else
         Source = Root
         DataSize = 1
         call MPI_RECV(RecvCount, DataSize, MPI_INTEGER, Source,
     $        Source_tag, MPI_COMM_WORLD, status, ierror)
      endif

C ......... Scatter Vector_A and Vector_B .........

      call MPI_SCATTERV(Vector_A, SendCount, Displacement, MPI_REAL,
     $     Mybuffer_A, RecvCount, MPI_REAL, Root, MPI_COMM_WORLD,
     $     ierror)
      call MPI_SCATTERV(Vector_B, SendCount, Displacement, MPI_REAL,
     $     Mybuffer_B, RecvCount, MPI_REAL, Root, MPI_COMM_WORLD,
     $     ierror)

C ........ Calculate partial sum ........
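C        Each process forms the local dot product over its
C        RecvCount elements; MPI_REDUCE with MPI_SUM then combines
C        the partial sums into FinalAnswer on the Root process.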
      MyFinalVector = 0.0
      do index = 1, RecvCount
         MyFinalVector = MyFinalVector +
     $        Mybuffer_A(index) * Mybuffer_B(index)
      enddo

C .... Collective computation : final answer on process 0 ....

      call MPI_REDUCE(MyFinalVector, FinalAnswer, 1, MPI_REAL,
     $     MPI_SUM, Root, MPI_COMM_WORLD, ierror)

      if(MyRank .eq. Root) then
         print *, 'FinalAnswer', FinalAnswer
      endif

 100  call MPI_FINALIZE(ierror)

      stop
      end
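c
c********************************************************************
c
c  Sample input layout, inferred from the READ statements above:
c  the first record holds the vector length, the following record
c  holds the elements. The length 4 and the element values are
c  illustrative only; vdata1.inp and vdata2.inp must use the same
c  length.
c
c      4
c      1.0  2.0  3.0  4.0
c
c  A typical build-and-run sequence (the mpif77 wrapper and the
c  process count of 4 are assumptions; adjust for your MPI
c  installation):
c
c      mpif77 vv_mult_blkstp_nonunf.f -o vv_mult_blkstp_nonunf
c      mpirun -np 4 ./vv_mult_blkstp_nonunf
c
c********************************************************************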