/******************************************************************** C-DAC Tech Workshop : hyPACK-2013 October 15-18,2013 Example 1.4 : mpi-Pthreads-matrix-vector.c Objective : To write an MPI-Pthreads program, for computing the matrix-vector multiplication using Self-Scheduling algorithm. Pthreads and MPI library calls are used. This example demonstrates use of: pthread_create() pthread_join() pthread_mutex_lock() pthread_mutex_unlock() MPI_Init MPI_Comm_rank MPI_Comm_size MPI_Barrier MPI_Bcast MPI_Allgather MPI_Gatherv MPI_Finalize Input : Number of Rows, Columns of the Matrix Output : Product of Matrix Vector Multiplication. Created :MAY-2012 E-mail : betatest@cdac.in **************************************************************************/ #include #include #include "mpi.h" #include int MyRank, currentRow, MyNoofRows, NoofCols, GlobalIndex = -1; float **Matrix, *Vector, *MyResult; int flag = 0, rowlimit; pthread_mutex_t mutex_Row = PTHREAD_MUTEX_INITIALIZER; pthread_mutex_t mutex_Flag = PTHREAD_MUTEX_INITIALIZER; /* Routine executed by each thread */ void * MyPartOfCalc(int Id) { int myRow, icol, myindex; if (flag == 0) { pthread_mutex_lock(&mutex_Flag); rowlimit = currentRow + MyNoofRows; flag++; pthread_mutex_unlock(&mutex_Flag); } while (1) { /* * Thread selects the row of Matrix on which it has to do the * operation */ pthread_mutex_lock(&mutex_Row); { if (currentRow >= rowlimit) { pthread_mutex_unlock(&mutex_Row); pthread_exit(0); } myRow = currentRow; currentRow++; GlobalIndex++; myindex = GlobalIndex; } pthread_mutex_unlock(&mutex_Row); /* * Perform the multiplication on the row selected and store * the addendum in MyResult array */ printf(" Thread Id %d of process with Rank %d operated on Matrix Row %d\n", Id, MyRank, myRow); MyResult[myindex] = 0.0; for (icol = 0; icol < NoofCols; icol++) MyResult[myindex] += Matrix[myRow][icol] * Vector[icol]; } } int main(int argc, char **argv) { int iproc, irow, icol, modval, divval, *Displacement, iprocb, *ArrayNoofRows; int Numprocs, Root = 0, NoofRows, VectorSize; float *Results; pthread_t *threads; /* MPI Initialisation ... */ MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &MyRank); MPI_Comm_size(MPI_COMM_WORLD, &Numprocs); /* Validity checking for minimum number of processors */ if (Numprocs < 2) { printf("Invalid Number of Processors ..... \n"); printf("Numprocs must be greater than 1 ......\n"); MPI_Finalize(); exit(0); } /* Read the sizes of Matrix and Vector */ if (MyRank == Root) { printf("Enter the number of rows and columns of Matrix\n"); scanf("%d %d", &NoofRows, &NoofCols); printf("Enter the size of Vector\n"); scanf("%d", &VectorSize); } MPI_Barrier(MPI_COMM_WORLD); MPI_Bcast(&NoofRows, 1, MPI_INT, Root, MPI_COMM_WORLD); MPI_Bcast(&NoofCols, 1, MPI_INT, Root, MPI_COMM_WORLD); MPI_Bcast(&VectorSize, 1, MPI_INT, Root, MPI_COMM_WORLD); /* Validity checking for negative sizes of Matrix and Vector */ if (NoofRows < 1 || NoofCols < 1 || VectorSize < 1) { printf("The number of rows,columns or size of Vector should be atleast one\n"); MPI_Finalize(); exit(-1); } /* Validity checking for minimum number of Rows of Matrix */ if (NoofRows < Numprocs) { printf("The number of rows of Matrix should be greater than number of processors\n"); MPI_Finalize(); exit(-1); } /* Validity checking for suitability of sizes for multiplication */ if (NoofCols != VectorSize) { printf("The number of columns of Matrix should be equal to size of Vector\n"); MPI_Finalize(); exit(-1); } /* Allocating and Populating the Matrices */ Matrix = (float **) malloc(NoofRows * sizeof(float *)); for (irow = 0; irow < NoofRows; irow++) Matrix[irow] = (float *) malloc(NoofCols * sizeof(float)); Vector = (float *) malloc(NoofRows * sizeof(float)); for (icol = 0; icol < NoofCols; icol++) { for (irow = 0; irow < NoofRows; irow++) Matrix[irow][icol] = irow + icol; Vector[icol] = icol; } /* Storing the number of Rows to be operated by each process in array */ modval = NoofRows % Numprocs; divval = NoofRows / Numprocs; MyNoofRows = (MyRank < modval ? divval + 1 : divval); ArrayNoofRows = (int *) malloc(Numprocs * sizeof(int)); MPI_Allgather(&MyNoofRows, 1, MPI_INT, ArrayNoofRows, 1, MPI_INT, MPI_COMM_WORLD); /* Storing the starting Row to be operated by each process in array */ Displacement = (int *) malloc(Numprocs * sizeof(int)); Displacement[0] = 0; for (iproc = 1; iproc < Numprocs; iproc++) Displacement[iproc] = Displacement[iproc - 1] + ArrayNoofRows[iproc - 1]; MyResult = (float *) malloc(MyNoofRows * sizeof(float)); currentRow = Displacement[MyRank]; MPI_Barrier(MPI_COMM_WORLD); /* * Call threads equal to number of Rows to be processed by this * process */ threads = (pthread_t *) malloc(sizeof(pthread_t) * MyNoofRows); for (irow = 0; irow < MyNoofRows; irow++) pthread_create(&threads[irow], NULL, (void *(*) (void *)) MyPartOfCalc, (void *) irow); MPI_Barrier(MPI_COMM_WORLD); for (irow = 0; irow < MyNoofRows; irow++) pthread_join(threads[irow], NULL); MPI_Barrier(MPI_COMM_WORLD); /* Collection of results from each process using MPI_Gatherv */ Results = (float *) malloc(NoofRows * sizeof(float)); MPI_Gatherv(MyResult, MyNoofRows, MPI_FLOAT, Results, ArrayNoofRows, Displacement, MPI_FLOAT, Root, MPI_COMM_WORLD); /* Printing of the Matrix , Vector and the Result Vector by Root */ if (MyRank == Root) { printf("\n\nMatrix is\n\n"); for (irow = 0; irow < NoofRows; irow++) { printf("Row %d : ", irow); for (icol = 0; icol < NoofCols; icol++) printf(" %f ", Matrix[irow][icol]); printf("\n"); } printf("\n"); printf("\n"); printf("Vector is\n\n"); for (icol = 0; icol < NoofCols; icol++) printf("Row %d : %f \n", icol, Vector[icol]); printf("\n"); printf("\n"); printf("\n\nResult Vector is \n\n"); for (irow = 0; irow < NoofRows; irow++) printf("Row %d : %f \n", irow, Results[irow]); } printf("\n"); MPI_Finalize(); return 0; }