#include <iostream>
#include <cstdlib>
#include <cmath>
#include "mpi.h"

using namespace std;

void PrintSurfaceMatrix(double** M, int LD, int D);

int main(int argc, char* argv[]){
  MPI_Init(&argc, &argv);

  int NCPUS, MY_PE;
  MPI_Comm_size(MPI_COMM_WORLD, &NCPUS);
  MPI_Comm_rank(MPI_COMM_WORLD, &MY_PE);

  // Read input parameters.
  if(argc < 5){
    if(MY_PE == 0)
      cerr << "\nToo few input parameters: need Dimension, Iterations, RowPeek, ColPeek\n";
    MPI_Finalize();
    exit(2);
  }

  int Dimension  = atoi(argv[1]);
  int Iterations = atoi(argv[2]);
  int RowPeek    = atoi(argv[3]);
  int ColPeek    = atoi(argv[4]);

  if((RowPeek > Dimension) || (ColPeek > Dimension)){
    if(MY_PE == 0){
      cerr << "Cannot peek at a matrix element outside of the surface.\n";
      cerr << "Arguments 3 and 4 must not exceed " << Dimension << "\n";
    }
    MPI_Finalize();
    exit(3);
  }

  // Initialize the matrix.
  // Each processor now gets a square block of the matrix, not a row.
  double SQ = sqrt((double)NCPUS);
  int SQint = (int)floor(SQ);
  if((floor(SQ) - SQ) != 0){
    if(MY_PE == 0)
      cerr << "\nThe number of processors needs to be a perfect square (e.g. 16)\n";
    MPI_Finalize();
    exit(4);
  }

  // For this assignment, we assume the matrix size maps evenly onto the CPUs.
  if((Dimension % SQint) != 0){
    if(MY_PE == 0)
      cerr << "\nThe dimension must be divisible by " << SQint << "\n";
    MPI_Finalize();
    exit(5);
  }

  int BlockSize = Dimension / SQint;

  // Allocate the local block plus a one-cell ghost border on every side.
  // (This allocation was eaten by the extraction; it is reconstructed from
  // how the arrays are indexed below: rows 0 and BlockSize+1 and columns 0
  // and BlockSize+1 are ghosts, interior starts cold at zero.)
  double** SurfaceMatrix   = (double**)malloc(sizeof(double*) * (BlockSize + 2));
  double** SurfaceMatrix_t = (double**)malloc(sizeof(double*) * (BlockSize + 2));
  for(int i = 0; i < BlockSize + 2; i++){
    SurfaceMatrix[i]   = (double*)calloc(BlockSize + 2, sizeof(double));
    SurfaceMatrix_t[i] = (double*)calloc(BlockSize + 2, sizeof(double));
  }

  // Fixed boundary values along the bottom edge of the global surface.
  // The original values[] initialization was also lost; a constant
  // 100-degree bottom edge is assumed here as a placeholder.
  double* values = (double*)malloc(sizeof(double) * Dimension);
  for(int i = 0; i < Dimension; i++)
    values[i] = 100.0;

  // The bottom row of processors owns the heated bottom edge.
  if(MY_PE >= (SQint * (SQint - 1))){
    int localDim = (MY_PE - (SQint * (SQint - 1))) * BlockSize;
    for(int i = 1; i <= BlockSize; i++){
      SurfaceMatrix[BlockSize + 1][i]   = values[localDim + i - 1];
      SurfaceMatrix_t[BlockSize + 1][i] = values[localDim + i - 1];
    }
  }

  //PrintSurfaceMatrix(SurfaceMatrix, BlockSize, Dimension);
  //MPI_Finalize();
  //exit(1);

  // Iterate
  double TimeStart = MPI_Wtime();
  MPI_Barrier(MPI_COMM_WORLD);

  // We need buffers to pass the edge columns, which are not contiguous
  // in memory (the ghost rows are, so they need no buffers).
  double* sRbuffer = (double*)malloc(sizeof(double) * BlockSize);
  double* sLbuffer = (double*)malloc(sizeof(double) * BlockSize);
  double* rRbuffer = (double*)malloc(sizeof(double) * BlockSize);
  double* rLbuffer = (double*)malloc(sizeof(double) * BlockSize);

  MPI_Request reqs1, reqs2, reqs3, reqs4;
  MPI_Request reqr1, reqr2, reqr3, reqr4;
  MPI_Status  status;

  for(int iCount = 1; iCount <= Iterations; iCount++){

    // Buffer up the right and left edge columns.
    if(MY_PE % SQint != (SQint - 1))
      for(int i = 0; i < BlockSize; i++)
        sRbuffer[i] = SurfaceMatrix[i + 1][BlockSize];
    if(MY_PE % SQint != 0)
      for(int i = 0; i < BlockSize; i++)
        sLbuffer[i] = SurfaceMatrix[i + 1][1];

    // Post the receives first. (The first three posts were spliced out of
    // the original listing; they are reconstructed from the matching waits
    // and unpacking loops below.)
    // Receive from the left.
    if(MY_PE % SQint != 0)
      MPI_Irecv(rRbuffer, BlockSize, MPI_DOUBLE, MY_PE - 1, 0, MPI_COMM_WORLD, &reqr1);
    // Receive from the right.
    if(MY_PE % SQint != (SQint - 1))
      MPI_Irecv(rLbuffer, BlockSize, MPI_DOUBLE, MY_PE + 1, 0, MPI_COMM_WORLD, &reqr2);
    // Receive from below.
    if(MY_PE < (SQint * (SQint - 1)))
      MPI_Irecv(&SurfaceMatrix[BlockSize + 1][1], BlockSize, MPI_DOUBLE, MY_PE + SQint, 0, MPI_COMM_WORLD, &reqr3);
    // Receive from above.
    if(MY_PE >= SQint)
      MPI_Irecv(&SurfaceMatrix[0][1], BlockSize, MPI_DOUBLE, MY_PE - SQint, 0, MPI_COMM_WORLD, &reqr4);

    // Send up.
    if(MY_PE >= SQint)
      MPI_Isend(&SurfaceMatrix[1][1], BlockSize, MPI_DOUBLE, MY_PE - SQint, 0, MPI_COMM_WORLD, &reqs3);
    // Send right.
    if(MY_PE % SQint != (SQint - 1))
      MPI_Isend(sRbuffer, BlockSize, MPI_DOUBLE, MY_PE + 1, 0, MPI_COMM_WORLD, &reqs1);
    // Send left.
    if(MY_PE % SQint != 0)
      MPI_Isend(sLbuffer, BlockSize, MPI_DOUBLE, MY_PE - 1, 0, MPI_COMM_WORLD, &reqs2);
    // Send down.
    if(MY_PE < (SQint * (SQint - 1)))
      MPI_Isend(&SurfaceMatrix[BlockSize][1], BlockSize, MPI_DOUBLE, MY_PE + SQint, 0, MPI_COMM_WORLD, &reqs4);

    // Wait on the right and left exchanges so we can unpack them.
    if(MY_PE % SQint != (SQint - 1)){ // not on the right edge
      MPI_Wait(&reqs1, &status);
      MPI_Wait(&reqr2, &status);
    }
    if(MY_PE % SQint != 0){ // not on the left edge
      MPI_Wait(&reqs2, &status);
      MPI_Wait(&reqr1, &status);
    }

    // The rights and lefts are here; unpack them into the ghost columns.
    if(MY_PE % SQint != 0)
      for(int i = 0; i < BlockSize; i++)
        SurfaceMatrix[i + 1][0] = rRbuffer[i];
    if(MY_PE % SQint != (SQint - 1))
      for(int i = 0; i < BlockSize; i++)
        SurfaceMatrix[i + 1][BlockSize + 1] = rLbuffer[i];

    // Wait on the up and down exchanges; these land directly in the
    // contiguous ghost rows, so no unpacking is needed.
    if(MY_PE < (SQint * (SQint - 1))){
      MPI_Wait(&reqs4, &status);
      MPI_Wait(&reqr3, &status);
    }
    if(MY_PE >= SQint){
      MPI_Wait(&reqs3, &status);
      MPI_Wait(&reqr4, &status);
    }

    // This will be a row-dominant program.
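    // Jacobi update: the new value of every interior point is the average
    // of its four old neighbours. Reads come only from SurfaceMatrix (the
    // old surface, including the ghost cells just received), and writes go
    // only to SurfaceMatrix_t, so no point ever sees a half-updated
    // neighbour; that is what makes the pointer swap below safe.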
    for(int i = 1; i <= BlockSize; i++)
      for(int j = 1; j <= BlockSize; j++)
        SurfaceMatrix_t[i][j] = 0.25 * (SurfaceMatrix[i - 1][j] +
                                        SurfaceMatrix[i][j + 1] +
                                        SurfaceMatrix[i + 1][j] +
                                        SurfaceMatrix[i][j - 1]);

    // Swap the pointers: the new surface becomes the old one.
    double** tmp = SurfaceMatrix;
    SurfaceMatrix = SurfaceMatrix_t;
    SurfaceMatrix_t = tmp;
  }

  // The edge buffers are no longer needed.
  free(sRbuffer);
  free(sLbuffer);
  free(rRbuffer);
  free(rLbuffer);

  double TimeEnd = MPI_Wtime();
  if(MY_PE == 0)
    cout << "\n Time Iterations = " << TimeEnd - TimeStart << endl;

  // Report the requested element. (The original listing is truncated at the
  // timing output; this peek reconstructs the evident intent of the
  // RowPeek/ColPeek arguments: find the rank owning the global point and
  // print its local value.)
  int ownerRow = (RowPeek - 1) / BlockSize;
  int ownerCol = (ColPeek - 1) / BlockSize;
  int owner    = ownerRow * SQint + ownerCol;
  if(MY_PE == owner)
    cout << " Value at (" << RowPeek << "," << ColPeek << ") = "
         << SurfaceMatrix[(RowPeek - 1) % BlockSize + 1][(ColPeek - 1) % BlockSize + 1]
         << endl;

  MPI_Finalize();
  return 0;
}
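// The definition of PrintSurfaceMatrix was lost in the truncation. A minimal
// sketch consistent with its declaration and the commented-out call site
// PrintSurfaceMatrix(SurfaceMatrix, BlockSize, Dimension) might look like
// this: each rank dumps its local (BlockSize+2)x(BlockSize+2) block, ghost
// border included, purely as a debugging aid. The layout is an assumption.
void PrintSurfaceMatrix(double** M, int LD, int D){
  for(int i = 0; i <= LD + 1; i++){
    for(int j = 0; j <= LD + 1; j++)
      cout << M[i][j] << " ";
    cout << "\n";
  }
  cout << "(local block of a " << D << "x" << D << " surface)" << endl;
}

// To build and run (assuming an MPI toolchain and the hypothetical file
// name laplace.cpp; the process count must be a perfect square that divides
// the dimension evenly):
//   mpicxx -O2 laplace.cpp -o laplace
//   mpirun -np 16 ./laplace 1024 1000 512 512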