#include #include #include #include #include #include #include #include #include #include "mpi.h" #undef SEEK_SET #undef SEEK_CUR #undef SEEK_END const double ARmass = 39.94800000; //A.U.s const double ARsigma = 3.40500000; // Angstroms const double AReps = 119.800000; // Kelvins const double CellDim = 12.0000000; // Angstroms int NPartPerCell = 10; using namespace std; // Class for keeping track of the properties for a particle class Particle{ public: double x; // position in x axis double y; // position in y axis double fx; // total forces on x axis double fy; // total forces on y axis double ax; // acceleration on x axis double ay; // acceleration on y axis double vx; // velocity on x axis double vy; // velocity on y axis // Default constructor Particle() : x(0.0),y(0.0),fx(0.0),fy(0.0),ax(0.0), ay(0.0),vx(0.0),vy(0.0){ } double update(){ // We are using a 1.0 fs timestep, this is converted double DT = 0.000911633; ax = fx/ARmass; ay = fy/ARmass; vx += ax*0.5*DT; vy += ay*0.5*DT; x += DT*vx; y += DT*vy; fx = 0.0; fy = 0.0; return 0.5*ARmass*(vx*vx+vy*vy); } }; class Cell { public: vector particles; // Buffer to hold coordinates recieved double **remote_particles; // Buffer to hold coordinates to send double *PartCoordsBuff; int absx,absy; MPI_Request reqr[8],reqs[8]; int nreqr, nreqs; Cell(int x, int y, int ax, int ay, int nParticles) : particles(nParticles){ absx = ax; absy = ay; // We will be filling the cells, making sure than // No atom is less than 2 Angstroms from another double rcelldim = double(CellDim); double centerx = rcelldim*double(ax) + 0.5*rcelldim; double centery = rcelldim*double(ay) + 0.5*rcelldim; // Randomly initialize particles to be some distance // from the center of the square // place first atom in cell particles[0].x = centerx + ((drand48()-0.5)*(CellDim-2)); particles[0].y = centery + ((drand48()-0.5)*(CellDim-2)); double r,rx,ry; for (int i = 1; i < particles.size(); i++) { r = 0; while(r < 2.7) { // square of 2 r = 2.70001; rx = centerx + ((drand48()-0.5)*(CellDim-2)); ry = centery + ((drand48()-0.5)*(CellDim-2)); //loop over all other current atoms for(int j = 0; j= NParts) { return max; } } } } return max; } int main(int argc,char* argv[]){ MPI_Init(&argc,&argv); int MY_PE,NCPUS; MPI_Comm_rank(MPI_COMM_WORLD,&MY_PE); MPI_Comm_size(MPI_COMM_WORLD,&NCPUS); // Get Command Line Arguments if(argc!=3){ if(MY_PE == 0) cerr << "\nIncorrect syntax: should be two arguments\n"; MPI_Finalize(); exit(2); } // The Number of CPUs must be a Perfect Square int SQncpus = int(sqrt(double(NCPUS))); if(NCPUS % SQncpus != 0) { if(MY_PE == 0) cerr << "\nThe Number of cores must be a perfect square (e.g. 4,16,64, etc...)\n"; MPI_Finalize(); exit(3); } int NumParticles = atoi(argv[1]); int NumIterations= atoi(argv[2]); int Dimension = int(sqrt(NumParticles/double(NPartPerCell))); int TotalCells = Dimension*Dimension; int NCellsPerCPU = 0; int MaxCellSize; // Variables for the local size of data int NLocRows,NLocRowsShort,NLocRowsPE; int NLocCols,NLocColsShort,NLocColsPE; //Figuring out the dimensions of the local data is not trivial if(Dimension % SQncpus) { NLocRows = Dimension/SQncpus; NLocCols = Dimension/SQncpus; NLocColsShort = Dimension-((SQncpus-1)*NLocCols); NLocRowsShort = Dimension-((SQncpus-1)*NLocRows); if((MY_PE % SQncpus) == (SQncpus-1)) NLocColsPE = NLocColsShort; else NLocColsPE = NLocCols; if((MY_PE >= (SQncpus*(SQncpus-1)))) NLocRowsPE = NLocRowsShort; else NLocRowsPE = NLocRows; } else{ NLocRows = Dimension/SQncpus; NLocCols = Dimension/SQncpus; NLocColsShort= 0; NLocRowsShort= 0; NLocRowsPE = NLocRows; NLocColsPE = NLocCols; } // Create a map for the which processor each cell is assigned // This map is replicated, but small // First Assign the processors int ***CellCpus = (int***)malloc(sizeof(int**)*Dimension); for(int i=0;i Dimension) || (i==(SQncpus-1))) ixend = NLocRowsShort; for(int j=0;j Dimension) || (j==(SQncpus-1))) iyend = NLocColsShort; int ixcount =0; int iycount =0; for(int ix=0;ix > cells(NLocRowsPE); for (int i = 0; i < NLocRowsPE; i++) { for (int j = 0; j < NLocColsPE; j++) { int ax = ((MY_PE/SQncpus)*NLocRows)+i; int ay = ((MY_PE%SQncpus)*NLocCols)+j; cells[i].push_back(Cell(i,j,ax,ay,CellCpus[ax][ay][3])); } } double TimeStart = MPI_Wtime(); for (int t = 0; t < NumIterations; t++) { // For each timestep double TotPot1=0.0; double TotPot2=0.0; double TotKin=0.0; // Start the communications going // We can communicate whilst all of the cells are computing the // interactions with themselves. for (int cx1 = 0; cx1 < NLocRowsPE; cx1++) { for (int cy1 = 0; cy1 < NLocColsPE; cy1++) { cells[cx1][cy1].Communicate(CellCpus,Dimension); } } // Computing the interactions of the cells with the particles inside. for (int cx1 = 0; cx1 < NLocRowsPE; cx1++) { for (int cy1 = 0; cy1 < NLocColsPE; cy1++) { // Consider interactions between particles within the cell for (int i = 0; i < cells[cx1][cy1].particles.size(); i++) { for (int j = 0; j < cells[cx1][cy1].particles.size(); j++) { if(i!=j) TotPot1 += interact(cells[cx1][cy1].particles[i], cells[cx1][cy1].particles[j]); } } } } // We have to wait here for the comm to finish for(int cx1=0;cx1 < NLocRowsPE;cx1++) for (int cy1 = 0; cy1 < NLocColsPE; cy1++) cells[cx1][cy1].PostWaits(); // Consider each other cell... for(int cx1=0;cx1 < NLocRowsPE;cx1++){ for(int cy1=0;cy1 < NLocColsPE;cy1++){ for (int cx2 = 0; cx2 < cells[cx1][cy1].nreqr; cx2++) { // Consider all interactions between particles int nparts=cells[cx1][cy1].remote_particles[cx2][0]; for (int i = 0; i < cells[cx1][cy1].particles.size(); i++) { for (int j = 0; j < nparts; j++) { TotPot2 += interact(cells[cx1][cy1].particles[i], cells[cx1][cy1].remote_particles[cx2][j*2+1], cells[cx1][cy1].remote_particles[cx2][j*2+2]); } } } } } // End iteration over cells // Apply the accumulated forces; update accelerations, velocities, positions for (int cx1 = 0; cx1 < NLocRowsPE; cx1++) { for (int cy1 = 0; cy1 < NLocColsPE; cy1++) { for(int i=0; i < cells[cx1][cy1].particles.size(); i++) { TotKin += cells[cx1][cy1].particles[i].update(); } } } double TotPotSum = 0.0; double TotKinSum = 0.0; double LSum = TotPot1+TotPot2; MPI_Reduce(&LSum,&TotPotSum,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD); MPI_Reduce(&TotKin,&TotKinSum,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD); if(MY_PE == 0) printf("\nIteration#%d with Total Energy %e per Atom", t,(TotKinSum+TotPotSum)/NumParticles); } // iterate time steps double TimeEnd = MPI_Wtime(); if(MY_PE == 0) cout << "\nTime for " << NumIterations << " is "<< TimeEnd-TimeStart; for(int i=0;i