Tried parallelizing the code, but the compiler can already do that automatically

This commit is contained in:
Afonso Franco 2023-10-18 18:30:54 +01:00
parent 1b0a9ab380
commit 66dc92b5f4
Signed by: afonso
SSH key fingerprint: SHA256:JiuxZNdA5bRWXPMUJChI0AQ75yC+cXY4xM0IaVwEVys
4 changed files with 605 additions and 522 deletions

View file

@ -1,7 +1,10 @@
BasedOnStyle: GNU BasedOnStyle: LLVM
IndentWidth: 4 IndentWidth: 4
TabWidth: 4 TabWidth: 4
UseTab: Always UseTab: Always
BreakBeforeBraces: Attach BreakBeforeBraces: Attach
ColumnLimit: 100 ColumnLimit: 100
AlignReturnType: Always PenaltyReturnTypeOnItsOwnLine: 1000000
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterDefinitionReturnType: None
SeparateDefinitionBlocks: Always

View file

@ -6,7 +6,7 @@ ts := $(shell /usr/bin/date "+%d-%m__%H_%M_%S")
.DEFAULT_GOAL = MD .DEFAULT_GOAL = MD
MD: $(SRC)/MD.cpp MD: $(SRC)/MD.cpp
$(CC) $(CFLAGS) $(SRC)MD.cpp -lm -march=native -O2 -ftree-vectorize -funroll-loops -pg -g -o ./out/MD $(CC) $(CFLAGS) $(SRC)MD.cpp -lm -march=native -mtune=native -mavx -O2 -ftree-vectorize -funroll-loops -pg -g -o ./out/MD
MDorig: $(SRC)/MD-original.cpp MDorig: $(SRC)/MD-original.cpp
$(CC) $(CFLAGS) $(SRC)MD-original.cpp -lm -O2 -pg -o ./out/MD-original $(CC) $(CFLAGS) $(SRC)MD-original.cpp -lm -O2 -pg -o ./out/MD-original

BIN
out/MD

Binary file not shown.

View file

@ -25,8 +25,9 @@
*/ */
#include <emmintrin.h> #include <immintrin.h>
#include <math.h> #include <math.h>
#include <omp.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
@ -246,8 +247,7 @@ int main() {
// Files that we can write different quantities to // Files that we can write different quantities to
tfp = fopen(tfn, "w"); // The MD trajectory, coordinates of every tfp = fopen(tfn, "w"); // The MD trajectory, coordinates of every
// particle at each timestep // particle at each timestep
ofp = fopen( ofp = fopen(ofn,
ofn,
"w"); // Output of other quantities (T, P, gc, etc) at every timestep "w"); // Output of other quantities (T, P, gc, etc) at every timestep
afp = fopen(afn, "w"); // Average T, P, gc, etc from the simulation afp = fopen(afn, "w"); // Average T, P, gc, etc from the simulation
@ -287,8 +287,7 @@ int main() {
Tavg = 0; Tavg = 0;
int tenp = floor(NumTime / 10); int tenp = floor(NumTime / 10);
fprintf(ofp, fprintf(ofp, " time (s) T(t) (K) P(t) (Pa) "
" time (s) T(t) (K) P(t) (Pa) "
"Kinetic En. (n.u.) Potential En. (n.u.) Total En. (n.u.)\n"); "Kinetic En. (n.u.) Potential En. (n.u.) Total En. (n.u.)\n");
printf(" PERCENTAGE OF CALCULATION COMPLETE:\n ["); printf(" PERCENTAGE OF CALCULATION COMPLETE:\n [");
for (i = 0; i < NumTime + 1; i++) { for (i = 0; i < NumTime + 1; i++) {
@ -346,8 +345,8 @@ int main() {
Tavg += Temp; Tavg += Temp;
Pavg += Press; Pavg += Press;
fprintf(ofp, " %8.4e %20.12f %20.12f %20.12f %20.12f %20.12f \n", fprintf(ofp, " %8.4e %20.12f %20.12f %20.12f %20.12f %20.12f \n", i * dt * timefac,
i * dt * timefac, Temp, Press, KE, PE, KE + PE); Temp, Press, KE, PE, KE + PE);
} }
// Because we have calculated the instantaneous temperature and pressure, // Because we have calculated the instantaneous temperature and pressure,
@ -358,8 +357,7 @@ int main() {
gc = NA * Pavg * (Vol * VolFac) / (N * Tavg); gc = NA * Pavg * (Vol * VolFac) / (N * Tavg);
fprintf(afp, " Total Time (s) T (K) P (Pa) PV/nT " fprintf(afp, " Total Time (s) T (K) P (Pa) PV/nT "
"(J/(mol K)) Z V (m^3) N\n"); "(J/(mol K)) Z V (m^3) N\n");
fprintf(afp, fprintf(afp, " -------------- ----------- --------------- "
" -------------- ----------- --------------- "
"-------------- --------------- ------------ -----------\n"); "-------------- --------------- ------------ -----------\n");
fprintf(afp, fprintf(afp,
" %8.4e %15.5f %15.5f %10.5f %10.5f %10.5e " " %8.4e %15.5f %15.5f %10.5f %10.5f %10.5e "
@ -382,8 +380,7 @@ int main() {
printf("\n PERCENT ERROR of pV/nT AND GAS CONSTANT: %15.5f\n", printf("\n PERCENT ERROR of pV/nT AND GAS CONSTANT: %15.5f\n",
100 * fabs(gc - 8.3144598) / 8.3144598); 100 * fabs(gc - 8.3144598) / 8.3144598);
printf("\n THE COMPRESSIBILITY (unitless): %15.5f \n", Z); printf("\n THE COMPRESSIBILITY (unitless): %15.5f \n", Z);
printf("\n TOTAL VOLUME (m^3): %10.5e \n", printf("\n TOTAL VOLUME (m^3): %10.5e \n", Vol * VolFac);
Vol * VolFac);
printf("\n NUMBER OF PARTICLES (unitless): %i \n", N); printf("\n NUMBER OF PARTICLES (unitless): %i \n", N);
fclose(tfp); fclose(tfp);
@ -483,6 +480,7 @@ void transposeMatrix(double mat[MAXPART][3], double matT[3][MAXPART]) {
} }
} }
} }
void transposeMatrix2(double matT[MAXPART][3], double mat[3][MAXPART]) { void transposeMatrix2(double matT[MAXPART][3], double mat[3][MAXPART]) {
for (int i = 0; i < MAXPART; i++) { for (int i = 0; i < MAXPART; i++) {
for (int j = 0; j < 3; j++) { for (int j = 0; j < 3; j++) {
@ -491,6 +489,91 @@ void transposeMatrix2(double matT[MAXPART][3], double mat[3][MAXPART]) {
} }
} }
double PotentialAndAccelerationSIMD(double dt) {
memset(a, 0, sizeof(a));
double Pot = 0.;
for (int i = 0; i < N; i++) {
__m256d riMinusRj[4];
memset(riMinusRj, 0, sizeof(riMinusRj));
double dist[4];
memset(dist, 0, sizeof(dist));
for (int j = i + 1; j < N; j += 4) {
for (int k = 0; k < 4; k++) {
double quot, rnorm, term1, term2;
// CALCULATE POTENTIAL ENERGY
// double dist = 0.;
// double posItoJ[3]; // position of i relative to j
// POT
// double distTmp = r[i][k] - r[j][k];
// dist += distTmp * distTmp;
// ACCEL
// posItoJ[k] = distTmp;
// r[i]
__m256d ri = _mm256_loadu_pd(r[i]);
// r[j]
__m256d rj = _mm256_loadu_pd(r[j + k]);
// r[i] - r[j]
__m256d rij = _mm256_sub_pd(ri, rj);
riMinusRj[k] = rij;
// riMinusRj * riMinusRj
__m256d distTmp = _mm256_mul_pd(rij, rij);
dist[k] = distTmp[0] + distTmp[1] + distTmp[2];
}
__m256d dists = _mm256_loadu_pd(dist);
__m256d quot = _mm256_div_pd(_mm256_set1_pd(sigma * sigma), dists);
__m256d term2 = _mm256_mul_pd(quot, _mm256_mul_pd(quot, quot));
__m256d Pots =
_mm256_mul_pd(_mm256_set1_pd(epsilon_8),
_mm256_mul_pd(term2, _mm256_sub_pd(term2, _mm256_set1_pd(1.))));
Pot += Pots[0] + Pots[1] + Pots[2] + Pots[3];
// quot = sigma * sigma / dist;
// term2 = quot * quot * quot;
// Pot += epsilon_8 * term2 * (term2 - 1.);
__m256d distSqd = _mm256_mul_pd(dists, _mm256_mul_pd(dists, dists));
__m256d rSqd_inv7 = _mm256_mul_pd(distSqd, _mm256_mul_pd(distSqd, dists));
__m256d f8 = _mm256_div_pd(
_mm256_sub_pd(_mm256_set1_pd(48.), _mm256_mul_pd(_mm256_set1_pd(24.), distSqd)),
rSqd_inv7);
// // From derivative of Lennard-Jones with sigma and epsilon
// // set equal to 1 in natural units!
// double distSqd = dist * dist * dist;
// double rSqd_inv7 = distSqd * distSqd * dist;
// double f = (48. - (24. * distSqd)) / rSqd_inv7;
// Go back to the original loop
for (int k = 0; k < 4; k++) {
__m256d tmp = _mm256_mul_pd(riMinusRj[k], _mm256_set1_pd(f8[k]));
__m256d aI = _mm256_loadu_pd(a[i]);
__m256d aJ = _mm256_loadu_pd(a[i]);
_mm256_storeu_pd(a[i], _mm256_add_pd(aI, tmp));
_mm256_storeu_pd(a[j], _mm256_sub_pd(aJ, tmp));
// // from F = ma, where m = 1 in natural units!
// for (int k = 0; k < 3; k++) {
// double tmp = posItoJ[k] * f;
// a[i][k] += tmp;
// a[j][k] -= tmp;
// }
//
// }
// for (int j = 0; j < 3; j++) {
// v[i][j] += 0.5 * a[i][j] * dt;
// }
}
}
}
//}
return Pot;
}
double PotentialAndAcceleration(double dt) { double PotentialAndAcceleration(double dt) {
memset(a, 0, sizeof(a)); memset(a, 0, sizeof(a));
double Pot = 0.; double Pot = 0.;
@ -525,9 +608,6 @@ double PotentialAndAcceleration(double dt) {
term2 = quot * quot * quot; term2 = quot * quot * quot;
Pot += epsilon_8 * term2 * (term2 - 1.); Pot += epsilon_8 * term2 * (term2 - 1.);
} }
for (int j = 0; j < 3; j++) {
v[i][j] += 0.5 * a[i][j] * dt;
}
} }
return Pot; return Pot;
} }
@ -620,11 +700,11 @@ double VelocityVerlet(double dt, int iter, double *PE, FILE *fp) {
// computeAccelerations (); // computeAccelerations ();
*PE = PotentialAndAcceleration(dt); *PE = PotentialAndAcceleration(dt);
// Update velocity with updated acceleration // Update velocity with updated acceleration
// for (i = 0; i < N; i++) { for (i = 0; i < N; i++) {
// for (j = 0; j < 3; j++) { for (j = 0; j < 3; j++) {
// v[i][j] += 0.5 * a[i][j] * dt; v[i][j] += 0.5 * a[i][j] * dt;
// } }
//} }
// Elastic walls // Elastic walls
for (i = 0; i < N; i++) { for (i = 0; i < N; i++) {