Tried parallelizing the code, but the compiler can already do that automatically
This commit is contained in:
parent
1b0a9ab380
commit
66dc92b5f4
4 changed files with 605 additions and 522 deletions
|
|
@ -1,7 +1,10 @@
|
|||
BasedOnStyle: GNU
|
||||
BasedOnStyle: LLVM
|
||||
IndentWidth: 4
|
||||
TabWidth: 4
|
||||
UseTab: Always
|
||||
BreakBeforeBraces: Attach
|
||||
ColumnLimit: 100
|
||||
AlignReturnType: Always
|
||||
PenaltyReturnTypeOnItsOwnLine: 1000000
|
||||
AlwaysBreakAfterDefinitionReturnType: None
|
||||
AlwaysBreakAfterDefinitionReturnType: None
|
||||
SeparateDefinitionBlocks: Always
|
||||
|
|
|
|||
2
Makefile
2
Makefile
|
|
@ -6,7 +6,7 @@ ts := $(shell /usr/bin/date "+%d-%m__%H_%M_%S")
|
|||
.DEFAULT_GOAL = MD
|
||||
|
||||
MD: $(SRC)/MD.cpp
|
||||
$(CC) $(CFLAGS) $(SRC)MD.cpp -lm -march=native -O2 -ftree-vectorize -funroll-loops -pg -g -o ./out/MD
|
||||
$(CC) $(CFLAGS) $(SRC)MD.cpp -lm -march=native -mtune=native -mavx -O2 -ftree-vectorize -funroll-loops -pg -g -o ./out/MD
|
||||
|
||||
MDorig: $(SRC)/MD-original.cpp
|
||||
$(CC) $(CFLAGS) $(SRC)MD-original.cpp -lm -O2 -pg -o ./out/MD-original
|
||||
|
|
|
|||
BIN
out/MD
BIN
out/MD
Binary file not shown.
118
src/MD.cpp
118
src/MD.cpp
|
|
@ -25,8 +25,9 @@
|
|||
|
||||
*/
|
||||
|
||||
#include <emmintrin.h>
|
||||
#include <immintrin.h>
|
||||
#include <math.h>
|
||||
#include <omp.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
|
@ -246,8 +247,7 @@ int main() {
|
|||
// Files that we can write different quantities to
|
||||
tfp = fopen(tfn, "w"); // The MD trajectory, coordinates of every
|
||||
// particle at each timestep
|
||||
ofp = fopen(
|
||||
ofn,
|
||||
ofp = fopen(ofn,
|
||||
"w"); // Output of other quantities (T, P, gc, etc) at every timestep
|
||||
afp = fopen(afn, "w"); // Average T, P, gc, etc from the simulation
|
||||
|
||||
|
|
@ -287,8 +287,7 @@ int main() {
|
|||
Tavg = 0;
|
||||
|
||||
int tenp = floor(NumTime / 10);
|
||||
fprintf(ofp,
|
||||
" time (s) T(t) (K) P(t) (Pa) "
|
||||
fprintf(ofp, " time (s) T(t) (K) P(t) (Pa) "
|
||||
"Kinetic En. (n.u.) Potential En. (n.u.) Total En. (n.u.)\n");
|
||||
printf(" PERCENTAGE OF CALCULATION COMPLETE:\n [");
|
||||
for (i = 0; i < NumTime + 1; i++) {
|
||||
|
|
@ -346,8 +345,8 @@ int main() {
|
|||
Tavg += Temp;
|
||||
Pavg += Press;
|
||||
|
||||
fprintf(ofp, " %8.4e %20.12f %20.12f %20.12f %20.12f %20.12f \n",
|
||||
i * dt * timefac, Temp, Press, KE, PE, KE + PE);
|
||||
fprintf(ofp, " %8.4e %20.12f %20.12f %20.12f %20.12f %20.12f \n", i * dt * timefac,
|
||||
Temp, Press, KE, PE, KE + PE);
|
||||
}
|
||||
|
||||
// Because we have calculated the instantaneous temperature and pressure,
|
||||
|
|
@ -358,8 +357,7 @@ int main() {
|
|||
gc = NA * Pavg * (Vol * VolFac) / (N * Tavg);
|
||||
fprintf(afp, " Total Time (s) T (K) P (Pa) PV/nT "
|
||||
"(J/(mol K)) Z V (m^3) N\n");
|
||||
fprintf(afp,
|
||||
" -------------- ----------- --------------- "
|
||||
fprintf(afp, " -------------- ----------- --------------- "
|
||||
"-------------- --------------- ------------ -----------\n");
|
||||
fprintf(afp,
|
||||
" %8.4e %15.5f %15.5f %10.5f %10.5f %10.5e "
|
||||
|
|
@ -382,8 +380,7 @@ int main() {
|
|||
printf("\n PERCENT ERROR of pV/nT AND GAS CONSTANT: %15.5f\n",
|
||||
100 * fabs(gc - 8.3144598) / 8.3144598);
|
||||
printf("\n THE COMPRESSIBILITY (unitless): %15.5f \n", Z);
|
||||
printf("\n TOTAL VOLUME (m^3): %10.5e \n",
|
||||
Vol * VolFac);
|
||||
printf("\n TOTAL VOLUME (m^3): %10.5e \n", Vol * VolFac);
|
||||
printf("\n NUMBER OF PARTICLES (unitless): %i \n", N);
|
||||
|
||||
fclose(tfp);
|
||||
|
|
@ -483,6 +480,7 @@ void transposeMatrix(double mat[MAXPART][3], double matT[3][MAXPART]) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
void transposeMatrix2(double matT[MAXPART][3], double mat[3][MAXPART]) {
|
||||
for (int i = 0; i < MAXPART; i++) {
|
||||
for (int j = 0; j < 3; j++) {
|
||||
|
|
@ -491,6 +489,91 @@ void transposeMatrix2(double matT[MAXPART][3], double mat[3][MAXPART]) {
|
|||
}
|
||||
}
|
||||
|
||||
double PotentialAndAccelerationSIMD(double dt) {
|
||||
memset(a, 0, sizeof(a));
|
||||
double Pot = 0.;
|
||||
|
||||
for (int i = 0; i < N; i++) {
|
||||
|
||||
__m256d riMinusRj[4];
|
||||
memset(riMinusRj, 0, sizeof(riMinusRj));
|
||||
double dist[4];
|
||||
memset(dist, 0, sizeof(dist));
|
||||
|
||||
for (int j = i + 1; j < N; j += 4) {
|
||||
for (int k = 0; k < 4; k++) {
|
||||
double quot, rnorm, term1, term2;
|
||||
// CALCULATE POTENTIAL ENERGY
|
||||
// double dist = 0.;
|
||||
// double posItoJ[3]; // position of i relative to j
|
||||
|
||||
// POT
|
||||
// double distTmp = r[i][k] - r[j][k];
|
||||
// dist += distTmp * distTmp;
|
||||
// ACCEL
|
||||
// posItoJ[k] = distTmp;
|
||||
|
||||
// r[i]
|
||||
__m256d ri = _mm256_loadu_pd(r[i]);
|
||||
// r[j]
|
||||
__m256d rj = _mm256_loadu_pd(r[j + k]);
|
||||
// r[i] - r[j]
|
||||
__m256d rij = _mm256_sub_pd(ri, rj);
|
||||
riMinusRj[k] = rij;
|
||||
// riMinusRj * riMinusRj
|
||||
__m256d distTmp = _mm256_mul_pd(rij, rij);
|
||||
|
||||
dist[k] = distTmp[0] + distTmp[1] + distTmp[2];
|
||||
}
|
||||
__m256d dists = _mm256_loadu_pd(dist);
|
||||
__m256d quot = _mm256_div_pd(_mm256_set1_pd(sigma * sigma), dists);
|
||||
__m256d term2 = _mm256_mul_pd(quot, _mm256_mul_pd(quot, quot));
|
||||
__m256d Pots =
|
||||
_mm256_mul_pd(_mm256_set1_pd(epsilon_8),
|
||||
_mm256_mul_pd(term2, _mm256_sub_pd(term2, _mm256_set1_pd(1.))));
|
||||
Pot += Pots[0] + Pots[1] + Pots[2] + Pots[3];
|
||||
|
||||
// quot = sigma * sigma / dist;
|
||||
// term2 = quot * quot * quot;
|
||||
// Pot += epsilon_8 * term2 * (term2 - 1.);
|
||||
__m256d distSqd = _mm256_mul_pd(dists, _mm256_mul_pd(dists, dists));
|
||||
__m256d rSqd_inv7 = _mm256_mul_pd(distSqd, _mm256_mul_pd(distSqd, dists));
|
||||
__m256d f8 = _mm256_div_pd(
|
||||
_mm256_sub_pd(_mm256_set1_pd(48.), _mm256_mul_pd(_mm256_set1_pd(24.), distSqd)),
|
||||
rSqd_inv7);
|
||||
// // From derivative of Lennard-Jones with sigma and epsilon
|
||||
// // set equal to 1 in natural units!
|
||||
// double distSqd = dist * dist * dist;
|
||||
// double rSqd_inv7 = distSqd * distSqd * dist;
|
||||
// double f = (48. - (24. * distSqd)) / rSqd_inv7;
|
||||
|
||||
// Go back to the original loop
|
||||
for (int k = 0; k < 4; k++) {
|
||||
__m256d tmp = _mm256_mul_pd(riMinusRj[k], _mm256_set1_pd(f8[k]));
|
||||
__m256d aI = _mm256_loadu_pd(a[i]);
|
||||
__m256d aJ = _mm256_loadu_pd(a[i]);
|
||||
_mm256_storeu_pd(a[i], _mm256_add_pd(aI, tmp));
|
||||
_mm256_storeu_pd(a[j], _mm256_sub_pd(aJ, tmp));
|
||||
|
||||
// // from F = ma, where m = 1 in natural units!
|
||||
// for (int k = 0; k < 3; k++) {
|
||||
// double tmp = posItoJ[k] * f;
|
||||
// a[i][k] += tmp;
|
||||
// a[j][k] -= tmp;
|
||||
// }
|
||||
//
|
||||
// }
|
||||
// for (int j = 0; j < 3; j++) {
|
||||
// v[i][j] += 0.5 * a[i][j] * dt;
|
||||
// }
|
||||
}
|
||||
}
|
||||
}
|
||||
//}
|
||||
|
||||
return Pot;
|
||||
}
|
||||
|
||||
double PotentialAndAcceleration(double dt) {
|
||||
memset(a, 0, sizeof(a));
|
||||
double Pot = 0.;
|
||||
|
|
@ -525,9 +608,6 @@ double PotentialAndAcceleration(double dt) {
|
|||
term2 = quot * quot * quot;
|
||||
Pot += epsilon_8 * term2 * (term2 - 1.);
|
||||
}
|
||||
for (int j = 0; j < 3; j++) {
|
||||
v[i][j] += 0.5 * a[i][j] * dt;
|
||||
}
|
||||
}
|
||||
return Pot;
|
||||
}
|
||||
|
|
@ -620,11 +700,11 @@ double VelocityVerlet(double dt, int iter, double *PE, FILE *fp) {
|
|||
// computeAccelerations ();
|
||||
*PE = PotentialAndAcceleration(dt);
|
||||
// Update velocity with updated acceleration
|
||||
// for (i = 0; i < N; i++) {
|
||||
// for (j = 0; j < 3; j++) {
|
||||
// v[i][j] += 0.5 * a[i][j] * dt;
|
||||
// }
|
||||
//}
|
||||
for (i = 0; i < N; i++) {
|
||||
for (j = 0; j < 3; j++) {
|
||||
v[i][j] += 0.5 * a[i][j] * dt;
|
||||
}
|
||||
}
|
||||
|
||||
// Elastic walls
|
||||
for (i = 0; i < N; i++) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue