revert some things, my mental health is at an all time low
This commit is contained in:
parent
66dc92b5f4
commit
fbfb16c650
5 changed files with 108 additions and 93 deletions
5
Makefile
5
Makefile
|
@ -6,7 +6,10 @@ ts := $(shell /usr/bin/date "+%d-%m__%H_%M_%S")
|
|||
.DEFAULT_GOAL = MD
|
||||
|
||||
MD: $(SRC)/MD.cpp
|
||||
$(CC) $(CFLAGS) $(SRC)MD.cpp -lm -march=native -mtune=native -mavx -O2 -ftree-vectorize -funroll-loops -pg -g -o ./out/MD
|
||||
$(CC) $(CFLAGS) $(SRC)MD.cpp -lm -march=native -mavx -O2 -ftree-vectorize -funroll-loops -pg -g -o ./out/MD
|
||||
|
||||
MD-errors: $(SRC)/MD.cpp
|
||||
$(CC) $(CFLAGS) $(SRC)MD.cpp -lm -march=native -mtune=native -mavx -O2 -fopt-info-vec-all -ftree-vectorizer-verbose=2 -ftree-vectorize -funroll-loops -pg -g -o ./out/MD
|
||||
|
||||
MDorig: $(SRC)/MD-original.cpp
|
||||
$(CC) $(CFLAGS) $(SRC)MD-original.cpp -lm -O2 -pg -o ./out/MD-original
|
||||
|
|
BIN
out/MD
BIN
out/MD
Binary file not shown.
BIN
out/MD-orig
Executable file
BIN
out/MD-orig
Executable file
Binary file not shown.
|
@ -540,7 +540,7 @@ void computeAccelerations() {
|
|||
}
|
||||
|
||||
// returns sum of dv/dt*m/A (aka Pressure) from elastic collisions with walls
|
||||
double VelocityVerlet(double dt, int iter, double *PE, FILE *fp) {
|
||||
double VelocityVerlet(double dt, int iter, FILE *fp) {
|
||||
int i, j, k;
|
||||
|
||||
double psum = 0.;
|
||||
|
|
194
src/MD.cpp
194
src/MD.cpp
|
@ -32,6 +32,25 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// Readability
|
||||
|
||||
typedef __m256d fourD;
|
||||
typedef __m128d twoD;
|
||||
|
||||
#define load_4d(ptr) _mm256_load_pd(ptr)
|
||||
#define store_4d(ptr, val) _mm256_store_pd(ptr, val)
|
||||
#define store_lower_1d(ptr, val) _mm_store_pd(ptr, val)
|
||||
#define add_4d(a, b) _mm256_add_pd(a, b)
|
||||
#define add_2d(a, b) _mm_add_pd(a, b)
|
||||
#define hadd_4d(a, b) _mm256_hadd_pd(a, b)
|
||||
#define sub_4d(a, b) _mm256_sub_pd(a, b)
|
||||
#define mul_4d(a, b) _mm256_mul_pd(a, b)
|
||||
#define div_4d(a, b) _mm256_div_pd(a, b)
|
||||
#define create_4d(a) _mm256_set1_pd(a)
|
||||
#define get_lower_2d(a) _mm256_castpd256_pd128(a)
|
||||
#define get_upper_2d(a) _mm256_extractf128_pd(a, 1)
|
||||
|
||||
#define ALIGNMENT 32
|
||||
// Number of particles
|
||||
int N;
|
||||
|
||||
|
@ -55,11 +74,8 @@ double Tinit; // 2;
|
|||
const int MAXPART = 5001;
|
||||
// Position
|
||||
double r[MAXPART][3];
|
||||
// Velocity
|
||||
double v[MAXPART][3];
|
||||
// Acceleration
|
||||
double a[MAXPART][3];
|
||||
// Force
|
||||
double F[MAXPART][3];
|
||||
|
||||
// atom type
|
||||
|
@ -394,6 +410,7 @@ void initialize() {
|
|||
int n, p, i, j, k;
|
||||
double pos;
|
||||
|
||||
|
||||
// Number of atoms in each direction
|
||||
n = int(ceil(pow(N, 1.0 / 3)));
|
||||
|
||||
|
@ -489,96 +506,91 @@ void transposeMatrix2(double matT[MAXPART][3], double mat[3][MAXPART]) {
|
|||
}
|
||||
}
|
||||
|
||||
double PotentialAndAccelerationSIMD(double dt) {
|
||||
memset(a, 0, sizeof(a));
|
||||
double Pot = 0.;
|
||||
|
||||
for (int i = 0; i < N; i++) {
|
||||
|
||||
__m256d riMinusRj[4];
|
||||
memset(riMinusRj, 0, sizeof(riMinusRj));
|
||||
double dist[4];
|
||||
memset(dist, 0, sizeof(dist));
|
||||
|
||||
for (int j = i + 1; j < N; j += 4) {
|
||||
for (int k = 0; k < 4; k++) {
|
||||
double quot, rnorm, term1, term2;
|
||||
// CALCULATE POTENTIAL ENERGY
|
||||
// double dist = 0.;
|
||||
// double posItoJ[3]; // position of i relative to j
|
||||
|
||||
// POT
|
||||
// double distTmp = r[i][k] - r[j][k];
|
||||
// dist += distTmp * distTmp;
|
||||
// ACCEL
|
||||
// posItoJ[k] = distTmp;
|
||||
|
||||
// r[i]
|
||||
__m256d ri = _mm256_loadu_pd(r[i]);
|
||||
// r[j]
|
||||
__m256d rj = _mm256_loadu_pd(r[j + k]);
|
||||
// r[i] - r[j]
|
||||
__m256d rij = _mm256_sub_pd(ri, rj);
|
||||
riMinusRj[k] = rij;
|
||||
// riMinusRj * riMinusRj
|
||||
__m256d distTmp = _mm256_mul_pd(rij, rij);
|
||||
|
||||
dist[k] = distTmp[0] + distTmp[1] + distTmp[2];
|
||||
}
|
||||
__m256d dists = _mm256_loadu_pd(dist);
|
||||
__m256d quot = _mm256_div_pd(_mm256_set1_pd(sigma * sigma), dists);
|
||||
__m256d term2 = _mm256_mul_pd(quot, _mm256_mul_pd(quot, quot));
|
||||
__m256d Pots =
|
||||
_mm256_mul_pd(_mm256_set1_pd(epsilon_8),
|
||||
_mm256_mul_pd(term2, _mm256_sub_pd(term2, _mm256_set1_pd(1.))));
|
||||
Pot += Pots[0] + Pots[1] + Pots[2] + Pots[3];
|
||||
|
||||
// quot = sigma * sigma / dist;
|
||||
// term2 = quot * quot * quot;
|
||||
// Pot += epsilon_8 * term2 * (term2 - 1.);
|
||||
__m256d distSqd = _mm256_mul_pd(dists, _mm256_mul_pd(dists, dists));
|
||||
__m256d rSqd_inv7 = _mm256_mul_pd(distSqd, _mm256_mul_pd(distSqd, dists));
|
||||
__m256d f8 = _mm256_div_pd(
|
||||
_mm256_sub_pd(_mm256_set1_pd(48.), _mm256_mul_pd(_mm256_set1_pd(24.), distSqd)),
|
||||
rSqd_inv7);
|
||||
// // From derivative of Lennard-Jones with sigma and epsilon
|
||||
// // set equal to 1 in natural units!
|
||||
// double distSqd = dist * dist * dist;
|
||||
// double rSqd_inv7 = distSqd * distSqd * dist;
|
||||
// double f = (48. - (24. * distSqd)) / rSqd_inv7;
|
||||
|
||||
// Go back to the original loop
|
||||
for (int k = 0; k < 4; k++) {
|
||||
__m256d tmp = _mm256_mul_pd(riMinusRj[k], _mm256_set1_pd(f8[k]));
|
||||
__m256d aI = _mm256_loadu_pd(a[i]);
|
||||
__m256d aJ = _mm256_loadu_pd(a[i]);
|
||||
_mm256_storeu_pd(a[i], _mm256_add_pd(aI, tmp));
|
||||
_mm256_storeu_pd(a[j], _mm256_sub_pd(aJ, tmp));
|
||||
|
||||
// // from F = ma, where m = 1 in natural units!
|
||||
// for (int k = 0; k < 3; k++) {
|
||||
// double tmp = posItoJ[k] * f;
|
||||
// a[i][k] += tmp;
|
||||
// a[j][k] -= tmp;
|
||||
// }
|
||||
//
|
||||
// }
|
||||
// for (int j = 0; j < 3; j++) {
|
||||
// v[i][j] += 0.5 * a[i][j] * dt;
|
||||
// }
|
||||
}
|
||||
}
|
||||
}
|
||||
//}
|
||||
|
||||
return Pot;
|
||||
}
|
||||
// double PotentialAndAccelerationSIMD(double dt) {
|
||||
// for (int i = 0; i < N; i++) {
|
||||
// for (int j = 0; j < 3; j++)
|
||||
// a[i][j] = 0.;
|
||||
// }
|
||||
// double Pot = 0.;
|
||||
//
|
||||
// for (int i = 0; i < N; i++) {
|
||||
//
|
||||
// fourD rij4[4];
|
||||
// memset(rij4, 0, sizeof(rij4));
|
||||
// double *dist = (double *)_mm_malloc(4 * sizeof(double), ALIGNMENT);
|
||||
// memset(dist, 0, sizeof(double) * 4);
|
||||
// //__m256i mask =
|
||||
// //_mm256_setr_epi64x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0);
|
||||
//
|
||||
// for (int j = i + 1; j < N; j += 4) {
|
||||
// fourD ri = load_4d(r[i]);
|
||||
// int numParts = fmin(4, N - j);
|
||||
// for (int k = 0; k < numParts; k++) {
|
||||
// if (j + k >= N) {
|
||||
// break;
|
||||
// }
|
||||
// double quot, rnorm, term1, term2;
|
||||
//
|
||||
// fourD rj = load_4d(r[j + k]);
|
||||
// fourD rij = sub_4d(ri, rj);
|
||||
// fourD distTmp = mul_4d(rij, rij);
|
||||
// rij4[k] = rij;
|
||||
//
|
||||
// fourD temp = hadd_4d(distTmp, distTmp);
|
||||
// twoD lo = get_lower_2d(temp);
|
||||
// twoD hi = get_upper_2d(temp);
|
||||
// twoD sum = add_2d(lo, hi);
|
||||
//
|
||||
// // Convert the scalar result to a double
|
||||
// double finalSum;
|
||||
// store_lower_1d(&dist[k], sum);
|
||||
// }
|
||||
// fourD dists = load_4d(dist);
|
||||
// fourD quot = div_4d(create_4d(sigma * sigma), dists);
|
||||
// fourD term2 = mul_4d(quot, mul_4d(quot, quot));
|
||||
// fourD Pots = mul_4d(create_4d(epsilon_8), mul_4d(term2, sub_4d(term2, create_4d(1.))));
|
||||
// // Perform horizontal addition and store the result in a scalar
|
||||
// fourD temp = hadd_4d(Pots, Pots);
|
||||
// twoD lo = get_lower_2d(temp);
|
||||
// twoD hi = get_upper_2d(temp);
|
||||
// twoD sum = add_2d(lo, hi);
|
||||
//
|
||||
// // Convert the scalar result to a double
|
||||
// double finalSum;
|
||||
// store_lower_1d(&finalSum, sum);
|
||||
// Pot += finalSum;
|
||||
//
|
||||
// fourD distSqd = mul_4d(dists, mul_4d(dists, dists));
|
||||
// fourD rSqd_inv7 = mul_4d(distSqd, mul_4d(distSqd, dists));
|
||||
// fourD f8 = div_4d(sub_4d(create_4d(48.), mul_4d(create_4d(24.), distSqd)), rSqd_inv7);
|
||||
//
|
||||
// // Go back to the original loop
|
||||
// for (int k = 0; k < numParts; k++) {
|
||||
// if (j + k >= N) {
|
||||
// break;
|
||||
// }
|
||||
// fourD tmp = mul_4d(rij4[k], create_4d(f8[k]));
|
||||
// fourD aI = load_4d(a[i]);
|
||||
// fourD aJ = load_4d(a[j + k]);
|
||||
// fourD sum = add_4d(aI, tmp);
|
||||
// fourD sub = sub_4d(aJ, tmp);
|
||||
// store_4d(a[i], sum);
|
||||
// store_4d(a[j + k], sub);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// //}
|
||||
//
|
||||
// return Pot;
|
||||
// }
|
||||
|
||||
double PotentialAndAcceleration(double dt) {
|
||||
|
||||
memset(a, 0, sizeof(a));
|
||||
double Pot = 0.;
|
||||
|
||||
for (int i = 0; i < N; i++) {
|
||||
for (int i = 0; i < N-1; i++) {
|
||||
double ri[3] = {r[i][0], r[i][1], r[i][2]};
|
||||
for (int j = i + 1; j < N; j++) {
|
||||
double quot, rnorm, term1, term2;
|
||||
// CALCULATE POTENTIAL ENERGY
|
||||
|
@ -587,7 +599,7 @@ double PotentialAndAcceleration(double dt) {
|
|||
|
||||
for (int k = 0; k < 3; k++) {
|
||||
// POT
|
||||
double distTmp = r[i][k] - r[j][k];
|
||||
double distTmp = ri[k] - r[j][k];
|
||||
dist += distTmp * distTmp;
|
||||
// ACCEL
|
||||
posItoJ[k] = distTmp;
|
||||
|
@ -597,7 +609,7 @@ double PotentialAndAcceleration(double dt) {
|
|||
double distSqd = dist * dist * dist;
|
||||
double rSqd_inv7 = distSqd * distSqd * dist;
|
||||
double f = (48. - (24. * distSqd)) / rSqd_inv7;
|
||||
// from F = ma, where m = 1 in natural units!
|
||||
// from F = ma, where m = 1 in natural units!
|
||||
for (int k = 0; k < 3; k++) {
|
||||
double tmp = posItoJ[k] * f;
|
||||
a[i][k] += tmp;
|
||||
|
@ -646,8 +658,8 @@ void computeAccelerations() {
|
|||
int i, j, k;
|
||||
double f, rSqd, tmp = 0.;
|
||||
double rij[3]; // position of i relative to j
|
||||
memset(a, 0, sizeof(a));
|
||||
|
||||
memset(a, 0, sizeof(a));
|
||||
for (i = 0; i < N - 1; i++) { // loop over all distinct pairs i,j
|
||||
for (j = i + 1; j < N; j++) {
|
||||
// initialize r^2 to zero
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue