revert some things, my mental health is at an all time low

This commit is contained in:
Afonso Franco 2023-10-19 21:19:56 +01:00
parent 66dc92b5f4
commit fbfb16c650
Signed by: afonso
SSH key fingerprint: SHA256:JiuxZNdA5bRWXPMUJChI0AQ75yC+cXY4xM0IaVwEVys
5 changed files with 108 additions and 93 deletions

View file

@ -6,7 +6,10 @@ ts := $(shell /usr/bin/date "+%d-%m__%H_%M_%S")
.DEFAULT_GOAL = MD .DEFAULT_GOAL = MD
MD: $(SRC)/MD.cpp MD: $(SRC)/MD.cpp
$(CC) $(CFLAGS) $(SRC)MD.cpp -lm -march=native -mtune=native -mavx -O2 -ftree-vectorize -funroll-loops -pg -g -o ./out/MD $(CC) $(CFLAGS) $(SRC)MD.cpp -lm -march=native -mavx -O2 -ftree-vectorize -funroll-loops -pg -g -o ./out/MD
MD-errors: $(SRC)/MD.cpp
$(CC) $(CFLAGS) $(SRC)MD.cpp -lm -march=native -mtune=native -mavx -O2 -fopt-info-vec-all -ftree-vectorizer-verbose=2 -ftree-vectorize -funroll-loops -pg -g -o ./out/MD
MDorig: $(SRC)/MD-original.cpp MDorig: $(SRC)/MD-original.cpp
$(CC) $(CFLAGS) $(SRC)MD-original.cpp -lm -O2 -pg -o ./out/MD-original $(CC) $(CFLAGS) $(SRC)MD-original.cpp -lm -O2 -pg -o ./out/MD-original

BIN
out/MD

Binary file not shown.

BIN
out/MD-orig Executable file

Binary file not shown.

View file

@ -540,7 +540,7 @@ void computeAccelerations() {
} }
// returns sum of dv/dt*m/A (aka Pressure) from elastic collisions with walls // returns sum of dv/dt*m/A (aka Pressure) from elastic collisions with walls
double VelocityVerlet(double dt, int iter, double *PE, FILE *fp) { double VelocityVerlet(double dt, int iter, FILE *fp) {
int i, j, k; int i, j, k;
double psum = 0.; double psum = 0.;

View file

@ -32,6 +32,25 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
// Readability
typedef __m256d fourD;
typedef __m128d twoD;
#define load_4d(ptr) _mm256_load_pd(ptr)
#define store_4d(ptr, val) _mm256_store_pd(ptr, val)
#define store_lower_1d(ptr, val) _mm_store_pd(ptr, val)
#define add_4d(a, b) _mm256_add_pd(a, b)
#define add_2d(a, b) _mm_add_pd(a, b)
#define hadd_4d(a, b) _mm256_hadd_pd(a, b)
#define sub_4d(a, b) _mm256_sub_pd(a, b)
#define mul_4d(a, b) _mm256_mul_pd(a, b)
#define div_4d(a, b) _mm256_div_pd(a, b)
#define create_4d(a) _mm256_set1_pd(a)
#define get_lower_2d(a) _mm256_castpd256_pd128(a)
#define get_upper_2d(a) _mm256_extractf128_pd(a, 1)
#define ALIGNMENT 32
// Number of particles // Number of particles
int N; int N;
@ -55,11 +74,8 @@ double Tinit; // 2;
const int MAXPART = 5001; const int MAXPART = 5001;
// Position // Position
double r[MAXPART][3]; double r[MAXPART][3];
// Velocity
double v[MAXPART][3]; double v[MAXPART][3];
// Acceleration
double a[MAXPART][3]; double a[MAXPART][3];
// Force
double F[MAXPART][3]; double F[MAXPART][3];
// atom type // atom type
@ -394,6 +410,7 @@ void initialize() {
int n, p, i, j, k; int n, p, i, j, k;
double pos; double pos;
// Number of atoms in each direction // Number of atoms in each direction
n = int(ceil(pow(N, 1.0 / 3))); n = int(ceil(pow(N, 1.0 / 3)));
@ -489,96 +506,91 @@ void transposeMatrix2(double matT[MAXPART][3], double mat[3][MAXPART]) {
} }
} }
double PotentialAndAccelerationSIMD(double dt) { // double PotentialAndAccelerationSIMD(double dt) {
memset(a, 0, sizeof(a)); // for (int i = 0; i < N; i++) {
double Pot = 0.; // for (int j = 0; j < 3; j++)
// a[i][j] = 0.;
for (int i = 0; i < N; i++) { // }
// double Pot = 0.;
__m256d riMinusRj[4]; //
memset(riMinusRj, 0, sizeof(riMinusRj)); // for (int i = 0; i < N; i++) {
double dist[4]; //
memset(dist, 0, sizeof(dist)); // fourD rij4[4];
// memset(rij4, 0, sizeof(rij4));
for (int j = i + 1; j < N; j += 4) { // double *dist = (double *)_mm_malloc(4 * sizeof(double), ALIGNMENT);
for (int k = 0; k < 4; k++) { // memset(dist, 0, sizeof(double) * 4);
double quot, rnorm, term1, term2; // //__m256i mask =
// CALCULATE POTENTIAL ENERGY // //_mm256_setr_epi64x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0);
// double dist = 0.; //
// double posItoJ[3]; // position of i relative to j // for (int j = i + 1; j < N; j += 4) {
// fourD ri = load_4d(r[i]);
// POT // int numParts = fmin(4, N - j);
// double distTmp = r[i][k] - r[j][k]; // for (int k = 0; k < numParts; k++) {
// dist += distTmp * distTmp; // if (j + k >= N) {
// ACCEL // break;
// posItoJ[k] = distTmp; // }
// double quot, rnorm, term1, term2;
// r[i] //
__m256d ri = _mm256_loadu_pd(r[i]); // fourD rj = load_4d(r[j + k]);
// r[j] // fourD rij = sub_4d(ri, rj);
__m256d rj = _mm256_loadu_pd(r[j + k]); // fourD distTmp = mul_4d(rij, rij);
// r[i] - r[j] // rij4[k] = rij;
__m256d rij = _mm256_sub_pd(ri, rj); //
riMinusRj[k] = rij; // fourD temp = hadd_4d(distTmp, distTmp);
// riMinusRj * riMinusRj // twoD lo = get_lower_2d(temp);
__m256d distTmp = _mm256_mul_pd(rij, rij); // twoD hi = get_upper_2d(temp);
// twoD sum = add_2d(lo, hi);
dist[k] = distTmp[0] + distTmp[1] + distTmp[2]; //
} // // Convert the scalar result to a double
__m256d dists = _mm256_loadu_pd(dist); // double finalSum;
__m256d quot = _mm256_div_pd(_mm256_set1_pd(sigma * sigma), dists); // store_lower_1d(&dist[k], sum);
__m256d term2 = _mm256_mul_pd(quot, _mm256_mul_pd(quot, quot)); // }
__m256d Pots = // fourD dists = load_4d(dist);
_mm256_mul_pd(_mm256_set1_pd(epsilon_8), // fourD quot = div_4d(create_4d(sigma * sigma), dists);
_mm256_mul_pd(term2, _mm256_sub_pd(term2, _mm256_set1_pd(1.)))); // fourD term2 = mul_4d(quot, mul_4d(quot, quot));
Pot += Pots[0] + Pots[1] + Pots[2] + Pots[3]; // fourD Pots = mul_4d(create_4d(epsilon_8), mul_4d(term2, sub_4d(term2, create_4d(1.))));
// // Perform horizontal addition and store the result in a scalar
// quot = sigma * sigma / dist; // fourD temp = hadd_4d(Pots, Pots);
// term2 = quot * quot * quot; // twoD lo = get_lower_2d(temp);
// Pot += epsilon_8 * term2 * (term2 - 1.); // twoD hi = get_upper_2d(temp);
__m256d distSqd = _mm256_mul_pd(dists, _mm256_mul_pd(dists, dists)); // twoD sum = add_2d(lo, hi);
__m256d rSqd_inv7 = _mm256_mul_pd(distSqd, _mm256_mul_pd(distSqd, dists)); //
__m256d f8 = _mm256_div_pd( // // Convert the scalar result to a double
_mm256_sub_pd(_mm256_set1_pd(48.), _mm256_mul_pd(_mm256_set1_pd(24.), distSqd)), // double finalSum;
rSqd_inv7); // store_lower_1d(&finalSum, sum);
// // From derivative of Lennard-Jones with sigma and epsilon // Pot += finalSum;
// // set equal to 1 in natural units! //
// double distSqd = dist * dist * dist; // fourD distSqd = mul_4d(dists, mul_4d(dists, dists));
// double rSqd_inv7 = distSqd * distSqd * dist; // fourD rSqd_inv7 = mul_4d(distSqd, mul_4d(distSqd, dists));
// double f = (48. - (24. * distSqd)) / rSqd_inv7; // fourD f8 = div_4d(sub_4d(create_4d(48.), mul_4d(create_4d(24.), distSqd)), rSqd_inv7);
//
// Go back to the original loop // // Go back to the original loop
for (int k = 0; k < 4; k++) { // for (int k = 0; k < numParts; k++) {
__m256d tmp = _mm256_mul_pd(riMinusRj[k], _mm256_set1_pd(f8[k])); // if (j + k >= N) {
__m256d aI = _mm256_loadu_pd(a[i]); // break;
__m256d aJ = _mm256_loadu_pd(a[i]); // }
_mm256_storeu_pd(a[i], _mm256_add_pd(aI, tmp)); // fourD tmp = mul_4d(rij4[k], create_4d(f8[k]));
_mm256_storeu_pd(a[j], _mm256_sub_pd(aJ, tmp)); // fourD aI = load_4d(a[i]);
// fourD aJ = load_4d(a[j + k]);
// // from F = ma, where m = 1 in natural units! // fourD sum = add_4d(aI, tmp);
// for (int k = 0; k < 3; k++) { // fourD sub = sub_4d(aJ, tmp);
// double tmp = posItoJ[k] * f; // store_4d(a[i], sum);
// a[i][k] += tmp; // store_4d(a[j + k], sub);
// a[j][k] -= tmp; // }
// } // }
// // }
// } // //}
// for (int j = 0; j < 3; j++) { //
// v[i][j] += 0.5 * a[i][j] * dt; // return Pot;
// } // }
}
}
}
//}
return Pot;
}
double PotentialAndAcceleration(double dt) { double PotentialAndAcceleration(double dt) {
memset(a, 0, sizeof(a)); memset(a, 0, sizeof(a));
double Pot = 0.; double Pot = 0.;
for (int i = 0; i < N; i++) { for (int i = 0; i < N-1; i++) {
double ri[3] = {r[i][0], r[i][1], r[i][2]};
for (int j = i + 1; j < N; j++) { for (int j = i + 1; j < N; j++) {
double quot, rnorm, term1, term2; double quot, rnorm, term1, term2;
// CALCULATE POTENTIAL ENERGY // CALCULATE POTENTIAL ENERGY
@ -587,7 +599,7 @@ double PotentialAndAcceleration(double dt) {
for (int k = 0; k < 3; k++) { for (int k = 0; k < 3; k++) {
// POT // POT
double distTmp = r[i][k] - r[j][k]; double distTmp = ri[k] - r[j][k];
dist += distTmp * distTmp; dist += distTmp * distTmp;
// ACCEL // ACCEL
posItoJ[k] = distTmp; posItoJ[k] = distTmp;
@ -597,7 +609,7 @@ double PotentialAndAcceleration(double dt) {
double distSqd = dist * dist * dist; double distSqd = dist * dist * dist;
double rSqd_inv7 = distSqd * distSqd * dist; double rSqd_inv7 = distSqd * distSqd * dist;
double f = (48. - (24. * distSqd)) / rSqd_inv7; double f = (48. - (24. * distSqd)) / rSqd_inv7;
// from F = ma, where m = 1 in natural units! // from F = ma, where m = 1 in natural units!
for (int k = 0; k < 3; k++) { for (int k = 0; k < 3; k++) {
double tmp = posItoJ[k] * f; double tmp = posItoJ[k] * f;
a[i][k] += tmp; a[i][k] += tmp;
@ -646,8 +658,8 @@ void computeAccelerations() {
int i, j, k; int i, j, k;
double f, rSqd, tmp = 0.; double f, rSqd, tmp = 0.;
double rij[3]; // position of i relative to j double rij[3]; // position of i relative to j
memset(a, 0, sizeof(a));
memset(a, 0, sizeof(a));
for (i = 0; i < N - 1; i++) { // loop over all distinct pairs i,j for (i = 0; i < N - 1; i++) { // loop over all distinct pairs i,j
for (j = i + 1; j < N; j++) { for (j = i + 1; j < N; j++) {
// initialize r^2 to zero // initialize r^2 to zero