Program Listing for File laplace.h¶
↰ Return to documentation for file (include/laplace.h
)
#ifndef laplace_h
#define laplace_h
#include "exafmm_t.h"
#include "fmm_scale_invariant.h"
#include "geometry.h"
#include "intrinsics.h"
#include "timer.h"
namespace exafmm_t {
class LaplaceFmm : public FmmScaleInvariant<real_t> {
public:
LaplaceFmm() {}
LaplaceFmm(int p_, int ncrit_, std::string filename_=std::string()) :
FmmScaleInvariant<real_t>(p_, ncrit_, filename_)
{
if (this->filename.empty()) {
this->filename = std::string("laplace_") + (std::is_same<real_t, float>::value ? "f" : "d")
+ std::string("_p") + std::to_string(p) + std::string(".dat");
}
}
void potential_P2P(RealVec& src_coord, RealVec& src_value, RealVec& trg_coord, RealVec& trg_value) {
simdvec zero(real_t(0));
real_t newton_coef = 16; // comes from Newton's method in simd rsqrt function
simdvec coef(real_t(1.0/(4*PI*newton_coef)));
int nsrcs = src_coord.size() / 3;
int ntrgs = trg_coord.size() / 3;
int t;
for (t=0; t+NSIMD<=ntrgs; t+=NSIMD) {
simdvec tx(&trg_coord[3*t+0], 3*(int)sizeof(real_t));
simdvec ty(&trg_coord[3*t+1], 3*(int)sizeof(real_t));
simdvec tz(&trg_coord[3*t+2], 3*(int)sizeof(real_t));
simdvec tv(zero);
for (int s=0; s<nsrcs; s++) {
simdvec sx(src_coord[3*s+0]);
sx -= tx;
simdvec sy(src_coord[3*s+1]);
sy -= ty;
simdvec sz(src_coord[3*s+2]);
sz -= tz;
simdvec sv(src_value[s]);
simdvec r2(zero);
r2 += sx * sx;
r2 += sy * sy;
r2 += sz * sz;
simdvec invr = rsqrt(r2);
invr &= r2 > zero;
tv += invr * sv;
}
tv *= coef;
for (int m=0; m<NSIMD && t+m<ntrgs; m++) {
trg_value[t+m] += tv[m];
}
}
for (; t<ntrgs; t++) {
real_t potential = 0;
for (int s=0; s<nsrcs; ++s) {
vec3 dx = 0;
for (int d=0; d<3; d++) {
dx[d] += trg_coord[3*t+d] - src_coord[3*s+d];
}
real_t r2 = norm(dx);
if (r2!=0) {
real_t invr = 1 / std::sqrt(r2);
potential += src_value[s] * invr;
}
}
trg_value[t] += potential / (4*PI);
}
add_flop((long long)ntrgs*(long long)nsrcs*(12+4*2));
}
void gradient_P2P(RealVec& src_coord, RealVec& src_value, RealVec& trg_coord, RealVec& trg_value) {
simdvec zero(real_t(0));
real_t newton_coef = 16; // comes from Newton's method in simd rsqrt function
simdvec coefp(real_t(1.0/(4*PI*newton_coef)));
simdvec coefg(real_t(1.0/(4*PI*newton_coef*newton_coef*newton_coef)));
int nsrcs = src_coord.size() / 3;
int ntrgs = trg_coord.size() / 3;
int t;
for (t=0; t+NSIMD<=ntrgs; t+=NSIMD) {
simdvec tx(&trg_coord[3*t+0], 3*(int)sizeof(real_t));
simdvec ty(&trg_coord[3*t+1], 3*(int)sizeof(real_t));
simdvec tz(&trg_coord[3*t+2], 3*(int)sizeof(real_t));
simdvec tv0(zero);
simdvec tv1(zero);
simdvec tv2(zero);
simdvec tv3(zero);
for (int s=0; s<nsrcs; s++) {
simdvec sx(src_coord[3*s+0]);
sx -= tx;
simdvec sy(src_coord[3*s+1]);
sy -= ty;
simdvec sz(src_coord[3*s+2]);
sz -= tz;
simdvec r2(zero);
r2 += sx * sx;
r2 += sy * sy;
r2 += sz * sz;
simdvec invr = rsqrt(r2);
invr &= r2 > zero;
simdvec invr3 = (invr*invr) * invr;
simdvec sv(src_value[s]);
tv0 += sv * invr;
sv *= invr3;
tv1 += sv * sx;
tv2 += sv * sy;
tv3 += sv * sz;
}
tv0 *= coefp;
tv1 *= coefg;
tv2 *= coefg;
tv3 *= coefg;
for (int m=0; m<NSIMD && t+m<ntrgs; m++) {
trg_value[0+4*(t+m)] += tv0[m];
trg_value[1+4*(t+m)] += tv1[m];
trg_value[2+4*(t+m)] += tv2[m];
trg_value[3+4*(t+m)] += tv3[m];
}
}
for (; t<ntrgs; t++) {
real_t potential = 0;
vec3 gradient = 0;
for (int s=0; s<nsrcs; ++s) {
vec3 dx = 0;
for (int d=0; d<3; ++d) {
dx[d] = trg_coord[3*t+d] - src_coord[3*s+d];
}
real_t r2 = norm(dx);
if (r2!=0) {
real_t invr2 = 1.0 / r2;
real_t invr = src_value[s] * std::sqrt(invr2);
potential += invr;
dx *= invr2 * invr;
gradient[0] += dx[0];
gradient[1] += dx[1];
gradient[2] += dx[2];
}
}
trg_value[4*t] += potential / (4*PI) ;
trg_value[4*t+1] -= gradient[0] / (4*PI);
trg_value[4*t+2] -= gradient[1] / (4*PI);
trg_value[4*t+3] -= gradient[2] / (4*PI);
}
add_flop((long long)ntrgs*(long long)nsrcs*(20+4*2));
}
};
} // end namespace exafmm_t
#endif