Program Listing for File fmm_base.h¶
↰ Return to documentation for file (include/fmm_base.h
)
#ifndef fmm_base_h
#define fmm_base_h
#include <algorithm> // std::fill
#include "exafmm_t.h"
#include "geometry.h"
#include "timer.h"
namespace exafmm_t {
template <typename T>
class FmmBase {
public:
int p;
int nsurf;
int nconv;
int nfreq;
int ncrit;
int depth;
real_t r0;
vec3 x0;
bool is_precomputed;
bool is_real;
std::string filename;
FmmBase() {}
FmmBase(int p_, int ncrit_, std::string filename_=std::string()) :
p(p_), ncrit(ncrit_), filename(filename_)
{
nsurf = 6*(p_-1)*(p_-1) + 2;
int n1 = 2 * p_;
nconv = n1 * n1 * n1;
is_real = std::is_same<T, real_t>::value;
nfreq = is_real ? n1*n1*(n1/2+1) : nconv;
is_precomputed = false;
}
virtual void potential_P2P(RealVec& src_coord, std::vector<T>& src_value,
RealVec& trg_coord, std::vector<T>& trg_value) = 0;
virtual void gradient_P2P(RealVec& src_coord, std::vector<T>& src_value,
RealVec& trg_coord, std::vector<T>& trg_value) = 0;
virtual void M2L(Nodes<T>& nodes) = 0;
virtual void M2M(Node<T>* node) = 0;
virtual void L2L(Node<T>* node) = 0;
virtual void P2M(NodePtrs<T>& leafs) = 0;
virtual void L2P(NodePtrs<T>& leafs) = 0;
void kernel_matrix(RealVec& src_coord, RealVec& trg_coord, std::vector<T>& matrix) {
std::vector<T> src_value(1, 1.); // use unit weight to generate kernel matrix
int nsrcs = src_coord.size() / 3;
int ntrgs = trg_coord.size() / 3;
#pragma omp parallel for
for (int i=0; i<nsrcs; i++) {
RealVec src_coord_(src_coord.data()+3*i, src_coord.data()+3*(i+1));
std::vector<T> trg_value_(ntrgs, 0.);
potential_P2P(src_coord_, src_value, trg_coord, trg_value_);
std::copy(trg_value_.begin(), trg_value_.end(), matrix.data()+i*ntrgs);
}
}
/* the following kernels do not use precomputation matrices
* thus can be defined in the base class */
void P2P(NodePtrs<T>& leafs) {
NodePtrs<T>& targets = leafs;
#pragma omp parallel for
for (size_t i=0; i<targets.size(); i++) {
Node<T>* target = targets[i];
NodePtrs<T>& sources = target->P2P_list;
for (size_t j=0; j<sources.size(); j++) {
Node<T>* source = sources[j];
gradient_P2P(source->src_coord, source->src_value,
target->trg_coord, target->trg_value);
}
}
}
void M2P(NodePtrs<T>& leafs) {
NodePtrs<T>& targets = leafs;
real_t c[3] = {0.0};
std::vector<RealVec> up_equiv_surf;
up_equiv_surf.resize(depth+1);
for (int level=0; level<=depth; level++) {
up_equiv_surf[level].resize(nsurf*3);
up_equiv_surf[level] = surface(p, r0, level, c, 1.05);
}
#pragma omp parallel for
for (size_t i=0; i<targets.size(); i++) {
Node<T>* target = targets[i];
NodePtrs<T>& sources = target->M2P_list;
for (size_t j=0; j<sources.size(); j++) {
Node<T>* source = sources[j];
RealVec src_equiv_coord(nsurf*3);
int level = source->level;
// source node's equiv coord = relative equiv coord + node's center
for (int k=0; k<nsurf; k++) {
src_equiv_coord[3*k+0] = up_equiv_surf[level][3*k+0] + source->x[0];
src_equiv_coord[3*k+1] = up_equiv_surf[level][3*k+1] + source->x[1];
src_equiv_coord[3*k+2] = up_equiv_surf[level][3*k+2] + source->x[2];
}
gradient_P2P(src_equiv_coord, source->up_equiv,
target->trg_coord, target->trg_value);
}
}
}
void P2L(Nodes<T>& nodes) {
Nodes<T>& targets = nodes;
real_t c[3] = {0.0};
std::vector<RealVec> dn_check_surf;
dn_check_surf.resize(depth+1);
for (int level=0; level<=depth; level++) {
dn_check_surf[level].resize(nsurf*3);
dn_check_surf[level] = surface(p, r0, level, c, 1.05);
}
#pragma omp parallel for
for (size_t i=0; i<targets.size(); i++) {
Node<T>* target = &targets[i];
NodePtrs<T>& sources = target->P2L_list;
for (size_t j=0; j<sources.size(); j++) {
Node<T>* source = sources[j];
RealVec trg_check_coord(nsurf*3);
int level = target->level;
// target node's check coord = relative check coord + node's center
for (int k=0; k<nsurf; k++) {
trg_check_coord[3*k+0] = dn_check_surf[level][3*k+0] + target->x[0];
trg_check_coord[3*k+1] = dn_check_surf[level][3*k+1] + target->x[1];
trg_check_coord[3*k+2] = dn_check_surf[level][3*k+2] + target->x[2];
}
potential_P2P(source->src_coord, source->src_value,
trg_check_coord, target->dn_equiv);
}
}
}
void upward_pass(Nodes<T>& nodes, NodePtrs<T>& leafs, bool verbose=true) {
start("P2M");
P2M(leafs);
stop("P2M", verbose);
start("M2M");
#pragma omp parallel
#pragma omp single nowait
M2M(&nodes[0]);
stop("M2M", verbose);
}
void downward_pass(Nodes<T>& nodes, NodePtrs<T>& leafs, bool verbose=true) {
start("P2L");
P2L(nodes);
stop("P2L", verbose);
start("M2P");
M2P(leafs);
stop("M2P", verbose);
start("P2P");
P2P(leafs);
stop("P2P", verbose);
start("M2L");
M2L(nodes);
stop("M2L", verbose);
start("L2L");
#pragma omp parallel
#pragma omp single nowait
L2L(&nodes[0]);
stop("L2L", verbose);
start("L2P");
L2P(leafs);
stop("L2P", verbose);
}
RealVec verify(NodePtrs<T>& leafs, bool sample=false) {
Nodes<T> targets; // vector of target nodes
if (sample) {
int nsamples = 10;
int stride = leafs.size() / nsamples;
for (int i=0; i<nsamples; i++) {
targets.push_back(*(leafs[i*stride]));
}
} else { // compute all values directly without sampling
for (size_t i=0; i<leafs.size(); i++) {
targets.push_back(*leafs[i]);
}
}
Nodes<T> targets2 = targets; // target2 is used for direct summation
#pragma omp parallel for
for (size_t i=0; i<targets2.size(); i++) {
Node<T>* target = &targets2[i];
std::fill(target->trg_value.begin(), target->trg_value.end(), 0.);
for (size_t j=0; j<leafs.size(); j++) {
gradient_P2P(leafs[j]->src_coord, leafs[j]->src_value, target->trg_coord, target->trg_value);
}
}
// relative error in L-infinity norm
/*
double potential_max_err = -1;
double gradient_max_err = -1;
for (size_t i=0; i<targets.size(); i++) {
for (int j=0; j<targets[i].ntrgs; j++) {
auto potential_diff = std::abs(targets[i].trg_value[4*j+0] - targets2[i].trg_value[4*j+0]);
auto potential_norm = std::abs(targets2[i].trg_value[4*j+0]);
potential_max_err = std::max(potential_max_err, potential_diff/potential_norm);
for (int d=1; d<4; d++) {
auto gradient_diff = std::abs(targets[i].trg_value[4*j+d] - targets2[i].trg_value[4*j+d]);
auto gradient_norm = std::abs(targets2[i].trg_value[4*j+d]);
gradient_max_err = std::max(gradient_max_err, gradient_diff/gradient_norm);
}
}
}
*/
// relative error in L2 norm
double p_diff = 0, p_norm = 0, g_diff = 0, g_norm = 0;
for (size_t i=0; i<targets.size(); i++) {
for (int j=0; j<targets[i].ntrgs; j++) {
p_norm += std::norm(targets2[i].trg_value[4*j+0]);
p_diff += std::norm(targets2[i].trg_value[4*j+0] - targets[i].trg_value[4*j+0]);
for (int d=1; d<4; d++) {
g_diff += std::norm(targets2[i].trg_value[4*j+d] - targets[i].trg_value[4*j+d]);
g_norm += std::norm(targets2[i].trg_value[4*j+d]);
}
}
}
RealVec err(2);
err[0] = sqrt(p_diff/p_norm); // potential error in L2 norm
err[1] = sqrt(g_diff/g_norm); // gradient error in L2 norm
return err;
}
};
} // end namespace
#endif