Program Listing for File modified_helmholtz.h

Return to documentation for file (include/modified_helmholtz.h)

#ifndef modified_helmholtz_h
#define modified_helmholtz_h
#include "exafmm_t.h"
#include "fmm.h"
#include "geometry.h"
#include "intrinsics.h"
#include "timer.h"

namespace exafmm_t {
  class ModifiedHelmholtzFmm : public Fmm<real_t> {
  public:
    real_t wavek;

    ModifiedHelmholtzFmm() {}

    ModifiedHelmholtzFmm(int p_, int ncrit_, real_t wavek_, std::string filename_=std::string()) :
      Fmm<real_t>(p_, ncrit_, filename_)
    {
      wavek = wavek_;
      if (this->filename.empty()) {
        this->filename = std::string("modified_helmholtz_") + (std::is_same<real_t, float>::value ? "f" : "d")
                       + std::string("_p") + std::to_string(p) + std::string(".dat");
      }
    }

    void potential_P2P(RealVec& src_coord, RealVec& src_value, RealVec& trg_coord, RealVec& trg_value) {
      simdvec zero(real_t(0));
      real_t newton_coef = 16;   // it comes from Newton's method in simd rsqrt function
      simdvec coef(real_t(1.0/(4*PI*newton_coef)));
      simdvec k(-wavek/newton_coef);
      int nsrcs = src_coord.size() / 3;
      int ntrgs = trg_coord.size() / 3;
      int t;
      for (t=0; t+NSIMD<=ntrgs; t+=NSIMD) {
        simdvec tx(&trg_coord[3*t+0], 3*(int)sizeof(real_t));
        simdvec ty(&trg_coord[3*t+1], 3*(int)sizeof(real_t));
        simdvec tz(&trg_coord[3*t+2], 3*(int)sizeof(real_t));
        simdvec tv(zero);
        for (int s=0; s<nsrcs; s++) {
          simdvec sx(src_coord[3*s+0]);
          sx -= tx;
          simdvec sy(src_coord[3*s+1]);
          sy -= ty;
          simdvec sz(src_coord[3*s+2]);
          sz -= tz;
          simdvec sv(src_value[s]);
          simdvec r2(zero);
          r2 += sx * sx;
          r2 += sy * sy;
          r2 += sz * sz;
          simdvec invr = rsqrt(r2);
          invr &= r2 > zero;

          tv += exp(k*r2*invr) * invr * sv;  // k has negative sign
        }
        tv *= coef;
        for (int m=0; m<NSIMD && (t+m)<ntrgs; m++) {
          trg_value[t+m] += tv[m];
        }
      }
      for (; t<ntrgs; t++) {
        real_t potential = 0;
        for (int s=0; s<nsrcs; s++) {
          vec3 dx;
          for (int d=0; d<3; d++)
            dx[d] = trg_coord[3*t+d] - src_coord[3*s+d];
          real_t r2 = norm(dx);
          if (r2!=0) {
            real_t r = std::sqrt(r2);
            potential += std::exp(-wavek*r) * src_value[s] / r;
          }
        }
        trg_value[t] += potential / (4*PI);
      }
    }

    void gradient_P2P(RealVec& src_coord, RealVec& src_value, RealVec& trg_coord, RealVec& trg_value) {
      simdvec zero(real_t(0));
      simdvec one(real_t(1));
      real_t newton_coef = 16;   // it comes from Newton's method in simd rsqrt function
      simdvec coefp(real_t(1.0/(4*PI*newton_coef)));  // potential: r
      simdvec coefg(real_t(1.0/(4*PI*newton_coef*newton_coef*newton_coef)));  // gradient: r3
      simdvec k(-wavek/newton_coef);
      int nsrcs = src_coord.size() / 3;
      int ntrgs = trg_coord.size() / 3;
      int t;
      for (t=0; t+NSIMD<=ntrgs; t+=NSIMD) {
        simdvec tx(&trg_coord[3*t+0], 3*(int)sizeof(real_t));
        simdvec ty(&trg_coord[3*t+1], 3*(int)sizeof(real_t));
        simdvec tz(&trg_coord[3*t+2], 3*(int)sizeof(real_t));
        simdvec tv0(zero);
        simdvec tv1(zero);
        simdvec tv2(zero);
        simdvec tv3(zero);
        for (int s=0; s<nsrcs; s++) {
          simdvec sx(src_coord[3*s+0]);
          sx -= tx;
          simdvec sy(src_coord[3*s+1]);
          sy -= ty;
          simdvec sz(src_coord[3*s+2]);
          sz -= tz;
          simdvec sv(src_value[s]);
          simdvec r2(zero);
          r2 += sx * sx;
          r2 += sy * sy;
          r2 += sz * sz;
          simdvec invr = rsqrt(r2);
          invr &= r2 > zero;

          simdvec invr2 = invr * invr;

          simdvec kr = (k * r2) * invr;     // -k*r
          simdvec potential = exp(kr) * invr * sv;  // exp(-kr) / r * q
          tv0 += potential;
          simdvec krp1 = one - kr;          // k*r+1
          potential *= krp1 * invr2;        // exp(-kr) * (kr+1) * q / r3
          tv1 += potential * sx;
          tv2 += potential * sy;
          tv3 += potential * sz;
        }
        tv0 *= coefp;
        tv1 *= coefg;
        tv2 *= coefg;
        tv3 *= coefg;
        for (int m=0; m<NSIMD && (t+m)<ntrgs; m++) {
          trg_value[0+4*(t+m)] += tv0[m];
          trg_value[1+4*(t+m)] += tv1[m];
          trg_value[2+4*(t+m)] += tv2[m];
          trg_value[3+4*(t+m)] += tv3[m];
        }
      }
      for (; t<ntrgs; t++) {
        real_t potential = 0;
        vec3 gradient = 0;
        for (int s=0; s<nsrcs; s++) {
          vec3 dx;
          for (int d=0; d<3; d++)
            dx[d] = trg_coord[3*t+d] - src_coord[3*s+d];
          real_t r2 = norm(dx);
          if (r2!=0) {
            real_t r = std::sqrt(r2);
            real_t kernel = std::exp(-wavek*r) / r * src_value[s];
            potential += kernel;
            real_t dpdr = - kernel * (wavek*r+1) / r;
            gradient[0] += dpdr / r * dx[0];
            gradient[1] += dpdr / r * dx[1];
            gradient[2] += dpdr / r * dx[2];
          }
        }
        trg_value[4*t+0] += potential / (4*PI);
        trg_value[4*t+1] += gradient[0] / (4*PI);
        trg_value[4*t+2] += gradient[1] / (4*PI);
        trg_value[4*t+3] += gradient[2] / (4*PI);
      }
    }
  };
}  // end namespace exafmm_t
#endif