diff --git a/core/src/CMakeLists.txt b/core/src/CMakeLists.txt index 81f9db0aa..299eb6aa6 100644 --- a/core/src/CMakeLists.txt +++ b/core/src/CMakeLists.txt @@ -32,6 +32,7 @@ set(HEADERS_PUBLIC Cabana_Utils.hpp Cabana_VerletList.hpp Cabana_Version.hpp + Cabana_GaussianMixtureModel.hpp ) if(Cabana_ENABLE_ARBORX) @@ -58,7 +59,11 @@ endif() set(HEADERS_IMPL impl/Cabana_CartesianGrid.hpp + impl/Cabana_Erfinv.hpp impl/Cabana_Index.hpp + impl/Cabana_GaussianWeight.hpp + impl/Cabana_Hammersley.hpp + impl/Cabana_Matrix2d.hpp impl/Cabana_PerformanceTraits.hpp impl/Cabana_TypeTraits.hpp ) diff --git a/core/src/Cabana_Core.hpp b/core/src/Cabana_Core.hpp index 393562327..fdda1896c 100644 --- a/core/src/Cabana_Core.hpp +++ b/core/src/Cabana_Core.hpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/core/src/Cabana_GaussianMixtureModel.hpp b/core/src/Cabana_GaussianMixtureModel.hpp new file mode 100644 index 000000000..fc2ab68e5 --- /dev/null +++ b/core/src/Cabana_GaussianMixtureModel.hpp @@ -0,0 +1,1059 @@ +/**************************************************************************** + * Copyright (c) 2023 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +/*! + \file Cabana_GaussianMixtureModel.hpp + \brief Creation of a Gaussian Mixture Model +*/ +#ifndef CABANA_GMM_HPP +#define CABANA_GMM_HPP + +#include + +#include +#include +#include +#include + +/*! + Possible parameters to describe a Gaussian + + In one dimension we use MuX and Cxx. In two dimension we assume a cylindrical + velocity space and use MuPar, MuPer, Cparpar and Cperper. The off-diagonal + elements Cparper and Cperpar are not currently in use. In three dimensions we + use all MuX, MuY, MuZ, as well as a full (symmetric) 3-by-3 covariance matrix. +*/ +enum GaussianFields { + Weight, + // Can we make only the relevant fields vissible based on ? + MuPar, MuPer, + Cparpar, Cparper, + Cperpar, Cperper, + MuX, MuY, MuZ, + Cxx, Cxy, Cxz, + Cyx, Cyy, Cyz, + Czx, Czy, Czz, + n_gaussian_param +}; + + +namespace Cabana { + +// Everything that is implementation dependent and not for the user is in a +// class indicating as much. The nicer high level user facing functions are at +// the vbottom of the file + +template +class GMMImpl { +public: + +/*! + Do a scan of the particles and compute the variance of the 1d velocity data + + This is done for the particles where the entry in the cell slide matches c, + respecting the possibly different weights in the macro slice and the result is + reported in cov[0][0]. +*/ +template +static void variance(const CellSliceType& cell, const WeightSliceType& macro, const VelocitySliceType& velocity_x, const int c, double(&cov)[1][1]) { + using gmm_float_type = typename WeightSliceType::value_type; + + gmm_float_type sum_x = 0.; + gmm_float_type sum_xx = 0.; + gmm_float_type N = 0; + + // How does a single particle contribute to the total + auto _add = KOKKOS_LAMBDA(const int& i, gmm_float_type& local_N, + gmm_float_type& local_sum_x, gmm_float_type& local_sum_xx) { + if(cell(i) == c) { + local_N += macro(i); + local_sum_x += macro(i) * velocity_x(i); + local_sum_xx += macro(i) * velocity_x(i)*velocity_x(i); + } + }; + + // Execute + Kokkos::parallel_reduce("Particle Scan", velocity_x.size(), _add, N, sum_x, sum_xx); + const gmm_float_type mu_x = sum_x/gmm_float_type(N); + const gmm_float_type var_x = sum_xx/gmm_float_type(N) - mu_x*mu_x; + + cov[0][0] = var_x; +} + +/*! + Do a scan of the particles and compute the variance of the 2d velocity data + + This is done for the particles where the entry in the cell slide matches c, + respecting the possibly different weights in the macro slice and the result is + reported in cov[0..1][0..1]. +*/ +template +static void variance(const CellSliceType& cell, const WeightSliceType& macro, const VelocitySliceType& velocity_par, const VelocitySliceType& velocity_per, const int c, double(&cov)[2][2]) { + using gmm_float_type = typename WeightSliceType::value_type; + + gmm_float_type sum_par = 0.; + gmm_float_type sum_per = 0.; + gmm_float_type sum_parpar = 0.; + gmm_float_type sum_perper = 0.; + gmm_float_type sum_parper = 0.; + gmm_float_type N = 0; + + // How does a single particle contribute to the total + auto _add = KOKKOS_LAMBDA(const int& i, gmm_float_type& local_N, + gmm_float_type& local_sum_par, gmm_float_type& local_sum_per, + gmm_float_type& local_sum_parpar, gmm_float_type& local_sum_perper, gmm_float_type& local_sum_parper) { + if(cell(i) == c) { + local_N += macro(i); + local_sum_par += macro(i) * velocity_par(i); + local_sum_per += macro(i) * velocity_per(i); + local_sum_parpar += macro(i) * velocity_par(i)*velocity_par(i); + local_sum_perper += macro(i) * velocity_per(i)*velocity_per(i); + local_sum_parper += macro(i) * velocity_par(i)*velocity_per(i); + } + }; + + // Execute + Kokkos::parallel_reduce("Particle Scan", velocity_par.size(), _add, N, sum_par, sum_per, sum_parpar, sum_perper, sum_parper); + + printf("N = %e, sum_par=%e, sum_per = %e, sum_parpar = %e, sum_perper = %e, sum_parper = %e\n", N, sum_par, sum_per, sum_parpar, sum_perper, sum_parper); + const gmm_float_type mu_par = sum_par/gmm_float_type(N); + const gmm_float_type mu_per = 0.; // in cylindrical coordinates we don't expect this to be the perpendicular drift speed but sqrt(pi/2)*vthper + const gmm_float_type var_par = sum_parpar/gmm_float_type(N) - mu_par*mu_par; + const gmm_float_type var_per = sum_perper/gmm_float_type(N);// - mu_per*mu_per; + printf("var_par = %e, var_per = %e\n", var_par, var_per); + + cov[0][0] = var_par; cov[0][1] = 0.; + cov[1][0] = 0.; cov[1][1] = var_per; + + printf("variance: cell %d. var = %e %e, %e %e\n", c, cov[0][0], cov[0][1], cov[1][0], cov[1][1]); +} + +/*! + Do a scan of the particles and compute the variance of the 3d velocity data + + This is done for the particles where the entry in the cell slide matches c, + respecting the possibly different weights in the macro slice and the result is + reported in the 3x3 covariance matrix cov +*/ +template +static void variance(const CellSliceType& cell, const WeightSliceType& macro, const VelocitySliceType& velocity_x, const VelocitySliceType& velocity_y, const VelocitySliceType& velocity_z, const int c, double(&cov)[3][3]) { + using gmm_float_type = typename WeightSliceType::value_type; + + gmm_float_type sum_x = 0.; + gmm_float_type sum_y = 0.; + gmm_float_type sum_z = 0.; + gmm_float_type sum_xx = 0.; + gmm_float_type sum_yy = 0.; + gmm_float_type sum_zz = 0.; + gmm_float_type sum_xy = 0.; + gmm_float_type sum_xz = 0.; + gmm_float_type sum_yz = 0.; + gmm_float_type N = 0; + + // How does a single particle contribute to the total + auto _add = KOKKOS_LAMBDA(const int& i, gmm_float_type& local_N, + gmm_float_type& local_sum_x, gmm_float_type& local_sum_y, gmm_float_type& local_sum_z, + gmm_float_type& local_sum_xx, gmm_float_type& local_sum_yy, gmm_float_type& local_sum_zz, + gmm_float_type& local_sum_xy, gmm_float_type& local_sum_xz, gmm_float_type& local_sum_yz) { + if(cell(i) == c) { + local_N += macro(i); + local_sum_x += macro(i) * velocity_x(i); + local_sum_y += macro(i) * velocity_y(i); + local_sum_z += macro(i) * velocity_z(i); + local_sum_xx += macro(i) * velocity_x(i)*velocity_x(i); + local_sum_yy += macro(i) * velocity_y(i)*velocity_y(i); + local_sum_zz += macro(i) * velocity_z(i)*velocity_z(i); + local_sum_xy += macro(i) * velocity_x(i)*velocity_y(i); + local_sum_xz += macro(i) * velocity_x(i)*velocity_z(i); + local_sum_yz += macro(i) * velocity_y(i)*velocity_z(i); + } + }; + + // Execute + Kokkos::parallel_reduce("Particle Scan", velocity_x.size(), _add, N, sum_x, sum_y, sum_z, sum_xx, sum_yy, sum_zz, sum_xy, sum_xz, sum_yz); + const gmm_float_type mu_x = sum_x/gmm_float_type(N); + const gmm_float_type mu_y = sum_y/gmm_float_type(N); + const gmm_float_type mu_z = sum_z/gmm_float_type(N); + const gmm_float_type var_x = sum_xx/gmm_float_type(N) - mu_x*mu_x; + const gmm_float_type var_y = sum_yy/gmm_float_type(N) - mu_y*mu_y; + const gmm_float_type var_z = sum_zz/gmm_float_type(N) - mu_z*mu_z; + const gmm_float_type cov_xy = sum_xy/gmm_float_type(N) - mu_x*mu_y; + const gmm_float_type cov_xz = sum_xz/gmm_float_type(N) - mu_x*mu_z; + const gmm_float_type cov_yz = sum_yz/gmm_float_type(N) - mu_y*mu_z; + //printf("N = %e, sum_x=%e, sum_y = %e, sum_z = %e, sum_xx = %e, sum_yy = %e, sum_zz = %e\n", N, sum_x, sum_y, sum_z, sum_xx, sum_yy, sum_zz); + //printf("var_x = %e, var_y = %e var_z = %e\n", var_x, var_y, var_z); + printf("var_iance: cell %d. sum_ = %e %e %e\n", c, sum_x,sum_y,sum_z); + printf("var_iance: cell %d. sum_^2 = %e %e %e\n", c, sum_xx,sum_yy,sum_zz); + printf("var_iance: cell %d. sum_ offdiag = %e %e %e\n", c, sum_xy,sum_xz,sum_yz); + + printf("var_iance: cell %d. mu_ = %e %e %e\n", c, mu_x,mu_y,mu_z); + printf("var_iance: cell %d. var_ = %e %e %e\n", c, var_x,var_y,var_z); + printf("var_iance: cell %d. cov_ = %e %e %e\n", c, cov_xy,cov_xz,cov_yz); + + cov[0][0] = var_x; cov[0][1] = cov_xy; cov[0][2] = cov_xz; + cov[1][0] = cov_xy; cov[1][1] = var_y; cov[1][2] = cov_yz; + cov[2][0] = cov_xz; cov[2][1] = cov_yz; cov[2][2] = var_z; + + printf("variance: cell %d. var = %e %e %e, %e %e %e, %e %e %e\n", c, cov[0][0], cov[0][1], cov[0][2], cov[1][0], cov[1][1], cov[1][2], cov[2][0], cov[2][1], cov[2][2]); + + //return (var_x+var_y+var_z)/3. ; +} + +/*! + Compute the sum of alpha and return a version normalized to 1 in alpha_norm +*/ +template +static void normalize(AlphaType& alpha_norm, const AlphaType& alpha) { + using gmm_float_type = typename AlphaType::value_type; + + for(size_t c = 0; c < alpha.extent(0); c++) { + gmm_float_type sum = 0.; + int k_max = alpha.extent(1); + auto _add = KOKKOS_LAMBDA(const int& m, gmm_float_type& lsum) { + lsum += alpha(c,m); + }; + Kokkos::parallel_reduce("Sum alpha", k_max, _add, sum); + + auto _norm = KOKKOS_LAMBDA(const int&m) { + alpha_norm(c,m) = alpha(c,m) / sum; + }; + Kokkos::parallel_for("Norm alpha", k_max, _norm); + } +} + + +/*! + Compute the w for each combination of a Gaussian and a particle by multiplying alpha into u +*/ +template +static void prefillW(WType w, const AlphaType& alpha_norm, const UType& u) { + using gmm_float_type = typename AlphaType::value_type; + + auto _mult = KOKKOS_LAMBDA(const int& i) { + for(size_t c = 0; c < w.extent(0); c++) { + for(size_t m = 0; m < w.extent(1); m++) { + w(c,m,i) = alpha_norm(c,m) * u(c,m,i); + } + } + }; + // We have way more particles than gaussians, so that is what we parallelize over + Kokkos::parallel_for("Prefill w", w.extent(2), _mult); +} + +/*! + Compute the w_norm by normalizing w such that the sum over all Gaussians for a given particle is one +*/ +template +static void prefillWNorm(WType& w_norm, const WType& w) { + using gmm_float_type = typename WType::value_type; + + auto _norm = KOKKOS_LAMBDA(const int& i) { + for(size_t c = 0; c < w.extent(0); c++) { + gmm_float_type sum = 0.; + for(size_t m = 0; m < w.extent(1); m++) { + sum += w(c,m,i); + } + for(size_t m = 0; m < w.extent(1); m++) { + if(sum == 0.) { + w_norm(c,m,i) = 0.; + } else { + w_norm(c,m,i) = w(c,m,i)/sum; + } + } + } + }; + // We have way more particles than gaussians, so that is what we parallelize over + Kokkos::parallel_for("Prefill wnorm", w.extent(2), _norm); +} + + +/*! + recompute alpha as per line 10 +*/ +template +static void updateAlpha(AlphaType& alpha, const WType& w_norm, const int c, const int m) { + using gmm_float_type = typename AlphaType::value_type; + + int N = dims + dims*(dims+1)/2; // Number of actually independent degrees of freedom + if (dims == 1) { + N = 1 + 1*(1+1)/2; // Number of actually independent degrees of freedom for each 1d Gaussian + } else if (dims == 2) { + N = 2 + 2; // Number of actually independent degrees of freedom for each uncorrelated(!) 2d Gaussian + } else if (dims == 3) { + N = 3 + 3*(3+1)/2; // Number of actually independent degrees of freedom for each 3d Gaussian + } + + Kokkos::View tmp1("tmp1", w_norm.extent(1)); + auto _sett = KOKKOS_LAMBDA(const int j) { + gmm_float_type sum = 0.; + for(size_t n = 0; n < w_norm.extent(2); n++) { + sum += w_norm(c,j,n); + } + tmp1(j) = Kokkos::max(0., sum-N/2.); + }; + Kokkos::parallel_for("compute tmp1", w_norm.extent(1), _sett); + + gmm_float_type sum = 0.; + auto _sum = KOKKOS_LAMBDA(const int n, gmm_float_type& lsum) { + lsum += tmp1(n); + }; + Kokkos::parallel_reduce("sum tmp1", w_norm.extent(1), _sum, sum); + + auto _seta = KOKKOS_LAMBDA(const int j) { + if(j == m) { + alpha(c,j) = tmp1(j) / sum; + } + }; + + // Not actually parallel but should run on-device + Kokkos::parallel_for("update alpha", w_norm.extent(1), _seta); +} + + +/*! + compute weights u from the probability to get particles given one gaussian +*/ +template +static void updateWeights(weight_type u, const CellSliceType& cell, const VelocitySliceType vx, const VelocitySliceType& vy, const VelocitySliceType vz, const GaussianType& g_dev, const int c, const int m) { + + using gmm_float_type = typename GaussianType::value_type; + + if(u.extent(0) != g_dev.extent(0)) { + fprintf(stderr, "We need a separate value of the first index in u for each cell\n"); + exit(1); + } + if(u.extent(1) != g_dev.extent(1)) { + fprintf(stderr, "We need a separate value of the second index in u for each gaussian\n"); + exit(1); + } + if(u.extent(2) != vx.size()) { + fprintf(stderr, "We need a separate value of the third index in u for each particle\n"); + exit(1); + } + + if (dims == 1) { + VelocitySliceType velocity_x = vx; + + // The value of u is given by the probability to draw a single particle from a Gaussian + auto _weight = KOKKOS_LAMBDA(const int s, const int i) { + int n = (s)*cell.vector_length + i; + if(cell(n) == c) { + gmm_float_type p = 0.; + gmm_float_type v[1] = {velocity_x(n)}; + gmm_float_type Mu[1] = {g_dev(c,m,MuX)}; + gmm_float_type C[1][1] = {{g_dev(c,m,Cxx)}}; + p = Impl::GaussianWeight::weight_1d(v, Mu, C); + + u(c,m,n) = p; + } else { + u(c,m,n) = 0.; + } + }; + + // Define an execution policy + SimdPolicy vec_policy(0, cell.size()); + + // Execute for all particles in parallel + simd_parallel_for(vec_policy, _weight, "weight()"); + + } else if (dims == 2) { + VelocitySliceType velocity_par = vx; + VelocitySliceType velocity_per = vy; + + // The value of u is given by the probability to draw a single particle from a Gaussian + auto _weight = KOKKOS_LAMBDA(const int s, const int i) { + int n = (s)*cell.vector_length + i; + if(cell(n) == c) { + gmm_float_type p = 0.; + gmm_float_type v[2] = {velocity_par(n), velocity_per(n)}; + gmm_float_type Mu[2] = {g_dev(c,m,MuPar), g_dev(c,m,MuPer)}; + gmm_float_type C[2][2] = {{g_dev(c,m,Cparpar), g_dev(c,m,Cparper)}, + {g_dev(c,m,Cperpar), g_dev(c,m,Cperper)}}; + p = Impl::GaussianWeight::weight_2d(v, Mu, C); + + u(c,m,n) = p; + } else { + u(c,m,n) = 0.; + } + }; + + // Define an execution policy + SimdPolicy vec_policy(0, cell.size()); + + // Execute for all particles in parallel + simd_parallel_for(vec_policy, _weight, "weight()"); + + } else if (dims == 3) { + VelocitySliceType velocity_x = vx; + VelocitySliceType velocity_y = vy; + VelocitySliceType velocity_z = vz; + + // The value of u is given by the probability to draw a single particle from a Gaussian + auto _weight = KOKKOS_LAMBDA(const int s, const int i) { + int n = (s)*cell.vector_length + i; + if(cell(n) == c) { + gmm_float_type p = 0.; + gmm_float_type v[3] = {velocity_x(n), velocity_y(n), velocity_z(n)}; + gmm_float_type Mu[3] = {g_dev(c,m,MuX), g_dev(c,m,MuY), g_dev(c,m,MuZ)}; + gmm_float_type C[3][3] = {{g_dev(c,m,Cxx), g_dev(c,m,Cxy), g_dev(c,m,Cxz)}, + {g_dev(c,m,Cyx), g_dev(c,m,Cyy), g_dev(c,m,Cyz)}, + {g_dev(c,m,Czx), g_dev(c,m,Czy), g_dev(c,m,Czz)}}; + p = Impl::GaussianWeight::weight_3d(v, Mu, C); + + u(c,m,n) = p; + } else { + u(c,m,n) = 0.; + } + }; + + // Define an execution policy + SimdPolicy vec_policy(0, cell.size()); + + // Execute for all particles in parallel + simd_parallel_for(vec_policy, _weight, "weight()"); + } + +} + +/*! + update gaussian m from the particles where we consider weights w_norm for the impact +*/ +template +static void updateGMM(GaussianType& g_dev, const CellSliceType& cell, const VelocitySliceType& vx, const VelocitySliceType& vy, const VelocitySliceType& vz, const weight_type& w_norm, const int c, const int m) { + using gmm_float_type = typename GaussianType::value_type; + + if (dims == 1) { + auto velocity_x = vx; + + auto _moments = KOKKOS_LAMBDA(const int i, gmm_float_type& lM0, gmm_float_type& lM1x, gmm_float_type& lM2xx) { + if(cell(i) == c) { + lM0 += w_norm(c,m,i); + lM1x += velocity_x(i) * w_norm(c,m,i); + lM2xx += velocity_x(i)*velocity_x(i) * w_norm(c,m,i); + } + }; + + // Compute moments + gmm_float_type M0 = 0.; + gmm_float_type M1x = 0.; + gmm_float_type M2xx = 0.; + Kokkos::parallel_reduce("update gaussian", w_norm.extent(2), _moments, M0, M1x, M2xx); + + // Compute underlying parameters + auto _update = KOKKOS_LAMBDA(const int j) { + if(j == m) { + g_dev(c,m,MuX) = M1x/M0; + g_dev(c,m,Cxx) = M2xx/M0 - (M1x/M0)*(M1x/M0); + } + }; + + // Not actually parallel but should be on-device + Kokkos::parallel_for("update Gaussian", g_dev.extent(1), _update); + } else if (dims == 2) { + auto velocity_par = vx; + auto velocity_per = vy; + + auto _moments = KOKKOS_LAMBDA(const int i, gmm_float_type& lM0, + gmm_float_type& lM1par, gmm_float_type& lM1per, + gmm_float_type& lM2parpar, gmm_float_type& lM2parper, gmm_float_type& lM2perper) { + if(cell(i) == c) { + lM0 += w_norm(c,m,i) / velocity_per(i); + lM1par += velocity_par(i) * w_norm(c,m,i) / velocity_per(i); + lM1per += velocity_per(i) * w_norm(c,m,i) / velocity_per(i); + lM2parpar += velocity_par(i)*velocity_par(i) * w_norm(c,m,i) / velocity_per(i); + lM2parper += velocity_par(i)*velocity_per(i) * w_norm(c,m,i) / velocity_per(i); + lM2perper += velocity_per(i)*velocity_per(i) * w_norm(c,m,i) / velocity_per(i); + } + }; + + // Compute moments + gmm_float_type M0 = 0.; + gmm_float_type M1par = 0.; + gmm_float_type M1per = 0.; + gmm_float_type M2parpar = 0.; + gmm_float_type M2parper = 0.; + gmm_float_type M2perper = 0.; + Kokkos::parallel_reduce("update gaussian", w_norm.extent(2), _moments, M0, M1par,M1per, M2parpar,M2parper,M2perper); + + // Compute underlying parameters + auto _update = KOKKOS_LAMBDA(const int j) { + if(j == m) { + g_dev(c,m,MuPar) = M1par / M0; + g_dev(c,m,MuPer) = 0.; + g_dev(c,m,Cparpar) = M2parpar/M0 - (M1par/M0)*(M1par/M0); + g_dev(c,m,Cparper) = 0.; + g_dev(c,m,Cperpar) = 0.; + g_dev(c,m,Cperper) = M2perper/M0; // Eq. 93 has a factor 2 here + + gmm_float_type disc = 4.*M1per*M1per - M_PI*M0*M2perper; + //printf("c=%d,m=%d, M0=%f,M1per=%f,M2perper=%f, disc=%f\n", c,m, M0, M1per, M2perper, disc); + gmm_float_type alpha, cperper; + if(disc < 0.) { + alpha = 0.; + cperper = M2perper/M0; + } else { + gmm_float_type rad = (disc + 2.*M1per*Kokkos::sqrt(disc)) / (M_PI*M_PI*M0*M2perper); + alpha = Kokkos::sqrt(rad); + cperper = 0.5*M_PI*M2perper*M2perper / (8.*M1per*M1per - M_PI*M0*M2perper + 4.*M1per*Kokkos::sqrt(disc)); + } + gmm_float_type uper = 2.*alpha*cperper; + //printf("c=%d,m=%d, alpha=%f, MuPer=%f, Cperper=%f\n", c,m, alpha, uper, cperper); + if(alpha > 0.01) { + // Iterative solution + double I0 = Kokkos::Experimental::cyl_bessel_i0, double, int>(Kokkos::complex(0.25*uper*uper/cperper)).real(); + double I1 = Kokkos::Experimental::cyl_bessel_i1, double, int>(Kokkos::complex(0.25*uper*uper/cperper)).real(); + + double Exp = Kokkos::exp(-0.25*uper*uper/cperper); + for(int i = 0; i<500; i++) { + uper = Kokkos::sqrt((-M_PI*Exp*Exp*I0*I1*M0*M2perper + 2*M1per*(-M1per + Kokkos::sqrt(M_PI*Exp*Exp*I0*I1*M0*M2perper + M_PI*Exp*Exp*I1*I1*M0*M2perper + M1per*M1per)))/M0/M0)/(Kokkos::sqrt(M_PI)*Exp*I1); + cperper = (M_PI*Exp*Exp*I0*I1*M0*M2perper + M_PI*Exp*Exp*I1*I1*M0*M2perper + 2*M1per*(M1per - Kokkos::sqrt(M_PI*Exp*Exp*I0*I1*M0*M2perper + M_PI*Exp*Exp*I1*I1*M0*M2perper + M1per*M1per)))/(2*M_PI*Exp*Exp*I1*I1*M0*M0); + I0 = Kokkos::Experimental::cyl_bessel_i0, double, int>(Kokkos::complex(0.25*uper*uper/cperper)).real(); + I1 = Kokkos::Experimental::cyl_bessel_i1, double, int>(Kokkos::complex(0.25*uper*uper/cperper)).real(); + Exp = Kokkos::exp(-0.25*uper*uper/cperper); + } + //printf("c=%d,m=%d, alpha=%f, MuPer=%f, Cperper=%f\n", c,m, alpha, uper, cperper); + } + g_dev(c,m,MuPer) = uper; + g_dev(c,m,Cperper) = cperper; + } + }; + + // This shouldn't actually be parallel, but it should be on-device + Kokkos::parallel_for("update Gaussian", g_dev.extent(1), _update); + } else if (dims == 3) { + auto velocity_x = vx; + auto velocity_y = vy; + auto velocity_z = vz; + + auto _moments = KOKKOS_LAMBDA(const int i, gmm_float_type& lM0, + gmm_float_type& lM1x, gmm_float_type& lM1y, gmm_float_type& lM1z, + gmm_float_type& lM2xx, gmm_float_type& lM2xy, gmm_float_type& lM2xz, + gmm_float_type& lM2yx, gmm_float_type& lM2yy, gmm_float_type& lM2yz, + gmm_float_type& lM2zx, gmm_float_type& lM2zy, gmm_float_type& lM2zz) { + if(cell(i) == c) { + lM0 += w_norm(c,m,i); + lM1x += velocity_x(i) * w_norm(c,m,i); + lM1y += velocity_y(i) * w_norm(c,m,i); + lM1z += velocity_z(i) * w_norm(c,m,i); + lM2xx += velocity_x(i)*velocity_x(i) * w_norm(c,m,i); + lM2xy += velocity_x(i)*velocity_y(i) * w_norm(c,m,i); + lM2xz += velocity_x(i)*velocity_z(i) * w_norm(c,m,i); + lM2yx += velocity_y(i)*velocity_x(i) * w_norm(c,m,i); + lM2yy += velocity_y(i)*velocity_y(i) * w_norm(c,m,i); + lM2yz += velocity_y(i)*velocity_z(i) * w_norm(c,m,i); + lM2zx += velocity_z(i)*velocity_x(i) * w_norm(c,m,i); + lM2zy += velocity_z(i)*velocity_y(i) * w_norm(c,m,i); + lM2zz += velocity_z(i)*velocity_z(i) * w_norm(c,m,i); + } + }; + + // Compute moments + gmm_float_type M0 = 0.; + gmm_float_type M1x = 0.; + gmm_float_type M1y = 0.; + gmm_float_type M1z = 0.; + gmm_float_type M2xx = 0.; + gmm_float_type M2xy = 0.; + gmm_float_type M2xz = 0.; + gmm_float_type M2yx = 0.; + gmm_float_type M2yy = 0.; + gmm_float_type M2yz = 0.; + gmm_float_type M2zx = 0.; + gmm_float_type M2zy = 0.; + gmm_float_type M2zz = 0.; + Kokkos::parallel_reduce("update gaussian", w_norm.extent(2), _moments, M0, M1x,M1y,M1z, M2xx,M2xy,M2xz, M2yx,M2yy,M2yz, M2zx,M2zy,M2zz); + + // Compute underlying parameters + auto _update = KOKKOS_LAMBDA(const int j) { + if(j == m) { + g_dev(c,m,MuX) = M1x/M0; + g_dev(c,m,MuY) = M1y/M0; + g_dev(c,m,MuZ) = M1z/M0; + g_dev(c,m,Cxx) = M2xx/M0 - (M1x/M0)*(M1x/M0); + g_dev(c,m,Cxy) = M2xy/M0 - (M1x/M0)*(M1y/M0); + g_dev(c,m,Cxz) = M2xz/M0 - (M1x/M0)*(M1z/M0); + g_dev(c,m,Cyx) = M2yx/M0 - (M1y/M0)*(M1x/M0); + g_dev(c,m,Cyy) = M2yy/M0 - (M1y/M0)*(M1y/M0); + g_dev(c,m,Cyz) = M2yz/M0 - (M1y/M0)*(M1z/M0); + g_dev(c,m,Czx) = M2zx/M0 - (M1z/M0)*(M1x/M0); + g_dev(c,m,Czy) = M2zy/M0 - (M1z/M0)*(M1y/M0); + g_dev(c,m,Czz) = M2zz/M0 - (M1z/M0)*(M1z/M0); + } + }; + + // This shouldn't actually be parallel, but it should be on-device + Kokkos::parallel_for("update Gaussian", g_dev.extent(1), _update); + } +} + + +/*! + Find the component that has the smallest remaining non-zero alpha_norm +*/ +template +static size_t findWeakestComponent(const AlphaType& alpha_norm, const int c) { + using gmm_float_type = typename AlphaType::value_type; + + auto alpha_host = Kokkos::create_mirror_view(Kokkos::HostSpace(), alpha_norm); + Kokkos::deep_copy(alpha_host, alpha_norm); + size_t idx = alpha_host.extent(1) + 1; + gmm_float_type min_weight = std::numeric_limits::infinity(); + for(size_t m = 0; m < alpha_host.extent(1); m++) { + if(alpha_host(c,m) > 0. && alpha_host(c,m) < min_weight) { + min_weight = alpha_host(c,m); + idx = m; + } + } + + if(idx == alpha_host.extent(1) + 1) { + printf("No component left to remove\n"); + exit(1); + } + + return idx; +} + + +/*! + remove the selected component from the Gaussian Mixture Model +*/ +template +static void removeGaussianComponent(AlphaType& alpha, AlphaType& alpha_norm, WType& w, WType& w_norm, WType& u, const size_t c, const size_t m) { + printf("removing component m = %zd from cell %zd\n", m, c); + + auto _remove = KOKKOS_LAMBDA(const int n) { + if(n == 0) { // Should we allow conflicting writes instead of branching? + alpha(c,m) = 0.; + alpha_norm(c,m) = 0.; + } + w(c,m,n) = 0.; + w_norm(c,m,n) = 0.; + u(c,m,n) = 0.; + }; + + Kokkos::parallel_for("remove smallest Gaussian", w_norm.extent(2), _remove); +} + +/*! + Compute the Gaussian Mixture Model given the particle information +*/ +template +static void implReconstructGMM(GaussianType& gaussians, const double eps, const int seed, CellSliceType const& cell, WeightSliceType const& weight, VelocitySliceType const& vx, VelocitySliceType const& vy, VelocitySliceType const& vz) { + using gmm_float_type = typename GaussianType::value_type; + + const int Nparticles = vx.size(); + const int c_max = gaussians.extent(0); // Maximum number of Cells + const size_t k_max = gaussians.extent(1); // Maximum number of Gaussians + int N; + if(dims == 1) { + N = 1 + 1*(1+1)/2; // Number of actually independent degrees of freedom for each 1d Gaussian + } else if (dims == 2) { + N = 2 + 2; // Number of actually independent degrees of freedom for each uncorrelated(!) 2d Gaussian + } else if (dims == 3) { + N = 3 + 3*(3+1)/2; // Number of actually independent degrees of freedom for each 3d Gaussian + } + + // Line 1 in Fig. 2 + + Kokkos::View alpha("alpha", c_max, k_max); + Kokkos::View alpha_norm("alpha norm", c_max, k_max); + + auto theta = Kokkos::create_mirror_view(Kokkos::DefaultExecutionSpace::memory_space(), gaussians); + Kokkos::deep_copy(theta, gaussians); + + // Parameters for the case where we have the best number of gaussians + auto theta_best = Kokkos::create_mirror(theta); + auto alpha_best = Kokkos::create_mirror(alpha_norm); + + // Generate initial guesses for all Gaussians + Kokkos::Random_XorShift64_Pool<> random_pool(seed); + + if (dims == 1) { + VelocitySliceType velocity_x = vx; + + for(int c = 0; c < c_max; c++) { + double cov[1][1]; + GMMImpl<1>::variance(cell, weight, velocity_x, c, cov); + printf("cell %d. var = %e\n", c, cov[0][0]); + + // Define how to initialize one Gaussian + auto _init = KOKKOS_LAMBDA(const int m) { + auto generator = random_pool.get_state(); + theta(c,m,Weight) = 0.; // can we use this instead of alpha? + // Are we worried that me might draw the same particle multiple times? + theta(c,m,MuX) = velocity_x(generator.drand(0, Nparticles)); // FIXME: Draw a particle in the right cell + theta(c,m,Cxx) = cov[0][0]; + alpha(c,m) = 1.; + random_pool.free_state(generator); + }; + + // execute for all Gaussians + Kokkos::parallel_for("initial guesses", k_max, _init); + } + } else if (dims == 2) { + VelocitySliceType velocity_par = vx; + VelocitySliceType velocity_per = vz; + + for(int c = 0; c < c_max; c++) { + double cov[2][2]; + GMMImpl<2>::variance(cell, weight, velocity_par, velocity_per, c, cov); + printf("cell %d. var = %e %e, %e %e\n", c, cov[0][0], cov[0][1], cov[1][0], cov[1][1]); + + // Define how to initialize one Gaussian + auto _init = KOKKOS_LAMBDA(const int m) { + auto generator = random_pool.get_state(); + theta(c,m,Weight) = 0.; // can we use this instead of alpha? + // Are we worried that me might draw the same particle multiple times? + const int particle_idx = generator.drand(0, Nparticles); // FIXME: Draw a particle in the right cell + theta(c,m,MuPar) = velocity_par(particle_idx); + theta(c,m,MuPer) = velocity_per(particle_idx); + theta(c,m,Cparpar) = cov[0][0]; + theta(c,m,Cparper) = cov[0][1]; + theta(c,m,Cperpar) = cov[1][0]; + theta(c,m,Cperper) = cov[1][1]; + alpha(c,m) = 1.; + random_pool.free_state(generator); + }; + + // execute for all Gaussians + Kokkos::parallel_for("initial guesses", k_max, _init); + } + } else if (dims == 3) { + VelocitySliceType velocity_x = vx; + VelocitySliceType velocity_y = vy; + VelocitySliceType velocity_z = vz; + + for(int c = 0; c < c_max; c++) { + double cov[3][3]; + GMMImpl<3>::variance(cell, weight, velocity_x, velocity_y, velocity_z, c, cov); + printf("cell %d. var = %e %e %e, %e %e %e, %e %e %e\n", c, cov[0][0], cov[0][1], cov[0][2], cov[1][0], cov[1][1], cov[1][2], cov[2][0], cov[2][1], cov[2][2]); + + // Define how to initialize one Gaussian + auto _init = KOKKOS_LAMBDA(const int m) { + auto generator = random_pool.get_state(); + theta(c,m,Weight) = 0.; // can we use this instead of alpha? + // Are we worried that me might draw the same particle multiple times? + const int particle_idx = generator.drand(0, Nparticles); // FIXME: Draw a particle in the right cell + theta(c,m,MuX) = velocity_x(particle_idx); + theta(c,m,MuY) = velocity_y(particle_idx); + theta(c,m,MuZ) = velocity_z(particle_idx); + theta(c,m,Cxx) = cov[0][0]; + theta(c,m,Cxy) = cov[0][1]; + theta(c,m,Cxz) = cov[0][2]; + theta(c,m,Cyx) = cov[1][0]; + theta(c,m,Cyy) = cov[1][1]; + theta(c,m,Cyz) = cov[1][2]; + theta(c,m,Czx) = cov[2][0]; + theta(c,m,Czy) = cov[2][1]; + theta(c,m,Czz) = cov[2][2]; + }; + + // execute for all Gaussians + Kokkos::parallel_for("initial guesses", k_max, _init); + } + } + + // print initial gaussians + auto thetahost = Kokkos::create_mirror(theta); + Kokkos::deep_copy(thetahost, theta); + printf("Initital Guess for gaussians (using diag entries):\n"); + for(size_t c = 0; c < thetahost.extent(0); c++) { + for(size_t m = 0; m < k_max; m++) { +#if VelocityDimensions == 1 + printf("Gaussian(%zd) in cell %zd weight=%f, Mu=%f, Cov=%f\n", m, c, thetahost(c,m,Weight), thetahost(c,m,MuX), thetahost(c,m,Cxx)); +#elif VelocityDimensions == 2 + printf("Gaussian(%zd) in cell %zd weight=%f, Mu=(%f,%f), Cov=((%f,%f),(%f,%f))\n", m, c, thetahost(c,m,Weight), thetahost(c,m,MuPar),thetahost(c,m,MuPer), + thetahost(c,m,Cparpar), thetahost(c,m,Cparper), + thetahost(c,m,Cperpar), thetahost(c,m,Cperper)); +#elif VelocityDimensions == 3 + printf("Gaussian(%zd) in cell %zd weight=%f, Mu=(%f,%f,%f), Cov=((%f,%f,%f),(%f,%f,%f),(%f,%f,%f))\n", m, c, thetahost(c,m,Weight), thetahost(c,m,MuX),thetahost(c,m,MuY),thetahost(c,m,MuZ), + thetahost(c,m,Cxx), thetahost(c,m,Cxy), thetahost(c,m,Cxz), + thetahost(c,m,Cyx), thetahost(c,m,Cyy), thetahost(c,m,Cyz), + thetahost(c,m,Czx), thetahost(c,m,Czy), thetahost(c,m,Czz)); +#endif + } + } + + + // normalize alpha to sum 1 + normalize(alpha_norm, alpha); + + // at some points we need to have alpha_norm on the host + auto alpha_host = Kokkos::create_mirror_view(Kokkos::HostSpace(), alpha_norm); + + // Line 4 in Fig. 2 + // Compute the probabilities of all particles relative to all gaussians + Kokkos::View u("u", c_max,k_max,Nparticles); + for(int c = 0; c < c_max; c++) { + for(size_t m = 0; m < k_max; m++) { + updateWeights(u, cell, vx,vy,vz, theta, c,m); + } + } + + // Compute weights + Kokkos::View w("w", c_max,k_max,Nparticles); + prefillW(w, alpha_norm, u); + + // Normalize weights + Kokkos::View w_norm("w_norm", c_max,k_max,Nparticles); + prefillWNorm(w_norm, w); + + for(int c = 0; c < c_max; c++) { // For now we just do things cell by cell + printf("#\n# c = %d\n#\n", c); + + // Line 3 in Fig. 2 + int knz = k_max; + gmm_float_type Lmin = std::numeric_limits::infinity(); + + // Line 5 in Fig. 2 + while(knz>0) { + int t = 0; + printf("\nknz = %d\n", knz); + fflush(NULL); + + bool is_first_iter = true; + gmm_float_type L = std::numeric_limits::infinity(); + gmm_float_type Lold = std::numeric_limits::infinity(); + // Line 6 in Fig. 2 + bool converged = false; + while(!converged) { + // Line 7 in Fig. 2 + t += 1; + + // Line 8 in Fig. 2 + for(size_t m = 0; m < k_max; m++) { + // Get a current copy of alpha_norm + Kokkos::deep_copy(alpha_host, alpha_norm); + if(alpha_host(c,m) <= 0.) { + continue; // with next m + } + + // Line 9 of Fig. 2 + auto _updatew = KOKKOS_LAMBDA(const int& i) { + w(c,m,i) = alpha_norm(c,m) * u(c,m,i); + }; + Kokkos::parallel_for("Update w", Nparticles, _updatew); + auto _updatewnorm = KOKKOS_LAMBDA(const int& i) { + gmm_float_type sum = 0.; + for(size_t mprime = 0; mprime < k_max; mprime++) { + sum += w(c,mprime,i); + } + if(sum == 0.) { + w_norm(c,m,i) = 0.; + } else { + w_norm(c,m,i) = weight(i) * w(c,m,i) / sum; + } + }; + Kokkos::parallel_for("Update wnorm", Nparticles, _updatewnorm); + + // Line 10 of Fig. 2 + updateAlpha(alpha, w_norm, c,m); + + // Line 11 of Fig. 2 + // normalize alpha to sum 1 + normalize(alpha_norm, alpha); + + Kokkos::deep_copy(alpha_host, alpha_norm); + + // Line 12 of Fig. 2 + // Get a current copy of alpha_norm + Kokkos::deep_copy(alpha_host, alpha_norm); + if(alpha_host(c,m) > 0.) { + // Line 13 of Fig. 2 + updateGMM(theta, cell, vx,vy,vz, w_norm, c,m); + + // Line 14 of Fig. 2 + // update u + updateWeights(u, cell, vx,vy,vz, theta, c,m); + // update w + Kokkos::parallel_for("Update w", Nparticles, _updatew); + // update w_norm + Kokkos::parallel_for("Update wnorm", Nparticles, _updatewnorm); + } else { // Line 15 of Fig. 2 + printf("alpha(%d,%lu) <= 0\n", c,m); + removeGaussianComponent(alpha, alpha_norm, w, w_norm, u, c,m); + // Line 16 of Fig. 2 + // count how many gaussians have alpha_norm(m)>0 + int new_knz = 0; + for(size_t mprime = 0; mprime < k_max; mprime++) { + if(alpha_host(c,mprime) > 0.) { + new_knz++; + } + } + knz = new_knz; + printf("knz = %d\n", knz); + if(knz <= 0) { + Kokkos::abort("Unexpectedly all gaussian weights have fallen below zero. Construction of Gaussian mixture model failed."); + } + } // Line 17 of Fig. 2 + } // Line 18 of Fig. 2 + // Line 19 of Fig. 2 + auto thetat = Kokkos::create_mirror(theta); + Kokkos::deep_copy(thetat, theta); + + // Line 20 of Fig. 2 + gmm_float_type term1b = 0.; + Kokkos::deep_copy(alpha_host, alpha_norm); + for(size_t mprime = 0; mprime < k_max; mprime++) { + if(alpha_host(c,mprime) > 0.) { + term1b += std::log(alpha_host(c,mprime)); + } + } + + gmm_float_type term2 = 0.; + auto _term2 = KOKKOS_LAMBDA(const int i, gmm_float_type& lsum) { + gmm_float_type tmp2 = 0.; + for(size_t mprime = 0; mprime < k_max; mprime++) { + if(alpha_norm(c,mprime) > 0.) { + tmp2 += alpha_norm(c,mprime) * u(c,mprime,i); + } + } + if(tmp2 > 0.) { + lsum += weight(i) * Kokkos::log(tmp2); + } + }; + Kokkos::parallel_reduce("get term2", Nparticles, _term2, term2); + + Lold = L; + gmm_float_type d = knz * N + knz - 1.; + L = N/2. * term1b + 0.5*d*std::log(Nparticles) - term2; + + // Line 21 of Fig 2. + if(is_first_iter) { + is_first_iter = false; + } else { + //printf("knz = %d, Lold = %f, L = %f\n", knz, Lold, L); + if(fabs(Lold-L) < eps*fabs(L)) { + printf("converged on %d gaussians with L= %f in %d iterations!\n", knz, L, t); + converged = true; + } + } + } + + // Line 22 of Fig. 2 + if(L < Lmin) { + size_t mu_min = findWeakestComponent(alpha_norm, c); + Kokkos::deep_copy(alpha_host, alpha_norm); + if(alpha_host(c,mu_min) * Nparticles >= pow(2,dims)) { + // Line 23 + Lmin = L; + // Line 24 + Kokkos::deep_copy(theta_best, theta); + Kokkos::deep_copy(alpha_best, alpha_norm); + printf("new Lmin = %f for knz = %d\n", Lmin, knz); + } + } // Line 25 + + // Line 26 of Fig. 2 + if(knz > 1) { // don't remove the last Gaussian + size_t mu_min = findWeakestComponent(alpha_norm, c); + Kokkos::deep_copy(alpha_host, alpha_norm); + if(alpha_host(c,mu_min) * Nparticles < pow(2,dims)) { + printf("component predicts zero particles, removing\n"); + } else { + printf("weakest component isn't that weak, why should this make anything better?\n"); + } + removeGaussianComponent(alpha, alpha_norm, w, w_norm, u, c, mu_min); + } + knz--; + + } // Line 27 + + Kokkos::deep_copy(gaussians, theta_best); // When we are done, copy the gaussians back to the host + + // Copy back alpha norm and set the weights of the Gaussians from that + Kokkos::deep_copy(alpha_host, alpha_best); + for(size_t m = 0; m < k_max; m++) { + gaussians(c,m,Weight) = alpha_host(c,m); + } + + // copy that value of theta_best back to the device, because we trashed + // out theta there, trying smaller numbers of gaussians in that cell + Kokkos::deep_copy(theta, gaussians); + } + + printf("Reconstruction done\n"); +} + + +}; // end of GMMImpl class + + +/*! + Reconstruct a Gaussian Mixture Model in one dimension + + eps set the limit on relative change of the penalized log liklyhood function + when we consider the reconstruction to be converged. seed is used when drawing + the initial random Gaussians that the algorithm starts from. Reconstruction is + done separately for each unique value in cell. This slice is assume to be a + dense set of integers from 0 to gaussians.extent(0). The entries in weight can + be all equal or can be different for particles with different statistical + weight / macro factor, but it is assumed that the sum of all weights is equal + to the number of particles. All particle slices have to have the same extent. +*/ +template +void reconstructGMM(GaussianType& gaussians, const double eps, const int seed, CellSliceType const& cell, WeightSliceType const& weight, VelocitySliceType const& vx) { + GMMImpl<1>::implReconstructGMM(gaussians, eps, seed, cell, weight, vx, vx, vx); +} + +/*! + Reconstruct a Gaussian Mixture Model in two (cylindrical) dimensions + + eps set the limit on relative change of the penalized log liklyhood function + when we consider the reconstruction to be converged. seed is used when drawing + the initial random Gaussians that the algorithm starts from. Reconstruction is + done separately for each unique value in cell. This slice is assume to be a + dense set of integers from 0 to gaussians.extent(0). The entries in weight can + be all equal or can be different for particles with different statistical + weight / macro factor, but it is assumed that the sum of all weights is equal + to the number of particles. All particle slices have to have the same extent. +*/ +template +void reconstructGMM(GaussianType& gaussians, const double eps, const int seed, CellSliceType const& cell, WeightSliceType const& weight, VelocitySliceType const& vpar, VelocitySliceType const& vper) { + GMMImpl<2>::implReconstructGMM(gaussians, eps, seed, cell, weight, vpar, vper, vper); +} + +/*! + Reconstruct a Gaussian Mixture Model in three (cartesian) dimensions + + eps set the limit on relative change of the penalized log liklyhood function + when we consider the reconstruction to be converged. seed is used when drawing + the initial random Gaussians that the algorithm starts from. Reconstruction is + done separately for each unique value in cell. This slice is assume to be a + dense set of integers from 0 to gaussians.extent(0). The entries in weight can + be all equal or can be different for particles with different statistical + weight / macro factor, but it is assumed that the sum of all weights is equal + to the number of particles. All particle slices have to have the same extent. +*/ +template +void reconstructGMM(GaussianType& gaussians, const double eps, const int seed, CellSliceType const& cell, WeightSliceType const& weight, VelocitySliceType const& vx, VelocitySliceType const& vy, VelocitySliceType const& vz) { + GMMImpl<3>::implReconstructGMM(gaussians, eps, seed, cell, weight, vx, vy, vz); +} + + + +} // end namespace Cabana + +#endif // end CABANA_GMM_HPP + + + + + + + + + + + + + + diff --git a/core/src/Cabana_ParticleInit.hpp b/core/src/Cabana_ParticleInit.hpp index aaae4592f..9fb4549ac 100644 --- a/core/src/Cabana_ParticleInit.hpp +++ b/core/src/Cabana_ParticleInit.hpp @@ -18,6 +18,10 @@ #include #include +#include +#include +#include +#include #include #include @@ -494,6 +498,1194 @@ createRandomParticlesMinDistance( ExecutionSpace exec_space, Kokkos::fence(); } +/*! + Populate the particles based on the description of the distribution function + in gaussians. Particles are places in 1d1v phase space. +*/ +template +size_t initializeRandomParticles( CellSliceType& cell, WeightSliceType& macro, + PositionSliceType& position_x, + VelocitySliceType& velocity_x, + const GaussianType& gaussians, + const int seed ) +{ + using gmm_float_type = typename GaussianType::value_type; + using particle_float_type = typename WeightSliceType::value_type; + Kokkos::Random_XorShift64_Pool<> random_pool( seed ); + + auto g_dev = Kokkos::create_mirror_view( + Kokkos::DefaultExecutionSpace::memory_space(), gaussians ); + Kokkos::deep_copy( g_dev, gaussians ); + + // Define how to create ONE particle + auto _init = KOKKOS_LAMBDA( const int s, const int i ) + { + // acquire the state of the random number generator engine + auto generator = random_pool.get_state(); + + // Figure out which Cell this particle is in + const int c = generator.drand( 0., g_dev.extent( 0 ) ); + + // Pick a random location in that cell + const particle_float_type x = generator.drand( c, c + 1. ); + + // Figure out which Gaussian to draw this particle from + const gmm_float_type r = generator.drand( 0., 1. ); + unsigned int j = 0; + gmm_float_type sum = g_dev( c, j, Weight ); + while ( ( sum < r ) && ( j < g_dev.extent( 1 ) - 1 ) ) + { + j++; + sum += g_dev( c, j, Weight ); + } + + // Generate standard normal random variables + const gmm_float_type rx = generator.normal( 0., 1. ); + + velocity_x.access( s, i ) = + g_dev( c, j, MuX ) + Kokkos::sqrt( g_dev( c, j, Cxx ) ) * rx; + + // Store the cell index + cell.access( s, i ) = c; + + // Assign uniform particle weight + macro.access( s, i ) = 1.; + + // Store particle position; + position_x.access( s, i ) = x; + + // do not forget to release the state of the engine + random_pool.free_state( generator ); + }; + + // Define an execution policy + Cabana::SimdPolicy + vec_policy( 0, cell.size() ); + + // Execute + Cabana::simd_parallel_for( vec_policy, _init, "init()" ); + + return cell.size(); +} + +/*! + Populate the particles based on the description of the distribution function + in gaussians. Particles are places in 1d2v phase space with cylindrical + velocity space. +*/ +template +size_t initializeRandomParticles( CellSliceType& cell, WeightSliceType& macro, + PositionSliceType& position_x, + VelocitySliceType& velocity_par, + VelocitySliceType& velocity_per, + const GaussianType& gaussians, + const int seed ) +{ + using gmm_float_type = typename GaussianType::value_type; + using particle_float_type = typename WeightSliceType::value_type; + Kokkos::Random_XorShift64_Pool<> random_pool( seed ); + + auto g_dev = Kokkos::create_mirror_view( + Kokkos::DefaultExecutionSpace::memory_space(), gaussians ); + Kokkos::deep_copy( g_dev, gaussians ); + + // Define how to create ONE particle + auto _init = KOKKOS_LAMBDA( const int s, const int i ) + { + // acquire the state of the random number generator engine + auto generator = random_pool.get_state(); + + // Figure out which Cell this particle is in + const int c = generator.drand( 0., g_dev.extent( 0 ) ); + + // Pick a random location in that cell + const particle_float_type x = generator.drand( c, c + 1. ); + + // Figure out which Gaussian to draw this particle from + const gmm_float_type r = generator.drand( 0., 1. ); + unsigned int j = 0; + gmm_float_type sum = g_dev( c, j, Weight ); + while ( ( sum < r ) && ( j < g_dev.extent( 1 ) - 1 ) ) + { + j++; + sum += g_dev( c, j, Weight ); + } + + // TODO: Extend this to ring distributions that allow for correlations + // between parallel and perpendicular velocity components + // + // Put Covariance matrix of the jth Gaussian into 3x3 Matrix + const gmm_float_type C[3][3] = { { g_dev( c, j, Cparpar ), 0, 0 }, + { 0, g_dev( c, j, Cperper ), 0 }, + { 0, 0, g_dev( c, j, Cperper ) } }; + gmm_float_type B[3][3]; + // Get Cholesky decomposition + Cabana::Impl::Matrix2d::cholesky( B, C ); + + // Generate standard normal random variables + const gmm_float_type rx = generator.normal( 0., 1. ); + const gmm_float_type ry = generator.normal( 0., 1. ); + const gmm_float_type rz = generator.normal( 0., 1. ); + + const particle_float_type vx = + g_dev( c, j, MuPar ) + B[0][0] * rx + B[0][1] * ry + B[0][2] * rz; + const particle_float_type vy = + g_dev( c, j, MuPer ) + B[1][0] * rx + B[1][1] * ry + B[1][2] * rz; + const particle_float_type vz = + B[2][0] * rx + B[2][1] * ry + B[2][2] * rz; + + velocity_par.access( s, i ) = vx; + velocity_per.access( s, i ) = Kokkos::sqrt( vy * vy + vz * vz ); + + // Store the cell index + cell.access( s, i ) = c; + + // Assign uniform particle weight + macro.access( s, i ) = 1.; + + // Store particle position; + position_x.access( s, i ) = x; + + // do not forget to release the state of the engine + random_pool.free_state( generator ); + }; + + // Define an execution policy + Cabana::SimdPolicy + vec_policy( 0, cell.size() ); + + // Execute + Cabana::simd_parallel_for( vec_policy, _init, "init()" ); + + return cell.size(); +} + +/*! + Populate the particles based on the description of the distribution function + in gaussians. Particles are places in 1d3v phase space. +*/ +template +size_t initializeRandomParticles( CellSliceType& cell, WeightSliceType& macro, + PositionSliceType& position_x, + VelocitySliceType& velocity_x, + VelocitySliceType& velocity_y, + VelocitySliceType& velocity_z, + const GaussianType& gaussians, + const int seed ) +{ + using gmm_float_type = typename GaussianType::value_type; + using particle_float_type = typename WeightSliceType::value_type; + Kokkos::Random_XorShift64_Pool<> random_pool( seed ); + + auto g_dev = Kokkos::create_mirror_view( + Kokkos::DefaultExecutionSpace::memory_space(), gaussians ); + Kokkos::deep_copy( g_dev, gaussians ); + + // Define how to create ONE particle + auto _init = KOKKOS_LAMBDA( const int s, const int i ) + { + // acquire the state of the random number generator engine + auto generator = random_pool.get_state(); + + // Figure out which Cell this particle is in + const int c = generator.drand( 0., g_dev.extent( 0 ) ); + + // Pick a random location in that cell + const particle_float_type x = generator.drand( c, c + 1. ); + + // Figure out which Gaussian to draw this particle from + const gmm_float_type r = generator.drand( 0., 1. ); + unsigned int j = 0; + gmm_float_type sum = g_dev( c, j, Weight ); + while ( ( sum < r ) && ( j < g_dev.extent( 1 ) - 1 ) ) + { + j++; + sum += g_dev( c, j, Weight ); + } + + // Put Covariance matrix of the jth Gaussian into 3x3 Matrix + const gmm_float_type C[3][3] = { + { g_dev( c, j, Cxx ), g_dev( c, j, Cxy ), g_dev( c, j, Cxz ) }, + { g_dev( c, j, Cyx ), g_dev( c, j, Cyy ), g_dev( c, j, Cyz ) }, + { g_dev( c, j, Czx ), g_dev( c, j, Czy ), g_dev( c, j, Czz ) } }; + gmm_float_type B[3][3]; + // Get Cholesky decomposition + Cabana::Impl::Matrix2d::cholesky( B, C ); + + // Generate standard normal random variables + const particle_float_type rx = generator.normal( 0., 1. ); + const particle_float_type ry = generator.normal( 0., 1. ); + const particle_float_type rz = generator.normal( 0., 1. ); + + velocity_x.access( s, i ) = + g_dev( c, j, MuX ) + B[0][0] * rx + B[0][1] * ry + B[0][2] * rz; + velocity_y.access( s, i ) = + g_dev( c, j, MuY ) + B[1][0] * rx + B[1][1] * ry + B[1][2] * rz; + velocity_z.access( s, i ) = + g_dev( c, j, MuZ ) + B[2][0] * rx + B[2][1] * ry + B[2][2] * rz; + + // Store the cell index + cell.access( s, i ) = c; + + // Assign uniform particle weight + macro.access( s, i ) = 1.; + + // Store particle position; + position_x.access( s, i ) = x; + + // do not forget to release the state of the engine + random_pool.free_state( generator ); + }; + + // Define an execution policy + Cabana::SimdPolicy + vec_policy( 0, cell.size() ); + + // Execute + Cabana::simd_parallel_for( vec_policy, _init, "init()" ); + + return cell.size(); +} + +/*! + Populate the particles based on the description of the distribution function in + gaussians +*/ +template +size_t initializeParticlesFromCDF( CellSliceType& cell, WeightSliceType& macro, + PositionSliceType& position_x, + VelocitySliceType& velocity_x, + const GaussianType& gaussians, + const int seed ) +{ + using gmm_float_type = typename WeightSliceType::value_type; + auto g_dev = Kokkos::create_mirror_view( + Kokkos::DefaultExecutionSpace::memory_space(), gaussians ); + Kokkos::deep_copy( g_dev, gaussians ); + Kokkos::Random_XorShift64_Pool<> random_pool( seed ); + + const int N_cells = gaussians.extent( 0 ); + const int N_gauss = gaussians.extent( 1 ); + const int N_particles = cell.size(); + + int start = 0; + for ( int c = 0; c < N_cells; c++ ) + { + for ( int m = 0; m < N_gauss; m++ ) + { + int Np = int( + N_particles * gaussians( c, m, Weight ) / + gmm_float_type( + N_cells ) ); // number of particles to add for that gaussian + + // Define how to create ONE particle in cell c, gaussian m + auto _init = KOKKOS_LAMBDA( const int s, const int i ) + { + // acquire the state of the random number generator engine + auto generator = random_pool.get_state(); + + int id = (s)*cell.vector_length + i; + + gmm_float_type cdf = + ( ( id - start ) + 1 ) / gmm_float_type( Np + 1 ); + + // solution to 1/2 * (1+erf((x-mu)/(sqrt(2)*sigma))) == cdf + gmm_float_type vx = + g_dev( c, m, MuX ) + + Kokkos::sqrt( g_dev( c, m, Cxx ) ) * Cabana::Impl::ppnd7( cdf ); + velocity_x.access( s, i ) = vx; + + // Store the cell index + cell.access( s, i ) = c; + + // Assign uniform particle weight + macro.access( s, i ) = 1.; + + // Generate and store a random position in that cell + position_x.access( s, i ) = generator.drand( c, c + 1. ); + + // do not forget to release the state of the engine + random_pool.free_state( generator ); + }; + + // Define an execution policy + Cabana::SimdPolicy + vec_policy( start, start + Np ); + + // printf("Generating %d particles from %d to %d from Gaussian %d in + // cell %d\n", Np, start, start+Np, m, c); + + // Execute + Cabana::simd_parallel_for( vec_policy, _init, "init()" ); + + start += Np; + } + } + + // TODO: we may not have created exactly N_particles particles. Do we want + // to bother creating a few more to fix that? + return ( start ); +} + +/*! + Populate the particles based on the description of the distribution function in + gaussians +*/ +template +size_t initializeEqualDensityParticlesWithHammersley( + CellSliceType& cell, WeightSliceType& macro, PositionSliceType& position_x, + VelocitySliceType& velocity_x, const GaussianType& gaussians ) +{ + using gmm_float_type = typename WeightSliceType::value_type; + auto g_dev = Kokkos::create_mirror_view( + Kokkos::DefaultExecutionSpace::memory_space(), gaussians ); + Kokkos::deep_copy( g_dev, gaussians ); + + const int N_cells = gaussians.extent( 0 ); // Number of cells + const int N_gauss = + gaussians.extent( 1 ); // Maximum number of Gaussians per cell + const int N_particles_in_total = + cell.size(); // Total number of particles the weight off all particles + // _also_ needs to add to this number + + int start = 0; + for ( int c = 0; c < N_cells; c++ ) + { + // number of physical particles in this cell + // We assume that these get evenly distributed to all cells. We might + // change that in the future to be proportional to the plasma density + // in each cell. + int N_particles_in_cell = N_particles_in_total / N_cells; + + // number of computational particles to add for each Gaussian + Kokkos::View Ncd( "Nc double", N_gauss ); + Kokkos::View Nc( "Nc int", N_gauss ); + auto Ncd_host = + Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), Ncd ); + auto Nc_host = + Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), Nc ); + + double sum = 0.; + for ( int m = 0; m < N_gauss; m++ ) + { + if ( gaussians( c, m, Weight ) > 0. ) + { + Ncd_host( m ) = Kokkos::pow( gaussians( c, m, Weight ), 1.0 ); + sum += Ncd_host( m ); + } + else + { + Ncd_host( m ) = 0.; + } + } + + double norm = N_particles_in_cell / sum; + + N_particles_in_cell = 0; + for ( int m = 0; m < N_gauss; m++ ) + { + Nc_host( m ) = int( Ncd_host( m ) * norm ); + N_particles_in_cell += Nc_host( m ); + // if(Nc_host(m) > 0) { + // printf("Nc(%d,%d) = %d\n", c,m, Nc_host(m)); + // } + } + Kokkos::deep_copy( Nc, Nc_host ); + + // number of physical particles for each Gaussian + Kokkos::View Npd( "Np double", N_gauss ); + auto Npd_host = + Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), Npd ); + for ( int m = 0; m < N_gauss; m++ ) + { + if ( gaussians( c, m, Weight ) > 0. ) + { + Npd_host( m ) = gaussians( c, m, Weight ) * N_particles_in_cell; + // printf("Npd(%d,%d) = %f\n", c,m, Npd_host(m)); + } + else + { + Npd_host( m ) = 0.; + } + } + Kokkos::deep_copy( Npd, Npd_host ); + + for ( int m = 0; m < N_gauss; m++ ) + { + if ( gaussians( c, m, Weight ) <= 0. ) + { + continue; + } + // printf("\n"); + + // Define how to create ONE particle in cell c, gaussian m + auto _init = KOKKOS_LAMBDA( const int s, const int i ) + { + int id = (s)*cell.vector_length + i; + + const gmm_float_type vx = + g_dev( c, m, MuX ) + + 5. * Kokkos::sqrt( g_dev( c, m, Cxx ) ) * + ( 2. * Cabana::Impl::hammersley( 1, id - start, Nc( m ) ) - + 1. ); + const gmm_float_type dv = 10. * Kokkos::sqrt( g_dev( c, m, Cxx ) ); + const gmm_float_type v[1] = { vx }; + const gmm_float_type Mu[1] = { g_dev( c, m, MuX ) }; + const gmm_float_type C[1][1] = { { g_dev( c, m, Cxx ) } }; + const gmm_float_type p = + Cabana::Impl::GaussianWeight::weight_1d( + v, Mu, C ); + + velocity_x.access( s, i ) = vx; + + // Store the cell index + cell.access( s, i ) = c; + + // Assign uniform particle weight + macro.access( s, i ) = + p * Npd( m ) / (gmm_float_type)Nc( m ) * dv; + + // Store particle position + position_x.access( s, i ) = + c + Cabana::Impl::hammersley( 0, id - start + 1, Nc( m ) + 1 ); + }; + + // Define an execution policy + Cabana::SimdPolicy + vec_policy( start, start + Nc_host( m ) ); + + // printf("Generating %d particles from %d to %d from Gaussian %d in + // cell %d\n", Nc_host(m), start, start+Nc_host(m), m, c); + + // Execute + Cabana::simd_parallel_for( vec_policy, _init, "init()" ); + + // Count weight of generated particles + double count_of_gaussian = 0.; + double weight_of_gaussian = 0.; + auto _sum = + KOKKOS_LAMBDA( const int i, double& lcount, double& lsum ) + { + if ( ( start <= i ) && ( i < start + Nc( m ) ) ) + { + lcount += 1.; + lsum += macro( i ); + } + }; + // printf("Checking particles %d to %d\n", start, start+Nc_host(m)); + Kokkos::parallel_reduce( "Particle Scan", N_particles_in_total, + _sum, count_of_gaussian, + weight_of_gaussian ); + // printf("Generated %f particles have weight %f\n", + // count_of_gaussian, weight_of_gaussian); + + start += Nc_host( m ); + } + // printf("\n"); + } + + // TODO: we may not have created exactly N_particles_in_total particles. Do + // we want to bother creating a few more to fix that? + return ( start ); +} + +/*! + Populate the particles based on the description of the distribution function + in gaussians. Particles are places in 1d2v phase space. +*/ +template +size_t initializeEqualDensityParticlesWithHammersley( + CellSliceType& cell, WeightSliceType& macro, PositionSliceType& position_x, + VelocitySliceType& velocity_par, VelocitySliceType& velocity_per, + const GaussianType& gaussians ) +{ + using gmm_float_type = typename WeightSliceType::value_type; + auto g_dev = Kokkos::create_mirror_view( + Kokkos::DefaultExecutionSpace::memory_space(), gaussians ); + Kokkos::deep_copy( g_dev, gaussians ); + + const int N_cells = gaussians.extent( 0 ); // Number of cells + const int N_gauss = + gaussians.extent( 1 ); // Maximum number of Gaussians per cell + const int N_particles_in_total = + cell.size(); // Total number of particles the weight off all particles + // _also_ needs to add to this number + + int start = 0; + for ( int c = 0; c < N_cells; c++ ) + { + // number of physical particles in this cell + // We assume that these get evenly distributed to all cells. We might + // change that in the future to be proportional to the plasma density + // in each cell. + int N_particles_in_cell = N_particles_in_total / N_cells; + + // number of computational particles to add for each Gaussian + Kokkos::View Ncd( "Nc double", N_gauss ); + Kokkos::View Nc( "Nc int", N_gauss ); + auto Ncd_host = + Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), Ncd ); + auto Nc_host = + Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), Nc ); + + double sum = 0.; + for ( int m = 0; m < N_gauss; m++ ) + { + if ( gaussians( c, m, Weight ) > 0. ) + { + Ncd_host( m ) = Kokkos::pow( gaussians( c, m, Weight ), 1.0 ); + sum += Ncd_host( m ); + } + else + { + Ncd_host( m ) = 0.; + } + } + + double norm = N_particles_in_cell / sum; + + N_particles_in_cell = 0; + for ( int m = 0; m < N_gauss; m++ ) + { + Nc_host( m ) = int( Ncd_host( m ) * norm ); + N_particles_in_cell += Nc_host( m ); + // if(Nc_host(m) > 0) { + // printf("Nc(%d,%d) = %d\n", c,m, Nc_host(m)); + // } + } + Kokkos::deep_copy( Nc, Nc_host ); + + // number of physical particles for each Gaussian + Kokkos::View Npd( "Np double", N_gauss ); + auto Npd_host = + Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), Npd ); + for ( int m = 0; m < N_gauss; m++ ) + { + if ( gaussians( c, m, Weight ) > 0. ) + { + Npd_host( m ) = gaussians( c, m, Weight ) * N_particles_in_cell; + // printf("Npd(%d,%d) = %f\n", c,m, Npd_host(m)); + } + else + { + Npd_host( m ) = 0.; + } + } + Kokkos::deep_copy( Npd, Npd_host ); + + for ( int m = 0; m < N_gauss; m++ ) + { + if ( gaussians( c, m, Weight ) <= 0. ) + { + continue; + } + // printf("\n"); + + // Define how to create ONE particle in cell c, gaussian m + auto _init = KOKKOS_LAMBDA( const int s, const int i ) + { + int id = (s)*cell.vector_length + i; + + const gmm_float_type vpar = + g_dev( c, m, MuPar ) + + 5. * Kokkos::sqrt( g_dev( c, m, Cparpar ) ) * + ( 2. * Cabana::Impl::hammersley( 1, id - start + 1, + Nc( m ) + 1 ) - + 1. ); + const gmm_float_type vpermin = + Kokkos::max( 0., g_dev( c, m, MuPer ) - + 5. * Kokkos::sqrt( g_dev( c, m, Cperper ) ) ); + const gmm_float_type vpermax = + g_dev( c, m, MuPer ) + 5. * Kokkos::sqrt( g_dev( c, m, Cperper ) ); + const gmm_float_type vper = + vpermin + + ( vpermax - vpermin ) * + Cabana::Impl::hammersley( 2, id - start + 1, Nc( m ) + 1 ); + const gmm_float_type dvpar = + 10. * Kokkos::sqrt( g_dev( c, m, Cparpar ) ); + const gmm_float_type dvper = ( vpermax - vpermin ); + const gmm_float_type v[2] = { vpar, vper }; + const gmm_float_type Mu[2] = { g_dev( c, m, MuPar ), + g_dev( c, m, MuPer ) }; + const gmm_float_type C[2][2] = { + { g_dev( c, m, Cparpar ), g_dev( c, m, Cparper ) }, + { g_dev( c, m, Cperpar ), g_dev( c, m, Cperper ) } }; + const gmm_float_type p = + Cabana::Impl::GaussianWeight::weight_2d( + v, Mu, C ); + + velocity_par.access( s, i ) = vpar; + velocity_per.access( s, i ) = vper; + + // Store the cell index + cell.access( s, i ) = c; + + // Assign uniform particle weight + macro.access( s, i ) = + p * Npd( m ) / (gmm_float_type)Nc( m ) * dvpar * dvper; + + // Store particle position + position_x.access( s, i ) = + c + Cabana::Impl::hammersley( 0, id - start + 1, Nc( m ) + 1 ); + }; + + // Define an execution policy + Cabana::SimdPolicy + vec_policy( start, start + Nc_host( m ) ); + + // printf("Generating %d particles from %d to %d from Gaussian %d in + // cell %d\n", Nc_host(m), start, start+Nc_host(m), m, c); + + // Execute + Cabana::simd_parallel_for( vec_policy, _init, "init()" ); + + // Count weight of generated particles + double count_of_gaussian = 0.; + double weight_of_gaussian = 0.; + auto _sum = + KOKKOS_LAMBDA( const int i, double& lcount, double& lsum ) + { + if ( ( start <= i ) && ( i < start + Nc( m ) ) ) + { + lcount += 1.; + lsum += macro( i ); + } + }; + // printf("Checking particles %d to %d\n", start, start+Nc_host(m)); + Kokkos::parallel_reduce( "Particle Scan", N_particles_in_total, + _sum, count_of_gaussian, + weight_of_gaussian ); + // printf("Generated %f particles have weight %f\n", + // count_of_gaussian, weight_of_gaussian); + + start += Nc_host( m ); + } + // printf("\n"); + } + + // TODO: we may not have created exactly N_particles_in_total particles. Do + // we want to bother creating a few more to fix that? + return ( start ); +} + +/*! + Populate the particles based on the description of the distribution function + in gaussians. Particles are places in 1d3v phase space. +*/ +template +size_t initializeEqualDensityParticlesWithHammersley( + CellSliceType& cell, WeightSliceType& macro, PositionSliceType& position_x, + VelocitySliceType& velocity_x, VelocitySliceType& velocity_y, + VelocitySliceType& velocity_z, const GaussianType& gaussians ) +{ + using gmm_float_type = typename WeightSliceType::value_type; + auto g_dev = Kokkos::create_mirror_view( + Kokkos::DefaultExecutionSpace::memory_space(), gaussians ); + Kokkos::deep_copy( g_dev, gaussians ); + + const int N_cells = gaussians.extent( 0 ); // Number of cells + const int N_gauss = + gaussians.extent( 1 ); // Maximum number of Gaussians per cell + const int N_particles_in_total = + cell.size(); // Total number of particles the weight off all particles + // _also_ needs to add to this number + + int start = 0; + for ( int c = 0; c < N_cells; c++ ) + { + // number of physical particles in this cell + // We assume that these get evenly distributed to all cells. We might + // change that in the future to be proportional to the plasma density + // in each cell. + int N_particles_in_cell = N_particles_in_total / N_cells; + + // number of computational particles to add for each Gaussian + Kokkos::View Ncd( "Nc double", N_gauss ); + Kokkos::View Nc( "Nc int", N_gauss ); + auto Ncd_host = + Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), Ncd ); + auto Nc_host = + Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), Nc ); + + double sum = 0.; + for ( int m = 0; m < N_gauss; m++ ) + { + if ( gaussians( c, m, Weight ) > 0. ) + { + Ncd_host( m ) = Kokkos::pow( gaussians( c, m, Weight ), 1.0 ); + sum += Ncd_host( m ); + } + else + { + Ncd_host( m ) = 0.; + } + } + + double norm = N_particles_in_cell / sum; + + N_particles_in_cell = 0; + for ( int m = 0; m < N_gauss; m++ ) + { + Nc_host( m ) = int( Ncd_host( m ) * norm ); + N_particles_in_cell += Nc_host( m ); + // if(Nc_host(m) > 0) { + // printf("Nc(%d,%d) = %d\n", c,m, Nc_host(m)); + // } + } + Kokkos::deep_copy( Nc, Nc_host ); + + // number of physical particles for each Gaussian + Kokkos::View Npd( "Np double", N_gauss ); + auto Npd_host = + Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), Npd ); + for ( int m = 0; m < N_gauss; m++ ) + { + if ( gaussians( c, m, Weight ) > 0. ) + { + Npd_host( m ) = gaussians( c, m, Weight ) * N_particles_in_cell; + // printf("Npd(%d,%d) = %f\n", c,m, Npd_host(m)); + } + else + { + Npd_host( m ) = 0.; + } + } + Kokkos::deep_copy( Npd, Npd_host ); + + for ( int m = 0; m < N_gauss; m++ ) + { + if ( gaussians( c, m, Weight ) <= 0. ) + { + continue; + } + // printf("\n"); + + // Define how to create ONE particle in cell c, gaussian m + auto _init = KOKKOS_LAMBDA( const int s, const int i ) + { + int id = (s)*cell.vector_length + i; + + const gmm_float_type vx = + g_dev( c, m, MuX ) + + 3. * Kokkos::sqrt( g_dev( c, m, Cxx ) ) * + ( 2. * Cabana::Impl::hammersley( 1, id - start + 1, + Nc( m ) + 1 ) - + 1. ); + const gmm_float_type vy = + g_dev( c, m, MuY ) + + 3. * Kokkos::sqrt( g_dev( c, m, Cyy ) ) * + ( 2. * Cabana::Impl::hammersley( 2, id - start + 1, + Nc( m ) + 1 ) - + 1. ); + const gmm_float_type vz = + g_dev( c, m, MuZ ) + + 3. * Kokkos::sqrt( g_dev( c, m, Czz ) ) * + ( 2. * Cabana::Impl::hammersley( 3, id - start + 1, + Nc( m ) + 1 ) - + 1. ); + const gmm_float_type dvx = 6. * Kokkos::sqrt( g_dev( c, m, Cxx ) ); + const gmm_float_type dvy = 6. * Kokkos::sqrt( g_dev( c, m, Cyy ) ); + const gmm_float_type dvz = 6. * Kokkos::sqrt( g_dev( c, m, Czz ) ); + + const gmm_float_type v[3] = { vx, vy, vz }; + const gmm_float_type Mu[3] = { g_dev( c, m, MuX ), + g_dev( c, m, MuY ), + g_dev( c, m, MuZ ) }; + const gmm_float_type C[3][3] = { + { g_dev( c, m, Cxx ), g_dev( c, m, Cxy ), + g_dev( c, m, Cxz ) }, + { g_dev( c, m, Cyx ), g_dev( c, m, Cyy ), + g_dev( c, m, Cyz ) }, + { g_dev( c, m, Czx ), g_dev( c, m, Czy ), + g_dev( c, m, Czz ) } }; + const gmm_float_type p = + Cabana::Impl::GaussianWeight::weight_3d( + v, Mu, C ); + + velocity_x.access( s, i ) = vx; + velocity_y.access( s, i ) = vy; + velocity_z.access( s, i ) = vz; + + // Store the cell index + cell.access( s, i ) = c; + + // Assign particle weight + macro.access( s, i ) = + p * Npd( m ) / (gmm_float_type)Nc( m ) * dvx * dvy * dvz; + + // Store particle position + position_x.access( s, i ) = + c + Cabana::Impl::hammersley( 0, id - start + 1, Nc( m ) + 1 ); + }; + + // Define an execution policy + Cabana::SimdPolicy + vec_policy( start, start + Nc_host( m ) ); + + // printf("Generating %d particles from %d to %d from Gaussian %d in + // cell %d\n", Nc_host(m), start, start+Nc_host(m), m, c); + + // Execute + Cabana::simd_parallel_for( vec_policy, _init, "init()" ); + + // Count weight of generated particles + double count_of_gaussian = 0.; + double weight_of_gaussian = 0.; + auto _sum = + KOKKOS_LAMBDA( const int i, double& lcount, double& lsum ) + { + if ( ( start <= i ) && ( i < start + Nc( m ) ) ) + { + lcount += 1.; + lsum += macro( i ); + } + }; + // printf("Checking particles %d to %d\n", start, start+Nc_host(m)); + Kokkos::parallel_reduce( "Particle Scan", N_particles_in_total, + _sum, count_of_gaussian, + weight_of_gaussian ); + // printf("Generated %f particles have weight %f\n", + // count_of_gaussian, weight_of_gaussian); + + start += Nc_host( m ); + } + // printf("\n"); + } + + // TODO: we may not have created exactly N_particles_in_total particles. Do + // we want to bother creating a few more to fix that? + return ( start ); +} + +/*! + Populate the particles based on the description of the distribution function + in gaussians +*/ +template +size_t initializeEqualWeightParticlesWithHammersley( + CellSliceType& cell, WeightSliceType& macro, PositionSliceType& position_x, + VelocitySliceType& velocity_x, const GaussianType& gaussians ) +{ + using gmm_float_type = typename WeightSliceType::value_type; + auto g_dev = Kokkos::create_mirror_view( + Kokkos::DefaultExecutionSpace::memory_space(), gaussians ); + Kokkos::deep_copy( g_dev, gaussians ); + + const int N_cells = gaussians.extent( 0 ); + const int N_gauss = gaussians.extent( 1 ); + const int N_particles = cell.size(); + + int start = 0; + for ( int c = 0; c < N_cells; c++ ) + { + for ( int m = 0; m < N_gauss; m++ ) + { + if ( gaussians( c, m, Weight ) <= 0. ) + { + continue; + } + + int Np = int( + N_particles * gaussians( c, m, Weight ) / + gmm_float_type( + N_cells ) ); // number of particles to add for that gaussian + + // Define how to create ONE particle in cell c, gaussian m + auto _init = KOKKOS_LAMBDA( const int s, const int i ) + { + int id = (s)*cell.vector_length + i; + + // Generate uniform pseudo-radom variables + const gmm_float_type w = + Cabana::Impl::hammersley( 1, id - start + 1, Np + 1 ); + const gmm_float_type x = + Cabana::Impl::hammersley( 2, id - start + 1, Np + 1 ); + + // Generate standard normal random variables + const gmm_float_type rx = + Kokkos::sqrt( -2. * Kokkos::log( w ) ) * Kokkos::sin( 2. * M_PI * x ); + + velocity_x.access( s, i ) = + g_dev( c, m, MuX ) + Kokkos::sqrt( g_dev( c, m, Cxx ) ) * rx; + + // Store the cell index + cell.access( s, i ) = c; + + // Assign uniform particle weight + macro.access( s, i ) = 1.; + + // Store particle position + position_x.access( s, i ) = + c + Cabana::Impl::hammersley( 0, id - start, Np ); + }; + + // Define an execution policy + Cabana::SimdPolicy + vec_policy( start, start + Np ); + + // printf("Generating %d particles from %d to %d from Gaussian %d in + // cell %d\n", Np, start, start+Np, m, c); + + // Execute + Cabana::simd_parallel_for( vec_policy, _init, "init()" ); + + start += Np; + } + } + + // TODO: we may not have created exactly N_particles particles. Do we want + // to bother creating a few more to fix that? + return ( start ); +} + +/*! + Populate the particles based on the description of the distribution function + in gaussians. Particles are places in 1d2v phase space. +*/ +template +size_t initializeEqualWeightParticlesWithHammersley( + CellSliceType& cell, WeightSliceType& macro, PositionSliceType& position_x, + VelocitySliceType& velocity_par, VelocitySliceType& velocity_per, + const GaussianType& gaussians ) +{ + using gmm_float_type = typename WeightSliceType::value_type; + auto g_dev = Kokkos::create_mirror_view( + Kokkos::DefaultExecutionSpace::memory_space(), gaussians ); + Kokkos::deep_copy( g_dev, gaussians ); + + const int N_cells = gaussians.extent( 0 ); + const int N_gauss = gaussians.extent( 1 ); + const int N_particles = cell.size(); + + int start = 0; + for ( int c = 0; c < N_cells; c++ ) + { + for ( int m = 0; m < N_gauss; m++ ) + { + if ( gaussians( c, m, Weight ) <= 0. ) + { + continue; + } + + int Np = int( + N_particles * gaussians( c, m, Weight ) / + gmm_float_type( + N_cells ) ); // number of particles to add for that gaussian + + // Define how to create ONE particle in cell c, gaussian m + auto _init = KOKKOS_LAMBDA( const int s, const int i ) + { + int id = (s)*cell.vector_length + i; + + // TODO: Extend this to ring distributions that allow for correlations + // between parallel and perpendicular velocity components + // + // Put Covariance matrix of the jth Gaussian into 3x3 Matrix + const gmm_float_type C[3][3] = { + { g_dev( c, m, Cparpar ), 0, 0 }, + { 0, g_dev( c, m, Cperper ), 0 }, + { 0, 0, g_dev( c, m, Cperper ) } }; + gmm_float_type B[3][3]; + // Get Cholesky decomposition + Cabana::Impl::Matrix2d::cholesky( B, C ); + + // Generate uniform pseudo-radom variables + const gmm_float_type w = + Cabana::Impl::hammersley( 1, id - start + 1, Np + 1 ); + const gmm_float_type x = + Cabana::Impl::hammersley( 2, id - start + 1, Np + 1 ); + const gmm_float_type y = + Cabana::Impl::hammersley( 3, id - start + 1, Np + 1 ); + const gmm_float_type z = + Cabana::Impl::hammersley( 4, id - start + 1, Np + 1 ); + + // Generate standard normal random variables + const gmm_float_type rx = + Kokkos::sqrt( -2. * Kokkos::log( w ) ) * Kokkos::sin( 2. * M_PI * x ); + const gmm_float_type ry = + Kokkos::sqrt( -2. * Kokkos::log( w ) ) * Kokkos::cos( 2. * M_PI * x ); + const gmm_float_type rz = + Kokkos::sqrt( -2. * Kokkos::log( y ) ) * Kokkos::sin( 2. * M_PI * z ); + + const gmm_float_type vx = g_dev( c, m, MuPar ) + B[0][0] * rx + + B[0][1] * ry + B[0][2] * rz; + const gmm_float_type vy = g_dev( c, m, MuPer ) + B[1][0] * rx + + B[1][1] * ry + B[1][2] * rz; + const gmm_float_type vz = + B[2][0] * rx + B[2][1] * ry + B[2][2] * rz; + + velocity_par.access( s, i ) = vx; + velocity_per.access( s, i ) = Kokkos::sqrt( vy * vy + vz * vz ); + + // Store the cell index + cell.access( s, i ) = c; + + // Assign uniform particle weight + macro.access( s, i ) = 1.; + + // Store particle position + position_x.access( s, i ) = + c + Cabana::Impl::hammersley( 0, id - start, Np ); + }; + + // Define an execution policy + Cabana::SimdPolicy + vec_policy( start, start + Np ); + + // printf("Generating %d particles from %d to %d from Gaussian %d in + // cell %d\n", Np, start, start+Np, m, c); + + // Execute + Cabana::simd_parallel_for( vec_policy, _init, "init()" ); + + start += Np; + } + } + + // TODO: we may not have created exactly N_particles particles. Do we want + // to bother creating a few more to fix that? + return ( start ); +} + +/*! + Populate the particles based on the description of the distribution function + in gaussians. Particles are places in 1d3v phase space. +*/ +template +size_t initializeEqualWeightParticlesWithHammersley( + CellSliceType& cell, WeightSliceType& macro, PositionSliceType& position_x, + VelocitySliceType& velocity_x, VelocitySliceType& velocity_y, + VelocitySliceType& velocity_z, const GaussianType& gaussians ) +{ + using gmm_float_type = typename WeightSliceType::value_type; + auto g_dev = Kokkos::create_mirror_view( + Kokkos::DefaultExecutionSpace::memory_space(), gaussians ); + Kokkos::deep_copy( g_dev, gaussians ); + + const int N_cells = gaussians.extent( 0 ); + const int N_gauss = gaussians.extent( 1 ); + const int N_particles = cell.size(); + + int start = 0; + for ( int c = 0; c < N_cells; c++ ) + { + for ( int m = 0; m < N_gauss; m++ ) + { + if ( gaussians( c, m, Weight ) <= 0. ) + { + continue; + } + + int Np = int( + N_particles * gaussians( c, m, Weight ) / + gmm_float_type( + N_cells ) ); // number of particles to add for that gaussian + + // Define how to create ONE particle in cell c, gaussian m + auto _init = KOKKOS_LAMBDA( const int s, const int i ) + { + int id = (s)*cell.vector_length + i; + + // Put Covariance matrix of the mth Gaussian into 3x3 Matrix + const gmm_float_type C[3][3] = { + { g_dev( c, m, Cxx ), g_dev( c, m, Cxy ), + g_dev( c, m, Cxz ) }, + { g_dev( c, m, Cyx ), g_dev( c, m, Cyy ), + g_dev( c, m, Cyz ) }, + { g_dev( c, m, Czx ), g_dev( c, m, Czy ), + g_dev( c, m, Czz ) } }; + gmm_float_type B[3][3]; + // Get Cholesky decomposition + Cabana::Impl::Matrix2d::cholesky( B, C ); + + // Generate uniform pseudo-radom variables + const gmm_float_type w = + Cabana::Impl::hammersley( 1, id - start + 1, Np + 1 ); + const gmm_float_type x = + Cabana::Impl::hammersley( 2, id - start + 1, Np + 1 ); + const gmm_float_type y = + Cabana::Impl::hammersley( 3, id - start + 1, Np + 1 ); + const gmm_float_type z = + Cabana::Impl::hammersley( 4, id - start + 1, Np + 1 ); + + // Generate standard normal random variables + const gmm_float_type rx = + Kokkos::sqrt( -2 * Kokkos::log( w ) ) * Kokkos::sin( 2. * M_PI * x ); + const gmm_float_type ry = + Kokkos::sqrt( -2 * Kokkos::log( w ) ) * Kokkos::cos( 2. * M_PI * x ); + const gmm_float_type rz = + Kokkos::sqrt( -2 * Kokkos::log( y ) ) * Kokkos::sin( 2. * M_PI * z ); + + const gmm_float_type vx = g_dev( c, m, MuX ) + B[0][0] * rx + + B[0][1] * ry + B[0][2] * rz; + const gmm_float_type vy = g_dev( c, m, MuY ) + B[1][0] * rx + + B[1][1] * ry + B[1][2] * rz; + const gmm_float_type vz = g_dev( c, m, MuZ ) + B[2][0] * rx + + B[2][1] * ry + B[2][2] * rz; + + velocity_x.access( s, i ) = vx; + velocity_y.access( s, i ) = vy; + velocity_z.access( s, i ) = vz; + + // Store the cell index + cell.access( s, i ) = c; + + // Assign uniform particle weight + macro.access( s, i ) = 1.; + + // Store particle position + position_x.access( s, i ) = + c + Cabana::Impl::hammersley( 0, id - start, Np ); + }; + + // Define an execution policy + Cabana::SimdPolicy + vec_policy( start, start + Np ); + + // printf("Generating %d particles from %d to %d from Gaussian %d in + // cell %d\n", Np, start, start+Np, m, c); + + // Execute + Cabana::simd_parallel_for( vec_policy, _init, "init()" ); + + start += Np; + } + } + + // TODO: we may not have created exactly N_particles particles. Do we want + // to bother creating a few more to fix that? + return ( start ); +} + + + } // namespace Cabana #endif diff --git a/core/src/impl/Cabana_Erfinv.hpp b/core/src/impl/Cabana_Erfinv.hpp new file mode 100644 index 000000000..def01caff --- /dev/null +++ b/core/src/impl/Cabana_Erfinv.hpp @@ -0,0 +1,108 @@ +/**************************************************************************** + * Copyright (c) 2023 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +/*! + \file impl/Cabana_Erfinv.hpp +*/ +#ifndef CABANA_ERFINV_HPP +#define CABANA_ERFINV_HPP + +#include +#include + +namespace Cabana { +namespace Impl { + +/*! + This produces the inverse of + Phi(zp) = integrate(1/sqrt(2*pi) * exp(-zeta**2/2), (zeta,-oo,zp)) + = erf(sqrt(2)*zp/2)/2 + 1/2 + This means that ppnd7(p) = sqrt(2)*erfinv(2*p - 1) +*/ +double KOKKOS_INLINE_FUNCTION ppnd7(double p) { + /* + Algorithm AS 241 Appl. Statist. (1988) Vol. 37, No. 3 + Produce the normal deviate z corresponding to a given lower + tail area of p; z is accurate to about 1 part in 10**7 + */ + + const double split1 = 0.425E0; + const double split2 = 5.0E0; + const double const1 = 0.180625E0; + const double const2 = 1.6E0; + + // coefficicents for p close to 1/2 + const double A0 = 3.38713'27179E0; + const double A1 = 5.04342'71938E1; + const double A2 = 1.59291'13202E2; + const double A3 = 5.91093'74720E1; + const double B1 = 1.78951'69469E1; + const double B2 = 7.87577'57664E1; + const double B3 = 6.71875'63600E1; + + // coefficients for p neither close to 1/2 nor 0 or 1 + const double C0 = 1.42343'72777E0; + const double C1 = 2.75681'53900E0; + const double C2 = 1.30672'84816E0; + const double C3 = 1.70238'21103E-1; + const double D1 = 7.37001'64250E-1; + const double D2 = 1.20211'32975E-1; + + // coefficients for p near 0 or 1 + const double E0 = 6.65790'51150E0; + const double E1 = 3.08122'63860E0; + const double E2 = 4.28682'94337E-1; + const double E3 = 1.73372'03997E-2; + const double F1 = 2.41978'94225E-1; + const double F2 = 1.22582'02635E-2; + + const double q = p - 0.5; + if(fabs(q) < split1) { + const double r = const1 - q*q; + const double out = q * (((A3 * r + A2) * r + A1) * r + A0) / (((B3 * r + B2) * r + B1) * r + 1.0); + return out; + } else { + double r, out; + if(q < 0.) { + r = p; + } else { + r = 1.0 - p; + } + if((r < 0.) || (r > 1.)) { + Kokkos::abort("unexpected problem in ppnd7() inside erfinv()"); + } + r = Kokkos::sqrt(-log(r)); + if(r < split2) { + r = r - const2; + out = (((C3 * r + C2) * r + C1) * r + C0) / ((D2 * r + D1) * r + 1.0); + } else { + r = r - split2; + out = (((E3 * r + E2) * r + E1) * r + E0) / ((F2 * r + F1) * r + 1.0); + } + + if(q < 0.) { + out = -out; + } + + return out; + } +} + +double KOKKOS_INLINE_FUNCTION erfinv(double x) { + const double out = ppnd7((x+1.)/2.)/Kokkos::sqrt(2.); + return out; +} + + +} // end namespace Impl +} // end namespace Cabana + +#endif // end CABANA_ERFINV_HPP diff --git a/core/src/impl/Cabana_GaussianWeight.hpp b/core/src/impl/Cabana_GaussianWeight.hpp new file mode 100644 index 000000000..5d726eaee --- /dev/null +++ b/core/src/impl/Cabana_GaussianWeight.hpp @@ -0,0 +1,70 @@ +/**************************************************************************** + * Copyright (c) 2023 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +/*! + \file impl/Cabana_GaussianWeight.hpp +*/ +#ifndef CABANA_GAUSSIAN_WEIGHT_HPP +#define CABANA_GAUSSIAN_WEIGHT_HPP + +#include + +namespace Cabana { +namespace Impl { + +template +class GaussianWeight { + +public: + +/*! + Compute the value of a 1d Gaussian distribution function with mean Mu and covariance C at velocity v +*/ +static GMMFloatType KOKKOS_INLINE_FUNCTION weight_1d(const GMMFloatType (&v)[1], const GMMFloatType (&Mu)[1], const GMMFloatType (&C)[1][1]) { + GMMFloatType I[1][1]; + Matrix2d::invert(I,C); + const GMMFloatType det = Matrix2d::determinant(C); + + const GMMFloatType arg = (v[0]-Mu[0]) * I[0][0] * (v[0]-Mu[0]); + return pow(2.*M_PI, -0.5*1)/Kokkos::sqrt(det) * Kokkos::exp(-0.5*arg); +} + +/*! + Compute the value of a 2d ring distribution function with mean Mu and covariance C at velocity v +*/ +static GMMFloatType KOKKOS_INLINE_FUNCTION weight_2d(const GMMFloatType (&v)[2], const GMMFloatType (&Mu)[2], const GMMFloatType (&C)[2][2]) { + return v[1]/Kokkos::sqrt(2.*M_PI*C[0][0])/C[1][1] * Kokkos::exp(-0.5*Mu[1]*Mu[1]/C[1][1]) * Kokkos::exp(-0.5*(v[0]-Mu[0])*(v[0]-Mu[0])/C[0][0]) * Kokkos::exp(-0.5*v[1]*v[1]/C[1][1]) * + Kokkos::Experimental::cyl_bessel_i0, double, int>(Kokkos::complex(v[1]*Mu[1]/C[1][1])).real(); +} + +/*! + Compute the value of a 3d Gaussian distribution function with mean Mu and covariance C at velocity v +*/ +static GMMFloatType KOKKOS_INLINE_FUNCTION weight_3d(const GMMFloatType (&v)[3], const GMMFloatType (&Mu)[3], const GMMFloatType (&C)[3][3]) { + GMMFloatType I[3][3]; + Matrix2d::invert(I,C); + const GMMFloatType det = Matrix2d::determinant(C); + + const GMMFloatType rx = I[0][0]*(v[0]-Mu[0]) + I[0][1]*(v[1]-Mu[1]) + I[0][2]*(v[2]-Mu[2]); + const GMMFloatType ry = I[1][0]*(v[0]-Mu[0]) + I[1][1]*(v[1]-Mu[1]) + I[1][2]*(v[2]-Mu[2]); + const GMMFloatType rz = I[2][0]*(v[0]-Mu[0]) + I[2][1]*(v[1]-Mu[1]) + I[2][2]*(v[2]-Mu[2]); + + const GMMFloatType arg = (v[0]-Mu[0])*rx + (v[1]-Mu[1])*ry + (v[2]-Mu[2])*rz; + return pow(2.*M_PI, -0.5*3)/Kokkos::sqrt(det) * Kokkos::exp(-0.5*arg); +} + +}; + + +} // end namespace Impl +} // end namespace Cabana + +#endif // end CABANA_GAUSSIAN_WEIGHT_HPP diff --git a/core/src/impl/Cabana_Hammersley.hpp b/core/src/impl/Cabana_Hammersley.hpp new file mode 100644 index 000000000..7f1b239c7 --- /dev/null +++ b/core/src/impl/Cabana_Hammersley.hpp @@ -0,0 +1,101 @@ +/**************************************************************************** + * Copyright (c) 2023 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +/*! + \file Cabana_Hammersley.hpp +*/ +#ifndef CABANA_HAMMERSLEY_HPP +#define CABANA_HAMMERSLEY_HPP + +#include + +namespace Cabana { +namespace Impl { + +/*! + Reverse the bits in the unsigned 64bit input +*/ +uint64_t KOKKOS_INLINE_FUNCTION reverseBits64(uint64_t n) { + n = ((n & 0x00000000ffffffff) << 32) | ((n & 0xffffffff00000000) >> 32); + n = ((n & 0x0000ffff0000ffff) << 16) | ((n & 0xffff0000ffff0000) >> 16); + n = ((n & 0x00ff00ff00ff00ff) << 8) | ((n & 0xff00ff00ff00ff00) >> 8); + n = ((n & 0x0f0f0f0f0f0f0f0f) << 4) | ((n & 0xf0f0f0f0f0f0f0f0) >> 4); + n = ((n & 0x3333333333333333) << 2) | ((n & 0xcccccccccccccccc) >> 2); + n = ((n & 0x5555555555555555) << 1) | ((n & 0xaaaaaaaaaaaaaaaa) >> 1); + return n; +} + +/*! + Reverse the digits of the input n in the number system with basis base to + compute the nth entry in the Van der Corput sequence. +*/ +template struct Corput { +static double KOKKOS_INLINE_FUNCTION value(int n){ + const double inv_Base = (double)1. / (double)base; + uint64_t out = 0; + double power = 1.; + while (n > 0.) { + uint64_t next = n / base; // next value with the last digit removed / after the right shift + uint64_t digit = n - next * base; // last digit that we pop off + out = out * base + digit; // shift output left and append digit + power *= inv_Base; // keep track of how often we have done that + n = next; // shift input right + } + // At this point out is the reversed string when writing n as digits in base + return out * power; // get a value between 0 and 1 +} +}; + +/*! + In base 2 reversing the digits is just reversing the bits and then scaling by + the appropriate constant to shift things to the right of the decimal point. + This is much faster than the general case. +*/ +template<> struct Corput<2> { +static double KOKKOS_INLINE_FUNCTION value(int n) { + return reverseBits64(n) * 0x1p-64; +} +}; + + +/*! + Generate the nth sample (of N) of the hammersley sequence and return the value in dimension i +*/ +double KOKKOS_INLINE_FUNCTION hammersley(const int i, const uint64_t n, const uint64_t N) { + if(n >= N) { + printf("Called hammersely with n = %zd, N=%zd, which is wrong\n", n, N); + } + if((i < 0) || (i > 5)) { + printf("Called hammersely with i = %d, which is wrong\n", i); + } + + if(i == 0) { + return (double)n/(double)N; + } else if (i == 1) { + return Corput<2>::value(n); + } else if (i == 2) { + return Corput<3>::value(n); + } else if (i == 3) { + return Corput<5>::value(n); + } else if (i == 4) { + return Corput<7>::value(n); + } else if (i == 5) { + return Corput<11>::value(n); + } else { + return 0. / 0.; + } +} + + +} // end namespace Impl +} // end namespace Cabana + +#endif // end CABANA_HAMMERSLEY_HPP diff --git a/core/src/impl/Cabana_Matrix2d.hpp b/core/src/impl/Cabana_Matrix2d.hpp new file mode 100644 index 000000000..6cb740f18 --- /dev/null +++ b/core/src/impl/Cabana_Matrix2d.hpp @@ -0,0 +1,231 @@ +/**************************************************************************** + * Copyright (c) 2023 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +/*! + \file impl/Cabana_Matrix2d.hpp + \brief + Linear algebra helpers. A lot of them are not implemented for generic + matricies and we probably shouldn't do that either, but call to some + appropriate linear algebra package. Special cases for small matricies are + implemented below. +*/ +#ifndef CABANA_MATRIX2D_IMPL_HPP +#define CABANA_MATRIX2D_IMPL_HPP + +#include + +namespace Cabana { + +namespace Impl { + +template +class Matrix2d { + +public: +/*! + Compute the determinant of a dim-by-dim matrix +*/ + static FloatType KOKKOS_INLINE_FUNCTION determinant(const FloatType (&A)[dim][dim]) { + // I would use fprintf to stderr, but that isn't possible from device code + printf("Somebody should implement matrix determinants for larger than 3x3\n"); + return 0./0.; + } + +/*! + Compute the Cholesky decomposition of a dim-by-dim matrix A and return B such that B * B.T = A +*/ + static void KOKKOS_INLINE_FUNCTION cholesky(FloatType (&B)[dim][dim], const FloatType (&A)[dim][dim]) { + printf("Somebody should implement Cholesky decomposition for larger than 3x3\n"); + for(size_t i = 0; i < dim; i++) { + for(size_t j = 0; j < dim; j++) { + B[i][j] = 0./0.; + } + } + } + +/*! + Transpose a dim-by-dim matrix matrix A and returns B such that B.T = A +*/ + static void KOKKOS_INLINE_FUNCTION transpose(FloatType (&B)[dim][dim], const FloatType (&A)[dim][dim]) { + for(size_t i = 0; i < dim; i++) { + for(size_t j = 0; j < dim; j++) { + B[i][j] = A[j][i]; + } + } + } + +/*! + Invert a dim-by-dim matrix matrix A and returns B such that B.A = 1 +*/ + static void KOKKOS_INLINE_FUNCTION invert(FloatType (&B)[dim][dim], const FloatType (&A)[dim][dim]) { + printf("Somebody should implement matrix inversion for larger than 3x3\n"); + for(size_t i = 0; i < dim; i++) { + for(size_t j = 0; j < dim; j++) { + B[i][j] = 0./0.; + } + } + } +}; + +// +// The trivial 1x1 case +// +template struct Matrix2d { +/*! + Compute the determinant of a trivial 1-by-1 matrix +*/ + static FloatType KOKKOS_INLINE_FUNCTION determinant(const FloatType (&A)[1][1]) { + return A[0][0]; + } + +/*! + Compute the Cholesky decomposition of a trival 1-by-1 matrix A and return B such that B * B.T = A +*/ + static void KOKKOS_INLINE_FUNCTION cholesky(FloatType (&B)[1][1], const FloatType (&A)[1][1]) { + B[0][0] = Kokkos::sqrt(A[0][0]); + } + +/*! + Transpose a trivial 1-by-1 matrix matrix A and returns B such that B.T = A +*/ + static void KOKKOS_INLINE_FUNCTION transpose(FloatType (&B)[1][1], const FloatType (&A)[1][1]) { + B[0][0] = A[0][0]; + } + +/*! + Invert a trivial 1-by-1 matrix matrix A and returns B such that B.A = 1 +*/ + static void KOKKOS_INLINE_FUNCTION invert(FloatType (&B)[1][1], const FloatType (&A)[1][1]) { + // determinant + const FloatType det = determinant(A); + + // elements of the inverse + B[0][0] = 1./det; + } +}; + +// +// The 2x2 case +// +template struct Matrix2d { +/*! + Compute the determinant of a 2-by-2 matrix +*/ + static FloatType KOKKOS_INLINE_FUNCTION determinant(const FloatType (&A)[2][2]) { + return A[0][0]*A[1][1] - A[1][0]*A[0][1]; + } + +/*! + Compute the Cholesky decomposition of a 2-by-2 matrix A and return B such that B * B.T = A +*/ + static void KOKKOS_INLINE_FUNCTION cholesky(FloatType (&B)[2][2], const FloatType (&A)[2][2]) { + B[0][0] = Kokkos::sqrt(A[0][0]); + B[0][1] = 0.; + B[1][0] = A[1][0] / B[0][0]; + B[1][1] = Kokkos::sqrt(A[1][1] - B[1][0]*B[1][0]); + } + +/*! + Transpose a 2-by-2 matrix matrix A and returns B such that B.T = A +*/ + static void KOKKOS_INLINE_FUNCTION transpose(FloatType (&B)[2][2], const FloatType (&A)[2][2]) { + B[0][0] = A[0][0]; + B[0][1] = A[1][0]; + B[1][0] = A[0][1]; + B[1][1] = A[1][1]; + } + +/*! + Invert a 2-by-2 matrix matrix A and returns B such that B.A = 1 +*/ + static void KOKKOS_INLINE_FUNCTION invert(FloatType (&B)[2][2], const FloatType (&A)[2][2]) { + // determinant + const FloatType det = determinant(A); + + // elements of the inverse + B[0][0] = A[1][1]/det; + B[0][1] = -A[0][1]/det; + B[1][0] = -A[1][0]/det; + B[1][1] = A[0][0]/det; + } +}; + +// +// The 3x3 case +// +template struct Matrix2d { +/*! + Compute the determinant of a 3-by-3 matrix +*/ + static FloatType KOKKOS_INLINE_FUNCTION determinant(const FloatType (&A)[3][3]) { + return A[0][0]*A[1][1]*A[2][2] + + A[0][1]*A[1][2]*A[2][0] + + A[0][2]*A[1][0]*A[2][1] + - A[2][0]*A[1][1]*A[0][2] + - A[2][1]*A[1][2]*A[0][0] + - A[2][2]*A[1][0]*A[0][1]; + } + +/*! + Compute the Cholesky decomposition of a 3-by-3 matrix A and return B such that B * B.T = A +*/ + static void KOKKOS_INLINE_FUNCTION cholesky(FloatType (&B)[3][3], const FloatType (&A)[3][3]) { + B[0][0] = Kokkos::sqrt(A[0][0]); + B[0][1] = 0.; + B[0][2] = 0.; + B[1][0] = A[1][0] / B[0][0]; + B[1][1] = Kokkos::sqrt(A[1][1] - B[1][0]*B[1][0]); + B[1][2] = 0.; + B[2][0] = A[2][0] / B[0][0]; + B[2][1] = (A[2][1] - B[2][0]*B[1][0]) / B[1][1]; + B[2][2] = Kokkos::sqrt(A[2][2] - B[2][0]*B[2][0] - B[2][1]*B[2][1]); + } + +/*! + Transpose a 3-by-3 matrix matrix A and returns B such that B.T = A +*/ + static void KOKKOS_INLINE_FUNCTION transpose(FloatType (&B)[3][3], const FloatType (&A)[3][3]) { + B[0][0] = A[0][0]; + B[0][1] = A[1][0]; + B[0][2] = A[2][0]; + B[1][0] = A[0][1]; + B[1][1] = A[1][1]; + B[1][2] = A[2][1]; + B[2][0] = A[0][2]; + B[2][1] = A[1][2]; + B[2][2] = A[2][2]; + } + +/*! + Invert a 3-by-3 matrix matrix A and returns B such that B.A = 1 +*/ + static void KOKKOS_INLINE_FUNCTION invert(FloatType (&B)[3][3], const FloatType (&A)[3][3]) { + // determinant + const FloatType det = determinant(A); + + // elements of the inverse + B[0][0] = (A[1][1]*A[2][2]-A[1][2]*A[2][1])/det; + B[0][1] = (A[0][2]*A[2][1]-A[0][1]*A[2][2])/det; + B[0][2] = (A[0][1]*A[1][2]-A[0][2]*A[1][1])/det; + B[1][0] = (A[1][2]*A[2][0]-A[1][0]*A[2][2])/det; + B[1][1] = (A[0][0]*A[2][2]-A[0][2]*A[2][0])/det; + B[1][2] = (A[0][2]*A[1][0]-A[0][0]*A[1][2])/det; + B[2][0] = (A[1][0]*A[2][1]-A[1][1]*A[2][0])/det; + B[2][1] = (A[0][1]*A[2][0]-A[0][0]*A[2][1])/det; + B[2][2] = (A[0][0]*A[1][1]-A[0][1]*A[1][0])/det; + } +}; + +} // end namespace Impl + +} // end namespace Cabana + +#endif // end CABANA_MATRIX2D_IMPL_HPP diff --git a/core/unit_test/CMakeLists.txt b/core/unit_test/CMakeLists.txt index 00aa49379..fc7c74cf9 100644 --- a/core/unit_test/CMakeLists.txt +++ b/core/unit_test/CMakeLists.txt @@ -16,6 +16,11 @@ set(NOBACKEND_TESTS Index CartesianGrid SoA + Erfinv + Hammersley + Matrix1 + Matrix2 + Matrix3 ) set(SERIAL_TESTS diff --git a/core/unit_test/tstErfinv.cpp b/core/unit_test/tstErfinv.cpp new file mode 100644 index 000000000..e209a4a5b --- /dev/null +++ b/core/unit_test/tstErfinv.cpp @@ -0,0 +1,44 @@ +/**************************************************************************** + * Copyright (c) 2023 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#include +#include + +#include + +namespace Test +{ + +//---------------------------------------------------------------------------// +// Erfinv test +void testErfinv() +{ + // Test values from the original paper + EXPECT_NEAR(Cabana::Impl::ppnd7(0.25), -0.6744897501960817, 1e-7); + EXPECT_NEAR(Cabana::Impl::ppnd7(0.001), -3.090232306167814, 1e-6); + EXPECT_NEAR(Cabana::Impl::ppnd7(1e-20), -9.262340089798408, 1e-6); + + // test that erfinv() is actually the inverse of erf() + double testpoints[] = {-0.999, -0.9, -0.5, -0.1, -1e-3, -1e-8, 1e-8, 1e-3, 0.1, 0.5, 0.9, 0.999}; + for(int i = 0; i < sizeof(testpoints)/sizeof(testpoints[0]); i++) { + const double x = testpoints[i]; + EXPECT_NEAR(x, Kokkos::erf(Cabana::Impl::erfinv(x)), 1e-7); + } +} + +//---------------------------------------------------------------------------// +// TESTS +//---------------------------------------------------------------------------// +TEST( cabana_erfinv, erfinv_test ) { testErfinv(); } + +//---------------------------------------------------------------------------// + +} // end namespace Test diff --git a/core/unit_test/tstHammersley.cpp b/core/unit_test/tstHammersley.cpp new file mode 100644 index 000000000..9120c6720 --- /dev/null +++ b/core/unit_test/tstHammersley.cpp @@ -0,0 +1,61 @@ +/**************************************************************************** + * Copyright (c) 2023 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#include +#include +#include +#include +#include +#include + +#include + +namespace Test +{ + +//---------------------------------------------------------------------------// +// Corput test +void testCorput() +{ + EXPECT_NEAR(Cabana::Impl::Corput<2>::value(0), 0./8., 1e-7); + EXPECT_NEAR(Cabana::Impl::Corput<2>::value(1), 4./8., 1e-7); + EXPECT_NEAR(Cabana::Impl::Corput<2>::value(2), 2./8., 1e-7); + EXPECT_NEAR(Cabana::Impl::Corput<2>::value(3), 6./8., 1e-7); + EXPECT_NEAR(Cabana::Impl::Corput<2>::value(4), 1./8., 1e-7); + EXPECT_NEAR(Cabana::Impl::Corput<2>::value(5), 5./8., 1e-7); + EXPECT_NEAR(Cabana::Impl::Corput<2>::value(6), 3./8., 1e-7); + EXPECT_NEAR(Cabana::Impl::Corput<2>::value(7), 7./8., 1e-7); + + EXPECT_NEAR(Cabana::Impl::Corput<3>::value(0), 0./9., 1e-7); + EXPECT_NEAR(Cabana::Impl::Corput<3>::value(1), 3./9., 1e-7); + EXPECT_NEAR(Cabana::Impl::Corput<3>::value(2), 6./9., 1e-7); + EXPECT_NEAR(Cabana::Impl::Corput<3>::value(3), 1./9., 1e-7); + EXPECT_NEAR(Cabana::Impl::Corput<3>::value(4), 4./9., 1e-7); + EXPECT_NEAR(Cabana::Impl::Corput<3>::value(5), 7./9., 1e-7); + EXPECT_NEAR(Cabana::Impl::Corput<3>::value(6), 2./9., 1e-7); + EXPECT_NEAR(Cabana::Impl::Corput<3>::value(7), 5./9., 1e-7); + EXPECT_NEAR(Cabana::Impl::Corput<3>::value(8), 8./9., 1e-7); + + const int N = 1020; // 34 * 2*3*5 + auto _print_hammersley = KOKKOS_LAMBDA(const int& i) { + printf("%f %f %f %f\n", Cabana::Impl::hammersley(0,i,N), Cabana::Impl::hammersley(1,i,N), Cabana::Impl::hammersley(2,i,N), Cabana::Impl::hammersley(3,i,N)); + }; + Kokkos::parallel_for("print hammersley", N, _print_hammersley); +} + +//---------------------------------------------------------------------------// +// TESTS +//---------------------------------------------------------------------------// +TEST( cabana_corput, corput_test ) { testCorput(); } + +//---------------------------------------------------------------------------// + +} // end namespace Test diff --git a/core/unit_test/tstMatrix1.cpp b/core/unit_test/tstMatrix1.cpp new file mode 100644 index 000000000..c4114b8cb --- /dev/null +++ b/core/unit_test/tstMatrix1.cpp @@ -0,0 +1,74 @@ +/**************************************************************************** + * Copyright (c) 2023 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#include +#include + +#include + +namespace Test +{ + +//---------------------------------------------------------------------------// +// Matrix test +void testMatrix() +{ + // Hard code a random matrix + const float A1[1][1] = {4.f}; + + // Check determinant + auto _determinant_A1 = KOKKOS_LAMBDA(const int& i) { + float det = Cabana::Impl::Matrix2d::determinant(A1); + + assert(det == 4.f); + }; + Kokkos::parallel_for("Determinant A1", 1, _determinant_A1); + + // Check transposition + auto _transpose_A1 = KOKKOS_LAMBDA(const int& i) { + float T1[1][1]; + Cabana::Impl::Matrix2d::transpose(T1, A1); + + assert(T1[0][0] == 4.f); + }; + Kokkos::parallel_for("Transpose A1", 1, _transpose_A1); + + // Check inverse + auto _invert_A1 = KOKKOS_LAMBDA(const int& i) { + float I1[1][1]; + Cabana::Impl::Matrix2d::invert(I1, A1); + + assert(I1[0][0] == 0.25f); + }; + Kokkos::parallel_for("Invert A1", 1, _invert_A1); + + // Test cholesky decomposition + auto _cholesky_A1 = KOKKOS_LAMBDA(const int& i) { + float C1[1][1]; + Cabana::Impl::Matrix2d::cholesky(C1, A1); + + assert(C1[0][0] == 2.f); + }; + Kokkos::parallel_for("Decompose A1", 1, _cholesky_A1); +} + +//---------------------------------------------------------------------------// +// TESTS +//---------------------------------------------------------------------------// +TEST( cabana_matrix, matrix_test ) { testMatrix(); } + +//---------------------------------------------------------------------------// + +} // end namespace Test + + + + diff --git a/core/unit_test/tstMatrix2.cpp b/core/unit_test/tstMatrix2.cpp new file mode 100644 index 000000000..0b7aaef39 --- /dev/null +++ b/core/unit_test/tstMatrix2.cpp @@ -0,0 +1,113 @@ +/**************************************************************************** + * Copyright (c) 2023 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#include +#include + +#include + +namespace Test +{ + +//---------------------------------------------------------------------------// +// Matrix test +void testMatrix() +{ + // Hard code a random matrix + const float A2[2][2] = {{1.94f, 1.14f}, + {5.58f, 6.66f}}; + + // Hard code a second, symmetrix matrix + const float B2[2][2] = {{ 4.f, 12.f}, + {12.f, 37.f}}; + + // Check determinant + auto _determinant_A2 = KOKKOS_LAMBDA(const int& i) { + float det = Cabana::Impl::Matrix2d::determinant(A2); + + assert(det == 6.5592f); + }; + Kokkos::parallel_for("Determinant A2", 1, _determinant_A2); + + auto _determinant_B2 = KOKKOS_LAMBDA(const int& i) { + float det = Cabana::Impl::Matrix2d::determinant(B2); + + assert(det == 4.f); + }; + Kokkos::parallel_for("Determinant B2", 1, _determinant_B2); + + // Check transposition + auto _transpose_A2 = KOKKOS_LAMBDA(const int& i) { + float T2[2][2]; + Cabana::Impl::Matrix2d::transpose(T2, A2); + + assert(T2[0][0] == 1.94f); + assert(T2[0][1] == 5.58f); + assert(T2[1][0] == 1.14f); + assert(T2[1][1] == 6.66f); + }; + Kokkos::parallel_for("Transpose A2", 1, _transpose_A2); + + auto _transpose_B2 = KOKKOS_LAMBDA(const int& i) { + float T2[2][2]; + Cabana::Impl::Matrix2d::transpose(T2, B2); + + assert(T2[0][0] == B2[0][0]); + assert(T2[0][1] == B2[0][1]); + assert(T2[1][0] == B2[1][0]); + assert(T2[1][1] == B2[1][1]); + }; + Kokkos::parallel_for("Transpose B2", 1, _transpose_B2); + + // Check inverse + auto _invert_A2 = KOKKOS_LAMBDA(const int& i) { + float I2[2][2]; + Cabana::Impl::Matrix2d::invert(I2, A2); + + assert(I2[0][0] == 1.015370f); + assert(I2[0][1] == -0.173802f); + assert(I2[1][0] == -0.850714f); + assert(I2[1][1] == 0.295768f); + }; + Kokkos::parallel_for("Invert A2", 1, _invert_A2); + + auto _invert_B2 = KOKKOS_LAMBDA(const int& i) { + float I2[2][2]; + Cabana::Impl::Matrix2d::invert(I2, B2); + + assert(I2[0][0] == 9.25f); + assert(I2[0][1] == -3.00f); + assert(I2[1][0] == -3.00f); + assert(I2[1][1] == 1.00f); + }; + Kokkos::parallel_for("Invert B2", 1, _invert_B2); + + // Test cholesky decomposition + auto _cholesky_B2 = KOKKOS_LAMBDA(const int& i) { + float C2[2][2]; + Cabana::Impl::Matrix2d::cholesky(C2, B2); + + assert(C2[0][0] == 2.f); + assert(C2[0][1] == 0.f); + assert(C2[1][0] == 6.f); + assert(C2[1][1] == 1.f); + }; + Kokkos::parallel_for("Decompose B2", 1, _cholesky_B2); +} + +//---------------------------------------------------------------------------// +// TESTS +//---------------------------------------------------------------------------// +TEST( cabana_matrix, matrix_test ) { testMatrix(); } + +//---------------------------------------------------------------------------// + +} // end namespace Test diff --git a/core/unit_test/tstMatrix3.cpp b/core/unit_test/tstMatrix3.cpp new file mode 100644 index 000000000..3a08efba0 --- /dev/null +++ b/core/unit_test/tstMatrix3.cpp @@ -0,0 +1,140 @@ +/**************************************************************************** + * Copyright (c) 2023 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#include +#include + +#include + +namespace Test +{ + +//---------------------------------------------------------------------------// +// Matrix test +void testMatrix() +{ + // Hard code a random matrix + const float A3[3][3] = {{1.94f, 1.14f, 1.47f}, + {5.58f, 6.66f, 0.25f}, + {8.16f, 0.83f, 8.81f}}; + + // Hard code a second, symmetrix matrix + const float B3[3][3] = {{ 4.f, 12.f, -16.f}, + { 12.f, 37.f, -43.f}, + {-16.f, -43.f, 98.f}}; + + // Check determinant + auto _determinant_A3 = KOKKOS_LAMBDA(const int& i) { + float det = Cabana::Impl::Matrix2d::determinant(A3); + + assert(det == -13.3703f); + }; + Kokkos::parallel_for("Determinant A3", 1, _determinant_A3); + + auto _determinant_B3 = KOKKOS_LAMBDA(const int& i) { + float det = Cabana::Impl::Matrix2d::determinant(B3); + + assert(det == 36.f); + }; + Kokkos::parallel_for("Determinant B3", 1, _determinant_B3); + + // Check transposition + auto _transpose_A3 = KOKKOS_LAMBDA(const int& i) { + float T3[3][3]; + Cabana::Impl::Matrix2d::transpose(T3, A3); + + assert(T3[0][0] == 1.94f); + assert(T3[0][1] == 5.58f); + assert(T3[0][2] == 8.16f); + assert(T3[1][0] == 1.14f); + assert(T3[1][1] == 6.66f); + assert(T3[1][2] == 0.83f); + assert(T3[2][0] == 1.47f); + assert(T3[2][1] == 0.25f); + assert(T3[2][2] == 8.81f); + }; + Kokkos::parallel_for("Transpose A3", 1, _transpose_A3); + + auto _transpose_B3 = KOKKOS_LAMBDA(const int& i) { + float T3[3][3]; + Cabana::Impl::Matrix2d::transpose(T3, B3); + + assert(T3[0][0] == B3[0][0]); + assert(T3[0][1] == B3[0][1]); + assert(T3[0][2] == B3[0][2]); + assert(T3[1][0] == B3[1][0]); + assert(T3[1][1] == B3[1][1]); + assert(T3[1][2] == B3[1][2]); + assert(T3[2][0] == B3[2][0]); + assert(T3[2][1] == B3[2][1]); + assert(T3[2][2] == B3[2][2]); + }; + Kokkos::parallel_for("Transpose B3", 1, _transpose_B3); + + // Check inverse + auto _invert_A3 = KOKKOS_LAMBDA(const int& i) { + float I3[3][3]; + Cabana::Impl::Matrix2d::invert(I3, A3); + + assert(I3[0][0] == -4.372920f); + assert(I3[0][1] == 0.659919f); + assert(I3[0][2] == 0.710920f); + assert(I3[1][0] == 3.524220f); + assert(I3[1][1] == -0.381159f); + assert(I3[1][2] == -0.577221f); + assert(I3[2][0] == 3.718266f); + assert(I3[2][1] == -0.575321f); + assert(I3[2][2] == -0.490581f); + }; + Kokkos::parallel_for("Invert A3", 1, _invert_A3); + + auto _invert_B3 = KOKKOS_LAMBDA(const int& i) { + float I3[3][3]; + Cabana::Impl::Matrix2d::invert(I3, B3); + + assert(I3[0][0] == 49.361111f); + assert(I3[0][1] == -13.555555f); + assert(I3[0][2] == 2.111111f); + assert(I3[1][0] == -13.555555f); + assert(I3[1][1] == 3.777777f); + assert(I3[1][2] == -0.555555f); + assert(I3[2][0] == 2.111111f); + assert(I3[2][1] == -0.555555f); + assert(I3[2][2] == 0.111111f); + }; + Kokkos::parallel_for("Invert B3", 1, _invert_B3); + + // Test cholesky decomposition + auto _cholesky_B3 = KOKKOS_LAMBDA(const int& i) { + float C3[3][3]; + Cabana::Impl::Matrix2d::cholesky(C3, B3); + + assert(C3[0][0] == 2.f); + assert(C3[0][1] == 0.f); + assert(C3[0][2] == 0.f); + assert(C3[1][0] == 6.f); + assert(C3[1][1] == 1.f); + assert(C3[1][2] == 0.f); + assert(C3[2][0] == -8.f); + assert(C3[2][1] == 5.f); + assert(C3[2][2] == 3.f); + }; + Kokkos::parallel_for("Decompose B3", 1, _cholesky_B3); +} + +//---------------------------------------------------------------------------// +// TESTS +//---------------------------------------------------------------------------// +TEST( cabana_matrix, matrix_test ) { testMatrix(); } + +//---------------------------------------------------------------------------// + +} // end namespace Test diff --git a/example/core_tutorial/14_gmm/CMakeLists.txt b/example/core_tutorial/14_gmm/CMakeLists.txt new file mode 100644 index 000000000..e63d14cbe --- /dev/null +++ b/example/core_tutorial/14_gmm/CMakeLists.txt @@ -0,0 +1,52 @@ +############################################################################ +# Copyright (c) 2018-2023 by the Cabana authors # +# All rights reserved. # +# # +# This file is part of the Cabana library. Cabana is distributed under a # +# BSD 3-clause license. For the licensing terms see the LICENSE file in # +# the top-level directory. # +# # +# SPDX-License-Identifier: BSD-3-Clause # +############################################################################ + +# the next for lines are just, so that we can use this as +# a standalone CMakeLists.txt for testing of the Target export +cmake_minimum_required(VERSION 3.16) +project(Cabana_GMM) +if(NOT TARGET Cabana::cabanacore) + find_package(Cabana) +endif() + +# Dimensions of velocity space +set(VelocityDimensions "3" CACHE STRING "Properties of the dimensions of velocity space") +set(VelocityDimensionsValues 1 2 3) # 1d Cartesian, 2d cylindrical, 3d cartesian. If we ever want 2d cartestian we have to come up with something +set_property(CACHE VelocityDimensions PROPERTY STRINGS ${VelocityDimensionsValues}) +message(STATUS "VelocityDimensions='${VelocityDimensions}'") + +# Floating point type for particle data +set(ParticleFloatType "double" CACHE STRING "Floating point type for particle data") +set(ParticleFloatTypeValues float double) +set_property(CACHE ParticleFloatType PROPERTY STRINGS ${ParticleFloatTypeValues}) +message(STATUS "ParticleFloatType='${ParticleFloatType}'") + +# Floating point type for particle data +set(GmmFloatType "double" CACHE STRING "Floating point type for particle data") +set(GmmFloatTypeValues float double) +set_property(CACHE GmmFloatType PROPERTY STRINGS ${GmmFloatTypeValues}) +message(STATUS "GmmFloatType='${GmmFloatType}'") + +file(GLOB HEADERS "*.h *hpp") +file(GLOB SOURCES "*.cpp") + +install(FILES ${HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + +set (CMAKE_CXX_FLAGS "-Wall -Wextra -Wshadow -g3") +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DVelocityDimensions=${VelocityDimensions}") +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DParticleFloatType=${ParticleFloatType}") +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGmmFloatType=${GmmFloatType}") + +add_executable(gmm ./main.cpp) +target_link_libraries(gmm PUBLIC Cabana::cabanacore) +target_include_directories(gmm PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) +target_include_directories(gmm PUBLIC "${KokkosKernels_CMAKE_DIR}/../../../include") +install(TARGETS gmm DESTINATION ${CMAKE_INSTALL_BINDIR}) diff --git a/example/core_tutorial/14_gmm/main.cpp b/example/core_tutorial/14_gmm/main.cpp new file mode 100644 index 000000000..8b04881f6 --- /dev/null +++ b/example/core_tutorial/14_gmm/main.cpp @@ -0,0 +1,1090 @@ +#include +#include +#include + +#include +#include +#include + + +using MemorySpace = Kokkos::DefaultExecutionSpace::memory_space; +using ExecutionSpace = Kokkos::DefaultExecutionSpace; + +enum ParticleFields { + CellIndex, + MacroFactor, + PositionX, +#if VelocityDimensions == 1 + VelocityX +#elif VelocityDimensions == 2 + VelocityPar, + VelocityPer +#elif VelocityDimensions == 3 + VelocityX, + VelocityY, + VelocityZ +#endif +}; + + +// Designate the types that the particles will hold. +using ParticleDataTypes = +Cabana::MemberTypes< + int, // (0) Cell index + ParticleFloatType, // (1) macro factor + ParticleFloatType, // (2) x-position +#if VelocityDimensions == 1 + ParticleFloatType // (3) x-velocity +#elif VelocityDimensions == 2 + ParticleFloatType, // (3) parallel-velocity + ParticleFloatType // (4) perp-velocity +#elif VelocityDimensions == 3 + ParticleFloatType, // (3) x-velocity + ParticleFloatType, // (4) y-velocity + ParticleFloatType // (5) z-velocity +#endif +>; + +// Set the type for the particle AoSoA. +using particle_list_t = Cabana::AoSoA; + + +// Set the type for a list of Gaussians +using gaussian_list_t = Kokkos::View; +using gaussian_dev_t = Kokkos::View; // Should we force both host and device view to be LayoutLeft instead? + + +enum ParticleGenerators { + RandomSampling, + CDFInversion, + HammersleyEqualDensity, + HammersleyEqualWeight +}; + +void check_gaussians(const gaussian_list_t& gaussians) { + int cmax = gaussians.extent(0); + int kmax = gaussians.extent(1); + Kokkos::View C("C", cmax,kmax); + Kokkos::View B("B", cmax,kmax); + + auto g_dev = Kokkos::create_mirror_view(MemorySpace(), gaussians); + Kokkos::deep_copy(g_dev, gaussians); + + auto _cholesky = KOKKOS_LAMBDA(const int& j) { + for(int c = 0; c < cmax; c++) { + if(g_dev(c,j,Weight)>0.) { +#if VelocityDimensions == 1 + C(c,j,0,0) = g_dev(c,j,Cxx); + B(c,j,0,0) = g_dev(c,j,Cxx); +#elif VelocityDimensions == 2 + C(c,j,0,0) = g_dev(c,j,Cparpar); C(c,j,0,1) = g_dev(c,j,Cparper); + C(c,j,1,0) = g_dev(c,j,Cperpar); C(c,j,1,1) = g_dev(c,j,Cperper); + GmmFloatType Cj[2][2] = {{g_dev(c,j,Cparpar), g_dev(c,j,Cparper)}, + {g_dev(c,j,Cperpar), g_dev(c,j,Cperper)}}; + GmmFloatType Bj[2][2]; + Cabana::Impl::Matrix::cholesky(Bj,Cj); + B(c,j,0,0) = Bj[0][0]; B(c,j,0,1) = Bj[0][1]; + B(c,j,1,0) = Bj[1][0]; B(c,j,1,1) = Bj[1][1]; +#elif VelocityDimensions == 3 + C(c,j,0,0) = g_dev(c,j,Cxx); C(c,j,0,1) = g_dev(c,j,Cxy); C(c,j,0,2) = g_dev(c,j,Cxz); + C(c,j,1,0) = g_dev(c,j,Cyx); C(c,j,1,1) = g_dev(c,j,Cyy); C(c,j,1,2) = g_dev(c,j,Cyz); + C(c,j,2,0) = g_dev(c,j,Czx); C(c,j,2,1) = g_dev(c,j,Czy); C(c,j,2,2) = g_dev(c,j,Czz); + GmmFloatType Cj[3][3] = {{g_dev(c,j,Cxx), g_dev(c,j,Cxy), g_dev(c,j,Cxz)}, + {g_dev(c,j,Cyx), g_dev(c,j,Cyy), g_dev(c,j,Cyz)}, + {g_dev(c,j,Czx), g_dev(c,j,Czy), g_dev(c,j,Czz)}}; + GmmFloatType Bj[3][3]; + Cabana::Impl::Matrix2d::cholesky(Bj,Cj); + B(c,j,0,0) = Bj[0][0]; B(c,j,0,1) = Bj[0][1]; B(c,j,0,2) = Bj[0][2]; + B(c,j,1,0) = Bj[1][0]; B(c,j,1,1) = Bj[1][1]; B(c,j,1,2) = Bj[1][2]; + B(c,j,2,0) = Bj[1][0]; B(c,j,2,1) = Bj[2][1]; B(c,j,2,2) = Bj[2][2]; +#endif + } + } + }; + + Kokkos::parallel_for("Check gaussians", kmax, _cholesky); + + auto B_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), B); + auto C_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), C); + + for(int c = 0; c < cmax; c++) { + for(int m = 0; m < kmax; m++) { + if(gaussians(c,m,Weight)>0.) { +#if VelocityDimensions == 1 + if(std::isnan(B_host(c,m,0,0))) { + printf("Gaussian %d in cell %d has covariance matrix C=(%e), but is that really symmetric and positive definite, because I get a Cholesky decomposition of (%e)\n", m,c, C_host(c,m,0,0), B_host(c,m,0,0)); + exit(1); + } +#elif VelocityDimensions == 2 + if(std::isnan(B_host(c,m,0,0))||std::isnan(B_host(c,m,0,1)) || + std::isnan(B_host(c,m,1,0))||std::isnan(B_host(c,m,1,1)) ) { + printf("Gaussian %d in cell %d has covariance matrix C=((%e,%e),(%e,%e)), but is that really symmetric and positive definite, because I get a Cholesky decomposition of ((%e,%e),(%e,%e))\n", m,c, C_host(c,m,0,0),C_host(c,m,0,1), C_host(c,m,1,0),C_host(c,m,1,1), B_host(c,m,0,0),B_host(c,m,0,1), B_host(c,m,1,0),B_host(c,m,1,1)); + exit(1); + } +#elif VelocityDimensions == 3 + if(std::isnan(B_host(c,m,0,0))||std::isnan(B_host(c,m,0,1))||std::isnan(B_host(c,m,0,2)) || + std::isnan(B_host(c,m,1,0))||std::isnan(B_host(c,m,1,1))||std::isnan(B_host(c,m,1,2)) || + std::isnan(B_host(c,m,2,0))||std::isnan(B_host(c,m,2,1))||std::isnan(B_host(c,m,2,2)) ) { + printf("Gaussian %d in cell %d has covariance matrix C=((%e,%e,%e),(%e,%e,%e),(%e,%e,%e)), but is that really symmetric and positive definite, because I get a Cholesky decomposition of ((%e,%e,%e),(%e,%e,%e),(%e,%e,%e))\n", m,c, C_host(c,m,0,0),C_host(c,m,0,1),C_host(c,m,0,2), C_host(c,m,1,0),C_host(c,m,1,1),C_host(c,m,1,2), C_host(c,m,2,0),C_host(c,m,2,1),C_host(c,m,2,2), B_host(c,m,0,0),B_host(c,m,0,1),B_host(c,m,0,2), B_host(c,m,1,0),B_host(c,m,1,1),B_host(c,m,1,2), B_host(c,m,2,0),B_host(c,m,2,1),B_host(c,m,2,2)); + exit(1); + } +#endif + } + } + } +} + +void initialize_particles(particle_list_t& particles, const gaussian_list_t& gaussians, const int seed, enum ParticleGenerators samplingtype = RandomSampling) { + size_t Np; + + auto cell = Cabana::slice(particles); + auto macro = Cabana::slice(particles); + auto position_x = Cabana::slice(particles); +#if VelocityDimensions == 1 + auto velocity_x = Cabana::slice(particles); + + if(samplingtype == RandomSampling) { + Np = Cabana::initializeRandomParticles(cell, macro, position_x, velocity_x, gaussians, seed); + } else if (samplingtype == CDFInversion) { + Np = Cabana::initializeParticlesFromCDF(cell, macro, position_x, velocity_x, gaussians, seed); + } else if (samplingtype == HammersleyEqualDensity) { + Np = Cabana::initializeEqualDensityParticlesWithHammersley(cell, macro, position_x, velocity_x, gaussians); + } else if (samplingtype == HammersleyEqualWeight) { + Np = Cabana::initializeEqualWeightParticlesWithHammersley(cell, macro, position_x, velocity_x, gaussians); + } else { + fprintf(stderr, "Unimplemented sampling type %d\n", samplingtype); + exit(1); + } + +#elif VelocityDimensions == 2 + auto velocity_par = Cabana::slice(particles); + auto velocity_per = Cabana::slice(particles); + if(samplingtype == RandomSampling) { + Np = Cabana::initializeRandomParticles(cell, macro, position_x, velocity_par, velocity_per, gaussians, seed); + } else if (samplingtype == HammersleyEqualDensity) { + Np = Cabana::initializeEqualDensityParticlesWithHammersley(cell, macro, position_x, velocity_par, velocity_per, gaussians); + } else if (samplingtype == HammersleyEqualWeight) { + Np = Cabana::initializeEqualWeightParticlesWithHammersley(cell, macro, position_x, velocity_par, velocity_per, gaussians); + } else { + fprintf(stderr, "Unimplemented sampling type %d\n", samplingtype); + exit(1); + } + +#elif VelocityDimensions == 3 + auto velocity_x = Cabana::slice(particles); + auto velocity_y = Cabana::slice(particles); + auto velocity_z = Cabana::slice(particles); + + if(samplingtype == RandomSampling) { + Np = Cabana::initializeRandomParticles(cell, macro, position_x, velocity_x, velocity_y, velocity_z, gaussians, seed); + } else if (samplingtype == HammersleyEqualDensity) { + Np = Cabana::initializeEqualDensityParticlesWithHammersley(cell, macro, position_x, velocity_x, velocity_y, velocity_z, gaussians); + } else if (samplingtype == HammersleyEqualWeight) { + Np = Cabana::initializeEqualWeightParticlesWithHammersley(cell, macro, position_x, velocity_x, velocity_y, velocity_z, gaussians); + } else { + fprintf(stderr, "Unimplemented sampling type %d\n", samplingtype); + exit(1); + } + +#endif + + particles.resize(Np); + + GmmFloatType N = 0.; + auto _sum = KOKKOS_LAMBDA(const int& i, GmmFloatType& lN) { + lN += macro(i); + }; + Kokkos::parallel_reduce("Particle Scan", particles.size(), _sum, N); + + printf("We have %f particles with total weight %ld\n", N, Np); + printf("adjusting particle weights by a factor %f\n", Np/N); + + auto _norm = KOKKOS_LAMBDA(const int s, const int i) { + macro.access(s,i) *= Np/N; + }; + Cabana::SimdPolicy vec_policy(0, Np); + Cabana::simd_parallel_for(vec_policy, _norm, "norm()"); + +} + +void save_particles(particle_list_t particles, const char* name) { + // Write out particles + FILE* fp = fopen(name, "w"); + auto pl_host = Cabana::create_mirror_view_and_copy(Kokkos::HostSpace(), particles); + auto macro_host = Cabana::slice(pl_host); + auto cell_host = Cabana::slice(pl_host); + auto position_x_host = Cabana::slice(pl_host); +#if VelocityDimensions == 1 + auto velocity_x_host = Cabana::slice(pl_host); + for(size_t i = 0; i < pl_host.size(); i++) { + fprintf(fp, "%zd %d %f %f %e\n", i, cell_host(i), macro_host(i), position_x_host(i), velocity_x_host(i)); + } +#elif VelocityDimensions == 2 + auto velocity_par_host = Cabana::slice(pl_host); + auto velocity_per_host = Cabana::slice(pl_host); + for(size_t i = 0; i < pl_host.size(); i++) { + fprintf(fp, "%zd %d %f %f %e %e\n", i, cell_host(i), macro_host(i), position_x_host(i), velocity_par_host(i), velocity_per_host(i)); + } +#elif VelocityDimensions == 3 + auto velocity_x_host = Cabana::slice(pl_host); + auto velocity_y_host = Cabana::slice(pl_host); + auto velocity_z_host = Cabana::slice(pl_host); + for(size_t i = 0; i < pl_host.size(); i++) { + fprintf(fp, "%zd %d %f %f %e %e %e\n", i, cell_host(i), macro_host(i), position_x_host(i), velocity_x_host(i), velocity_y_host(i), velocity_z_host(i)); + } +#endif + fclose(fp); +} + +// solve M.A = rho-rho2 for A +void jacobi(const Kokkos::View&M, Kokkos::View& A, const Kokkos::View& rho, const Kokkos::View& rho2) { + // Square matrix + assert(M.extent(0) == M.extent(1)); + // And matching sizes + assert(M.extent(0) == A.extent(0)); + assert(M.extent(0) == rho.extent(0)); + assert(M.extent(0) == rho2.extent(0)); + + // Initial guess for dA from diagonal elements + // M(i,i) * A(i) = rho(i) - rho2(i) + auto _init = KOKKOS_LAMBDA(const int& i) { + A(i) = (rho(i) - rho2(i)) / M(i,i); + }; + Kokkos::parallel_for("guess A", A.extent(0), _init); + + double error = 1.; + for(size_t k = 0; k < A.extent(0)*A.extent(0); k++) { + // Perform one jacobi iteration + Kokkos::View Anext("Anext", A.extent(0)); + auto _jacobi = KOKKOS_LAMBDA(const size_t& i) { + double dot = 0.; + for(size_t j = 0; j < Anext.extent(0); j++) { + dot += M(i,j) * A(j); + } + Anext(i) = (rho(i)-rho2(i) - dot + M(i,i)*A(i)) / M(i,i); + }; + + Kokkos::parallel_for("update A", A.extent(0), _jacobi); + + // Meassure convergence + auto _sum = KOKKOS_LAMBDA(const int&i, double& lsum) { + lsum += (Anext(i)-A(i)) * (Anext(i)-A(i)); + }; + double sum = 0.; + Kokkos::parallel_reduce("squared difference", A.extent(0), _sum, sum); + //printf("summed squared differences = %e\n", sum); + + // Overwrite A + auto _copy = KOKKOS_LAMBDA(const int& i) { + A(i) = Anext(i); + }; + Kokkos::parallel_for("overwrite A", A.extent(0), _copy); + + if(sum < error) { + error = sum; + } else { + break; + } + } +} + +KOKKOS_INLINE_FUNCTION double W_TSC(const double x) { + if(fabs(x) < 0.5) { + return 0.75 - fabs(x)*fabs(x); + } else if (fabs(x) < 1.5) { + return 0.5 * (1.5-fabs(x)) * (1.5-fabs(x)); + } else { + return 0.; + } +} + +template + struct ChargeAdd { + + using V = ParticleFloatType; + using size_type = size_t; + using value_type = V[]; + + KOKKOS_INLINE_FUNCTION + void operator()(const int particleindex, value_type rho) const { + for(int cellindex = cell_(particleindex)-2; cellindex <= cell_(particleindex)+2; cellindex++) { + // we have a ghost cell on the left which shifts indices of rho relative to cell indices + const double xg = cellindex; + rho[cellindex+2] += macro_(particleindex) * W_TSC(posx_(particleindex) - xg); + } + } + + KOKKOS_INLINE_FUNCTION void join(value_type dst, const value_type src) const { + for(size_type i = 0; i < value_count; ++i) { + dst[i] += src[i]; + } + } + + KOKKOS_INLINE_FUNCTION void init(value_type dst) const { + for(size_type i = 0; i < value_count; ++i) { + dst[i] = 0.; + } + } + + using has_final = std::true_type; + KOKKOS_INLINE_FUNCTION void final(value_type dst) const { + for(size_type i = 0; i < value_count; ++i) { results_(i) = dst[i]; } + } + + macro_t macro_; + cell_t cell_; + pos_t posx_; + size_type particle_count; + size_type value_count; // cell count, but the name is special to kokkos + result_t results_; // total rho, but the name is special to kokkos + }; + +// This version parallelizes over particles like a typical particle-in-cell +// code. It needs some extra doing because the output is an array. For more +// than one spatial dimension this leads to problems. +void chargedensity(const particle_list_t& particles, Kokkos::View& rho) { + + const size_t numParticles = particles.size(); + const size_t numCells = rho.extent(0); + + auto macro = Cabana::slice(particles); + auto cell = Cabana::slice(particles); + auto posx = Cabana::slice(particles); + + using macro_t = typeof(macro); + using cell_t = typeof(cell); + using pos_t = typeof(posx); + using result_t = Kokkos::View; + + Kokkos::View rhotmp("rhotmp", numCells+4); + + Kokkos::parallel_reduce(numParticles, ChargeAdd{macro, cell, posx, numParticles, numCells+4, rhotmp}); + + auto rhotmp_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rhotmp); + FILE* fp = fopen("debug_cd1.dat", "w"); + double sum = 0.; + for(size_t i = 0; i < rhotmp_host.extent(0); i++) { + fprintf(fp, "%ld %f\n", i, rhotmp_host(i)); + sum += rhotmp_host(i); + } + fprintf(fp, "#sum = %f\n", sum); + fclose(fp); + + // Periodic boundaries on rho + auto _copy = KOKKOS_LAMBDA(const size_t& i) { + // copy interior + rho(i) = rhotmp(i+2); + // fold ghost cells + if(i == 0) { + rho(i) += rhotmp(rho.extent(0)+2); + } + if(i == 1) { + rho(i) += rhotmp(rho.extent(0)+3); + } + if(i == rho.extent(0)-2) { + rho(i) += rhotmp(0); + } + if(i == rho.extent(0)-1) { + rho(i) += rhotmp(1); + } + }; + Kokkos::parallel_for("boundary of rho", rho.extent(0), _copy); + + auto rho_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rho); + fp = fopen("debug_cd1b.dat", "w"); + sum = 0.; + for(size_t i = 0; i < rho_host.extent(0); i++) { + fprintf(fp, "%ld %f\n", i, rho_host(i)); + sum += rho_host(i); + } + fprintf(fp, "#sum = %f\n", sum); + fclose(fp); +} + + +// This version parallelizes over cells in rho. It avoids write conflicts but +// processes each particle multiple times which I suspected would be slower. +// But it turns out to run faster in many cases. +void chargedensity2(const particle_list_t& particles, Kokkos::View& rho) { + + const size_t numParticles = particles.size(); + const size_t numCells = rho.extent(0); + + auto macro = Cabana::slice(particles); + auto cell = Cabana::slice(particles); + auto posx = Cabana::slice(particles); + + Kokkos::View rhotmp("rhotmp", numCells+4); + + // how to compute a single cell + auto _deposit = KOKKOS_LAMBDA(const int& cellindex) { + rhotmp(cellindex) = 0.; + for(size_t n = 0; n < numParticles; n++) { + const double xg = cellindex - 2; + rhotmp[cellindex] += macro(n) * W_TSC(posx(n) - xg); + } + }; + + Kokkos::parallel_for("deposit rho", rhotmp.extent(0), _deposit); + + auto rhotmp_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rhotmp); + FILE* fp = fopen("debug_cd2.dat", "w"); + double sum = 0.; + for(size_t i = 0; i < rhotmp_host.extent(0); i++) { + fprintf(fp, "%ld %f\n", i, rhotmp_host(i)); + sum += rhotmp_host(i); + } + fprintf(fp, "#sum = %f\n", sum); + fclose(fp); + + // Periodic boundaries on rho + auto _copy = KOKKOS_LAMBDA(const size_t& i) { + // copy interior + rho(i) = rhotmp(i+2); + // fold ghost cells + if(i == 0) { + rho(i) += rhotmp(numCells+2); + } + if(i == 1) { + rho(i) += rhotmp(numCells+3); + } + if(i == numCells-2) { + rho(i) += rhotmp(0); + } + if(i == numCells-1) { + rho(i) += rhotmp(1); + } + }; + Kokkos::parallel_for("boundary of rho", numCells, _copy); + + auto rho_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rho); + fp = fopen("debug_cd2b.dat", "w"); + sum = 0.; + fprintf(fp, "\n"); + for(size_t i = 0; i < rho_host.extent(0); i++) { + fprintf(fp, "%ld %f\n", i, rho_host(i)); + sum += rho_host(i); + } + fprintf(fp, "#sum = %f\n", sum); + fclose(fp); +} + + +// This version parallelizes over cells in rho. It avoids write conflicts but +// processes each particle multiple times which I suspected would be slower. +// But it turns out to run faster in many cases. +template +void chargedensity3(FunctorType& deposit, const particle_list_t& particles, Kokkos::View& rho, Kokkos::View& rhotmp) { + + const size_t numParticles = particles.size(); + const size_t numCells = rho.extent(0); + + Kokkos::parallel_for("deposit rho", rhotmp.extent(0), deposit); + + auto rhotmp_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rhotmp); + FILE* fp = fopen("debug_cd3.dat", "w"); + double sum = 0.; + for(size_t i = 0; i < rhotmp_host.extent(0); i++) { + fprintf(fp, "%ld %f\n", i, rhotmp_host(i)); + sum += rhotmp_host(i); + } + fprintf(fp, "#sum = %f\n", sum); + fclose(fp); + + // Periodic boundaries on rho + auto _copy = KOKKOS_LAMBDA(const size_t& i) { + // copy interior + rho(i) = rhotmp(i+2); + // fold ghost cells + if(i == 0) { + rho(i) += rhotmp(numCells+2); + } + if(i == 1) { + rho(i) += rhotmp(numCells+3); + } + if(i == numCells-2) { + rho(i) += rhotmp(0); + } + if(i == numCells-1) { + rho(i) += rhotmp(1); + } + }; + Kokkos::parallel_for("boundary of rho", numCells, _copy); + + auto rho_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rho); + fp = fopen("debug_cd2b.dat", "w"); + sum = 0.; + fprintf(fp, "\n"); + for(size_t i = 0; i < rho_host.extent(0); i++) { + fprintf(fp, "%ld %f\n", i, rho_host(i)); + sum += rho_host(i); + } + fprintf(fp, "#sum = %f\n", sum); + fclose(fp); +} + +void massmatrix(const particle_list_t& particles, Kokkos::View& M) { + + const size_t numParticles = particles.size(); + const size_t numCells = M.extent(0); + + auto macro = Cabana::slice(particles); + auto cell = Cabana::slice(particles); + auto posx = Cabana::slice(particles); + + // ghost cells in index i + Kokkos::View Mtmp("Mtmp", numCells+4, M.extent(1)); + + // how to compute a single cell + auto _deposit = KOKKOS_LAMBDA(const int& cellindex) { + const double xg = cellindex - 2; + for(size_t n = 0; n < numParticles; n++) { + size_t j = floor(posx(n)+0.5); + if(j >= Mtmp.extent(1)) { j = 0; } + Mtmp(cellindex,j) += macro(n) * W_TSC(posx(n) - xg); + } + }; + + Kokkos::parallel_for("deposit M", Mtmp.extent(0), _deposit); + + auto _copy = KOKKOS_LAMBDA(const int& j) { + // copy interior + for(size_t i = 0; i < numCells; i++) { + M(i,j) = Mtmp(i+2,j); + } + // fold ghost cells + M(0,j) += Mtmp(numCells+2,j); + M(1,j) += Mtmp(numCells+3,j); + M(M.extent(0)-2,j) += Mtmp(0,j); + M(M.extent(0)-1,j) += Mtmp(1,j); + }; + Kokkos::parallel_for("boundary of M", M.extent(1), _copy); +} + + +void adjust_for_charge_conservation(particle_list_t& particles, const Kokkos::View& dA) { + + auto cell = Cabana::slice(particles); + auto macro = Cabana::slice(particles); + auto posx = Cabana::slice(particles); + + auto _update = KOKKOS_LAMBDA(const int s, const int i) { + int n = (s)*particle_list_t::vector_length + i; + size_t j = floor(posx(n)+0.5); + if(j >= dA.extent(0)) { j = 0; } + ParticleFloatType dalpha_p = dA(j); + if(macro(n) > -dalpha_p) { + macro(n) += dalpha_p; + } else { + printf("Can't adjust a particle with alpha_p = %f using delta alpha_p = %f in cell %d\n", macro(n), dalpha_p, cell(n)); + } + }; + + // Define an execution policy + Cabana::SimdPolicy vec_policy(0, particles.size()); + + // Execute for all particles in parallel + Cabana::simd_parallel_for(vec_policy, _update, "update()"); +} + + +void chargeconservation(const particle_list_t& particles, particle_list_t new_particles, const int num_cells) { + Kokkos::View rho("rho", num_cells); // ghost cells only temporary internally to the function + //chargedensity(particles, rho); + chargedensity2(particles, rho); + auto rho_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rho); + + FILE* fp = fopen("rho.dat", "w"); + for(size_t i = 0; i < rho_host.size(); i++) { + fprintf(fp, "%zd %f\n", i, rho_host(i)); + } + fclose(fp); + + Kokkos::View rhoprime("rhoprime", num_cells); + //chargedensity(new_particles, rhoprime); + chargedensity2(new_particles, rhoprime); + auto rhoprime_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rhoprime); + + fp = fopen("rhoprime.dat", "w"); + for(size_t i = 0; i < rhoprime_host.size(); i++) { + fprintf(fp, "%zd %f\n", i, rhoprime_host(i)); + } + fclose(fp); + + Kokkos::View M("M", num_cells, num_cells); // ghost cells only temporary internally to the function + massmatrix(new_particles, M); + auto M_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), M); + + fp = fopen("M.dat", "w"); + for(size_t i = 0; i < M_host.extent(0); i++) { + double sum = 0.; + for(size_t j = 0; j < M_host.extent(1); j++) { + fprintf(fp, "%zd %zd %f\n", i, j, M_host(i,j)); + sum += M_host(i,j); + } + fprintf(fp, "#sum = %f\n", sum); + } + fclose(fp); + + Kokkos::View dA("dA", num_cells); + jacobi(M, dA, rho, rhoprime); + auto dA_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), dA); + + fp = fopen("dA.dat", "w"); + for(size_t i = 0; i < dA_host.size(); i++) { + fprintf(fp, "%zd %f\n", i, dA_host(i)); + } + fclose(fp); + + adjust_for_charge_conservation(new_particles, dA); + + Kokkos::View rhoadj("rhoadj", num_cells); + //chargedensity(new_particles, rhoadj); + chargedensity2(new_particles, rhoadj); + auto rhoadj_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rhoadj); + + fp = fopen("rhoadj.dat", "w"); + for(size_t i = 0; i < rhoadj_host.size(); i++) { + fprintf(fp, "%zd %f\n", i, rhoadj_host(i)); + } + fclose(fp); +} + +void get_stats(const particle_list_t& particles, Kokkos::View& Np, Kokkos::View& P, Kokkos::View& E) { + auto macro = Cabana::slice(particles); + auto cell = Cabana::slice(particles); +#if VelocityDimensions == 1 + auto velX = Cabana::slice(particles); +#elif VelocityDimensions == 2 + auto velPar = Cabana::slice(particles); + auto velPer = Cabana::slice(particles); +#elif VelocityDimensions == 3 + auto velX = Cabana::slice(particles); + auto velY = Cabana::slice(particles); + auto velZ = Cabana::slice(particles); +#endif + + const int numParticles = particles.size(); + const size_t numCells = Np.extent(0); + + auto _moments = KOKKOS_LAMBDA(const int& cellindex) { + Np(cellindex) = 0.; + for(int d = 0; d < VelocityDimensions; d++) { + P(cellindex,d) = 0.; + E(cellindex,d) = 0.; + } + for(int n = 0; n < numParticles; n++) { + if(cell(n) == cellindex) { + Np(cellindex) += macro(n); +#if VelocityDimensions == 1 + P(cellindex,0) += macro(n) * velX(n); + E(cellindex,0) += 0.5*macro(n) * velX(n)*velX(n); +#elif VelocityDimensions == 2 + P(cellindex,0) += macro(n) * velPar(n); + P(cellindex,1) += macro(n) * velPer(n); + E(cellindex,0) += 0.5*macro(n) * velPar(n)*velPar(n); + E(cellindex,1) += 0.5*macro(n) * velPer(n)*velPer(n); +#elif VelocityDimensions == 3 + P(cellindex,0) += macro(n) * velX(n); + P(cellindex,1) += macro(n) * velY(n); + P(cellindex,2) += macro(n) * velZ(n); + E(cellindex,0) += 0.5*macro(n) * velX(n)*velX(n); + E(cellindex,1) += 0.5*macro(n) * velY(n)*velY(n); + E(cellindex,2) += 0.5*macro(n) * velZ(n)*velZ(n); +#endif + } + } + }; + + Kokkos::parallel_for("compute stats", numCells, _moments); +} + +void get_stats(const gaussian_list_t gaussians, const double nppc, Kokkos::View& P, Kokkos::View& E) { + int cmax = gaussians.extent(0); + int kmax = gaussians.extent(1); + auto g_dev = Kokkos::create_mirror_view(MemorySpace(), gaussians); + Kokkos::deep_copy(g_dev, gaussians); + + auto _moments = KOKKOS_LAMBDA(const int& cellindex) { + for(int d = 0; d < VelocityDimensions; d++) { + P(cellindex,d) = 0.; + E(cellindex,d) = 0.; + } + for(int k = 0; k < kmax; k++) { + if(g_dev(cellindex,k,Weight)>0.) { +#if VelocityDimensions == 1 + P(cellindex,0) += nppc*g_dev(cellindex,k,Weight) * g_dev(cellindex,k,MuX); + E(cellindex,0) += 0.5*nppc * g_dev(cellindex,k,Weight) * (g_dev(cellindex,k,Cxx) + g_dev(cellindex,k,MuX)*g_dev(cellindex,k,MuX)); +#elif VelocityDimensions == 2 + P(cellindex,0) += nppc * g_dev(cellindex,k,Weight) * g_dev(cellindex,k,MuPar); + P(cellindex,1) += nppc * g_dev(cellindex,k,Weight) * Kokkos::sqrt(0.5*M_PI)*Kokkos::sqrt(g_dev(cellindex,k,Cperper)); + E(cellindex,0) += 0.5*nppc * g_dev(cellindex,k,Weight) * (g_dev(cellindex,k,Cparpar) + g_dev(cellindex,k,MuPar)*g_dev(cellindex,k,MuPar)); + E(cellindex,1) += 0.5*nppc * g_dev(cellindex,k,Weight) * (2.*g_dev(cellindex,k,Cperper) ); +#elif VelocityDimensions == 3 + P(cellindex,0) += nppc*g_dev(cellindex,k,Weight)*g_dev(cellindex,k,MuX); + P(cellindex,1) += nppc*g_dev(cellindex,k,Weight)*g_dev(cellindex,k,MuY); + P(cellindex,2) += nppc*g_dev(cellindex,k,Weight)*g_dev(cellindex,k,MuZ); + E(cellindex,0) += 0.5*nppc * g_dev(cellindex,k,Weight) * (g_dev(cellindex,k,Cxx) + g_dev(cellindex,k,MuX)*g_dev(cellindex,k,MuX)); + E(cellindex,1) += 0.5*nppc * g_dev(cellindex,k,Weight) * (g_dev(cellindex,k,Cyy) + g_dev(cellindex,k,MuY)*g_dev(cellindex,k,MuY)); + E(cellindex,2) += 0.5*nppc * g_dev(cellindex,k,Weight) * (g_dev(cellindex,k,Czz) + g_dev(cellindex,k,MuZ)*g_dev(cellindex,k,MuZ)); +#endif + } + } + }; + + Kokkos::parallel_for("compute stats", cmax, _moments); +} + + +void get_corrections(const Kokkos::View& Npprime, const Kokkos::View& Pprime, const Kokkos::View& Eprime, + const Kokkos::View& P, const Kokkos::View& E, + Kokkos::View& a, Kokkos::View& b) { + + auto _correct = KOKKOS_LAMBDA(const size_t c) { +#if VelocityDimensions == 1 + const double num = 2.*E(c,0)*Npprime(c) - P(c,0)*P(c,0); + if(num >= 0.) { + const double denom = 2.*Eprime(c,0)*Npprime(c) - Pprime(c,0)*Pprime(c,0); + a(c,0) = Kokkos::sqrt(num/denom); + b(c,0) = (P(c,0)-a(c,0)*Pprime(c,0)) / (a(c,0)*Npprime(c)); + } else{ + a(c,0) = 1.; + b(c,0) = 0.; + } +#elif VelocityDimensions == 2 + const double num = 2.*(E(c,0)+E(c,1))*Npprime(c) - 2.*P(c,1)*P(c,1)/Pprime(c,1)/Pprime(c,1)*Eprime(c,1)*Npprime(c) - P(c,0)*P(c,0); + const double denom = 2.*Eprime(c,0)*Npprime(c) - Pprime(c,0)*Pprime(c,0); + if(num/denom >= 0.) { + a(c,0) = Kokkos::sqrt(num/denom); + a(c,1) = P(c,1) / Pprime(c,1); + b(c,0) = (P(c,0) - a(c,0)*Pprime(c,0)) / (a(c,0)*Npprime(c)); + } else { + a(c,0) = 1.; + a(c,1) = 1.; + b(c,0) = 0.; + } +#elif VelocityDimensions == 3 + const double num = 2. * (E(c,0)+E(c,1)+E(c,2)) * Npprime(c) - (P(c,0)*P(c,0) + P(c,1)*P(c,1) + P(c,2)*P(c,2)); + if(num >= 0.) { + const double denom = 2. * (Eprime(c,0)+Eprime(c,1)+Eprime(c,2)) * Npprime(c) - (Pprime(c,0)*Pprime(c,0) + Pprime(c,1)*Pprime(c,1) + Pprime(c,2)*Pprime(c,2)); + a(c,0) = Kokkos::sqrt(num/denom); + for(int d = 0; d < VelocityDimensions; d++) { + b(c,d) = (P(c,d)-a(c,0)*Pprime(c,d)) / (a(c,0)*Npprime(c)); + } + } else{ + a(c,0) = 1.; + for(int d = 0; d < VelocityDimensions; d++) { + b(c,d) = 0.; + } + } +#endif + }; + + Kokkos::parallel_for("Compute Lemon corrections", Npprime.extent(0), _correct); +} + +void scale_and_shift_particles(particle_list_t& particles, const Kokkos::View& a, const Kokkos::View& b) { + auto cell = Cabana::slice(particles); +#if VelocityDimensions == 1 + auto velX = Cabana::slice(particles); +#elif VelocityDimensions == 2 + auto velPar = Cabana::slice(particles); + auto velPer = Cabana::slice(particles); +#elif VelocityDimensions == 3 + auto velX = Cabana::slice(particles); + auto velY = Cabana::slice(particles); + auto velZ = Cabana::slice(particles); +#endif + + // Define how to adjust ONE particle + auto _adjust = KOKKOS_LAMBDA(const int s, const int i) { + auto c = cell.access(s,i); +#if VelocityDimensions == 1 + velX.access(s,i) = a(c,0) * (velX.access(s,i) + b(c,0)); +#elif VelocityDimensions == 2 + // No shift in vper, different scaling in the two dimensions + velPar.access(s,i) = a(c,0) * (velPar.access(s,i) + b(c,0)); + velPer.access(s,i) = a(c,1) * (velPer.access(s,i) ); +#elif VelocityDimensions == 3 + // Same scaling for all three dimensions, different shifts in the three dimensions + velX.access(s,i) = a(c,0) * (velX.access(s,i) + b(c,0)); + velY.access(s,i) = a(c,0) * (velY.access(s,i) + b(c,1)); + velZ.access(s,i) = a(c,0) * (velZ.access(s,i) + b(c,2)); +#endif + + }; + + // Define an execution policy + Cabana::SimdPolicy vec_policy(0, particles.size()); + + // Execute + Cabana::simd_parallel_for(vec_policy, _adjust, "adjust()"); + +} + +//---------------------------------------------------------------------------// +// Main +//---------------------------------------------------------------------------// +int main(int argc, char* argv[]) { + // Initialize the kokkos runtime. + Kokkos::ScopeGuard scope_guard( argc, argv ); + + const int seed = 12345; + const int num_cells = 10; + const int num_particles = 2429*num_cells; + + { + // Do work here + + // Build outselves a (set of) Gaussian(s) + gaussian_list_t true_f("true f", num_cells,2); // Ten cells, up to two Gaussians each + +#if VelocityDimensions == 1 + // first cell + true_f(0,0,Weight) = 0.6; + true_f(0,0,MuX) = 2.0; + true_f(0,0,Cxx) = 1.5; + + true_f(0,1,Weight) = 0.4; + true_f(0,1,MuX) = -1.0; + true_f(0,1,Cxx) = 1.0; + + for(size_t c=1; c 0.) { +#if VelocityDimensions == 1 + fprintf(fp, "%zd %zd %e %e %e\n", c, m, true_f(c,m,Weight), true_f(c,m,MuX), true_f(c,m,Cxx)); +#elif VelocityDimensions == 2 + fprintf(fp, "%zd %zd %e %e %e %e %e %e %e\n", c, m, true_f(c,m,Weight), true_f(c,m,MuPar), true_f(c,m,MuPer), true_f(c,m,Cparpar), true_f(c,m,Cparper), true_f(c,m,Cperpar), true_f(c,m,Cperper)); +#elif VelocityDimensions == 3 + fprintf(fp, "%zd %zd %e %e %e %e %e %e %e %e %e %e %e %e %e\n", c, m, true_f(c,m,Weight), true_f(c,m,MuX), true_f(c,m,MuY), true_f(c,m,MuZ), true_f(c,m,Cxx), true_f(c,m,Cxy), true_f(c,m,Cxz), true_f(c,m,Cyx), true_f(c,m,Cyy), true_f(c,m,Cyz), true_f(c,m,Czx), true_f(c,m,Czy), true_f(c,m,Czz)); +#endif + } + } + } + fclose(fp); + + // Create particles + particle_list_t particles("particles", num_particles); + printf("Initalize particles\n"); + initialize_particles(particles, true_f, 12345, RandomSampling); + + // Write out particles + save_particles(particles, "part.dat"); + + // Compute GMM for the particles + size_t kmax = 10; + const GmmFloatType eps = 1e-16; // eps says what relative decrease in the loss function do we consider CEM converged + gaussian_list_t gmm("reconstructed f", num_cells, kmax); + printf("Reconstruct GMM\n"); + //reconstruct_gmm(particles, gmm, eps); + + auto cell = Cabana::slice(particles); + auto macro = Cabana::slice(particles); +#if VelocityDimensions==1 + auto velocity_x = Cabana::slice(particles); + Cabana::reconstructGMM(gmm, eps, seed, cell, macro, velocity_x); +#elif VelocityDimensions==2 + auto velocity_par = Cabana::slice(particles); + auto velocity_per = Cabana::slice(particles); + Cabana::reconstructGMM(gmm, eps, seed, cell, macro, velocity_par, velocity_per); +#elif VelocityDimensions==3 + auto velocity_x = Cabana::slice(particles); + auto velocity_y = Cabana::slice(particles); + auto velocity_z = Cabana::slice(particles); + Cabana::reconstructGMM(gmm, eps, seed, cell, macro, velocity_x, velocity_y, velocity_z); +#endif + + + + printf("Checking results\n"); + + // Take a look at the gaussians + printf("Reconstructed distribution function:\n"); + for(size_t c = 0; c < gmm.extent(0); c++) { + for(size_t m = 0; m < kmax; m++) { + if(gmm(c,m,Weight) > 0.) { +#if VelocityDimensions == 1 + printf("Gaussian(%zd) in cell %zd weight=%f, Mu=%f, Cov=%f\n", m, c, gmm(c,m,Weight), gmm(c,m,MuX), gmm(c,m,Cxx)); +#elif VelocityDimensions == 2 + printf("Gaussian(%zd) in cell %zd weight=%f, Mu=(%f,%f), Cov=((%f,%f),(%f,%f))\n", m, c, gmm(c,m,Weight), gmm(c,m,MuPar),gmm(c,m,MuPer), + gmm(c,m,Cparpar), gmm(c,m,Cparper), + gmm(c,m,Cperpar), gmm(c,m,Cperper)); +#elif VelocityDimensions == 3 + printf("Gaussian(%zd) in cell %zd weight=%f, Mu=(%f,%f,%f), Cov=((%f,%f,%f),(%f,%f,%f),(%f,%f,%f))\n", m, c, gmm(c,m,Weight), gmm(c,m,MuX),gmm(c,m,MuY),gmm(c,m,MuZ), + gmm(c,m,Cxx), gmm(c,m,Cxy), gmm(c,m,Cxz), + gmm(c,m,Cyx), gmm(c,m,Cyy), gmm(c,m,Cyz), + gmm(c,m,Czx), gmm(c,m,Czy), gmm(c,m,Czz)); +#endif + } + } + } + fp = fopen("reconstructed_f.dat", "w"); + for(size_t c = 0; c < gmm.extent(0); c++) { + for(size_t m = 0; m < gmm.extent(1); m++) { + if(gmm(c,m,Weight) > 0.) { +#if VelocityDimensions == 1 + fprintf(fp, "%zd %zd %e %e %e\n", c,m, gmm(c,m,Weight), gmm(c,m,MuX), gmm(c,m,Cxx)); +#elif VelocityDimensions == 2 + fprintf(fp, "%zd %zd %e %e %e %e %e %e %e\n", c,m, gmm(c,m,Weight), gmm(c,m,MuPar),gmm(c,m,MuPer), gmm(c,m,Cparpar),gmm(c,m,Cparper), gmm(c,m,Cperpar),gmm(c,m,Cperper)); +#elif VelocityDimensions == 3 + fprintf(fp, "%zd %zd %e %e %e %e %e %e %e %e %e %e %e %e %e\n", c,m, gmm(c,m,Weight), gmm(c,m,MuX), gmm(c,m,MuY), gmm(c,m,MuZ), gmm(c,m,Cxx), gmm(c,m,Cxy), gmm(c,m,Cxz), gmm(c,m,Cyx), gmm(c,m,Cyy), gmm(c,m,Cyz), gmm(c,m,Czx), gmm(c,m,Czy), gmm(c,m,Czz)); +#endif + } + } + } + fclose(fp); + + + // Print ground truth for reference + printf("True distribution function:\n"); + for(size_t c = 0; c 0.) { +#if VelocityDimensions == 1 + printf("Gaussian(%zd) in cell %zd weight=%f, Mu=%f, Cov=%f\n", m,c, true_f(c,m,Weight), true_f(c,m,MuX), true_f(c,m,Cxx)); +#elif VelocityDimensions == 2 + printf("Gaussian(%zd) in cell %zd weight=%f, Mu=(%f,%f), Cov=((%f,%f)(%f,%f))\n", m,c, true_f(c,m,Weight), true_f(c,m,MuPar),true_f(c,m,MuPer), + true_f(c,m,Cparpar),true_f(c,m,Cparper), + true_f(c,m,Cperpar),true_f(c,m,Cperper)); +#elif VelocityDimensions == 3 + printf("Gaussian(%zd) in cell %zd weight=%f, Mu=(%f,%f,%f), Cov=((%f,%f,%f),(%f,%f,%f),(%f,%f,%f))\n", m,c, true_f(c,m,Weight), true_f(c,m,MuX),true_f(c,m,MuY),true_f(c,m,MuZ), + true_f(c,m,Cxx), true_f(c,m,Cxy), true_f(c,m,Cxz), + true_f(c,m,Cyx), true_f(c,m,Cyy), true_f(c,m,Cyz), + true_f(c,m,Czx), true_f(c,m,Czy), true_f(c,m,Czz)); +#endif + } + } + } + + // Create new particles from recovered gaussian distribution functions + particle_list_t new_particles("new particles", num_particles); + initialize_particles(new_particles, gmm, 23456, HammersleyEqualWeight); + + // Ensure that the new particles represent the same charge density distribution on cell centers + chargeconservation(particles, new_particles, num_cells); // this breaks if you use different particle numbers + + // Get statistics from particles + Kokkos::View P("P", num_cells, VelocityDimensions); + Kokkos::View E("E", num_cells, VelocityDimensions); + const double nppc = num_particles/(double)num_cells; + get_stats(gmm, nppc, P, E); + + Kokkos::View Npprime("Np'", num_cells); + Kokkos::View Pprime("P'", num_cells, VelocityDimensions); + Kokkos::View Eprime("E'", num_cells, VelocityDimensions); + get_stats(new_particles, Npprime, Pprime, Eprime); + + Kokkos::View a("a", num_cells, VelocityDimensions); + Kokkos::View b("b", num_cells, VelocityDimensions); + get_corrections(Npprime, Pprime, Eprime, P, E, a, b); + + auto P_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), P); + auto E_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), E); + auto Npprime_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), Npprime); + auto Pprime_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), Pprime); + auto Eprime_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), Eprime); + auto a_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), a); + auto b_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), b); + + for(size_t c=0; c < num_cells; c++) { +#if VelocityDimensions == 1 + printf("N' = %f, P = %f, P' = %f, E = %f, E' = %f\n", Npprime_host(c), P_host(c,0), Pprime_host(c,0), E_host(c,0), Eprime_host(c,0)); + printf("a = %f, b = %f\n", a_host(c,0), b_host(c,0)); +#elif VelocityDimensions == 2 + printf("N' = %f, P = %f,%f, P' = %f,%f, E = %f, E' = %f\n", Npprime_host(c), P_host(c,0),P_host(c,1), Pprime_host(c,0),Pprime_host(c,1), E_host(c,0)+E_host(c,1), Eprime_host(c,0)+Eprime_host(c,1)); + printf("a = %f,%f, b = %f\n", a_host(c,0),a_host(c,1), b_host(c,0)); +#elif VelocityDimensions == 3 + printf("N' = %f, P = %f,%f,%f, P' = %f,%f,%f, E = %f, E' = %f\n", Npprime_host(c), P_host(c,0),P_host(c,1),P_host(c,2), Pprime_host(c,0),Pprime_host(c,1),Pprime_host(c,2), E_host(c,0)+E_host(c,1)+E_host(c,2), Eprime_host(c,0)+Eprime_host(c,1)+Eprime_host(c,2)); + printf("a = %f,%f,%f b = %f,%f,%f\n", a_host(c,0),a_host(c,1),a_host(c,2), b_host(c,0),b_host(c,1),b_host(c,2)); +#endif + } + + // Ensure conservation of momentum and energy + scale_and_shift_particles(new_particles, a, b); + + // Check statistics again + get_stats(new_particles, Npprime, Pprime, Eprime); + + Kokkos::deep_copy(Npprime_host, Npprime); + Kokkos::deep_copy(Pprime_host, Pprime); + Kokkos::deep_copy(Eprime_host, Eprime); + + for(size_t c=0; c < num_cells; c++) { +#if VelocityDimensions == 1 + printf("N' = %f, P = %f, P' = %f, E = %f, E' = %f\n", Npprime_host(c), P_host(c,0), Pprime_host(c,0), E_host(c,0), Eprime_host(c,0)); + printf("deltaP = %f, deltaE = %f\n", P_host(c,0)-Pprime_host(c,0), E_host(c,0)-Eprime_host(c,0)); +#elif VelocityDimensions == 2 + printf("N' = %f, P = %f,%f, P' = %f,%f, E = %f, E' = %f\n", Npprime_host(c), P_host(c,0),P_host(c,1), Pprime_host(c,0),Pprime_host(c,1), E_host(c,0)+E_host(c,1), Eprime_host(c,0)+Eprime_host(c,1)); + printf("deltaP = %f,%f, deltaE = %f\n", P_host(c,0)-Pprime_host(c,0), P_host(c,1)-Pprime_host(c,1), E_host(c,0)+E_host(c,1)-Eprime_host(c,0)-Eprime_host(c,1)); +#elif VelocityDimensions == 3 + printf("N' = %f, P = %f,%f,%f, P' = %f,%f,%f, E = %f, E' = %f\n", Npprime_host(c), P_host(c,0),P_host(c,1),P_host(c,2), Pprime_host(c,0),Pprime_host(c,1),Pprime_host(c,2), E_host(c,0)+E_host(c,1)+E_host(c,2), Eprime_host(c,0)+Eprime_host(c,1)+Eprime_host(c,2)); + printf("deltaP = %f,%f,%f, deltaE = %f\n", P_host(c,0)-Pprime_host(c,0), P_host(c,1)-Pprime_host(c,1), P_host(c,2)-Pprime_host(c,2), E_host(c,0)+E_host(c,1)+E_host(c,2)-Eprime_host(c,0)-Eprime_host(c,1)-Eprime_host(c,2)); +#endif + } + + // Save particles + save_particles(new_particles, "part2.dat"); + } + + return 0; +} diff --git a/example/core_tutorial/CMakeLists.txt b/example/core_tutorial/CMakeLists.txt index 26e0493ea..e1ac76836 100644 --- a/example/core_tutorial/CMakeLists.txt +++ b/example/core_tutorial/CMakeLists.txt @@ -38,3 +38,4 @@ endif() if(Cabana_ENABLE_HDF5) add_subdirectory(13_hdf5_output) endif() +add_subdirectory(14_gmm)