From 0ff0d30d8a756e7ca17502191d5a6cbe269cfd80 Mon Sep 17 00:00:00 2001 From: Zachary Sexton Date: Tue, 2 Jun 2026 14:13:06 -0700 Subject: [PATCH 01/22] #525 adding LagrangeBasis/Serendipity function support and unit tests for refactored basis functions --- Code/Source/solver/CMakeLists.txt | 25 +- Code/Source/solver/FE/Basis/BasisCache.cpp | 309 + Code/Source/solver/FE/Basis/BasisCache.h | 456 + Code/Source/solver/FE/Basis/BasisExceptions.h | 134 + Code/Source/solver/FE/Basis/BasisFactory.cpp | 160 + Code/Source/solver/FE/Basis/BasisFactory.h | 57 + Code/Source/solver/FE/Basis/BasisFunction.cpp | 366 + Code/Source/solver/FE/Basis/BasisFunction.h | 426 + Code/Source/solver/FE/Basis/BasisTolerance.h | 52 + Code/Source/solver/FE/Basis/BasisTraits.h | 218 + Code/Source/solver/FE/Basis/LagrangeBasis.cpp | 8323 +++++++++++++++++ Code/Source/solver/FE/Basis/LagrangeBasis.h | 175 + .../solver/FE/Basis/LagrangeBasisFast.h | 1378 +++ .../solver/FE/Basis/LagrangeBasisPyramid.cpp | 2069 ++++ .../solver/FE/Basis/LagrangeBasisPyramid.h | 67 + .../solver/FE/Basis/LagrangeBasisSimplex.cpp | 2457 +++++ .../solver/FE/Basis/LagrangeBasisSimplex.h | 78 + .../solver/FE/Basis/LagrangeBasisUtility.h | 25 + .../FE/Basis/NodeOrderingConventions.cpp | 818 ++ .../solver/FE/Basis/NodeOrderingConventions.h | 538 ++ .../solver/FE/Basis/PyramidModalBasis.h | 265 + .../solver/FE/Basis/SerendipityBasis.cpp | 882 ++ .../Source/solver/FE/Basis/SerendipityBasis.h | 70 + Code/Source/solver/FE/Basis/VectorBasis.h | 255 + .../FE/Basis/VectorBasisEvaluationHelpers.cpp | 593 ++ .../FE/Basis/VectorBasisEvaluationHelpers.h | 751 ++ .../FE/Basis/VectorBasisModalPolynomial.h | 77 + Code/Source/solver/FE/Common/Alignment.h | 23 + Code/Source/solver/FE/Common/Types.h | 532 ++ .../solver/FE/Math/DenseLinearAlgebra.cpp | 480 + .../solver/FE/Math/DenseLinearAlgebra.h | 119 + .../solver/FE/Math/DenseTransformKernels.h | 78 + Code/Source/solver/FE/Math/ExpressionOps.h | 99 + Code/Source/solver/FE/Math/IntegerMath.h 
| 98 + Code/Source/solver/FE/Math/MathConstants.h | 388 + Code/Source/solver/FE/Math/Matrix.h | 1487 +++ Code/Source/solver/FE/Math/MatrixExpr.h | 626 ++ Code/Source/solver/FE/Math/Vector.h | 831 ++ Code/Source/solver/FE/Math/VectorExpr.h | 418 + .../solver/FE/Quadrature/QuadratureRule.h | 237 + Code/Source/solver/fs.cpp | 71 +- Code/Source/solver/nn.cpp | 666 +- .../FE/Basis/test_BasisCacheFactory.cpp | 256 + .../FE/Basis/test_BasisErrorPaths.cpp | 203 + .../unitTests/FE/Basis/test_BasisHessians.cpp | 314 + .../FE/Basis/test_ConstexprBasis.cpp | 226 + .../FE/Basis/test_HigherOrderWedgePyramid.cpp | 173 + .../unitTests/FE/Basis/test_LagrangeBasis.cpp | 3028 ++++++ .../FE/Basis/test_SerendipityTensorModal.cpp | 116 + .../FE/Math/test_DenseLinearAlgebra.cpp | 265 + .../unitTests/FE/Math/test_ExpressionOps.cpp | 509 + .../unitTests/FE/Math/test_MathConstants.cpp | 341 + tests/unitTests/FE/Math/test_Matrix.cpp | 594 ++ tests/unitTests/FE/Math/test_MatrixExpr.cpp | 528 ++ tests/unitTests/FE/Math/test_Vector.cpp | 589 ++ tests/unitTests/FE/Math/test_VectorExpr.cpp | 409 + 56 files changed, 34681 insertions(+), 47 deletions(-) create mode 100644 Code/Source/solver/FE/Basis/BasisCache.cpp create mode 100644 Code/Source/solver/FE/Basis/BasisCache.h create mode 100644 Code/Source/solver/FE/Basis/BasisExceptions.h create mode 100644 Code/Source/solver/FE/Basis/BasisFactory.cpp create mode 100644 Code/Source/solver/FE/Basis/BasisFactory.h create mode 100644 Code/Source/solver/FE/Basis/BasisFunction.cpp create mode 100644 Code/Source/solver/FE/Basis/BasisFunction.h create mode 100644 Code/Source/solver/FE/Basis/BasisTolerance.h create mode 100644 Code/Source/solver/FE/Basis/BasisTraits.h create mode 100644 Code/Source/solver/FE/Basis/LagrangeBasis.cpp create mode 100644 Code/Source/solver/FE/Basis/LagrangeBasis.h create mode 100644 Code/Source/solver/FE/Basis/LagrangeBasisFast.h create mode 100644 Code/Source/solver/FE/Basis/LagrangeBasisPyramid.cpp create mode 100644 
Code/Source/solver/FE/Basis/LagrangeBasisPyramid.h create mode 100644 Code/Source/solver/FE/Basis/LagrangeBasisSimplex.cpp create mode 100644 Code/Source/solver/FE/Basis/LagrangeBasisSimplex.h create mode 100644 Code/Source/solver/FE/Basis/LagrangeBasisUtility.h create mode 100644 Code/Source/solver/FE/Basis/NodeOrderingConventions.cpp create mode 100644 Code/Source/solver/FE/Basis/NodeOrderingConventions.h create mode 100644 Code/Source/solver/FE/Basis/PyramidModalBasis.h create mode 100644 Code/Source/solver/FE/Basis/SerendipityBasis.cpp create mode 100644 Code/Source/solver/FE/Basis/SerendipityBasis.h create mode 100644 Code/Source/solver/FE/Basis/VectorBasis.h create mode 100644 Code/Source/solver/FE/Basis/VectorBasisEvaluationHelpers.cpp create mode 100644 Code/Source/solver/FE/Basis/VectorBasisEvaluationHelpers.h create mode 100644 Code/Source/solver/FE/Basis/VectorBasisModalPolynomial.h create mode 100644 Code/Source/solver/FE/Common/Alignment.h create mode 100644 Code/Source/solver/FE/Common/Types.h create mode 100644 Code/Source/solver/FE/Math/DenseLinearAlgebra.cpp create mode 100644 Code/Source/solver/FE/Math/DenseLinearAlgebra.h create mode 100644 Code/Source/solver/FE/Math/DenseTransformKernels.h create mode 100644 Code/Source/solver/FE/Math/ExpressionOps.h create mode 100644 Code/Source/solver/FE/Math/IntegerMath.h create mode 100644 Code/Source/solver/FE/Math/MathConstants.h create mode 100644 Code/Source/solver/FE/Math/Matrix.h create mode 100644 Code/Source/solver/FE/Math/MatrixExpr.h create mode 100644 Code/Source/solver/FE/Math/Vector.h create mode 100644 Code/Source/solver/FE/Math/VectorExpr.h create mode 100644 Code/Source/solver/FE/Quadrature/QuadratureRule.h create mode 100644 tests/unitTests/FE/Basis/test_BasisCacheFactory.cpp create mode 100644 tests/unitTests/FE/Basis/test_BasisErrorPaths.cpp create mode 100644 tests/unitTests/FE/Basis/test_BasisHessians.cpp create mode 100644 tests/unitTests/FE/Basis/test_ConstexprBasis.cpp create mode 
100644 tests/unitTests/FE/Basis/test_HigherOrderWedgePyramid.cpp create mode 100644 tests/unitTests/FE/Basis/test_LagrangeBasis.cpp create mode 100644 tests/unitTests/FE/Basis/test_SerendipityTensorModal.cpp create mode 100644 tests/unitTests/FE/Math/test_DenseLinearAlgebra.cpp create mode 100644 tests/unitTests/FE/Math/test_ExpressionOps.cpp create mode 100644 tests/unitTests/FE/Math/test_MathConstants.cpp create mode 100644 tests/unitTests/FE/Math/test_Matrix.cpp create mode 100644 tests/unitTests/FE/Math/test_MatrixExpr.cpp create mode 100644 tests/unitTests/FE/Math/test_Vector.cpp create mode 100644 tests/unitTests/FE/Math/test_VectorExpr.cpp diff --git a/Code/Source/solver/CMakeLists.txt b/Code/Source/solver/CMakeLists.txt index c546c2822..e42391862 100644 --- a/Code/Source/solver/CMakeLists.txt +++ b/Code/Source/solver/CMakeLists.txt @@ -23,15 +23,18 @@ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED True) include_directories(${SV_SOURCE_DIR}/ThirdParty/eigen/include) +include_directories(${SV_SOURCE_DIR}/ThirdParty/eigen/include/eigen3) include_directories(${SV_SOURCE_DIR}/ThirdParty/parmetis_internal/simvascular_parmetis_internal/ParMETISLib) include_directories(${SV_SOURCE_DIR}/ThirdParty/tetgen/simvascular_tetgen) include_directories(${SV_SOURCE_DIR}/ThirdParty/tinyxml/simvascular_tinyxml) include_directories(${MPI_C_INCLUDE_PATH}) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/Core) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/FE) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/FE/Common) # Find Trilinos package if requested @@ -86,7 +89,7 @@ endif() # add trilinos flags and defines if(USE_TRILINOS) ADD_DEFINITIONS(-DWITH_TRILINOS) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") + set(CMAKE_CXX_FLAGS 
"${CMAKE_CXX_FLAGS} -std=c++20") endif() # Build with the PETSc linear algebra package. @@ -245,9 +248,27 @@ file(GLOB SOLVER_FE_COMMON_SRCS CONFIGURE_DEPENDS FE/Common/*.h ) +file(GLOB SOLVER_FE_BASIS_SRCS CONFIGURE_DEPENDS + FE/Basis/*.cpp + FE/Basis/*.h +) + +file(GLOB SOLVER_FE_MATH_SRCS CONFIGURE_DEPENDS + FE/Math/*.cpp + FE/Math/*.h +) + +file(GLOB SOLVER_FE_QUADRATURE_SRCS CONFIGURE_DEPENDS + FE/Quadrature/*.cpp + FE/Quadrature/*.h +) + list(APPEND CSRCS ${SOLVER_CORE_SRCS} ${SOLVER_FE_COMMON_SRCS} + ${SOLVER_FE_BASIS_SRCS} + ${SOLVER_FE_MATH_SRCS} + ${SOLVER_FE_QUADRATURE_SRCS} ) # Set PETSc interace code. diff --git a/Code/Source/solver/FE/Basis/BasisCache.cpp b/Code/Source/solver/FE/Basis/BasisCache.cpp new file mode 100644 index 000000000..6d8a4ede3 --- /dev/null +++ b/Code/Source/solver/FE/Basis/BasisCache.cpp @@ -0,0 +1,309 @@ +/* Copyright (c) Stanford University, The Regents of the University of California, and others. + * + * All Rights Reserved. + * + * See License file. 
+ */ + +#include "BasisCache.h" +#include + +namespace svmp { +namespace FE { +namespace basis { + +namespace { + +QuadratureCacheKey make_quadrature_cache_key(const quadrature::QuadratureRule& quad) noexcept { /* Copies the dimension, point count, and both point hashes out of quad.point_fingerprint(). */ + const auto fingerprint = quad.point_fingerprint(); + return QuadratureCacheKey{fingerprint.dimension, + fingerprint.num_points, + fingerprint.points_hash_a, + fingerprint.points_hash_b}; +} + +void mix_hash_word(std::uint64_t word, + std::uint64_t& hash_a, + std::uint64_t& hash_b) noexcept { /* Folds one word into both running hashes; each hash uses its own constant and shift pair. */ + hash_a ^= word + 0x9e3779b97f4a7c15ULL + (hash_a << 6u) + (hash_a >> 2u); + hash_b ^= (word + 0xbf58476d1ce4e5b9ULL) + (hash_b << 7u) + (hash_b >> 3u); +} + +std::pair +identity_fingerprint(const std::string& identity) noexcept { /* Hashes the string length first, then every character in order. */ + std::uint64_t hash_a = 0xa4093822299f31d0ULL; + std::uint64_t hash_b = 0x082efa98ec4e6c89ULL; + mix_hash_word(static_cast(identity.size()), hash_a, hash_b); + for (const char c : identity) { + mix_hash_word(static_cast(static_cast(c)), hash_a, hash_b); + } + return {hash_a, hash_b}; +} + +BasisCacheKey make_basis_cache_key(const BasisFunction& basis, + const quadrature::QuadratureRule& quad, + bool gradients, + bool hessians) { /* Bases whose identity is structural return a StructuralBasisKey early; all others get a ParameterizedBasisKey carrying an identity hash pair. */ + StructuralBasisKey structural_key{ + basis.basis_type(), + basis.element_type(), + basis.dimension(), + basis.order(), + basis.size(), + basis.is_vector_valued(), + make_quadrature_cache_key(quad), + gradients, + hessians + }; + + BasisCacheKey key; + const bool uses_basis_identity = !basis.cache_identity_is_structural(); + if (!uses_basis_identity) { + key.value = structural_key; + return key; + } + + std::vector basis_identity_words; + const bool uses_structured_identity = basis.cache_identity_words(basis_identity_words); + if (!uses_structured_identity) { + basis_identity_words.clear(); + } + const std::string basis_identity = + uses_structured_identity ? 
std::string{} : basis.cache_identity(); + BasisIdentityFingerprint cached_identity_hash{}; + const bool has_cached_identity_hash = + uses_structured_identity && + basis.cache_identity_fingerprint(cached_identity_hash.hash_a, + cached_identity_hash.hash_b); /* Hash source, in order of preference: fingerprint supplied by the basis, hash of the identity words, hash of the identity string. */ + const auto identity_hash = uses_structured_identity + ? has_cached_identity_hash + ? std::pair{ + cached_identity_hash.hash_a, + cached_identity_hash.hash_b} + : [&basis_identity_words] { + const auto fingerprint = + compute_basis_identity_fingerprint(basis_identity_words); + return std::pair{ + fingerprint.hash_a, + fingerprint.hash_b}; + }() + : identity_fingerprint(basis_identity); + key.value = ParameterizedBasisKey{ + structural_key, + uses_structured_identity, + identity_hash.first, + identity_hash.second, + std::move(basis_identity_words), + basis_identity + }; + return key; +} + +} // namespace + +BasisCache& BasisCache::instance() { /* Returns the single function-local static cache object. */ + static BasisCache cache; + return cache; +} + +const BasisCacheEntry& BasisCache::get_or_compute( + const BasisFunction& basis, + const quadrature::QuadratureRule& quad, + bool gradients, + bool hessians) { /* The temporary shared_ptr is released when this returns, so the reference relies on the cache slot keeping the entry alive. */ + return *get_or_compute_shared(basis, quad, gradients, hessians); +} + +std::shared_ptr BasisCache::get_or_compute_shared( + const BasisFunction& basis, + const quadrature::QuadratureRule& quad, + bool gradients, + bool hessians) { /* On a miss, the first caller for a key computes the entry; later callers for the same key wait on its in-flight record. */ + const BasisCacheKey key = make_basis_cache_key(basis, quad, gradients, hessians); + + // Warm path: shared (reader) lock allows concurrent cache hits. 
+ { + std::shared_lock read_lock(mutex_); + auto it = slots_.find(key); + if (it != slots_.end() && it->second.entry) { + return it->second.entry; + } + } + + std::shared_ptr in_flight; + bool owner = false; + { + std::unique_lock write_lock(mutex_); /* Re-check under the exclusive lock: the entry may have been published, or a computation started, since the read lock was dropped. */ + auto& slot = slots_[key]; + if (slot.entry) { + return slot.entry; + } + + if (!slot.pending) { /* No computation in flight for this key: this caller becomes the owner. */ + in_flight = std::make_shared(); + slot.pending = in_flight; + owner = true; + } else { + in_flight = slot.pending; + } + } + + if (!owner) { /* Waiters block until the owner marks the record ready, then receive its entry or rethrow its exception. */ + std::unique_lock wait_lock(in_flight->mutex); + in_flight->ready_cv.wait(wait_lock, [&in_flight] { return in_flight->ready; }); + if (in_flight->exception) { + std::rethrow_exception(in_flight->exception); + } + return in_flight->entry; + } + + try { /* compute() runs with no lock held; mutex_ is taken again only to publish the result. */ + auto entry = std::make_shared(compute(basis, quad, gradients, hessians)); + { + std::unique_lock write_lock(mutex_); + auto slot_it = slots_.find(key); + if (slot_it == slots_.end()) { + slot_it = slots_.emplace(key, CacheSlot{}).first; + } + auto& slot = slot_it->second; + if (slot.entry) { + entry = slot.entry; + } else { + slot.entry = entry; + } + if (slot.pending == in_flight) { + slot.pending.reset(); + } + } + { + std::lock_guard ready_lock(in_flight->mutex); + in_flight->entry = entry; + in_flight->ready = true; + } + in_flight->ready_cv.notify_all(); + return entry; + } catch (...) 
{ /* Failure path: record the exception for waiters, remove the still-empty slot so a later call can retry, wake the waiters, then rethrow. */ + { + std::lock_guard ready_lock(in_flight->mutex); + in_flight->exception = std::current_exception(); + in_flight->ready = true; + } + { + std::unique_lock write_lock(mutex_); + auto slot_it = slots_.find(key); + if (slot_it != slots_.end() && slot_it->second.pending == in_flight) { + slot_it->second.pending.reset(); + if (!slot_it->second.entry) { + slots_.erase(slot_it); + } + } + } + in_flight->ready_cv.notify_all(); + throw; + } +} + +const BasisCacheEntry& BasisCache::prewarm( + const BasisFunction& basis, + const quadrature::QuadratureRule& quad, + bool gradients, + bool hessians) { /* Thin wrapper over get_or_compute(). */ + return get_or_compute(basis, quad, gradients, hessians); +} + +BasisCacheHandle BasisCache::prewarm_handle( + const BasisFunction& basis, + const quadrature::QuadratureRule& quad, + bool gradients, + bool hessians) { /* Wraps the shared entry pointer in a handle, so the handle itself keeps the entry alive. */ + return BasisCacheHandle(get_or_compute_shared(basis, quad, gradients, hessians)); +} + +BasisCacheEntry BasisCache::compute_uncached( + const BasisFunction& basis, + const quadrature::QuadratureRule& quad, + bool gradients, + bool hessians) const { /* Calls compute() directly; neither slots_ nor mutex_ is touched. */ + return compute(basis, quad, gradients, hessians); +} + +void BasisCache::clear() { /* Slots with a computation in flight are kept (their entry pointer is reset) so the owner can still publish; every other slot is erased. */ + std::unique_lock lock(mutex_); + for (auto it = slots_.begin(); it != slots_.end();) { + if (it->second.pending) { + it->second.entry.reset(); + ++it; + } else { + it = slots_.erase(it); + } + } +} + +std::size_t BasisCache::size() const { /* Counts only the slots that hold a completed entry; pending-only slots are skipped. */ + std::shared_lock lock(mutex_); + std::size_t completed = 0; + for (const auto& [key, slot] : slots_) { + (void)key; + if (slot.entry) { + ++completed; + } + } + return completed; +} + +BasisCacheEntry BasisCache::compute(const BasisFunction& basis, + const quadrature::QuadratureRule& quad, + bool gradients, + bool hessians) const { /* Sizes zero-filled buffers for the requested quantities, then asks the basis to fill them at the quadrature points. */ + BasisCacheEntry entry; + const auto& points = quad.points(); + entry.num_qpts = points.size(); + entry.num_dofs = basis.size(); + + const bool vector_basis = basis.is_vector_valued(); + if (!vector_basis) { + entry.scalar_values.assign(entry.num_dofs * 
entry.num_qpts, Real(0)); + if (gradients) { + entry.gradients.assign(entry.num_dofs * 3u * entry.num_qpts, Real(0)); + } + if (hessians) { + entry.hessians.assign(entry.num_dofs * 9u * entry.num_qpts, Real(0)); + } + } else { /* Vector bases: each derivative buffer is allocated only when gradients are requested and the basis reports support for that quantity. */ + entry.vector_values_xyz.assign(entry.num_dofs * 3u * entry.num_qpts, Real(0)); + if (gradients && basis.supports_vector_jacobians()) { + entry.vector_jacobians.assign(entry.num_dofs * 9u * entry.num_qpts, Real(0)); + } + if (gradients && basis.supports_curl()) { + entry.vector_curls_xyz.assign(entry.num_dofs * 3u * entry.num_qpts, Real(0)); + } + if (gradients && basis.supports_divergence()) { + entry.vector_divergence.assign(entry.num_dofs * entry.num_qpts, Real(0)); + } + } + + if (vector_basis) { /* Buffers that were left empty are passed to the basis as nullptr. */ + if (entry.num_dofs > 0 && entry.num_qpts > 0) { + basis.evaluate_vector_at_quadrature_points( + points, + entry.vector_values_xyz.data(), + entry.vector_jacobians.empty() ? nullptr : entry.vector_jacobians.data(), + entry.vector_curls_xyz.empty() ? nullptr : entry.vector_curls_xyz.data(), + entry.vector_divergence.empty() ? nullptr : entry.vector_divergence.data()); + } + return entry; + } + + if (entry.num_dofs > 0 && entry.num_qpts > 0) { + basis.fill_scalar_cache_entry(points, + entry.num_qpts, + entry.scalar_values.data(), + gradients ? entry.gradients.data() : nullptr, + hessians ? entry.hessians.data() : nullptr); + } + + return entry; +} +} // namespace basis +} // namespace FE +} // namespace svmp diff --git a/Code/Source/solver/FE/Basis/BasisCache.h b/Code/Source/solver/FE/Basis/BasisCache.h new file mode 100644 index 000000000..a84c0e87a --- /dev/null +++ b/Code/Source/solver/FE/Basis/BasisCache.h @@ -0,0 +1,456 @@ +/* Copyright (c) Stanford University, The Regents of the University of California, and others. + * + * All Rights Reserved. + * + * See License file. 
+ */ + +#ifndef SVMP_FE_BASIS_BASISCACHE_H +#define SVMP_FE_BASIS_BASISCACHE_H + +/** + * @file BasisCache.h + * @brief Cache for basis evaluations at quadrature points + */ + +#include "BasisFunction.h" +#include "Quadrature/QuadratureRule.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace svmp { +namespace FE { +namespace basis { + +struct QuadratureCacheKey { /* Cache-key view of a quadrature rule: dimension, point count, and two hashes of the point coordinates. */ + int dimension{0}; + std::size_t num_points{0}; + // Quadrature coordinates are intentionally fingerprinted from their exact + // Real bit patterns. Values such as -0.0 and +0.0 therefore produce + // distinct cache keys unless a future API explicitly normalizes them. The + // key intentionally ignores weights and rule class because basis values only + // depend on reference coordinates; bit-identical point sets share entries. + std::uint64_t points_hash_a{0}; + std::uint64_t points_hash_b{0}; + + bool operator==(const QuadratureCacheKey& other) const noexcept { + return dimension == other.dimension && + num_points == other.num_points && + points_hash_a == other.points_hash_a && + points_hash_b == other.points_hash_b; + } +}; + +struct StructuralBasisKey { /* Identifies a basis by type, element, dimension, order, DOF count and value kind, together with the quadrature key and the requested derivative flags. */ + BasisType basis_type{BasisType::Custom}; + ElementType element_type{ElementType::Unknown}; + int dimension{0}; + int order{0}; + std::size_t num_dofs{0}; + bool vector_valued{false}; + QuadratureCacheKey quadrature; + bool with_gradients{false}; + bool with_hessians{false}; + + bool operator==(const StructuralBasisKey& other) const noexcept { + return basis_type == other.basis_type && + element_type == other.element_type && + dimension == other.dimension && + order == other.order && + num_dofs == other.num_dofs && + vector_valued == other.vector_valued && + quadrature == other.quadrature && + with_gradients == other.with_gradients && + with_hessians == other.with_hessians; + } +}; + +struct ParameterizedBasisKey { /* Structural key extended with a basis identity: a hash pair plus the identity words and identity string it was derived from. */ + StructuralBasisKey 
structural; + bool uses_structured_identity{false}; + std::uint64_t identity_hash_a{0}; + std::uint64_t identity_hash_b{0}; + std::vector basis_identity_words; + std::string basis_identity; + + bool operator==(const ParameterizedBasisKey& other) const noexcept { /* Equality compares the full identity words and string, not only their hashes. */ + return structural == other.structural && + uses_structured_identity == other.uses_structured_identity && + identity_hash_a == other.identity_hash_a && + identity_hash_b == other.identity_hash_b && + basis_identity_words == other.basis_identity_words && + basis_identity == other.basis_identity; + } +}; + +struct BasisCacheKey { /* Holds whichever of the two key kinds applies to the basis. */ + std::variant value; + + bool operator==(const BasisCacheKey& other) const noexcept { + return value == other.value; + } +}; + +struct BasisCacheKeyHash { /* Hash functor for BasisCacheKey. */ + std::size_t operator()(const BasisCacheKey& key) const noexcept { + std::size_t seed = 0; + auto combine = [&seed](std::size_t value) noexcept { + seed ^= value + 0x9e3779b97f4a7c15ULL + (seed << 6u) + (seed >> 2u); + }; + + auto hash_structural = [&](const StructuralBasisKey& structural) noexcept { + combine(std::hash()(structural.quadrature.dimension)); + combine(std::hash()(structural.quadrature.num_points)); + combine(std::hash()(structural.quadrature.points_hash_a)); + combine(std::hash()(structural.quadrature.points_hash_b)); + combine(std::hash()(static_cast(structural.basis_type))); + combine(std::hash()(static_cast(structural.element_type))); + combine(std::hash()(structural.dimension)); + combine(std::hash()(structural.order)); + combine(std::hash()(structural.num_dofs)); + unsigned flags = 0u; /* The three booleans are packed into one word before hashing. */ + flags |= structural.vector_valued ? 1u : 0u; + flags |= structural.with_gradients ? 2u : 0u; + flags |= structural.with_hessians ? 
4u : 0u; + combine(std::hash()(flags)); + }; + + std::visit([&](const auto& active_key) { /* A distinct tag constant is mixed in per key kind. The identity words and identity string are not hashed here; only operator== compares them. */ + using ActiveKey = std::decay_t; + if constexpr (std::is_same_v) { + combine(0x5354525543544b45ULL); + hash_structural(active_key); + } else { + combine(0x504152414d4b4559ULL); + hash_structural(active_key.structural); + combine(active_key.uses_structured_identity ? 1u : 0u); + combine(std::hash()(active_key.identity_hash_a)); + combine(std::hash()(active_key.identity_hash_b)); + } + }, key.value); + return seed; + } +}; + +struct BasisCacheEntry { /* Tabulated basis data for num_dofs functions at num_qpts points, stored as flat arrays with the layouts noted on each member. */ + std::size_t num_qpts{0}; + std::size_t num_dofs{0}; + // Scalar basis values in dof-major SoA layout: [dof * num_qpts + qp]. + std::vector scalar_values; + // Scalar reference gradients in dof/component/qpt SoA layout: + // [(dof * 3 + component) * num_qpts + qp]. + std::vector gradients; + // Scalar reference Hessians in dof/component/qpt SoA layout: + // [(dof * 9 + row * 3 + col) * num_qpts + qp]. + std::vector hessians; + + // Vector basis values in dof/component/qpt SoA layout: + // [(dof * 3 + component) * num_qpts + qp]. + std::vector vector_values_xyz; + // Vector basis reference Jacobians in dof/component/derivative/qpt layout: + // [(dof * 9 + component * 3 + derivative) * num_qpts + qp]. + std::vector vector_jacobians; + // Vector basis curls in dof/component/qpt SoA layout. + std::vector vector_curls_xyz; + // Vector basis divergences in dof/qpt SoA layout. + std::vector vector_divergence; + + // The object-returning accessors below are convenience helpers for tests, + // diagnostics, and occasional scalar use. Hot loops should prefer the SoA + // span accessors so they do not reconstruct Gradient, Hessian, or matrix + // objects per DOF and quadrature point. 
+ + [[nodiscard]] Real scalarValue(std::size_t dof, std::size_t qp) const noexcept { /* Unchecked lookup: scalar_values must be populated and dof/qp in range. */ + return scalar_values[dof * num_qpts + qp]; + } + + [[nodiscard]] std::span scalarValuesForDof(std::size_t dof) const noexcept { + if (num_qpts == 0 || scalar_values.empty()) return {}; /* Empty span when no scalar values are stored, matching the vector accessors. */ + return std::span(scalar_values.data() + dof * num_qpts, num_qpts); + } + + [[nodiscard]] Real gradientValue(std::size_t dof, + std::size_t component, + std::size_t qp) const noexcept { /* Unchecked lookup: gradients must be populated. */ + return gradients[(dof * 3u + component) * num_qpts + qp]; + } + + [[nodiscard]] Gradient gradientVector(std::size_t dof, std::size_t qp) const noexcept { + Gradient out{}; + for (std::size_t component = 0; component < 3u; ++component) { + out[component] = gradientValue(dof, component, qp); + } + return out; + } + + [[nodiscard]] std::span gradientsForDofComponent(std::size_t dof, + std::size_t component) const noexcept { + if (num_qpts == 0 || gradients.empty()) return {}; /* Empty span when gradients were not stored, instead of a span over unallocated storage. */ + return std::span(gradients.data() + (dof * 3u + component) * num_qpts, num_qpts); + } + + [[nodiscard]] std::span gradientsForDof(std::size_t dof) const noexcept { + if (num_qpts == 0 || gradients.empty()) return {}; /* Empty span when gradients were not stored. */ + return std::span(gradients.data() + dof * 3u * num_qpts, 3u * num_qpts); + } + + [[nodiscard]] Real hessianValue(std::size_t dof, + std::size_t row, + std::size_t col, + std::size_t qp) const noexcept { /* Unchecked lookup: hessians must be populated. */ + return hessians[(dof * 9u + row * 3u + col) * num_qpts + qp]; + } + + [[nodiscard]] Hessian hessianMatrix(std::size_t dof, std::size_t qp) const noexcept { + Hessian out{}; + for (std::size_t row = 0; row < 3u; ++row) { + for (std::size_t col = 0; col < 3u; ++col) { + out(row, col) = hessianValue(dof, row, col, qp); + } + } + return out; + } + + [[nodiscard]] std::span hessiansForDofComponent(std::size_t dof, + std::size_t row, + std::size_t col) const noexcept { + if (num_qpts == 0 || hessians.empty()) return {}; /* Empty span when Hessians were not stored. */ + return std::span(hessians.data() + (dof * 9u + row * 3u + col) * num_qpts, num_qpts); + } + + [[nodiscard]] std::span hessiansForDof(std::size_t dof) const noexcept { + if (num_qpts == 0 || hessians.empty()) return {}; /* Empty span when Hessians were not stored. */ 
+ return std::span(hessians.data() + dof * 9u * num_qpts, 9u * num_qpts); + } + + [[nodiscard]] Real vectorValue(std::size_t dof, + std::size_t component, + std::size_t qp) const noexcept { /* Unchecked lookup: vector_values_xyz must be populated. */ + return vector_values_xyz[(dof * 3u + component) * num_qpts + qp]; + } + + [[nodiscard]] math::Vector vectorValue(std::size_t dof, + std::size_t qp) const noexcept { + math::Vector out{}; + for (std::size_t component = 0; component < 3u; ++component) { + out[component] = vectorValue(dof, component, qp); + } + return out; + } + + [[nodiscard]] std::span vectorValuesForDofComponent(std::size_t dof, + std::size_t component) const noexcept { + if (num_qpts == 0 || vector_values_xyz.empty()) return {}; /* Same empty-storage guard as vectorValuesForDof. */ + return std::span(vector_values_xyz.data() + (dof * 3u + component) * num_qpts, num_qpts); + } + + [[nodiscard]] std::span vectorValuesForDof(std::size_t dof) const noexcept { + if (num_qpts == 0 || vector_values_xyz.empty()) return {}; + return std::span(vector_values_xyz.data() + dof * 3u * num_qpts, 3u * num_qpts); + } + + [[nodiscard]] Real vectorJacobianValue(std::size_t dof, + std::size_t component, + std::size_t derivative, + std::size_t qp) const noexcept { + return vector_jacobians[(dof * 9u + component * 3u + derivative) * num_qpts + qp]; + } + + [[nodiscard]] VectorJacobian vectorJacobianMatrix(std::size_t dof, + std::size_t qp) const noexcept { + VectorJacobian out{}; + for (std::size_t component = 0; component < 3u; ++component) { + for (std::size_t derivative = 0; derivative < 3u; ++derivative) { + out(component, derivative) = + vectorJacobianValue(dof, component, derivative, qp); + } + } + return out; + } + + [[nodiscard]] std::span vectorJacobiansForDofComponentDerivative( + std::size_t dof, + std::size_t component, + std::size_t derivative) const noexcept { + if (num_qpts == 0 || vector_jacobians.empty()) return {}; + return std::span( + vector_jacobians.data() + (dof * 9u + component * 3u + derivative) * num_qpts, + num_qpts); + } + + [[nodiscard]] std::span vectorJacobiansForDof(std::size_t 
dof) const noexcept { + if (num_qpts == 0 || vector_jacobians.empty()) return {}; + return std::span(vector_jacobians.data() + dof * 9u * num_qpts, 9u * num_qpts); + } + + [[nodiscard]] Real vectorCurlValue(std::size_t dof, + std::size_t component, + std::size_t qp) const noexcept { + return vector_curls_xyz[(dof * 3u + component) * num_qpts + qp]; + } + + [[nodiscard]] math::Vector vectorCurl(std::size_t dof, + std::size_t qp) const noexcept { + math::Vector out{}; + for (std::size_t component = 0; component < 3u; ++component) { + out[component] = vectorCurlValue(dof, component, qp); + } + return out; + } + + [[nodiscard]] std::span vectorCurlsForDofComponent(std::size_t dof, + std::size_t component) const noexcept { + if (num_qpts == 0 || vector_curls_xyz.empty()) return {}; + return std::span(vector_curls_xyz.data() + (dof * 3u + component) * num_qpts, num_qpts); + } + + [[nodiscard]] std::span vectorCurlsForDof(std::size_t dof) const noexcept { + if (num_qpts == 0 || vector_curls_xyz.empty()) return {}; + return std::span(vector_curls_xyz.data() + dof * 3u * num_qpts, 3u * num_qpts); + } + + [[nodiscard]] Real vectorDivergenceValue(std::size_t dof, + std::size_t qp) const noexcept { + return vector_divergence[dof * num_qpts + qp]; + } + + [[nodiscard]] std::span vectorDivergenceForDof(std::size_t dof) const noexcept { + if (num_qpts == 0 || vector_divergence.empty()) return {}; + return std::span(vector_divergence.data() + dof * num_qpts, num_qpts); + } +}; + +class BasisCacheHandle { /* Holds a shared pointer to a cache entry; a default-constructed handle is empty. */ +public: + BasisCacheHandle() = default; + + [[nodiscard]] const BasisCacheEntry& entry() const { /* Raises through BASIS_CHECK_CONFIG when the handle is empty. */ + BASIS_CHECK_CONFIG(entry_ != nullptr, + "BasisCacheHandle: attempted to access an empty handle"); + return *entry_; + } + + [[nodiscard]] bool valid() const noexcept { return entry_ != nullptr; } + explicit operator bool() const noexcept { return valid(); } + +private: + friend class BasisCache; + + explicit BasisCacheHandle(std::shared_ptr entry) + : entry_(std::move(entry)) {} + + 
std::shared_ptr entry_; +}; + +class BasisCache { /* Cache of tabulated basis data keyed by BasisCacheKey; the slot map is guarded by mutex_. */ +public: + static BasisCache& instance(); /* Accessor for the shared cache object. */ + + const BasisCacheEntry& get_or_compute( + const BasisFunction& basis, + const quadrature::QuadratureRule& quad, + bool gradients = true, + bool hessians = false); /* NOTE(review): the returned reference is backed only by the cache slot, and clear() erases completed slots; callers that may overlap with clear() should hold a BasisCacheHandle from prewarm_handle() instead. */ + + /** + * @brief Compute an entry without consulting, publishing to, or waiting on + * the shared cache. + */ + BasisCacheEntry compute_uncached( + const BasisFunction& basis, + const quadrature::QuadratureRule& quad, + bool gradients = true, + bool hessians = false) const; + + /** + * @brief Eagerly populate the cache for the given (basis, quadrature) key + * + * Pays the compute cost up front so that subsequent get_or_compute calls + * for the same key hit the warm-cache path immediately. Equivalent to + * calling get_or_compute and discarding the return value. + * + * Returns the inserted (or pre-existing) entry for convenience. + */ + const BasisCacheEntry& prewarm( + const BasisFunction& basis, + const quadrature::QuadratureRule& quad, + bool gradients = true, + bool hessians = false); + + /** + * @brief Eagerly populate the cache and return a hot-loop handle. + * + * The returned handle owns a shared reference to the completed entry. Access + * through BasisCacheHandle::entry() performs no key construction, hashing, + * map lookup, or cache mutex acquisition. Calling clear() removes the entry + * from the global lookup map but does not invalidate existing handles. + */ + BasisCacheHandle prewarm_handle( + const BasisFunction& basis, + const quadrature::QuadratureRule& quad, + bool gradients = true, + bool hessians = false); + + /** + * @brief Remove completed cache entries. + * + * This is a soft clear: computations that were already in flight before + * clear() was called are allowed to publish their completed entry afterward. 
+ * This preserves the returned-reference lifetime contract for concurrent + * get_or_compute() callers while still dropping all entries that had already + * completed at the time of the call. + */ + void clear(); + std::size_t size() const; /* Number of completed entries currently held. */ + +private: + struct InFlightComputation { /* Rendezvous record shared between the computing caller and any waiters: a ready flag plus either the entry or the captured exception. */ + std::mutex mutex; + std::condition_variable ready_cv; + bool ready{false}; + std::shared_ptr entry; + std::exception_ptr exception; + }; + + struct CacheSlot { /* entry: completed result, if any. pending: computation currently in flight, if any. */ + std::shared_ptr entry; + std::shared_ptr pending; + }; + + BasisCache() = default; + BasisCache(const BasisCache&) = delete; + BasisCache& operator=(const BasisCache&) = delete; + + BasisCacheEntry compute(const BasisFunction& basis, + const quadrature::QuadratureRule& quad, + bool gradients, + bool hessians) const; + + std::shared_ptr get_or_compute_shared( + const BasisFunction& basis, + const quadrature::QuadratureRule& quad, + bool gradients, + bool hessians); + + mutable std::shared_mutex mutex_; /* Guards slots_. */ + std::unordered_map slots_; +}; + +} // namespace basis +} // namespace FE +} // namespace svmp + +#endif // SVMP_FE_BASIS_BASISCACHE_H diff --git a/Code/Source/solver/FE/Basis/BasisExceptions.h b/Code/Source/solver/FE/Basis/BasisExceptions.h new file mode 100644 index 000000000..8ee92a3dd --- /dev/null +++ b/Code/Source/solver/FE/Basis/BasisExceptions.h @@ -0,0 +1,134 @@ +/* Copyright (c) Stanford University, The Regents of the University of California, and others. + * + * All Rights Reserved. + * + * See License file. 
+ */ + +#ifndef SVMP_FE_BASIS_BASISEXCEPTIONS_H +#define SVMP_FE_BASIS_BASISEXCEPTIONS_H + +#include "FEException.h" + +namespace svmp { +namespace FE { +namespace basis { + +/** + * @brief Base exception type for errors originating in the Basis module + * + * Forwards the message, the status code, and the source location (file, line, + * function) to FEException. The status defaults to StatusCode::Unknown; the + * location arguments default to empty strings and line 0. + */ +class BasisException : public FEException { +public: + BasisException(const std::string& message, + const char* file = "", + int line = 0, + const char* function = "", + StatusCode status = StatusCode::Unknown) + : FEException(message, status, file, line, function) {} +}; + +/** + * @brief Invalid Basis request or configuration + * + * Always constructed with StatusCode::InvalidArgument. + */ +class BasisConfigurationException : public BasisException { +public: + BasisConfigurationException(const std::string& message, + const char* file = "", + int line = 0, + const char* function = "") + : BasisException(message, file, line, function, StatusCode::InvalidArgument) {} +}; + +/** + * @brief Requested element topology is incompatible with the basis family + * + * Always constructed with StatusCode::InvalidArgument. + */ +class BasisElementCompatibilityException : public BasisException { +public: + BasisElementCompatibilityException(const std::string& message, + const char* file = "", + int line = 0, + const char* function = "") + : BasisException(message, file, line, function, StatusCode::InvalidArgument) {} +}; + +/** + * @brief Basis evaluation request cannot be satisfied + * + * Always constructed with StatusCode::InvalidArgument. + */ +class BasisEvaluationException : public BasisException { +public: + BasisEvaluationException(const std::string& message, + const char* file = "", + int line = 0, + const char* function = "") + : BasisException(message, file, line, function, StatusCode::InvalidArgument) {} +}; + +/** + * @brief Public-to-canonical node ordering or coordinate lookup failure + * + * Always constructed with StatusCode::InvalidArgument. + */ +class BasisNodeOrderingException : public BasisException { +public: + BasisNodeOrderingException(const std::string& message, + const char* file = "", + int line = 0, + const char* function = "") + : BasisException(message, file, line, function, StatusCode::InvalidArgument) {} +}; + +/** + * @brief 
Internal basis construction or transform setup failure + * + * Always constructed with StatusCode::InternalError. + */ +class BasisConstructionException : public BasisException { +public: + BasisConstructionException(const std::string& message, + const char* file = "", + int line = 0, + const char* function = "") + : BasisException(message, file, line, function, StatusCode::InternalError) {} +}; + +/* + * BASIS_CHECK_* helpers: evaluate the condition and, when it is false, throw + * the matching exception with the given message and the call site's __FILE__, + * __LINE__, and __func__. Each body is wrapped in do { } while (false) so an + * expansion followed by a semicolon forms a single statement. + * + * BASIS_CHECK_CONFIG throws BasisConfigurationException + * BASIS_CHECK_COMPAT throws BasisElementCompatibilityException + * BASIS_CHECK_EVAL throws BasisEvaluationException + * BASIS_CHECK_NODE_ORDER throws BasisNodeOrderingException + * BASIS_CHECK_CONSTRUCTION throws BasisConstructionException + */ +#define BASIS_CHECK_CONFIG(condition, message) \ + do { \ + if (!(condition)) { \ + throw ::svmp::FE::basis::BasisConfigurationException((message), \ + __FILE__, __LINE__, __func__); \ + } \ + } while (false) + +#define BASIS_CHECK_COMPAT(condition, message) \ + do { \ + if (!(condition)) { \ + throw ::svmp::FE::basis::BasisElementCompatibilityException((message), \ + __FILE__, __LINE__, __func__); \ + } \ + } while (false) + +#define BASIS_CHECK_EVAL(condition, message) \ + do { \ + if (!(condition)) { \ + throw ::svmp::FE::basis::BasisEvaluationException((message), \ + __FILE__, __LINE__, __func__); \ + } \ + } while (false) + +#define BASIS_CHECK_NODE_ORDER(condition, message) \ + do { \ + if (!(condition)) { \ + throw ::svmp::FE::basis::BasisNodeOrderingException((message), \ + __FILE__, __LINE__, __func__); \ + } \ + } while (false) + +#define BASIS_CHECK_CONSTRUCTION(condition, message) \ + do { \ + if (!(condition)) { \ + throw ::svmp::FE::basis::BasisConstructionException((message), \ + __FILE__, __LINE__, __func__); \ + } \ + } while (false) + +} // namespace basis +} // namespace FE +} // namespace svmp + +#endif // SVMP_FE_BASIS_BASISEXCEPTIONS_H diff --git a/Code/Source/solver/FE/Basis/BasisFactory.cpp b/Code/Source/solver/FE/Basis/BasisFactory.cpp new file mode 100644 index 000000000..dddbd4c5c --- /dev/null +++ b/Code/Source/solver/FE/Basis/BasisFactory.cpp @@ -0,0 +1,160 @@ +/* Copyright (c) Stanford University, The Regents of the University of California, and others. + * + * All Rights Reserved. + * + * See License file. 
+ */ + +#include "BasisFactory.h" + +#include "LagrangeBasis.h" +#include "SerendipityBasis.h" + +#include +#include +#include + +namespace svmp { +namespace FE { +namespace basis { + +namespace { + +using CustomRegistryMap = + std::unordered_map; + +/* Registry of custom factories, held in a function-local static. */ +CustomRegistryMap& custom_registry() { + static CustomRegistryMap registry; + return registry; +} + +/* Mutex locked by every function in this file before it touches custom_registry(). */ +std::mutex& custom_registry_mutex() { + static std::mutex mutex; + return mutex; +} + +/* + * Return req.order. Throws BasisConfigurationException with missing_message + * when the order is unset, or with negative_message when it is below zero. + */ +int require_basis_order(const BasisRequest& req, + const char* missing_message, + const char* negative_message) { + if (!req.order.has_value()) { + throw BasisConfigurationException(missing_message, + __FILE__, __LINE__, __func__); + } + if (*req.order < 0) { + throw BasisConfigurationException(negative_message, + __FILE__, __LINE__, __func__); + } + return *req.order; +} + +/* + * Throw BasisConfigurationException unless req asks for a scalar field with + * C0 continuity. The field type is checked first. + */ +void require_scalar_c0_request(const BasisRequest& req) { + if (req.field_type != FieldType::Scalar) { + throw BasisConfigurationException( + "BasisFactory: Lagrange/Serendipity bases currently support scalar fields only", + __FILE__, __LINE__, __func__); + } + if (req.continuity != Continuity::C0) { + throw BasisConfigurationException( + "BasisFactory: migrated Lagrange/Serendipity scope supports C0 continuity only", + __FILE__, __LINE__, __func__); + } +} + +/* Validate req, then build the basis from req.element_type and the checked order. */ +std::shared_ptr create_lagrange(const BasisRequest& req) { + require_scalar_c0_request(req); + const int order = require_basis_order( + req, + "BasisFactory: Lagrange creation requires an explicit order", + "BasisFactory: Lagrange requires non-negative order"); + return std::make_shared(req.element_type, order); +} + +/* Same validation as create_lagrange, with Serendipity-specific messages. */ +std::shared_ptr create_serendipity(const BasisRequest& req) { + require_scalar_c0_request(req); + const int order = require_basis_order( + req, + "BasisFactory: Serendipity creation requires an explicit order", + "BasisFactory: Serendipity requires non-negative order"); + return std::make_shared(req.element_type, order); +} + +/* + * Look up req.custom_id in the registry and invoke the registered factory. + * Throws BasisConfigurationException for an empty or unregistered id and + * BasisConstructionException when the factory returns a null pointer. + */ +std::shared_ptr create_custom(const BasisRequest& req) { + if 
(req.custom_id.empty()) { + throw BasisConfigurationException( + "BasisFactory: custom basis requests require custom_id", + __FILE__, __LINE__, __func__); + } + + /* Copy the callable inside the locked scope; it is invoked after the lock is released. */ + basis_factory::CustomFactory factory; + { + std::lock_guard lock(custom_registry_mutex()); + const auto it = custom_registry().find(req.custom_id); + if (it == custom_registry().end()) { + throw BasisConfigurationException( + "BasisFactory: no custom basis factory registered for id '" + + req.custom_id + "'", + __FILE__, __LINE__, __func__); + } + factory = it->second; + } + + auto basis = factory(req); + if (!basis) { + throw BasisConstructionException( + "BasisFactory: custom factory returned null basis for id '" + + req.custom_id + "'", + __FILE__, __LINE__, __func__); + } + return basis; +} + +} // namespace + +namespace basis_factory { + +/* + * Dispatch on req.basis_type. Any type other than Lagrange, Serendipity, or + * Custom throws BasisConfigurationException. + */ +std::shared_ptr create(const BasisRequest& req) { + switch (req.basis_type) { + case BasisType::Lagrange: + return create_lagrange(req); + case BasisType::Serendipity: + return create_serendipity(req); + case BasisType::Custom: + return create_custom(req); + default: + throw BasisConfigurationException( + "BasisFactory: requested basis family is outside the migrated Lagrange/Serendipity scope", + __FILE__, __LINE__, __func__); + } +} + +/* + * Store factory under custom_id. An empty id or a non-callable factory throws + * BasisConfigurationException. Assignment through operator[] replaces any + * factory already registered under the same id. + */ +void register_custom(std::string custom_id, CustomFactory factory) { + if (custom_id.empty()) { + throw BasisConfigurationException( + "BasisFactory: custom factory id must not be empty", + __FILE__, __LINE__, __func__); + } + if (!factory) { + throw BasisConfigurationException( + "BasisFactory: custom factory must be callable", + __FILE__, __LINE__, __func__); + } + + std::lock_guard lock(custom_registry_mutex()); + custom_registry()[std::move(custom_id)] = std::move(factory); +} + +/* Erase the registration for custom_id while holding the registry mutex. */ +void unregister_custom(const std::string& custom_id) { + std::lock_guard lock(custom_registry_mutex()); + custom_registry().erase(custom_id); +} + +/* Drop every registration while holding the registry mutex. */ +void clear_custom_registry_for_tests() { + std::lock_guard lock(custom_registry_mutex()); + 
custom_registry().clear(); +} + +} // namespace basis_factory + +} // namespace basis +} // namespace FE +} // namespace svmp diff --git a/Code/Source/solver/FE/Basis/BasisFactory.h b/Code/Source/solver/FE/Basis/BasisFactory.h new file mode 100644 index 000000000..cedf1ba6d --- /dev/null +++ b/Code/Source/solver/FE/Basis/BasisFactory.h @@ -0,0 +1,57 @@ +/* Copyright (c) Stanford University, The Regents of the University of California, and others. + * + * All Rights Reserved. + * + * See License file. + */ + +#ifndef SVMP_FE_BASIS_BASISFACTORY_H +#define SVMP_FE_BASIS_BASISFACTORY_H + +/** + * @file BasisFactory.h + * @brief Runtime creation of basis families + */ + +#include "BasisFunction.h" +#include +#include +#include +#include +#include + +namespace svmp { +namespace FE { +namespace basis { + +/** + * @brief Parameters passed to basis_factory::create and on to custom factories. + * + * element_type and basis_type have no default member initializer, so callers + * must set them. order is optional; continuity defaults to Continuity::C0 and + * field_type to FieldType::Scalar. NOTE(review): the knot, weight, axis, and + * tensor fields are not read by the built-in Lagrange/Serendipity paths in + * BasisFactory.cpp; they are presumably for parameterized or custom bases. + */ +struct BasisRequest { + ElementType element_type; + BasisType basis_type; + std::optional order{}; + Continuity continuity{Continuity::C0}; + FieldType field_type{FieldType::Scalar}; + std::vector knot_vector{}; + std::vector weights{}; + std::vector axis_orders{}; + std::vector> axis_knot_vectors{}; + std::vector> axis_weights{}; + std::vector tensor_extents{}; + std::string custom_id{}; +}; + +namespace basis_factory { + +/** @brief Callable that builds a basis from a BasisRequest. */ +using CustomFactory = std::function(const BasisRequest&)>; + +/** @brief Create the basis described by req. */ +[[nodiscard]] std::shared_ptr create(const BasisRequest& req); +/** @brief Register factory under custom_id. */ +void register_custom(std::string custom_id, CustomFactory factory); +/** @brief Remove the factory registered under custom_id. */ +void unregister_custom(const std::string& custom_id); +/** @brief Remove every registered custom factory. */ +void clear_custom_registry_for_tests(); + +} // namespace basis_factory + +} // namespace basis +} // namespace FE +} // namespace svmp + +#endif // SVMP_FE_BASIS_BASISFACTORY_H diff --git a/Code/Source/solver/FE/Basis/BasisFunction.cpp b/Code/Source/solver/FE/Basis/BasisFunction.cpp new file mode 100644 index 000000000..49c8d8763 --- /dev/null +++ b/Code/Source/solver/FE/Basis/BasisFunction.cpp @@ -0,0 +1,366 @@ +/* Copyright (c) Stanford University, The Regents of the University of California, and 
others. + * + * All Rights Reserved. + * + * See License file. + */ + +#include "BasisFunction.h" +#include "VectorBasisEvaluationHelpers.h" +#include +#include +#include +#include + +namespace svmp { +namespace FE { +namespace basis { + +namespace { + +/* + * Reusable temporary vectors for the default BasisFunction implementations in + * this file. prewarm() reserves capacity in every vector without changing + * its size. + */ +struct BasisFunctionScratch { + std::vector scalar_values; + std::vector scalar_gradients; + std::vector scalar_hessians; + std::vector> vector_values; + std::vector vector_jacobians; + std::vector> vector_curls; + std::vector vector_divergences; + + void prewarm(std::size_t max_size) { + scalar_values.reserve(max_size); + scalar_gradients.reserve(max_size); + scalar_hessians.reserve(max_size); + vector_values.reserve(max_size); + vector_jacobians.reserve(max_size); + vector_curls.reserve(max_size); + vector_divergences.reserve(max_size); + } +}; + +BasisFunctionScratch& basis_function_scratch() { + // Scratch is intentionally thread-local: production assembly uses a + // persistent worker-thread team, so buffers stay warm on each worker. 
+ static thread_local BasisFunctionScratch scratch; + return scratch; +} + +/* + * Fold one 64-bit word into both running hashes. Each hash uses its own + * additive constant and shift pair. + */ +void mix_identity_hash_word(std::uint64_t word, + std::uint64_t& hash_a, + std::uint64_t& hash_b) noexcept { + hash_a ^= word + 0x9e3779b97f4a7c15ULL + (hash_a << 6u) + (hash_a >> 2u); + hash_b ^= (word + 0xbf58476d1ce4e5b9ULL) + (hash_b << 7u) + (hash_b >> 3u); +} + +} // namespace + +/* + * Start from two fixed seeds, mix in the word count first, and then mix in + * each word in order. + */ +BasisIdentityFingerprint +compute_basis_identity_fingerprint(std::span words) noexcept { + BasisIdentityFingerprint fingerprint{0x243f6a8885a308d3ULL, + 0x13198a2e03707344ULL}; + mix_identity_hash_word(static_cast(words.size()), + fingerprint.hash_a, + fingerprint.hash_b); + for (const auto word : words) { + mix_identity_hash_word(word, fingerprint.hash_a, fingerprint.hash_b); + } + return fingerprint; +} + +/* + * Default identity string: basis type, element type, dimension, order, size, + * and the vector-valued flag, joined with '|' separators. + */ +std::string BasisFunction::cache_identity() const { + std::ostringstream oss; + oss << "basis=" << static_cast(basis_type()) + << "|elem=" << static_cast(element_type()) + << "|dim=" << dimension() + << "|order=" << order() + << "|size=" << size() + << "|vector=" << is_vector_valued(); + return oss.str(); +} + +/* Default: no structured identity words are provided; words is left untouched. */ +bool BasisFunction::cache_identity_words(std::vector& words) const { + (void)words; + return false; +} + +/* Default: no cached fingerprint is provided; both outputs are left untouched. */ +bool BasisFunction::cache_identity_fingerprint(std::uint64_t& hash_a, + std::uint64_t& hash_b) const { + (void)hash_a; + (void)hash_b; + return false; +} + +/* + * Reserve max_size entries in the calling thread's scratch vectors. max_qpts + * is accepted but not used. + */ +void prewarm_basis_function_scratch(std::size_t max_size, + std::size_t max_qpts) { + (void)max_qpts; + basis_function_scratch().prewarm(max_size); +} + +/* Default: always throws BasisEvaluationException; the arguments are not used. */ +void BasisFunction::evaluate_gradients(const math::Vector& xi, + std::vector& gradients) const { + (void)xi; + (void)gradients; + throw BasisEvaluationException("Analytic gradient evaluation is not implemented for this basis", + __FILE__, __LINE__, __func__); +} + +/* Default: always throws BasisEvaluationException; the arguments are not used. */ +void BasisFunction::evaluate_hessians(const math::Vector& xi, + std::vector& hessians) const { + (void)xi; + (void)hessians; + throw BasisEvaluationException("Analytic Hessian evaluation is not implemented for this basis", + 
__FILE__, __LINE__, __func__); +} + +/* Default fused path: values, then gradients, then Hessians, as three separate calls. */ +void BasisFunction::evaluate_all(const math::Vector& xi, + std::vector& values, + std::vector& gradients, + std::vector& hessians) const { + evaluate_values(xi, values); + evaluate_gradients(xi, gradients); + evaluate_hessians(xi, hessians); +} + +/* + * Evaluate into the thread-local scratch vector, then copy tmp.size() entries + * to values_out. + */ +void BasisFunction::evaluate_values_to(const math::Vector& xi, + Real* SVMP_RESTRICT values_out) const { + auto& tmp = basis_function_scratch().scalar_values; + tmp.resize(size()); + evaluate_values(xi, tmp); + std::copy_n(tmp.data(), tmp.size(), values_out); +} + +/* + * Evaluate into scratch, then write three components per basis function: + * gradients_out[i * 3 + c] is component c of gradient i. + */ +void BasisFunction::evaluate_gradients_to(const math::Vector& xi, + Real* SVMP_RESTRICT gradients_out) const { + auto& tmp = basis_function_scratch().scalar_gradients; + tmp.resize(size()); + evaluate_gradients(xi, tmp); + for (std::size_t i = 0; i < tmp.size(); ++i) { + gradients_out[i * 3u + 0u] = tmp[i][0]; + gradients_out[i * 3u + 1u] = tmp[i][1]; + gradients_out[i * 3u + 2u] = tmp[i][2]; + } +} + +/* + * Evaluate into scratch, then store each Hessian as a nine-entry block that + * starts at hessians_out + i * 9. + */ +void BasisFunction::evaluate_hessians_to(const math::Vector& xi, + Real* SVMP_RESTRICT hessians_out) const { + auto& tmp = basis_function_scratch().scalar_hessians; + tmp.resize(size()); + evaluate_hessians(xi, tmp); + for (std::size_t i = 0; i < tmp.size(); ++i) { + store_hessian(tmp[i], hessians_out + i * 9u); + } +} + +/* Unpadded layout: forwards to the strided evaluator with output_stride equal to points.size(). */ +void BasisFunction::evaluate_at_quadrature_points( + const std::vector>& points, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) const { + evaluate_at_quadrature_points_strided( + points, points.size(), values_out, gradients_out, hessians_out); +} + +/* + * Default strided evaluator: one point evaluation per quadrature point, + * followed by a scatter into the DOF-major output rows. Throws + * BasisConfigurationException when output_stride is smaller than + * points.size(). Outputs whose pointer is null are skipped. + */ +void BasisFunction::evaluate_at_quadrature_points_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) const { + const std::size_t num_qpts = points.size(); + const std::size_t num_dofs = size(); + if (output_stride < num_qpts) { + throw BasisConfigurationException( + "BasisFunction 
strided evaluation requires output_stride >= points.size()", + __FILE__, __LINE__, __func__); + } + + auto& scratch = basis_function_scratch(); + auto& v_tmp = scratch.scalar_values; + auto& g_tmp = scratch.scalar_gradients; + auto& h_tmp = scratch.scalar_hessians; + if (values_out) v_tmp.resize(num_dofs); + if (gradients_out) g_tmp.resize(num_dofs); + if (hessians_out) h_tmp.resize(num_dofs); + + for (std::size_t q = 0; q < num_qpts; ++q) { + /* The fused evaluate_all is used only when all three outputs are requested. */ + if (values_out && gradients_out && hessians_out) { + evaluate_all(points[q], v_tmp, g_tmp, h_tmp); + } else { + if (values_out) evaluate_values(points[q], v_tmp); + if (gradients_out) evaluate_gradients(points[q], g_tmp); + if (hessians_out) evaluate_hessians(points[q], h_tmp); + } + + if (values_out) { + for (std::size_t dof = 0; dof < num_dofs; ++dof) { + values_out[dof * output_stride + q] = v_tmp[dof]; + } + } + if (gradients_out) { + for (std::size_t dof = 0; dof < num_dofs; ++dof) { + for (std::size_t component = 0; component < 3u; ++component) { + gradients_out[(dof * 3u + component) * output_stride + q] = + g_tmp[dof][component]; + } + } + } + if (hessians_out) { + for (std::size_t dof = 0; dof < num_dofs; ++dof) { + store_hessian_strided( + h_tmp[dof], hessians_out + dof * 9u * output_stride, output_stride, q); + } + } + } +} + +/* Default: forwards unchanged to the strided evaluator, which overwrites every requested entry. */ +void BasisFunction::fill_scalar_cache_entry( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) const { + evaluate_at_quadrature_points_strided( + points, output_stride, values_out, gradients_out, hessians_out); +} + +/* Unpadded layout: forwards to the strided vector evaluator with output_stride equal to points.size(). */ +void BasisFunction::evaluate_vector_at_quadrature_points( + const std::vector>& points, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT jacobians_out, + Real* SVMP_RESTRICT curls_out, + Real* SVMP_RESTRICT divergence_out) const { + evaluate_vector_at_quadrature_points_strided( + points, points.size(), values_out, jacobians_out, curls_out, divergence_out); +} 
+ +/* + * Default strided vector evaluator. The stride is checked by + * validate_vector_strided_outputs. Then, for each quadrature point, every + * requested quantity is evaluated into thread-local scratch and written out + * by the matching detail::vector_common helper. Outputs whose pointer is null + * are skipped. + */ +void BasisFunction::evaluate_vector_at_quadrature_points_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT jacobians_out, + Real* SVMP_RESTRICT curls_out, + Real* SVMP_RESTRICT divergence_out) const { + const std::size_t num_qpts = points.size(); + const std::size_t num_dofs = size(); + detail::vector_common::validate_vector_strided_outputs( + num_qpts, output_stride, "BasisFunction"); + + auto& scratch = basis_function_scratch(); + auto& v_tmp = scratch.vector_values; + auto& j_tmp = scratch.vector_jacobians; + auto& c_tmp = scratch.vector_curls; + auto& d_tmp = scratch.vector_divergences; + if (values_out) v_tmp.resize(num_dofs); + if (jacobians_out) j_tmp.resize(num_dofs); + if (curls_out) c_tmp.resize(num_dofs); + if (divergence_out) d_tmp.resize(num_dofs); + + for (std::size_t q = 0; q < num_qpts; ++q) { + if (values_out) { + evaluate_vector_values(points[q], v_tmp); + detail::vector_common::write_vector_values_strided( + v_tmp, num_dofs, output_stride, q, values_out); + } + + if (jacobians_out) { + evaluate_vector_jacobians(points[q], j_tmp); + detail::vector_common::write_vector_jacobians_strided( + j_tmp, num_dofs, output_stride, q, jacobians_out); + } + + if (curls_out) { + evaluate_curl(points[q], c_tmp); + detail::vector_common::write_vector_curl_strided( + c_tmp, num_dofs, output_stride, q, curls_out); + } + + if (divergence_out) { + evaluate_divergence(points[q], d_tmp); + detail::vector_common::write_vector_divergence_strided( + d_tmp, num_dofs, output_stride, q, divergence_out); + } + } +} + +/* Default: always throws BasisEvaluationException. */ +void BasisFunction::evaluate_vector_values( + const math::Vector&, + std::vector>&) const { + throw BasisEvaluationException("Vector-valued evaluation requested on scalar basis", + __FILE__, __LINE__, __func__); +} + +/* Default: always throws BasisEvaluationException. */ +void BasisFunction::evaluate_vector_jacobians( + const math::Vector&, + std::vector&) const { + throw BasisEvaluationException("Vector-basis Jacobian evaluation requested 
on scalar basis", + __FILE__, __LINE__, __func__); +} + +/* Default: always throws BasisEvaluationException. */ +void BasisFunction::evaluate_divergence( + const math::Vector&, + std::vector&) const { + throw BasisEvaluationException("Divergence requested on scalar basis", + __FILE__, __LINE__, __func__); +} + +/* Default: always throws BasisEvaluationException. */ +void BasisFunction::evaluate_curl( + const math::Vector&, + std::vector>&) const { + throw BasisEvaluationException("Curl requested on scalar basis", + __FILE__, __LINE__, __func__); +} + +/* + * Central finite difference of evaluate_values along each of the first + * dimension() axes: (f(xi + eps e_d) - f(xi - eps e_d)) / (2 eps). + * Components at or beyond dimension() keep their Gradient{} value. eps is a + * divisor, so it must be nonzero. The evaluation at xi itself is used only to + * size the output. + */ +void BasisFunction::numerical_gradient(const math::Vector& xi, + std::vector& gradients, + Real eps) const { + std::vector base; + evaluate_values(xi, base); + gradients.assign(base.size(), Gradient{}); + + for (int d = 0; d < dimension(); ++d) { + math::Vector forward = xi; + math::Vector backward = xi; + const std::size_t idx = static_cast(d); + forward[idx] += eps; + backward[idx] -= eps; + + std::vector fwd, bwd; + evaluate_values(forward, fwd); + evaluate_values(backward, bwd); + + for (std::size_t i = 0; i < base.size(); ++i) { + gradients[i][idx] = (fwd[i] - bwd[i]) / (Real(2) * eps); + } + } +} + +/* + * Central finite difference of evaluate_gradients. Perturbing axis d fills + * column d of every Hessian, and only rows below dimension() are written; all + * other entries keep their Hessian{} value. eps is a divisor, so it must be + * nonzero. Because this calls evaluate_gradients, it throws for bases that + * keep the throwing default. + */ +void BasisFunction::numerical_hessian(const math::Vector& xi, + std::vector& hessians, + Real eps) const { + std::vector base_grad; + evaluate_gradients(xi, base_grad); + hessians.assign(base_grad.size(), Hessian{}); + + for (int d = 0; d < dimension(); ++d) { + math::Vector forward = xi; + math::Vector backward = xi; + const std::size_t col = static_cast(d); + forward[col] += eps; + backward[col] -= eps; + + std::vector g_forward, g_backward; + evaluate_gradients(forward, g_forward); + evaluate_gradients(backward, g_backward); + + for (std::size_t i = 0; i < base_grad.size(); ++i) { + for (int k = 0; k < dimension(); ++k) { + const std::size_t row = static_cast(k); + hessians[i](row, col) = (g_forward[i][row] - g_backward[i][row]) / (Real(2) * eps); + } + } + } +} + +} // namespace basis +} // namespace FE +} // namespace svmp diff --git a/Code/Source/solver/FE/Basis/BasisFunction.h 
b/Code/Source/solver/FE/Basis/BasisFunction.h new file mode 100644 index 000000000..ee38a5b19 --- /dev/null +++ b/Code/Source/solver/FE/Basis/BasisFunction.h @@ -0,0 +1,426 @@ +/* Copyright (c) Stanford University, The Regents of the University of California, and others. + * + * All Rights Reserved. + * + * See License file. + */ + +#ifndef SVMP_FE_BASIS_BASISFUNCTION_H +#define SVMP_FE_BASIS_BASISFUNCTION_H + +/** + * @file BasisFunction.h + * @brief Abstract interface for basis function evaluation on reference elements + * + * The Basis module operates purely on reference elements and is independent of + * mesh-specific data structures. Implementations may leverage Math and + * Quadrature utilities but must not read mesh connectivity or geometry. + */ + +#include "Types.h" +#include "BasisExceptions.h" +#include "Math/Vector.h" +#include "Math/Matrix.h" +#include +#include +#include +#include +#include +#include + +namespace svmp { +namespace FE { +namespace basis { + +using Gradient = math::Vector; +using Hessian = math::Matrix; +using VectorJacobian = math::Matrix; + +/** @brief Pair of 64-bit hash words; both default to zero. */ +struct BasisIdentityFingerprint { + std::uint64_t hash_a{0}; + std::uint64_t hash_b{0}; +}; + +/** @brief Hash a sequence of identity words into a two-word fingerprint. */ +[[nodiscard]] BasisIdentityFingerprint +compute_basis_identity_fingerprint(std::span words) noexcept; + +/** + * @brief Reserve capacity for max_size entries in the scratch buffers. + * + * max_qpts defaults to 0. NOTE(review): the definition in BasisFunction.cpp + * ignores max_qpts and reserves the calling thread's buffers only. + */ +void prewarm_basis_function_scratch(std::size_t max_size, + std::size_t max_qpts = 0); + +/** + * @brief Build a Hessian from its six independent entries. + * + * Each off-diagonal argument is written to both mirrored positions; for + * example xy is stored at (0, 1) and at (1, 0). + */ +[[nodiscard]] inline Hessian make_symmetric_hessian(Real xx, + Real yy, + Real zz, + Real xy, + Real xz, + Real yz) { + Hessian hessian{}; + hessian(0, 0) = xx; + hessian(1, 1) = yy; + hessian(2, 2) = zz; + hessian(0, 1) = xy; + hessian(1, 0) = xy; + hessian(0, 2) = xz; + hessian(2, 0) = xz; + hessian(1, 2) = yz; + hessian(2, 1) = yz; + return hessian; +} + +// Raw Hessian buffers use row-major 3x3 blocks: +// dst[row * 3 + col] = H(row, col). 
+/** @brief Write the nine entries of hessian to dst[0] through dst[8], row by row. */ +inline void store_hessian(const Hessian& hessian, Real* dst) noexcept { + dst[0u] = hessian(0u, 0u); + dst[1u] = hessian(0u, 1u); + dst[2u] = hessian(0u, 2u); + dst[3u] = hessian(1u, 0u); + dst[4u] = hessian(1u, 1u); + dst[5u] = hessian(1u, 2u); + dst[6u] = hessian(2u, 0u); + dst[7u] = hessian(2u, 1u); + dst[8u] = hessian(2u, 2u); +} + +/** + * @brief Strided form of store_hessian. + * + * Component k = row * 3 + col is written to dst[k * stride + offset], so dst + * must cover at least 8 * stride + offset + 1 entries. + */ +inline void store_hessian_strided(const Hessian& hessian, + Real* dst, + std::size_t stride, + std::size_t offset) noexcept { + dst[0u * stride + offset] = hessian(0u, 0u); + dst[1u * stride + offset] = hessian(0u, 1u); + dst[2u * stride + offset] = hessian(0u, 2u); + dst[3u * stride + offset] = hessian(1u, 0u); + dst[4u * stride + offset] = hessian(1u, 1u); + dst[5u * stride + offset] = hessian(1u, 2u); + dst[6u * stride + offset] = hessian(2u, 0u); + dst[7u * stride + offset] = hessian(2u, 1u); + dst[8u * stride + offset] = hessian(2u, 2u); +} + +/** + * @brief Copy nine contiguous components from src into a strided destination. + * + * src[k] is written to dst[k * stride + offset] for k = 0 through 8. + */ +inline void scatter_hessian_components_strided(const Real* src, + Real* dst, + std::size_t stride, + std::size_t offset) noexcept { + dst[0u * stride + offset] = src[0u]; + dst[1u * stride + offset] = src[1u]; + dst[2u * stride + offset] = src[2u]; + dst[3u * stride + offset] = src[3u]; + dst[4u * stride + offset] = src[4u]; + dst[5u * stride + offset] = src[5u]; + dst[6u * stride + offset] = src[6u]; + dst[7u * stride + offset] = src[7u]; + dst[8u * stride + offset] = src[8u]; +} + +/** @brief Inverse of store_hessian: read src[row * 3 + col] into entry (row, col). */ +[[nodiscard]] inline Hessian load_hessian(const Real* src) noexcept { + Hessian hessian{}; + hessian(0u, 0u) = src[0u]; + hessian(0u, 1u) = src[1u]; + hessian(0u, 2u) = src[2u]; + hessian(1u, 0u) = src[3u]; + hessian(1u, 1u) = src[4u]; + hessian(1u, 2u) = src[5u]; + hessian(2u, 0u) = src[6u]; + hessian(2u, 1u) = src[7u]; + hessian(2u, 2u) = src[8u]; + return hessian; +} + +/** @brief Entry-wise update: target(r, c) += scale * source(r, c) for all nine entries. */ +inline void add_scaled_hessian(Hessian& target, + const Hessian& source, + Real scale) noexcept { + target(0u, 0u) += scale * source(0u, 0u); + target(0u, 1u) += scale * source(0u, 1u); + target(0u, 2u) += scale * source(0u, 2u); + 
target(1u, 0u) += scale * source(1u, 0u); + target(1u, 1u) += scale * source(1u, 1u); + target(1u, 2u) += scale * source(1u, 2u); + target(2u, 0u) += scale * source(2u, 0u); + target(2u, 1u) += scale * source(2u, 1u); + target(2u, 2u) += scale * source(2u, 2u); +} + +/** + * @brief Base interface for scalar and vector-valued basis families + * + * All basis implementations operate in reference space. Physical mappings are + * handled by the Geometry module. Derivatives are returned with unused + * components set to zero for lower dimensional elements. + */ +class BasisFunction { +public: + virtual ~BasisFunction() = default; + + /// Basis family identifier + virtual BasisType basis_type() const noexcept = 0; + + /// Underlying element type on the reference domain + virtual ElementType element_type() const noexcept = 0; + + /// Reference dimensionality (1, 2, or 3) + virtual int dimension() const noexcept = 0; + + /// Polynomial order (modal/nodal definition dependent) + virtual int order() const noexcept = 0; + + /// Number of basis functions (scalar or vector-valued) + virtual std::size_t size() const noexcept = 0; + + /** + * @brief Whether BasisCache can key this basis from common structural fields. + * + * Return true only when basis_type/element_type/dimension/order/size and + * vector-valued status fully determine evaluation behavior. Parameterized + * bases such as splines and custom user bases should keep the default false + * so BasisCache includes cache_identity() in the key. + */ + virtual bool cache_identity_is_structural() const noexcept { return false; } + + /// Whether the basis is vector-valued (H(div)/H(curl)) + virtual bool is_vector_valued() const noexcept { return false; } + + /// Whether vector-valued basis Jacobians are available. + virtual bool supports_vector_jacobians() const noexcept { return false; } + + /// Whether vector-valued basis curls are available. 
+ virtual bool supports_curl() const noexcept { return false; } + + /// Whether vector-valued basis divergences are available. + virtual bool supports_divergence() const noexcept { return false; } + + /** + * @brief Stable semantic identity used by BasisCache + * + * Derived classes should override this when evaluation depends on + * additional state beyond basis family / element / order metadata. + */ + virtual std::string cache_identity() const; + + /** + * @brief Optional exact structured identity payload for BasisCache keys. + * + * Parameterized bases may append stable integer/bit-pattern words and + * return true to let BasisCache avoid using cache_identity() as the exact + * key payload. The human-readable cache_identity() remains available for + * diagnostics and for custom bases that do not implement this path. + */ + virtual bool cache_identity_words(std::vector& words) const; + + /** + * @brief Optional cached fingerprint for structured identity words. + * + * Implementations that precompute cache_identity_words() may also cache the + * corresponding fingerprint. BasisCache still retains exact identity words + * for equality after hash matches. + */ + virtual bool cache_identity_fingerprint(std::uint64_t& hash_a, + std::uint64_t& hash_b) const; + + /** + * @brief Evaluate scalar basis values at a reference point + * @param xi Reference coordinates (unused entries are ignored) + * @param[out] values Output array resized to size() + */ + virtual void evaluate_values(const math::Vector& xi, + std::vector& values) const = 0; + + /** + * @brief Evaluate gradients of scalar basis functions + * + * Production bases must override this with analytic derivatives. + * Use numerical_gradient explicitly in tests or diagnostics when a finite + * difference approximation is intended. 
+ */ + virtual void evaluate_gradients(const math::Vector& xi, + std::vector& gradients) const; + + /** + * @brief Evaluate Hessians of scalar basis functions + * + * Production bases must override this with analytic second derivatives. + * Use numerical_hessian explicitly in tests or diagnostics when a finite + * difference approximation is intended. + */ + virtual void evaluate_hessians(const math::Vector& xi, + std::vector& hessians) const; + + /** + * @brief Fused evaluation of values, gradients, and Hessians at one point + * + * Default implementation calls evaluate_values, evaluate_gradients, and + * evaluate_hessians in sequence. Bases that share intermediate + * computations (e.g., LagrangeBasis sharing per-axis 1D evaluations) + * should override this to avoid redundant work. + */ + virtual void evaluate_all(const math::Vector& xi, + std::vector& values, + std::vector& gradients, + std::vector& hessians) const; + + /** + * @brief Fill SoA buffers with basis evaluations at all quadrature points + * + * Outputs are written directly to caller-provided strided buffers in + * DOF-major SoA layout — no scratch+transpose required by the caller. + * Pass `nullptr` for any output that is not needed. + * + * values_out: size num_dofs * num_qpts; element [d * num_qpts + q] + * gradients_out: size num_dofs * 3 * num_qpts; element [(d*3 + c) * num_qpts + q] + * hessians_out: size num_dofs * 9 * num_qpts; element [(d*9 + r*3 + c) * num_qpts + q] + * + * Non-null output ranges must not overlap each other. Implementations may + * fill requested quantities in any order that is efficient for the basis. + * + * Default implementation calls evaluate_all (or evaluate_values/gradients/ + * hessians as appropriate) per QP, materializing into temp buffers then + * scatter-writing to the output. Performance-sensitive bases must override + * this path so batched assembly does not fall back to Q virtual point + * evaluations. 
Unit coverage keeps an explicit list of hot bases that are + * expected to provide a direct strided implementation. + */ + virtual void evaluate_at_quadrature_points( + const std::vector>& points, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) const; + + /** + * @brief Fill strided SoA buffers with basis evaluations at quadrature points + * + * Same component layout as evaluate_at_quadrature_points, but each + * dof/component row advances by `output_stride` rather than `points.size()`. + * This lets padded SIMD cache storage be filled directly. Non-null output + * ranges have the same non-overlap requirement. + */ + virtual void evaluate_at_quadrature_points_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) const; + + /** + * @brief Fill zero-initialized scalar cache storage. + * + * BasisCache allocates and zero-initializes its scalar SoA buffers before + * calling this hook. The default implementation overwrites all requested + * entries through the public strided evaluator. Sparse-support bases may + * override this and write only active entries, relying on the caller's + * zero-initialization for inactive DOFs and unused derivative components. + */ + virtual void fill_scalar_cache_entry( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) const; + + /** + * @brief Fill SoA buffers with vector-basis evaluations at all quadrature points + * + * Outputs are written in DOF-major SoA layout. Pass `nullptr` for any + * quantity that is not needed. 
+ * + * values_out: size num_dofs * 3 * num_qpts; element [(d*3 + c) * num_qpts + q] + * jacobians_out: size num_dofs * 9 * num_qpts; element [(d*9 + c*3 + r) * num_qpts + q] + * curls_out: size num_dofs * 3 * num_qpts; element [(d*3 + c) * num_qpts + q] + * divergence_out: size num_dofs * num_qpts; element [d * num_qpts + q] + * + * Non-null output ranges must not overlap each other. Implementations may + * fill requested quantities in any order that is efficient for the basis. + */ + virtual void evaluate_vector_at_quadrature_points( + const std::vector>& points, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT jacobians_out, + Real* SVMP_RESTRICT curls_out, + Real* SVMP_RESTRICT divergence_out) const; + + /** + * @brief Fill strided SoA buffers with vector-basis evaluations + * + * Same component layout as evaluate_vector_at_quadrature_points, but each + * dof/component row advances by `output_stride` rather than `points.size()`. + * Non-null output ranges have the same non-overlap requirement. + * + * The base fallback loops over quadrature points through virtual point + * evaluation. H(div)/H(curl) bases used in assembly should override this + * method directly, and tests track the current hot vector families. + */ + virtual void evaluate_vector_at_quadrature_points_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT jacobians_out, + Real* SVMP_RESTRICT curls_out, + Real* SVMP_RESTRICT divergence_out) const; + + /** + * @brief Evaluate scalar basis values into a caller-provided raw buffer + * + * Caller is responsible for providing a buffer of at least size() Real + * entries. This avoids the per-call std::vector::resize() cost of the + * vector-output overload. Default implementation forwards through a temp + * vector; bases should override for direct write. 
+ */ + virtual void evaluate_values_to(const math::Vector& xi, + Real* SVMP_RESTRICT values_out) const; + + /** + * @brief Evaluate gradients into a flat caller-provided buffer + * + * Layout: gradients_out[i * 3 + c] = component c of gradient of basis i. + * Caller provides a buffer of size() * 3 Real entries. + */ + virtual void evaluate_gradients_to(const math::Vector& xi, + Real* SVMP_RESTRICT gradients_out) const; + + /** + * @brief Evaluate Hessians into a flat caller-provided buffer + * + * Layout: hessians_out[i * 9 + r * 3 + c] = H_i(r, c), i.e. 9 consecutive Real entries per basis function i, stored row-major over (r, c). + */ + virtual void evaluate_hessians_to(const math::Vector& xi, + Real* SVMP_RESTRICT hessians_out) const; + + /** + * @brief Evaluate vector-valued basis functions (H(div)/H(curl)) + * + * Default implementation throws; vector bases must override. + */ + virtual void evaluate_vector_values(const math::Vector& xi, + std::vector>& values) const; + + /** + * @brief Evaluate reference-space Jacobians of vector-valued basis functions + * + * The returned matrix for basis function `i` has entries + * `jacobians[i](component, derivative_direction) = d phi_i_component / d xi_direction`. + * Unused rows/columns are zero-filled for lower-dimensional elements. 
+ */ + virtual void evaluate_vector_jacobians(const math::Vector& xi, + std::vector& jacobians) const; + + /// Evaluate divergence of vector-valued basis functions (if applicable) + virtual void evaluate_divergence(const math::Vector& xi, + std::vector& divergence) const; + + /// Evaluate curl of vector-valued basis functions (if applicable) + virtual void evaluate_curl(const math::Vector& xi, + std::vector>& curl) const; + +protected: + /// Finite-difference helper for gradients of scalar bases + void numerical_gradient(const math::Vector& xi, + std::vector& gradients, + Real eps = Real(1e-6)) const; + + /// Finite-difference helper for Hessians of scalar bases + void numerical_hessian(const math::Vector& xi, + std::vector& hessians, + Real eps = Real(1e-5)) const; +}; + +} // namespace basis +} // namespace FE +} // namespace svmp + +#endif // SVMP_FE_BASIS_BASISFUNCTION_H diff --git a/Code/Source/solver/FE/Basis/BasisTolerance.h b/Code/Source/solver/FE/Basis/BasisTolerance.h new file mode 100644 index 000000000..423551f09 --- /dev/null +++ b/Code/Source/solver/FE/Basis/BasisTolerance.h @@ -0,0 +1,52 @@ +/* Copyright (c) Stanford University, The Regents of the University of California, and others. + * + * All Rights Reserved. + * + * See License file. + */ + +#ifndef SVMP_FE_BASIS_BASISTOLERANCE_H +#define SVMP_FE_BASIS_BASISTOLERANCE_H + +#include "Types.h" + +#include + +namespace svmp { +namespace FE { +namespace basis { +namespace detail { +/** @brief constexpr absolute value: yields -value when value < Real(0), otherwise value unchanged. */ +[[nodiscard]] constexpr Real basis_abs(Real value) noexcept { + return value < Real(0) ? -value : value; +} +/** @brief constexpr maximum of two values: yields rhs when lhs < rhs, otherwise lhs. */ +[[nodiscard]] constexpr Real basis_max(Real lhs, Real rhs) noexcept { + return lhs < rhs ? 
rhs : lhs; +} +/** @brief Tolerance equal to multiplier * epsilon() * max(1, |scale|); with the defaults this is 64 * epsilon(). */ +[[nodiscard]] constexpr Real basis_scaled_tolerance(Real scale = Real(1), + Real multiplier = Real(64)) noexcept { + return multiplier * std::numeric_limits::epsilon() * + basis_max(Real(1), basis_abs(scale)); +} +/** @brief True when |value| does not exceed basis_scaled_tolerance(scale, multiplier). */ +[[nodiscard]] constexpr bool basis_near_zero(Real value, + Real scale = Real(1), + Real multiplier = Real(64)) noexcept { + return basis_abs(value) <= basis_scaled_tolerance(scale, multiplier); +} +/** @brief True when |a - b| does not exceed the scaled tolerance computed with scale = max(1, |a|, |b|). */ +[[nodiscard]] constexpr bool basis_nearly_equal(Real a, + Real b, + Real multiplier = Real(64)) noexcept { + const Real scale = basis_max(Real(1), basis_max(basis_abs(a), basis_abs(b))); + return basis_abs(a - b) <= basis_scaled_tolerance(scale, multiplier); +} + +} // namespace detail +} // namespace basis +} // namespace FE +} // namespace svmp + +#endif // SVMP_FE_BASIS_BASISTOLERANCE_H diff --git a/Code/Source/solver/FE/Basis/BasisTraits.h b/Code/Source/solver/FE/Basis/BasisTraits.h new file mode 100644 index 000000000..835dfe705 --- /dev/null +++ b/Code/Source/solver/FE/Basis/BasisTraits.h @@ -0,0 +1,218 @@ +/* Copyright (c) Stanford University, The Regents of the University of California, and others. + * + * All Rights Reserved. + * + * See License file. 
+ */ + +#ifndef SVMP_FE_BASIS_BASISTRAITS_H +#define SVMP_FE_BASIS_BASISTRAITS_H + +#include "Types.h" + +#include + +namespace svmp { +namespace FE { +namespace basis { +/** @brief Reference-cell shape families that topology() can report; Unknown is the fallback value. */ +enum class BasisTopology { + Unknown, + Point, + Line, + Triangle, + Quadrilateral, + Tetrahedron, + Hexahedron, + Wedge, + Pyramid, +}; +/** @brief True only for ElementType::Point1. */ +[[nodiscard]] constexpr bool is_point(ElementType type) noexcept { + return type == ElementType::Point1; +} +/** @brief True for Line2 and Line3. */ +[[nodiscard]] constexpr bool is_line(ElementType type) noexcept { + return type == ElementType::Line2 || type == ElementType::Line3; +} +/** @brief True for Triangle3 and Triangle6. */ +[[nodiscard]] constexpr bool is_triangle(ElementType type) noexcept { + return type == ElementType::Triangle3 || type == ElementType::Triangle6; +} +/** @brief True for Quad4, Quad8 and Quad9. */ +[[nodiscard]] constexpr bool is_quadrilateral(ElementType type) noexcept { + return type == ElementType::Quad4 || type == ElementType::Quad8 || + type == ElementType::Quad9; +} +/** @brief True for Tetra4 and Tetra10. */ +[[nodiscard]] constexpr bool is_tetrahedron(ElementType type) noexcept { + return type == ElementType::Tetra4 || type == ElementType::Tetra10; +} +/** @brief True for Hex8, Hex20 and Hex27. */ +[[nodiscard]] constexpr bool is_hexahedron(ElementType type) noexcept { + return type == ElementType::Hex8 || type == ElementType::Hex20 || + type == ElementType::Hex27; +} +/** @brief True for Wedge6, Wedge15 and Wedge18. */ +[[nodiscard]] constexpr bool is_wedge(ElementType type) noexcept { + return type == ElementType::Wedge6 || type == ElementType::Wedge15 || + type == ElementType::Wedge18; +} +/** @brief True for Pyramid5, Pyramid13 and Pyramid14. */ +[[nodiscard]] constexpr bool is_pyramid(ElementType type) noexcept { + return type == ElementType::Pyramid5 || type == ElementType::Pyramid13 || + type == ElementType::Pyramid14; +} +/** @brief True when the type is a triangle or a tetrahedron. */ +[[nodiscard]] constexpr bool is_simplex(ElementType type) noexcept { + return is_triangle(type) || is_tetrahedron(type); +} +/** @brief True when the type is a line, quadrilateral or hexahedron. */ +[[nodiscard]] constexpr bool is_tensor_product(ElementType type) noexcept { + return is_line(type) || is_quadrilateral(type) || is_hexahedron(type); +} +/** @brief Thin wrapper that forwards to element_dimension(type), which is declared outside this header. */ +[[nodiscard]] constexpr int reference_dimension(ElementType type) noexcept { + return element_dimension(type); +} +/** @brief Maps an element type to its BasisTopology by testing the is_* predicates in a fixed order; yields Unknown when none matches. */ +[[nodiscard]] 
constexpr BasisTopology topology(ElementType type) noexcept { + if (is_point(type)) { + return BasisTopology::Point; + } + if (is_line(type)) { + return BasisTopology::Line; + } + if (is_triangle(type)) { + return BasisTopology::Triangle; + } + if (is_quadrilateral(type)) { + return BasisTopology::Quadrilateral; + } + if (is_tetrahedron(type)) { + return BasisTopology::Tetrahedron; + } + if (is_hexahedron(type)) { + return BasisTopology::Hexahedron; + } + if (is_wedge(type)) { + return BasisTopology::Wedge; + } + if (is_pyramid(type)) { + return BasisTopology::Pyramid; + } + return BasisTopology::Unknown; +} +/** @brief Collapses each listed first/second-order alias to the lowest-order element of the same shape (e.g. Hex27 to Hex8); any type not listed, such as Quad8, Hex20, Wedge15, Pyramid13 or Point1, is returned unchanged. */ +[[nodiscard]] constexpr ElementType canonical_lagrange_type(ElementType type) noexcept { + switch (type) { + case ElementType::Line2: + case ElementType::Line3: + return ElementType::Line2; + case ElementType::Triangle3: + case ElementType::Triangle6: + return ElementType::Triangle3; + case ElementType::Quad4: + case ElementType::Quad9: + return ElementType::Quad4; + case ElementType::Tetra4: + case ElementType::Tetra10: + return ElementType::Tetra4; + case ElementType::Hex8: + case ElementType::Hex27: + return ElementType::Hex8; + case ElementType::Wedge6: + case ElementType::Wedge18: + return ElementType::Wedge6; + case ElementType::Pyramid5: + case ElementType::Pyramid14: + return ElementType::Pyramid5; + default: + return type; + } +} +/** @brief Polynomial order encoded by a listed alias: 1 for the lowest-order types, 2 for the listed second-order types, and -1 for every other type. */ +[[nodiscard]] constexpr int complete_lagrange_alias_order(ElementType type) noexcept { + switch (type) { + case ElementType::Line2: + case ElementType::Triangle3: + case ElementType::Quad4: + case ElementType::Tetra4: + case ElementType::Hex8: + case ElementType::Wedge6: + case ElementType::Pyramid5: + return 1; + case ElementType::Line3: + case ElementType::Triangle6: + case ElementType::Quad9: + case ElementType::Tetra10: + case ElementType::Hex27: + case ElementType::Wedge18: + case ElementType::Pyramid14: + return 2; + default: + return -1; + } +} +/** @brief Node count order + 1 of a line element; 0 when order is negative. */ +[[nodiscard]] constexpr std::size_t line_lagrange_size(int order) 
noexcept { + return order >= 0 ? static_cast(order + 1) : 0u; +} +/** @brief Node count (order + 1)(order + 2) / 2 of a triangle; 0 when order is negative. */ +[[nodiscard]] constexpr std::size_t triangle_lagrange_size(int order) noexcept { + return order >= 0 ? static_cast((order + 1) * (order + 2) / 2) : 0u; +} +/** @brief Node count (order + 1)^2 of a quadrilateral; 0 when order is negative. */ +[[nodiscard]] constexpr std::size_t quad_lagrange_size(int order) noexcept { + return order >= 0 ? static_cast((order + 1) * (order + 1)) : 0u; +} +/** @brief Node count (order + 1)(order + 2)(order + 3) / 6 of a tetrahedron; 0 when order is negative. */ +[[nodiscard]] constexpr std::size_t tetra_lagrange_size(int order) noexcept { + return order >= 0 ? static_cast((order + 1) * (order + 2) * (order + 3) / 6) : 0u; +} +/** @brief Node count (order + 1)^3 of a hexahedron; 0 when order is negative. NOTE(review): here and in the three helpers above the product is formed in int before the conversion, unlike pyramid_lagrange_size, which multiplies in std::size_t. */ +[[nodiscard]] constexpr std::size_t hex_lagrange_size(int order) noexcept { + return order >= 0 ? static_cast((order + 1) * (order + 1) * (order + 1)) : 0u; +} +/** @brief Node count of a wedge: triangle_lagrange_size(order) * line_lagrange_size(order), hence 0 for negative order. */ +[[nodiscard]] constexpr std::size_t wedge_lagrange_size(int order) noexcept { + return triangle_lagrange_size(order) * line_lagrange_size(order); +} +/** @brief Node count (p + 1)(p + 2)(2p + 3) / 6 of a pyramid with p = order; 0 when order is negative. */ +[[nodiscard]] constexpr std::size_t pyramid_lagrange_size(int order) noexcept { + if (order < 0) { + return 0u; + } + const std::size_t p = static_cast(order); + return (p + 1u) * (p + 2u) * (2u * p + 3u) / 6u; +} +/** @brief Node count implied by an element-type alias: 1 for Point1, the shape-specific size at the alias order for the listed complete Lagrange types, and 0 for any type whose canonical form is not handled (the default branch). */ +[[nodiscard]] constexpr std::size_t complete_lagrange_alias_size(ElementType type) noexcept { + const int order = complete_lagrange_alias_order(type); + switch (canonical_lagrange_type(type)) { + case ElementType::Point1: + return 1u; + case ElementType::Line2: + return line_lagrange_size(order); + case ElementType::Triangle3: + return triangle_lagrange_size(order); + case ElementType::Quad4: + return quad_lagrange_size(order); + case ElementType::Tetra4: + return tetra_lagrange_size(order); + case ElementType::Hex8: + return hex_lagrange_size(order); + case ElementType::Wedge6: + return wedge_lagrange_size(order); + case ElementType::Pyramid5: + return pyramid_lagrange_size(order); + default: + return 0u; + } +} + +} // namespace basis +} // namespace FE +} // namespace svmp + +#endif // SVMP_FE_BASIS_BASISTRAITS_H diff --git a/Code/Source/solver/FE/Basis/LagrangeBasis.cpp 
b/Code/Source/solver/FE/Basis/LagrangeBasis.cpp new file mode 100644 index 000000000..63b947516 --- /dev/null +++ b/Code/Source/solver/FE/Basis/LagrangeBasis.cpp @@ -0,0 +1,8323 @@ +/* Copyright (c) Stanford University, The Regents of the University of California, and others. + * + * All Rights Reserved. + * + * See License file. + */ + +#include "LagrangeBasis.h" +#include "BasisTraits.h" +#include "BasisTolerance.h" +#include "LagrangeBasisFast.h" +#include "NodeOrderingConventions.h" +#include "LagrangeBasisPyramid.h" +#include "LagrangeBasisSimplex.h" +#include "LagrangeBasisUtility.h" +#include +#include +#include + +namespace svmp { +namespace FE { +namespace basis { + +namespace { + +using LagrangeTopology = BasisTopology; +/* Compiler-specific attribute macros. NOTE(review): SVMP_LAGRANGE_ALIGN64 expands to nothing for MSVC and for unrecognised compilers, so the 64-byte alignment request only takes effect with GCC/Clang. */ +#if defined(_MSC_VER) +#define SVMP_LAGRANGE_NOINLINE __declspec(noinline) +#define SVMP_LAGRANGE_ALIGN64 +#elif defined(__GNUC__) || defined(__clang__) +#define SVMP_LAGRANGE_NOINLINE __attribute__((noinline)) +#define SVMP_LAGRANGE_ALIGN64 __attribute__((aligned(64))) +#else +#define SVMP_LAGRANGE_NOINLINE +#define SVMP_LAGRANGE_ALIGN64 +#endif +/* Fallback definition of FE_ALWAYS_INLINE, used only when nothing included earlier has defined it. */ +#ifndef FE_ALWAYS_INLINE +#if defined(_MSC_VER) +#define FE_ALWAYS_INLINE __forceinline +#elif defined(__GNUC__) || defined(__clang__) +#define FE_ALWAYS_INLINE __attribute__((always_inline)) inline +#else +#define FE_ALWAYS_INLINE inline +#endif +#endif +/** @brief Forward declaration of the linear-triangle gradient writer (definition presumably follows later in this file - TODO confirm). */ +SVMP_LAGRANGE_NOINLINE void evaluate_triangle_order1_gradients_strided( + std::size_t num_qpts, + std::size_t output_stride, + Real* SVMP_RESTRICT gradients_out); +/** @brief Pairs a topology tag with an integer dimension. */ +struct LagrangeTopologyTraits { + LagrangeTopology topology; + int dimension; +}; +/** @brief Hash functor for an array of int exponents: starting from the seed 0x9e3779b97f4a7c15, each exponent is folded in with seed ^= value + 0x9e3779b97f4a7c15 + (seed << 6) + (seed >> 2). */ +struct SimplexExponentHash { + std::size_t operator()(const std::array& exponents) const noexcept { + std::size_t seed = 0x9e3779b97f4a7c15ull; + for (const int exponent : exponents) { + const auto value = static_cast(exponent); + seed ^= value + 0x9e3779b97f4a7c15ull + (seed << 6u) + (seed >> 2u); + } + return seed; + } +}; +/** @brief Replaces the contents of @p out with the elements of the fixed-size array @p values, in order. */ +template +void assign_array(std::vector& out, const std::array& 
values) { + out.assign(values.begin(), values.end()); +} +/** @brief Tolerance comparison of a coordinate against its expected value; forwards to detail::basis_nearly_equal with its default multiplier. */ +bool coordinate_matches_expected(Real coord, Real expected) noexcept { + return detail::basis_nearly_equal(coord, expected); +} +/** @brief For each non-null output pointer, runs the matching FastBasis evaluator into a fixed-size array and copies the result into the pointed-to std::vector; null outputs are skipped entirely. */ +template +void evaluate_fast_outputs(const math::Vector& xi, + std::vector* values, + std::vector* gradients, + std::vector* hessians) { + if (values != nullptr) { + std::array fast_values{}; + FastBasis::evaluate(xi, fast_values); + assign_array(*values, fast_values); + } + if (gradients != nullptr) { + std::array fast_gradients{}; + FastBasis::evaluate_gradients(xi, fast_gradients); + assign_array(*gradients, fast_gradients); + } + if (hessians != nullptr) { + std::array fast_hessians{}; + FastBasis::evaluate_hessians(xi, fast_hessians); + assign_array(*hessians, fast_hessians); + } +} +/** @brief Raw-buffer variant for one point: writes values_out[i], gradients_out[i * 3 + c] and, through store_hessian, the Hessian of basis i at hessians_out + i * 9; null outputs are skipped. */ +template +void evaluate_fast_outputs_to(const math::Vector& xi, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + if (values_out != nullptr) { + std::array fast_values{}; + FastBasis::evaluate(xi, fast_values); + for (std::size_t i = 0; i < fast_values.size(); ++i) { + values_out[i] = fast_values[i]; + } + } + if (gradients_out != nullptr) { + std::array fast_gradients{}; + FastBasis::evaluate_gradients(xi, fast_gradients); + for (std::size_t i = 0; i < fast_gradients.size(); ++i) { + gradients_out[i * 3u + 0u] = fast_gradients[i][0]; + gradients_out[i * 3u + 1u] = fast_gradients[i][1]; + gradients_out[i * 3u + 2u] = fast_gradients[i][2]; + } + } + if (hessians_out != nullptr) { + std::array fast_hessians{}; + FastBasis::evaluate_hessians(xi, fast_hessians); + for (std::size_t i = 0; i < fast_hessians.size(); ++i) { + store_hessian(fast_hessians[i], hessians_out + i * 9u); + } + } +} +/** @brief Multi-point variant: for point q writes values_out[i * output_stride + q], gradient component c at gradients_out[(i * 3 + c) * output_stride + q] and Hessian entry (r, c) at hessians_out[(i * 9 + r * 3 + c) * output_stride + q]; null outputs are skipped. */ +template +void evaluate_fast_outputs_strided(const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + for (std::size_t q = 0; q < 
points.size(); ++q) { + const auto& xi = points[q]; + if (values_out != nullptr) { + std::array fast_values{}; + FastBasis::evaluate(xi, fast_values); + for (std::size_t i = 0; i < fast_values.size(); ++i) { + values_out[i * output_stride + q] = fast_values[i]; + } + } + if (gradients_out != nullptr) { + std::array fast_gradients{}; + FastBasis::evaluate_gradients(xi, fast_gradients); + for (std::size_t i = 0; i < fast_gradients.size(); ++i) { + Real* g = gradients_out + i * 3u * output_stride; + g[0u * output_stride + q] = fast_gradients[i][0]; + g[1u * output_stride + q] = fast_gradients[i][1]; + g[2u * output_stride + q] = fast_gradients[i][2]; + } + } + if (hessians_out != nullptr) { + std::array fast_hessians{}; + FastBasis::evaluate_hessians(xi, fast_hessians); + for (std::size_t i = 0; i < fast_hessians.size(); ++i) { + const Hessian& hessian = fast_hessians[i]; + Real* H = hessians_out + i * 9u * output_stride; + H[0u * output_stride + q] = hessian(0, 0); + H[1u * output_stride + q] = hessian(0, 1); + H[2u * output_stride + q] = hessian(0, 2); + H[3u * output_stride + q] = hessian(1, 0); + H[4u * output_stride + q] = hessian(1, 1); + H[5u * output_stride + q] = hessian(1, 2); + H[6u * output_stride + q] = hessian(2, 0); + H[7u * output_stride + q] = hessian(2, 1); + H[8u * output_stride + q] = hessian(2, 2); + } + } + } +} +/** @brief Single-point dispatcher: calls evaluate_fast_outputs with the fast basis selected for Line, Quadrilateral, Hexahedron, Triangle or Tetrahedron and returns true; returns false without writing anything for any other topology. */ +template +bool evaluate_fixed_lagrange_fast_order(LagrangeTopology topology, + const math::Vector& xi, + std::vector* values, + std::vector* gradients, + std::vector* hessians) { + switch (topology) { + case LagrangeTopology::Line: + evaluate_fast_outputs>(xi, values, gradients, hessians); + return true; + case LagrangeTopology::Quadrilateral: + evaluate_fast_outputs>(xi, values, gradients, hessians); + return true; + case LagrangeTopology::Hexahedron: + evaluate_fast_outputs>(xi, values, gradients, hessians); + return true; + case LagrangeTopology::Triangle: + evaluate_fast_outputs>(xi, values, gradients, hessians); + return true; + 
case LagrangeTopology::Tetrahedron: + evaluate_fast_outputs>(xi, values, gradients, hessians); + return true; + default: + return false; + } +} +/** @brief Raw-buffer dispatcher: calls evaluate_fast_outputs_to for Line, Quadrilateral, Hexahedron, Triangle or Tetrahedron and returns true; returns false without writing anything for any other topology. */ +template +bool evaluate_fixed_lagrange_fast_to_order(LagrangeTopology topology, + const math::Vector& xi, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + switch (topology) { + case LagrangeTopology::Line: + evaluate_fast_outputs_to>(xi, values_out, gradients_out, hessians_out); + return true; + case LagrangeTopology::Quadrilateral: + evaluate_fast_outputs_to>(xi, values_out, gradients_out, hessians_out); + return true; + case LagrangeTopology::Hexahedron: + evaluate_fast_outputs_to>(xi, values_out, gradients_out, hessians_out); + return true; + case LagrangeTopology::Triangle: + evaluate_fast_outputs_to>(xi, values_out, gradients_out, hessians_out); + return true; + case LagrangeTopology::Tetrahedron: + evaluate_fast_outputs_to>(xi, values_out, gradients_out, hessians_out); + return true; + default: + return false; + } +} +/** @brief Strided multi-point dispatcher: calls evaluate_fast_outputs_strided for Line, Quadrilateral, Hexahedron, Triangle or Tetrahedron and returns true; returns false without writing anything for any other topology. */ +template +bool evaluate_fixed_lagrange_fast_strided_order( + LagrangeTopology topology, + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + switch (topology) { + case LagrangeTopology::Line: + evaluate_fast_outputs_strided>( + points, output_stride, values_out, gradients_out, hessians_out); + return true; + case LagrangeTopology::Quadrilateral: + evaluate_fast_outputs_strided>( + points, output_stride, values_out, gradients_out, hessians_out); + return true; + case LagrangeTopology::Hexahedron: + evaluate_fast_outputs_strided>( + points, output_stride, values_out, gradients_out, hessians_out); + return true; + case LagrangeTopology::Triangle: + evaluate_fast_outputs_strided>( + points, output_stride, values_out, gradients_out, hessians_out); + return true; + case LagrangeTopology::Tetrahedron: + 
evaluate_fast_outputs_strided>( + points, output_stride, values_out, gradients_out, hessians_out); + return true; + default: + return false; + } +} +/** @brief Writes the ten cubic triangle values for every point into rows of values_out (row d starts at values_out + d * output_stride). Uses l1 = xi[0], l2 = xi[1], l0 = 1 - l1 - l2 and the scaled factors p1k = 3 lk, p2k = p1k (p1k - 1) / 2, p3k = p1k (p1k - 1)(p1k - 2) / 6: rows 0-2 are p30, p31, p32, rows 3-8 are products of one p2 and one p1 factor, row 9 is p10 p11 p12. The branch for exactly four points with stride 4 computes the same expressions with the per-point stores unrolled. */ +void evaluate_triangle_order3_values_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out) { + Real* row0 = values_out + 0u * output_stride; + Real* row1 = values_out + 1u * output_stride; + Real* row2 = values_out + 2u * output_stride; + Real* row3 = values_out + 3u * output_stride; + Real* row4 = values_out + 4u * output_stride; + Real* row5 = values_out + 5u * output_stride; + Real* row6 = values_out + 6u * output_stride; + Real* row7 = values_out + 7u * output_stride; + Real* row8 = values_out + 8u * output_stride; + Real* row9 = values_out + 9u * output_stride; + + if (points.size() == 4u && output_stride == 4u) { + Real p10[4]; + Real p11[4]; + Real p12[4]; + Real p20[4]; + Real p21[4]; + Real p22[4]; + Real p30[4]; + Real p31[4]; + Real p32[4]; + + for (std::size_t q = 0; q < 4u; ++q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l0 = Real(1) - l1 - l2; + + p10[q] = Real(3) * l0; + p11[q] = Real(3) * l1; + p12[q] = Real(3) * l2; + p20[q] = Real(0.5) * p10[q] * (p10[q] - Real(1)); + p21[q] = Real(0.5) * p11[q] * (p11[q] - Real(1)); + p22[q] = Real(0.5) * p12[q] * (p12[q] - Real(1)); + p30[q] = (p10[q] * (p10[q] - Real(1)) * (p10[q] - Real(2))) / Real(6); + p31[q] = (p11[q] * (p11[q] - Real(1)) * (p11[q] - Real(2))) / Real(6); + p32[q] = (p12[q] * (p12[q] - Real(1)) * (p12[q] - Real(2))) / Real(6); + } + + row0[0] = p30[0]; row0[1] = p30[1]; row0[2] = p30[2]; row0[3] = p30[3]; + row1[0] = p31[0]; row1[1] = p31[1]; row1[2] = p31[2]; row1[3] = p31[3]; + row2[0] = p32[0]; row2[1] = p32[1]; row2[2] = p32[2]; row2[3] = p32[3]; + row3[0] = p20[0] * p11[0]; + row3[1] = p20[1] * p11[1]; + row3[2] = p20[2] * p11[2]; + row3[3] = p20[3] * p11[3]; + row4[0] = p10[0] * p21[0]; + row4[1] = p10[1] * p21[1]; + row4[2] = p10[2] * 
p21[2]; + row4[3] = p10[3] * p21[3]; + row5[0] = p21[0] * p12[0]; + row5[1] = p21[1] * p12[1]; + row5[2] = p21[2] * p12[2]; + row5[3] = p21[3] * p12[3]; + row6[0] = p11[0] * p22[0]; + row6[1] = p11[1] * p22[1]; + row6[2] = p11[2] * p22[2]; + row6[3] = p11[3] * p22[3]; + row7[0] = p10[0] * p22[0]; + row7[1] = p10[1] * p22[1]; + row7[2] = p10[2] * p22[2]; + row7[3] = p10[3] * p22[3]; + row8[0] = p20[0] * p12[0]; + row8[1] = p20[1] * p12[1]; + row8[2] = p20[2] * p12[2]; + row8[3] = p20[3] * p12[3]; + row9[0] = p10[0] * p11[0] * p12[0]; + row9[1] = p10[1] * p11[1] * p12[1]; + row9[2] = p10[2] * p11[2] * p12[2]; + row9[3] = p10[3] * p11[3] * p12[3]; + return; + } + + for (std::size_t q = 0; q < points.size(); ++q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l0 = Real(1) - l1 - l2; + + const Real p10 = Real(3) * l0; + const Real p11 = Real(3) * l1; + const Real p12 = Real(3) * l2; + const Real p20 = Real(0.5) * p10 * (p10 - Real(1)); + const Real p21 = Real(0.5) * p11 * (p11 - Real(1)); + const Real p22 = Real(0.5) * p12 * (p12 - Real(1)); + const Real p30 = (p10 * (p10 - Real(1)) * (p10 - Real(2))) / Real(6); + const Real p31 = (p11 * (p11 - Real(1)) * (p11 - Real(2))) / Real(6); + const Real p32 = (p12 * (p12 - Real(1)) * (p12 - Real(2))) / Real(6); + + row0[q] = p30; + row1[q] = p31; + row2[q] = p32; + row3[q] = p20 * p11; + row4[q] = p10 * p21; + row5[q] = p21 * p12; + row6[q] = p11 * p22; + row7[q] = p10 * p22; + row8[q] = p20 * p12; + row9[q] = p10 * p11 * p12; + } +} + +void evaluate_triangle_order2_values_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out) { + Real* row0 = values_out + 0u * output_stride; + Real* row1 = values_out + 1u * output_stride; + Real* row2 = values_out + 2u * output_stride; + Real* row3 = values_out + 3u * output_stride; + Real* row4 = values_out + 4u * output_stride; + Real* row5 = values_out + 5u * output_stride; + + if (points.size() 
== 4u && output_stride == 4u) { + Real l0[4]; + Real l1[4]; + Real l2[4]; + for (std::size_t q = 0; q < 4u; ++q) { + const auto& xi = points[q]; + l1[q] = xi[0]; + l2[q] = xi[1]; + l0[q] = Real(1) - l1[q] - l2[q]; + } + + row0[0] = l0[0] * (Real(2) * l0[0] - Real(1)); + row0[1] = l0[1] * (Real(2) * l0[1] - Real(1)); + row0[2] = l0[2] * (Real(2) * l0[2] - Real(1)); + row0[3] = l0[3] * (Real(2) * l0[3] - Real(1)); + row1[0] = l1[0] * (Real(2) * l1[0] - Real(1)); + row1[1] = l1[1] * (Real(2) * l1[1] - Real(1)); + row1[2] = l1[2] * (Real(2) * l1[2] - Real(1)); + row1[3] = l1[3] * (Real(2) * l1[3] - Real(1)); + row2[0] = l2[0] * (Real(2) * l2[0] - Real(1)); + row2[1] = l2[1] * (Real(2) * l2[1] - Real(1)); + row2[2] = l2[2] * (Real(2) * l2[2] - Real(1)); + row2[3] = l2[3] * (Real(2) * l2[3] - Real(1)); + row3[0] = Real(4) * l0[0] * l1[0]; + row3[1] = Real(4) * l0[1] * l1[1]; + row3[2] = Real(4) * l0[2] * l1[2]; + row3[3] = Real(4) * l0[3] * l1[3]; + row4[0] = Real(4) * l1[0] * l2[0]; + row4[1] = Real(4) * l1[1] * l2[1]; + row4[2] = Real(4) * l1[2] * l2[2]; + row4[3] = Real(4) * l1[3] * l2[3]; + row5[0] = Real(4) * l0[0] * l2[0]; + row5[1] = Real(4) * l0[1] * l2[1]; + row5[2] = Real(4) * l0[2] * l2[2]; + row5[3] = Real(4) * l0[3] * l2[3]; + return; + } + + auto write_q = [&](std::size_t q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l0 = Real(1) - l1 - l2; + row0[q] = l0 * (Real(2) * l0 - Real(1)); + row1[q] = l1 * (Real(2) * l1 - Real(1)); + row2[q] = l2 * (Real(2) * l2 - Real(1)); + row3[q] = Real(4) * l0 * l1; + row4[q] = Real(4) * l1 * l2; + row5[q] = Real(4) * l0 * l2; + }; + + if (points.size() == 4u) { + write_q(0u); + write_q(1u); + write_q(2u); + write_q(3u); + return; + } + + for (std::size_t q = 0; q < points.size(); ++q) { + write_q(q); + } +} + +void evaluate_triangle_order1_values_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out) { + Real* row0 = values_out 
+ 0u * output_stride; + Real* row1 = values_out + 1u * output_stride; + Real* row2 = values_out + 2u * output_stride; + + for (std::size_t q = 0; q < points.size(); ++q) { + const auto& xi = points[q]; + row0[q] = Real(1) - xi[0] - xi[1]; + row1[q] = xi[0]; + row2[q] = xi[1]; + } +} +/** @brief Writes the gradients of the six quadratic triangle functions for every point; component c of function d at point q goes to gradients_out[(d * 3 + c) * output_stride + q], and the third component is always written as zero. Uses l1 = xi[0], l2 = xi[1], l0 = 1 - l1 - l2. */ +void evaluate_triangle_order2_gradients_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT gradients_out) { + Real* row0 = gradients_out + 0u * 3u * output_stride; + Real* row1 = gradients_out + 1u * 3u * output_stride; + Real* row2 = gradients_out + 2u * 3u * output_stride; + Real* row3 = gradients_out + 3u * 3u * output_stride; + Real* row4 = gradients_out + 4u * 3u * output_stride; + Real* row5 = gradients_out + 5u * 3u * output_stride; + + for (std::size_t q = 0; q < points.size(); ++q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l0 = Real(1) - l1 - l2; + const Real g0 = Real(1) - Real(4) * l0; + row0[0u * output_stride + q] = g0; + row0[1u * output_stride + q] = g0; + row0[2u * output_stride + q] = Real(0); + row1[0u * output_stride + q] = Real(4) * l1 - Real(1); + row1[1u * output_stride + q] = Real(0); + row1[2u * output_stride + q] = Real(0); + row2[0u * output_stride + q] = Real(0); + row2[1u * output_stride + q] = Real(4) * l2 - Real(1); + row2[2u * output_stride + q] = Real(0); + row3[0u * output_stride + q] = Real(4) * (l0 - l1); + row3[1u * output_stride + q] = Real(-4) * l1; + row3[2u * output_stride + q] = Real(0); + row4[0u * output_stride + q] = Real(4) * l2; + row4[1u * output_stride + q] = Real(4) * l1; + row4[2u * output_stride + q] = Real(0); + row5[0u * output_stride + q] = Real(-4) * l2; + row5[1u * output_stride + q] = Real(4) * (l0 - l2); + row5[2u * output_stride + q] = Real(0); + } +} +/** @brief Stores one constant 3x3 Hessian for four quadrature points: entry h_rc is written to slots 0..3 of the component row starting at row + (r * 3 + c) * output_stride. Exactly four slots per component are written regardless of output_stride. */ +inline void write_constant_hessian_q4(Real* SVMP_RESTRICT row, + std::size_t output_stride, + Real h00, + Real h01, + Real h02, + Real h10, + Real h11, + Real h12, + Real h20, + Real 
h21, + Real h22) { + Real* c0 = row + 0u * output_stride; + Real* c1 = row + 1u * output_stride; + Real* c2 = row + 2u * output_stride; + Real* c3 = row + 3u * output_stride; + Real* c4 = row + 4u * output_stride; + Real* c5 = row + 5u * output_stride; + Real* c6 = row + 6u * output_stride; + Real* c7 = row + 7u * output_stride; + Real* c8 = row + 8u * output_stride; + + c0[0] = h00; c0[1] = h00; c0[2] = h00; c0[3] = h00; + c1[0] = h01; c1[1] = h01; c1[2] = h01; c1[3] = h01; + c2[0] = h02; c2[1] = h02; c2[2] = h02; c2[3] = h02; + c3[0] = h10; c3[1] = h10; c3[2] = h10; c3[3] = h10; + c4[0] = h11; c4[1] = h11; c4[2] = h11; c4[3] = h11; + c5[0] = h12; c5[1] = h12; c5[2] = h12; c5[3] = h12; + c6[0] = h20; c6[1] = h20; c6[2] = h20; c6[3] = h20; + c7[0] = h21; c7[1] = h21; c7[2] = h21; c7[3] = h21; + c8[0] = h22; c8[1] = h22; c8[2] = h22; c8[3] = h22; +} +/** @brief Writes the point-independent Hessians of the six quadratic triangle functions for four quadrature points, one write_constant_hessian_q4 call per function d at hessians_out + d * 9 * output_stride; every entry involving the third coordinate is zero. */ +SVMP_LAGRANGE_NOINLINE void evaluate_triangle_order2_hessians_q4( + std::size_t output_stride, + Real* SVMP_RESTRICT hessians_out) { + write_constant_hessian_q4(hessians_out + 0u * 9u * output_stride, + output_stride, + Real(4), Real(4), Real(0), + Real(4), Real(4), Real(0), + Real(0), Real(0), Real(0)); + write_constant_hessian_q4(hessians_out + 1u * 9u * output_stride, + output_stride, + Real(4), Real(0), Real(0), + Real(0), Real(0), Real(0), + Real(0), Real(0), Real(0)); + write_constant_hessian_q4(hessians_out + 2u * 9u * output_stride, + output_stride, + Real(0), Real(0), Real(0), + Real(0), Real(4), Real(0), + Real(0), Real(0), Real(0)); + write_constant_hessian_q4(hessians_out + 3u * 9u * output_stride, + output_stride, + Real(-8), Real(-4), Real(0), + Real(-4), Real(0), Real(0), + Real(0), Real(0), Real(0)); + write_constant_hessian_q4(hessians_out + 4u * 9u * output_stride, + output_stride, + Real(0), Real(4), Real(0), + Real(4), Real(0), Real(0), + Real(0), Real(0), Real(0)); + write_constant_hessian_q4(hessians_out + 5u * 9u * output_stride, + output_stride, + Real(0), Real(-4), Real(0), + Real(-4), 
Real(-8), Real(0), + Real(0), Real(0), Real(0)); +} +/** @brief Writes the point-independent Hessians of the ten quadratic tetrahedron functions for four quadrature points, one write_constant_hessian_q4 call per function d at hessians_out + d * 9 * output_stride. */ +SVMP_LAGRANGE_NOINLINE void evaluate_tet_order2_hessians_q4( + std::size_t output_stride, + Real* SVMP_RESTRICT hessians_out) { + write_constant_hessian_q4(hessians_out + 0u * 9u * output_stride, + output_stride, + Real(4), Real(4), Real(4), + Real(4), Real(4), Real(4), + Real(4), Real(4), Real(4)); + write_constant_hessian_q4(hessians_out + 1u * 9u * output_stride, + output_stride, + Real(4), Real(0), Real(0), + Real(0), Real(0), Real(0), + Real(0), Real(0), Real(0)); + write_constant_hessian_q4(hessians_out + 2u * 9u * output_stride, + output_stride, + Real(0), Real(0), Real(0), + Real(0), Real(4), Real(0), + Real(0), Real(0), Real(0)); + write_constant_hessian_q4(hessians_out + 3u * 9u * output_stride, + output_stride, + Real(0), Real(0), Real(0), + Real(0), Real(0), Real(0), + Real(0), Real(0), Real(4)); + write_constant_hessian_q4(hessians_out + 4u * 9u * output_stride, + output_stride, + Real(-8), Real(-4), Real(-4), + Real(-4), Real(0), Real(0), + Real(-4), Real(0), Real(0)); + write_constant_hessian_q4(hessians_out + 5u * 9u * output_stride, + output_stride, + Real(0), Real(4), Real(0), + Real(4), Real(0), Real(0), + Real(0), Real(0), Real(0)); + write_constant_hessian_q4(hessians_out + 6u * 9u * output_stride, + output_stride, + Real(0), Real(-4), Real(0), + Real(-4), Real(-8), Real(-4), + Real(0), Real(-4), Real(0)); + write_constant_hessian_q4(hessians_out + 7u * 9u * output_stride, + output_stride, + Real(0), Real(0), Real(-4), + Real(0), Real(0), Real(-4), + Real(-4), Real(-4), Real(-8)); + write_constant_hessian_q4(hessians_out + 8u * 9u * output_stride, + output_stride, + Real(0), Real(0), Real(4), + Real(0), Real(0), Real(0), + Real(4), Real(0), Real(0)); + write_constant_hessian_q4(hessians_out + 9u * 9u * output_stride, + output_stride, + Real(0), Real(0), Real(0), + Real(0), Real(0), Real(4), + Real(0), Real(4), Real(0)); +} +/** @brief Writes the four linear tetrahedron values for every point: row 0 is 1 - xi[0] - xi[1] - xi[2] and rows 1-3 are xi[0], xi[1], xi[2], with row d starting at values_out + d * output_stride. */ +void evaluate_tet_order1_values_strided( + const std::vector>& 
points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out) { + Real* row0 = values_out + 0u * output_stride; + Real* row1 = values_out + 1u * output_stride; + Real* row2 = values_out + 2u * output_stride; + Real* row3 = values_out + 3u * output_stride; + + for (std::size_t q = 0; q < points.size(); ++q) { + const auto& xi = points[q]; + row0[q] = Real(1) - xi[0] - xi[1] - xi[2]; + row1[q] = xi[0]; + row2[q] = xi[1]; + row3[q] = xi[2]; + } +} +/** @brief Writes the constant gradients of the four linear tetrahedron functions for num_qpts points: (-1, -1, -1) for function 0 and the unit vectors along axes 0, 1, 2 for functions 1-3; component c of function d goes to gradients_out[(d * 3 + c) * output_stride + q]. */ +void evaluate_tet_order1_gradients_strided( + std::size_t num_qpts, + std::size_t output_stride, + Real* SVMP_RESTRICT gradients_out) { + Real* row0 = gradients_out + 0u * 3u * output_stride; + Real* row1 = gradients_out + 1u * 3u * output_stride; + Real* row2 = gradients_out + 2u * 3u * output_stride; + Real* row3 = gradients_out + 3u * 3u * output_stride; + + for (std::size_t q = 0; q < num_qpts; ++q) { + row0[0u * output_stride + q] = Real(-1); + row0[1u * output_stride + q] = Real(-1); + row0[2u * output_stride + q] = Real(-1); + row1[0u * output_stride + q] = Real(1); + row1[1u * output_stride + q] = Real(0); + row1[2u * output_stride + q] = Real(0); + row2[0u * output_stride + q] = Real(0); + row2[1u * output_stride + q] = Real(1); + row2[2u * output_stride + q] = Real(0); + row3[0u * output_stride + q] = Real(0); + row3[1u * output_stride + q] = Real(0); + row3[2u * output_stride + q] = Real(1); + } +} +/** @brief Zero-fills all nine Hessian components of num_nodes basis functions for num_qpts points; when num_qpts is exactly 4 it delegates to write_constant_hessian_q4 per node, otherwise it loops over the points. */ +void evaluate_zero_hessians_strided( + std::size_t num_nodes, + std::size_t num_qpts, + std::size_t output_stride, + Real* SVMP_RESTRICT hessians_out) { + if (num_qpts == 4u) { + for (std::size_t node = 0; node < num_nodes; ++node) { + write_constant_hessian_q4(hessians_out + node * 9u * output_stride, + output_stride, + Real(0), Real(0), Real(0), + Real(0), Real(0), Real(0), + Real(0), Real(0), Real(0)); + } + return; + } + + for (std::size_t node = 0; node < num_nodes; ++node) { + Real* row = hessians_out + node * 9u * output_stride; + for (std::size_t q = 0; q < num_qpts; ++q) { + row[0u * 
output_stride + q] = Real(0); + row[1u * output_stride + q] = Real(0); + row[2u * output_stride + q] = Real(0); + row[3u * output_stride + q] = Real(0); + row[4u * output_stride + q] = Real(0); + row[5u * output_stride + q] = Real(0); + row[6u * output_stride + q] = Real(0); + row[7u * output_stride + q] = Real(0); + row[8u * output_stride + q] = Real(0); + } + } +} + /* Evaluates ten values per point, row k at values_out + k * output_stride. With l1 = xi[0], l2 = xi[1], l3 = xi[2] and l0 = 1 - l1 - l2 - l3, rows 0-3 hold l * (2 * l - 1) and rows 4-9 hold 4 * la * lb for the pairs (0,1), (1,2), (0,2), (0,3), (1,3), (2,3). The four-point branches are unrolled forms of the same formulas. */+void evaluate_tet_order2_values_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out) { + Real* row0 = values_out + 0u * output_stride; + Real* row1 = values_out + 1u * output_stride; + Real* row2 = values_out + 2u * output_stride; + Real* row3 = values_out + 3u * output_stride; + Real* row4 = values_out + 4u * output_stride; + Real* row5 = values_out + 5u * output_stride; + Real* row6 = values_out + 6u * output_stride; + Real* row7 = values_out + 7u * output_stride; + Real* row8 = values_out + 8u * output_stride; + Real* row9 = values_out + 9u * output_stride; + + /* Four points with stride 4: gather l0..l3 for all points first, then write each row with explicit indices. */if (points.size() == 4u && output_stride == 4u) { + Real l0[4]; + Real l1[4]; + Real l2[4]; + Real l3[4]; + for (std::size_t q = 0; q < 4u; ++q) { + const auto& xi = points[q]; + l1[q] = xi[0]; + l2[q] = xi[1]; + l3[q] = xi[2]; + l0[q] = Real(1) - l1[q] - l2[q] - l3[q]; + } + + row0[0] = l0[0] * (Real(2) * l0[0] - Real(1)); + row0[1] = l0[1] * (Real(2) * l0[1] - Real(1)); + row0[2] = l0[2] * (Real(2) * l0[2] - Real(1)); + row0[3] = l0[3] * (Real(2) * l0[3] - Real(1)); + row1[0] = l1[0] * (Real(2) * l1[0] - Real(1)); + row1[1] = l1[1] * (Real(2) * l1[1] - Real(1)); + row1[2] = l1[2] * (Real(2) * l1[2] - Real(1)); + row1[3] = l1[3] * (Real(2) * l1[3] - Real(1)); + row2[0] = l2[0] * (Real(2) * l2[0] - Real(1)); + row2[1] = l2[1] * (Real(2) * l2[1] - Real(1)); + row2[2] = l2[2] * (Real(2) * l2[2] - Real(1)); + row2[3] = l2[3] * (Real(2) * l2[3] - Real(1)); + row3[0] = l3[0] * (Real(2) * l3[0] - Real(1)); + row3[1] = l3[1] * (Real(2) * l3[1] - Real(1)); + row3[2] = l3[2] * (Real(2) * l3[2] - 
Real(1)); + row3[3] = l3[3] * (Real(2) * l3[3] - Real(1)); + row4[0] = Real(4) * l0[0] * l1[0]; + row4[1] = Real(4) * l0[1] * l1[1]; + row4[2] = Real(4) * l0[2] * l1[2]; + row4[3] = Real(4) * l0[3] * l1[3]; + row5[0] = Real(4) * l1[0] * l2[0]; + row5[1] = Real(4) * l1[1] * l2[1]; + row5[2] = Real(4) * l1[2] * l2[2]; + row5[3] = Real(4) * l1[3] * l2[3]; + row6[0] = Real(4) * l0[0] * l2[0]; + row6[1] = Real(4) * l0[1] * l2[1]; + row6[2] = Real(4) * l0[2] * l2[2]; + row6[3] = Real(4) * l0[3] * l2[3]; + row7[0] = Real(4) * l0[0] * l3[0]; + row7[1] = Real(4) * l0[1] * l3[1]; + row7[2] = Real(4) * l0[2] * l3[2]; + row7[3] = Real(4) * l0[3] * l3[3]; + row8[0] = Real(4) * l1[0] * l3[0]; + row8[1] = Real(4) * l1[1] * l3[1]; + row8[2] = Real(4) * l1[2] * l3[2]; + row8[3] = Real(4) * l1[3] * l3[3]; + row9[0] = Real(4) * l2[0] * l3[0]; + row9[1] = Real(4) * l2[1] * l3[1]; + row9[2] = Real(4) * l2[2] * l3[2]; + row9[3] = Real(4) * l2[3] * l3[3]; + return; + } + + /* Per-point writer used by the remaining paths: rows 0-3 get l * (2 * l - 1), rows 4-9 get 4 * la * lb. */auto write_q = [&](std::size_t q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l3 = xi[2]; + const Real l0 = Real(1) - l1 - l2 - l3; + row0[q] = l0 * (Real(2) * l0 - Real(1)); + row1[q] = l1 * (Real(2) * l1 - Real(1)); + row2[q] = l2 * (Real(2) * l2 - Real(1)); + row3[q] = l3 * (Real(2) * l3 - Real(1)); + row4[q] = Real(4) * l0 * l1; + row5[q] = Real(4) * l1 * l2; + row6[q] = Real(4) * l0 * l2; + row7[q] = Real(4) * l0 * l3; + row8[q] = Real(4) * l1 * l3; + row9[q] = Real(4) * l2 * l3; + }; + + if (points.size() == 4u) { + write_q(0u); + write_q(1u); + write_q(2u); + write_q(3u); + return; + } + + for (std::size_t q = 0; q < points.size(); ++q) { + write_q(q); + } +} + /* Stores one gradient (gx, gy, gz) for point q: component c goes to row[c * output_stride + q]. */+inline void write_tet_order2_gradient_q(Real* SVMP_RESTRICT row, + std::size_t output_stride, + std::size_t q, + Real gx, + Real gy, + Real gz) { + row[0u * output_stride + q] = gx; + row[1u * output_stride + q] = gy; + row[2u * output_stride + q] = gz; +} + /* The definition that starts here writes three gradient components per point for each of ten nodes, node k at gradients_out + k * 3 * output_stride. */+SVMP_LAGRANGE_NOINLINE void 
evaluate_tet_order2_gradients_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT gradients_out) { + /* Each node owns three consecutive component rows, hence the 3 * output_stride spacing. */Real* row0 = gradients_out + 0u * 3u * output_stride; + Real* row1 = gradients_out + 1u * 3u * output_stride; + Real* row2 = gradients_out + 2u * 3u * output_stride; + Real* row3 = gradients_out + 3u * 3u * output_stride; + Real* row4 = gradients_out + 4u * 3u * output_stride; + Real* row5 = gradients_out + 5u * 3u * output_stride; + Real* row6 = gradients_out + 6u * 3u * output_stride; + Real* row7 = gradients_out + 7u * 3u * output_stride; + Real* row8 = gradients_out + 8u * 3u * output_stride; + Real* row9 = gradients_out + 9u * 3u * output_stride; + + /* Per-point writer: l1..l3 are the point coordinates, l0 = 1 - l1 - l2 - l3 and g0 = 1 - 4 * l0; every call below stores one node's (x, y, z) gradient. */auto write_q = [&](std::size_t q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l3 = xi[2]; + const Real l0 = Real(1) - l1 - l2 - l3; + const Real four = Real(4); + const Real g0 = Real(1) - four * l0; + + write_tet_order2_gradient_q(row0, output_stride, q, g0, g0, g0); + write_tet_order2_gradient_q(row1, output_stride, q, four * l1 - Real(1), Real(0), Real(0)); + write_tet_order2_gradient_q(row2, output_stride, q, Real(0), four * l2 - Real(1), Real(0)); + write_tet_order2_gradient_q(row3, output_stride, q, Real(0), Real(0), four * l3 - Real(1)); + write_tet_order2_gradient_q(row4, output_stride, q, four * (l0 - l1), -four * l1, -four * l1); + write_tet_order2_gradient_q(row5, output_stride, q, four * l2, four * l1, Real(0)); + write_tet_order2_gradient_q(row6, output_stride, q, -four * l2, four * (l0 - l2), -four * l2); + write_tet_order2_gradient_q(row7, output_stride, q, -four * l3, -four * l3, four * (l0 - l3)); + write_tet_order2_gradient_q(row8, output_stride, q, four * l3, Real(0), four * l1); + write_tet_order2_gradient_q(row9, output_stride, q, Real(0), four * l3, four * l2); + }; + + /* Four points: explicit calls instead of the loop; the stored values are the same. */if (points.size() == 4u) { + write_q(0u); + write_q(1u); + write_q(2u); + write_q(3u); + return; + } + + for (std::size_t q = 0; q < 
points.size(); ++q) { + write_q(q); + } +} + /* With t = 3 * lambda, fills phi[0] = 1, phi[1] = t, phi[2] = t * (t - 1) / 2 and phi[3] = t * (t - 1) * (t - 2) / 6. phi must have room for four entries. */+inline void fill_simplex_order3_factor_values(Real lambda, Real* SVMP_RESTRICT phi) { + const Real t = Real(3) * lambda; + phi[0] = Real(1); + phi[1] = t; + phi[2] = phi[1] * (t - Real(1)) * Real(0.5); + phi[3] = phi[2] * (t - Real(2)) / Real(3); +} + /* Evaluates twenty values per point, row k at values_out + k * output_stride. Each value is a product of the factors that fill_simplex_order3_factor_values produces for l1 = xi[0], l2 = xi[1], l3 = xi[2] and l0 = 1 - l1 - l2 - l3. */+void evaluate_tet_order3_values_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out) { + Real* row0 = values_out + 0u * output_stride; + Real* row1 = values_out + 1u * output_stride; + Real* row2 = values_out + 2u * output_stride; + Real* row3 = values_out + 3u * output_stride; + Real* row4 = values_out + 4u * output_stride; + Real* row5 = values_out + 5u * output_stride; + Real* row6 = values_out + 6u * output_stride; + Real* row7 = values_out + 7u * output_stride; + Real* row8 = values_out + 8u * output_stride; + Real* row9 = values_out + 9u * output_stride; + Real* row10 = values_out + 10u * output_stride; + Real* row11 = values_out + 11u * output_stride; + Real* row12 = values_out + 12u * output_stride; + Real* row13 = values_out + 13u * output_stride; + Real* row14 = values_out + 14u * output_stride; + Real* row15 = values_out + 15u * output_stride; + Real* row16 = values_out + 16u * output_stride; + Real* row17 = values_out + 17u * output_stride; + Real* row18 = values_out + 18u * output_stride; + Real* row19 = values_out + 19u * output_stride; + + for (std::size_t q = 0; q < points.size(); ++q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l3 = xi[2]; + const Real l0 = Real(1) - l1 - l2 - l3; + Real p0[4]; + Real p1[4]; + Real p2[4]; + Real p3[4]; + fill_simplex_order3_factor_values(l0, p0); + fill_simplex_order3_factor_values(l1, p1); + fill_simplex_order3_factor_values(l2, p2); + fill_simplex_order3_factor_values(l3, p3); + + /* Rows 0-3 use a single degree-3 factor, rows 4-15 pair a degree-2 factor with a degree-1 factor, rows 16-19 multiply three degree-1 factors. */row0[q] = p0[3]; + row1[q] = p1[3]; + row2[q] = p2[3]; + row3[q] = p3[3]; + row4[q] = p0[2] * p1[1]; + row5[q] = p0[1] * p1[2]; + row6[q] = 
p1[2] * p2[1]; + row7[q] = p1[1] * p2[2]; + row8[q] = p0[1] * p2[2]; + row9[q] = p0[2] * p2[1]; + row10[q] = p0[2] * p3[1]; + row11[q] = p0[1] * p3[2]; + row12[q] = p1[2] * p3[1]; + row13[q] = p1[1] * p3[2]; + row14[q] = p2[2] * p3[1]; + row15[q] = p2[1] * p3[2]; + row16[q] = p0[1] * p1[1] * p2[1]; + row17[q] = p0[1] * p1[1] * p3[1]; + row18[q] = p1[1] * p2[1] * p3[1]; + row19[q] = p0[1] * p2[1] * p3[1]; + } +} + /* Writes gradients for ten nodes, node k at gradients_out + k * 3 * output_stride. Uses l1 = xi[0], l2 = xi[1], l0 = 1 - l1 - l2, builds each node's partial derivatives with respect to l0, l1, l2 and stores x = dl1 - dl0, y = dl2 - dl0, z = 0. */+void evaluate_triangle_order3_gradients_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT gradients_out) { + Real* rows[10] = { + gradients_out + 0u * 3u * output_stride, + gradients_out + 1u * 3u * output_stride, + gradients_out + 2u * 3u * output_stride, + gradients_out + 3u * 3u * output_stride, + gradients_out + 4u * 3u * output_stride, + gradients_out + 5u * 3u * output_stride, + gradients_out + 6u * 3u * output_stride, + gradients_out + 7u * 3u * output_stride, + gradients_out + 8u * 3u * output_stride, + gradients_out + 9u * 3u * output_stride, + }; + + for (std::size_t q = 0; q < points.size(); ++q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l0 = Real(1) - l1 - l2; + + /* p1k = 3 * lk and p2k = p1k * (p1k - 1) / 2 are the degree-1 and degree-2 factors; d1k, d2k, d3k are the derivatives of the degree-1, -2 and -3 factors with respect to lk. */const Real p10 = Real(3) * l0; + const Real p11 = Real(3) * l1; + const Real p12 = Real(3) * l2; + const Real p20 = Real(0.5) * p10 * (p10 - Real(1)); + const Real p21 = Real(0.5) * p11 * (p11 - Real(1)); + const Real p22 = Real(0.5) * p12 * (p12 - Real(1)); + const Real d10 = Real(3); + const Real d11 = Real(3); + const Real d12 = Real(3); + const Real d20 = Real(3) * p10 - Real(1.5); + const Real d21 = Real(3) * p11 - Real(1.5); + const Real d22 = Real(3) * p12 - Real(1.5); + const Real d30 = Real(1.5) * p10 * p10 - Real(3) * p10 + Real(1); + const Real d31 = Real(1.5) * p11 * p11 - Real(3) * p11 + Real(1); + const Real d32 = Real(1.5) * p12 * p12 - Real(3) * p12 + Real(1); + + /* Per-node partial derivatives with respect to l0. */const Real dl0[10] = { + d30, + Real(0), + Real(0), + d20 * p11, + d10 * p21, + Real(0), + Real(0), + d10 * p22, + 
d20 * p12, + d10 * p11 * p12, + }; + /* Per-node partial derivatives with respect to l1. */const Real dl1[10] = { + Real(0), + d31, + Real(0), + p20 * d11, + p10 * d21, + d21 * p12, + d11 * p22, + Real(0), + Real(0), + p10 * d11 * p12, + }; + /* Per-node partial derivatives with respect to l2. */const Real dl2[10] = { + Real(0), + Real(0), + d32, + Real(0), + Real(0), + p21 * d12, + p11 * d22, + p10 * d22, + p20 * d12, + p10 * p11 * d12, + }; + + /* Combine into reference-coordinate components: x = dl1 - dl0, y = dl2 - dl0, z = 0. */for (std::size_t node = 0; node < 10u; ++node) { + Real* g = rows[node]; + g[0u * output_stride + q] = dl1[node] - dl0[node]; + g[1u * output_stride + q] = dl2[node] - dl0[node]; + g[2u * output_stride + q] = Real(0); + } + } +} + /* Writes eight values per point, row k at values_out + k * output_stride. Each value is a product of one factor per coordinate, (1 - xi[c]) / 2 or (1 + xi[c]) / 2. Four points are handled with explicit calls; the stored values match the loop. */+void evaluate_hex_order1_values_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out) { + Real* row0 = values_out + 0u * output_stride; + Real* row1 = values_out + 1u * output_stride; + Real* row2 = values_out + 2u * output_stride; + Real* row3 = values_out + 3u * output_stride; + Real* row4 = values_out + 4u * output_stride; + Real* row5 = values_out + 5u * output_stride; + Real* row6 = values_out + 6u * output_stride; + Real* row7 = values_out + 7u * output_stride; + + const auto write_q = [&](std::size_t q) { + const auto& xi = points[q]; + const Real lx = (Real(1) - xi[0]) * Real(0.5); + const Real ly = (Real(1) - xi[1]) * Real(0.5); + const Real lz = (Real(1) - xi[2]) * Real(0.5); + const Real ux = (Real(1) + xi[0]) * Real(0.5); + const Real uy = (Real(1) + xi[1]) * Real(0.5); + const Real uz = (Real(1) + xi[2]) * Real(0.5); + const Real lxly = lx * ly; + const Real uxly = ux * ly; + const Real uxuy = ux * uy; + const Real lxuy = lx * uy; + row0[q] = lxly * lz; + row1[q] = uxly * lz; + row2[q] = uxuy * lz; + row3[q] = lxuy * lz; + row4[q] = lxly * uz; + row5[q] = uxly * uz; + row6[q] = uxuy * uz; + row7[q] = lxuy * uz; + }; + if (points.size() == 4u) { + write_q(0u); + write_q(1u); + write_q(2u); + write_q(3u); + return; + } + + for (std::size_t q = 0; q < points.size(); ++q) { + write_q(q); + } +} + /* NOTE(review): the template parameter list is missing in this text. The definition that follows tests NeedValues, NeedGradients and NeedHessians with `if constexpr`, so presumably these are three bool parameters -- confirm against the real source. */+template +void 
evaluate_hex_order1_outputs_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + constexpr Real half = Real(0.5); + /* Per-node derivative of the x, y and z factor: -0.5 where the node uses the lower factor, +0.5 where it uses the upper one. NOTE(review): the std::array template arguments are missing in this text. */constexpr std::array dx{{-half, half, half, -half, -half, half, half, -half}}; + constexpr std::array dy{{-half, -half, half, half, -half, -half, half, half}}; + constexpr std::array dz{{-half, -half, -half, -half, half, half, half, half}}; + + for (std::size_t q = 0; q < points.size(); ++q) { + const auto& xi = points[q]; + const Real lx = (Real(1) - xi[0]) * half; + const Real ly = (Real(1) - xi[1]) * half; + const Real lz = (Real(1) - xi[2]) * half; + const Real ux = (Real(1) + xi[0]) * half; + const Real uy = (Real(1) + xi[1]) * half; + const Real uz = (Real(1) + xi[2]) * half; + /* Per-node factor values, in the same node order as dx, dy, dz. */const Real xval[8] = {lx, ux, ux, lx, lx, ux, ux, lx}; + const Real yval[8] = {ly, ly, uy, uy, ly, ly, uy, uy}; + const Real zval[8] = {lz, lz, lz, lz, uz, uz, uz, uz}; + + for (std::size_t node = 0; node < 8u; ++node) { + /* Each output is written only when its compile-time flag is set, so the matching pointer is not dereferenced otherwise. */if constexpr (NeedValues) { + values_out[node * output_stride + q] = + xval[node] * yval[node] * zval[node]; + } + if constexpr (NeedGradients) { + Real* SVMP_RESTRICT g = gradients_out + node * 3u * output_stride; + g[0u * output_stride + q] = dx[node] * yval[node] * zval[node]; + g[1u * output_stride + q] = xval[node] * dy[node] * zval[node]; + g[2u * output_stride + q] = xval[node] * yval[node] * dz[node]; + } + if constexpr (NeedHessians) { + Real* SVMP_RESTRICT H = hessians_out + node * 9u * output_stride; + /* Diagonal entries are zero; the mixed entries are stored symmetrically (slots 1/3, 2/6, 5/7). */const Real hxy = dx[node] * dy[node] * zval[node]; + const Real hxz = dx[node] * yval[node] * dz[node]; + const Real hyz = xval[node] * dy[node] * dz[node]; + H[0u * output_stride + q] = Real(0); + H[1u * output_stride + q] = hxy; + H[2u * output_stride + q] = hxz; + H[3u * output_stride + q] = hxy; + H[4u * output_stride + q] = Real(0); + H[5u * output_stride + q] = hyz; + H[6u * output_stride + q] = hxz; + H[7u * 
output_stride + q] = hyz; + H[8u * output_stride + q] = Real(0); + } + } + } +} + /* Writes four values per point, row k at values_out + k * output_stride: lx * ly, ux * ly, ux * uy, lx * uy with lx = (1 - xi[0]) / 2, ux = (1 + xi[0]) / 2 and likewise ly, uy from xi[1]. The four-point, stride-4 branch is an unrolled form of the same formulas. */+void evaluate_quad_order1_values_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out) { + Real* row0 = values_out + 0u * output_stride; + Real* row1 = values_out + 1u * output_stride; + Real* row2 = values_out + 2u * output_stride; + Real* row3 = values_out + 3u * output_stride; + + if (points.size() == 4u && output_stride == 4u) { + Real lx[4]; + Real ux[4]; + Real ly[4]; + Real uy[4]; + for (std::size_t q = 0; q < 4u; ++q) { + const auto& xi = points[q]; + lx[q] = (Real(1) - xi[0]) * Real(0.5); + ux[q] = (Real(1) + xi[0]) * Real(0.5); + ly[q] = (Real(1) - xi[1]) * Real(0.5); + uy[q] = (Real(1) + xi[1]) * Real(0.5); + } + row0[0] = lx[0] * ly[0]; + row0[1] = lx[1] * ly[1]; + row0[2] = lx[2] * ly[2]; + row0[3] = lx[3] * ly[3]; + row1[0] = ux[0] * ly[0]; + row1[1] = ux[1] * ly[1]; + row1[2] = ux[2] * ly[2]; + row1[3] = ux[3] * ly[3]; + row2[0] = ux[0] * uy[0]; + row2[1] = ux[1] * uy[1]; + row2[2] = ux[2] * uy[2]; + row2[3] = ux[3] * uy[3]; + row3[0] = lx[0] * uy[0]; + row3[1] = lx[1] * uy[1]; + row3[2] = lx[2] * uy[2]; + row3[3] = lx[3] * uy[3]; + return; + } + + for (std::size_t q = 0; q < points.size(); ++q) { + const auto& xi = points[q]; + const Real lx = (Real(1) - xi[0]) * Real(0.5); + const Real ly = (Real(1) - xi[1]) * Real(0.5); + const Real ux = (Real(1) + xi[0]) * Real(0.5); + const Real uy = (Real(1) + xi[1]) * Real(0.5); + row0[q] = lx * ly; + row1[q] = ux * ly; + row2[q] = ux * uy; + row3[q] = lx * uy; + } +} + /* Writes gradients for four nodes, node k at gradients_out + k * 3 * output_stride, with component rows output_stride apart. The x and y components are +/-0.5 times the other coordinate's factor; the z component is always zero. */+void evaluate_quad_order1_gradients_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT gradients_out) { + Real* row0 = gradients_out + 0u * 3u * output_stride; + Real* row1 = gradients_out + 1u * 3u * output_stride; + Real* row2 = gradients_out + 2u * 3u * output_stride; + Real* row3 = gradients_out + 3u * 3u * output_stride; + + if (points.size() == 4u) { + Real lx[4]; + Real 
ly[4]; + Real ux[4]; + Real uy[4]; + for (std::size_t q = 0; q < 4u; ++q) { + const auto& xi = points[q]; + lx[q] = (Real(1) - xi[0]) * Real(0.5); + ly[q] = (Real(1) - xi[1]) * Real(0.5); + ux[q] = (Real(1) + xi[0]) * Real(0.5); + uy[q] = (Real(1) + xi[1]) * Real(0.5); + } + + /* Stores four consecutive values a0..a3 at row[0..3], i.e. one gradient component for the four points. */auto write_component = [](Real* SVMP_RESTRICT row, + Real a0, + Real a1, + Real a2, + Real a3) { + row[0] = a0; + row[1] = a1; + row[2] = a2; + row[3] = a3; + }; + + /* For each node: x component at rowN, y component at rowN + output_stride, zero z component at rowN + 2 * output_stride. */write_component(row0, Real(-0.5) * ly[0], Real(-0.5) * ly[1], + Real(-0.5) * ly[2], Real(-0.5) * ly[3]); + write_component(row0 + output_stride, Real(-0.5) * lx[0], Real(-0.5) * lx[1], + Real(-0.5) * lx[2], Real(-0.5) * lx[3]); + write_component(row0 + 2u * output_stride, Real(0), Real(0), Real(0), Real(0)); + + write_component(row1, Real(0.5) * ly[0], Real(0.5) * ly[1], + Real(0.5) * ly[2], Real(0.5) * ly[3]); + write_component(row1 + output_stride, Real(-0.5) * ux[0], Real(-0.5) * ux[1], + Real(-0.5) * ux[2], Real(-0.5) * ux[3]); + write_component(row1 + 2u * output_stride, Real(0), Real(0), Real(0), Real(0)); + + write_component(row2, Real(0.5) * uy[0], Real(0.5) * uy[1], + Real(0.5) * uy[2], Real(0.5) * uy[3]); + write_component(row2 + output_stride, Real(0.5) * ux[0], Real(0.5) * ux[1], + Real(0.5) * ux[2], Real(0.5) * ux[3]); + write_component(row2 + 2u * output_stride, Real(0), Real(0), Real(0), Real(0)); + + write_component(row3, Real(-0.5) * uy[0], Real(-0.5) * uy[1], + Real(-0.5) * uy[2], Real(-0.5) * uy[3]); + write_component(row3 + output_stride, Real(0.5) * lx[0], Real(0.5) * lx[1], + Real(0.5) * lx[2], Real(0.5) * lx[3]); + write_component(row3 + 2u * output_stride, Real(0), Real(0), Real(0), Real(0)); + return; + } + + /* Generic point count: same formulas, one point at a time. */for (std::size_t q = 0; q < points.size(); ++q) { + const auto& xi = points[q]; + const Real lx = (Real(1) - xi[0]) * Real(0.5); + const Real ly = (Real(1) - xi[1]) * Real(0.5); + const Real ux = (Real(1) + xi[0]) * Real(0.5); + const Real uy = (Real(1) + xi[1]) * Real(0.5); + row0[0u * output_stride 
+ q] = Real(-0.5) * ly; + row0[1u * output_stride + q] = Real(-0.5) * lx; + row0[2u * output_stride + q] = Real(0); + row1[0u * output_stride + q] = Real( 0.5) * ly; + row1[1u * output_stride + q] = Real(-0.5) * ux; + row1[2u * output_stride + q] = Real(0); + row2[0u * output_stride + q] = Real( 0.5) * uy; + row2[1u * output_stride + q] = Real( 0.5) * ux; + row2[2u * output_stride + q] = Real(0); + row3[0u * output_stride + q] = Real(-0.5) * uy; + row3[1u * output_stride + q] = Real( 0.5) * lx; + row3[2u * output_stride + q] = Real(0); + } +} + /* Writes one node's nine Hessian components for point q: slots 1 and 3 receive xy, every other slot is zero. Component c is stored at row[c * output_stride + q]. */+inline void write_quad_order1_hessian_q( + Real* SVMP_RESTRICT row, + std::size_t output_stride, + std::size_t q, + Real xy) { + row[0u * output_stride + q] = Real(0); + row[1u * output_stride + q] = xy; + row[2u * output_stride + q] = Real(0); + row[3u * output_stride + q] = xy; + row[4u * output_stride + q] = Real(0); + row[5u * output_stride + q] = Real(0); + row[6u * output_stride + q] = Real(0); + row[7u * output_stride + q] = Real(0); + row[8u * output_stride + q] = Real(0); +} + /* Fills Hessians for four nodes and num_qpts points, node k at hessians_out + k * 9 * output_stride. The mixed entry is +0.25 for nodes 0 and 2 and -0.25 for nodes 1 and 3, independent of the point. */+void evaluate_quad_order1_hessians_strided( + std::size_t num_qpts, + std::size_t output_stride, + Real* SVMP_RESTRICT hessians_out) { + Real* row0 = hessians_out + 0u * 9u * output_stride; + Real* row1 = hessians_out + 1u * 9u * output_stride; + Real* row2 = hessians_out + 2u * 9u * output_stride; + Real* row3 = hessians_out + 3u * 9u * output_stride; + + constexpr Real positive = Real(0.25); + constexpr Real negative = Real(-0.25); + for (std::size_t q = 0; q < num_qpts; ++q) { + write_quad_order1_hessian_q(row0, output_stride, q, positive); + write_quad_order1_hessian_q(row1, output_stride, q, negative); + write_quad_order1_hessian_q(row2, output_stride, q, positive); + write_quad_order1_hessian_q(row3, output_stride, q, negative); + } +} + /* NOTE(review): the template parameter list is missing in this text; the definition that follows indexes with Q, and its callers pass values such as 0u..3u, so presumably Q is a std::size_t point index -- confirm against the real source. */+template +inline void write_quad_order1_all_q4( + std::size_t output_stride, + std::size_t i, + std::size_t j, + const Real lx[4][2], + const Real ly[4][2], + Real* SVMP_RESTRICT value_row, + Real* 
SVMP_RESTRICT grad_row, + Real* SVMP_RESTRICT hess_row) { + /* xv, yv are the node's x and y factor values at point Q; xd, yd are the factor derivatives, -0.5 for axis index 0 and +0.5 otherwise. */const Real xv = lx[Q][i]; + const Real yv = ly[Q][j]; + const Real xd = (i == 0u) ? Real(-0.5) : Real(0.5); + const Real yd = (j == 0u) ? Real(-0.5) : Real(0.5); + const Real hxy = xd * yd; + + value_row[Q] = xv * yv; + grad_row[0u * output_stride + Q] = xd * yv; + grad_row[1u * output_stride + Q] = xv * yd; + grad_row[2u * output_stride + Q] = Real(0); + hess_row[0u * output_stride + Q] = Real(0); + hess_row[4u * output_stride + Q] = Real(0); + hess_row[8u * output_stride + Q] = Real(0); + hess_row[1u * output_stride + Q] = hxy; + hess_row[3u * output_stride + Q] = hxy; + hess_row[2u * output_stride + Q] = Real(0); + hess_row[6u * output_stride + Q] = Real(0); + hess_row[5u * output_stride + Q] = Real(0); + hess_row[7u * output_stride + Q] = Real(0); +} + /* Writes values, gradients and Hessians for exactly four points (points[0..3] are read without a size check). The per-node axis indices come from detail::make_quad_tensor_node_axes<1>(), which is defined outside this chunk. */+void evaluate_quad_order1_all_q4( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + constexpr auto node_axes = detail::make_quad_tensor_node_axes<1>(); + + /* lx[q][0], lx[q][1] hold (1 - x) / 2 and (1 + x) / 2 at point q; ly likewise for y. */Real lx[4][2]; + Real ly[4][2]; + for (std::size_t q = 0; q < 4u; ++q) { + const auto& xi = points[q]; + lx[q][0] = (Real(1) - xi[0]) * Real(0.5); + lx[q][1] = (Real(1) + xi[0]) * Real(0.5); + ly[q][0] = (Real(1) - xi[1]) * Real(0.5); + ly[q][1] = (Real(1) + xi[1]) * Real(0.5); + } + + for (std::size_t node = 0; node < node_axes.size(); ++node) { + const auto& axes = node_axes[node]; + const std::size_t i = axes[0]; + const std::size_t j = axes[1]; + Real* value_row = values_out + node * output_stride; + Real* grad_row = gradients_out + node * 3u * output_stride; + Real* hess_row = hessians_out + node * 9u * output_stride; + write_quad_order1_all_q4<0u>( + output_stride, i, j, lx, ly, value_row, grad_row, hess_row); + write_quad_order1_all_q4<1u>( + output_stride, i, j, lx, ly, value_row, grad_row, hess_row); + write_quad_order1_all_q4<2u>( + output_stride, i, j, lx, ly, value_row, 
grad_row, hess_row); + write_quad_order1_all_q4<3u>( + output_stride, i, j, lx, ly, value_row, grad_row, hess_row); + } +} + /* Writes nine values per point, row k at values_out + k * output_stride. Each value is a product of one x factor and one y factor, where factor 0 is t * (t - 1) / 2, factor 1 is t * (t + 1) / 2 and factor 2 is 1 - t * t. */+void evaluate_quad_order2_values_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out) { + Real* row0 = values_out + 0u * output_stride; + Real* row1 = values_out + 1u * output_stride; + Real* row2 = values_out + 2u * output_stride; + Real* row3 = values_out + 3u * output_stride; + Real* row4 = values_out + 4u * output_stride; + Real* row5 = values_out + 5u * output_stride; + Real* row6 = values_out + 6u * output_stride; + Real* row7 = values_out + 7u * output_stride; + Real* row8 = values_out + 8u * output_stride; + + for (std::size_t q = 0; q < points.size(); ++q) { + const auto& xi = points[q]; + const Real x = xi[0]; + const Real y = xi[1]; + const Real x0 = x * (x - Real(1)) * Real(0.5); + const Real x1 = x * (x + Real(1)) * Real(0.5); + const Real x2 = Real(1) - x * x; + const Real y0 = y * (y - Real(1)) * Real(0.5); + const Real y1 = y * (y + Real(1)) * Real(0.5); + const Real y2 = Real(1) - y * y; + + row0[q] = x0 * y0; + row1[q] = x1 * y0; + row2[q] = x1 * y1; + row3[q] = x0 * y1; + row4[q] = x2 * y0; + row5[q] = x1 * y2; + row6[q] = x2 * y1; + row7[q] = x0 * y2; + row8[q] = x2 * y2; + } +} + /* Stores one node's gradient for point q: dx, dy and a zero third component, component c at row[c * output_stride + q]. */+inline void write_quad_order2_gradient_q( + Real* SVMP_RESTRICT row, + std::size_t output_stride, + std::size_t q, + Real dx, + Real dy) { + row[0u * output_stride + q] = dx; + row[1u * output_stride + q] = dy; + row[2u * output_stride + q] = Real(0); +} + /* Writes gradients for nine nodes, node k at gradients_out + k * 3 * output_stride. The x component is (x-factor derivative) * (y-factor value) and the y component is (x-factor value) * (y-factor derivative); the third component is zero. */+void evaluate_quad_order2_gradients_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT gradients_out) { + /* Four points: tabulate factor values (xv, yv) and derivatives (xd, yd) per point, then write node by node. */if (points.size() == 4u) { + Real xv[4][3]; + Real yv[4][3]; + Real xd[4][3]; + Real yd[4][3]; + for (std::size_t q = 0; q < 4u; ++q) { + const auto& xi = points[q]; + const Real x = xi[0]; + const Real y = xi[1]; + xv[q][0] = x * (x - Real(1)) * Real(0.5); + xv[q][1] = x * (x + Real(1)) * Real(0.5); 
+ xv[q][2] = Real(1) - x * x; + yv[q][0] = y * (y - Real(1)) * Real(0.5); + yv[q][1] = y * (y + Real(1)) * Real(0.5); + yv[q][2] = Real(1) - y * y; + xd[q][0] = x - Real(0.5); + xd[q][1] = x + Real(0.5); + xd[q][2] = Real(-2) * x; + yd[q][0] = y - Real(0.5); + yd[q][1] = y + Real(0.5); + yd[q][2] = Real(-2) * y; + } + + /* Writes one node's three component rows for the four points; i selects the x factor and j the y factor. */auto write_node = [&](std::size_t node, std::size_t i, std::size_t j) { + Real* SVMP_RESTRICT row = gradients_out + node * 3u * output_stride; + row[0u] = xd[0][i] * yv[0][j]; + row[1u] = xd[1][i] * yv[1][j]; + row[2u] = xd[2][i] * yv[2][j]; + row[3u] = xd[3][i] * yv[3][j]; + row[output_stride + 0u] = xv[0][i] * yd[0][j]; + row[output_stride + 1u] = xv[1][i] * yd[1][j]; + row[output_stride + 2u] = xv[2][i] * yd[2][j]; + row[output_stride + 3u] = xv[3][i] * yd[3][j]; + row[2u * output_stride + 0u] = Real(0); + row[2u * output_stride + 1u] = Real(0); + row[2u * output_stride + 2u] = Real(0); + row[2u * output_stride + 3u] = Real(0); + }; + + /* (node, x factor, y factor) triples; they pair the same factors as the per-point calls in the generic path below. */write_node(0u, 0u, 0u); + write_node(1u, 1u, 0u); + write_node(2u, 1u, 1u); + write_node(3u, 0u, 1u); + write_node(4u, 2u, 0u); + write_node(5u, 1u, 2u); + write_node(6u, 2u, 1u); + write_node(7u, 0u, 2u); + write_node(8u, 2u, 2u); + return; + } + + Real* row0 = gradients_out + 0u * 3u * output_stride; + Real* row1 = gradients_out + 1u * 3u * output_stride; + Real* row2 = gradients_out + 2u * 3u * output_stride; + Real* row3 = gradients_out + 3u * 3u * output_stride; + Real* row4 = gradients_out + 4u * 3u * output_stride; + Real* row5 = gradients_out + 5u * 3u * output_stride; + Real* row6 = gradients_out + 6u * 3u * output_stride; + Real* row7 = gradients_out + 7u * 3u * output_stride; + Real* row8 = gradients_out + 8u * 3u * output_stride; + + for (std::size_t q = 0; q < points.size(); ++q) { + const auto& xi = points[q]; + const Real x = xi[0]; + const Real y = xi[1]; + const Real x0 = x * (x - Real(1)) * Real(0.5); + const Real x1 = x * (x + Real(1)) * Real(0.5); + const Real x2 = Real(1) - x * x; + const Real 
y0 = y * (y - Real(1)) * Real(0.5); + const Real y1 = y * (y + Real(1)) * Real(0.5); + const Real y2 = Real(1) - y * y; + /* Derivatives of the three factors: t - 0.5, t + 0.5 and -2 * t. */const Real dx0 = x - Real(0.5); + const Real dx1 = x + Real(0.5); + const Real dx2 = Real(-2) * x; + const Real dy0 = y - Real(0.5); + const Real dy1 = y + Real(0.5); + const Real dy2 = Real(-2) * y; + + write_quad_order2_gradient_q(row0, output_stride, q, dx0 * y0, x0 * dy0); + write_quad_order2_gradient_q(row1, output_stride, q, dx1 * y0, x1 * dy0); + write_quad_order2_gradient_q(row2, output_stride, q, dx1 * y1, x1 * dy1); + write_quad_order2_gradient_q(row3, output_stride, q, dx0 * y1, x0 * dy1); + write_quad_order2_gradient_q(row4, output_stride, q, dx2 * y0, x2 * dy0); + write_quad_order2_gradient_q(row5, output_stride, q, dx1 * y2, x1 * dy2); + write_quad_order2_gradient_q(row6, output_stride, q, dx2 * y1, x2 * dy1); + write_quad_order2_gradient_q(row7, output_stride, q, dx0 * y2, x0 * dy2); + write_quad_order2_gradient_q(row8, output_stride, q, dx2 * y2, x2 * dy2); + } +} + /* Stores one node's nine Hessian components for point q: slot 0 = hxx, slots 1 and 3 = hxy, slot 4 = hyy, all remaining slots zero. Component c is stored at row[c * output_stride + q]. */+inline void write_quad_order2_hessian_q( + Real* SVMP_RESTRICT row, + std::size_t output_stride, + std::size_t q, + Real hxx, + Real hxy, + Real hyy) { + row[0u * output_stride + q] = hxx; + row[1u * output_stride + q] = hxy; + row[2u * output_stride + q] = Real(0); + row[3u * output_stride + q] = hxy; + row[4u * output_stride + q] = hyy; + row[5u * output_stride + q] = Real(0); + row[6u * output_stride + q] = Real(0); + row[7u * output_stride + q] = Real(0); + row[8u * output_stride + q] = Real(0); +} + /* Writes Hessians for nine nodes, node k at hessians_out + k * 9 * output_stride, using write_quad_order2_hessian_q for every point. */+void evaluate_quad_order2_hessians_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT hessians_out) { + Real* row0 = hessians_out + 0u * 9u * output_stride; + Real* row1 = hessians_out + 1u * 9u * output_stride; + Real* row2 = hessians_out + 2u * 9u * output_stride; + Real* row3 = hessians_out + 3u * 9u * output_stride; + Real* row4 = hessians_out + 4u * 9u * output_stride; + Real* row5 = hessians_out + 5u * 9u * output_stride; 
+ Real* row6 = hessians_out + 6u * 9u * output_stride; + Real* row7 = hessians_out + 7u * 9u * output_stride; + Real* row8 = hessians_out + 8u * 9u * output_stride; + + for (std::size_t q = 0; q < points.size(); ++q) { + const auto& xi = points[q]; + const Real x = xi[0]; + const Real y = xi[1]; + const Real x0 = x * (x - Real(1)) * Real(0.5); + const Real x1 = x * (x + Real(1)) * Real(0.5); + const Real x2 = Real(1) - x * x; + const Real y0 = y * (y - Real(1)) * Real(0.5); + const Real y1 = y * (y + Real(1)) * Real(0.5); + const Real y2 = Real(1) - y * y; + const Real dx0 = x - Real(0.5); + const Real dx1 = x + Real(0.5); + const Real dx2 = Real(-2) * x; + const Real dy0 = y - Real(0.5); + const Real dy1 = y + Real(0.5); + const Real dy2 = Real(-2) * y; + + /* The second derivative of factors 0 and 1 is 1 and of factor 2 is -2, so hxx is the y factor (times -2 for x factor 2) and hyy is the x factor (times -2 for y factor 2). */write_quad_order2_hessian_q(row0, output_stride, q, y0, dx0 * dy0, x0); + write_quad_order2_hessian_q(row1, output_stride, q, y0, dx1 * dy0, x1); + write_quad_order2_hessian_q(row2, output_stride, q, y1, dx1 * dy1, x1); + write_quad_order2_hessian_q(row3, output_stride, q, y1, dx0 * dy1, x0); + write_quad_order2_hessian_q(row4, output_stride, q, Real(-2) * y0, dx2 * dy0, x2); + write_quad_order2_hessian_q(row5, output_stride, q, y2, dx1 * dy2, Real(-2) * x1); + write_quad_order2_hessian_q(row6, output_stride, q, Real(-2) * y1, dx2 * dy1, x2); + write_quad_order2_hessian_q(row7, output_stride, q, y2, dx0 * dy2, Real(-2) * x0); + write_quad_order2_hessian_q(row8, output_stride, q, Real(-2) * y2, dx2 * dy2, Real(-2) * x2); + } +} + /* Writes four cubic values of x into values[0..3]. Each equals 1 at one of x = -1, 1, -1/3, 1/3 (in that order) and vanishes at the other three. */+inline void fill_order3_axis_values(Real x, Real* SVMP_RESTRICT values) { + const Real x2 = x * x; + values[0] = Real(-9.0 / 16.0) * (x - Real(1)) * (x2 - Real(1.0 / 9.0)); + values[1] = Real( 9.0 / 16.0) * (x + Real(1)) * (x2 - Real(1.0 / 9.0)); + values[2] = Real(27.0 / 16.0) * (x2 - Real(1)) * (x - Real(1.0 / 3.0)); + values[3] = Real(-27.0 / 16.0) * (x2 - Real(1)) * (x + Real(1.0 / 3.0)); +} + /* Same four cubic values as fill_order3_axis_values, returned through four scalar references instead of an array. */+inline void fill_order3_axis_value_scalars(Real x, + Real& v0, + Real& v1, + Real& v2, + 
Real& v3) { + const Real x2 = x * x; + v0 = Real(-9.0 / 16.0) * (x - Real(1)) * (x2 - Real(1.0 / 9.0)); + v1 = Real( 9.0 / 16.0) * (x + Real(1)) * (x2 - Real(1.0 / 9.0)); + v2 = Real(27.0 / 16.0) * (x2 - Real(1)) * (x - Real(1.0 / 3.0)); + v3 = Real(-27.0 / 16.0) * (x2 - Real(1)) * (x + Real(1.0 / 3.0)); +} + /* Writes two values per point for exactly four points (points[0..3] are read without a size check): (1 - x) / 2 in row 0 and (1 + x) / 2 in row 1, with x = points[q][0]. */+void evaluate_line_order1_values_q4( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out) { + Real* row0 = values_out + 0u * output_stride; + Real* row1 = values_out + 1u * output_stride; + for (std::size_t q = 0; q < 4u; ++q) { + const Real x = points[q][0]; + row0[q] = (Real(1) - x) * Real(0.5); + row1[q] = (Real(1) + x) * Real(0.5); + } +} + /* Writes three values per point for exactly four points: x * (x - 1) / 2, x * (x + 1) / 2 and 1 - x * x in rows 0, 1, 2. */+void evaluate_line_order2_values_q4( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out) { + Real* row0 = values_out + 0u * output_stride; + Real* row1 = values_out + 1u * output_stride; + Real* row2 = values_out + 2u * output_stride; + for (std::size_t q = 0; q < 4u; ++q) { + const Real x = points[q][0]; + row0[q] = x * (x - Real(1)) * Real(0.5); + row1[q] = x * (x + Real(1)) * Real(0.5); + row2[q] = Real(1) - x * x; + } +} + /* Writes four values per point for exactly four points; the values come from fill_order3_axis_values(points[q][0], ...). */+void evaluate_line_order3_values_q4( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out) { + Real* row0 = values_out + 0u * output_stride; + Real* row1 = values_out + 1u * output_stride; + Real* row2 = values_out + 2u * output_stride; + Real* row3 = values_out + 3u * output_stride; + for (std::size_t q = 0; q < 4u; ++q) { + Real values[4]; + fill_order3_axis_values(points[q][0], values); + row0[q] = values[0]; + row1[q] = values[1]; + row2[q] = values[2]; + row3[q] = values[3]; + } +} + /* Forward declarations only; the definitions are not in this chunk. */+inline void fill_order3_axis_values_first(Real x, + Real* SVMP_RESTRICT values, + Real* SVMP_RESTRICT first); + +inline void fill_order3_axis_values_first_second(Real x, + Real* SVMP_RESTRICT values, + Real* SVMP_RESTRICT first, + Real* SVMP_RESTRICT second); + /* The definition that starts here writes one node's gradient rows for four points: g0..g3 into the first component row, zeros into the second and third. */+inline void 
write_line_gradient_q4_row(Real* SVMP_RESTRICT row, + std::size_t output_stride, + Real g0, + Real g1, + Real g2, + Real g3) { + /* First component row holds the four gradients; the other two component rows are zeroed. */row[0] = g0; + row[1] = g1; + row[2] = g2; + row[3] = g3; + row[output_stride + 0u] = Real(0); + row[output_stride + 1u] = Real(0); + row[output_stride + 2u] = Real(0); + row[output_stride + 3u] = Real(0); + row[2u * output_stride + 0u] = Real(0); + row[2u * output_stride + 1u] = Real(0); + row[2u * output_stride + 2u] = Real(0); + row[2u * output_stride + 3u] = Real(0); +} + /* Writes one node's Hessian rows for four points: h0..h3 into component 0 and zeros into components 1-8, component rows being output_stride apart. */+inline void write_line_hessian_q4_row(Real* SVMP_RESTRICT row, + std::size_t output_stride, + Real h0, + Real h1, + Real h2, + Real h3) { + row[0] = h0; + row[1] = h1; + row[2] = h2; + row[3] = h3; + for (std::size_t component = 1u; component < 9u; ++component) { + Real* slot = row + component * output_stride; + slot[0] = Real(0); + slot[1] = Real(0); + slot[2] = Real(0); + slot[3] = Real(0); + } +} + /* Constant gradients for two nodes at four points: -0.5 for node 0 and +0.5 for node 1, node k at gradients_out + k * 3 * output_stride. */+SVMP_LAGRANGE_NOINLINE void evaluate_line_order1_gradients_q4( + std::size_t output_stride, + Real* SVMP_RESTRICT gradients_out) { + write_line_gradient_q4_row(gradients_out + 0u * 3u * output_stride, + output_stride, + Real(-0.5), Real(-0.5), Real(-0.5), Real(-0.5)); + write_line_gradient_q4_row(gradients_out + 1u * 3u * output_stride, + output_stride, + Real(0.5), Real(0.5), Real(0.5), Real(0.5)); +} + /* All-zero Hessians for two nodes at four points, node k at hessians_out + k * 9 * output_stride. */+SVMP_LAGRANGE_NOINLINE void evaluate_line_order1_hessians_q4( + std::size_t output_stride, + Real* SVMP_RESTRICT hessians_out) { + write_line_hessian_q4_row(hessians_out + 0u * 9u * output_stride, + output_stride, Real(0), Real(0), Real(0), Real(0)); + write_line_hessian_q4_row(hessians_out + 1u * 9u * output_stride, + output_stride, Real(0), Real(0), Real(0), Real(0)); +} + /* Runs the three order-1 four-point kernels in turn: values, then gradients, then Hessians. */+SVMP_LAGRANGE_NOINLINE void evaluate_line_order1_all_q4( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + evaluate_line_order1_values_q4(points, output_stride, values_out); + 
evaluate_line_order1_gradients_q4(output_stride, gradients_out); + evaluate_line_order1_hessians_q4(output_stride, hessians_out); +} + +SVMP_LAGRANGE_NOINLINE void evaluate_line_order2_gradients_q4( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT gradients_out) { + const Real x0 = points[0][0]; + const Real x1 = points[1][0]; + const Real x2 = points[2][0]; + const Real x3 = points[3][0]; + write_line_gradient_q4_row(gradients_out + 0u * 3u * output_stride, + output_stride, + x0 - Real(0.5), x1 - Real(0.5), + x2 - Real(0.5), x3 - Real(0.5)); + write_line_gradient_q4_row(gradients_out + 1u * 3u * output_stride, + output_stride, + x0 + Real(0.5), x1 + Real(0.5), + x2 + Real(0.5), x3 + Real(0.5)); + write_line_gradient_q4_row(gradients_out + 2u * 3u * output_stride, + output_stride, + Real(-2) * x0, Real(-2) * x1, + Real(-2) * x2, Real(-2) * x3); +} + +SVMP_LAGRANGE_NOINLINE void evaluate_line_order2_hessians_q4( + std::size_t output_stride, + Real* SVMP_RESTRICT hessians_out) { + write_line_hessian_q4_row(hessians_out + 0u * 9u * output_stride, + output_stride, Real(1), Real(1), Real(1), Real(1)); + write_line_hessian_q4_row(hessians_out + 1u * 9u * output_stride, + output_stride, Real(1), Real(1), Real(1), Real(1)); + write_line_hessian_q4_row(hessians_out + 2u * 9u * output_stride, + output_stride, Real(-2), Real(-2), Real(-2), Real(-2)); +} + +SVMP_LAGRANGE_NOINLINE void evaluate_line_order2_all_q4( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + evaluate_line_order2_values_q4(points, output_stride, values_out); + evaluate_line_order2_gradients_q4(points, output_stride, gradients_out); + evaluate_line_order2_hessians_q4(output_stride, hessians_out); +} + +SVMP_LAGRANGE_NOINLINE void evaluate_line_order3_gradients_q4( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT gradients_out) 
{ + Real first[4][4]; + for (std::size_t q = 0; q < 4u; ++q) { + Real values[4]; + fill_order3_axis_values_first(points[q][0], values, first[q]); + } + for (std::size_t node = 0; node < 4u; ++node) { + write_line_gradient_q4_row(gradients_out + node * 3u * output_stride, + output_stride, + first[0][node], first[1][node], + first[2][node], first[3][node]); + } +} + +SVMP_LAGRANGE_NOINLINE void evaluate_line_order3_hessians_q4( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT hessians_out) { + Real second[4][4]; + for (std::size_t q = 0; q < 4u; ++q) { + Real values[4]; + Real first[4]; + fill_order3_axis_values_first_second(points[q][0], values, first, second[q]); + } + for (std::size_t node = 0; node < 4u; ++node) { + write_line_hessian_q4_row(hessians_out + node * 9u * output_stride, + output_stride, + second[0][node], second[1][node], + second[2][node], second[3][node]); + } +} + +SVMP_LAGRANGE_NOINLINE void evaluate_line_order3_all_q4( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + Real values[4][4]; + Real first[4][4]; + Real second[4][4]; + for (std::size_t q = 0; q < 4u; ++q) { + fill_order3_axis_values_first_second(points[q][0], values[q], first[q], second[q]); + } + for (std::size_t node = 0; node < 4u; ++node) { + Real* value_row = values_out + node * output_stride; + value_row[0] = values[0][node]; + value_row[1] = values[1][node]; + value_row[2] = values[2][node]; + value_row[3] = values[3][node]; + write_line_gradient_q4_row(gradients_out + node * 3u * output_stride, + output_stride, + first[0][node], first[1][node], + first[2][node], first[3][node]); + write_line_hessian_q4_row(hessians_out + node * 9u * output_stride, + output_stride, + second[0][node], second[1][node], + second[2][node], second[3][node]); + } +} + +inline void fill_order3_axis_values_first(Real x, + Real* SVMP_RESTRICT values, + 
Real* SVMP_RESTRICT first) { + fill_order3_axis_values(x, values); + const Real x2 = x * x; + first[0] = Real(-9.0 / 16.0) * (Real(3) * x2 - Real(2) * x - Real(1.0 / 9.0)); + first[1] = Real( 9.0 / 16.0) * (Real(3) * x2 + Real(2) * x - Real(1.0 / 9.0)); + first[2] = Real(27.0 / 16.0) * (Real(3) * x2 - Real(2.0 / 3.0) * x - Real(1)); + first[3] = Real(-27.0 / 16.0) * (Real(3) * x2 + Real(2.0 / 3.0) * x - Real(1)); +} + +inline void fill_order3_axis_values_first_second(Real x, + Real* SVMP_RESTRICT values, + Real* SVMP_RESTRICT first, + Real* SVMP_RESTRICT second) { + fill_order3_axis_values_first(x, values, first); + second[0] = Real(-9.0 / 16.0) * (Real(6) * x - Real(2)); + second[1] = Real( 9.0 / 16.0) * (Real(6) * x + Real(2)); + second[2] = Real(27.0 / 16.0) * (Real(6) * x - Real(2.0 / 3.0)); + second[3] = Real(-27.0 / 16.0) * (Real(6) * x + Real(2.0 / 3.0)); +} + +inline void write_quad_order3_value_row_q4(Real* SVMP_RESTRICT row, + const Real lx[4][4], + const Real ly[4][4], + std::size_t i, + std::size_t j) { + row[0] = lx[0][i] * ly[0][j]; + row[1] = lx[1][i] * ly[1][j]; + row[2] = lx[2][i] * ly[2][j]; + row[3] = lx[3][i] * ly[3][j]; +} + +void evaluate_quad_order3_values_q4( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out) { + if (output_stride == 4u) { + Real* row0 = values_out + 0u * 4u; + Real* row1 = values_out + 1u * 4u; + Real* row2 = values_out + 2u * 4u; + Real* row3 = values_out + 3u * 4u; + Real* row4 = values_out + 4u * 4u; + Real* row5 = values_out + 5u * 4u; + Real* row6 = values_out + 6u * 4u; + Real* row7 = values_out + 7u * 4u; + Real* row8 = values_out + 8u * 4u; + Real* row9 = values_out + 9u * 4u; + Real* row10 = values_out + 10u * 4u; + Real* row11 = values_out + 11u * 4u; + Real* row12 = values_out + 12u * 4u; + Real* row13 = values_out + 13u * 4u; + Real* row14 = values_out + 14u * 4u; + Real* row15 = values_out + 15u * 4u; + + auto write_q = [&](std::size_t q) { + const auto& xi = points[q]; 
+ Real x0; + Real x1; + Real x2; + Real x3; + Real y0; + Real y1; + Real y2; + Real y3; + fill_order3_axis_value_scalars(xi[0], x0, x1, x2, x3); + fill_order3_axis_value_scalars(xi[1], y0, y1, y2, y3); + row0[q] = x0 * y0; + row1[q] = x1 * y0; + row2[q] = x1 * y1; + row3[q] = x0 * y1; + row4[q] = x2 * y0; + row5[q] = x3 * y0; + row6[q] = x1 * y2; + row7[q] = x1 * y3; + row8[q] = x3 * y1; + row9[q] = x2 * y1; + row10[q] = x0 * y3; + row11[q] = x0 * y2; + row12[q] = x2 * y2; + row13[q] = x3 * y2; + row14[q] = x2 * y3; + row15[q] = x3 * y3; + }; + + write_q(0u); + write_q(1u); + write_q(2u); + write_q(3u); + return; + } + + Real lx[4][4]; + Real ly[4][4]; + for (std::size_t q = 0; q < 4u; ++q) { + const auto& xi = points[q]; + fill_order3_axis_values(xi[0], lx[q]); + fill_order3_axis_values(xi[1], ly[q]); + } + + write_quad_order3_value_row_q4(values_out + 0u * output_stride, lx, ly, 0u, 0u); + write_quad_order3_value_row_q4(values_out + 1u * output_stride, lx, ly, 1u, 0u); + write_quad_order3_value_row_q4(values_out + 2u * output_stride, lx, ly, 1u, 1u); + write_quad_order3_value_row_q4(values_out + 3u * output_stride, lx, ly, 0u, 1u); + write_quad_order3_value_row_q4(values_out + 4u * output_stride, lx, ly, 2u, 0u); + write_quad_order3_value_row_q4(values_out + 5u * output_stride, lx, ly, 3u, 0u); + write_quad_order3_value_row_q4(values_out + 6u * output_stride, lx, ly, 1u, 2u); + write_quad_order3_value_row_q4(values_out + 7u * output_stride, lx, ly, 1u, 3u); + write_quad_order3_value_row_q4(values_out + 8u * output_stride, lx, ly, 3u, 1u); + write_quad_order3_value_row_q4(values_out + 9u * output_stride, lx, ly, 2u, 1u); + write_quad_order3_value_row_q4(values_out + 10u * output_stride, lx, ly, 0u, 3u); + write_quad_order3_value_row_q4(values_out + 11u * output_stride, lx, ly, 0u, 2u); + write_quad_order3_value_row_q4(values_out + 12u * output_stride, lx, ly, 2u, 2u); + write_quad_order3_value_row_q4(values_out + 13u * output_stride, lx, ly, 3u, 2u); + 
write_quad_order3_value_row_q4(values_out + 14u * output_stride, lx, ly, 2u, 3u); + write_quad_order3_value_row_q4(values_out + 15u * output_stride, lx, ly, 3u, 3u); +} + +void evaluate_quad_order3_values_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out) { + if (points.size() == 4u) { + evaluate_quad_order3_values_q4(points, output_stride, values_out); + return; + } + + Real* row0 = values_out + 0u * output_stride; + Real* row1 = values_out + 1u * output_stride; + Real* row2 = values_out + 2u * output_stride; + Real* row3 = values_out + 3u * output_stride; + Real* row4 = values_out + 4u * output_stride; + Real* row5 = values_out + 5u * output_stride; + Real* row6 = values_out + 6u * output_stride; + Real* row7 = values_out + 7u * output_stride; + Real* row8 = values_out + 8u * output_stride; + Real* row9 = values_out + 9u * output_stride; + Real* row10 = values_out + 10u * output_stride; + Real* row11 = values_out + 11u * output_stride; + Real* row12 = values_out + 12u * output_stride; + Real* row13 = values_out + 13u * output_stride; + Real* row14 = values_out + 14u * output_stride; + Real* row15 = values_out + 15u * output_stride; + + for (std::size_t q = 0; q < points.size(); ++q) { + const auto& xi = points[q]; + Real lx[4]; + Real ly[4]; + fill_order3_axis_values(xi[0], lx); + fill_order3_axis_values(xi[1], ly); + row0[q] = lx[0] * ly[0]; + row1[q] = lx[1] * ly[0]; + row2[q] = lx[1] * ly[1]; + row3[q] = lx[0] * ly[1]; + row4[q] = lx[2] * ly[0]; + row5[q] = lx[3] * ly[0]; + row6[q] = lx[1] * ly[2]; + row7[q] = lx[1] * ly[3]; + row8[q] = lx[3] * ly[1]; + row9[q] = lx[2] * ly[1]; + row10[q] = lx[0] * ly[3]; + row11[q] = lx[0] * ly[2]; + row12[q] = lx[2] * ly[2]; + row13[q] = lx[3] * ly[2]; + row14[q] = lx[2] * ly[3]; + row15[q] = lx[3] * ly[3]; + } +} + +template +inline void write_quad_gradient_row_q4( + Real* SVMP_RESTRICT row, + std::size_t output_stride, + const Real (&lx)[4][N], + const Real (&ly)[4][N], + 
const Real (&dx)[4][N], + const Real (&dy)[4][N], + std::size_t i, + std::size_t j) { + row[0u] = dx[0][i] * ly[0][j]; + row[1u] = dx[1][i] * ly[1][j]; + row[2u] = dx[2][i] * ly[2][j]; + row[3u] = dx[3][i] * ly[3][j]; + row[output_stride + 0u] = lx[0][i] * dy[0][j]; + row[output_stride + 1u] = lx[1][i] * dy[1][j]; + row[output_stride + 2u] = lx[2][i] * dy[2][j]; + row[output_stride + 3u] = lx[3][i] * dy[3][j]; + row[2u * output_stride + 0u] = Real(0); + row[2u * output_stride + 1u] = Real(0); + row[2u * output_stride + 2u] = Real(0); + row[2u * output_stride + 3u] = Real(0); +} + +inline void fill_order4_axis_values_first(Real x, + Real* SVMP_RESTRICT values, + Real* SVMP_RESTRICT first) { + const Real r = (x + Real(1)) * Real(2); + const Real r2 = r * r; + const Real r3 = r2 * r; + const Real f0 = r; + const Real f1 = r - Real(1); + const Real f2 = r - Real(2); + const Real f3 = r - Real(3); + const Real f4 = r - Real(4); + const Real f01 = f0 * f1; + const Real f12 = f1 * f2; + const Real f23 = f2 * f3; + const Real f34 = f3 * f4; + + values[0] = (f12 * f34) / Real(24); + values[1] = (f01 * f23) / Real(24); + values[2] = -(f0 * f2 * f34) / Real(6); + values[3] = (f01 * f34) / Real(4); + values[4] = -(f01 * f2 * f4) / Real(6); + + first[0] = (Real(4) * r3 - Real(30) * r2 + Real(70) * r - Real(50)) / Real(12); + first[1] = (Real(4) * r3 - Real(18) * r2 + Real(22) * r - Real(6)) / Real(12); + first[2] = (-Real(4) * r3 + Real(27) * r2 - Real(52) * r + Real(24)) / Real(3); + first[3] = Real(2) * r3 - Real(12) * r2 + Real(19) * r - Real(6); + first[4] = (-Real(4) * r3 + Real(21) * r2 - Real(28) * r + Real(8)) / Real(3); +} + +SVMP_LAGRANGE_NOINLINE void evaluate_quad_order3_gradients_q4( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT gradients_out) { + constexpr auto node_axes = detail::make_quad_tensor_node_axes<3>(); + + Real lx[4][4]; + Real ly[4][4]; + Real dx[4][4]; + Real dy[4][4]; + for (std::size_t q = 0; q < 4u; ++q) { + const 
auto& xi = points[q]; + fill_order3_axis_values_first(xi[0], lx[q], dx[q]); + fill_order3_axis_values_first(xi[1], ly[q], dy[q]); + } + + for (std::size_t node = 0; node < node_axes.size(); ++node) { + const auto& axes = node_axes[node]; + write_quad_gradient_row_q4( + gradients_out + node * 3u * output_stride, + output_stride, + lx, + ly, + dx, + dy, + axes[0], + axes[1]); + } +} + +SVMP_LAGRANGE_NOINLINE void evaluate_quad_order4_gradients_q4( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT gradients_out) { + constexpr auto node_axes = detail::make_quad_tensor_node_axes<4>(); + + Real lx[4][5]; + Real ly[4][5]; + Real dx[4][5]; + Real dy[4][5]; + for (std::size_t q = 0; q < 4u; ++q) { + const auto& xi = points[q]; + fill_order4_axis_values_first(xi[0], lx[q], dx[q]); + fill_order4_axis_values_first(xi[1], ly[q], dy[q]); + } + + for (std::size_t node = 0; node < node_axes.size(); ++node) { + const auto& axes = node_axes[node]; + write_quad_gradient_row_q4( + gradients_out + node * 3u * output_stride, + output_stride, + lx, + ly, + dx, + dy, + axes[0], + axes[1]); + } +} + +void evaluate_quad_order3_gradients_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT gradients_out) { + if (points.size() == 4u) { + evaluate_quad_order3_gradients_q4(points, output_stride, gradients_out); + return; + } + + Real* row0 = gradients_out + 0u * 3u * output_stride; + Real* row1 = gradients_out + 1u * 3u * output_stride; + Real* row2 = gradients_out + 2u * 3u * output_stride; + Real* row3 = gradients_out + 3u * 3u * output_stride; + Real* row4 = gradients_out + 4u * 3u * output_stride; + Real* row5 = gradients_out + 5u * 3u * output_stride; + Real* row6 = gradients_out + 6u * 3u * output_stride; + Real* row7 = gradients_out + 7u * 3u * output_stride; + Real* row8 = gradients_out + 8u * 3u * output_stride; + Real* row9 = gradients_out + 9u * 3u * output_stride; + Real* row10 = gradients_out + 10u * 3u * 
output_stride; + Real* row11 = gradients_out + 11u * 3u * output_stride; + Real* row12 = gradients_out + 12u * 3u * output_stride; + Real* row13 = gradients_out + 13u * 3u * output_stride; + Real* row14 = gradients_out + 14u * 3u * output_stride; + Real* row15 = gradients_out + 15u * 3u * output_stride; + + for (std::size_t q = 0; q < points.size(); ++q) { + const auto& xi = points[q]; + Real lx[4]; + Real ly[4]; + Real dx[4]; + Real dy[4]; + fill_order3_axis_values_first(xi[0], lx, dx); + fill_order3_axis_values_first(xi[1], ly, dy); + write_quad_order2_gradient_q(row0, output_stride, q, dx[0] * ly[0], lx[0] * dy[0]); + write_quad_order2_gradient_q(row1, output_stride, q, dx[1] * ly[0], lx[1] * dy[0]); + write_quad_order2_gradient_q(row2, output_stride, q, dx[1] * ly[1], lx[1] * dy[1]); + write_quad_order2_gradient_q(row3, output_stride, q, dx[0] * ly[1], lx[0] * dy[1]); + write_quad_order2_gradient_q(row4, output_stride, q, dx[2] * ly[0], lx[2] * dy[0]); + write_quad_order2_gradient_q(row5, output_stride, q, dx[3] * ly[0], lx[3] * dy[0]); + write_quad_order2_gradient_q(row6, output_stride, q, dx[1] * ly[2], lx[1] * dy[2]); + write_quad_order2_gradient_q(row7, output_stride, q, dx[1] * ly[3], lx[1] * dy[3]); + write_quad_order2_gradient_q(row8, output_stride, q, dx[3] * ly[1], lx[3] * dy[1]); + write_quad_order2_gradient_q(row9, output_stride, q, dx[2] * ly[1], lx[2] * dy[1]); + write_quad_order2_gradient_q(row10, output_stride, q, dx[0] * ly[3], lx[0] * dy[3]); + write_quad_order2_gradient_q(row11, output_stride, q, dx[0] * ly[2], lx[0] * dy[2]); + write_quad_order2_gradient_q(row12, output_stride, q, dx[2] * ly[2], lx[2] * dy[2]); + write_quad_order2_gradient_q(row13, output_stride, q, dx[3] * ly[2], lx[3] * dy[2]); + write_quad_order2_gradient_q(row14, output_stride, q, dx[2] * ly[3], lx[2] * dy[3]); + write_quad_order2_gradient_q(row15, output_stride, q, dx[3] * ly[3], lx[3] * dy[3]); + } +} + +void evaluate_quad_order3_hessians_strided( + const std::vector>& 
points, + std::size_t output_stride, + Real* SVMP_RESTRICT hessians_out) { + Real* row0 = hessians_out + 0u * 9u * output_stride; + Real* row1 = hessians_out + 1u * 9u * output_stride; + Real* row2 = hessians_out + 2u * 9u * output_stride; + Real* row3 = hessians_out + 3u * 9u * output_stride; + Real* row4 = hessians_out + 4u * 9u * output_stride; + Real* row5 = hessians_out + 5u * 9u * output_stride; + Real* row6 = hessians_out + 6u * 9u * output_stride; + Real* row7 = hessians_out + 7u * 9u * output_stride; + Real* row8 = hessians_out + 8u * 9u * output_stride; + Real* row9 = hessians_out + 9u * 9u * output_stride; + Real* row10 = hessians_out + 10u * 9u * output_stride; + Real* row11 = hessians_out + 11u * 9u * output_stride; + Real* row12 = hessians_out + 12u * 9u * output_stride; + Real* row13 = hessians_out + 13u * 9u * output_stride; + Real* row14 = hessians_out + 14u * 9u * output_stride; + Real* row15 = hessians_out + 15u * 9u * output_stride; + + for (std::size_t q = 0; q < points.size(); ++q) { + const auto& xi = points[q]; + Real lx[4]; + Real ly[4]; + Real dx[4]; + Real dy[4]; + Real hx[4]; + Real hy[4]; + fill_order3_axis_values_first_second(xi[0], lx, dx, hx); + fill_order3_axis_values_first_second(xi[1], ly, dy, hy); + write_quad_order2_hessian_q(row0, output_stride, q, hx[0] * ly[0], dx[0] * dy[0], lx[0] * hy[0]); + write_quad_order2_hessian_q(row1, output_stride, q, hx[1] * ly[0], dx[1] * dy[0], lx[1] * hy[0]); + write_quad_order2_hessian_q(row2, output_stride, q, hx[1] * ly[1], dx[1] * dy[1], lx[1] * hy[1]); + write_quad_order2_hessian_q(row3, output_stride, q, hx[0] * ly[1], dx[0] * dy[1], lx[0] * hy[1]); + write_quad_order2_hessian_q(row4, output_stride, q, hx[2] * ly[0], dx[2] * dy[0], lx[2] * hy[0]); + write_quad_order2_hessian_q(row5, output_stride, q, hx[3] * ly[0], dx[3] * dy[0], lx[3] * hy[0]); + write_quad_order2_hessian_q(row6, output_stride, q, hx[1] * ly[2], dx[1] * dy[2], lx[1] * hy[2]); + write_quad_order2_hessian_q(row7, 
output_stride, q, hx[1] * ly[3], dx[1] * dy[3], lx[1] * hy[3]); + write_quad_order2_hessian_q(row8, output_stride, q, hx[3] * ly[1], dx[3] * dy[1], lx[3] * hy[1]); + write_quad_order2_hessian_q(row9, output_stride, q, hx[2] * ly[1], dx[2] * dy[1], lx[2] * hy[1]); + write_quad_order2_hessian_q(row10, output_stride, q, hx[0] * ly[3], dx[0] * dy[3], lx[0] * hy[3]); + write_quad_order2_hessian_q(row11, output_stride, q, hx[0] * ly[2], dx[0] * dy[2], lx[0] * hy[2]); + write_quad_order2_hessian_q(row12, output_stride, q, hx[2] * ly[2], dx[2] * dy[2], lx[2] * hy[2]); + write_quad_order2_hessian_q(row13, output_stride, q, hx[3] * ly[2], dx[3] * dy[2], lx[3] * hy[2]); + write_quad_order2_hessian_q(row14, output_stride, q, hx[2] * ly[3], dx[2] * dy[3], lx[2] * hy[3]); + write_quad_order2_hessian_q(row15, output_stride, q, hx[3] * ly[3], dx[3] * dy[3], lx[3] * hy[3]); + } +} + +template +inline void write_quad_order3_all_q4( + std::size_t output_stride, + std::size_t i, + std::size_t j, + const Real lx[4][4], + const Real ly[4][4], + const Real dx[4][4], + const Real dy[4][4], + const Real hx[4][4], + const Real hy[4][4], + Real* SVMP_RESTRICT value_row, + Real* SVMP_RESTRICT grad_row, + Real* SVMP_RESTRICT hess_row) { + const Real xv = lx[Q][i]; + const Real yv = ly[Q][j]; + const Real xd = dx[Q][i]; + const Real yd = dy[Q][j]; + const Real hxy = xd * yd; + + value_row[Q] = xv * yv; + grad_row[0u * output_stride + Q] = xd * yv; + grad_row[1u * output_stride + Q] = xv * yd; + grad_row[2u * output_stride + Q] = Real(0); + hess_row[0u * output_stride + Q] = hx[Q][i] * yv; + hess_row[4u * output_stride + Q] = xv * hy[Q][j]; + hess_row[8u * output_stride + Q] = Real(0); + hess_row[1u * output_stride + Q] = hxy; + hess_row[3u * output_stride + Q] = hxy; + hess_row[2u * output_stride + Q] = Real(0); + hess_row[6u * output_stride + Q] = Real(0); + hess_row[5u * output_stride + Q] = Real(0); + hess_row[7u * output_stride + Q] = Real(0); +} + +void evaluate_quad_order3_all_q4( + const 
std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + constexpr auto node_axes = detail::make_quad_tensor_node_axes<3>(); + + Real lx[4][4]; + Real ly[4][4]; + Real dx[4][4]; + Real dy[4][4]; + Real hx[4][4]; + Real hy[4][4]; + for (std::size_t q = 0; q < 4u; ++q) { + const auto& xi = points[q]; + fill_order3_axis_values_first_second(xi[0], lx[q], dx[q], hx[q]); + fill_order3_axis_values_first_second(xi[1], ly[q], dy[q], hy[q]); + } + + for (std::size_t node = 0; node < node_axes.size(); ++node) { + const auto& axes = node_axes[node]; + const std::size_t i = axes[0]; + const std::size_t j = axes[1]; + Real* value_row = values_out + node * output_stride; + Real* grad_row = gradients_out + node * 3u * output_stride; + Real* hess_row = hessians_out + node * 9u * output_stride; + write_quad_order3_all_q4<0u>( + output_stride, i, j, lx, ly, dx, dy, hx, hy, value_row, grad_row, hess_row); + write_quad_order3_all_q4<1u>( + output_stride, i, j, lx, ly, dx, dy, hx, hy, value_row, grad_row, hess_row); + write_quad_order3_all_q4<2u>( + output_stride, i, j, lx, ly, dx, dy, hx, hy, value_row, grad_row, hess_row); + write_quad_order3_all_q4<3u>( + output_stride, i, j, lx, ly, dx, dy, hx, hy, value_row, grad_row, hess_row); + } +} + +void evaluate_hex_order3_values_q4( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out) { + constexpr auto node_axes = detail::make_hex_tensor_node_axes<3>(); + + Real lx[4][4]; + Real ly[4][4]; + Real lz[4][4]; + for (std::size_t q = 0; q < 4u; ++q) { + const auto& xi = points[q]; + fill_order3_axis_values(xi[0], lx[q]); + fill_order3_axis_values(xi[1], ly[q]); + fill_order3_axis_values(xi[2], lz[q]); + } + + for (std::size_t node = 0; node < node_axes.size(); ++node) { + const auto& axes = node_axes[node]; + const std::size_t i = axes[0]; + const std::size_t j = axes[1]; + const std::size_t k = 
axes[2]; + Real* row = values_out + node * output_stride; + row[0] = lx[0][i] * ly[0][j] * lz[0][k]; + row[1] = lx[1][i] * ly[1][j] * lz[1][k]; + row[2] = lx[2][i] * ly[2][j] * lz[2][k]; + row[3] = lx[3][i] * ly[3][j] * lz[3][k]; + } +} + +void evaluate_hex_order3_gradients_q4( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT gradients_out) { + constexpr auto node_axes = detail::make_hex_tensor_node_axes<3>(); + + Real lx[4][4]; + Real ly[4][4]; + Real lz[4][4]; + Real dx[4][4]; + Real dy[4][4]; + Real dz[4][4]; + for (std::size_t q = 0; q < 4u; ++q) { + const auto& xi = points[q]; + fill_order3_axis_values_first(xi[0], lx[q], dx[q]); + fill_order3_axis_values_first(xi[1], ly[q], dy[q]); + fill_order3_axis_values_first(xi[2], lz[q], dz[q]); + } + + for (std::size_t node = 0; node < node_axes.size(); ++node) { + const auto& axes = node_axes[node]; + const std::size_t i = axes[0]; + const std::size_t j = axes[1]; + const std::size_t k = axes[2]; + Real* row = gradients_out + node * 3u * output_stride; + row[0] = dx[0][i] * ly[0][j] * lz[0][k]; + row[1] = dx[1][i] * ly[1][j] * lz[1][k]; + row[2] = dx[2][i] * ly[2][j] * lz[2][k]; + row[3] = dx[3][i] * ly[3][j] * lz[3][k]; + row[output_stride + 0u] = lx[0][i] * dy[0][j] * lz[0][k]; + row[output_stride + 1u] = lx[1][i] * dy[1][j] * lz[1][k]; + row[output_stride + 2u] = lx[2][i] * dy[2][j] * lz[2][k]; + row[output_stride + 3u] = lx[3][i] * dy[3][j] * lz[3][k]; + row[2u * output_stride + 0u] = lx[0][i] * ly[0][j] * dz[0][k]; + row[2u * output_stride + 1u] = lx[1][i] * ly[1][j] * dz[1][k]; + row[2u * output_stride + 2u] = lx[2][i] * ly[2][j] * dz[2][k]; + row[2u * output_stride + 3u] = lx[3][i] * ly[3][j] * dz[3][k]; + } +} + +template +inline void write_hex_order3_q4_hessian_outputs( + std::size_t output_stride, + std::size_t i, + std::size_t j, + std::size_t k, + const Real lx[4][4], + const Real ly[4][4], + const Real lz[4][4], + const Real dx[4][4], + const Real dy[4][4], + const Real 
dz[4][4], + const Real hx[4][4], + const Real hy[4][4], + const Real hz[4][4], + Real* SVMP_RESTRICT value_row, + Real* SVMP_RESTRICT grad_row, + Real* SVMP_RESTRICT hess_row) { + const Real xv = lx[Q][i]; + const Real yv = ly[Q][j]; + const Real zv = lz[Q][k]; + const Real yz = yv * zv; + + if constexpr (WriteValue) { + value_row[Q] = xv * yz; + } + + const Real xd = dx[Q][i]; + const Real yd = dy[Q][j]; + const Real zd = dz[Q][k]; + const Real yd_z = yd * zv; + const Real yv_zd = yv * zd; + + if constexpr (WriteGradient) { + grad_row[0u * output_stride + Q] = xd * yz; + grad_row[1u * output_stride + Q] = xv * yd_z; + grad_row[2u * output_stride + Q] = xv * yv_zd; + } + + const Real hxy = xd * yd_z; + const Real hxz = xd * yv_zd; + const Real hyz = xv * yd * zd; + hess_row[0u * output_stride + Q] = hx[Q][i] * yz; + hess_row[4u * output_stride + Q] = xv * hy[Q][j] * zv; + hess_row[8u * output_stride + Q] = xv * yv * hz[Q][k]; + hess_row[1u * output_stride + Q] = hxy; + hess_row[3u * output_stride + Q] = hxy; + hess_row[2u * output_stride + Q] = hxz; + hess_row[6u * output_stride + Q] = hxz; + hess_row[5u * output_stride + Q] = hyz; + hess_row[7u * output_stride + Q] = hyz; +} + +template +void evaluate_hex_order3_q4_hessian_outputs( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + constexpr auto node_axes = detail::make_hex_tensor_node_axes<3>(); + + Real lx[4][4]; + Real ly[4][4]; + Real lz[4][4]; + Real dx[4][4]; + Real dy[4][4]; + Real dz[4][4]; + Real hx[4][4]; + Real hy[4][4]; + Real hz[4][4]; + for (std::size_t q = 0; q < 4u; ++q) { + const auto& xi = points[q]; + fill_order3_axis_values_first_second(xi[0], lx[q], dx[q], hx[q]); + fill_order3_axis_values_first_second(xi[1], ly[q], dy[q], hy[q]); + fill_order3_axis_values_first_second(xi[2], lz[q], dz[q], hz[q]); + } + + for (std::size_t node = 0; node < node_axes.size(); ++node) { + const 
auto& axes = node_axes[node]; + const std::size_t i = axes[0]; + const std::size_t j = axes[1]; + const std::size_t k = axes[2]; + Real* value_row = values_out ? values_out + node * output_stride : nullptr; + Real* grad_row = gradients_out ? gradients_out + node * 3u * output_stride : nullptr; + Real* hess_row = hessians_out + node * 9u * output_stride; + write_hex_order3_q4_hessian_outputs<0u, WriteValue, WriteGradient>( + output_stride, i, j, k, lx, ly, lz, dx, dy, dz, hx, hy, hz, + value_row, grad_row, hess_row); + write_hex_order3_q4_hessian_outputs<1u, WriteValue, WriteGradient>( + output_stride, i, j, k, lx, ly, lz, dx, dy, dz, hx, hy, hz, + value_row, grad_row, hess_row); + write_hex_order3_q4_hessian_outputs<2u, WriteValue, WriteGradient>( + output_stride, i, j, k, lx, ly, lz, dx, dy, dz, hx, hy, hz, + value_row, grad_row, hess_row); + write_hex_order3_q4_hessian_outputs<3u, WriteValue, WriteGradient>( + output_stride, i, j, k, lx, ly, lz, dx, dy, dz, hx, hy, hz, + value_row, grad_row, hess_row); + } +} + +void evaluate_hex_order3_hessians_q4( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT hessians_out) { + evaluate_hex_order3_q4_hessian_outputs( + points, output_stride, nullptr, nullptr, hessians_out); +} + +void evaluate_hex_order3_all_q4( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + evaluate_hex_order3_q4_hessian_outputs( + points, output_stride, values_out, gradients_out, hessians_out); +} + +void evaluate_hex_order2_values_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out) { + Real* row0 = values_out + 0u * output_stride; + Real* row1 = values_out + 1u * output_stride; + Real* row2 = values_out + 2u * output_stride; + Real* row3 = values_out + 3u * output_stride; + Real* row4 = values_out + 4u * output_stride; + Real* row5 = values_out + 5u * 
output_stride; + Real* row6 = values_out + 6u * output_stride; + Real* row7 = values_out + 7u * output_stride; + Real* row8 = values_out + 8u * output_stride; + Real* row9 = values_out + 9u * output_stride; + Real* row10 = values_out + 10u * output_stride; + Real* row11 = values_out + 11u * output_stride; + Real* row12 = values_out + 12u * output_stride; + Real* row13 = values_out + 13u * output_stride; + Real* row14 = values_out + 14u * output_stride; + Real* row15 = values_out + 15u * output_stride; + Real* row16 = values_out + 16u * output_stride; + Real* row17 = values_out + 17u * output_stride; + Real* row18 = values_out + 18u * output_stride; + Real* row19 = values_out + 19u * output_stride; + Real* row20 = values_out + 20u * output_stride; + Real* row21 = values_out + 21u * output_stride; + Real* row22 = values_out + 22u * output_stride; + Real* row23 = values_out + 23u * output_stride; + Real* row24 = values_out + 24u * output_stride; + Real* row25 = values_out + 25u * output_stride; + Real* row26 = values_out + 26u * output_stride; + + for (std::size_t q = 0; q < points.size(); ++q) { + const auto& xi = points[q]; + const Real x = xi[0]; + const Real y = xi[1]; + const Real z = xi[2]; + const Real x0 = x * (x - Real(1)) * Real(0.5); + const Real x1 = x * (x + Real(1)) * Real(0.5); + const Real x2 = Real(1) - x * x; + const Real y0 = y * (y - Real(1)) * Real(0.5); + const Real y1 = y * (y + Real(1)) * Real(0.5); + const Real y2 = Real(1) - y * y; + const Real z0 = z * (z - Real(1)) * Real(0.5); + const Real z1 = z * (z + Real(1)) * Real(0.5); + const Real z2 = Real(1) - z * z; + const Real x0y0 = x0 * y0; + const Real x1y0 = x1 * y0; + const Real x1y1 = x1 * y1; + const Real x0y1 = x0 * y1; + const Real x2y0 = x2 * y0; + const Real x1y2 = x1 * y2; + const Real x2y1 = x2 * y1; + const Real x0y2 = x0 * y2; + const Real x2y2 = x2 * y2; + + row0[q] = x0y0 * z0; + row1[q] = x1y0 * z0; + row2[q] = x1y1 * z0; + row3[q] = x0y1 * z0; + row4[q] = x0y0 * z1; + row5[q] 
= x1y0 * z1; + row6[q] = x1y1 * z1; + row7[q] = x0y1 * z1; + row8[q] = x2y0 * z0; + row9[q] = x1y2 * z0; + row10[q] = x2y1 * z0; + row11[q] = x0y2 * z0; + row12[q] = x2y0 * z1; + row13[q] = x1y2 * z1; + row14[q] = x2y1 * z1; + row15[q] = x0y2 * z1; + row16[q] = x0y0 * z2; + row17[q] = x1y0 * z2; + row18[q] = x1y1 * z2; + row19[q] = x0y1 * z2; + row20[q] = x2y2 * z0; + row21[q] = x2y2 * z1; + row22[q] = x2y0 * z2; + row23[q] = x1y2 * z2; + row24[q] = x2y1 * z2; + row25[q] = x0y2 * z2; + row26[q] = x2y2 * z2; + } +} + +inline void fill_order2_axis_values_first(Real x, + Real* SVMP_RESTRICT values, + Real* SVMP_RESTRICT first) { + values[0] = x * (x - Real(1)) * Real(0.5); + values[1] = x * (x + Real(1)) * Real(0.5); + values[2] = Real(1) - x * x; + first[0] = x - Real(0.5); + first[1] = x + Real(0.5); + first[2] = Real(-2) * x; +} + +inline void fill_order2_axis_values_first_second(Real x, + Real* SVMP_RESTRICT values, + Real* SVMP_RESTRICT first, + Real* SVMP_RESTRICT second) { + fill_order2_axis_values_first(x, values, first); + second[0] = Real(1); + second[1] = Real(1); + second[2] = Real(-2); +} + +template +inline void write_hex_order2_hessian_q4( + std::size_t output_stride, + std::size_t i, + std::size_t j, + std::size_t k, + const Real lx[4][3], + const Real ly[4][3], + const Real lz[4][3], + const Real dx[4][3], + const Real dy[4][3], + const Real dz[4][3], + const Real hx[4][3], + const Real hy[4][3], + const Real hz[4][3], + Real* SVMP_RESTRICT hess_row) { + const Real xv = lx[Q][i]; + const Real yv = ly[Q][j]; + const Real zv = lz[Q][k]; + const Real yz = yv * zv; + const Real xd = dx[Q][i]; + const Real yd = dy[Q][j]; + const Real zd = dz[Q][k]; + const Real yd_z = yd * zv; + const Real yv_zd = yv * zd; + const Real hxy = xd * yd_z; + const Real hxz = xd * yv_zd; + const Real hyz = xv * yd * zd; + hess_row[0u * output_stride + Q] = hx[Q][i] * yz; + hess_row[4u * output_stride + Q] = xv * hy[Q][j] * zv; + hess_row[8u * output_stride + Q] = xv * yv * 
hz[Q][k]; + hess_row[1u * output_stride + Q] = hxy; + hess_row[3u * output_stride + Q] = hxy; + hess_row[2u * output_stride + Q] = hxz; + hess_row[6u * output_stride + Q] = hxz; + hess_row[5u * output_stride + Q] = hyz; + hess_row[7u * output_stride + Q] = hyz; +} + +void evaluate_hex_order2_hessians_q4( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT hessians_out) { + constexpr auto node_axes = detail::make_hex_tensor_node_axes<2>(); + + Real lx[4][3]; + Real ly[4][3]; + Real lz[4][3]; + Real dx[4][3]; + Real dy[4][3]; + Real dz[4][3]; + Real hx[4][3]; + Real hy[4][3]; + Real hz[4][3]; + for (std::size_t q = 0; q < 4u; ++q) { + const auto& xi = points[q]; + fill_order2_axis_values_first_second(xi[0], lx[q], dx[q], hx[q]); + fill_order2_axis_values_first_second(xi[1], ly[q], dy[q], hy[q]); + fill_order2_axis_values_first_second(xi[2], lz[q], dz[q], hz[q]); + } + + for (std::size_t node = 0; node < node_axes.size(); ++node) { + const auto& axes = node_axes[node]; + const std::size_t i = axes[0]; + const std::size_t j = axes[1]; + const std::size_t k = axes[2]; + Real* hess_row = hessians_out + node * 9u * output_stride; + write_hex_order2_hessian_q4<0u>( + output_stride, i, j, k, lx, ly, lz, dx, dy, dz, hx, hy, hz, hess_row); + write_hex_order2_hessian_q4<1u>( + output_stride, i, j, k, lx, ly, lz, dx, dy, dz, hx, hy, hz, hess_row); + write_hex_order2_hessian_q4<2u>( + output_stride, i, j, k, lx, ly, lz, dx, dy, dz, hx, hy, hz, hess_row); + write_hex_order2_hessian_q4<3u>( + output_stride, i, j, k, lx, ly, lz, dx, dy, dz, hx, hy, hz, hess_row); + } +} + +template +inline void write_quad_order2_all_q4( + std::size_t output_stride, + std::size_t i, + std::size_t j, + const Real lx[4][3], + const Real ly[4][3], + const Real dx[4][3], + const Real dy[4][3], + const Real hx[4][3], + const Real hy[4][3], + Real* SVMP_RESTRICT value_row, + Real* SVMP_RESTRICT grad_row, + Real* SVMP_RESTRICT hess_row) { + const Real xv = lx[Q][i]; + const 
Real yv = ly[Q][j]; + const Real xd = dx[Q][i]; + const Real yd = dy[Q][j]; + const Real hxy = xd * yd; + + value_row[Q] = xv * yv; + grad_row[0u * output_stride + Q] = xd * yv; + grad_row[1u * output_stride + Q] = xv * yd; + grad_row[2u * output_stride + Q] = Real(0); + hess_row[0u * output_stride + Q] = hx[Q][i] * yv; + hess_row[4u * output_stride + Q] = xv * hy[Q][j]; + hess_row[8u * output_stride + Q] = Real(0); + hess_row[1u * output_stride + Q] = hxy; + hess_row[3u * output_stride + Q] = hxy; + hess_row[2u * output_stride + Q] = Real(0); + hess_row[6u * output_stride + Q] = Real(0); + hess_row[5u * output_stride + Q] = Real(0); + hess_row[7u * output_stride + Q] = Real(0); +} + +void evaluate_quad_order2_all_q4( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + constexpr auto node_axes = detail::make_quad_tensor_node_axes<2>(); + + Real lx[4][3]; + Real ly[4][3]; + Real dx[4][3]; + Real dy[4][3]; + Real hx[4][3]; + Real hy[4][3]; + for (std::size_t q = 0; q < 4u; ++q) { + const auto& xi = points[q]; + fill_order2_axis_values_first_second(xi[0], lx[q], dx[q], hx[q]); + fill_order2_axis_values_first_second(xi[1], ly[q], dy[q], hy[q]); + } + + for (std::size_t node = 0; node < node_axes.size(); ++node) { + const auto& axes = node_axes[node]; + const std::size_t i = axes[0]; + const std::size_t j = axes[1]; + Real* value_row = values_out + node * output_stride; + Real* grad_row = gradients_out + node * 3u * output_stride; + Real* hess_row = hessians_out + node * 9u * output_stride; + write_quad_order2_all_q4<0u>( + output_stride, i, j, lx, ly, dx, dy, hx, hy, value_row, grad_row, hess_row); + write_quad_order2_all_q4<1u>( + output_stride, i, j, lx, ly, dx, dy, hx, hy, value_row, grad_row, hess_row); + write_quad_order2_all_q4<2u>( + output_stride, i, j, lx, ly, dx, dy, hx, hy, value_row, grad_row, hess_row); + write_quad_order2_all_q4<3u>( + 
output_stride, i, j, lx, ly, dx, dy, hx, hy, value_row, grad_row, hess_row); + } +} + +void evaluate_hex_order2_gradients_q4( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT gradients_out) { + constexpr std::array, 27> node_axes = {{ + {{0u, 0u, 0u}}, {{1u, 0u, 0u}}, {{1u, 1u, 0u}}, {{0u, 1u, 0u}}, + {{0u, 0u, 1u}}, {{1u, 0u, 1u}}, {{1u, 1u, 1u}}, {{0u, 1u, 1u}}, + {{2u, 0u, 0u}}, {{1u, 2u, 0u}}, {{2u, 1u, 0u}}, {{0u, 2u, 0u}}, + {{2u, 0u, 1u}}, {{1u, 2u, 1u}}, {{2u, 1u, 1u}}, {{0u, 2u, 1u}}, + {{0u, 0u, 2u}}, {{1u, 0u, 2u}}, {{1u, 1u, 2u}}, {{0u, 1u, 2u}}, + {{2u, 2u, 0u}}, {{2u, 2u, 1u}}, {{2u, 0u, 2u}}, {{1u, 2u, 2u}}, + {{2u, 1u, 2u}}, {{0u, 2u, 2u}}, {{2u, 2u, 2u}}, + }}; + + Real lx[4][3]; + Real ly[4][3]; + Real lz[4][3]; + Real dx[4][3]; + Real dy[4][3]; + Real dz[4][3]; + for (std::size_t q = 0; q < 4u; ++q) { + const auto& xi = points[q]; + fill_order2_axis_values_first(xi[0], lx[q], dx[q]); + fill_order2_axis_values_first(xi[1], ly[q], dy[q]); + fill_order2_axis_values_first(xi[2], lz[q], dz[q]); + } + + for (std::size_t node = 0; node < node_axes.size(); ++node) { + const auto& axes = node_axes[node]; + const std::size_t i = axes[0]; + const std::size_t j = axes[1]; + const std::size_t k = axes[2]; + Real* row = gradients_out + node * 3u * output_stride; + row[0] = dx[0][i] * ly[0][j] * lz[0][k]; + row[1] = dx[1][i] * ly[1][j] * lz[1][k]; + row[2] = dx[2][i] * ly[2][j] * lz[2][k]; + row[3] = dx[3][i] * ly[3][j] * lz[3][k]; + row[output_stride + 0u] = lx[0][i] * dy[0][j] * lz[0][k]; + row[output_stride + 1u] = lx[1][i] * dy[1][j] * lz[1][k]; + row[output_stride + 2u] = lx[2][i] * dy[2][j] * lz[2][k]; + row[output_stride + 3u] = lx[3][i] * dy[3][j] * lz[3][k]; + row[2u * output_stride + 0u] = lx[0][i] * ly[0][j] * dz[0][k]; + row[2u * output_stride + 1u] = lx[1][i] * ly[1][j] * dz[1][k]; + row[2u * output_stride + 2u] = lx[2][i] * ly[2][j] * dz[2][k]; + row[2u * output_stride + 3u] = lx[3][i] * ly[3][j] * dz[3][k]; + } +} 
+ +template +void evaluate_constant_fast_hessians_strided( + std::size_t num_qpts, + std::size_t output_stride, + Real* SVMP_RESTRICT hessians_out) { + std::array fast_hessians{}; + FastBasis::evaluate_hessians(math::Vector{}, fast_hessians); + for (std::size_t i = 0; i < fast_hessians.size(); ++i) { + const Hessian& hessian = fast_hessians[i]; + Real* H = hessians_out + i * 9u * output_stride; + const Real h00 = hessian(0, 0); + const Real h01 = hessian(0, 1); + const Real h02 = hessian(0, 2); + const Real h10 = hessian(1, 0); + const Real h11 = hessian(1, 1); + const Real h12 = hessian(1, 2); + const Real h20 = hessian(2, 0); + const Real h21 = hessian(2, 1); + const Real h22 = hessian(2, 2); + for (std::size_t q = 0; q < num_qpts; ++q) { + H[0u * output_stride + q] = h00; + H[1u * output_stride + q] = h01; + H[2u * output_stride + q] = h02; + H[3u * output_stride + q] = h10; + H[4u * output_stride + q] = h11; + H[5u * output_stride + q] = h12; + H[6u * output_stride + q] = h20; + H[7u * output_stride + q] = h21; + H[8u * output_stride + q] = h22; + } + } +} + +template +void evaluate_fast_outputs_with_constant_hessians_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + for (std::size_t q = 0; q < points.size(); ++q) { + const auto& xi = points[q]; + if (values_out != nullptr) { + std::array fast_values{}; + FastBasis::evaluate(xi, fast_values); + for (std::size_t i = 0; i < fast_values.size(); ++i) { + values_out[i * output_stride + q] = fast_values[i]; + } + } + if (gradients_out != nullptr) { + std::array fast_gradients{}; + FastBasis::evaluate_gradients(xi, fast_gradients); + for (std::size_t i = 0; i < fast_gradients.size(); ++i) { + Real* g = gradients_out + i * 3u * output_stride; + g[0u * output_stride + q] = fast_gradients[i][0]; + g[1u * output_stride + q] = fast_gradients[i][1]; + g[2u * output_stride + q] = 
fast_gradients[i][2]; + } + } + } + evaluate_constant_fast_hessians_strided( + points.size(), output_stride, hessians_out); +} + +template +void evaluate_wedge_fast_outputs_strided( + const std::vector>& wedge_indices, + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + static_assert(Order >= 1 && Order <= 2, + "wedge fast outputs rely on low-order public triangle ordering"); + using TriFast = LagrangeTriFast; + constexpr std::size_t axis_size = static_cast(Order + 1); + const bool need_grad = gradients_out != nullptr; + const bool need_hess = hessians_out != nullptr; + + for (std::size_t q = 0; q < points.size(); ++q) { + const auto& xi = points[q]; + std::array tri_values{}; + std::array tri_gradients{}; + std::array tri_hessians{}; + std::array z_values{}; + std::array z_first{}; + std::array z_second{}; + + TriFast::evaluate(xi, tri_values); + if (need_grad || need_hess) { + TriFast::evaluate_gradients(xi, tri_gradients); + } + if (need_hess) { + TriFast::evaluate_hessians(xi, tri_hessians); + detail::fill_axis_values_first_second(xi[2], z_values, z_first, z_second); + } else if (need_grad) { + detail::fill_axis_values_first(xi[2], z_values, z_first); + } else { + detail::fill_axis_values(xi[2], z_values); + } + + for (std::size_t node = 0; node < wedge_indices.size(); ++node) { + const auto& index = wedge_indices[node]; + const std::size_t tri = index[0]; + const std::size_t z = index[1]; + const Real tri_v = tri_values[tri]; + const Real zv = z_values[z]; + + if (values_out != nullptr) { + values_out[node * output_stride + q] = tri_v * zv; + } + + if (gradients_out != nullptr) { + Real* g = gradients_out + node * 3u * output_stride; + const Gradient& tri_g = tri_gradients[tri]; + g[0u * output_stride + q] = tri_g[0] * zv; + g[1u * output_stride + q] = tri_g[1] * zv; + g[2u * output_stride + q] = tri_v * z_first[z]; + } + + if (hessians_out != 
nullptr) { + Real* H = hessians_out + node * 9u * output_stride; + const Gradient& tri_g = tri_gradients[tri]; + const Hessian& tri_H = tri_hessians[tri]; + const Real zd = z_first[z]; + const Real hxz = tri_g[0] * zd; + const Real hxy = tri_H(0, 1) * zv; + const Real hyz = tri_g[1] * zd; + H[0u * output_stride + q] = tri_H(0, 0) * zv; + H[1u * output_stride + q] = hxy; + H[2u * output_stride + q] = hxz; + H[3u * output_stride + q] = hxy; + H[4u * output_stride + q] = tri_H(1, 1) * zv; + H[5u * output_stride + q] = hyz; + H[6u * output_stride + q] = hxz; + H[7u * output_stride + q] = hyz; + H[8u * output_stride + q] = tri_v * z_second[z]; + } + } + } +} + +template +inline void fill_triangle_simplex_product_factors(Real lambda, Real* SVMP_RESTRICT factors) { + const Real t = static_cast(Order) * lambda; + factors[0] = Real(1); + for (int a = 1; a <= Order; ++a) { + factors[a] = + factors[a - 1] * + (t - static_cast(a - 1)) / + static_cast(a); + } +} + +template +SVMP_LAGRANGE_NOINLINE SVMP_LAGRANGE_ALIGN64 bool evaluate_wedge_values_product_q4( + const std::vector>& simplex_exponents, + const std::vector>& wedge_indices, + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out) { + constexpr std::size_t tri_count = + static_cast((Order + 1) * (Order + 2) / 2); + if (simplex_exponents.size() != tri_count || points.size() != 4u) { + return false; + } + + Real tri_values[4][tri_count]; + std::array z_values[4]; + for (std::size_t q = 0; q < 4u; ++q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l0 = Real(1) - l1 - l2; + Real f0[Order + 1]; + Real f1[Order + 1]; + Real f2[Order + 1]; + fill_triangle_simplex_product_factors(l0, f0); + fill_triangle_simplex_product_factors(l1, f1); + fill_triangle_simplex_product_factors(l2, f2); + detail::fill_axis_values(xi[2], z_values[q]); + + for (std::size_t tri = 0; tri < tri_count; ++tri) { + const auto& e = simplex_exponents[tri]; + 
tri_values[q][tri] = + f0[static_cast(e[0])] * + f1[static_cast(e[1])] * + f2[static_cast(e[2])]; + } + } + + for (std::size_t node = 0; node < wedge_indices.size(); ++node) { + const auto& index = wedge_indices[node]; + const std::size_t tri = index[0]; + const std::size_t z = index[1]; + Real* SVMP_RESTRICT row = values_out + node * output_stride; + row[0] = tri_values[0][tri] * z_values[0][z]; + row[1] = tri_values[1][tri] * z_values[1][z]; + row[2] = tri_values[2][tri] * z_values[2][z]; + row[3] = tri_values[3][tri] * z_values[3][z]; + } + return true; +} + +bool try_evaluate_wedge_values_product_q4( + const std::vector>& simplex_exponents, + const std::vector>& wedge_indices, + int order, + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out) { + switch (order) { + case 4: + return evaluate_wedge_values_product_q4<4>( + simplex_exponents, wedge_indices, points, output_stride, values_out); + case 5: + return evaluate_wedge_values_product_q4<5>( + simplex_exponents, wedge_indices, points, output_stride, values_out); + case 6: + return evaluate_wedge_values_product_q4<6>( + simplex_exponents, wedge_indices, points, output_stride, values_out); + case 7: + return evaluate_wedge_values_product_q4<7>( + simplex_exponents, wedge_indices, points, output_stride, values_out); + case 8: + return evaluate_wedge_values_product_q4<8>( + simplex_exponents, wedge_indices, points, output_stride, values_out); + default: + return false; + } +} + +void evaluate_wedge_order1_values_q4( + const std::vector>& wedge_indices, + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out) { + Real tri[4][3]; + Real axis[4][2]; + for (std::size_t q = 0; q < 4u; ++q) { + const auto& xi = points[q]; + tri[q][0] = Real(1) - xi[0] - xi[1]; + tri[q][1] = xi[0]; + tri[q][2] = xi[1]; + axis[q][0] = (Real(1) - xi[2]) * Real(0.5); + axis[q][1] = (Real(1) + xi[2]) * Real(0.5); + } + + for (std::size_t node = 0; node < 
wedge_indices.size(); ++node) { + const auto& index = wedge_indices[node]; + const std::size_t tri_node = index[0]; + const std::size_t axis_node = index[1]; + Real* row = values_out + node * output_stride; + row[0] = tri[0][tri_node] * axis[0][axis_node]; + row[1] = tri[1][tri_node] * axis[1][axis_node]; + row[2] = tri[2][tri_node] * axis[2][axis_node]; + row[3] = tri[3][tri_node] * axis[3][axis_node]; + } +} + +bool evaluate_wedge_fast_strided( + int order, + const std::vector>& wedge_indices, + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + if (order == 3) { + return false; + } + if (order == 1 && + points.size() == 4u && + values_out != nullptr && + gradients_out == nullptr && + hessians_out == nullptr) { + evaluate_wedge_order1_values_q4(wedge_indices, points, output_stride, values_out); + return true; + } + + switch (order) { + case 1: + evaluate_wedge_fast_outputs_strided<1>( + wedge_indices, points, output_stride, values_out, gradients_out, hessians_out); + return true; + case 2: + evaluate_wedge_fast_outputs_strided<2>( + wedge_indices, points, output_stride, values_out, gradients_out, hessians_out); + return true; + default: + return false; + } +} + +bool evaluate_fixed_lagrange_fast(LagrangeTopology topology, + int order, + const math::Vector& xi, + std::vector* values, + std::vector* gradients, + std::vector* hessians) { + switch (order) { + case 1: + return evaluate_fixed_lagrange_fast_order<1>( + topology, xi, values, gradients, hessians); + case 2: + return evaluate_fixed_lagrange_fast_order<2>( + topology, xi, values, gradients, hessians); + case 3: + return evaluate_fixed_lagrange_fast_order<3>( + topology, xi, values, gradients, hessians); + default: + return false; + } +} + +bool evaluate_fixed_lagrange_fast_strided(LagrangeTopology topology, + int order, + const std::vector>& points, + std::size_t output_stride, + Real* 
SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + if (topology == LagrangeTopology::Line && + points.size() == 4u) { + const bool values_only = + values_out != nullptr && gradients_out == nullptr && hessians_out == nullptr; + const bool gradients_only = + values_out == nullptr && gradients_out != nullptr && hessians_out == nullptr; + const bool hessians_only = + values_out == nullptr && gradients_out == nullptr && hessians_out != nullptr; + const bool all_outputs = + values_out != nullptr && gradients_out != nullptr && hessians_out != nullptr; + if (values_only) { + if (order == 1) { + evaluate_line_order1_values_q4(points, output_stride, values_out); + return true; + } + if (order == 2) { + evaluate_line_order2_values_q4(points, output_stride, values_out); + return true; + } + if (order == 3) { + evaluate_line_order3_values_q4(points, output_stride, values_out); + return true; + } + } + if (order == 1) { + if (gradients_only) { + evaluate_line_order1_gradients_q4(output_stride, gradients_out); + return true; + } + if (hessians_only) { + evaluate_line_order1_hessians_q4(output_stride, hessians_out); + return true; + } + if (all_outputs) { + evaluate_line_order1_all_q4( + points, output_stride, values_out, gradients_out, hessians_out); + return true; + } + } + if (order == 2) { + if (gradients_only) { + evaluate_line_order2_gradients_q4(points, output_stride, gradients_out); + return true; + } + if (hessians_only) { + evaluate_line_order2_hessians_q4(output_stride, hessians_out); + return true; + } + if (all_outputs) { + evaluate_line_order2_all_q4( + points, output_stride, values_out, gradients_out, hessians_out); + return true; + } + } + if (order == 3) { + if (gradients_only) { + evaluate_line_order3_gradients_q4(points, output_stride, gradients_out); + return true; + } + if (hessians_only) { + evaluate_line_order3_hessians_q4(points, output_stride, hessians_out); + return true; + } + if (all_outputs) { + 
evaluate_line_order3_all_q4( + points, output_stride, values_out, gradients_out, hessians_out); + return true; + } + } + } + + if (topology == LagrangeTopology::Tetrahedron && + order == 3 && + (gradients_out != nullptr || hessians_out != nullptr)) { + return false; + } + if (topology == LagrangeTopology::Triangle && + order == 3 && + hessians_out != nullptr) { + return false; + } + if (topology == LagrangeTopology::Triangle && + order == 1 && + values_out != nullptr && + gradients_out == nullptr && + hessians_out == nullptr) { + evaluate_triangle_order1_values_strided(points, output_stride, values_out); + return true; + } + if (topology == LagrangeTopology::Triangle && + order == 1 && + values_out == nullptr && + gradients_out != nullptr && + hessians_out == nullptr) { + evaluate_triangle_order1_gradients_strided(points.size(), output_stride, gradients_out); + return true; + } + if (topology == LagrangeTopology::Tetrahedron && + order == 1 && + values_out != nullptr && + gradients_out == nullptr && + hessians_out == nullptr) { + evaluate_tet_order1_values_strided(points, output_stride, values_out); + return true; + } + if (topology == LagrangeTopology::Tetrahedron && + order == 1 && + values_out == nullptr && + gradients_out != nullptr && + hessians_out == nullptr) { + evaluate_tet_order1_gradients_strided(points.size(), output_stride, gradients_out); + return true; + } + if (topology == LagrangeTopology::Triangle && + order == 1 && + values_out == nullptr && + gradients_out == nullptr && + hessians_out != nullptr) { + evaluate_zero_hessians_strided(3u, points.size(), output_stride, hessians_out); + return true; + } + if (topology == LagrangeTopology::Tetrahedron && + order == 1 && + values_out == nullptr && + gradients_out == nullptr && + hessians_out != nullptr) { + evaluate_zero_hessians_strided(4u, points.size(), output_stride, hessians_out); + return true; + } + if (topology == LagrangeTopology::Triangle && + order == 1 && + values_out != nullptr && + 
gradients_out != nullptr && + hessians_out != nullptr) { + evaluate_triangle_order1_values_strided(points, output_stride, values_out); + evaluate_triangle_order1_gradients_strided(points.size(), output_stride, gradients_out); + evaluate_zero_hessians_strided(3u, points.size(), output_stride, hessians_out); + return true; + } + if (topology == LagrangeTopology::Tetrahedron && + order == 1 && + values_out != nullptr && + gradients_out != nullptr && + hessians_out != nullptr) { + evaluate_tet_order1_values_strided(points, output_stride, values_out); + evaluate_tet_order1_gradients_strided(points.size(), output_stride, gradients_out); + evaluate_zero_hessians_strided(4u, points.size(), output_stride, hessians_out); + return true; + } + if (topology == LagrangeTopology::Triangle && + order == 2 && + values_out != nullptr && + gradients_out == nullptr && + hessians_out == nullptr) { + evaluate_triangle_order2_values_strided(points, output_stride, values_out); + return true; + } + if (topology == LagrangeTopology::Triangle && + order == 2 && + values_out == nullptr && + gradients_out != nullptr && + hessians_out == nullptr) { + evaluate_triangle_order2_gradients_strided(points, output_stride, gradients_out); + return true; + } + if (topology == LagrangeTopology::Triangle && + order == 2 && + points.size() == 4u && + values_out == nullptr && + gradients_out == nullptr && + hessians_out != nullptr) { + evaluate_triangle_order2_hessians_q4(output_stride, hessians_out); + return true; + } + if (topology == LagrangeTopology::Triangle && + order == 2 && + points.size() == 4u && + values_out != nullptr && + gradients_out != nullptr && + hessians_out != nullptr) { + evaluate_triangle_order2_values_strided(points, output_stride, values_out); + evaluate_triangle_order2_gradients_strided(points, output_stride, gradients_out); + evaluate_triangle_order2_hessians_q4(output_stride, hessians_out); + return true; + } + if (topology == LagrangeTopology::Tetrahedron && + order == 2 && + 
values_out != nullptr && + gradients_out == nullptr && + hessians_out == nullptr) { + evaluate_tet_order2_values_strided(points, output_stride, values_out); + return true; + } + if (topology == LagrangeTopology::Tetrahedron && + order == 2 && + values_out == nullptr && + gradients_out != nullptr && + hessians_out == nullptr) { + evaluate_tet_order2_gradients_strided(points, output_stride, gradients_out); + return true; + } + if (topology == LagrangeTopology::Tetrahedron && + order == 2 && + points.size() == 4u && + values_out == nullptr && + gradients_out == nullptr && + hessians_out != nullptr) { + evaluate_tet_order2_hessians_q4(output_stride, hessians_out); + return true; + } + if (topology == LagrangeTopology::Tetrahedron && + order == 2 && + points.size() == 4u && + values_out != nullptr && + gradients_out != nullptr && + hessians_out != nullptr) { + evaluate_tet_order2_values_strided(points, output_stride, values_out); + evaluate_tet_order2_gradients_strided(points, output_stride, gradients_out); + evaluate_tet_order2_hessians_q4(output_stride, hessians_out); + return true; + } + if (topology == LagrangeTopology::Tetrahedron && + order == 3 && + values_out != nullptr && + gradients_out == nullptr && + hessians_out == nullptr) { + evaluate_tet_order3_values_strided(points, output_stride, values_out); + return true; + } + if (topology == LagrangeTopology::Triangle && + order == 3 && + values_out != nullptr && + gradients_out == nullptr && + hessians_out == nullptr) { + evaluate_triangle_order3_values_strided(points, output_stride, values_out); + return true; + } + if (topology == LagrangeTopology::Triangle && + order == 3 && + values_out == nullptr && + gradients_out != nullptr && + hessians_out == nullptr) { + evaluate_triangle_order3_gradients_strided(points, output_stride, gradients_out); + return true; + } + if (topology == LagrangeTopology::Hexahedron && + order == 1 && + values_out != nullptr && + gradients_out == nullptr && + hessians_out == nullptr) { + 
evaluate_hex_order1_values_strided(points, output_stride, values_out); + return true; + } + if (topology == LagrangeTopology::Hexahedron && + order == 1 && + values_out == nullptr && + gradients_out != nullptr && + hessians_out == nullptr) { + evaluate_hex_order1_outputs_strided( + points, output_stride, values_out, gradients_out, hessians_out); + return true; + } + if (topology == LagrangeTopology::Hexahedron && + order == 1 && + values_out == nullptr && + gradients_out == nullptr && + hessians_out != nullptr) { + evaluate_hex_order1_outputs_strided( + points, output_stride, values_out, gradients_out, hessians_out); + return true; + } + if (topology == LagrangeTopology::Hexahedron && + order == 1 && + values_out != nullptr && + gradients_out != nullptr && + hessians_out != nullptr) { + evaluate_hex_order1_outputs_strided( + points, output_stride, values_out, gradients_out, hessians_out); + return true; + } + if (topology == LagrangeTopology::Quadrilateral && + order == 1 && + values_out != nullptr && + gradients_out == nullptr && + hessians_out == nullptr) { + evaluate_quad_order1_values_strided(points, output_stride, values_out); + return true; + } + if (topology == LagrangeTopology::Quadrilateral && + order == 1 && + values_out == nullptr && + gradients_out != nullptr && + hessians_out == nullptr) { + evaluate_quad_order1_gradients_strided(points, output_stride, gradients_out); + return true; + } + if (topology == LagrangeTopology::Quadrilateral && + order == 1 && + values_out == nullptr && + gradients_out == nullptr && + hessians_out != nullptr) { + evaluate_quad_order1_hessians_strided(points.size(), output_stride, hessians_out); + return true; + } + if (topology == LagrangeTopology::Quadrilateral && + order == 1 && + points.size() == 4u && + values_out != nullptr && + gradients_out != nullptr && + hessians_out != nullptr) { + evaluate_quad_order1_all_q4(points, output_stride, values_out, gradients_out, hessians_out); + return true; + } + if (topology == 
LagrangeTopology::Quadrilateral && + order == 2 && + values_out != nullptr && + gradients_out == nullptr && + hessians_out == nullptr) { + evaluate_quad_order2_values_strided(points, output_stride, values_out); + return true; + } + if (topology == LagrangeTopology::Quadrilateral && + order == 2 && + values_out == nullptr && + gradients_out != nullptr && + hessians_out == nullptr) { + evaluate_quad_order2_gradients_strided(points, output_stride, gradients_out); + return true; + } + if (topology == LagrangeTopology::Quadrilateral && + order == 2 && + values_out == nullptr && + gradients_out == nullptr && + hessians_out != nullptr) { + evaluate_quad_order2_hessians_strided(points, output_stride, hessians_out); + return true; + } + if (topology == LagrangeTopology::Quadrilateral && + order == 2 && + points.size() == 4u && + values_out != nullptr && + gradients_out != nullptr && + hessians_out != nullptr) { + evaluate_quad_order2_all_q4(points, output_stride, values_out, gradients_out, hessians_out); + return true; + } + if (topology == LagrangeTopology::Quadrilateral && + order == 3 && + values_out != nullptr && + gradients_out == nullptr && + hessians_out == nullptr) { + evaluate_quad_order3_values_strided(points, output_stride, values_out); + return true; + } + if (topology == LagrangeTopology::Quadrilateral && + order == 3 && + values_out == nullptr && + gradients_out != nullptr && + hessians_out == nullptr) { + evaluate_quad_order3_gradients_strided(points, output_stride, gradients_out); + return true; + } + if (topology == LagrangeTopology::Quadrilateral && + order == 3 && + values_out == nullptr && + gradients_out == nullptr && + hessians_out != nullptr) { + evaluate_quad_order3_hessians_strided(points, output_stride, hessians_out); + return true; + } + if (topology == LagrangeTopology::Quadrilateral && + order == 3 && + points.size() == 4u && + values_out != nullptr && + gradients_out != nullptr && + hessians_out != nullptr) { + 
evaluate_quad_order3_all_q4(points, output_stride, values_out, gradients_out, hessians_out); + return true; + } + if (topology == LagrangeTopology::Hexahedron && + order == 2 && + values_out != nullptr && + gradients_out == nullptr && + hessians_out == nullptr) { + evaluate_hex_order2_values_strided(points, output_stride, values_out); + return true; + } + if (topology == LagrangeTopology::Hexahedron && + order == 2 && + points.size() == 4u && + values_out == nullptr && + gradients_out != nullptr && + hessians_out == nullptr) { + evaluate_hex_order2_gradients_q4(points, output_stride, gradients_out); + return true; + } + if (topology == LagrangeTopology::Hexahedron && + order == 2 && + points.size() == 4u && + values_out == nullptr && + gradients_out == nullptr && + hessians_out != nullptr) { + evaluate_hex_order2_hessians_q4(points, output_stride, hessians_out); + return true; + } + if (topology == LagrangeTopology::Hexahedron && + order == 2 && + points.size() == 4u && + values_out != nullptr && + gradients_out != nullptr && + hessians_out != nullptr) { + evaluate_hex_order2_values_strided(points, output_stride, values_out); + evaluate_hex_order2_gradients_q4(points, output_stride, gradients_out); + evaluate_hex_order2_hessians_q4(points, output_stride, hessians_out); + return true; + } + if (topology == LagrangeTopology::Hexahedron && + order == 3 && + points.size() == 4u && + output_stride == 4u && + hessians_out != nullptr) { + return false; + } + if (topology == LagrangeTopology::Hexahedron && + order == 3 && + points.size() == 4u && + values_out != nullptr && + gradients_out == nullptr && + hessians_out == nullptr) { + evaluate_hex_order3_values_q4(points, output_stride, values_out); + return true; + } + if (topology == LagrangeTopology::Hexahedron && + order == 3 && + points.size() == 4u && + values_out == nullptr && + gradients_out != nullptr && + hessians_out == nullptr) { + evaluate_hex_order3_gradients_q4(points, output_stride, gradients_out); + return 
true; + } + if (topology == LagrangeTopology::Hexahedron && + order == 3 && + points.size() == 4u && + values_out == nullptr && + gradients_out == nullptr && + hessians_out != nullptr) { + evaluate_hex_order3_hessians_q4(points, output_stride, hessians_out); + return true; + } + if (topology == LagrangeTopology::Hexahedron && + order == 3 && + points.size() == 4u && + values_out != nullptr && + gradients_out != nullptr && + hessians_out != nullptr) { + evaluate_hex_order3_all_q4(points, output_stride, values_out, gradients_out, hessians_out); + return true; + } + if (hessians_out != nullptr && order > 1 && + (topology == LagrangeTopology::Quadrilateral || + topology == LagrangeTopology::Hexahedron)) { + return false; + } + if (hessians_out != nullptr) { + const bool hessians_only = values_out == nullptr && gradients_out == nullptr; + if (order == 1) { + if (topology == LagrangeTopology::Triangle && hessians_only) { + evaluate_fast_outputs_with_constant_hessians_strided>( + points, output_stride, values_out, gradients_out, hessians_out); + return true; + } + if (topology == LagrangeTopology::Tetrahedron) { + evaluate_fast_outputs_with_constant_hessians_strided>( + points, output_stride, values_out, gradients_out, hessians_out); + return true; + } + } else if (order == 2) { + if (topology == LagrangeTopology::Triangle && hessians_only) { + evaluate_fast_outputs_with_constant_hessians_strided>( + points, output_stride, values_out, gradients_out, hessians_out); + return true; + } + if (topology == LagrangeTopology::Tetrahedron) { + evaluate_fast_outputs_with_constant_hessians_strided>( + points, output_stride, values_out, gradients_out, hessians_out); + return true; + } + } + } + + switch (order) { + case 1: + return evaluate_fixed_lagrange_fast_strided_order<1>( + topology, points, output_stride, values_out, gradients_out, hessians_out); + case 2: + return evaluate_fixed_lagrange_fast_strided_order<2>( + topology, points, output_stride, values_out, gradients_out, 
hessians_out); + case 3: + return evaluate_fixed_lagrange_fast_strided_order<3>( + topology, points, output_stride, values_out, gradients_out, hessians_out); + default: + return false; + } +} + +bool evaluate_fixed_lagrange_fast_to(LagrangeTopology topology, + int order, + const math::Vector& xi, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + switch (order) { + case 1: + return evaluate_fixed_lagrange_fast_to_order<1>( + topology, xi, values_out, gradients_out, hessians_out); + case 2: + return evaluate_fixed_lagrange_fast_to_order<2>( + topology, xi, values_out, gradients_out, hessians_out); + case 3: + return evaluate_fixed_lagrange_fast_to_order<3>( + topology, xi, values_out, gradients_out, hessians_out); + default: + return false; + } +} + +template +struct AxisMonomialCoefficientTable { + std::array values{}; + std::array 1 ? N - 1 : 0)> first{}; + std::array 2 ? N - 2 : 0)> second{}; +}; + +template +constexpr AxisMonomialCoefficientTable make_axis_monomial_coefficient_table() { + AxisMonomialCoefficientTable table{}; + std::array nodes{}; + constexpr int order = static_cast(N) - 1; + for (std::size_t i = 0; i < N; ++i) { + nodes[i] = detail::equispaced_pm_one_coord(static_cast(i), order); + } + + for (std::size_t i = 0; i < N; ++i) { + std::array coeffs{}; + std::array next{}; + coeffs[0] = Real(1); + std::size_t degree = 0; + for (std::size_t j = 0; j < N; ++j) { + if (j == i) { + continue; + } + next = {}; + for (std::size_t k = 0; k <= degree; ++k) { + next[k] -= nodes[j] * coeffs[k]; + next[k + 1] += coeffs[k]; + } + coeffs = next; + ++degree; + } + + Real denominator = Real(1); + for (std::size_t j = 0; j < N; ++j) { + if (j != i) { + denominator *= nodes[i] - nodes[j]; + } + } + const Real inv_denominator = Real(1) / denominator; + for (std::size_t k = 0; k < N; ++k) { + table.values[i * N + k] = coeffs[k] * inv_denominator; + } + if constexpr (N >= 2) { + for (std::size_t k = 1; k < N; 
++k) { + table.first[i * (N - 1) + (k - 1)] = + static_cast(k) * table.values[i * N + k]; + } + } + if constexpr (N >= 3) { + for (std::size_t k = 2; k < N; ++k) { + table.second[i * (N - 2) + (k - 2)] = + static_cast(k * (k - 1)) * table.values[i * N + k]; + } + } + } + + return table; +} + +template +void assign_axis_coefficient_table(const AxisMonomialCoefficientTable& table, + std::vector& values, + std::vector& first, + std::vector& second) { + assign_array(values, table.values); + assign_array(first, table.first); + assign_array(second, table.second); +} + +bool assign_precomputed_axis_coefficients(int n_axis, + std::vector& values, + std::vector& first, + std::vector& second) { + static constexpr auto kAxisCoefficients1 = make_axis_monomial_coefficient_table<1>(); + static constexpr auto kAxisCoefficients2 = make_axis_monomial_coefficient_table<2>(); + static constexpr auto kAxisCoefficients3 = make_axis_monomial_coefficient_table<3>(); + static constexpr auto kAxisCoefficients4 = make_axis_monomial_coefficient_table<4>(); + static constexpr auto kAxisCoefficients5 = make_axis_monomial_coefficient_table<5>(); + + switch (n_axis) { + case 1: + assign_axis_coefficient_table(kAxisCoefficients1, values, first, second); + return true; + case 2: + assign_axis_coefficient_table(kAxisCoefficients2, values, first, second); + return true; + case 3: + assign_axis_coefficient_table(kAxisCoefficients3, values, first, second); + return true; + case 4: + assign_axis_coefficient_table(kAxisCoefficients4, values, first, second); + return true; + case 5: + assign_axis_coefficient_table(kAxisCoefficients5, values, first, second); + return true; + default: + return false; + } +} + +LagrangeTopologyTraits lagrange_topology_traits(ElementType type) { + const auto topo = topology(type); + if (topo != LagrangeTopology::Unknown) { + return {topo, reference_dimension(type)}; + } + + throw BasisElementCompatibilityException("Unsupported element type for LagrangeBasis", + __FILE__, 
__LINE__, __func__); +} + +std::size_t lattice_index_pm_one(Real coord, int order, const char* context) { + if (order <= 0) { + if (!coordinate_matches_expected(coord, Real(0))) { + throw BasisNodeOrderingException(context, __FILE__, __LINE__, __func__); + } + return 0; + } + + const Real scaled = (coord + Real(1)) * static_cast(order) / Real(2); + const long idx = std::lround(scaled); + if (idx < 0 || idx > order || + !coordinate_matches_expected( + coord, + detail::equispaced_pm_one_coord(static_cast(idx), order))) { + throw BasisNodeOrderingException(context, __FILE__, __LINE__, __func__); + } + return static_cast(idx); +} + +int simplex_lattice_index(Real coord, int order, const char* context) { + if (order <= 0) { + if (!coordinate_matches_expected(coord, Real(0)) && + !coordinate_matches_expected(coord, Real(0.25)) && + !coordinate_matches_expected(coord, Real(1) / Real(3))) { + throw BasisNodeOrderingException(context, __FILE__, __LINE__, __func__); + } + return 0; + } + + const Real scaled = coord * static_cast(order); + const long idx = std::lround(scaled); + const Real reconstructed = static_cast(idx) / static_cast(order); + if (idx < 0 || idx > order || !coordinate_matches_expected(coord, reconstructed)) { + throw BasisNodeOrderingException(context, __FILE__, __LINE__, __func__); + } + return static_cast(idx); +} + +std::array triangle_exponents_from_public_node(const math::Vector& node, + int order) { + if (order == 0) { + return {0, 0, 0, 0}; + } + + const int j = simplex_lattice_index(node[0], order, + "LagrangeBasis: invalid triangle node coordinate for public ordering"); + const int k = simplex_lattice_index(node[1], order, + "LagrangeBasis: invalid triangle node coordinate for public ordering"); + const int i = order - j - k; + if (i < 0) { + throw BasisNodeOrderingException("LagrangeBasis: invalid triangle barycentric coordinates for public ordering", + __FILE__, __LINE__, __func__); + } + return {i, j, k, 0}; +} + +std::array 
tetrahedron_exponents_from_public_node(const math::Vector& node, + int order) { + if (order == 0) { + return {0, 0, 0, 0}; + } + + const int j = simplex_lattice_index(node[0], order, + "LagrangeBasis: invalid tetrahedron node x-coordinate for public ordering"); + const int k = simplex_lattice_index(node[1], order, + "LagrangeBasis: invalid tetrahedron node y-coordinate for public ordering"); + const int l = simplex_lattice_index(node[2], order, + "LagrangeBasis: invalid tetrahedron node z-coordinate for public ordering"); + const int i = order - j - k - l; + if (i < 0) { + throw BasisNodeOrderingException("LagrangeBasis: invalid tetrahedron barycentric coordinates for public ordering", + __FILE__, __LINE__, __func__); + } + return {i, j, k, l}; +} + +struct NormalizedLagrangeRequest { + ElementType element_type; + int order; +}; + +// Non-owning view of the per-axis 1D Lagrange basis evaluations +// (values, first derivative, second derivative), each of length `size`. +struct AxisBasisEvaluations { + const Real* values; + const Real* first; + const Real* second; + std::size_t size; +}; + +AxisBasisEvaluations constant_axis_basis() { + static const Real kOne[1] = {Real(1)}; + static const Real kZero[1] = {Real(0)}; + return AxisBasisEvaluations{kOne, kZero, kZero, 1}; +} + +// Horner-form evaluator for the precomputed 1D Lagrange basis. +// +// Inputs are precomputed monomial coefficients of L_i(x), L_i'(x), L_i''(x) +// (built once at LagrangeBasis construction). Evaluation is purely +// multiply-add on the coefficients — no divisions and no node-position +// lookups in the hot path. Templated on N for compile-time loop unrolling +// and FMA-friendly straight-line code on the common Hex/Quad/Line orders. 
+// +// Layout: +// v_coeffs: N * N entries; row i holds [c_i0, c_i1, ..., c_i(N-1)] +// such that L_i(x) = sum_k c_ik * x^k +// d_coeffs: N * (N-1) entries; row i holds derivative coefficients of L_i'(x) +// d2_coeffs: N * (N-2) entries; row i holds coefficients of L_i''(x) +// (only valid when N >= 3) +template +inline void evaluate_1d_horner_impl(const Real* v_coeffs, + const Real* d_coeffs, + const Real* d2_coeffs, + Real xi, + Real* values, Real* first, Real* second) { + if constexpr (N == 1) { + values[0] = v_coeffs[0]; + if (first) first[0] = Real(0); + if (second) second[0] = Real(0); + return; + } else { + // Values: degree N-1 polynomials. + for (int i = 0; i < N; ++i) { + const Real* c = v_coeffs + i * N; + Real r = c[N - 1]; + for (int k = N - 1; k > 0; --k) { + r = r * xi + c[k - 1]; + } + values[i] = r; + } + + if (!first && !second) return; + + if (first) { + // First derivatives: degree N-2 polynomials (per row of d_coeffs). + for (int i = 0; i < N; ++i) { + const Real* c = d_coeffs + i * (N - 1); + Real r = c[N - 2]; + for (int k = N - 2; k > 0; --k) { + r = r * xi + c[k - 1]; + } + first[i] = r; + } + } + + if (!second) return; + + if constexpr (N <= 2) { + for (int i = 0; i < N; ++i) second[i] = Real(0); + } else { + // Second derivatives: degree N-3 polynomials (per row of d2_coeffs). + for (int i = 0; i < N; ++i) { + const Real* c = d2_coeffs + i * (N - 2); + Real r = c[N - 3]; + for (int k = N - 3; k > 0; --k) { + r = r * xi + c[k - 1]; + } + second[i] = r; + } + } + } +} + +void fill_equispaced_barycentric_weights(int n_axis, Real* weights) { + const int order = n_axis - 1; + Real weight = (order % 2 == 0) ? 
Real(1) : Real(-1); + Real max_abs = Real(0); + for (int i = 0; i < n_axis; ++i) { + weights[i] = weight; + max_abs = std::max(max_abs, std::abs(weight)); + if (i < order) { + weight *= -static_cast(order - i) / static_cast(i + 1); + } + } + + if (max_abs > Real(0)) { + const Real inv_scale = Real(1) / max_abs; + for (int i = 0; i < n_axis; ++i) { + weights[i] *= inv_scale; + } + } +} + +bool coordinate_matches_axis_node(Real xi, Real node) { + return coordinate_matches_expected(xi, node); +} + +struct CompensatedSum { + Real sum{Real(0)}; + Real compensation{Real(0)}; + + void add(Real value) noexcept { + const Real y = value - compensation; + const Real t = sum + y; + compensation = (t - sum) - y; + sum = t; + } +}; + +void distribute_residual_by_abs(int n_axis, Real* values, Real residual) { + if (values == nullptr || n_axis <= 0 || residual == Real(0)) { + return; + } + + CompensatedSum abs_sum; + int largest_index = 0; + Real largest_abs = Real(0); + for (int i = 0; i < n_axis; ++i) { + const Real magnitude = std::abs(values[i]); + abs_sum.add(magnitude); + if (magnitude > largest_abs) { + largest_abs = magnitude; + largest_index = i; + } + } + + if (abs_sum.sum <= Real(0)) { + values[0] += residual; + return; + } + + const Real inv_abs_sum = Real(1) / abs_sum.sum; + CompensatedSum applied; + for (int i = 0; i < n_axis; ++i) { + const Real correction = residual * std::abs(values[i]) * inv_abs_sum; + values[i] += correction; + applied.add(correction); + } + values[largest_index] += residual - applied.sum; +} + +void evaluate_1d_barycentric_runtime(int n_axis, + Real xi, + const Real* weights, + Real* values, + Real* first, + Real* second) { + const int order = n_axis - 1; + BASIS_CHECK_EVAL(weights != nullptr, + "LagrangeBasis: missing cached barycentric weights for runtime axis evaluation"); + + int node_index = -1; + for (int i = 0; i < n_axis; ++i) { + const Real node = detail::equispaced_pm_one_coord(i, order); + if (coordinate_matches_axis_node(xi, node)) 
{ + node_index = i; + break; + } + } + + if (node_index >= 0) { + std::fill(values, values + n_axis, Real(0)); + values[node_index] = Real(1); + if (!first && !second) { + return; + } + + const Real xk = detail::equispaced_pm_one_coord(node_index, order); + const Real wk = weights[static_cast(node_index)]; + Real reciprocal_sum = Real(0); + if (second) { + for (int m = 0; m < n_axis; ++m) { + if (m == node_index) { + continue; + } + const Real xm = detail::equispaced_pm_one_coord(m, order); + reciprocal_sum += Real(1) / (xk - xm); + } + } + + Real first_diagonal = Real(0); + Real second_diagonal = Real(0); + if (first) { + std::fill(first, first + n_axis, Real(0)); + } + if (second) { + std::fill(second, second + n_axis, Real(0)); + } + + for (int j = 0; j < n_axis; ++j) { + if (j == node_index) { + continue; + } + const Real xj = detail::equispaced_pm_one_coord(j, order); + const Real distance = xk - xj; + const Real offdiag_first = weights[static_cast(j)] / (wk * distance); + first_diagonal -= offdiag_first; + if (first) { + first[j] = offdiag_first; + } + if (second) { + const Real offdiag_second = + Real(2) * offdiag_first * (reciprocal_sum - Real(1) / distance); + second[j] = offdiag_second; + second_diagonal -= offdiag_second; + } + } + if (first) { + first[node_index] = first_diagonal; + } + if (second) { + second[node_index] = second_diagonal; + } + return; + } + + Real sum0 = Real(0); + Real sum1 = Real(0); + Real sum2 = Real(0); + for (int i = 0; i < n_axis; ++i) { + const Real node = detail::equispaced_pm_one_coord(i, order); + const Real inv_distance = Real(1) / (xi - node); + const Real weighted = weights[static_cast(i)] * inv_distance; + sum0 += weighted; + sum1 += weighted * inv_distance; + sum2 += weighted * inv_distance * inv_distance; + } + + const Real inv_sum0 = Real(1) / sum0; + const Real first_ratio = sum1 * inv_sum0; + const Real second_ratio = sum2 * inv_sum0; + const Real first_ratio_sq = first_ratio * first_ratio; + + CompensatedSum 
value_sum; + CompensatedSum first_sum; + CompensatedSum second_sum; + for (int i = 0; i < n_axis; ++i) { + const Real node = detail::equispaced_pm_one_coord(i, order); + const Real inv_distance = Real(1) / (xi - node); + const Real value = weights[static_cast(i)] * inv_distance * inv_sum0; + values[i] = value; + value_sum.add(value); + if (first || second) { + const Real derivative_factor = first_ratio - inv_distance; + if (first) { + first[i] = value * derivative_factor; + first_sum.add(first[i]); + } + if (second) { + second[i] = value * (derivative_factor * derivative_factor + + inv_distance * inv_distance - + Real(2) * second_ratio + + first_ratio_sq); + second_sum.add(second[i]); + } + } + } + + distribute_residual_by_abs(n_axis, values, Real(1) - value_sum.sum); + if (first) { + distribute_residual_by_abs(n_axis, first, -first_sum.sum); + } + if (second) { + distribute_residual_by_abs(n_axis, second, -second_sum.sum); + } +} + +// 1D Lagrange-basis evaluator. Writes n_axis entries to each non-null output +// buffer. Dispatches to compile-time Horner specializations for sizes 1..9 +// (orders 0..8 — the Lagrange performance sweep) and uses barycentric +// evaluation above that threshold to avoid high-order monomial conditioning +// issues. 
+void evaluate_1d_basis_to(const Real* v_coeffs, + const Real* d_coeffs, + const Real* d2_coeffs, + const Real* barycentric_weights, + int n_axis, Real xi, + Real* values, Real* first, Real* second) { + switch (n_axis) { + case 1: evaluate_1d_horner_impl<1>(v_coeffs, d_coeffs, d2_coeffs, xi, values, first, second); return; + case 2: evaluate_1d_horner_impl<2>(v_coeffs, d_coeffs, d2_coeffs, xi, values, first, second); return; + case 3: evaluate_1d_horner_impl<3>(v_coeffs, d_coeffs, d2_coeffs, xi, values, first, second); return; + case 4: evaluate_1d_horner_impl<4>(v_coeffs, d_coeffs, d2_coeffs, xi, values, first, second); return; + case 5: evaluate_1d_horner_impl<5>(v_coeffs, d_coeffs, d2_coeffs, xi, values, first, second); return; + case 6: evaluate_1d_horner_impl<6>(v_coeffs, d_coeffs, d2_coeffs, xi, values, first, second); return; + case 7: evaluate_1d_horner_impl<7>(v_coeffs, d_coeffs, d2_coeffs, xi, values, first, second); return; + case 8: evaluate_1d_horner_impl<8>(v_coeffs, d_coeffs, d2_coeffs, xi, values, first, second); return; + case 9: evaluate_1d_horner_impl<9>(v_coeffs, d_coeffs, d2_coeffs, xi, values, first, second); return; + default: + evaluate_1d_barycentric_runtime(n_axis, xi, barycentric_weights, values, first, second); + return; + } +} + +// Selects which derivative passes are computed by the 1D evaluator. +enum class AxisDeriv { + ValuesOnly, // skip first and second + ValuesAndFirst, // for gradients + ValuesAndFirstAndSecond, // for hessians or fused evaluate_all +}; + +// Per-axis storage (values, first derivative, second derivative). Backed by +// per-thread scratch that grows lazily; subsequent calls reuse capacity with no +// reallocation. 
+struct AxisScratch { + std::vector values; + std::vector first; + std::vector second; + + void reserveFor(std::size_t n) { + if (values.size() < n) values.resize(n); + if (first.size() < n) first.resize(n); + if (second.size() < n) second.resize(n); + } +}; + +struct AxisBatchScratch { + std::vector values; + std::vector first; + std::vector second; + + void resizeFor(std::size_t count, AxisDeriv level) { + if (values.size() < count) values.resize(count); + if (level != AxisDeriv::ValuesOnly && first.size() < count) first.resize(count); + if (level == AxisDeriv::ValuesAndFirstAndSecond && second.size() < count) second.resize(count); + } +}; + +template +inline void fill_simplex_factor_sequence_fixed(Real lambda, + Real* SVMP_RESTRICT phi, + Real* SVMP_RESTRICT dphi, + Real* SVMP_RESTRICT d2phi) { + static_assert(!NeedSecond || NeedFirst, + "second derivative factors require first-derivative recurrence state"); + phi[0] = Real(1); + if constexpr (NeedFirst) { + dphi[0] = Real(0); + } + if constexpr (NeedSecond) { + d2phi[0] = Real(0); + } + + const Real t = static_cast(Order) * lambda; + const Real dt_dlambda = static_cast(Order); + Real dphi_dt_prev = Real(0); + Real d2phi_dt2_prev = Real(0); + for (int a = 1; a <= Order; ++a) { + const std::size_t au = static_cast(a); + const Real inv_a = Real(1) / static_cast(a); + const Real s = (t - static_cast(a - 1)) * inv_a; + phi[au] = s * phi[au - 1]; + + if constexpr (NeedFirst) { + const Real dphi_dt_old = dphi_dt_prev; + const Real dphi_dt = inv_a * phi[au - 1] + s * dphi_dt_old; + dphi[au] = dt_dlambda * dphi_dt; + + if constexpr (NeedSecond) { + const Real d2phi_dt2 = Real(2) * inv_a * dphi_dt_old + s * d2phi_dt2_prev; + d2phi[au] = dt_dlambda * dt_dlambda * d2phi_dt2; + d2phi_dt2_prev = d2phi_dt2; + } + dphi_dt_prev = dphi_dt; + } + } +} + +template +inline void fill_triangle_factors_q4( + const std::vector>& points, + Real (&phi0)[4][Order + 1], + Real (&phi1)[4][Order + 1], + Real (&phi2)[4][Order + 1], + Real 
(&dphi0)[4][Order + 1], + Real (&dphi1)[4][Order + 1], + Real (&dphi2)[4][Order + 1], + Real (&d2phi0)[4][Order + 1], + Real (&d2phi1)[4][Order + 1], + Real (&d2phi2)[4][Order + 1]) { + for (std::size_t q = 0; q < 4u; ++q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l0 = Real(1) - l1 - l2; + if constexpr (NeedSecond) { + fill_simplex_factor_sequence_fixed( + l0, phi0[q], dphi0[q], d2phi0[q]); + fill_simplex_factor_sequence_fixed( + l1, phi1[q], dphi1[q], d2phi1[q]); + fill_simplex_factor_sequence_fixed( + l2, phi2[q], dphi2[q], d2phi2[q]); + } else { + fill_simplex_factor_sequence_fixed( + l0, phi0[q], dphi0[q], nullptr); + fill_simplex_factor_sequence_fixed( + l1, phi1[q], dphi1[q], nullptr); + fill_simplex_factor_sequence_fixed( + l2, phi2[q], dphi2[q], nullptr); + } + } +} + +template +inline void write_wedge_gradient_strided_q(std::size_t tri_stride, + std::size_t axis_stride, + std::size_t tri, + std::size_t z, + std::size_t output_stride, + const Real* SVMP_RESTRICT tri_values, + const Real* SVMP_RESTRICT tri_g, + const AxisBatchScratch& axis_batch, + Real* SVMP_RESTRICT g) { + const std::size_t tri_q = tri * tri_stride + Q; + const std::size_t z_q = Q * axis_stride + z; + const Real tri_v = tri_values[tri_q]; + const Real zv = axis_batch.values[z_q]; + g[0u * output_stride + Q] = tri_g[0u * tri_stride + Q] * zv; + g[1u * output_stride + Q] = tri_g[1u * tri_stride + Q] * zv; + g[2u * output_stride + Q] = tri_v * axis_batch.first[z_q]; +} + +template +inline void write_wedge_gradient_stride4_q(std::size_t tri_stride, + std::size_t axis_stride, + std::size_t tri, + std::size_t z, + const Real* SVMP_RESTRICT tri_values, + const Real* SVMP_RESTRICT tri_g, + const AxisBatchScratch& axis_batch, + Real* SVMP_RESTRICT g) { + const std::size_t tri_q = tri * tri_stride + Q; + const std::size_t z_q = Q * axis_stride + z; + const Real tri_v = tri_values[tri_q]; + const Real zv = axis_batch.values[z_q]; + g[Q] = tri_g[0u 
* tri_stride + Q] * zv; + g[4u + Q] = tri_g[1u * tri_stride + Q] * zv; + g[8u + Q] = tri_v * axis_batch.first[z_q]; +} + +template +inline void write_wedge_hessian_strided_q(std::size_t tri_stride, + std::size_t axis_stride, + std::size_t tri, + std::size_t z, + std::size_t output_stride, + const Real* SVMP_RESTRICT tri_values, + const Real* SVMP_RESTRICT tri_g, + const Real* SVMP_RESTRICT tri_H, + const AxisBatchScratch& axis_batch, + Real* SVMP_RESTRICT H) { + const std::size_t tri_q = tri * tri_stride + Q; + const std::size_t z_q = Q * axis_stride + z; + const Real tri_v = tri_values[tri_q]; + const Real zv = axis_batch.values[z_q]; + const Real zd = axis_batch.first[z_q]; + const Real tri_gx = tri_g[0u * tri_stride + Q]; + const Real tri_gy = tri_g[1u * tri_stride + Q]; + const Real tri_hxx = tri_H[0u * tri_stride + Q]; + const Real tri_hxy = tri_H[1u * tri_stride + Q]; + const Real tri_hyy = tri_H[2u * tri_stride + Q]; + const Real hxz = tri_gx * zd; + const Real hxy = tri_hxy * zv; + const Real hyz = tri_gy * zd; + + H[0u * output_stride + Q] = tri_hxx * zv; + H[1u * output_stride + Q] = hxy; + H[2u * output_stride + Q] = hxz; + H[3u * output_stride + Q] = hxy; + H[4u * output_stride + Q] = tri_hyy * zv; + H[5u * output_stride + Q] = hyz; + H[6u * output_stride + Q] = hxz; + H[7u * output_stride + Q] = hyz; + H[8u * output_stride + Q] = tri_v * axis_batch.second[z_q]; +} + +template +inline void write_wedge_hessian_stride4_q(std::size_t tri_stride, + std::size_t axis_stride, + std::size_t tri, + std::size_t z, + const Real* SVMP_RESTRICT tri_values, + const Real* SVMP_RESTRICT tri_g, + const Real* SVMP_RESTRICT tri_H, + const AxisBatchScratch& axis_batch, + Real* SVMP_RESTRICT H) { + const std::size_t tri_q = tri * tri_stride + Q; + const std::size_t z_q = Q * axis_stride + z; + const Real tri_v = tri_values[tri_q]; + const Real zv = axis_batch.values[z_q]; + const Real zd = axis_batch.first[z_q]; + const Real tri_gx = tri_g[0u * tri_stride + Q]; + const Real 
tri_gy = tri_g[1u * tri_stride + Q]; + const Real tri_hxx = tri_H[0u * tri_stride + Q]; + const Real tri_hxy = tri_H[1u * tri_stride + Q]; + const Real tri_hyy = tri_H[2u * tri_stride + Q]; + const Real hxz = tri_gx * zd; + const Real hxy = tri_hxy * zv; + const Real hyz = tri_gy * zd; + + H[Q] = tri_hxx * zv; + H[4u + Q] = hxy; + H[8u + Q] = hxz; + H[12u + Q] = hxy; + H[16u + Q] = tri_hyy * zv; + H[20u + Q] = hyz; + H[24u + Q] = hxz; + H[28u + Q] = hyz; + H[32u + Q] = tri_v * axis_batch.second[z_q]; +} + +template +inline void write_wedge_all_strided_q(std::size_t tri_stride, + std::size_t axis_stride, + std::size_t tri, + std::size_t z, + std::size_t output_stride, + const Real* SVMP_RESTRICT tri_values, + const Real* SVMP_RESTRICT tri_g, + const Real* SVMP_RESTRICT tri_H, + const AxisBatchScratch& axis_batch, + Real* SVMP_RESTRICT value_row, + Real* SVMP_RESTRICT g, + Real* SVMP_RESTRICT H) { + const std::size_t tri_q = tri * tri_stride + Q; + const std::size_t z_q = Q * axis_stride + z; + const Real tri_v = tri_values[tri_q]; + const Real zv = axis_batch.values[z_q]; + const Real zd = axis_batch.first[z_q]; + const Real tri_gx = tri_g[0u * tri_stride + Q]; + const Real tri_gy = tri_g[1u * tri_stride + Q]; + const Real tri_hxx = tri_H[0u * tri_stride + Q]; + const Real tri_hxy = tri_H[1u * tri_stride + Q]; + const Real tri_hyy = tri_H[2u * tri_stride + Q]; + const Real hxz = tri_gx * zd; + const Real hxy = tri_hxy * zv; + const Real hyz = tri_gy * zd; + + value_row[Q] = tri_v * zv; + g[0u * output_stride + Q] = tri_gx * zv; + g[1u * output_stride + Q] = tri_gy * zv; + g[2u * output_stride + Q] = tri_v * zd; + H[0u * output_stride + Q] = tri_hxx * zv; + H[1u * output_stride + Q] = hxy; + H[2u * output_stride + Q] = hxz; + H[3u * output_stride + Q] = hxy; + H[4u * output_stride + Q] = tri_hyy * zv; + H[5u * output_stride + Q] = hyz; + H[6u * output_stride + Q] = hxz; + H[7u * output_stride + Q] = hyz; + H[8u * output_stride + Q] = tri_v * axis_batch.second[z_q]; 
+} + +template +inline void write_wedge_all_stride4_q(std::size_t tri_stride, + std::size_t axis_stride, + std::size_t tri, + std::size_t z, + const Real* SVMP_RESTRICT tri_values, + const Real* SVMP_RESTRICT tri_g, + const Real* SVMP_RESTRICT tri_H, + const AxisBatchScratch& axis_batch, + Real* SVMP_RESTRICT value_row, + Real* SVMP_RESTRICT g, + Real* SVMP_RESTRICT H) { + const std::size_t tri_q = tri * tri_stride + Q; + const std::size_t z_q = Q * axis_stride + z; + const Real tri_v = tri_values[tri_q]; + const Real zv = axis_batch.values[z_q]; + const Real zd = axis_batch.first[z_q]; + const Real tri_gx = tri_g[0u * tri_stride + Q]; + const Real tri_gy = tri_g[1u * tri_stride + Q]; + const Real tri_hxx = tri_H[0u * tri_stride + Q]; + const Real tri_hxy = tri_H[1u * tri_stride + Q]; + const Real tri_hyy = tri_H[2u * tri_stride + Q]; + const Real hxz = tri_gx * zd; + const Real hxy = tri_hxy * zv; + const Real hyz = tri_gy * zd; + + value_row[Q] = tri_v * zv; + g[Q] = tri_gx * zv; + g[4u + Q] = tri_gy * zv; + g[8u + Q] = tri_v * zd; + H[Q] = tri_hxx * zv; + H[4u + Q] = hxy; + H[8u + Q] = hxz; + H[12u + Q] = hxy; + H[16u + Q] = tri_hyy * zv; + H[20u + Q] = hyz; + H[24u + Q] = hxz; + H[28u + Q] = hyz; + H[32u + Q] = tri_v * axis_batch.second[z_q]; +} + +template +bool evaluate_wedge_fused_stride4_q4( + const std::vector>& simplex_exponents, + const std::vector& wedge_node_by_tri_z, + const std::vector>& points, + const AxisBatchScratch& axis_batch, + int n_axis, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + static_assert(Order >= 3 && Order <= 8, "fused wedge q4 path covers orders 3..8"); + const std::size_t tri_count = simplex_exponents.size(); + const std::size_t z_count = static_cast(n_axis); + if (points.size() != 4u || + z_count != static_cast(Order + 1) || + wedge_node_by_tri_z.size() != tri_count * z_count) { + return false; + } + + Real phi0[4][Order + 1]; + Real phi1[4][Order + 1]; + Real 
phi2[4][Order + 1]; + Real dphi0[4][Order + 1]; + Real dphi1[4][Order + 1]; + Real dphi2[4][Order + 1]; + Real d2phi0[4][Order + 1]; + Real d2phi1[4][Order + 1]; + Real d2phi2[4][Order + 1]; + fill_triangle_factors_q4( + points, phi0, phi1, phi2, dphi0, dphi1, dphi2, d2phi0, d2phi1, d2phi2); + + for (std::size_t tri = 0; tri < tri_count; ++tri) { + const auto& e = simplex_exponents[tri]; + const std::size_t i0 = static_cast(e[0]); + const std::size_t i1 = static_cast(e[1]); + const std::size_t i2 = static_cast(e[2]); + + Real tri_v[4]; + Real tri_gx[4]; + Real tri_gy[4]; + Real tri_hxx[4]; + Real tri_hxy[4]; + Real tri_hyy[4]; + for (std::size_t q = 0; q < 4u; ++q) { + const Real v0 = phi0[q][i0]; + const Real v1 = phi1[q][i1]; + const Real v2 = phi2[q][i2]; + const Real D0 = dphi0[q][i0]; + const Real D1 = dphi1[q][i1]; + const Real D2 = dphi2[q][i2]; + const Real dl0 = D0 * v1 * v2; + tri_v[q] = v0 * v1 * v2; + tri_gx[q] = v0 * D1 * v2 - dl0; + tri_gy[q] = v0 * v1 * D2 - dl0; + + if constexpr (NeedHess) { + const Real DD0 = d2phi0[q][i0]; + const Real DD1 = d2phi1[q][i1]; + const Real DD2 = d2phi2[q][i2]; + const Real H00 = DD0 * v1 * v2; + const Real H11 = v0 * DD1 * v2; + const Real H22 = v0 * v1 * DD2; + const Real H01 = D0 * D1 * v2; + const Real H02 = D0 * v1 * D2; + const Real H12 = v0 * D1 * D2; + tri_hxx[q] = H00 - Real(2) * H01 + H11; + tri_hxy[q] = H00 - H01 - H02 + H12; + tri_hyy[q] = H00 - Real(2) * H02 + H22; + } + } + + for (std::size_t z = 0; z < z_count; ++z) { + const std::size_t node = wedge_node_by_tri_z[tri * z_count + z]; + Real* SVMP_RESTRICT value_row = + values_out != nullptr ? values_out + node * 4u : nullptr; + Real* SVMP_RESTRICT g = + gradients_out != nullptr ? gradients_out + node * 12u : nullptr; + Real* SVMP_RESTRICT H = + hessians_out != nullptr ? 
hessians_out + node * 36u : nullptr; + + const Real z0 = axis_batch.values[z]; + const Real z1 = axis_batch.values[z_count + z]; + const Real z2 = axis_batch.values[2u * z_count + z]; + const Real z3 = axis_batch.values[3u * z_count + z]; + const Real dz0 = axis_batch.first[z]; + const Real dz1 = axis_batch.first[z_count + z]; + const Real dz2 = axis_batch.first[2u * z_count + z]; + const Real dz3 = axis_batch.first[3u * z_count + z]; + + if (value_row != nullptr) { + value_row[0] = tri_v[0] * z0; + value_row[1] = tri_v[1] * z1; + value_row[2] = tri_v[2] * z2; + value_row[3] = tri_v[3] * z3; + } + if (g != nullptr) { + g[0] = tri_gx[0] * z0; + g[1] = tri_gx[1] * z1; + g[2] = tri_gx[2] * z2; + g[3] = tri_gx[3] * z3; + g[4] = tri_gy[0] * z0; + g[5] = tri_gy[1] * z1; + g[6] = tri_gy[2] * z2; + g[7] = tri_gy[3] * z3; + g[8] = tri_v[0] * dz0; + g[9] = tri_v[1] * dz1; + g[10] = tri_v[2] * dz2; + g[11] = tri_v[3] * dz3; + } + if constexpr (NeedHess) { + if (H != nullptr) { + const Real d2z0 = axis_batch.second[z]; + const Real d2z1 = axis_batch.second[z_count + z]; + const Real d2z2 = axis_batch.second[2u * z_count + z]; + const Real d2z3 = axis_batch.second[3u * z_count + z]; + const Real hxz0 = tri_gx[0] * dz0; + const Real hxz1 = tri_gx[1] * dz1; + const Real hxz2 = tri_gx[2] * dz2; + const Real hxz3 = tri_gx[3] * dz3; + const Real hyz0 = tri_gy[0] * dz0; + const Real hyz1 = tri_gy[1] * dz1; + const Real hyz2 = tri_gy[2] * dz2; + const Real hyz3 = tri_gy[3] * dz3; + H[0] = tri_hxx[0] * z0; + H[1] = tri_hxx[1] * z1; + H[2] = tri_hxx[2] * z2; + H[3] = tri_hxx[3] * z3; + H[4] = tri_hxy[0] * z0; + H[5] = tri_hxy[1] * z1; + H[6] = tri_hxy[2] * z2; + H[7] = tri_hxy[3] * z3; + H[8] = hxz0; + H[9] = hxz1; + H[10] = hxz2; + H[11] = hxz3; + H[12] = H[4]; + H[13] = H[5]; + H[14] = H[6]; + H[15] = H[7]; + H[16] = tri_hyy[0] * z0; + H[17] = tri_hyy[1] * z1; + H[18] = tri_hyy[2] * z2; + H[19] = tri_hyy[3] * z3; + H[20] = hyz0; + H[21] = hyz1; + H[22] = hyz2; + H[23] = hyz3; + H[24] 
= hxz0; + H[25] = hxz1; + H[26] = hxz2; + H[27] = hxz3; + H[28] = hyz0; + H[29] = hyz1; + H[30] = hyz2; + H[31] = hyz3; + H[32] = tri_v[0] * d2z0; + H[33] = tri_v[1] * d2z1; + H[34] = tri_v[2] * d2z2; + H[35] = tri_v[3] * d2z3; + } + } + } + } + return true; +} + +template +bool try_evaluate_wedge_fused_stride4_q4( + const std::vector>& simplex_exponents, + const std::vector& wedge_node_by_tri_z, + int order, + const std::vector>& points, + const AxisBatchScratch& axis_batch, + int n_axis, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + switch (order) { + case 3: + return evaluate_wedge_fused_stride4_q4<3, NeedHess>( + simplex_exponents, wedge_node_by_tri_z, points, axis_batch, n_axis, + values_out, gradients_out, hessians_out); + case 4: + return evaluate_wedge_fused_stride4_q4<4, NeedHess>( + simplex_exponents, wedge_node_by_tri_z, points, axis_batch, n_axis, + values_out, gradients_out, hessians_out); + case 5: + return evaluate_wedge_fused_stride4_q4<5, NeedHess>( + simplex_exponents, wedge_node_by_tri_z, points, axis_batch, n_axis, + values_out, gradients_out, hessians_out); + case 6: + return evaluate_wedge_fused_stride4_q4<6, NeedHess>( + simplex_exponents, wedge_node_by_tri_z, points, axis_batch, n_axis, + values_out, gradients_out, hessians_out); + case 7: + return evaluate_wedge_fused_stride4_q4<7, NeedHess>( + simplex_exponents, wedge_node_by_tri_z, points, axis_batch, n_axis, + values_out, gradients_out, hessians_out); + case 8: + return evaluate_wedge_fused_stride4_q4<8, NeedHess>( + simplex_exponents, wedge_node_by_tri_z, points, axis_batch, n_axis, + values_out, gradients_out, hessians_out); + default: + return false; + } +} + +struct TensorProductTableScratch { + std::vector vv; + std::vector dv; + std::vector vd; + std::vector d2v; + std::vector vd2; + std::vector dd; + + void resizeFor(std::size_t count) { + if (vv.size() < count) vv.resize(count); + if (dv.size() < count) 
dv.resize(count); + if (vd.size() < count) vd.resize(count); + if (d2v.size() < count) d2v.resize(count); + if (vd2.size() < count) vd2.resize(count); + if (dd.size() < count) dd.resize(count); + } +}; + +// Caller-provided scratch buffers used by tensor-product evaluation. Three +// independent axes plus reusable simplex/wedge intermediates. +struct LagrangeEvaluateScratch { + AxisScratch axis_x; + AxisScratch axis_y; + AxisScratch axis_z; + AxisBatchScratch axis_x_batch; + AxisBatchScratch axis_y_batch; + AxisBatchScratch axis_z_batch; + TensorProductTableScratch tensor_tables; + + std::vector tri_values; + std::vector tri_gradients; + std::vector tri_hessians; + std::vector tri_gradient_components; + std::vector tri_hessian_components; + std::vector wedge_tri_values_batch; + std::vector wedge_tri_gradient_batch; + std::vector wedge_tri_hessian_batch; + + std::vector strided_values_tmp; + std::vector strided_gradients_tmp; + std::vector strided_hessians_tmp; + + void prewarm(int max_order, std::size_t max_qpts) { + const int clamped_order = std::max(max_order, 0); + const std::size_t axis_size = static_cast(clamped_order) + 1u; + const std::size_t axis_batch_size = axis_size * max_qpts; + const std::size_t tensor_table_size = + axis_size * axis_size * std::max(max_qpts, 1u); + const std::size_t tensor_dofs = tensor_table_size * axis_size; + const std::size_t tri_count = axis_size * (axis_size + 1u) / 2u; + + axis_x.reserveFor(axis_size); + axis_y.reserveFor(axis_size); + axis_z.reserveFor(axis_size); + axis_x_batch.resizeFor(axis_batch_size, AxisDeriv::ValuesAndFirstAndSecond); + axis_y_batch.resizeFor(axis_batch_size, AxisDeriv::ValuesAndFirstAndSecond); + axis_z_batch.resizeFor(axis_batch_size, AxisDeriv::ValuesAndFirstAndSecond); + tensor_tables.resizeFor(tensor_table_size); + tri_values.reserve(tri_count); + tri_gradients.reserve(tri_count); + tri_hessians.reserve(tri_count); + tri_gradient_components.reserve(tri_count * 3u); + 
tri_hessian_components.reserve(tri_count * 9u); + wedge_tri_values_batch.reserve(tri_count * max_qpts); + wedge_tri_gradient_batch.reserve(tri_count * 3u * max_qpts); + wedge_tri_hessian_batch.reserve(tri_count * 9u * max_qpts); + strided_values_tmp.reserve(tensor_dofs); + strided_gradients_tmp.reserve(tensor_dofs * 3u); + strided_hessians_tmp.reserve(tensor_dofs * 9u); + } +}; + +LagrangeEvaluateScratch& evaluate_scratch() { + // Scratch is intentionally thread-local: assembly and benchmark callers run + // evaluation on persistent worker threads, so capacity is reused by thread. + static thread_local LagrangeEvaluateScratch s; + return s; +} + +// Fill axis scratch and return a non-owning view. Uncomputed slots still have +// valid pointers to scratch storage (they may hold stale data) — callers must +// only read the slots they requested via `level`. Common low orders use +// precomputed Horner coefficients; high orders use barycentric axis evaluation. +AxisBasisEvaluations fill_axis_scratch(AxisScratch& s, + const Real* v_coeffs, + const Real* d_coeffs, + const Real* d2_coeffs, + const Real* barycentric_weights, + int n_axis, Real xi, + AxisDeriv level) { + const std::size_t n = static_cast(n_axis); + s.reserveFor(n); + Real* first = (level == AxisDeriv::ValuesOnly) ? nullptr : s.first.data(); + Real* second = (level == AxisDeriv::ValuesAndFirstAndSecond) ? 
s.second.data() : nullptr; + evaluate_1d_basis_to(v_coeffs, d_coeffs, d2_coeffs, barycentric_weights, + n_axis, xi, s.values.data(), first, second); + return AxisBasisEvaluations{s.values.data(), s.first.data(), s.second.data(), n}; +} + +void fill_axis_batch(AxisBatchScratch& scratch, + const std::vector>& points, + std::size_t component, + const Real* v_coeffs, + const Real* d_coeffs, + const Real* d2_coeffs, + const Real* barycentric_weights, + int n_axis, + AxisDeriv level) { + const std::size_t count = points.size() * static_cast(n_axis); + scratch.resizeFor(count, level); + Real* first = (level == AxisDeriv::ValuesOnly) ? nullptr : scratch.first.data(); + Real* second = (level == AxisDeriv::ValuesAndFirstAndSecond) ? scratch.second.data() : nullptr; + const std::size_t axis_stride = static_cast(n_axis); + for (std::size_t q = 0; q < points.size(); ++q) { + evaluate_1d_basis_to(v_coeffs, d_coeffs, d2_coeffs, barycentric_weights, n_axis, + points[q][component], + scratch.values.data() + q * axis_stride, + first ? first + q * axis_stride : nullptr, + second ? second + q * axis_stride : nullptr); + } +} + +// Maximum yz-table footprint that fits comfortably on the stack for the +// Lagrange performance sweep. Order-8 hex q=4 needs 4*(9x9) entries per table. +// Higher orders fall back to thread_local heap buffers. 
+inline constexpr std::size_t kMaxStackYZ = 384;
+
+// Output adapter that stores results in std::vector containers. A null pointer
+// means "this output was not requested". The write_* members dereference their
+// pointer without checking it, so callers must gate each call on the matching
+// wants_*() query.
+struct TensorProductVectorSink {
+    std::vector* values;
+    std::vector* gradients;
+    std::vector* hessians;
+
+    bool wants_values() const noexcept { return values != nullptr; }
+    bool wants_gradients() const noexcept { return gradients != nullptr; }
+    bool wants_hessians() const noexcept { return hessians != nullptr; }
+
+    // Resizes every requested output vector to hold one entry per node.
+    void prepare(std::size_t n_nodes) const {
+        if (values) values->resize(n_nodes);
+        if (gradients) gradients->resize(n_nodes);
+        if (hessians) hessians->resize(n_nodes);
+    }
+
+    void write_value(std::size_t n, Real value) const {
+        (*values)[n] = value;
+    }
+
+    // Stores the three gradient components of node n in order (dx, dy, dz).
+    void write_gradient(std::size_t n, Real dx, Real dy, Real dz) const {
+        auto& g = (*gradients)[n];
+        g[0] = dx;
+        g[1] = dy;
+        g[2] = dz;
+    }
+
+    // Stores node n's Hessian from its six distinct second derivatives; the
+    // element layout is whatever make_symmetric_hessian() produces.
+    void write_hessian(std::size_t n,
+                       Real xx,
+                       Real yy,
+                       Real zz,
+                       Real xy,
+                       Real xz,
+                       Real yz) const {
+        (*hessians)[n] = make_symmetric_hessian(xx, yy, zz, xy, xz, yz);
+    }
+};
+
+// Output adapter that stores results in caller-provided flat Real arrays:
+// 1 entry per node for values, 3 per node for gradients, 9 per node for
+// Hessians. A null pointer means "this output was not requested"; the write_*
+// members do not check for null.
+struct TensorProductRawSink {
+    Real* values;
+    Real* gradients;
+    Real* hessians;
+
+    bool wants_values() const noexcept { return values != nullptr; }
+    bool wants_gradients() const noexcept { return gradients != nullptr; }
+    bool wants_hessians() const noexcept { return hessians != nullptr; }
+
+    // No-op: raw buffers cannot be resized here.
+    // NOTE(review): presumably the caller guarantees sufficient capacity.
+    void prepare(std::size_t) const {}
+
+    void write_value(std::size_t n, Real value) const {
+        values[n] = value;
+    }
+
+    // Node n's gradient occupies gradients[3n .. 3n+2] as (dx, dy, dz).
+    void write_gradient(std::size_t n, Real dx, Real dy, Real dz) const {
+        Real* g = gradients + n * 3u;
+        g[0] = dx;
+        g[1] = dy;
+        g[2] = dz;
+    }
+
+    // Node n's Hessian occupies hessians[9n .. 9n+8] as a full 3x3 block
+    // indexed 3*row + col; each off-diagonal value is written to both of its
+    // symmetric positions.
+    void write_hessian(std::size_t n,
+                       Real xx,
+                       Real yy,
+                       Real zz,
+                       Real xy,
+                       Real xz,
+                       Real yz) const {
+        Real* H = hessians + n * 9u;
+        H[0] = xx;
+        H[4] = yy;
+        H[8] = zz;
+        H[1] = xy; H[3] = xy;
+        H[2] = xz; H[6] = xz;
+        H[5] = yz; H[7] = yz;
+    }
+};
+
+// Fused sum-factorized tensor-product evaluator.
+//
+// Precomputes one, three, or six (ny x nz)-shaped tables of y*z partial
+// products, each indexed as `M[j*nz + k]`, so that the inner per-node loop
+// performs at most one multiplication per output instead of two. With all
+// three output buffers supplied, this is the fused values + gradients +
+// hessians path that shares every per-axis evaluation.
+//
+// Table naming: the first letter pair refers to the y-axis factor and the
+// second to the z-axis factor (v = value, d = first derivative, d2 = second
+// derivative), e.g. Mdv[j*nz + k] = y'_j * z_k.
+//
+// Per-node multiply count (vs. the unfactored variants):
+//   values only    : 1  (was 2)
+//   gradients only : 3  (was 6)
+//   hessians only  : 6  (was 12)
+//   all three      : 10 (was 20)
+//
+// Dimensional scope: works uniformly for Line/Quadrilateral/Hexahedron with
+// the unused axes' size folded to 1 via constant_axis_basis().
+//
+// `sink` decides which outputs are produced (wants_*()) and where they are
+// stored (prepare() / write_*()). Each tensor_indices[n] supplies the
+// (x, y, z) axis indices of node n.
+template
+void evaluate_tensor_product_factorized_impl(
+    const std::vector>& tensor_indices,
+    const AxisBasisEvaluations& x_axis,
+    const AxisBasisEvaluations& y_axis,
+    const AxisBasisEvaluations& z_axis,
+    const Sink& sink) {
+    const std::size_t ny = y_axis.size;
+    const std::size_t nz = z_axis.size;
+    const std::size_t nyz = ny * nz;
+    const bool need_values = sink.wants_values();
+    const bool need_grad = sink.wants_gradients();
+    const bool need_hess = sink.wants_hessians();
+
+    // Stack storage for all six tables; only used when nyz <= kMaxStackYZ.
+    Real Mvv_stack[kMaxStackYZ];
+    Real Mdv_stack[kMaxStackYZ];
+    Real Mvd_stack[kMaxStackYZ];
+    Real Md2v_stack[kMaxStackYZ];
+    Real Mvd2_stack[kMaxStackYZ];
+    Real Mdd_stack[kMaxStackYZ];
+
+    Real* Mvv;
+    Real* Mdv;
+    Real* Mvd;
+    Real* Md2v;
+    Real* Mvd2;
+    Real* Mdd;
+    if (nyz <= kMaxStackYZ) {
+        Mvv = Mvv_stack;
+        Mdv = Mdv_stack;
+        Mvd = Mvd_stack;
+        Md2v = Md2v_stack;
+        Mvd2 = Mvd2_stack;
+        Mdd = Mdd_stack;
+    } else {
+        // Too large for the stack arrays: use the thread-local scratch tables.
+        // NOTE(review): assumes resizeFor(nyz) sizes all six tables — confirm.
+        auto& tables = evaluate_scratch().tensor_tables;
+        tables.resizeFor(nyz);
+        Mvv = tables.vv.data();
+        Mdv = tables.dv.data();
+        Mvd = tables.vd.data();
+        Md2v = tables.d2v.data();
+        Mvd2 = tables.vd2.data();
+        Mdd = tables.dd.data();
+    }
+
+    // M_vv is required by every output (values, ∂ξ, ∂ξ²).
+    for (std::size_t j = 0; j < ny; ++j) {
+        const Real yv = y_axis.values[j];
+        for (std::size_t k = 0; k < nz; ++k) {
+            Mvv[j * nz + k] = yv * z_axis.values[k];
+        }
+    }
+
+    // First-derivative tables: used by the y/z gradient components and by the
+    // xy/xz Hessian entries.
+    if (need_grad || need_hess) {
+        for (std::size_t j = 0; j < ny; ++j) {
+            const Real yv = y_axis.values[j];
+            const Real yd = y_axis.first[j];
+            for (std::size_t k = 0; k < nz; ++k) {
+                Mdv[j * nz + k] = yd * z_axis.values[k];
+                Mvd[j * nz + k] = yv * z_axis.first[k];
+            }
+        }
+    }
+
+    // Second-derivative tables: used only by the yy, zz and yz Hessian entries.
+    if (need_hess) {
+        for (std::size_t j = 0; j < ny; ++j) {
+            const Real yv = y_axis.values[j];
+            const Real yd = y_axis.first[j];
+            const Real yd2 = y_axis.second[j];
+            for (std::size_t k = 0; k < nz; ++k) {
+                Md2v[j * nz + k] = yd2 * z_axis.values[k];
+                Mvd2[j * nz + k] = yv * z_axis.second[k];
+                Mdd[j * nz + k] = yd * z_axis.first[k];
+            }
+        }
+    }
+
+    const std::size_t n_nodes = tensor_indices.size();
+    sink.prepare(n_nodes);
+
+    for (std::size_t n = 0; n < n_nodes; ++n) {
+        const auto& idx = tensor_indices[n];
+        const std::size_t i = idx[0];
+        // Flattened (y, z) position of this node inside every table.
+        const std::size_t jk = idx[1] * nz + idx[2];
+
+        const Real Lx = x_axis.values[i];
+
+        if (need_values) {
+            sink.write_value(n, Lx * Mvv[jk]);
+        }
+
+        if (need_grad) {
+            const Real dLx = x_axis.first[i];
+            sink.write_gradient(n,
+                                dLx * Mvv[jk],
+                                Lx * Mdv[jk],
+                                Lx * Mvd[jk]);
+        }
+
+        if (need_hess) {
+            const Real dLx = x_axis.first[i];
+            const Real d2Lx = x_axis.second[i];
+            // Argument order is (xx, yy, zz, xy, xz, yz).
+            sink.write_hessian(n,
+                               d2Lx * Mvv[jk],
+                               Lx * Md2v[jk],
+                               Lx * Mvd2[jk],
+                               dLx * Mdv[jk],
+                               dLx * Mvd[jk],
+                               Lx * Mdd[jk]);
+        }
+    }
+}
+
+// Vector-output entry point: wraps the three optional output vectors in a
+// TensorProductVectorSink and runs the factorized evaluator. A null output
+// pointer skips that output.
+void evaluate_tensor_product_factorized(
+    const std::vector>& tensor_indices,
+    const AxisBasisEvaluations& x_axis,
+    const AxisBasisEvaluations& y_axis,
+    const AxisBasisEvaluations& z_axis,
+    std::vector* values_out,
+    std::vector* gradients_out,
+    std::vector* hessians_out) {
+    const TensorProductVectorSink sink{values_out, gradients_out, hessians_out};
+    evaluate_tensor_product_factorized_impl(tensor_indices, x_axis, y_axis, z_axis, sink);
+}
+
+// Raw-buffer entry point: wraps the three optional output arrays in a
+// TensorProductRawSink and runs the factorized evaluator. A null output
+// pointer skips that output; non-null buffers are written at 1 / 3 / 9 Reals
+// per node respectively.
+void evaluate_tensor_product_factorized_to(
+    const std::vector>& tensor_indices,
+    const AxisBasisEvaluations& x_axis,
+    const AxisBasisEvaluations& y_axis,
+    const AxisBasisEvaluations& z_axis,
+    Real* SVMP_RESTRICT values_out,
+    Real* SVMP_RESTRICT gradients_out,
+    Real* SVMP_RESTRICT hessians_out) {
+    const TensorProductRawSink sink{values_out, gradients_out, hessians_out};
+    evaluate_tensor_product_factorized_impl(tensor_indices, x_axis, y_axis, z_axis, sink);
+}
+
+// --- Per-lane writers -------------------------------------------------------
+// Each helper below handles a single point lane Q (Q is used as a compile-time
+// constant; its template parameter list is not visible in this view). Shared
+// conventions, as read from the indexing:
+//   * x-axis data of lane Q starts at x_batch.<buf>[Q * axis_stride];
+//   * lane Q's yz table occupies M[Q * nyz .. Q * nyz + nyz - 1];
+//   * `i` is the node's x-axis index, `jk` its flattened (y, z) index;
+//   * `*_strided_q` variants place component c of lane Q at
+//     row[c * output_stride + Q]; `*_stride4_q` variants hard-code a stride
+//     of 4 (row[c * 4 + Q]);
+//   * Hessian component c is the row-major 3x3 index (0=xx, 1=xy, 2=xz, 3=yx,
+//     4=yy, 5=yz, 6=zx, 7=zy, 8=zz); symmetric entries are written twice.
+
+// Writes the basis value of lane Q to value_row[Q].
+template
+inline void write_tensor_product_value_strided_q(
+    std::size_t axis_stride,
+    std::size_t nyz,
+    std::size_t i,
+    std::size_t jk,
+    const AxisBatchScratch& x_batch,
+    const Real* SVMP_RESTRICT Mvv,
+    Real* SVMP_RESTRICT value_row) {
+    const std::size_t q_axis = Q * axis_stride;
+    const std::size_t slot = Q * nyz + jk;
+    value_row[Q] = x_batch.values[q_axis + i] * Mvv[slot];
+}
+
+// Writes the nine Hessian components of lane Q with a runtime output stride.
+template
+inline void write_tensor_product_hessian_strided_q(
+    std::size_t axis_stride,
+    std::size_t nyz,
+    std::size_t i,
+    std::size_t jk,
+    std::size_t output_stride,
+    const AxisBatchScratch& x_batch,
+    const Real* SVMP_RESTRICT Mvv,
+    const Real* SVMP_RESTRICT Mdv,
+    const Real* SVMP_RESTRICT Mvd,
+    const Real* SVMP_RESTRICT Md2v,
+    const Real* SVMP_RESTRICT Mvd2,
+    const Real* SVMP_RESTRICT Mdd,
+    Real* SVMP_RESTRICT hess_row) {
+    const std::size_t q_axis = Q * axis_stride;
+    const std::size_t slot = Q * nyz + jk;
+    const Real xv = x_batch.values[q_axis + i];
+    const Real xd = x_batch.first[q_axis + i];
+    const Real x2 = x_batch.second[q_axis + i];
+    const Real hxy = xd * Mdv[slot];
+    const Real hxz = xd * Mvd[slot];
+    const Real hyz = xv * Mdd[slot];
+    hess_row[0u * output_stride + Q] = x2 * Mvv[slot];
+    hess_row[4u * output_stride + Q] = xv * Md2v[slot];
+    hess_row[8u * output_stride + Q] = xv * Mvd2[slot];
+    hess_row[1u * output_stride + Q] = hxy;
+    hess_row[3u * output_stride + Q] = hxy;
+    hess_row[2u * output_stride + Q] = hxz;
+    hess_row[6u * output_stride + Q] = hxz;
+    hess_row[5u * output_stride + Q] = hyz;
+    hess_row[7u * output_stride + Q] = hyz;
+}
+
+// Same as the strided Hessian writer, with the output stride fixed at 4
+// (offsets 0, 4, ..., 32 are component * 4).
+template
+inline void write_tensor_product_hessian_stride4_q(
+    std::size_t axis_stride,
+    std::size_t nyz,
+    std::size_t i,
+    std::size_t jk,
+    const AxisBatchScratch& x_batch,
+    const Real* SVMP_RESTRICT Mvv,
+    const Real* SVMP_RESTRICT Mdv,
+    const Real* SVMP_RESTRICT Mvd,
+    const Real* SVMP_RESTRICT Md2v,
+    const Real* SVMP_RESTRICT Mvd2,
+    const Real* SVMP_RESTRICT Mdd,
+    Real* SVMP_RESTRICT hess_row) {
+    const std::size_t q_axis = Q * axis_stride;
+    const std::size_t slot = Q * nyz + jk;
+    const Real xv = x_batch.values[q_axis + i];
+    const Real xd = x_batch.first[q_axis + i];
+    const Real x2 = x_batch.second[q_axis + i];
+    const Real hxy = xd * Mdv[slot];
+    const Real hxz = xd * Mvd[slot];
+    const Real hyz = xv * Mdd[slot];
+    hess_row[Q] = x2 * Mvv[slot];
+    hess_row[16u + Q] = xv * Md2v[slot];
+    hess_row[32u + Q] = xv * Mvd2[slot];
+    hess_row[4u + Q] = hxy;
+    hess_row[12u + Q] = hxy;
+    hess_row[8u + Q] = hxz;
+    hess_row[24u + Q] = hxz;
+    hess_row[20u + Q] = hyz;
+    hess_row[28u + Q] = hyz;
+}
+
+// Writes the three gradient components of lane Q with a runtime output stride.
+template
+inline void write_tensor_product_gradient_strided_q(
+    std::size_t axis_stride,
+    std::size_t nyz,
+    std::size_t i,
+    std::size_t jk,
+    std::size_t output_stride,
+    const AxisBatchScratch& x_batch,
+    const Real* SVMP_RESTRICT Mvv,
+    const Real* SVMP_RESTRICT Mdv,
+    const Real* SVMP_RESTRICT Mvd,
+    Real* SVMP_RESTRICT grad_row) {
+    const std::size_t q_axis = Q * axis_stride;
+    const std::size_t slot = Q * nyz + jk;
+    const Real xv = x_batch.values[q_axis + i];
+    const Real xd = x_batch.first[q_axis + i];
+    grad_row[0u * output_stride + Q] = xd * Mvv[slot];
+    grad_row[1u * output_stride + Q] = xv * Mdv[slot];
+    grad_row[2u * output_stride + Q] = xv * Mvd[slot];
+}
+
+// Same as the strided gradient writer, with the output stride fixed at 4.
+template
+inline void write_tensor_product_gradient_stride4_q(
+    std::size_t axis_stride,
+    std::size_t nyz,
+    std::size_t i,
+    std::size_t jk,
+    const AxisBatchScratch& x_batch,
+    const Real* SVMP_RESTRICT Mvv,
+    const Real* SVMP_RESTRICT Mdv,
+    const Real* SVMP_RESTRICT Mvd,
+    Real* SVMP_RESTRICT grad_row) {
+    const std::size_t q_axis = Q * axis_stride;
+    const std::size_t slot = Q * nyz + jk;
+    const Real xv = x_batch.values[q_axis + i];
+    const Real xd = x_batch.first[q_axis + i];
+    grad_row[Q] = xd * Mvv[slot];
+    grad_row[4u + Q] = xv * Mdv[slot];
+    grad_row[8u + Q] = xv * Mvd[slot];
+}
+
+// Fused writer: value, gradient and Hessian of lane Q with a runtime output
+// stride, sharing the x-axis loads between the three outputs.
+template
+inline void write_tensor_product_all_strided_q(
+    std::size_t axis_stride,
+    std::size_t nyz,
+    std::size_t i,
+    std::size_t jk,
+    std::size_t output_stride,
+    const AxisBatchScratch& x_batch,
+    const Real* SVMP_RESTRICT Mvv,
+    const Real* SVMP_RESTRICT Mdv,
+    const Real* SVMP_RESTRICT Mvd,
+    const Real* SVMP_RESTRICT Md2v,
+    const Real* SVMP_RESTRICT Mvd2,
+    const Real* SVMP_RESTRICT Mdd,
+    Real* SVMP_RESTRICT value_row,
+    Real* SVMP_RESTRICT grad_row,
+    Real* SVMP_RESTRICT hess_row) {
+    const std::size_t q_axis = Q * axis_stride;
+    const std::size_t slot = Q * nyz + jk;
+    const Real xv = x_batch.values[q_axis + i];
+    const Real xd = x_batch.first[q_axis + i];
+    value_row[Q] = xv * Mvv[slot];
+    grad_row[0u * output_stride + Q] = xd * Mvv[slot];
+    grad_row[1u * output_stride + Q] = xv * Mdv[slot];
+    grad_row[2u * output_stride + Q] = xv * Mvd[slot];
+
+    const Real x2 = x_batch.second[q_axis + i];
+    const Real hxy = xd * Mdv[slot];
+    const Real hxz = xd * Mvd[slot];
+    const Real hyz = xv * Mdd[slot];
+    hess_row[0u * output_stride + Q] = x2 * Mvv[slot];
+    hess_row[4u * output_stride + Q] = xv * Md2v[slot];
+    hess_row[8u * output_stride + Q] = xv * Mvd2[slot];
+    hess_row[1u * output_stride + Q] = hxy;
+    hess_row[3u * output_stride + Q] = hxy;
+    hess_row[2u * output_stride + Q] = hxz;
+    hess_row[6u * output_stride + Q] = hxz;
+    hess_row[5u * output_stride + Q] = hyz;
+    hess_row[7u * output_stride + Q] = hyz;
+}
+
+// Fused writer with the output stride fixed at 4 for gradients and Hessians.
+template
+inline void write_tensor_product_all_stride4_q(
+    std::size_t axis_stride,
+    std::size_t nyz,
+    std::size_t i,
+    std::size_t jk,
+    const AxisBatchScratch& x_batch,
+    const Real* SVMP_RESTRICT Mvv,
+    const Real* SVMP_RESTRICT Mdv,
+    const Real* SVMP_RESTRICT Mvd,
+    const Real* SVMP_RESTRICT Md2v,
+    const Real* SVMP_RESTRICT Mvd2,
+    const Real* SVMP_RESTRICT Mdd,
+    Real* SVMP_RESTRICT value_row,
+    Real* SVMP_RESTRICT grad_row,
+    Real* SVMP_RESTRICT hess_row) {
+    const std::size_t q_axis = Q * axis_stride;
+    const std::size_t slot = Q * nyz + jk;
+    const Real xv = x_batch.values[q_axis + i];
+    const Real xd = x_batch.first[q_axis + i];
+    value_row[Q] = xv * Mvv[slot];
+    grad_row[Q] = xd * Mvv[slot];
+    grad_row[4u + Q] = xv * Mdv[slot];
+    grad_row[8u + Q] = xv * Mvd[slot];
+
+    const Real x2 = x_batch.second[q_axis + i];
+    const Real hxy = xd * Mdv[slot];
+    const Real hxz = xd * Mvd[slot];
+    const Real hyz = xv * Mdd[slot];
+    hess_row[Q] = x2 * Mvv[slot];
+    hess_row[16u + Q] = xv * Md2v[slot];
+    hess_row[32u + Q] = xv * Mvd2[slot];
+    hess_row[4u + Q] = hxy;
+    hess_row[12u + Q] = hxy;
+    hess_row[8u + Q] = hxz;
+    hess_row[24u + Q] = hxz;
+    hess_row[20u + Q] = hyz;
+    hess_row[28u + Q] = hyz;
+}
+
+// Values-only kernel for exactly four points. The batch buffers hold point q's
+// axis data at offset q * axis_stride, and the same axis_stride is used for
+// the y and z extents. Unlike the per-lane helpers above, the yz table is
+// interleaved by point: entry (j, k) of point q is at
+// Mvv_stack[(j * axis_stride + k) * 4 + q]. Output: values_out[node * 4 + q].
+//
+// Returns false without writing anything when values_out is null or the
+// 4 * axis_stride^2 table would not fit in kMaxStackYZ entries; returns true
+// otherwise.
+SVMP_LAGRANGE_NOINLINE SVMP_LAGRANGE_ALIGN64 bool
+evaluate_tensor_product_values_stride4_q4_transposed(
+    const std::vector>& tensor_indices,
+    std::size_t axis_stride,
+    const AxisBatchScratch& x_batch,
+    const AxisBatchScratch& y_batch,
+    const AxisBatchScratch& z_batch,
+    Real* SVMP_RESTRICT values_out) {
+    const std::size_t nyz = axis_stride * axis_stride;
+    const std::size_t table_count = 4u * nyz;
+    if (table_count > kMaxStackYZ || values_out == nullptr) {
+        return false;
+    }
+
+    Real Mvv_stack[kMaxStackYZ];
+    for (std::size_t j = 0; j < axis_stride; ++j) {
+        const Real yv0 = y_batch.values[j];
+        const Real yv1 = y_batch.values[axis_stride + j];
+        const Real yv2 = y_batch.values[2u * axis_stride + j];
+        const Real yv3 = y_batch.values[3u * axis_stride + j];
+        for (std::size_t k = 0; k < axis_stride; ++k) {
+            const std::size_t base = (j * axis_stride + k) * 4u;
+            Mvv_stack[base + 0u] = yv0 * z_batch.values[k];
+            Mvv_stack[base + 1u] = yv1 * z_batch.values[axis_stride + k];
+            Mvv_stack[base + 2u] = yv2 * z_batch.values[2u * axis_stride + k];
+            Mvv_stack[base + 3u] = yv3 * z_batch.values[3u * axis_stride + k];
+        }
+    }
+
+    for (std::size_t node = 0; node < tensor_indices.size(); ++node) {
+        const auto& idx = tensor_indices[node];
+        const std::size_t i = idx[0];
+        const std::size_t jk = (idx[1] * axis_stride + idx[2]) * 4u;
+        Real* SVMP_RESTRICT value_row = values_out + node * 4u;
+        value_row[0u] = x_batch.values[i] * Mvv_stack[jk + 0u];
+        value_row[1u] = x_batch.values[axis_stride + i] * Mvv_stack[jk + 1u];
+        value_row[2u] = x_batch.values[2u * axis_stride + i] * Mvv_stack[jk + 2u];
+        value_row[3u] = x_batch.values[3u * axis_stride + i] * Mvv_stack[jk + 3u];
+    }
+
+    return true;
+}
+
+// Gradients-only kernel for exactly four points, using the same point-
+// interleaved table layout as the values kernel. Output:
+// gradients_out[node * 12 + c * 4 + q] for component c in (x, y, z).
+//
+// Returns false without writing anything when gradients_out is null or the
+// 4 * axis_stride^2 tables would not fit in kMaxStackYZ entries.
+SVMP_LAGRANGE_NOINLINE SVMP_LAGRANGE_ALIGN64 bool
+evaluate_tensor_product_gradients_stride4_q4_transposed(
+    const std::vector>& tensor_indices,
+    std::size_t axis_stride,
+    const AxisBatchScratch& x_batch,
+    const AxisBatchScratch& y_batch,
+    const AxisBatchScratch& z_batch,
+    Real* SVMP_RESTRICT gradients_out) {
+    const std::size_t nyz = axis_stride * axis_stride;
+    const std::size_t table_count = 4u * nyz;
+    if (table_count > kMaxStackYZ || gradients_out == nullptr) {
+        return false;
+    }
+
+    Real Mvv_stack[kMaxStackYZ];
+    Real Mdv_stack[kMaxStackYZ];
+    Real Mvd_stack[kMaxStackYZ];
+    for (std::size_t j = 0; j < axis_stride; ++j) {
+        const Real yv0 = y_batch.values[j];
+        const Real yv1 = y_batch.values[axis_stride + j];
+        const Real yv2 = y_batch.values[2u * axis_stride + j];
+        const Real yv3 = y_batch.values[3u * axis_stride + j];
+        const Real yd0 = y_batch.first[j];
+        const Real yd1 = y_batch.first[axis_stride + j];
+        const Real yd2 = y_batch.first[2u * axis_stride + j];
+        const Real yd3 = y_batch.first[3u * axis_stride + j];
+        for (std::size_t k = 0; k < axis_stride; ++k) {
+            const std::size_t base = (j * axis_stride + k) * 4u;
+            const Real zv0 = z_batch.values[k];
+            const Real zv1 = z_batch.values[axis_stride + k];
+            const Real zv2 = z_batch.values[2u * axis_stride + k];
+            const Real zv3 = z_batch.values[3u * axis_stride + k];
+            const Real zd0 = z_batch.first[k];
+            const Real zd1 = z_batch.first[axis_stride + k];
+            const Real zd2 = z_batch.first[2u * axis_stride + k];
+            const Real zd3 = z_batch.first[3u * axis_stride + k];
+
+            Mvv_stack[base + 0u] = yv0 * zv0;
+            Mvv_stack[base + 1u] = yv1 * zv1;
+            Mvv_stack[base + 2u] = yv2 * zv2;
+            Mvv_stack[base + 3u] = yv3 * zv3;
+            Mdv_stack[base + 0u] = yd0 * zv0;
+            Mdv_stack[base + 1u] = yd1 * zv1;
+            Mdv_stack[base + 2u] = yd2 * zv2;
+            Mdv_stack[base + 3u] = yd3 * zv3;
+            Mvd_stack[base + 0u] = yv0 * zd0;
+            Mvd_stack[base + 1u] = yv1 * zd1;
+            Mvd_stack[base + 2u] = yv2 * zd2;
+            Mvd_stack[base + 3u] = yv3 * zd3;
+        }
+    }
+
+    for (std::size_t node = 0; node < tensor_indices.size(); ++node) {
+        const auto& idx = tensor_indices[node];
+        const std::size_t i = idx[0];
+        const std::size_t jk = (idx[1] * axis_stride + idx[2]) * 4u;
+
+        const Real xv0 = x_batch.values[i];
+        const Real xv1 = x_batch.values[axis_stride + i];
+        const Real xv2 = x_batch.values[2u * axis_stride + i];
+        const Real xv3 = x_batch.values[3u * axis_stride + i];
+        const Real xd0 = x_batch.first[i];
+        const Real xd1 = x_batch.first[axis_stride + i];
+        const Real xd2 = x_batch.first[2u * axis_stride + i];
+        const Real xd3 = x_batch.first[3u * axis_stride + i];
+
+        // Entries 0-3: d/dx, 4-7: d/dy, 8-11: d/dz (four points each).
+        Real* SVMP_RESTRICT grad_row = gradients_out + node * 12u;
+        grad_row[0u] = xd0 * Mvv_stack[jk + 0u];
+        grad_row[1u] = xd1 * Mvv_stack[jk + 1u];
+        grad_row[2u] = xd2 * Mvv_stack[jk + 2u];
+        grad_row[3u] = xd3 * Mvv_stack[jk + 3u];
+        grad_row[4u] = xv0 * Mdv_stack[jk + 0u];
+        grad_row[5u] = xv1 * Mdv_stack[jk + 1u];
+        grad_row[6u] = xv2 * Mdv_stack[jk + 2u];
+        grad_row[7u] = xv3 * Mdv_stack[jk + 3u];
+        grad_row[8u] = xv0 * Mvd_stack[jk + 0u];
+        grad_row[9u] = xv1 * Mvd_stack[jk + 1u];
+        grad_row[10u] = xv2 * Mvd_stack[jk + 2u];
+        grad_row[11u] = xv3 * Mvd_stack[jk + 3u];
+    }
+
+    return true;
+}
+
+template
+SVMP_LAGRANGE_NOINLINE SVMP_LAGRANGE_ALIGN64 bool +evaluate_tensor_product_second_stride4_q4_transposed( + const std::vector>& tensor_indices, + std::size_t axis_stride, + const AxisBatchScratch& x_batch, + const AxisBatchScratch& y_batch, + const AxisBatchScratch& z_batch, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + const std::size_t nyz = axis_stride * axis_stride; + const std::size_t table_count = 4u * nyz; + if (table_count > kMaxStackYZ || hessians_out == nullptr) { + return false; + } + if constexpr (NeedAllOutputs) { + if (values_out == nullptr || gradients_out == nullptr) { + return false; + } + } + + Real Mvv_stack[kMaxStackYZ]; + Real Mdv_stack[kMaxStackYZ]; + Real Mvd_stack[kMaxStackYZ]; + Real Md2v_stack[kMaxStackYZ]; + Real Mvd2_stack[kMaxStackYZ]; + Real Mdd_stack[kMaxStackYZ]; + + for (std::size_t j = 0; j < axis_stride; ++j) { + const Real yv0 = y_batch.values[j]; + const Real yv1 = y_batch.values[axis_stride + j]; + const Real yv2 = y_batch.values[2u * axis_stride + j]; + const Real yv3 = y_batch.values[3u * axis_stride + j]; + const Real yd0 = y_batch.first[j]; + const Real yd1 = y_batch.first[axis_stride + j]; + const Real yd2 = y_batch.first[2u * axis_stride + j]; + const Real yd3 = y_batch.first[3u * axis_stride + j]; + const Real y20 = y_batch.second[j]; + const Real y21 = y_batch.second[axis_stride + j]; + const Real y22 = y_batch.second[2u * axis_stride + j]; + const Real y23 = y_batch.second[3u * axis_stride + j]; + + for (std::size_t k = 0; k < axis_stride; ++k) { + const std::size_t base = (j * axis_stride + k) * 4u; + const Real zv0 = z_batch.values[k]; + const Real zv1 = z_batch.values[axis_stride + k]; + const Real zv2 = z_batch.values[2u * axis_stride + k]; + const Real zv3 = z_batch.values[3u * axis_stride + k]; + const Real zd0 = z_batch.first[k]; + const Real zd1 = z_batch.first[axis_stride + k]; + const Real zd2 = z_batch.first[2u * axis_stride + k]; + const Real 
zd3 = z_batch.first[3u * axis_stride + k]; + const Real z20 = z_batch.second[k]; + const Real z21 = z_batch.second[axis_stride + k]; + const Real z22 = z_batch.second[2u * axis_stride + k]; + const Real z23 = z_batch.second[3u * axis_stride + k]; + + Mvv_stack[base + 0u] = yv0 * zv0; + Mvv_stack[base + 1u] = yv1 * zv1; + Mvv_stack[base + 2u] = yv2 * zv2; + Mvv_stack[base + 3u] = yv3 * zv3; + Mdv_stack[base + 0u] = yd0 * zv0; + Mdv_stack[base + 1u] = yd1 * zv1; + Mdv_stack[base + 2u] = yd2 * zv2; + Mdv_stack[base + 3u] = yd3 * zv3; + Mvd_stack[base + 0u] = yv0 * zd0; + Mvd_stack[base + 1u] = yv1 * zd1; + Mvd_stack[base + 2u] = yv2 * zd2; + Mvd_stack[base + 3u] = yv3 * zd3; + Md2v_stack[base + 0u] = y20 * zv0; + Md2v_stack[base + 1u] = y21 * zv1; + Md2v_stack[base + 2u] = y22 * zv2; + Md2v_stack[base + 3u] = y23 * zv3; + Mvd2_stack[base + 0u] = yv0 * z20; + Mvd2_stack[base + 1u] = yv1 * z21; + Mvd2_stack[base + 2u] = yv2 * z22; + Mvd2_stack[base + 3u] = yv3 * z23; + Mdd_stack[base + 0u] = yd0 * zd0; + Mdd_stack[base + 1u] = yd1 * zd1; + Mdd_stack[base + 2u] = yd2 * zd2; + Mdd_stack[base + 3u] = yd3 * zd3; + } + } + + for (std::size_t node = 0; node < tensor_indices.size(); ++node) { + const auto& idx = tensor_indices[node]; + const std::size_t i = idx[0]; + const std::size_t jk = (idx[1] * axis_stride + idx[2]) * 4u; + + const Real xv0 = x_batch.values[i]; + const Real xv1 = x_batch.values[axis_stride + i]; + const Real xv2 = x_batch.values[2u * axis_stride + i]; + const Real xv3 = x_batch.values[3u * axis_stride + i]; + const Real xd0 = x_batch.first[i]; + const Real xd1 = x_batch.first[axis_stride + i]; + const Real xd2 = x_batch.first[2u * axis_stride + i]; + const Real xd3 = x_batch.first[3u * axis_stride + i]; + const Real x20 = x_batch.second[i]; + const Real x21 = x_batch.second[axis_stride + i]; + const Real x22 = x_batch.second[2u * axis_stride + i]; + const Real x23 = x_batch.second[3u * axis_stride + i]; + + const Real mvv0 = Mvv_stack[jk + 0u]; + const 
Real mvv1 = Mvv_stack[jk + 1u]; + const Real mvv2 = Mvv_stack[jk + 2u]; + const Real mvv3 = Mvv_stack[jk + 3u]; + const Real mdv0 = Mdv_stack[jk + 0u]; + const Real mdv1 = Mdv_stack[jk + 1u]; + const Real mdv2 = Mdv_stack[jk + 2u]; + const Real mdv3 = Mdv_stack[jk + 3u]; + const Real mvd0 = Mvd_stack[jk + 0u]; + const Real mvd1 = Mvd_stack[jk + 1u]; + const Real mvd2 = Mvd_stack[jk + 2u]; + const Real mvd3 = Mvd_stack[jk + 3u]; + const Real md2v0 = Md2v_stack[jk + 0u]; + const Real md2v1 = Md2v_stack[jk + 1u]; + const Real md2v2 = Md2v_stack[jk + 2u]; + const Real md2v3 = Md2v_stack[jk + 3u]; + const Real mvd20 = Mvd2_stack[jk + 0u]; + const Real mvd21 = Mvd2_stack[jk + 1u]; + const Real mvd22 = Mvd2_stack[jk + 2u]; + const Real mvd23 = Mvd2_stack[jk + 3u]; + const Real mdd0 = Mdd_stack[jk + 0u]; + const Real mdd1 = Mdd_stack[jk + 1u]; + const Real mdd2 = Mdd_stack[jk + 2u]; + const Real mdd3 = Mdd_stack[jk + 3u]; + + if constexpr (NeedAllOutputs) { + Real* SVMP_RESTRICT value_row = values_out + node * 4u; + value_row[0u] = xv0 * mvv0; + value_row[1u] = xv1 * mvv1; + value_row[2u] = xv2 * mvv2; + value_row[3u] = xv3 * mvv3; + + Real* SVMP_RESTRICT grad_row = gradients_out + node * 12u; + grad_row[0u] = xd0 * mvv0; + grad_row[1u] = xd1 * mvv1; + grad_row[2u] = xd2 * mvv2; + grad_row[3u] = xd3 * mvv3; + grad_row[4u] = xv0 * mdv0; + grad_row[5u] = xv1 * mdv1; + grad_row[6u] = xv2 * mdv2; + grad_row[7u] = xv3 * mdv3; + grad_row[8u] = xv0 * mvd0; + grad_row[9u] = xv1 * mvd1; + grad_row[10u] = xv2 * mvd2; + grad_row[11u] = xv3 * mvd3; + } + + const Real hxy0 = xd0 * mdv0; + const Real hxy1 = xd1 * mdv1; + const Real hxy2 = xd2 * mdv2; + const Real hxy3 = xd3 * mdv3; + const Real hxz0 = xd0 * mvd0; + const Real hxz1 = xd1 * mvd1; + const Real hxz2 = xd2 * mvd2; + const Real hxz3 = xd3 * mvd3; + const Real hyz0 = xv0 * mdd0; + const Real hyz1 = xv1 * mdd1; + const Real hyz2 = xv2 * mdd2; + const Real hyz3 = xv3 * mdd3; + + Real* SVMP_RESTRICT hess_row = hessians_out + node 
* 36u; + hess_row[0u] = x20 * mvv0; + hess_row[1u] = x21 * mvv1; + hess_row[2u] = x22 * mvv2; + hess_row[3u] = x23 * mvv3; + hess_row[4u] = hxy0; + hess_row[5u] = hxy1; + hess_row[6u] = hxy2; + hess_row[7u] = hxy3; + hess_row[8u] = hxz0; + hess_row[9u] = hxz1; + hess_row[10u] = hxz2; + hess_row[11u] = hxz3; + hess_row[12u] = hxy0; + hess_row[13u] = hxy1; + hess_row[14u] = hxy2; + hess_row[15u] = hxy3; + hess_row[16u] = xv0 * md2v0; + hess_row[17u] = xv1 * md2v1; + hess_row[18u] = xv2 * md2v2; + hess_row[19u] = xv3 * md2v3; + hess_row[20u] = hyz0; + hess_row[21u] = hyz1; + hess_row[22u] = hyz2; + hess_row[23u] = hyz3; + hess_row[24u] = hxz0; + hess_row[25u] = hxz1; + hess_row[26u] = hxz2; + hess_row[27u] = hxz3; + hess_row[28u] = hyz0; + hess_row[29u] = hyz1; + hess_row[30u] = hyz2; + hess_row[31u] = hyz3; + hess_row[32u] = xv0 * mvd20; + hess_row[33u] = xv1 * mvd21; + hess_row[34u] = xv2 * mvd22; + hess_row[35u] = xv3 * mvd23; + } + + return true; +} + +template +constexpr std::size_t line_public_axis_index(std::size_t node) noexcept { + return node == 0u ? 0u : (node == 1u ? 
static_cast(N - 1) : node - 1u); +} + +template +constexpr std::array make_line_axis_inv_denoms() noexcept { + std::array inv_denoms{}; + for (int i = 0; i < N; ++i) { + Real denom = Real(1); + for (int j = 0; j < N; ++j) { + if (j != i) { + denom *= static_cast(i - j); + } + } + inv_denoms[static_cast(i)] = Real(1) / denom; + } + return inv_denoms; +} + +template +void fill_line_values_product(Real x, Real* SVMP_RESTRICT values) { + static constexpr auto inv_denoms = make_line_axis_inv_denoms(); + const Real p = static_cast(N - 1); + const Real r = (x + Real(1)) * p * Real(0.5); + Real prefix[N]; + Real suffix[N]; + prefix[0] = Real(1); + for (int i = 1; i < N; ++i) { + prefix[i] = prefix[i - 1] * (r - static_cast(i - 1)); + } + suffix[N - 1] = Real(1); + for (int i = N - 2; i >= 0; --i) { + suffix[i] = suffix[i + 1] * (r - static_cast(i + 1)); + } + for (int i = 0; i < N; ++i) { + const std::size_t slot = static_cast(i); + values[slot] = prefix[i] * suffix[i] * inv_denoms[slot]; + } +} + +template +void fill_line_values_product_derivatives(Real x, + Real* SVMP_RESTRICT values, + Real* SVMP_RESTRICT first, + Real* SVMP_RESTRICT second) { + static constexpr auto inv_denoms = make_line_axis_inv_denoms(); + const Real p = static_cast(N - 1); + const Real drdx = p * Real(0.5); + const Real d2rdx2 = drdx * drdx; + const Real r = (x + Real(1)) * drdx; + + Real prefix[N + 1]; + Real prefix_d1[N + 1]; + Real prefix_d2[N + 1]; + Real suffix[N + 1]; + Real suffix_d1[N + 1]; + Real suffix_d2[N + 1]; + + const bool need_second = second != nullptr; + + prefix[0] = Real(1); + prefix_d1[0] = Real(0); + if (need_second) { + prefix_d2[0] = Real(0); + } + for (int i = 0; i < N; ++i) { + const Real factor = r - static_cast(i); + prefix[i + 1] = prefix[i] * factor; + prefix_d1[i + 1] = prefix_d1[i] * factor + prefix[i]; + if (need_second) { + prefix_d2[i + 1] = prefix_d2[i] * factor + Real(2) * prefix_d1[i]; + } + } + + suffix[N] = Real(1); + suffix_d1[N] = Real(0); + if 
(need_second) { + suffix_d2[N] = Real(0); + } + for (int i = N - 1; i >= 0; --i) { + const Real factor = r - static_cast(i); + suffix[i] = suffix[i + 1] * factor; + suffix_d1[i] = suffix_d1[i + 1] * factor + suffix[i + 1]; + if (need_second) { + suffix_d2[i] = suffix_d2[i + 1] * factor + Real(2) * suffix_d1[i + 1]; + } + } + + for (int i = 0; i < N; ++i) { + const std::size_t slot = static_cast(i); + const Real inv = inv_denoms[slot]; + const Real pre = prefix[i]; + const Real suf = suffix[i + 1]; + const Real pre_d1 = prefix_d1[i]; + const Real suf_d1 = suffix_d1[i + 1]; + values[slot] = pre * suf * inv; + if (first != nullptr) { + first[slot] = (pre_d1 * suf + pre * suf_d1) * inv * drdx; + } + if (second != nullptr) { + const Real d2 = + prefix_d2[i] * suf + + Real(2) * pre_d1 * suf_d1 + + pre * suffix_d2[i + 1]; + second[slot] = d2 * inv * d2rdx2; + } + } +} + +template +void fill_axis_batch_product_q4( + AxisBatchScratch& scratch, + const std::vector>& points, + std::size_t component, + AxisDeriv level) { + constexpr std::size_t axis_stride = static_cast(N); + scratch.resizeFor(4u * axis_stride, level); + for (std::size_t q = 0; q < 4u; ++q) { + Real* values = scratch.values.data() + q * axis_stride; + if (level == AxisDeriv::ValuesOnly) { + fill_line_values_product(points[q][component], values); + } else { + Real* first = scratch.first.data() + q * axis_stride; + Real* second = level == AxisDeriv::ValuesAndFirstAndSecond + ? 
scratch.second.data() + q * axis_stride + : nullptr; + fill_line_values_product_derivatives( + points[q][component], values, first, second); + } + } +} + +bool try_fill_axis_batch_product_q4( + AxisBatchScratch& scratch, + const std::vector>& points, + std::size_t component, + int n_axis, + AxisDeriv level) { + switch (n_axis) { + case 5: + fill_axis_batch_product_q4<5>(scratch, points, component, level); + return true; + case 6: + fill_axis_batch_product_q4<6>(scratch, points, component, level); + return true; + case 7: + fill_axis_batch_product_q4<7>(scratch, points, component, level); + return true; + case 8: + fill_axis_batch_product_q4<8>(scratch, points, component, level); + return true; + case 9: + fill_axis_batch_product_q4<9>(scratch, points, component, level); + return true; + default: + return false; + } +} + +template +SVMP_LAGRANGE_NOINLINE SVMP_LAGRANGE_ALIGN64 void evaluate_line_values_product_q4( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out) { + Real q0[N]; + Real q1[N]; + Real q2[N]; + Real q3[N]; + fill_line_values_product(points[0][0], q0); + fill_line_values_product(points[1][0], q1); + fill_line_values_product(points[2][0], q2); + fill_line_values_product(points[3][0], q3); + + for (std::size_t node = 0; node < static_cast(N); ++node) { + const std::size_t i = line_public_axis_index(node); + Real* row = values_out + node * output_stride; + row[0] = q0[i]; + row[1] = q1[i]; + row[2] = q2[i]; + row[3] = q3[i]; + } +} + +FE_ALWAYS_INLINE void write_line_order4_values_q( + Real x, + std::size_t q, + Real* SVMP_RESTRICT row0, + Real* SVMP_RESTRICT row1, + Real* SVMP_RESTRICT row2, + Real* SVMP_RESTRICT row3, + Real* SVMP_RESTRICT row4) { + const Real r = (x + Real(1)) * Real(2); + const Real f0 = r; + const Real f1 = r - Real(1); + const Real f2 = r - Real(2); + const Real f3 = r - Real(3); + const Real f4 = r - Real(4); + const Real f01 = f0 * f1; + const Real f12 = f1 * f2; + const Real f23 = f2 * f3; 
+ const Real f34 = f3 * f4; + const Real v0 = (f12 * f34) / Real(24); + const Real v1 = -(f0 * f2 * f34) / Real(6); + const Real v2 = (f01 * f34) / Real(4); + const Real v3 = -(f01 * f2 * f4) / Real(6); + const Real v4 = (f01 * f23) / Real(24); + row0[q] = v0; + row1[q] = v4; + row2[q] = v1; + row3[q] = v2; + row4[q] = v3; +} + +SVMP_LAGRANGE_NOINLINE SVMP_LAGRANGE_ALIGN64 void evaluate_line_order4_values_q4( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out) { + Real* row0 = values_out + 0u * output_stride; + Real* row1 = values_out + 1u * output_stride; + Real* row2 = values_out + 2u * output_stride; + Real* row3 = values_out + 3u * output_stride; + Real* row4 = values_out + 4u * output_stride; + write_line_order4_values_q(points[0][0], 0u, row0, row1, row2, row3, row4); + write_line_order4_values_q(points[1][0], 1u, row0, row1, row2, row3, row4); + write_line_order4_values_q(points[2][0], 2u, row0, row1, row2, row3, row4); + write_line_order4_values_q(points[3][0], 3u, row0, row1, row2, row3, row4); +} + +SVMP_LAGRANGE_NOINLINE void evaluate_triangle_order1_gradients_strided( + std::size_t num_qpts, + std::size_t output_stride, + Real* SVMP_RESTRICT gradients_out) { + Real* SVMP_RESTRICT row0 = gradients_out + 0u * 3u * output_stride; + Real* SVMP_RESTRICT row1 = gradients_out + 1u * 3u * output_stride; + Real* SVMP_RESTRICT row2 = gradients_out + 2u * 3u * output_stride; + + for (std::size_t q = 0; q < num_qpts; ++q) { + row0[0u * output_stride + q] = Real(-1); + row0[1u * output_stride + q] = Real(-1); + row0[2u * output_stride + q] = Real(0); + row1[0u * output_stride + q] = Real(1); + row1[1u * output_stride + q] = Real(0); + row1[2u * output_stride + q] = Real(0); + row2[0u * output_stride + q] = Real(0); + row2[1u * output_stride + q] = Real(1); + row2[2u * output_stride + q] = Real(0); + } +} + +template +SVMP_LAGRANGE_NOINLINE void evaluate_line_hessians_product_q4( + const std::vector>& points, + std::size_t 
output_stride,
+    Real* SVMP_RESTRICT hessians_out) {
+    Real values[4][N];  // filled by the helper but not read afterwards in this function
+    Real second[4][N];
+    for (std::size_t q = 0; q < 4u; ++q) {
+        fill_line_values_product_derivatives(
+            points[q][0], values[q], nullptr, second[q]);  // nullptr: first derivatives are not requested
+    }
+    for (std::size_t node = 0; node < static_cast(N); ++node) {
+        const std::size_t i = line_public_axis_index(node);
+        write_line_hessian_q4_row(hessians_out + node * 9u * output_stride,  // 9 * output_stride entries per node
+                                  output_stride,
+                                  second[0][i], second[1][i],
+                                  second[2][i], second[3][i]);
+    }
+}
+/**
+ * Values, gradients and Hessians on a line at exactly four points.
+ *
+ * All three output pointers are dereferenced unconditionally, so all must be
+ * non-null. Per node, values use output_stride entries, gradients
+ * 3 * output_stride and Hessians 9 * output_stride.
+ *
+ * NOTE(review): the template parameter list is not visible in this extract;
+ * N is assumed to be the number of nodes along the line -- confirm.
+ */
+template
+SVMP_LAGRANGE_NOINLINE void evaluate_line_all_product_q4(
+    const std::vector>& points,
+    std::size_t output_stride,
+    Real* SVMP_RESTRICT values_out,
+    Real* SVMP_RESTRICT gradients_out,
+    Real* SVMP_RESTRICT hessians_out) {
+    Real values[4][N];
+    Real first[4][N];
+    Real second[4][N];
+    for (std::size_t q = 0; q < 4u; ++q) {
+        fill_line_values_product_derivatives(
+            points[q][0], values[q], first[q], second[q]);
+    }
+    for (std::size_t node = 0; node < static_cast(N); ++node) {
+        const std::size_t i = line_public_axis_index(node);  // output node number -> index into the per-point arrays
+        Real* value_row = values_out + node * output_stride;
+        value_row[0] = values[0][i];
+        value_row[1] = values[1][i];
+        value_row[2] = values[2][i];
+        value_row[3] = values[3][i];
+        write_line_gradient_q4_row(gradients_out + node * 3u * output_stride,
+                                   output_stride,
+                                   first[0][i], first[1][i],
+                                   first[2][i], first[3][i]);
+        write_line_hessian_q4_row(hessians_out + node * 9u * output_stride,
+                                  output_stride,
+                                  second[0][i], second[1][i],
+                                  second[2][i], second[3][i]);
+    }
+}
+/**
+ * Writes one four-entry value row for a quadrilateral node.
+ *
+ * Entry q is xq[i] * yq[j]: the product of the x-axis value with index i and
+ * the y-axis value with index j, both taken from the arrays for point q.
+ */
+inline void write_quad_product_value_row_q4(
+    Real* SVMP_RESTRICT row,
+    const Real* SVMP_RESTRICT x0,
+    const Real* SVMP_RESTRICT x1,
+    const Real* SVMP_RESTRICT x2,
+    const Real* SVMP_RESTRICT x3,
+    const Real* SVMP_RESTRICT y0,
+    const Real* SVMP_RESTRICT y1,
+    const Real* SVMP_RESTRICT y2,
+    const Real* SVMP_RESTRICT y3,
+    std::size_t i,
+    std::size_t j) {
+    row[0] = x0[i] * y0[j];
+    row[1] = x1[i] * y1[j];
+    row[2] = x2[i] * y2[j];
+    row[3] = x3[i] *
y3[j];
+}
+/**
+ * Quadrilateral basis values at exactly four points.
+ *
+ * Evaluates the N one-dimensional values along x (component 0) and y
+ * (component 1) for each of points[0..3], then writes one row per node at
+ * values_out + node * output_stride. Nodes are emitted in this order:
+ *   1. corners (0,0), (p,0), (p,p), (0,p), with p = N - 1;
+ *   2. edge j = 0 with i ascending, edge i = p with j ascending,
+ *      edge j = p with i descending, edge i = 0 with j descending;
+ *   3. interior nodes, j outer and i inner, both ascending.
+ *
+ * NOTE(review): the template parameter list is not visible in this extract;
+ * N is assumed to be the number of nodes per axis -- confirm.
+ */
+template
+void evaluate_quad_values_product_q4(
+    const std::vector>& points,
+    std::size_t output_stride,
+    Real* SVMP_RESTRICT values_out) {
+    Real x0[N];
+    Real x1[N];
+    Real x2[N];
+    Real x3[N];
+    Real y0[N];
+    Real y1[N];
+    Real y2[N];
+    Real y3[N];
+    fill_line_values_product(points[0][0], x0);
+    fill_line_values_product(points[1][0], x1);
+    fill_line_values_product(points[2][0], x2);
+    fill_line_values_product(points[3][0], x3);
+    fill_line_values_product(points[0][1], y0);
+    fill_line_values_product(points[1][1], y1);
+    fill_line_values_product(points[2][1], y2);
+    fill_line_values_product(points[3][1], y3);
+
+    constexpr std::size_t p = static_cast(N - 1);
+    std::size_t node = 0u;  // running output row; post-incremented by every write below
+    write_quad_product_value_row_q4(values_out + node++ * output_stride,
+                                    x0, x1, x2, x3, y0, y1, y2, y3, 0u, 0u);
+    write_quad_product_value_row_q4(values_out + node++ * output_stride,
+                                    x0, x1, x2, x3, y0, y1, y2, y3, p, 0u);
+    write_quad_product_value_row_q4(values_out + node++ * output_stride,
+                                    x0, x1, x2, x3, y0, y1, y2, y3, p, p);
+    write_quad_product_value_row_q4(values_out + node++ * output_stride,
+                                    x0, x1, x2, x3, y0, y1, y2, y3, 0u, p);
+
+    for (std::size_t i = 1u; i < p; ++i) {  // edge j = 0
+        write_quad_product_value_row_q4(values_out + node++ * output_stride,
+                                        x0, x1, x2, x3, y0, y1, y2, y3, i, 0u);
+    }
+    for (std::size_t j = 1u; j < p; ++j) {  // edge i = p
+        write_quad_product_value_row_q4(values_out + node++ * output_stride,
+                                        x0, x1, x2, x3, y0, y1, y2, y3, p, j);
+    }
+    for (std::size_t i = p - 1u; i > 0u; --i) {  // edge j = p, traversed with i descending
+        write_quad_product_value_row_q4(values_out + node++ * output_stride,
+                                        x0, x1, x2, x3, y0, y1, y2, y3, i, p);
+    }
+    for (std::size_t j = p - 1u; j > 0u; --j) {  // edge i = 0, traversed with j descending
+        write_quad_product_value_row_q4(values_out + node++ * output_stride,
+                                        x0, x1, x2, x3, y0, y1, y2, y3, 0u, j);
+    }
+    for (std::size_t j = 1u; j < p; ++j) {  // interior nodes
+        for (std::size_t i = 1u; i < p; ++i) {
+            write_quad_product_value_row_q4(values_out + node++ * output_stride,
+                                            x0, x1, x2, x3, y0, y1, y2, y3, i, j);
+        }
+    }
+}
+/**
+ * Quadrilateral values, gradients and/or Hessians at exactly four points.
+ *
+ * Each output pointer is optional: a null pointer means that quantity is not
+ * written. Per node, values use output_stride entries, gradients
+ * 3 * output_stride and Hessians 9 * output_stride; component c of point q
+ * sits at c * output_stride + q inside a node block.
+ *
+ * Nodes are emitted corners first, then the four edges, then the interior
+ * (see the write_node calls at the end of the function).
+ *
+ * NOTE(review): the template parameter list is not visible in this extract;
+ * N is assumed to be the number of nodes per axis -- confirm.
+ */
+template
+void evaluate_quad_derivatives_product_q4(
+    const std::vector>& points,
+    std::size_t output_stride,
+    Real* SVMP_RESTRICT values_out,
+    Real* SVMP_RESTRICT gradients_out,
+    Real* SVMP_RESTRICT hessians_out) {
+    const bool need_grad = gradients_out != nullptr;
+    const bool need_hess = hessians_out != nullptr;
+    Real xv[4][N];
+    Real xd[4][N];
+    Real x2[4][N];
+    Real yv[4][N];
+    Real yd[4][N];
+    Real y2[4][N];
+
+    for (std::size_t q = 0; q < 4u; ++q) {
+        fill_line_values_product_derivatives(
+            points[q][0], xv[q], (need_grad || need_hess) ? xd[q] : nullptr,  // first derivatives also feed the mixed Hessian term
+            need_hess ? x2[q] : nullptr);
+        fill_line_values_product_derivatives(
+            points[q][1], yv[q], (need_grad || need_hess) ? yd[q] : nullptr,
+            need_hess ? y2[q] : nullptr);
+    }
+
+    constexpr std::size_t p = static_cast(N - 1);
+    std::size_t node = 0u;  // running output node; advanced at the end of write_node
+    auto write_node = [&](std::size_t i, std::size_t j) {
+        Real* value_row = values_out != nullptr ? values_out + node * output_stride : nullptr;
+        Real* grad_row = gradients_out != nullptr ? gradients_out + node * 3u * output_stride : nullptr;
+        Real* hess_row = hessians_out != nullptr ?
hessians_out + node * 9u * output_stride : nullptr;
+        if (grad_row != nullptr) {
+            grad_row[2u * output_stride + 0u] = Real(0);  // third gradient component is zero for a quadrilateral
+            grad_row[2u * output_stride + 1u] = Real(0);
+            grad_row[2u * output_stride + 2u] = Real(0);
+            grad_row[2u * output_stride + 3u] = Real(0);
+        }
+        if (hess_row != nullptr) {
+            hess_row[2u * output_stride + 0u] = Real(0);  // Hessian slots 2, 5, 6, 7, 8 are zeroed; slots 0, 1, 3, 4 are filled below
+            hess_row[2u * output_stride + 1u] = Real(0);
+            hess_row[2u * output_stride + 2u] = Real(0);
+            hess_row[2u * output_stride + 3u] = Real(0);
+            hess_row[5u * output_stride + 0u] = Real(0);
+            hess_row[5u * output_stride + 1u] = Real(0);
+            hess_row[5u * output_stride + 2u] = Real(0);
+            hess_row[5u * output_stride + 3u] = Real(0);
+            hess_row[6u * output_stride + 0u] = Real(0);
+            hess_row[6u * output_stride + 1u] = Real(0);
+            hess_row[6u * output_stride + 2u] = Real(0);
+            hess_row[6u * output_stride + 3u] = Real(0);
+            hess_row[7u * output_stride + 0u] = Real(0);
+            hess_row[7u * output_stride + 1u] = Real(0);
+            hess_row[7u * output_stride + 2u] = Real(0);
+            hess_row[7u * output_stride + 3u] = Real(0);
+            hess_row[8u * output_stride + 0u] = Real(0);
+            hess_row[8u * output_stride + 1u] = Real(0);
+            hess_row[8u * output_stride + 2u] = Real(0);
+            hess_row[8u * output_stride + 3u] = Real(0);
+        }
+        for (std::size_t q = 0; q < 4u; ++q) {
+            const Real x_value = xv[q][i];
+            const Real y_value = yv[q][j];
+            if (value_row != nullptr) {
+                value_row[q] = x_value * y_value;
+            }
+            if (grad_row != nullptr) {
+                grad_row[0u * output_stride + q] = xd[q][i] * y_value;
+                grad_row[1u * output_stride + q] = x_value * yd[q][j];
+            }
+            if (hess_row != nullptr) {
+                const Real hxy = xd[q][i] * yd[q][j];  // mixed term, stored symmetrically in slots 1 and 3
+                hess_row[0u * output_stride + q] = x2[q][i] * y_value;
+                hess_row[1u * output_stride + q] = hxy;
+                hess_row[3u * output_stride + q] = hxy;
+                hess_row[4u * output_stride + q] = x_value * y2[q][j];
+            }
+        }
+        ++node;
+    };
+
+    write_node(0u, 0u);  // four corners
+    write_node(p, 0u);
+    write_node(p, p);
+    write_node(0u, p);
+    for (std::size_t i = 1u; i < p; ++i) {  // edge j = 0
+        write_node(i, 0u);
+    }
+    for (std::size_t j = 1u; j < p; ++j) {  // edge i = p
+        write_node(p, j);
+    }
+    for (std::size_t i = p - 1u; i > 0u; --i) {  // edge j = p, i descending
+        write_node(i, p);
+    }
+    for (std::size_t j = p - 1u; j > 0u; --j) {  // edge i = 0, j descending
+        write_node(0u, j);
+    }
+    for (std::size_t j = 1u; j < p; ++j) {  // interior nodes
+        for (std::size_t i = 1u; i < p; ++i) {
+            write_node(i, j);
+        }
+    }
+}
+/**
+ * Gradient-only quadrilateral kernel for four points with nine nodes per axis
+ * (N = 9, p = 8 are fixed below).
+ *
+ * Each node owns 3 * output_stride entries: x-derivative at offset 0,
+ * y-derivative at offset output_stride, and zeros at 2 * output_stride.
+ * Nodes are emitted corners first, then the four edges, then the interior.
+ */
+SVMP_LAGRANGE_NOINLINE SVMP_LAGRANGE_ALIGN64 void evaluate_quad_order8_gradients_product_q4(
+    const std::vector>& points,
+    std::size_t output_stride,
+    Real* SVMP_RESTRICT gradients_out) {
+    constexpr int N = 9;
+    constexpr std::size_t p = 8u;
+    Real xv[4][N];
+    Real xd[4][N];
+    Real yv[4][N];
+    Real yd[4][N];
+
+    for (std::size_t q = 0; q < 4u; ++q) {
+        fill_line_values_product_derivatives(points[q][0], xv[q], xd[q], nullptr);  // nullptr: second derivatives not requested
+        fill_line_values_product_derivatives(points[q][1], yv[q], yd[q], nullptr);
+    }
+
+    std::size_t node = 0u;  // running output node; advanced at the end of write_node
+    auto write_node = [&](std::size_t i, std::size_t j) {
+        Real* SVMP_RESTRICT row = gradients_out + node * 3u * output_stride;
+        row[0u] = xd[0][i] * yv[0][j];
+        row[1u] = xd[1][i] * yv[1][j];
+        row[2u] = xd[2][i] * yv[2][j];
+        row[3u] = xd[3][i] * yv[3][j];
+        row[output_stride + 0u] = xv[0][i] * yd[0][j];
+        row[output_stride + 1u] = xv[1][i] * yd[1][j];
+        row[output_stride + 2u] = xv[2][i] * yd[2][j];
+        row[output_stride + 3u] = xv[3][i] * yd[3][j];
+        row[2u * output_stride + 0u] = Real(0);
+        row[2u * output_stride + 1u] = Real(0);
+        row[2u * output_stride + 2u] = Real(0);
+        row[2u * output_stride + 3u] = Real(0);
+        ++node;
+    };
+
+    write_node(0u, 0u);  // four corners
+    write_node(p, 0u);
+    write_node(p, p);
+    write_node(0u, p);
+    for (std::size_t i = 1u; i < p; ++i) {  // edge j = 0
+        write_node(i, 0u);
+    }
+    for (std::size_t j = 1u; j < p; ++j) {  // edge i = p
+        write_node(p, j);
+    }
+    for (std::size_t i = p - 1u; i > 0u; --i) {  // edge j = p, i descending
+        write_node(i, p);
+    }
+    for (std::size_t j = p - 1u; j > 0u; --j) {  // edge i = 0, j descending
+        write_node(0u, j);
+    }
+    for (std::size_t j = 1u; j < p; ++j) {  // interior nodes
+        for (std::size_t i = 1u; i < p; ++i) {
+            write_node(i, j);
+        }
+    }
+}
+
+template  // NOTE(review): parameter list not visible in this extract; N is assumed to be the nodes per line -- confirm
+void evaluate_line_gradients_horner_q4(  // line gradients at exactly four points, one Horner recurrence per point
+    const std::vector>& points,  // only component 0 of points[0..3] is read
+    std::size_t output_stride,
+    const Real* SVMP_RESTRICT d_coeffs,  // N - 1 coefficients per basis function, lowest power first
+    Real* SVMP_RESTRICT gradients_out) {  // 3 * output_stride entries per node
+    const Real x0 = points[0][0];
+    const Real x1 = points[1][0];
+    const Real x2 = points[2][0];
+    const Real x3 = points[3][0];
+
+    for (std::size_t node = 0; node < static_cast(N); ++node) {
+        const std::size_t i = line_public_axis_index(node);  // output node number -> coefficient row
+        const Real* c = d_coeffs + i * static_cast(N - 1);
+        Real r0 = c[N - 2];  // start from the highest-power coefficient
+        Real r1 = c[N - 2];
+        Real r2 = c[N - 2];
+        Real r3 = c[N - 2];
+        for (int k = N - 2; k > 0; --k) {
+            const Real ck = c[k - 1];
+            r0 = r0 * x0 + ck;
+            r1 = r1 * x1 + ck;
+            r2 = r2 * x2 + ck;
+            r3 = r3 * x3 + ck;
+        }
+        Real* row = gradients_out + node * 3u * output_stride;
+        row[0] = r0;
+        row[1] = r1;
+        row[2] = r2;
+        row[3] = r3;
+        row[output_stride + 0u] = Real(0);  // second and third gradient components are zero on a line
+        row[output_stride + 1u] = Real(0);
+        row[output_stride + 2u] = Real(0);
+        row[output_stride + 3u] = Real(0);
+        row[2u * output_stride + 0u] = Real(0);
+        row[2u * output_stride + 1u] = Real(0);
+        row[2u * output_stride + 2u] = Real(0);
+        row[2u * output_stride + 3u] = Real(0);
+    }
+}
+/**
+ * Runtime dispatcher for four-point line values.
+ *
+ * Despite the name, v_coeffs is ignored: n_axis == 5 uses the dedicated
+ * five-node kernel and n_axis 6..9 use the product-form kernel. Returns false
+ * for any other n_axis without writing to values_out.
+ */
+bool try_evaluate_line_values_horner_q4(
+    const std::vector>& points,
+    std::size_t output_stride,
+    const Real* SVMP_RESTRICT v_coeffs,
+    int n_axis,
+    Real* SVMP_RESTRICT values_out) {
+    (void)v_coeffs;  // kept in the signature but unused by every branch
+    switch (n_axis) {
+        case 5:
+            evaluate_line_order4_values_q4(points, output_stride, values_out);
+            return true;
+        case 6:
+            evaluate_line_values_product_q4<6>(points, output_stride, values_out);
+            return true;
+        case 7:
+            evaluate_line_values_product_q4<7>(points, output_stride, values_out);
+            return true;
+        case 8:
+            evaluate_line_values_product_q4<8>(points, output_stride, values_out);
+            return true;
+        case 9:
+            evaluate_line_values_product_q4<9>(points, output_stride, values_out);
+            return true;
+        default:
+            return false;
+    }
+}
+/**
+ * Runtime dispatcher for evaluate_line_gradients_horner_q4: handles n_axis
+ * 5..9 and returns false otherwise without writing to gradients_out.
+ */
+bool try_evaluate_line_gradients_horner_q4(
+    const std::vector>& points,
+    std::size_t output_stride,
+    const Real* SVMP_RESTRICT d_coeffs,
+    int n_axis,
+    Real* SVMP_RESTRICT gradients_out) {
+    switch (n_axis) {
+        case 5:
+            evaluate_line_gradients_horner_q4<5>(points, output_stride, d_coeffs, gradients_out);
+            return true;
+        case 6:
+            evaluate_line_gradients_horner_q4<6>(points, output_stride, d_coeffs, gradients_out);
+            return true;
+        case 7:
+            evaluate_line_gradients_horner_q4<7>(points, output_stride, d_coeffs, gradients_out);
+            return true;
+        case 8:
+            evaluate_line_gradients_horner_q4<8>(points, output_stride, d_coeffs, gradients_out);
+            return true;
+        case 9:
+            evaluate_line_gradients_horner_q4<9>(points, output_stride, d_coeffs, gradients_out);
+            return true;
+        default:
+            return false;
+    }
+}
+/**
+ * Runtime dispatcher for evaluate_line_hessians_product_q4: handles n_axis
+ * 5..9 and returns false otherwise without writing to hessians_out.
+ */
+SVMP_LAGRANGE_NOINLINE bool try_evaluate_line_hessians_product_q4(
+    const std::vector>& points,
+    std::size_t output_stride,
+    int n_axis,
+    Real* SVMP_RESTRICT hessians_out) {
+    switch (n_axis) {
+        case 5:
+            evaluate_line_hessians_product_q4<5>(points, output_stride, hessians_out);
+            return true;
+        case 6:
+            evaluate_line_hessians_product_q4<6>(points, output_stride, hessians_out);
+            return true;
+        case 7:
+            evaluate_line_hessians_product_q4<7>(points, output_stride, hessians_out);
+            return true;
+        case 8:
+            evaluate_line_hessians_product_q4<8>(points, output_stride, hessians_out);
+            return true;
+        case 9:
+            evaluate_line_hessians_product_q4<9>(points, output_stride, hessians_out);
+            return true;
+        default:
+            return false;
+    }
+}
+/**
+ * Runtime dispatcher for evaluate_line_all_product_q4 (values, gradients and
+ * Hessians together): handles n_axis 5..9 and returns false otherwise.
+ */
+SVMP_LAGRANGE_NOINLINE bool try_evaluate_line_all_product_q4(
+    const std::vector>& points,
+    std::size_t output_stride,
+    int n_axis,
+    Real* SVMP_RESTRICT values_out,
+    Real* SVMP_RESTRICT gradients_out,
+    Real* SVMP_RESTRICT hessians_out) {
+    switch (n_axis) {
+        case 5:
+            evaluate_line_all_product_q4<5>(
+                points, output_stride, values_out, gradients_out, hessians_out);
+            return true;
+        case 6:
+            evaluate_line_all_product_q4<6>(
+                points, output_stride, values_out, gradients_out, hessians_out);
+            return
true;
+        case 7:
+            evaluate_line_all_product_q4<7>(
+                points, output_stride, values_out, gradients_out, hessians_out);
+            return true;
+        case 8:
+            evaluate_line_all_product_q4<8>(
+                points, output_stride, values_out, gradients_out, hessians_out);
+            return true;
+        case 9:
+            evaluate_line_all_product_q4<9>(
+                points, output_stride, values_out, gradients_out, hessians_out);
+            return true;
+        default:
+            return false;  // unsupported node count: nothing was written
+    }
+}
+/**
+ * Runtime dispatcher for evaluate_quad_values_product_q4: handles n_axis
+ * 5..9 and returns false otherwise without writing to values_out.
+ */
+SVMP_LAGRANGE_NOINLINE bool try_evaluate_quad_values_product_q4(
+    const std::vector>& points,
+    std::size_t output_stride,
+    int n_axis,
+    Real* SVMP_RESTRICT values_out) {
+    switch (n_axis) {
+        case 5:
+            evaluate_quad_values_product_q4<5>(points, output_stride, values_out);
+            return true;
+        case 6:
+            evaluate_quad_values_product_q4<6>(points, output_stride, values_out);
+            return true;
+        case 7:
+            evaluate_quad_values_product_q4<7>(points, output_stride, values_out);
+            return true;
+        case 8:
+            evaluate_quad_values_product_q4<8>(points, output_stride, values_out);
+            return true;
+        case 9:
+            evaluate_quad_values_product_q4<9>(points, output_stride, values_out);
+            return true;
+        default:
+            return false;
+    }
+}
+/**
+ * Runtime dispatcher for evaluate_quad_derivatives_product_q4: handles n_axis
+ * 5..9 and returns false otherwise. Null output pointers are passed through
+ * unchanged.
+ */
+SVMP_LAGRANGE_NOINLINE bool try_evaluate_quad_derivatives_product_q4(
+    const std::vector>& points,
+    std::size_t output_stride,
+    int n_axis,
+    Real* SVMP_RESTRICT values_out,
+    Real* SVMP_RESTRICT gradients_out,
+    Real* SVMP_RESTRICT hessians_out) {
+    switch (n_axis) {
+        case 5:
+            evaluate_quad_derivatives_product_q4<5>(
+                points, output_stride, values_out, gradients_out, hessians_out);
+            return true;
+        case 6:
+            evaluate_quad_derivatives_product_q4<6>(
+                points, output_stride, values_out, gradients_out, hessians_out);
+            return true;
+        case 7:
+            evaluate_quad_derivatives_product_q4<7>(
+                points, output_stride, values_out, gradients_out, hessians_out);
+            return true;
+        case 8:
+            evaluate_quad_derivatives_product_q4<8>(
+                points, output_stride, values_out, gradients_out, hessians_out);
+            return true;
+        case 9:
+            evaluate_quad_derivatives_product_q4<9>(
+                points, output_stride, values_out, gradients_out, hessians_out);
+            return true;
+        default:
+            return false;
+    }
+}
+/**
+ * Evaluates tensor-product Lagrange basis data at a batch of points.
+ *
+ * Each of values_out, gradients_out and hessians_out is optional; a null
+ * pointer means that quantity is not requested. Output layout per node:
+ * values use output_stride entries, gradients 3 * output_stride and Hessians
+ * 9 * output_stride, with component c of point q at c * output_stride + q.
+ *
+ * tensor_indices[node] supplies the per-axis indices of each output node.
+ * The function returns without writing anything when points or
+ * tensor_indices is empty. Specialised four-point kernels are tried first for
+ * the Line and Quadrilateral topologies; otherwise per-axis batches are
+ * filled and combined.
+ */
+void evaluate_tensor_product_points_strided(
+    LagrangeTopology topology,
+    const std::vector>& tensor_indices,
+    const std::vector>& points,
+    std::size_t output_stride,
+    const Real* v_coeffs,
+    const Real* d_coeffs,
+    const Real* d2_coeffs,
+    const Real* barycentric_weights,
+    int n_axis,
+    Real* SVMP_RESTRICT values_out,
+    Real* SVMP_RESTRICT gradients_out,
+    Real* SVMP_RESTRICT hessians_out) {
+    const std::size_t num_qpts = points.size();
+    if (num_qpts == 0 || tensor_indices.empty()) {
+        return;
+    }
+
+    const bool need_grad = gradients_out != nullptr;
+    const bool need_hess = hessians_out != nullptr;
+    const bool values_only = values_out != nullptr && !need_grad && !need_hess;  // the four flags below select fast paths
+    const bool gradients_only = values_out == nullptr && need_grad && !need_hess;
+    const bool hessians_only = values_out == nullptr && gradients_out == nullptr && need_hess;
+    const bool all_outputs = values_out != nullptr && need_grad && need_hess;
+    const AxisDeriv level = need_hess
+        ? AxisDeriv::ValuesAndFirstAndSecond
+        : (need_grad ?
AxisDeriv::ValuesAndFirst : AxisDeriv::ValuesOnly);
+
+    if (topology == LagrangeTopology::Line && num_qpts == 4u) {  // four-point line fast paths; each falls through if the dispatcher declines
+        if (values_only &&
+            try_evaluate_line_values_horner_q4(
+                points, output_stride, v_coeffs, n_axis, values_out)) {
+            return;
+        }
+        if (gradients_only &&
+            try_evaluate_line_gradients_horner_q4(
+                points, output_stride, d_coeffs, n_axis, gradients_out)) {
+            return;
+        }
+        if (hessians_only &&
+            try_evaluate_line_hessians_product_q4(
+                points, output_stride, n_axis, hessians_out)) {
+            return;
+        }
+        if (all_outputs &&
+            try_evaluate_line_all_product_q4(
+                points, output_stride, n_axis, values_out, gradients_out, hessians_out)) {
+            return;
+        }
+    }
+    if (topology == LagrangeTopology::Quadrilateral &&  // four-point quadrilateral values
+        values_only &&
+        num_qpts == 4u &&
+        try_evaluate_quad_values_product_q4(points, output_stride, n_axis, values_out)) {
+        return;
+    }
+    if (topology == LagrangeTopology::Quadrilateral &&  // dedicated gradient kernel for n_axis == 5
+        gradients_only &&
+        num_qpts == 4u &&
+        n_axis == 5) {
+        evaluate_quad_order4_gradients_q4(points, output_stride, gradients_out);
+        return;
+    }
+    if (topology == LagrangeTopology::Quadrilateral &&  // dedicated gradient kernel for n_axis == 9
+        gradients_only &&
+        num_qpts == 4u &&
+        n_axis == 9) {
+        evaluate_quad_order8_gradients_product_q4(points, output_stride, gradients_out);
+        return;
+    }
+    if (topology == LagrangeTopology::Quadrilateral &&  // remaining four-point quadrilateral derivative requests
+        (gradients_only || hessians_only || all_outputs) &&
+        num_qpts == 4u &&
+        try_evaluate_quad_derivatives_product_q4(
+            points, output_stride, n_axis, values_out, gradients_out, hessians_out)) {
+        return;
+    }
+
+    auto& scratch = evaluate_scratch();  // general path: per-axis batches taken from the scratch object
+    AxisBatchScratch& x_batch = scratch.axis_x_batch;
+    AxisBatchScratch& y_batch = scratch.axis_y_batch;
+    AxisBatchScratch& z_batch = scratch.axis_z_batch;
+
+    const bool has_y = topology != LagrangeTopology::Line;
+    const bool has_z = topology == LagrangeTopology::Hexahedron;
+    const std::size_t axis_stride = static_cast(n_axis);  // per-point stride inside each axis batch
+    const bool use_product_axis_batch =
+        has_z &&
+        gradients_only &&
+        num_qpts == 4u &&
+        n_axis >= 5
&&
+        n_axis <= 9;
+    auto fill_tensor_axis_batch = [&](AxisBatchScratch& batch, std::size_t component) {  // fills one axis batch, preferring the product-form kernel when enabled
+        if (use_product_axis_batch &&
+            try_fill_axis_batch_product_q4(batch, points, component, n_axis, level)) {
+            return;
+        }
+        fill_axis_batch(batch, points, component, v_coeffs, d_coeffs, d2_coeffs,
+                        barycentric_weights, n_axis, level);
+    };
+
+    fill_tensor_axis_batch(x_batch, 0u);
+    if (!has_y) {  // Line topology: only the x axis contributes
+        if (values_only) {
+            if (num_qpts == 4u) {  // unrolled four-point copy
+                for (std::size_t node = 0; node < tensor_indices.size(); ++node) {
+                    const std::size_t i = tensor_indices[node][0];
+                    Real* value_row = values_out + node * output_stride;
+                    value_row[0] = x_batch.values[i];
+                    value_row[1] = x_batch.values[axis_stride + i];
+                    value_row[2] = x_batch.values[2u * axis_stride + i];
+                    value_row[3] = x_batch.values[3u * axis_stride + i];
+                }
+                return;
+            }
+            for (std::size_t node = 0; node < tensor_indices.size(); ++node) {
+                const std::size_t i = tensor_indices[node][0];
+                Real* value_row = values_out + node * output_stride;
+
+                for (std::size_t q = 0; q < num_qpts; ++q) {
+                    value_row[q] = x_batch.values[q * axis_stride + i];  // batch entry for point q, axis index i
+                }
+            }
+            return;
+        }
+
+        if (gradients_only) {
+            for (std::size_t node = 0; node < tensor_indices.size(); ++node) {
+                const std::size_t i = tensor_indices[node][0];
+                Real* grad_row = gradients_out + node * 3u * output_stride;
+
+                for (std::size_t q = 0; q < num_qpts; ++q) {
+                    grad_row[0u * output_stride + q] =
+                        x_batch.first[q * axis_stride + i];
+                    grad_row[1u * output_stride + q] = Real(0);  // second and third components are zero on a line
+                    grad_row[2u * output_stride + q] = Real(0);
+                }
+            }
+            return;
+        }
+
+        for (std::size_t node = 0; node < tensor_indices.size(); ++node) {  // any other combination of requested outputs
+            const std::size_t i = tensor_indices[node][0];
+            Real* value_row = values_out ? values_out + node * output_stride : nullptr;
+            Real* grad_row = gradients_out ? gradients_out + node * 3u * output_stride : nullptr;
+            Real* hess_row = hessians_out ?
hessians_out + node * 9u * output_stride : nullptr;
+
+            for (std::size_t q = 0; q < num_qpts; ++q) {
+                const std::size_t q_axis = q * axis_stride + i;  // batch entry for point q, axis index i
+                if (value_row != nullptr) {
+                    value_row[q] = x_batch.values[q_axis];
+                }
+                if (need_grad) {
+                    grad_row[0u * output_stride + q] = x_batch.first[q_axis];
+                    grad_row[1u * output_stride + q] = Real(0);
+                    grad_row[2u * output_stride + q] = Real(0);
+                }
+                if (need_hess) {
+                    hess_row[0u * output_stride + q] = x_batch.second[q_axis];  // only slot 0 is non-zero on a line
+                    hess_row[1u * output_stride + q] = Real(0);
+                    hess_row[2u * output_stride + q] = Real(0);
+                    hess_row[3u * output_stride + q] = Real(0);
+                    hess_row[4u * output_stride + q] = Real(0);
+                    hess_row[5u * output_stride + q] = Real(0);
+                    hess_row[6u * output_stride + q] = Real(0);
+                    hess_row[7u * output_stride + q] = Real(0);
+                    hess_row[8u * output_stride + q] = Real(0);
+                }
+            }
+        }
+        return;
+    }
+    const bool use_tensor_tables =  // table path: always with a z axis, or with 8 nodes per axis unless only Hessians are requested
+        has_z ||
+        (axis_stride == 8u && !(need_hess && values_out == nullptr && gradients_out == nullptr));
+    if (use_tensor_tables) {
+        fill_tensor_axis_batch(y_batch, 1u);
+    } else if (has_y) {  // same call as the branch above; has_y is always true here because the !has_y case returned earlier
+        fill_tensor_axis_batch(y_batch, 1u);
+    }
+    if (has_z) {
+        fill_tensor_axis_batch(z_batch, 2u);
+    }
+
+    if (use_tensor_tables) {
+        const std::size_t ny = axis_stride;
+        const std::size_t nz = has_z ?
axis_stride : 1u;
+        const std::size_t nyz = ny * nz;
+        const std::size_t table_count = num_qpts * nyz;  // one table slot per (point, j, k)
+
+        if (has_z && num_qpts == 4u && output_stride == 4u) {  // transposed kernels; each may decline by returning false
+            if (values_only &&
+                evaluate_tensor_product_values_stride4_q4_transposed(
+                    tensor_indices, axis_stride, x_batch, y_batch, z_batch, values_out)) {
+                return;
+            }
+            if (gradients_only &&
+                evaluate_tensor_product_gradients_stride4_q4_transposed(
+                    tensor_indices, axis_stride, x_batch, y_batch, z_batch, gradients_out)) {
+                return;
+            }
+            if (hessians_only &&
+                evaluate_tensor_product_second_stride4_q4_transposed(
+                    tensor_indices, axis_stride, x_batch, y_batch, z_batch,
+                    nullptr, nullptr, hessians_out)) {
+                return;
+            }
+            if (all_outputs &&
+                evaluate_tensor_product_second_stride4_q4_transposed(
+                    tensor_indices, axis_stride, x_batch, y_batch, z_batch,
+                    values_out, gradients_out, hessians_out)) {
+                return;
+            }
+        }
+
+        Real Mvv_stack[kMaxStackYZ];  // stack storage, used when table_count fits within kMaxStackYZ
+        Real Mdv_stack[kMaxStackYZ];
+        Real Mvd_stack[kMaxStackYZ];
+        Real Md2v_stack[kMaxStackYZ];
+        Real Mvd2_stack[kMaxStackYZ];
+        Real Mdd_stack[kMaxStackYZ];
+
+        Real* Mvv;  // table names: first letter is the y factor, second the z factor (v value, d first, d2 second derivative)
+        Real* Mdv;
+        Real* Mvd;
+        Real* Md2v;
+        Real* Mvd2;
+        Real* Mdd;
+        if (table_count <= kMaxStackYZ) {
+            Mvv = Mvv_stack;
+            Mdv = Mdv_stack;
+            Mvd = Mvd_stack;
+            Md2v = Md2v_stack;
+            Mvd2 = Mvd2_stack;
+            Mdd = Mdd_stack;
+        } else {  // too large for the stack arrays: use the scratch tables instead
+            auto& tables = scratch.tensor_tables;
+            tables.resizeFor(table_count);
+            Mvv = tables.vv.data();
+            Mdv = tables.dv.data();
+            Mvd = tables.vd.data();
+            Md2v = tables.d2v.data();
+            Mvd2 = tables.vd2.data();
+            Mdd = tables.dd.data();
+        }
+
+        for (std::size_t q = 0; q < num_qpts; ++q) {
+            const std::size_t q_axis = q * axis_stride;
+            const std::size_t q_table = q * nyz;
+            for (std::size_t j = 0; j < ny; ++j) {
+                const Real yv = y_batch.values[q_axis + j];
+                const Real yd = (need_grad || need_hess) ? y_batch.first[q_axis + j] : Real(0);
+                const Real y2 = need_hess ?
y_batch.second[q_axis + j] : Real(0);
+                for (std::size_t k = 0; k < nz; ++k) {
+                    const std::size_t slot = q_table + j * nz + k;
+                    const Real zv = has_z ? z_batch.values[q_axis + k] : Real(1);  // without a z axis the z factor is 1
+                    Mvv[slot] = yv * zv;
+                    if (need_grad || need_hess) {
+                        const Real zd = has_z ? z_batch.first[q_axis + k] : Real(0);
+                        Mdv[slot] = yd * zv;
+                        Mvd[slot] = yv * zd;
+                    }
+                    if (need_hess) {
+                        const Real zd = has_z ? z_batch.first[q_axis + k] : Real(0);
+                        const Real z2 = has_z ? z_batch.second[q_axis + k] : Real(0);
+                        Md2v[slot] = y2 * zv;
+                        Mvd2[slot] = yv * z2;
+                        Mdd[slot] = yd * zd;
+                    }
+                }
+            }
+        }
+
+        if (values_only) {
+            if (has_z && num_qpts == 4u) {  // unrolled over the four points via the template index
+                for (std::size_t node = 0; node < tensor_indices.size(); ++node) {
+                    const auto& idx = tensor_indices[node];
+                    const std::size_t i = idx[0];
+                    const std::size_t jk = idx[1] * nz + idx[2];  // flattened (j, k) table index
+                    Real* value_row = values_out + node * output_stride;
+
+                    write_tensor_product_value_strided_q<0>(
+                        axis_stride, nyz, i, jk, x_batch, Mvv, value_row);
+                    write_tensor_product_value_strided_q<1>(
+                        axis_stride, nyz, i, jk, x_batch, Mvv, value_row);
+                    write_tensor_product_value_strided_q<2>(
+                        axis_stride, nyz, i, jk, x_batch, Mvv, value_row);
+                    write_tensor_product_value_strided_q<3>(
+                        axis_stride, nyz, i, jk, x_batch, Mvv, value_row);
+                }
+                return;
+            }
+            for (std::size_t node = 0; node < tensor_indices.size(); ++node) {
+                const auto& idx = tensor_indices[node];
+                const std::size_t i = idx[0];
+                const std::size_t jk = idx[1] * nz + idx[2];  // NOTE(review): with nz == 1 this assumes idx[2] is 0 -- confirm
+                Real* value_row = values_out + node * output_stride;
+
+                for (std::size_t q = 0; q < num_qpts; ++q) {
+                    const std::size_t q_axis = q * axis_stride;
+                    const std::size_t slot = q * nyz + jk;
+                    value_row[q] = x_batch.values[q_axis + i] * Mvv[slot];
+                }
+            }
+            return;
+        }
+
+        if (gradients_only) {
+            if (has_z && num_qpts == 4u) {
+                if (output_stride == 4u) {
+                    for (std::size_t node = 0; node < tensor_indices.size(); ++node) {
+                        const auto& idx = tensor_indices[node];
+                        const std::size_t i = idx[0];
+                        const
std::size_t jk = idx[1] * nz + idx[2];
+                        Real* grad_row = gradients_out + node * 3u * output_stride;
+
+                        write_tensor_product_gradient_stride4_q<0>(
+                            axis_stride, nyz, i, jk, x_batch, Mvv, Mdv, Mvd, grad_row);
+                        write_tensor_product_gradient_stride4_q<1>(
+                            axis_stride, nyz, i, jk, x_batch, Mvv, Mdv, Mvd, grad_row);
+                        write_tensor_product_gradient_stride4_q<2>(
+                            axis_stride, nyz, i, jk, x_batch, Mvv, Mdv, Mvd, grad_row);
+                        write_tensor_product_gradient_stride4_q<3>(
+                            axis_stride, nyz, i, jk, x_batch, Mvv, Mdv, Mvd, grad_row);
+                    }
+                } else {  // same four-point unrolling, with output_stride passed explicitly
+                    for (std::size_t node = 0; node < tensor_indices.size(); ++node) {
+                        const auto& idx = tensor_indices[node];
+                        const std::size_t i = idx[0];
+                        const std::size_t jk = idx[1] * nz + idx[2];
+                        Real* grad_row = gradients_out + node * 3u * output_stride;
+
+                        write_tensor_product_gradient_strided_q<0>(
+                            axis_stride, nyz, i, jk, output_stride, x_batch,
+                            Mvv, Mdv, Mvd, grad_row);
+                        write_tensor_product_gradient_strided_q<1>(
+                            axis_stride, nyz, i, jk, output_stride, x_batch,
+                            Mvv, Mdv, Mvd, grad_row);
+                        write_tensor_product_gradient_strided_q<2>(
+                            axis_stride, nyz, i, jk, output_stride, x_batch,
+                            Mvv, Mdv, Mvd, grad_row);
+                        write_tensor_product_gradient_strided_q<3>(
+                            axis_stride, nyz, i, jk, output_stride, x_batch,
+                            Mvv, Mdv, Mvd, grad_row);
+                    }
+                }
+                return;
+            }
+
+            for (std::size_t node = 0; node < tensor_indices.size(); ++node) {  // generic gradient loop for any point count
+                const auto& idx = tensor_indices[node];
+                const std::size_t i = idx[0];
+                const std::size_t jk = idx[1] * nz + idx[2];
+                Real* grad_row = gradients_out + node * 3u * output_stride;
+
+                for (std::size_t q = 0; q < num_qpts; ++q) {
+                    const std::size_t q_axis = q * axis_stride;
+                    const std::size_t slot = q * nyz + jk;
+                    const Real xv = x_batch.values[q_axis + i];
+                    const Real xd = x_batch.first[q_axis + i];
+                    grad_row[0u * output_stride + q] = xd * Mvv[slot];  // x-derivative times the y and z values
+                    grad_row[1u * output_stride + q] = xv * Mdv[slot];
+                    grad_row[2u * output_stride + q] = xv * Mvd[slot];
+                }
+            }
+            return;
+        }
+
+        if
(has_z && num_qpts == 4u && hessians_only) {
+            if (output_stride == 4u) {  // stride-4 writer: output_stride is not passed to the helper
+                for (std::size_t node = 0; node < tensor_indices.size(); ++node) {
+                    const auto& idx = tensor_indices[node];
+                    const std::size_t i = idx[0];
+                    const std::size_t jk = idx[1] * nz + idx[2];  // flattened (j, k) table index
+                    Real* hess_row = hessians_out + node * 9u * output_stride;
+
+                    write_tensor_product_hessian_stride4_q<0>(
+                        axis_stride, nyz, i, jk, x_batch,
+                        Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, hess_row);
+                    write_tensor_product_hessian_stride4_q<1>(
+                        axis_stride, nyz, i, jk, x_batch,
+                        Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, hess_row);
+                    write_tensor_product_hessian_stride4_q<2>(
+                        axis_stride, nyz, i, jk, x_batch,
+                        Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, hess_row);
+                    write_tensor_product_hessian_stride4_q<3>(
+                        axis_stride, nyz, i, jk, x_batch,
+                        Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, hess_row);
+                }
+            } else {  // arbitrary output_stride, passed explicitly
+                for (std::size_t node = 0; node < tensor_indices.size(); ++node) {
+                    const auto& idx = tensor_indices[node];
+                    const std::size_t i = idx[0];
+                    const std::size_t jk = idx[1] * nz + idx[2];
+                    Real* hess_row = hessians_out + node * 9u * output_stride;
+
+                    write_tensor_product_hessian_strided_q<0>(
+                        axis_stride, nyz, i, jk, output_stride, x_batch,
+                        Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, hess_row);
+                    write_tensor_product_hessian_strided_q<1>(
+                        axis_stride, nyz, i, jk, output_stride, x_batch,
+                        Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, hess_row);
+                    write_tensor_product_hessian_strided_q<2>(
+                        axis_stride, nyz, i, jk, output_stride, x_batch,
+                        Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, hess_row);
+                    write_tensor_product_hessian_strided_q<3>(
+                        axis_stride, nyz, i, jk, output_stride, x_batch,
+                        Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, hess_row);
+                }
+            }
+            return;
+        }
+
+        if (has_z && num_qpts == 4u && all_outputs) {  // values, gradients and Hessians together at four points
+            if (output_stride == 4u) {
+                for (std::size_t node = 0; node < tensor_indices.size(); ++node) {
+                    const auto& idx = tensor_indices[node];
+                    const std::size_t i = idx[0];
+                    const std::size_t jk = idx[1] * nz + idx[2];
+                    Real* value_row = values_out + node *
output_stride;
+                    Real* grad_row = gradients_out + node * 3u * output_stride;
+                    Real* hess_row = hessians_out + node * 9u * output_stride;
+
+                    write_tensor_product_all_stride4_q<0>(
+                        axis_stride, nyz, i, jk, x_batch,
+                        Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, value_row, grad_row, hess_row);
+                    write_tensor_product_all_stride4_q<1>(
+                        axis_stride, nyz, i, jk, x_batch,
+                        Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, value_row, grad_row, hess_row);
+                    write_tensor_product_all_stride4_q<2>(
+                        axis_stride, nyz, i, jk, x_batch,
+                        Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, value_row, grad_row, hess_row);
+                    write_tensor_product_all_stride4_q<3>(
+                        axis_stride, nyz, i, jk, x_batch,
+                        Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, value_row, grad_row, hess_row);
+                }
+            } else {  // arbitrary output_stride, passed explicitly
+                for (std::size_t node = 0; node < tensor_indices.size(); ++node) {
+                    const auto& idx = tensor_indices[node];
+                    const std::size_t i = idx[0];
+                    const std::size_t jk = idx[1] * nz + idx[2];
+                    Real* value_row = values_out + node * output_stride;
+                    Real* grad_row = gradients_out + node * 3u * output_stride;
+                    Real* hess_row = hessians_out + node * 9u * output_stride;
+
+                    write_tensor_product_all_strided_q<0>(
+                        axis_stride, nyz, i, jk, output_stride, x_batch,
+                        Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, value_row, grad_row, hess_row);
+                    write_tensor_product_all_strided_q<1>(
+                        axis_stride, nyz, i, jk, output_stride, x_batch,
+                        Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, value_row, grad_row, hess_row);
+                    write_tensor_product_all_strided_q<2>(
+                        axis_stride, nyz, i, jk, output_stride, x_batch,
+                        Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, value_row, grad_row, hess_row);
+                    write_tensor_product_all_strided_q<3>(
+                        axis_stride, nyz, i, jk, output_stride, x_batch,
+                        Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, value_row, grad_row, hess_row);
+                }
+            }
+            return;
+        }
+
+        for (std::size_t node = 0; node < tensor_indices.size(); ++node) {  // generic table-based loop for any remaining output combination
+            const auto& idx = tensor_indices[node];
+            const std::size_t i = idx[0];
+            const std::size_t jk = idx[1] * nz + idx[2];  // flattened (j, k) table index
+
+            Real* value_row = values_out ?
values_out + node * output_stride : nullptr;
+            Real* grad_row = gradients_out ? gradients_out + node * 3u * output_stride : nullptr;
+            Real* hess_row = hessians_out ? hessians_out + node * 9u * output_stride : nullptr;
+
+            for (std::size_t q = 0; q < num_qpts; ++q) {
+                const std::size_t q_axis = q * axis_stride;
+                const std::size_t slot = q * nyz + jk;
+                const Real xv = x_batch.values[q_axis + i];
+
+                if (value_row != nullptr) {
+                    value_row[q] = xv * Mvv[slot];
+                }
+
+                if (need_grad) {
+                    const Real xd = x_batch.first[q_axis + i];
+                    grad_row[0u * output_stride + q] = xd * Mvv[slot];
+                    grad_row[1u * output_stride + q] = xv * Mdv[slot];
+                    grad_row[2u * output_stride + q] = xv * Mvd[slot];
+                }
+
+                if (need_hess) {
+                    const Real xd = x_batch.first[q_axis + i];
+                    const Real x2 = x_batch.second[q_axis + i];
+                    const Real hxy = xd * Mdv[slot];  // mixed terms, each stored in two symmetric slots below
+                    const Real hxz = xd * Mvd[slot];
+                    const Real hyz = xv * Mdd[slot];
+                    hess_row[0u * output_stride + q] = x2 * Mvv[slot];  // slots 0, 4, 8 hold the pure second derivatives
+                    hess_row[4u * output_stride + q] = xv * Md2v[slot];
+                    hess_row[8u * output_stride + q] = xv * Mvd2[slot];
+                    hess_row[1u * output_stride + q] = hxy;
+                    hess_row[3u * output_stride + q] = hxy;
+                    hess_row[2u * output_stride + q] = hxz;
+                    hess_row[6u * output_stride + q] = hxz;
+                    hess_row[5u * output_stride + q] = hyz;
+                    hess_row[7u * output_stride + q] = hyz;
+                }
+            }
+        }
+        return;
+    }
+
+    for (std::size_t node = 0; node < tensor_indices.size(); ++node) {  // two-axis path without tables: x and y batches are multiplied directly
+        const auto& idx = tensor_indices[node];
+        const std::size_t i = idx[0];
+        const std::size_t j = idx[1];
+
+        Real* value_row = values_out ? values_out + node * output_stride : nullptr;
+        Real* grad_row = gradients_out ? gradients_out + node * 3u * output_stride : nullptr;
+        Real* hess_row = hessians_out ?
hessians_out + node * 9u * output_stride : nullptr; + + for (std::size_t q = 0; q < num_qpts; ++q) { + const std::size_t q_axis = q * axis_stride; + const Real xv = x_batch.values[q_axis + i]; + const Real yv = y_batch.values[q_axis + j]; + + if (value_row != nullptr) { + value_row[q] = xv * yv; + } + + if (need_grad) { + const Real xd = x_batch.first[q_axis + i]; + const Real yd = y_batch.first[q_axis + j]; + grad_row[0u * output_stride + q] = xd * yv; + grad_row[1u * output_stride + q] = xv * yd; + grad_row[2u * output_stride + q] = Real(0); + } + + if (need_hess) { + const Real xd = x_batch.first[q_axis + i]; + const Real yd = y_batch.first[q_axis + j]; + const Real x2 = x_batch.second[q_axis + i]; + const Real y2 = y_batch.second[q_axis + j]; + const Real hxy = xd * yd; + + hess_row[0u * output_stride + q] = x2 * yv; + hess_row[4u * output_stride + q] = xv * y2; + hess_row[8u * output_stride + q] = Real(0); + hess_row[1u * output_stride + q] = hxy; + hess_row[3u * output_stride + q] = hxy; + hess_row[2u * output_stride + q] = Real(0); + hess_row[6u * output_stride + q] = Real(0); + hess_row[5u * output_stride + q] = Real(0); + hess_row[7u * output_stride + q] = Real(0); + } + } + } +} + +void evaluate_wedge_points_strided( + const std::vector>& simplex_exponents, + const std::vector>& wedge_indices, + const std::vector& wedge_node_by_tri_z, + int order, + const std::vector>& points, + std::size_t output_stride, + const Real* v_coeffs, + const Real* d_coeffs, + const Real* d2_coeffs, + const Real* barycentric_weights, + int n_axis, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + if (points.empty() || wedge_indices.empty()) { + return; + } + + const bool want_values = values_out != nullptr; + const bool need_grad = gradients_out != nullptr; + const bool need_hess = hessians_out != nullptr; + const bool values_only = want_values && !need_grad && !need_hess; + const bool gradients_only = !want_values && 
need_grad && !need_hess; + const bool hessians_only = !want_values && !need_grad && need_hess; + const bool all_outputs = want_values && need_grad && need_hess; + const bool use_batched_wedge = + (values_only && order <= 3) || + (gradients_only && order >= 2) || + (hessians_only && order >= 3) || + (all_outputs && order >= 3); + if (values_only && + order >= 4 && + order <= 8 && + try_evaluate_wedge_values_product_q4( + simplex_exponents, wedge_indices, order, points, output_stride, values_out)) { + return; + } + const AxisDeriv level = need_hess + ? AxisDeriv::ValuesAndFirstAndSecond + : (need_grad ? AxisDeriv::ValuesAndFirst : AxisDeriv::ValuesOnly); + + LagrangeEvaluateScratch& scratch = evaluate_scratch(); + const std::size_t tri_count = simplex_exponents.size(); + if (use_batched_wedge) { + const std::size_t num_qpts = points.size(); + const std::size_t tri_stride = num_qpts; + if (num_qpts == 4u && + output_stride == 4u && + (gradients_only || hessians_only || all_outputs) && + order >= 3 && + order <= 8 && + wedge_node_by_tri_z.size() == tri_count * static_cast(n_axis)) { + const bool use_product_axis_batch = + gradients_only && + n_axis >= 5 && + n_axis <= 9; + if (!use_product_axis_batch || + !try_fill_axis_batch_product_q4( + scratch.axis_z_batch, points, 2u, n_axis, level)) { + fill_axis_batch(scratch.axis_z_batch, + points, + 2u, + v_coeffs, + d_coeffs, + d2_coeffs, + barycentric_weights, + n_axis, + level); + } + if (need_hess) { + if (try_evaluate_wedge_fused_stride4_q4( + simplex_exponents, wedge_node_by_tri_z, order, points, + scratch.axis_z_batch, n_axis, values_out, gradients_out, hessians_out)) { + return; + } + } else if (try_evaluate_wedge_fused_stride4_q4( + simplex_exponents, wedge_node_by_tri_z, order, points, + scratch.axis_z_batch, n_axis, values_out, gradients_out, hessians_out)) { + return; + } + } + + const std::size_t tri_values_size = tri_count * tri_stride; + scratch.wedge_tri_values_batch.resize(tri_values_size); + if (need_grad || 
need_hess) { + scratch.wedge_tri_gradient_batch.resize(tri_count * 2u * tri_stride); + } + if (need_hess) { + scratch.wedge_tri_hessian_batch.resize(tri_count * 3u * tri_stride); + } + + detail::evaluate_triangle_simplex_basis_wedge_components_strided( + simplex_exponents, + order, + points, + tri_stride, + scratch.wedge_tri_values_batch.data(), + (need_grad || need_hess) ? scratch.wedge_tri_gradient_batch.data() : nullptr, + need_hess ? scratch.wedge_tri_hessian_batch.data() : nullptr); + + const bool use_product_axis_batch = + gradients_only && + points.size() == 4u && + n_axis >= 5 && + n_axis <= 9; + if (!use_product_axis_batch || + !try_fill_axis_batch_product_q4( + scratch.axis_z_batch, points, 2u, n_axis, level)) { + fill_axis_batch(scratch.axis_z_batch, + points, + 2u, + v_coeffs, + d_coeffs, + d2_coeffs, + barycentric_weights, + n_axis, + level); + } + + const std::size_t axis_stride = static_cast(n_axis); + if (all_outputs) { + if (num_qpts == 4u) { + if (output_stride == 4u) { + for (std::size_t node = 0; node < wedge_indices.size(); ++node) { + const auto& index = wedge_indices[node]; + const std::size_t tri = index[0]; + const std::size_t z = index[1]; + Real* value_row = values_out + node * output_stride; + Real* g = gradients_out + node * 3u * output_stride; + Real* H = hessians_out + node * 9u * output_stride; + const Real* tri_g = scratch.wedge_tri_gradient_batch.data() + tri * 2u * tri_stride; + const Real* tri_H = scratch.wedge_tri_hessian_batch.data() + tri * 3u * tri_stride; + const Real* tri_values = scratch.wedge_tri_values_batch.data(); + + write_wedge_all_stride4_q<0>( + tri_stride, axis_stride, tri, z, + tri_values, tri_g, tri_H, scratch.axis_z_batch, value_row, g, H); + write_wedge_all_stride4_q<1>( + tri_stride, axis_stride, tri, z, + tri_values, tri_g, tri_H, scratch.axis_z_batch, value_row, g, H); + write_wedge_all_stride4_q<2>( + tri_stride, axis_stride, tri, z, + tri_values, tri_g, tri_H, scratch.axis_z_batch, value_row, g, H); + 
write_wedge_all_stride4_q<3>( + tri_stride, axis_stride, tri, z, + tri_values, tri_g, tri_H, scratch.axis_z_batch, value_row, g, H); + } + } else { + for (std::size_t node = 0; node < wedge_indices.size(); ++node) { + const auto& index = wedge_indices[node]; + const std::size_t tri = index[0]; + const std::size_t z = index[1]; + Real* value_row = values_out + node * output_stride; + Real* g = gradients_out + node * 3u * output_stride; + Real* H = hessians_out + node * 9u * output_stride; + const Real* tri_g = scratch.wedge_tri_gradient_batch.data() + tri * 2u * tri_stride; + const Real* tri_H = scratch.wedge_tri_hessian_batch.data() + tri * 3u * tri_stride; + const Real* tri_values = scratch.wedge_tri_values_batch.data(); + + write_wedge_all_strided_q<0>( + tri_stride, axis_stride, tri, z, output_stride, + tri_values, tri_g, tri_H, scratch.axis_z_batch, value_row, g, H); + write_wedge_all_strided_q<1>( + tri_stride, axis_stride, tri, z, output_stride, + tri_values, tri_g, tri_H, scratch.axis_z_batch, value_row, g, H); + write_wedge_all_strided_q<2>( + tri_stride, axis_stride, tri, z, output_stride, + tri_values, tri_g, tri_H, scratch.axis_z_batch, value_row, g, H); + write_wedge_all_strided_q<3>( + tri_stride, axis_stride, tri, z, output_stride, + tri_values, tri_g, tri_H, scratch.axis_z_batch, value_row, g, H); + } + } + return; + } + + for (std::size_t node = 0; node < wedge_indices.size(); ++node) { + const auto& index = wedge_indices[node]; + const std::size_t tri = index[0]; + const std::size_t z = index[1]; + Real* value_row = values_out + node * output_stride; + Real* g = gradients_out + node * 3u * output_stride; + Real* H = hessians_out + node * 9u * output_stride; + const Real* tri_g = scratch.wedge_tri_gradient_batch.data() + tri * 2u * tri_stride; + const Real* tri_H = scratch.wedge_tri_hessian_batch.data() + tri * 3u * tri_stride; + + for (std::size_t q = 0; q < num_qpts; ++q) { + const std::size_t tri_q = tri * tri_stride + q; + const std::size_t z_q 
= q * axis_stride + z; + const Real tri_v = scratch.wedge_tri_values_batch[tri_q]; + const Real zv = scratch.axis_z_batch.values[z_q]; + const Real zd = scratch.axis_z_batch.first[z_q]; + const Real tri_gx = tri_g[0u * tri_stride + q]; + const Real tri_gy = tri_g[1u * tri_stride + q]; + const Real tri_hxx = tri_H[0u * tri_stride + q]; + const Real tri_hxy = tri_H[1u * tri_stride + q]; + const Real tri_hyy = tri_H[2u * tri_stride + q]; + const Real hxz = tri_gx * zd; + const Real hxy = tri_hxy * zv; + const Real hyz = tri_gy * zd; + + value_row[q] = tri_v * zv; + g[0u * output_stride + q] = tri_gx * zv; + g[1u * output_stride + q] = tri_gy * zv; + g[2u * output_stride + q] = tri_v * zd; + H[0u * output_stride + q] = tri_hxx * zv; + H[1u * output_stride + q] = hxy; + H[2u * output_stride + q] = hxz; + H[3u * output_stride + q] = hxy; + H[4u * output_stride + q] = tri_hyy * zv; + H[5u * output_stride + q] = hyz; + H[6u * output_stride + q] = hxz; + H[7u * output_stride + q] = hyz; + H[8u * output_stride + q] = tri_v * scratch.axis_z_batch.second[z_q]; + } + } + return; + } + + if (hessians_only) { + if (num_qpts == 4u) { + if (output_stride == 4u) { + for (std::size_t node = 0; node < wedge_indices.size(); ++node) { + const auto& index = wedge_indices[node]; + const std::size_t tri = index[0]; + const std::size_t z = index[1]; + Real* H = hessians_out + node * 9u * output_stride; + const Real* tri_g = scratch.wedge_tri_gradient_batch.data() + tri * 2u * tri_stride; + const Real* tri_H = scratch.wedge_tri_hessian_batch.data() + tri * 3u * tri_stride; + const Real* tri_values = scratch.wedge_tri_values_batch.data(); + + write_wedge_hessian_stride4_q<0>( + tri_stride, axis_stride, tri, z, + tri_values, tri_g, tri_H, scratch.axis_z_batch, H); + write_wedge_hessian_stride4_q<1>( + tri_stride, axis_stride, tri, z, + tri_values, tri_g, tri_H, scratch.axis_z_batch, H); + write_wedge_hessian_stride4_q<2>( + tri_stride, axis_stride, tri, z, + tri_values, tri_g, tri_H, 
scratch.axis_z_batch, H); + write_wedge_hessian_stride4_q<3>( + tri_stride, axis_stride, tri, z, + tri_values, tri_g, tri_H, scratch.axis_z_batch, H); + } + } else { + for (std::size_t node = 0; node < wedge_indices.size(); ++node) { + const auto& index = wedge_indices[node]; + const std::size_t tri = index[0]; + const std::size_t z = index[1]; + Real* H = hessians_out + node * 9u * output_stride; + const Real* tri_g = scratch.wedge_tri_gradient_batch.data() + tri * 2u * tri_stride; + const Real* tri_H = scratch.wedge_tri_hessian_batch.data() + tri * 3u * tri_stride; + const Real* tri_values = scratch.wedge_tri_values_batch.data(); + + write_wedge_hessian_strided_q<0>( + tri_stride, axis_stride, tri, z, output_stride, + tri_values, tri_g, tri_H, scratch.axis_z_batch, H); + write_wedge_hessian_strided_q<1>( + tri_stride, axis_stride, tri, z, output_stride, + tri_values, tri_g, tri_H, scratch.axis_z_batch, H); + write_wedge_hessian_strided_q<2>( + tri_stride, axis_stride, tri, z, output_stride, + tri_values, tri_g, tri_H, scratch.axis_z_batch, H); + write_wedge_hessian_strided_q<3>( + tri_stride, axis_stride, tri, z, output_stride, + tri_values, tri_g, tri_H, scratch.axis_z_batch, H); + } + } + return; + } + + for (std::size_t node = 0; node < wedge_indices.size(); ++node) { + const auto& index = wedge_indices[node]; + const std::size_t tri = index[0]; + const std::size_t z = index[1]; + Real* H = hessians_out + node * 9u * output_stride; + const Real* tri_g = scratch.wedge_tri_gradient_batch.data() + tri * 2u * tri_stride; + const Real* tri_H = scratch.wedge_tri_hessian_batch.data() + tri * 3u * tri_stride; + + for (std::size_t q = 0; q < num_qpts; ++q) { + const std::size_t tri_q = tri * tri_stride + q; + const std::size_t z_q = q * axis_stride + z; + const Real tri_v = scratch.wedge_tri_values_batch[tri_q]; + const Real zv = scratch.axis_z_batch.values[z_q]; + const Real zd = scratch.axis_z_batch.first[z_q]; + const Real tri_gx = tri_g[0u * tri_stride + q]; + 
const Real tri_gy = tri_g[1u * tri_stride + q]; + const Real tri_hxx = tri_H[0u * tri_stride + q]; + const Real tri_hxy = tri_H[1u * tri_stride + q]; + const Real tri_hyy = tri_H[2u * tri_stride + q]; + const Real hxz = tri_gx * zd; + const Real hxy = tri_hxy * zv; + const Real hyz = tri_gy * zd; + + H[0u * output_stride + q] = tri_hxx * zv; + H[1u * output_stride + q] = hxy; + H[2u * output_stride + q] = hxz; + H[3u * output_stride + q] = hxy; + H[4u * output_stride + q] = tri_hyy * zv; + H[5u * output_stride + q] = hyz; + H[6u * output_stride + q] = hxz; + H[7u * output_stride + q] = hyz; + H[8u * output_stride + q] = tri_v * scratch.axis_z_batch.second[z_q]; + } + } + return; + } + + if (gradients_only) { + if (num_qpts == 4u) { + if (output_stride == 4u) { + for (std::size_t node = 0; node < wedge_indices.size(); ++node) { + const auto& index = wedge_indices[node]; + const std::size_t tri = index[0]; + const std::size_t z = index[1]; + Real* g = gradients_out + node * 3u * output_stride; + const Real* tri_g = scratch.wedge_tri_gradient_batch.data() + tri * 2u * tri_stride; + const Real* tri_values = scratch.wedge_tri_values_batch.data(); + + write_wedge_gradient_stride4_q<0>( + tri_stride, axis_stride, tri, z, + tri_values, tri_g, scratch.axis_z_batch, g); + write_wedge_gradient_stride4_q<1>( + tri_stride, axis_stride, tri, z, + tri_values, tri_g, scratch.axis_z_batch, g); + write_wedge_gradient_stride4_q<2>( + tri_stride, axis_stride, tri, z, + tri_values, tri_g, scratch.axis_z_batch, g); + write_wedge_gradient_stride4_q<3>( + tri_stride, axis_stride, tri, z, + tri_values, tri_g, scratch.axis_z_batch, g); + } + } else { + for (std::size_t node = 0; node < wedge_indices.size(); ++node) { + const auto& index = wedge_indices[node]; + const std::size_t tri = index[0]; + const std::size_t z = index[1]; + Real* g = gradients_out + node * 3u * output_stride; + const Real* tri_g = scratch.wedge_tri_gradient_batch.data() + tri * 2u * tri_stride; + const Real* 
tri_values = scratch.wedge_tri_values_batch.data(); + + write_wedge_gradient_strided_q<0>( + tri_stride, axis_stride, tri, z, output_stride, + tri_values, tri_g, scratch.axis_z_batch, g); + write_wedge_gradient_strided_q<1>( + tri_stride, axis_stride, tri, z, output_stride, + tri_values, tri_g, scratch.axis_z_batch, g); + write_wedge_gradient_strided_q<2>( + tri_stride, axis_stride, tri, z, output_stride, + tri_values, tri_g, scratch.axis_z_batch, g); + write_wedge_gradient_strided_q<3>( + tri_stride, axis_stride, tri, z, output_stride, + tri_values, tri_g, scratch.axis_z_batch, g); + } + } + return; + } + + for (std::size_t node = 0; node < wedge_indices.size(); ++node) { + const auto& index = wedge_indices[node]; + const std::size_t tri = index[0]; + const std::size_t z = index[1]; + Real* g = gradients_out + node * 3u * output_stride; + const Real* tri_g = scratch.wedge_tri_gradient_batch.data() + tri * 2u * tri_stride; + + for (std::size_t q = 0; q < num_qpts; ++q) { + const std::size_t tri_q = tri * tri_stride + q; + const std::size_t z_q = q * axis_stride + z; + const Real tri_v = scratch.wedge_tri_values_batch[tri_q]; + const Real zv = scratch.axis_z_batch.values[z_q]; + g[0u * output_stride + q] = tri_g[0u * tri_stride + q] * zv; + g[1u * output_stride + q] = tri_g[1u * tri_stride + q] * zv; + g[2u * output_stride + q] = tri_v * scratch.axis_z_batch.first[z_q]; + } + } + return; + } + + for (std::size_t node = 0; node < wedge_indices.size(); ++node) { + const auto& index = wedge_indices[node]; + const std::size_t tri = index[0]; + const std::size_t z = index[1]; + Real* value_row = values_out ? values_out + node * output_stride : nullptr; + Real* g = gradients_out ? gradients_out + node * 3u * output_stride : nullptr; + Real* H = hessians_out ? 
hessians_out + node * 9u * output_stride : nullptr; + + for (std::size_t q = 0; q < num_qpts; ++q) { + const std::size_t tri_q = tri * tri_stride + q; + const std::size_t z_q = q * axis_stride + z; + const Real tri_v = scratch.wedge_tri_values_batch[tri_q]; + const Real zv = scratch.axis_z_batch.values[z_q]; + if (values_out != nullptr) { + value_row[q] = tri_v * zv; + } + + if (need_grad) { + const Real* tri_g = scratch.wedge_tri_gradient_batch.data() + tri * 2u * tri_stride; + g[0u * output_stride + q] = tri_g[0u * tri_stride + q] * zv; + g[1u * output_stride + q] = tri_g[1u * tri_stride + q] * zv; + g[2u * output_stride + q] = tri_v * scratch.axis_z_batch.first[z_q]; + } + + if (need_hess) { + const Real* tri_g = scratch.wedge_tri_gradient_batch.data() + tri * 2u * tri_stride; + const Real* tri_H = scratch.wedge_tri_hessian_batch.data() + tri * 3u * tri_stride; + const Real zd = scratch.axis_z_batch.first[z_q]; + const Real hxz = tri_g[0u * tri_stride + q] * zd; + const Real hxy = tri_H[1u * tri_stride + q] * zv; + const Real hyz = tri_g[1u * tri_stride + q] * zd; + H[0u * output_stride + q] = tri_H[0u * tri_stride + q] * zv; + H[1u * output_stride + q] = hxy; + H[2u * output_stride + q] = hxz; + H[3u * output_stride + q] = hxy; + H[4u * output_stride + q] = tri_H[2u * tri_stride + q] * zv; + H[5u * output_stride + q] = hyz; + H[6u * output_stride + q] = hxz; + H[7u * output_stride + q] = hyz; + H[8u * output_stride + q] = tri_v * scratch.axis_z_batch.second[z_q]; + } + } + } + + return; + } + + scratch.tri_values.resize(tri_count); + if (need_grad || need_hess) { + scratch.tri_gradient_components.resize(tri_count * 3u); + } + if (need_hess) { + scratch.tri_hessian_components.resize(tri_count * 9u); + } + + for (std::size_t q = 0; q < points.size(); ++q) { + const auto& xi = points[q]; + const AxisBasisEvaluations z_axis = + fill_axis_scratch(scratch.axis_z, + v_coeffs, + d_coeffs, + d2_coeffs, + barycentric_weights, + n_axis, + xi[2], + level); + 
detail::evaluate_triangle_simplex_basis_to( + simplex_exponents, + order, + xi, + scratch.tri_values.data(), + (need_grad || need_hess) ? scratch.tri_gradient_components.data() : nullptr, + need_hess ? scratch.tri_hessian_components.data() : nullptr); + + for (std::size_t node = 0; node < wedge_indices.size(); ++node) { + const auto& index = wedge_indices[node]; + const std::size_t tri = index[0]; + const std::size_t z = index[1]; + const Real tri_v = scratch.tri_values[tri]; + const Real zv = z_axis.values[z]; + + if (values_out != nullptr) { + values_out[node * output_stride + q] = tri_v * zv; + } + + if (need_grad) { + const Real* tri_g = scratch.tri_gradient_components.data() + tri * 3u; + Real* g = gradients_out + node * 3u * output_stride; + g[0u * output_stride + q] = tri_g[0] * zv; + g[1u * output_stride + q] = tri_g[1] * zv; + g[2u * output_stride + q] = tri_v * z_axis.first[z]; + } + + if (need_hess) { + const Real* tri_g = scratch.tri_gradient_components.data() + tri * 3u; + const Real* tri_H = scratch.tri_hessian_components.data() + tri * 9u; + const Real zd = z_axis.first[z]; + const Real hxz = tri_g[0] * zd; + const Real hxy = tri_H[1] * zv; + const Real hyz = tri_g[1] * zd; + Real* H = hessians_out + node * 9u * output_stride; + H[0u * output_stride + q] = tri_H[0] * zv; + H[1u * output_stride + q] = hxy; + H[2u * output_stride + q] = hxz; + H[3u * output_stride + q] = hxy; + H[4u * output_stride + q] = tri_H[4] * zv; + H[5u * output_stride + q] = hyz; + H[6u * output_stride + q] = hxz; + H[7u * output_stride + q] = hyz; + H[8u * output_stride + q] = tri_v * z_axis.second[z]; + } + } + } +} + +NormalizedLagrangeRequest normalize_lagrange_request(ElementType element_type, int order) { + switch (element_type) { + case ElementType::Line3: + return {ElementType::Line2, std::max(order, 2)}; + case ElementType::Triangle6: + return {ElementType::Triangle3, std::max(order, 2)}; + case ElementType::Quad9: + return {ElementType::Quad4, std::max(order, 2)}; + 
case ElementType::Quad8: + throw BasisElementCompatibilityException( + "Quad8 is a serendipity element; use SerendipityBasis for Quad8", + __FILE__, __LINE__, __func__); + case ElementType::Tetra10: + return {ElementType::Tetra4, std::max(order, 2)}; + case ElementType::Hex27: + return {ElementType::Hex8, std::max(order, 2)}; + case ElementType::Hex20: + throw BasisElementCompatibilityException( + "Hex20 is a serendipity element; use SerendipityBasis for Hex20", + __FILE__, __LINE__, __func__); + case ElementType::Wedge18: + return {ElementType::Wedge6, std::max(order, 2)}; + case ElementType::Wedge15: + throw BasisElementCompatibilityException( + "Wedge15 is a serendipity element; use SerendipityBasis for Wedge15", + __FILE__, __LINE__, __func__); + case ElementType::Pyramid13: + throw BasisElementCompatibilityException( + "Pyramid13 is a serendipity variant; use SerendipityBasis (Pyramid13) or the complete-family Lagrange path via LagrangeBasis (Pyramid5, order >= 2)", + __FILE__, __LINE__, __func__); + case ElementType::Pyramid14: + return {ElementType::Pyramid5, std::max(order, 2)}; + default: + return {element_type, order}; + } +} + +} // namespace + +void prewarm_lagrange_basis_scratch(int max_order, std::size_t max_qpts) { + evaluate_scratch().prewarm(max_order, max_qpts); +} + +LagrangeBasis::LagrangeBasis(ElementType type, int order) + : element_type_(type), dimension_(0), order_(order) { + const NormalizedLagrangeRequest normalized = normalize_lagrange_request(element_type_, order_); + element_type_ = normalized.element_type; + order_ = normalized.order; + + if (order_ < 0) { + throw BasisConfigurationException("LagrangeBasis requires non-negative polynomial order", + __FILE__, __LINE__, __func__); + } + + dimension_ = lagrange_topology_traits(element_type_).dimension; + + init_nodes(); + init_evaluation_dispatch(); +} + +void LagrangeBasis::init_nodes() { + nodes_.clear(); + nodes_1d_.clear(); + tensor_indices_.clear(); + simplex_exponents_.clear(); + 
wedge_indices_.clear(); + wedge_node_by_tri_z_.clear(); + axis_v_coeffs_.clear(); + axis_d_coeffs_.clear(); + axis_d2_coeffs_.clear(); + axis_barycentric_weights_.clear(); + const auto topology = lagrange_topology_traits(element_type_).topology; + topology_id_ = static_cast(topology); + switch (topology) { + case LagrangeTopology::Point: + build_point_nodes(); + return; + case LagrangeTopology::Line: + build_tensor_product_nodes(1); + compute_axis_monomial_coefficients(); + return; + case LagrangeTopology::Quadrilateral: + build_tensor_product_nodes(2); + compute_axis_monomial_coefficients(); + return; + case LagrangeTopology::Hexahedron: + build_tensor_product_nodes(3); + compute_axis_monomial_coefficients(); + return; + case LagrangeTopology::Triangle: + case LagrangeTopology::Tetrahedron: + build_simplex_nodes(); + return; + case LagrangeTopology::Wedge: + build_wedge_nodes(); + compute_axis_monomial_coefficients(); + return; + case LagrangeTopology::Pyramid: + build_pyramid_nodes(); + return; + case LagrangeTopology::Unknown: + break; + } + + throw BasisElementCompatibilityException("Unsupported element type in LagrangeBasis::init_nodes", + __FILE__, __LINE__, __func__); +} + +void LagrangeBasis::init_evaluation_dispatch() { + const auto topology = static_cast(topology_id_); + switch (topology) { + case LagrangeTopology::Point: + vector_evaluation_dispatch_ = &LagrangeBasis::evaluate_point_vectors; + return; + case LagrangeTopology::Line: + case LagrangeTopology::Quadrilateral: + case LagrangeTopology::Hexahedron: + vector_evaluation_dispatch_ = &LagrangeBasis::evaluate_tensor_product_vectors; + return; + case LagrangeTopology::Triangle: + vector_evaluation_dispatch_ = &LagrangeBasis::evaluate_triangle_vectors; + return; + case LagrangeTopology::Tetrahedron: + vector_evaluation_dispatch_ = &LagrangeBasis::evaluate_tetrahedron_vectors; + return; + case LagrangeTopology::Wedge: + vector_evaluation_dispatch_ = &LagrangeBasis::evaluate_wedge_vectors; + return; + 
case LagrangeTopology::Pyramid: + vector_evaluation_dispatch_ = &LagrangeBasis::evaluate_pyramid_vectors; + return; + case LagrangeTopology::Unknown: + break; + } + vector_evaluation_dispatch_ = &LagrangeBasis::evaluate_unsupported_vectors; +} + +void LagrangeBasis::compute_axis_monomial_coefficients() { + const int N = static_cast(nodes_1d_.size()); + if (N == 0) return; + + axis_barycentric_weights_.resize(static_cast(N)); + fill_equispaced_barycentric_weights(N, axis_barycentric_weights_.data()); + + if (assign_precomputed_axis_coefficients(N, axis_v_coeffs_, axis_d_coeffs_, axis_d2_coeffs_)) { + return; + } + + axis_v_coeffs_.assign(static_cast(N) * static_cast(N), Real(0)); + if (N >= 2) { + axis_d_coeffs_.assign(static_cast(N) * static_cast(N - 1), Real(0)); + } + if (N >= 3) { + axis_d2_coeffs_.assign(static_cast(N) * static_cast(N - 2), Real(0)); + } + + if (N == 1) { + axis_v_coeffs_[0] = Real(1); + return; + } + + // For each L_i, compute monomial coefficients of P_i(x) = prod_{j != i} (x - x_j), + // then divide by w_i = prod_{j != i} (x_i - x_j). + std::vector coeffs; + coeffs.reserve(static_cast(N)); + for (int i = 0; i < N; ++i) { + coeffs.assign(1, Real(1)); // start with constant polynomial 1 + for (int j = 0; j < N; ++j) { + if (j == i) continue; + // Multiply (x - x_j) into coeffs (in-place via temp). + std::vector next(coeffs.size() + 1, Real(0)); + for (std::size_t k = 0; k < coeffs.size(); ++k) { + next[k] -= nodes_1d_[static_cast(j)] * coeffs[k]; + next[k + 1] += coeffs[k]; + } + coeffs.swap(next); + } + // Divide by w_i. + Real denom = Real(1); + for (int j = 0; j < N; ++j) { + if (j == i) continue; + denom *= (nodes_1d_[static_cast(i)] - nodes_1d_[static_cast(j)]); + } + const Real inv_denom = Real(1) / denom; + for (int k = 0; k < N; ++k) { + axis_v_coeffs_[static_cast(i) * static_cast(N) + static_cast(k)] + = coeffs[static_cast(k)] * inv_denom; + } + + // First derivative coefficients: d/dx (sum_k c_ik * x^k) = sum_{k>=1} k*c_ik * x^(k-1). 
+ if (N >= 2) { + for (int k = 1; k < N; ++k) { + axis_d_coeffs_[static_cast(i) * static_cast(N - 1) + + static_cast(k - 1)] + = static_cast(k) + * axis_v_coeffs_[static_cast(i) * static_cast(N) + + static_cast(k)]; + } + } + + // Second derivative coefficients: d^2/dx^2 = sum_{k>=2} k*(k-1)*c_ik * x^(k-2). + if (N >= 3) { + for (int k = 2; k < N; ++k) { + axis_d2_coeffs_[static_cast(i) * static_cast(N - 2) + + static_cast(k - 2)] + = static_cast(k * (k - 1)) + * axis_v_coeffs_[static_cast(i) * static_cast(N) + + static_cast(k)]; + } + } + } +} + +void LagrangeBasis::build_point_nodes() { + nodes_.push_back(math::Vector{Real(0), Real(0), Real(0)}); +} + +void LagrangeBasis::init_equispaced_1d_nodes() { + nodes_1d_.clear(); + for (int i = 0; i <= std::max(order_, 0); ++i) { + nodes_1d_.push_back(detail::equispaced_pm_one_coord(i, order_)); + } +} + +void LagrangeBasis::build_tensor_product_nodes(int dimensions) { + init_equispaced_1d_nodes(); + + if (dimensions < 1 || dimensions > 3) { + throw BasisConfigurationException("LagrangeBasis::build_tensor_product_nodes requires dimension 1, 2, or 3", + __FILE__, __LINE__, __func__); + } + + nodes_ = ReferenceNodeLayout::get_lagrange_node_coords(element_type_, order_); + tensor_indices_.resize(nodes_.size(), TensorNodeIndex{0u, 0u, 0u}); + for (std::size_t n = 0; n < nodes_.size(); ++n) { + tensor_indices_[n][0] = lattice_index_pm_one( + nodes_[n][0], order_, + "LagrangeBasis: invalid tensor-product x-coordinate in public node ordering"); + if (dimensions >= 2) { + tensor_indices_[n][1] = lattice_index_pm_one( + nodes_[n][1], order_, + "LagrangeBasis: invalid tensor-product y-coordinate in public node ordering"); + } + if (dimensions == 3) { + tensor_indices_[n][2] = lattice_index_pm_one( + nodes_[n][2], order_, + "LagrangeBasis: invalid tensor-product z-coordinate in public node ordering"); + } + } +} + +void LagrangeBasis::build_simplex_nodes() { + nodes_ = ReferenceNodeLayout::get_lagrange_node_coords(element_type_, 
order_); + const auto topology = static_cast(topology_id_); + simplex_exponents_.clear(); + simplex_exponents_.reserve(nodes_.size()); + for (const auto& node : nodes_) { + switch (topology) { + case LagrangeTopology::Triangle: + simplex_exponents_.push_back(triangle_exponents_from_public_node(node, order_)); + break; + case LagrangeTopology::Tetrahedron: + simplex_exponents_.push_back(tetrahedron_exponents_from_public_node(node, order_)); + break; + default: + throw BasisElementCompatibilityException("LagrangeBasis::build_simplex_nodes requires simplex topology", + __FILE__, __LINE__, __func__); + } + } +} + +void LagrangeBasis::build_wedge_nodes() { + init_equispaced_1d_nodes(); + const auto triangle_nodes = ReferenceNodeLayout::get_lagrange_node_coords(ElementType::Triangle3, order_); + simplex_exponents_.clear(); + simplex_exponents_.reserve(triangle_nodes.size()); + std::unordered_map, std::size_t, SimplexExponentHash> triangle_index_by_exponent; + triangle_index_by_exponent.reserve(triangle_nodes.size()); + for (std::size_t tri = 0; tri < triangle_nodes.size(); ++tri) { + const auto exponents = triangle_exponents_from_public_node(triangle_nodes[tri], order_); + simplex_exponents_.push_back(exponents); + const auto inserted = triangle_index_by_exponent.emplace(exponents, tri); + if (!inserted.second) { + throw BasisNodeOrderingException("LagrangeBasis: duplicate wedge triangle descriptor", + __FILE__, __LINE__, __func__); + } + } + + nodes_ = ReferenceNodeLayout::get_lagrange_node_coords(element_type_, order_); + wedge_indices_.clear(); + wedge_indices_.reserve(nodes_.size()); + const std::size_t z_count = static_cast(order_ + 1); + const std::size_t missing_node = nodes_.size(); + wedge_node_by_tri_z_.assign(triangle_nodes.size() * z_count, missing_node); + for (std::size_t node_index = 0; node_index < nodes_.size(); ++node_index) { + const auto& node = nodes_[node_index]; + const auto exponents = triangle_exponents_from_public_node(node, order_); + const 
auto found = triangle_index_by_exponent.find(exponents); + if (found == triangle_index_by_exponent.end()) { + throw BasisNodeOrderingException("LagrangeBasis: failed to resolve wedge triangle descriptor in public ordering", + __FILE__, __LINE__, __func__); + } + const std::size_t tri = found->second; + const std::size_t z = + lattice_index_pm_one(node[2], order_, + "LagrangeBasis: invalid wedge z-coordinate in public node ordering"); + wedge_indices_.push_back(WedgeNodeIndex{tri, z}); + wedge_node_by_tri_z_[tri * z_count + z] = node_index; + } + for (std::size_t entry = 0; entry < wedge_node_by_tri_z_.size(); ++entry) { + if (wedge_node_by_tri_z_[entry] == missing_node) { + throw BasisNodeOrderingException("LagrangeBasis: incomplete wedge tensor-product node map", + __FILE__, __LINE__, __func__); + } + } +} + +void LagrangeBasis::build_pyramid_nodes() { + nodes_ = detail::lagrange_pyramid::nodes(order_); +} + +void LagrangeBasis::evaluate_point_vectors(const math::Vector&, + std::vector* values, + std::vector* gradients, + std::vector* hessians) const { + if (values != nullptr) { + values->resize(1u); + (*values)[0] = Real(1); + } + if (gradients != nullptr) { + gradients->resize(1u); + (*gradients)[0] = Gradient{}; + } + if (hessians != nullptr) { + hessians->resize(1u); + (*hessians)[0] = Hessian{}; + } +} + +void LagrangeBasis::evaluate_tensor_product_vectors(const math::Vector& xi, + std::vector* values, + std::vector* gradients, + std::vector* hessians) const { + const auto topology = static_cast(topology_id_); + if (evaluate_fixed_lagrange_fast(topology, order_, xi, values, gradients, hessians)) { + return; + } + + const int n_axis = static_cast(nodes_1d_.size()); + const Real* vc = axis_v_coeffs_.data(); + const Real* dc = axis_d_coeffs_.data(); + const Real* d2c = axis_d2_coeffs_.data(); + const Real* bw = axis_barycentric_weights_.data(); + const AxisDeriv level = hessians != nullptr ? AxisDeriv::ValuesAndFirstAndSecond + : gradients != nullptr ? 
AxisDeriv::ValuesAndFirst + : AxisDeriv::ValuesOnly; + + LagrangeEvaluateScratch& scratch = evaluate_scratch(); + const AxisBasisEvaluations x_axis = + fill_axis_scratch(scratch.axis_x, vc, dc, d2c, bw, n_axis, xi[0], level); + AxisBasisEvaluations y_axis = constant_axis_basis(); + AxisBasisEvaluations z_axis = constant_axis_basis(); + + if (topology != LagrangeTopology::Line) { + y_axis = fill_axis_scratch(scratch.axis_y, vc, dc, d2c, bw, n_axis, xi[1], level); + } + if (topology == LagrangeTopology::Hexahedron) { + z_axis = fill_axis_scratch(scratch.axis_z, vc, dc, d2c, bw, n_axis, xi[2], level); + } + + evaluate_tensor_product_factorized(tensor_indices_, x_axis, y_axis, z_axis, + values, gradients, hessians); +} + +void LagrangeBasis::evaluate_triangle_vectors(const math::Vector& xi, + std::vector* values, + std::vector* gradients, + std::vector* hessians) const { + const auto topology = static_cast(topology_id_); + if (evaluate_fixed_lagrange_fast(topology, order_, xi, values, gradients, hessians)) { + return; + } + detail::evaluate_triangle_simplex_basis(simplex_exponents_, order_, xi, + values, gradients, hessians); +} + +void LagrangeBasis::evaluate_tetrahedron_vectors(const math::Vector& xi, + std::vector* values, + std::vector* gradients, + std::vector* hessians) const { + const auto topology = static_cast(topology_id_); + if (evaluate_fixed_lagrange_fast(topology, order_, xi, values, gradients, hessians)) { + return; + } + detail::evaluate_tetrahedron_simplex_basis(simplex_exponents_, order_, xi, + values, gradients, hessians); +} + +void LagrangeBasis::evaluate_wedge_vectors(const math::Vector& xi, + std::vector* values, + std::vector* gradients, + std::vector* hessians) const { + const int n_axis = static_cast(nodes_1d_.size()); + const Real* vc = axis_v_coeffs_.data(); + const Real* dc = axis_d_coeffs_.data(); + const Real* d2c = axis_d2_coeffs_.data(); + const Real* bw = axis_barycentric_weights_.data(); + const AxisDeriv level = hessians != nullptr ? 
AxisDeriv::ValuesAndFirstAndSecond + : gradients != nullptr ? AxisDeriv::ValuesAndFirst + : AxisDeriv::ValuesOnly; + + LagrangeEvaluateScratch& scratch = evaluate_scratch(); + const AxisBasisEvaluations z_axis = + fill_axis_scratch(scratch.axis_z, vc, dc, d2c, bw, n_axis, xi[2], level); + + if (hessians != nullptr) { + detail::evaluate_triangle_simplex_basis( + simplex_exponents_, order_, xi, + &scratch.tri_values, &scratch.tri_gradients, &scratch.tri_hessians); + } else if (gradients != nullptr) { + detail::evaluate_triangle_simplex_basis( + simplex_exponents_, order_, xi, + &scratch.tri_values, &scratch.tri_gradients, nullptr); + } else { + detail::evaluate_triangle_simplex_basis( + simplex_exponents_, order_, xi, + &scratch.tri_values, nullptr, nullptr); + } + + const std::size_t n_nodes = wedge_indices_.size(); + if (values != nullptr) { + values->resize(n_nodes); + } + if (gradients != nullptr) { + gradients->resize(n_nodes); + } + if (hessians != nullptr) { + hessians->resize(n_nodes); + } + + for (std::size_t n = 0; n < n_nodes; ++n) { + const auto& index = wedge_indices_[n]; + const std::size_t tri_idx = index[0]; + const std::size_t z_idx = index[1]; + const Real zv = z_axis.values[z_idx]; + const Real tri_v = scratch.tri_values[tri_idx]; + + if (values != nullptr) { + (*values)[n] = tri_v * zv; + } + if (gradients != nullptr) { + const Real zd = z_axis.first[z_idx]; + (*gradients)[n][0] = scratch.tri_gradients[tri_idx][0] * zv; + (*gradients)[n][1] = scratch.tri_gradients[tri_idx][1] * zv; + (*gradients)[n][2] = tri_v * zd; + } + if (hessians != nullptr) { + const Real zd = z_axis.first[z_idx]; + const Real zd2 = z_axis.second[z_idx]; + Hessian H{}; + H(0, 0) = scratch.tri_hessians[tri_idx](0, 0) * zv; + H(1, 1) = scratch.tri_hessians[tri_idx](1, 1) * zv; + H(0, 1) = scratch.tri_hessians[tri_idx](0, 1) * zv; + H(1, 0) = H(0, 1); + H(2, 2) = tri_v * zd2; + H(0, 2) = scratch.tri_gradients[tri_idx][0] * zd; + H(2, 0) = H(0, 2); + H(1, 2) = 
scratch.tri_gradients[tri_idx][1] * zd; + H(2, 1) = H(1, 2); + (*hessians)[n] = H; + } + } +} + +void LagrangeBasis::evaluate_pyramid_vectors(const math::Vector& xi, + std::vector* values, + std::vector* gradients, + std::vector* hessians) const { + if (values != nullptr && gradients != nullptr && hessians != nullptr) { + detail::lagrange_pyramid::evaluate_all(order_, xi, *values, *gradients, *hessians); + return; + } + if (values != nullptr) { + detail::lagrange_pyramid::evaluate_values(order_, xi, *values); + } + if (gradients != nullptr) { + detail::lagrange_pyramid::evaluate_gradients(order_, xi, *gradients); + } + if (hessians != nullptr) { + detail::lagrange_pyramid::evaluate_hessians(order_, xi, *hessians); + } +} + +void LagrangeBasis::evaluate_unsupported_vectors(const math::Vector&, + std::vector*, + std::vector*, + std::vector*) const { + throw BasisEvaluationException("Unsupported element in LagrangeBasis vector evaluation", + __FILE__, __LINE__, __func__); +} + +void LagrangeBasis::evaluate_values(const math::Vector& xi, + std::vector& values) const { + (this->*vector_evaluation_dispatch_)(xi, &values, nullptr, nullptr); +} + +void LagrangeBasis::evaluate_gradients(const math::Vector& xi, + std::vector& gradients) const { + (this->*vector_evaluation_dispatch_)(xi, nullptr, &gradients, nullptr); +} + +void LagrangeBasis::evaluate_hessians(const math::Vector& xi, + std::vector& hessians) const { + (this->*vector_evaluation_dispatch_)(xi, nullptr, nullptr, &hessians); +} + +void LagrangeBasis::evaluate_all(const math::Vector& xi, + std::vector& values, + std::vector& gradients, + std::vector& hessians) const { + (this->*vector_evaluation_dispatch_)(xi, &values, &gradients, &hessians); +} + +void LagrangeBasis::evaluate_values_to(const math::Vector& xi, + Real* SVMP_RESTRICT values_out) const { + const auto topology = static_cast(topology_id_); + if (evaluate_fixed_lagrange_fast_to(topology, order_, xi, values_out, nullptr, nullptr)) { + return; + } + + 
const int n_axis = static_cast(nodes_1d_.size()); + const Real* vc = axis_v_coeffs_.data(); + const Real* dc = axis_d_coeffs_.data(); + const Real* d2c = axis_d2_coeffs_.data(); + const Real* bw = axis_barycentric_weights_.data(); + switch (topology) { + case LagrangeTopology::Point: + values_out[0] = Real(1); + return; + case LagrangeTopology::Line: + case LagrangeTopology::Quadrilateral: + case LagrangeTopology::Hexahedron: { + LagrangeEvaluateScratch& scratch = evaluate_scratch(); + const AxisBasisEvaluations x_axis = + fill_axis_scratch(scratch.axis_x, vc, dc, d2c, bw, n_axis, xi[0], AxisDeriv::ValuesOnly); + AxisBasisEvaluations y_axis = constant_axis_basis(); + AxisBasisEvaluations z_axis = constant_axis_basis(); + if (topology != LagrangeTopology::Line) { + y_axis = fill_axis_scratch(scratch.axis_y, vc, dc, d2c, bw, n_axis, xi[1], AxisDeriv::ValuesOnly); + } + if (topology == LagrangeTopology::Hexahedron) { + z_axis = fill_axis_scratch(scratch.axis_z, vc, dc, d2c, bw, n_axis, xi[2], AxisDeriv::ValuesOnly); + } + evaluate_tensor_product_factorized_to(tensor_indices_, x_axis, y_axis, z_axis, + values_out, nullptr, nullptr); + return; + } + case LagrangeTopology::Triangle: + detail::evaluate_triangle_simplex_basis_to(simplex_exponents_, order_, xi, + values_out, nullptr, nullptr); + return; + case LagrangeTopology::Tetrahedron: + detail::evaluate_tetrahedron_simplex_basis_to(simplex_exponents_, order_, xi, + values_out, nullptr, nullptr); + return; + case LagrangeTopology::Wedge: { + LagrangeEvaluateScratch& scratch = evaluate_scratch(); + const AxisBasisEvaluations z_axis = + fill_axis_scratch(scratch.axis_z, vc, dc, d2c, bw, n_axis, xi[2], AxisDeriv::ValuesOnly); + scratch.tri_values.resize(simplex_exponents_.size()); + detail::evaluate_triangle_simplex_basis_to(simplex_exponents_, order_, xi, + scratch.tri_values.data(), nullptr, nullptr); + for (std::size_t n = 0; n < wedge_indices_.size(); ++n) { + const auto& index = wedge_indices_[n]; + values_out[n] = 
scratch.tri_values[index[0]] * z_axis.values[index[1]]; + } + return; + } + case LagrangeTopology::Pyramid: { + detail::lagrange_pyramid::evaluate_values_to(order_, xi, values_out); + return; + } + case LagrangeTopology::Unknown: + break; + } + + throw BasisEvaluationException("Unsupported element in evaluate_values_to", + __FILE__, __LINE__, __func__); +} + +void LagrangeBasis::evaluate_gradients_to(const math::Vector& xi, + Real* SVMP_RESTRICT gradients_out) const { + const auto topology = static_cast(topology_id_); + if (evaluate_fixed_lagrange_fast_to(topology, order_, xi, nullptr, gradients_out, nullptr)) { + return; + } + + const int n_axis = static_cast(nodes_1d_.size()); + const Real* vc = axis_v_coeffs_.data(); + const Real* dc = axis_d_coeffs_.data(); + const Real* d2c = axis_d2_coeffs_.data(); + const Real* bw = axis_barycentric_weights_.data(); + switch (topology) { + case LagrangeTopology::Point: + gradients_out[0] = Real(0); + gradients_out[1] = Real(0); + gradients_out[2] = Real(0); + return; + case LagrangeTopology::Line: + case LagrangeTopology::Quadrilateral: + case LagrangeTopology::Hexahedron: { + LagrangeEvaluateScratch& scratch = evaluate_scratch(); + const AxisBasisEvaluations x_axis = + fill_axis_scratch(scratch.axis_x, vc, dc, d2c, bw, n_axis, xi[0], AxisDeriv::ValuesAndFirst); + AxisBasisEvaluations y_axis = constant_axis_basis(); + AxisBasisEvaluations z_axis = constant_axis_basis(); + if (topology != LagrangeTopology::Line) { + y_axis = fill_axis_scratch(scratch.axis_y, vc, dc, d2c, bw, n_axis, xi[1], AxisDeriv::ValuesAndFirst); + } + if (topology == LagrangeTopology::Hexahedron) { + z_axis = fill_axis_scratch(scratch.axis_z, vc, dc, d2c, bw, n_axis, xi[2], AxisDeriv::ValuesAndFirst); + } + evaluate_tensor_product_factorized_to(tensor_indices_, x_axis, y_axis, z_axis, + nullptr, gradients_out, nullptr); + return; + } + case LagrangeTopology::Triangle: + detail::evaluate_triangle_simplex_basis_to(simplex_exponents_, order_, xi, + nullptr, 
gradients_out, nullptr); + return; + case LagrangeTopology::Tetrahedron: + detail::evaluate_tetrahedron_simplex_basis_to(simplex_exponents_, order_, xi, + nullptr, gradients_out, nullptr); + return; + case LagrangeTopology::Wedge: { + LagrangeEvaluateScratch& scratch = evaluate_scratch(); + const AxisBasisEvaluations z_axis = + fill_axis_scratch(scratch.axis_z, vc, dc, d2c, bw, n_axis, xi[2], AxisDeriv::ValuesAndFirst); + const std::size_t tri_count = simplex_exponents_.size(); + scratch.tri_values.resize(tri_count); + scratch.tri_gradient_components.resize(tri_count * 3u); + detail::evaluate_triangle_simplex_basis_to(simplex_exponents_, order_, xi, + scratch.tri_values.data(), + scratch.tri_gradient_components.data(), + nullptr); + for (std::size_t n = 0; n < wedge_indices_.size(); ++n) { + const auto& index = wedge_indices_[n]; + const std::size_t tri = index[0]; + const std::size_t z = index[1]; + const Real* tri_g = scratch.tri_gradient_components.data() + tri * 3u; + Real* g = gradients_out + n * 3u; + g[0] = tri_g[0] * z_axis.values[z]; + g[1] = tri_g[1] * z_axis.values[z]; + g[2] = scratch.tri_values[tri] * z_axis.first[z]; + } + return; + } + case LagrangeTopology::Pyramid: { + detail::lagrange_pyramid::evaluate_gradients_to(order_, xi, gradients_out); + return; + } + case LagrangeTopology::Unknown: + break; + } + + throw BasisEvaluationException("Unsupported element in evaluate_gradients_to", + __FILE__, __LINE__, __func__); +} + +void LagrangeBasis::evaluate_hessians_to(const math::Vector& xi, + Real* SVMP_RESTRICT hessians_out) const { + const auto topology = static_cast(topology_id_); + if (evaluate_fixed_lagrange_fast_to(topology, order_, xi, nullptr, nullptr, hessians_out)) { + return; + } + + const int n_axis = static_cast(nodes_1d_.size()); + const Real* vc = axis_v_coeffs_.data(); + const Real* dc = axis_d_coeffs_.data(); + const Real* d2c = axis_d2_coeffs_.data(); + const Real* bw = axis_barycentric_weights_.data(); + switch (topology) { + case 
LagrangeTopology::Point: + for (std::size_t i = 0; i < 9; ++i) { + hessians_out[i] = Real(0); + } + return; + case LagrangeTopology::Line: + case LagrangeTopology::Quadrilateral: + case LagrangeTopology::Hexahedron: { + LagrangeEvaluateScratch& scratch = evaluate_scratch(); + const AxisBasisEvaluations x_axis = + fill_axis_scratch(scratch.axis_x, vc, dc, d2c, bw, n_axis, xi[0], AxisDeriv::ValuesAndFirstAndSecond); + AxisBasisEvaluations y_axis = constant_axis_basis(); + AxisBasisEvaluations z_axis = constant_axis_basis(); + if (topology != LagrangeTopology::Line) { + y_axis = fill_axis_scratch(scratch.axis_y, vc, dc, d2c, bw, n_axis, xi[1], AxisDeriv::ValuesAndFirstAndSecond); + } + if (topology == LagrangeTopology::Hexahedron) { + z_axis = fill_axis_scratch(scratch.axis_z, vc, dc, d2c, bw, n_axis, xi[2], AxisDeriv::ValuesAndFirstAndSecond); + } + evaluate_tensor_product_factorized_to(tensor_indices_, x_axis, y_axis, z_axis, + nullptr, nullptr, hessians_out); + return; + } + case LagrangeTopology::Triangle: + detail::evaluate_triangle_simplex_basis_to(simplex_exponents_, order_, xi, + nullptr, nullptr, hessians_out); + return; + case LagrangeTopology::Tetrahedron: + detail::evaluate_tetrahedron_simplex_basis_to(simplex_exponents_, order_, xi, + nullptr, nullptr, hessians_out); + return; + case LagrangeTopology::Wedge: { + LagrangeEvaluateScratch& scratch = evaluate_scratch(); + const AxisBasisEvaluations z_axis = + fill_axis_scratch(scratch.axis_z, vc, dc, d2c, bw, n_axis, xi[2], AxisDeriv::ValuesAndFirstAndSecond); + const std::size_t tri_count = simplex_exponents_.size(); + scratch.tri_values.resize(tri_count); + scratch.tri_gradient_components.resize(tri_count * 3u); + scratch.tri_hessian_components.resize(tri_count * 9u); + detail::evaluate_triangle_simplex_basis_to(simplex_exponents_, order_, xi, + scratch.tri_values.data(), + scratch.tri_gradient_components.data(), + scratch.tri_hessian_components.data()); + for (std::size_t n = 0; n < wedge_indices_.size(); 
++n) { + const auto& index = wedge_indices_[n]; + const std::size_t tri = index[0]; + const std::size_t z = index[1]; + const Real zv = z_axis.values[z]; + const Real zd = z_axis.first[z]; + const Real zd2 = z_axis.second[z]; + const Real* tri_g = scratch.tri_gradient_components.data() + tri * 3u; + const Real* tri_H = scratch.tri_hessian_components.data() + tri * 9u; + const Real hxy = tri_H[1] * zv; + const Real hxz = tri_g[0] * zd; + const Real hyz = tri_g[1] * zd; + Real* H = hessians_out + n * 9u; + H[0] = tri_H[0] * zv; + H[4] = tri_H[4] * zv; + H[1] = hxy; + H[3] = hxy; + H[8] = scratch.tri_values[tri] * zd2; + H[2] = hxz; + H[6] = hxz; + H[5] = hyz; + H[7] = hyz; + } + return; + } + case LagrangeTopology::Pyramid: { + detail::lagrange_pyramid::evaluate_hessians_to(order_, xi, hessians_out); + return; + } + case LagrangeTopology::Unknown: + break; + } + + throw BasisEvaluationException("Unsupported element in evaluate_hessians_to", + __FILE__, __LINE__, __func__); +} + +void LagrangeBasis::evaluate_all_to(const math::Vector& xi, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) const { + const auto topology = static_cast(topology_id_); + if (evaluate_fixed_lagrange_fast_to(topology, order_, xi, values_out, gradients_out, hessians_out)) { + return; + } + + const int n_axis = static_cast(nodes_1d_.size()); + const Real* vc = axis_v_coeffs_.data(); + const Real* dc = axis_d_coeffs_.data(); + const Real* d2c = axis_d2_coeffs_.data(); + const Real* bw = axis_barycentric_weights_.data(); + switch (topology) { + case LagrangeTopology::Point: + values_out[0] = Real(1); + gradients_out[0] = Real(0); + gradients_out[1] = Real(0); + gradients_out[2] = Real(0); + for (std::size_t i = 0; i < 9; ++i) { + hessians_out[i] = Real(0); + } + return; + case LagrangeTopology::Line: + case LagrangeTopology::Quadrilateral: + case LagrangeTopology::Hexahedron: { + LagrangeEvaluateScratch& scratch = evaluate_scratch(); + const 
AxisBasisEvaluations x_axis = + fill_axis_scratch(scratch.axis_x, vc, dc, d2c, bw, n_axis, xi[0], AxisDeriv::ValuesAndFirstAndSecond); + AxisBasisEvaluations y_axis = constant_axis_basis(); + AxisBasisEvaluations z_axis = constant_axis_basis(); + if (topology != LagrangeTopology::Line) { + y_axis = fill_axis_scratch(scratch.axis_y, vc, dc, d2c, bw, n_axis, xi[1], AxisDeriv::ValuesAndFirstAndSecond); + } + if (topology == LagrangeTopology::Hexahedron) { + z_axis = fill_axis_scratch(scratch.axis_z, vc, dc, d2c, bw, n_axis, xi[2], AxisDeriv::ValuesAndFirstAndSecond); + } + evaluate_tensor_product_factorized_to(tensor_indices_, x_axis, y_axis, z_axis, + values_out, gradients_out, hessians_out); + return; + } + case LagrangeTopology::Triangle: + detail::evaluate_triangle_simplex_basis_to(simplex_exponents_, order_, xi, + values_out, gradients_out, hessians_out); + return; + case LagrangeTopology::Tetrahedron: + detail::evaluate_tetrahedron_simplex_basis_to(simplex_exponents_, order_, xi, + values_out, gradients_out, hessians_out); + return; + case LagrangeTopology::Wedge: { + LagrangeEvaluateScratch& scratch = evaluate_scratch(); + const AxisBasisEvaluations z_axis = + fill_axis_scratch(scratch.axis_z, vc, dc, d2c, bw, n_axis, xi[2], AxisDeriv::ValuesAndFirstAndSecond); + const std::size_t tri_count = simplex_exponents_.size(); + scratch.tri_values.resize(tri_count); + scratch.tri_gradient_components.resize(tri_count * 3u); + scratch.tri_hessian_components.resize(tri_count * 9u); + detail::evaluate_triangle_simplex_basis_to(simplex_exponents_, order_, xi, + scratch.tri_values.data(), + scratch.tri_gradient_components.data(), + scratch.tri_hessian_components.data()); + for (std::size_t n = 0; n < wedge_indices_.size(); ++n) { + const auto& index = wedge_indices_[n]; + const std::size_t tri = index[0]; + const std::size_t z = index[1]; + const Real zv = z_axis.values[z]; + const Real zd = z_axis.first[z]; + const Real zd2 = z_axis.second[z]; + const Real tri_v = 
scratch.tri_values[tri]; + const Real* tri_g = scratch.tri_gradient_components.data() + tri * 3u; + const Real* tri_H = scratch.tri_hessian_components.data() + tri * 9u; + const Real hxy = tri_H[1] * zv; + const Real hxz = tri_g[0] * zd; + const Real hyz = tri_g[1] * zd; + + values_out[n] = tri_v * zv; + + Real* g = gradients_out + n * 3u; + g[0] = tri_g[0] * zv; + g[1] = tri_g[1] * zv; + g[2] = tri_v * zd; + + Real* H = hessians_out + n * 9u; + H[0] = tri_H[0] * zv; + H[4] = tri_H[4] * zv; + H[1] = hxy; + H[3] = hxy; + H[8] = tri_v * zd2; + H[2] = hxz; + H[6] = hxz; + H[5] = hyz; + H[7] = hyz; + } + return; + } + case LagrangeTopology::Pyramid: { + detail::lagrange_pyramid::evaluate_all_to( + order_, xi, values_out, gradients_out, hessians_out); + return; + } + case LagrangeTopology::Unknown: + break; + } + + throw BasisEvaluationException("Unsupported element in evaluate_all_to", + __FILE__, __LINE__, __func__); +} + +void LagrangeBasis::evaluate_at_quadrature_points( + const std::vector>& points, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) const { + evaluate_at_quadrature_points_strided(points, points.size(), values_out, gradients_out, hessians_out); +} + +void LagrangeBasis::evaluate_at_quadrature_points_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) const { + const std::size_t num_qpts = points.size(); + const std::size_t num_dofs = size(); + if (output_stride < num_qpts) { + throw BasisConfigurationException("LagrangeBasis strided evaluation requires output_stride >= points.size()", + __FILE__, __LINE__, __func__); + } + if (values_out == nullptr && gradients_out == nullptr && hessians_out == nullptr) { + return; + } + + const auto topology = static_cast(topology_id_); + if (evaluate_fixed_lagrange_fast_strided(topology, + order_, + points, + output_stride, + values_out, 
+ gradients_out, + hessians_out)) { + return; + } + + if (topology == LagrangeTopology::Line || + topology == LagrangeTopology::Quadrilateral || + topology == LagrangeTopology::Hexahedron) { + evaluate_tensor_product_points_strided(topology, + tensor_indices_, + points, + output_stride, + axis_v_coeffs_.data(), + axis_d_coeffs_.data(), + axis_d2_coeffs_.data(), + axis_barycentric_weights_.data(), + static_cast(nodes_1d_.size()), + values_out, + gradients_out, + hessians_out); + return; + } + + if (topology == LagrangeTopology::Triangle) { + detail::evaluate_triangle_simplex_basis_strided( + simplex_exponents_, order_, points, output_stride, values_out, gradients_out, hessians_out); + return; + } + + if (topology == LagrangeTopology::Tetrahedron) { + detail::evaluate_tetrahedron_simplex_basis_strided( + simplex_exponents_, order_, points, output_stride, values_out, gradients_out, hessians_out); + return; + } + + if (topology == LagrangeTopology::Wedge && + evaluate_wedge_fast_strided(order_, + wedge_indices_, + points, + output_stride, + values_out, + gradients_out, + hessians_out)) { + return; + } + + const bool wedge_scalar_hessian_fallback = + topology == LagrangeTopology::Wedge && + values_out == nullptr && + gradients_out == nullptr && + hessians_out != nullptr && + order_ <= 2; + if (topology == LagrangeTopology::Wedge && !wedge_scalar_hessian_fallback) { + evaluate_wedge_points_strided(simplex_exponents_, + wedge_indices_, + wedge_node_by_tri_z_, + order_, + points, + output_stride, + axis_v_coeffs_.data(), + axis_d_coeffs_.data(), + axis_d2_coeffs_.data(), + axis_barycentric_weights_.data(), + static_cast(nodes_1d_.size()), + values_out, + gradients_out, + hessians_out); + return; + } + + if (topology == LagrangeTopology::Pyramid) { + detail::lagrange_pyramid::evaluate_at_quadrature_points_strided( + order_, points, output_stride, values_out, gradients_out, hessians_out); + return; + } + + auto& scratch = evaluate_scratch(); + auto& v_tmp = 
scratch.strided_values_tmp; + auto& g_tmp = scratch.strided_gradients_tmp; + auto& h_tmp = scratch.strided_hessians_tmp; + + if (values_out) v_tmp.resize(num_dofs); + if (gradients_out) g_tmp.resize(num_dofs * 3u); + if (hessians_out) h_tmp.resize(num_dofs * 9u); + + for (std::size_t q = 0; q < num_qpts; ++q) { + if (values_out && gradients_out && hessians_out) { + evaluate_all_to(points[q], v_tmp.data(), g_tmp.data(), h_tmp.data()); + } else { + if (values_out) evaluate_values_to(points[q], v_tmp.data()); + if (gradients_out) evaluate_gradients_to(points[q], g_tmp.data()); + if (hessians_out) evaluate_hessians_to(points[q], h_tmp.data()); + } + + if (values_out) { + for (std::size_t d = 0; d < num_dofs; ++d) { + values_out[d * output_stride + q] = v_tmp[d]; + } + } + if (gradients_out) { + for (std::size_t d = 0; d < num_dofs; ++d) { + gradients_out[(d * 3u + 0u) * output_stride + q] = g_tmp[d * 3u + 0u]; + gradients_out[(d * 3u + 1u) * output_stride + q] = g_tmp[d * 3u + 1u]; + gradients_out[(d * 3u + 2u) * output_stride + q] = g_tmp[d * 3u + 2u]; + } + } + if (hessians_out) { + for (std::size_t d = 0; d < num_dofs; ++d) { + scatter_hessian_components_strided( + h_tmp.data() + d * 9u, + hessians_out + d * 9u * output_stride, + output_stride, + q); + } + } + } +} + +} // namespace basis +} // namespace FE +} // namespace svmp diff --git a/Code/Source/solver/FE/Basis/LagrangeBasis.h b/Code/Source/solver/FE/Basis/LagrangeBasis.h new file mode 100644 index 000000000..91f7e379c --- /dev/null +++ b/Code/Source/solver/FE/Basis/LagrangeBasis.h @@ -0,0 +1,175 @@ +/* Copyright (c) Stanford University, The Regents of the University of California, and others. + * + * All Rights Reserved. + * + * See License file. 
+ */ + +#ifndef SVMP_FE_BASIS_LAGRANGEBASIS_H +#define SVMP_FE_BASIS_LAGRANGEBASIS_H + +/** + * @file LagrangeBasis.h + * @brief Nodal Lagrange polynomial basis on reference elements + */ + +#include "BasisFunction.h" +#include +#include + +namespace svmp { +namespace FE { +namespace basis { + +void prewarm_lagrange_basis_scratch(int max_order, std::size_t max_qpts = 0); + +/** + * @brief Complete nodal H1 Lagrange basis on canonical reference topologies + * + * Supports arbitrary polynomial order on the canonical complete families: + * `Line2`, `Triangle3`, `Quad4`, `Tetra4`, `Hex8`, `Wedge6`, and `Pyramid5`. + * Low-order complete-family aliases (`Line3`, `Triangle6`, `Quad9`, + * `Tetra10`, `Hex27`, `Wedge18`, `Pyramid14`) normalize to their canonical + * topology plus order. Serendipity variants remain intentionally excluded. + * + * Node locations are generated on canonical reference elements using + * equispaced coordinates on tensor-product elements, barycentric grids on + * simplices, tensorized triangle-line grids on wedges, and a rational nodal + * pyramid construction on `Pyramid5`. + * + * The evaluator is numerically stabilized for those nodes, but the + * interpolation problem itself remains the equispaced Lagrange problem. For + * high-order interpolation, especially order >= 4, prefer `SpectralBasis` + * (GLL / Warp & Blend nodes) unless exact equispaced nodal placement is part + * of the requested discretization. + * + * For the rational pyramid family, basis values remain exact at the apex. + * Gradients and Hessians are analytic on the supported interior reference + * domain, but the exact-apex nodal derivative limit is not unique and those + * derivative queries throw at the exact apex. 
+ */ +class LagrangeBasis : public BasisFunction { +public: + LagrangeBasis(ElementType type, int order); + + BasisType basis_type() const noexcept override { return BasisType::Lagrange; } + ElementType element_type() const noexcept override { return element_type_; } + int dimension() const noexcept override { return dimension_; } + int order() const noexcept override { return order_; } + std::size_t size() const noexcept override { return nodes_.size(); } + bool cache_identity_is_structural() const noexcept override { return true; } + + const std::vector>& nodes() const noexcept { return nodes_; } + + void evaluate_values(const math::Vector& xi, + std::vector& values) const final; + void evaluate_gradients(const math::Vector& xi, + std::vector& gradients) const final; + void evaluate_hessians(const math::Vector& xi, + std::vector& hessians) const final; + void evaluate_all(const math::Vector& xi, + std::vector& values, + std::vector& gradients, + std::vector& hessians) const final; + + void evaluate_at_quadrature_points( + const std::vector>& points, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) const final; + void evaluate_at_quadrature_points_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) const final; + + // Raw-pointer output API. Caller must pre-size buffers: size() values, 3 * size() gradient components (3 per node), 9 * size() Hessian components (3x3 per node).
+ void evaluate_values_to(const math::Vector& xi, Real* SVMP_RESTRICT values_out) const final; + void evaluate_gradients_to(const math::Vector& xi, Real* SVMP_RESTRICT gradients_out) const final; + void evaluate_hessians_to(const math::Vector& xi, Real* SVMP_RESTRICT hessians_out) const final; + +private: + using TensorNodeIndex = std::array; + using WedgeNodeIndex = std::array; + using VectorEvaluationDispatch = void (LagrangeBasis::*)( + const math::Vector&, + std::vector*, + std::vector*, + std::vector*) const; + + // Cached topology encoded as int because the topology enum lives in + // the .cpp anon namespace. Set once in init_nodes. + int topology_id_ = 0; + + ElementType element_type_; + int dimension_; + int order_; + + std::vector nodes_1d_; + std::vector> nodes_; + std::vector tensor_indices_; + std::vector> simplex_exponents_; + std::vector wedge_indices_; + std::vector wedge_node_by_tri_z_; + + // Precomputed Horner-form coefficients of the 1D Lagrange basis. + // Layout per axis (n_axis = nodes_1d_.size() = order_+1): + // axis_v_coeffs_[i * n_axis + k] = coeff of x^k in L_i(x), 0 <= i,k < n_axis + // axis_d_coeffs_[i * (n_axis - 1) + k] = coeff of x^k in L_i'(x) + // axis_d2_coeffs_[i * (n_axis - 2) + k] = coeff of x^k in L_i''(x) (only if n_axis >= 3) + // NOTE(review): build_tensor_product_nodes / build_wedge_nodes do not write these tables themselves; presumably compute_axis_monomial_coefficients() fills them - confirm the call site.
+ std::vector axis_v_coeffs_; + std::vector axis_d_coeffs_; + std::vector axis_d2_coeffs_; + std::vector axis_barycentric_weights_; + VectorEvaluationDispatch vector_evaluation_dispatch_{nullptr}; + + void init_nodes(); + void init_evaluation_dispatch(); + void build_point_nodes(); + void build_tensor_product_nodes(int dimensions); + void build_simplex_nodes(); + void build_wedge_nodes(); + void build_pyramid_nodes(); + void init_equispaced_1d_nodes(); + void compute_axis_monomial_coefficients(); + void evaluate_point_vectors(const math::Vector& xi, + std::vector* values, + std::vector* gradients, + std::vector* hessians) const; + void evaluate_tensor_product_vectors(const math::Vector& xi, + std::vector* values, + std::vector* gradients, + std::vector* hessians) const; + void evaluate_triangle_vectors(const math::Vector& xi, + std::vector* values, + std::vector* gradients, + std::vector* hessians) const; + void evaluate_tetrahedron_vectors(const math::Vector& xi, + std::vector* values, + std::vector* gradients, + std::vector* hessians) const; + void evaluate_wedge_vectors(const math::Vector& xi, + std::vector* values, + std::vector* gradients, + std::vector* hessians) const; + void evaluate_pyramid_vectors(const math::Vector& xi, + std::vector* values, + std::vector* gradients, + std::vector* hessians) const; + void evaluate_unsupported_vectors(const math::Vector& xi, + std::vector* values, + std::vector* gradients, + std::vector* hessians) const; + void evaluate_all_to(const math::Vector& xi, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) const; +}; + +} // namespace basis +} // namespace FE +} // namespace svmp + +#endif // SVMP_FE_BASIS_LAGRANGEBASIS_H diff --git a/Code/Source/solver/FE/Basis/LagrangeBasisFast.h b/Code/Source/solver/FE/Basis/LagrangeBasisFast.h new file mode 100644 index 000000000..5b9faae04 --- /dev/null +++ b/Code/Source/solver/FE/Basis/LagrangeBasisFast.h @@ -0,0 +1,1378 @@ +/* 
Copyright (c) Stanford University, The Regents of the University of California, and others. + * + * All Rights Reserved. + * + * See License file. + */ + +#ifndef SVMP_FE_BASIS_LAGRANGEBASISFAST_H +#define SVMP_FE_BASIS_LAGRANGEBASISFAST_H + +/** + * @file LagrangeBasisFast.h + * @brief Header-only zero-overhead specializations of the Lagrange basis + * + * Provides templated static methods for the common nodal Lagrange families + * with compile-time-known polynomial order. Callers that know their basis + * type and order at compile time use these directly — there is no virtual + * dispatch, no std::vector allocation, no scratch lookup, and no topology + * switch. The output buffers are caller-provided std::array, sized at + * compile time, so every loop bound is a constant the compiler can unroll. + * + * These specializations are an alternative entry point to the runtime path + * provided by `LagrangeBasis`. The runtime path remains the canonical API + * for generic callers; these specializations serve hot loops that know the + * element type. + * + * Node orderings match `ReferenceNodeLayout::get_lagrange_node_coords(...)` (VTK). 
+ */ + +#include "Types.h" +#include "Math/Vector.h" +#include "Math/Matrix.h" +#include +#include + +namespace svmp { +namespace FE { +namespace basis { + +using Gradient = math::Vector; +using Hessian = math::Matrix; + +namespace detail { + +constexpr Gradient scaled_gradient(const Gradient& gradient, Real scale) { + return Gradient{scale * gradient[0], scale * gradient[1], scale * gradient[2]}; +} + +constexpr Gradient p2_edge_gradient(Real left, + const Gradient& left_gradient, + Real right, + const Gradient& right_gradient) { + return Gradient{ + Real(4) * (left_gradient[0] * right + right_gradient[0] * left), + Real(4) * (left_gradient[1] * right + right_gradient[1] * left), + Real(4) * (left_gradient[2] * right + right_gradient[2] * left), + }; +} + +constexpr Hessian p2_vertex_hessian(const Gradient& gradient) { + Hessian hessian{}; + for (std::size_t row = 0; row < 3u; ++row) { + for (std::size_t col = 0; col < 3u; ++col) { + hessian(row, col) = Real(4) * gradient[row] * gradient[col]; + } + } + return hessian; +} + +constexpr Hessian p2_edge_hessian(const Gradient& left_gradient, + const Gradient& right_gradient) { + Hessian hessian{}; + for (std::size_t row = 0; row < 3u; ++row) { + for (std::size_t col = 0; col < 3u; ++col) { + hessian(row, col) = Real(4) * ( + left_gradient[row] * right_gradient[col] + + right_gradient[row] * left_gradient[col]); + } + } + return hessian; +} + +constexpr std::size_t public_axis_index(int lattice, int order) noexcept { + return lattice == 0 ? 0u : + lattice == order ? 1u : + static_cast(lattice + 1); +} + +template +constexpr Real public_axis_coord(std::size_t public_index) noexcept { + const int lattice = public_index == 0u ? 0 : + public_index == 1u ? 
Order : + static_cast(public_index) - 1; + return Real(-1) + Real(2) * static_cast(lattice) / static_cast(Order); +} + +template +constexpr std::array make_public_axis_nodes() { + std::array nodes{}; + for (std::size_t i = 0; i < nodes.size(); ++i) { + nodes[i] = public_axis_coord(i); + } + return nodes; +} + +template +constexpr std::array make_public_axis_inverse_denominators() { + constexpr auto nodes = make_public_axis_nodes(); + std::array inv_denominators{}; + for (std::size_t i = 0; i < nodes.size(); ++i) { + Real denominator = Real(1); + for (std::size_t j = 0; j < nodes.size(); ++j) { + if (j != i) { + denominator *= nodes[i] - nodes[j]; + } + } + inv_denominators[i] = Real(1) / denominator; + } + return inv_denominators; +} + +template +void fill_axis_lagrange(Real x, + std::array& values, + std::array* first, + std::array* second) { + constexpr auto nodes = make_public_axis_nodes(); + constexpr auto inv_denominators = make_public_axis_inverse_denominators(); + for (std::size_t i = 0; i < nodes.size(); ++i) { + Real product = Real(1); + for (std::size_t j = 0; j < nodes.size(); ++j) { + if (j != i) { + product *= x - nodes[j]; + } + } + values[i] = product * inv_denominators[i]; + + if constexpr (NeedFirst) { + Real derivative = Real(0); + for (std::size_t m = 0; m < nodes.size(); ++m) { + if (m == i) { + continue; + } + Real term = Real(1); + for (std::size_t j = 0; j < nodes.size(); ++j) { + if (j != i && j != m) { + term *= x - nodes[j]; + } + } + derivative += term; + } + (*first)[i] = derivative * inv_denominators[i]; + } + + if constexpr (NeedSecond) { + Real curvature = Real(0); + for (std::size_t m = 0; m < nodes.size(); ++m) { + if (m == i) { + continue; + } + for (std::size_t l = 0; l < nodes.size(); ++l) { + if (l == i || l == m) { + continue; + } + Real term = Real(1); + for (std::size_t j = 0; j < nodes.size(); ++j) { + if (j != i && j != m && j != l) { + term *= x - nodes[j]; + } + } + curvature += term; + } + } + (*second)[i] = curvature * 
inv_denominators[i]; + } + } +} + +template +void fill_axis_values(Real x, std::array& values) { + fill_axis_lagrange(x, values, nullptr, nullptr); +} + +template +void fill_axis_values_first(Real x, + std::array& values, + std::array& first) { + fill_axis_lagrange(x, values, &first, nullptr); +} + +template +void fill_axis_values_first_second(Real x, + std::array& values, + std::array& first, + std::array& second) { + fill_axis_lagrange(x, values, &first, &second); +} + +template +constexpr std::array, (Order + 1) * (Order + 1)> +make_quad_tensor_node_axes() { + std::array, (Order + 1) * (Order + 1)> axes{}; + std::size_t n = 0; + + axes[n++] = {{0u, 0u}}; + axes[n++] = {{1u, 0u}}; + axes[n++] = {{1u, 1u}}; + axes[n++] = {{0u, 1u}}; + + for (int i = 1; i < Order; ++i) { + axes[n++] = {{public_axis_index(i, Order), 0u}}; + } + for (int j = 1; j < Order; ++j) { + axes[n++] = {{1u, public_axis_index(j, Order)}}; + } + for (int i = Order - 1; i >= 1; --i) { + axes[n++] = {{public_axis_index(i, Order), 1u}}; + } + for (int j = Order - 1; j >= 1; --j) { + axes[n++] = {{0u, public_axis_index(j, Order)}}; + } + + for (int j = 1; j < Order; ++j) { + for (int i = 1; i < Order; ++i) { + axes[n++] = {{public_axis_index(i, Order), public_axis_index(j, Order)}}; + } + } + + return axes; +} + +template +constexpr std::array, (Order + 1) * (Order + 1) * (Order + 1)> +make_hex_tensor_node_axes() { + std::array, (Order + 1) * (Order + 1) * (Order + 1)> axes{}; + std::size_t n = 0; + + axes[n++] = {{0u, 0u, 0u}}; + axes[n++] = {{1u, 0u, 0u}}; + axes[n++] = {{1u, 1u, 0u}}; + axes[n++] = {{0u, 1u, 0u}}; + axes[n++] = {{0u, 0u, 1u}}; + axes[n++] = {{1u, 0u, 1u}}; + axes[n++] = {{1u, 1u, 1u}}; + axes[n++] = {{0u, 1u, 1u}}; + + for (int i = 1; i < Order; ++i) { + axes[n++] = {{public_axis_index(i, Order), 0u, 0u}}; + } + for (int j = 1; j < Order; ++j) { + axes[n++] = {{1u, public_axis_index(j, Order), 0u}}; + } + for (int i = Order - 1; i >= 1; --i) { + axes[n++] = 
{{public_axis_index(i, Order), 1u, 0u}}; + } + for (int j = Order - 1; j >= 1; --j) { + axes[n++] = {{0u, public_axis_index(j, Order), 0u}}; + } + for (int i = 1; i < Order; ++i) { + axes[n++] = {{public_axis_index(i, Order), 0u, 1u}}; + } + for (int j = 1; j < Order; ++j) { + axes[n++] = {{1u, public_axis_index(j, Order), 1u}}; + } + for (int i = Order - 1; i >= 1; --i) { + axes[n++] = {{public_axis_index(i, Order), 1u, 1u}}; + } + for (int j = Order - 1; j >= 1; --j) { + axes[n++] = {{0u, public_axis_index(j, Order), 1u}}; + } + for (int k = 1; k < Order; ++k) { + axes[n++] = {{0u, 0u, public_axis_index(k, Order)}}; + } + for (int k = 1; k < Order; ++k) { + axes[n++] = {{1u, 0u, public_axis_index(k, Order)}}; + } + for (int k = 1; k < Order; ++k) { + axes[n++] = {{1u, 1u, public_axis_index(k, Order)}}; + } + for (int k = 1; k < Order; ++k) { + axes[n++] = {{0u, 1u, public_axis_index(k, Order)}}; + } + + for (int j = 1; j < Order; ++j) { + for (int i = 1; i < Order; ++i) { + axes[n++] = {{public_axis_index(i, Order), public_axis_index(j, Order), 0u}}; + } + } + for (int j = 1; j < Order; ++j) { + for (int i = 1; i < Order; ++i) { + axes[n++] = {{public_axis_index(i, Order), public_axis_index(j, Order), 1u}}; + } + } + for (int k = 1; k < Order; ++k) { + for (int i = 1; i < Order; ++i) { + axes[n++] = {{public_axis_index(i, Order), 0u, public_axis_index(k, Order)}}; + } + } + for (int k = 1; k < Order; ++k) { + for (int j = 1; j < Order; ++j) { + axes[n++] = {{1u, public_axis_index(j, Order), public_axis_index(k, Order)}}; + } + } + for (int k = 1; k < Order; ++k) { + for (int i = Order - 1; i >= 1; --i) { + axes[n++] = {{public_axis_index(i, Order), 1u, public_axis_index(k, Order)}}; + } + } + for (int k = 1; k < Order; ++k) { + for (int j = Order - 1; j >= 1; --j) { + axes[n++] = {{0u, public_axis_index(j, Order), public_axis_index(k, Order)}}; + } + } + + for (int k = 1; k < Order; ++k) { + for (int j = 1; j < Order; ++j) { + for (int i = 1; i < Order; ++i) { + 
axes[n++] = {{public_axis_index(i, Order), + public_axis_index(j, Order), + public_axis_index(k, Order)}}; + } + } + } + + return axes; +} + +template +constexpr std::array, (Order + 1) * (Order + 2) / 2> +make_triangle_simplex_exponents() { + std::array, (Order + 1) * (Order + 2) / 2> exponents{}; + std::size_t n = 0; + + exponents[n++] = {{static_cast(Order), 0u, 0u}}; + exponents[n++] = {{0u, static_cast(Order), 0u}}; + exponents[n++] = {{0u, 0u, static_cast(Order)}}; + + for (int m = 1; m < Order; ++m) { + exponents[n++] = {{static_cast(Order - m), static_cast(m), 0u}}; + } + for (int m = 1; m < Order; ++m) { + exponents[n++] = {{0u, static_cast(Order - m), static_cast(m)}}; + } + for (int m = 1; m < Order; ++m) { + exponents[n++] = {{static_cast(m), 0u, static_cast(Order - m)}}; + } + + for (int c = 1; c <= Order - 2; ++c) { + for (int b = 1; b <= Order - c - 1; ++b) { + const int a = Order - b - c; + exponents[n++] = {{static_cast(a), + static_cast(b), + static_cast(c)}}; + } + } + + return exponents; +} + +template +constexpr std::array, (Order + 1) * (Order + 2) * (Order + 3) / 6> +make_tetrahedron_simplex_exponents() { + std::array, (Order + 1) * (Order + 2) * (Order + 3) / 6> exponents{}; + std::size_t n = 0; + + exponents[n++] = {{static_cast(Order), 0u, 0u, 0u}}; + exponents[n++] = {{0u, static_cast(Order), 0u, 0u}}; + exponents[n++] = {{0u, 0u, static_cast(Order), 0u}}; + exponents[n++] = {{0u, 0u, 0u, static_cast(Order)}}; + + constexpr int edges[6][2] = { + {0, 1}, {1, 2}, {2, 0}, {0, 3}, {1, 3}, {2, 3} + }; + for (const auto& edge : edges) { + for (int m = 1; m < Order; ++m) { + std::array e{}; + e[static_cast(edge[0])] = static_cast(Order - m); + e[static_cast(edge[1])] = static_cast(m); + exponents[n++] = e; + } + } + + constexpr int faces[4][3] = { + {0, 1, 2}, + {0, 1, 3}, + {1, 2, 3}, + {0, 2, 3}, + }; + for (const auto& face : faces) { + for (int c = 1; c <= Order - 2; ++c) { + for (int b = 1; b <= Order - c - 1; ++b) { + const int a = Order - 
b - c; + std::array e{}; + e[static_cast(face[0])] = static_cast(a); + e[static_cast(face[1])] = static_cast(b); + e[static_cast(face[2])] = static_cast(c); + exponents[n++] = e; + } + } + } + + for (int l = 1; l <= Order - 3; ++l) { + for (int k = 1; k <= Order - l - 2; ++k) { + for (int j = 1; j <= Order - l - k - 1; ++j) { + const int i = Order - j - k - l; + exponents[n++] = {{static_cast(i), + static_cast(j), + static_cast(k), + static_cast(l)}}; + } + } + } + + return exponents; +} + +template +void fill_simplex_factor_sequence(Real lambda, + std::array& phi, + std::array* dphi, + std::array* d2phi) { + phi[0] = Real(1); + if constexpr (NeedFirst) { + (*dphi)[0] = Real(0); + } + if constexpr (NeedSecond) { + (*d2phi)[0] = Real(0); + } + + const Real t = static_cast(Order) * lambda; + constexpr Real dt_dlambda = static_cast(Order); + Real dphi_dt_prev = Real(0); + Real d2phi_dt2_prev = Real(0); + + for (int a = 1; a <= Order; ++a) { + const std::size_t au = static_cast(a); + const Real inv_a = Real(1) / static_cast(a); + const Real s = (t - static_cast(a - 1)) * inv_a; + phi[au] = s * phi[au - 1]; + + if constexpr (NeedFirst) { + const Real dphi_dt = inv_a * phi[au - 1] + s * dphi_dt_prev; + (*dphi)[au] = dt_dlambda * dphi_dt; + + if constexpr (NeedSecond) { + const Real d2phi_dt2 = Real(2) * inv_a * dphi_dt_prev + s * d2phi_dt2_prev; + (*d2phi)[au] = dt_dlambda * dt_dlambda * d2phi_dt2; + d2phi_dt2_prev = d2phi_dt2; + } + + dphi_dt_prev = dphi_dt; + } + } +} + +template +void fill_simplex_factor_values(Real lambda, std::array& phi) { + fill_simplex_factor_sequence(lambda, phi, nullptr, nullptr); +} + +template +void fill_simplex_factor_values_first(Real lambda, + std::array& phi, + std::array& dphi) { + fill_simplex_factor_sequence(lambda, phi, &dphi, nullptr); +} + +template +void fill_simplex_factor_values_first_second(Real lambda, + std::array& phi, + std::array& dphi, + std::array& d2phi) { + fill_simplex_factor_sequence(lambda, phi, &dphi, &d2phi); +} + 
+} // namespace detail + +// --------------------------------------------------------------------------- +// LagrangeLineFast +// --------------------------------------------------------------------------- +template +struct LagrangeLineFast; + +template<> +struct LagrangeLineFast<1> { + static constexpr int n_dofs = 2; + + static constexpr void evaluate(const math::Vector& xi, std::array& out) { + out[0] = (Real(1) - xi[0]) * Real(0.5); + out[1] = (Real(1) + xi[0]) * Real(0.5); + } + + static constexpr void evaluate_gradients(const math::Vector& /*xi*/, + std::array& out) { + out[0] = Gradient{Real(-0.5), Real(0), Real(0)}; + out[1] = Gradient{Real( 0.5), Real(0), Real(0)}; + } + + static constexpr void evaluate_hessians(const math::Vector& /*xi*/, + std::array& out) { + out[0] = Hessian{}; + out[1] = Hessian{}; + } +}; + +template<> +struct LagrangeLineFast<2> { + static constexpr int n_dofs = 3; + + static constexpr void evaluate(const math::Vector& xi, std::array& out) { + const Real x = xi[0]; + out[0] = x * (x - Real(1)) * Real(0.5); + out[1] = x * (x + Real(1)) * Real(0.5); + out[2] = (Real(1) - x) * (Real(1) + x); + } + + static constexpr void evaluate_gradients(const math::Vector& xi, + std::array& out) { + const Real x = xi[0]; + out[0] = Gradient{x - Real(0.5), Real(0), Real(0)}; + out[1] = Gradient{x + Real(0.5), Real(0), Real(0)}; + out[2] = Gradient{Real(-2) * x, Real(0), Real(0)}; + } + + static constexpr void evaluate_hessians(const math::Vector& /*xi*/, + std::array& out) { + out[0] = Hessian{}; + out[1] = Hessian{}; + out[2] = Hessian{}; + out[0](0, 0) = Real(1); + out[1](0, 0) = Real(1); + out[2](0, 0) = Real(-2); + } +}; + +template<> +struct LagrangeLineFast<3> { + static constexpr int n_dofs = 4; + + static void evaluate(const math::Vector& xi, std::array& out) { + detail::fill_axis_values<3>(xi[0], out); + } + + static void evaluate_gradients(const math::Vector& xi, + std::array& out) { + std::array values{}; + std::array first{}; + 
detail::fill_axis_values_first<3>(xi[0], values, first); + for (std::size_t i = 0; i < first.size(); ++i) { + out[i] = Gradient{first[i], Real(0), Real(0)}; + } + } + + static void evaluate_hessians(const math::Vector& xi, + std::array& out) { + std::array values{}; + std::array first{}; + std::array second{}; + detail::fill_axis_values_first_second<3>(xi[0], values, first, second); + for (std::size_t i = 0; i < second.size(); ++i) { + Hessian H{}; + H(0, 0) = second[i]; + out[i] = H; + } + } +}; + +// --------------------------------------------------------------------------- +// LagrangeQuadFast +// --------------------------------------------------------------------------- +template +struct LagrangeQuadFast; + +template<> +struct LagrangeQuadFast<1> { + static constexpr int n_dofs = 4; + + // VTK Quad4 corner ordering: (-,-), (+,-), (+,+), (-,+). + static constexpr void evaluate(const math::Vector& xi, std::array& out) { + const Real lx = (Real(1) - xi[0]) * Real(0.5); + const Real ly = (Real(1) - xi[1]) * Real(0.5); + const Real ux = (Real(1) + xi[0]) * Real(0.5); + const Real uy = (Real(1) + xi[1]) * Real(0.5); + out[0] = lx * ly; + out[1] = ux * ly; + out[2] = ux * uy; + out[3] = lx * uy; + } + + static constexpr void evaluate_gradients(const math::Vector& xi, + std::array& out) { + const Real lx = (Real(1) - xi[0]) * Real(0.5); + const Real ly = (Real(1) - xi[1]) * Real(0.5); + const Real ux = (Real(1) + xi[0]) * Real(0.5); + const Real uy = (Real(1) + xi[1]) * Real(0.5); + out[0] = Gradient{Real(-0.5) * ly, Real(-0.5) * lx, Real(0)}; + out[1] = Gradient{Real( 0.5) * ly, Real(-0.5) * ux, Real(0)}; + out[2] = Gradient{Real( 0.5) * uy, Real( 0.5) * ux, Real(0)}; + out[3] = Gradient{Real(-0.5) * uy, Real( 0.5) * lx, Real(0)}; + } + + static constexpr void evaluate_hessians(const math::Vector& /*xi*/, + std::array& out) { + out[0] = Hessian{}; + out[1] = Hessian{}; + out[2] = Hessian{}; + out[3] = Hessian{}; + constexpr Real qrt = Real(0.25); + out[0](0, 1) = 
qrt; out[0](1, 0) = qrt; + out[1](0, 1) = -qrt; out[1](1, 0) = -qrt; + out[2](0, 1) = qrt; out[2](1, 0) = qrt; + out[3](0, 1) = -qrt; out[3](1, 0) = -qrt; + } +}; + +template<> +struct LagrangeQuadFast<2> { + static constexpr int n_dofs = 9; + + static constexpr std::array, n_dofs> node_axes = {{ + {{0u, 0u}}, {{1u, 0u}}, {{1u, 1u}}, {{0u, 1u}}, + {{2u, 0u}}, {{1u, 2u}}, {{2u, 1u}}, {{0u, 2u}}, + {{2u, 2u}}, + }}; + + static void evaluate(const math::Vector& xi, std::array& out) { + std::array::n_dofs> lx{}; + std::array::n_dofs> ly{}; + LagrangeLineFast<2>::evaluate({xi[0], Real(0), Real(0)}, lx); + LagrangeLineFast<2>::evaluate({xi[1], Real(0), Real(0)}, ly); + for (std::size_t n = 0; n < node_axes.size(); ++n) { + out[n] = lx[node_axes[n][0]] * ly[node_axes[n][1]]; + } + } + + static void evaluate_gradients(const math::Vector& xi, + std::array& out) { + std::array::n_dofs> lx{}; + std::array::n_dofs> ly{}; + std::array::n_dofs> gx{}; + std::array::n_dofs> gy{}; + LagrangeLineFast<2>::evaluate({xi[0], Real(0), Real(0)}, lx); + LagrangeLineFast<2>::evaluate({xi[1], Real(0), Real(0)}, ly); + LagrangeLineFast<2>::evaluate_gradients({xi[0], Real(0), Real(0)}, gx); + LagrangeLineFast<2>::evaluate_gradients({xi[1], Real(0), Real(0)}, gy); + for (std::size_t n = 0; n < node_axes.size(); ++n) { + const auto i = node_axes[n][0]; + const auto j = node_axes[n][1]; + out[n] = Gradient{gx[i][0] * ly[j], lx[i] * gy[j][0], Real(0)}; + } + } + + static void evaluate_hessians(const math::Vector& xi, + std::array& out) { + std::array::n_dofs> lx{}; + std::array::n_dofs> ly{}; + std::array::n_dofs> gx{}; + std::array::n_dofs> gy{}; + std::array::n_dofs> hx{}; + std::array::n_dofs> hy{}; + LagrangeLineFast<2>::evaluate({xi[0], Real(0), Real(0)}, lx); + LagrangeLineFast<2>::evaluate({xi[1], Real(0), Real(0)}, ly); + LagrangeLineFast<2>::evaluate_gradients({xi[0], Real(0), Real(0)}, gx); + LagrangeLineFast<2>::evaluate_gradients({xi[1], Real(0), Real(0)}, gy); + 
LagrangeLineFast<2>::evaluate_hessians({xi[0], Real(0), Real(0)}, hx); + LagrangeLineFast<2>::evaluate_hessians({xi[1], Real(0), Real(0)}, hy); + for (std::size_t n = 0; n < node_axes.size(); ++n) { + const auto i = node_axes[n][0]; + const auto j = node_axes[n][1]; + Hessian H{}; + H(0, 0) = hx[i](0, 0) * ly[j]; + H(1, 1) = lx[i] * hy[j](0, 0); + H(0, 1) = gx[i][0] * gy[j][0]; + H(1, 0) = H(0, 1); + out[n] = H; + } + } +}; + +template<> +struct LagrangeQuadFast<3> { + static constexpr int n_dofs = 16; + + static constexpr std::array, n_dofs> node_axes = + detail::make_quad_tensor_node_axes<3>(); + + static void evaluate(const math::Vector& xi, std::array& out) { + std::array::n_dofs> lx{}; + std::array::n_dofs> ly{}; + detail::fill_axis_values<3>(xi[0], lx); + detail::fill_axis_values<3>(xi[1], ly); + for (std::size_t n = 0; n < node_axes.size(); ++n) { + out[n] = lx[node_axes[n][0]] * ly[node_axes[n][1]]; + } + } + + static void evaluate_gradients(const math::Vector& xi, + std::array& out) { + std::array::n_dofs> lx{}; + std::array::n_dofs> ly{}; + std::array::n_dofs> gx{}; + std::array::n_dofs> gy{}; + detail::fill_axis_values_first<3>(xi[0], lx, gx); + detail::fill_axis_values_first<3>(xi[1], ly, gy); + for (std::size_t n = 0; n < node_axes.size(); ++n) { + const auto i = node_axes[n][0]; + const auto j = node_axes[n][1]; + out[n] = Gradient{gx[i] * ly[j], lx[i] * gy[j], Real(0)}; + } + } + + static void evaluate_hessians(const math::Vector& xi, + std::array& out) { + std::array::n_dofs> lx{}; + std::array::n_dofs> ly{}; + std::array::n_dofs> gx{}; + std::array::n_dofs> gy{}; + std::array::n_dofs> hx{}; + std::array::n_dofs> hy{}; + detail::fill_axis_values_first_second<3>(xi[0], lx, gx, hx); + detail::fill_axis_values_first_second<3>(xi[1], ly, gy, hy); + for (std::size_t n = 0; n < node_axes.size(); ++n) { + const auto i = node_axes[n][0]; + const auto j = node_axes[n][1]; + Hessian H{}; + H(0, 0) = hx[i] * ly[j]; + H(1, 1) = lx[i] * hy[j]; + H(0, 1) = gx[i] 
* gy[j]; + H(1, 0) = H(0, 1); + out[n] = H; + } + } +}; + +// --------------------------------------------------------------------------- +// LagrangeHexFast +// --------------------------------------------------------------------------- +template +struct LagrangeHexFast; + +template<> +struct LagrangeHexFast<1> { + static constexpr int n_dofs = 8; + + // VTK Hex8 corner ordering: (-,-,-), (+,-,-), (+,+,-), (-,+,-), + // (-,-,+), (+,-,+), (+,+,+), (-,+,+). + static constexpr void evaluate(const math::Vector& xi, std::array& out) { + const Real lx = (Real(1) - xi[0]) * Real(0.5); + const Real ly = (Real(1) - xi[1]) * Real(0.5); + const Real lz = (Real(1) - xi[2]) * Real(0.5); + const Real ux = (Real(1) + xi[0]) * Real(0.5); + const Real uy = (Real(1) + xi[1]) * Real(0.5); + const Real uz = (Real(1) + xi[2]) * Real(0.5); + // Precompute z-plane partial products (sum factorization). + const Real lxly = lx * ly; + const Real uxly = ux * ly; + const Real uxuy = ux * uy; + const Real lxuy = lx * uy; + out[0] = lxly * lz; + out[1] = uxly * lz; + out[2] = uxuy * lz; + out[3] = lxuy * lz; + out[4] = lxly * uz; + out[5] = uxly * uz; + out[6] = uxuy * uz; + out[7] = lxuy * uz; + } + + static constexpr void evaluate_gradients(const math::Vector& xi, + std::array& out) { + const Real lx = (Real(1) - xi[0]) * Real(0.5); + const Real ly = (Real(1) - xi[1]) * Real(0.5); + const Real lz = (Real(1) - xi[2]) * Real(0.5); + const Real ux = (Real(1) + xi[0]) * Real(0.5); + const Real uy = (Real(1) + xi[1]) * Real(0.5); + const Real uz = (Real(1) + xi[2]) * Real(0.5); + // dL_0(x)/dx = -0.5, dL_1(x)/dx = +0.5 along each axis. 
+ out[0] = Gradient{Real(-0.5) * ly * lz, Real(-0.5) * lx * lz, Real(-0.5) * lx * ly}; + out[1] = Gradient{Real( 0.5) * ly * lz, Real(-0.5) * ux * lz, Real(-0.5) * ux * ly}; + out[2] = Gradient{Real( 0.5) * uy * lz, Real( 0.5) * ux * lz, Real(-0.5) * ux * uy}; + out[3] = Gradient{Real(-0.5) * uy * lz, Real( 0.5) * lx * lz, Real(-0.5) * lx * uy}; + out[4] = Gradient{Real(-0.5) * ly * uz, Real(-0.5) * lx * uz, Real( 0.5) * lx * ly}; + out[5] = Gradient{Real( 0.5) * ly * uz, Real(-0.5) * ux * uz, Real( 0.5) * ux * ly}; + out[6] = Gradient{Real( 0.5) * uy * uz, Real( 0.5) * ux * uz, Real( 0.5) * ux * uy}; + out[7] = Gradient{Real(-0.5) * uy * uz, Real( 0.5) * lx * uz, Real( 0.5) * lx * uy}; + } + + static constexpr void evaluate_hessians(const math::Vector& xi, + std::array& out) { + const Real lx = (Real(1) - xi[0]) * Real(0.5); + const Real ly = (Real(1) - xi[1]) * Real(0.5); + const Real lz = (Real(1) - xi[2]) * Real(0.5); + const Real ux = (Real(1) + xi[0]) * Real(0.5); + const Real uy = (Real(1) + xi[1]) * Real(0.5); + const Real uz = (Real(1) + xi[2]) * Real(0.5); + const Real ax[8] = {lx, ux, ux, lx, lx, ux, ux, lx}; + const Real ay[8] = {ly, ly, uy, uy, ly, ly, uy, uy}; + const Real az[8] = {lz, lz, lz, lz, uz, uz, uz, uz}; + const int sx[8] = {-1, 1, 1, -1, -1, 1, 1, -1}; + const int sy[8] = {-1, -1, 1, 1, -1, -1, 1, 1}; + const int sz[8] = {-1, -1, -1, -1, 1, 1, 1, 1}; + constexpr Real qrt = Real(0.25); + for (std::size_t n = 0; n < static_cast(n_dofs); ++n) { + out[n] = Hessian{}; + out[n](0, 1) = static_cast(sx[n] * sy[n]) * qrt * az[n]; + out[n](1, 0) = out[n](0, 1); + out[n](0, 2) = static_cast(sx[n] * sz[n]) * qrt * ay[n]; + out[n](2, 0) = out[n](0, 2); + out[n](1, 2) = static_cast(sy[n] * sz[n]) * qrt * ax[n]; + out[n](2, 1) = out[n](1, 2); + } + } +}; + +template<> +struct LagrangeHexFast<2> { + static constexpr int n_dofs = 27; + + static constexpr std::array, n_dofs> node_axes = {{ + {{0u, 0u, 0u}}, {{1u, 0u, 0u}}, {{1u, 1u, 0u}}, {{0u, 1u, 0u}}, + 
{{0u, 0u, 1u}}, {{1u, 0u, 1u}}, {{1u, 1u, 1u}}, {{0u, 1u, 1u}}, + {{2u, 0u, 0u}}, {{1u, 2u, 0u}}, {{2u, 1u, 0u}}, {{0u, 2u, 0u}}, + {{2u, 0u, 1u}}, {{1u, 2u, 1u}}, {{2u, 1u, 1u}}, {{0u, 2u, 1u}}, + {{0u, 0u, 2u}}, {{1u, 0u, 2u}}, {{1u, 1u, 2u}}, {{0u, 1u, 2u}}, + {{2u, 2u, 0u}}, {{2u, 2u, 1u}}, {{2u, 0u, 2u}}, {{1u, 2u, 2u}}, + {{2u, 1u, 2u}}, {{0u, 2u, 2u}}, {{2u, 2u, 2u}}, + }}; + + static void evaluate(const math::Vector& xi, std::array& out) { + std::array::n_dofs> lx{}; + std::array::n_dofs> ly{}; + std::array::n_dofs> lz{}; + LagrangeLineFast<2>::evaluate({xi[0], Real(0), Real(0)}, lx); + LagrangeLineFast<2>::evaluate({xi[1], Real(0), Real(0)}, ly); + LagrangeLineFast<2>::evaluate({xi[2], Real(0), Real(0)}, lz); + for (std::size_t n = 0; n < node_axes.size(); ++n) { + out[n] = lx[node_axes[n][0]] * ly[node_axes[n][1]] * lz[node_axes[n][2]]; + } + } + + static void evaluate_gradients(const math::Vector& xi, + std::array& out) { + std::array::n_dofs> lx{}; + std::array::n_dofs> ly{}; + std::array::n_dofs> lz{}; + std::array::n_dofs> gx{}; + std::array::n_dofs> gy{}; + std::array::n_dofs> gz{}; + LagrangeLineFast<2>::evaluate({xi[0], Real(0), Real(0)}, lx); + LagrangeLineFast<2>::evaluate({xi[1], Real(0), Real(0)}, ly); + LagrangeLineFast<2>::evaluate({xi[2], Real(0), Real(0)}, lz); + LagrangeLineFast<2>::evaluate_gradients({xi[0], Real(0), Real(0)}, gx); + LagrangeLineFast<2>::evaluate_gradients({xi[1], Real(0), Real(0)}, gy); + LagrangeLineFast<2>::evaluate_gradients({xi[2], Real(0), Real(0)}, gz); + for (std::size_t n = 0; n < node_axes.size(); ++n) { + const auto i = node_axes[n][0]; + const auto j = node_axes[n][1]; + const auto k = node_axes[n][2]; + out[n] = Gradient{ + gx[i][0] * ly[j] * lz[k], + lx[i] * gy[j][0] * lz[k], + lx[i] * ly[j] * gz[k][0], + }; + } + } + + static void evaluate_hessians(const math::Vector& xi, + std::array& out) { + std::array::n_dofs> lx{}; + std::array::n_dofs> ly{}; + std::array::n_dofs> lz{}; + std::array::n_dofs> gx{}; + 
std::array::n_dofs> gy{}; + std::array::n_dofs> gz{}; + std::array::n_dofs> hx{}; + std::array::n_dofs> hy{}; + std::array::n_dofs> hz{}; + LagrangeLineFast<2>::evaluate({xi[0], Real(0), Real(0)}, lx); + LagrangeLineFast<2>::evaluate({xi[1], Real(0), Real(0)}, ly); + LagrangeLineFast<2>::evaluate({xi[2], Real(0), Real(0)}, lz); + LagrangeLineFast<2>::evaluate_gradients({xi[0], Real(0), Real(0)}, gx); + LagrangeLineFast<2>::evaluate_gradients({xi[1], Real(0), Real(0)}, gy); + LagrangeLineFast<2>::evaluate_gradients({xi[2], Real(0), Real(0)}, gz); + LagrangeLineFast<2>::evaluate_hessians({xi[0], Real(0), Real(0)}, hx); + LagrangeLineFast<2>::evaluate_hessians({xi[1], Real(0), Real(0)}, hy); + LagrangeLineFast<2>::evaluate_hessians({xi[2], Real(0), Real(0)}, hz); + for (std::size_t n = 0; n < node_axes.size(); ++n) { + const auto i = node_axes[n][0]; + const auto j = node_axes[n][1]; + const auto k = node_axes[n][2]; + Hessian H{}; + H(0, 0) = hx[i](0, 0) * ly[j] * lz[k]; + H(1, 1) = lx[i] * hy[j](0, 0) * lz[k]; + H(2, 2) = lx[i] * ly[j] * hz[k](0, 0); + H(0, 1) = gx[i][0] * gy[j][0] * lz[k]; + H(1, 0) = H(0, 1); + H(0, 2) = gx[i][0] * ly[j] * gz[k][0]; + H(2, 0) = H(0, 2); + H(1, 2) = lx[i] * gy[j][0] * gz[k][0]; + H(2, 1) = H(1, 2); + out[n] = H; + } + } +}; + +template<> +struct LagrangeHexFast<3> { + static constexpr int n_dofs = 64; + + static constexpr std::array, n_dofs> node_axes = + detail::make_hex_tensor_node_axes<3>(); + + static void evaluate(const math::Vector& xi, std::array& out) { + std::array::n_dofs> lx{}; + std::array::n_dofs> ly{}; + std::array::n_dofs> lz{}; + detail::fill_axis_values<3>(xi[0], lx); + detail::fill_axis_values<3>(xi[1], ly); + detail::fill_axis_values<3>(xi[2], lz); + for (std::size_t n = 0; n < node_axes.size(); ++n) { + out[n] = lx[node_axes[n][0]] * ly[node_axes[n][1]] * lz[node_axes[n][2]]; + } + } + + static void evaluate_gradients(const math::Vector& xi, + std::array& out) { + std::array::n_dofs> lx{}; + std::array::n_dofs> 
ly{}; + std::array::n_dofs> lz{}; + std::array::n_dofs> gx{}; + std::array::n_dofs> gy{}; + std::array::n_dofs> gz{}; + detail::fill_axis_values_first<3>(xi[0], lx, gx); + detail::fill_axis_values_first<3>(xi[1], ly, gy); + detail::fill_axis_values_first<3>(xi[2], lz, gz); + for (std::size_t n = 0; n < node_axes.size(); ++n) { + const auto i = node_axes[n][0]; + const auto j = node_axes[n][1]; + const auto k = node_axes[n][2]; + out[n] = Gradient{ + gx[i] * ly[j] * lz[k], + lx[i] * gy[j] * lz[k], + lx[i] * ly[j] * gz[k], + }; + } + } + + static void evaluate_hessians(const math::Vector& xi, + std::array& out) { + std::array::n_dofs> lx{}; + std::array::n_dofs> ly{}; + std::array::n_dofs> lz{}; + std::array::n_dofs> gx{}; + std::array::n_dofs> gy{}; + std::array::n_dofs> gz{}; + std::array::n_dofs> hx{}; + std::array::n_dofs> hy{}; + std::array::n_dofs> hz{}; + detail::fill_axis_values_first_second<3>(xi[0], lx, gx, hx); + detail::fill_axis_values_first_second<3>(xi[1], ly, gy, hy); + detail::fill_axis_values_first_second<3>(xi[2], lz, gz, hz); + for (std::size_t n = 0; n < node_axes.size(); ++n) { + const auto i = node_axes[n][0]; + const auto j = node_axes[n][1]; + const auto k = node_axes[n][2]; + Hessian H{}; + H(0, 0) = hx[i] * ly[j] * lz[k]; + H(1, 1) = lx[i] * hy[j] * lz[k]; + H(2, 2) = lx[i] * ly[j] * hz[k]; + H(0, 1) = gx[i] * gy[j] * lz[k]; + H(1, 0) = H(0, 1); + H(0, 2) = gx[i] * ly[j] * gz[k]; + H(2, 0) = H(0, 2); + H(1, 2) = lx[i] * gy[j] * gz[k]; + H(2, 1) = H(1, 2); + out[n] = H; + } + } +}; + +// --------------------------------------------------------------------------- +// LagrangeTriFast +// --------------------------------------------------------------------------- +template +struct LagrangeTriFast; + +template<> +struct LagrangeTriFast<1> { + static constexpr int n_dofs = 3; + + static constexpr void evaluate(const math::Vector& xi, std::array& out) { + out[0] = Real(1) - xi[0] - xi[1]; + out[1] = xi[0]; + out[2] = xi[1]; + } + + static 
constexpr void evaluate_gradients(const math::Vector& /*xi*/, + std::array& out) { + out[0] = Gradient{Real(-1), Real(-1), Real(0)}; + out[1] = Gradient{Real( 1), Real( 0), Real(0)}; + out[2] = Gradient{Real( 0), Real( 1), Real(0)}; + } + + static constexpr void evaluate_hessians(const math::Vector& /*xi*/, + std::array& out) { + out[0] = Hessian{}; + out[1] = Hessian{}; + out[2] = Hessian{}; + } +}; + +template<> +struct LagrangeTriFast<2> { + static constexpr int n_dofs = 6; + + static constexpr void evaluate(const math::Vector& xi, std::array& out) { + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l0 = Real(1) - l1 - l2; + + out[0] = l0 * (Real(2) * l0 - Real(1)); + out[1] = l1 * (Real(2) * l1 - Real(1)); + out[2] = l2 * (Real(2) * l2 - Real(1)); + out[3] = Real(4) * l0 * l1; + out[4] = Real(4) * l1 * l2; + out[5] = Real(4) * l0 * l2; + } + + static constexpr void evaluate_gradients(const math::Vector& xi, + std::array& out) { + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l0 = Real(1) - l1 - l2; + constexpr Gradient g0{Real(-1), Real(-1), Real(0)}; + constexpr Gradient g1{Real( 1), Real( 0), Real(0)}; + constexpr Gradient g2{Real( 0), Real( 1), Real(0)}; + + out[0] = detail::scaled_gradient(g0, Real(4) * l0 - Real(1)); + out[1] = detail::scaled_gradient(g1, Real(4) * l1 - Real(1)); + out[2] = detail::scaled_gradient(g2, Real(4) * l2 - Real(1)); + out[3] = detail::p2_edge_gradient(l0, g0, l1, g1); + out[4] = detail::p2_edge_gradient(l1, g1, l2, g2); + out[5] = detail::p2_edge_gradient(l0, g0, l2, g2); + } + + static constexpr void evaluate_hessians(const math::Vector& /*xi*/, + std::array& out) { + constexpr Gradient g0{Real(-1), Real(-1), Real(0)}; + constexpr Gradient g1{Real( 1), Real( 0), Real(0)}; + constexpr Gradient g2{Real( 0), Real( 1), Real(0)}; + + out[0] = detail::p2_vertex_hessian(g0); + out[1] = detail::p2_vertex_hessian(g1); + out[2] = detail::p2_vertex_hessian(g2); + out[3] = detail::p2_edge_hessian(g0, g1); + 
out[4] = detail::p2_edge_hessian(g1, g2); + out[5] = detail::p2_edge_hessian(g0, g2); + } +}; + +template<> +struct LagrangeTriFast<3> { + static constexpr int n_dofs = 10; + + static constexpr std::array, n_dofs> exponents = + detail::make_triangle_simplex_exponents<3>(); + + static void evaluate(const math::Vector& xi, std::array& out) { + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l0 = Real(1) - l1 - l2; + std::array phi0{}; + std::array phi1{}; + std::array phi2{}; + detail::fill_simplex_factor_values<3>(l0, phi0); + detail::fill_simplex_factor_values<3>(l1, phi1); + detail::fill_simplex_factor_values<3>(l2, phi2); + + for (std::size_t n = 0; n < exponents.size(); ++n) { + const auto& e = exponents[n]; + out[n] = phi0[e[0]] * phi1[e[1]] * phi2[e[2]]; + } + } + + static void evaluate_gradients(const math::Vector& xi, + std::array& out) { + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l0 = Real(1) - l1 - l2; + std::array phi0{}; + std::array phi1{}; + std::array phi2{}; + std::array dphi0{}; + std::array dphi1{}; + std::array dphi2{}; + detail::fill_simplex_factor_values_first<3>(l0, phi0, dphi0); + detail::fill_simplex_factor_values_first<3>(l1, phi1, dphi1); + detail::fill_simplex_factor_values_first<3>(l2, phi2, dphi2); + + for (std::size_t n = 0; n < exponents.size(); ++n) { + const auto& e = exponents[n]; + const Real v0 = phi0[e[0]]; + const Real v1 = phi1[e[1]]; + const Real v2 = phi2[e[2]]; + const Real dl0 = dphi0[e[0]] * v1 * v2; + const Real dl1 = v0 * dphi1[e[1]] * v2; + const Real dl2 = v0 * v1 * dphi2[e[2]]; + out[n] = Gradient{dl1 - dl0, dl2 - dl0, Real(0)}; + } + } + + static void evaluate_hessians(const math::Vector& xi, + std::array& out) { + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l0 = Real(1) - l1 - l2; + std::array phi0{}; + std::array phi1{}; + std::array phi2{}; + std::array dphi0{}; + std::array dphi1{}; + std::array dphi2{}; + std::array d2phi0{}; + std::array d2phi1{}; + std::array 
d2phi2{}; + detail::fill_simplex_factor_values_first_second<3>(l0, phi0, dphi0, d2phi0); + detail::fill_simplex_factor_values_first_second<3>(l1, phi1, dphi1, d2phi1); + detail::fill_simplex_factor_values_first_second<3>(l2, phi2, dphi2, d2phi2); + + for (std::size_t n = 0; n < exponents.size(); ++n) { + const auto& e = exponents[n]; + const Real v0 = phi0[e[0]]; + const Real v1 = phi1[e[1]]; + const Real v2 = phi2[e[2]]; + const Real D0 = dphi0[e[0]]; + const Real D1 = dphi1[e[1]]; + const Real D2 = dphi2[e[2]]; + const Real H00 = d2phi0[e[0]] * v1 * v2; + const Real H11 = v0 * d2phi1[e[1]] * v2; + const Real H22 = v0 * v1 * d2phi2[e[2]]; + const Real H01 = D0 * D1 * v2; + const Real H02 = D0 * v1 * D2; + const Real H12 = v0 * D1 * D2; + + Hessian H{}; + H(0, 0) = H00 - Real(2) * H01 + H11; + H(1, 1) = H00 - Real(2) * H02 + H22; + H(0, 1) = H00 - H01 - H02 + H12; + H(1, 0) = H(0, 1); + out[n] = H; + } + } +}; + +// --------------------------------------------------------------------------- +// LagrangeTetFast +// --------------------------------------------------------------------------- +template +struct LagrangeTetFast; + +template<> +struct LagrangeTetFast<1> { + static constexpr int n_dofs = 4; + + static constexpr void evaluate(const math::Vector& xi, std::array& out) { + out[0] = Real(1) - xi[0] - xi[1] - xi[2]; + out[1] = xi[0]; + out[2] = xi[1]; + out[3] = xi[2]; + } + + static constexpr void evaluate_gradients(const math::Vector& /*xi*/, + std::array& out) { + out[0] = Gradient{Real(-1), Real(-1), Real(-1)}; + out[1] = Gradient{Real( 1), Real( 0), Real( 0)}; + out[2] = Gradient{Real( 0), Real( 1), Real( 0)}; + out[3] = Gradient{Real( 0), Real( 0), Real( 1)}; + } + + static constexpr void evaluate_hessians(const math::Vector& /*xi*/, + std::array& out) { + out[0] = Hessian{}; + out[1] = Hessian{}; + out[2] = Hessian{}; + out[3] = Hessian{}; + } +}; + +template<> +struct LagrangeTetFast<2> { + static constexpr int n_dofs = 10; + + static constexpr void 
evaluate(const math::Vector& xi, std::array& out) { + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l3 = xi[2]; + const Real l0 = Real(1) - l1 - l2 - l3; + + out[0] = l0 * (Real(2) * l0 - Real(1)); + out[1] = l1 * (Real(2) * l1 - Real(1)); + out[2] = l2 * (Real(2) * l2 - Real(1)); + out[3] = l3 * (Real(2) * l3 - Real(1)); + out[4] = Real(4) * l0 * l1; + out[5] = Real(4) * l1 * l2; + out[6] = Real(4) * l0 * l2; + out[7] = Real(4) * l0 * l3; + out[8] = Real(4) * l1 * l3; + out[9] = Real(4) * l2 * l3; + } + + static constexpr void evaluate_gradients(const math::Vector& xi, + std::array& out) { + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l3 = xi[2]; + const Real l0 = Real(1) - l1 - l2 - l3; + constexpr Gradient g0{Real(-1), Real(-1), Real(-1)}; + constexpr Gradient g1{Real( 1), Real( 0), Real( 0)}; + constexpr Gradient g2{Real( 0), Real( 1), Real( 0)}; + constexpr Gradient g3{Real( 0), Real( 0), Real( 1)}; + + out[0] = detail::scaled_gradient(g0, Real(4) * l0 - Real(1)); + out[1] = detail::scaled_gradient(g1, Real(4) * l1 - Real(1)); + out[2] = detail::scaled_gradient(g2, Real(4) * l2 - Real(1)); + out[3] = detail::scaled_gradient(g3, Real(4) * l3 - Real(1)); + out[4] = detail::p2_edge_gradient(l0, g0, l1, g1); + out[5] = detail::p2_edge_gradient(l1, g1, l2, g2); + out[6] = detail::p2_edge_gradient(l0, g0, l2, g2); + out[7] = detail::p2_edge_gradient(l0, g0, l3, g3); + out[8] = detail::p2_edge_gradient(l1, g1, l3, g3); + out[9] = detail::p2_edge_gradient(l2, g2, l3, g3); + } + + static constexpr void evaluate_hessians(const math::Vector& /*xi*/, + std::array& out) { + constexpr Gradient g0{Real(-1), Real(-1), Real(-1)}; + constexpr Gradient g1{Real( 1), Real( 0), Real( 0)}; + constexpr Gradient g2{Real( 0), Real( 1), Real( 0)}; + constexpr Gradient g3{Real( 0), Real( 0), Real( 1)}; + + out[0] = detail::p2_vertex_hessian(g0); + out[1] = detail::p2_vertex_hessian(g1); + out[2] = detail::p2_vertex_hessian(g2); + out[3] = 
detail::p2_vertex_hessian(g3); + out[4] = detail::p2_edge_hessian(g0, g1); + out[5] = detail::p2_edge_hessian(g1, g2); + out[6] = detail::p2_edge_hessian(g0, g2); + out[7] = detail::p2_edge_hessian(g0, g3); + out[8] = detail::p2_edge_hessian(g1, g3); + out[9] = detail::p2_edge_hessian(g2, g3); + } +}; + +template<> +struct LagrangeTetFast<3> { + static constexpr int n_dofs = 20; + + static constexpr std::array, n_dofs> exponents = + detail::make_tetrahedron_simplex_exponents<3>(); + + static void evaluate(const math::Vector& xi, std::array& out) { + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l3 = xi[2]; + const Real l0 = Real(1) - l1 - l2 - l3; + std::array phi0{}; + std::array phi1{}; + std::array phi2{}; + std::array phi3{}; + detail::fill_simplex_factor_values<3>(l0, phi0); + detail::fill_simplex_factor_values<3>(l1, phi1); + detail::fill_simplex_factor_values<3>(l2, phi2); + detail::fill_simplex_factor_values<3>(l3, phi3); + + for (std::size_t n = 0; n < exponents.size(); ++n) { + const auto& e = exponents[n]; + out[n] = phi0[e[0]] * phi1[e[1]] * phi2[e[2]] * phi3[e[3]]; + } + } + + static void evaluate_gradients(const math::Vector& xi, + std::array& out) { + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l3 = xi[2]; + const Real l0 = Real(1) - l1 - l2 - l3; + std::array phi0{}; + std::array phi1{}; + std::array phi2{}; + std::array phi3{}; + std::array dphi0{}; + std::array dphi1{}; + std::array dphi2{}; + std::array dphi3{}; + detail::fill_simplex_factor_values_first<3>(l0, phi0, dphi0); + detail::fill_simplex_factor_values_first<3>(l1, phi1, dphi1); + detail::fill_simplex_factor_values_first<3>(l2, phi2, dphi2); + detail::fill_simplex_factor_values_first<3>(l3, phi3, dphi3); + + for (std::size_t n = 0; n < exponents.size(); ++n) { + const auto& e = exponents[n]; + const Real v0 = phi0[e[0]]; + const Real v1 = phi1[e[1]]; + const Real v2 = phi2[e[2]]; + const Real v3 = phi3[e[3]]; + const Real dl0 = dphi0[e[0]] * v1 * v2 * v3; 
+ const Real dl1 = v0 * dphi1[e[1]] * v2 * v3; + const Real dl2 = v0 * v1 * dphi2[e[2]] * v3; + const Real dl3 = v0 * v1 * v2 * dphi3[e[3]]; + out[n] = Gradient{dl1 - dl0, dl2 - dl0, dl3 - dl0}; + } + } + + static void evaluate_hessians(const math::Vector& xi, + std::array& out) { + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l3 = xi[2]; + const Real l0 = Real(1) - l1 - l2 - l3; + std::array phi0{}; + std::array phi1{}; + std::array phi2{}; + std::array phi3{}; + std::array dphi0{}; + std::array dphi1{}; + std::array dphi2{}; + std::array dphi3{}; + std::array d2phi0{}; + std::array d2phi1{}; + std::array d2phi2{}; + std::array d2phi3{}; + detail::fill_simplex_factor_values_first_second<3>(l0, phi0, dphi0, d2phi0); + detail::fill_simplex_factor_values_first_second<3>(l1, phi1, dphi1, d2phi1); + detail::fill_simplex_factor_values_first_second<3>(l2, phi2, dphi2, d2phi2); + detail::fill_simplex_factor_values_first_second<3>(l3, phi3, dphi3, d2phi3); + + for (std::size_t n = 0; n < exponents.size(); ++n) { + const auto& e = exponents[n]; + const Real v0 = phi0[e[0]]; + const Real v1 = phi1[e[1]]; + const Real v2 = phi2[e[2]]; + const Real v3 = phi3[e[3]]; + const Real D0 = dphi0[e[0]]; + const Real D1 = dphi1[e[1]]; + const Real D2 = dphi2[e[2]]; + const Real D3 = dphi3[e[3]]; + + const Real H00 = d2phi0[e[0]] * v1 * v2 * v3; + const Real H11 = v0 * d2phi1[e[1]] * v2 * v3; + const Real H22 = v0 * v1 * d2phi2[e[2]] * v3; + const Real H33 = v0 * v1 * v2 * d2phi3[e[3]]; + const Real H01 = D0 * D1 * v2 * v3; + const Real H02 = D0 * v1 * D2 * v3; + const Real H03 = D0 * v1 * v2 * D3; + const Real H12 = v0 * D1 * D2 * v3; + const Real H13 = v0 * D1 * v2 * D3; + const Real H23 = v0 * v1 * D2 * D3; + + Hessian H{}; + H(0, 0) = H00 - Real(2) * H01 + H11; + H(1, 1) = H00 - Real(2) * H02 + H22; + H(2, 2) = H00 - Real(2) * H03 + H33; + H(0, 1) = H00 - H01 - H02 + H12; + H(1, 0) = H(0, 1); + H(0, 2) = H00 - H01 - H03 + H13; + H(2, 0) = H(0, 2); + H(1, 2) = H00 - 
H02 - H03 + H23; + H(2, 1) = H(1, 2); + out[n] = H; + } + } +}; + +} // namespace basis +} // namespace FE +} // namespace svmp + +#endif // SVMP_FE_BASIS_LAGRANGEBASISFAST_H diff --git a/Code/Source/solver/FE/Basis/LagrangeBasisPyramid.cpp b/Code/Source/solver/FE/Basis/LagrangeBasisPyramid.cpp new file mode 100644 index 000000000..4a332621e --- /dev/null +++ b/Code/Source/solver/FE/Basis/LagrangeBasisPyramid.cpp @@ -0,0 +1,2069 @@ +#include "LagrangeBasisPyramid.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Basis/BasisExceptions.h" +#include "BasisTolerance.h" +#include "Math/DenseLinearAlgebra.h" +#include "Math/DenseTransformKernels.h" +#include "LagrangeBasisUtility.h" +#include "PyramidModalBasis.h" + +namespace svmp { +namespace FE { +namespace basis { +namespace detail { + +class PyramidLagrangeCache { +public: + using ModalTerm = pyramid_modal::Term; + + struct UvPolynomial { + using Power = std::pair; + std::vector> coeffs; + + void add_term(int pu, int pv, Real coeff, Real tol = Real(1e-14)) { + if (std::abs(coeff) <= tol) { + return; + } + const auto key = std::make_pair(pu, pv); + const auto found = std::lower_bound( + coeffs.begin(), + coeffs.end(), + key, + [](const auto& entry, const Power& value) { return entry.first < value; }); + if (found == coeffs.end() || found->first != key) { + coeffs.insert(found, {key, coeff}); + return; + } + + found->second += coeff; + if (std::abs(found->second) <= tol) { + coeffs.erase(found); + } + } + + void add_scaled(const UvPolynomial& other, Real scale, Real tol = Real(1e-14)) { + if (std::abs(scale) <= tol) { + return; + } + for (const auto& [powers, coeff] : other.coeffs) { + add_term(powers.first, powers.second, scale * coeff, tol); + } + } + + bool empty(Real tol = Real(1e-12)) const { + for (const auto& [powers, coeff] : coeffs) { + (void)powers; + if (std::abs(coeff) > tol) { + return false; + } + } + return true; + } + + bool is_constant(Real tol 
= Real(1e-12)) const { + for (const auto& [powers, coeff] : coeffs) { + if (std::abs(coeff) <= tol) { + continue; + } + if (powers.first != 0 || powers.second != 0) { + return false; + } + } + return true; + } + + Real constant_value(Real tol = Real(1e-12)) const { + Real value = Real(0); + for (const auto& [powers, coeff] : coeffs) { + if (std::abs(coeff) <= tol) { + continue; + } + if (powers.first == 0 && powers.second == 0) { + value += coeff; + } + } + return value; + } + }; + + struct ApexSeries { + std::vector> by_power; + + void add_term(int beta, int pu, int pv, Real coeff, Real tol = Real(1e-14)) { + const auto found = find_or_insert(beta); + found->second.add_term(pu, pv, coeff, tol); + if (found->second.empty(tol)) { + by_power.erase(found); + } + } + + void add_scaled(const ApexSeries& other, Real scale, Real tol = Real(1e-14)) { + if (std::abs(scale) <= tol) { + return; + } + for (const auto& [beta, poly] : other.by_power) { + const auto found = find_or_insert(beta); + found->second.add_scaled(poly, scale, tol); + if (found->second.empty(tol)) { + by_power.erase(found); + } + } + } + + private: + std::vector>::iterator find_or_insert(int beta) { + const auto found = std::lower_bound( + by_power.begin(), + by_power.end(), + beta, + [](const auto& entry, int value) { return entry.first < value; }); + if (found != by_power.end() && found->first == beta) { + return found; + } + return by_power.insert(found, {beta, UvPolynomial{}}); + } + }; + + using GradientSeries = std::array; + using HessianSeries = std::array, 3>; + + enum class ApexLimitKind { + Constant, + DirectionDependent, + Singular, + }; + + enum class ApexRankStatus { + Exact, + DirectionDependent, + Singular, + }; + + struct ApexClassification { + ApexLimitKind kind{ApexLimitKind::Constant}; + Real constant_value{0}; + int leading_power{1}; + }; + + struct ApexData { + std::vector values; + std::vector gradients; + std::vector hessians; + ApexRankStatus gradient_status{ApexRankStatus::Exact}; 
+ ApexRankStatus hessian_status{ApexRankStatus::Exact}; + }; + + struct OrderData { + int order{0}; + std::vector> nodes; + std::vector modal_terms; + std::vector modal_to_nodal; + ApexData apex; + }; + + struct EvaluationScratch { + std::vector modal_values; + std::vector modal_gradient_components; + std::vector modal_hessian_components; + std::vector modal_gradients; + std::vector modal_hessians; + pyramid_modal::EvaluationPoint modal_point; + + void prewarm(std::size_t max_size, std::size_t max_qpts) { + const std::size_t batched_size = max_size * std::max(max_qpts, 1u); + modal_values.reserve(batched_size); + modal_gradient_components.reserve(batched_size * 3u); + modal_hessian_components.reserve(batched_size * 9u); + modal_gradients.reserve(max_size); + modal_hessians.reserve(max_size); + } + }; + + static EvaluationScratch& evaluation_scratch() { + // Scratch is intentionally thread-local: production assembly uses a + // persistent worker-thread team, so buffers stay warm on each worker. 
+ static thread_local EvaluationScratch scratch; + return scratch; + } + + static void prewarm_scratch(std::size_t max_size, std::size_t max_qpts) { + evaluation_scratch().prewarm(max_size, max_qpts); + } + + static bool is_apex_point(const math::Vector& xi) { + const Real tol = apex_coord_tolerance(); + return std::abs(xi[0]) <= tol && + std::abs(xi[1]) <= tol && + std::abs(Real(1) - xi[2]) <= tol; + } + + static bool on_degenerate_top_plane(const math::Vector& xi) { + return basis_near_zero(Real(1) - xi[2]); + } + + static void validate_top_plane_query(const math::Vector& xi) { + if (on_degenerate_top_plane(xi) && !is_apex_point(xi)) [[unlikely]] { + throw BasisEvaluationException( + "Pyramid reference evaluation on the degenerate z=1 plane is only defined at the apex", + __FILE__, __LINE__, __func__); + } + } + + static OrderData build_order_data(int order) { + OrderData data; + data.order = order; + + data.nodes = build_public_nodes(order); + data.modal_terms = pyramid_modal::build_terms(order); + + const std::size_t n = data.nodes.size(); + if (data.modal_terms.size() != n) { + throw BasisConstructionException("LagrangeBasis pyramid modal basis size mismatch", + __FILE__, __LINE__, __func__); + } + + std::vector vandermonde(n * n, Real(0)); + for (std::size_t row = 0; row < n; ++row) { + pyramid_modal::EvaluationPoint modal_point; + pyramid_modal::prepare_evaluation_point( + data.modal_terms, data.nodes[row], modal_point); + for (std::size_t col = 0; col < n; ++col) { + Real value = Real(0); + pyramid_modal::evaluate_term(data.modal_terms[col], modal_point, value); + vandermonde[row * n + col] = value; + } + } + + const auto inverse_result = math::invert_dense_matrix_with_diagnostics( + std::move(vandermonde), + n, + "LagrangeBasis pyramid Vandermonde"); + math::validate_dense_inverse_diagnostics( + inverse_result, + n, + "LagrangeBasis pyramid Vandermonde"); + const std::vector& inverse = inverse_result.inverse; + + data.modal_to_nodal.assign(n * n, Real(0)); 
+ for (std::size_t basis_i = 0; basis_i < n; ++basis_i) { + for (std::size_t modal_j = 0; modal_j < n; ++modal_j) { + data.modal_to_nodal[basis_i * n + modal_j] = + inverse[modal_j * n + basis_i]; + } + } + data.apex = build_apex_data(data); + return data; + } + + static bool has_low_order_fast_modal_to_nodal(const OrderData& data) noexcept { + return data.order == 1 || data.order == 2; + } + + static const OrderData& get(int order) { + constexpr int kMaxOnceCachedOrder = 12; + if (order >= 0 && order <= kMaxOnceCachedOrder) { + static std::array flags; + static std::array, kMaxOnceCachedOrder + 1> cache; + const auto idx = static_cast(order); + std::call_once(flags[idx], [idx, order]() { + cache[idx] = std::make_unique(build_order_data(order)); + }); + return *cache[idx]; + } + + static std::mutex fallback_mutex; + static std::map> fallback_cache; + + std::lock_guard lock(fallback_mutex); + const auto found = fallback_cache.find(order); + if (found != fallback_cache.end()) { + return *found->second; + } + + auto data = std::make_unique(build_order_data(order)); + const auto [it, inserted] = fallback_cache.emplace(order, std::move(data)); + (void)inserted; + return *it->second; + } + + static void evaluate_values(const OrderData& data, + const math::Vector& xi, + std::vector& values) { + validate_top_plane_query(xi); + if (is_apex_point(xi)) { + values = data.apex.values; + return; + } + + auto& scratch = evaluation_scratch(); + auto& modal = scratch.modal_values; + auto& modal_point = scratch.modal_point; + modal.resize(data.modal_terms.size()); + pyramid_modal::prepare_evaluation_point(data.modal_terms, xi, modal_point); + for (std::size_t m = 0; m < data.modal_terms.size(); ++m) { + pyramid_modal::evaluate_term(data.modal_terms[m], modal_point, modal[m]); + } + if (has_low_order_fast_modal_to_nodal(data)) { + apply_sparse_basis_to_nodal(data, modal, values); + } else { + apply_modal_to_nodal(data, modal, values); + } + } + + static void evaluate_gradients(const 
OrderData& data, + const math::Vector& xi, + std::vector& gradients) { + validate_top_plane_query(xi); + if (is_apex_point(xi)) { + if (data.apex.gradient_status != ApexRankStatus::Exact) { + throw BasisEvaluationException( + apex_status_message("gradient", data.apex.gradient_status), + __FILE__, __LINE__, __func__); + } + gradients = data.apex.gradients; + return; + } + + auto& scratch = evaluation_scratch(); + auto& modal_gradients = scratch.modal_gradients; + auto& modal_point = scratch.modal_point; + modal_gradients.resize(data.modal_terms.size()); + pyramid_modal::prepare_evaluation_point(data.modal_terms, xi, modal_point); + for (std::size_t m = 0; m < data.modal_terms.size(); ++m) { + Real value = Real(0); + pyramid_modal::evaluate_term(data.modal_terms[m], modal_point, value, &modal_gradients[m]); + } + if (has_low_order_fast_modal_to_nodal(data)) { + apply_sparse_basis_to_nodal(data, modal_gradients, gradients); + } else { + apply_modal_to_nodal(data, modal_gradients, gradients); + } + } + + static void evaluate_hessians(const OrderData& data, + const math::Vector& xi, + std::vector& hessians) { + validate_top_plane_query(xi); + if (is_apex_point(xi)) { + if (data.apex.hessian_status != ApexRankStatus::Exact) { + throw BasisEvaluationException( + apex_status_message("Hessian", data.apex.hessian_status), + __FILE__, __LINE__, __func__); + } + hessians = data.apex.hessians; + return; + } + + auto& scratch = evaluation_scratch(); + auto& modal_hessians = scratch.modal_hessians; + auto& modal_point = scratch.modal_point; + modal_hessians.resize(data.modal_terms.size()); + pyramid_modal::prepare_evaluation_point(data.modal_terms, xi, modal_point); + for (std::size_t m = 0; m < data.modal_terms.size(); ++m) { + Real value = Real(0); + pyramid_modal::evaluate_term(data.modal_terms[m], modal_point, value, nullptr, &modal_hessians[m]); + } + if (has_low_order_fast_modal_to_nodal(data)) { + apply_sparse_basis_to_nodal(data, modal_hessians, hessians); + } else { + 
apply_modal_to_nodal(data, modal_hessians, hessians); + } + } + + static void evaluate_all(const OrderData& data, + const math::Vector& xi, + std::vector& values, + std::vector& gradients, + std::vector& hessians) { + validate_top_plane_query(xi); + if (is_apex_point(xi)) { + if (data.apex.gradient_status != ApexRankStatus::Exact) { + throw BasisEvaluationException( + apex_status_message("gradient", data.apex.gradient_status), + __FILE__, __LINE__, __func__); + } + if (data.apex.hessian_status != ApexRankStatus::Exact) { + throw BasisEvaluationException( + apex_status_message("Hessian", data.apex.hessian_status), + __FILE__, __LINE__, __func__); + } + values = data.apex.values; + gradients = data.apex.gradients; + hessians = data.apex.hessians; + return; + } + + const std::size_t n = data.modal_terms.size(); + auto& scratch = evaluation_scratch(); + auto& modal_values = scratch.modal_values; + auto& modal_gradients = scratch.modal_gradients; + auto& modal_hessians = scratch.modal_hessians; + auto& modal_point = scratch.modal_point; + modal_values.resize(n); + modal_gradients.resize(n); + modal_hessians.resize(n); + pyramid_modal::prepare_evaluation_point(data.modal_terms, xi, modal_point); + + for (std::size_t m = 0; m < n; ++m) { + pyramid_modal::evaluate_term( + data.modal_terms[m], modal_point, modal_values[m], &modal_gradients[m], &modal_hessians[m]); + } + + if (has_low_order_fast_modal_to_nodal(data)) { + apply_sparse_basis_to_nodal_all( + data, modal_values, modal_gradients, modal_hessians, values, gradients, hessians); + return; + } + + values.resize(n); + gradients.resize(n); + hessians.resize(n); + for (std::size_t basis_i = 0; basis_i < n; ++basis_i) { + const Real* row = data.modal_to_nodal.data() + basis_i * n; + Gradient gradient{}; + Hessian hessian{}; + Real value = Real(0); + for (std::size_t modal_j = 0; modal_j < n; ++modal_j) { + const Real coeff = row[modal_j]; + value += coeff * modal_values[modal_j]; + + const Real* modal_gradient = 
modal_gradients[modal_j].data(); + gradient[0] += coeff * modal_gradient[0]; + gradient[1] += coeff * modal_gradient[1]; + gradient[2] += coeff * modal_gradient[2]; + + const Real* modal_hessian = modal_hessians[modal_j].data(); + Real* hessian_data = hessian.data(); + hessian_data[0] += coeff * modal_hessian[0]; + hessian_data[1] += coeff * modal_hessian[1]; + hessian_data[2] += coeff * modal_hessian[2]; + hessian_data[4] += coeff * modal_hessian[4]; + hessian_data[5] += coeff * modal_hessian[5]; + hessian_data[8] += coeff * modal_hessian[8]; + } + values[basis_i] = value; + gradients[basis_i] = gradient; + Real* hessian_data = hessian.data(); + hessian_data[3] = hessian_data[1]; + hessian_data[6] = hessian_data[2]; + hessian_data[7] = hessian_data[5]; + hessians[basis_i] = hessian; + } + } + + static void evaluate_values_to(const OrderData& data, + const math::Vector& xi, + Real* SVMP_RESTRICT values_out) { + validate_top_plane_query(xi); + if (is_apex_point(xi)) { + std::copy(data.apex.values.begin(), data.apex.values.end(), values_out); + return; + } + + auto& scratch = evaluation_scratch(); + auto& modal = scratch.modal_values; + auto& modal_point = scratch.modal_point; + modal.resize(data.modal_terms.size()); + pyramid_modal::prepare_evaluation_point(data.modal_terms, xi, modal_point); + for (std::size_t m = 0; m < data.modal_terms.size(); ++m) { + pyramid_modal::evaluate_term(data.modal_terms[m], modal_point, modal[m]); + } + if (has_low_order_fast_modal_to_nodal(data)) { + apply_sparse_basis_to_nodal_to(data, modal, values_out); + } else { + apply_modal_to_nodal_to(data, modal, values_out); + } + } + + static void evaluate_gradients_to(const OrderData& data, + const math::Vector& xi, + Real* SVMP_RESTRICT gradients_out) { + validate_top_plane_query(xi); + if (is_apex_point(xi)) { + if (data.apex.gradient_status != ApexRankStatus::Exact) { + throw BasisEvaluationException( + apex_status_message("gradient", data.apex.gradient_status), + __FILE__, __LINE__, 
__func__); + } + for (std::size_t i = 0; i < data.apex.gradients.size(); ++i) { + gradients_out[i * 3u + 0u] = data.apex.gradients[i][0]; + gradients_out[i * 3u + 1u] = data.apex.gradients[i][1]; + gradients_out[i * 3u + 2u] = data.apex.gradients[i][2]; + } + return; + } + + auto& scratch = evaluation_scratch(); + auto& modal_gradients = scratch.modal_gradients; + auto& modal_point = scratch.modal_point; + modal_gradients.resize(data.modal_terms.size()); + pyramid_modal::prepare_evaluation_point(data.modal_terms, xi, modal_point); + for (std::size_t m = 0; m < data.modal_terms.size(); ++m) { + Real value = Real(0); + pyramid_modal::evaluate_term(data.modal_terms[m], modal_point, value, &modal_gradients[m]); + } + if (has_low_order_fast_modal_to_nodal(data)) { + apply_sparse_basis_to_nodal_to(data, modal_gradients, gradients_out); + } else { + apply_modal_to_nodal_to(data, modal_gradients, gradients_out); + } + } + + static void evaluate_hessians_to(const OrderData& data, + const math::Vector& xi, + Real* SVMP_RESTRICT hessians_out) { + validate_top_plane_query(xi); + if (is_apex_point(xi)) { + if (data.apex.hessian_status != ApexRankStatus::Exact) { + throw BasisEvaluationException( + apex_status_message("Hessian", data.apex.hessian_status), + __FILE__, __LINE__, __func__); + } + for (std::size_t i = 0; i < data.apex.hessians.size(); ++i) { + store_hessian(data.apex.hessians[i], hessians_out + i * 9u); + } + return; + } + + auto& scratch = evaluation_scratch(); + auto& modal_hessians = scratch.modal_hessians; + auto& modal_point = scratch.modal_point; + modal_hessians.resize(data.modal_terms.size()); + pyramid_modal::prepare_evaluation_point(data.modal_terms, xi, modal_point); + for (std::size_t m = 0; m < data.modal_terms.size(); ++m) { + Real value = Real(0); + pyramid_modal::evaluate_term(data.modal_terms[m], modal_point, value, nullptr, &modal_hessians[m]); + } + if (has_low_order_fast_modal_to_nodal(data)) { + apply_sparse_basis_to_nodal_to(data, 
modal_hessians, hessians_out); + } else { + apply_modal_to_nodal_to(data, modal_hessians, hessians_out); + } + } + + static void evaluate_all_to(const OrderData& data, + const math::Vector& xi, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + validate_top_plane_query(xi); + if (is_apex_point(xi)) { + if (data.apex.gradient_status != ApexRankStatus::Exact) { + throw BasisEvaluationException( + apex_status_message("gradient", data.apex.gradient_status), + __FILE__, __LINE__, __func__); + } + if (data.apex.hessian_status != ApexRankStatus::Exact) { + throw BasisEvaluationException( + apex_status_message("Hessian", data.apex.hessian_status), + __FILE__, __LINE__, __func__); + } + std::copy(data.apex.values.begin(), data.apex.values.end(), values_out); + for (std::size_t i = 0; i < data.apex.gradients.size(); ++i) { + gradients_out[i * 3u + 0u] = data.apex.gradients[i][0]; + gradients_out[i * 3u + 1u] = data.apex.gradients[i][1]; + gradients_out[i * 3u + 2u] = data.apex.gradients[i][2]; + } + for (std::size_t i = 0; i < data.apex.hessians.size(); ++i) { + const Real* hessian = data.apex.hessians[i].data(); + std::copy(hessian, hessian + 9u, hessians_out + i * 9u); + } + return; + } + + const std::size_t n = data.modal_terms.size(); + auto& scratch = evaluation_scratch(); + auto& modal_values = scratch.modal_values; + auto& modal_gradients = scratch.modal_gradients; + auto& modal_hessians = scratch.modal_hessians; + auto& modal_point = scratch.modal_point; + modal_values.resize(n); + modal_gradients.resize(n); + modal_hessians.resize(n); + pyramid_modal::prepare_evaluation_point(data.modal_terms, xi, modal_point); + + for (std::size_t m = 0; m < n; ++m) { + pyramid_modal::evaluate_term( + data.modal_terms[m], modal_point, modal_values[m], &modal_gradients[m], &modal_hessians[m]); + } + + if (has_low_order_fast_modal_to_nodal(data)) { + apply_sparse_basis_to_nodal_all_to( + data, modal_values, modal_gradients, 
modal_hessians, values_out, gradients_out, hessians_out); + return; + } + + for (std::size_t basis_i = 0; basis_i < n; ++basis_i) { + const Real* row = data.modal_to_nodal.data() + basis_i * n; + Real value = Real(0); + Real gradient[3] = {Real(0), Real(0), Real(0)}; + Real hessian[9] = {}; + for (std::size_t modal_j = 0; modal_j < n; ++modal_j) { + const Real coeff = row[modal_j]; + value += coeff * modal_values[modal_j]; + + const Real* modal_gradient = modal_gradients[modal_j].data(); + gradient[0] += coeff * modal_gradient[0]; + gradient[1] += coeff * modal_gradient[1]; + gradient[2] += coeff * modal_gradient[2]; + + const Real* modal_hessian = modal_hessians[modal_j].data(); + hessian[0] += coeff * modal_hessian[0]; + hessian[1] += coeff * modal_hessian[1]; + hessian[2] += coeff * modal_hessian[2]; + hessian[4] += coeff * modal_hessian[4]; + hessian[5] += coeff * modal_hessian[5]; + hessian[8] += coeff * modal_hessian[8]; + } + + values_out[basis_i] = value; + Real* gradient_out = gradients_out + basis_i * 3u; + gradient_out[0] = gradient[0]; + gradient_out[1] = gradient[1]; + gradient_out[2] = gradient[2]; + + Real* hessian_out = hessians_out + basis_i * 9u; + hessian_out[0] = hessian[0]; + hessian_out[1] = hessian[1]; + hessian_out[2] = hessian[2]; + hessian_out[3] = hessian[1]; + hessian_out[4] = hessian[4]; + hessian_out[5] = hessian[5]; + hessian_out[6] = hessian[2]; + hessian_out[7] = hessian[5]; + hessian_out[8] = hessian[8]; + } + } + + static void evaluate_at_quadrature_points_strided( + const OrderData& data, + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + const unsigned mask = (values_out != nullptr ? 1u : 0u) | + (gradients_out != nullptr ? 2u : 0u) | + (hessians_out != nullptr ? 
4u : 0u); + switch (mask) { + case 0u: + validate_strided_points(points); + return; + case 1u: + evaluate_at_quadrature_points_strided_impl( + data, points, output_stride, values_out, gradients_out, hessians_out); + return; + case 2u: + evaluate_at_quadrature_points_strided_impl( + data, points, output_stride, values_out, gradients_out, hessians_out); + return; + case 3u: + evaluate_at_quadrature_points_strided_impl( + data, points, output_stride, values_out, gradients_out, hessians_out); + return; + case 4u: + evaluate_at_quadrature_points_strided_impl( + data, points, output_stride, values_out, gradients_out, hessians_out); + return; + case 5u: + evaluate_at_quadrature_points_strided_impl( + data, points, output_stride, values_out, gradients_out, hessians_out); + return; + case 6u: + evaluate_at_quadrature_points_strided_impl( + data, points, output_stride, values_out, gradients_out, hessians_out); + return; + case 7u: + evaluate_at_quadrature_points_strided_impl( + data, points, output_stride, values_out, gradients_out, hessians_out); + return; + default: + return; + } + } + +private: + static void validate_strided_points(const std::vector>& points) { + for (const auto& xi : points) { + validate_top_plane_query(xi); + } + } + + template + static void write_apex_strided(const OrderData& data, + std::size_t q, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + const std::size_t n = data.modal_terms.size(); + if constexpr (NeedValues) { + for (std::size_t basis_i = 0; basis_i < n; ++basis_i) { + values_out[basis_i * output_stride + q] = data.apex.values[basis_i]; + } + } + if constexpr (NeedGradients) { + if (data.apex.gradient_status != ApexRankStatus::Exact) { + throw BasisEvaluationException( + apex_status_message("gradient", data.apex.gradient_status), + __FILE__, __LINE__, __func__); + } + for (std::size_t basis_i = 0; basis_i < n; ++basis_i) { + Real* g = gradients_out + 
basis_i * 3u * output_stride; + g[0u * output_stride + q] = data.apex.gradients[basis_i][0]; + g[1u * output_stride + q] = data.apex.gradients[basis_i][1]; + g[2u * output_stride + q] = data.apex.gradients[basis_i][2]; + } + } + if constexpr (NeedHessians) { + if (data.apex.hessian_status != ApexRankStatus::Exact) { + throw BasisEvaluationException( + apex_status_message("Hessian", data.apex.hessian_status), + __FILE__, __LINE__, __func__); + } + for (std::size_t basis_i = 0; basis_i < n; ++basis_i) { + const Real* hessian = data.apex.hessians[basis_i].data(); + Real* H = hessians_out + basis_i * 9u * output_stride; + for (std::size_t component = 0; component < 9u; ++component) { + H[component * output_stride + q] = hessian[component]; + } + } + } + } + + template + static void fill_low_order_modal_jet(std::size_t modal_i, + const Real* SVMP_RESTRICT xp, + const Real* SVMP_RESTRICT yp, + const Real* SVMP_RESTRICT zp, + const Real* SVMP_RESTRICT inv_tp, + Real* SVMP_RESTRICT modal_values, + Real (*SVMP_RESTRICT modal_gradients)[3], + Real (*SVMP_RESTRICT modal_hessians)[9]) { + const Real xy_base = xp[Px] * yp[Py]; + const Real base = xy_base * zp[Pz]; + const Real inv_denom = inv_tp[DenomPower]; + const Real value = base * inv_denom; + + if constexpr (NeedValues) { + modal_values[modal_i] = value; + } + if constexpr (NeedGradients) { + Real* g = modal_gradients[modal_i]; + if constexpr (Px > 0) { + g[0] = static_cast(Px) * xp[Px - 1] * yp[Py] * zp[Pz] * inv_denom; + } else { + g[0] = Real(0); + } + if constexpr (Py > 0) { + g[1] = static_cast(Py) * xp[Px] * yp[Py - 1] * zp[Pz] * inv_denom; + } else { + g[1] = Real(0); + } + Real gz = Real(0); + if constexpr (Pz > 0) { + gz += static_cast(Pz) * xy_base * zp[Pz - 1] * inv_denom; + } + if constexpr (DenomPower > 0) { + gz += static_cast(DenomPower) * base * inv_tp[DenomPower + 1]; + } + g[2] = gz; + } + if constexpr (NeedHessians) { + Real* H = modal_hessians[modal_i]; + if constexpr (Px > 1) { + H[0] = static_cast(Px 
* (Px - 1)) * + xp[Px - 2] * yp[Py] * zp[Pz] * inv_denom; + } else { + H[0] = Real(0); + } + if constexpr (Py > 1) { + H[4] = static_cast(Py * (Py - 1)) * + xp[Px] * yp[Py - 2] * zp[Pz] * inv_denom; + } else { + H[4] = Real(0); + } + Real hxy = Real(0); + if constexpr (Px > 0 && Py > 0) { + hxy = static_cast(Px * Py) * + xp[Px - 1] * yp[Py - 1] * zp[Pz] * inv_denom; + } + H[1] = hxy; + H[3] = hxy; + + Real hxz = Real(0); + if constexpr (Px > 0) { + constexpr Real px_real = static_cast(Px); + const Real x_deriv_y = px_real * xp[Px - 1] * yp[Py]; + if constexpr (Pz > 0) { + hxz += x_deriv_y * static_cast(Pz) * + zp[Pz - 1] * inv_denom; + } + if constexpr (DenomPower > 0) { + hxz += x_deriv_y * static_cast(DenomPower) * + zp[Pz] * inv_tp[DenomPower + 1]; + } + } + H[2] = hxz; + H[6] = hxz; + + Real hyz = Real(0); + if constexpr (Py > 0) { + constexpr Real py_real = static_cast(Py); + const Real x_y_deriv = py_real * xp[Px] * yp[Py - 1]; + if constexpr (Pz > 0) { + hyz += x_y_deriv * static_cast(Pz) * + zp[Pz - 1] * inv_denom; + } + if constexpr (DenomPower > 0) { + hyz += x_y_deriv * static_cast(DenomPower) * + zp[Pz] * inv_tp[DenomPower + 1]; + } + } + H[5] = hyz; + H[7] = hyz; + + Real hzz = Real(0); + if constexpr (Pz > 1) { + hzz += static_cast(Pz * (Pz - 1)) * + xy_base * zp[Pz - 2] * inv_denom; + } + if constexpr (Pz > 0 && DenomPower > 0) { + hzz += static_cast(2 * Pz * DenomPower) * xy_base * + zp[Pz - 1] * inv_tp[DenomPower + 1]; + } + if constexpr (DenomPower > 0) { + hzz += static_cast(DenomPower * (DenomPower + 1)) * + base * inv_tp[DenomPower + 2]; + } + H[8] = hzz; + } + } + + template + static void evaluate_low_order_modal_jets(const OrderData& data, + const math::Vector& xi, + Real* SVMP_RESTRICT modal_values, + Real (*SVMP_RESTRICT modal_gradients)[3], + Real (*SVMP_RESTRICT modal_hessians)[9]) { + const Real x = xi[0]; + const Real y = xi[1]; + const Real z = xi[2]; + const Real inv_t = Real(1) / (Real(1) - z); + const Real xp[3] = {Real(1), x, x * 
x}; + const Real yp[3] = {Real(1), y, y * y}; + const Real zp[3] = {Real(1), z, z * z}; + Real inv_tp[5] = {Real(1), inv_t, Real(0), Real(0), Real(0)}; + inv_tp[2] = inv_tp[1] * inv_t; + inv_tp[3] = inv_tp[2] * inv_t; + inv_tp[4] = inv_tp[3] * inv_t; + + fill_low_order_modal_jet<0, 0, 0, 0, NeedValues, NeedGradients, NeedHessians>( + 0u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); + fill_low_order_modal_jet<1, 0, 0, 0, NeedValues, NeedGradients, NeedHessians>( + 1u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); + if (data.order == 1) { + fill_low_order_modal_jet<0, 1, 0, 0, NeedValues, NeedGradients, NeedHessians>( + 2u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); + fill_low_order_modal_jet<1, 1, 0, 1, NeedValues, NeedGradients, NeedHessians>( + 3u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); + fill_low_order_modal_jet<0, 0, 1, 0, NeedValues, NeedGradients, NeedHessians>( + 4u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); + return; + } + + fill_low_order_modal_jet<2, 0, 0, 0, NeedValues, NeedGradients, NeedHessians>( + 2u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); + fill_low_order_modal_jet<0, 1, 0, 0, NeedValues, NeedGradients, NeedHessians>( + 3u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); + fill_low_order_modal_jet<1, 1, 0, 1, NeedValues, NeedGradients, NeedHessians>( + 4u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); + fill_low_order_modal_jet<2, 1, 0, 1, NeedValues, NeedGradients, NeedHessians>( + 5u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); + fill_low_order_modal_jet<0, 2, 0, 0, NeedValues, NeedGradients, NeedHessians>( + 6u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); + fill_low_order_modal_jet<1, 2, 0, 1, NeedValues, NeedGradients, NeedHessians>( + 7u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); 
+ fill_low_order_modal_jet<2, 2, 0, 2, NeedValues, NeedGradients, NeedHessians>( + 8u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); + fill_low_order_modal_jet<0, 0, 1, 0, NeedValues, NeedGradients, NeedHessians>( + 9u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); + fill_low_order_modal_jet<1, 0, 1, 0, NeedValues, NeedGradients, NeedHessians>( + 10u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); + fill_low_order_modal_jet<0, 1, 1, 0, NeedValues, NeedGradients, NeedHessians>( + 11u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); + fill_low_order_modal_jet<1, 1, 1, 1, NeedValues, NeedGradients, NeedHessians>( + 12u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); + fill_low_order_modal_jet<0, 0, 2, 0, NeedValues, NeedGradients, NeedHessians>( + 13u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); + } + + template + static bool try_evaluate_low_order_strided( + const OrderData& data, + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + if (!has_low_order_fast_modal_to_nodal(data)) { + return false; + } + for (const auto& xi : points) { + validate_top_plane_query(xi); + if (is_apex_point(xi)) { + return false; + } + } + + Real modal_values[14]; + Real modal_gradients[14][3]; + Real modal_hessians[14][9]; + for (std::size_t q = 0; q < points.size(); ++q) { + evaluate_low_order_modal_jets( + data, points[q], modal_values, modal_gradients, modal_hessians); + if constexpr (NeedValues) { + apply_low_order_combination( + data, + 1u, + [&](std::size_t modal_i, std::size_t) { + return modal_values[modal_i]; + }, + [&](std::size_t basis_i, std::size_t, Real value) { + values_out[basis_i * output_stride + q] = value; + }); + } + if constexpr (NeedGradients) { + apply_low_order_combination( + data, + 3u, + [&](std::size_t modal_i, 
std::size_t component) { + return modal_gradients[modal_i][component]; + }, + [&](std::size_t basis_i, std::size_t component, Real value) { + gradients_out[basis_i * 3u * output_stride + + component * output_stride + q] = value; + }); + } + if constexpr (NeedHessians) { + apply_low_order_combination( + data, + 9u, + [&](std::size_t modal_i, std::size_t component) { + return modal_hessians[modal_i][component]; + }, + [&](std::size_t basis_i, std::size_t component, Real value) { + hessians_out[basis_i * 9u * output_stride + + component * output_stride + q] = value; + }); + } + } + return true; + } + + template + static void evaluate_at_quadrature_points_strided_impl( + const OrderData& data, + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + const std::size_t n = data.modal_terms.size(); + if (points.empty() || n == 0u) { + return; + } + if (try_evaluate_low_order_strided( + data, points, output_stride, values_out, gradients_out, hessians_out)) { + return; + } + + auto& scratch = evaluation_scratch(); + auto& modal_values = scratch.modal_values; + auto& modal_gradients = scratch.modal_gradients; + auto& modal_hessians = scratch.modal_hessians; + auto& modal_point = scratch.modal_point; + if constexpr (NeedValues) { + modal_values.resize(n); + } + if constexpr (NeedGradients) { + modal_gradients.resize(n); + } + if constexpr (NeedHessians) { + modal_hessians.resize(n); + } + const bool use_fast_modal_to_nodal = has_low_order_fast_modal_to_nodal(data); + + if (!use_fast_modal_to_nodal) { + bool has_apex_query = false; + for (const auto& xi : points) { + validate_top_plane_query(xi); + has_apex_query = has_apex_query || is_apex_point(xi); + } + + if (!has_apex_query) { + const std::size_t num_qpts = points.size(); + if constexpr (NeedValues) { + modal_values.resize(n * num_qpts); + } + if constexpr (NeedGradients) { + 
scratch.modal_gradient_components.resize(n * 3u * num_qpts); + } + if constexpr (NeedHessians) { + scratch.modal_hessian_components.resize(n * 9u * num_qpts); + } + + for (std::size_t q = 0; q < num_qpts; ++q) { + const auto& xi = points[q]; + pyramid_modal::prepare_evaluation_point(data.modal_terms, xi, modal_point); + for (std::size_t modal_j = 0; modal_j < n; ++modal_j) { + Real modal_value = Real(0); + Gradient modal_gradient{}; + Hessian modal_hessian{}; + pyramid_modal::evaluate_term( + data.modal_terms[modal_j], + modal_point, + modal_value, + NeedGradients ? &modal_gradient : nullptr, + NeedHessians ? &modal_hessian : nullptr); + if constexpr (NeedValues) { + modal_values[modal_j * num_qpts + q] = modal_value; + } + if constexpr (NeedGradients) { + for (std::size_t component = 0; component < 3u; ++component) { + scratch.modal_gradient_components[ + (modal_j * 3u + component) * num_qpts + q] = + modal_gradient[component]; + } + } + if constexpr (NeedHessians) { + for (std::size_t component = 0; component < 9u; ++component) { + scratch.modal_hessian_components[ + (modal_j * 9u + component) * num_qpts + q] = + modal_hessian.data()[component]; + } + } + } + } + + const Real* transform = data.modal_to_nodal.data(); + if constexpr (NeedValues) { + math::dense_transform_batched_row_major( + transform, + n, + n, + modal_values.data(), + num_qpts, + values_out, + output_stride, + num_qpts); + } + if constexpr (NeedGradients) { + for (std::size_t component = 0; component < 3u; ++component) { + math::dense_transform_batched_row_major( + transform, + n, + n, + scratch.modal_gradient_components.data() + component * num_qpts, + 3u * num_qpts, + gradients_out + component * output_stride, + 3u * output_stride, + num_qpts); + } + } + if constexpr (NeedHessians) { + for (std::size_t component = 0; component < 9u; ++component) { + math::dense_transform_batched_row_major( + transform, + n, + n, + scratch.modal_hessian_components.data() + component * num_qpts, + 9u * num_qpts, 
+ hessians_out + component * output_stride, + 9u * output_stride, + num_qpts); + } + } + return; + } + } + + for (std::size_t q = 0; q < points.size(); ++q) { + const auto& xi = points[q]; + validate_top_plane_query(xi); + + if (is_apex_point(xi)) { + write_apex_strided( + data, q, output_stride, values_out, gradients_out, hessians_out); + continue; + } + + pyramid_modal::prepare_evaluation_point(data.modal_terms, xi, modal_point); + for (std::size_t modal_j = 0; modal_j < n; ++modal_j) { + Gradient* gradient_out = nullptr; + Hessian* hessian_out = nullptr; + if constexpr (NeedGradients) { + gradient_out = &modal_gradients[modal_j]; + } + if constexpr (NeedHessians) { + hessian_out = &modal_hessians[modal_j]; + } + if constexpr (NeedValues) { + pyramid_modal::evaluate_term( + data.modal_terms[modal_j], + modal_point, + modal_values[modal_j], + gradient_out, + hessian_out); + } else { + Real value = Real(0); + pyramid_modal::evaluate_term( + data.modal_terms[modal_j], + modal_point, + value, + gradient_out, + hessian_out); + } + } + + if (use_fast_modal_to_nodal) { + if constexpr (NeedValues) { + apply_low_order_combination( + data, + 1u, + [&](std::size_t modal_i, std::size_t) { + return modal_values[modal_i]; + }, + [&](std::size_t basis_i, std::size_t, Real value) { + values_out[basis_i * output_stride + q] = value; + }); + } + if constexpr (NeedGradients) { + apply_low_order_combination( + data, + 3u, + [&](std::size_t modal_i, std::size_t component) { + return modal_gradients[modal_i][component]; + }, + [&](std::size_t basis_i, std::size_t component, Real value) { + gradients_out[basis_i * 3u * output_stride + + component * output_stride + q] = value; + }); + } + if constexpr (NeedHessians) { + apply_low_order_combination( + data, + 9u, + [&](std::size_t modal_i, std::size_t component) { + return modal_hessians[modal_i].data()[component]; + }, + [&](std::size_t basis_i, std::size_t component, Real value) { + hessians_out[basis_i * 9u * output_stride + + 
component * output_stride + q] = value; + }); + } + continue; + } + + for (std::size_t basis_i = 0; basis_i < n; ++basis_i) { + const Real* matrix_row = data.modal_to_nodal.data() + basis_i * n; + [[maybe_unused]] Real value = Real(0); + [[maybe_unused]] std::array gradient{}; + [[maybe_unused]] std::array hessian{}; + + for (std::size_t modal_j = 0; modal_j < n; ++modal_j) { + const Real coeff = matrix_row[modal_j]; + if constexpr (NeedValues) { + value += coeff * modal_values[modal_j]; + } + if constexpr (NeedGradients) { + const Real* modal_gradient = modal_gradients[modal_j].data(); + gradient[0] += coeff * modal_gradient[0]; + gradient[1] += coeff * modal_gradient[1]; + gradient[2] += coeff * modal_gradient[2]; + } + if constexpr (NeedHessians) { + const Real* modal_hessian = modal_hessians[modal_j].data(); + for (std::size_t component = 0; component < 9u; ++component) { + hessian[component] += coeff * modal_hessian[component]; + } + } + } + + if constexpr (NeedValues) { + values_out[basis_i * output_stride + q] = value; + } + if constexpr (NeedGradients) { + Real* g = gradients_out + basis_i * 3u * output_stride; + g[0u * output_stride + q] = gradient[0]; + g[1u * output_stride + q] = gradient[1]; + g[2u * output_stride + q] = gradient[2]; + } + if constexpr (NeedHessians) { + Real* H = hessians_out + basis_i * 9u * output_stride; + for (std::size_t component = 0; component < 9u; ++component) { + H[component * output_stride + q] = hessian[component]; + } + } + } + } + } + + static Real apex_coord_tolerance() noexcept { + return basis_scaled_tolerance(); + } + + // Coefficient pruning for symbolic apex series, not a reference-coordinate + // roundoff test. Keep this strict and separate from BasisTolerance. 
+ static constexpr Real kSeriesTolerance = Real(1e-12); + + static Real binomial_coeff(int n, int k) { + if (k < 0 || k > n) { + return Real(0); + } + if (k == 0 || k == n) { + return Real(1); + } + k = std::min(k, n - k); + Real coeff = Real(1); + for (int i = 1; i <= k; ++i) { + coeff *= static_cast(n - (k - i)); + coeff /= static_cast(i); + } + return coeff; + } + + static void add_z_expansion(ApexSeries& series, + int z_power, + int beta0, + int pu, + int pv, + Real coeff) { + for (int q = 0; q <= z_power; ++q) { + const Real z_coeff = coeff * binomial_coeff(z_power, q) * + ((q % 2 == 0) ? Real(1) : Real(-1)); + series.add_term(beta0 + q, pu, pv, z_coeff, kSeriesTolerance); + } + } + + static ApexSeries modal_value_asymptotic(const ModalTerm& term) { + ApexSeries series; + add_z_expansion(series, + term.pz, + term.px + term.py - term.denom_power, + term.px, + term.py, + Real(1)); + return series; + } + + static GradientSeries modal_gradient_asymptotic(const ModalTerm& term) { + GradientSeries gradient_series{}; + + if (term.px > 0) { + add_z_expansion(gradient_series[0], + term.pz, + term.px - 1 + term.py - term.denom_power, + term.px - 1, + term.py, + static_cast(term.px)); + } + + if (term.py > 0) { + add_z_expansion(gradient_series[1], + term.pz, + term.px + term.py - 1 - term.denom_power, + term.px, + term.py - 1, + static_cast(term.py)); + } + + if (term.pz > 0) { + add_z_expansion(gradient_series[2], + term.pz - 1, + term.px + term.py - term.denom_power, + term.px, + term.py, + static_cast(term.pz)); + } + if (term.denom_power > 0) { + add_z_expansion(gradient_series[2], + term.pz, + term.px + term.py - term.denom_power - 1, + term.px, + term.py, + static_cast(term.denom_power)); + } + + return gradient_series; + } + + static HessianSeries modal_hessian_asymptotic(const ModalTerm& term) { + HessianSeries hessian_series{}; + + if (term.px > 1) { + add_z_expansion(hessian_series[0][0], + term.pz, + term.px - 2 + term.py - term.denom_power, + term.px - 2, + 
term.py, + static_cast(term.px * (term.px - 1))); + } + + if (term.py > 1) { + add_z_expansion(hessian_series[1][1], + term.pz, + term.px + term.py - 2 - term.denom_power, + term.px, + term.py - 2, + static_cast(term.py * (term.py - 1))); + } + + if (term.px > 0 && term.py > 0) { + add_z_expansion(hessian_series[0][1], + term.pz, + term.px + term.py - 2 - term.denom_power, + term.px - 1, + term.py - 1, + static_cast(term.px * term.py)); + hessian_series[1][0] = hessian_series[0][1]; + } + + if (term.px > 0 && term.pz > 0) { + add_z_expansion(hessian_series[0][2], + term.pz - 1, + term.px - 1 + term.py - term.denom_power, + term.px - 1, + term.py, + static_cast(term.px * term.pz)); + } + if (term.px > 0 && term.denom_power > 0) { + add_z_expansion(hessian_series[0][2], + term.pz, + term.px - 1 + term.py - term.denom_power - 1, + term.px - 1, + term.py, + static_cast(term.px * term.denom_power)); + } + hessian_series[2][0] = hessian_series[0][2]; + + if (term.py > 0 && term.pz > 0) { + add_z_expansion(hessian_series[1][2], + term.pz - 1, + term.px + term.py - 1 - term.denom_power, + term.px, + term.py - 1, + static_cast(term.py * term.pz)); + } + if (term.py > 0 && term.denom_power > 0) { + add_z_expansion(hessian_series[1][2], + term.pz, + term.px + term.py - 1 - term.denom_power - 1, + term.px, + term.py - 1, + static_cast(term.py * term.denom_power)); + } + hessian_series[2][1] = hessian_series[1][2]; + + if (term.pz > 1) { + add_z_expansion(hessian_series[2][2], + term.pz - 2, + term.px + term.py - term.denom_power, + term.px, + term.py, + static_cast(term.pz * (term.pz - 1))); + } + if (term.pz > 0 && term.denom_power > 0) { + add_z_expansion(hessian_series[2][2], + term.pz - 1, + term.px + term.py - term.denom_power - 1, + term.px, + term.py, + static_cast(2 * term.pz * term.denom_power)); + } + if (term.denom_power > 0) { + add_z_expansion(hessian_series[2][2], + term.pz, + term.px + term.py - term.denom_power - 2, + term.px, + term.py, + 
static_cast(term.denom_power * (term.denom_power + 1))); + } + + return hessian_series; + } + + static ApexClassification classify_series(const ApexSeries& series) { + for (const auto& [beta, poly] : series.by_power) { + if (poly.empty(kSeriesTolerance)) { + continue; + } + if (beta < 0) { + return {ApexLimitKind::Singular, Real(0), beta}; + } + if (beta > 0) { + return {ApexLimitKind::Constant, Real(0), beta}; + } + if (poly.is_constant(kSeriesTolerance)) { + return {ApexLimitKind::Constant, poly.constant_value(kSeriesTolerance), beta}; + } + return {ApexLimitKind::DirectionDependent, Real(0), beta}; + } + return {ApexLimitKind::Constant, Real(0), 1}; + } + + static void accumulate_rank_status(ApexRankStatus& status, + const ApexClassification& classification) { + if (classification.kind == ApexLimitKind::Singular) { + status = ApexRankStatus::Singular; + return; + } + if (classification.kind == ApexLimitKind::DirectionDependent && + status != ApexRankStatus::Singular) { + status = ApexRankStatus::DirectionDependent; + } + } + + static std::string apex_status_message(const char* rank, + ApexRankStatus status) { + switch (status) { + case ApexRankStatus::DirectionDependent: + return std::string("Pyramid rational nodal ") + rank + + " at the exact apex is not uniquely defined under admissible interior approaches"; + case ApexRankStatus::Singular: + return std::string("Pyramid rational nodal ") + rank + + " at the exact apex is singular for this basis family"; + case ApexRankStatus::Exact: + return std::string("Pyramid rational nodal ") + rank + + " apex evaluation unexpectedly reported non-exact status"; + } + return std::string("Pyramid rational nodal ") + rank + + " apex evaluation is not available"; + } + + static ApexData build_apex_data(const OrderData& data) { + const std::size_t n = data.modal_terms.size(); + + std::vector modal_values(n); + std::vector modal_gradients(n); + std::vector modal_hessians(n); + for (std::size_t m = 0; m < n; ++m) { + 
modal_values[m] = modal_value_asymptotic(data.modal_terms[m]); + modal_gradients[m] = modal_gradient_asymptotic(data.modal_terms[m]); + modal_hessians[m] = modal_hessian_asymptotic(data.modal_terms[m]); + } + + std::vector nodal_values(n); + std::vector nodal_gradients(n); + std::vector nodal_hessians(n); + for (std::size_t i = 0; i < n; ++i) { + for (std::size_t m = 0; m < n; ++m) { + const Real coeff = data.modal_to_nodal[i * n + m]; + nodal_values[i].add_scaled(modal_values[m], coeff, kSeriesTolerance); + for (int d = 0; d < 3; ++d) { + nodal_gradients[i][static_cast(d)].add_scaled( + modal_gradients[m][static_cast(d)], coeff, kSeriesTolerance); + } + for (int r = 0; r < 3; ++r) { + for (int c = 0; c < 3; ++c) { + nodal_hessians[i][static_cast(r)][static_cast(c)] + .add_scaled( + modal_hessians[m][static_cast(r)][static_cast(c)], + coeff, + kSeriesTolerance); + } + } + } + } + + ApexData apex; + apex.values.assign(n, Real(0)); + apex.gradients.assign(n, Gradient{}); + apex.hessians.assign(n, Hessian{}); + + for (std::size_t i = 0; i < n; ++i) { + const ApexClassification value_class = classify_series(nodal_values[i]); + if (value_class.kind != ApexLimitKind::Constant) { + throw BasisConstructionException( + "Pyramid nodal value at apex is not uniquely defined for basis index " + + std::to_string(i), + __FILE__, __LINE__, __func__); + } + apex.values[i] = value_class.constant_value; + + for (int d = 0; d < 3; ++d) { + const ApexClassification grad_class = classify_series( + nodal_gradients[i][static_cast(d)]); + accumulate_rank_status(apex.gradient_status, grad_class); + if (grad_class.kind == ApexLimitKind::Constant) { + apex.gradients[i][static_cast(d)] = grad_class.constant_value; + } + } + + for (int r = 0; r < 3; ++r) { + for (int c = 0; c < 3; ++c) { + const ApexClassification hess_class = classify_series( + nodal_hessians[i][static_cast(r)][static_cast(c)]); + accumulate_rank_status(apex.hessian_status, hess_class); + if (hess_class.kind == 
ApexLimitKind::Constant) { + apex.hessians[i](static_cast(r), + static_cast(c)) = hess_class.constant_value; + } + } + } + } + + if (apex.gradient_status != ApexRankStatus::Exact) { + apex.gradients.clear(); + } + if (apex.hessian_status != ApexRankStatus::Exact) { + apex.hessians.clear(); + } + + return apex; + } + + static std::vector> build_public_nodes(int order) { + if (order == 0) { + return {math::Vector{Real(0), Real(0), Real(0.25)}}; + } + + std::vector> nodes; + nodes.reserve(static_cast((order + 1) * (order + 2) * (2 * order + 3) / 6)); + + nodes.push_back(math::Vector{Real(-1), Real(-1), Real(0)}); + nodes.push_back(math::Vector{Real(1), Real(-1), Real(0)}); + nodes.push_back(math::Vector{Real(1), Real(1), Real(0)}); + nodes.push_back(math::Vector{Real(-1), Real(1), Real(0)}); + nodes.push_back(math::Vector{Real(0), Real(0), Real(1)}); + + for (int m = 1; m < order; ++m) { + nodes.push_back(math::Vector{equispaced_pm_one_coord(m, order), Real(-1), Real(0)}); + } + for (int m = 1; m < order; ++m) { + nodes.push_back(math::Vector{Real(1), equispaced_pm_one_coord(m, order), Real(0)}); + } + for (int m = order - 1; m >= 1; --m) { + nodes.push_back(math::Vector{equispaced_pm_one_coord(m, order), Real(1), Real(0)}); + } + for (int m = order - 1; m >= 1; --m) { + nodes.push_back(math::Vector{Real(-1), equispaced_pm_one_coord(m, order), Real(0)}); + } + + for (int level = 1; level < order; ++level) { + const Real z = static_cast(level) / static_cast(order); + const Real scale = Real(1) - z; + nodes.push_back(math::Vector{-scale, -scale, z}); + nodes.push_back(math::Vector{scale, -scale, z}); + nodes.push_back(math::Vector{scale, scale, z}); + nodes.push_back(math::Vector{-scale, scale, z}); + } + + for (int j = 1; j < order; ++j) { + for (int i = 1; i < order; ++i) { + nodes.push_back(math::Vector{equispaced_pm_one_coord(i, order), + equispaced_pm_one_coord(j, order), + Real(0)}); + } + } + + for (int level = 1; level < order - 1; ++level) { + const int n = 
order - level; + const Real z = static_cast(level) / static_cast(order); + const Real scale = Real(1) - z; + + for (int m = 1; m < n; ++m) { + const Real s = equispaced_pm_one_coord(m, n) * scale; + nodes.push_back(math::Vector{s, -scale, z}); + } + for (int m = 1; m < n; ++m) { + const Real s = equispaced_pm_one_coord(m, n) * scale; + nodes.push_back(math::Vector{scale, s, z}); + } + for (int m = n - 1; m >= 1; --m) { + const Real s = equispaced_pm_one_coord(m, n) * scale; + nodes.push_back(math::Vector{s, scale, z}); + } + for (int m = n - 1; m >= 1; --m) { + const Real s = equispaced_pm_one_coord(m, n) * scale; + nodes.push_back(math::Vector{-scale, s, z}); + } + } + + for (int level = 1; level < order - 1; ++level) { + const int n = order - level; + const Real z = static_cast(level) / static_cast(order); + const Real scale = Real(1) - z; + for (int j = 1; j < n; ++j) { + for (int i = 1; i < n; ++i) { + nodes.push_back(math::Vector{equispaced_pm_one_coord(i, n) * scale, + equispaced_pm_one_coord(j, n) * scale, + z}); + } + } + } + + return nodes; + } + + struct VectorValueSink { + std::vector& output; + void resize(std::size_t n) const { output.resize(n); } + void write(std::size_t i, Real value) const { output[i] = value; } + }; + + struct RawValueSink { + Real* output; + void resize(std::size_t) const {} + void write(std::size_t i, Real value) const { output[i] = value; } + }; + + struct VectorGradientSink { + std::vector& output; + void resize(std::size_t n) const { output.resize(n); } + void write(std::size_t i, const Gradient& value) const { output[i] = value; } + }; + + struct RawGradientSink { + Real* output; + void resize(std::size_t) const {} + void write(std::size_t i, const Gradient& value) const { + Real* dst = output + i * 3u; + dst[0] = value[0]; + dst[1] = value[1]; + dst[2] = value[2]; + } + }; + + struct VectorHessianSink { + std::vector& output; + void resize(std::size_t n) const { output.resize(n); } + void write(std::size_t i, const Hessian& 
value) const { output[i] = value; } + }; + + struct RawHessianSink { + Real* output; + void resize(std::size_t) const {} + void write(std::size_t i, const Hessian& value) const { + store_hessian(value, output + i * 9u); + } + }; + + template + static void apply_order1_combination(std::size_t components, + const Get& get, + const Set& set) { + for (std::size_t c = 0; c < components; ++c) { + const Real m0 = get(0u, c); + const Real m1 = get(1u, c); + const Real m2 = get(2u, c); + const Real m3 = get(3u, c); + const Real m4 = get(4u, c); + set(0u, c, Real(0.25) * (m0 - m1 - m2 + m3 - m4)); + set(1u, c, Real(0.25) * (m0 + m1 - m2 - m3 - m4)); + set(2u, c, Real(0.25) * (m0 + m1 + m2 + m3 - m4)); + set(3u, c, Real(0.25) * (m0 - m1 + m2 - m3 - m4)); + set(4u, c, m4); + } + } + + template + static void apply_order2_combination(std::size_t components, + const Get& get, + const Set& set) { + for (std::size_t c = 0; c < components; ++c) { + const Real m0 = get(0u, c); + const Real m1 = get(1u, c); + const Real m2 = get(2u, c); + const Real m3 = get(3u, c); + const Real m4 = get(4u, c); + const Real m5 = get(5u, c); + const Real m6 = get(6u, c); + const Real m7 = get(7u, c); + const Real m8 = get(8u, c); + const Real m9 = get(9u, c); + const Real m10 = get(10u, c); + const Real m11 = get(11u, c); + const Real m12 = get(12u, c); + const Real m13 = get(13u, c); + set(0u, c, Real(0.25) * (m4 - m5 - m7 + m8 - m9 + m10 + m11 - Real(2) * m12 + m13)); + set(1u, c, Real(0.25) * (-m4 - m5 + m7 + m8 - m9 - m10 + m11 + Real(2) * m12 + m13)); + set(2u, c, Real(0.25) * (m4 + m5 + m7 + m8 - m9 - m10 - m11 - Real(2) * m12 + m13)); + set(3u, c, Real(0.25) * (-m4 + m5 - m7 + m8 - m9 + m10 - m11 + Real(2) * m12 + m13)); + set(4u, c, -m9 + Real(2) * m13); + set(5u, c, Real(0.5) * (-m3 + m5 + m6 - m8 + m11)); + set(6u, c, Real(0.5) * (m1 + m2 - m7 - m8 - m10)); + set(7u, c, Real(0.5) * (m3 - m5 + m6 - m8 - m11)); + set(8u, c, Real(0.5) * (-m1 + m2 + m7 - m8 + m10)); + set(9u, c, m9 - m10 - m11 + 
m12 - m13); + set(10u, c, m9 + m10 - m11 - m12 - m13); + set(11u, c, m9 + m10 + m11 + m12 - m13); + set(12u, c, m9 - m10 + m11 - m12 - m13); + set(13u, c, m0 - m2 - m6 + m8 - Real(2) * m9 + m13); + } + } + + template + static void apply_low_order_combination(const OrderData& data, + std::size_t components, + const Get& get, + const Set& set) { + if (data.order == 1) { + apply_order1_combination(components, get, set); + return; + } + apply_order2_combination(components, get, set); + } + + static void apply_sparse_basis_to_nodal(const OrderData& data, + const std::vector& modal_values, + std::vector& nodal_values) { + const std::size_t n = modal_values.size(); + nodal_values.resize(n); + apply_low_order_combination( + data, + 1u, + [&](std::size_t modal_i, std::size_t) { return modal_values[modal_i]; }, + [&](std::size_t basis_i, std::size_t, Real value) { nodal_values[basis_i] = value; }); + } + + static void apply_sparse_basis_to_nodal_to(const OrderData& data, + const std::vector& modal_values, + Real* SVMP_RESTRICT nodal_values) { + apply_low_order_combination( + data, + 1u, + [&](std::size_t modal_i, std::size_t) { return modal_values[modal_i]; }, + [&](std::size_t basis_i, std::size_t, Real value) { nodal_values[basis_i] = value; }); + } + + static void apply_sparse_basis_to_nodal(const OrderData& data, + const std::vector& modal_gradients, + std::vector& nodal_gradients) { + const std::size_t n = modal_gradients.size(); + nodal_gradients.resize(n); + apply_low_order_combination( + data, + 3u, + [&](std::size_t modal_i, std::size_t component) { + return modal_gradients[modal_i][component]; + }, + [&](std::size_t basis_i, std::size_t component, Real value) { + nodal_gradients[basis_i][component] = value; + }); + } + + static void apply_sparse_basis_to_nodal_to(const OrderData& data, + const std::vector& modal_gradients, + Real* SVMP_RESTRICT nodal_gradients) { + apply_low_order_combination( + data, + 3u, + [&](std::size_t modal_i, std::size_t component) { + 
return modal_gradients[modal_i][component]; + }, + [&](std::size_t basis_i, std::size_t component, Real value) { + nodal_gradients[basis_i * 3u + component] = value; + }); + } + + static void apply_sparse_basis_to_nodal(const OrderData& data, + const std::vector& modal_hessians, + std::vector& nodal_hessians) { + const std::size_t n = modal_hessians.size(); + nodal_hessians.resize(n); + apply_low_order_combination( + data, + 9u, + [&](std::size_t modal_i, std::size_t component) { + return modal_hessians[modal_i].data()[component]; + }, + [&](std::size_t basis_i, std::size_t component, Real value) { + nodal_hessians[basis_i].data()[component] = value; + }); + } + + static void apply_sparse_basis_to_nodal_to(const OrderData& data, + const std::vector& modal_hessians, + Real* SVMP_RESTRICT nodal_hessians) { + apply_low_order_combination( + data, + 9u, + [&](std::size_t modal_i, std::size_t component) { + return modal_hessians[modal_i].data()[component]; + }, + [&](std::size_t basis_i, std::size_t component, Real value) { + nodal_hessians[basis_i * 9u + component] = value; + }); + } + + static void apply_sparse_basis_to_nodal_all( + const OrderData& data, + const std::vector& modal_values, + const std::vector& modal_gradients, + const std::vector& modal_hessians, + std::vector& nodal_values, + std::vector& nodal_gradients, + std::vector& nodal_hessians) { + const std::size_t n = modal_values.size(); + nodal_values.resize(n); + nodal_gradients.resize(n); + nodal_hessians.resize(n); + apply_low_order_combination( + data, + 1u, + [&](std::size_t modal_i, std::size_t) { return modal_values[modal_i]; }, + [&](std::size_t basis_i, std::size_t, Real value) { nodal_values[basis_i] = value; }); + apply_low_order_combination( + data, + 3u, + [&](std::size_t modal_i, std::size_t component) { + return modal_gradients[modal_i][component]; + }, + [&](std::size_t basis_i, std::size_t component, Real value) { + nodal_gradients[basis_i][component] = value; + }); + 
apply_low_order_combination( + data, + 9u, + [&](std::size_t modal_i, std::size_t component) { + return modal_hessians[modal_i].data()[component]; + }, + [&](std::size_t basis_i, std::size_t component, Real value) { + nodal_hessians[basis_i].data()[component] = value; + }); + } + + static void apply_sparse_basis_to_nodal_all_to( + const OrderData& data, + const std::vector& modal_values, + const std::vector& modal_gradients, + const std::vector& modal_hessians, + Real* SVMP_RESTRICT nodal_values, + Real* SVMP_RESTRICT nodal_gradients, + Real* SVMP_RESTRICT nodal_hessians) { + apply_low_order_combination( + data, + 1u, + [&](std::size_t modal_i, std::size_t) { return modal_values[modal_i]; }, + [&](std::size_t basis_i, std::size_t, Real value) { nodal_values[basis_i] = value; }); + apply_low_order_combination( + data, + 3u, + [&](std::size_t modal_i, std::size_t component) { + return modal_gradients[modal_i][component]; + }, + [&](std::size_t basis_i, std::size_t component, Real value) { + nodal_gradients[basis_i * 3u + component] = value; + }); + apply_low_order_combination( + data, + 9u, + [&](std::size_t modal_i, std::size_t component) { + return modal_hessians[modal_i].data()[component]; + }, + [&](std::size_t basis_i, std::size_t component, Real value) { + nodal_hessians[basis_i * 9u + component] = value; + }); + } + + template + // Keep modal transform helpers free of forced-inline attributes unless + // compiler-versioned benchmarks and LLVM IR checks show a stable benefit. 
+    static void apply_modal_values_to_nodal(const OrderData& data,  // dense product: out[i] = sum_j modal_to_nodal[i*n + j] * modal_values[j]
+                                            const std::vector& modal_values,
+                                            const Sink& sink) {  // output adapter; only resize(n) and write(i, value) are used
+        const std::size_t n = modal_values.size();
+        sink.resize(n);  // what resizing means is up to the sink type (defined outside this view)
+        for (std::size_t basis_i = 0; basis_i < n; ++basis_i) {
+            const Real* row = data.modal_to_nodal.data() + basis_i * n;  // row basis_i of a table read as n rows of n coefficients
+            Real value = Real(0);
+            for (std::size_t modal_j = 0; modal_j < n; ++modal_j) {
+                value += row[modal_j] * modal_values[modal_j];
+            }
+            sink.write(basis_i, value);
+        }
+    }
+
+    template  // Same dense product as the scalar version, applied to each of the 3 gradient components.
+    static void apply_modal_gradients_to_nodal(const OrderData& data,
+                                               const std::vector& modal_gradients,
+                                               const Sink& sink) {
+        const std::size_t n = modal_gradients.size();
+        sink.resize(n);
+        for (std::size_t basis_i = 0; basis_i < n; ++basis_i) {
+            const Real* row = data.modal_to_nodal.data() + basis_i * n;
+            Gradient gradient{};  // accumulator; NOTE(review): relies on {} zeroing the components -- confirm against Gradient's definition
+            for (std::size_t modal_j = 0; modal_j < n; ++modal_j) {
+                const Real coeff = row[modal_j];  // one coefficient is shared by all three components
+                for (std::size_t component = 0; component < 3u; ++component) {
+                    gradient[component] += coeff * modal_gradients[modal_j][component];
+                }
+            }
+            sink.write(basis_i, gradient);
+        }
+    }
+
+    template  // Same dense product, applied to each of the 3x3 Hessian entries.
+    static void apply_modal_hessians_to_nodal(const OrderData& data,
+                                              const std::vector& modal_hessians,
+                                              const Sink& sink) {
+        const std::size_t n = modal_hessians.size();
+        sink.resize(n);
+        for (std::size_t basis_i = 0; basis_i < n; ++basis_i) {
+            const Real* matrix_row = data.modal_to_nodal.data() + basis_i * n;  // named matrix_row because `row` is the Hessian row index below
+            Hessian hessian{};  // accumulator; NOTE(review): relies on {} zeroing the entries -- confirm against Hessian's definition
+            for (std::size_t modal_j = 0; modal_j < n; ++modal_j) {
+                const Real coeff = matrix_row[modal_j];
+                for (std::size_t row = 0; row < 3u; ++row) {
+                    for (std::size_t col = 0; col < 3u; ++col) {
+                        hessian(row, col) += coeff * modal_hessians[modal_j](row, col);
+                    }
+                }
+            }
+            sink.write(basis_i, hessian);
+        }
+    }
+
+    static void apply_modal_to_nodal(const OrderData& data,  // scalar overload writing into a std::vector through VectorValueSink
+                                     const std::vector& modal_values,
+                                     std::vector& nodal_values) {
+        apply_modal_values_to_nodal(data, modal_values, VectorValueSink{nodal_values});
+    }
+
+    static void
apply_modal_to_nodal(const OrderData& data,
+                                     const std::vector& modal_gradients,
+                                     std::vector& nodal_gradients) {
+        apply_modal_gradients_to_nodal(data, modal_gradients, VectorGradientSink{nodal_gradients});  // gradient overload, std::vector output
+    }
+
+    static void apply_modal_to_nodal(const OrderData& data,  // Hessian overload, std::vector output
+                                     const std::vector& modal_hessians,
+                                     std::vector& nodal_hessians) {
+        apply_modal_hessians_to_nodal(data, modal_hessians, VectorHessianSink{nodal_hessians});
+    }
+
+    static void apply_modal_to_nodal_to(const OrderData& data,  // scalar overload, raw-pointer output via RawValueSink
+                                        const std::vector& modal_values,
+                                        Real* nodal_values) {
+        apply_modal_values_to_nodal(data, modal_values, RawValueSink{nodal_values});  // NOTE(review): the kernel also calls sink.resize(n); presumably a no-op for raw sinks -- confirm
+    }
+
+    static void apply_modal_to_nodal_to(const OrderData& data,  // gradient overload, raw-pointer output via RawGradientSink
+                                        const std::vector& modal_gradients,
+                                        Real* nodal_gradients) {
+        apply_modal_gradients_to_nodal(data, modal_gradients, RawGradientSink{nodal_gradients});
+    }
+
+    static void apply_modal_to_nodal_to(const OrderData& data,  // Hessian overload, raw-pointer output via RawHessianSink
+                                        const std::vector& modal_hessians,
+                                        Real* nodal_hessians) {
+        apply_modal_hessians_to_nodal(data, modal_hessians, RawHessianSink{nodal_hessians});
+    }
+};
+
+namespace lagrange_pyramid {  // free-function entry points; each one fetches the per-order data and forwards to PyramidLagrangeCache
+
+const std::vector>& nodes(int order) {  // returns a reference to the `nodes` member of the data obtained for `order`
+    return PyramidLagrangeCache::get(order).nodes;
+}
+
+void prewarm_scratch(int order, std::size_t max_qpts) {
+    const auto& data = PyramidLagrangeCache::get(order);
+    PyramidLagrangeCache::prewarm_scratch(data.modal_terms.size(), max_qpts);  // scratch is sized from the modal term count and max_qpts
+}
+
+void evaluate_values(int order,  // basis values at one reference point xi, std::vector output
+                     const math::Vector& xi,
+                     std::vector& values) {
+    const auto& data = PyramidLagrangeCache::get(order);
+    PyramidLagrangeCache::evaluate_values(data, xi, values);
+}
+
+void evaluate_gradients(int order,  // basis gradients at one reference point xi, std::vector output
+                        const math::Vector& xi,
+                        std::vector& gradients) {
+    const auto& data = PyramidLagrangeCache::get(order);
+    PyramidLagrangeCache::evaluate_gradients(data, xi, gradients);
+}
+
+void evaluate_hessians(int order,
+                       const math::Vector& xi,
+                       std::vector& hessians) {
+    const auto& data = PyramidLagrangeCache::get(order);
+
PyramidLagrangeCache::evaluate_hessians(data, xi, hessians); +} + +void evaluate_all(int order, + const math::Vector& xi, + std::vector& values, + std::vector& gradients, + std::vector& hessians) { + const auto& data = PyramidLagrangeCache::get(order); + PyramidLagrangeCache::evaluate_all(data, xi, values, gradients, hessians); +} + +void evaluate_values_to(int order, + const math::Vector& xi, + Real* SVMP_RESTRICT values_out) { + const auto& data = PyramidLagrangeCache::get(order); + PyramidLagrangeCache::evaluate_values_to(data, xi, values_out); +} + +void evaluate_gradients_to(int order, + const math::Vector& xi, + Real* SVMP_RESTRICT gradients_out) { + const auto& data = PyramidLagrangeCache::get(order); + PyramidLagrangeCache::evaluate_gradients_to(data, xi, gradients_out); +} + +void evaluate_hessians_to(int order, + const math::Vector& xi, + Real* SVMP_RESTRICT hessians_out) { + const auto& data = PyramidLagrangeCache::get(order); + PyramidLagrangeCache::evaluate_hessians_to(data, xi, hessians_out); +} + +void evaluate_all_to(int order, + const math::Vector& xi, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + const auto& data = PyramidLagrangeCache::get(order); + PyramidLagrangeCache::evaluate_all_to(data, xi, values_out, gradients_out, hessians_out); +} + +void evaluate_at_quadrature_points_strided( + int order, + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + const auto& data = PyramidLagrangeCache::get(order); + PyramidLagrangeCache::evaluate_at_quadrature_points_strided( + data, points, output_stride, values_out, gradients_out, hessians_out); +} + +} // namespace lagrange_pyramid + +} // namespace detail +} // namespace basis +} // namespace FE +} // namespace svmp diff --git a/Code/Source/solver/FE/Basis/LagrangeBasisPyramid.h 
b/Code/Source/solver/FE/Basis/LagrangeBasisPyramid.h new file mode 100644 index 000000000..76859501c --- /dev/null +++ b/Code/Source/solver/FE/Basis/LagrangeBasisPyramid.h @@ -0,0 +1,67 @@ +#ifndef SVMP_FE_BASIS_LAGRANGEBASISPYRAMID_H +#define SVMP_FE_BASIS_LAGRANGEBASISPYRAMID_H + +// Private declarations for the rational pyramid Lagrange helper implemented in +// LagrangeBasisPyramid.cpp. This header is intentionally small so the large +// construction and apex-classification code stays out of LagrangeBasis.cpp. + +#include "BasisFunction.h" + +#include +#include + +namespace svmp { +namespace FE { +namespace basis { +namespace detail { +namespace lagrange_pyramid { + +const std::vector>& nodes(int order); + +void prewarm_scratch(int order, std::size_t max_qpts = 0); + +void evaluate_values(int order, + const math::Vector& xi, + std::vector& values); +void evaluate_gradients(int order, + const math::Vector& xi, + std::vector& gradients); +void evaluate_hessians(int order, + const math::Vector& xi, + std::vector& hessians); +void evaluate_all(int order, + const math::Vector& xi, + std::vector& values, + std::vector& gradients, + std::vector& hessians); + +void evaluate_values_to(int order, + const math::Vector& xi, + Real* SVMP_RESTRICT values_out); +void evaluate_gradients_to(int order, + const math::Vector& xi, + Real* SVMP_RESTRICT gradients_out); +void evaluate_hessians_to(int order, + const math::Vector& xi, + Real* SVMP_RESTRICT hessians_out); +void evaluate_all_to(int order, + const math::Vector& xi, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out); + +void evaluate_at_quadrature_points_strided( + int order, + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out); + +} // namespace lagrange_pyramid +} // namespace detail +} // namespace basis +} // namespace FE +} // namespace svmp + +#endif 
// SVMP_FE_BASIS_LAGRANGEBASISPYRAMID_H diff --git a/Code/Source/solver/FE/Basis/LagrangeBasisSimplex.cpp b/Code/Source/solver/FE/Basis/LagrangeBasisSimplex.cpp new file mode 100644 index 000000000..36325576a --- /dev/null +++ b/Code/Source/solver/FE/Basis/LagrangeBasisSimplex.cpp @@ -0,0 +1,2457 @@ +#include "LagrangeBasisSimplex.h" + +#include +#include + +namespace svmp { +namespace FE { +namespace basis { +namespace detail { + +// Falling-factorial (equispaced barycentric) Lagrange factors for simplex nodes. +// +// For a fixed polynomial order p and barycentric coordinate lambda in [0, 1], +// define +// phi_a(lambda) = product_{m=0}^{a-1} (p * lambda - m) / (a - m), a = 0..p +// Then for a multi-index (i0, i1, ..., id) with sum i_k = p, the simplex +// Lagrange basis function is product_k phi_{i_k}(lambda_k), nodal on the +// barycentric lattice. +// +// Output buffers must each be sized to at least p+1 entries; the function +// writes every output slot (no pre-zero required by the caller). 
+template  // Fills phi[0..p] (and optionally dphi, d2phi) with the factors phi_a(lambda) and their lambda-derivatives.
+void simplex_lagrange_factor_sequence_impl(int p,
+                                           Real lambda,
+                                           Real* phi,  // always written, entries 0..p
+                                           Real* dphi,  // written only when NeedFirst is set
+                                           Real* d2phi) {  // written only when NeedSecond is set
+    static_assert(!NeedSecond || NeedFirst,
+                  "second derivative factors require first-derivative recurrence state");
+
+    phi[0] = Real(1);  // a = 0: empty product
+    if constexpr (NeedFirst) {
+        dphi[0] = Real(0);
+    }
+    if constexpr (NeedSecond) {
+        d2phi[0] = Real(0);
+    }
+    if (p == 0) {
+        return;  // only the constant factor exists
+    }
+
+    const Real t = static_cast(p) * lambda;  // recurrence runs in the scaled variable t = p * lambda
+    const Real dt_dlambda = static_cast(p);  // chain-rule factor converting d/dt into d/dlambda
+
+    Real dphi_dt_prev = Real(0);  // d(phi_{a-1})/dt carried between iterations
+    Real d2phi_dt2_prev = Real(0);  // d2(phi_{a-1})/dt2 carried between iterations
+
+    for (int a = 1; a <= p; ++a) {
+        const std::size_t au = static_cast(a);
+        const Real inv_a = Real(1) / static_cast(a);
+        const Real s = (t - static_cast(a - 1)) * inv_a;  // newest linear factor (t - (a-1)) / a, with ds/dt = 1/a
+
+        phi[au] = s * phi[au - 1];  // phi_a = s * phi_{a-1}
+
+        if constexpr (NeedFirst) {
+            const Real dphi_dt_old = dphi_dt_prev;  // keep phi_{a-1}' for the second-derivative update below
+            const Real dphi_dt = inv_a * phi[au - 1] + s * dphi_dt_old;  // product rule: s' * phi_{a-1} + s * phi_{a-1}'
+            dphi[au] = dt_dlambda * dphi_dt;
+
+            if constexpr (NeedSecond) {
+                const Real d2phi_dt2 = Real(2) * inv_a * dphi_dt_old + s * d2phi_dt2_prev;  // 2 * s' * phi_{a-1}' + s * phi_{a-1}'' (s'' = 0)
+                d2phi[au] = dt_dlambda * dt_dlambda * d2phi_dt2;  // two chain-rule factors of p
+                d2phi_dt2_prev = d2phi_dt2;
+            }
+
+            dphi_dt_prev = dphi_dt;
+        }
+    }
+}
+
+void simplex_lagrange_factor_sequence(int p,  // runtime front end: the null-ness of dphi / d2phi selects which instantiation runs
+                                      Real lambda,
+                                      Real* phi,
+                                      Real* dphi,
+                                      Real* d2phi) {
+    if (d2phi != nullptr) {
+        simplex_lagrange_factor_sequence_impl(p, lambda, phi, dphi, d2phi);  // NOTE(review): dphi is not null-checked on this path; presumably callers always pass it together with d2phi -- confirm
+    } else if (dphi != nullptr) {
+        simplex_lagrange_factor_sequence_impl(p, lambda, phi, dphi, nullptr);
+    } else {
+        simplex_lagrange_factor_sequence_impl(p, lambda, phi, nullptr, nullptr);
+    }
+}
+
+constexpr int kFixedSimplexAxisOrder = 12;
+constexpr std::size_t kFixedSimplexAxisSize =
+    static_cast(kFixedSimplexAxisOrder + 1);  // order + 1 factor entries per barycentric axis
+constexpr std::size_t kFixedSimplexBatchEntries = 512;
+
+template
+inline void simplex_lagrange_factor_values_product(Real lambda,
+                                                   Real* SVMP_RESTRICT values) {
+    static_assert(Order >= 0, "simplex order must be non-negative");
+    values[0] = Real(1);
+    const Real t = static_cast(Order) * lambda;
+    for
(int a = 1; a <= Order; ++a) {
+        const Real inv_a = Real(1) / static_cast(a);
+        values[a] = values[a - 1] * (t - static_cast(a - 1)) * inv_a;
+    }
+}
+
+template  // Triangle basis values at exactly four points; writes values_out[node * output_stride + q] for q = 0..3.
+void evaluate_triangle_simplex_values_q4(
+    const std::vector>& simplex_exponents,  // per node: e[0], e[1], e[2] index the factor tables of l0, l1, l2
+    const std::vector>& points,  // points[0..3] are read without a size check
+    std::size_t output_stride,
+    Real* SVMP_RESTRICT values_out) {
+    static_assert(Order >= 4 && Order <= 8, "specialized simplex path covers orders 4..8");
+
+    Real phi0[4][Order + 1];  // phiK[q][a]: factor a of barycentric coordinate lK at point q
+    Real phi1[4][Order + 1];
+    Real phi2[4][Order + 1];
+
+    for (std::size_t q = 0; q < 4u; ++q) {
+        const auto& xi = points[q];
+        const Real l1 = xi[0];
+        const Real l2 = xi[1];
+        const Real l0 = Real(1) - l1 - l2;  // barycentric coordinates sum to one
+        simplex_lagrange_factor_values_product(l0, phi0[q]);
+        simplex_lagrange_factor_values_product(l1, phi1[q]);
+        simplex_lagrange_factor_values_product(l2, phi2[q]);
+    }
+
+    const std::size_t num_nodes = simplex_exponents.size();
+    for (std::size_t node = 0; node < num_nodes; ++node) {
+        const auto& e = simplex_exponents[node];
+        const std::size_t i0 = static_cast(e[0]);
+        const std::size_t i1 = static_cast(e[1]);
+        const std::size_t i2 = static_cast(e[2]);
+        Real* SVMP_RESTRICT row = values_out + node * output_stride;  // this node's output row; point q goes to row[q]
+        row[0] = phi0[0][i0] * phi1[0][i1] * phi2[0][i2];  // basis value = product of one factor per barycentric axis
+        row[1] = phi0[1][i0] * phi1[1][i1] * phi2[1][i2];
+        row[2] = phi0[2][i0] * phi1[2][i1] * phi2[2][i2];
+        row[3] = phi0[3][i0] * phi1[3][i1] * phi2[3][i2];
+    }
+}
+
+bool try_evaluate_triangle_simplex_values_q4(  // maps a runtime order onto the compile-time specialization; true means a specialization ran
+    const std::vector>& simplex_exponents,
+    int order,
+    const std::vector>& points,
+    std::size_t output_stride,
+    Real* SVMP_RESTRICT values_out) {
+    switch (order) {
+        case 4:
+            evaluate_triangle_simplex_values_q4<4>(
+                simplex_exponents, points, output_stride, values_out);
+            return true;
+        case 5:
+            evaluate_triangle_simplex_values_q4<5>(
+                simplex_exponents, points, output_stride, values_out);
+            return true;
+        case 6:
+            evaluate_triangle_simplex_values_q4<6>(
+                simplex_exponents, points, output_stride, values_out);
+            return
true;
+        case 7:
+            evaluate_triangle_simplex_values_q4<7>(
+                simplex_exponents, points, output_stride, values_out);
+            return true;
+        case 8:
+            evaluate_triangle_simplex_values_q4<8>(
+                simplex_exponents, points, output_stride, values_out);
+            return true;
+        default:
+            return false;  // no specialization for this order; nothing was written
+    }
+}
+
+template  // Tetrahedron basis values at exactly four points; writes values_out[node * output_stride + q] for q = 0..3.
+void evaluate_tetrahedron_simplex_values_q4(
+    const std::vector>& simplex_exponents,  // per node: e[0..3] index the factor tables of l0..l3
+    const std::vector>& points,  // points[0..3] are read without a size check
+    std::size_t output_stride,
+    Real* SVMP_RESTRICT values_out) {
+    static_assert(Order >= 4 && Order <= 8, "specialized simplex path covers orders 4..8");
+
+    Real phi0[4][Order + 1];  // phiK[q][a]: factor a of barycentric coordinate lK at point q
+    Real phi1[4][Order + 1];
+    Real phi2[4][Order + 1];
+    Real phi3[4][Order + 1];
+
+    for (std::size_t q = 0; q < 4u; ++q) {
+        const auto& xi = points[q];
+        const Real l1 = xi[0];
+        const Real l2 = xi[1];
+        const Real l3 = xi[2];
+        const Real l0 = Real(1) - l1 - l2 - l3;  // barycentric coordinates sum to one
+        simplex_lagrange_factor_values_product(l0, phi0[q]);
+        simplex_lagrange_factor_values_product(l1, phi1[q]);
+        simplex_lagrange_factor_values_product(l2, phi2[q]);
+        simplex_lagrange_factor_values_product(l3, phi3[q]);
+    }
+
+    const std::size_t num_nodes = simplex_exponents.size();
+    for (std::size_t node = 0; node < num_nodes; ++node) {
+        const auto& e = simplex_exponents[node];
+        const std::size_t i0 = static_cast(e[0]);
+        const std::size_t i1 = static_cast(e[1]);
+        const std::size_t i2 = static_cast(e[2]);
+        const std::size_t i3 = static_cast(e[3]);
+        Real* SVMP_RESTRICT row = values_out + node * output_stride;  // this node's output row; point q goes to row[q]
+        row[0] = phi0[0][i0] * phi1[0][i1] * phi2[0][i2] * phi3[0][i3];  // basis value = product of one factor per barycentric axis
+        row[1] = phi0[1][i0] * phi1[1][i1] * phi2[1][i2] * phi3[1][i3];
+        row[2] = phi0[2][i0] * phi1[2][i1] * phi2[2][i2] * phi3[2][i3];
+        row[3] = phi0[3][i0] * phi1[3][i1] * phi2[3][i2] * phi3[3][i3];
+    }
+}
+
+bool try_evaluate_tetrahedron_simplex_values_q4(  // runtime-order front end for the tetrahedron value kernel
+    const std::vector>& simplex_exponents,
+    int order,
+    const std::vector>& points,
+    std::size_t output_stride,
+    Real* SVMP_RESTRICT values_out) {
+    switch (order) {
+        case 4:
+
evaluate_tetrahedron_simplex_values_q4<4>( + simplex_exponents, points, output_stride, values_out); + return true; + case 5: + evaluate_tetrahedron_simplex_values_q4<5>( + simplex_exponents, points, output_stride, values_out); + return true; + case 6: + evaluate_tetrahedron_simplex_values_q4<6>( + simplex_exponents, points, output_stride, values_out); + return true; + case 7: + evaluate_tetrahedron_simplex_values_q4<7>( + simplex_exponents, points, output_stride, values_out); + return true; + case 8: + evaluate_tetrahedron_simplex_values_q4<8>( + simplex_exponents, points, output_stride, values_out); + return true; + default: + return false; + } +} + +template +void evaluate_tetrahedron_simplex_gradients_q4( + const std::vector>& simplex_exponents, + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT gradients_out) { + static_assert(Order >= 3 && Order <= 8, + "specialized tetrahedron gradient path covers orders 3..8"); + + Real phi0[4][Order + 1]; + Real phi1[4][Order + 1]; + Real phi2[4][Order + 1]; + Real phi3[4][Order + 1]; + Real dphi0[4][Order + 1]; + Real dphi1[4][Order + 1]; + Real dphi2[4][Order + 1]; + Real dphi3[4][Order + 1]; + + for (std::size_t q = 0; q < 4u; ++q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l3 = xi[2]; + const Real l0 = Real(1) - l1 - l2 - l3; + simplex_lagrange_factor_sequence_impl( + Order, l0, phi0[q], dphi0[q], nullptr); + simplex_lagrange_factor_sequence_impl( + Order, l1, phi1[q], dphi1[q], nullptr); + simplex_lagrange_factor_sequence_impl( + Order, l2, phi2[q], dphi2[q], nullptr); + simplex_lagrange_factor_sequence_impl( + Order, l3, phi3[q], dphi3[q], nullptr); + } + + const std::size_t num_nodes = simplex_exponents.size(); + for (std::size_t node = 0; node < num_nodes; ++node) { + const auto& e = simplex_exponents[node]; + const std::size_t i0 = static_cast(e[0]); + const std::size_t i1 = static_cast(e[1]); + const std::size_t i2 = 
static_cast(e[2]); + const std::size_t i3 = static_cast(e[3]); + Real gx[4]; + Real gy[4]; + Real gz[4]; + + for (std::size_t q = 0; q < 4u; ++q) { + const Real v0 = phi0[q][i0]; + const Real v1 = phi1[q][i1]; + const Real v2 = phi2[q][i2]; + const Real v3 = phi3[q][i3]; + const Real D0 = dphi0[q][i0]; + const Real D1 = dphi1[q][i1]; + const Real D2 = dphi2[q][i2]; + const Real D3 = dphi3[q][i3]; + const Real v23 = v2 * v3; + const Real v01 = v0 * v1; + const Real dl0 = D0 * v1 * v23; + gx[q] = v0 * D1 * v23 - dl0; + gy[q] = v01 * D2 * v3 - dl0; + gz[q] = v01 * v2 * D3 - dl0; + } + + Real* SVMP_RESTRICT g = gradients_out + node * 3u * output_stride; + g[0u] = gx[0]; + g[1u] = gx[1]; + g[2u] = gx[2]; + g[3u] = gx[3]; + g[output_stride + 0u] = gy[0]; + g[output_stride + 1u] = gy[1]; + g[output_stride + 2u] = gy[2]; + g[output_stride + 3u] = gy[3]; + g[2u * output_stride + 0u] = gz[0]; + g[2u * output_stride + 1u] = gz[1]; + g[2u * output_stride + 2u] = gz[2]; + g[2u * output_stride + 3u] = gz[3]; + } +} + +template +void evaluate_triangle_simplex_gradients_q4( + const std::vector>& simplex_exponents, + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT gradients_out) { + static_assert((Order == 2) || (Order >= 4 && Order <= 8), + "specialized simplex path covers order 2 and orders 4..8"); + + Real phi0[4][Order + 1]; + Real phi1[4][Order + 1]; + Real phi2[4][Order + 1]; + Real dphi0[4][Order + 1]; + Real dphi1[4][Order + 1]; + Real dphi2[4][Order + 1]; + + for (std::size_t q = 0; q < 4u; ++q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l0 = Real(1) - l1 - l2; + simplex_lagrange_factor_sequence_impl( + Order, l0, phi0[q], dphi0[q], nullptr); + simplex_lagrange_factor_sequence_impl( + Order, l1, phi1[q], dphi1[q], nullptr); + simplex_lagrange_factor_sequence_impl( + Order, l2, phi2[q], dphi2[q], nullptr); + } + + const std::size_t num_nodes = simplex_exponents.size(); + for (std::size_t node 
= 0; node < num_nodes; ++node) {
+        const auto& e = simplex_exponents[node];
+        const std::size_t i0 = static_cast(e[0]);
+        const std::size_t i1 = static_cast(e[1]);
+        const std::size_t i2 = static_cast(e[2]);
+        Real* SVMP_RESTRICT g = gradients_out + node * 3u * output_stride;  // three component rows of output_stride entries per node
+
+        for (std::size_t q = 0; q < 4u; ++q) {
+            const Real v0 = phi0[q][i0];
+            const Real v1 = phi1[q][i1];
+            const Real v2 = phi2[q][i2];
+            const Real D0 = dphi0[q][i0];
+            const Real D1 = dphi1[q][i1];
+            const Real D2 = dphi2[q][i2];
+            const Real dl0 = D0 * v1 * v2;  // derivative of the product with respect to l0
+            g[0u * output_stride + q] = v0 * D1 * v2 - dl0;  // d/dxi0 = d/dl1 - d/dl0, because l0 depends on xi0 with slope -1
+            g[1u * output_stride + q] = v0 * v1 * D2 - dl0;  // d/dxi1 = d/dl2 - d/dl0
+            g[2u * output_stride + q] = Real(0);  // third component is always written as zero for the triangle
+        }
+    }
+}
+
+bool try_evaluate_triangle_simplex_gradients_q4(  // maps a runtime order onto the compile-time gradient kernel; true means a kernel ran
+    const std::vector>& simplex_exponents,
+    int order,
+    const std::vector>& points,
+    std::size_t output_stride,
+    Real* SVMP_RESTRICT gradients_out) {
+    switch (order) {
+        case 2:
+            evaluate_triangle_simplex_gradients_q4<2>(
+                simplex_exponents, points, output_stride, gradients_out);
+            return true;
+        case 4:  // there is no case 3: order 3 falls to default and returns false
+            evaluate_triangle_simplex_gradients_q4<4>(
+                simplex_exponents, points, output_stride, gradients_out);
+            return true;
+        case 5:
+            evaluate_triangle_simplex_gradients_q4<5>(
+                simplex_exponents, points, output_stride, gradients_out);
+            return true;
+        case 6:
+            evaluate_triangle_simplex_gradients_q4<6>(
+                simplex_exponents, points, output_stride, gradients_out);
+            return true;
+        case 7:
+            evaluate_triangle_simplex_gradients_q4<7>(
+                simplex_exponents, points, output_stride, gradients_out);
+            return true;
+        case 8:
+            evaluate_triangle_simplex_gradients_q4<8>(
+                simplex_exponents, points, output_stride, gradients_out);
+            return true;
+        default:
+            return false;  // nothing was written to gradients_out
+    }
+}
+
+template
+void evaluate_triangle_simplex_hessian_outputs_q4(
+    const std::vector>& simplex_exponents,
+    const std::vector>& points,
+    std::size_t output_stride,
+    Real* SVMP_RESTRICT values_out,
+    Real* SVMP_RESTRICT gradients_out,
+    Real* SVMP_RESTRICT hessians_out)
{ + static_assert(Order >= 2 && Order <= 8, "specialized simplex path covers orders 2..8"); + + Real phi0[4][Order + 1]; + Real phi1[4][Order + 1]; + Real phi2[4][Order + 1]; + Real dphi0[4][Order + 1]; + Real dphi1[4][Order + 1]; + Real dphi2[4][Order + 1]; + Real d2phi0[4][Order + 1]; + Real d2phi1[4][Order + 1]; + Real d2phi2[4][Order + 1]; + + for (std::size_t q = 0; q < 4u; ++q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l0 = Real(1) - l1 - l2; + simplex_lagrange_factor_sequence_impl( + Order, l0, phi0[q], dphi0[q], d2phi0[q]); + simplex_lagrange_factor_sequence_impl( + Order, l1, phi1[q], dphi1[q], d2phi1[q]); + simplex_lagrange_factor_sequence_impl( + Order, l2, phi2[q], dphi2[q], d2phi2[q]); + } + + const std::size_t num_nodes = simplex_exponents.size(); + for (std::size_t node = 0; node < num_nodes; ++node) { + const auto& e = simplex_exponents[node]; + const std::size_t i0 = static_cast(e[0]); + const std::size_t i1 = static_cast(e[1]); + const std::size_t i2 = static_cast(e[2]); + Real* SVMP_RESTRICT value_row = values_out ? values_out + node * output_stride : nullptr; + Real* SVMP_RESTRICT g = gradients_out ? 
gradients_out + node * 3u * output_stride : nullptr; + Real* SVMP_RESTRICT H = hessians_out + node * 9u * output_stride; + H[2u * output_stride + 0u] = Real(0); + H[2u * output_stride + 1u] = Real(0); + H[2u * output_stride + 2u] = Real(0); + H[2u * output_stride + 3u] = Real(0); + H[5u * output_stride + 0u] = Real(0); + H[5u * output_stride + 1u] = Real(0); + H[5u * output_stride + 2u] = Real(0); + H[5u * output_stride + 3u] = Real(0); + H[6u * output_stride + 0u] = Real(0); + H[6u * output_stride + 1u] = Real(0); + H[6u * output_stride + 2u] = Real(0); + H[6u * output_stride + 3u] = Real(0); + H[7u * output_stride + 0u] = Real(0); + H[7u * output_stride + 1u] = Real(0); + H[7u * output_stride + 2u] = Real(0); + H[7u * output_stride + 3u] = Real(0); + H[8u * output_stride + 0u] = Real(0); + H[8u * output_stride + 1u] = Real(0); + H[8u * output_stride + 2u] = Real(0); + H[8u * output_stride + 3u] = Real(0); + + for (std::size_t q = 0; q < 4u; ++q) { + const Real v0 = phi0[q][i0]; + const Real v1 = phi1[q][i1]; + const Real v2 = phi2[q][i2]; + if (value_row != nullptr) { + value_row[q] = v0 * v1 * v2; + } + + const Real D0 = dphi0[q][i0]; + const Real D1 = dphi1[q][i1]; + const Real D2 = dphi2[q][i2]; + if (g != nullptr) { + const Real dl0 = D0 * v1 * v2; + g[0u * output_stride + q] = v0 * D1 * v2 - dl0; + g[1u * output_stride + q] = v0 * v1 * D2 - dl0; + g[2u * output_stride + q] = Real(0); + } + + const Real DD0 = d2phi0[q][i0]; + const Real DD1 = d2phi1[q][i1]; + const Real DD2 = d2phi2[q][i2]; + const Real H00 = DD0 * v1 * v2; + const Real H11 = v0 * DD1 * v2; + const Real H22 = v0 * v1 * DD2; + const Real H01 = D0 * D1 * v2; + const Real H02 = D0 * v1 * D2; + const Real H12 = v0 * D1 * D2; + const Real h01 = H00 - H01 - H02 + H12; + H[0u * output_stride + q] = H00 - Real(2) * H01 + H11; + H[1u * output_stride + q] = h01; + H[3u * output_stride + q] = h01; + H[4u * output_stride + q] = H00 - Real(2) * H02 + H22; + } + } +} + +bool 
try_evaluate_triangle_simplex_hessian_outputs_q4( + const std::vector>& simplex_exponents, + int order, + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + switch (order) { + case 2: + evaluate_triangle_simplex_hessian_outputs_q4<2>( + simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); + return true; + case 3: + evaluate_triangle_simplex_hessian_outputs_q4<3>( + simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); + return true; + case 4: + evaluate_triangle_simplex_hessian_outputs_q4<4>( + simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); + return true; + case 5: + evaluate_triangle_simplex_hessian_outputs_q4<5>( + simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); + return true; + case 6: + evaluate_triangle_simplex_hessian_outputs_q4<6>( + simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); + return true; + case 7: + evaluate_triangle_simplex_hessian_outputs_q4<7>( + simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); + return true; + case 8: + evaluate_triangle_simplex_hessian_outputs_q4<8>( + simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); + return true; + default: + return false; + } +} + +template +inline void write_tetrahedron_simplex_hessian_q4( + const Real (&phi0)[4][Order + 1], + const Real (&phi1)[4][Order + 1], + const Real (&phi2)[4][Order + 1], + const Real (&phi3)[4][Order + 1], + const Real (&dphi0)[4][Order + 1], + const Real (&dphi1)[4][Order + 1], + const Real (&dphi2)[4][Order + 1], + const Real (&dphi3)[4][Order + 1], + const Real (&d2phi0)[4][Order + 1], + const Real (&d2phi1)[4][Order + 1], + const Real (&d2phi2)[4][Order + 1], + const Real (&d2phi3)[4][Order + 1], + std::size_t i0, + std::size_t 
i1, + std::size_t i2, + std::size_t i3, + std::size_t output_stride, + Real* SVMP_RESTRICT H) { + const Real v0 = phi0[Q][i0]; + const Real v1 = phi1[Q][i1]; + const Real v2 = phi2[Q][i2]; + const Real v3 = phi3[Q][i3]; + const Real D0 = dphi0[Q][i0]; + const Real D1 = dphi1[Q][i1]; + const Real D2 = dphi2[Q][i2]; + const Real D3 = dphi3[Q][i3]; + const Real DD0 = d2phi0[Q][i0]; + const Real DD1 = d2phi1[Q][i1]; + const Real DD2 = d2phi2[Q][i2]; + const Real DD3 = d2phi3[Q][i3]; + const Real H00 = DD0 * v1 * v2 * v3; + const Real H11 = v0 * DD1 * v2 * v3; + const Real H22 = v0 * v1 * DD2 * v3; + const Real H33 = v0 * v1 * v2 * DD3; + const Real H01 = D0 * D1 * v2 * v3; + const Real H02 = D0 * v1 * D2 * v3; + const Real H03 = D0 * v1 * v2 * D3; + const Real H12 = v0 * D1 * D2 * v3; + const Real H13 = v0 * D1 * v2 * D3; + const Real H23 = v0 * v1 * D2 * D3; + const Real h01 = H00 - H01 - H02 + H12; + const Real h02 = H00 - H01 - H03 + H13; + const Real h12 = H00 - H02 - H03 + H23; + H[0u * output_stride + Q] = H00 - Real(2) * H01 + H11; + H[1u * output_stride + Q] = h01; + H[2u * output_stride + Q] = h02; + H[3u * output_stride + Q] = h01; + H[4u * output_stride + Q] = H00 - Real(2) * H02 + H22; + H[5u * output_stride + Q] = h12; + H[6u * output_stride + Q] = h02; + H[7u * output_stride + Q] = h12; + H[8u * output_stride + Q] = H00 - Real(2) * H03 + H33; +} + +template +inline void write_tetrahedron_simplex_hessian_stride4_q( + const Real (&phi0)[4][Order + 1], + const Real (&phi1)[4][Order + 1], + const Real (&phi2)[4][Order + 1], + const Real (&phi3)[4][Order + 1], + const Real (&dphi0)[4][Order + 1], + const Real (&dphi1)[4][Order + 1], + const Real (&dphi2)[4][Order + 1], + const Real (&dphi3)[4][Order + 1], + const Real (&d2phi0)[4][Order + 1], + const Real (&d2phi1)[4][Order + 1], + const Real (&d2phi2)[4][Order + 1], + const Real (&d2phi3)[4][Order + 1], + std::size_t i0, + std::size_t i1, + std::size_t i2, + std::size_t i3, + Real* SVMP_RESTRICT H) { + const 
Real v0 = phi0[Q][i0]; + const Real v1 = phi1[Q][i1]; + const Real v2 = phi2[Q][i2]; + const Real v3 = phi3[Q][i3]; + const Real D0 = dphi0[Q][i0]; + const Real D1 = dphi1[Q][i1]; + const Real D2 = dphi2[Q][i2]; + const Real D3 = dphi3[Q][i3]; + const Real DD0 = d2phi0[Q][i0]; + const Real DD1 = d2phi1[Q][i1]; + const Real DD2 = d2phi2[Q][i2]; + const Real DD3 = d2phi3[Q][i3]; + const Real v12 = v1 * v2; + const Real v13 = v1 * v3; + const Real v23 = v2 * v3; + const Real v123 = v1 * v23; + const Real v023 = v0 * v23; + const Real v013 = v0 * v13; + const Real v012 = v0 * v12; + const Real H00 = DD0 * v123; + const Real H11 = DD1 * v023; + const Real H22 = DD2 * v013; + const Real H33 = DD3 * v012; + const Real H01 = D0 * D1 * v23; + const Real H02 = D0 * D2 * v13; + const Real H03 = D0 * D3 * v12; + const Real H12 = D1 * D2 * v0 * v3; + const Real H13 = D1 * D3 * v0 * v2; + const Real H23 = D2 * D3 * v0 * v1; + const Real h01 = H00 - H01 - H02 + H12; + const Real h02 = H00 - H01 - H03 + H13; + const Real h12 = H00 - H02 - H03 + H23; + H[Q] = H00 - Real(2) * H01 + H11; + H[4u + Q] = h01; + H[8u + Q] = h02; + H[12u + Q] = h01; + H[16u + Q] = H00 - Real(2) * H02 + H22; + H[20u + Q] = h12; + H[24u + Q] = h02; + H[28u + Q] = h12; + H[32u + Q] = H00 - Real(2) * H03 + H33; +} + +template +inline void write_tetrahedron_simplex_all_stride4_q( + const Real (&phi0)[4][Order + 1], + const Real (&phi1)[4][Order + 1], + const Real (&phi2)[4][Order + 1], + const Real (&phi3)[4][Order + 1], + const Real (&dphi0)[4][Order + 1], + const Real (&dphi1)[4][Order + 1], + const Real (&dphi2)[4][Order + 1], + const Real (&dphi3)[4][Order + 1], + const Real (&d2phi0)[4][Order + 1], + const Real (&d2phi1)[4][Order + 1], + const Real (&d2phi2)[4][Order + 1], + const Real (&d2phi3)[4][Order + 1], + std::size_t i0, + std::size_t i1, + std::size_t i2, + std::size_t i3, + Real* SVMP_RESTRICT value_row, + Real* SVMP_RESTRICT g, + Real* SVMP_RESTRICT H) { + const Real v0 = phi0[Q][i0]; + const 
Real v1 = phi1[Q][i1]; + const Real v2 = phi2[Q][i2]; + const Real v3 = phi3[Q][i3]; + const Real D0 = dphi0[Q][i0]; + const Real D1 = dphi1[Q][i1]; + const Real D2 = dphi2[Q][i2]; + const Real D3 = dphi3[Q][i3]; + const Real DD0 = d2phi0[Q][i0]; + const Real DD1 = d2phi1[Q][i1]; + const Real DD2 = d2phi2[Q][i2]; + const Real DD3 = d2phi3[Q][i3]; + const Real v12 = v1 * v2; + const Real v13 = v1 * v3; + const Real v23 = v2 * v3; + const Real v123 = v1 * v23; + const Real v023 = v0 * v23; + const Real v013 = v0 * v13; + const Real v012 = v0 * v12; + const Real dl0 = D0 * v123; + const Real H00 = DD0 * v123; + const Real H11 = DD1 * v023; + const Real H22 = DD2 * v013; + const Real H33 = DD3 * v012; + const Real H01 = D0 * D1 * v23; + const Real H02 = D0 * D2 * v13; + const Real H03 = D0 * D3 * v12; + const Real H12 = D1 * D2 * v0 * v3; + const Real H13 = D1 * D3 * v0 * v2; + const Real H23 = D2 * D3 * v0 * v1; + const Real h01 = H00 - H01 - H02 + H12; + const Real h02 = H00 - H01 - H03 + H13; + const Real h12 = H00 - H02 - H03 + H23; + + value_row[Q] = v0 * v123; + g[Q] = D1 * v023 - dl0; + g[4u + Q] = D2 * v013 - dl0; + g[8u + Q] = D3 * v012 - dl0; + H[Q] = H00 - Real(2) * H01 + H11; + H[4u + Q] = h01; + H[8u + Q] = h02; + H[12u + Q] = h01; + H[16u + Q] = H00 - Real(2) * H02 + H22; + H[20u + Q] = h12; + H[24u + Q] = h02; + H[28u + Q] = h12; + H[32u + Q] = H00 - Real(2) * H03 + H33; +} + +template +void evaluate_tetrahedron_simplex_hessian_outputs_q4( + const std::vector>& simplex_exponents, + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + static_assert(Order >= 2 && Order <= 8, "specialized simplex path covers orders 2..8"); + + Real phi0[4][Order + 1]; + Real phi1[4][Order + 1]; + Real phi2[4][Order + 1]; + Real phi3[4][Order + 1]; + Real dphi0[4][Order + 1]; + Real dphi1[4][Order + 1]; + Real dphi2[4][Order + 1]; + Real dphi3[4][Order + 1]; + 
Real d2phi0[4][Order + 1]; + Real d2phi1[4][Order + 1]; + Real d2phi2[4][Order + 1]; + Real d2phi3[4][Order + 1]; + + for (std::size_t q = 0; q < 4u; ++q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l3 = xi[2]; + const Real l0 = Real(1) - l1 - l2 - l3; + simplex_lagrange_factor_sequence_impl( + Order, l0, phi0[q], dphi0[q], d2phi0[q]); + simplex_lagrange_factor_sequence_impl( + Order, l1, phi1[q], dphi1[q], d2phi1[q]); + simplex_lagrange_factor_sequence_impl( + Order, l2, phi2[q], dphi2[q], d2phi2[q]); + simplex_lagrange_factor_sequence_impl( + Order, l3, phi3[q], dphi3[q], d2phi3[q]); + } + + const std::size_t num_nodes = simplex_exponents.size(); + if (values_out == nullptr && gradients_out == nullptr) { + if (output_stride == 4u) { + for (std::size_t node = 0; node < num_nodes; ++node) { + const auto& e = simplex_exponents[node]; + const std::size_t i0 = static_cast(e[0]); + const std::size_t i1 = static_cast(e[1]); + const std::size_t i2 = static_cast(e[2]); + const std::size_t i3 = static_cast(e[3]); + Real* SVMP_RESTRICT H = hessians_out + node * 36u; + write_tetrahedron_simplex_hessian_stride4_q( + phi0, phi1, phi2, phi3, dphi0, dphi1, dphi2, dphi3, + d2phi0, d2phi1, d2phi2, d2phi3, i0, i1, i2, i3, H); + write_tetrahedron_simplex_hessian_stride4_q( + phi0, phi1, phi2, phi3, dphi0, dphi1, dphi2, dphi3, + d2phi0, d2phi1, d2phi2, d2phi3, i0, i1, i2, i3, H); + write_tetrahedron_simplex_hessian_stride4_q( + phi0, phi1, phi2, phi3, dphi0, dphi1, dphi2, dphi3, + d2phi0, d2phi1, d2phi2, d2phi3, i0, i1, i2, i3, H); + write_tetrahedron_simplex_hessian_stride4_q( + phi0, phi1, phi2, phi3, dphi0, dphi1, dphi2, dphi3, + d2phi0, d2phi1, d2phi2, d2phi3, i0, i1, i2, i3, H); + } + } else { + for (std::size_t node = 0; node < num_nodes; ++node) { + const auto& e = simplex_exponents[node]; + const std::size_t i0 = static_cast(e[0]); + const std::size_t i1 = static_cast(e[1]); + const std::size_t i2 = static_cast(e[2]); + const 
std::size_t i3 = static_cast(e[3]); + Real* SVMP_RESTRICT H = hessians_out + node * 9u * output_stride; + write_tetrahedron_simplex_hessian_q4( + phi0, phi1, phi2, phi3, dphi0, dphi1, dphi2, dphi3, + d2phi0, d2phi1, d2phi2, d2phi3, i0, i1, i2, i3, output_stride, H); + write_tetrahedron_simplex_hessian_q4( + phi0, phi1, phi2, phi3, dphi0, dphi1, dphi2, dphi3, + d2phi0, d2phi1, d2phi2, d2phi3, i0, i1, i2, i3, output_stride, H); + write_tetrahedron_simplex_hessian_q4( + phi0, phi1, phi2, phi3, dphi0, dphi1, dphi2, dphi3, + d2phi0, d2phi1, d2phi2, d2phi3, i0, i1, i2, i3, output_stride, H); + write_tetrahedron_simplex_hessian_q4( + phi0, phi1, phi2, phi3, dphi0, dphi1, dphi2, dphi3, + d2phi0, d2phi1, d2phi2, d2phi3, i0, i1, i2, i3, output_stride, H); + } + } + return; + } + + if (values_out != nullptr && gradients_out != nullptr) { + if (output_stride == 4u) { + for (std::size_t node = 0; node < num_nodes; ++node) { + const auto& e = simplex_exponents[node]; + const std::size_t i0 = static_cast(e[0]); + const std::size_t i1 = static_cast(e[1]); + const std::size_t i2 = static_cast(e[2]); + const std::size_t i3 = static_cast(e[3]); + Real* SVMP_RESTRICT value_row = values_out + node * output_stride; + Real* SVMP_RESTRICT g = gradients_out + node * 3u * output_stride; + Real* SVMP_RESTRICT H = hessians_out + node * 9u * output_stride; + write_tetrahedron_simplex_all_stride4_q( + phi0, phi1, phi2, phi3, dphi0, dphi1, dphi2, dphi3, + d2phi0, d2phi1, d2phi2, d2phi3, i0, i1, i2, i3, value_row, g, H); + write_tetrahedron_simplex_all_stride4_q( + phi0, phi1, phi2, phi3, dphi0, dphi1, dphi2, dphi3, + d2phi0, d2phi1, d2phi2, d2phi3, i0, i1, i2, i3, value_row, g, H); + write_tetrahedron_simplex_all_stride4_q( + phi0, phi1, phi2, phi3, dphi0, dphi1, dphi2, dphi3, + d2phi0, d2phi1, d2phi2, d2phi3, i0, i1, i2, i3, value_row, g, H); + write_tetrahedron_simplex_all_stride4_q( + phi0, phi1, phi2, phi3, dphi0, dphi1, dphi2, dphi3, + d2phi0, d2phi1, d2phi2, d2phi3, i0, i1, i2, i3, 
value_row, g, H); + } + return; + } + + for (std::size_t node = 0; node < num_nodes; ++node) { + const auto& e = simplex_exponents[node]; + const std::size_t i0 = static_cast(e[0]); + const std::size_t i1 = static_cast(e[1]); + const std::size_t i2 = static_cast(e[2]); + const std::size_t i3 = static_cast(e[3]); + Real* SVMP_RESTRICT value_row = values_out + node * output_stride; + Real* SVMP_RESTRICT g = gradients_out + node * 3u * output_stride; + Real* SVMP_RESTRICT H = hessians_out + node * 9u * output_stride; + + for (std::size_t q = 0; q < 4u; ++q) { + const Real v0 = phi0[q][i0]; + const Real v1 = phi1[q][i1]; + const Real v2 = phi2[q][i2]; + const Real v3 = phi3[q][i3]; + const Real D0 = dphi0[q][i0]; + const Real D1 = dphi1[q][i1]; + const Real D2 = dphi2[q][i2]; + const Real D3 = dphi3[q][i3]; + const Real DD0 = d2phi0[q][i0]; + const Real DD1 = d2phi1[q][i1]; + const Real DD2 = d2phi2[q][i2]; + const Real DD3 = d2phi3[q][i3]; + const Real v12 = v1 * v2; + const Real v13 = v1 * v3; + const Real v23 = v2 * v3; + const Real v123 = v1 * v23; + const Real v023 = v0 * v23; + const Real v013 = v0 * v13; + const Real v012 = v0 * v12; + const Real dl0 = D0 * v123; + const Real H00 = DD0 * v123; + const Real H11 = DD1 * v023; + const Real H22 = DD2 * v013; + const Real H33 = DD3 * v012; + const Real H01 = D0 * D1 * v23; + const Real H02 = D0 * D2 * v13; + const Real H03 = D0 * D3 * v12; + const Real H12 = D1 * D2 * v0 * v3; + const Real H13 = D1 * D3 * v0 * v2; + const Real H23 = D2 * D3 * v0 * v1; + const Real h01 = H00 - H01 - H02 + H12; + const Real h02 = H00 - H01 - H03 + H13; + const Real h12 = H00 - H02 - H03 + H23; + + value_row[q] = v0 * v123; + g[0u * output_stride + q] = D1 * v023 - dl0; + g[1u * output_stride + q] = D2 * v013 - dl0; + g[2u * output_stride + q] = D3 * v012 - dl0; + H[0u * output_stride + q] = H00 - Real(2) * H01 + H11; + H[1u * output_stride + q] = h01; + H[2u * output_stride + q] = h02; + H[3u * output_stride + q] = h01; + H[4u * 
output_stride + q] = H00 - Real(2) * H02 + H22; + H[5u * output_stride + q] = h12; + H[6u * output_stride + q] = h02; + H[7u * output_stride + q] = h12; + H[8u * output_stride + q] = H00 - Real(2) * H03 + H33; + } + } + return; + } + + for (std::size_t node = 0; node < num_nodes; ++node) { + const auto& e = simplex_exponents[node]; + const std::size_t i0 = static_cast(e[0]); + const std::size_t i1 = static_cast(e[1]); + const std::size_t i2 = static_cast(e[2]); + const std::size_t i3 = static_cast(e[3]); + Real* SVMP_RESTRICT value_row = values_out ? values_out + node * output_stride : nullptr; + Real* SVMP_RESTRICT g = gradients_out ? gradients_out + node * 3u * output_stride : nullptr; + Real* SVMP_RESTRICT H = hessians_out + node * 9u * output_stride; + + for (std::size_t q = 0; q < 4u; ++q) { + const Real v0 = phi0[q][i0]; + const Real v1 = phi1[q][i1]; + const Real v2 = phi2[q][i2]; + const Real v3 = phi3[q][i3]; + if (value_row != nullptr) { + value_row[q] = v0 * v1 * v2 * v3; + } + + const Real D0 = dphi0[q][i0]; + const Real D1 = dphi1[q][i1]; + const Real D2 = dphi2[q][i2]; + const Real D3 = dphi3[q][i3]; + if (g != nullptr) { + const Real dl0 = D0 * v1 * v2 * v3; + g[0u * output_stride + q] = v0 * D1 * v2 * v3 - dl0; + g[1u * output_stride + q] = v0 * v1 * D2 * v3 - dl0; + g[2u * output_stride + q] = v0 * v1 * v2 * D3 - dl0; + } + + const Real DD0 = d2phi0[q][i0]; + const Real DD1 = d2phi1[q][i1]; + const Real DD2 = d2phi2[q][i2]; + const Real DD3 = d2phi3[q][i3]; + const Real H00 = DD0 * v1 * v2 * v3; + const Real H11 = v0 * DD1 * v2 * v3; + const Real H22 = v0 * v1 * DD2 * v3; + const Real H33 = v0 * v1 * v2 * DD3; + const Real H01 = D0 * D1 * v2 * v3; + const Real H02 = D0 * v1 * D2 * v3; + const Real H03 = D0 * v1 * v2 * D3; + const Real H12 = v0 * D1 * D2 * v3; + const Real H13 = v0 * D1 * v2 * D3; + const Real H23 = v0 * v1 * D2 * D3; + const Real h01 = H00 - H01 - H02 + H12; + const Real h02 = H00 - H01 - H03 + H13; + const Real h12 = H00 - H02 - 
H03 + H23; + H[0u * output_stride + q] = H00 - Real(2) * H01 + H11; + H[1u * output_stride + q] = h01; + H[2u * output_stride + q] = h02; + H[3u * output_stride + q] = h01; + H[4u * output_stride + q] = H00 - Real(2) * H02 + H22; + H[5u * output_stride + q] = h12; + H[6u * output_stride + q] = h02; + H[7u * output_stride + q] = h12; + H[8u * output_stride + q] = H00 - Real(2) * H03 + H33; + } + } +} + +bool try_evaluate_tetrahedron_simplex_hessian_outputs_q4( + const std::vector>& simplex_exponents, + int order, + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + switch (order) { + case 2: + evaluate_tetrahedron_simplex_hessian_outputs_q4<2>( + simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); + return true; + case 3: + evaluate_tetrahedron_simplex_hessian_outputs_q4<3>( + simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); + return true; + case 4: + evaluate_tetrahedron_simplex_hessian_outputs_q4<4>( + simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); + return true; + case 5: + evaluate_tetrahedron_simplex_hessian_outputs_q4<5>( + simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); + return true; + case 6: + evaluate_tetrahedron_simplex_hessian_outputs_q4<6>( + simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); + return true; + case 7: + evaluate_tetrahedron_simplex_hessian_outputs_q4<7>( + simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); + return true; + case 8: + evaluate_tetrahedron_simplex_hessian_outputs_q4<8>( + simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); + return true; + default: + return false; + } +} + +// Per-thread scratch space for simplex factor sequences. 
Common low orders use +// fixed storage; higher orders fall back to dynamic vectors. +struct SimplexAxisScratch { + std::size_t size{0}; + std::array phi_fixed{}; + std::array dphi_fixed{}; + std::array d2phi_fixed{}; + std::vector phi_dynamic; + std::vector dphi_dynamic; + std::vector d2phi_dynamic; + + void reserveFor(std::size_t n) { + size = n; + if (n <= kFixedSimplexAxisSize) { + return; + } + if (phi_dynamic.size() < n) phi_dynamic.resize(n); + if (dphi_dynamic.size() < n) dphi_dynamic.resize(n); + if (d2phi_dynamic.size() < n) d2phi_dynamic.resize(n); + } + + Real* phi() noexcept { + return size <= kFixedSimplexAxisSize ? phi_fixed.data() : phi_dynamic.data(); + } + + Real* dphi() noexcept { + return size <= kFixedSimplexAxisSize ? dphi_fixed.data() : dphi_dynamic.data(); + } + + Real* d2phi() noexcept { + return size <= kFixedSimplexAxisSize ? d2phi_fixed.data() : d2phi_dynamic.data(); + } + + const Real* phi() const noexcept { + return size <= kFixedSimplexAxisSize ? phi_fixed.data() : phi_dynamic.data(); + } + + const Real* dphi() const noexcept { + return size <= kFixedSimplexAxisSize ? dphi_fixed.data() : dphi_dynamic.data(); + } + + const Real* d2phi() const noexcept { + return size <= kFixedSimplexAxisSize ? 
d2phi_fixed.data() : d2phi_dynamic.data(); + } +}; + +SimplexAxisScratch& simplex_axis_scratch_slot(int slot) { + thread_local SimplexAxisScratch s[4]; + return s[slot]; +} + +struct SimplexVectorSink { + std::vector* values; + std::vector* gradients; + std::vector* hessians; + + bool wants_values() const noexcept { return values != nullptr; } + bool wants_gradients() const noexcept { return gradients != nullptr; } + bool wants_hessians() const noexcept { return hessians != nullptr; } + + void prepare(std::size_t n_nodes) const { + if (values) values->resize(n_nodes); + if (gradients) gradients->resize(n_nodes); + if (hessians) hessians->resize(n_nodes); + } + + void write_value(std::size_t n, Real value) const { + (*values)[n] = value; + } + + void write_gradient(std::size_t n, Real x, Real y, Real z) const { + auto& gradient = (*gradients)[n]; + gradient[0] = x; + gradient[1] = y; + gradient[2] = z; + } + + void write_hessian(std::size_t n, + Real xx, + Real yy, + Real zz, + Real xy, + Real xz, + Real yz) const { + Hessian hessian{}; + hessian(0, 0) = xx; + hessian(1, 1) = yy; + hessian(2, 2) = zz; + hessian(0, 1) = xy; hessian(1, 0) = xy; + hessian(0, 2) = xz; hessian(2, 0) = xz; + hessian(1, 2) = yz; hessian(2, 1) = yz; + (*hessians)[n] = hessian; + } +}; + +struct SimplexRawSink { + Real* values; + Real* gradients; + Real* hessians; + + bool wants_values() const noexcept { return values != nullptr; } + bool wants_gradients() const noexcept { return gradients != nullptr; } + bool wants_hessians() const noexcept { return hessians != nullptr; } + + void prepare(std::size_t) const {} + + void write_value(std::size_t n, Real value) const { + values[n] = value; + } + + void write_gradient(std::size_t n, Real x, Real y, Real z) const { + Real* gradient = gradients + n * 3u; + gradient[0] = x; + gradient[1] = y; + gradient[2] = z; + } + + void write_hessian(std::size_t n, + Real xx, + Real yy, + Real zz, + Real xy, + Real xz, + Real yz) const { + Real* hessian = 
hessians + n * 9u; + hessian[0] = xx; + hessian[1] = xy; + hessian[2] = xz; + hessian[3] = xy; + hessian[4] = yy; + hessian[5] = yz; + hessian[6] = xz; + hessian[7] = yz; + hessian[8] = zz; + } +}; + +template +void evaluate_triangle_simplex_basis_impl(const std::vector>& simplex_exponents, + int order, + const math::Vector& xi, + const Sink& sink) { + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l0 = Real(1) - l1 - l2; + + const std::size_t n = static_cast(order + 1); + SimplexAxisScratch& s0 = simplex_axis_scratch_slot(0); + SimplexAxisScratch& s1 = simplex_axis_scratch_slot(1); + SimplexAxisScratch& s2 = simplex_axis_scratch_slot(2); + s0.reserveFor(n); + s1.reserveFor(n); + s2.reserveFor(n); + + const std::size_t num_nodes = simplex_exponents.size(); + sink.prepare(num_nodes); + const bool need_values = sink.wants_values(); + const bool need_gradients = sink.wants_gradients(); + const bool need_hessians = sink.wants_hessians(); + Real* d0_out = (need_gradients || need_hessians) ? s0.dphi() : nullptr; + Real* d1_out = (need_gradients || need_hessians) ? s1.dphi() : nullptr; + Real* d2_out = (need_gradients || need_hessians) ? s2.dphi() : nullptr; + Real* d20_out = need_hessians ? s0.d2phi() : nullptr; + Real* d21_out = need_hessians ? s1.d2phi() : nullptr; + Real* d22_out = need_hessians ? 
s2.d2phi() : nullptr; + + simplex_lagrange_factor_sequence(order, l0, s0.phi(), d0_out, d20_out); + simplex_lagrange_factor_sequence(order, l1, s1.phi(), d1_out, d21_out); + simplex_lagrange_factor_sequence(order, l2, s2.phi(), d2_out, d22_out); + const Real* phi0 = s0.phi(); + const Real* phi1 = s1.phi(); + const Real* phi2 = s2.phi(); + const Real* dphi0 = s0.dphi(); + const Real* dphi1 = s1.dphi(); + const Real* dphi2 = s2.dphi(); + const Real* d2phi0 = s0.d2phi(); + const Real* d2phi1 = s1.d2phi(); + const Real* d2phi2 = s2.d2phi(); + + for (std::size_t n_idx = 0; n_idx < num_nodes; ++n_idx) { + const auto& e = simplex_exponents[n_idx]; + const std::size_t i0 = static_cast(e[0]); + const std::size_t i1 = static_cast(e[1]); + const std::size_t i2 = static_cast(e[2]); + + const Real v0 = phi0[i0]; + const Real v1 = phi1[i1]; + const Real v2 = phi2[i2]; + if (need_values) { + sink.write_value(n_idx, v0 * v1 * v2); + } + if (!need_gradients && !need_hessians) { + continue; + } + + const Real D0 = dphi0[i0]; + const Real D1 = dphi1[i1]; + const Real D2 = dphi2[i2]; + + if (need_gradients) { + const Real dl0 = D0 * v1 * v2; + const Real dl1 = v0 * D1 * v2; + const Real dl2 = v0 * v1 * D2; + sink.write_gradient(n_idx, dl1 - dl0, dl2 - dl0, Real(0)); + } + + if (need_hessians) { + const Real DD0 = d2phi0[i0]; + const Real DD1 = d2phi1[i1]; + const Real DD2 = d2phi2[i2]; + + const Real H00 = DD0 * v1 * v2; + const Real H11 = v0 * DD1 * v2; + const Real H22 = v0 * v1 * DD2; + const Real H01 = D0 * D1 * v2; + const Real H02 = D0 * v1 * D2; + const Real H12 = v0 * D1 * D2; + + sink.write_hessian(n_idx, + H00 - Real(2) * H01 + H11, + H00 - Real(2) * H02 + H22, + Real(0), + H00 - H01 - H02 + H12, + Real(0), + Real(0)); + } + } +} + +void evaluate_triangle_simplex_basis(const std::vector>& simplex_exponents, + int order, + const math::Vector& xi, + std::vector* values, + std::vector* gradients, + std::vector* hessians) { + const SimplexVectorSink sink{values, gradients, 
hessians}; + evaluate_triangle_simplex_basis_impl(simplex_exponents, order, xi, sink); +} + +void evaluate_triangle_simplex_basis_to(const std::vector>& simplex_exponents, + int order, + const math::Vector& xi, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + const SimplexRawSink sink{values_out, gradients_out, hessians_out}; + evaluate_triangle_simplex_basis_impl(simplex_exponents, order, xi, sink); +} + +void evaluate_triangle_simplex_basis_strided( + const std::vector>& simplex_exponents, + int order, + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + const std::size_t num_nodes = simplex_exponents.size(); + if (points.empty() || num_nodes == 0u) { + return; + } + + const std::size_t sequence_size = static_cast(order + 1); + const std::size_t num_qpts = points.size(); + const bool need_gradients = gradients_out != nullptr; + const bool need_hessians = hessians_out != nullptr; + if (num_qpts == 4u && + values_out != nullptr && + !need_gradients && + !need_hessians && + try_evaluate_triangle_simplex_values_q4( + simplex_exponents, order, points, output_stride, values_out)) { + return; + } + if (num_qpts == 4u && + values_out == nullptr && + need_gradients && + !need_hessians && + try_evaluate_triangle_simplex_gradients_q4( + simplex_exponents, order, points, output_stride, gradients_out)) { + return; + } + if (num_qpts == 4u && + need_hessians && + try_evaluate_triangle_simplex_hessian_outputs_q4( + simplex_exponents, order, points, output_stride, + values_out, gradients_out, hessians_out)) { + return; + } + const std::size_t batch_entries = sequence_size * num_qpts; + if (batch_entries <= kFixedSimplexBatchEntries) { + if (values_out != nullptr && gradients_out == nullptr && hessians_out == nullptr) { + std::array phi0_batch; + std::array phi1_batch; + std::array phi2_batch; + + for 
(std::size_t q = 0; q < num_qpts; ++q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l0 = Real(1) - l1 - l2; + const std::size_t offset = q * sequence_size; + simplex_lagrange_factor_sequence( + order, l0, phi0_batch.data() + offset, nullptr, nullptr); + simplex_lagrange_factor_sequence( + order, l1, phi1_batch.data() + offset, nullptr, nullptr); + simplex_lagrange_factor_sequence( + order, l2, phi2_batch.data() + offset, nullptr, nullptr); + } + + for (std::size_t node = 0; node < num_nodes; ++node) { + const auto& e = simplex_exponents[node]; + const std::size_t i0 = static_cast(e[0]); + const std::size_t i1 = static_cast(e[1]); + const std::size_t i2 = static_cast(e[2]); + Real* value_row = values_out + node * output_stride; + + for (std::size_t q = 0; q < num_qpts; ++q) { + const std::size_t offset = q * sequence_size; + value_row[q] = + phi0_batch[offset + i0] * + phi1_batch[offset + i1] * + phi2_batch[offset + i2]; + } + } + return; + } + + if (values_out == nullptr && gradients_out != nullptr && hessians_out == nullptr) { + std::array phi0_batch; + std::array phi1_batch; + std::array phi2_batch; + std::array dphi0_batch; + std::array dphi1_batch; + std::array dphi2_batch; + + for (std::size_t q = 0; q < num_qpts; ++q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l0 = Real(1) - l1 - l2; + const std::size_t offset = q * sequence_size; + simplex_lagrange_factor_sequence( + order, l0, phi0_batch.data() + offset, dphi0_batch.data() + offset, nullptr); + simplex_lagrange_factor_sequence( + order, l1, phi1_batch.data() + offset, dphi1_batch.data() + offset, nullptr); + simplex_lagrange_factor_sequence( + order, l2, phi2_batch.data() + offset, dphi2_batch.data() + offset, nullptr); + } + + for (std::size_t node = 0; node < num_nodes; ++node) { + const auto& e = simplex_exponents[node]; + const std::size_t i0 = static_cast(e[0]); + const std::size_t i1 = 
static_cast(e[1]); + const std::size_t i2 = static_cast(e[2]); + Real* g = gradients_out + node * 3u * output_stride; + + for (std::size_t q = 0; q < num_qpts; ++q) { + const std::size_t offset = q * sequence_size; + const Real v0 = phi0_batch[offset + i0]; + const Real v1 = phi1_batch[offset + i1]; + const Real v2 = phi2_batch[offset + i2]; + const Real D0 = dphi0_batch[offset + i0]; + const Real D1 = dphi1_batch[offset + i1]; + const Real D2 = dphi2_batch[offset + i2]; + const Real dl0 = D0 * v1 * v2; + g[0u * output_stride + q] = v0 * D1 * v2 - dl0; + g[1u * output_stride + q] = v0 * v1 * D2 - dl0; + g[2u * output_stride + q] = Real(0); + } + } + return; + } + + if (order >= 4 && + values_out == nullptr && + gradients_out == nullptr && + hessians_out != nullptr) { + std::array phi0_batch; + std::array phi1_batch; + std::array phi2_batch; + std::array dphi0_batch; + std::array dphi1_batch; + std::array dphi2_batch; + std::array d2phi0_batch; + std::array d2phi1_batch; + std::array d2phi2_batch; + + for (std::size_t q = 0; q < num_qpts; ++q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l0 = Real(1) - l1 - l2; + const std::size_t offset = q * sequence_size; + simplex_lagrange_factor_sequence( + order, l0, phi0_batch.data() + offset, + dphi0_batch.data() + offset, d2phi0_batch.data() + offset); + simplex_lagrange_factor_sequence( + order, l1, phi1_batch.data() + offset, + dphi1_batch.data() + offset, d2phi1_batch.data() + offset); + simplex_lagrange_factor_sequence( + order, l2, phi2_batch.data() + offset, + dphi2_batch.data() + offset, d2phi2_batch.data() + offset); + } + + for (std::size_t node = 0; node < num_nodes; ++node) { + const auto& e = simplex_exponents[node]; + const std::size_t i0 = static_cast(e[0]); + const std::size_t i1 = static_cast(e[1]); + const std::size_t i2 = static_cast(e[2]); + Real* H = hessians_out + node * 9u * output_stride; + + for (std::size_t q = 0; q < num_qpts; ++q) { + const 
std::size_t offset = q * sequence_size; + const Real v0 = phi0_batch[offset + i0]; + const Real v1 = phi1_batch[offset + i1]; + const Real v2 = phi2_batch[offset + i2]; + const Real D0 = dphi0_batch[offset + i0]; + const Real D1 = dphi1_batch[offset + i1]; + const Real D2 = dphi2_batch[offset + i2]; + const Real DD0 = d2phi0_batch[offset + i0]; + const Real DD1 = d2phi1_batch[offset + i1]; + const Real DD2 = d2phi2_batch[offset + i2]; + const Real H00 = DD0 * v1 * v2; + const Real H11 = v0 * DD1 * v2; + const Real H22 = v0 * v1 * DD2; + const Real H01 = D0 * D1 * v2; + const Real H02 = D0 * v1 * D2; + const Real H12 = v0 * D1 * D2; + const Real h01 = H00 - H01 - H02 + H12; + + H[0u * output_stride + q] = H00 - Real(2) * H01 + H11; + H[1u * output_stride + q] = h01; + H[2u * output_stride + q] = Real(0); + H[3u * output_stride + q] = h01; + H[4u * output_stride + q] = H00 - Real(2) * H02 + H22; + H[5u * output_stride + q] = Real(0); + H[6u * output_stride + q] = Real(0); + H[7u * output_stride + q] = Real(0); + H[8u * output_stride + q] = Real(0); + } + } + return; + } + + std::array phi0_batch; + std::array phi1_batch; + std::array phi2_batch; + std::array dphi0_batch; + std::array dphi1_batch; + std::array dphi2_batch; + std::array d2phi0_batch; + std::array d2phi1_batch; + std::array d2phi2_batch; + + for (std::size_t q = 0; q < num_qpts; ++q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l0 = Real(1) - l1 - l2; + const std::size_t offset = q * sequence_size; + Real* d0_out = (need_gradients || need_hessians) ? dphi0_batch.data() + offset : nullptr; + Real* d1_out = (need_gradients || need_hessians) ? dphi1_batch.data() + offset : nullptr; + Real* d2_out = (need_gradients || need_hessians) ? dphi2_batch.data() + offset : nullptr; + Real* d20_out = need_hessians ? d2phi0_batch.data() + offset : nullptr; + Real* d21_out = need_hessians ? d2phi1_batch.data() + offset : nullptr; + Real* d22_out = need_hessians ? 
d2phi2_batch.data() + offset : nullptr; + simplex_lagrange_factor_sequence(order, l0, phi0_batch.data() + offset, d0_out, d20_out); + simplex_lagrange_factor_sequence(order, l1, phi1_batch.data() + offset, d1_out, d21_out); + simplex_lagrange_factor_sequence(order, l2, phi2_batch.data() + offset, d2_out, d22_out); + } + + for (std::size_t node = 0; node < num_nodes; ++node) { + const auto& e = simplex_exponents[node]; + const std::size_t i0 = static_cast(e[0]); + const std::size_t i1 = static_cast(e[1]); + const std::size_t i2 = static_cast(e[2]); + Real* value_row = values_out ? values_out + node * output_stride : nullptr; + Real* g = gradients_out ? gradients_out + node * 3u * output_stride : nullptr; + Real* H = hessians_out ? hessians_out + node * 9u * output_stride : nullptr; + + for (std::size_t q = 0; q < num_qpts; ++q) { + const std::size_t offset = q * sequence_size; + const Real v0 = phi0_batch[offset + i0]; + const Real v1 = phi1_batch[offset + i1]; + const Real v2 = phi2_batch[offset + i2]; + if (value_row != nullptr) { + value_row[q] = v0 * v1 * v2; + } + if (!need_gradients && !need_hessians) { + continue; + } + + const Real D0 = dphi0_batch[offset + i0]; + const Real D1 = dphi1_batch[offset + i1]; + const Real D2 = dphi2_batch[offset + i2]; + + if (gradients_out != nullptr) { + const Real dl0 = D0 * v1 * v2; + const Real dl1 = v0 * D1 * v2; + const Real dl2 = v0 * v1 * D2; + g[0u * output_stride + q] = dl1 - dl0; + g[1u * output_stride + q] = dl2 - dl0; + g[2u * output_stride + q] = Real(0); + } + + if (hessians_out != nullptr) { + const Real DD0 = d2phi0_batch[offset + i0]; + const Real DD1 = d2phi1_batch[offset + i1]; + const Real DD2 = d2phi2_batch[offset + i2]; + const Real H00 = DD0 * v1 * v2; + const Real H11 = v0 * DD1 * v2; + const Real H22 = v0 * v1 * DD2; + const Real H01 = D0 * D1 * v2; + const Real H02 = D0 * v1 * D2; + const Real H12 = v0 * D1 * D2; + const Real h01 = H00 - H01 - H02 + H12; + H[0u * output_stride + q] = H00 - Real(2) * 
H01 + H11; + H[1u * output_stride + q] = h01; + H[2u * output_stride + q] = Real(0); + H[3u * output_stride + q] = h01; + H[4u * output_stride + q] = H00 - Real(2) * H02 + H22; + H[5u * output_stride + q] = Real(0); + H[6u * output_stride + q] = Real(0); + H[7u * output_stride + q] = Real(0); + H[8u * output_stride + q] = Real(0); + } + } + } + return; + } + + SimplexAxisScratch& s0 = simplex_axis_scratch_slot(0); + SimplexAxisScratch& s1 = simplex_axis_scratch_slot(1); + SimplexAxisScratch& s2 = simplex_axis_scratch_slot(2); + s0.reserveFor(sequence_size); + s1.reserveFor(sequence_size); + s2.reserveFor(sequence_size); + + for (std::size_t q = 0; q < points.size(); ++q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l0 = Real(1) - l1 - l2; + + Real* d0_out = (need_gradients || need_hessians) ? s0.dphi() : nullptr; + Real* d1_out = (need_gradients || need_hessians) ? s1.dphi() : nullptr; + Real* d2_out = (need_gradients || need_hessians) ? s2.dphi() : nullptr; + Real* d20_out = need_hessians ? s0.d2phi() : nullptr; + Real* d21_out = need_hessians ? s1.d2phi() : nullptr; + Real* d22_out = need_hessians ? 
s2.d2phi() : nullptr; + + simplex_lagrange_factor_sequence(order, l0, s0.phi(), d0_out, d20_out); + simplex_lagrange_factor_sequence(order, l1, s1.phi(), d1_out, d21_out); + simplex_lagrange_factor_sequence(order, l2, s2.phi(), d2_out, d22_out); + const Real* phi0 = s0.phi(); + const Real* phi1 = s1.phi(); + const Real* phi2 = s2.phi(); + const Real* dphi0 = s0.dphi(); + const Real* dphi1 = s1.dphi(); + const Real* dphi2 = s2.dphi(); + const Real* d2phi0 = s0.d2phi(); + const Real* d2phi1 = s1.d2phi(); + const Real* d2phi2 = s2.d2phi(); + + for (std::size_t node = 0; node < num_nodes; ++node) { + const auto& e = simplex_exponents[node]; + const std::size_t i0 = static_cast(e[0]); + const std::size_t i1 = static_cast(e[1]); + const std::size_t i2 = static_cast(e[2]); + + const Real v0 = phi0[i0]; + const Real v1 = phi1[i1]; + const Real v2 = phi2[i2]; + const Real value = v0 * v1 * v2; + if (values_out != nullptr) { + values_out[node * output_stride + q] = value; + } + if (!need_gradients && !need_hessians) { + continue; + } + + const Real D0 = dphi0[i0]; + const Real D1 = dphi1[i1]; + const Real D2 = dphi2[i2]; + + if (gradients_out != nullptr) { + const Real dl0 = D0 * v1 * v2; + const Real dl1 = v0 * D1 * v2; + const Real dl2 = v0 * v1 * D2; + Real* g = gradients_out + node * 3u * output_stride; + g[0u * output_stride + q] = dl1 - dl0; + g[1u * output_stride + q] = dl2 - dl0; + g[2u * output_stride + q] = Real(0); + } + + if (hessians_out != nullptr) { + const Real DD0 = d2phi0[i0]; + const Real DD1 = d2phi1[i1]; + const Real DD2 = d2phi2[i2]; + + const Real H00 = DD0 * v1 * v2; + const Real H11 = v0 * DD1 * v2; + const Real H22 = v0 * v1 * DD2; + const Real H01 = D0 * D1 * v2; + const Real H02 = D0 * v1 * D2; + const Real H12 = v0 * D1 * D2; + + Real* H = hessians_out + node * 9u * output_stride; + const Real h01 = H00 - H01 - H02 + H12; + H[0u * output_stride + q] = H00 - Real(2) * H01 + H11; + H[1u * output_stride + q] = h01; + H[2u * output_stride + q] = 
Real(0); + H[3u * output_stride + q] = h01; + H[4u * output_stride + q] = H00 - Real(2) * H02 + H22; + H[5u * output_stride + q] = Real(0); + H[6u * output_stride + q] = Real(0); + H[7u * output_stride + q] = Real(0); + H[8u * output_stride + q] = Real(0); + } + } + } +} + +void evaluate_triangle_simplex_basis_wedge_components_strided( + const std::vector>& simplex_exponents, + int order, + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_xy_out, + Real* SVMP_RESTRICT hessians_xx_xy_yy_out) { + const std::size_t num_nodes = simplex_exponents.size(); + if (points.empty() || num_nodes == 0u) { + return; + } + + const std::size_t sequence_size = static_cast(order + 1); + const std::size_t num_qpts = points.size(); + const bool need_gradients = gradients_xy_out != nullptr; + const bool need_hessians = hessians_xx_xy_yy_out != nullptr; + const std::size_t batch_entries = sequence_size * num_qpts; + + if (batch_entries <= kFixedSimplexBatchEntries) { + if (values_out != nullptr && + gradients_xy_out != nullptr && + hessians_xx_xy_yy_out == nullptr) { + std::array phi0_batch; + std::array phi1_batch; + std::array phi2_batch; + std::array dphi0_batch; + std::array dphi1_batch; + std::array dphi2_batch; + + for (std::size_t q = 0; q < num_qpts; ++q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l0 = Real(1) - l1 - l2; + const std::size_t offset = q * sequence_size; + simplex_lagrange_factor_sequence( + order, l0, phi0_batch.data() + offset, dphi0_batch.data() + offset, nullptr); + simplex_lagrange_factor_sequence( + order, l1, phi1_batch.data() + offset, dphi1_batch.data() + offset, nullptr); + simplex_lagrange_factor_sequence( + order, l2, phi2_batch.data() + offset, dphi2_batch.data() + offset, nullptr); + } + + for (std::size_t node = 0; node < num_nodes; ++node) { + const auto& e = simplex_exponents[node]; + const std::size_t i0 = static_cast(e[0]); 
+ const std::size_t i1 = static_cast(e[1]); + const std::size_t i2 = static_cast(e[2]); + Real* value_row = values_out + node * output_stride; + Real* g = gradients_xy_out + node * 2u * output_stride; + + for (std::size_t q = 0; q < num_qpts; ++q) { + const std::size_t offset = q * sequence_size; + const Real v0 = phi0_batch[offset + i0]; + const Real v1 = phi1_batch[offset + i1]; + const Real v2 = phi2_batch[offset + i2]; + const Real D0 = dphi0_batch[offset + i0]; + const Real D1 = dphi1_batch[offset + i1]; + const Real D2 = dphi2_batch[offset + i2]; + const Real dl0 = D0 * v1 * v2; + value_row[q] = v0 * v1 * v2; + g[0u * output_stride + q] = v0 * D1 * v2 - dl0; + g[1u * output_stride + q] = v0 * v1 * D2 - dl0; + } + } + return; + } + + if (values_out != nullptr && + gradients_xy_out != nullptr && + hessians_xx_xy_yy_out != nullptr) { + std::array phi0_batch; + std::array phi1_batch; + std::array phi2_batch; + std::array dphi0_batch; + std::array dphi1_batch; + std::array dphi2_batch; + std::array d2phi0_batch; + std::array d2phi1_batch; + std::array d2phi2_batch; + + for (std::size_t q = 0; q < num_qpts; ++q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l0 = Real(1) - l1 - l2; + const std::size_t offset = q * sequence_size; + simplex_lagrange_factor_sequence_impl( + order, l0, phi0_batch.data() + offset, + dphi0_batch.data() + offset, d2phi0_batch.data() + offset); + simplex_lagrange_factor_sequence_impl( + order, l1, phi1_batch.data() + offset, + dphi1_batch.data() + offset, d2phi1_batch.data() + offset); + simplex_lagrange_factor_sequence_impl( + order, l2, phi2_batch.data() + offset, + dphi2_batch.data() + offset, d2phi2_batch.data() + offset); + } + + for (std::size_t node = 0; node < num_nodes; ++node) { + const auto& e = simplex_exponents[node]; + const std::size_t i0 = static_cast(e[0]); + const std::size_t i1 = static_cast(e[1]); + const std::size_t i2 = static_cast(e[2]); + Real* SVMP_RESTRICT value_row 
= values_out + node * output_stride; + Real* SVMP_RESTRICT g = gradients_xy_out + node * 2u * output_stride; + Real* SVMP_RESTRICT H = hessians_xx_xy_yy_out + node * 3u * output_stride; + + for (std::size_t q = 0; q < num_qpts; ++q) { + const std::size_t offset = q * sequence_size; + const Real v0 = phi0_batch[offset + i0]; + const Real v1 = phi1_batch[offset + i1]; + const Real v2 = phi2_batch[offset + i2]; + const Real D0 = dphi0_batch[offset + i0]; + const Real D1 = dphi1_batch[offset + i1]; + const Real D2 = dphi2_batch[offset + i2]; + const Real dl0 = D0 * v1 * v2; + const Real dl1 = v0 * D1 * v2; + const Real dl2 = v0 * v1 * D2; + const Real DD0 = d2phi0_batch[offset + i0]; + const Real DD1 = d2phi1_batch[offset + i1]; + const Real DD2 = d2phi2_batch[offset + i2]; + const Real H00 = DD0 * v1 * v2; + const Real H11 = v0 * DD1 * v2; + const Real H22 = v0 * v1 * DD2; + const Real H01 = D0 * D1 * v2; + const Real H02 = D0 * v1 * D2; + const Real H12 = v0 * D1 * D2; + + value_row[q] = v0 * v1 * v2; + g[0u * output_stride + q] = dl1 - dl0; + g[1u * output_stride + q] = dl2 - dl0; + H[0u * output_stride + q] = H00 - Real(2) * H01 + H11; + H[1u * output_stride + q] = H00 - H01 - H02 + H12; + H[2u * output_stride + q] = H00 - Real(2) * H02 + H22; + } + } + return; + } + + std::array phi0_batch; + std::array phi1_batch; + std::array phi2_batch; + std::array dphi0_batch; + std::array dphi1_batch; + std::array dphi2_batch; + std::array d2phi0_batch; + std::array d2phi1_batch; + std::array d2phi2_batch; + + for (std::size_t q = 0; q < num_qpts; ++q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l0 = Real(1) - l1 - l2; + const std::size_t offset = q * sequence_size; + Real* d0_out = (need_gradients || need_hessians) ? dphi0_batch.data() + offset : nullptr; + Real* d1_out = (need_gradients || need_hessians) ? dphi1_batch.data() + offset : nullptr; + Real* d2_out = (need_gradients || need_hessians) ? 
dphi2_batch.data() + offset : nullptr; + Real* d20_out = need_hessians ? d2phi0_batch.data() + offset : nullptr; + Real* d21_out = need_hessians ? d2phi1_batch.data() + offset : nullptr; + Real* d22_out = need_hessians ? d2phi2_batch.data() + offset : nullptr; + simplex_lagrange_factor_sequence(order, l0, phi0_batch.data() + offset, d0_out, d20_out); + simplex_lagrange_factor_sequence(order, l1, phi1_batch.data() + offset, d1_out, d21_out); + simplex_lagrange_factor_sequence(order, l2, phi2_batch.data() + offset, d2_out, d22_out); + } + + for (std::size_t node = 0; node < num_nodes; ++node) { + const auto& e = simplex_exponents[node]; + const std::size_t i0 = static_cast(e[0]); + const std::size_t i1 = static_cast(e[1]); + const std::size_t i2 = static_cast(e[2]); + Real* value_row = values_out ? values_out + node * output_stride : nullptr; + Real* g = gradients_xy_out ? gradients_xy_out + node * 2u * output_stride : nullptr; + Real* H = hessians_xx_xy_yy_out ? hessians_xx_xy_yy_out + node * 3u * output_stride : nullptr; + + for (std::size_t q = 0; q < num_qpts; ++q) { + const std::size_t offset = q * sequence_size; + const Real v0 = phi0_batch[offset + i0]; + const Real v1 = phi1_batch[offset + i1]; + const Real v2 = phi2_batch[offset + i2]; + if (value_row != nullptr) { + value_row[q] = v0 * v1 * v2; + } + if (!need_gradients && !need_hessians) { + continue; + } + + const Real D0 = dphi0_batch[offset + i0]; + const Real D1 = dphi1_batch[offset + i1]; + const Real D2 = dphi2_batch[offset + i2]; + const Real dl0 = D0 * v1 * v2; + const Real dl1 = v0 * D1 * v2; + const Real dl2 = v0 * v1 * D2; + + if (gradients_xy_out != nullptr) { + g[0u * output_stride + q] = dl1 - dl0; + g[1u * output_stride + q] = dl2 - dl0; + } + + if (hessians_xx_xy_yy_out != nullptr) { + const Real DD0 = d2phi0_batch[offset + i0]; + const Real DD1 = d2phi1_batch[offset + i1]; + const Real DD2 = d2phi2_batch[offset + i2]; + const Real H00 = DD0 * v1 * v2; + const Real H11 = v0 * DD1 * v2; + 
const Real H22 = v0 * v1 * DD2; + const Real H01 = D0 * D1 * v2; + const Real H02 = D0 * v1 * D2; + const Real H12 = v0 * D1 * D2; + H[0u * output_stride + q] = H00 - Real(2) * H01 + H11; + H[1u * output_stride + q] = H00 - H01 - H02 + H12; + H[2u * output_stride + q] = H00 - Real(2) * H02 + H22; + } + } + } + return; + } + + SimplexAxisScratch& s0 = simplex_axis_scratch_slot(0); + SimplexAxisScratch& s1 = simplex_axis_scratch_slot(1); + SimplexAxisScratch& s2 = simplex_axis_scratch_slot(2); + s0.reserveFor(sequence_size); + s1.reserveFor(sequence_size); + s2.reserveFor(sequence_size); + + for (std::size_t q = 0; q < points.size(); ++q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l0 = Real(1) - l1 - l2; + + Real* d0_out = (need_gradients || need_hessians) ? s0.dphi() : nullptr; + Real* d1_out = (need_gradients || need_hessians) ? s1.dphi() : nullptr; + Real* d2_out = (need_gradients || need_hessians) ? s2.dphi() : nullptr; + Real* d20_out = need_hessians ? s0.d2phi() : nullptr; + Real* d21_out = need_hessians ? s1.d2phi() : nullptr; + Real* d22_out = need_hessians ? 
s2.d2phi() : nullptr; + simplex_lagrange_factor_sequence(order, l0, s0.phi(), d0_out, d20_out); + simplex_lagrange_factor_sequence(order, l1, s1.phi(), d1_out, d21_out); + simplex_lagrange_factor_sequence(order, l2, s2.phi(), d2_out, d22_out); + + const Real* phi0 = s0.phi(); + const Real* phi1 = s1.phi(); + const Real* phi2 = s2.phi(); + const Real* dphi0 = s0.dphi(); + const Real* dphi1 = s1.dphi(); + const Real* dphi2 = s2.dphi(); + const Real* d2phi0 = s0.d2phi(); + const Real* d2phi1 = s1.d2phi(); + const Real* d2phi2 = s2.d2phi(); + + for (std::size_t node = 0; node < num_nodes; ++node) { + const auto& e = simplex_exponents[node]; + const std::size_t i0 = static_cast(e[0]); + const std::size_t i1 = static_cast(e[1]); + const std::size_t i2 = static_cast(e[2]); + const Real v0 = phi0[i0]; + const Real v1 = phi1[i1]; + const Real v2 = phi2[i2]; + + if (values_out != nullptr) { + values_out[node * output_stride + q] = v0 * v1 * v2; + } + if (!need_gradients && !need_hessians) { + continue; + } + + const Real D0 = dphi0[i0]; + const Real D1 = dphi1[i1]; + const Real D2 = dphi2[i2]; + const Real dl0 = D0 * v1 * v2; + const Real dl1 = v0 * D1 * v2; + const Real dl2 = v0 * v1 * D2; + + if (gradients_xy_out != nullptr) { + Real* g = gradients_xy_out + node * 2u * output_stride; + g[0u * output_stride + q] = dl1 - dl0; + g[1u * output_stride + q] = dl2 - dl0; + } + + if (hessians_xx_xy_yy_out != nullptr) { + const Real DD0 = d2phi0[i0]; + const Real DD1 = d2phi1[i1]; + const Real DD2 = d2phi2[i2]; + const Real H00 = DD0 * v1 * v2; + const Real H11 = v0 * DD1 * v2; + const Real H22 = v0 * v1 * DD2; + const Real H01 = D0 * D1 * v2; + const Real H02 = D0 * v1 * D2; + const Real H12 = v0 * D1 * D2; + Real* H = hessians_xx_xy_yy_out + node * 3u * output_stride; + H[0u * output_stride + q] = H00 - Real(2) * H01 + H11; + H[1u * output_stride + q] = H00 - H01 - H02 + H12; + H[2u * output_stride + q] = H00 - Real(2) * H02 + H22; + } + } + } +} + +template +void 
evaluate_tetrahedron_simplex_basis_impl(const std::vector>& simplex_exponents, + int order, + const math::Vector& xi, + const Sink& sink) { + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l3 = xi[2]; + const Real l0 = Real(1) - l1 - l2 - l3; + + const std::size_t n = static_cast(order + 1); + SimplexAxisScratch& s0 = simplex_axis_scratch_slot(0); + SimplexAxisScratch& s1 = simplex_axis_scratch_slot(1); + SimplexAxisScratch& s2 = simplex_axis_scratch_slot(2); + SimplexAxisScratch& s3 = simplex_axis_scratch_slot(3); + s0.reserveFor(n); + s1.reserveFor(n); + s2.reserveFor(n); + s3.reserveFor(n); + + const std::size_t num_nodes = simplex_exponents.size(); + sink.prepare(num_nodes); + const bool need_values = sink.wants_values(); + const bool need_gradients = sink.wants_gradients(); + const bool need_hessians = sink.wants_hessians(); + Real* d0_out = (need_gradients || need_hessians) ? s0.dphi() : nullptr; + Real* d1_out = (need_gradients || need_hessians) ? s1.dphi() : nullptr; + Real* d2_out = (need_gradients || need_hessians) ? s2.dphi() : nullptr; + Real* d3_out = (need_gradients || need_hessians) ? s3.dphi() : nullptr; + Real* d20_out = need_hessians ? s0.d2phi() : nullptr; + Real* d21_out = need_hessians ? s1.d2phi() : nullptr; + Real* d22_out = need_hessians ? s2.d2phi() : nullptr; + Real* d23_out = need_hessians ? 
s3.d2phi() : nullptr; + + simplex_lagrange_factor_sequence(order, l0, s0.phi(), d0_out, d20_out); + simplex_lagrange_factor_sequence(order, l1, s1.phi(), d1_out, d21_out); + simplex_lagrange_factor_sequence(order, l2, s2.phi(), d2_out, d22_out); + simplex_lagrange_factor_sequence(order, l3, s3.phi(), d3_out, d23_out); + const Real* phi0 = s0.phi(); + const Real* phi1 = s1.phi(); + const Real* phi2 = s2.phi(); + const Real* phi3 = s3.phi(); + const Real* dphi0 = s0.dphi(); + const Real* dphi1 = s1.dphi(); + const Real* dphi2 = s2.dphi(); + const Real* dphi3 = s3.dphi(); + const Real* d2phi0 = s0.d2phi(); + const Real* d2phi1 = s1.d2phi(); + const Real* d2phi2 = s2.d2phi(); + const Real* d2phi3 = s3.d2phi(); + + for (std::size_t n_idx = 0; n_idx < num_nodes; ++n_idx) { + const auto& e = simplex_exponents[n_idx]; + const std::size_t i0 = static_cast(e[0]); + const std::size_t i1 = static_cast(e[1]); + const std::size_t i2 = static_cast(e[2]); + const std::size_t i3 = static_cast(e[3]); + + const Real v0 = phi0[i0]; + const Real v1 = phi1[i1]; + const Real v2 = phi2[i2]; + const Real v3 = phi3[i3]; + if (need_values) { + sink.write_value(n_idx, v0 * v1 * v2 * v3); + } + if (!need_gradients && !need_hessians) { + continue; + } + + const Real D0 = dphi0[i0]; + const Real D1 = dphi1[i1]; + const Real D2 = dphi2[i2]; + const Real D3 = dphi3[i3]; + + if (need_gradients) { + const Real dl0 = D0 * v1 * v2 * v3; + const Real dl1 = v0 * D1 * v2 * v3; + const Real dl2 = v0 * v1 * D2 * v3; + const Real dl3 = v0 * v1 * v2 * D3; + sink.write_gradient(n_idx, dl1 - dl0, dl2 - dl0, dl3 - dl0); + } + + if (need_hessians) { + const Real DD0 = d2phi0[i0]; + const Real DD1 = d2phi1[i1]; + const Real DD2 = d2phi2[i2]; + const Real DD3 = d2phi3[i3]; + + const Real H00 = DD0 * v1 * v2 * v3; + const Real H11 = v0 * DD1 * v2 * v3; + const Real H22 = v0 * v1 * DD2 * v3; + const Real H33 = v0 * v1 * v2 * DD3; + + const Real H01 = D0 * D1 * v2 * v3; + const Real H02 = D0 * v1 * D2 * v3; + const 
Real H03 = D0 * v1 * v2 * D3; + const Real H12 = v0 * D1 * D2 * v3; + const Real H13 = v0 * D1 * v2 * D3; + const Real H23 = v0 * v1 * D2 * D3; + + sink.write_hessian(n_idx, + H00 - Real(2) * H01 + H11, + H00 - Real(2) * H02 + H22, + H00 - Real(2) * H03 + H33, + H00 - H01 - H02 + H12, + H00 - H01 - H03 + H13, + H00 - H02 - H03 + H23); + } + } +} + +void evaluate_tetrahedron_simplex_basis(const std::vector>& simplex_exponents, + int order, + const math::Vector& xi, + std::vector* values, + std::vector* gradients, + std::vector* hessians) { + const SimplexVectorSink sink{values, gradients, hessians}; + evaluate_tetrahedron_simplex_basis_impl(simplex_exponents, order, xi, sink); +} + +void evaluate_tetrahedron_simplex_basis_to(const std::vector>& simplex_exponents, + int order, + const math::Vector& xi, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + const SimplexRawSink sink{values_out, gradients_out, hessians_out}; + evaluate_tetrahedron_simplex_basis_impl(simplex_exponents, order, xi, sink); +} + +void evaluate_tetrahedron_simplex_basis_strided( + const std::vector>& simplex_exponents, + int order, + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) { + const std::size_t num_nodes = simplex_exponents.size(); + if (points.empty() || num_nodes == 0u) { + return; + } + + const std::size_t sequence_size = static_cast(order + 1); + const std::size_t num_qpts = points.size(); + const bool need_gradients = gradients_out != nullptr; + const bool need_hessians = hessians_out != nullptr; + if (num_qpts == 4u && + values_out != nullptr && + !need_gradients && + !need_hessians && + try_evaluate_tetrahedron_simplex_values_q4( + simplex_exponents, order, points, output_stride, values_out)) { + return; + } + if (num_qpts == 4u && + values_out == nullptr && + need_gradients && + !need_hessians) { + switch 
(order) { + case 3: + evaluate_tetrahedron_simplex_gradients_q4<3>( + simplex_exponents, points, output_stride, gradients_out); + return; + case 4: + evaluate_tetrahedron_simplex_gradients_q4<4>( + simplex_exponents, points, output_stride, gradients_out); + return; + case 5: + evaluate_tetrahedron_simplex_gradients_q4<5>( + simplex_exponents, points, output_stride, gradients_out); + return; + case 6: + evaluate_tetrahedron_simplex_gradients_q4<6>( + simplex_exponents, points, output_stride, gradients_out); + return; + case 7: + evaluate_tetrahedron_simplex_gradients_q4<7>( + simplex_exponents, points, output_stride, gradients_out); + return; + case 8: + evaluate_tetrahedron_simplex_gradients_q4<8>( + simplex_exponents, points, output_stride, gradients_out); + return; + default: + break; + } + } + if (num_qpts == 4u && + need_hessians && + try_evaluate_tetrahedron_simplex_hessian_outputs_q4( + simplex_exponents, order, points, output_stride, + values_out, gradients_out, hessians_out)) { + return; + } + const std::size_t batch_entries = sequence_size * num_qpts; + if (batch_entries <= kFixedSimplexBatchEntries) { + if (values_out != nullptr && gradients_out == nullptr && hessians_out == nullptr) { + std::array phi0_batch; + std::array phi1_batch; + std::array phi2_batch; + std::array phi3_batch; + + for (std::size_t q = 0; q < num_qpts; ++q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l3 = xi[2]; + const Real l0 = Real(1) - l1 - l2 - l3; + const std::size_t offset = q * sequence_size; + simplex_lagrange_factor_sequence( + order, l0, phi0_batch.data() + offset, nullptr, nullptr); + simplex_lagrange_factor_sequence( + order, l1, phi1_batch.data() + offset, nullptr, nullptr); + simplex_lagrange_factor_sequence( + order, l2, phi2_batch.data() + offset, nullptr, nullptr); + simplex_lagrange_factor_sequence( + order, l3, phi3_batch.data() + offset, nullptr, nullptr); + } + + for (std::size_t node = 0; node < num_nodes; 
++node) { + const auto& e = simplex_exponents[node]; + const std::size_t i0 = static_cast(e[0]); + const std::size_t i1 = static_cast(e[1]); + const std::size_t i2 = static_cast(e[2]); + const std::size_t i3 = static_cast(e[3]); + Real* value_row = values_out + node * output_stride; + + for (std::size_t q = 0; q < num_qpts; ++q) { + const std::size_t offset = q * sequence_size; + value_row[q] = + phi0_batch[offset + i0] * + phi1_batch[offset + i1] * + phi2_batch[offset + i2] * + phi3_batch[offset + i3]; + } + } + return; + } + + if (values_out == nullptr && gradients_out != nullptr && hessians_out == nullptr) { + std::array phi0_batch; + std::array phi1_batch; + std::array phi2_batch; + std::array phi3_batch; + std::array dphi0_batch; + std::array dphi1_batch; + std::array dphi2_batch; + std::array dphi3_batch; + + for (std::size_t q = 0; q < num_qpts; ++q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l3 = xi[2]; + const Real l0 = Real(1) - l1 - l2 - l3; + const std::size_t offset = q * sequence_size; + simplex_lagrange_factor_sequence( + order, l0, phi0_batch.data() + offset, dphi0_batch.data() + offset, nullptr); + simplex_lagrange_factor_sequence( + order, l1, phi1_batch.data() + offset, dphi1_batch.data() + offset, nullptr); + simplex_lagrange_factor_sequence( + order, l2, phi2_batch.data() + offset, dphi2_batch.data() + offset, nullptr); + simplex_lagrange_factor_sequence( + order, l3, phi3_batch.data() + offset, dphi3_batch.data() + offset, nullptr); + } + + for (std::size_t node = 0; node < num_nodes; ++node) { + const auto& e = simplex_exponents[node]; + const std::size_t i0 = static_cast(e[0]); + const std::size_t i1 = static_cast(e[1]); + const std::size_t i2 = static_cast(e[2]); + const std::size_t i3 = static_cast(e[3]); + Real* g = gradients_out + node * 3u * output_stride; + + for (std::size_t q = 0; q < num_qpts; ++q) { + const std::size_t offset = q * sequence_size; + const Real v0 = 
phi0_batch[offset + i0]; + const Real v1 = phi1_batch[offset + i1]; + const Real v2 = phi2_batch[offset + i2]; + const Real v3 = phi3_batch[offset + i3]; + const Real D0 = dphi0_batch[offset + i0]; + const Real D1 = dphi1_batch[offset + i1]; + const Real D2 = dphi2_batch[offset + i2]; + const Real D3 = dphi3_batch[offset + i3]; + const Real v23 = v2 * v3; + const Real dl0 = D0 * v1 * v23; + g[0u * output_stride + q] = v0 * D1 * v23 - dl0; + g[1u * output_stride + q] = v0 * v1 * D2 * v3 - dl0; + g[2u * output_stride + q] = v0 * v1 * v2 * D3 - dl0; + } + } + return; + } + + std::array phi0_batch; + std::array phi1_batch; + std::array phi2_batch; + std::array phi3_batch; + std::array dphi0_batch; + std::array dphi1_batch; + std::array dphi2_batch; + std::array dphi3_batch; + std::array d2phi0_batch; + std::array d2phi1_batch; + std::array d2phi2_batch; + std::array d2phi3_batch; + + for (std::size_t q = 0; q < num_qpts; ++q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l3 = xi[2]; + const Real l0 = Real(1) - l1 - l2 - l3; + const std::size_t offset = q * sequence_size; + Real* d0_out = (need_gradients || need_hessians) ? dphi0_batch.data() + offset : nullptr; + Real* d1_out = (need_gradients || need_hessians) ? dphi1_batch.data() + offset : nullptr; + Real* d2_out = (need_gradients || need_hessians) ? dphi2_batch.data() + offset : nullptr; + Real* d3_out = (need_gradients || need_hessians) ? dphi3_batch.data() + offset : nullptr; + Real* d20_out = need_hessians ? d2phi0_batch.data() + offset : nullptr; + Real* d21_out = need_hessians ? d2phi1_batch.data() + offset : nullptr; + Real* d22_out = need_hessians ? d2phi2_batch.data() + offset : nullptr; + Real* d23_out = need_hessians ? 
d2phi3_batch.data() + offset : nullptr; + simplex_lagrange_factor_sequence(order, l0, phi0_batch.data() + offset, d0_out, d20_out); + simplex_lagrange_factor_sequence(order, l1, phi1_batch.data() + offset, d1_out, d21_out); + simplex_lagrange_factor_sequence(order, l2, phi2_batch.data() + offset, d2_out, d22_out); + simplex_lagrange_factor_sequence(order, l3, phi3_batch.data() + offset, d3_out, d23_out); + } + + for (std::size_t node = 0; node < num_nodes; ++node) { + const auto& e = simplex_exponents[node]; + const std::size_t i0 = static_cast(e[0]); + const std::size_t i1 = static_cast(e[1]); + const std::size_t i2 = static_cast(e[2]); + const std::size_t i3 = static_cast(e[3]); + Real* value_row = values_out ? values_out + node * output_stride : nullptr; + Real* g = gradients_out ? gradients_out + node * 3u * output_stride : nullptr; + Real* H = hessians_out ? hessians_out + node * 9u * output_stride : nullptr; + + for (std::size_t q = 0; q < num_qpts; ++q) { + const std::size_t offset = q * sequence_size; + const Real v0 = phi0_batch[offset + i0]; + const Real v1 = phi1_batch[offset + i1]; + const Real v2 = phi2_batch[offset + i2]; + const Real v3 = phi3_batch[offset + i3]; + if (value_row != nullptr) { + value_row[q] = v0 * v1 * v2 * v3; + } + if (!need_gradients && !need_hessians) { + continue; + } + + const Real D0 = dphi0_batch[offset + i0]; + const Real D1 = dphi1_batch[offset + i1]; + const Real D2 = dphi2_batch[offset + i2]; + const Real D3 = dphi3_batch[offset + i3]; + + if (gradients_out != nullptr) { + const Real dl0 = D0 * v1 * v2 * v3; + const Real dl1 = v0 * D1 * v2 * v3; + const Real dl2 = v0 * v1 * D2 * v3; + const Real dl3 = v0 * v1 * v2 * D3; + g[0u * output_stride + q] = dl1 - dl0; + g[1u * output_stride + q] = dl2 - dl0; + g[2u * output_stride + q] = dl3 - dl0; + } + + if (hessians_out != nullptr) { + const Real DD0 = d2phi0_batch[offset + i0]; + const Real DD1 = d2phi1_batch[offset + i1]; + const Real DD2 = d2phi2_batch[offset + i2]; + const 
Real DD3 = d2phi3_batch[offset + i3]; + const Real H00 = DD0 * v1 * v2 * v3; + const Real H11 = v0 * DD1 * v2 * v3; + const Real H22 = v0 * v1 * DD2 * v3; + const Real H33 = v0 * v1 * v2 * DD3; + const Real H01 = D0 * D1 * v2 * v3; + const Real H02 = D0 * v1 * D2 * v3; + const Real H03 = D0 * v1 * v2 * D3; + const Real H12 = v0 * D1 * D2 * v3; + const Real H13 = v0 * D1 * v2 * D3; + const Real H23 = v0 * v1 * D2 * D3; + const Real h01 = H00 - H01 - H02 + H12; + const Real h02 = H00 - H01 - H03 + H13; + const Real h12 = H00 - H02 - H03 + H23; + H[0u * output_stride + q] = H00 - Real(2) * H01 + H11; + H[1u * output_stride + q] = h01; + H[2u * output_stride + q] = h02; + H[3u * output_stride + q] = h01; + H[4u * output_stride + q] = H00 - Real(2) * H02 + H22; + H[5u * output_stride + q] = h12; + H[6u * output_stride + q] = h02; + H[7u * output_stride + q] = h12; + H[8u * output_stride + q] = H00 - Real(2) * H03 + H33; + } + } + } + return; + } + + SimplexAxisScratch& s0 = simplex_axis_scratch_slot(0); + SimplexAxisScratch& s1 = simplex_axis_scratch_slot(1); + SimplexAxisScratch& s2 = simplex_axis_scratch_slot(2); + SimplexAxisScratch& s3 = simplex_axis_scratch_slot(3); + s0.reserveFor(sequence_size); + s1.reserveFor(sequence_size); + s2.reserveFor(sequence_size); + s3.reserveFor(sequence_size); + + for (std::size_t q = 0; q < points.size(); ++q) { + const auto& xi = points[q]; + const Real l1 = xi[0]; + const Real l2 = xi[1]; + const Real l3 = xi[2]; + const Real l0 = Real(1) - l1 - l2 - l3; + + Real* d0_out = (need_gradients || need_hessians) ? s0.dphi() : nullptr; + Real* d1_out = (need_gradients || need_hessians) ? s1.dphi() : nullptr; + Real* d2_out = (need_gradients || need_hessians) ? s2.dphi() : nullptr; + Real* d3_out = (need_gradients || need_hessians) ? s3.dphi() : nullptr; + Real* d20_out = need_hessians ? s0.d2phi() : nullptr; + Real* d21_out = need_hessians ? s1.d2phi() : nullptr; + Real* d22_out = need_hessians ? 
s2.d2phi() : nullptr; + Real* d23_out = need_hessians ? s3.d2phi() : nullptr; + + simplex_lagrange_factor_sequence(order, l0, s0.phi(), d0_out, d20_out); + simplex_lagrange_factor_sequence(order, l1, s1.phi(), d1_out, d21_out); + simplex_lagrange_factor_sequence(order, l2, s2.phi(), d2_out, d22_out); + simplex_lagrange_factor_sequence(order, l3, s3.phi(), d3_out, d23_out); + const Real* phi0 = s0.phi(); + const Real* phi1 = s1.phi(); + const Real* phi2 = s2.phi(); + const Real* phi3 = s3.phi(); + const Real* dphi0 = s0.dphi(); + const Real* dphi1 = s1.dphi(); + const Real* dphi2 = s2.dphi(); + const Real* dphi3 = s3.dphi(); + const Real* d2phi0 = s0.d2phi(); + const Real* d2phi1 = s1.d2phi(); + const Real* d2phi2 = s2.d2phi(); + const Real* d2phi3 = s3.d2phi(); + + for (std::size_t node = 0; node < num_nodes; ++node) { + const auto& e = simplex_exponents[node]; + const std::size_t i0 = static_cast(e[0]); + const std::size_t i1 = static_cast(e[1]); + const std::size_t i2 = static_cast(e[2]); + const std::size_t i3 = static_cast(e[3]); + + const Real v0 = phi0[i0]; + const Real v1 = phi1[i1]; + const Real v2 = phi2[i2]; + const Real v3 = phi3[i3]; + if (values_out != nullptr) { + values_out[node * output_stride + q] = v0 * v1 * v2 * v3; + } + if (!need_gradients && !need_hessians) { + continue; + } + + const Real D0 = dphi0[i0]; + const Real D1 = dphi1[i1]; + const Real D2 = dphi2[i2]; + const Real D3 = dphi3[i3]; + + if (gradients_out != nullptr) { + const Real dl0 = D0 * v1 * v2 * v3; + const Real dl1 = v0 * D1 * v2 * v3; + const Real dl2 = v0 * v1 * D2 * v3; + const Real dl3 = v0 * v1 * v2 * D3; + Real* g = gradients_out + node * 3u * output_stride; + g[0u * output_stride + q] = dl1 - dl0; + g[1u * output_stride + q] = dl2 - dl0; + g[2u * output_stride + q] = dl3 - dl0; + } + + if (hessians_out != nullptr) { + const Real DD0 = d2phi0[i0]; + const Real DD1 = d2phi1[i1]; + const Real DD2 = d2phi2[i2]; + const Real DD3 = d2phi3[i3]; + + const Real H00 = DD0 * v1 * v2 
* v3; + const Real H11 = v0 * DD1 * v2 * v3; + const Real H22 = v0 * v1 * DD2 * v3; + const Real H33 = v0 * v1 * v2 * DD3; + + const Real H01 = D0 * D1 * v2 * v3; + const Real H02 = D0 * v1 * D2 * v3; + const Real H03 = D0 * v1 * v2 * D3; + const Real H12 = v0 * D1 * D2 * v3; + const Real H13 = v0 * D1 * v2 * D3; + const Real H23 = v0 * v1 * D2 * D3; + + const Real h01 = H00 - H01 - H02 + H12; + const Real h02 = H00 - H01 - H03 + H13; + const Real h12 = H00 - H02 - H03 + H23; + + Real* H = hessians_out + node * 9u * output_stride; + H[0u * output_stride + q] = H00 - Real(2) * H01 + H11; + H[1u * output_stride + q] = h01; + H[2u * output_stride + q] = h02; + H[3u * output_stride + q] = h01; + H[4u * output_stride + q] = H00 - Real(2) * H02 + H22; + H[5u * output_stride + q] = h12; + H[6u * output_stride + q] = h02; + H[7u * output_stride + q] = h12; + H[8u * output_stride + q] = H00 - Real(2) * H03 + H33; + } + } + } +} + +} // namespace detail +} // namespace basis +} // namespace FE +} // namespace svmp diff --git a/Code/Source/solver/FE/Basis/LagrangeBasisSimplex.h b/Code/Source/solver/FE/Basis/LagrangeBasisSimplex.h new file mode 100644 index 000000000..19cf725bd --- /dev/null +++ b/Code/Source/solver/FE/Basis/LagrangeBasisSimplex.h @@ -0,0 +1,78 @@ +#ifndef SVMP_FE_BASIS_LAGRANGEBASISSIMPLEX_H +#define SVMP_FE_BASIS_LAGRANGEBASISSIMPLEX_H + +// Private declarations for simplex Lagrange evaluation helpers implemented in +// LagrangeBasisSimplex.cpp. 
+ +#include "BasisFunction.h" + +#include +#include +#include + +namespace svmp { +namespace FE { +namespace basis { +namespace detail { +/* Triangle basis at one point xi, results through std::vector pointers. NOTE(review): the definition lies outside the reviewed chunk; presumably the 2D analogue of evaluate_tetrahedron_simplex_basis -- confirm. */ +void evaluate_triangle_simplex_basis(const std::vector>& simplex_exponents, + int order, + const math::Vector& xi, + std::vector* values, + std::vector* gradients, + std::vector* hessians); +/* Triangle basis at one point xi, results through raw Real buffers. NOTE(review): the definition and the buffer layout are outside the reviewed chunk -- confirm before relying on them. */ +void evaluate_triangle_simplex_basis_to(const std::vector>& simplex_exponents, + int order, + const math::Vector& xi, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out); +/* Triangle basis at several points with output_stride between rows. NOTE(review): the definition is outside the reviewed chunk; presumably laid out like the tetrahedron strided variant with 2D components -- confirm. */ +void evaluate_triangle_simplex_basis_strided( + const std::vector>& simplex_exponents, + int order, + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out); +/* Triangle basis for wedge use at several points q: value at values_out[node * output_stride + q], two gradient rows per node (d/dxi0, d/dxi1) at gradients_xy_out + node * 2 * output_stride, three Hessian rows per node (xx, xy, yy) at hessians_xx_xy_yy_out + node * 3 * output_stride. NOTE(review): only the tail of the definition was reviewed. */ +void evaluate_triangle_simplex_basis_wedge_components_strided( + const std::vector>& simplex_exponents, + int order, + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_xy_out, + Real* SVMP_RESTRICT hessians_xx_xy_yy_out); +/* Tetrahedron basis at one point xi: wraps the three pointers in a SimplexVectorSink and runs the shared templated implementation, which asks the sink which of values, gradients and Hessians to produce. */ +void evaluate_tetrahedron_simplex_basis(const std::vector>& simplex_exponents, + int order, + const math::Vector& xi, + std::vector* values, + std::vector* gradients, + std::vector* hessians); +/* Same evaluation as evaluate_tetrahedron_simplex_basis, but results go to raw Real buffers through a SimplexRawSink. NOTE(review): the per-node layout is defined by SimplexRawSink, which is not visible here. */ +void evaluate_tetrahedron_simplex_basis_to(const std::vector>& simplex_exponents, + int order, + const math::Vector& xi, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out); +/* Tetrahedron basis at several points q; returns without writing when points or simplex_exponents is empty. Value at values_out[node * output_stride + q]; three gradient rows per node at gradients_out + node * 3 * output_stride; nine Hessian rows per node (row-major symmetric 3x3) at hessians_out + node * 9 * output_stride. In the generic paths a null output pointer skips that quantity; the 4-point fast-path helpers are defined elsewhere. */ +void evaluate_tetrahedron_simplex_basis_strided( + const std::vector>& simplex_exponents, + int order, + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out); + +} // namespace detail +} // namespace basis +} // namespace FE +} // namespace svmp + +#endif // SVMP_FE_BASIS_LAGRANGEBASISSIMPLEX_H diff --git 
a/Code/Source/solver/FE/Basis/LagrangeBasisUtility.h b/Code/Source/solver/FE/Basis/LagrangeBasisUtility.h new file mode 100644 index 000000000..e622de1c6 --- /dev/null +++ b/Code/Source/solver/FE/Basis/LagrangeBasisUtility.h @@ -0,0 +1,25 @@ +#ifndef SVMP_FE_BASIS_LAGRANGEBASISUTILITY_H +#define SVMP_FE_BASIS_LAGRANGEBASISUTILITY_H + +// Private helper for LagrangeBasis internals. +// This header is only intended to be included after the FE basis scalar types +// are already available. + +namespace svmp { +namespace FE { +namespace basis { +namespace detail { +/* Returns -1 + 2*i/order: for 0 <= i <= order this is node i of order+1 equally spaced coordinates on [-1, 1] (i = 0 gives -1, i = order gives +1). Returns 0 when order <= 0, which also avoids a division by zero. i is not range-checked. */ +inline constexpr Real equispaced_pm_one_coord(int i, int order) { + if (order <= 0) { /* degenerate order: a single coordinate at the origin */ + return Real(0); + } + return Real(-1) + Real(2) * static_cast(i) / static_cast(order); +} + +} // namespace detail +} // namespace basis +} // namespace FE +} // namespace svmp + +#endif // SVMP_FE_BASIS_LAGRANGEBASISUTILITY_H diff --git a/Code/Source/solver/FE/Basis/NodeOrderingConventions.cpp b/Code/Source/solver/FE/Basis/NodeOrderingConventions.cpp new file mode 100644 index 000000000..20f743916 --- /dev/null +++ b/Code/Source/solver/FE/Basis/NodeOrderingConventions.cpp @@ -0,0 +1,818 @@ +/* Copyright (c) Stanford University, The Regents of the University of California, and others. + * + * All Rights Reserved. + * + * See License file. 
+ */ + +#include "NodeOrderingConventions.h" +#include "Basis/BasisExceptions.h" +#include "Basis/BasisTraits.h" + +#include + +namespace svmp { +namespace FE { +namespace basis { + +namespace { + +using Point = math::Vector; +using RawPoint = std::array; + +template +using NodeTable = std::array; + +struct NodeTableView { + const RawPoint* data{nullptr}; + std::size_t size{0}; +}; + +inline constexpr NodeTable<2> kLine2Nodes = {{ + {Real(-1), Real(0), Real(0)}, + {Real(1), Real(0), Real(0)}, +}}; + +inline constexpr NodeTable<3> kLine3Nodes = {{ + {Real(-1), Real(0), Real(0)}, + {Real(1), Real(0), Real(0)}, + {Real(0), Real(0), Real(0)}, +}}; + +inline constexpr NodeTable<3> kTriangle3Nodes = {{ + {Real(0), Real(0), Real(0)}, + {Real(1), Real(0), Real(0)}, + {Real(0), Real(1), Real(0)}, +}}; + +inline constexpr NodeTable<6> kTriangle6Nodes = {{ + {Real(0), Real(0), Real(0)}, + {Real(1), Real(0), Real(0)}, + {Real(0), Real(1), Real(0)}, + {Real(0.5), Real(0), Real(0)}, + {Real(0.5), Real(0.5), Real(0)}, + {Real(0), Real(0.5), Real(0)}, +}}; + +inline constexpr NodeTable<4> kQuad4Nodes = {{ + {Real(-1), Real(-1), Real(0)}, + {Real(1), Real(-1), Real(0)}, + {Real(1), Real(1), Real(0)}, + {Real(-1), Real(1), Real(0)}, +}}; + +inline constexpr NodeTable<9> kQuad9Nodes = {{ + {Real(-1), Real(-1), Real(0)}, + {Real(1), Real(-1), Real(0)}, + {Real(1), Real(1), Real(0)}, + {Real(-1), Real(1), Real(0)}, + {Real(0), Real(-1), Real(0)}, + {Real(1), Real(0), Real(0)}, + {Real(0), Real(1), Real(0)}, + {Real(-1), Real(0), Real(0)}, + {Real(0), Real(0), Real(0)}, +}}; + +inline constexpr NodeTable<8> kQuad8Nodes = {{ + {Real(-1), Real(-1), Real(0)}, + {Real(1), Real(-1), Real(0)}, + {Real(1), Real(1), Real(0)}, + {Real(-1), Real(1), Real(0)}, + {Real(0), Real(-1), Real(0)}, + {Real(1), Real(0), Real(0)}, + {Real(0), Real(1), Real(0)}, + {Real(-1), Real(0), Real(0)}, +}}; + +inline constexpr NodeTable<4> kTetra4Nodes = {{ + {Real(0), Real(0), Real(0)}, + {Real(1), Real(0), 
Real(0)}, + {Real(0), Real(1), Real(0)}, + {Real(0), Real(0), Real(1)}, +}}; + +inline constexpr NodeTable<10> kTetra10Nodes = {{ + {Real(0), Real(0), Real(0)}, + {Real(1), Real(0), Real(0)}, + {Real(0), Real(1), Real(0)}, + {Real(0), Real(0), Real(1)}, + {Real(0.5), Real(0), Real(0)}, + {Real(0.5), Real(0.5), Real(0)}, + {Real(0), Real(0.5), Real(0)}, + {Real(0), Real(0), Real(0.5)}, + {Real(0.5), Real(0), Real(0.5)}, + {Real(0), Real(0.5), Real(0.5)}, +}}; + +inline constexpr NodeTable<8> kHex8Nodes = {{ + {Real(-1), Real(-1), Real(-1)}, + {Real(1), Real(-1), Real(-1)}, + {Real(1), Real(1), Real(-1)}, + {Real(-1), Real(1), Real(-1)}, + {Real(-1), Real(-1), Real(1)}, + {Real(1), Real(-1), Real(1)}, + {Real(1), Real(1), Real(1)}, + {Real(-1), Real(1), Real(1)}, +}}; + +inline constexpr NodeTable<27> kHex27Nodes = {{ + {Real(-1), Real(-1), Real(-1)}, + {Real(1), Real(-1), Real(-1)}, + {Real(1), Real(1), Real(-1)}, + {Real(-1), Real(1), Real(-1)}, + {Real(-1), Real(-1), Real(1)}, + {Real(1), Real(-1), Real(1)}, + {Real(1), Real(1), Real(1)}, + {Real(-1), Real(1), Real(1)}, + {Real(0), Real(-1), Real(-1)}, + {Real(1), Real(0), Real(-1)}, + {Real(0), Real(1), Real(-1)}, + {Real(-1), Real(0), Real(-1)}, + {Real(0), Real(-1), Real(1)}, + {Real(1), Real(0), Real(1)}, + {Real(0), Real(1), Real(1)}, + {Real(-1), Real(0), Real(1)}, + {Real(-1), Real(-1), Real(0)}, + {Real(1), Real(-1), Real(0)}, + {Real(1), Real(1), Real(0)}, + {Real(-1), Real(1), Real(0)}, + {Real(0), Real(0), Real(-1)}, + {Real(0), Real(0), Real(1)}, + {Real(0), Real(-1), Real(0)}, + {Real(1), Real(0), Real(0)}, + {Real(0), Real(1), Real(0)}, + {Real(-1), Real(0), Real(0)}, + {Real(0), Real(0), Real(0)}, +}}; + +inline constexpr NodeTable<20> kHex20Nodes = {{ + {Real(-1), Real(-1), Real(-1)}, + {Real(1), Real(-1), Real(-1)}, + {Real(1), Real(1), Real(-1)}, + {Real(-1), Real(1), Real(-1)}, + {Real(-1), Real(-1), Real(1)}, + {Real(1), Real(-1), Real(1)}, + {Real(1), Real(1), Real(1)}, + {Real(-1), Real(1), 
Real(1)}, + {Real(0), Real(-1), Real(-1)}, + {Real(1), Real(0), Real(-1)}, + {Real(0), Real(1), Real(-1)}, + {Real(-1), Real(0), Real(-1)}, + {Real(0), Real(-1), Real(1)}, + {Real(1), Real(0), Real(1)}, + {Real(0), Real(1), Real(1)}, + {Real(-1), Real(0), Real(1)}, + {Real(-1), Real(-1), Real(0)}, + {Real(1), Real(-1), Real(0)}, + {Real(1), Real(1), Real(0)}, + {Real(-1), Real(1), Real(0)}, +}}; + +// Mesh uses conventional Hex20 ordering: corners first, then edge midpoints in +// {bottom, top, vertical} groups. The quadratic Hex20 serendipity polynomial +// table uses an axis-grouped edge order. This maps public mesh/reference index +// to the internal polynomial-table index. +constexpr std::array kHex20MeshToBasisOrder = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 13, 10, 12, + 9, 15, 11, 14, + 16, 17, 19, 18 +}; + +inline constexpr NodeTable<6> kWedge6Nodes = {{ + {Real(0), Real(0), Real(-1)}, + {Real(1), Real(0), Real(-1)}, + {Real(0), Real(1), Real(-1)}, + {Real(0), Real(0), Real(1)}, + {Real(1), Real(0), Real(1)}, + {Real(0), Real(1), Real(1)}, +}}; + +inline constexpr NodeTable<18> kWedge18Nodes = {{ + {Real(0), Real(0), Real(-1)}, + {Real(1), Real(0), Real(-1)}, + {Real(0), Real(1), Real(-1)}, + {Real(0), Real(0), Real(1)}, + {Real(1), Real(0), Real(1)}, + {Real(0), Real(1), Real(1)}, + {Real(0.5), Real(0), Real(-1)}, + {Real(0.5), Real(0.5), Real(-1)}, + {Real(0), Real(0.5), Real(-1)}, + {Real(0.5), Real(0), Real(1)}, + {Real(0.5), Real(0.5), Real(1)}, + {Real(0), Real(0.5), Real(1)}, + {Real(0), Real(0), Real(0)}, + {Real(1), Real(0), Real(0)}, + {Real(0), Real(1), Real(0)}, + {Real(0.5), Real(0), Real(0)}, + {Real(0.5), Real(0.5), Real(0)}, + {Real(0), Real(0.5), Real(0)}, +}}; + +inline constexpr NodeTable<15> kWedge15Nodes = {{ + {Real(0), Real(0), Real(-1)}, + {Real(1), Real(0), Real(-1)}, + {Real(0), Real(1), Real(-1)}, + {Real(0), Real(0), Real(1)}, + {Real(1), Real(0), Real(1)}, + {Real(0), Real(1), Real(1)}, + {Real(0.5), Real(0), Real(-1)}, + {Real(0.5), 
Real(0.5), Real(-1)}, + {Real(0), Real(0.5), Real(-1)}, + {Real(0.5), Real(0), Real(1)}, + {Real(0.5), Real(0.5), Real(1)}, + {Real(0), Real(0.5), Real(1)}, + {Real(0), Real(0), Real(0)}, + {Real(1), Real(0), Real(0)}, + {Real(0), Real(1), Real(0)}, +}}; + +inline constexpr NodeTable<5> kPyramid5Nodes = {{ + {Real(-1), Real(-1), Real(0)}, + {Real(1), Real(-1), Real(0)}, + {Real(1), Real(1), Real(0)}, + {Real(-1), Real(1), Real(0)}, + {Real(0), Real(0), Real(1)}, +}}; + +inline constexpr NodeTable<14> kPyramid14Nodes = {{ + {Real(-1), Real(-1), Real(0)}, + {Real(1), Real(-1), Real(0)}, + {Real(1), Real(1), Real(0)}, + {Real(-1), Real(1), Real(0)}, + {Real(0), Real(0), Real(1)}, + {Real(0), Real(-1), Real(0)}, + {Real(1), Real(0), Real(0)}, + {Real(0), Real(1), Real(0)}, + {Real(-1), Real(0), Real(0)}, + {Real(-0.5), Real(-0.5), Real(0.5)}, + {Real(0.5), Real(-0.5), Real(0.5)}, + {Real(0.5), Real(0.5), Real(0.5)}, + {Real(-0.5), Real(0.5), Real(0.5)}, + {Real(0), Real(0), Real(0)}, +}}; + +inline constexpr NodeTable<13> kPyramid13Nodes = {{ + {Real(-1), Real(-1), Real(0)}, + {Real(1), Real(-1), Real(0)}, + {Real(1), Real(1), Real(0)}, + {Real(-1), Real(1), Real(0)}, + {Real(0), Real(0), Real(1)}, + {Real(0), Real(-1), Real(0)}, + {Real(1), Real(0), Real(0)}, + {Real(0), Real(1), Real(0)}, + {Real(-1), Real(0), Real(0)}, + {Real(-0.5), Real(-0.5), Real(0.5)}, + {Real(0.5), Real(-0.5), Real(0.5)}, + {Real(0.5), Real(0.5), Real(0.5)}, + {Real(-0.5), Real(0.5), Real(0.5)}, +}}; + +template +constexpr NodeTableView view(const NodeTable& table) noexcept { + return NodeTableView{table.data(), table.size()}; +} + +Point to_point(const RawPoint& raw) { + return Point{raw[0], raw[1], raw[2]}; +} + +constexpr NodeTableView fixed_node_table(ElementType elem_type) noexcept { + switch (elem_type) { + case ElementType::Line2: return view(kLine2Nodes); + case ElementType::Line3: return view(kLine3Nodes); + case ElementType::Triangle3: return view(kTriangle3Nodes); + case 
ElementType::Triangle6: return view(kTriangle6Nodes); + case ElementType::Quad4: return view(kQuad4Nodes); + case ElementType::Quad8: return view(kQuad8Nodes); + case ElementType::Quad9: return view(kQuad9Nodes); + case ElementType::Tetra4: return view(kTetra4Nodes); + case ElementType::Tetra10: return view(kTetra10Nodes); + case ElementType::Hex8: return view(kHex8Nodes); + case ElementType::Hex20: return view(kHex20Nodes); + case ElementType::Hex27: return view(kHex27Nodes); + case ElementType::Wedge6: return view(kWedge6Nodes); + case ElementType::Wedge15: return view(kWedge15Nodes); + case ElementType::Wedge18: return view(kWedge18Nodes); + case ElementType::Pyramid5: return view(kPyramid5Nodes); + case ElementType::Pyramid13: return view(kPyramid13Nodes); + case ElementType::Pyramid14: return view(kPyramid14Nodes); + default: return {}; + } +} + +constexpr NodeTableView fixed_complete_lagrange_table(ElementType canonical_type, + int order) noexcept { + switch (canonical_type) { + case ElementType::Line2: + return order == 1 ? view(kLine2Nodes) : + order == 2 ? view(kLine3Nodes) : NodeTableView{}; + case ElementType::Triangle3: + return order == 1 ? view(kTriangle3Nodes) : + order == 2 ? view(kTriangle6Nodes) : NodeTableView{}; + case ElementType::Quad4: + return order == 1 ? view(kQuad4Nodes) : + order == 2 ? view(kQuad9Nodes) : NodeTableView{}; + case ElementType::Tetra4: + return order == 1 ? view(kTetra4Nodes) : + order == 2 ? view(kTetra10Nodes) : NodeTableView{}; + case ElementType::Hex8: + return order == 1 ? view(kHex8Nodes) : + order == 2 ? view(kHex27Nodes) : NodeTableView{}; + case ElementType::Wedge6: + return order == 1 ? view(kWedge6Nodes) : + order == 2 ? view(kWedge18Nodes) : NodeTableView{}; + case ElementType::Pyramid5: + return order == 1 ? view(kPyramid5Nodes) : + order == 2 ? 
view(kPyramid14Nodes) : NodeTableView{}; + default: + return {}; + } +} + +Real line_coord_pm_one(int i, int order) { + if (order <= 0) { + return Real(0); + } + return Real(-1) + Real(2) * static_cast(i) / static_cast(order); +} + +Real line_coord_zero_one(int i, int order) { + if (order <= 0) { + return Real(0); + } + return static_cast(i) / static_cast(order); +} + +void append_triangle_face_interior(std::vector& nodes, + const Point& v0, + const Point& v1, + const Point& v2, + int order) { + for (int c = 1; c <= order - 2; ++c) { + for (int b = 1; b <= order - c - 1; ++b) { + const int a = order - b - c; + const Real la = static_cast(a) / static_cast(order); + const Real lb = static_cast(b) / static_cast(order); + const Real lc = static_cast(c) / static_cast(order); + nodes.push_back(v0 * la + v1 * lb + v2 * lc); + } + } +} + +std::vector generate_line_nodes(int order) { + if (order == 0) { + return {Point{Real(0), Real(0), Real(0)}}; + } + + std::vector nodes; + nodes.reserve(static_cast(order + 1)); + nodes.push_back(Point{Real(-1), Real(0), Real(0)}); + nodes.push_back(Point{Real(1), Real(0), Real(0)}); + for (int i = 1; i < order; ++i) { + nodes.push_back(Point{line_coord_pm_one(i, order), Real(0), Real(0)}); + } + return nodes; +} + +std::vector generate_triangle_nodes(int order) { + if (order == 0) { + return {Point{Real(1) / Real(3), Real(1) / Real(3), Real(0)}}; + } + + std::vector nodes; + nodes.reserve(static_cast((order + 1) * (order + 2) / 2)); + + nodes.push_back(Point{Real(0), Real(0), Real(0)}); + nodes.push_back(Point{Real(1), Real(0), Real(0)}); + nodes.push_back(Point{Real(0), Real(1), Real(0)}); + + for (int m = 1; m < order; ++m) { + nodes.push_back(Point{line_coord_zero_one(m, order), Real(0), Real(0)}); + } + for (int m = 1; m < order; ++m) { + nodes.push_back(Point{line_coord_zero_one(order - m, order), + line_coord_zero_one(m, order), Real(0)}); + } + for (int m = 1; m < order; ++m) { + nodes.push_back(Point{Real(0), 
line_coord_zero_one(order - m, order), Real(0)}); + } + + append_triangle_face_interior( + nodes, + Point{Real(0), Real(0), Real(0)}, + Point{Real(1), Real(0), Real(0)}, + Point{Real(0), Real(1), Real(0)}, + order); + + return nodes; +} + +std::vector generate_quad_nodes(int order) { + if (order == 0) { + return {Point{Real(0), Real(0), Real(0)}}; + } + + std::vector nodes; + nodes.reserve(static_cast((order + 1) * (order + 1))); + + nodes.push_back(Point{Real(-1), Real(-1), Real(0)}); + nodes.push_back(Point{Real(1), Real(-1), Real(0)}); + nodes.push_back(Point{Real(1), Real(1), Real(0)}); + nodes.push_back(Point{Real(-1), Real(1), Real(0)}); + + for (int i = 1; i < order; ++i) { + nodes.push_back(Point{line_coord_pm_one(i, order), Real(-1), Real(0)}); + } + for (int j = 1; j < order; ++j) { + nodes.push_back(Point{Real(1), line_coord_pm_one(j, order), Real(0)}); + } + for (int i = order - 1; i >= 1; --i) { + nodes.push_back(Point{line_coord_pm_one(i, order), Real(1), Real(0)}); + } + for (int j = order - 1; j >= 1; --j) { + nodes.push_back(Point{Real(-1), line_coord_pm_one(j, order), Real(0)}); + } + + for (int j = 1; j < order; ++j) { + for (int i = 1; i < order; ++i) { + nodes.push_back(Point{line_coord_pm_one(i, order), line_coord_pm_one(j, order), Real(0)}); + } + } + + return nodes; +} + +std::vector generate_tetra_nodes(int order) { + if (order == 0) { + return {Point{Real(0.25), Real(0.25), Real(0.25)}}; + } + + std::vector nodes; + nodes.reserve(static_cast((order + 1) * (order + 2) * (order + 3) / 6)); + + const Point verts[] = { + Point{Real(0), Real(0), Real(0)}, + Point{Real(1), Real(0), Real(0)}, + Point{Real(0), Real(1), Real(0)}, + Point{Real(0), Real(0), Real(1)}, + }; + for (const auto& v : verts) { + nodes.push_back(v); + } + + const int edges[6][2] = { + {0, 1}, {1, 2}, {2, 0}, {0, 3}, {1, 3}, {2, 3} + }; + for (const auto& edge : edges) { + for (int m = 1; m < order; ++m) { + const Real t = static_cast(m) / static_cast(order); + 
nodes.push_back(verts[edge[0]] * (Real(1) - t) + verts[edge[1]] * t); + } + } + + const int faces[4][3] = { + {0, 1, 2}, + {0, 1, 3}, + {1, 2, 3}, + {0, 2, 3}, + }; + for (const auto& face : faces) { + append_triangle_face_interior( + nodes, + verts[face[0]], + verts[face[1]], + verts[face[2]], + order); + } + + for (int l = 1; l <= order - 3; ++l) { + for (int k = 1; k <= order - l - 2; ++k) { + for (int j = 1; j <= order - l - k - 1; ++j) { + const Real x = static_cast(j) / static_cast(order); + const Real y = static_cast(k) / static_cast(order); + const Real z = static_cast(l) / static_cast(order); + nodes.push_back(Point{x, y, z}); + } + } + } + + return nodes; +} + +std::vector generate_hex_nodes(int order) { + if (order == 0) { + return {Point{Real(0), Real(0), Real(0)}}; + } + + std::vector nodes; + nodes.reserve(static_cast((order + 1) * (order + 1) * (order + 1))); + + const Point verts[] = { + Point{Real(-1), Real(-1), Real(-1)}, + Point{Real(1), Real(-1), Real(-1)}, + Point{Real(1), Real(1), Real(-1)}, + Point{Real(-1), Real(1), Real(-1)}, + Point{Real(-1), Real(-1), Real(1)}, + Point{Real(1), Real(-1), Real(1)}, + Point{Real(1), Real(1), Real(1)}, + Point{Real(-1), Real(1), Real(1)}, + }; + for (const auto& v : verts) { + nodes.push_back(v); + } + + const int edges[12][2] = { + {0, 1}, {1, 2}, {2, 3}, {3, 0}, + {4, 5}, {5, 6}, {6, 7}, {7, 4}, + {0, 4}, {1, 5}, {2, 6}, {3, 7}, + }; + for (const auto& edge : edges) { + for (int m = 1; m < order; ++m) { + const Real t = static_cast(m) / static_cast(order); + nodes.push_back(verts[edge[0]] * (Real(1) - t) + verts[edge[1]] * t); + } + } + + for (int j = 1; j < order; ++j) { + for (int i = 1; i < order; ++i) { + nodes.push_back(Point{line_coord_pm_one(i, order), line_coord_pm_one(j, order), Real(-1)}); + } + } + for (int j = 1; j < order; ++j) { + for (int i = 1; i < order; ++i) { + nodes.push_back(Point{line_coord_pm_one(i, order), line_coord_pm_one(j, order), Real(1)}); + } + } + for (int k = 1; k < order; 
++k) { + for (int i = 1; i < order; ++i) { + nodes.push_back(Point{line_coord_pm_one(i, order), Real(-1), line_coord_pm_one(k, order)}); + } + } + for (int k = 1; k < order; ++k) { + for (int j = 1; j < order; ++j) { + nodes.push_back(Point{Real(1), line_coord_pm_one(j, order), line_coord_pm_one(k, order)}); + } + } + for (int k = 1; k < order; ++k) { + for (int i = order - 1; i >= 1; --i) { + nodes.push_back(Point{line_coord_pm_one(i, order), Real(1), line_coord_pm_one(k, order)}); + } + } + for (int k = 1; k < order; ++k) { + for (int j = order - 1; j >= 1; --j) { + nodes.push_back(Point{Real(-1), line_coord_pm_one(j, order), line_coord_pm_one(k, order)}); + } + } + + for (int k = 1; k < order; ++k) { + for (int j = 1; j < order; ++j) { + for (int i = 1; i < order; ++i) { + nodes.push_back(Point{line_coord_pm_one(i, order), + line_coord_pm_one(j, order), + line_coord_pm_one(k, order)}); + } + } + } + + return nodes; +} + +std::vector generate_wedge_nodes(int order) { + if (order == 0) { + return {Point{Real(1) / Real(3), Real(1) / Real(3), Real(0)}}; + } + + std::vector nodes; + nodes.reserve(static_cast((order + 1) * (order + 1) * (order + 2) / 2)); + + const Point verts[] = { + Point{Real(0), Real(0), Real(-1)}, + Point{Real(1), Real(0), Real(-1)}, + Point{Real(0), Real(1), Real(-1)}, + Point{Real(0), Real(0), Real(1)}, + Point{Real(1), Real(0), Real(1)}, + Point{Real(0), Real(1), Real(1)}, + }; + for (const auto& v : verts) { + nodes.push_back(v); + } + + const int edges[9][2] = { + {0, 1}, {1, 2}, {2, 0}, + {3, 4}, {4, 5}, {5, 3}, + {0, 3}, {1, 4}, {2, 5}, + }; + for (const auto& edge : edges) { + for (int m = 1; m < order; ++m) { + const Real t = static_cast(m) / static_cast(order); + nodes.push_back(verts[edge[0]] * (Real(1) - t) + verts[edge[1]] * t); + } + } + + append_triangle_face_interior( + nodes, verts[0], verts[1], verts[2], order); + append_triangle_face_interior( + nodes, verts[3], verts[4], verts[5], order); + + for (int r = 1; r < order; ++r) { 
+ const Real z = line_coord_pm_one(r, order); + for (int m = 1; m < order; ++m) { + const Real t = static_cast(m) / static_cast(order); + nodes.push_back(Point{t, Real(0), z}); + } + for (int m = 1; m < order; ++m) { + const Real t = static_cast(m) / static_cast(order); + nodes.push_back(Point{Real(1) - t, t, z}); + } + for (int m = 1; m < order; ++m) { + const Real t = static_cast(m) / static_cast(order); + nodes.push_back(Point{Real(0), Real(1) - t, z}); + } + } + + for (int r = 1; r < order; ++r) { + const Real z = line_coord_pm_one(r, order); + for (int c = 1; c <= order - 2; ++c) { + for (int b = 1; b <= order - c - 1; ++b) { + const Real x = static_cast(b) / static_cast(order); + const Real y = static_cast(c) / static_cast(order); + nodes.push_back(Point{x, y, z}); + } + } + } + + return nodes; +} + +std::vector generate_pyramid_nodes(int order) { + if (order == 0) { + return {Point{Real(0), Real(0), Real(0.25)}}; + } + + std::vector nodes; + nodes.reserve(static_cast((order + 1) * (order + 2) * (2 * order + 3) / 6)); + + nodes.push_back(Point{Real(-1), Real(-1), Real(0)}); + nodes.push_back(Point{Real(1), Real(-1), Real(0)}); + nodes.push_back(Point{Real(1), Real(1), Real(0)}); + nodes.push_back(Point{Real(-1), Real(1), Real(0)}); + nodes.push_back(Point{Real(0), Real(0), Real(1)}); + + for (int m = 1; m < order; ++m) { + nodes.push_back(Point{line_coord_pm_one(m, order), Real(-1), Real(0)}); + } + for (int m = 1; m < order; ++m) { + nodes.push_back(Point{Real(1), line_coord_pm_one(m, order), Real(0)}); + } + for (int m = order - 1; m >= 1; --m) { + nodes.push_back(Point{line_coord_pm_one(m, order), Real(1), Real(0)}); + } + for (int m = order - 1; m >= 1; --m) { + nodes.push_back(Point{Real(-1), line_coord_pm_one(m, order), Real(0)}); + } + + for (int level = 1; level < order; ++level) { + const Real z = static_cast(level) / static_cast(order); + const Real scale = Real(1) - z; + nodes.push_back(Point{-scale, -scale, z}); + nodes.push_back(Point{scale, 
-scale, z}); + nodes.push_back(Point{scale, scale, z}); + nodes.push_back(Point{-scale, scale, z}); + } + + for (int j = 1; j < order; ++j) { + for (int i = 1; i < order; ++i) { + nodes.push_back(Point{line_coord_pm_one(i, order), line_coord_pm_one(j, order), Real(0)}); + } + } + + for (int level = 1; level < order - 1; ++level) { + const int n = order - level; + const Real z = static_cast(level) / static_cast(order); + const Real scale = Real(1) - z; + + for (int m = 1; m < n; ++m) { + const Real s = line_coord_pm_one(m, n) * scale; + nodes.push_back(Point{s, -scale, z}); + } + for (int m = 1; m < n; ++m) { + const Real s = line_coord_pm_one(m, n) * scale; + nodes.push_back(Point{scale, s, z}); + } + for (int m = n - 1; m >= 1; --m) { + const Real s = line_coord_pm_one(m, n) * scale; + nodes.push_back(Point{s, scale, z}); + } + for (int m = n - 1; m >= 1; --m) { + const Real s = line_coord_pm_one(m, n) * scale; + nodes.push_back(Point{-scale, s, z}); + } + } + + for (int level = 1; level < order - 1; ++level) { + const int n = order - level; + const Real z = static_cast(level) / static_cast(order); + const Real scale = Real(1) - z; + for (int j = 1; j < n; ++j) { + for (int i = 1; i < n; ++i) { + nodes.push_back(Point{line_coord_pm_one(i, n) * scale, + line_coord_pm_one(j, n) * scale, + z}); + } + } + } + + return nodes; +} + +} // namespace + +math::Vector ReferenceNodeLayout::get_node_coords(ElementType elem_type, + std::size_t local_node) { + const auto table = fixed_node_table(elem_type); + if (table.data != nullptr && local_node < table.size) { + return to_point(table.data[local_node]); + } + + throw BasisNodeOrderingException("Invalid element type or node index in ReferenceNodeLayout::get_node_coords", + __FILE__, __LINE__, __func__); +} + +std::size_t ReferenceNodeLayout::num_nodes(ElementType elem_type) { + const auto table = fixed_node_table(elem_type); + if (table.data != nullptr) { + return table.size; + } + + throw BasisNodeOrderingException("Unknown 
element type in ReferenceNodeLayout::num_nodes", + __FILE__, __LINE__, __func__); +} + +std::vector> +ReferenceNodeLayout::get_lagrange_node_coords(ElementType canonical_type, int order) { + if (order < 0) { + throw BasisNodeOrderingException("ReferenceNodeLayout::get_lagrange_node_coords requires non-negative order", + __FILE__, __LINE__, __func__); + } + + const ElementType type = canonical_lagrange_type(canonical_type); + const auto fixed_table = fixed_complete_lagrange_table(type, order); + if (fixed_table.data != nullptr) { + std::vector nodes; + nodes.reserve(fixed_table.size); + for (std::size_t i = 0; i < fixed_table.size; ++i) { + nodes.push_back(to_point(fixed_table.data[i])); + } + return nodes; + } + + switch (type) { + case ElementType::Point1: + return {Point{Real(0), Real(0), Real(0)}}; + case ElementType::Line2: + return generate_line_nodes(order); + case ElementType::Triangle3: + return generate_triangle_nodes(order); + case ElementType::Quad4: + return generate_quad_nodes(order); + case ElementType::Tetra4: + return generate_tetra_nodes(order); + case ElementType::Hex8: + return generate_hex_nodes(order); + case ElementType::Wedge6: + return generate_wedge_nodes(order); + case ElementType::Pyramid5: + return generate_pyramid_nodes(order); + case ElementType::Quad8: + case ElementType::Hex20: + case ElementType::Wedge15: + case ElementType::Pyramid13: + throw BasisNodeOrderingException("ReferenceNodeLayout::get_lagrange_node_coords does not support serendipity topologies", + __FILE__, __LINE__, __func__); + default: + throw BasisNodeOrderingException("ReferenceNodeLayout::get_lagrange_node_coords: unsupported topology", + __FILE__, __LINE__, __func__); + } +} + +std::span ReferenceNodeLayout::mesh_to_basis_ordering(ElementType elem_type) { + if (elem_type == ElementType::Hex20) { + return std::span( + kHex20MeshToBasisOrder.data(), + kHex20MeshToBasisOrder.size()); + } + return {}; +} + +bool ReferenceNodeLayout::is_simplex(ElementType elem_type) { 
+ return svmp::FE::basis::is_simplex(elem_type); +} + +bool ReferenceNodeLayout::is_tensor_product(ElementType elem_type) { + return svmp::FE::basis::is_tensor_product(elem_type); +} + +} // namespace basis +} // namespace FE +} // namespace svmp diff --git a/Code/Source/solver/FE/Basis/NodeOrderingConventions.h b/Code/Source/solver/FE/Basis/NodeOrderingConventions.h new file mode 100644 index 000000000..52af4d932 --- /dev/null +++ b/Code/Source/solver/FE/Basis/NodeOrderingConventions.h @@ -0,0 +1,538 @@ +/* Copyright (c) Stanford University, The Regents of the University of California, and others. + * + * All Rights Reserved. + * + * See License file. + */ + +#ifndef SVMP_FE_BASIS_NODEORDERINGCONVENTIONS_H +#define SVMP_FE_BASIS_NODEORDERINGCONVENTIONS_H + +#include "Types.h" +#include "Math/Vector.h" +#include +#include + +/** + * @file NodeOrderingConventions.h + * @brief Documentation of node ordering conventions for all element types + * + * This file provides comprehensive documentation of the node ordering + * conventions used throughout the FE library. These orderings are consistent + * with VTK conventions and must be matched exactly when interfacing with + * the Mesh library. + * + * IMPORTANT: The FE library (Basis, Quadrature, Geometry) uses "node" to refer + * to degrees of freedom locations on reference elements. The Mesh library uses + * "vertex" for geometry vertices and "cell" for mesh elements. When interfacing + * between the two, ensure consistent ordering. 
+ * + * Reference Element Conventions: + * - Line: xi in [-1, 1] + * - Quad: (xi, eta) in [-1, 1] x [-1, 1] + * - Hex: (xi, eta, zeta) in [-1, 1]^3 + * - Triangle: (xi, eta) in simplex with vertices (0,0), (1,0), (0,1) + * - Tetrahedron: (xi, eta, zeta) in simplex with vertices + * (0,0,0), (1,0,0), (0,1,0), (0,0,1) + * - Wedge: Triangle base x line height, zeta in [-1, 1] + * - Pyramid: Quad base at z=0, apex at (0, 0, 1) + * + * + * ============================================================================= + * 1D ELEMENTS + * ============================================================================= + * + * Line2 (Linear Line) + * ------------------- + * 0---------1 + * | | + * xi=-1 xi=+1 + * + * Node 0: xi = -1 + * Node 1: xi = +1 + * + * + * Line3 (Quadratic Line) + * ---------------------- + * 0----2----1 + * | | | + * xi=-1 0 xi=+1 + * + * Node 0: xi = -1 + * Node 1: xi = +1 + * Node 2: xi = 0 (mid-edge) + * + * + * ============================================================================= + * 2D QUADRILATERAL ELEMENTS + * ============================================================================= + * + * Quad4 (Bilinear Quadrilateral) + * ------------------------------ + * + * 3-----------2 + * | | + * | | + * | | + * 0-----------1 + * + * Node 0: (xi, eta) = (-1, -1) + * Node 1: (xi, eta) = (+1, -1) + * Node 2: (xi, eta) = (+1, +1) + * Node 3: (xi, eta) = (-1, +1) + * + * + * Quad8 (Serendipity Quadrilateral) + * --------------------------------- + * + * 3-----6-----2 + * | | + * 7 5 + * | | + * 0-----4-----1 + * + * Corners (same as Quad4): + * Node 0: (-1, -1) + * Node 1: (+1, -1) + * Node 2: (+1, +1) + * Node 3: (-1, +1) + * + * Mid-edge nodes: + * Node 4: ( 0, -1) (edge 0-1) + * Node 5: (+1, 0) (edge 1-2) + * Node 6: ( 0, +1) (edge 2-3) + * Node 7: (-1, 0) (edge 3-0) + * + * + * Quad9 (Biquadratic Quadrilateral) + * --------------------------------- + * + * 3-----6-----2 + * | | + * 7 8 5 + * | | + * 0-----4-----1 + * + * Same as Quad8 plus: 
+ * Node 8: (0, 0) (center) + * + * + * ============================================================================= + * 3D HEXAHEDRAL ELEMENTS + * ============================================================================= + * + * Hex8 (Trilinear Hexahedron) + * --------------------------- + * + * 7-----------6 + * /| /| + * / | / | + * 4-----------5 | + * | | | | + * | 3--------|--2 + * | / | / + * |/ |/ + * 0-----------1 + * + * Bottom face (zeta = -1): + * Node 0: (xi, eta, zeta) = (-1, -1, -1) + * Node 1: (xi, eta, zeta) = (+1, -1, -1) + * Node 2: (xi, eta, zeta) = (+1, +1, -1) + * Node 3: (xi, eta, zeta) = (-1, +1, -1) + * + * Top face (zeta = +1): + * Node 4: (xi, eta, zeta) = (-1, -1, +1) + * Node 5: (xi, eta, zeta) = (+1, -1, +1) + * Node 6: (xi, eta, zeta) = (+1, +1, +1) + * Node 7: (xi, eta, zeta) = (-1, +1, +1) + * + * + * Hex20 (Serendipity Hexahedron) + * ------------------------------ + * + * 7-----14-----6 + * /| /| + * 15 | 13 | + * / 19 / 18 + * 4-----12-----5 | + * | | | | + * | 3-----10-|---2 + * 16 / 17 / + * | 11 | 9 + * |/ |/ + * 0------8-----1 + * + * Corners (same as Hex8): Nodes 0-7 + * + * Mid-edge nodes on bottom face (zeta = -1): + * Node 8: ( 0, -1, -1) (edge 0-1) + * Node 9: (+1, 0, -1) (edge 1-2) + * Node 10: ( 0, +1, -1) (edge 2-3) + * Node 11: (-1, 0, -1) (edge 3-0) + * + * Mid-edge nodes on top face (zeta = +1): + * Node 12: ( 0, -1, +1) (edge 4-5) + * Node 13: (+1, 0, +1) (edge 5-6) + * Node 14: ( 0, +1, +1) (edge 6-7) + * Node 15: (-1, 0, +1) (edge 7-4) + * + * Mid-edge nodes on vertical edges: + * Node 16: (-1, -1, 0) (edge 0-4) + * Node 17: (+1, -1, 0) (edge 1-5) + * Node 18: (+1, +1, 0) (edge 2-6) + * Node 19: (-1, +1, 0) (edge 3-7) + * + * + * Hex27 (Triquadratic Hexahedron) + * ------------------------------- + * Same as Hex20 plus face-center and body-center nodes: + * + * Face centers: + * Node 20: ( 0, 0, -1) (bottom face) + * Node 21: ( 0, 0, +1) (top face) + * Node 22: ( 0, -1, 0) (front face) + * Node 23: (+1, 0, 
0) (right face) + * Node 24: ( 0, +1, 0) (back face) + * Node 25: (-1, 0, 0) (left face) + * + * Body center: + * Node 26: (0, 0, 0) + * + * + * ============================================================================= + * 2D TRIANGULAR ELEMENTS + * ============================================================================= + * + * Triangle3 (Linear Triangle) + * --------------------------- + * + * 2 + * |\ + * | \ + * | \ + * | \ + * 0----1 + * + * Reference: (xi, eta) simplex with vertices at: + * Node 0: (xi, eta) = (0, 0) + * Node 1: (xi, eta) = (1, 0) + * Node 2: (xi, eta) = (0, 1) + * + * + * Triangle6 (Quadratic Triangle) + * ------------------------------ + * + * 2 + * |\ + * | \ + * 5 4 + * | \ + * 0--3--1 + * + * Corners: Nodes 0-2 (same as Triangle3) + * + * Mid-edge nodes: + * Node 3: (0.5, 0) (edge 0-1) + * Node 4: (0.5, 0.5) (edge 1-2) + * Node 5: ( 0, 0.5) (edge 2-0) + * + * + * ============================================================================= + * 3D TETRAHEDRAL ELEMENTS + * ============================================================================= + * + * Tetrahedron4 (Linear Tetrahedron) + * --------------------------------- + * + * 3 + * /|\ + * / | \ + * / | \ + * / | \ + * / | \ + * 0-----|-----2 + * \ | / + * \ | / + * \ | / + * \ | / + * \|/ + * 1 + * + * Reference: (xi, eta, zeta) simplex with vertices at: + * Node 0: (0, 0, 0) + * Node 1: (1, 0, 0) + * Node 2: (0, 1, 0) + * Node 3: (0, 0, 1) + * + * + * Tetrahedron10 (Quadratic Tetrahedron) + * ------------------------------------- + * Corners: Nodes 0-3 (same as Tet4) + * + * Mid-edge nodes: + * Node 4: (0.5, 0, 0) (edge 0-1) + * Node 5: (0.5, 0.5, 0) (edge 1-2) + * Node 6: ( 0, 0.5, 0) (edge 2-0) + * Node 7: ( 0, 0, 0.5) (edge 0-3) + * Node 8: (0.5, 0, 0.5) (edge 1-3) + * Node 9: ( 0, 0.5, 0.5) (edge 2-3) + * + * + * ============================================================================= + * 3D WEDGE (PRISM) ELEMENTS + * 
============================================================================= + * + * Wedge6 (Linear Wedge) + * --------------------- + * + * 5 + * /|\ + * / | \ + * / | \ + * 3---|---4 + * | 2 | + * | / \ | + * | / \ | + * |/ \| + * 0-------1 + * + * Reference: Triangle base at zeta = -1, top at zeta = +1 + * + * Bottom face (zeta = -1): + * Node 0: (0, 0, -1) + * Node 1: (1, 0, -1) + * Node 2: (0, 1, -1) + * + * Top face (zeta = +1): + * Node 3: (0, 0, +1) + * Node 4: (1, 0, +1) + * Node 5: (0, 1, +1) + * + * + * Wedge15 (Quadratic Wedge) + * ------------------------- + * Corners: Nodes 0-5 (same as Wedge6) + * + * Mid-edge nodes on bottom face: + * Node 6: (0.5, 0, -1) (edge 0-1) + * Node 7: (0.5, 0.5, -1) (edge 1-2) + * Node 8: ( 0, 0.5, -1) (edge 2-0) + * + * Mid-edge nodes on top face: + * Node 9: (0.5, 0, +1) (edge 3-4) + * Node 10: (0.5, 0.5, +1) (edge 4-5) + * Node 11: ( 0, 0.5, +1) (edge 5-3) + * + * Mid-edge nodes on vertical edges: + * Node 12: (0, 0, 0) (edge 0-3) + * Node 13: (1, 0, 0) (edge 1-4) + * Node 14: (0, 1, 0) (edge 2-5) + * + * + * Wedge18 (Complete Quadratic Wedge) + * ---------------------------------- + * Corners and mid-edges: Nodes 0-14 (same as Wedge15) + * + * Face-center nodes on quadrilateral faces: + * Node 15: (0.5, 0.0, 0.0) (face with vertices 0-1-4-3, y = 0) + * Node 16: (0.5, 0.5, 0.0) (face with vertices 1-2-5-4, x + y = 1) + * Node 17: (0.0, 0.5, 0.0) (face with vertices 2-0-3-5, x = 0) + * + * + * ============================================================================= + * 3D PYRAMID ELEMENTS + * ============================================================================= + * + * Pyramid5 (Linear Pyramid) + * ------------------------- + * + * 4 + * /|\ + * / | \ + * / | \ + * / | \ + * 3----|----2 + * | | | + * | + | (apex projects to center of base) + * | | + * 0---------1 + * + * Reference: Quad base in xi-eta plane at zeta = 0, apex at zeta = 1 + * + * Base (zeta = 0): + * Node 0: (-1, -1, 0) + * Node 1: (+1, -1, 
0) + * Node 2: (+1, +1, 0) + * Node 3: (-1, +1, 0) + * + * Apex: + * Node 4: (0, 0, 1) + * + * + * Pyramid13 (Quadratic Pyramid) + * ----------------------------- + * Corners: Nodes 0-4 (same as Pyramid5) + * + * Mid-edge nodes on base: + * Node 5: ( 0, -1, 0) (edge 0-1) + * Node 6: (+1, 0, 0) (edge 1-2) + * Node 7: ( 0, +1, 0) (edge 2-3) + * Node 8: (-1, 0, 0) (edge 3-0) + * + * Mid-edge nodes to apex: + * Node 9: (-0.5, -0.5, 0.5) (edge 0-4) + * Node 10: (+0.5, -0.5, 0.5) (edge 1-4) + * Node 11: (+0.5, +0.5, 0.5) (edge 2-4) + * Node 12: (-0.5, +0.5, 0.5) (edge 3-4) + * + * + * Pyramid14 (Quadratic Rational Pyramid) + * -------------------------------------- + * + * This retained low-order compatibility layout matches the generated + * complete-family quadratic Lagrange ordering for the reference pyramid with + * base (-1,-1,0)..(1,1,0) and apex at (0,0,1). Nodes 0-12 coincide with the + * Pyramid13 layout; node 13 is the base center. + * + * Base corners (same as Pyramid5): + * Node 0: (-1, -1, 0) + * Node 1: (+1, -1, 0) + * Node 2: (+1, +1, 0) + * Node 3: (-1, +1, 0) + * + * Apex: + * Node 4: (0, 0, 1) + * + * Base mid-edges (same as Pyramid13): + * Node 5: ( 0, -1, 0) (edge 0-1) + * Node 6: (+1, 0, 0) (edge 1-2) + * Node 7: ( 0, +1, 0) (edge 2-3) + * Node 8: (-1, 0, 0) (edge 3-0) + * + * Mid-edges to apex (same as Pyramid13): + * Node 9: (-0.5, -0.5, 0.5) (edge 0-4) + * Node 10: (+0.5, -0.5, 0.5) (edge 1-4) + * Node 11: (+0.5, +0.5, 0.5) (edge 2-4) + * Node 12: (-0.5, +0.5, 0.5) (edge 3-4) + * + * Base center: + * Node 13: (0, 0, 0) + * + * + * ============================================================================= + * NOTES ON VTK COMPATIBILITY + * ============================================================================= + * + * The node orderings above are consistent with VTK cell types: + * + * VTK_LINE (3) -> Line2 + * VTK_QUADRATIC_EDGE (21) -> Line3 + * VTK_TRIANGLE (5) -> Triangle3 + * VTK_QUADRATIC_TRIANGLE (22) -> Triangle6 + * VTK_QUAD (9) 
-> Quad4 + * VTK_QUADRATIC_QUAD (23) -> Quad8 + * VTK_BIQUADRATIC_QUAD (28) -> Quad9 + * VTK_TETRA (10) -> Tetrahedron4 + * VTK_QUADRATIC_TETRA (24) -> Tetrahedron10 + * VTK_HEXAHEDRON (12) -> Hex8 + * VTK_QUADRATIC_HEXAHEDRON (25) -> Hex20 + * VTK_TRIQUADRATIC_HEXAHEDRON (29) -> Hex27 + * VTK_WEDGE (13) -> Wedge6 + * VTK_QUADRATIC_WEDGE (26) -> Wedge15 + * VTK_BIQUADRATIC_QUADRATIC_WEDGE (32) -> Wedge18 + * VTK_PYRAMID (14) -> Pyramid5 + * VTK_QUADRATIC_PYRAMID (27) -> Pyramid13 + * + * + * ============================================================================= + * BARYCENTRIC COORDINATES + * ============================================================================= + * + * For simplex elements, barycentric coordinates (lambda_0, ..., lambda_n) + * satisfy sum(lambda_i) = 1. + * + * Triangle: + * lambda_0 = 1 - xi - eta + * lambda_1 = xi + * lambda_2 = eta + * + * Tetrahedron: + * lambda_0 = 1 - xi - eta - zeta + * lambda_1 = xi + * lambda_2 = eta + * lambda_3 = zeta + * + */ + +#include + +namespace svmp { +namespace FE { +namespace basis { + +/** + * @brief Basis-side reference node coordinate queries + * + * This is intentionally named differently from `svmp::NodeOrdering` in Mesh, + * which handles mesh-format permutations rather than reference basis layouts. + */ +class ReferenceNodeLayout { +public: + /** + * @brief Get reference coordinates for a node + * @param elem_type Element type + * @param local_node Local node index (0-based) + * + * Complete-family low-order Lagrange aliases (`Line2/3`, `Triangle3/6`, + * `Quad4/9`, `Tetra4/10`, `Hex8/27`, `Wedge6/18`, `Pyramid5/14`) are + * served by the generated arbitrary-order Lagrange ordering path. Explicit + * hard-coded tables remain only for serendipity-only enums such as + * `Quad8`, `Hex20`, `Wedge15`, and `Pyramid13`. 
+ * + * @return Reference coordinates (xi, eta, zeta) + */ + static math::Vector get_node_coords(ElementType elem_type, std::size_t local_node); + + /** + * @brief Get number of nodes for an element type + * + * The low-order complete-family Lagrange aliases share the same generated + * ordering path used by `get_node_coords`. + */ + static std::size_t num_nodes(ElementType elem_type); + + /** + * @brief Generate complete-family Lagrange node coordinates for a canonical topology and order + * + * This covers arbitrary-order complete nodal Lagrange spaces on the + * canonical topologies `Line2`, `Triangle3`, `Quad4`, `Tetra4`, `Hex8`, + * `Wedge6`, and `Pyramid5`. Serendipity variants are intentionally + * excluded. + */ + static std::vector> + get_lagrange_node_coords(ElementType canonical_type, int order); + + /** + * @brief Optional mapping from mesh/reference node order to internal basis order + * + * Returns an empty span when the public node order is already the basis + * table order or no special mapping is registered. + */ + static std::span mesh_to_basis_ordering(ElementType elem_type); + + /** + * @brief Check if element is a simplex (triangle, tetrahedron) + */ + static bool is_simplex(ElementType elem_type); + + /** + * @brief Check if element uses tensor-product topology + */ + static bool is_tensor_product(ElementType elem_type); +}; + +} // namespace basis +} // namespace FE +} // namespace svmp + +#endif // SVMP_FE_BASIS_NODEORDERINGCONVENTIONS_H diff --git a/Code/Source/solver/FE/Basis/PyramidModalBasis.h b/Code/Source/solver/FE/Basis/PyramidModalBasis.h new file mode 100644 index 000000000..1ecdae282 --- /dev/null +++ b/Code/Source/solver/FE/Basis/PyramidModalBasis.h @@ -0,0 +1,265 @@ +#ifndef SVMP_FE_BASIS_PYRAMIDMODALBASIS_H +#define SVMP_FE_BASIS_PYRAMIDMODALBASIS_H + +// Shared rational/modal pyramid helpers for scalar complete-family and spectral +// pyramid bases. 
The degenerate z=1 top plane is evaluated by its apex limit; +// callers that reject non-apex top-plane queries must validate before calling. + +#include "BasisFunction.h" +#include "BasisTolerance.h" +#include +#include +#include + +namespace svmp { +namespace FE { +namespace basis { +namespace pyramid_modal { + +struct Term { + int px{0}; + int py{0}; + int pz{0}; + int denom_power{0}; +}; + +struct EvaluationPoint { + Real x{Real(0)}; + Real y{Real(0)}; + Real z{Real(0)}; + Real t{Real(1)}; + bool top_plane{false}; + std::vector x_powers; + std::vector y_powers; + std::vector z_powers; + std::vector t_powers; +}; + +inline std::vector build_terms(int order) { + std::vector terms; + terms.reserve(static_cast((order + 1) * (order + 2) * + (2 * order + 3) / 6)); + for (int pz = 0; pz <= order; ++pz) { + const int n = order - pz; + for (int py = 0; py <= n; ++py) { + for (int px = 0; px <= n; ++px) { + terms.push_back(Term{px, py, pz, std::min(px, py)}); + } + } + } + return terms; +} + +inline bool on_degenerate_top_plane(const math::Vector& xi, + Real tolerance = detail::basis_scaled_tolerance()) { + return std::abs(Real(1) - xi[2]) <= tolerance; +} + +inline void fill_powers(Real base, int max_power, std::vector& powers) { + powers.assign(static_cast(max_power + 1), Real(1)); + for (int p = 1; p <= max_power; ++p) { + powers[static_cast(p)] = + powers[static_cast(p - 1)] * base; + } +} + +inline void prepare_evaluation_point(const math::Vector& xi, + int max_px, + int max_py, + int max_pz, + int max_denom_power, + EvaluationPoint& point) { + point.x = xi[0]; + point.y = xi[1]; + point.z = xi[2]; + point.t = Real(1) - point.z; + point.top_plane = on_degenerate_top_plane(xi); + + fill_powers(point.x, std::max(max_px, 0), point.x_powers); + fill_powers(point.y, std::max(max_py, 0), point.y_powers); + fill_powers(point.z, std::max(max_pz, 0), point.z_powers); + if (point.top_plane) [[unlikely]] { + point.t_powers.assign(1u, Real(1)); + } else { + fill_powers(point.t, 
std::max(max_denom_power + 2, 0), point.t_powers); + } +} + +inline void prepare_evaluation_point(const std::vector& terms, + const math::Vector& xi, + EvaluationPoint& point) { + int max_px = 0; + int max_py = 0; + int max_pz = 0; + int max_denom_power = 0; + for (const Term& term : terms) { + max_px = std::max(max_px, term.px); + max_py = std::max(max_py, term.py); + max_pz = std::max(max_pz, term.pz); + max_denom_power = std::max(max_denom_power, term.denom_power); + } + prepare_evaluation_point(xi, max_px, max_py, max_pz, max_denom_power, point); +} + +inline void evaluate_term(const Term& term, + const EvaluationPoint& point, + Real& value, + Gradient* gradient = nullptr, + Hessian* hessian = nullptr) { + const auto pow_x = [&](int p) -> Real { + return point.x_powers[static_cast(p)]; + }; + const auto pow_y = [&](int p) -> Real { + return point.y_powers[static_cast(p)]; + }; + const auto pow_z = [&](int p) -> Real { + return point.z_powers[static_cast(p)]; + }; + const auto pow_t = [&](int p) -> Real { + return point.t_powers[static_cast(p)]; + }; + + if (point.top_plane) [[unlikely]] { + if (term.px == 0 && term.py == 0) { + value = pow_z(term.pz); + } else { + value = Real(0); + } + if (gradient != nullptr) { + *gradient = Gradient{}; + if (term.px == 0 && term.py == 0 && term.pz > 0) { + (*gradient)[2] = static_cast(term.pz) * pow_z(term.pz - 1); + } + } + if (hessian != nullptr) { + *hessian = Hessian{}; + if (term.px == 0 && term.py == 0 && term.pz > 1) { + (*hessian)(2, 2) = + static_cast(term.pz * (term.pz - 1)) * + pow_z(term.pz - 2); + } + } + return; + } + + const Real base = pow_x(term.px) * pow_y(term.py) * pow_z(term.pz); + const Real denom = pow_t(term.denom_power); + value = base / denom; + + if (gradient != nullptr) { + *gradient = Gradient{}; + if (term.px > 0) { + (*gradient)[0] = + static_cast(term.px) * pow_x(term.px - 1) * + pow_y(term.py) * pow_z(term.pz) / denom; + } + if (term.py > 0) { + (*gradient)[1] = + static_cast(term.py) * 
pow_x(term.px) * + pow_y(term.py - 1) * pow_z(term.pz) / denom; + } + + Real gz = Real(0); + if (term.pz > 0) { + gz += static_cast(term.pz) * pow_x(term.px) * + pow_y(term.py) * pow_z(term.pz - 1) / denom; + } + if (term.denom_power > 0) { + gz += static_cast(term.denom_power) * base / pow_t(term.denom_power + 1); + } + (*gradient)[2] = gz; + } + + if (hessian == nullptr) { + return; + } + + *hessian = Hessian{}; + if (term.px > 1) { + (*hessian)(0, 0) = + static_cast(term.px * (term.px - 1)) * + pow_x(term.px - 2) * pow_y(term.py) * pow_z(term.pz) / denom; + } + if (term.py > 1) { + (*hessian)(1, 1) = + static_cast(term.py * (term.py - 1)) * + pow_x(term.px) * pow_y(term.py - 2) * pow_z(term.pz) / denom; + } + if (term.px > 0 && term.py > 0) { + const Real hxy = + static_cast(term.px * term.py) * + pow_x(term.px - 1) * pow_y(term.py - 1) * pow_z(term.pz) / denom; + (*hessian)(0, 1) = hxy; + (*hessian)(1, 0) = hxy; + } + + if (term.px > 0) { + Real hxz = + static_cast(term.px) * pow_x(term.px - 1) * + pow_y(term.py) / denom; + if (term.pz > 0) { + hxz *= static_cast(term.pz) * pow_z(term.pz - 1); + } else { + hxz = Real(0); + } + if (term.denom_power > 0) { + hxz += static_cast(term.px * term.denom_power) * + pow_x(term.px - 1) * pow_y(term.py) * + pow_z(term.pz) / pow_t(term.denom_power + 1); + } + (*hessian)(0, 2) = hxz; + (*hessian)(2, 0) = hxz; + } + + if (term.py > 0) { + Real hyz = + static_cast(term.py) * pow_x(term.px) * + pow_y(term.py - 1) / denom; + if (term.pz > 0) { + hyz *= static_cast(term.pz) * pow_z(term.pz - 1); + } else { + hyz = Real(0); + } + if (term.denom_power > 0) { + hyz += static_cast(term.py * term.denom_power) * + pow_x(term.px) * pow_y(term.py - 1) * + pow_z(term.pz) / pow_t(term.denom_power + 1); + } + (*hessian)(1, 2) = hyz; + (*hessian)(2, 1) = hyz; + } + + Real hzz = Real(0); + if (term.pz > 1) { + hzz += static_cast(term.pz * (term.pz - 1)) * + pow_x(term.px) * pow_y(term.py) * pow_z(term.pz - 2) / denom; + } + if (term.pz > 0 && 
term.denom_power > 0) { + hzz += static_cast(2 * term.pz * term.denom_power) * + pow_x(term.px) * pow_y(term.py) * + pow_z(term.pz - 1) / pow_t(term.denom_power + 1); + } + if (term.denom_power > 0) { + hzz += static_cast(term.denom_power * (term.denom_power + 1)) * + base / pow_t(term.denom_power + 2); + } + (*hessian)(2, 2) = hzz; +} + +inline void evaluate_term(const Term& term, + const math::Vector& xi, + Real& value, + Gradient* gradient = nullptr, + Hessian* hessian = nullptr) { + EvaluationPoint point; + prepare_evaluation_point( + xi, term.px, term.py, term.pz, term.denom_power, point); + evaluate_term(term, point, value, gradient, hessian); +} + +} // namespace pyramid_modal +} // namespace basis +} // namespace FE +} // namespace svmp + +#endif // SVMP_FE_BASIS_PYRAMIDMODALBASIS_H diff --git a/Code/Source/solver/FE/Basis/SerendipityBasis.cpp b/Code/Source/solver/FE/Basis/SerendipityBasis.cpp new file mode 100644 index 000000000..309fd18be --- /dev/null +++ b/Code/Source/solver/FE/Basis/SerendipityBasis.cpp @@ -0,0 +1,882 @@ +/* Copyright (c) Stanford University, The Regents of the University of California, and others. + * + * All Rights Reserved. + * + * See License file. + */ + +#include "SerendipityBasis.h" +#include "LagrangeBasis.h" +#include "NodeOrderingConventions.h" +#include "Math/DenseLinearAlgebra.h" +#include "Math/IntegerMath.h" + +#include +#include +#include +#include +#include + +namespace svmp { +namespace FE { +namespace basis { + +using math::pow_int; + +namespace { +using Vec3 = math::Vector; + +int quad_serendipity_superlinear_degree(int ax, int ay) { + return (ax > 1 ? ax : 0) + (ay > 1 ? 
ay : 0); +} + +std::vector> quad_serendipity_exponents(int order) { + std::vector> exponents; + for (int ay = 0; ay <= order; ++ay) { + for (int ax = 0; ax <= order; ++ax) { + if (quad_serendipity_superlinear_degree(ax, ay) <= order) { + exponents.push_back({ax, ay}); + } + } + } + return exponents; +} + +std::vector quad_serendipity_nodes(int order, std::size_t total_size) { + std::vector nodes; + if (order <= 0) { + return nodes; + } + + const Real inv_order = Real(1) / Real(order); + + nodes.push_back(Vec3{Real(-1), Real(-1), Real(0)}); + nodes.push_back(Vec3{Real(1), Real(-1), Real(0)}); + nodes.push_back(Vec3{Real(1), Real(1), Real(0)}); + nodes.push_back(Vec3{Real(-1), Real(1), Real(0)}); + + for (int i = 1; i < order; ++i) { + nodes.push_back(Vec3{Real(-1) + Real(2 * i) * inv_order, Real(-1), Real(0)}); + } + for (int i = 1; i < order; ++i) { + nodes.push_back(Vec3{Real(1), Real(-1) + Real(2 * i) * inv_order, Real(0)}); + } + for (int i = 1; i < order; ++i) { + nodes.push_back(Vec3{Real(1) - Real(2 * i) * inv_order, Real(1), Real(0)}); + } + for (int i = 1; i < order; ++i) { + nodes.push_back(Vec3{Real(-1), Real(1) - Real(2 * i) * inv_order, Real(0)}); + } + + if (nodes.size() > total_size) { + throw BasisConstructionException( + "SerendipityBasis: quadrilateral serendipity boundary nodes exceed requested size", + __FILE__, __LINE__, __func__); + } + + const std::size_t interior_count = total_size - nodes.size(); + if (interior_count == 0u) { + return nodes; + } + + std::vector interior_candidates; + interior_candidates.reserve(static_cast((order - 1) * (order - 1))); + for (int j = 1; j < order; ++j) { + for (int i = 1; i < order; ++i) { + interior_candidates.push_back( + Vec3{Real(-1) + Real(2 * i) * inv_order, + Real(-1) + Real(2 * j) * inv_order, + Real(0)}); + } + } + + std::sort(interior_candidates.begin(), interior_candidates.end(), + [](const Vec3& a, const Vec3& b) { + const Real a_linf = std::max(std::abs(a[0]), std::abs(a[1])); + const Real b_linf 
= std::max(std::abs(b[0]), std::abs(b[1])); + if (a_linf != b_linf) { + return a_linf < b_linf; + } + + const Real a_l1 = std::abs(a[0]) + std::abs(a[1]); + const Real b_l1 = std::abs(b[0]) + std::abs(b[1]); + if (a_l1 != b_l1) { + return a_l1 < b_l1; + } + + if (a[1] != b[1]) { + return a[1] < b[1]; + } + return a[0] < b[0]; + }); + + if (interior_count > interior_candidates.size()) { + throw BasisConstructionException( + "SerendipityBasis: insufficient quadrilateral interior nodes for requested serendipity order", + __FILE__, __LINE__, __func__); + } + + nodes.insert(nodes.end(), + interior_candidates.begin(), + interior_candidates.begin() + static_cast(interior_count)); + return nodes; +} + +std::vector invert_dense_matrix(std::vector matrix, int n, const char* label) { + return math::invert_dense_matrix( + std::move(matrix), + static_cast(n), + std::string("SerendipityBasis interpolation matrix for ") + label); +} + +std::vector quad_serendipity_inverse_vandermonde( + std::span nodes, + std::span> exponents, + int order) { + const int n = static_cast(nodes.size()); + if (n == 0 || exponents.size() != nodes.size()) { + throw BasisConstructionException( + "SerendipityBasis: invalid quadrilateral serendipity interpolation setup", + __FILE__, __LINE__, __func__); + } + + std::vector vandermonde(static_cast(n * n), Real(0)); + auto idx = [n](int row, int col) -> std::size_t { + return static_cast(row * n + col); + }; + + for (int row = 0; row < n; ++row) { + const Real x = nodes[static_cast(row)][0]; + const Real y = nodes[static_cast(row)][1]; + for (int col = 0; col < n; ++col) { + const auto [ax, ay] = exponents[static_cast(col)]; + vandermonde[idx(row, col)] = pow_int(x, ax) * pow_int(y, ay); + } + } + + const std::string label = "Quad order " + std::to_string(order); + return invert_dense_matrix(std::move(vandermonde), n, label.c_str()); +} +constexpr std::array kPyramid13CenterRedistribution = { + Real(-0.25), Real(-0.25), Real(-0.25), Real(-0.25), + Real(0), 
+ Real(0.5), Real(0.5), Real(0.5), Real(0.5), + Real(0), Real(0), Real(0), Real(0) +}; + +constexpr std::array, 15> kWedge15MonomialExponents = {{ + {{0, 0, 0}}, + {{0, 0, 1}}, + {{0, 0, 2}}, + {{0, 1, 0}}, + {{0, 1, 1}}, + {{0, 1, 2}}, + {{0, 2, 0}}, + {{0, 2, 1}}, + {{1, 0, 0}}, + {{1, 0, 1}}, + {{1, 0, 2}}, + {{1, 1, 0}}, + {{1, 1, 1}}, + {{2, 0, 0}}, + {{2, 0, 1}} +}}; + +constexpr std::array, 15> kWedge15Coefficients = {{ + {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0}}, + {{-0.5, 0, 0, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {{0.5, -0, -0, 0.5, -0, -0, -0, -0, -0, -0, -0, -0, -1, -0, -0}}, + {{-1, 0, -1, -1, 0, -1, 0, 0, 2, 0, 0, 2, -1, 0, 1}}, + {{1.5, 0, 0.5, -1.5, 0, -0.5, 0, 0, -2, 0, 0, 2, 0, 0, 0}}, + {{-0.5, -0, 0.5, -0.5, -0, 0.5, -0, -0, -0, -0, -0, -0, 1, -0, -1}}, + {{1, 0, 1, 1, 0, 1, 0, 0, -2, 0, 0, -2, 0, 0, 0}}, + {{-1, 0, -1, 1, 0, 1, 0, 0, 2, 0, 0, -2, 0, 0, 0}}, + {{-1, -1, 0, -1, -1, 0, 2, 0, 0, 2, 0, 0, -1, 1, 0}}, + {{1.5, 0.5, 0, -1.5, -0.5, 0, -2, 0, 0, 2, 0, 0, 0, 0, 0}}, + {{-0.5, 0.5, -0, -0.5, 0.5, -0, -0, -0, -0, -0, -0, -0, 1, -1, -0}}, + {{2, 0, -0, 2, 0, -0, -2, 2, -2, -2, 2, -2, -0, -0, -0}}, + {{-2, 0, 0, 2, 0, 0, 2, -2, 2, -2, 2, -2, 0, 0, 0}}, + {{1, 1, -0, 1, 1, -0, -2, -0, -0, -2, -0, -0, -0, -0, -0}}, + {{-1, -1, -0, 1, 1, -0, 2, -0, -0, -2, -0, -0, -0, -0, -0}} +}}; + +static const int hex20_monomial_exponents[20][3] = { + {0, 0, 0}, {0, 0, 1}, {0, 0, 2}, {0, 1, 0}, {0, 1, 1}, + {0, 1, 2}, {0, 2, 0}, {0, 2, 1}, {1, 0, 0}, {1, 0, 1}, + {1, 0, 2}, {1, 1, 0}, {1, 1, 1}, {1, 1, 2}, {1, 2, 0}, + {1, 2, 1}, {2, 0, 0}, {2, 0, 1}, {2, 1, 0}, {2, 1, 1} +}; + +static const Real hex20_coeffs[20][20] = { + {-0.25, -0.25, -0.25, -0.25, -0.25, -0.25, -0.25, -0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25}, + {0.125, 0.125, 0.125, 0.125, -0.125, -0.125, -0.125, -0.125, -0.25, 0.25, -0.25, 0.25, -0.25, -0.25, 0.25, 0.25, 0, 0, 0, 0}, + {0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0, 0, 0, 0, 
0, 0, 0, 0, -0.25, -0.25, -0.25, -0.25}, + {0.125, 0.125, -0.125, -0.125, 0.125, 0.125, -0.125, -0.125, -0.25, -0.25, 0.25, 0.25, 0, 0, 0, 0, -0.25, -0.25, 0.25, 0.25}, + {0, 0, 0, 0, 0, 0, 0, 0, 0.25, -0.25, -0.25, 0.25, 0, 0, 0, 0, 0, 0, 0, 0}, + {-0.125, -0.125, 0.125, 0.125, -0.125, -0.125, 0.125, 0.125, 0, 0, 0, 0, 0, 0, 0, 0, 0.25, 0.25, -0.25, -0.25}, + {0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0, 0, 0, 0, -0.25, -0.25, -0.25, -0.25, 0, 0, 0, 0}, + {-0.125, -0.125, -0.125, -0.125, 0.125, 0.125, 0.125, 0.125, 0, 0, 0, 0, 0.25, 0.25, -0.25, -0.25, 0, 0, 0, 0}, + {0.125, -0.125, -0.125, 0.125, 0.125, -0.125, -0.125, 0.125, 0, 0, 0, 0, -0.25, 0.25, -0.25, 0.25, -0.25, 0.25, -0.25, 0.25}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.25, -0.25, -0.25, 0.25, 0, 0, 0, 0}, + {-0.125, 0.125, 0.125, -0.125, -0.125, 0.125, 0.125, -0.125, 0, 0, 0, 0, 0, 0, 0, 0, 0.25, -0.25, 0.25, -0.25}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.25, -0.25, -0.25, 0.25}, + {-0.125, 0.125, -0.125, 0.125, 0.125, -0.125, 0.125, -0.125, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0.125, -0.125, 0.125, -0.125, 0.125, -0.125, 0.125, -0.125, 0, 0, 0, 0, 0, 0, 0, 0, -0.25, 0.25, 0.25, -0.25}, + {-0.125, 0.125, 0.125, -0.125, -0.125, 0.125, 0.125, -0.125, 0, 0, 0, 0, 0.25, -0.25, 0.25, -0.25, 0, 0, 0, 0}, + {0.125, -0.125, -0.125, 0.125, -0.125, 0.125, 0.125, -0.125, 0, 0, 0, 0, -0.25, 0.25, 0.25, -0.25, 0, 0, 0, 0}, + {0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, -0.25, -0.25, -0.25, -0.25, 0, 0, 0, 0, 0, 0, 0, 0}, + {-0.125, -0.125, -0.125, -0.125, 0.125, 0.125, 0.125, 0.125, 0.25, -0.25, 0.25, -0.25, 0, 0, 0, 0, 0, 0, 0, 0}, + {-0.125, -0.125, 0.125, 0.125, -0.125, -0.125, 0.125, 0.125, 0.25, 0.25, -0.25, -0.25, 0, 0, 0, 0, 0, 0, 0, 0}, + {0.125, 0.125, -0.125, -0.125, -0.125, -0.125, 0.125, 0.125, -0.25, 0.25, 0.25, -0.25, 0, 0, 0, 0, 0, 0, 0, 0} +}; + +inline std::array quadratic_powers(Real x) { + return {Real(1), x, x * x}; +} + +void eval_hex20_internal(Real 
r, Real s, Real t, Real* internal_vals) { + const auto rp = quadratic_powers(r); + const auto sp = quadratic_powers(s); + const auto tp = quadratic_powers(t); + Real phi[20]; + for (int j = 0; j < 20; ++j) { + const int a = hex20_monomial_exponents[j][0]; + const int b = hex20_monomial_exponents[j][1]; + const int c = hex20_monomial_exponents[j][2]; + phi[j] = rp[static_cast(a)] * + sp[static_cast(b)] * + tp[static_cast(c)]; + } + for (int i = 0; i < 20; ++i) { + Real v = Real(0); + for (int j = 0; j < 20; ++j) { + v += hex20_coeffs[j][i] * phi[j]; + } + internal_vals[i] = v; + } +} + +void eval_hex20_grad_internal(Real r, Real s, Real t, Gradient* internal_grads) { + const auto rp = quadratic_powers(r); + const auto sp = quadratic_powers(s); + const auto tp = quadratic_powers(t); + Real dphi_dr[20], dphi_ds[20], dphi_dt[20]; + for (int j = 0; j < 20; ++j) { + const int a = hex20_monomial_exponents[j][0]; + const int b = hex20_monomial_exponents[j][1]; + const int c = hex20_monomial_exponents[j][2]; + + dphi_dr[j] = (a > 0) ? Real(a) * rp[static_cast(a - 1)] * + sp[static_cast(b)] * + tp[static_cast(c)] + : Real(0); + dphi_ds[j] = (b > 0) ? rp[static_cast(a)] * + Real(b) * sp[static_cast(b - 1)] * + tp[static_cast(c)] + : Real(0); + dphi_dt[j] = (c > 0) ? 
rp[static_cast(a)] * + sp[static_cast(b)] * + Real(c) * tp[static_cast(c - 1)] + : Real(0); + } + + for (int i = 0; i < 20; ++i) { + Real gr = Real(0), gs = Real(0), gt = Real(0); + for (int j = 0; j < 20; ++j) { + gr += hex20_coeffs[j][i] * dphi_dr[j]; + gs += hex20_coeffs[j][i] * dphi_ds[j]; + gt += hex20_coeffs[j][i] * dphi_dt[j]; + } + internal_grads[i][0] = gr; + internal_grads[i][1] = gs; + internal_grads[i][2] = gt; + } +} + +void eval_hex20_hess_internal(Real r, Real s, Real t, Hessian* internal_hessians) { + const auto rp = quadratic_powers(r); + const auto sp = quadratic_powers(s); + const auto tp = quadratic_powers(t); + Real d2phi_drr[20], d2phi_dss[20], d2phi_dtt[20]; + Real d2phi_drs[20], d2phi_drt[20], d2phi_dst[20]; + for (int j = 0; j < 20; ++j) { + const int a = hex20_monomial_exponents[j][0]; + const int b = hex20_monomial_exponents[j][1]; + const int c = hex20_monomial_exponents[j][2]; + + d2phi_drr[j] = (a > 1) ? Real(a * (a - 1)) * + rp[static_cast(a - 2)] * + sp[static_cast(b)] * + tp[static_cast(c)] + : Real(0); + d2phi_dss[j] = (b > 1) ? rp[static_cast(a)] * + Real(b * (b - 1)) * + sp[static_cast(b - 2)] * + tp[static_cast(c)] + : Real(0); + d2phi_dtt[j] = (c > 1) ? rp[static_cast(a)] * + sp[static_cast(b)] * + Real(c * (c - 1)) * + tp[static_cast(c - 2)] + : Real(0); + d2phi_drs[j] = (a > 0 && b > 0) ? Real(a * b) * + rp[static_cast(a - 1)] * + sp[static_cast(b - 1)] * + tp[static_cast(c)] + : Real(0); + d2phi_drt[j] = (a > 0 && c > 0) ? Real(a * c) * + rp[static_cast(a - 1)] * + sp[static_cast(b)] * + tp[static_cast(c - 1)] + : Real(0); + d2phi_dst[j] = (b > 0 && c > 0) ? 
rp[static_cast(a)] * + Real(b * c) * + sp[static_cast(b - 1)] * + tp[static_cast(c - 1)] + : Real(0); + } + + for (int i = 0; i < 20; ++i) { + Hessian H{}; + for (int j = 0; j < 20; ++j) { + H(0, 0) += hex20_coeffs[j][i] * d2phi_drr[j]; + H(1, 1) += hex20_coeffs[j][i] * d2phi_dss[j]; + H(2, 2) += hex20_coeffs[j][i] * d2phi_dtt[j]; + H(0, 1) += hex20_coeffs[j][i] * d2phi_drs[j]; + H(0, 2) += hex20_coeffs[j][i] * d2phi_drt[j]; + H(1, 2) += hex20_coeffs[j][i] * d2phi_dst[j]; + } + H(1, 0) = H(0, 1); + H(2, 0) = H(0, 2); + H(2, 1) = H(1, 2); + internal_hessians[i] = H; + } +} + +void eval_wedge15_polynomial(Real r, + Real s, + Real t, + Real* values, + Gradient* gradients, + Hessian* hessians) { + Real phi[15]{}; + Real dr[15]{}; + Real ds[15]{}; + Real dt[15]{}; + Real drr[15]{}; + Real dss[15]{}; + Real dtt[15]{}; + Real drs[15]{}; + Real drt[15]{}; + Real dst[15]{}; + + const auto rp = quadratic_powers(r); + const auto sp = quadratic_powers(s); + const auto tp = quadratic_powers(t); + + for (int j = 0; j < 15; ++j) { + const auto& exponent = kWedge15MonomialExponents[static_cast(j)]; + const int a = exponent[0]; + const int b = exponent[1]; + const int c = exponent[2]; + const auto ar = static_cast(a); + const auto bs = static_cast(b); + const auto ct = static_cast(c); + + const Real ra = rp[ar]; + const Real sb = sp[bs]; + const Real tc = tp[ct]; + + if (values) { + phi[j] = ra * sb * tc; + } + if (gradients) { + dr[j] = (a > 0) ? Real(a) * rp[ar - 1u] * sb * tc : Real(0); + ds[j] = (b > 0) ? ra * Real(b) * sp[bs - 1u] * tc : Real(0); + dt[j] = (c > 0) ? ra * sb * Real(c) * tp[ct - 1u] : Real(0); + } + if (hessians) { + drr[j] = (a > 1) ? Real(a * (a - 1)) * rp[ar - 2u] * sb * tc : Real(0); + dss[j] = (b > 1) ? ra * Real(b * (b - 1)) * sp[bs - 2u] * tc : Real(0); + dtt[j] = (c > 1) ? ra * sb * Real(c * (c - 1)) * tp[ct - 2u] : Real(0); + drs[j] = (a > 0 && b > 0) ? Real(a * b) * rp[ar - 1u] * sp[bs - 1u] * tc : Real(0); + drt[j] = (a > 0 && c > 0) ? 
Real(a * c) * rp[ar - 1u] * sb * tp[ct - 1u] : Real(0); + dst[j] = (b > 0 && c > 0) ? ra * Real(b * c) * sp[bs - 1u] * tp[ct - 1u] : Real(0); + } + } + + for (int i = 0; i < 15; ++i) { + Real value = Real(0); + Real gr = Real(0); + Real gs = Real(0); + Real gt = Real(0); + Hessian H{}; + for (int j = 0; j < 15; ++j) { + const Real coefficient = + kWedge15Coefficients[static_cast(j)][static_cast(i)]; + if (values) { + value += coefficient * phi[j]; + } + if (gradients) { + gr += coefficient * dr[j]; + gs += coefficient * ds[j]; + gt += coefficient * dt[j]; + } + if (hessians) { + H(0, 0) += coefficient * drr[j]; + H(1, 1) += coefficient * dss[j]; + H(2, 2) += coefficient * dtt[j]; + H(0, 1) += coefficient * drs[j]; + H(0, 2) += coefficient * drt[j]; + H(1, 2) += coefficient * dst[j]; + } + } + + const std::size_t index = static_cast(i); + if (values) { + values[index] = value; + } + if (gradients) { + gradients[index][0] = gr; + gradients[index][1] = gs; + gradients[index][2] = gt; + } + if (hessians) { + H(1, 0) = H(0, 1); + H(2, 0) = H(0, 2); + H(2, 1) = H(1, 2); + hessians[index] = H; + } + } +} + +} // namespace + +SerendipityBasis::SerendipityBasis(ElementType type, int order, bool geometry_mode) + : element_type_(type), dimension_(0), order_(order), size_(0), geometry_mode_(geometry_mode) { + if (type == ElementType::Quad4 || type == ElementType::Quad8) { + dimension_ = 2; + if (order_ < 1) { + order_ = 1; + } + if (type == ElementType::Quad8 && order_ != 2) { + throw BasisConfigurationException( + "SerendipityBasis: Quad8 is only valid for quadratic order 2; use Quad4 for higher-order quadrilateral serendipity", + __FILE__, __LINE__, __func__); + } + quad_monomial_exponents_ = quad_serendipity_exponents(order_); + size_ = quad_monomial_exponents_.size(); + nodes_ = quad_serendipity_nodes(order_, size_); + if (nodes_.size() != size_) { + throw BasisConstructionException( + "SerendipityBasis: quadrilateral serendipity setup produced inconsistent sizes", + 
__FILE__, __LINE__, __func__); + } + quad_inv_vandermonde_ = quad_serendipity_inverse_vandermonde(nodes_, quad_monomial_exponents_, order_); + } else if (type == ElementType::Hex8 || type == ElementType::Hex20) { + dimension_ = 3; + if (order_ < 1) order_ = 1; + if (order_ == 1) { + size_ = 8; + } else if (order_ == 2) { + size_ = 20; + } else { + throw BasisConfigurationException( + "SerendipityBasis supports up to quadratic on hexahedra", + __FILE__, __LINE__, __func__); + } + } else if (type == ElementType::Wedge15) { + dimension_ = 3; + if (order_ < 2) { + order_ = 2; + } + if (order_ == 2) { + size_ = 15; + } else { + throw BasisConfigurationException( + "SerendipityBasis supports up to quadratic on wedge15", + __FILE__, __LINE__, __func__); + } + } else if (type == ElementType::Pyramid13) { + dimension_ = 3; + if (order_ < 2) { + order_ = 2; + } + if (order_ == 2) { + size_ = 13; + } else { + throw BasisConfigurationException( + "SerendipityBasis supports up to quadratic on pyramid13", + __FILE__, __LINE__, __func__); + } + } else { + throw BasisElementCompatibilityException("SerendipityBasis supports Quad4/Quad8, Hex8/Hex20, Wedge15, and Pyramid13 elements", + __FILE__, __LINE__, __func__); + } + + if (nodes_.empty()) { + nodes_.reserve(size_); + for (std::size_t i = 0; i < size_; ++i) { + nodes_.push_back(ReferenceNodeLayout::get_node_coords(element_type_, i)); + } + } +} + +bool SerendipityBasis::cache_identity_words(std::vector& words) const { + words.push_back(0x736572656e646970ULL); + words.push_back(static_cast(basis_type())); + words.push_back(static_cast(element_type_)); + words.push_back(static_cast(dimension_)); + words.push_back(static_cast(order_)); + words.push_back(static_cast(size_)); + words.push_back(geometry_mode_ ? 
1u : 0u); + return true; +} + +void SerendipityBasis::evaluate_values(const math::Vector& xi, + std::vector& values) const { + values.assign(size_, Real(0)); + const Real x = xi[0]; + const Real y = xi[1]; + const Real z = xi[2]; + + if (dimension_ == 2) { + if (quad_monomial_exponents_.size() != size_ || + quad_inv_vandermonde_.size() != size_ * size_) { + throw BasisEvaluationException( + "SerendipityBasis: quadrilateral interpolation tables are not initialized for value evaluation", + __FILE__, __LINE__, __func__); + } + + std::vector monomials(size_, Real(0)); + for (std::size_t j = 0; j < size_; ++j) { + const auto [ax, ay] = quad_monomial_exponents_[j]; + monomials[j] = pow_int(x, ax) * pow_int(y, ay); + } + + for (std::size_t i = 0; i < size_; ++i) { + Real value = Real(0); + for (std::size_t j = 0; j < size_; ++j) { + value += monomials[j] * quad_inv_vandermonde_[j * size_ + i]; + } + values[i] = value; + } + return; + } + + if (dimension_ == 3 && order_ == 1) { + // Hex8 trilinear shape functions + const Real r = x; + const Real s = y; + const Real t = z; + values[0] = Real(0.125) * (Real(1) - r) * (Real(1) - s) * (Real(1) - t); + values[1] = Real(0.125) * (Real(1) + r) * (Real(1) - s) * (Real(1) - t); + values[2] = Real(0.125) * (Real(1) + r) * (Real(1) + s) * (Real(1) - t); + values[3] = Real(0.125) * (Real(1) - r) * (Real(1) + s) * (Real(1) - t); + values[4] = Real(0.125) * (Real(1) - r) * (Real(1) - s) * (Real(1) + t); + values[5] = Real(0.125) * (Real(1) + r) * (Real(1) - s) * (Real(1) + t); + values[6] = Real(0.125) * (Real(1) + r) * (Real(1) + s) * (Real(1) + t); + values[7] = Real(0.125) * (Real(1) - r) * (Real(1) + s) * (Real(1) + t); + return; + } + + const Real r = x; + const Real s = y; + const Real t = z; + + if (geometry_mode_ && element_type_ == ElementType::Hex20) { + // Hex20 geometry mode: use trilinear Hex8 shape functions on corners, edges zero. 
+ values[0] = Real(0.125) * (Real(1) - r) * (Real(1) - s) * (Real(1) - t); + values[1] = Real(0.125) * (Real(1) + r) * (Real(1) - s) * (Real(1) - t); + values[2] = Real(0.125) * (Real(1) + r) * (Real(1) + s) * (Real(1) - t); + values[3] = Real(0.125) * (Real(1) - r) * (Real(1) + s) * (Real(1) - t); + values[4] = Real(0.125) * (Real(1) - r) * (Real(1) - s) * (Real(1) + t); + values[5] = Real(0.125) * (Real(1) + r) * (Real(1) - s) * (Real(1) + t); + values[6] = Real(0.125) * (Real(1) + r) * (Real(1) + s) * (Real(1) + t); + values[7] = Real(0.125) * (Real(1) - r) * (Real(1) + s) * (Real(1) + t); + for (std::size_t i = 8; i < 20; ++i) { + values[i] = Real(0); + } + return; + } + + if (element_type_ == ElementType::Hex20) { + Real internal_vals[20]; + eval_hex20_internal(r, s, t, internal_vals); + const auto mesh_to_basis = ReferenceNodeLayout::mesh_to_basis_ordering(element_type_); + BASIS_CHECK_EVAL(mesh_to_basis.size() == size_, + "Hex20 mesh-to-basis ordering is not registered"); + for (std::size_t i = 0; i < 20; ++i) { + values[i] = internal_vals[mesh_to_basis[i]]; + } + return; + } + + if (element_type_ == ElementType::Wedge15) { + eval_wedge15_polynomial(r, s, t, values.data(), nullptr, nullptr); + return; + } + + if (element_type_ == ElementType::Pyramid13) { + static const LagrangeBasis parent(ElementType::Pyramid14, 2); + std::array parent_values{}; + parent.evaluate_values_to(xi, parent_values.data()); + for (std::size_t i = 0; i < 13; ++i) { + values[i] = parent_values[i] + kPyramid13CenterRedistribution[i] * parent_values[13]; + } + return; + } +} + +void SerendipityBasis::evaluate_gradients(const math::Vector& xi, + std::vector& gradients) const { + gradients.assign(size_, Gradient{}); + + const Real x = xi[0]; + const Real y = xi[1]; + const Real z = xi[2]; + + if (dimension_ == 2) { + if (quad_monomial_exponents_.size() != size_ || + quad_inv_vandermonde_.size() != size_ * size_) { + throw BasisEvaluationException( + "SerendipityBasis: quadrilateral 
interpolation tables are not initialized for gradient evaluation", + __FILE__, __LINE__, __func__); + } + + std::vector dmon_dx(size_, Real(0)); + std::vector dmon_dy(size_, Real(0)); + for (std::size_t j = 0; j < size_; ++j) { + const auto [ax, ay] = quad_monomial_exponents_[j]; + dmon_dx[j] = (ax > 0) ? Real(ax) * pow_int(x, ax - 1) * pow_int(y, ay) : Real(0); + dmon_dy[j] = (ay > 0) ? pow_int(x, ax) * Real(ay) * pow_int(y, ay - 1) : Real(0); + } + + for (std::size_t i = 0; i < size_; ++i) { + Real gx = Real(0); + Real gy = Real(0); + for (std::size_t j = 0; j < size_; ++j) { + const Real coeff = quad_inv_vandermonde_[j * size_ + i]; + gx += dmon_dx[j] * coeff; + gy += dmon_dy[j] * coeff; + } + gradients[i][0] = gx; + gradients[i][1] = gy; + } + return; + } + + // 3D linear hex (Hex8) + if (dimension_ == 3 && order_ == 1) { + const Real r = x, s = y, t = z; + gradients[0][0] = -Real(0.125) * (Real(1) - s) * (Real(1) - t); + gradients[0][1] = -Real(0.125) * (Real(1) - r) * (Real(1) - t); + gradients[0][2] = -Real(0.125) * (Real(1) - r) * (Real(1) - s); + + gradients[1][0] = Real(0.125) * (Real(1) - s) * (Real(1) - t); + gradients[1][1] = -Real(0.125) * (Real(1) + r) * (Real(1) - t); + gradients[1][2] = -Real(0.125) * (Real(1) + r) * (Real(1) - s); + + gradients[2][0] = Real(0.125) * (Real(1) + s) * (Real(1) - t); + gradients[2][1] = Real(0.125) * (Real(1) + r) * (Real(1) - t); + gradients[2][2] = -Real(0.125) * (Real(1) + r) * (Real(1) + s); + + gradients[3][0] = -Real(0.125) * (Real(1) + s) * (Real(1) - t); + gradients[3][1] = Real(0.125) * (Real(1) - r) * (Real(1) - t); + gradients[3][2] = -Real(0.125) * (Real(1) - r) * (Real(1) + s); + + gradients[4][0] = -Real(0.125) * (Real(1) - s) * (Real(1) + t); + gradients[4][1] = -Real(0.125) * (Real(1) - r) * (Real(1) + t); + gradients[4][2] = Real(0.125) * (Real(1) - r) * (Real(1) - s); + + gradients[5][0] = Real(0.125) * (Real(1) - s) * (Real(1) + t); + gradients[5][1] = -Real(0.125) * (Real(1) + r) * (Real(1) + t); + 
gradients[5][2] = Real(0.125) * (Real(1) + r) * (Real(1) - s); + + gradients[6][0] = Real(0.125) * (Real(1) + s) * (Real(1) + t); + gradients[6][1] = Real(0.125) * (Real(1) + r) * (Real(1) + t); + gradients[6][2] = Real(0.125) * (Real(1) + r) * (Real(1) + s); + + gradients[7][0] = -Real(0.125) * (Real(1) + s) * (Real(1) + t); + gradients[7][1] = Real(0.125) * (Real(1) - r) * (Real(1) + t); + gradients[7][2] = Real(0.125) * (Real(1) - r) * (Real(1) + s); + return; + } + + // Hex20 geometry mode: use Hex8 gradients + if (dimension_ == 3 && order_ == 2 && geometry_mode_ && + (element_type_ == ElementType::Hex20 || element_type_ == ElementType::Quad8)) { + const Real r = x, s = y, t = z; + gradients[0][0] = -Real(0.125) * (Real(1) - s) * (Real(1) - t); + gradients[0][1] = -Real(0.125) * (Real(1) - r) * (Real(1) - t); + gradients[0][2] = -Real(0.125) * (Real(1) - r) * (Real(1) - s); + + gradients[1][0] = Real(0.125) * (Real(1) - s) * (Real(1) - t); + gradients[1][1] = -Real(0.125) * (Real(1) + r) * (Real(1) - t); + gradients[1][2] = -Real(0.125) * (Real(1) + r) * (Real(1) - s); + + gradients[2][0] = Real(0.125) * (Real(1) + s) * (Real(1) - t); + gradients[2][1] = Real(0.125) * (Real(1) + r) * (Real(1) - t); + gradients[2][2] = -Real(0.125) * (Real(1) + r) * (Real(1) + s); + + gradients[3][0] = -Real(0.125) * (Real(1) + s) * (Real(1) - t); + gradients[3][1] = Real(0.125) * (Real(1) - r) * (Real(1) - t); + gradients[3][2] = -Real(0.125) * (Real(1) - r) * (Real(1) + s); + + gradients[4][0] = -Real(0.125) * (Real(1) - s) * (Real(1) + t); + gradients[4][1] = -Real(0.125) * (Real(1) - r) * (Real(1) + t); + gradients[4][2] = Real(0.125) * (Real(1) - r) * (Real(1) - s); + + gradients[5][0] = Real(0.125) * (Real(1) - s) * (Real(1) + t); + gradients[5][1] = -Real(0.125) * (Real(1) + r) * (Real(1) + t); + gradients[5][2] = Real(0.125) * (Real(1) + r) * (Real(1) - s); + + gradients[6][0] = Real(0.125) * (Real(1) + s) * (Real(1) + t); + gradients[6][1] = Real(0.125) * (Real(1) + r) 
* (Real(1) + t); + gradients[6][2] = Real(0.125) * (Real(1) + r) * (Real(1) + s); + + gradients[7][0] = -Real(0.125) * (Real(1) + s) * (Real(1) + t); + gradients[7][1] = Real(0.125) * (Real(1) - r) * (Real(1) + t); + gradients[7][2] = Real(0.125) * (Real(1) - r) * (Real(1) + s); + // Edge-node gradients remain zero + return; + } + + // Hex20 analytical gradients using monomial differentiation + if (element_type_ == ElementType::Hex20 && order_ == 2) { + const Real r = x, s = y, t = z; + Gradient internal_grads[20]; + eval_hex20_grad_internal(r, s, t, internal_grads); + const auto mesh_to_basis = ReferenceNodeLayout::mesh_to_basis_ordering(element_type_); + BASIS_CHECK_EVAL(mesh_to_basis.size() == size_, + "Hex20 mesh-to-basis ordering is not registered"); + for (std::size_t i = 0; i < 20; ++i) { + gradients[i] = internal_grads[mesh_to_basis[i]]; + } + return; + } + + // Wedge15 analytical gradients using monomial differentiation + if (element_type_ == ElementType::Wedge15 && order_ == 2) { + eval_wedge15_polynomial(x, y, z, nullptr, gradients.data(), nullptr); + return; + } + + if (element_type_ == ElementType::Pyramid13) { + static const LagrangeBasis parent(ElementType::Pyramid14, 2); + std::array parent_gradients{}; + // Pyramid13 inherits the complete-family pyramid apex contract from the + // parent basis rather than introducing a separate regularized path. 
+ parent.evaluate_gradients_to(xi, parent_gradients.data()); + const auto parent_gradient = [&](std::size_t node, std::size_t component) { + return parent_gradients[node * 3u + component]; + }; + for (std::size_t i = 0; i < 13; ++i) { + for (std::size_t c = 0; c < 3u; ++c) { + gradients[i][c] = + parent_gradient(i, c) + + kPyramid13CenterRedistribution[i] * parent_gradient(13u, c); + } + } + return; + } + + throw BasisEvaluationException("SerendipityBasis::evaluate_gradients: unsupported serendipity configuration", + __FILE__, __LINE__, __func__); +} + +void SerendipityBasis::evaluate_hessians(const math::Vector& xi, + std::vector& hessians) const { + hessians.assign(size_, Hessian{}); + const Real x = xi[0]; + const Real y = xi[1]; + const Real z = xi[2]; + + if (dimension_ == 2) { + if (quad_monomial_exponents_.size() != size_ || + quad_inv_vandermonde_.size() != size_ * size_) { + throw BasisEvaluationException( + "SerendipityBasis: quadrilateral interpolation tables are not initialized for Hessian evaluation", + __FILE__, __LINE__, __func__); + } + + std::vector dxx(size_, Real(0)); + std::vector dxy(size_, Real(0)); + std::vector dyy(size_, Real(0)); + for (std::size_t j = 0; j < size_; ++j) { + const auto [ax, ay] = quad_monomial_exponents_[j]; + dxx[j] = (ax > 1) ? Real(ax * (ax - 1)) * pow_int(x, ax - 2) * pow_int(y, ay) : Real(0); + dxy[j] = (ax > 0 && ay > 0) ? Real(ax * ay) * pow_int(x, ax - 1) * pow_int(y, ay - 1) : Real(0); + dyy[j] = (ay > 1) ? 
Real(ay * (ay - 1)) * pow_int(x, ax) * pow_int(y, ay - 2) : Real(0); + } + + for (std::size_t i = 0; i < size_; ++i) { + for (std::size_t j = 0; j < size_; ++j) { + const Real coeff = quad_inv_vandermonde_[j * size_ + i]; + hessians[i](0, 0) += dxx[j] * coeff; + hessians[i](0, 1) += dxy[j] * coeff; + hessians[i](1, 1) += dyy[j] * coeff; + } + hessians[i](1, 0) = hessians[i](0, 1); + } + return; + } + + if (element_type_ == ElementType::Hex8 && order_ == 1) { + static const LagrangeBasis parent(ElementType::Hex8, 1); + parent.evaluate_hessians(xi, hessians); + return; + } + + if (geometry_mode_ && element_type_ == ElementType::Hex20) { + static const LagrangeBasis parent(ElementType::Hex8, 1); + std::array parent_hessians{}; + parent.evaluate_hessians_to(xi, parent_hessians.data()); + for (std::size_t i = 0; i < 8; ++i) { + for (std::size_t r = 0; r < 3; ++r) { + for (std::size_t c = 0; c < 3; ++c) { + hessians[i](r, c) = parent_hessians[i * 9u + r * 3u + c]; + } + } + } + return; + } + + if (element_type_ == ElementType::Hex20 && order_ == 2) { + Hessian internal_hessians[20]; + eval_hex20_hess_internal(x, y, z, internal_hessians); + const auto mesh_to_basis = ReferenceNodeLayout::mesh_to_basis_ordering(element_type_); + BASIS_CHECK_EVAL(mesh_to_basis.size() == size_, + "Hex20 mesh-to-basis ordering is not registered"); + for (std::size_t i = 0; i < 20; ++i) { + hessians[i] = internal_hessians[mesh_to_basis[i]]; + } + return; + } + + if (element_type_ == ElementType::Wedge15 && order_ == 2) { + eval_wedge15_polynomial(x, y, z, nullptr, nullptr, hessians.data()); + return; + } + + if (element_type_ == ElementType::Pyramid13) { + static const LagrangeBasis parent(ElementType::Pyramid14, 2); + std::array parent_hessians{}; + // Pyramid13 inherits the complete-family pyramid apex contract from the + // parent basis rather than introducing a separate regularized path. 
+ parent.evaluate_hessians_to(xi, parent_hessians.data()); + const Hessian center_hessian = load_hessian(parent_hessians.data() + 13u * 9u); + for (std::size_t i = 0; i < 13; ++i) { + hessians[i] = load_hessian(parent_hessians.data() + i * 9u); + add_scaled_hessian(hessians[i], center_hessian, kPyramid13CenterRedistribution[i]); + } + return; + } + + throw BasisEvaluationException("SerendipityBasis::evaluate_hessians: unsupported serendipity configuration", + __FILE__, __LINE__, __func__); +} + +} // namespace basis +} // namespace FE +} // namespace svmp diff --git a/Code/Source/solver/FE/Basis/SerendipityBasis.h b/Code/Source/solver/FE/Basis/SerendipityBasis.h new file mode 100644 index 000000000..98c01415a --- /dev/null +++ b/Code/Source/solver/FE/Basis/SerendipityBasis.h @@ -0,0 +1,70 @@ +/* Copyright (c) Stanford University, The Regents of the University of California, and others. + * + * All Rights Reserved. + * + * See License file. + */ + +#ifndef SVMP_FE_BASIS_SERENDIPITYBASIS_H +#define SVMP_FE_BASIS_SERENDIPITYBASIS_H + +/** + * @file SerendipityBasis.h + * @brief Reduced-degree-of-freedom serendipity bases + * + * `Pyramid13` inherits its apex contract from the complete-family rational + * pyramid basis: values remain exact at the apex, while exact-apex gradient + * and Hessian queries throw because the inherited nodal derivative limit is + * not unique. 
+ */ + +#include "BasisFunction.h" + +#include +#include + +namespace svmp { +namespace FE { +namespace basis { + +class SerendipityBasis : public BasisFunction { +public: + SerendipityBasis(ElementType type, int order, bool geometry_mode = false); + + BasisType basis_type() const noexcept override { return BasisType::Serendipity; } + ElementType element_type() const noexcept override { return element_type_; } + int dimension() const noexcept override { return dimension_; } + int order() const noexcept override { return order_; } + std::size_t size() const noexcept override { return size_; } + const std::vector>& nodes() const noexcept { return nodes_; } + bool cache_identity_words(std::vector& words) const override; + + void evaluate_values(const math::Vector& xi, + std::vector& values) const override; + + void evaluate_gradients(const math::Vector& xi, + std::vector& gradients) const override; + + void evaluate_hessians(const math::Vector& xi, + std::vector& hessians) const override; + +private: + ElementType element_type_; + int dimension_; + int order_; + std::size_t size_; + std::vector> nodes_; + std::vector> quad_monomial_exponents_; + // Row-major inverse Vandermonde, indexed as [monomial, basis]. + std::vector quad_inv_vandermonde_; + + // When true, this basis is used purely for geometry mapping and may use + // reduced polynomial order (e.g., Hex20 geometry as Hex8). + bool geometry_mode_; +}; + +} // namespace basis +} // namespace FE +} // namespace svmp + +#endif // SVMP_FE_BASIS_SERENDIPITYBASIS_H diff --git a/Code/Source/solver/FE/Basis/VectorBasis.h b/Code/Source/solver/FE/Basis/VectorBasis.h new file mode 100644 index 000000000..d442c2160 --- /dev/null +++ b/Code/Source/solver/FE/Basis/VectorBasis.h @@ -0,0 +1,255 @@ +/* Copyright (c) Stanford University, The Regents of the University of California, and others. + * + * All Rights Reserved. + * + * See License file. 
+ */ + +#ifndef SVMP_FE_BASIS_VECTORBASIS_H +#define SVMP_FE_BASIS_VECTORBASIS_H + +/** + * @file VectorBasis.h + * @brief Vector-valued bases for H(div) and H(curl) conforming spaces + */ + +#include "BasisFunction.h" +#include "VectorBasisModalPolynomial.h" +#include +#include +#include + +namespace svmp { +namespace FE { +namespace basis { + +/** + * @brief DOF entity type for vector-valued basis functions + */ +enum class DofEntity { + Vertex, ///< DOF associated with a vertex + Edge, ///< DOF associated with an edge (tangential moments for H(curl)) + Face, ///< DOF associated with a face (normal moments for H(div), tangential for H(curl)) + Interior ///< DOF associated with element interior +}; + +/** + * @brief DOF association metadata for a single DOF + */ +struct DofAssociation { + DofEntity entity_type{DofEntity::Interior}; + int entity_id{-1}; ///< Local index of the entity (edge/face/vertex) + int moment_index{0}; ///< Index within the entity's moment space +}; + +struct SparseModalCoefficientMatrix { + std::size_t rows{0}; + std::size_t cols{0}; + std::vector row_offsets; + std::vector dofs; + std::vector coefficients; +}; + +class VectorBasisFunction : public BasisFunction { +public: + bool is_vector_valued() const noexcept override { return true; } + bool supports_vector_jacobians() const noexcept override { return true; } + void evaluate_values(const math::Vector&, + std::vector&) const override { + throw BasisEvaluationException("Vector basis uses evaluate_vector_values", + __FILE__, __LINE__, __func__); + } + + void evaluate_vector_at_quadrature_points_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT jacobians_out, + Real* SVMP_RESTRICT curls_out, + Real* SVMP_RESTRICT divergence_out) const override; + + /** + * @brief Get DOF association metadata for all basis functions + * + * Returns a vector of size(), where each entry describes which + * geometric entity 
(vertex/edge/face/interior) the corresponding + * DOF is associated with. This is essential for orientation-aware + * assembly of H(div) and H(curl) spaces. + */ + virtual std::vector dof_associations() const { + // Default: all interior DOFs (subclasses should override) + std::vector result(size()); + for (std::size_t i = 0; i < size(); ++i) { + result[i].entity_type = DofEntity::Interior; + result[i].entity_id = 0; + result[i].moment_index = static_cast(i); + } + return result; + } +}; + +/** + * @brief Raviart-Thomas H(div) basis on supported element families + */ +class RaviartThomasBasis : public VectorBasisFunction { +public: + RaviartThomasBasis(ElementType type, int order = 0); + + BasisType basis_type() const noexcept override { return BasisType::RaviartThomas; } + ElementType element_type() const noexcept override { return element_type_; } + int dimension() const noexcept override { return dimension_; } + int order() const noexcept override { return order_; } + std::size_t size() const noexcept override { return size_; } + bool cache_identity_is_structural() const noexcept override { return true; } + + void evaluate_vector_values(const math::Vector& xi, + std::vector>& values) const override; + void evaluate_vector_jacobians(const math::Vector& xi, + std::vector& jacobians) const override; + void evaluate_divergence(const math::Vector& xi, + std::vector& divergence) const override; + bool supports_divergence() const noexcept override { return true; } + void evaluate_vector_at_quadrature_points_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT jacobians_out, + Real* SVMP_RESTRICT curls_out, + Real* SVMP_RESTRICT divergence_out) const override; + + /// Get DOF associations (face/edge DOFs for 2D, face DOFs for 3D H(div)) + std::vector dof_associations() const override; + +private: + using ModalTerm = VectorBasisModalTerm; + using ModalPolynomial = VectorBasisModalPolynomial; + using 
SeedJacobianEvaluator = void (*)( + const math::Vector&, + std::vector&); + + ElementType element_type_; + int dimension_; + int order_; + std::size_t size_{0}; + + bool nodal_generated_{false}; + bool use_transformed_direct_seed_{false}; ///< True for wedge/pyramid RT(k=1,2) transformed from direct seed functions + std::vector transformed_seed_indices_; + std::vector> transformed_monomial_candidates_; ///< {component, px, py, pz} + std::vector monomials_; + std::array modal_power_limits_{{0, 0, 0}}; + std::array transformed_power_limits_{{0, 0, 0}}; + SeedJacobianEvaluator transformed_seed_jacobian_evaluator_{nullptr}; + // Sparse coefficients for nodal basis in modal monomial basis: + // phi_j = sum_p c(p,j) * modal_p. + // Rows index modal functions; entries target nodal DOFs. + SparseModalCoefficientMatrix modal_sparse_coeffs_; + SparseModalCoefficientMatrix transformed_sparse_coeffs_; +}; + +/** + * @brief First-kind Nedelec H(curl) basis on supported element families + */ +class NedelecBasis : public VectorBasisFunction { +public: + NedelecBasis(ElementType type, int order = 0); + + BasisType basis_type() const noexcept override { return BasisType::Nedelec; } + ElementType element_type() const noexcept override { return element_type_; } + int dimension() const noexcept override { return dimension_; } + int order() const noexcept override { return order_; } + std::size_t size() const noexcept override { return size_; } + bool cache_identity_is_structural() const noexcept override { return true; } + + void evaluate_vector_values(const math::Vector& xi, + std::vector>& values) const override; + void evaluate_vector_jacobians(const math::Vector& xi, + std::vector& jacobians) const override; + void evaluate_curl(const math::Vector& xi, + std::vector>& curl) const override; + bool supports_curl() const noexcept override { return true; } + void evaluate_vector_at_quadrature_points_strided( + const std::vector>& points, + std::size_t output_stride, + Real* 
SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT jacobians_out, + Real* SVMP_RESTRICT curls_out, + Real* SVMP_RESTRICT divergence_out) const override; + + /// Get DOF associations (edge DOFs for H(curl), face DOFs for 3D interior) + std::vector dof_associations() const override; + +private: + using ModalTerm = VectorBasisModalTerm; + using ModalPolynomial = VectorBasisModalPolynomial; + using SeedJacobianEvaluator = void (*)( + const math::Vector&, + std::vector&); + + ElementType element_type_; + int dimension_; + int order_; + std::size_t size_{0}; + + bool nodal_generated_{false}; + bool use_transformed_direct_seed_{false}; ///< True for wedge/pyramid ND(k=1,2) transformed from direct seed/candidate functions + std::vector> transformed_monomial_candidates_; ///< {component, px, py, pz} + std::vector monomials_; + SparseModalCoefficientMatrix modal_sparse_coeffs_; + SparseModalCoefficientMatrix transformed_sparse_coeffs_; + std::array modal_power_limits_{{0, 0, 0}}; + std::array transformed_power_limits_{{0, 0, 0}}; + SeedJacobianEvaluator transformed_seed_jacobian_evaluator_{nullptr}; +}; + +/** + * @brief Brezzi-Douglas-Marini basis (simple linear variant) + */ +class BDMBasis : public VectorBasisFunction { +public: + BDMBasis(ElementType type, int order = 1); + + BasisType basis_type() const noexcept override { return BasisType::BDM; } + ElementType element_type() const noexcept override { return element_type_; } + int dimension() const noexcept override { return dimension_; } + int order() const noexcept override { return order_; } + std::size_t size() const noexcept override { return size_; } + bool cache_identity_is_structural() const noexcept override { return true; } + + void evaluate_vector_values(const math::Vector& xi, + std::vector>& values) const override; + void evaluate_vector_jacobians(const math::Vector& xi, + std::vector& jacobians) const override; + void evaluate_divergence(const math::Vector& xi, + std::vector& divergence) const override; + 
bool supports_divergence() const noexcept override { return true; } + void evaluate_vector_at_quadrature_points_strided( + const std::vector>& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT jacobians_out, + Real* SVMP_RESTRICT curls_out, + Real* SVMP_RESTRICT divergence_out) const override; + + /// Get DOF associations (face/edge DOFs for H(div)) + std::vector dof_associations() const override; + +private: + using ModalTerm = VectorBasisModalTerm; + using ModalPolynomial = VectorBasisModalPolynomial; + + ElementType element_type_; + int dimension_; + int order_; + std::size_t size_{0}; + bool nodal_generated_{false}; + std::vector monomials_; + SparseModalCoefficientMatrix modal_sparse_coeffs_; + std::array modal_power_limits_{{0, 0, 0}}; +}; + +} // namespace basis +} // namespace FE +} // namespace svmp + +#endif // SVMP_FE_BASIS_VECTORBASIS_H diff --git a/Code/Source/solver/FE/Basis/VectorBasisEvaluationHelpers.cpp b/Code/Source/solver/FE/Basis/VectorBasisEvaluationHelpers.cpp new file mode 100644 index 000000000..7ec848633 --- /dev/null +++ b/Code/Source/solver/FE/Basis/VectorBasisEvaluationHelpers.cpp @@ -0,0 +1,593 @@ +/* Copyright (c) Stanford University, The Regents of the University of California, and others. + * + * All Rights Reserved. + * + * See License file. + */ + +#include "VectorBasisEvaluationHelpers.h" + +#include +#include +#include +#include + +namespace svmp { +namespace FE { +namespace basis { +namespace detail { +namespace vector_common { + +VectorBasisScratch& vector_basis_scratch() { + // Scratch is intentionally thread-local: production assembly uses a + // persistent worker-thread team, so buffers stay warm on each worker. 
+ static thread_local VectorBasisScratch scratch; + return scratch; +} + +void prewarm_vector_basis_scratch(std::size_t max_size, std::size_t max_qpts) { + vector_basis_scratch().prewarm(max_size, max_qpts); +} + +void fill_powers(Real x, int max_p, std::vector& out) { + BASIS_CHECK_CONSTRUCTION(max_p >= 0, "powers: negative max_p"); + out.assign(static_cast(max_p + 1), Real(1)); + for (int i = 1; i <= max_p; ++i) { + out[static_cast(i)] = + out[static_cast(i - 1)] * x; + } +} + +void fill_power_tables(const Vec3& xi, + const std::array& limits, + VectorBasisScratch& scratch) { + fill_powers(xi[0], limits[0], scratch.px); + fill_powers(xi[1], limits[1], scratch.py); + fill_powers(xi[2], limits[2], scratch.pz); +} + +namespace { + +constexpr Real kSparseCoefficientRelativeTolerance = + Real(256) * std::numeric_limits::epsilon(); + +void fill_batched_axis_powers(const std::vector& points, + std::size_t axis, + int max_power, + std::vector& out) { + BASIS_CHECK_CONSTRUCTION(max_power >= 0, "batched powers: negative max_p"); + const std::size_t num_qpts = points.size(); + out.assign(static_cast(max_power + 1) * num_qpts, Real(1)); + if (num_qpts == 0 || max_power == 0) { + return; + } + + Real* first_power = out.data() + num_qpts; + for (std::size_t q = 0; q < num_qpts; ++q) { + first_power[q] = points[q][axis]; + } + for (int power = 2; power <= max_power; ++power) { + const Real* previous = + out.data() + static_cast(power - 1) * num_qpts; + Real* current = out.data() + static_cast(power) * num_qpts; + for (std::size_t q = 0; q < num_qpts; ++q) { + current[q] = previous[q] * points[q][axis]; + } + } +} + +} // namespace + +void fill_batched_power_tables(const std::vector& points, + const std::array& limits, + VectorBasisScratch& scratch) { + fill_batched_axis_powers(points, 0u, limits[0], scratch.batched_px); + fill_batched_axis_powers(points, 1u, limits[1], scratch.batched_py); + fill_batched_axis_powers(points, 2u, limits[2], scratch.batched_pz); +} + +void 
validate_vector_strided_outputs(std::size_t num_qpts, + std::size_t output_stride, + const char* family_name) { + if (output_stride < num_qpts) { + throw BasisConfigurationException( + std::string(family_name) + + " strided vector evaluation requires output_stride >= points.size()", + __FILE__, __LINE__, __func__); + } +} + +void zero_active_strided_rows(Real* output, + std::size_t rows, + std::size_t output_stride, + std::size_t num_qpts) { + for (std::size_t row = 0; row < rows; ++row) { + std::fill_n(output + row * output_stride, num_qpts, Real(0)); + } +} + +SparseModalCoefficientMatrix build_sparse_modal_coefficients( + const std::vector& dense_coefficients, + std::size_t rows, + std::size_t cols) { + BASIS_CHECK_CONSTRUCTION(dense_coefficients.size() == rows * cols, + "build_sparse_modal_coefficients: dense coefficient size mismatch"); + + SparseModalCoefficientMatrix sparse; + sparse.rows = rows; + sparse.cols = cols; + sparse.row_offsets.reserve(rows + 1u); + sparse.row_offsets.push_back(0u); + + Real max_abs = Real(0); + for (const Real coefficient : dense_coefficients) { + max_abs = std::max(max_abs, std::abs(coefficient)); + } + const Real prune_threshold = kSparseCoefficientRelativeTolerance * max_abs; + + for (std::size_t row = 0; row < rows; ++row) { + const Real* dense_row = dense_coefficients.data() + row * cols; + for (std::size_t col = 0; col < cols; ++col) { + const Real coefficient = dense_row[col]; + if (std::abs(coefficient) > prune_threshold) { + sparse.dofs.push_back(col); + sparse.coefficients.push_back(coefficient); + } + } + sparse.row_offsets.push_back(sparse.dofs.size()); + } + + return sparse; +} + +Vec3 curl_from_jacobian(const VectorJacobian& J) noexcept { + return Vec3{J(2u, 1u) - J(1u, 2u), + J(0u, 2u) - J(2u, 0u), + J(1u, 0u) - J(0u, 1u)}; +} + +Real divergence_from_jacobian(const VectorJacobian& J) noexcept { + return J(0u, 0u) + J(1u, 1u) + J(2u, 2u); +} + +void write_vector_values_strided(const std::vector& values, + 
std::size_t num_dofs, + std::size_t output_stride, + std::size_t q, + Real* SVMP_RESTRICT values_out) { + if (values_out == nullptr) { + return; + } + BASIS_CHECK_CONSTRUCTION(values.size() == num_dofs, + "vector value evaluation returned the wrong number of DOFs"); + for (std::size_t dof = 0; dof < num_dofs; ++dof) { + for (std::size_t component = 0; component < 3u; ++component) { + values_out[(dof * 3u + component) * output_stride + q] = + values[dof][component]; + } + } +} + +void write_vector_jacobians_strided(const std::vector& jacobians, + std::size_t num_dofs, + std::size_t output_stride, + std::size_t q, + Real* SVMP_RESTRICT jacobians_out) { + if (jacobians_out == nullptr) { + return; + } + BASIS_CHECK_CONSTRUCTION(jacobians.size() == num_dofs, + "vector Jacobian evaluation returned the wrong number of DOFs"); + for (std::size_t dof = 0; dof < num_dofs; ++dof) { + const auto& J = jacobians[dof]; + for (std::size_t component = 0; component < 3u; ++component) { + for (std::size_t derivative = 0; derivative < 3u; ++derivative) { + jacobians_out[(dof * 9u + component * 3u + derivative) * + output_stride + q] = J(component, derivative); + } + } + } +} + +void write_vector_curl_strided(const std::vector& curl, + std::size_t num_dofs, + std::size_t output_stride, + std::size_t q, + Real* SVMP_RESTRICT curls_out) { + if (curls_out == nullptr) { + return; + } + BASIS_CHECK_CONSTRUCTION(curl.size() == num_dofs, + "vector curl evaluation returned the wrong number of DOFs"); + for (std::size_t dof = 0; dof < num_dofs; ++dof) { + for (std::size_t component = 0; component < 3u; ++component) { + curls_out[(dof * 3u + component) * output_stride + q] = + curl[dof][component]; + } + } +} + +void write_vector_divergence_strided(const std::vector& divergence, + std::size_t num_dofs, + std::size_t output_stride, + std::size_t q, + Real* SVMP_RESTRICT divergence_out) { + if (divergence_out == nullptr) { + return; + } + BASIS_CHECK_CONSTRUCTION(divergence.size() == num_dofs, + 
"vector divergence evaluation returned the wrong number of DOFs"); + for (std::size_t dof = 0; dof < num_dofs; ++dof) { + divergence_out[dof * output_stride + q] = divergence[dof]; + } +} + +void write_curl_and_divergence_from_jacobians_strided( + const std::vector& jacobians, + std::size_t num_dofs, + std::size_t output_stride, + std::size_t q, + Real* SVMP_RESTRICT curls_out, + Real* SVMP_RESTRICT divergence_out) { + BASIS_CHECK_CONSTRUCTION(jacobians.size() == num_dofs, + "vector Jacobian evaluation returned the wrong number of DOFs"); + for (std::size_t dof = 0; dof < num_dofs; ++dof) { + const auto& J = jacobians[dof]; + if (curls_out != nullptr) { + const Vec3 curl = curl_from_jacobian(J); + for (std::size_t component = 0; component < 3u; ++component) { + curls_out[(dof * 3u + component) * output_stride + q] = + curl[component]; + } + } + if (divergence_out != nullptr) { + divergence_out[dof * output_stride + q] = divergence_from_jacobian(J); + } + } +} + +Vec3 lerp(const Vec3& a, const Vec3& b, Real s) { + const Real t = (s + Real(1)) * Real(0.5); + return a * (Real(1) - t) + b * t; +} + +Vec3 bilinear(const std::array& v, Real u, Real w) { + const Real N0 = Real(0.25) * (Real(1) - u) * (Real(1) - w); + const Real N1 = Real(0.25) * (Real(1) + u) * (Real(1) - w); + const Real N2 = Real(0.25) * (Real(1) + u) * (Real(1) + w); + const Real N3 = Real(0.25) * (Real(1) - u) * (Real(1) + w); + return v[0] * N0 + v[1] * N1 + v[2] * N2 + v[3] * N3; +} + +Vec3 bilinear_du(const std::array& v, Real u, Real w) { + (void)u; + const Real dN0 = -Real(0.25) * (Real(1) - w); + const Real dN1 = Real(0.25) * (Real(1) - w); + const Real dN2 = Real(0.25) * (Real(1) + w); + const Real dN3 = -Real(0.25) * (Real(1) + w); + return v[0] * dN0 + v[1] * dN1 + v[2] * dN2 + v[3] * dN3; +} + +Vec3 bilinear_dw(const std::array& v, Real u, Real w) { + (void)w; + const Real dN0 = -Real(0.25) * (Real(1) - u); + const Real dN1 = -Real(0.25) * (Real(1) + u); + const Real dN2 = Real(0.25) * 
(Real(1) + u); + const Real dN3 = Real(0.25) * (Real(1) - u); + return v[0] * dN0 + v[1] * dN1 + v[2] * dN2 + v[3] * dN3; +} + +Vec3 cross3(const Vec3& a, const Vec3& b) { + return Vec3{a[1] * b[2] - a[2] * b[1], + a[2] * b[0] - a[0] * b[2], + a[0] * b[1] - a[1] * b[0]}; +} + +Vec3 normalize3(const Vec3& v) { + const Real n = v.norm(); + BASIS_CHECK_CONSTRUCTION(n > std::numeric_limits::epsilon(), + "normalize3: zero-length vector"); + return v / n; +} + +std::array component_monomial_power_limits( + const std::vector>& candidates) { + std::array limits{{0, 0, 0}}; + for (const auto& mono : candidates) { + limits[0] = std::max(limits[0], mono[1]); + limits[1] = std::max(limits[1], mono[2]); + limits[2] = std::max(limits[2], mono[3]); + } + return limits; +} + +std::size_t triangle_poly_dim(std::size_t k) { + return (k + 1u) * (k + 2u) / 2u; +} + +std::size_t tetra_poly_dim(std::size_t k) { + return (k + 1u) * (k + 2u) * (k + 3u) / 6u; +} + +std::size_t rt_wedge_size(int order) { + const std::size_t k = static_cast(order); + const std::size_t face_dofs = + 2u * triangle_poly_dim(k) + 3u * (k + 1u) * (k + 1u); + const std::size_t interior_dofs = + (k >= 1u) ? (3u * k * (k + 1u) * (k + 1u) / 2u) : 0u; + return face_dofs + interior_dofs; +} + +std::size_t rt_pyramid_size(int order) { + const std::size_t k = static_cast(order); + const std::size_t face_dofs = (k + 1u) * (k + 1u) + 4u * triangle_poly_dim(k); + const std::size_t interior_dofs = (k >= 1u) ? (3u * k * k * k) : 0u; + return face_dofs + interior_dofs; +} + +std::size_t nd_wedge_size(int order) { + const std::size_t k = static_cast(order); + const std::size_t edge_dofs = 9u * (k + 1u); + const std::size_t face_dofs = (k >= 1u) ? (8u * k * (k + 1u)) : 0u; + const std::size_t interior_dofs = + (k >= 2u) ? 
(3u * k * (k - 1u) * (k + 1u) / 2u) : 0u; + return edge_dofs + face_dofs + interior_dofs; +} + +std::size_t nd_pyramid_size(int order) { + const std::size_t k = static_cast(order); + const std::size_t edge_dofs = 8u * (k + 1u); + const std::size_t face_dofs = (k >= 1u) ? (6u * k * (k + 1u)) : 0u; + const std::size_t interior_dofs = + (k >= 2u) ? (k * (k - 1u) * (k + 1u) / 2u) : 0u; + return edge_dofs + face_dofs + interior_dofs; +} + +void ensure_supported_hybrid_vector_order(ElementType type, + int order, + const char* family_name) { + (void)type; + (void)order; + (void)family_name; +} + +std::vector> make_component_monomial_candidates( + int max_total_degree) { + BASIS_CHECK_CONSTRUCTION(max_total_degree >= 0, + "make_component_monomial_candidates: negative total degree"); + + std::vector> candidates; + for (int component = 0; component < 3; ++component) { + for (int total = 0; total <= max_total_degree; ++total) { + for (int pz = 0; pz <= total; ++pz) { + for (int py = 0; py <= total - pz; ++py) { + const int px = total - py - pz; + candidates.push_back({component, px, py, pz}); + } + } + } + } + return candidates; +} + +std::vector> make_rt_extra_monomial_candidates(ElementType type, + int order) { + if (order >= 3) { + return make_component_monomial_candidates(3 * order); + } + + std::vector> candidates; + if (!is_pyramid(type) || order != 2) { + return candidates; + } + + for (int component = 0; component < 3; ++component) { + for (int pz = 0; pz <= 2; ++pz) { + for (int py = 0; py <= 2 - pz; ++py) { + for (int px = 0; px <= 2 - py - pz; ++px) { + candidates.push_back({component, px, py, pz}); + } + } + } + } + return candidates; +} + +Real eval_transformed_rt_monomial_scalar(const std::array& mono, + const std::vector& px, + const std::vector& py, + const std::vector& pz) { + return px[static_cast(mono[1])] * + py[static_cast(mono[2])] * + pz[static_cast(mono[3])]; +} + +Real eval_transformed_rt_monomial_divergence(const std::array& mono, + const 
std::vector& px, + const std::vector& py, + const std::vector& pz) { + const int component = mono[0]; + const int px_pow = mono[1]; + const int py_pow = mono[2]; + const int pz_pow = mono[3]; + + if (component == 0) { + if (px_pow == 0) { + return Real(0); + } + return Real(px_pow) * + px[static_cast(px_pow - 1)] * + py[static_cast(py_pow)] * + pz[static_cast(pz_pow)]; + } + if (component == 1) { + if (py_pow == 0) { + return Real(0); + } + return Real(py_pow) * + px[static_cast(px_pow)] * + py[static_cast(py_pow - 1)] * + pz[static_cast(pz_pow)]; + } + if (pz_pow == 0) { + return Real(0); + } + return Real(pz_pow) * + px[static_cast(px_pow)] * + py[static_cast(py_pow)] * + pz[static_cast(pz_pow - 1)]; +} + +void add_component_monomial_jacobian(VectorJacobian& J, + int component, + int px_pow, + int py_pow, + int pz_pow, + Real coefficient, + const std::vector& px, + const std::vector& py, + const std::vector& pz) { + const auto comp = static_cast(component); + if (px_pow > 0) { + J(comp, 0) += coefficient * Real(px_pow) * + px[static_cast(px_pow - 1)] * + py[static_cast(py_pow)] * + pz[static_cast(pz_pow)]; + } + if (py_pow > 0) { + J(comp, 1) += coefficient * Real(py_pow) * + px[static_cast(px_pow)] * + py[static_cast(py_pow - 1)] * + pz[static_cast(pz_pow)]; + } + if (pz_pow > 0) { + J(comp, 2) += coefficient * Real(pz_pow) * + px[static_cast(px_pow)] * + py[static_cast(py_pow)] * + pz[static_cast(pz_pow - 1)]; + } +} + +VectorJacobian eval_transformed_component_monomial_jacobian( + const std::array& mono, + const std::vector& px, + const std::vector& py, + const std::vector& pz) { + VectorJacobian J{}; + add_component_monomial_jacobian( + J, mono[0], mono[1], mono[2], mono[3], Real(1), px, py, pz); + return J; +} + +void add_component_monomial_curl(Vec3& curl, + int component, + int px_pow, + int py_pow, + int pz_pow, + Real coefficient, + const std::vector& px, + const std::vector& py, + const std::vector& pz) { + const Real dphidx = (px_pow == 0) + ? 
Real(0) + : coefficient * Real(px_pow) * + px[static_cast(px_pow - 1)] * + py[static_cast(py_pow)] * + pz[static_cast(pz_pow)]; + const Real dphidy = (py_pow == 0) + ? Real(0) + : coefficient * Real(py_pow) * + px[static_cast(px_pow)] * + py[static_cast(py_pow - 1)] * + pz[static_cast(pz_pow)]; + const Real dphidz = (pz_pow == 0) + ? Real(0) + : coefficient * Real(pz_pow) * + px[static_cast(px_pow)] * + py[static_cast(py_pow)] * + pz[static_cast(pz_pow - 1)]; + + if (component == 0) { + curl[1] += dphidz; + curl[2] -= dphidy; + } else if (component == 1) { + curl[0] -= dphidz; + curl[2] += dphidx; + } else { + curl[0] += dphidy; + curl[1] -= dphidx; + } +} + +std::vector> make_nd_extra_monomial_candidates(ElementType, + int order) { + if (order >= 3) { + return make_component_monomial_candidates(3 * order); + } + + std::vector> candidates; + const int max_total_degree = (order == 1) ? 4 : 5; + for (int component = 0; component < 3; ++component) { + for (int total = 0; total <= max_total_degree; ++total) { + for (int pz = 0; pz <= total; ++pz) { + for (int py = 0; py <= total - pz; ++py) { + const int px = total - py - pz; + candidates.push_back({component, px, py, pz}); + } + } + } + } + return candidates; +} + +Real eval_transformed_nd_monomial_scalar(const std::array& mono, + const std::vector& px, + const std::vector& py, + const std::vector& pz) { + return px[static_cast(mono[1])] * + py[static_cast(mono[2])] * + pz[static_cast(mono[3])]; +} + +Vec3 eval_transformed_nd_monomial_curl(const std::array& mono, + const std::vector& px, + const std::vector& py, + const std::vector& pz) { + const int component = mono[0]; + const int px_pow = mono[1]; + const int py_pow = mono[2]; + const int pz_pow = mono[3]; + + const Real dphidx = (px_pow == 0) + ? Real(0) + : Real(px_pow) * + px[static_cast(px_pow - 1)] * + py[static_cast(py_pow)] * + pz[static_cast(pz_pow)]; + const Real dphidy = (py_pow == 0) + ? 
Real(0) + : Real(py_pow) * + px[static_cast(px_pow)] * + py[static_cast(py_pow - 1)] * + pz[static_cast(pz_pow)]; + const Real dphidz = (pz_pow == 0) + ? Real(0) + : Real(pz_pow) * + px[static_cast(px_pow)] * + py[static_cast(py_pow)] * + pz[static_cast(pz_pow - 1)]; + + if (component == 0) { + return Vec3{Real(0), dphidz, -dphidy}; + } + if (component == 1) { + return Vec3{-dphidz, Real(0), dphidx}; + } + return Vec3{dphidy, -dphidx, Real(0)}; +} + +} // namespace vector_common +} // namespace detail +} // namespace basis +} // namespace FE +} // namespace svmp diff --git a/Code/Source/solver/FE/Basis/VectorBasisEvaluationHelpers.h b/Code/Source/solver/FE/Basis/VectorBasisEvaluationHelpers.h new file mode 100644 index 000000000..e0e6daa10 --- /dev/null +++ b/Code/Source/solver/FE/Basis/VectorBasisEvaluationHelpers.h @@ -0,0 +1,751 @@ +/* Copyright (c) Stanford University, The Regents of the University of California, and others. + * + * All Rights Reserved. + * + * See License file. + */ + +#ifndef SVMP_FE_BASIS_VECTORBASISEVALUATIONHELPERS_H +#define SVMP_FE_BASIS_VECTORBASISEVALUATIONHELPERS_H + +#include "VectorBasis.h" +#include "Basis/BasisTraits.h" + +#include +#include +#include +#include +#include + +namespace svmp { +namespace FE { +namespace basis { +namespace detail { +namespace vector_common { + +using Vec3 = math::Vector; + +struct VectorBasisScratch { + std::vector px; + std::vector py; + std::vector pz; + std::vector batched_px; + std::vector batched_py; + std::vector batched_pz; + std::vector candidate_values; + std::vector candidate_dx; + std::vector candidate_dy; + std::vector candidate_dz; + std::vector modal_values_batched; + std::vector modal_jacobians_batched; + std::vector modal_curls_batched; + std::vector modal_divergence_batched; + std::vector vector_values; + std::vector vector_jacobians; + std::vector scalars; + std::vector api_values; + std::vector api_jacobians; + std::vector api_curl; + std::vector api_divergence; + + void 
prewarm(std::size_t max_size, std::size_t max_qpts) { + const std::size_t batched_size = max_size * std::max(max_qpts, 1u); + px.reserve(max_size); + py.reserve(max_size); + pz.reserve(max_size); + batched_px.reserve(batched_size); + batched_py.reserve(batched_size); + batched_pz.reserve(batched_size); + candidate_values.reserve(max_size); + candidate_dx.reserve(max_size); + candidate_dy.reserve(max_size); + candidate_dz.reserve(max_size); + modal_values_batched.reserve(batched_size * 3u); + modal_jacobians_batched.reserve(batched_size * 9u); + modal_curls_batched.reserve(batched_size * 3u); + modal_divergence_batched.reserve(batched_size); + vector_values.reserve(max_size); + vector_jacobians.reserve(max_size); + scalars.reserve(max_size); + api_values.reserve(max_size); + api_jacobians.reserve(max_size); + api_curl.reserve(max_size); + api_divergence.reserve(max_size); + } +}; + +VectorBasisScratch& vector_basis_scratch(); +void prewarm_vector_basis_scratch(std::size_t max_size, std::size_t max_qpts = 0); + +void fill_powers(Real x, int max_p, std::vector& out); +void fill_power_tables(const Vec3& xi, + const std::array& limits, + VectorBasisScratch& scratch); +void fill_batched_power_tables(const std::vector& points, + const std::array& limits, + VectorBasisScratch& scratch); +void validate_vector_strided_outputs(std::size_t num_qpts, + std::size_t output_stride, + const char* family_name); +void zero_active_strided_rows(Real* output, + std::size_t rows, + std::size_t output_stride, + std::size_t num_qpts); +SparseModalCoefficientMatrix build_sparse_modal_coefficients( + const std::vector& dense_coefficients, + std::size_t rows, + std::size_t cols); +Vec3 curl_from_jacobian(const VectorJacobian& J) noexcept; +Real divergence_from_jacobian(const VectorJacobian& J) noexcept; + +inline Real batched_power_product(const std::vector& px, + const std::vector& py, + const std::vector& pz, + std::size_t stride, + int px_pow, + int py_pow, + int pz_pow, + std::size_t q) 
noexcept { + return px[static_cast(px_pow) * stride + q] * + py[static_cast(py_pow) * stride + q] * + pz[static_cast(pz_pow) * stride + q]; +} + +inline Real batched_component_partial(const std::vector& px, + const std::vector& py, + const std::vector& pz, + std::size_t stride, + int px_pow, + int py_pow, + int pz_pow, + int derivative_axis, + std::size_t q) noexcept { + if (derivative_axis == 0) { + if (px_pow == 0) { + return Real(0); + } + return Real(px_pow) * + px[static_cast(px_pow - 1) * stride + q] * + py[static_cast(py_pow) * stride + q] * + pz[static_cast(pz_pow) * stride + q]; + } + if (derivative_axis == 1) { + if (py_pow == 0) { + return Real(0); + } + return Real(py_pow) * + px[static_cast(px_pow) * stride + q] * + py[static_cast(py_pow - 1) * stride + q] * + pz[static_cast(pz_pow) * stride + q]; + } + if (pz_pow == 0) { + return Real(0); + } + return Real(pz_pow) * + px[static_cast(px_pow) * stride + q] * + py[static_cast(py_pow) * stride + q] * + pz[static_cast(pz_pow - 1) * stride + q]; +} + +inline Vec3 curl_from_component_gradient(int component, + Real dphidx, + Real dphidy, + Real dphidz) noexcept { + if (component == 0) { + return Vec3{Real(0), dphidz, -dphidy}; + } + if (component == 1) { + return Vec3{-dphidz, Real(0), dphidx}; + } + return Vec3{dphidy, -dphidx, Real(0)}; +} + +inline void axpy_qpoints(Real* target, + const Real* source, + Real coefficient, + std::size_t num_qpts) noexcept { + for (std::size_t q = 0; q < num_qpts; ++q) { + target[q] += coefficient * source[q]; + } +} + +void write_vector_values_strided(const std::vector& values, + std::size_t num_dofs, + std::size_t output_stride, + std::size_t q, + Real* SVMP_RESTRICT values_out); +void write_vector_jacobians_strided(const std::vector& jacobians, + std::size_t num_dofs, + std::size_t output_stride, + std::size_t q, + Real* SVMP_RESTRICT jacobians_out); +void write_vector_curl_strided(const std::vector& curl, + std::size_t num_dofs, + std::size_t output_stride, + std::size_t 
q, + Real* SVMP_RESTRICT curls_out); +void write_vector_divergence_strided(const std::vector& divergence, + std::size_t num_dofs, + std::size_t output_stride, + std::size_t q, + Real* SVMP_RESTRICT divergence_out); +void write_curl_and_divergence_from_jacobians_strided( + const std::vector& jacobians, + std::size_t num_dofs, + std::size_t output_stride, + std::size_t q, + Real* SVMP_RESTRICT curls_out, + Real* SVMP_RESTRICT divergence_out); + +template +void evaluate_vector_public_api_strided( + const BasisLike& basis, + const std::vector& points, + std::size_t output_stride, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT jacobians_out, + Real* SVMP_RESTRICT curls_out, + Real* SVMP_RESTRICT divergence_out, + bool use_direct_curl, + bool use_direct_divergence, + const char* family_name) { + const std::size_t num_qpts = points.size(); + const std::size_t num_dofs = basis.size(); + validate_vector_strided_outputs(num_qpts, output_stride, family_name); + + auto& scratch = vector_basis_scratch(); + for (std::size_t q = 0; q < num_qpts; ++q) { + if (values_out != nullptr) { + basis.evaluate_vector_values(points[q], scratch.api_values); + write_vector_values_strided( + scratch.api_values, num_dofs, output_stride, q, values_out); + } + + const bool needs_jacobians = + jacobians_out != nullptr || + (curls_out != nullptr && !use_direct_curl) || + (divergence_out != nullptr && !use_direct_divergence); + + if (needs_jacobians) { + basis.evaluate_vector_jacobians(points[q], scratch.api_jacobians); + write_vector_jacobians_strided( + scratch.api_jacobians, num_dofs, output_stride, q, jacobians_out); + write_curl_and_divergence_from_jacobians_strided( + scratch.api_jacobians, + num_dofs, + output_stride, + q, + curls_out, + divergence_out); + continue; + } + + if (curls_out != nullptr) { + basis.evaluate_curl(points[q], scratch.api_curl); + write_vector_curl_strided( + scratch.api_curl, num_dofs, output_stride, q, curls_out); + } + if (divergence_out != nullptr) { + 
basis.evaluate_divergence(points[q], scratch.api_divergence); + write_vector_divergence_strided( + scratch.api_divergence, num_dofs, output_stride, q, divergence_out); + } + } +} + +Vec3 lerp(const Vec3& a, const Vec3& b, Real s); +Vec3 bilinear(const std::array& v, Real u, Real w); +Vec3 bilinear_du(const std::array& v, Real u, Real w); +Vec3 bilinear_dw(const std::array& v, Real u, Real w); +Vec3 cross3(const Vec3& a, const Vec3& b); +Vec3 normalize3(const Vec3& v); + +template +std::array modal_power_limits(const ModalPolynomials& monomials) { + std::array limits{{0, 0, 0}}; + for (const auto& poly : monomials) { + for (int t = 0; t < poly.num_terms; ++t) { + const auto& m = poly.terms[static_cast(t)]; + limits[0] = std::max(limits[0], m.px); + limits[1] = std::max(limits[1], m.py); + limits[2] = std::max(limits[2], m.pz); + } + } + return limits; +} + +std::array component_monomial_power_limits( + const std::vector>& candidates); +std::size_t triangle_poly_dim(std::size_t k); +std::size_t tetra_poly_dim(std::size_t k); +std::size_t rt_wedge_size(int order); +std::size_t rt_pyramid_size(int order); +std::size_t nd_wedge_size(int order); +std::size_t nd_pyramid_size(int order); +void ensure_supported_hybrid_vector_order(ElementType type, + int order, + const char* family_name); +std::vector> make_component_monomial_candidates(int max_total_degree); +std::vector> make_rt_extra_monomial_candidates(ElementType type, + int order); +Real eval_transformed_rt_monomial_scalar(const std::array& mono, + const std::vector& px, + const std::vector& py, + const std::vector& pz); +Real eval_transformed_rt_monomial_divergence(const std::array& mono, + const std::vector& px, + const std::vector& py, + const std::vector& pz); + +void add_component_monomial_jacobian(VectorJacobian& J, + int component, + int px_pow, + int py_pow, + int pz_pow, + Real coefficient, + const std::vector& px, + const std::vector& py, + const std::vector& pz); +VectorJacobian 
eval_transformed_component_monomial_jacobian( + const std::array& mono, + const std::vector& px, + const std::vector& py, + const std::vector& pz); +void add_component_monomial_curl(Vec3& curl, + int component, + int px_pow, + int py_pow, + int pz_pow, + Real coefficient, + const std::vector& px, + const std::vector& py, + const std::vector& pz); + +template +void evaluate_nodal_modal_vector_values_with_limits(const ModalPolynomials& monomials, + const SparseModalCoefficientMatrix& sparse_coeffs, + std::size_t n, + const Vec3& xi, + const std::array& power_limits, + std::vector& values) { + values.assign(n, Vec3{}); + + auto& scratch = vector_basis_scratch(); + fill_power_tables(xi, power_limits, scratch); + const auto& px = scratch.px; + const auto& py = scratch.py; + const auto& pz = scratch.pz; + + auto& modal_vals = scratch.vector_values; + modal_vals.assign(n, Vec3{}); + for (std::size_t p = 0; p < n; ++p) { + const auto& poly = monomials[p]; + auto& v = modal_vals[p]; + for (int t = 0; t < poly.num_terms; ++t) { + const auto& m = poly.terms[static_cast(t)]; + const Real mv = + px[static_cast(m.px)] * + py[static_cast(m.py)] * + pz[static_cast(m.pz)]; + v[static_cast(m.component)] += m.coefficient * mv; + } + } + + BASIS_CHECK_CONSTRUCTION(sparse_coeffs.rows == n && + sparse_coeffs.cols == n && + sparse_coeffs.row_offsets.size() == n + 1u, + "evaluate_nodal_modal_vector_values: sparse coefficient size mismatch"); + BASIS_CHECK_CONSTRUCTION(sparse_coeffs.dofs.size() == sparse_coeffs.coefficients.size(), + "evaluate_nodal_modal_vector_values: sparse coefficient entry mismatch"); + for (std::size_t p = 0; p < n; ++p) { + const Vec3& mv = modal_vals[p]; + const std::size_t row_begin = sparse_coeffs.row_offsets[p]; + const std::size_t row_end = sparse_coeffs.row_offsets[p + 1u]; + for (std::size_t entry = row_begin; entry < row_end; ++entry) { + const std::size_t dof = sparse_coeffs.dofs[entry]; + const Real c = sparse_coeffs.coefficients[entry]; + values[dof][0] 
+= c * mv[0]; + values[dof][1] += c * mv[1]; + values[dof][2] += c * mv[2]; + } + } +} + +template +void evaluate_nodal_modal_vector_jacobians_with_limits(const ModalPolynomials& monomials, + const SparseModalCoefficientMatrix& sparse_coeffs, + std::size_t n, + const Vec3& xi, + const std::array& power_limits, + std::vector& jacobians) { + jacobians.assign(n, VectorJacobian{}); + + auto& scratch = vector_basis_scratch(); + fill_power_tables(xi, power_limits, scratch); + const auto& px = scratch.px; + const auto& py = scratch.py; + const auto& pz = scratch.pz; + + auto& modal_jacs = scratch.vector_jacobians; + modal_jacs.assign(n, VectorJacobian{}); + for (std::size_t p = 0; p < n; ++p) { + const auto& poly = monomials[p]; + auto& J = modal_jacs[p]; + for (int t = 0; t < poly.num_terms; ++t) { + const auto& m = poly.terms[static_cast(t)]; + add_component_monomial_jacobian(J, m.component, m.px, m.py, m.pz, m.coefficient, px, py, pz); + } + } + + BASIS_CHECK_CONSTRUCTION(sparse_coeffs.rows == n && + sparse_coeffs.cols == n && + sparse_coeffs.row_offsets.size() == n + 1u, + "evaluate_nodal_modal_vector_jacobians: sparse coefficient size mismatch"); + BASIS_CHECK_CONSTRUCTION(sparse_coeffs.dofs.size() == sparse_coeffs.coefficients.size(), + "evaluate_nodal_modal_vector_jacobians: sparse coefficient entry mismatch"); + for (std::size_t p = 0; p < n; ++p) { + const auto& Jp = modal_jacs[p]; + const std::size_t row_begin = sparse_coeffs.row_offsets[p]; + const std::size_t row_end = sparse_coeffs.row_offsets[p + 1u]; + for (std::size_t entry = row_begin; entry < row_end; ++entry) { + const std::size_t dof = sparse_coeffs.dofs[entry]; + const Real c = sparse_coeffs.coefficients[entry]; + for (std::size_t r = 0; r < 3; ++r) { + for (std::size_t col = 0; col < 3; ++col) { + jacobians[dof](r, col) += c * Jp(r, col); + } + } + } + } +} + +template +void evaluate_nodal_modal_vector_curl_with_limits(const ModalPolynomials& monomials, + const SparseModalCoefficientMatrix& 
sparse_coeffs, + std::size_t n, + const Vec3& xi, + const std::array& power_limits, + std::vector& curl) { + curl.assign(n, Vec3{}); + + auto& scratch = vector_basis_scratch(); + fill_power_tables(xi, power_limits, scratch); + const auto& px = scratch.px; + const auto& py = scratch.py; + const auto& pz = scratch.pz; + + auto& modal_curl = scratch.vector_values; + modal_curl.assign(n, Vec3{}); + for (std::size_t p = 0; p < n; ++p) { + const auto& poly = monomials[p]; + auto& c = modal_curl[p]; + for (int t = 0; t < poly.num_terms; ++t) { + const auto& m = poly.terms[static_cast(t)]; + add_component_monomial_curl(c, m.component, m.px, m.py, m.pz, m.coefficient, px, py, pz); + } + } + + BASIS_CHECK_CONSTRUCTION(sparse_coeffs.rows == n && + sparse_coeffs.cols == n && + sparse_coeffs.row_offsets.size() == n + 1u, + "evaluate_nodal_modal_vector_curl: sparse coefficient size mismatch"); + BASIS_CHECK_CONSTRUCTION(sparse_coeffs.dofs.size() == sparse_coeffs.coefficients.size(), + "evaluate_nodal_modal_vector_curl: sparse coefficient entry mismatch"); + for (std::size_t p = 0; p < n; ++p) { + const Vec3& cm = modal_curl[p]; + const std::size_t row_begin = sparse_coeffs.row_offsets[p]; + const std::size_t row_end = sparse_coeffs.row_offsets[p + 1u]; + for (std::size_t entry = row_begin; entry < row_end; ++entry) { + const std::size_t dof = sparse_coeffs.dofs[entry]; + const Real c = sparse_coeffs.coefficients[entry]; + curl[dof][0] += c * cm[0]; + curl[dof][1] += c * cm[1]; + curl[dof][2] += c * cm[2]; + } + } +} + +template +void evaluate_nodal_modal_divergence_with_limits(const ModalPolynomials& monomials, + const SparseModalCoefficientMatrix& sparse_coeffs, + std::size_t n, + const Vec3& xi, + const std::array& power_limits, + std::vector& divergence) { + divergence.assign(n, Real(0)); + + auto& scratch = vector_basis_scratch(); + fill_power_tables(xi, power_limits, scratch); + const auto& px = scratch.px; + const auto& py = scratch.py; + const auto& pz = scratch.pz; + + 
auto& modal_divergence = scratch.scalars; + modal_divergence.assign(n, Real(0)); + for (std::size_t p = 0; p < n; ++p) { + const auto& poly = monomials[p]; + Real div = Real(0); + for (int t = 0; t < poly.num_terms; ++t) { + const auto& m = poly.terms[static_cast(t)]; + if (m.component == 0 && m.px > 0) { + div += m.coefficient * Real(m.px) * + px[static_cast(m.px - 1)] * + py[static_cast(m.py)] * + pz[static_cast(m.pz)]; + } else if (m.component == 1 && m.py > 0) { + div += m.coefficient * Real(m.py) * + px[static_cast(m.px)] * + py[static_cast(m.py - 1)] * + pz[static_cast(m.pz)]; + } else if (m.component == 2 && m.pz > 0) { + div += m.coefficient * Real(m.pz) * + px[static_cast(m.px)] * + py[static_cast(m.py)] * + pz[static_cast(m.pz - 1)]; + } + } + modal_divergence[p] = div; + } + + BASIS_CHECK_CONSTRUCTION(sparse_coeffs.rows == n && + sparse_coeffs.cols == n && + sparse_coeffs.row_offsets.size() == n + 1u, + "evaluate_nodal_modal_divergence: sparse coefficient size mismatch"); + BASIS_CHECK_CONSTRUCTION(sparse_coeffs.dofs.size() == sparse_coeffs.coefficients.size(), + "evaluate_nodal_modal_divergence: sparse coefficient entry mismatch"); + for (std::size_t p = 0; p < n; ++p) { + const Real div = modal_divergence[p]; + if (div == Real(0)) { + continue; + } + const std::size_t row_begin = sparse_coeffs.row_offsets[p]; + const std::size_t row_end = sparse_coeffs.row_offsets[p + 1u]; + for (std::size_t entry = row_begin; entry < row_end; ++entry) { + divergence[sparse_coeffs.dofs[entry]] += + sparse_coeffs.coefficients[entry] * div; + } + } +} + +template +void evaluate_nodal_modal_vector_strided_with_limits( + const ModalPolynomials& monomials, + const SparseModalCoefficientMatrix& sparse_coeffs, + std::size_t n, + const std::vector& points, + std::size_t output_stride, + const std::array& power_limits, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT jacobians_out, + Real* SVMP_RESTRICT curls_out, + Real* SVMP_RESTRICT divergence_out, + const char* 
family_name) { + const std::size_t num_qpts = points.size(); + validate_vector_strided_outputs(num_qpts, output_stride, family_name); + BASIS_CHECK_CONSTRUCTION(sparse_coeffs.rows == n && + sparse_coeffs.cols == n && + sparse_coeffs.row_offsets.size() == n + 1u, + "evaluate_nodal_modal_vector_strided: sparse coefficient size mismatch"); + BASIS_CHECK_CONSTRUCTION(sparse_coeffs.dofs.size() == sparse_coeffs.coefficients.size(), + "evaluate_nodal_modal_vector_strided: sparse coefficient entry mismatch"); + + auto& scratch = vector_basis_scratch(); + const bool need_values = values_out != nullptr; + const bool need_jacobians = jacobians_out != nullptr; + const bool need_curls = curls_out != nullptr; + const bool need_divergence = divergence_out != nullptr; + + if (need_values) { + zero_active_strided_rows(values_out, n * 3u, output_stride, num_qpts); + } + if (need_jacobians) { + zero_active_strided_rows(jacobians_out, n * 9u, output_stride, num_qpts); + } + if (need_curls) { + zero_active_strided_rows(curls_out, n * 3u, output_stride, num_qpts); + } + if (need_divergence) { + zero_active_strided_rows(divergence_out, n, output_stride, num_qpts); + } + if (num_qpts == 0 || n == 0) { + return; + } + + fill_batched_power_tables(points, power_limits, scratch); + const auto& px = scratch.batched_px; + const auto& py = scratch.batched_py; + const auto& pz = scratch.batched_pz; + const std::size_t power_stride = num_qpts; + const bool need_modal_gradient = need_jacobians || need_curls || need_divergence; + + auto& modal_values = scratch.modal_values_batched; + auto& modal_jacobians = scratch.modal_jacobians_batched; + auto& modal_curls = scratch.modal_curls_batched; + auto& modal_divergence = scratch.modal_divergence_batched; + + for (std::size_t p = 0; p < n; ++p) { + if (need_values) { + modal_values.assign(3u * num_qpts, Real(0)); + } + if (need_jacobians) { + modal_jacobians.assign(9u * num_qpts, Real(0)); + } + if (need_curls) { + modal_curls.assign(3u * num_qpts, 
Real(0)); + } + if (need_divergence) { + modal_divergence.assign(num_qpts, Real(0)); + } + + const auto& poly = monomials[p]; + for (int term_index = 0; term_index < poly.num_terms; ++term_index) { + const auto& term = poly.terms[static_cast(term_index)]; + const std::size_t component = static_cast(term.component); + Real* modal_value_row = need_values + ? modal_values.data() + component * num_qpts + : nullptr; + Real* modal_jacobian_row = need_jacobians + ? modal_jacobians.data() + component * 3u * num_qpts + : nullptr; + Real* modal_curl_rows = need_curls ? modal_curls.data() : nullptr; + Real* modal_divergence_row = + need_divergence ? modal_divergence.data() : nullptr; + + if (need_values) { + for (std::size_t q = 0; q < num_qpts; ++q) { + modal_value_row[q] += + term.coefficient * + batched_power_product(px, + py, + pz, + power_stride, + term.px, + term.py, + term.pz, + q); + } + } + + if (need_modal_gradient) { + for (std::size_t q = 0; q < num_qpts; ++q) { + const Real dphidx = + term.coefficient * + batched_component_partial(px, + py, + pz, + power_stride, + term.px, + term.py, + term.pz, + 0, + q); + const Real dphidy = + term.coefficient * + batched_component_partial(px, + py, + pz, + power_stride, + term.px, + term.py, + term.pz, + 1, + q); + const Real dphidz = + term.coefficient * + batched_component_partial(px, + py, + pz, + power_stride, + term.px, + term.py, + term.pz, + 2, + q); + + if (need_jacobians) { + modal_jacobian_row[q] += dphidx; + modal_jacobian_row[num_qpts + q] += dphidy; + modal_jacobian_row[2u * num_qpts + q] += dphidz; + } + if (need_curls) { + const Vec3 curl = + curl_from_component_gradient(term.component, + dphidx, + dphidy, + dphidz); + modal_curl_rows[q] += curl[0]; + modal_curl_rows[num_qpts + q] += curl[1]; + modal_curl_rows[2u * num_qpts + q] += curl[2]; + } + if (need_divergence) { + const Real div = term.component == 0 ? dphidx + : term.component == 1 ? 
dphidy + : dphidz; + modal_divergence_row[q] += div; + } + } + } + } + + const std::size_t row_begin = sparse_coeffs.row_offsets[p]; + const std::size_t row_end = sparse_coeffs.row_offsets[p + 1u]; + for (std::size_t entry = row_begin; entry < row_end; ++entry) { + const std::size_t dof = sparse_coeffs.dofs[entry]; + const Real c = sparse_coeffs.coefficients[entry]; + if (need_values) { + for (std::size_t component = 0; component < 3u; ++component) { + axpy_qpoints(values_out + (dof * 3u + component) * output_stride, + modal_values.data() + component * num_qpts, + c, + num_qpts); + } + } + if (need_jacobians) { + for (std::size_t row = 0; row < 3u; ++row) { + for (std::size_t col = 0; col < 3u; ++col) { + axpy_qpoints(jacobians_out + + (dof * 9u + row * 3u + col) * output_stride, + modal_jacobians.data() + + (row * 3u + col) * num_qpts, + c, + num_qpts); + } + } + } + if (need_curls) { + for (std::size_t component = 0; component < 3u; ++component) { + axpy_qpoints(curls_out + (dof * 3u + component) * output_stride, + modal_curls.data() + component * num_qpts, + c, + num_qpts); + } + } + if (need_divergence) { + axpy_qpoints(divergence_out + dof * output_stride, + modal_divergence.data(), + c, + num_qpts); + } + } + } +} + +std::vector> make_nd_extra_monomial_candidates(ElementType type, + int order); +Real eval_transformed_nd_monomial_scalar(const std::array& mono, + const std::vector& px, + const std::vector& py, + const std::vector& pz); +Vec3 eval_transformed_nd_monomial_curl(const std::array& mono, + const std::vector& px, + const std::vector& py, + const std::vector& pz); + + +} // namespace vector_common +} // namespace detail +} // namespace basis +} // namespace FE +} // namespace svmp + +#endif // SVMP_FE_BASIS_VECTORBASISEVALUATIONHELPERS_H diff --git a/Code/Source/solver/FE/Basis/VectorBasisModalPolynomial.h b/Code/Source/solver/FE/Basis/VectorBasisModalPolynomial.h new file mode 100644 index 000000000..6e1a7202b --- /dev/null +++ 
b/Code/Source/solver/FE/Basis/VectorBasisModalPolynomial.h @@ -0,0 +1,77 @@ +/* Copyright (c) Stanford University, The Regents of the University of California, and others. + * + * All Rights Reserved. + * + * See License file. + */ + +#ifndef SVMP_FE_BASIS_VECTORBASISMODALPOLYNOMIAL_H +#define SVMP_FE_BASIS_VECTORBASISMODALPOLYNOMIAL_H + +#include "Types.h" + +#include +#include +#include + +namespace svmp { +namespace FE { +namespace basis { + +struct VectorBasisModalTerm { + int component{0}; // 0=x, 1=y, 2=z + int px{0}; + int py{0}; + int pz{0}; + Real coefficient{Real(1)}; +}; + +struct VectorBasisModalPolynomial { + std::array terms{}; + int num_terms{0}; +}; + +inline bool modal_terms_equal(const VectorBasisModalTerm& lhs, + const VectorBasisModalTerm& rhs) noexcept { + return lhs.component == rhs.component && + lhs.px == rhs.px && + lhs.py == rhs.py && + lhs.pz == rhs.pz && + lhs.coefficient == rhs.coefficient; +} + +inline bool modal_polynomials_equal(const VectorBasisModalPolynomial& lhs, + const VectorBasisModalPolynomial& rhs) noexcept { + if (lhs.num_terms != rhs.num_terms) { + return false; + } + for (int term = 0; term < lhs.num_terms; ++term) { + const auto index = static_cast(term); + if (!modal_terms_equal(lhs.terms[index], rhs.terms[index])) { + return false; + } + } + return true; +} + +inline bool append_unique_modal_polynomial( + std::vector& polynomials, + const VectorBasisModalPolynomial& polynomial) { + const auto found = std::find_if( + polynomials.begin(), + polynomials.end(), + [&](const VectorBasisModalPolynomial& existing) { + return modal_polynomials_equal(existing, polynomial); + }); + if (found != polynomials.end()) { + return false; + } + polynomials.push_back(polynomial); + return true; +} + +} // namespace basis +} // namespace FE +} // namespace svmp + +#endif // SVMP_FE_BASIS_VECTORBASISMODALPOLYNOMIAL_H diff --git a/Code/Source/solver/FE/Common/Alignment.h b/Code/Source/solver/FE/Common/Alignment.h new file mode 100644 index 
000000000..8d33a7a7a --- /dev/null +++ b/Code/Source/solver/FE/Common/Alignment.h @@ -0,0 +1,23 @@ +#ifndef SVMP_FE_CORE_ALIGNMENT_H +#define SVMP_FE_CORE_ALIGNMENT_H + +/** + * @file Alignment.h + * @brief Global alignment constants used across FE modules. + */ + +#include + +namespace svmp { +namespace FE { + +/// Preferred cache-line/SIMD alignment for performance-critical arrays. +inline constexpr std::size_t kFEPreferredAlignmentBytes = 64u; + +/// Alignment for small fixed-size math objects that are commonly passed by value. +inline constexpr std::size_t kFEFixedObjectAlignmentBytes = 32u; + +} // namespace FE +} // namespace svmp + +#endif // SVMP_FE_CORE_ALIGNMENT_H diff --git a/Code/Source/solver/FE/Common/Types.h b/Code/Source/solver/FE/Common/Types.h new file mode 100644 index 000000000..60312a524 --- /dev/null +++ b/Code/Source/solver/FE/Common/Types.h @@ -0,0 +1,532 @@ +/* Copyright (c) Stanford University, The Regents of the University of California, and others. + * + * All Rights Reserved. + * + * See Copyright-SimVascular.txt for additional details. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject + * to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SVMP_FE_TYPES_H +#define SVMP_FE_TYPES_H + +/** + * @file Types.h + * @brief Fundamental type definitions for the finite element library + * + * This header provides core type aliases, enumerations, and strong type + * definitions used throughout the FE library. It establishes a consistent + * type system that integrates with the Mesh library while maintaining + * independence from backend-specific types. + */ + +#if defined(SVMP_FE_WITH_MESH) && SVMP_FE_WITH_MESH +# include "Mesh/Core/MeshTypes.h" +# define SVMP_FE_HAS_MESH_TYPES 1 +#else +// Build FE without Mesh types unless explicitly enabled. +# define SVMP_FE_HAS_MESH_TYPES 0 +#endif + +#if !SVMP_FE_HAS_MESH_TYPES +namespace svmp { +// Minimal fallback when the Mesh library is not available. +// Keeps FE compilation self-contained while preserving the same namespace. 
+#ifndef SVMP_CELL_FAMILY_DEFINED +#define SVMP_CELL_FAMILY_DEFINED 1 +enum class CellFamily { + Point, + Line, + Triangle, + Quad, + Tetra, + Hex, + Wedge, + Pyramid, + Polygon, + Polyhedron +}; +#endif +} // namespace svmp +#endif +#include +#include +#include +#include +#include + +#if defined(_MSC_VER) +# define SVMP_RESTRICT __restrict +#elif defined(__clang__) || defined(__GNUC__) +# define SVMP_RESTRICT __restrict__ +#else +# define SVMP_RESTRICT +#endif + +namespace svmp { +namespace FE { + +// ============================================================================ +// Index Types +// ============================================================================ + +/** + * @brief Local index type for element-level operations + * + * Used for local node numbering within elements, local DOF indices, + * and other element-local indexing. Unsigned for safety. + */ +using LocalIndex = std::uint32_t; + +/** + * @brief Global index type for distributed DOF numbering + * + * Signed 64-bit for compatibility with PETSc and Trilinos. + * Negative values can indicate special conditions or invalid indices. + */ +using GlobalIndex = std::int64_t; + +/** + * @brief DOF-specific index type + * + * Strong type alias to prevent mixing DOF indices with other indices. + * Provides type safety at compile time. + */ +struct DofIndex { + GlobalIndex value; + + constexpr explicit DofIndex(GlobalIndex v = -1) noexcept : value(v) {} + constexpr operator GlobalIndex() const noexcept { return value; } + constexpr bool is_valid() const noexcept { return value >= 0; } +}; + +/** + * @brief Field identifier type + * + * Used to distinguish between different physical fields in multi-field problems. + */ +using FieldId = std::uint16_t; + +/** + * @brief Block identifier for block-structured systems + */ +using BlockId = std::uint16_t; + +// Import mesh library scalar/index types when available (optional dependency). 
+#if SVMP_FE_HAS_MESH_TYPES +using MeshIndex = svmp::index_t; +using MeshOffset = svmp::offset_t; +using MeshGlobalId = svmp::gid_t; +using Real = svmp::real_t; // Use same precision as Mesh library +#else +using MeshIndex = std::int32_t; +using MeshOffset = std::int64_t; +using MeshGlobalId = std::int64_t; +using Real = double; +#endif + +// ============================================================================ +// Constants +// ============================================================================ + +constexpr LocalIndex INVALID_LOCAL_INDEX = std::numeric_limits::max(); +constexpr GlobalIndex INVALID_GLOBAL_INDEX = -1; +constexpr FieldId INVALID_FIELD_ID = std::numeric_limits::max(); +/// Sentinel FieldId for geometry-only quantities (no DOF dependence). +/// Uses first registered field's space for quadrature, but logically decoupled +/// from any specific field's DOFs. +constexpr FieldId GEOMETRY_FIELD_ID = std::numeric_limits::max() - 1; +constexpr BlockId INVALID_BLOCK_ID = std::numeric_limits::max(); + +/** + * @brief Sentinel FieldId representing "the current solution state" in tangent forms. + * + * When differentiating a residual form to obtain the tangent (Jacobian), undifferentiated + * TrialFunction occurrences are rewritten to StateField nodes. Those that represent the + * block's own primary unknown (rather than a named external field) use this sentinel + * FieldId. The assembler maps it to the current solution coefficients at each quadrature + * point, regardless of which physics or field variables are involved. + * + * This is distinct from INVALID_FIELD_ID, which means "uninitialized / no field." + * CURRENT_SOLUTION_FIELD_ID uses the same numeric value for backward compatibility + * with existing KernelIR encodings, but carries explicit semantic intent. 
+ */ +constexpr FieldId CURRENT_SOLUTION_FIELD_ID = std::numeric_limits::max(); + +// ============================================================================ +// Field Value Entry (for point evaluation of field-dependent expressions) +// ============================================================================ + +/// Maximum number of components in a FieldValueEntry (3x3 tensor). +constexpr int MAX_FIELD_VALUE_COMPONENTS = 9; + +/** + * @brief Field value at an evaluation point — scalar, vector, or tensor. + * + * Used by PointEvaluator and the auxiliary assembly path to supply FE + * field values at entity locations (e.g., nodal DOF values for + * Node-scoped auxiliary models with Lagrange Kronecker delta). + */ +struct FieldValueEntry { + FieldId field{INVALID_FIELD_ID}; + int n_components{0}; + Real components[MAX_FIELD_VALUE_COMPONENTS]{}; +}; + +// ============================================================================ +// Element Type Enumerations +// ============================================================================ + +/** + * @brief Reference element types supported by the FE library + * + * Maps to svmp::CellFamily from the Mesh library but provides + * FE-specific categorization including higher-order variants. 
+ */ +enum class ElementType : std::uint8_t { + // Linear elements + Line2 = 0, // 2-node line + Triangle3 = 1, // 3-node triangle + Quad4 = 2, // 4-node quadrilateral + Tetra4 = 3, // 4-node tetrahedron + Hex8 = 4, // 8-node hexahedron + Wedge6 = 5, // 6-node wedge/prism + Pyramid5 = 6, // 5-node pyramid + + // Quadratic elements + Line3 = 10, // 3-node line + Triangle6 = 11, // 6-node triangle + Quad9 = 12, // 9-node quadrilateral (bi-quadratic) + Quad8 = 13, // 8-node quadrilateral (serendipity) + Tetra10 = 14, // 10-node tetrahedron + Hex27 = 15, // 27-node hexahedron (tri-quadratic) + Hex20 = 16, // 20-node hexahedron (serendipity) + Wedge15 = 17, // 15-node wedge + Wedge18 = 18, // 18-node wedge (complete quadratic) + Pyramid13 = 19, // 13-node pyramid + Pyramid14 = 20, // 14-node pyramid + + // Special elements + Point1 = 30, // 1-node point element + + Unknown = 255 +}; + +/** + * @brief Quadrature rule types + */ +enum class QuadratureType : std::uint8_t { + GaussLegendre, // Standard Gaussian quadrature + GaussLobatto, // Includes endpoints (for spectral elements) + Newton, // Newton-Cotes rules + Reduced, // Order-based reduced integration for locking + PositionBased, // Position-based reduced integration (legacy compatible) + Composite, // Composite rules for adaptivity + Custom // User-defined quadrature points +}; + +/** + * @brief Basis function families + */ +enum class BasisType : std::uint8_t { + Lagrange, // Standard nodal Lagrange basis + Hierarchical, // Hierarchical/modal basis + Bernstein, // Bernstein polynomials + NURBS, // Non-uniform rational B-splines + BSpline, // Non-rational B-spline basis + Spectral, // Spectral element basis + Serendipity, // Serendipity elements + Hermite, // Hermite C1 continuity basis + RaviartThomas, // H(div) Raviart-Thomas family + Nedelec, // H(curl) Nedelec edge elements + BDM, // H(div) Brezzi-Douglas-Marini family + Bubble, // Interior bubble functions for enrichment + Custom // User-defined basis +}; + 
+/** + * @brief Field types for function spaces + */ +enum class FieldType : std::uint8_t { + Scalar, // Scalar field (temperature, pressure) + Vector, // Vector field (velocity, displacement) + Tensor, // Tensor field (stress, strain) + SymmetricTensor, // Symmetric tensor field + Mixed // Mixed/composite field +}; + +/** + * @brief Continuity requirements for function spaces + */ +enum class Continuity : std::uint8_t { + C0, // Continuous (standard FEM) + C1, // C1 continuous (for plates/shells) + L2, // L2 (discontinuous) + H_div, // H(div) conforming + H_curl, // H(curl) conforming + Custom +}; + +/** + * @brief Assembly strategies + */ +enum class AssemblyStrategy : std::uint8_t { + ElementByElement, // Traditional element loop + Vectorized, // SIMD vectorized assembly + MatrixFree, // Matrix-free operators + Hybrid // Mixed strategy +}; + +/** + * @brief Status codes for FE operations + */ +enum class FEStatus : std::uint8_t { + Success = 0, + InvalidArgument = 1, + InvalidElement = 2, + SingularMapping = 3, + QuadratureError = 4, + AssemblyError = 5, + BackendError = 6, + NotImplemented = 7, + ConvergenceError = 8, + AllocationError = 9, + MPIError = 10, + IOError = 11, + Unknown = 255 +}; + +// ============================================================================ +// Geometric Types +// ============================================================================ + +/** + * @brief Point in reference element coordinates + */ +template +using ReferencePoint = std::array(Dim)>; + +/** + * @brief Point in physical coordinates + */ +using PhysicalPoint = std::array; + +/** + * @brief Jacobian matrix type + */ +template +using Jacobian = std::array(ReferenceDim)>, static_cast(SpatialDim)>; + +// ============================================================================ +// Strong Type Wrappers (C++17 idiom for type safety) +// ============================================================================ + +/** + * @brief Strong type wrapper template for 
type-safe programming + * + * Prevents accidental mixing of conceptually different types that have + * the same underlying representation. + */ +template +class StrongType { +public: + using ValueType = T; + + constexpr StrongType() noexcept(std::is_nothrow_default_constructible_v) + : value_{} {} + + constexpr explicit StrongType(T value) noexcept(std::is_nothrow_move_constructible_v) + : value_(std::move(value)) {} + + constexpr T& get() noexcept { return value_; } + constexpr const T& get() const noexcept { return value_; } + + // Explicit conversion + constexpr explicit operator T() const noexcept { return value_; } + + // Comparison operators + constexpr bool operator==(const StrongType& other) const noexcept { + return value_ == other.value_; + } + constexpr bool operator!=(const StrongType& other) const noexcept { + return value_ != other.value_; + } + constexpr bool operator<(const StrongType& other) const noexcept { + return value_ < other.value_; + } + +private: + T value_; +}; + +// Specific strong types for common use cases +struct QuadraturePointTag {}; +struct QuadratureWeightTag {}; +struct BasisValueTag {}; +struct BasisGradientTag {}; + +using QuadraturePointIndex = StrongType; +using QuadratureWeight = StrongType; + +// ============================================================================ +// Type Traits +// ============================================================================ + +/** + * @brief Check if a type is a valid index type + */ +template +struct is_index_type : std::false_type {}; + +template<> +struct is_index_type : std::true_type {}; + +template<> +struct is_index_type : std::true_type {}; + +template<> +struct is_index_type : std::true_type {}; + +template +inline constexpr bool is_index_type_v = is_index_type::value; + +/** + * @brief Check if a type represents a field type + */ +template +struct is_field_type : std::false_type {}; + +template<> +struct is_field_type : std::true_type {}; + +template +inline constexpr 
bool is_field_type_v = is_field_type::value; + +// ============================================================================ +// Utility Functions +// ============================================================================ + +/** + * @brief Convert FE ElementType to Mesh CellFamily + */ +constexpr svmp::CellFamily to_mesh_family(ElementType elem) noexcept { + switch(elem) { + case ElementType::Line2: + case ElementType::Line3: + return svmp::CellFamily::Line; + + case ElementType::Triangle3: + case ElementType::Triangle6: + return svmp::CellFamily::Triangle; + + case ElementType::Quad4: + case ElementType::Quad8: + case ElementType::Quad9: + return svmp::CellFamily::Quad; + + case ElementType::Tetra4: + case ElementType::Tetra10: + return svmp::CellFamily::Tetra; + + case ElementType::Hex8: + case ElementType::Hex20: + case ElementType::Hex27: + return svmp::CellFamily::Hex; + + case ElementType::Wedge6: + case ElementType::Wedge15: + case ElementType::Wedge18: + return svmp::CellFamily::Wedge; + + case ElementType::Pyramid5: + case ElementType::Pyramid13: + case ElementType::Pyramid14: + return svmp::CellFamily::Pyramid; + + case ElementType::Point1: + return svmp::CellFamily::Point; + + default: + return svmp::CellFamily::Point; // Fallback + } +} + +/** + * @brief Get spatial dimension of element type + */ +constexpr int element_dimension(ElementType elem) noexcept { + switch(elem) { + case ElementType::Point1: + return 0; + case ElementType::Line2: + case ElementType::Line3: + return 1; + case ElementType::Triangle3: + case ElementType::Triangle6: + case ElementType::Quad4: + case ElementType::Quad8: + case ElementType::Quad9: + return 2; + case ElementType::Tetra4: + case ElementType::Tetra10: + case ElementType::Hex8: + case ElementType::Hex20: + case ElementType::Hex27: + case ElementType::Wedge6: + case ElementType::Wedge15: + case ElementType::Wedge18: + case ElementType::Pyramid5: + case ElementType::Pyramid13: + case ElementType::Pyramid14: + 
return 3; + default: + return -1; + } +} + +/** + * @brief Convert status code to string for error reporting + */ +inline const char* status_to_string(FEStatus status) noexcept { + switch(status) { + case FEStatus::Success: return "Success"; + case FEStatus::InvalidArgument: return "Invalid argument"; + case FEStatus::InvalidElement: return "Invalid element"; + case FEStatus::SingularMapping: return "Singular mapping"; + case FEStatus::QuadratureError: return "Quadrature error"; + case FEStatus::AssemblyError: return "Assembly error"; + case FEStatus::BackendError: return "Backend error"; + case FEStatus::NotImplemented: return "Not implemented"; + case FEStatus::ConvergenceError: return "Convergence error"; + case FEStatus::AllocationError: return "Allocation error"; + case FEStatus::MPIError: return "MPI error"; + case FEStatus::IOError: return "I/O error"; + default: return "Unknown error"; + } +} + +} // namespace FE +} // namespace svmp + +#endif // SVMP_FE_TYPES_H diff --git a/Code/Source/solver/FE/Math/DenseLinearAlgebra.cpp b/Code/Source/solver/FE/Math/DenseLinearAlgebra.cpp new file mode 100644 index 000000000..7d909fa0c --- /dev/null +++ b/Code/Source/solver/FE/Math/DenseLinearAlgebra.cpp @@ -0,0 +1,480 @@ +/* Copyright (c) Stanford University, The Regents of the University of California, and others. + * + * All Rights Reserved. + * + * See License file. 
+ */ + +#include "DenseLinearAlgebra.h" + +#include "FEException.h" + +#if defined(FE_HAS_EIGEN) && FE_HAS_EIGEN +#include +#endif + +#include +#include +#include +#include +#include + +#define DENSE_LINALG_CHECK(condition, message) \ + ::svmp::FE::throw_if<::svmp::FE::FEException>(!(condition), SVMP_HERE, (message)) + +namespace svmp { +namespace FE { +namespace math { + +namespace { + +constexpr std::size_t kDenseSolveRhsBlock = 32u; + +void materialize_inverse_from_solver(const DenseLUSolver& solver, + std::vector& inverse) { + const std::size_t n = solver.n; + inverse.assign(n * n, Real(0)); + for (std::size_t diag = 0; diag < n; ++diag) { + inverse[diag * n + diag] = Real(1); + } + solver.solve_in_place(std::span(inverse.data(), inverse.size()), n); +} + +} // namespace + +Real dense_matrix_max_abs(std::span matrix) noexcept { + Real max_abs = Real(0); + for (const Real value : matrix) { + max_abs = std::max(max_abs, std::abs(value)); + } + return max_abs; +} + +Real dense_matrix_pivot_tolerance(std::size_t rows, + std::size_t cols, + Real max_abs, + Real multiplier) noexcept { + const Real size_scale = static_cast(std::max(rows, cols)); + const Real value_scale = std::max(Real(1), max_abs); + return multiplier * std::numeric_limits::epsilon() * + std::max(Real(1), size_scale) * value_scale; +} + +Real dense_matrix_singular_value_tolerance(std::size_t rows, + std::size_t cols, + Real largest_singular_value, + Real multiplier) noexcept { + const Real size_scale = static_cast(std::max(rows, cols)); + return multiplier * std::numeric_limits::epsilon() * + std::max(Real(1), size_scale) * + std::max(Real(1), largest_singular_value); +} + +Real dense_matrix_condition_fallback_threshold() noexcept { + return Real(1.0e12); +} + +Real dense_matrix_condition_error_threshold() noexcept { + return Real(1.0e14); +} + +void DenseLUSolver::solve_in_place(std::span rhs) const { + solve_in_place(rhs, 1u); +} + +void DenseLUSolver::solve_in_place(std::span rhs, + std::size_t 
rhs_count) const { + DENSE_LINALG_CHECK(rhs_count > 0, + label + ": dense solve requires at least one right-hand side"); + DENSE_LINALG_CHECK(rhs.size() == n * rhs_count, + label + ": dense multi-RHS solve size mismatch"); + DENSE_LINALG_CHECK(lu.size() == n * n && pivots.size() == n, + label + ": dense solver is not factorized"); + + for (std::size_t k = 0; k < n; ++k) { + if (pivots[k] != k) { + for (std::size_t block = 0; block < rhs_count; block += kDenseSolveRhsBlock) { + const std::size_t end = + std::min(rhs_count, block + kDenseSolveRhsBlock); + for (std::size_t r = block; r < end; ++r) { + std::swap(rhs[k * rhs_count + r], + rhs[pivots[k] * rhs_count + r]); + } + } + } + } + + for (std::size_t row = 0; row < n; ++row) { + for (std::size_t col = 0; col < row; ++col) { + const Real factor = lu[row * n + col]; + for (std::size_t block = 0; block < rhs_count; block += kDenseSolveRhsBlock) { + const std::size_t end = + std::min(rhs_count, block + kDenseSolveRhsBlock); + for (std::size_t r = block; r < end; ++r) { + rhs[row * rhs_count + r] -= factor * rhs[col * rhs_count + r]; + } + } + } + } + + for (std::size_t rev = 0; rev < n; ++rev) { + const std::size_t row = n - 1u - rev; + for (std::size_t col = row + 1u; col < n; ++col) { + const Real factor = lu[row * n + col]; + for (std::size_t block = 0; block < rhs_count; block += kDenseSolveRhsBlock) { + const std::size_t end = + std::min(rhs_count, block + kDenseSolveRhsBlock); + for (std::size_t r = block; r < end; ++r) { + rhs[row * rhs_count + r] -= factor * rhs[col * rhs_count + r]; + } + } + } + const Real pivot = lu[row * n + row]; + DENSE_LINALG_CHECK( + std::abs(pivot) > pivot_tolerance, + label + ": zero pivot during dense solve"); + for (std::size_t block = 0; block < rhs_count; block += kDenseSolveRhsBlock) { + const std::size_t end = + std::min(rhs_count, block + kDenseSolveRhsBlock); + for (std::size_t r = block; r < end; ++r) { + rhs[row * rhs_count + r] /= pivot; + } + } + } +} + +std::vector 
DenseLUSolver::solve(std::span rhs) const { + std::vector x(rhs.begin(), rhs.end()); + solve_in_place(std::span(x.data(), x.size())); + return x; +} + +DenseMatrixDiagnostics dense_matrix_diagnostics( + std::span matrix, + std::size_t rows, + std::size_t cols, + std::string_view label) { + DENSE_LINALG_CHECK(matrix.size() == rows * cols, + std::string(label) + ": diagnostic size mismatch"); + DENSE_LINALG_CHECK(rows > 0 && cols > 0, + std::string(label) + ": diagnostics require a nonempty matrix"); + +#if defined(FE_HAS_EIGEN) && FE_HAS_EIGEN + using RowMajorMatrix = Eigen::Matrix; + using Matrix = Eigen::Matrix; + const Eigen::Map A(matrix.data(), + static_cast(rows), + static_cast(cols)); + const Matrix dense = A; + Eigen::JacobiSVD svd(dense); + + DenseMatrixDiagnostics diagnostics; + const auto& singular_values = svd.singularValues(); + diagnostics.largest_singular_value = + (singular_values.size() > 0) ? singular_values[0] : Real(0); + diagnostics.tolerance = + dense_matrix_singular_value_tolerance(rows, cols, + diagnostics.largest_singular_value); + + for (Eigen::Index i = 0; i < singular_values.size(); ++i) { + const Real sigma = singular_values[i]; + if (sigma <= diagnostics.tolerance) { + continue; + } + ++diagnostics.rank; + diagnostics.smallest_retained_singular_value = sigma; + } + + const std::size_t full_rank = std::min(rows, cols); + if (diagnostics.rank == full_rank && + diagnostics.smallest_retained_singular_value > Real(0)) { + diagnostics.condition_estimate = + diagnostics.largest_singular_value / + diagnostics.smallest_retained_singular_value; + } + return diagnostics; +#else + DenseMatrixDiagnostics diagnostics; + diagnostics.largest_singular_value = dense_matrix_max_abs(matrix); + diagnostics.tolerance = + dense_matrix_pivot_tolerance(rows, cols, diagnostics.largest_singular_value); + diagnostics.rank = + dense_matrix_rank(std::vector(matrix.begin(), matrix.end()), rows, cols); + const std::size_t full_rank = std::min(rows, cols); + if 
(diagnostics.rank == full_rank) { + diagnostics.smallest_retained_singular_value = diagnostics.tolerance; + } + // Exact condition estimates require SVD diagnostics. In Eigen-disabled + // builds this stays explicit instead of relying on a misleading estimate. + diagnostics.condition_estimate = std::numeric_limits::infinity(); + return diagnostics; +#endif +} + +DenseLUSolver factor_dense_matrix(std::vector matrix, + std::size_t n, + std::string_view label) { + DENSE_LINALG_CHECK(matrix.size() == n * n, + std::string(label) + ": dense factorization size mismatch"); + + DenseLUSolver solver; + solver.n = n; + solver.lu = std::move(matrix); + solver.pivots.resize(n); + const Real max_abs = dense_matrix_max_abs(solver.lu); + solver.pivot_tolerance = + dense_matrix_pivot_tolerance(n, n, max_abs); + solver.label = std::string(label); + + Real max_pivot_abs = Real(0); + Real min_pivot_abs = std::numeric_limits::infinity(); + for (std::size_t col = 0; col < n; ++col) { + std::size_t pivot_row = col; + Real pivot_abs = std::abs(solver.lu[col * n + col]); + for (std::size_t row = col + 1; row < n; ++row) { + const Real candidate = std::abs(solver.lu[row * n + col]); + if (candidate > pivot_abs) { + pivot_abs = candidate; + pivot_row = row; + } + } + + DENSE_LINALG_CHECK( + pivot_abs > solver.pivot_tolerance, + solver.label + ": rank-deficient matrix (rank " + + std::to_string(col) + " of " + std::to_string(n) + + ", pivot below scale-aware tolerance " + + std::to_string(solver.pivot_tolerance) + ")"); + + solver.pivots[col] = pivot_row; + if (pivot_row != col) { + for (std::size_t j = 0; j < n; ++j) { + std::swap(solver.lu[col * n + j], solver.lu[pivot_row * n + j]); + } + } + + const Real pivot = solver.lu[col * n + col]; + DENSE_LINALG_CHECK( + std::abs(pivot) > solver.pivot_tolerance, + solver.label + ": zero pivot after row exchange"); + const Real pivot_magnitude = std::abs(pivot); + max_pivot_abs = std::max(max_pivot_abs, pivot_magnitude); + min_pivot_abs = 
std::min(min_pivot_abs, pivot_magnitude); + + for (std::size_t row = col + 1; row < n; ++row) { + const Real factor = solver.lu[row * n + col] / pivot; + solver.lu[row * n + col] = factor; + for (std::size_t j = col + 1; j < n; ++j) { + solver.lu[row * n + j] -= factor * solver.lu[col * n + j]; + } + } + } + + solver.diagnostics.rank = n; + solver.diagnostics.tolerance = solver.pivot_tolerance; + solver.diagnostics.largest_singular_value = max_abs; + solver.diagnostics.smallest_retained_singular_value = + std::isfinite(min_pivot_abs) ? min_pivot_abs : Real(0); + if (solver.diagnostics.smallest_retained_singular_value > Real(0)) { + solver.diagnostics.condition_estimate = + max_pivot_abs / solver.diagnostics.smallest_retained_singular_value; + } + return solver; +} + +DenseInverseResult invert_dense_matrix_with_diagnostics( + std::vector matrix, + std::size_t n, + std::string_view label) { + DENSE_LINALG_CHECK(matrix.size() == n * n, + std::string(label) + ": dense inverse size mismatch"); + std::vector matrix_for_lu = matrix; + const DenseLUSolver solver = + factor_dense_matrix(std::move(matrix_for_lu), n, label); + + DenseInverseResult result; + result.diagnostics = + dense_matrix_diagnostics(std::span(matrix.data(), matrix.size()), + n, n, label); + +#if defined(FE_HAS_EIGEN) && FE_HAS_EIGEN + if (std::isfinite(solver.diagnostics.condition_estimate) && + std::isfinite(result.diagnostics.condition_estimate) && + result.diagnostics.condition_estimate > dense_matrix_condition_fallback_threshold()) { + using RowMajorMatrix = Eigen::Matrix; + using Matrix = Eigen::Matrix; + const Eigen::Map A(matrix.data(), + static_cast(n), + static_cast(n)); + const Matrix dense = A; + Eigen::JacobiSVD svd(dense, + Eigen::ComputeFullU | Eigen::ComputeFullV); + Matrix sigma_inverse = Matrix::Zero(static_cast(n), + static_cast(n)); + const auto& singular_values = svd.singularValues(); + for (Eigen::Index i = 0; i < singular_values.size(); ++i) { + DENSE_LINALG_CHECK( + 
singular_values[i] > solver.diagnostics.tolerance, + std::string(label) + ": high-condition SVD fallback encountered a dropped singular value"); + sigma_inverse(i, i) = Real(1) / singular_values[i]; + } + const Matrix inverse = svd.matrixV() * sigma_inverse * svd.matrixU().transpose(); + result.inverse.assign(n * n, Real(0)); + for (std::size_t row = 0; row < n; ++row) { + for (std::size_t col = 0; col < n; ++col) { + result.inverse[row * n + col] = + inverse(static_cast(row), static_cast(col)); + } + } + result.used_svd_fallback = true; + return result; + } +#endif + + materialize_inverse_from_solver(solver, result.inverse); + return result; +} + +void validate_dense_inverse_diagnostics( + const DenseInverseResult& result, + std::size_t expected_rank, + std::string_view label, + Real max_condition) { + DENSE_LINALG_CHECK( + result.diagnostics.rank == expected_rank, + std::string(label) + ": rank-deficient matrix (rank " + + std::to_string(result.diagnostics.rank) + " of " + + std::to_string(expected_rank) + ")"); + + if (!std::isfinite(result.diagnostics.condition_estimate)) { + return; + } + + DENSE_LINALG_CHECK( + result.diagnostics.condition_estimate <= max_condition, + std::string(label) + ": condition estimate " + + std::to_string(result.diagnostics.condition_estimate) + + " exceeds supported threshold " + std::to_string(max_condition)); +} + +std::vector invert_dense_matrix(std::vector matrix, + std::size_t n, + std::string_view label) { + const DenseLUSolver solver = factor_dense_matrix(std::move(matrix), n, label); + std::vector inverse; + materialize_inverse_from_solver(solver, inverse); + return inverse; +} + +std::size_t dense_matrix_rank(std::vector matrix, + std::size_t rows, + std::size_t cols) { + DENSE_LINALG_CHECK(matrix.size() == rows * cols, + "dense_matrix_rank: size mismatch"); + const Real tolerance = + dense_matrix_pivot_tolerance(rows, cols, dense_matrix_max_abs(matrix)); + + std::size_t rank = 0; + std::size_t pivot_row = 0; + for 
(std::size_t col = 0; col < cols && pivot_row < rows; ++col) { + std::size_t best_row = pivot_row; + Real best_abs = std::abs(matrix[pivot_row * cols + col]); + for (std::size_t row = pivot_row + 1; row < rows; ++row) { + const Real candidate = std::abs(matrix[row * cols + col]); + if (candidate > best_abs) { + best_abs = candidate; + best_row = row; + } + } + if (best_abs <= tolerance) { + continue; + } + + if (best_row != pivot_row) { + for (std::size_t c = col; c < cols; ++c) { + std::swap(matrix[pivot_row * cols + c], matrix[best_row * cols + c]); + } + } + + const Real pivot = matrix[pivot_row * cols + col]; + for (std::size_t row = pivot_row + 1; row < rows; ++row) { + const Real factor = matrix[row * cols + col] / pivot; + if (std::abs(factor) <= tolerance) { + matrix[row * cols + col] = Real(0); + continue; + } + matrix[row * cols + col] = Real(0); + for (std::size_t c = col + 1; c < cols; ++c) { + matrix[row * cols + c] -= factor * matrix[pivot_row * cols + c]; + } + } + + ++rank; + ++pivot_row; + } + return rank; +} + +DensePseudoInverseResult rank_revealing_pseudo_inverse( + std::span matrix, + std::size_t rows, + std::size_t cols, + std::string_view label) { + DENSE_LINALG_CHECK(matrix.size() == rows * cols, + std::string(label) + ": pseudo-inverse size mismatch"); + DENSE_LINALG_CHECK(rows > 0 && cols > 0, + std::string(label) + ": pseudo-inverse requires a nonempty matrix"); + +#if defined(FE_HAS_EIGEN) && FE_HAS_EIGEN + using RowMajorMatrix = Eigen::Matrix; + using Matrix = Eigen::Matrix; + const Eigen::Map A(matrix.data(), + static_cast(rows), + static_cast(cols)); + const Matrix dense = A; + Eigen::JacobiSVD svd(dense, Eigen::ComputeFullU | Eigen::ComputeFullV); + + DensePseudoInverseResult result; + result.inverse.assign(cols * rows, Real(0)); + + const auto& singular_values = svd.singularValues(); + result.largest_singular_value = + (singular_values.size() > 0) ? 
singular_values[0] : Real(0); + result.tolerance = + dense_matrix_singular_value_tolerance(rows, cols, result.largest_singular_value); + + Matrix sigma_inverse = Matrix::Zero(static_cast(cols), + static_cast(rows)); + for (Eigen::Index i = 0; i < singular_values.size(); ++i) { + const Real sigma = singular_values[i]; + if (sigma <= result.tolerance) { + continue; + } + sigma_inverse(i, i) = Real(1) / sigma; + ++result.rank; + result.smallest_retained_singular_value = sigma; + } + + const Matrix pseudo_inverse = + svd.matrixV() * sigma_inverse * svd.matrixU().transpose(); + for (std::size_t r = 0; r < cols; ++r) { + for (std::size_t c = 0; c < rows; ++c) { + result.inverse[r * rows + c] = + pseudo_inverse(static_cast(r), static_cast(c)); + } + } + return result; +#else + DENSE_LINALG_CHECK( + false, + std::string(label) + + ": rank-revealing pseudo-inverse requires FE_ENABLE_EIGEN"); + return {}; +#endif +} + +} // namespace math +} // namespace FE +} // namespace svmp + +#undef DENSE_LINALG_CHECK diff --git a/Code/Source/solver/FE/Math/DenseLinearAlgebra.h b/Code/Source/solver/FE/Math/DenseLinearAlgebra.h new file mode 100644 index 000000000..7684439b5 --- /dev/null +++ b/Code/Source/solver/FE/Math/DenseLinearAlgebra.h @@ -0,0 +1,119 @@ +/* Copyright (c) Stanford University, The Regents of the University of California, and others. + * + * All Rights Reserved. + * + * See License file. + */ + +#ifndef SVMP_FE_MATH_DENSELINEARALGEBRA_H +#define SVMP_FE_MATH_DENSELINEARALGEBRA_H + +#include "Types.h" + +#include +#include +#include +#include +#include +#include + +namespace svmp { +namespace FE { +namespace math { + +// Dense solve, inverse, rank, and pseudo-inverse support for FE construction +// utilities. Matrices are row-major: matrix[row * cols + col]. 
+[[nodiscard]] Real dense_matrix_max_abs(std::span matrix) noexcept; + +[[nodiscard]] Real dense_matrix_pivot_tolerance(std::size_t rows, + std::size_t cols, + Real max_abs, + Real multiplier = Real(64)) noexcept; + +[[nodiscard]] Real dense_matrix_singular_value_tolerance(std::size_t rows, + std::size_t cols, + Real largest_singular_value, + Real multiplier = Real(64)) noexcept; + +struct DensePseudoInverseResult { + std::vector inverse; + std::size_t rank{0}; + Real tolerance{0}; + Real largest_singular_value{0}; + Real smallest_retained_singular_value{0}; +}; + +struct DenseMatrixDiagnostics { + std::size_t rank{0}; + Real tolerance{0}; + Real largest_singular_value{0}; + Real smallest_retained_singular_value{0}; + Real condition_estimate{std::numeric_limits::infinity()}; +}; + +struct DenseInverseResult { + std::vector inverse; + DenseMatrixDiagnostics diagnostics; + bool used_svd_fallback{false}; +}; + +[[nodiscard]] Real dense_matrix_condition_fallback_threshold() noexcept; +[[nodiscard]] Real dense_matrix_condition_error_threshold() noexcept; + +struct DenseLUSolver { + std::size_t n{0}; + std::vector lu; + std::vector pivots; + DenseMatrixDiagnostics diagnostics; + Real pivot_tolerance{0}; + std::string label; + + [[nodiscard]] bool empty() const noexcept { return n == 0; } + + void solve_in_place(std::span rhs) const; + void solve_in_place(std::span rhs, std::size_t rhs_count) const; + [[nodiscard]] std::vector solve(std::span rhs) const; +}; + +// Inverses and pseudo-inverses keep the same row-major convention for their +// returned dimensions. 
+[[nodiscard]] DenseMatrixDiagnostics dense_matrix_diagnostics( + std::span matrix, + std::size_t rows, + std::size_t cols, + std::string_view label = "dense matrix"); + +[[nodiscard]] DenseLUSolver factor_dense_matrix(std::vector matrix, + std::size_t n, + std::string_view label = "dense matrix"); + +[[nodiscard]] std::vector invert_dense_matrix(std::vector matrix, + std::size_t n, + std::string_view label = "dense matrix"); + +[[nodiscard]] DenseInverseResult invert_dense_matrix_with_diagnostics( + std::vector matrix, + std::size_t n, + std::string_view label = "dense matrix"); + +void validate_dense_inverse_diagnostics( + const DenseInverseResult& result, + std::size_t expected_rank, + std::string_view label = "dense matrix", + Real max_condition = dense_matrix_condition_error_threshold()); + +[[nodiscard]] std::size_t dense_matrix_rank(std::vector matrix, + std::size_t rows, + std::size_t cols); + +[[nodiscard]] DensePseudoInverseResult rank_revealing_pseudo_inverse( + std::span matrix, + std::size_t rows, + std::size_t cols, + std::string_view label = "dense matrix"); + +} // namespace math +} // namespace FE +} // namespace svmp + +#endif // SVMP_FE_MATH_DENSELINEARALGEBRA_H diff --git a/Code/Source/solver/FE/Math/DenseTransformKernels.h b/Code/Source/solver/FE/Math/DenseTransformKernels.h new file mode 100644 index 000000000..8bf83ec0b --- /dev/null +++ b/Code/Source/solver/FE/Math/DenseTransformKernels.h @@ -0,0 +1,78 @@ +/* Copyright (c) Stanford University, The Regents of the University of California, and others. + * + * All Rights Reserved. + * + * See License file. 
+ */ + +#ifndef SVMP_FE_MATH_DENSETRANSFORMKERNELS_H +#define SVMP_FE_MATH_DENSETRANSFORMKERNELS_H + +#include "Types.h" + +#include +#include +#include + +namespace svmp { +namespace FE { +namespace math { + +constexpr std::size_t dense_transform_blocked_min_rows() noexcept { return 32u; } +constexpr std::size_t dense_transform_blocked_min_rhs() noexcept { return 4u; } + +inline void dense_transform_batched_row_major( + const Real* SVMP_RESTRICT matrix, + std::size_t rows, + std::size_t cols, + const Real* SVMP_RESTRICT input, + std::size_t input_row_stride, + Real* SVMP_RESTRICT output, + std::size_t output_row_stride, + std::size_t rhs_count) { + if (rows == 0u || cols == 0u || rhs_count == 0u) { + return; + } + + if (rows < dense_transform_blocked_min_rows() || + rhs_count < dense_transform_blocked_min_rhs()) { + for (std::size_t row = 0; row < rows; ++row) { + const Real* matrix_row = matrix + row * cols; + Real* output_row = output + row * output_row_stride; + for (std::size_t rhs = 0; rhs < rhs_count; ++rhs) { + Real value = Real(0); + for (std::size_t col = 0; col < cols; ++col) { + value += matrix_row[col] * input[col * input_row_stride + rhs]; + } + output_row[rhs] = value; + } + } + return; + } + + constexpr std::size_t kRhsBlock = 32u; + for (std::size_t row = 0; row < rows; ++row) { + const Real* matrix_row = matrix + row * cols; + Real* output_row = output + row * output_row_stride; + for (std::size_t rhs_base = 0; rhs_base < rhs_count; rhs_base += kRhsBlock) { + const std::size_t block_size = std::min(kRhsBlock, rhs_count - rhs_base); + std::array accum{}; + for (std::size_t col = 0; col < cols; ++col) { + const Real coeff = matrix_row[col]; + const Real* input_row = input + col * input_row_stride + rhs_base; + for (std::size_t rhs = 0; rhs < block_size; ++rhs) { + accum[rhs] += coeff * input_row[rhs]; + } + } + for (std::size_t rhs = 0; rhs < block_size; ++rhs) { + output_row[rhs_base + rhs] = accum[rhs]; + } + } + } +} + +} // namespace math +} // 
namespace FE
+} // namespace svmp
+
+#endif // SVMP_FE_MATH_DENSETRANSFORMKERNELS_H
diff --git a/Code/Source/solver/FE/Math/ExpressionOps.h b/Code/Source/solver/FE/Math/ExpressionOps.h
new file mode 100644
index 000000000..96cea1037
--- /dev/null
+++ b/Code/Source/solver/FE/Math/ExpressionOps.h
@@ -0,0 +1,99 @@
+#ifndef SVMP_FE_MATH_EXPRESSION_OPS_H
+#define SVMP_FE_MATH_EXPRESSION_OPS_H
+
+/**
+ * @file ExpressionOps.h
+ * @brief Common expression template operators for vector and matrix expressions
+ *
+ * This header provides shared operator functors used by both VectorExpr.h and
+ * MatrixExpr.h to avoid code duplication and namespace conflicts. All operators
+ * are defined in the detail::ops namespace for internal use by expression templates.
+ */
+
+#include <cmath>
+
+namespace svmp {
+namespace FE {
+namespace math {
+namespace detail {
+namespace ops {
+
+/**
+ * @brief Addition functor: returns a + b with the operands' natural result type
+ */
+struct Add {
+    template<typename T1, typename T2>
+    constexpr auto operator()(const T1& a, const T2& b) const {
+        return a + b;
+    }
+};
+
+/**
+ * @brief Subtraction functor: returns a - b with the operands' natural result type
+ */
+struct Sub {
+    template<typename T1, typename T2>
+    constexpr auto operator()(const T1& a, const T2& b) const {
+        return a - b;
+    }
+};
+
+/**
+ * @brief Multiplication functor: returns a * b with the operands' natural result type
+ */
+struct Mul {
+    template<typename T1, typename T2>
+    constexpr auto operator()(const T1& a, const T2& b) const {
+        return a * b;
+    }
+};
+
+/**
+ * @brief Division functor: returns a / b (no check for a zero divisor)
+ */
+struct Div {
+    template<typename T1, typename T2>
+    constexpr auto operator()(const T1& a, const T2& b) const {
+        return a / b;
+    }
+};
+
+/**
+ * @brief Negation functor: returns -a
+ */
+struct Negate {
+    template<typename T>
+    constexpr auto operator()(const T& a) const {
+        return -a;
+    }
+};
+
+/**
+ * @brief Absolute value functor: std::abs, or an abs() found by argument-dependent lookup
+ */
+struct Abs {
+    template<typename T>
+    constexpr auto operator()(const T& a) const {
+        using std::abs;
+        return abs(a);
+    }
+};
+
+/**
+ * @brief Square root functor: std::sqrt, or a sqrt() found by argument-dependent lookup
+ */
+struct Sqrt {
+    template<typename T>
+    constexpr auto operator()(const T& a) const {
+        using std::sqrt;
+        return sqrt(a);
+    }
+};
+
+} // namespace ops
+} // namespace detail
+} // namespace math
+} // namespace FE
+} // namespace svmp
+
+#endif // SVMP_FE_MATH_EXPRESSION_OPS_H
diff --git a/Code/Source/solver/FE/Math/IntegerMath.h b/Code/Source/solver/FE/Math/IntegerMath.h
new file mode 100644
index 000000000..52a50117f
--- /dev/null
+++ b/Code/Source/solver/FE/Math/IntegerMath.h
@@ -0,0 +1,139 @@
+/* Copyright (c) Stanford University, The Regents of the University of California, and others.
+ *
+ * All Rights Reserved.
+ *
+ * See License file.
+ */
+
+#ifndef SVMP_FE_MATH_INTEGERMATH_H
+#define SVMP_FE_MATH_INTEGERMATH_H
+
+#include "Types.h"
+
+#include <cstddef>
+#include <limits>
+#include <numeric>
+#include <stdexcept>
+
+namespace svmp {
+namespace FE {
+namespace math {
+
+/**
+ * @brief Raise a value to a non-negative integer power by repeated squaring
+ * @param base Value to exponentiate
+ * @param exponent Power to raise to; any value <= 0 skips the loop and yields 1
+ * @return base raised to exponent
+ */
+[[nodiscard]] constexpr Real pow_int_nonnegative(Real base, int exponent) noexcept {
+    Real result = Real(1);
+    Real factor = base;
+    int power = exponent;
+    while (power > 0) {
+        if ((power & 1) != 0) {
+            result *= factor;
+        }
+        power >>= 1;
+        if (power > 0) {
+            // Square only while bits remain, so the final unused square is never formed.
+            factor *= factor;
+        }
+    }
+    return result;
+}
+
+/**
+ * @brief Raise a value to any integer power
+ *
+ * Negative exponents return the reciprocal of the corresponding positive power.
+ * A zero base is not checked; that case follows the division semantics of Real.
+ *
+ * @param base Value to exponentiate
+ * @param exponent Integer power, may be negative
+ * @return base raised to exponent
+ */
+[[nodiscard]] constexpr Real pow_int(Real base, int exponent) noexcept {
+    if (exponent == std::numeric_limits<int>::min()) {
+        // Negating the minimum int overflows, so split off one factor of base:
+        // |min| == max + 1, hence base^|min| == base^max * base.
+        return Real(1) /
+               (pow_int_nonnegative(base, std::numeric_limits<int>::max()) * base);
+    }
+    if (exponent < 0) {
+        return Real(1) / pow_int_nonnegative(base, -exponent);
+    }
+    return pow_int_nonnegative(base, exponent);
+}
+
+/**
+ * @brief Exact binomial coefficient C(n, k) as std::size_t
+ *
+ * Common factors are divided out before each multiplication, so the only product
+ * formed in a step is the next coefficient itself.
+ *
+ * @param n Number of items
+ * @param k Number of items chosen
+ * @return C(n, k), or 0 when n < 0, k < 0 or k > n
+ * @throws std::overflow_error if the result does not fit in std::size_t
+ */
+[[nodiscard]] constexpr std::size_t binomial_size(int n, int k) {
+    if (n < 0 || k < 0 || k > n) {
+        return 0u;
+    }
+    if (k > n - k) {
+        k = n - k;
+    }
+
+    std::size_t result = 1u;
+    for (int i = 1; i <= k; ++i) {
+        auto numerator = static_cast<std::size_t>(n - (k - i));
+        auto denominator = static_cast<std::size_t>(i);
+
+        // Cancel the divisor against the new factor first, then against the running product.
+        const auto numerator_gcd = std::gcd(numerator, denominator);
+        numerator /= numerator_gcd;
+        denominator /= numerator_gcd;
+
+        const auto result_gcd = std::gcd(result, denominator);
+        result /= result_gcd;
+        denominator /= result_gcd;
+        if (denominator != 1u) {
+            throw std::overflow_error(
+                "binomial_size: failed to reduce exact binomial factor");
+        }
+        if (numerator != 0u &&
+            result > std::numeric_limits<std::size_t>::max() / numerator) {
+            throw std::overflow_error("binomial_size: result does not fit in size_t");
+        }
+        result *= numerator;
+    }
+    return result;
+}
+
+/**
+ * @brief Binomial coefficient C(n, k) evaluated in floating point
+ * @param n Number of items
+ * @param k Number of items chosen
+ * @return C(n, k) as Real, or 0 when k < 0 or k > n; the value is subject to rounding
+ */
+[[nodiscard]] constexpr Real binomial_real(int n, int k) noexcept {
+    if (k < 0 || k > n) {
+        return Real(0);
+    }
+    if (k > n - k) {
+        k = n - k;
+    }
+
+    Real result = Real(1);
+    for (int i = 1; i <= k; ++i) {
+        result *= static_cast<Real>(n - (k - i));
+        result /= static_cast<Real>(i);
+    }
+    return result;
+}
+
+} // namespace math
+} // namespace FE
+} // namespace svmp
+
+#endif // SVMP_FE_MATH_INTEGERMATH_H
diff --git a/Code/Source/solver/FE/Math/MathConstants.h b/Code/Source/solver/FE/Math/MathConstants.h
new file mode 100644
index 000000000..145520ab2
--- /dev/null
+++ b/Code/Source/solver/FE/Math/MathConstants.h
@@ -0,0 +1,388 @@
+#ifndef SVMP_FE_MATH_CONSTANTS_H
+#define SVMP_FE_MATH_CONSTANTS_H
+
+/**
+ * @file MathConstants.h
+ * @brief Mathematical constants and numerical tolerances for FE computations
+ *
+ * This header provides mathematical constants (π, e, √2, etc.) and numerical
+ * tolerances used throughout the FE library. All constants are templated
+ * to support different precision types.
+ */
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <type_traits>
+
+namespace svmp {
+namespace FE {
+namespace math {
+
+/**
+ * @brief Mathematical constants templated by type
+ * @tparam T The numeric type (float, double, long double)
+ */
+template<typename T>
+struct Constants {
+    static_assert(std::is_floating_point_v<T>,
+                  "Constants only defined for floating-point types");
+
+    // Mathematical constants (long double literals, rounded once to T)
+    static constexpr T pi = T(3.14159265358979323846264338327950288419716939937510L);
+    static constexpr T two_pi = T(6.28318530717958647692528676655900576839433879875021L);
+    static constexpr T half_pi = T(1.57079632679489661923132169163975144209858469968755L);
+    static constexpr T quarter_pi = T(0.78539816339744830961566084581987572104929234984378L);
+    static constexpr T inv_pi = T(0.31830988618379067153776752674502872406891929148091L);
+    static constexpr T inv_two_pi = T(0.15915494309189533576888376337251436203445964574046L);
+
+    static constexpr T e = T(2.71828182845904523536028747135266249775724709369995L);
+    static constexpr T log2e = T(1.44269504088896340735992468100189213742664595415299L);
+    static constexpr T log10e = T(0.43429448190325182765112891891660508229439700580367L);
+    static constexpr T ln2 = T(0.69314718055994530941723212145817656807550013436026L);
+    static constexpr T ln10 = T(2.30258509299404568401799145468436420760110148862877L);
+
+    static constexpr T sqrt2 = T(1.41421356237309504880168872420969807856967187537694L);
+    static constexpr T sqrt3 = T(1.73205080756887729352744634150587236694280525381038L);
+    static constexpr T inv_sqrt2 = T(0.70710678118654752440084436210484903928483593768847L);
+    static constexpr T inv_sqrt3 = T(0.57735026918962576450914878050195745564760175127013L);
+
+    // Golden ratio
+    static constexpr T phi = T(1.61803398874989484820458683436563811772030917980576L);
+
+    // Degrees to radians conversion
+    static constexpr T deg_to_rad = pi / T(180);
+    static constexpr T rad_to_deg = T(180) / pi;
+};
+
+/**
+ * @brief Numerical tolerances and machine epsilon
+ * @tparam T The numeric type
+ */
+template<typename T>
+struct Tolerances {
+    static_assert(std::is_floating_point_v<T>,
+                  "Tolerances only defined for floating-point types");
+
+    // Machine epsilon
+    static constexpr T epsilon = std::numeric_limits<T>::epsilon();
+
+    // Default tolerance (1000 * machine epsilon)
+    static constexpr T tolerance = T(1000) * epsilon;
+
+    // Strict tolerance (10 * machine epsilon)
+    static constexpr T strict = T(10) * epsilon;
+
+    // Loose tolerance (10000 * machine epsilon)
+    static constexpr T loose = T(10000) * epsilon;
+
+    // Square root of epsilon (useful for finite differences); initialized at run time
+    static inline const T sqrt_epsilon = std::sqrt(epsilon);
+
+    // Cube root of epsilon (useful for numerical derivatives); initialized at run time
+    static inline const T cbrt_epsilon = std::cbrt(epsilon);
+
+    // Smallest positive normalized value
+    static constexpr T min_positive = std::numeric_limits<T>::min();
+
+    // Largest representable value
+    static constexpr T max_value = std::numeric_limits<T>::max();
+
+    // Infinity
+    static constexpr T infinity = std::numeric_limits<T>::infinity();
+
+    // Not-a-Number
+    static constexpr T nan = std::numeric_limits<T>::quiet_NaN();
+};
+
+/**
+ * @brief Convenience variable templates mirroring Constants<T> and Tolerances<T>
+ */
+template<typename T> inline constexpr T pi = Constants<T>::pi;
+template<typename T> inline constexpr T two_pi = Constants<T>::two_pi;
+template<typename T> inline constexpr T half_pi = Constants<T>::half_pi;
+template<typename T> inline constexpr T quarter_pi = Constants<T>::quarter_pi;
+template<typename T> inline constexpr T inv_pi = Constants<T>::inv_pi;
+template<typename T> inline constexpr T inv_two_pi = Constants<T>::inv_two_pi;
+
+template<typename T> inline constexpr T e = Constants<T>::e;
+template<typename T> inline constexpr T log2e = Constants<T>::log2e;
+template<typename T> inline constexpr T log10e = Constants<T>::log10e;
+template<typename T> inline constexpr T ln2 = Constants<T>::ln2;
+template<typename T> inline constexpr T ln10 = Constants<T>::ln10;
+
+template<typename T> inline constexpr T sqrt2 = Constants<T>::sqrt2;
+template<typename T> inline constexpr T sqrt3 = Constants<T>::sqrt3;
+template<typename T> inline constexpr T inv_sqrt2 = Constants<T>::inv_sqrt2;
+template<typename T> inline constexpr T inv_sqrt3 = Constants<T>::inv_sqrt3;
+
+template<typename T> inline constexpr T phi = Constants<T>::phi;
+
+template<typename T> inline constexpr T deg_to_rad = Constants<T>::deg_to_rad;
+template<typename T> inline constexpr T rad_to_deg = Constants<T>::rad_to_deg;
+
+template<typename T> inline constexpr T epsilon = Tolerances<T>::epsilon;
+template<typename T> inline constexpr T tolerance = Tolerances<T>::tolerance;
+template<typename T> inline constexpr T strict_tol = Tolerances<T>::strict;
+template<typename T> inline constexpr T loose_tol = Tolerances<T>::loose;
+template<typename T> inline const T sqrt_epsilon = Tolerances<T>::sqrt_epsilon;
+template<typename T> inline const T cbrt_epsilon = Tolerances<T>::cbrt_epsilon;
+template<typename T> inline constexpr T min_positive = Tolerances<T>::min_positive;
+template<typename T> inline constexpr T max_value = Tolerances<T>::max_value;
+template<typename T> inline constexpr T infinity = Tolerances<T>::infinity;
+
+/**
+ * @brief Comparison functions with tolerance
+ */
+
+/**
+ * @brief Check if two values are approximately equal
+ * @param a First value
+ * @param b Second value
+ * @param tol Tolerance (default: 1000 * epsilon)
+ * @return true if |a - b| <= tol * max(|a|, |b|, 1)
+ */
+template<typename T>
+inline constexpr bool approx_equal(T a, T b, T tol = tolerance<T>) {
+    static_assert(std::is_floating_point_v<T>,
+                  "approx_equal only defined for floating-point types");
+    const T scale = std::max({std::abs(a), std::abs(b), T(1)});
+    return std::abs(a - b) <= tol * scale;
+}
+
+/**
+ * @brief Check if a value is approximately zero
+ * @param a Value to check
+ * @param tol Tolerance (default: 1000 * epsilon)
+ * @return true if |a| <= tol
+ */
+template<typename T>
+inline constexpr bool approx_zero(T a, T tol = tolerance<T>) {
+    static_assert(std::is_floating_point_v<T>,
+                  "approx_zero only defined for floating-point types");
+    return std::abs(a) <= tol;
+}
+
+/**
+ * @brief Check if a value is positive (greater than tolerance)
+ * @param a Value to check
+ * @param tol Tolerance (default: 1000 * epsilon)
+ * @return true if a > tol
+ */
+template<typename T>
+inline constexpr bool is_positive(T a, T tol = tolerance<T>) {
+    static_assert(std::is_floating_point_v<T>,
+                  "is_positive only defined for floating-point types");
+    return a > tol;
+}
+
+/**
+ * @brief Check if a value is negative (less than -tolerance)
+ * @param a Value to check
+ * @param tol Tolerance (default: 1000 * epsilon)
+ * @return true if a < -tol
+ */
+template<typename T>
+inline constexpr bool is_negative(T a, T tol = tolerance<T>) {
+    static_assert(std::is_floating_point_v<T>,
+                  "is_negative only defined for floating-point types");
+    return a < -tol;
+}
+
+/**
+ * @brief Check if a value is finite (not infinite or NaN)
+ * @param a Value to check
+ * @return true if value is finite
+ */
+template<typename T>
+inline constexpr bool is_finite(T a) {
+    static_assert(std::is_floating_point_v<T>,
+                  "is_finite only defined for floating-point types");
+    return std::isfinite(a);
+}
+
+/**
+ * @brief Degrees to radians conversion
+ * @param degrees Angle in degrees
+ * @return Angle in radians
+ */
+template<typename T>
+inline constexpr T to_radians(T degrees) {
+    static_assert(std::is_floating_point_v<T>,
+                  "to_radians only defined for floating-point types");
+    return degrees * deg_to_rad<T>;
+}
+
+/**
+ * @brief Radians to degrees conversion
+ * @param radians Angle in radians
+ * @return Angle in degrees
+ */
+template<typename T>
+inline constexpr T to_degrees(T radians) {
+    static_assert(std::is_floating_point_v<T>,
+                  "to_degrees only defined for floating-point types");
+    return radians * rad_to_deg<T>;
+}
+
+// =============================================================================
+// Constants namespace for compatibility with test expectations
+// =============================================================================
+namespace constants {
+
+// Mathematical constants (double precision defaults)
+inline constexpr double PI = Constants<double>::pi;
+inline constexpr double PI_2 = Constants<double>::half_pi;
+inline constexpr double PI_4 = Constants<double>::quarter_pi;
+inline constexpr double TWO_PI = Constants<double>::two_pi;
+inline constexpr double INV_PI = Constants<double>::inv_pi;
+
+inline constexpr double E = Constants<double>::e;
+inline constexpr double LN_2 = Constants<double>::ln2;
+inline constexpr double LN_10 = Constants<double>::ln10;
+inline constexpr double LOG10_E = Constants<double>::log10e;
+inline constexpr double LOG2_E = Constants<double>::log2e;
+
+inline constexpr double SQRT_2 = Constants<double>::sqrt2;
+inline constexpr double SQRT_3 = Constants<double>::sqrt3;
+inline constexpr double SQRT_5 = 2.2360679774997896964091736687312L;
+inline constexpr double INV_SQRT_2 = Constants<double>::inv_sqrt2;
+inline constexpr double INV_SQRT_3 = Constants<double>::inv_sqrt3;
+
+inline constexpr double PHI = Constants<double>::phi;
+
+// Angle conversion functions
+template<typename T>
+inline constexpr T deg_to_rad(T degrees) {
+    return degrees * Constants<T>::deg_to_rad;
+}
+
+template<typename T>
+inline constexpr T rad_to_deg(T radians) {
+    return radians * Constants<T>::rad_to_deg;
+}
+
+// Templated tolerances
+template<typename T>
+inline constexpr T tolerance() {
+    return Tolerances<T>::tolerance;
+}
+
+template<typename T>
+inline constexpr T machine_epsilon() {
+    return Tolerances<T>::epsilon;
+}
+
+// Additional constants and utility functions for tests
+inline constexpr double DEFAULT_TOLERANCE = Tolerances<double>::tolerance;
+inline constexpr double DEFAULT_REL_TOLERANCE = 1e-12;
+inline constexpr double GEOMETRY_TOLERANCE = 1e-10;
+inline constexpr double SOLVER_TOLERANCE = Tolerances<double>::strict;
+inline constexpr double EPSILON = Tolerances<double>::epsilon;
+inline constexpr double INF_VALUE = Tolerances<double>::infinity; // Renamed from INFINITY
+inline constexpr double NOT_A_NUMBER = Tolerances<double>::nan; // Renamed from NAN
+inline constexpr double MAX_DOUBLE = Tolerances<double>::max_value;
+inline constexpr double MIN_DOUBLE = Tolerances<double>::min_positive;
+inline constexpr double LOWEST_DOUBLE = -Tolerances<double>::max_value;
+
+// Physical constants
+inline constexpr double SPEED_OF_LIGHT = 299792458.0; // m/s
+inline constexpr double GRAVITATIONAL_CONSTANT = 6.67430e-11; // m³/(kg·s²)
+inline constexpr double PLANCK_CONSTANT = 6.62607015e-34; // J·s
+inline constexpr double AVOGADRO_NUMBER = 6.02214076e23; // mol⁻¹
+inline constexpr double BOLTZMANN_CONSTANT = 1.380649e-23; // J/K
+inline constexpr double STANDARD_GRAVITY = 9.80665; // m/s²
+
+// Float and long double versions (long double values come from Constants at full precision)
+inline constexpr float PI_F = static_cast<float>(PI);
+inline constexpr float E_F = static_cast<float>(E);
+inline constexpr float SQRT_2_F = static_cast<float>(SQRT_2);
+inline constexpr float EPSILON_F = Tolerances<float>::epsilon;
+
+inline constexpr long double PI_L = Constants<long double>::pi;
+inline constexpr long double E_L = Constants<long double>::e;
+inline constexpr long double SQRT_2_L = Constants<long double>::sqrt2;
+inline constexpr long double EPSILON_L = Tolerances<long double>::epsilon;
+
+// Additional mathematical constants
+inline constexpr double SQRT_PI = 1.7724538509055160272981674833411L;
+
+// Utility functions
+template<typename T>
+inline constexpr int sign(T value) {
+    return (T(0) < value) - (value < T(0));
+}
+
+template<typename T>
+inline constexpr bool is_zero(T value, T tol = DEFAULT_TOLERANCE) {
+    return std::abs(value) <= tol;
+}
+
+template<typename T>
+inline bool near(T a, T b, T tol = DEFAULT_TOLERANCE) {
+    return std::abs(a - b) <= tol;
+}
+
+template<typename T>
+inline bool near_relative(T a, T b, T rel_tol = DEFAULT_REL_TOLERANCE) {
+    T scale = std::max(std::abs(a), std::abs(b));
+    return std::abs(a - b) <= rel_tol * scale;
+}
+
+template<typename T>
+inline constexpr T clamp(T value, T min_val, T max_val) {
+    return value < min_val ? min_val : (value > max_val ? max_val : value);
+}
+
+template<typename T>
+inline constexpr T lerp(T a, T b, T t) {
+    return a + t * (b - a);
+}
+
+template<typename T>
+inline T safe_divide(T numerator, T denominator, T default_val = T(0)) {
+    return is_zero(denominator) ? default_val : numerator / denominator;
+}
+
+template<typename T>
+inline bool isinf(T value) {
+    return std::isinf(value);
+}
+
+template<typename T>
+inline bool isnan(T value) {
+    return std::isnan(value);
+}
+
+} // namespace constants
+
+// Physical constants for FE analysis
+namespace physical_constants {
+
+// Material properties (SI units)
+inline constexpr double water_density = 1000.0; // kg/m³
+inline constexpr double steel_density = 7850.0; // kg/m³
+inline constexpr double aluminum_density = 2700.0; // kg/m³
+
+inline constexpr double water_viscosity = 0.001; // Pa·s at 20°C
+inline constexpr double air_viscosity = 1.81e-5; // Pa·s at 20°C
+
+inline constexpr double steel_youngs_modulus = 200e9; // Pa
+inline constexpr double aluminum_youngs_modulus = 70e9; // Pa
+
+inline constexpr double steel_poisson_ratio = 0.3; // dimensionless
+inline constexpr double aluminum_poisson_ratio = 0.33; // dimensionless
+
+// Physical constants
+inline constexpr double gravity = 9.80665; // m/s²
+inline constexpr double gas_constant = 8.314462618; // J/(mol·K)
+inline constexpr double boltzmann = 1.380649e-23; // J/K
+inline constexpr double avogadro = 6.02214076e23; // mol⁻¹
+
+} // namespace physical_constants
+
+} // namespace math
+} // namespace FE
+} // namespace svmp
+
+#endif // SVMP_FE_MATH_CONSTANTS_H
diff --git a/Code/Source/solver/FE/Math/Matrix.h b/Code/Source/solver/FE/Math/Matrix.h
new file mode 100644
index 000000000..0b80091f9
--- /dev/null
+++ b/Code/Source/solver/FE/Math/Matrix.h
@@ -0,0 +1,1487 @@
+#ifndef SVMP_FE_MATH_MATRIX_H
+#define SVMP_FE_MATH_MATRIX_H
+
+/**
+ * @file Matrix.h
+ * @brief Fixed-size matrices with expression templates and specializations for FE computations
+ *
+ * This header provides optimized fixed-size matrix operations for element-level
+ * computations. Includes specialized analytical formulas for 2x2 and 3x3 matrices
+ * (determinant, inverse using Cramer's rule) and Gauss elimination for larger matrices.
+ * All operations use expression templates to eliminate temporaries. + */ + +#include "MatrixExpr.h" +#include "Vector.h" +#include "MathConstants.h" +#include "../Common/Alignment.h" +#include +#include +#include +#include +#include +#include +#include + +namespace svmp { +namespace FE { +namespace math { + +/** + * @brief Fixed-size matrix for element-level computations + * @tparam T Scalar type (float, double) + * @tparam M Number of rows + * @tparam N Number of columns + * + * Storage is row-major for cache efficiency. Memory is aligned for SIMD operations. + * Specializations exist for 2x2, 3x3, 4x4 matrices with analytical algorithms. + */ +template +class Matrix : public MatrixExpr> { + static_assert(std::is_arithmetic_v, "T must be an arithmetic type"); + static_assert(M > 0 && N > 0, "Matrix dimensions must be positive"); + +private: + alignas(kFEFixedObjectAlignmentBytes) T data_[M * N]; // Row-major, SIMD-friendly storage + + // Helper to compute linear index from (i,j) + static constexpr std::size_t index(std::size_t i, std::size_t j) { + return i * N + j; + } + +public: + // Type definitions + using value_type = T; + using size_type = std::size_t; + using reference = T&; + using const_reference = const T&; + using pointer = T*; + using const_pointer = const T*; + + /** + * @brief Default constructor - zero initializes all elements + */ + constexpr Matrix() : data_{} {} + + /** + * @brief Fill constructor - initializes all elements with same value + * @param value Value to fill matrix with + */ + constexpr explicit Matrix(T value) { + for (size_type i = 0; i < M * N; ++i) { + data_[i] = value; + } + } + + /** + * @brief Initializer list constructor for row-wise initialization + * @param init Nested initializer lists {{row0}, {row1}, ...} + */ + constexpr Matrix(std::initializer_list> init) : data_{} { + size_type row = 0; + for (auto row_init : init) { + if (row >= M) break; + size_type col = 0; + for (auto val : row_init) { + if (col >= N) break; + 
(*this)(row, col) = val; + ++col; + } + ++row; + } + } + + /** + * @brief Constructor from expression template + * @tparam Expr Expression type + * @param expr Matrix expression to evaluate + */ + template + Matrix(const MatrixExpr& expr) { + const auto& e = expr.derived(); + for (size_type i = 0; i < M; ++i) { + for (size_type j = 0; j < N; ++j) { + (*this)(i, j) = e(i, j); + } + } + } + + /** + * @brief Copy constructor + */ + constexpr Matrix(const Matrix&) = default; + + /** + * @brief Move constructor + */ + constexpr Matrix(Matrix&&) noexcept = default; + + /** + * @brief Copy assignment + */ + Matrix& operator=(const Matrix&) = default; + + /** + * @brief Move assignment + */ + Matrix& operator=(Matrix&&) noexcept = default; + + /** + * @brief Assignment from expression template + * @tparam Expr Expression type + * @param expr Matrix expression to evaluate + * @return Reference to this + */ + template + Matrix& operator=(const MatrixExpr& expr) { + const auto& e = expr.derived(); + for (size_type i = 0; i < M; ++i) { + for (size_type j = 0; j < N; ++j) { + (*this)(i, j) = e(i, j); + } + } + return *this; + } + + /** + * @brief Get number of rows (compile-time constant) + * @return Number of rows + */ + static constexpr size_type rows() { return M; } + + /** + * @brief Get number of columns (compile-time constant) + * @return Number of columns + */ + static constexpr size_type cols() { return N; } + + /** + * @brief Get total number of elements + * @return M * N + */ + static constexpr size_type size() { return M * N; } + + /** + * @brief Element access (no bounds checking) + * @param i Row index + * @param j Column index + * @return Reference to element + */ + constexpr T& operator()(size_type i, size_type j) { + return data_[index(i, j)]; + } + + /** + * @brief Element access (no bounds checking) - const version + * @param i Row index + * @param j Column index + * @return Const reference to element + */ + constexpr const T& operator()(size_type i, size_type 
j) const { + return data_[index(i, j)]; + } + + /** + * @brief Element access with bounds checking + * @param i Row index + * @param j Column index + * @return Reference to element + * @throws std::out_of_range if indices are out of bounds + */ + T& at(size_type i, size_type j) { + if (i >= M || j >= N) { + throw std::out_of_range("Matrix::at: index out of range"); + } + return (*this)(i, j); + } + + /** + * @brief Element access with bounds checking - const version + * @param i Row index + * @param j Column index + * @return Const reference to element + * @throws std::out_of_range if indices are out of bounds + */ + const T& at(size_type i, size_type j) const { + if (i >= M || j >= N) { + throw std::out_of_range("Matrix::at: index out of range"); + } + return (*this)(i, j); + } + + /** + * @brief Get row as vector + * @param i Row index + * @return Vector containing row elements + */ + Vector row(size_type i) const { + Vector result; + for (size_type j = 0; j < N; ++j) { + result[j] = (*this)(i, j); + } + return result; + } + + /** + * @brief Get column as vector + * @param j Column index + * @return Vector containing column elements + */ + Vector column(size_type j) const { + Vector result; + for (size_type i = 0; i < M; ++i) { + result[i] = (*this)(i, j); + } + return result; + } + + /** + * @brief Get column as vector (alias for column) + * @param j Column index + * @return Vector containing column elements + */ + Vector col(size_type j) const { + return column(j); + } + + /** + * @brief Set row from vector + * @param i Row index + * @param v Vector of values + */ + void set_row(size_type i, const Vector& v) { + for (size_type j = 0; j < N; ++j) { + (*this)(i, j) = v[j]; + } + } + + /** + * @brief Set column from vector + * @param j Column index + * @param v Vector of values + */ + void set_column(size_type j, const Vector& v) { + for (size_type i = 0; i < M; ++i) { + (*this)(i, j) = v[i]; + } + } + + /** + * @brief Set column from vector (alias for set_column) 
+ * @param j Column index + * @param v Vector of values + */ + void set_col(size_type j, const Vector& v) { + set_column(j, v); + } + + /** + * @brief Get pointer to underlying data + * @return Pointer to first element + */ + T* data() { return data_; } + const T* data() const { return data_; } + + /** + * @brief Fill matrix with value + * @param value Value to fill with + */ + void fill(T value) { + for (size_type i = 0; i < M * N; ++i) { + data_[i] = value; + } + } + + /** + * @brief Set all elements to zero + */ + void set_zero() { + fill(T{0}); + } + + // Arithmetic operators + + /** + * @brief In-place addition + * @param other Matrix to add + * @return Reference to this + */ + Matrix& operator+=(const Matrix& other) { + for (size_type i = 0; i < M * N; ++i) { + data_[i] += other.data_[i]; + } + return *this; + } + + /** + * @brief In-place subtraction + * @param other Matrix to subtract + * @return Reference to this + */ + Matrix& operator-=(const Matrix& other) { + for (size_type i = 0; i < M * N; ++i) { + data_[i] -= other.data_[i]; + } + return *this; + } + + /** + * @brief In-place scalar multiplication + * @param scalar Scalar to multiply by + * @return Reference to this + */ + Matrix& operator*=(T scalar) { + for (size_type i = 0; i < M * N; ++i) { + data_[i] *= scalar; + } + return *this; + } + + /** + * @brief In-place scalar division + * @param scalar Scalar to divide by + * @return Reference to this + */ + Matrix& operator/=(T scalar) { + const T inv = T(1) / scalar; + return (*this) *= inv; + } + + // Matrix operations + + /** + * @brief Compute transpose + * @return Transposed matrix + */ + Matrix transpose() const { + Matrix result; + for (size_type i = 0; i < M; ++i) { + for (size_type j = 0; j < N; ++j) { + result(j, i) = (*this)(i, j); + } + } + return result; + } + + /** + * @brief Compute trace (sum of diagonal elements) + * @return Trace (only valid for square matrices) + */ + template + std::enable_if_t trace() const { + T result = T(0); + 
for (size_type i = 0; i < M; ++i) { + result += (*this)(i, i); + } + return result; + } + + /** + * @brief Compute Frobenius norm squared + * @return Sum of squares of all elements + */ + T frobenius_norm_squared() const { + T result = T(0); + for (size_type i = 0; i < M * N; ++i) { + result += data_[i] * data_[i]; + } + return result; + } + + /** + * @brief Compute Frobenius norm + * @return Square root of sum of squares + */ + T frobenius_norm() const { + using std::sqrt; + return sqrt(frobenius_norm_squared()); + } + + /** + * @brief Compute infinity norm (maximum absolute row sum) + * @return Infinity norm + */ + T infinity_norm() const { + T max_row_sum = T(0); + for (size_type i = 0; i < M; ++i) { + T row_sum = T(0); + for (size_type j = 0; j < N; ++j) { + using std::abs; + row_sum += abs((*this)(i, j)); + } + max_row_sum = std::max(max_row_sum, row_sum); + } + return max_row_sum; + } + + /** + * @brief Compute one norm (maximum absolute column sum) + * @return One norm + */ + T one_norm() const { + T max_col_sum = T(0); + for (size_type j = 0; j < N; ++j) { + T col_sum = T(0); + for (size_type i = 0; i < M; ++i) { + using std::abs; + col_sum += abs((*this)(i, j)); + } + max_col_sum = std::max(max_col_sum, col_sum); + } + return max_col_sum; + } + + /** + * @brief Get minimum element + * @return Minimum value + */ + T min() const { + return *std::min_element(data_, data_ + M * N); + } + + /** + * @brief Get maximum element + * @return Maximum value + */ + T max() const { + return *std::max_element(data_, data_ + M * N); + } + + /** + * @brief Get sum of all elements + * @return Sum of elements + */ + T sum() const { + T result = T(0); + for (size_type i = 0; i < M * N; ++i) { + result += data_[i]; + } + return result; + } + + // Static factory functions + + /** + * @brief Create zero matrix + * @return Matrix with all elements zero + */ + static constexpr Matrix zeros() { + return Matrix(); + } + + /** + * @brief Create matrix with all elements one + * 
@return Matrix with all elements one + */ + static constexpr Matrix ones() { + return Matrix(T(1)); + } + + /** + * @brief Create identity matrix (only for square matrices) + * @return Identity matrix + */ + template + static std::enable_if_t identity() { + Matrix result; + for (size_type i = 0; i < M; ++i) { + result(i, i) = T(1); + } + return result; + } + + /** + * @brief Create diagonal matrix from vector (only for square matrices) + * @param diag Vector of diagonal elements + * @return Diagonal matrix + */ + template + static std::enable_if_t diagonal(const Vector& diag) { + Matrix result; + for (size_type i = 0; i < M; ++i) { + result(i, i) = diag[i]; + } + return result; + } + + /** + * @brief Create zero matrix (static factory) + * @return Zero matrix + */ + static Matrix zero() { + return zeros(); + } + + // Property checking methods + + /** + * @brief Check if matrix is symmetric (only for square matrices) + * @param tol Tolerance for comparison + * @return true if symmetric + */ + template + std::enable_if_t is_symmetric(T tol = tolerance) const { + for (size_type i = 0; i < M; ++i) { + for (size_type j = i + 1; j < N; ++j) { + using std::abs; + if (abs((*this)(i, j) - (*this)(j, i)) > tol) { + return false; + } + } + } + return true; + } + + /** + * @brief Check if matrix is skew-symmetric (only for square matrices) + * @param tol Tolerance for comparison + * @return true if skew-symmetric + */ + template + std::enable_if_t is_skew_symmetric(T tol = tolerance) const { + for (size_type i = 0; i < M; ++i) { + // Diagonal must be zero + using std::abs; + if (abs((*this)(i, i)) > tol) { + return false; + } + for (size_type j = i + 1; j < N; ++j) { + if (abs((*this)(i, j) + (*this)(j, i)) > tol) { + return false; + } + } + } + return true; + } + + /** + * @brief Check if matrix is diagonal (only for square matrices) + * @param tol Tolerance for comparison + * @return true if diagonal + */ + template + std::enable_if_t is_diagonal(T tol = tolerance) const { + 
for (size_type i = 0; i < M; ++i) { + for (size_type j = 0; j < N; ++j) { + if (i != j) { + using std::abs; + if (abs((*this)(i, j)) > tol) { + return false; + } + } + } + } + return true; + } + + // Determinant (general template, specialized for 2x2, 3x3) + /** + * @brief Compute determinant (only for square matrices) + * @return Determinant value + */ + template + std::enable_if_t determinant() const { + // For 4x4 and larger, use LU decomposition + return determinant_lu(); + } + + // Inverse (general template, specialized for 2x2, 3x3) + /** + * @brief Compute matrix inverse (only for square matrices) + * @return Inverse matrix + */ + template + std::enable_if_t inverse() const { + // For 4x4 and larger, use Gauss-Jordan elimination + return inverse_gauss_jordan(); + } + +private: + // LU decomposition for determinant (4x4 and larger) + T determinant_lu() const { + Matrix lu = *this; + T det = T(1); + + for (size_type k = 0; k < M - 1; ++k) { + // Find pivot + size_type pivot = k; + T max_val = std::abs(lu(k, k)); + for (size_type i = k + 1; i < M; ++i) { + T val = std::abs(lu(i, k)); + if (val > max_val) { + max_val = val; + pivot = i; + } + } + + // Swap rows if needed + if (pivot != k) { + for (size_type j = 0; j < M; ++j) { + std::swap(lu(k, j), lu(pivot, j)); + } + det = -det; // Row swap changes sign + } + + // Check for singularity + if (approx_zero(lu(k, k))) { + return T(0); + } + + // Eliminate column + for (size_type i = k + 1; i < M; ++i) { + T factor = lu(i, k) / lu(k, k); + for (size_type j = k + 1; j < M; ++j) { + lu(i, j) -= factor * lu(k, j); + } + } + + det *= lu(k, k); + } + det *= lu(M - 1, M - 1); + + return det; + } + + // Gauss-Jordan elimination for inverse (4x4 and larger) + Matrix inverse_gauss_jordan() const { + Matrix aug; // Augmented matrix [A | I] + Matrix result = Matrix::identity(); + + // Copy this matrix to augmented matrix + for (size_type i = 0; i < M; ++i) { + for (size_type j = 0; j < M; ++j) { + aug(i, j) = (*this)(i, j); 
+ } + } + + // Forward elimination with partial pivoting + for (size_type k = 0; k < M; ++k) { + // Find pivot + size_type pivot = k; + T max_val = std::abs(aug(k, k)); + for (size_type i = k + 1; i < M; ++i) { + T val = std::abs(aug(i, k)); + if (val > max_val) { + max_val = val; + pivot = i; + } + } + + // Swap rows + if (pivot != k) { + for (size_type j = 0; j < M; ++j) { + std::swap(aug(k, j), aug(pivot, j)); + std::swap(result(k, j), result(pivot, j)); + } + } + + // Check for singularity + if (approx_zero(aug(k, k))) { + throw std::runtime_error("Matrix is singular"); + } + + // Scale pivot row + T pivot_val = aug(k, k); + for (size_type j = 0; j < M; ++j) { + aug(k, j) /= pivot_val; + result(k, j) /= pivot_val; + } + + // Eliminate column + for (size_type i = 0; i < M; ++i) { + if (i != k) { + T factor = aug(i, k); + for (size_type j = 0; j < M; ++j) { + aug(i, j) -= factor * aug(k, j); + result(i, j) -= factor * result(k, j); + } + } + } + } + + return result; + } + + // Iterators +public: + T* begin() { return data_; } + T* end() { return data_ + M * N; } + const T* begin() const { return data_; } + const T* end() const { return data_ + M * N; } + const T* cbegin() const { return data_; } + const T* cend() const { return data_ + M * N; } +}; + +// Specialization for 2x2 determinant (analytical formula) +template +inline T determinant_2x2(const Matrix& m) { + return m(0, 0) * m(1, 1) - m(0, 1) * m(1, 0); +} + +// Specialization for 2x2 inverse (Cramer's rule) +template +inline Matrix inverse_2x2(const Matrix& m) { + T det = determinant_2x2(m); + if (approx_zero(det)) { + throw std::runtime_error("Matrix is singular"); + } + + T inv_det = T(1) / det; + return Matrix{ + { m(1, 1) * inv_det, -m(0, 1) * inv_det}, + {-m(1, 0) * inv_det, m(0, 0) * inv_det} + }; +} + +// Specialization for 3x3 determinant (Sarrus rule) +template +inline T determinant_3x3(const Matrix& m) { + return m(0, 0) * (m(1, 1) * m(2, 2) - m(1, 2) * m(2, 1)) + - m(0, 1) * (m(1, 0) * m(2, 2) 
- m(1, 2) * m(2, 0)) + + m(0, 2) * (m(1, 0) * m(2, 1) - m(1, 1) * m(2, 0)); +} + +// Specialization for 3x3 inverse (Cramer's rule / adjugate method) +template +inline Matrix inverse_3x3(const Matrix& m) { + T det = determinant_3x3(m); + if (approx_zero(det)) { + throw std::runtime_error("Matrix is singular"); + } + + T inv_det = T(1) / det; + + // Compute adjugate matrix (transpose of cofactor matrix) + Matrix adj; + adj(0, 0) = (m(1, 1) * m(2, 2) - m(1, 2) * m(2, 1)); + adj(0, 1) = -(m(0, 1) * m(2, 2) - m(0, 2) * m(2, 1)); + adj(0, 2) = (m(0, 1) * m(1, 2) - m(0, 2) * m(1, 1)); + + adj(1, 0) = -(m(1, 0) * m(2, 2) - m(1, 2) * m(2, 0)); + adj(1, 1) = (m(0, 0) * m(2, 2) - m(0, 2) * m(2, 0)); + adj(1, 2) = -(m(0, 0) * m(1, 2) - m(0, 2) * m(1, 0)); + + adj(2, 0) = (m(1, 0) * m(2, 1) - m(1, 1) * m(2, 0)); + adj(2, 1) = -(m(0, 0) * m(2, 1) - m(0, 1) * m(2, 0)); + adj(2, 2) = (m(0, 0) * m(1, 1) - m(0, 1) * m(1, 0)); + + return adj * inv_det; +} + +// Template specializations for 2x2 Matrix determinant and inverse +template +class Matrix : public MatrixExpr> { + static constexpr std::size_t M = 2; + static constexpr std::size_t N = 2; + +private: + alignas(kFEFixedObjectAlignmentBytes) T data_[4]; + + static constexpr std::size_t index(std::size_t i, std::size_t j) { + return i * 2 + j; + } + +public: + using value_type = T; + using size_type = std::size_t; + + // Include all the same constructors and methods as the general template + constexpr Matrix() : data_{} {} + constexpr explicit Matrix(T value) { + for (size_type i = 0; i < 4; ++i) { + data_[i] = value; + } + } + constexpr Matrix(std::initializer_list> init) : data_{} { + size_type row = 0; + for (auto row_init : init) { + if (row >= 2) break; + size_type col = 0; + for (auto val : row_init) { + if (col >= 2) break; + (*this)(row, col) = val; + ++col; + } + ++row; + } + } + + template + Matrix(const MatrixExpr& expr) { + const auto& e = expr.derived(); + for (size_type i = 0; i < 2; ++i) { + for (size_type j = 0; j 
< 2; ++j) { + (*this)(i, j) = e(i, j); + } + } + } + + constexpr Matrix(const Matrix&) = default; + constexpr Matrix(Matrix&&) noexcept = default; + Matrix& operator=(const Matrix&) = default; + Matrix& operator=(Matrix&&) noexcept = default; + + template + Matrix& operator=(const MatrixExpr& expr) { + const auto& e = expr.derived(); + for (size_type i = 0; i < 2; ++i) { + for (size_type j = 0; j < 2; ++j) { + (*this)(i, j) = e(i, j); + } + } + return *this; + } + + static constexpr size_type rows() { return 2; } + static constexpr size_type cols() { return 2; } + static constexpr size_type size() { return 4; } + + constexpr T& operator()(size_type i, size_type j) { + return data_[index(i, j)]; + } + constexpr const T& operator()(size_type i, size_type j) const { + return data_[index(i, j)]; + } + + T* data() { return data_; } + const T* data() const { return data_; } + + void fill(T value) { + for (size_type i = 0; i < 4; ++i) { + data_[i] = value; + } + } + + void set_zero() { fill(T{0}); } + + void set_row(size_type i, const Vector& v) { + for (size_type j = 0; j < 2; ++j) { + (*this)(i, j) = v[j]; + } + } + + void set_column(size_type j, const Vector& v) { + for (size_type i = 0; i < 2; ++i) { + (*this)(i, j) = v[i]; + } + } + + void set_col(size_type j, const Vector& v) { + set_column(j, v); + } + + Vector col(size_type j) const { + return column(j); + } + + static Matrix zero() { + return zeros(); + } + + static Matrix diagonal(const Vector& diag) { + Matrix result; + result(0, 0) = diag[0]; + result(1, 1) = diag[1]; + return result; + } + + bool is_symmetric(T tol = tolerance) const { + using std::abs; + return abs((*this)(0, 1) - (*this)(1, 0)) <= tol; + } + + bool is_skew_symmetric(T tol = tolerance) const { + using std::abs; + // Diagonal must be zero + if (abs((*this)(0, 0)) > tol || abs((*this)(1, 1)) > tol) { + return false; + } + // Off-diagonal must be opposite + return abs((*this)(0, 1) + (*this)(1, 0)) <= tol; + } + + bool is_diagonal(T tol = 
tolerance) const { + using std::abs; + return abs((*this)(0, 1)) <= tol && abs((*this)(1, 0)) <= tol; + } + + T frobenius_norm() const { + using std::sqrt; + T sum = T(0); + for (size_type i = 0; i < 4; ++i) { + sum += data_[i] * data_[i]; + } + return sqrt(sum); + } + + T infinity_norm() const { + using std::abs; + T row0 = abs((*this)(0, 0)) + abs((*this)(0, 1)); + T row1 = abs((*this)(1, 0)) + abs((*this)(1, 1)); + return std::max(row0, row1); + } + + T one_norm() const { + using std::abs; + T col0 = abs((*this)(0, 0)) + abs((*this)(1, 0)); + T col1 = abs((*this)(0, 1)) + abs((*this)(1, 1)); + return std::max(col0, col1); + } + + Matrix& operator+=(const Matrix& other) { + for (size_type i = 0; i < 4; ++i) { + data_[i] += other.data_[i]; + } + return *this; + } + + Matrix& operator-=(const Matrix& other) { + for (size_type i = 0; i < 4; ++i) { + data_[i] -= other.data_[i]; + } + return *this; + } + + Matrix& operator*=(T scalar) { + for (size_type i = 0; i < 4; ++i) { + data_[i] *= scalar; + } + return *this; + } + + Matrix& operator/=(T scalar) { + const T inv = T(1) / scalar; + return (*this) *= inv; + } + + Matrix transpose() const { + return Matrix{ + {(*this)(0, 0), (*this)(1, 0)}, + {(*this)(0, 1), (*this)(1, 1)} + }; + } + + T trace() const { + return (*this)(0, 0) + (*this)(1, 1); + } + + static Matrix identity() { + Matrix result; + result(0, 0) = T(1); + result(1, 1) = T(1); + return result; + } + + static Matrix zeros() { + return Matrix(); + } + + static Matrix ones() { + return Matrix(T(1)); + } + + // Specialized 2x2 determinant + T determinant() const { + return determinant_2x2(*this); + } + + // Specialized 2x2 inverse + Matrix inverse() const { + return inverse_2x2(*this); + } + + Vector row(size_type i) const { + return Vector{(*this)(i, 0), (*this)(i, 1)}; + } + + Vector column(size_type j) const { + return Vector{(*this)(0, j), (*this)(1, j)}; + } + + T* begin() { return data_; } + T* end() { return data_ + 4; } + const T* begin() const { 
return data_; } + const T* end() const { return data_ + 4; } +}; + +// Template specialization for 3x3 Matrix +template +class Matrix : public MatrixExpr> { + static constexpr std::size_t M = 3; + static constexpr std::size_t N = 3; + +private: + alignas(kFEFixedObjectAlignmentBytes) T data_[9]; + + static constexpr std::size_t index(std::size_t i, std::size_t j) { + return i * 3 + j; + } + +public: + using value_type = T; + using size_type = std::size_t; + + constexpr Matrix() : data_{} {} + constexpr explicit Matrix(T value) { + for (size_type i = 0; i < 9; ++i) { + data_[i] = value; + } + } + constexpr Matrix(std::initializer_list> init) : data_{} { + size_type row = 0; + for (auto row_init : init) { + if (row >= 3) break; + size_type col = 0; + for (auto val : row_init) { + if (col >= 3) break; + (*this)(row, col) = val; + ++col; + } + ++row; + } + } + + template + Matrix(const MatrixExpr& expr) { + const auto& e = expr.derived(); + for (size_type i = 0; i < 3; ++i) { + for (size_type j = 0; j < 3; ++j) { + (*this)(i, j) = e(i, j); + } + } + } + + constexpr Matrix(const Matrix&) = default; + constexpr Matrix(Matrix&&) noexcept = default; + Matrix& operator=(const Matrix&) = default; + Matrix& operator=(Matrix&&) noexcept = default; + + template + Matrix& operator=(const MatrixExpr& expr) { + const auto& e = expr.derived(); + for (size_type i = 0; i < 3; ++i) { + for (size_type j = 0; j < 3; ++j) { + (*this)(i, j) = e(i, j); + } + } + return *this; + } + + static constexpr size_type rows() { return 3; } + static constexpr size_type cols() { return 3; } + static constexpr size_type size() { return 9; } + + constexpr T& operator()(size_type i, size_type j) { + return data_[index(i, j)]; + } + constexpr const T& operator()(size_type i, size_type j) const { + return data_[index(i, j)]; + } + + T* data() { return data_; } + const T* data() const { return data_; } + + void fill(T value) { + for (size_type i = 0; i < 9; ++i) { + data_[i] = value; + } + } + + void 
set_zero() { fill(T{0}); } + + void set_row(size_type i, const Vector& v) { + for (size_type j = 0; j < 3; ++j) { + (*this)(i, j) = v[j]; + } + } + + void set_column(size_type j, const Vector& v) { + for (size_type i = 0; i < 3; ++i) { + (*this)(i, j) = v[i]; + } + } + + void set_col(size_type j, const Vector& v) { + set_column(j, v); + } + + Vector col(size_type j) const { + return column(j); + } + + static Matrix zero() { + return zeros(); + } + + static Matrix diagonal(const Vector& diag) { + Matrix result; + result(0, 0) = diag[0]; + result(1, 1) = diag[1]; + result(2, 2) = diag[2]; + return result; + } + + bool is_symmetric(T tol = tolerance) const { + using std::abs; + for (size_type i = 0; i < 3; ++i) { + for (size_type j = i + 1; j < 3; ++j) { + if (abs((*this)(i, j) - (*this)(j, i)) > tol) { + return false; + } + } + } + return true; + } + + bool is_skew_symmetric(T tol = tolerance) const { + using std::abs; + // Diagonal must be zero + for (size_type i = 0; i < 3; ++i) { + if (abs((*this)(i, i)) > tol) { + return false; + } + } + // Off-diagonal must be opposite + for (size_type i = 0; i < 3; ++i) { + for (size_type j = i + 1; j < 3; ++j) { + if (abs((*this)(i, j) + (*this)(j, i)) > tol) { + return false; + } + } + } + return true; + } + + bool is_diagonal(T tol = tolerance) const { + using std::abs; + for (size_type i = 0; i < 3; ++i) { + for (size_type j = 0; j < 3; ++j) { + if (i != j && abs((*this)(i, j)) > tol) { + return false; + } + } + } + return true; + } + + T frobenius_norm() const { + using std::sqrt; + T sum = T(0); + for (size_type i = 0; i < 9; ++i) { + sum += data_[i] * data_[i]; + } + return sqrt(sum); + } + + T infinity_norm() const { + using std::abs; + T max_row_sum = T(0); + for (size_type i = 0; i < 3; ++i) { + T row_sum = T(0); + for (size_type j = 0; j < 3; ++j) { + row_sum += abs((*this)(i, j)); + } + max_row_sum = std::max(max_row_sum, row_sum); + } + return max_row_sum; + } + + T one_norm() const { + using std::abs; + T 
max_col_sum = T(0); + for (size_type j = 0; j < 3; ++j) { + T col_sum = T(0); + for (size_type i = 0; i < 3; ++i) { + col_sum += abs((*this)(i, j)); + } + max_col_sum = std::max(max_col_sum, col_sum); + } + return max_col_sum; + } + + Matrix& operator+=(const Matrix& other) { + for (size_type i = 0; i < 9; ++i) { + data_[i] += other.data_[i]; + } + return *this; + } + + Matrix& operator-=(const Matrix& other) { + for (size_type i = 0; i < 9; ++i) { + data_[i] -= other.data_[i]; + } + return *this; + } + + Matrix& operator*=(T scalar) { + for (size_type i = 0; i < 9; ++i) { + data_[i] *= scalar; + } + return *this; + } + + Matrix& operator/=(T scalar) { + const T inv = T(1) / scalar; + return (*this) *= inv; + } + + Matrix transpose() const { + Matrix result; + for (size_type i = 0; i < 3; ++i) { + for (size_type j = 0; j < 3; ++j) { + result(j, i) = (*this)(i, j); + } + } + return result; + } + + T trace() const { + return (*this)(0, 0) + (*this)(1, 1) + (*this)(2, 2); + } + + static Matrix identity() { + Matrix result; + result(0, 0) = T(1); + result(1, 1) = T(1); + result(2, 2) = T(1); + return result; + } + + static Matrix zeros() { + return Matrix(); + } + + static Matrix ones() { + return Matrix(T(1)); + } + + // Specialized 3x3 determinant + T determinant() const { + return determinant_3x3(*this); + } + + // Specialized 3x3 inverse + Matrix inverse() const { + return inverse_3x3(*this); + } + + Vector row(size_type i) const { + return Vector{(*this)(i, 0), (*this)(i, 1), (*this)(i, 2)}; + } + + Vector column(size_type j) const { + return Vector{(*this)(0, j), (*this)(1, j), (*this)(2, j)}; + } + + T* begin() { return data_; } + T* end() { return data_ + 9; } + const T* begin() const { return data_; } + const T* end() const { return data_ + 9; } +}; + +// Type aliases for common matrix types +template using Matrix2x2 = Matrix; +template using Matrix3x3 = Matrix; +template using Matrix4x4 = Matrix; +template using Matrix2x3 = Matrix; +template using Matrix3x2 = 
Matrix; +template using Matrix3x4 = Matrix; +template using Matrix4x3 = Matrix; + +// Double precision aliases +using Matrix2x2d = Matrix2x2; +using Matrix3x3d = Matrix3x3; +using Matrix4x4d = Matrix4x4; + +// Single precision aliases +using Matrix2x2f = Matrix2x2; +using Matrix3x3f = Matrix3x3; +using Matrix4x4f = Matrix4x4; + +// Matrix-vector multiplication +template +inline Vector operator*(const Matrix& A, const Vector& x) { + Vector result; + for (std::size_t i = 0; i < M; ++i) { + T sum = T(0); + for (std::size_t j = 0; j < N; ++j) { + sum += A(i, j) * x[j]; + } + result[i] = sum; + } + return result; +} + +// Vector-matrix multiplication (row vector * matrix) +template +inline Vector operator*(const Vector& x, const Matrix& A) { + Vector result; + for (std::size_t j = 0; j < N; ++j) { + T sum = T(0); + for (std::size_t i = 0; i < M; ++i) { + sum += x[i] * A(i, j); + } + result[j] = sum; + } + return result; +} + +// Matrix-matrix multiplication +template +inline Matrix operator*(const Matrix& A, const Matrix& B) { + Matrix result; + for (std::size_t i = 0; i < M; ++i) { + for (std::size_t k = 0; k < N; ++k) { + T a_ik = A(i, k); + for (std::size_t j = 0; j < P; ++j) { + result(i, j) += a_ik * B(k, j); + } + } + } + return result; +} + +// Free functions + +/** + * @brief Compute matrix transpose + */ +template +inline Matrix transpose(const Matrix& m) { + return m.transpose(); +} + +/** + * @brief Compute matrix trace + */ +template +inline T trace(const Matrix& m) { + return m.trace(); +} + +/** + * @brief Compute matrix determinant + */ +template +inline T determinant(const Matrix& m) { + return m.determinant(); +} + +/** + * @brief Compute matrix inverse + */ +template +inline Matrix inverse(const Matrix& m) { + return m.inverse(); +} + +/** + * @brief Compute Frobenius norm + */ +template +inline T frobenius_norm(const Matrix& m) { + return m.frobenius_norm(); +} + +/** + * @brief Component-wise absolute value + */ +template +inline Matrix abs(const 
Matrix& m) { + Matrix result; + for (std::size_t i = 0; i < M; ++i) { + for (std::size_t j = 0; j < N; ++j) { + using std::abs; + result(i, j) = abs(m(i, j)); + } + } + return result; +} + +/** + * @brief Component-wise minimum + */ +template +inline Matrix min(const Matrix& a, const Matrix& b) { + Matrix result; + for (std::size_t i = 0; i < M; ++i) { + for (std::size_t j = 0; j < N; ++j) { + result(i, j) = std::min(a(i, j), b(i, j)); + } + } + return result; +} + +/** + * @brief Component-wise maximum + */ +template +inline Matrix max(const Matrix& a, const Matrix& b) { + Matrix result; + for (std::size_t i = 0; i < M; ++i) { + for (std::size_t j = 0; j < N; ++j) { + result(i, j) = std::max(a(i, j), b(i, j)); + } + } + return result; +} + +/** + * @brief Outer product of two vectors + */ +template +inline Matrix outer_product(const Vector& u, const Vector& v) { + Matrix result; + for (std::size_t i = 0; i < M; ++i) { + for (std::size_t j = 0; j < N; ++j) { + result(i, j) = u[i] * v[j]; + } + } + return result; +} + +/** + * @brief Check if two matrices are approximately equal + */ +template +inline bool approx_equal(const Matrix& a, const Matrix& b, T tol = tolerance) { + for (std::size_t i = 0; i < M; ++i) { + for (std::size_t j = 0; j < N; ++j) { + if (!approx_equal(a(i, j), b(i, j), tol)) { + return false; + } + } + } + return true; +} + +/** + * @brief Stream output operator for matrices + * @tparam T Scalar type + * @tparam M Number of rows + * @tparam N Number of columns + * @param os Output stream + * @param m Matrix to output + * @return Reference to output stream + */ +template +inline std::ostream& operator<<(std::ostream& os, const Matrix& m) { + os << "["; + for (std::size_t i = 0; i < M; ++i) { + if (i > 0) os << "\n "; + os << "["; + for (std::size_t j = 0; j < N; ++j) { + if (j > 0) os << ", "; + os << m(i, j); + } + os << "]"; + } + os << "]"; + return os; +} + +} // namespace math +} // namespace FE +} // namespace svmp + +#endif // 
SVMP_FE_MATH_MATRIX_H diff --git a/Code/Source/solver/FE/Math/MatrixExpr.h b/Code/Source/solver/FE/Math/MatrixExpr.h new file mode 100644 index 000000000..da2f8c8d6 --- /dev/null +++ b/Code/Source/solver/FE/Math/MatrixExpr.h @@ -0,0 +1,626 @@ +#ifndef SVMP_FE_MATH_MATRIX_EXPR_H +#define SVMP_FE_MATH_MATRIX_EXPR_H + +/** + * @file MatrixExpr.h + * @brief Expression template infrastructure for lazy evaluation of matrix operations + * + * This header provides expression templates that enable compound matrix operations + * without creating temporary objects. Operations are evaluated lazily at the point + * of assignment, eliminating intermediate allocations and improving performance. + */ + +#include +#include +#include +#include +#include "ExpressionOps.h" + +namespace svmp { +namespace FE { +namespace math { + +/** + * @brief Base class for all matrix expressions using CRTP + * @tparam Derived The derived expression type + * + * This uses the Curiously Recurring Template Pattern (CRTP) to provide + * static polymorphism for expression templates. 
+ */ +template +class MatrixExpr { +public: + /** + * @brief Get the derived expression + * @return Reference to the derived type + */ + const Derived& derived() const { + return static_cast(*this); + } + + /** + * @brief Get the derived expression (non-const) + * @return Reference to the derived type + */ + Derived& derived() { + return static_cast(*this); + } + + /** + * @brief Access element by row and column indices + * @param i Row index + * @param j Column index + * @return Value at (i,j) + */ + auto operator()(std::size_t i, std::size_t j) const { + return derived()(i, j); + } + + /** + * @brief Get number of rows + * @return Number of rows + */ + std::size_t rows() const { + return derived().rows(); + } + + /** + * @brief Get number of columns + * @return Number of columns + */ + std::size_t cols() const { + return derived().cols(); + } +}; + +/** + * @brief Binary expression for element-wise operations between two matrix expressions + * @tparam LHS Left-hand side expression type + * @tparam RHS Right-hand side expression type + * @tparam Op Binary operation functor + */ +template +class MatrixBinaryExpr : public MatrixExpr> { +private: + const LHS& lhs_; + const RHS& rhs_; + Op op_; + +public: + /** + * @brief Construct binary expression + * @param lhs Left operand + * @param rhs Right operand + * @param op Operation to apply + */ + constexpr MatrixBinaryExpr(const LHS& lhs, const RHS& rhs, Op op = Op{}) + : lhs_(lhs), rhs_(rhs), op_(op) {} + + /** + * @brief Access element at (i,j) + * @param i Row index + * @param j Column index + * @return Result of operation on elements at (i,j) + */ + constexpr auto operator()(std::size_t i, std::size_t j) const { + return op_(lhs_(i, j), rhs_(i, j)); + } + + /** + * @brief Get number of rows + * @return Number of rows + */ + constexpr std::size_t rows() const { + return lhs_.rows(); + } + + /** + * @brief Get number of columns + * @return Number of columns + */ + constexpr std::size_t cols() const { + return 
lhs_.cols(); + } +}; + +/** + * @brief Unary expression for element-wise operations on a single matrix expression + * @tparam Expr Expression type + * @tparam Op Unary operation functor + */ +template +class MatrixUnaryExpr : public MatrixExpr> { +private: + const Expr& expr_; + Op op_; + +public: + /** + * @brief Construct unary expression + * @param expr Operand expression + * @param op Operation to apply + */ + constexpr MatrixUnaryExpr(const Expr& expr, Op op = Op{}) + : expr_(expr), op_(op) {} + + /** + * @brief Access element at (i,j) + * @param i Row index + * @param j Column index + * @return Result of operation on element at (i,j) + */ + constexpr auto operator()(std::size_t i, std::size_t j) const { + return op_(expr_(i, j)); + } + + /** + * @brief Get number of rows + * @return Number of rows + */ + constexpr std::size_t rows() const { + return expr_.rows(); + } + + /** + * @brief Get number of columns + * @return Number of columns + */ + constexpr std::size_t cols() const { + return expr_.cols(); + } +}; + +/** + * @brief Scalar multiplication expression + * @tparam Expr Matrix expression type + * @tparam Scalar Scalar type + */ +template +class MatrixScalarExpr : public MatrixExpr> { +private: + const Expr& expr_; + Scalar scalar_; + +public: + /** + * @brief Construct scalar multiplication expression + * @param expr Matrix expression + * @param scalar Scalar value + */ + constexpr MatrixScalarExpr(const Expr& expr, Scalar scalar) + : expr_(expr), scalar_(scalar) {} + + /** + * @brief Access element at (i,j) + * @param i Row index + * @param j Column index + * @return Element multiplied by scalar + */ + constexpr auto operator()(std::size_t i, std::size_t j) const { + return expr_(i, j) * scalar_; + } + + /** + * @brief Get number of rows + * @return Number of rows + */ + constexpr std::size_t rows() const { + return expr_.rows(); + } + + /** + * @brief Get number of columns + * @return Number of columns + */ + constexpr std::size_t cols() const { + 
return expr_.cols(); + } +}; + +/** + * @brief Scalar division expression + * @tparam Expr Matrix expression type + * @tparam Scalar Scalar type + */ +template +class MatrixScalarDivExpr : public MatrixExpr> { +private: + const Expr& expr_; + Scalar scalar_; + +public: + /** + * @brief Construct scalar division expression + * @param expr Matrix expression + * @param scalar Scalar divisor + */ + constexpr MatrixScalarDivExpr(const Expr& expr, Scalar scalar) + : expr_(expr), scalar_(scalar) {} + + /** + * @brief Access element at (i,j) + * @param i Row index + * @param j Column index + * @return Element divided by scalar + */ + constexpr auto operator()(std::size_t i, std::size_t j) const { + return expr_(i, j) / scalar_; + } + + /** + * @brief Get number of rows + * @return Number of rows + */ + constexpr std::size_t rows() const { + return expr_.rows(); + } + + /** + * @brief Get number of columns + * @return Number of columns + */ + constexpr std::size_t cols() const { + return expr_.cols(); + } +}; + +/** + * @brief Matrix multiplication expression (lazy evaluation) + * @tparam LHS Left matrix expression type + * @tparam RHS Right matrix expression type + * + * Computes matrix multiplication A*B lazily + */ +template +class MatrixMulExpr : public MatrixExpr> { +private: + const LHS& lhs_; + const RHS& rhs_; + +public: + /** + * @brief Construct matrix multiplication expression + * @param lhs Left matrix + * @param rhs Right matrix + */ + constexpr MatrixMulExpr(const LHS& lhs, const RHS& rhs) + : lhs_(lhs), rhs_(rhs) {} + + /** + * @brief Compute element at (i,j) + * @param i Row index + * @param j Column index + * @return Dot product of row i of lhs and column j of rhs + */ + constexpr auto operator()(std::size_t i, std::size_t j) const { + using result_type = decltype(lhs_(0, 0) * rhs_(0, 0)); + result_type sum = result_type{0}; + const auto n = lhs_.cols(); + for (std::size_t k = 0; k < n; ++k) { + sum += lhs_(i, k) * rhs_(k, j); + } + return sum; + } + + /** 
+ * @brief Get number of rows (from left matrix) + * @return Number of rows + */ + constexpr std::size_t rows() const { + return lhs_.rows(); + } + + /** + * @brief Get number of columns (from right matrix) + * @return Number of columns + */ + constexpr std::size_t cols() const { + return rhs_.cols(); + } +}; + +/** + * @brief Transpose expression (lazy evaluation) + * @tparam Expr Matrix expression type + */ +template +class TransposeExpr : public MatrixExpr> { +private: + const Expr& expr_; + +public: + /** + * @brief Construct transpose expression + * @param expr Matrix expression to transpose + */ + constexpr explicit TransposeExpr(const Expr& expr) + : expr_(expr) {} + + /** + * @brief Access transposed element + * @param i Row index (becomes column in original) + * @param j Column index (becomes row in original) + * @return Element at (j,i) of original matrix + */ + constexpr auto operator()(std::size_t i, std::size_t j) const { + return expr_(j, i); + } + + /** + * @brief Get number of rows (columns of original) + * @return Number of rows + */ + constexpr std::size_t rows() const { + return expr_.cols(); + } + + /** + * @brief Get number of columns (rows of original) + * @return Number of columns + */ + constexpr std::size_t cols() const { + return expr_.rows(); + } +}; + +/** + * @brief Diagonal matrix expression (creates diagonal matrix from vector) + * @tparam VecExpr Vector expression type + */ +template +class DiagonalExpr : public MatrixExpr> { +private: + const VecExpr& vec_; + std::size_t n_; + +public: + /** + * @brief Construct diagonal matrix from vector + * @param vec Vector of diagonal elements + * @param n Matrix dimension (default: vector size) + */ + constexpr explicit DiagonalExpr(const VecExpr& vec, std::size_t n = 0) + : vec_(vec), n_(n > 0 ? 
n : vec.size()) {} + + /** + * @brief Access element + * @param i Row index + * @param j Column index + * @return Diagonal element if i==j, zero otherwise + */ + constexpr auto operator()(std::size_t i, std::size_t j) const { + using result_type = decltype(vec_[0]); + return (i == j && i < vec_.size()) ? vec_[i] : result_type{0}; + } + + /** + * @brief Get number of rows + * @return Number of rows + */ + constexpr std::size_t rows() const { + return n_; + } + + /** + * @brief Get number of columns + * @return Number of columns + */ + constexpr std::size_t cols() const { + return n_; + } +}; + +/** + * @brief Addition operator for matrix expressions + */ +template, LHS> && + std::is_base_of_v, RHS> + >> +constexpr auto operator+(const MatrixExpr& lhs, const MatrixExpr& rhs) { + return MatrixBinaryExpr( + lhs.derived(), rhs.derived(), detail::ops::Add{} + ); +} + +/** + * @brief Subtraction operator for matrix expressions + */ +template, LHS> && + std::is_base_of_v, RHS> + >> +constexpr auto operator-(const MatrixExpr& lhs, const MatrixExpr& rhs) { + return MatrixBinaryExpr( + lhs.derived(), rhs.derived(), detail::ops::Sub{} + ); +} + +/** + * @brief Matrix multiplication operator + */ +template, LHS> && + std::is_base_of_v, RHS> + >> +constexpr auto operator*(const MatrixExpr& lhs, const MatrixExpr& rhs) { + return MatrixMulExpr(lhs.derived(), rhs.derived()); +} + +/** + * @brief Element-wise multiplication (Hadamard product) + */ +template, LHS> && + std::is_base_of_v, RHS> + >> +constexpr auto hadamard(const MatrixExpr& lhs, const MatrixExpr& rhs) { + return MatrixBinaryExpr( + lhs.derived(), rhs.derived(), detail::ops::Mul{} + ); +} + +/** + * @brief Element-wise division + */ +template, LHS> && + std::is_base_of_v, RHS> + >> +constexpr auto hadamard_div(const MatrixExpr& lhs, const MatrixExpr& rhs) { + return MatrixBinaryExpr( + lhs.derived(), rhs.derived(), detail::ops::Div{} + ); +} + +/** + * @brief Negation operator for matrix expressions + */ +template, 
Expr> + >> +constexpr auto operator-(const MatrixExpr& expr) { + return MatrixUnaryExpr( + expr.derived(), detail::ops::Negate{} + ); +} + +/** + * @brief Scalar multiplication operator (matrix * scalar) + */ +template, Expr> && + std::is_arithmetic_v + >> +constexpr auto operator*(const MatrixExpr& expr, Scalar scalar) { + return MatrixScalarExpr(expr.derived(), scalar); +} + +/** + * @brief Scalar multiplication operator (scalar * matrix) + */ +template && + std::is_base_of_v, Expr> + >> +constexpr auto operator*(Scalar scalar, const MatrixExpr& expr) { + return MatrixScalarExpr(expr.derived(), scalar); +} + +/** + * @brief Scalar division operator (matrix / scalar) + */ +template, Expr> && + std::is_arithmetic_v + >> +constexpr auto operator/(const MatrixExpr& expr, Scalar scalar) { + return MatrixScalarDivExpr(expr.derived(), scalar); +} + +/** + * @brief Transpose function + */ +template, Expr> + >> +constexpr auto transpose(const MatrixExpr& expr) { + return TransposeExpr(expr.derived()); +} + +/** + * @brief Element-wise absolute value + */ +template, Expr> + >> +constexpr auto abs(const MatrixExpr& expr) { + return MatrixUnaryExpr(expr.derived(), detail::ops::Abs{}); +} + +/** + * @brief Element-wise square root + */ +template, Expr> + >> +constexpr auto sqrt(const MatrixExpr& expr) { + return MatrixUnaryExpr(expr.derived(), detail::ops::Sqrt{}); +} + +/** + * @brief Compute Frobenius norm squared of matrix expression + * @tparam Expr Matrix expression type + * @param expr Matrix expression + * @return Square of the Frobenius norm + */ +template, Expr> + >> +constexpr auto frobenius_norm_squared(const MatrixExpr& expr) { + using result_type = decltype(expr.derived()(0, 0) * expr.derived()(0, 0)); + result_type sum = result_type{0}; + const auto m = expr.rows(); + const auto n = expr.cols(); + for (std::size_t i = 0; i < m; ++i) { + for (std::size_t j = 0; j < n; ++j) { + auto val = expr.derived()(i, j); + sum += val * val; + } + } + return sum; +} + +/** + 
* @brief Compute Frobenius norm of matrix expression + * @tparam Expr Matrix expression type + * @param expr Matrix expression + * @return Frobenius norm + */ +template, Expr> + >> +constexpr auto frobenius_norm(const MatrixExpr& expr) { + using std::sqrt; + return sqrt(frobenius_norm_squared(expr)); +} + +/** + * @brief Compute trace of square matrix expression + * @tparam Expr Matrix expression type + * @param expr Matrix expression + * @return Sum of diagonal elements + */ +template, Expr> + >> +constexpr auto trace(const MatrixExpr& expr) { + using result_type = decltype(expr.derived()(0, 0)); + result_type sum = result_type{0}; + const auto n = std::min(expr.rows(), expr.cols()); + for (std::size_t i = 0; i < n; ++i) { + sum += expr.derived()(i, i); + } + return sum; +} + +} // namespace math +} // namespace FE +} // namespace svmp + +#endif // SVMP_FE_MATH_MATRIX_EXPR_H \ No newline at end of file diff --git a/Code/Source/solver/FE/Math/Vector.h b/Code/Source/solver/FE/Math/Vector.h new file mode 100644 index 000000000..e272bd6dd --- /dev/null +++ b/Code/Source/solver/FE/Math/Vector.h @@ -0,0 +1,831 @@ +#ifndef SVMP_FE_MATH_VECTOR_H +#define SVMP_FE_MATH_VECTOR_H + +/** + * @file Vector.h + * @brief Fixed-size vectors with expression templates for FE computations + * + * This header provides optimized fixed-size vector operations for element-level + * computations. All operations use expression templates to eliminate temporaries + * and are header-only for maximum inlining. Memory is aligned for SIMD operations. 
+ */ + +#include "VectorExpr.h" +#include "MathConstants.h" +#include "../Common/Alignment.h" +#include +#include +#include +#include +#include +#include +#include + +namespace svmp { +namespace FE { +namespace math { + +/** + * @brief Fixed-size vector for element-level computations + * @tparam T Scalar type (float, double) + * @tparam N Vector dimension + * + * This class provides small vector operations optimized for + * compile-time known dimensions. Memory is aligned for SIMD operations. + */ +template +class Vector : public VectorExpr> { + static_assert(std::is_arithmetic_v, "T must be an arithmetic type"); + static_assert(N > 0, "Vector dimension must be positive"); + +private: + alignas(kFEFixedObjectAlignmentBytes) T data_[N]; // SIMD-friendly alignment + +public: + // Type definitions + using value_type = T; + using size_type = std::size_t; + using reference = T&; + using const_reference = const T&; + using pointer = T*; + using const_pointer = const T*; + + /** + * @brief Default constructor - zero initializes all components + */ + constexpr Vector() : data_{} {} + + /** + * @brief Fill constructor - initializes all components with same value + * @param value Value to fill vector with + */ + constexpr explicit Vector(T value) { + for (size_type i = 0; i < N; ++i) { + data_[i] = value; + } + } + + /** + * @brief Initializer list constructor + * @param init List of values + */ + constexpr Vector(std::initializer_list init) : data_{} { + auto it = init.begin(); + for (size_type i = 0; i < N && it != init.end(); ++i, ++it) { + data_[i] = *it; + } + } + + /** + * @brief Constructor from expression template + * @tparam Expr Expression type + * @param expr Vector expression to evaluate + */ + template + Vector(const VectorExpr& expr) { + const auto& e = expr.derived(); + for (size_type i = 0; i < N; ++i) { + data_[i] = e[i]; + } + } + + /** + * @brief Copy constructor + */ + constexpr Vector(const Vector&) = default; + + /** + * @brief Move constructor + */ + 
constexpr Vector(Vector&&) noexcept = default; + + /** + * @brief Copy assignment + */ + Vector& operator=(const Vector&) = default; + + /** + * @brief Move assignment + */ + Vector& operator=(Vector&&) noexcept = default; + + /** + * @brief Assignment from expression template + * @tparam Expr Expression type + * @param expr Vector expression to evaluate + * @return Reference to this + */ + template + Vector& operator=(const VectorExpr& expr) { + const auto& e = expr.derived(); + for (size_type i = 0; i < N; ++i) { + data_[i] = e[i]; + } + return *this; + } + + /** + * @brief Get vector size (compile-time constant) + * @return Number of elements + */ + static constexpr size_type size() { return N; } + + /** + * @brief Element access (no bounds checking) + * @param i Element index + * @return Reference to element + */ + constexpr T& operator[](size_type i) { + return data_[i]; + } + + /** + * @brief Element access (no bounds checking) - const version + * @param i Element index + * @return Const reference to element + */ + constexpr const T& operator[](size_type i) const { + return data_[i]; + } + + /** + * @brief Element access with bounds checking + * @param i Element index + * @return Reference to element + * @throws std::out_of_range if i >= N + */ + T& at(size_type i) { + if (i >= N) { + throw std::out_of_range("Vector::at: index out of range"); + } + return data_[i]; + } + + /** + * @brief Element access with bounds checking - const version + * @param i Element index + * @return Const reference to element + * @throws std::out_of_range if i >= N + */ + const T& at(size_type i) const { + if (i >= N) { + throw std::out_of_range("Vector::at: index out of range"); + } + return data_[i]; + } + + /** + * @brief Access first element + * @return Reference to first element + */ + T& front() { return data_[0]; } + const T& front() const { return data_[0]; } + + /** + * @brief Access last element + * @return Reference to last element + */ + T& back() { return data_[N-1]; } 
+ const T& back() const { return data_[N-1]; } + + /** + * @brief Get pointer to underlying data + * @return Pointer to first element + */ + T* data() { return data_; } + const T* data() const { return data_; } + + /** + * @brief Fill vector with value + * @param value Value to fill with + */ + void fill(T value) { + for (size_type i = 0; i < N; ++i) { + data_[i] = value; + } + } + + /** + * @brief Set all components to zero + */ + void set_zero() { + fill(T{0}); + } + + // Arithmetic operators + + /** + * @brief In-place addition + * @param other Vector to add + * @return Reference to this + */ + Vector& operator+=(const Vector& other) { + for (size_type i = 0; i < N; ++i) { + data_[i] += other.data_[i]; + } + return *this; + } + + /** + * @brief In-place subtraction + * @param other Vector to subtract + * @return Reference to this + */ + Vector& operator-=(const Vector& other) { + for (size_type i = 0; i < N; ++i) { + data_[i] -= other.data_[i]; + } + return *this; + } + + /** + * @brief In-place scalar multiplication + * @param scalar Scalar to multiply by + * @return Reference to this + */ + Vector& operator*=(T scalar) { + for (size_type i = 0; i < N; ++i) { + data_[i] *= scalar; + } + return *this; + } + + /** + * @brief In-place scalar division + * @param scalar Scalar to divide by + * @return Reference to this + */ + Vector& operator/=(T scalar) { + const T inv = T(1) / scalar; + return (*this) *= inv; + } + + // Vector operations + + /** + * @brief Compute dot product + * @param other Other vector + * @return Dot product + */ + T dot(const Vector& other) const { + T result = T(0); + for (size_type i = 0; i < N; ++i) { + result += data_[i] * other.data_[i]; + } + return result; + } + + /** + * @brief Compute squared Euclidean norm + * @return Squared norm + */ + T norm_squared() const { + return dot(*this); + } + + /** + * @brief Compute Euclidean norm + * @return Norm + */ + T norm() const { + using std::sqrt; + return sqrt(norm_squared()); + } + + /** + * 
@brief Get normalized vector + * @return Unit vector in same direction + */ + Vector normalized() const { + const T n = norm(); + if (approx_zero(n)) { + return Vector(); // Return zero vector + } + return (*this) / n; + } + + /** + * @brief Normalize this vector in place + * @return Reference to this + */ + Vector& normalize() { + const T n = norm(); + if (!approx_zero(n)) { + (*this) /= n; + } + return *this; + } + + /** + * @brief Compute L1 norm (Manhattan norm) + * @return Sum of absolute values + */ + T norm_l1() const { + T result = T(0); + for (size_type i = 0; i < N; ++i) { + using std::abs; + result += abs(data_[i]); + } + return result; + } + + /** + * @brief Compute L-infinity norm (maximum norm) + * @return Maximum absolute value + */ + T norm_inf() const { + T result = T(0); + for (size_type i = 0; i < N; ++i) { + using std::abs; + result = std::max(result, abs(data_[i])); + } + return result; + } + + /** + * @brief Get minimum component + * @return Minimum value + */ + T min() const { + T result = data_[0]; + for (size_type i = 1; i < N; ++i) { + result = std::min(result, data_[i]); + } + return result; + } + + /** + * @brief Get maximum component + * @return Maximum value + */ + T max() const { + T result = data_[0]; + for (size_type i = 1; i < N; ++i) { + result = std::max(result, data_[i]); + } + return result; + } + + /** + * @brief Get sum of all components + * @return Sum of components + */ + T sum() const { + T result = T(0); + for (size_type i = 0; i < N; ++i) { + result += data_[i]; + } + return result; + } + + /** + * @brief Get product of all components + * @return Product of components + */ + T product() const { + T result = data_[0]; + for (size_type i = 1; i < N; ++i) { + result *= data_[i]; + } + return result; + } + + // Static factory functions + + /** + * @brief Create zero vector + * @return Vector with all components zero + */ + static constexpr Vector zeros() { + return Vector(); + } + + /** + * @brief Create vector with all 
components one + * @return Vector with all components one + */ + static constexpr Vector ones() { + return Vector(T(1)); + } + + /** + * @brief Create unit vector along axis + * @param axis Axis index (0-based) + * @return Unit vector + */ + static Vector unit(size_type axis) { + Vector v; + if (axis < N) { + v[axis] = T(1); + } + return v; + } + + /** + * @brief Create basis vector (alias for unit) + * @param i Axis index (0-based) + * @return Basis vector + */ + static Vector basis(size_type i) { + return unit(i); + } + + /** + * @brief Create zero vector (alias for zeros) + * @return Zero vector + */ + static constexpr Vector zero() { + return zeros(); + } + + /** + * @brief Get index of minimum element + * @return Index of minimum value + */ + size_type min_index() const { + size_type idx = 0; + T min_val = data_[0]; + for (size_type i = 1; i < N; ++i) { + if (data_[i] < min_val) { + min_val = data_[i]; + idx = i; + } + } + return idx; + } + + /** + * @brief Get index of maximum element + * @return Index of maximum value + */ + size_type max_index() const { + size_type idx = 0; + T max_val = data_[0]; + for (size_type i = 1; i < N; ++i) { + if (data_[i] > max_val) { + max_val = data_[i]; + idx = i; + } + } + return idx; + } + + /** + * @brief Compute mean of all components + * @return Average value + */ + T mean() const { + return sum() / static_cast(N); + } + + /** + * @brief Cross product for 3D vectors + * @param other Other vector + * @return Cross product + * @note Only available for 3D vectors + */ + template + std::enable_if_t> cross(const Vector& other) const { + return Vector{ + data_[1] * other[2] - data_[2] * other[1], + data_[2] * other[0] - data_[0] * other[2], + data_[0] * other[1] - data_[1] * other[0] + }; + } + + /** + * @brief Check if vectors are approximately equal + * @param other Other vector + * @param tol Tolerance + * @return true if equal within tolerance + */ + bool approx_equal(const Vector& other, T tol = tolerance) const { + for 
(size_type i = 0; i < N; ++i) { + using std::abs; + if (abs(data_[i] - other.data_[i]) > tol) { + return false; + } + } + return true; + } + + /** + * @brief Equality comparison + * @param other Other vector + * @return true if exactly equal + */ + bool operator==(const Vector& other) const { + for (size_type i = 0; i < N; ++i) { + if (data_[i] != other.data_[i]) { + return false; + } + } + return true; + } + + /** + * @brief Inequality comparison + * @param other Other vector + * @return true if not equal + */ + bool operator!=(const Vector& other) const { + return !(*this == other); + } + + // Iterators + T* begin() { return data_; } + T* end() { return data_ + N; } + const T* begin() const { return data_; } + const T* end() const { return data_ + N; } + const T* cbegin() const { return data_; } + const T* cend() const { return data_ + N; } +}; + +// Type aliases for common vector types +template using Vector2 = Vector; +template using Vector3 = Vector; +template using Vector4 = Vector; + +// Double precision aliases +using Vector2d = Vector2; +using Vector3d = Vector3; +using Vector4d = Vector4; + +// Single precision aliases +using Vector2f = Vector2; +using Vector3f = Vector3; +using Vector4f = Vector4; + +// Integer aliases +using Vector2i = Vector2; +using Vector3i = Vector3; +using Vector4i = Vector4; + +/** + * @brief 3D Cross product + * @tparam T Scalar type + * @param a First vector + * @param b Second vector + * @return Cross product a × b + */ +template +inline Vector3 cross(const Vector3& a, const Vector3& b) { + return Vector3{ + a[1] * b[2] - a[2] * b[1], + a[2] * b[0] - a[0] * b[2], + a[0] * b[1] - a[1] * b[0] + }; +} + +/** + * @brief 2D Cross product (returns scalar - z component of 3D cross) + * @tparam T Scalar type + * @param a First vector + * @param b Second vector + * @return Scalar cross product + */ +template +inline T cross(const Vector2& a, const Vector2& b) { + return a[0] * b[1] - a[1] * b[0]; +} + +/** + * @brief Triple scalar 
product (a · (b × c)) + * @tparam T Scalar type + * @param a First vector + * @param b Second vector + * @param c Third vector + * @return Scalar triple product + */ +template +inline T triple_product(const Vector3& a, const Vector3& b, const Vector3& c) { + return a.dot(cross(b, c)); +} + +// Free functions for common operations + +/** + * @brief Compute dot product + */ +template +inline T dot(const Vector& a, const Vector& b) { + return a.dot(b); +} + +/** + * @brief Compute Euclidean norm + */ +template +inline T norm(const Vector& v) { + return v.norm(); +} + +/** + * @brief Compute squared Euclidean norm + */ +template +inline T norm_squared(const Vector& v) { + return v.norm_squared(); +} + +/** + * @brief Get normalized vector + */ +template +inline Vector normalize(const Vector& v) { + return v.normalized(); +} + +/** + * @brief Component-wise absolute value + */ +template +inline Vector abs(const Vector& v) { + Vector result; + for (std::size_t i = 0; i < N; ++i) { + using std::abs; + result[i] = abs(v[i]); + } + return result; +} + +/** + * @brief Component-wise minimum + */ +template +inline Vector min(const Vector& a, const Vector& b) { + Vector result; + for (std::size_t i = 0; i < N; ++i) { + result[i] = std::min(a[i], b[i]); + } + return result; +} + +/** + * @brief Component-wise maximum + */ +template +inline Vector max(const Vector& a, const Vector& b) { + Vector result; + for (std::size_t i = 0; i < N; ++i) { + result[i] = std::max(a[i], b[i]); + } + return result; +} + +/** + * @brief Component-wise clamp + */ +template +inline Vector clamp(const Vector& v, const Vector& min_v, const Vector& max_v) { + Vector result; + for (std::size_t i = 0; i < N; ++i) { + result[i] = std::clamp(v[i], min_v[i], max_v[i]); + } + return result; +} + +/** + * @brief Linear interpolation between vectors + * @tparam T Scalar type + * @tparam N Vector dimension + * @param t Interpolation parameter [0, 1] + * @param a Start vector (at t=0) + * @param b End vector 
(at t=1)
+ * @return Interpolated vector
+ */
+template
+inline Vector lerp(T t, const Vector& a, const Vector& b) {
+    return a + t * (b - a);
+}
+
+/**
+ * @brief Spherical linear interpolation (for unit vectors)
+ * @tparam T Scalar type
+ * @param t Interpolation parameter [0, 1]
+ * @param a Start unit vector
+ * @param b End unit vector
+ * @return Interpolated unit vector
+ *
+ * Nearly parallel inputs fall back to normalized linear interpolation.
+ * Numerically antiparallel inputs have no unique shortest arc; in that case
+ * the result follows the arc through an arbitrarily chosen direction
+ * perpendicular to @p a.
+ */
+template
+inline Vector3 slerp(T t, const Vector3& a, const Vector3& b) {
+    T cos_angle = a.dot(b);
+
+    // Handle numerical issues
+    cos_angle = std::clamp(cos_angle, T(-1), T(1));
+
+    // If vectors are nearly parallel, use linear interpolation
+    if (cos_angle > T(0.9995)) {
+        return normalize(lerp(t, a, b));
+    }
+
+    T angle = std::acos(cos_angle);
+    T sin_angle = std::sin(angle);
+
+    if (approx_zero(sin_angle)) {
+        // Antiparallel case (angle ~ pi): the weights below would divide by ~0.
+        // Build a unit vector perpendicular to a from the coordinate axis on
+        // which a has its smallest component, so the cross product is non-zero.
+        std::size_t k = 0;
+        for (std::size_t i = 1; i < 3; ++i) {
+            if (std::abs(a[i]) < std::abs(a[k])) {
+                k = i;
+            }
+        }
+        auto axis = a;
+        axis.set_zero();
+        axis[k] = T(1);
+        const auto ortho = cross(a, axis).normalized();
+        return std::cos(t * angle) * a + std::sin(t * angle) * ortho;
+    }
+
+    T t0 = std::sin((T(1) - t) * angle) / sin_angle;
+    T t1 = std::sin(t * angle) / sin_angle;
+
+    return t0 * a + t1 * b;
+}
+
+/**
+ * @brief Reflect vector about normal
+ * @tparam T Scalar type
+ * @tparam N Vector dimension
+ * @param v Incident vector
+ * @param n Normal vector (should be unit)
+ * @return Reflected vector
+ */
+template
+inline Vector reflect(const Vector& v, const Vector& n) {
+    return v - T(2) * dot(v, n) * n;
+}
+
+/**
+ * @brief Project vector onto another vector
+ * @tparam T Scalar type
+ * @tparam N Vector dimension
+ * @param v Vector to project
+ * @param onto Vector to project onto
+ * @return Projection of v onto 'onto' (zero vector if 'onto' is approximately zero)
+ */
+template
+inline Vector project(const Vector& v, const Vector& onto) {
+    T denom = onto.norm_squared();
+    if (approx_zero(denom)) {
+        return Vector::zeros();
+    }
+    return (dot(v, onto) / denom) * onto;
+}
+
+/**
+ * @brief Get perpendicular component of vector
+ * @tparam T Scalar type
+ * @tparam N Vector dimension
+ * @param v Vector
+ * @param direction Direction to remove
+ * @return Component of v perpendicular to direction
+ */
+template
+inline Vector perpendicular(const Vector& v, const Vector& direction) {
+    return v - project(v, direction);
+}
+ +/** + * @brief Compute angle between two vectors + * @tparam T Scalar type + * @tparam N Vector dimension + * @param a First vector + * @param b Second vector + * @return Angle in radians [0, π] + */ +template +inline T angle(const Vector& a, const Vector& b) { + T cos_angle = dot(a, b) / (norm(a) * norm(b)); + cos_angle = std::clamp(cos_angle, T(-1), T(1)); + return std::acos(cos_angle); +} + +/** + * @brief Check if two vectors are approximately equal + * @tparam T Scalar type + * @tparam N Vector dimension + * @param a First vector + * @param b Second vector + * @param tol Tolerance + * @return true if vectors are equal within tolerance + */ +template +inline bool approx_equal(const Vector& a, const Vector& b, T tol = tolerance) { + for (std::size_t i = 0; i < N; ++i) { + if (!approx_equal(a[i], b[i], tol)) { + return false; + } + } + return true; +} + +/** + * @brief Stream output operator + * @tparam T Scalar type + * @tparam N Vector dimension + * @param os Output stream + * @param v Vector to output + * @return Reference to output stream + */ +template +inline std::ostream& operator<<(std::ostream& os, const Vector& v) { + os << "["; + for (std::size_t i = 0; i < N; ++i) { + if (i > 0) os << ", "; + os << v[i]; + } + os << "]"; + return os; +} + +} // namespace math +} // namespace FE +} // namespace svmp + +#endif // SVMP_FE_MATH_VECTOR_H diff --git a/Code/Source/solver/FE/Math/VectorExpr.h b/Code/Source/solver/FE/Math/VectorExpr.h new file mode 100644 index 000000000..8b9c8e382 --- /dev/null +++ b/Code/Source/solver/FE/Math/VectorExpr.h @@ -0,0 +1,418 @@ +#ifndef SVMP_FE_MATH_VECTOR_EXPR_H +#define SVMP_FE_MATH_VECTOR_EXPR_H + +/** + * @file VectorExpr.h + * @brief Expression template infrastructure for lazy evaluation of vector operations + * + * This header provides expression templates that enable compound vector operations + * without creating temporary objects. 
Operations are evaluated lazily at the point + * of assignment, eliminating intermediate allocations and improving performance. + */ + +#include +#include +#include +#include "ExpressionOps.h" + +namespace svmp { +namespace FE { +namespace math { + +/** + * @brief Base class for all vector expressions using CRTP + * @tparam Derived The derived expression type + * + * This uses the Curiously Recurring Template Pattern (CRTP) to provide + * static polymorphism for expression templates. + */ +template +class VectorExpr { +public: + /** + * @brief Get the derived expression + * @return Reference to the derived type + */ + const Derived& derived() const { + return static_cast(*this); + } + + /** + * @brief Get the derived expression (non-const) + * @return Reference to the derived type + */ + Derived& derived() { + return static_cast(*this); + } + + /** + * @brief Access element by index + * @param i Element index + * @return Value at index i + */ + auto operator[](std::size_t i) const { + return derived()[i]; + } + + /** + * @brief Get the size of the vector expression + * @return Number of elements + */ + std::size_t size() const { + return derived().size(); + } +}; + +/** + * @brief Binary expression for element-wise operations between two vector expressions + * @tparam LHS Left-hand side expression type + * @tparam RHS Right-hand side expression type + * @tparam Op Binary operation functor + */ +template +class VectorBinaryExpr : public VectorExpr> { +private: + const LHS& lhs_; + const RHS& rhs_; + Op op_; + +public: + /** + * @brief Construct binary expression + * @param lhs Left operand + * @param rhs Right operand + * @param op Operation to apply + */ + constexpr VectorBinaryExpr(const LHS& lhs, const RHS& rhs, Op op = Op{}) + : lhs_(lhs), rhs_(rhs), op_(op) {} + + /** + * @brief Access element at index + * @param i Element index + * @return Result of operation on elements at index i + */ + constexpr auto operator[](std::size_t i) const { + return op_(lhs_[i], 
rhs_[i]); + } + + /** + * @brief Get size of expression (from left operand) + * @return Number of elements + */ + constexpr std::size_t size() const { + return lhs_.size(); + } +}; + +/** + * @brief Unary expression for element-wise operations on a single vector expression + * @tparam Expr Expression type + * @tparam Op Unary operation functor + */ +template +class VectorUnaryExpr : public VectorExpr> { +private: + const Expr& expr_; + Op op_; + +public: + /** + * @brief Construct unary expression + * @param expr Operand expression + * @param op Operation to apply + */ + constexpr VectorUnaryExpr(const Expr& expr, Op op = Op{}) + : expr_(expr), op_(op) {} + + /** + * @brief Access element at index + * @param i Element index + * @return Result of operation on element at index i + */ + constexpr auto operator[](std::size_t i) const { + return op_(expr_[i]); + } + + /** + * @brief Get size of expression + * @return Number of elements + */ + constexpr std::size_t size() const { + return expr_.size(); + } +}; + +/** + * @brief Scalar multiplication expression + * @tparam Expr Vector expression type + * @tparam Scalar Scalar type + */ +template +class VectorScalarExpr : public VectorExpr> { +private: + const Expr& expr_; + Scalar scalar_; + +public: + /** + * @brief Construct scalar multiplication expression + * @param expr Vector expression + * @param scalar Scalar value + */ + constexpr VectorScalarExpr(const Expr& expr, Scalar scalar) + : expr_(expr), scalar_(scalar) {} + + /** + * @brief Access element at index + * @param i Element index + * @return Element multiplied by scalar + */ + constexpr auto operator[](std::size_t i) const { + return expr_[i] * scalar_; + } + + /** + * @brief Get size of expression + * @return Number of elements + */ + constexpr std::size_t size() const { + return expr_.size(); + } +}; + +/** + * @brief Scalar division expression + * @tparam Expr Vector expression type + * @tparam Scalar Scalar type + */ +template +class VectorScalarDivExpr : 
public VectorExpr> { +private: + const Expr& expr_; + Scalar scalar_; + +public: + /** + * @brief Construct scalar division expression + * @param expr Vector expression + * @param scalar Scalar divisor + */ + constexpr VectorScalarDivExpr(const Expr& expr, Scalar scalar) + : expr_(expr), scalar_(scalar) {} + + /** + * @brief Access element at index + * @param i Element index + * @return Element divided by scalar + */ + constexpr auto operator[](std::size_t i) const { + return expr_[i] / scalar_; + } + + /** + * @brief Get size of expression + * @return Number of elements + */ + constexpr std::size_t size() const { + return expr_.size(); + } +}; + +/** + * @brief Addition operator for vector expressions + */ +template, LHS> && + std::is_base_of_v, RHS> + >> +constexpr auto operator+(const VectorExpr& lhs, const VectorExpr& rhs) { + return VectorBinaryExpr( + lhs.derived(), rhs.derived(), detail::ops::Add{} + ); +} + +/** + * @brief Subtraction operator for vector expressions + */ +template, LHS> && + std::is_base_of_v, RHS> + >> +constexpr auto operator-(const VectorExpr& lhs, const VectorExpr& rhs) { + return VectorBinaryExpr( + lhs.derived(), rhs.derived(), detail::ops::Sub{} + ); +} + +/** + * @brief Element-wise multiplication operator for vector expressions + */ +template, LHS> && + std::is_base_of_v, RHS> + >> +constexpr auto hadamard(const VectorExpr& lhs, const VectorExpr& rhs) { + return VectorBinaryExpr( + lhs.derived(), rhs.derived(), detail::ops::Mul{} + ); +} + +/** + * @brief Element-wise division operator for vector expressions + */ +template, LHS> && + std::is_base_of_v, RHS> + >> +constexpr auto hadamard_div(const VectorExpr& lhs, const VectorExpr& rhs) { + return VectorBinaryExpr( + lhs.derived(), rhs.derived(), detail::ops::Div{} + ); +} + +/** + * @brief Negation operator for vector expressions + */ +template, Expr> + >> +constexpr auto operator-(const VectorExpr& expr) { + return VectorUnaryExpr( + expr.derived(), detail::ops::Negate{} + ); +} + 
+/** + * @brief Scalar multiplication operator (vector * scalar) + */ +template, Expr> && + std::is_arithmetic_v + >> +constexpr auto operator*(const VectorExpr& expr, Scalar scalar) { + return VectorScalarExpr(expr.derived(), scalar); +} + +/** + * @brief Scalar multiplication operator (scalar * vector) + */ +template && + std::is_base_of_v, Expr> + >> +constexpr auto operator*(Scalar scalar, const VectorExpr& expr) { + return VectorScalarExpr(expr.derived(), scalar); +} + +/** + * @brief Scalar division operator (vector / scalar) + */ +template, Expr> && + std::is_arithmetic_v + >> +constexpr auto operator/(const VectorExpr& expr, Scalar scalar) { + return VectorScalarDivExpr(expr.derived(), scalar); +} + +/** + * @brief Element-wise absolute value + */ +template, Expr> + >> +constexpr auto abs(const VectorExpr& expr) { + return VectorUnaryExpr(expr.derived(), detail::ops::Abs{}); +} + +/** + * @brief Element-wise square root + */ +template, Expr> + >> +constexpr auto sqrt(const VectorExpr& expr) { + return VectorUnaryExpr(expr.derived(), detail::ops::Sqrt{}); +} + +/** + * @brief Dot product for vector expressions + * @tparam LHS Left vector expression type + * @tparam RHS Right vector expression type + * @param lhs Left operand + * @param rhs Right operand + * @return Dot product result + */ +template, LHS> && + std::is_base_of_v, RHS> + >> +constexpr auto dot(const VectorExpr& lhs, const VectorExpr& rhs) { + using result_type = decltype(lhs.derived()[0] * rhs.derived()[0]); + result_type sum = result_type{0}; + const auto n = lhs.size(); + for (std::size_t i = 0; i < n; ++i) { + sum += lhs.derived()[i] * rhs.derived()[i]; + } + return sum; +} + +/** + * @brief Compute norm squared of vector expression + * @tparam Expr Vector expression type + * @param expr Vector expression + * @return Square of the Euclidean norm + */ +template, Expr> + >> +constexpr auto norm_squared(const VectorExpr& expr) { + return dot(expr, expr); +} + +/** + * @brief Compute norm of 
vector expression + * @tparam Expr Vector expression type + * @param expr Vector expression + * @return Euclidean norm + */ +template, Expr> + >> +constexpr auto norm(const VectorExpr& expr) { + using std::sqrt; + return sqrt(norm_squared(expr)); +} + +/** + * @brief Normalize vector expression + * @tparam Expr Vector expression type + * @param expr Vector expression + * @return Normalized vector expression + */ +template, Expr> + >> +constexpr auto normalize(const VectorExpr& expr) { + return expr / norm(expr); +} + +} // namespace math +} // namespace FE +} // namespace svmp + +#endif // SVMP_FE_MATH_VECTOR_EXPR_H \ No newline at end of file diff --git a/Code/Source/solver/FE/Quadrature/QuadratureRule.h b/Code/Source/solver/FE/Quadrature/QuadratureRule.h new file mode 100644 index 000000000..f7d186891 --- /dev/null +++ b/Code/Source/solver/FE/Quadrature/QuadratureRule.h @@ -0,0 +1,237 @@ +/* Copyright (c) Stanford University, The Regents of the University of California, and others. + * + * All Rights Reserved. + * + * See License file. + */ + +#ifndef SVMP_FE_QUADRATURE_RULE_H +#define SVMP_FE_QUADRATURE_RULE_H + +/** + * @file QuadratureRule.h + * @brief Abstracted quadrature rule representation for FE integration + * + * This header defines the base class for all quadrature rules used by the + * finite element infrastructure. Rules are expressed in reference element + * space only; mapping to physical space is handled by the Geometry module. + * + * The interface is intentionally lightweight and header-only to avoid coupling + * Quadrature to other modules while remaining compatible with the Mesh library + * through shared type aliases provided by FE/Common/Types.h. 
+ */ + +#include "Types.h" +#include "FEException.h" +#include "Math/Vector.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace svmp { +namespace FE { +namespace quadrature { + +/// Convenience alias for quadrature point representation in reference space +using QuadPoint = math::Vector; + +struct QuadraturePointFingerprint { + int dimension{0}; + std::size_t num_points{0}; + std::uint64_t points_hash_a{0}; + std::uint64_t points_hash_b{0}; +}; + +/** + * @brief Base class for quadrature rules over reference elements + * + * Derived classes populate the point/weight data via the protected setters. + * The class performs lightweight consistency checks (size agreement, basic + * reference-measure validation) but leaves element-specific checks to callers. + */ +class QuadratureRule { +public: + virtual ~QuadratureRule() = default; + + /// Number of quadrature points + std::size_t num_points() const noexcept { return points_.size(); } + + /// Polynomial exactness degree reported by the rule + int order() const noexcept { return order_; } + + /// Spatial dimension of the reference domain + int dimension() const noexcept { return dimension_; } + + /// Cell family the rule integrates over (line, tri, quad, ...) + svmp::CellFamily cell_family() const noexcept { return cell_family_; } + + /// Access a single quadrature point (no bounds checking) + QuadPoint point(std::size_t i) const noexcept { return points_[i]; } + + /// Access a single quadrature weight (no bounds checking) + Real weight(std::size_t i) const noexcept { return weights_[i]; } + + /// Bulk accessors + const std::vector& points() const noexcept { return points_; } + const std::vector& weights() const noexcept { return weights_; } + + /// Cached coordinate-only fingerprint for consumers whose values depend on + /// reference points but not quadrature weights. 
+ QuadraturePointFingerprint point_fingerprint() const noexcept { return point_fingerprint_; } + + /// Stable semantic identity used by BasisCache + virtual std::string cache_identity() const; + + /** + * @brief Validate rule data for basic consistency + * @param tol Relative tolerance for weight sum check + * @return True if rule passes size and weight checks + */ + virtual bool is_valid(Real tol = 1e-12) const; + + /** + * @brief Reference-domain measure for the element family + * + * Length/area/volume of the canonical reference element: + * - Line [-1,1] -> 2 + * - Quad [-1,1]^2 -> 4 + * - Hex [-1,1]^3 -> 8 + * - Tri (0,0)-(1,0)-(0,1) -> 0.5 + * - Tet simplex at origin -> 1/6 + * - Wedge (triangle x line) -> 1 + * - Pyramid (x,y in [-1,1], z in [0,1]) -> 4/3 + */ + Real reference_measure() const noexcept; + +protected: + QuadratureRule(svmp::CellFamily family, int dimension, int order = 0) + : cell_family_(family), dimension_(dimension), order_(order) {} + + /// Assign point and weight storage (sizes must match) + void set_data(std::vector pts, std::vector wts); + + /// Override computed order in derived classes + void set_order(int ord) noexcept { order_ = ord; } + +private: + std::string build_cache_identity() const; + QuadraturePointFingerprint build_point_fingerprint() const noexcept; + + svmp::CellFamily cell_family_; + int dimension_; + int order_; + std::vector points_; + std::vector weights_; + std::string cache_identity_; + QuadraturePointFingerprint point_fingerprint_; +}; + +// -------------------------------------------------------------------------------- +// Inline implementations +// -------------------------------------------------------------------------------- + +inline void QuadratureRule::set_data(std::vector pts, std::vector wts) { + if (pts.size() != wts.size()) { + throw FEException("QuadratureRule: points/weights size mismatch", + StatusCode::InvalidArgument, + __FILE__, __LINE__, __func__); + } + points_ = std::move(pts); + weights_ = 
std::move(wts); + point_fingerprint_ = build_point_fingerprint(); + cache_identity_ = build_cache_identity(); +} + +inline bool QuadratureRule::is_valid(Real tol) const { + if (points_.empty() || points_.size() != weights_.size()) { + return false; + } + Real sum_w = Real(0); + for (Real w : weights_) { + if (!std::isfinite(w)) { + return false; + } + sum_w += w; + } + const Real ref = reference_measure(); + const Real denom = std::max(Real(1), std::abs(ref)); + return std::abs(sum_w - ref) <= tol * denom; +} + +inline std::string QuadratureRule::cache_identity() const { + if (!cache_identity_.empty()) { + return cache_identity_; + } + return build_cache_identity(); +} + +inline std::string QuadratureRule::build_cache_identity() const { + std::ostringstream oss; + oss << "dim=" << dimension_ + << "|npts=" << points_.size(); + + oss << std::setprecision(std::numeric_limits::max_digits10); + for (const auto& pt : points_) { + oss << "|pt=" << pt[0] << ',' << pt[1] << ',' << pt[2]; + } + return oss.str(); +} + +inline QuadraturePointFingerprint QuadratureRule::build_point_fingerprint() const noexcept { + auto real_bits = [](Real value) noexcept { + static_assert(sizeof(Real) <= sizeof(std::uint64_t), + "Quadrature point fingerprints assume Real fits in 64 bits"); + std::uint64_t bits = 0; + std::memcpy(&bits, &value, sizeof(Real)); + return bits; + }; + auto mix_hash = [](std::uint64_t& seed, std::uint64_t value) noexcept { + seed ^= value + 0x9e3779b97f4a7c15ULL + (seed << 6u) + (seed >> 2u); + }; + + QuadraturePointFingerprint fingerprint; + fingerprint.dimension = dimension_; + fingerprint.num_points = points_.size(); + fingerprint.points_hash_a = 1469598103934665603ULL; + fingerprint.points_hash_b = 1099511628211ULL; + + mix_hash(fingerprint.points_hash_a, static_cast(fingerprint.dimension)); + mix_hash(fingerprint.points_hash_a, static_cast(fingerprint.num_points)); + mix_hash(fingerprint.points_hash_b, static_cast(fingerprint.num_points)); + 
mix_hash(fingerprint.points_hash_b, static_cast(fingerprint.dimension)); + for (const auto& point : points_) { + for (std::size_t component = 0; component < 3u; ++component) { + const std::uint64_t bits = real_bits(point[component]); + mix_hash(fingerprint.points_hash_a, bits); + mix_hash(fingerprint.points_hash_b, bits ^ (0xbf58476d1ce4e5b9ULL + component)); + } + } + return fingerprint; +} + +inline Real QuadratureRule::reference_measure() const noexcept { + switch (cell_family_) { + case svmp::CellFamily::Line: return Real(2); + case svmp::CellFamily::Quad: return Real(4); + case svmp::CellFamily::Hex: return Real(8); + case svmp::CellFamily::Triangle: return Real(0.5); + case svmp::CellFamily::Tetra: return Real(1.0 / 6.0); + case svmp::CellFamily::Wedge: return Real(1.0); // 0.5 area * length 2 + case svmp::CellFamily::Pyramid: return Real(4.0 / 3.0); + case svmp::CellFamily::Point: return Real(1.0); + default: return Real(1.0); + } +} + +} // namespace quadrature +} // namespace FE +} // namespace svmp + +#endif // SVMP_FE_QUADRATURE_RULE_H diff --git a/Code/Source/solver/fs.cpp b/Code/Source/solver/fs.cpp index d592a8b96..abe1992df 100644 --- a/Code/Source/solver/fs.cpp +++ b/Code/Source/solver/fs.cpp @@ -5,10 +5,66 @@ #include "fs.h" #include "consts.h" +#include "FE/Common/FEException.h" #include "nn.h" +#include +#include + namespace fs { +namespace { + +namespace fe = svmp::FE; + +std::string element_name(consts::ElementType eType) +{ + const auto iter = consts::element_type_to_string.find(eType); + if (iter != consts::element_type_to_string.end()) { + return iter->second; + } + + return "unknown (" + std::to_string(static_cast(eType)) + ")"; +} + +bool supports_reference_hessians(consts::ElementType eType) +{ + using namespace consts; + + switch (eType) { + case ElementType::LIN1: + case ElementType::LIN2: + case ElementType::TRI3: + case ElementType::TRI6: + case ElementType::QUD4: + case ElementType::QUD8: + case ElementType::QUD9: + case 
ElementType::TET4: + case ElementType::TET10: + case ElementType::HEX8: + case ElementType::HEX20: + case ElementType::HEX27: + case ElementType::WDG: + return true; + default: + return false; + } +} + +void populate_reference_hessians_if_supported(fsType& fs, const int insd) +{ + if (fs.Nxx.size() == 0 || !supports_reference_hessians(fs.eType)) { + return; + } + + const int ind2 = std::max(3 * (insd - 1), 1); + for (int g = 0; g < fs.nG; ++g) { + nn::get_gn_nxx(insd, ind2, fs.eType, fs.eNoN, g, fs.xi, fs.Nxx); + } +} + +} // namespace + /// @brief Allocates arrays within the function space type. Assumes that /// fs%eNoN and fs%nG are already defined @@ -103,6 +159,7 @@ void get_thood_fs(ComMod& com_mod, std::array& fs, const mshType& lM, nn::get_gnn(nsd, fs[1].eType, fs[1].eNoN, g, fs[1].xi, fs[1].N, fs[1].Nx); } nn::get_nn_bnds(nsd, fs[1].eType, fs[1].eNoN, fs[1].xib, fs[1].Nb); + populate_reference_hessians_if_supported(fs[1], nsd); } else if (iOpt == 2) { fs[1].nG = lM.fs[1].nG; @@ -133,6 +190,7 @@ void get_thood_fs(ComMod& com_mod, std::array& fs, const mshType& lM, nn::get_gnn(nsd, fs[0].eType, fs[0].eNoN, g, fs[0].xi, fs[0].N, fs[0].Nx); } nn::get_nn_bnds(nsd, fs[0].eType, fs[0].eNoN, fs[0].xib, fs[0].Nb); + populate_reference_hessians_if_supported(fs[0], nsd); } } } @@ -275,14 +333,7 @@ void init_fs_msh(const ComMod& com_mod, mshType& lM) lM.fs[0].Nb = lM.Nb; lM.fs[0].Nx = lM.Nx; } - // Second order derivatives for vector function space - // - if (!lM.fs[0].lShpF) { - int ind2 = std::max(3*(insd-1), 1); - for (int g = 0; g < lM.fs[0].nG; g++) { - nn::get_gn_nxx(insd, ind2, lM.fs[0].eType, lM.fs[0].eNoN, g, lM.fs[0].xi, lM.fs[0].Nxx); - } - } + populate_reference_hessians_if_supported(lM.fs[0], insd); // Sets Taylor-Hood basis [fluid, stokes, ustruct, FSI) if (lM.nFs == 2) { @@ -291,6 +342,7 @@ void init_fs_msh(const ComMod& com_mod, mshType& lM) // Initialize the function space init_fs(lM.fs[1], nsd, insd); + 
populate_reference_hessians_if_supported(lM.fs[1], insd); } } @@ -343,7 +395,8 @@ void set_thood_fs(fsType& fs, consts::ElementType eType) break; default: - throw std::runtime_error("Cannot choose Taylor-Hood basis"); + throw fe::InvalidElementException("Cannot choose Taylor-Hood basis", + element_name(eType), __FILE__, __LINE__, __func__); break; } } diff --git a/Code/Source/solver/nn.cpp b/Code/Source/solver/nn.cpp index 9f12d64e4..51c126708 100644 --- a/Code/Source/solver/nn.cpp +++ b/Code/Source/solver/nn.cpp @@ -15,15 +15,28 @@ #include "Array.h" #include "Vector.h" +#include "FE/Basis/BasisExceptions.h" +#include "FE/Basis/BasisFactory.h" +#include "FE/Common/FEException.h" + #include "consts.h" #include "mat_fun.h" #include "utils.h" #include "lapack_defs.h" +#include +#include +#include +#include +#include #include #include #include +#include +#include +#include +#include namespace nn { @@ -46,13 +59,510 @@ using namespace consts; // Define a map type used to set the bounds of element shape functions. #include "nn_elem_nn_bnds.h" +namespace { + +namespace fe = svmp::FE; +namespace febasis = svmp::FE::basis; + +struct BasisSelection { + fe::ElementType element; + fe::BasisType basis; + int order; +}; + +enum class BasisMode { + Auto, + Legacy, + Fe +}; + +std::string normalize_basis_mode_name(std::string value) +{ + std::transform(value.begin(), value.end(), value.begin(), + [](unsigned char c) { return static_cast(std::tolower(c)); }); + return value; +} + +BasisMode parse_basis_mode() +{ + const char* mode_env = std::getenv("SVMP_BASIS_MODE"); + if (mode_env == nullptr || *mode_env == '\0') { + return BasisMode::Auto; + } + + const std::string mode = normalize_basis_mode_name(mode_env); + if (mode == "auto") { + return BasisMode::Auto; + } + if (mode == "legacy") { + return BasisMode::Legacy; + } + if (mode == "fe") { + return BasisMode::Fe; + } + + throw febasis::BasisConfigurationException( + "Invalid SVMP_BASIS_MODE='" + std::string(mode_env) + + "'. 
Expected one of: auto, legacy, fe", + __FILE__, __LINE__, __func__); +} + +BasisMode active_basis_mode() +{ + static const BasisMode mode = parse_basis_mode(); + return mode; +} + +const char* basis_mode_name(BasisMode mode) +{ + switch (mode) { + case BasisMode::Auto: + return "auto"; + case BasisMode::Legacy: + return "legacy"; + case BasisMode::Fe: + return "fe"; + } + return "unknown"; +} + +void log_basis_mode_once() +{ + static const bool logged = []() { + std::cout << "[svMultiPhysics] SVMP_BASIS_MODE=" + << basis_mode_name(active_basis_mode()) << std::endl; + return true; + }(); + (void)logged; +} + +bool basis_mode_allows_fe_adapter() +{ + return active_basis_mode() != BasisMode::Legacy; +} + +std::string solver_element_name(consts::ElementType eType) +{ + auto it = consts::element_type_to_string.find(eType); + if (it != consts::element_type_to_string.end()) { + return it->second + " (" + std::to_string(static_cast(eType)) + ")"; + } + return "unknown (" + std::to_string(static_cast(eType)) + ")"; +} + +std::optional to_basis_selection(consts::ElementType eType) +{ + switch (eType) { + case consts::ElementType::LIN1: + return BasisSelection{fe::ElementType::Line2, fe::BasisType::Lagrange, 1}; + case consts::ElementType::LIN2: + return BasisSelection{fe::ElementType::Line3, fe::BasisType::Lagrange, 2}; + case consts::ElementType::TRI3: + return BasisSelection{fe::ElementType::Triangle3, fe::BasisType::Lagrange, 1}; + case consts::ElementType::TRI6: + return BasisSelection{fe::ElementType::Triangle6, fe::BasisType::Lagrange, 2}; + case consts::ElementType::QUD4: + return BasisSelection{fe::ElementType::Quad4, fe::BasisType::Lagrange, 1}; + case consts::ElementType::QUD8: + return BasisSelection{fe::ElementType::Quad8, fe::BasisType::Serendipity, 2}; + case consts::ElementType::QUD9: + return BasisSelection{fe::ElementType::Quad9, fe::BasisType::Lagrange, 2}; + case consts::ElementType::TET4: + return BasisSelection{fe::ElementType::Tetra4, 
fe::BasisType::Lagrange, 1}; + case consts::ElementType::TET10: + return BasisSelection{fe::ElementType::Tetra10, fe::BasisType::Lagrange, 2}; + case consts::ElementType::HEX8: + return BasisSelection{fe::ElementType::Hex8, fe::BasisType::Lagrange, 1}; + case consts::ElementType::HEX20: + return BasisSelection{fe::ElementType::Hex20, fe::BasisType::Serendipity, 2}; + case consts::ElementType::HEX27: + return BasisSelection{fe::ElementType::Hex27, fe::BasisType::Lagrange, 2}; + case consts::ElementType::WDG: + return BasisSelection{fe::ElementType::Wedge6, fe::BasisType::Lagrange, 1}; + default: + return std::nullopt; + } +} + +bool use_basis_adapter_for(consts::ElementType eType) +{ + return basis_mode_allows_fe_adapter() && to_basis_selection(eType).has_value(); +} + +bool supports_basis_hessian_adapter_for(consts::ElementType eType) +{ + return basis_mode_allows_fe_adapter() && to_basis_selection(eType).has_value(); +} + +bool supports_face_basis_adapter_for(consts::ElementType eType) +{ + if (!basis_mode_allows_fe_adapter()) { + return false; + } + + switch (eType) { + case consts::ElementType::LIN1: + case consts::ElementType::LIN2: + case consts::ElementType::TRI3: + case consts::ElementType::TRI6: + case consts::ElementType::QUD4: + case consts::ElementType::QUD8: + case consts::ElementType::QUD9: + return to_basis_selection(eType).has_value(); + default: + return false; + } +} + +std::shared_ptr make_basis_for_solver_element(consts::ElementType eType) +{ + auto selection = to_basis_selection(eType); + if (!selection) { + throw febasis::BasisElementCompatibilityException( + "No FE Basis selection for solver element " + solver_element_name(eType), + __FILE__, __LINE__, __func__); + } + + febasis::BasisRequest request; + request.element_type = selection->element; + request.basis_type = selection->basis; + request.order = selection->order; + return febasis::basis_factory::create(request); +} + +template +std::size_t mapped_basis_index(const std::array& map, + 
consts::ElementType eType, + const int solver_node) +{ + if (solver_node < 0 || static_cast(solver_node) >= map.size()) { + throw febasis::BasisNodeOrderingException( + "Solver node " + std::to_string(solver_node) + + " is outside node map for " + solver_element_name(eType), + __FILE__, __LINE__, __func__); + } + + return map[static_cast(solver_node)]; +} + +std::size_t basis_index_for_solver_node(consts::ElementType eType, const int solver_node) +{ + if (solver_node < 0) { + throw febasis::BasisNodeOrderingException( + "Solver node " + std::to_string(solver_node) + + " is outside node map for " + solver_element_name(eType), + __FILE__, __LINE__, __func__); + } + + const auto node = static_cast(solver_node); + + switch (eType) { + case consts::ElementType::TRI3: { + static constexpr std::array map{1, 2, 0}; + return mapped_basis_index(map, eType, solver_node); + } + case consts::ElementType::TRI6: { + static constexpr std::array map{1, 2, 0, 4, 5, 3}; + return mapped_basis_index(map, eType, solver_node); + } + case consts::ElementType::TET4: { + static constexpr std::array map{1, 2, 3, 0}; + return mapped_basis_index(map, eType, solver_node); + } + case consts::ElementType::TET10: { + static constexpr std::array map{1, 2, 3, 0, 5, 9, 8, 4, 6, 7}; + return mapped_basis_index(map, eType, solver_node); + } + case consts::ElementType::WDG: { + static constexpr std::array map{1, 2, 0, 4, 5, 3}; + return mapped_basis_index(map, eType, solver_node); + } + case consts::ElementType::HEX27: { + static constexpr std::array map{ + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, + 25, 23, 22, 24, 20, 21, 26 + }; + return mapped_basis_index(map, eType, solver_node); + } + default: + return node; + } +} + +fe::math::Vector make_basis_point(const febasis::BasisFunction& basis, + const int g, + const Array& xi) +{ + if (xi.nrows() < basis.dimension()) { + throw febasis::BasisConfigurationException( + "xi has " + std::to_string(xi.nrows()) + + " rows but FE 
Basis element requires " + std::to_string(basis.dimension()) + + " reference coordinates", + __FILE__, __LINE__, __func__); + } + + fe::math::Vector point{}; + for (int d = 0; d < basis.dimension(); ++d) { + point[static_cast(d)] = xi(d, g); + } + return point; +} + +void copy_basis_values_to_solver_arrays(consts::ElementType eType, + const int eNoN, + const int g, + const std::vector& values, + const std::vector& gradients, + Array& N, + Array3& Nx) +{ + if (values.size() != static_cast(eNoN)) { + throw febasis::BasisEvaluationException( + "FE Basis value count " + std::to_string(values.size()) + + " does not match solver eNoN " + std::to_string(eNoN), + __FILE__, __LINE__, __func__); + } + if (gradients.size() != static_cast(eNoN)) { + throw febasis::BasisEvaluationException( + "FE Basis gradient count " + std::to_string(gradients.size()) + + " does not match solver eNoN " + std::to_string(eNoN), + __FILE__, __LINE__, __func__); + } + + for (int a = 0; a < eNoN; ++a) { + const auto basis_index = basis_index_for_solver_node(eType, a); + if (basis_index >= values.size() || basis_index >= gradients.size()) { + throw febasis::BasisNodeOrderingException( + "Solver node " + std::to_string(a) + " maps to FE Basis node " + + std::to_string(basis_index) + " outside basis output for " + + solver_element_name(eType), + __FILE__, __LINE__, __func__); + } + + N(a, g) = values[basis_index]; + + for (int d = 0; d < Nx.nrows(); ++d) { + Nx(d, a, g) = 0.0; + } + const int ndim = std::min(Nx.nrows(), 3); + for (int d = 0; d < ndim; ++d) { + Nx(d, a, g) = gradients[basis_index][static_cast(d)]; + } + } +} + +void evaluate_basis_values_and_gradients(const int insd, + consts::ElementType eType, + const int eNoN, + const int g, + Array& xi, + Array& N, + Array3& Nx) +{ + auto basis = make_basis_for_solver_element(eType); + if (insd < basis->dimension()) { + throw febasis::BasisConfigurationException( + "solver insd " + std::to_string(insd) + + " is smaller than FE Basis reference 
dimension " + std::to_string(basis->dimension()), + __FILE__, __LINE__, __func__); + } + + const auto point = make_basis_point(*basis, g, xi); + std::vector values; + std::vector gradients; + basis->evaluate_values(point, values); + basis->evaluate_gradients(point, gradients); + + // FE Basis owns the formulas; fsType and mshType remain the solver-facing storage contract. + copy_basis_values_to_solver_arrays(eType, eNoN, g, values, gradients, N, Nx); +} + +void evaluate_face_basis_values_and_gradients(const int gaus_pt, faceType& face) +{ + evaluate_basis_values_and_gradients( + face.xi.nrows(), + face.eType, + face.eNoN, + gaus_pt, + face.xi, + face.N, + face.Nx); +} + +int required_nxx_components_for_dimension(const int dimension) +{ + switch (dimension) { + case 1: + return 1; + case 2: + return 3; + case 3: + return 6; + default: + throw febasis::BasisConfigurationException( + "Unsupported FE Basis reference dimension " + std::to_string(dimension), + __FILE__, __LINE__, __func__); + } +} + +void copy_basis_hessians_to_solver_nxx(consts::ElementType eType, + const int eNoN, + const int g, + const int dimension, + const std::vector& hessians, + Array3& Nxx) +{ + if (hessians.size() != static_cast(eNoN)) { + throw febasis::BasisEvaluationException( + "FE Basis Hessian count " + std::to_string(hessians.size()) + + " does not match solver eNoN " + std::to_string(eNoN), + __FILE__, __LINE__, __func__); + } + + const int required_components = required_nxx_components_for_dimension(dimension); + if (Nxx.nrows() < required_components) { + throw febasis::BasisConfigurationException( + "solver Nxx has " + std::to_string(Nxx.nrows()) + + " rows but FE Basis Hessian packing requires " + std::to_string(required_components), + __FILE__, __LINE__, __func__); + } + + for (int a = 0; a < eNoN; ++a) { + for (int i = 0; i < Nxx.nrows(); ++i) { + Nxx(i, a, g) = 0.0; + } + + const auto basis_index = basis_index_for_solver_node(eType, a); + if (basis_index >= hessians.size()) { + 
throw febasis::BasisNodeOrderingException( + "Solver node " + std::to_string(a) + " maps to FE Basis Hessian node " + + std::to_string(basis_index) + " outside basis output for " + + solver_element_name(eType), + __FILE__, __LINE__, __func__); + } + + const auto& hessian = hessians[basis_index]; + Nxx(0, a, g) = hessian(0, 0); + if (dimension >= 2) { + Nxx(1, a, g) = hessian(1, 1); + Nxx(2, a, g) = hessian(0, 1); + } + if (dimension >= 3) { + Nxx(2, a, g) = hessian(2, 2); + Nxx(3, a, g) = hessian(0, 1); + Nxx(4, a, g) = hessian(1, 2); + Nxx(5, a, g) = hessian(0, 2); + } + } +} + +void evaluate_basis_hessians(const int insd, + const int ind2, + consts::ElementType eType, + const int eNoN, + const int gaus_pt, + const Array& xi, + Array3& Nxx) +{ + auto basis = make_basis_for_solver_element(eType); + if (insd < basis->dimension()) { + throw febasis::BasisConfigurationException( + "solver insd " + std::to_string(insd) + + " is smaller than FE Basis reference dimension " + std::to_string(basis->dimension()), + __FILE__, __LINE__, __func__); + } + + const int required_components = required_nxx_components_for_dimension(basis->dimension()); + if (ind2 < required_components) { + throw febasis::BasisConfigurationException( + "solver ind2 " + std::to_string(ind2) + + " is smaller than packed Hessian component count " + std::to_string(required_components), + __FILE__, __LINE__, __func__); + } + + const auto point = make_basis_point(*basis, gaus_pt, xi); + std::vector hessians; + basis->evaluate_hessians(point, hessians); + + // Solver Nxx packing is dxx, dyy, dxy in 2D and dxx, dyy, dzz, dxy, dyz, dxz in 3D. 
+ copy_basis_hessians_to_solver_nxx(eType, eNoN, gaus_pt, basis->dimension(), hessians, Nxx); +} + +void call_legacy_get_gnn(const int insd, + consts::ElementType eType, + const int eNoN, + const int g, + Array& xi, + Array& N, + Array3& Nx, + const std::string& basis_failure = "") +{ + try { + get_element_shape_data[eType](insd, eNoN, g, xi, N, Nx); + } catch (const std::bad_function_call&) { + std::string message = "[get_gnn] No FE Basis or legacy shape support for element " + + solver_element_name(eType) + "; legacy fallback was attempted"; + if (!basis_failure.empty()) { + message += " after FE Basis failure: " + basis_failure; + } + throw fe::InvalidElementException(message, solver_element_name(eType), + __FILE__, __LINE__, __func__); + } +} + +void call_legacy_get_gn_nxx(const int insd, + const int ind2, + consts::ElementType eType, + const int eNoN, + const int gaus_pt, + const Array& xi, + Array3& Nxx, + const std::string& basis_failure = "", + const bool allow_missing_legacy_table = false) +{ + try { + get_element_2nd_derivs[eType](insd, ind2, eNoN, gaus_pt, xi, Nxx); + } catch (const std::bad_function_call&) { + if (allow_missing_legacy_table) { + return; + } + + std::string message = "[get_gn_nxx] No FE Basis or legacy second-derivative support for element " + + solver_element_name(eType) + "; legacy fallback was attempted"; + if (!basis_failure.empty()) { + message += " after FE Basis failure: " + basis_failure; + } + throw fe::InvalidElementException(message, solver_element_name(eType), + __FILE__, __LINE__, __func__); + } +} + +void call_legacy_face_shape_data(const int gaus_pt, faceType& face) +{ + auto legacy_shape = set_face_shape_data.find(face.eType); + if (legacy_shape == set_face_shape_data.end()) { + throw fe::InvalidElementException( + "[get_gnn(face)] No FE Basis or legacy face shape support", + solver_element_name(face.eType), __FILE__, __LINE__, __func__); + } + + legacy_shape->second(gaus_pt, face); +} + +} // namespace + void 
get_gip(const int insd, consts::ElementType eType, const int nG, Vector& w, Array& xi) { + log_basis_mode_once(); + try { get_element_gauss_int_data[eType](insd, nG, w, xi); } catch (const std::bad_function_call& exception) { - throw std::runtime_error("No support for element etype " + std::to_string(static_cast(eType)) + - " in 'get_element_gauss_int_data'."); + throw fe::InvalidElementException( + "No support in 'get_element_gauss_int_data'", + solver_element_name(eType), __FILE__, __LINE__, __func__); } } @@ -62,19 +572,27 @@ void get_gip(const int insd, consts::ElementType eType, const int nG, Vector(mesh.eType)) + " in 'set_element_gauss_int_data'."); + throw fe::InvalidElementException( + "No support in 'set_element_gauss_int_data'", + solver_element_name(mesh.eType), __FILE__, __LINE__, __func__); } } void get_gip(Simulation* simulation, faceType& face) { + log_basis_mode_once(); + try { set_face_gauss_int_data[face.eType](face); } catch (const std::bad_function_call& exception) { - throw std::runtime_error("No support for face type " + std::to_string(static_cast(face.eType)) + " in 'set_face_gauss_int_data'."); + throw fe::InvalidElementException( + "No support in 'set_face_gauss_int_data'", + solver_element_name(face.eType), __FILE__, __LINE__, __func__); } } @@ -83,11 +601,26 @@ void get_gip(Simulation* simulation, faceType& face) void get_gnn(const int insd, consts::ElementType eType, const int eNoN, const int g, Array& xi, Array& N, Array3& Nx) { - try { - get_element_shape_data[eType](insd, eNoN, g, xi, N, Nx); - } catch (const std::bad_function_call& exception) { - throw std::runtime_error("[get_gnn] No support for element type " + std::to_string(static_cast(eType)) + " in 'get_element_shape_data'."); + log_basis_mode_once(); + + if (use_basis_adapter_for(eType)) { + try { + evaluate_basis_values_and_gradients(insd, eType, eNoN, g, xi, N, Nx); + return; + } catch (const fe::NotImplementedException& exception) { + call_legacy_get_gnn(insd, eType, eNoN, 
g, xi, N, Nx, exception.what()); + return; + } catch (const std::exception& exception) { + throw febasis::BasisEvaluationException( + "[get_gnn] FE Basis adapter failed for element " + + solver_element_name(eType) + + "; legacy fallback was not attempted for this approved element: " + + exception.what(), + __FILE__, __LINE__, __func__); + } } + + call_legacy_get_gnn(insd, eType, eNoN, g, xi, N, Nx); } /// @brief A big fat hack because the Fortran GETNN() operates on primitive types but @@ -111,20 +644,48 @@ void get_gnn(const int nsd, consts::ElementType eType, const int eNoN, Vector(mesh.eType)) + " in 'set_element_shape_data'."); - } + nn::get_gnn(mesh.xi.nrows(), mesh.eType, mesh.eNoN, gaus_pt, mesh.xi, mesh.N, mesh.Nx); } void get_gnn(Simulation* simulation, int gaus_pt, faceType& face) { - try { - set_face_shape_data[face.eType](gaus_pt, face); - } catch (const std::bad_function_call& exception) { - throw std::runtime_error("No support for face type " + std::to_string(static_cast(face.eType)) + " in 'set_face_shape_data'."); + using consts::ElementType; + + log_basis_mode_once(); + + if (active_basis_mode() == BasisMode::Legacy) { + call_legacy_face_shape_data(gaus_pt, face); + return; + } + + if (face.eType == ElementType::NRB) { + throw fe::NotImplementedException( + "[get_gnn(face)] NRB face shape functions remain unsupported by FE Basis and the legacy face table", + __FILE__, __LINE__, __func__); + } + + if (supports_face_basis_adapter_for(face.eType)) { + try { + // FE Basis owns mapped face N/Nx formulas; faceType remains the solver-facing storage contract. 
+ evaluate_face_basis_values_and_gradients(gaus_pt, face); + return; + } catch (const std::exception& exception) { + throw febasis::BasisEvaluationException( + "[get_gnn(face)] FE Basis face adapter failed for mapped face element " + + solver_element_name(face.eType) + "; legacy fallback was not attempted: " + + exception.what(), + __FILE__, __LINE__, __func__); + } } + + if (face.eType == ElementType::PNT) { + // Point faces have no mapped FE Basis representation in this pass; keep the legacy scalar value path. + call_legacy_face_shape_data(gaus_pt, face); + return; + } + + // The legacy face table is retained only for explicitly unsupported paths and future cleanup. + call_legacy_face_shape_data(gaus_pt, face); } /// @brief Returns second order derivatives at given natural coords @@ -136,19 +697,40 @@ void get_gn_nxx(const int insd, const int ind2, consts::ElementType eType, const { using namespace consts; - // Element types that don't have 2nd derivatives computed for them. - static std::set no_derivs{ElementType::NRB, ElementType::QUD4, ElementType::HEX8, - ElementType::HEX20, ElementType::HEX27}; + log_basis_mode_once(); - if (no_derivs.count(eType) != 0) { + // NRB/PNT and face-only Hessian paths remain intentionally unsupported here. 
+ if (eType == ElementType::NRB || eType == ElementType::PNT) { return; } - try { - get_element_2nd_derivs[eType](insd, ind2, eNoN, gaus_pt, xi, Nxx); - } catch (const std::bad_function_call& exception) { - throw std::runtime_error("[get_gn_nxx] No support for element type " + std::to_string(static_cast(eType)) + " in 'get_element_2nd_derivs'."); + if (active_basis_mode() == BasisMode::Legacy) { + call_legacy_get_gn_nxx( + insd, ind2, eType, eNoN, gaus_pt, xi, Nxx, "", true); + return; } + + if (supports_basis_hessian_adapter_for(eType)) { + try { + evaluate_basis_hessians(insd, ind2, eType, eNoN, gaus_pt, xi, Nxx); + return; + } catch (const fe::NotImplementedException& exception) { + throw fe::NotImplementedException( + "[get_gn_nxx] FE Basis Hessian support is required for mapped volume element " + + solver_element_name(eType) + " but is not implemented: " + exception.what(), + __FILE__, __LINE__, __func__); + } catch (const std::exception& exception) { + throw febasis::BasisEvaluationException( + "[get_gn_nxx] FE Basis Hessian adapter failed for element " + + solver_element_name(eType) + + "; legacy fallback was not attempted for this approved element: " + + exception.what(), + __FILE__, __LINE__, __func__); + } + } + + // Legacy Hessian tables are reserved for intentionally unsupported families. 
+ call_legacy_get_gn_nxx(insd, ind2, eType, eNoN, gaus_pt, xi, Nxx); } /// @brief Sets bounds on Gauss integration points in parametric space and @@ -333,7 +915,9 @@ void get_nnx(const int nsd, const consts::ElementType eType, const int eNoN, con l1 = (l1 && l2 && l3 && l4); if (!l1) { - throw std::runtime_error("Error in computing shape functions"); + throw fe::InvalidArgumentException( + "Error in computing shape functions", + __FILE__, __LINE__, __func__); } } @@ -582,8 +1166,11 @@ void gnnb(const ComMod& com_mod, const faceType& lFa, const int e, const int g, } if (!found_node) { - throw std::runtime_error("[svMultiPhysics::gnnb] ERROR: The '" + lFa.name + "' face node " + std::to_string(Ac) + - " could not be matched to a node in the '" + msh.name + "' volume mesh."); + throw fe::InvalidArgumentException( + "[svMultiPhysics::gnnb] ERROR: The '" + lFa.name + "' face node " + + std::to_string(Ac) + " could not be matched to a node in the '" + + msh.name + "' volume mesh.", + __FILE__, __LINE__, __func__); } ptr(a) = b; @@ -632,7 +1219,9 @@ void gnnb(const ComMod& com_mod, const faceType& lFa, const int e, const int g, } break; default: - throw std::runtime_error("gnnb: invalid MechanicalConfigurationType provided"); + throw fe::InvalidArgumentException( + "gnnb: invalid MechanicalConfigurationType provided", + __FILE__, __LINE__, __func__); } } } @@ -821,7 +1410,8 @@ void gn_nxx(const int l, const int eNoN, const int nsd, const int insd, Array + +#include "FE/Basis/BasisCache.h" +#include "FE/Basis/BasisFactory.h" +#include "FE/Basis/LagrangeBasis.h" +#include "FE/Basis/SerendipityBasis.h" +#include "FE/Quadrature/QuadratureRule.h" + +#include +#include + +using namespace svmp::FE; +using namespace svmp::FE::basis; +using namespace svmp::FE::quadrature; + +namespace { + +class CustomQuadratureRule final : public QuadratureRule { +public: + CustomQuadratureRule(svmp::CellFamily family, + int dimension, + int order, + std::vector points, + std::vector weights) + : 
QuadratureRule(family, dimension, order) + { + set_data(std::move(points), std::move(weights)); + } +}; + +CustomQuadratureRule line_rule() { + return CustomQuadratureRule( + svmp::CellFamily::Line, 1, 3, + { + QuadPoint{Real(-0.5), Real(0), Real(0)}, + QuadPoint{Real(0.5), Real(0), Real(0)} + }, + {Real(1), Real(1)}); +} + +CustomQuadratureRule quad_rule(Real first_weight = Real(1)) { + return CustomQuadratureRule( + svmp::CellFamily::Quad, 2, 3, + { + QuadPoint{Real(-0.5), Real(-0.5), Real(0)}, + QuadPoint{Real(0.5), Real(-0.25), Real(0)}, + QuadPoint{Real(0.0), Real(0.5), Real(0)} + }, + {first_weight, Real(1), Real(2)}); +} + +class TestCustomScalarBasis final : public BasisFunction { +public: + explicit TestCustomScalarBasis(int tag) + : tag_(tag) + { + } + + BasisType basis_type() const noexcept override { return BasisType::Custom; } + ElementType element_type() const noexcept override { return ElementType::Line2; } + int dimension() const noexcept override { return 1; } + int order() const noexcept override { return 1; } + std::size_t size() const noexcept override { return 2u; } + + std::string cache_identity() const override { + return BasisFunction::cache_identity() + "|tag=" + std::to_string(tag_); + } + + void evaluate_values(const math::Vector& xi, + std::vector& values) const override + { + values.resize(2u); + const Real shift = Real(tag_) * Real(0.125); + values[0] = Real(0.5) * (Real(1) - xi[0]) + shift; + values[1] = Real(0.5) * (Real(1) + xi[0]) - shift; + } + + void evaluate_gradients(const math::Vector&, + std::vector& gradients) const override + { + gradients.assign(2u, Gradient{}); + gradients[0][0] = Real(-0.5); + gradients[1][0] = Real(0.5); + } + +private: + int tag_{0}; +}; + +class StructuredIdentityScalarBasis final : public BasisFunction { +public: + explicit StructuredIdentityScalarBasis(int tag) + : tag_(tag) + { + } + + BasisType basis_type() const noexcept override { return BasisType::Custom; } + ElementType element_type() const 
noexcept override { return ElementType::Line2; } + int dimension() const noexcept override { return 1; } + int order() const noexcept override { return 1; } + std::size_t size() const noexcept override { return 2u; } + + bool cache_identity_words(std::vector& words) const override { + words.push_back(0x7374727563746964ULL); + words.push_back(static_cast(tag_)); + return true; + } + + std::string cache_identity() const override { + ++string_identity_calls; + return BasisFunction::cache_identity() + "|structured-tag=" + std::to_string(tag_); + } + + void evaluate_values(const math::Vector& xi, + std::vector& values) const override + { + values.resize(2u); + values[0] = Real(1) - xi[0] + Real(tag_); + values[1] = xi[0] - Real(tag_); + } + + mutable std::size_t string_identity_calls{0}; + +private: + int tag_{0}; +}; + +} // namespace + +TEST(BasisFactory, CreatesLagrangeAndSerendipityBases) { + auto lagrange = basis_factory::create( + BasisRequest{ElementType::Line2, BasisType::Lagrange, 2}); + ASSERT_NE(lagrange, nullptr); + EXPECT_EQ(lagrange->basis_type(), BasisType::Lagrange); + EXPECT_EQ(lagrange->element_type(), ElementType::Line2); + EXPECT_EQ(lagrange->order(), 2); + + auto serendipity = basis_factory::create( + BasisRequest{ElementType::Quad8, BasisType::Serendipity, 2}); + ASSERT_NE(serendipity, nullptr); + EXPECT_EQ(serendipity->basis_type(), BasisType::Serendipity); + EXPECT_EQ(serendipity->element_type(), ElementType::Quad8); + EXPECT_EQ(serendipity->size(), 8u); +} + +TEST(BasisFactory, RejectsOutOfScopeAndInvalidRequests) { + EXPECT_THROW( + (void)basis_factory::create(BasisRequest{ElementType::Line2, BasisType::Lagrange}), + BasisConfigurationException); + EXPECT_THROW( + (void)basis_factory::create( + BasisRequest{ElementType::Line2, BasisType::Lagrange, -1}), + BasisConfigurationException); + EXPECT_THROW( + (void)basis_factory::create( + BasisRequest{ElementType::Line2, BasisType::Bernstein, 1}), + BasisConfigurationException); + EXPECT_THROW( + 
(void)basis_factory::create( + BasisRequest{ElementType::Line2, + BasisType::Lagrange, + 1, + Continuity::H_div, + FieldType::Vector}), + BasisConfigurationException); +} + +TEST(BasisFactory, SupportsCustomFactoryRegistration) { + basis_factory::clear_custom_registry_for_tests(); + basis_factory::register_custom( + "test-custom", + [](const BasisRequest& req) { + const int tag = req.order.value_or(0); + return std::make_shared(tag); + }); + + BasisRequest req{ElementType::Line2, BasisType::Custom, 7}; + req.custom_id = "test-custom"; + auto custom = basis_factory::create(req); + ASSERT_NE(custom, nullptr); + EXPECT_EQ(custom->basis_type(), BasisType::Custom); + EXPECT_EQ(custom->size(), 2u); + + basis_factory::unregister_custom("test-custom"); + EXPECT_THROW((void)basis_factory::create(req), BasisConfigurationException); + basis_factory::clear_custom_registry_for_tests(); +} + +TEST(BasisCache, ReusesEntriesForSameBasisAndQuadratureCoordinates) { + LagrangeBasis basis(ElementType::Line2, 2); + const auto quad = line_rule(); + + auto& cache = BasisCache::instance(); + cache.clear(); + const auto& entry1 = cache.get_or_compute(basis, quad, true, true); + const auto& entry2 = cache.get_or_compute(basis, quad, true, true); + + EXPECT_EQ(&entry1, &entry2); + EXPECT_EQ(entry1.num_qpts, quad.num_points()); + EXPECT_EQ(entry1.num_dofs, basis.size()); + ASSERT_EQ(entry1.scalar_values.size(), basis.size() * quad.num_points()); + ASSERT_EQ(entry1.gradients.size(), basis.size() * 3u * quad.num_points()); + ASSERT_EQ(entry1.hessians.size(), basis.size() * 9u * quad.num_points()); + EXPECT_EQ(cache.size(), 1u); +} + +TEST(BasisCache, ReusesCoordinateIdenticalQuadratureRulesIgnoringWeights) { + SerendipityBasis basis(ElementType::Quad8, 2); + const auto quad_a = quad_rule(Real(1)); + const auto quad_b = quad_rule(Real(0.25)); + + auto& cache = BasisCache::instance(); + cache.clear(); + const auto& entry_a = cache.get_or_compute(basis, quad_a, true, false); + const auto& entry_b 
= cache.get_or_compute(basis, quad_b, true, false); + + EXPECT_EQ(&entry_a, &entry_b); + EXPECT_EQ(cache.size(), 1u); +} + +TEST(BasisCache, SeparatesStringIdentityCustomBases) { + TestCustomScalarBasis custom_a(1); + TestCustomScalarBasis custom_b(2); + const auto quad = line_rule(); + + auto& cache = BasisCache::instance(); + cache.clear(); + const auto& entry_a = cache.get_or_compute(custom_a, quad, false, false); + const auto& entry_b = cache.get_or_compute(custom_b, quad, false, false); + + EXPECT_NE(&entry_a, &entry_b); + EXPECT_NE(entry_a.scalar_values, entry_b.scalar_values); + EXPECT_EQ(cache.size(), 2u); +} + +TEST(BasisCache, StructuredIdentityAvoidsStringFallbackAndSeparatesBases) { + StructuredIdentityScalarBasis custom_a(1); + StructuredIdentityScalarBasis custom_b(2); + const auto quad = line_rule(); + + auto& cache = BasisCache::instance(); + cache.clear(); + const auto& entry_a = cache.get_or_compute(custom_a, quad, false, false); + const auto& entry_b = cache.get_or_compute(custom_b, quad, false, false); + + EXPECT_NE(&entry_a, &entry_b); + EXPECT_EQ(custom_a.string_identity_calls, 0u); + EXPECT_EQ(custom_b.string_identity_calls, 0u); + EXPECT_EQ(cache.size(), 2u); +} + diff --git a/tests/unitTests/FE/Basis/test_BasisErrorPaths.cpp b/tests/unitTests/FE/Basis/test_BasisErrorPaths.cpp new file mode 100644 index 000000000..967f078aa --- /dev/null +++ b/tests/unitTests/FE/Basis/test_BasisErrorPaths.cpp @@ -0,0 +1,203 @@ +/** + * @file test_BasisErrorPaths.cpp + * @brief Error-path coverage for the migrated Lagrange-focused Basis subset. 
+ */ + +#include + +#include "FE/Basis/BasisExceptions.h" +#include "FE/Basis/BasisFactory.h" +#include "FE/Basis/BasisFunction.h" +#include "FE/Basis/LagrangeBasis.h" +#include "FE/Basis/NodeOrderingConventions.h" +#include "FE/Basis/SerendipityBasis.h" + +#include + +using namespace svmp::FE; +using namespace svmp::FE::basis; + +namespace { + +class MinimalScalarBasis : public BasisFunction { +public: + BasisType basis_type() const noexcept override { return BasisType::Custom; } + ElementType element_type() const noexcept override { return ElementType::Line2; } + int dimension() const noexcept override { return 1; } + int order() const noexcept override { return 1; } + std::size_t size() const noexcept override { return 2u; } + + void evaluate_values(const math::Vector&, + std::vector& values) const override + { + values.assign(size(), Real(0)); + } +}; + +class CompleteFallbackBasis : public BasisFunction { +public: + BasisType basis_type() const noexcept override { return BasisType::Custom; } + ElementType element_type() const noexcept override { return ElementType::Triangle3; } + int dimension() const noexcept override { return 2; } + int order() const noexcept override { return 1; } + std::size_t size() const noexcept override { return 2u; } + + void evaluate_values(const math::Vector& xi, + std::vector& values) const override + { + values.resize(size()); + values[0] = Real(1) + xi[0]; + values[1] = Real(2) + xi[1]; + } + + void evaluate_gradients(const math::Vector&, + std::vector& gradients) const override + { + gradients.assign(size(), Gradient{}); + gradients[0][0] = Real(1); + gradients[1][1] = Real(1); + } + + void evaluate_hessians(const math::Vector& xi, + std::vector& hessians) const override + { + hessians.assign(size(), Hessian{}); + for (std::size_t d = 0; d < hessians.size(); ++d) { + for (std::size_t r = 0; r < 3u; ++r) { + for (std::size_t c = 0; c < 3u; ++c) { + hessians[d](r, c) = Real(100) * static_cast(d + 1u) + + Real(10) * static_cast(r) 
+ + static_cast(c) + xi[2]; + } + } + } + } +}; + +} // namespace + +TEST(BasisErrorPaths, LagrangeInvalidRequestsThrowBasisExceptions) { + EXPECT_THROW(LagrangeBasis(ElementType::Unknown, 1), + BasisElementCompatibilityException); + EXPECT_THROW(LagrangeBasis(ElementType::Line2, -1), + BasisConfigurationException); + EXPECT_THROW(LagrangeBasis(ElementType::Quad8, 2), + BasisElementCompatibilityException); +} + +TEST(BasisErrorPaths, SerendipityInvalidRequestsThrowBasisExceptions) { + EXPECT_THROW(SerendipityBasis(ElementType::Unknown, 2), + BasisElementCompatibilityException); + EXPECT_THROW(SerendipityBasis(ElementType::Quad8, 3), + BasisConfigurationException); + EXPECT_THROW(SerendipityBasis(ElementType::Pyramid14, 2), + BasisElementCompatibilityException); +} + +TEST(BasisErrorPaths, BasisFactoryInvalidRequestsThrowBasisExceptions) { + EXPECT_THROW((void)basis_factory::create( + BasisRequest{ElementType::Line2, BasisType::Lagrange}), + BasisConfigurationException); + EXPECT_THROW((void)basis_factory::create( + BasisRequest{ElementType::Line2, BasisType::Lagrange, -1}), + BasisConfigurationException); + EXPECT_THROW((void)basis_factory::create( + BasisRequest{ElementType::Line2, BasisType::Bernstein, 1}), + BasisConfigurationException); + + auto serendipity = basis_factory::create( + BasisRequest{ElementType::Quad8, BasisType::Serendipity, 2}); + ASSERT_NE(serendipity, nullptr); + EXPECT_EQ(serendipity->basis_type(), BasisType::Serendipity); +} + +TEST(BasisErrorPaths, BasisExceptionsUseCommonStatusCodes) { + try { + throw BasisConfigurationException("invalid config", __FILE__, __LINE__, __func__); + } catch (const FEException& e) { + EXPECT_EQ(e.status(), svmp::StatusCode::InvalidArgument); + } + + try { + throw BasisConstructionException("construction failure", __FILE__, __LINE__, __func__); + } catch (const FEException& e) { + EXPECT_EQ(e.status(), svmp::StatusCode::InternalError); + } +} + +TEST(BasisErrorPaths, NodeOrderingInvalidNodeThrows) { + 
EXPECT_THROW((void)ReferenceNodeLayout::get_node_coords(ElementType::Quad8, 99u), + BasisNodeOrderingException); + EXPECT_THROW((void)ReferenceNodeLayout::get_lagrange_node_coords(ElementType::Quad8, 2), + BasisNodeOrderingException); +} + +TEST(BasisErrorPaths, BasisFunctionDefaultsThrowForMissingDerivatives) { + MinimalScalarBasis basis; + const math::Vector xi{Real(0), Real(0), Real(0)}; + std::vector gradients; + std::vector hessians; + + EXPECT_THROW(basis.evaluate_gradients(xi, gradients), BasisEvaluationException); + EXPECT_THROW(basis.evaluate_hessians(xi, hessians), BasisEvaluationException); +} + +TEST(BasisErrorPaths, BasisFunctionFallbackWritesFlatAndStridedLayouts) { + CompleteFallbackBasis basis; + const std::vector> points = { + {Real(0.25), Real(0.5), Real(-0.25)}, + {Real(-0.5), Real(0.75), Real(0.125)} + }; + prewarm_basis_function_scratch(basis.size(), points.size()); + + std::vector flat_values(basis.size()); + std::vector flat_gradients(basis.size() * 3u); + std::vector flat_hessians(basis.size() * 9u); + basis.evaluate_values_to(points.front(), flat_values.data()); + basis.evaluate_gradients_to(points.front(), flat_gradients.data()); + basis.evaluate_hessians_to(points.front(), flat_hessians.data()); + + std::vector expected_values; + std::vector expected_gradients; + std::vector expected_hessians; + basis.evaluate_all(points.front(), expected_values, expected_gradients, expected_hessians); + for (std::size_t d = 0; d < basis.size(); ++d) { + EXPECT_EQ(flat_values[d], expected_values[d]); + for (std::size_t c = 0; c < 3u; ++c) { + EXPECT_EQ(flat_gradients[d * 3u + c], expected_gradients[d][c]); + } + for (std::size_t r = 0; r < 3u; ++r) { + for (std::size_t c = 0; c < 3u; ++c) { + EXPECT_EQ(flat_hessians[d * 9u + r * 3u + c], expected_hessians[d](r, c)); + } + } + } + + constexpr std::size_t output_stride = 3u; + std::vector values(basis.size() * output_stride, Real(-99)); + std::vector gradients(basis.size() * 3u * output_stride, Real(-99)); 
+ std::vector hessians(basis.size() * 9u * output_stride, Real(-99)); + basis.evaluate_at_quadrature_points_strided( + points, output_stride, values.data(), gradients.data(), hessians.data()); + + for (std::size_t q = 0; q < points.size(); ++q) { + basis.evaluate_all(points[q], expected_values, expected_gradients, expected_hessians); + for (std::size_t d = 0; d < basis.size(); ++d) { + EXPECT_EQ(values[d * output_stride + q], expected_values[d]); + for (std::size_t c = 0; c < 3u; ++c) { + EXPECT_EQ(gradients[(d * 3u + c) * output_stride + q], + expected_gradients[d][c]); + } + for (std::size_t r = 0; r < 3u; ++r) { + for (std::size_t c = 0; c < 3u; ++c) { + EXPECT_EQ(hessians[(d * 9u + r * 3u + c) * output_stride + q], + expected_hessians[d](r, c)); + } + } + } + } + + for (std::size_t d = 0; d < basis.size(); ++d) { + EXPECT_EQ(values[d * output_stride + 2u], Real(-99)); + } +} diff --git a/tests/unitTests/FE/Basis/test_BasisHessians.cpp b/tests/unitTests/FE/Basis/test_BasisHessians.cpp new file mode 100644 index 000000000..0899ce358 --- /dev/null +++ b/tests/unitTests/FE/Basis/test_BasisHessians.cpp @@ -0,0 +1,314 @@ +/** + * @file test_BasisHessians.cpp + * @brief Analytical Hessian coverage for the migrated Lagrange basis. 
+ */ + +#include + +#include "FE/Basis/BasisFactory.h" +#include "FE/Basis/LagrangeBasis.h" +#include "FE/Basis/SerendipityBasis.h" + +#include +#include +#include + +using namespace svmp::FE; +using namespace svmp::FE::basis; + +namespace { + +void numerical_hessian_helper(const BasisFunction& basis, + const math::Vector& xi, + std::vector& hessians, + Real eps = Real(1e-5)) +{ + hessians.assign(basis.size(), Hessian{}); + const int dim = basis.dimension(); + + for (int i = 0; i < dim; ++i) { + for (int j = 0; j < dim; ++j) { + math::Vector xi_p = xi; + math::Vector xi_m = xi; + const std::size_t sj = static_cast(j); + xi_p[sj] += eps; + xi_m[sj] -= eps; + + std::vector g_p; + std::vector g_m; + basis.evaluate_gradients(xi_p, g_p); + basis.evaluate_gradients(xi_m, g_m); + + for (std::size_t n = 0; n < basis.size(); ++n) { + const std::size_t si = static_cast(i); + hessians[n](si, sj) = (g_p[n][si] - g_m[n][si]) / (Real(2) * eps); + } + } + } +} + +std::vector> sample_points_for(ElementType type) { + switch (type) { + case ElementType::Line2: + return {{Real(-0.35), Real(0), Real(0)}, {Real(0.2), Real(0), Real(0)}}; + case ElementType::Triangle3: + return {{Real(0.15), Real(0.2), Real(0)}, {Real(0.25), Real(0.1), Real(0)}}; + case ElementType::Quad4: + return {{Real(0.2), Real(-0.3), Real(0)}, {Real(-0.45), Real(0.25), Real(0)}}; + case ElementType::Tetra4: + return {{Real(0.12), Real(0.18), Real(0.16)}, {Real(0.2), Real(0.1), Real(0.18)}}; + case ElementType::Hex8: + return {{Real(0.1), Real(-0.2), Real(0.3)}, {Real(-0.35), Real(0.25), Real(-0.15)}}; + case ElementType::Wedge6: + return {{Real(0.18), Real(0.22), Real(-0.2)}, {Real(0.12), Real(0.16), Real(0.1)}}; + case ElementType::Pyramid5: + return {{Real(0.0), Real(0.0), Real(0.2)}, {Real(0.12), Real(-0.08), Real(0.24)}}; + default: + return {{Real(0), Real(0), Real(0)}}; + } +} + +void expect_hessians_match_numerical(const LagrangeBasis& basis, + const std::vector>& points, + Real tol, + Real eps = Real(1e-5)) 
+{ + for (const auto& xi : points) { + std::vector analytical; + std::vector numerical; + basis.evaluate_hessians(xi, analytical); + numerical_hessian_helper(basis, xi, numerical, eps); + + ASSERT_EQ(analytical.size(), numerical.size()); + for (std::size_t n = 0; n < analytical.size(); ++n) { + for (int i = 0; i < basis.dimension(); ++i) { + for (int j = 0; j < basis.dimension(); ++j) { + const std::size_t si = static_cast(i); + const std::size_t sj = static_cast(j); + EXPECT_NEAR(analytical[n](si, sj), numerical[n](si, sj), tol) + << "basis " << n << ", component (" << i << "," << j + << "), element " << static_cast(basis.element_type()) + << ", order " << basis.order(); + } + } + } + } +} + +void expect_partition_hessian_sum_zero(const LagrangeBasis& basis, + const math::Vector& xi, + Real tol) +{ + std::vector hessians; + basis.evaluate_hessians(xi, hessians); + + Hessian sum{}; + for (const auto& hessian : hessians) { + for (std::size_t r = 0; r < 3u; ++r) { + for (std::size_t c = 0; c < 3u; ++c) { + sum(r, c) += hessian(r, c); + } + } + } + + for (int r = 0; r < basis.dimension(); ++r) { + for (int c = 0; c < basis.dimension(); ++c) { + EXPECT_NEAR(sum(static_cast(r), static_cast(c)), + Real(0), + tol) + << "element " << static_cast(basis.element_type()) + << ", order " << basis.order(); + } + } +} + +void expect_hessians_symmetric(const LagrangeBasis& basis, + const math::Vector& xi, + Real tol) +{ + std::vector hessians; + basis.evaluate_hessians(xi, hessians); + + for (const auto& hessian : hessians) { + for (int r = 0; r < basis.dimension(); ++r) { + for (int c = r + 1; c < basis.dimension(); ++c) { + const std::size_t sr = static_cast(r); + const std::size_t sc = static_cast(c); + EXPECT_NEAR(hessian(sr, sc), hessian(sc, sr), tol); + } + } + } +} + +void expect_partition_hessian_sum_zero(const BasisFunction& basis, + const math::Vector& xi, + Real tol) +{ + std::vector hessians; + basis.evaluate_hessians(xi, hessians); + + Hessian sum{}; + for (const 
auto& hessian : hessians) { + for (std::size_t r = 0; r < 3u; ++r) { + for (std::size_t c = 0; c < 3u; ++c) { + sum(r, c) += hessian(r, c); + } + } + } + + for (int r = 0; r < basis.dimension(); ++r) { + for (int c = 0; c < basis.dimension(); ++c) { + EXPECT_NEAR(sum(static_cast(r), static_cast(c)), + Real(0), + tol) + << "element " << static_cast(basis.element_type()) + << ", order " << basis.order(); + } + } +} + +void expect_hessians_symmetric(const BasisFunction& basis, + const math::Vector& xi, + Real tol) +{ + std::vector hessians; + basis.evaluate_hessians(xi, hessians); + + for (const auto& hessian : hessians) { + for (int r = 0; r < basis.dimension(); ++r) { + for (int c = r + 1; c < basis.dimension(); ++c) { + const std::size_t sr = static_cast(r); + const std::size_t sc = static_cast(c); + EXPECT_NEAR(hessian(sr, sc), hessian(sc, sr), tol); + } + } + } +} + +} // namespace + +TEST(BasisHessians, LagrangeCanonicalTopologiesMatchNumericalHessians) { + const struct Case { + ElementType type; + int order; + Real tol; + Real eps; + } cases[] = { + {ElementType::Line2, 3, Real(1e-7), Real(1e-5)}, + {ElementType::Triangle3, 3, Real(2e-6), Real(1e-5)}, + {ElementType::Quad4, 3, Real(1e-6), Real(1e-5)}, + {ElementType::Tetra4, 2, Real(1e-6), Real(1e-5)}, + {ElementType::Hex8, 2, Real(1e-6), Real(1e-5)}, + {ElementType::Wedge6, 2, Real(1e-5), Real(1e-5)}, + {ElementType::Pyramid5, 1, Real(2e-6), Real(1e-5)}, + {ElementType::Pyramid5, 3, Real(4e-4), Real(2e-5)}, + }; + + for (const auto& c : cases) { + LagrangeBasis basis(c.type, c.order); + expect_hessians_match_numerical(basis, sample_points_for(c.type), c.tol, c.eps); + } +} + +TEST(BasisHessians, LagrangeHessiansSumToZeroAndAreSymmetric) { + const struct Case { + ElementType type; + int order; + math::Vector xi; + Real tol; + } cases[] = { + {ElementType::Line2, 3, {Real(0.15), Real(0), Real(0)}, Real(1e-12)}, + {ElementType::Triangle3, 3, {Real(0.2), Real(0.25), Real(0)}, Real(1e-10)}, + {ElementType::Quad4, 
3, {Real(0.3), Real(-0.2), Real(0)}, Real(1e-12)}, + {ElementType::Tetra4, 2, {Real(0.15), Real(0.2), Real(0.1)}, Real(1e-10)}, + {ElementType::Hex8, 2, {Real(0.1), Real(-0.2), Real(0.3)}, Real(1e-12)}, + {ElementType::Wedge6, 2, {Real(0.2), Real(0.15), Real(-0.3)}, Real(1e-10)}, + {ElementType::Pyramid5, 1, {Real(0.1), Real(-0.2), Real(0.3)}, Real(1e-8)}, + }; + + for (const auto& c : cases) { + LagrangeBasis basis(c.type, c.order); + expect_partition_hessian_sum_zero(basis, c.xi, Real(10) * c.tol); + expect_hessians_symmetric(basis, c.xi, c.tol); + } +} + +TEST(BasisHessians, LagrangePyramidExactApexHessianThrows) { + const struct Case { + ElementType type; + int order; + } cases[] = { + {ElementType::Pyramid5, 1}, + {ElementType::Pyramid14, 2}, + {ElementType::Pyramid5, 4}, + }; + + const math::Vector apex{Real(0), Real(0), Real(1)}; + for (const auto& c : cases) { + LagrangeBasis basis(c.type, c.order); + std::vector hessians; + EXPECT_THROW(basis.evaluate_hessians(apex, hessians), BasisEvaluationException) + << "order " << c.order; + } +} + +TEST(BasisHessians, SerendipityHessiansSumToZeroAndAreSymmetric) { + const struct Case { + ElementType type; + int order; + math::Vector xi; + Real tol; + } cases[] = { + {ElementType::Quad8, 2, {Real(0.17), Real(-0.31), Real(0)}, Real(1e-10)}, + {ElementType::Hex20, 2, {Real(0.2), Real(-0.1), Real(0.3)}, Real(1e-10)}, + {ElementType::Wedge15, 2, {Real(0.2), Real(0.3), Real(0.1)}, Real(1e-10)}, + {ElementType::Pyramid13, 2, {Real(0.1), Real(-0.2), Real(0.4)}, Real(1e-8)}, + }; + + for (const auto& c : cases) { + SerendipityBasis basis(c.type, c.order); + expect_partition_hessian_sum_zero(basis, c.xi, c.tol); + expect_hessians_symmetric(basis, c.xi, c.tol); + } +} + +TEST(BasisHessians, SerendipityPyramidExactApexHessianThrows) { + SerendipityBasis basis(ElementType::Pyramid13, 2); + std::vector hessians; + EXPECT_THROW(basis.evaluate_hessians({Real(0), Real(0), Real(1)}, hessians), + BasisEvaluationException); +} + 
+TEST(BasisHessians, SolverMappedVolumeSelectionsSatisfyInvariants) { + const struct Case { + ElementType type; + BasisType basis_type; + int order; + math::Vector xi; + Real tol; + } cases[] = { + {ElementType::Line2, BasisType::Lagrange, 1, {Real(0.15), Real(0), Real(0)}, Real(1e-12)}, + {ElementType::Line3, BasisType::Lagrange, 2, {Real(-0.25), Real(0), Real(0)}, Real(1e-12)}, + {ElementType::Triangle3, BasisType::Lagrange, 1, {Real(0.2), Real(0.25), Real(0)}, Real(1e-12)}, + {ElementType::Triangle6, BasisType::Lagrange, 2, {Real(0.2), Real(0.25), Real(0)}, Real(1e-12)}, + {ElementType::Quad4, BasisType::Lagrange, 1, {Real(0.3), Real(-0.2), Real(0)}, Real(1e-12)}, + {ElementType::Quad8, BasisType::Serendipity, 2, {Real(0.17), Real(-0.31), Real(0)}, Real(1e-10)}, + {ElementType::Quad9, BasisType::Lagrange, 2, {Real(0.3), Real(-0.2), Real(0)}, Real(1e-12)}, + {ElementType::Tetra4, BasisType::Lagrange, 1, {Real(0.15), Real(0.2), Real(0.1)}, Real(1e-12)}, + {ElementType::Tetra10, BasisType::Lagrange, 2, {Real(0.15), Real(0.2), Real(0.1)}, Real(1e-10)}, + {ElementType::Hex8, BasisType::Lagrange, 1, {Real(0.1), Real(-0.2), Real(0.3)}, Real(1e-12)}, + {ElementType::Hex20, BasisType::Serendipity, 2, {Real(0.2), Real(-0.1), Real(0.3)}, Real(1e-10)}, + {ElementType::Hex27, BasisType::Lagrange, 2, {Real(0.1), Real(-0.2), Real(0.3)}, Real(1e-12)}, + {ElementType::Wedge6, BasisType::Lagrange, 1, {Real(0.2), Real(0.15), Real(-0.3)}, Real(1e-12)}, + }; + + int covered = 0; + for (const auto& c : cases) { + auto basis = basis_factory::create(BasisRequest{c.type, c.basis_type, c.order}); + expect_partition_hessian_sum_zero(*basis, c.xi, c.tol); + expect_hessians_symmetric(*basis, c.xi, c.tol); + ++covered; + } + + EXPECT_EQ(covered, 13); +} diff --git a/tests/unitTests/FE/Basis/test_ConstexprBasis.cpp b/tests/unitTests/FE/Basis/test_ConstexprBasis.cpp new file mode 100644 index 000000000..a1031fa76 --- /dev/null +++ b/tests/unitTests/FE/Basis/test_ConstexprBasis.cpp @@ -0,0 
+1,226 @@ +/** + * @file test_ConstexprBasis.cpp + * @brief Compile-time and lightweight runtime checks for migrated Basis helpers. + */ + +#include "FE/Basis/BasisTolerance.h" +#include "FE/Basis/BasisTraits.h" +#include "FE/Basis/LagrangeBasis.h" +#include "FE/Basis/LagrangeBasisFast.h" +#include "FE/Basis/NodeOrderingConventions.h" + +#include + +#include +#include +#include +#include +#include +#include + +namespace svmp { +namespace FE { +namespace basis { +namespace { + +static_assert(is_line(ElementType::Line2)); +static_assert(is_line(ElementType::Line3)); +static_assert(is_triangle(ElementType::Triangle6)); +static_assert(is_quadrilateral(ElementType::Quad8)); +static_assert(is_tetrahedron(ElementType::Tetra10)); +static_assert(is_hexahedron(ElementType::Hex20)); +static_assert(is_wedge(ElementType::Wedge18)); +static_assert(is_pyramid(ElementType::Pyramid14)); +static_assert(is_simplex(ElementType::Triangle3)); +static_assert(is_simplex(ElementType::Tetra4)); +static_assert(!is_simplex(ElementType::Wedge6)); +static_assert(is_tensor_product(ElementType::Line2)); +static_assert(is_tensor_product(ElementType::Quad9)); +static_assert(is_tensor_product(ElementType::Hex27)); +static_assert(!is_tensor_product(ElementType::Pyramid5)); +static_assert(reference_dimension(ElementType::Pyramid14) == 3); +static_assert(canonical_lagrange_type(ElementType::Hex27) == ElementType::Hex8); +static_assert(canonical_lagrange_type(ElementType::Pyramid13) == ElementType::Pyramid13); +static_assert(complete_lagrange_alias_order(ElementType::Wedge18) == 2); +static_assert(complete_lagrange_alias_order(ElementType::Hex20) == -1); +static_assert(line_lagrange_size(2) == 3u); +static_assert(triangle_lagrange_size(2) == 6u); +static_assert(quad_lagrange_size(2) == 9u); +static_assert(tetra_lagrange_size(2) == 10u); +static_assert(hex_lagrange_size(2) == 27u); +static_assert(wedge_lagrange_size(2) == 18u); +static_assert(pyramid_lagrange_size(2) == 14u); 
+static_assert(detail::basis_abs(Real(-2)) == Real(2)); +static_assert(detail::basis_max(Real(2), Real(3)) == Real(3)); +static_assert(detail::basis_near_zero(std::numeric_limits::epsilon() * Real(32))); +static_assert(detail::basis_nearly_equal( + Real(1), + Real(1) + std::numeric_limits::epsilon() * Real(32))); + +constexpr auto kLineFastValues = [] { + math::Vector xi{Real(0), Real(0), Real(0)}; + std::array::n_dofs> values{}; + LagrangeLineFast<1>::evaluate(xi, values); + return values; +}(); +static_assert(kLineFastValues[0] == Real(0.5)); +static_assert(kLineFastValues[1] == Real(0.5)); + +constexpr auto kLineP2FastHessians = [] { + math::Vector xi{Real(0), Real(0), Real(0)}; + std::array::n_dofs> hessians{}; + LagrangeLineFast<2>::evaluate_hessians(xi, hessians); + return hessians; +}(); +static_assert(kLineP2FastHessians[0](0, 0) == Real(1)); +static_assert(kLineP2FastHessians[1](0, 0) == Real(1)); +static_assert(kLineP2FastHessians[2](0, 0) == Real(-2)); + +constexpr auto kTriP2FastValues = [] { + math::Vector xi{Real(0.25), Real(0.25), Real(0)}; + std::array::n_dofs> values{}; + LagrangeTriFast<2>::evaluate(xi, values); + return values; +}(); +static_assert(kTriP2FastValues[0] == Real(0)); +static_assert(kTriP2FastValues[3] == Real(0.5)); +static_assert(kTriP2FastValues[4] == Real(0.25)); + +template +constexpr bool overrides_scalar_strided_v = + !std::is_same_v; + +template +void expect_fast_matches_lagrange(ElementType type, + int order, + const std::vector>& points) +{ + LagrangeBasis basis(type, order); + for (const auto& xi : points) { + std::vector expected_values; + std::vector expected_gradients; + std::vector expected_hessians; + basis.evaluate_all(xi, expected_values, expected_gradients, expected_hessians); + + std::array values{}; + std::array gradients{}; + std::array hessians{}; + FastBasis::evaluate(xi, values); + FastBasis::evaluate_gradients(xi, gradients); + FastBasis::evaluate_hessians(xi, hessians); + + ASSERT_EQ(expected_values.size(), 
values.size()); + for (std::size_t i = 0; i < values.size(); ++i) { + EXPECT_NEAR(values[i], expected_values[i], Real(1e-14)); + for (std::size_t d = 0; d < 3u; ++d) { + EXPECT_NEAR(gradients[i][d], expected_gradients[i][d], Real(1e-14)); + for (std::size_t e = 0; e < 3u; ++e) { + EXPECT_NEAR(hessians[i](d, e), expected_hessians[i](d, e), Real(1e-14)); + } + } + } + } +} + +TEST(ConstexprBasis, FixedNodeTableSizes) { + const std::vector> expected = { + {ElementType::Line2, 2u}, + {ElementType::Line3, 3u}, + {ElementType::Triangle3, 3u}, + {ElementType::Triangle6, 6u}, + {ElementType::Quad4, 4u}, + {ElementType::Quad8, 8u}, + {ElementType::Quad9, 9u}, + {ElementType::Tetra4, 4u}, + {ElementType::Tetra10, 10u}, + {ElementType::Hex8, 8u}, + {ElementType::Hex20, 20u}, + {ElementType::Hex27, 27u}, + {ElementType::Wedge6, 6u}, + {ElementType::Wedge15, 15u}, + {ElementType::Wedge18, 18u}, + {ElementType::Pyramid5, 5u}, + {ElementType::Pyramid13, 13u}, + {ElementType::Pyramid14, 14u}, + }; + + for (const auto& [type, size] : expected) { + EXPECT_EQ(ReferenceNodeLayout::num_nodes(type), size); + } +} + +TEST(ConstexprBasis, BasisToleranceScalesWithRealPrecision) { + const Real eps = std::numeric_limits::epsilon(); + EXPECT_GT(detail::basis_scaled_tolerance(), eps); + EXPECT_TRUE(detail::basis_near_zero(eps * Real(32))); + EXPECT_FALSE(detail::basis_near_zero(eps * Real(128))); + EXPECT_TRUE(detail::basis_nearly_equal(Real(1), Real(1) + eps * Real(32))); + EXPECT_FALSE(detail::basis_nearly_equal(Real(1), Real(1) + eps * Real(128))); +} + +TEST(ConstexprBasis, LagrangeOverridesStridedEvaluation) { + EXPECT_TRUE(overrides_scalar_strided_v); +} + +TEST(ConstexprBasis, FastSidecarsMatchRuntimeLagrangeBasis) { + expect_fast_matches_lagrange>( + ElementType::Line2, 1, + {{Real(-0.2), Real(0), Real(0)}, {Real(0.35), Real(0), Real(0)}}); + expect_fast_matches_lagrange>( + ElementType::Line2, 2, + {{Real(-0.2), Real(0), Real(0)}, {Real(0.35), Real(0), Real(0)}}); + 
expect_fast_matches_lagrange>( + ElementType::Quad4, 1, + {{Real(-0.2), Real(0.3), Real(0)}, {Real(0.35), Real(-0.45), Real(0)}}); + expect_fast_matches_lagrange>( + ElementType::Hex8, 1, + {{Real(-0.2), Real(0.3), Real(0.1)}, {Real(0.35), Real(-0.45), Real(0.25)}}); + expect_fast_matches_lagrange>( + ElementType::Triangle3, 1, + {{Real(0.2), Real(0.3), Real(0)}, {Real(0.1), Real(0.6), Real(0)}}); + expect_fast_matches_lagrange>( + ElementType::Triangle3, 2, + {{Real(0.2), Real(0.3), Real(0)}, {Real(0.1), Real(0.6), Real(0)}}); + expect_fast_matches_lagrange>( + ElementType::Tetra4, 1, + {{Real(0.2), Real(0.3), Real(0.1)}, {Real(0.1), Real(0.2), Real(0.4)}}); + expect_fast_matches_lagrange>( + ElementType::Tetra4, 2, + {{Real(0.2), Real(0.3), Real(0.1)}, {Real(0.1), Real(0.2), Real(0.4)}}); +} + +TEST(ConstexprBasis, CompleteAliasTablesMatchGeneratedLagrangeNodes) { + const std::vector> aliases = { + {ElementType::Line2, ElementType::Line2, 1}, + {ElementType::Line3, ElementType::Line2, 2}, + {ElementType::Triangle3, ElementType::Triangle3, 1}, + {ElementType::Triangle6, ElementType::Triangle3, 2}, + {ElementType::Quad4, ElementType::Quad4, 1}, + {ElementType::Quad9, ElementType::Quad4, 2}, + {ElementType::Tetra4, ElementType::Tetra4, 1}, + {ElementType::Tetra10, ElementType::Tetra4, 2}, + {ElementType::Hex8, ElementType::Hex8, 1}, + {ElementType::Hex27, ElementType::Hex8, 2}, + {ElementType::Wedge6, ElementType::Wedge6, 1}, + {ElementType::Wedge18, ElementType::Wedge6, 2}, + {ElementType::Pyramid5, ElementType::Pyramid5, 1}, + {ElementType::Pyramid14, ElementType::Pyramid5, 2}, + }; + + for (const auto& [alias, canonical_type, order] : aliases) { + const auto nodes = ReferenceNodeLayout::get_lagrange_node_coords(canonical_type, order); + ASSERT_EQ(nodes.size(), ReferenceNodeLayout::num_nodes(alias)); + for (std::size_t i = 0; i < nodes.size(); ++i) { + const auto direct = ReferenceNodeLayout::get_node_coords(alias, i); + EXPECT_EQ(nodes[i][0], direct[0]); + 
EXPECT_EQ(nodes[i][1], direct[1]); + EXPECT_EQ(nodes[i][2], direct[2]); + } + } +} + +} // namespace +} // namespace basis +} // namespace FE +} // namespace svmp diff --git a/tests/unitTests/FE/Basis/test_HigherOrderWedgePyramid.cpp b/tests/unitTests/FE/Basis/test_HigherOrderWedgePyramid.cpp new file mode 100644 index 000000000..26efc4070 --- /dev/null +++ b/tests/unitTests/FE/Basis/test_HigherOrderWedgePyramid.cpp @@ -0,0 +1,173 @@ +/** + * @file test_HigherOrderWedgePyramid.cpp + * @brief Focused higher-order wedge and pyramid checks for LagrangeBasis. + */ + +#include + +#include "FE/Basis/LagrangeBasis.h" +#include "FE/Basis/NodeOrderingConventions.h" + +#include +#include +#include +#include + +using namespace svmp::FE; +using namespace svmp::FE::basis; + +namespace { + +void expect_nodes_close(const std::vector>& lhs, + const std::vector>& rhs, + Real tol) +{ + ASSERT_EQ(lhs.size(), rhs.size()); + for (std::size_t i = 0; i < lhs.size(); ++i) { + EXPECT_NEAR(lhs[i][0], rhs[i][0], tol) << "node " << i; + EXPECT_NEAR(lhs[i][1], rhs[i][1], tol) << "node " << i; + EXPECT_NEAR(lhs[i][2], rhs[i][2], tol) << "node " << i; + } +} + +void expect_kronecker_at_nodes(const LagrangeBasis& basis, Real tol) +{ + const auto& nodes = basis.nodes(); + ASSERT_EQ(nodes.size(), basis.size()); + + std::vector values; + for (std::size_t node = 0; node < nodes.size(); ++node) { + basis.evaluate_values(nodes[node], values); + ASSERT_EQ(values.size(), basis.size()); + for (std::size_t i = 0; i < values.size(); ++i) { + const Real expected = (i == node) ? 
Real(1) : Real(0); + EXPECT_NEAR(values[i], expected, tol) + << "node " << node << ", basis " << i; + } + } +} + +void expect_partition_gradient_hessian_sums(const LagrangeBasis& basis, + const std::vector>& points, + Real value_tol, + Real derivative_tol) +{ + for (const auto& xi : points) { + std::vector values; + std::vector gradients; + std::vector hessians; + basis.evaluate_all(xi, values, gradients, hessians); + + Real value_sum = Real(0); + Gradient gradient_sum{}; + Hessian hessian_sum{}; + for (std::size_t i = 0; i < values.size(); ++i) { + value_sum += values[i]; + for (std::size_t d = 0; d < 3u; ++d) { + gradient_sum[d] += gradients[i][d]; + for (std::size_t e = 0; e < 3u; ++e) { + hessian_sum(d, e) += hessians[i](d, e); + } + } + } + + EXPECT_NEAR(value_sum, Real(1), value_tol); + for (int d = 0; d < basis.dimension(); ++d) { + EXPECT_NEAR(gradient_sum[static_cast(d)], Real(0), derivative_tol); + for (int e = 0; e < basis.dimension(); ++e) { + EXPECT_NEAR(hessian_sum(static_cast(d), + static_cast(e)), + Real(0), + derivative_tol); + } + } + } +} + +void expect_all_entries_finite(const LagrangeBasis& basis, + const math::Vector& xi) +{ + std::vector values; + std::vector gradients; + std::vector hessians; + basis.evaluate_all(xi, values, gradients, hessians); + + for (std::size_t i = 0; i < values.size(); ++i) { + EXPECT_TRUE(std::isfinite(static_cast(values[i]))) << "value " << i; + for (std::size_t d = 0; d < 3u; ++d) { + EXPECT_TRUE(std::isfinite(static_cast(gradients[i][d]))) + << "gradient " << i << ", " << d; + for (std::size_t e = 0; e < 3u; ++e) { + EXPECT_TRUE(std::isfinite(static_cast(hessians[i](d, e)))) + << "hessian " << i << ", " << d << ", " << e; + } + } + } +} + +} // namespace + +TEST(HigherOrderWedgePyramid, CompleteAliasesMatchGeneratedNodeLayouts) { + const std::vector> cases = { + {ElementType::Wedge18, ElementType::Wedge6, 2}, + {ElementType::Pyramid14, ElementType::Pyramid5, 2}, + }; + + for (const auto& [alias, canonical, order] 
: cases) { + LagrangeBasis alias_basis(alias, order); + const auto generated = ReferenceNodeLayout::get_lagrange_node_coords(canonical, order); + ASSERT_EQ(generated.size(), ReferenceNodeLayout::num_nodes(alias)); + expect_nodes_close(alias_basis.nodes(), generated, Real(1e-14)); + + for (std::size_t i = 0; i < generated.size(); ++i) { + const auto public_node = ReferenceNodeLayout::get_node_coords(alias, i); + EXPECT_NEAR(public_node[0], generated[i][0], Real(1e-14)) << "node " << i; + EXPECT_NEAR(public_node[1], generated[i][1], Real(1e-14)) << "node " << i; + EXPECT_NEAR(public_node[2], generated[i][2], Real(1e-14)) << "node " << i; + } + } +} + +TEST(HigherOrderWedgePyramid, WedgeOrderThreeIsNodalAndPartitionsUnity) { + LagrangeBasis wedge(ElementType::Wedge6, 3); + + expect_kronecker_at_nodes(wedge, Real(2e-10)); + expect_partition_gradient_hessian_sums( + wedge, + { + {Real(0.18), Real(0.22), Real(-0.2)}, + {Real(0.12), Real(0.16), Real(0.1)}, + {Real(0.25), Real(0.15), Real(0.45)}, + }, + Real(1e-12), + Real(1e-9)); +} + +TEST(HigherOrderWedgePyramid, PyramidOrderThreeIsNodalAndPartitionsUnity) { + LagrangeBasis pyramid(ElementType::Pyramid5, 3); + + expect_kronecker_at_nodes(pyramid, Real(5e-8)); + expect_partition_gradient_hessian_sums( + pyramid, + { + {Real(0), Real(0), Real(0.2)}, + {Real(0.12), Real(-0.08), Real(0.24)}, + {Real(-0.08), Real(0.1), Real(0.55)}, + }, + Real(1e-11), + Real(5e-7)); +} + +TEST(HigherOrderWedgePyramid, PyramidNearApexDerivativeQueriesRemainFinite) { + const std::vector> cases = { + {ElementType::Pyramid5, 1}, + {ElementType::Pyramid14, 2}, + {ElementType::Pyramid5, 4}, + }; + + for (const auto& [type, order] : cases) { + LagrangeBasis basis(type, order); + expect_all_entries_finite(basis, {Real(0.01), Real(-0.005), Real(0.92)}); + expect_all_entries_finite(basis, {Real(-0.004), Real(0.007), Real(0.98)}); + } +} diff --git a/tests/unitTests/FE/Basis/test_LagrangeBasis.cpp b/tests/unitTests/FE/Basis/test_LagrangeBasis.cpp new 
file mode 100644 index 000000000..a88d860e9 --- /dev/null +++ b/tests/unitTests/FE/Basis/test_LagrangeBasis.cpp @@ -0,0 +1,3028 @@ +/** + * @file test_LagrangeBasis.cpp + * @brief Unit tests for Lagrange basis functions + */ + +#include +#include "FE/Basis/BasisFactory.h" +#include "FE/Basis/LagrangeBasis.h" +#include "FE/Basis/NodeOrderingConventions.h" +#include "FE/Basis/SerendipityBasis.h" +#include "fs.h" +#include "nn.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace legacy_solver_nn { +using namespace consts; +#include "nn_elem_gip.h" +#include "nn_elem_gnn.h" +#include "nn_elem_gnnxx.h" +} // namespace legacy_solver_nn + +using svmp::FE::basis::LagrangeBasis; +using svmp::FE::ElementType; +using svmp::FE::Real; +using svmp::FE::basis::Gradient; +using svmp::FE::basis::Hessian; +using svmp::FE::basis::ReferenceNodeLayout; + +namespace { + +using Point = svmp::FE::math::Vector; + +struct SolverBasisAdapterCase { + consts::ElementType type; + consts::ElementType quadrature_type; + int insd; + int eNoN; + int nG; +}; + +std::vector solver_basis_adapter_cases() { + using consts::ElementType; + return { + {ElementType::LIN1, ElementType::LIN1, 1, 2, 2}, + {ElementType::LIN2, ElementType::LIN2, 1, 3, 3}, + {ElementType::TRI3, ElementType::TRI3, 2, 3, 3}, + {ElementType::TRI6, ElementType::TRI6, 2, 6, 7}, + {ElementType::QUD4, ElementType::QUD4, 2, 4, 4}, + {ElementType::QUD8, ElementType::QUD9, 2, 8, 9}, + {ElementType::QUD9, ElementType::QUD9, 2, 9, 9}, + {ElementType::TET4, ElementType::TET4, 3, 4, 4}, + {ElementType::TET10, ElementType::TET10, 3, 10, 15}, + {ElementType::HEX8, ElementType::HEX8, 3, 8, 8}, + {ElementType::HEX20, ElementType::HEX20, 3, 20, 27}, + {ElementType::HEX27, ElementType::HEX27, 3, 27, 27}, + {ElementType::WDG, ElementType::WDG, 3, 6, 6}, + }; +} + +std::vector solver_face_basis_adapter_cases() { + using consts::ElementType; + return { + {ElementType::LIN1, ElementType::LIN1, 1, 2, 
2}, + {ElementType::LIN2, ElementType::LIN2, 1, 3, 3}, + {ElementType::TRI3, ElementType::TRI3, 2, 3, 3}, + {ElementType::TRI6, ElementType::TRI6, 2, 6, 7}, + {ElementType::QUD4, ElementType::QUD4, 2, 4, 4}, + {ElementType::QUD8, ElementType::QUD8, 2, 8, 9}, + {ElementType::QUD9, ElementType::QUD9, 2, 9, 9}, + }; +} + +std::vector solver_hessian_adapter_cases() { + return solver_basis_adapter_cases(); +} + +std::vector solver_legacy_hessian_parity_cases() { + using consts::ElementType; + return { + {ElementType::TRI6, ElementType::TRI6, 2, 6, 7}, + {ElementType::QUD9, ElementType::QUD9, 2, 9, 9}, + {ElementType::TET10, ElementType::TET10, 3, 10, 15}, + }; +} + +int packed_hessian_components(int insd) { + if (insd == 1) { + return 1; + } + if (insd == 2) { + return 3; + } + return 6; +} + +void fill_legacy_quadrature(const SolverBasisAdapterCase& c, + Vector& w, + Array& xi) { + mshType mesh; + mesh.eType = c.quadrature_type; + mesh.eNoN = c.eNoN; + mesh.nG = c.nG; + mesh.w.resize(c.nG); + mesh.xi.resize(c.insd, c.nG); + legacy_solver_nn::set_element_gauss_int_data.at(c.quadrature_type)(mesh); + w = mesh.w; + xi = mesh.xi; +} + +faceType initialized_face_for_case(const SolverBasisAdapterCase& c) { + faceType face; + face.eType = c.type; + face.eNoN = c.eNoN; + face.nG = c.nG; + face.w.resize(c.nG); + face.xi.resize(c.insd, c.nG); + legacy_solver_nn::set_face_gauss_int_data.at(c.quadrature_type)(face); + face.N.resize(c.eNoN, c.nG); + face.Nx.resize(c.insd, c.eNoN, c.nG); + return face; +} + +void expect_arrays_near(const Array& actual, + const Array& expected, + double tol) { + ASSERT_EQ(actual.nrows(), expected.nrows()); + ASSERT_EQ(actual.ncols(), expected.ncols()); + for (int col = 0; col < actual.ncols(); ++col) { + for (int row = 0; row < actual.nrows(); ++row) { + EXPECT_NEAR(actual(row, col), expected(row, col), tol) + << "row=" << row << ", col=" << col; + } + } +} + +void expect_vectors_near(const Vector& actual, + const Vector& expected, + double tol) { + 
ASSERT_EQ(actual.size(), expected.size()); + for (int i = 0; i < actual.size(); ++i) { + EXPECT_NEAR(actual(i), expected(i), tol) << "index=" << i; + } +} + +void expect_array3_near(const Array3& actual, + const Array3& expected, + double tol) { + ASSERT_EQ(actual.nrows(), expected.nrows()); + ASSERT_EQ(actual.ncols(), expected.ncols()); + ASSERT_EQ(actual.nslices(), expected.nslices()); + for (int slice = 0; slice < actual.nslices(); ++slice) { + for (int col = 0; col < actual.ncols(); ++col) { + for (int row = 0; row < actual.nrows(); ++row) { + EXPECT_NEAR(actual(row, col, slice), expected(row, col, slice), tol) + << "row=" << row << ", col=" << col << ", slice=" << slice; + } + } + } +} + +void fill_array3(Array3& values, double value) { + for (int slice = 0; slice < values.nslices(); ++slice) { + for (int col = 0; col < values.ncols(); ++col) { + for (int row = 0; row < values.nrows(); ++row) { + values(row, col, slice) = value; + } + } + } +} + +void expect_face_partition_identities(const SolverBasisAdapterCase& c, + const faceType& face, + int g, + double tol) { + double partition = 0.0; + std::array gradient_sum{0.0, 0.0, 0.0}; + + for (int a = 0; a < c.eNoN; ++a) { + EXPECT_TRUE(std::isfinite(face.N(a, g))) + << "element=" << static_cast(c.type) + << ", node=" << a + << ", g=" << g; + partition += face.N(a, g); + + for (int d = 0; d < c.insd; ++d) { + EXPECT_TRUE(std::isfinite(face.Nx(d, a, g))) + << "element=" << static_cast(c.type) + << ", d=" << d + << ", node=" << a + << ", g=" << g; + gradient_sum[static_cast(d)] += face.Nx(d, a, g); + } + } + + EXPECT_NEAR(partition, 1.0, tol) + << "element=" << static_cast(c.type) << ", g=" << g; + for (int d = 0; d < c.insd; ++d) { + EXPECT_NEAR(gradient_sum[static_cast(d)], 0.0, tol) + << "element=" << static_cast(c.type) << ", d=" << d << ", g=" << g; + } +} + +bool array3_has_nonzero_component(const Array3& values, + int row, + double tol) { + for (int slice = 0; slice < values.nslices(); ++slice) { + for (int 
col = 0; col < values.ncols(); ++col) { + if (std::abs(values(row, col, slice)) > tol) { + return true; + } + } + } + return false; +} + +Array single_point_xi(const SolverBasisAdapterCase& c, + const Array& xi, + int g) { + Array point(c.insd, 1); + for (int d = 0; d < c.insd; ++d) { + point(d, 0) = xi(d, g); + } + return point; +} + +std::vector finite_difference_solver_second_derivative( + const SolverBasisAdapterCase& c, + const Array& point, + int gradient_component, + int coordinate_component, + double eps) { + Array xi_plus = point; + Array xi_minus = point; + xi_plus(coordinate_component, 0) += eps; + xi_minus(coordinate_component, 0) -= eps; + + Array N_plus(c.eNoN, 1); + Array N_minus(c.eNoN, 1); + Array3 Nx_plus(c.insd, c.eNoN, 1); + Array3 Nx_minus(c.insd, c.eNoN, 1); + + nn::get_gnn(c.insd, c.type, c.eNoN, 0, xi_plus, N_plus, Nx_plus); + nn::get_gnn(c.insd, c.type, c.eNoN, 0, xi_minus, N_minus, Nx_minus); + + std::vector values(static_cast(c.eNoN)); + for (int a = 0; a < c.eNoN; ++a) { + values[static_cast(a)] = + (Nx_plus(gradient_component, a, 0) - Nx_minus(gradient_component, a, 0)) / + (2.0 * eps); + } + return values; +} + +void expect_packed_hessian_component_matches_finite_difference( + const SolverBasisAdapterCase& c, + const Array& point, + const Array3& Nxx, + int g, + int packed_row, + int first_derivative_component, + int second_derivative_component, + double tol) { + const double eps = 2e-6; + const auto numerical = finite_difference_solver_second_derivative( + c, point, first_derivative_component, second_derivative_component, eps); + for (int a = 0; a < c.eNoN; ++a) { + EXPECT_NEAR(Nxx(packed_row, a, g), numerical[static_cast(a)], tol) + << "element=" << static_cast(c.type) + << ", packed_row=" << packed_row + << ", node=" << a + << ", g=" << g; + } + + if (first_derivative_component != second_derivative_component) { + const auto symmetric_numerical = finite_difference_solver_second_derivative( + c, point, second_derivative_component, 
first_derivative_component, eps); + for (int a = 0; a < c.eNoN; ++a) { + EXPECT_NEAR(Nxx(packed_row, a, g), + symmetric_numerical[static_cast(a)], + tol) + << "element=" << static_cast(c.type) + << ", symmetry packed_row=" << packed_row + << ", node=" << a + << ", g=" << g; + } + } +} + +void expect_solver_hessian_matches_gradient_finite_difference( + const SolverBasisAdapterCase& c, + const Array& xi, + int g, + const Array3& Nxx, + double tol) { + const Array point = single_point_xi(c, xi, g); + + expect_packed_hessian_component_matches_finite_difference(c, point, Nxx, g, 0, 0, 0, tol); + if (c.insd >= 2) { + expect_packed_hessian_component_matches_finite_difference(c, point, Nxx, g, 1, 1, 1, tol); + } + if (c.insd == 2) { + expect_packed_hessian_component_matches_finite_difference(c, point, Nxx, g, 2, 0, 1, tol); + } else if (c.insd >= 3) { + expect_packed_hessian_component_matches_finite_difference(c, point, Nxx, g, 2, 2, 2, tol); + expect_packed_hessian_component_matches_finite_difference(c, point, Nxx, g, 3, 0, 1, tol); + expect_packed_hessian_component_matches_finite_difference(c, point, Nxx, g, 4, 1, 2, tol); + expect_packed_hessian_component_matches_finite_difference(c, point, Nxx, g, 5, 0, 2, tol); + } +} + +void expect_partition_hessian_identity(const SolverBasisAdapterCase& c, + const Array3& Nxx, + int g, + double tol) { + for (int row = 0; row < Nxx.nrows(); ++row) { + double sum = 0.0; + for (int a = 0; a < c.eNoN; ++a) { + sum += Nxx(row, a, g); + } + EXPECT_NEAR(sum, 0.0, tol) + << "element=" << static_cast(c.type) + << ", packed_row=" << row + << ", g=" << g; + } +} + +void expect_all_hessians_zero(const SolverBasisAdapterCase& c, + const Array3& Nxx, + int g, + double tol) { + for (int row = 0; row < Nxx.nrows(); ++row) { + for (int a = 0; a < c.eNoN; ++a) { + EXPECT_NEAR(Nxx(row, a, g), 0.0, tol) + << "element=" << static_cast(c.type) + << ", packed_row=" << row + << ", node=" << a + << ", g=" << g; + } + } +} + +mshType 
initialized_mesh_for_case(const SolverBasisAdapterCase& c, bool force_lShpF) { + mshType mesh; + mesh.nFs = 1; + mesh.eType = c.type; + mesh.eNoN = c.eNoN; + mesh.nG = c.nG; + mesh.lShpF = force_lShpF; + mesh.w.resize(c.nG); + mesh.xi.resize(c.insd, c.nG); + mesh.N.resize(c.eNoN, c.nG); + mesh.Nx.resize(c.insd, c.eNoN, c.nG); + mesh.xib.resize(2, c.insd); + mesh.Nb.resize(2, c.eNoN); + + nn::get_gip(c.insd, c.quadrature_type, c.nG, mesh.w, mesh.xi); + for (int g = 0; g < c.nG; ++g) { + nn::get_gnn(c.insd, c.type, c.eNoN, g, mesh.xi, mesh.N, mesh.Nx); + } + nn::get_nn_bnds(c.insd, c.type, c.eNoN, mesh.xib, mesh.Nb); + return mesh; +} + +enum class PyramidFace { + Base, + South, + East, + North, + West +}; + +enum class PyramidEdge { + BaseSouth, + BaseEast, + BaseNorth, + BaseWest, + VerticalSW, + VerticalSE, + VerticalNE, + VerticalNW +}; + +struct LagrangeAccuracyCase { + ElementType type; + int order; + std::vector points; +}; + +std::size_t expected_lagrange_size(ElementType type, int order) { + switch (type) { + case ElementType::Point1: + return 1u; + case ElementType::Line2: + case ElementType::Line3: + return static_cast(order + 1); + case ElementType::Triangle3: + case ElementType::Triangle6: + return static_cast(order + 1) * static_cast(order + 2) / 2; + case ElementType::Quad4: + case ElementType::Quad9: + return static_cast(order + 1) * static_cast(order + 1); + case ElementType::Tetra4: + case ElementType::Tetra10: + return static_cast(order + 1) * + static_cast(order + 2) * + static_cast(order + 3) / 6; + case ElementType::Hex8: + case ElementType::Hex27: + return static_cast(order + 1) * + static_cast(order + 1) * + static_cast(order + 1); + case ElementType::Wedge6: + case ElementType::Wedge18: + return static_cast(order + 1) * + static_cast(order + 1) * + static_cast(order + 2) / 2; + case ElementType::Pyramid5: + case ElementType::Pyramid14: + return static_cast(order + 1) * + static_cast(order + 2) * + static_cast(2 * order + 3) / 6; + default: + 
return 0u; + } +} + +int expected_dimension(ElementType type) { + switch (type) { + case ElementType::Point1: + return 0; + case ElementType::Line2: + case ElementType::Line3: + return 1; + case ElementType::Triangle3: + case ElementType::Triangle6: + case ElementType::Quad4: + case ElementType::Quad9: + return 2; + default: + return 3; + } +} + +bool points_close(const Point& a, + const Point& b, + Real tol = Real(1e-12)) { + return std::abs(a[0] - b[0]) <= tol && + std::abs(a[1] - b[1]) <= tol && + std::abs(a[2] - b[2]) <= tol; +} + +std::vector reference_node_coords(ElementType type) { + switch (type) { + case ElementType::Line2: + return { + Point{Real(-1), Real(0), Real(0)}, + Point{Real(1), Real(0), Real(0)}, + }; + case ElementType::Line3: + return { + Point{Real(-1), Real(0), Real(0)}, + Point{Real(1), Real(0), Real(0)}, + Point{Real(0), Real(0), Real(0)}, + }; + case ElementType::Triangle3: + return { + Point{Real(0), Real(0), Real(0)}, + Point{Real(1), Real(0), Real(0)}, + Point{Real(0), Real(1), Real(0)}, + }; + case ElementType::Triangle6: + return { + Point{Real(0), Real(0), Real(0)}, + Point{Real(1), Real(0), Real(0)}, + Point{Real(0), Real(1), Real(0)}, + Point{Real(0.5), Real(0), Real(0)}, + Point{Real(0.5), Real(0.5), Real(0)}, + Point{Real(0), Real(0.5), Real(0)}, + }; + case ElementType::Quad4: + return { + Point{Real(-1), Real(-1), Real(0)}, + Point{Real(1), Real(-1), Real(0)}, + Point{Real(1), Real(1), Real(0)}, + Point{Real(-1), Real(1), Real(0)}, + }; + case ElementType::Quad8: + return { + Point{Real(-1), Real(-1), Real(0)}, + Point{Real(1), Real(-1), Real(0)}, + Point{Real(1), Real(1), Real(0)}, + Point{Real(-1), Real(1), Real(0)}, + Point{Real(0), Real(-1), Real(0)}, + Point{Real(1), Real(0), Real(0)}, + Point{Real(0), Real(1), Real(0)}, + Point{Real(-1), Real(0), Real(0)}, + }; + case ElementType::Quad9: + return { + Point{Real(-1), Real(-1), Real(0)}, + Point{Real(1), Real(-1), Real(0)}, + Point{Real(1), Real(1), Real(0)}, + 
Point{Real(-1), Real(1), Real(0)}, + Point{Real(0), Real(-1), Real(0)}, + Point{Real(1), Real(0), Real(0)}, + Point{Real(0), Real(1), Real(0)}, + Point{Real(-1), Real(0), Real(0)}, + Point{Real(0), Real(0), Real(0)}, + }; + case ElementType::Tetra4: + return { + Point{Real(0), Real(0), Real(0)}, + Point{Real(1), Real(0), Real(0)}, + Point{Real(0), Real(1), Real(0)}, + Point{Real(0), Real(0), Real(1)}, + }; + case ElementType::Tetra10: + return { + Point{Real(0), Real(0), Real(0)}, + Point{Real(1), Real(0), Real(0)}, + Point{Real(0), Real(1), Real(0)}, + Point{Real(0), Real(0), Real(1)}, + Point{Real(0.5), Real(0), Real(0)}, + Point{Real(0.5), Real(0.5), Real(0)}, + Point{Real(0), Real(0.5), Real(0)}, + Point{Real(0), Real(0), Real(0.5)}, + Point{Real(0.5), Real(0), Real(0.5)}, + Point{Real(0), Real(0.5), Real(0.5)}, + }; + case ElementType::Hex8: + return { + Point{Real(-1), Real(-1), Real(-1)}, + Point{Real(1), Real(-1), Real(-1)}, + Point{Real(1), Real(1), Real(-1)}, + Point{Real(-1), Real(1), Real(-1)}, + Point{Real(-1), Real(-1), Real(1)}, + Point{Real(1), Real(-1), Real(1)}, + Point{Real(1), Real(1), Real(1)}, + Point{Real(-1), Real(1), Real(1)}, + }; + case ElementType::Hex20: + return { + Point{Real(-1), Real(-1), Real(-1)}, + Point{Real(1), Real(-1), Real(-1)}, + Point{Real(1), Real(1), Real(-1)}, + Point{Real(-1), Real(1), Real(-1)}, + Point{Real(-1), Real(-1), Real(1)}, + Point{Real(1), Real(-1), Real(1)}, + Point{Real(1), Real(1), Real(1)}, + Point{Real(-1), Real(1), Real(1)}, + Point{Real(0), Real(-1), Real(-1)}, + Point{Real(1), Real(0), Real(-1)}, + Point{Real(0), Real(1), Real(-1)}, + Point{Real(-1), Real(0), Real(-1)}, + Point{Real(0), Real(-1), Real(1)}, + Point{Real(1), Real(0), Real(1)}, + Point{Real(0), Real(1), Real(1)}, + Point{Real(-1), Real(0), Real(1)}, + Point{Real(-1), Real(-1), Real(0)}, + Point{Real(1), Real(-1), Real(0)}, + Point{Real(1), Real(1), Real(0)}, + Point{Real(-1), Real(1), Real(0)}, + }; + case ElementType::Hex27: + return { 
+ Point{Real(-1), Real(-1), Real(-1)}, + Point{Real(1), Real(-1), Real(-1)}, + Point{Real(1), Real(1), Real(-1)}, + Point{Real(-1), Real(1), Real(-1)}, + Point{Real(-1), Real(-1), Real(1)}, + Point{Real(1), Real(-1), Real(1)}, + Point{Real(1), Real(1), Real(1)}, + Point{Real(-1), Real(1), Real(1)}, + Point{Real(0), Real(-1), Real(-1)}, + Point{Real(1), Real(0), Real(-1)}, + Point{Real(0), Real(1), Real(-1)}, + Point{Real(-1), Real(0), Real(-1)}, + Point{Real(0), Real(-1), Real(1)}, + Point{Real(1), Real(0), Real(1)}, + Point{Real(0), Real(1), Real(1)}, + Point{Real(-1), Real(0), Real(1)}, + Point{Real(-1), Real(-1), Real(0)}, + Point{Real(1), Real(-1), Real(0)}, + Point{Real(1), Real(1), Real(0)}, + Point{Real(-1), Real(1), Real(0)}, + Point{Real(0), Real(0), Real(-1)}, + Point{Real(0), Real(0), Real(1)}, + Point{Real(0), Real(-1), Real(0)}, + Point{Real(1), Real(0), Real(0)}, + Point{Real(0), Real(1), Real(0)}, + Point{Real(-1), Real(0), Real(0)}, + Point{Real(0), Real(0), Real(0)}, + }; + case ElementType::Wedge6: + return { + Point{Real(0), Real(0), Real(-1)}, + Point{Real(1), Real(0), Real(-1)}, + Point{Real(0), Real(1), Real(-1)}, + Point{Real(0), Real(0), Real(1)}, + Point{Real(1), Real(0), Real(1)}, + Point{Real(0), Real(1), Real(1)}, + }; + case ElementType::Wedge15: + return { + Point{Real(0), Real(0), Real(-1)}, + Point{Real(1), Real(0), Real(-1)}, + Point{Real(0), Real(1), Real(-1)}, + Point{Real(0), Real(0), Real(1)}, + Point{Real(1), Real(0), Real(1)}, + Point{Real(0), Real(1), Real(1)}, + Point{Real(0.5), Real(0), Real(-1)}, + Point{Real(0.5), Real(0.5), Real(-1)}, + Point{Real(0), Real(0.5), Real(-1)}, + Point{Real(0.5), Real(0), Real(1)}, + Point{Real(0.5), Real(0.5), Real(1)}, + Point{Real(0), Real(0.5), Real(1)}, + Point{Real(0), Real(0), Real(0)}, + Point{Real(1), Real(0), Real(0)}, + Point{Real(0), Real(1), Real(0)}, + }; + case ElementType::Wedge18: + return { + Point{Real(0), Real(0), Real(-1)}, + Point{Real(1), Real(0), Real(-1)}, + 
Point{Real(0), Real(1), Real(-1)}, + Point{Real(0), Real(0), Real(1)}, + Point{Real(1), Real(0), Real(1)}, + Point{Real(0), Real(1), Real(1)}, + Point{Real(0.5), Real(0), Real(-1)}, + Point{Real(0.5), Real(0.5), Real(-1)}, + Point{Real(0), Real(0.5), Real(-1)}, + Point{Real(0.5), Real(0), Real(1)}, + Point{Real(0.5), Real(0.5), Real(1)}, + Point{Real(0), Real(0.5), Real(1)}, + Point{Real(0), Real(0), Real(0)}, + Point{Real(1), Real(0), Real(0)}, + Point{Real(0), Real(1), Real(0)}, + Point{Real(0.5), Real(0), Real(0)}, + Point{Real(0.5), Real(0.5), Real(0)}, + Point{Real(0), Real(0.5), Real(0)}, + }; + case ElementType::Pyramid5: + return { + Point{Real(-1), Real(-1), Real(0)}, + Point{Real(1), Real(-1), Real(0)}, + Point{Real(1), Real(1), Real(0)}, + Point{Real(-1), Real(1), Real(0)}, + Point{Real(0), Real(0), Real(1)}, + }; + case ElementType::Pyramid13: + return { + Point{Real(-1), Real(-1), Real(0)}, + Point{Real(1), Real(-1), Real(0)}, + Point{Real(1), Real(1), Real(0)}, + Point{Real(-1), Real(1), Real(0)}, + Point{Real(0), Real(0), Real(1)}, + Point{Real(0), Real(-1), Real(0)}, + Point{Real(1), Real(0), Real(0)}, + Point{Real(0), Real(1), Real(0)}, + Point{Real(-1), Real(0), Real(0)}, + Point{Real(-0.5), Real(-0.5), Real(0.5)}, + Point{Real(0.5), Real(-0.5), Real(0.5)}, + Point{Real(0.5), Real(0.5), Real(0.5)}, + Point{Real(-0.5), Real(0.5), Real(0.5)}, + }; + case ElementType::Pyramid14: + return { + Point{Real(-1), Real(-1), Real(0)}, + Point{Real(1), Real(-1), Real(0)}, + Point{Real(1), Real(1), Real(0)}, + Point{Real(-1), Real(1), Real(0)}, + Point{Real(0), Real(0), Real(1)}, + Point{Real(0), Real(-1), Real(0)}, + Point{Real(1), Real(0), Real(0)}, + Point{Real(0), Real(1), Real(0)}, + Point{Real(-1), Real(0), Real(0)}, + Point{Real(-0.5), Real(-0.5), Real(0.5)}, + Point{Real(0.5), Real(-0.5), Real(0.5)}, + Point{Real(0.5), Real(0.5), Real(0.5)}, + Point{Real(-0.5), Real(0.5), Real(0.5)}, + Point{Real(0), Real(0), Real(0)}, + }; + default: + return {}; + } 
+} + +void expect_nodes_match_node_ordering(ElementType canonical_type, + int order, + ElementType node_ordering_type) { + LagrangeBasis basis(canonical_type, order); + const auto& nodes = basis.nodes(); + + ASSERT_EQ(nodes.size(), ReferenceNodeLayout::num_nodes(node_ordering_type)); + ASSERT_EQ(nodes.size(), basis.size()); + + for (std::size_t i = 0; i < nodes.size(); ++i) { + const auto expected = ReferenceNodeLayout::get_node_coords(node_ordering_type, i); + EXPECT_NEAR(nodes[i][0], expected[0], 1e-14); + EXPECT_NEAR(nodes[i][1], expected[1], 1e-14); + EXPECT_NEAR(nodes[i][2], expected[2], 1e-14); + + std::vector vals; + basis.evaluate_values(expected, vals); + ASSERT_EQ(vals.size(), nodes.size()); + for (std::size_t j = 0; j < vals.size(); ++j) { + const double expected_delta = (i == j) ? 1.0 : 0.0; + EXPECT_NEAR(vals[j], expected_delta, 1e-12); + } + } +} + +void expect_alias_matches_canonical(ElementType alias_type, + ElementType canonical_type, + int canonical_order, + const std::vector& points, + Real tol = Real(1e-12)) { + LagrangeBasis alias(alias_type, canonical_order); + LagrangeBasis canonical(canonical_type, canonical_order); + + ASSERT_EQ(alias.element_type(), canonical.element_type()); + ASSERT_EQ(alias.order(), canonical.order()); + ASSERT_EQ(alias.size(), canonical.size()); + ASSERT_EQ(alias.nodes().size(), canonical.nodes().size()); + + for (std::size_t i = 0; i < alias.nodes().size(); ++i) { + EXPECT_NEAR(alias.nodes()[i][0], canonical.nodes()[i][0], tol); + EXPECT_NEAR(alias.nodes()[i][1], canonical.nodes()[i][1], tol); + EXPECT_NEAR(alias.nodes()[i][2], canonical.nodes()[i][2], tol); + } + + for (const auto& xi : points) { + std::vector alias_values; + std::vector canonical_values; + std::vector alias_gradients; + std::vector canonical_gradients; + std::vector alias_hessians; + std::vector canonical_hessians; + + alias.evaluate_values(xi, alias_values); + canonical.evaluate_values(xi, canonical_values); + alias.evaluate_gradients(xi, 
alias_gradients); + canonical.evaluate_gradients(xi, canonical_gradients); + alias.evaluate_hessians(xi, alias_hessians); + canonical.evaluate_hessians(xi, canonical_hessians); + + ASSERT_EQ(alias_values.size(), canonical_values.size()); + ASSERT_EQ(alias_gradients.size(), canonical_gradients.size()); + ASSERT_EQ(alias_hessians.size(), canonical_hessians.size()); + + for (std::size_t i = 0; i < alias_values.size(); ++i) { + EXPECT_NEAR(alias_values[i], canonical_values[i], tol); + for (int d = 0; d < canonical.dimension(); ++d) { + const std::size_t sd = static_cast(d); + EXPECT_NEAR(alias_gradients[i][sd], canonical_gradients[i][sd], tol); + for (int e = 0; e < canonical.dimension(); ++e) { + const std::size_t se = static_cast(e); + EXPECT_NEAR(alias_hessians[i](sd, se), canonical_hessians[i](sd, se), Real(5) * tol); + } + } + } + } +} + +std::vector sample_points_for(ElementType type) { + switch (type) { + case ElementType::Line2: + case ElementType::Line3: + return { + Point{Real(-0.7), Real(0), Real(0)}, + Point{Real(0.1), Real(0), Real(0)}, + Point{Real(0.65), Real(0), Real(0)} + }; + case ElementType::Triangle3: + case ElementType::Triangle6: + return { + Point{Real(0.15), Real(0.2), Real(0)}, + Point{Real(0.25), Real(0.1), Real(0)}, + Point{Real(0.2), Real(0.3), Real(0)} + }; + case ElementType::Quad4: + case ElementType::Quad9: + return { + Point{Real(0.2), Real(-0.35), Real(0)}, + Point{Real(-0.4), Real(0.25), Real(0)}, + Point{Real(0.55), Real(0.1), Real(0)} + }; + case ElementType::Tetra4: + case ElementType::Tetra10: + return { + Point{Real(0.1), Real(0.2), Real(0.15)}, + Point{Real(0.2), Real(0.1), Real(0.25)}, + Point{Real(0.15), Real(0.15), Real(0.2)} + }; + case ElementType::Hex8: + case ElementType::Hex27: + return { + Point{Real(0.2), Real(-0.3), Real(0.25)}, + Point{Real(-0.5), Real(0.4), Real(-0.2)}, + Point{Real(0.1), Real(0.15), Real(0.6)} + }; + case ElementType::Wedge6: + case ElementType::Wedge18: + return { + Point{Real(0.2), Real(0.25), 
Real(0.0)}, + Point{Real(0.1), Real(0.2), Real(-0.45)}, + Point{Real(0.3), Real(0.15), Real(0.5)} + }; + case ElementType::Pyramid5: + case ElementType::Pyramid14: + return { + Point{Real(0.0), Real(0.0), Real(0.25)}, + Point{Real(0.15), Real(-0.1), Real(0.3)}, + Point{Real(-0.1), Real(0.2), Real(0.4)} + }; + default: + return {Point{Real(0), Real(0), Real(0)}}; + } +} + +std::vector boundary_stress_points_for(ElementType type); + +std::vector dense_sample_points_for(ElementType type) { + const auto interior = sample_points_for(type); + const auto boundary = boundary_stress_points_for(type); + + std::vector points; + points.reserve(interior.size() + boundary.size()); + points.insert(points.end(), interior.begin(), interior.end()); + points.insert(points.end(), boundary.begin(), boundary.end()); + + if (type == ElementType::Pyramid5 || type == ElementType::Pyramid14) { + points.push_back(Point{Real(0.0), Real(0.0), Real(0.85)}); + points.push_back(Point{Real(0.02), Real(-0.015), Real(0.95)}); + } + return points; +} + +std::vector boundary_stress_points_for(ElementType type) { + switch (type) { + case ElementType::Line2: + case ElementType::Line3: + return { + Point{Real(-0.999), Real(0), Real(0)}, + Point{Real(-0.75), Real(0), Real(0)}, + Point{Real(0.0), Real(0), Real(0)}, + Point{Real(0.8), Real(0), Real(0)}, + Point{Real(0.999), Real(0), Real(0)} + }; + case ElementType::Triangle3: + case ElementType::Triangle6: + return { + Point{Real(1e-6), Real(1e-6), Real(0)}, + Point{Real(0.98), Real(0.01), Real(0)}, + Point{Real(0.01), Real(0.98), Real(0)}, + Point{Real(0.25), Real(1e-4), Real(0)}, + Point{Real(0.49), Real(0.49), Real(0)} + }; + case ElementType::Quad4: + case ElementType::Quad9: + return { + Point{Real(-0.99), Real(-0.99), Real(0)}, + Point{Real(0.99), Real(-0.99), Real(0)}, + Point{Real(0.99), Real(0.99), Real(0)}, + Point{Real(-0.99), Real(0.99), Real(0)}, + Point{Real(0.0), Real(0.95), Real(0)} + }; + case ElementType::Tetra4: + case 
ElementType::Tetra10: + return { + Point{Real(1e-6), Real(1e-6), Real(1e-6)}, + Point{Real(0.97), Real(0.01), Real(0.01)}, + Point{Real(0.01), Real(0.97), Real(0.01)}, + Point{Real(0.01), Real(0.01), Real(0.97)}, + Point{Real(0.32), Real(0.33), Real(0.01)} + }; + case ElementType::Hex8: + case ElementType::Hex27: + return { + Point{Real(-0.99), Real(-0.99), Real(-0.99)}, + Point{Real(0.99), Real(-0.99), Real(0.99)}, + Point{Real(0.99), Real(0.99), Real(-0.99)}, + Point{Real(-0.99), Real(0.99), Real(0.99)}, + Point{Real(0.0), Real(0.0), Real(0.95)} + }; + case ElementType::Wedge6: + case ElementType::Wedge18: + return { + Point{Real(1e-6), Real(1e-6), Real(-0.99)}, + Point{Real(0.98), Real(0.01), Real(-0.99)}, + Point{Real(0.01), Real(0.98), Real(0.99)}, + Point{Real(0.49), Real(0.49), Real(0.0)}, + Point{Real(0.25), Real(1e-4), Real(0.95)} + }; + case ElementType::Pyramid5: + case ElementType::Pyramid14: + return { + Point{Real(0.0), Real(0.0), Real(0.95)}, + Point{Real(0.01), Real(-0.01), Real(0.98)}, + Point{Real(0.6), Real(-0.6), Real(0.2)}, + Point{Real(0.79), Real(0.0), Real(0.2)}, + Point{Real(0.0), Real(0.79), Real(0.2)} + }; + default: + return {Point{Real(0), Real(0), Real(0)}}; + } +} + +Real monomial_value(const Point& xi, int px, int py, int pz) { + return std::pow(xi[0], px) * std::pow(xi[1], py) * std::pow(xi[2], pz); +} + +void expect_gradients_match_finite_difference(const LagrangeAccuracyCase& c, + Real eps, + Real tol) { + LagrangeBasis basis(c.type, c.order); + + for (const auto& xi : c.points) { + std::vector gradients; + basis.evaluate_gradients(xi, gradients); + ASSERT_EQ(gradients.size(), basis.size()); + + for (int d = 0; d < basis.dimension(); ++d) { + Point xp = xi; + Point xm = xi; + xp[d] += eps; + xm[d] -= eps; + + std::vector values_p; + std::vector values_m; + basis.evaluate_values(xp, values_p); + basis.evaluate_values(xm, values_m); + + ASSERT_EQ(values_p.size(), basis.size()); + ASSERT_EQ(values_m.size(), basis.size()); + for 
(std::size_t i = 0; i < basis.size(); ++i) { + const Real fd = (values_p[i] - values_m[i]) / (Real(2) * eps); + EXPECT_NEAR(gradients[i][d], fd, tol) + << "type=" << static_cast(c.type) + << ", order=" << c.order + << ", dim=" << d + << ", basis_i=" << i + << ", xi=(" << xi[0] << "," << xi[1] << "," << xi[2] << ")"; + } + } + } +} + +void expect_polynomial_reproduction(const LagrangeAccuracyCase& c, + const std::vector>& exponents, + Real tol) { + LagrangeBasis basis(c.type, c.order); + const auto& nodes = basis.nodes(); + ASSERT_EQ(nodes.size(), basis.size()); + + for (const auto& exp : exponents) { + std::vector coeffs(basis.size(), Real(0)); + for (std::size_t i = 0; i < basis.size(); ++i) { + coeffs[i] = monomial_value(nodes[i], exp[0], exp[1], exp[2]); + } + + for (const auto& xi : c.points) { + std::vector values; + basis.evaluate_values(xi, values); + ASSERT_EQ(values.size(), basis.size()); + + Real interpolated = Real(0); + for (std::size_t i = 0; i < basis.size(); ++i) { + interpolated += coeffs[i] * values[i]; + } + + const Real exact = monomial_value(xi, exp[0], exp[1], exp[2]); + EXPECT_NEAR(interpolated, exact, tol) + << "type=" << static_cast(c.type) + << ", order=" << c.order + << ", monomial=(" << exp[0] << "," << exp[1] << "," << exp[2] << ")" + << ", xi=(" << xi[0] << "," << xi[1] << "," << xi[2] << ")"; + } + } +} + +template +void expect_all_finite(const Container& values) { + for (const auto& value : values) { + for (std::size_t d = 0; d < 3; ++d) { + EXPECT_TRUE(std::isfinite(value[d])); + } + } +} + +void expect_hessians_finite(const std::vector& hessians, + int dimension) { + for (const auto& H : hessians) { + for (int i = 0; i < dimension; ++i) { + for (int j = 0; j < dimension; ++j) { + EXPECT_TRUE(std::isfinite(H(static_cast(i), + static_cast(j)))); + } + } + } +} + +void expect_partition_gradient_hessian_sums(const LagrangeBasis& basis, + const std::vector& points, + Real value_tol, + Real derivative_tol) { + for (const auto& xi : 
points) { + std::vector values; + std::vector gradients; + std::vector hessians; + basis.evaluate_values(xi, values); + basis.evaluate_gradients(xi, gradients); + basis.evaluate_hessians(xi, hessians); + + ASSERT_EQ(values.size(), basis.size()); + ASSERT_EQ(gradients.size(), basis.size()); + ASSERT_EQ(hessians.size(), basis.size()); + + Real value_sum = Real(0); + Gradient gradient_sum{}; + Hessian hessian_sum{}; + for (std::size_t i = 0; i < basis.size(); ++i) { + value_sum += values[i]; + for (int d = 0; d < basis.dimension(); ++d) { + const std::size_t sd = static_cast(d); + gradient_sum[sd] += gradients[i][sd]; + for (int e = 0; e < basis.dimension(); ++e) { + const std::size_t se = static_cast(e); + hessian_sum(sd, se) += hessians[i](sd, se); + } + } + } + + EXPECT_NEAR(value_sum, Real(1), value_tol) + << "Element type " << static_cast(basis.element_type()) + << ", order " << basis.order() + << ", xi=(" << xi[0] << "," << xi[1] << "," << xi[2] << ")"; + + for (int d = 0; d < basis.dimension(); ++d) { + const std::size_t sd = static_cast(d); + EXPECT_NEAR(gradient_sum[sd], Real(0), derivative_tol) + << "Gradient sum mismatch for element type " << static_cast(basis.element_type()) + << ", order " << basis.order() + << ", dim " << d; + for (int e = 0; e < basis.dimension(); ++e) { + const std::size_t se = static_cast(e); + EXPECT_NEAR(hessian_sum(sd, se), Real(0), derivative_tol) + << "Hessian sum mismatch for element type " << static_cast(basis.element_type()) + << ", order " << basis.order() + << ", component (" << d << "," << e << ")"; + } + } + } +} + +bool is_on_pyramid_face(const Point& point, + PyramidFace face, + Real tol = Real(1e-12)) { + const Real scale = Real(1) - point[2]; + switch (face) { + case PyramidFace::Base: + return std::abs(point[2]) <= tol; + case PyramidFace::South: + return std::abs(point[1] + scale) <= tol; + case PyramidFace::East: + return std::abs(point[0] - scale) <= tol; + case PyramidFace::North: + return std::abs(point[1] - 
scale) <= tol; + case PyramidFace::West: + return std::abs(point[0] + scale) <= tol; + } + return false; +} + +Point map_pyramid_face_to_reference(PyramidFace face, + const Point& point) { + const Real scale = Real(1) - point[2]; + switch (face) { + case PyramidFace::Base: + return Point{point[0], point[1], Real(0)}; + case PyramidFace::South: + return Point{(scale - point[0]) / Real(2), point[2], Real(0)}; + case PyramidFace::East: + return Point{(scale + point[1]) / Real(2), point[2], Real(0)}; + case PyramidFace::North: + return Point{(scale + point[0]) / Real(2), point[2], Real(0)}; + case PyramidFace::West: + return Point{(scale - point[1]) / Real(2), point[2], Real(0)}; + } + return Point{}; +} + +std::vector sample_points_for_pyramid_face(PyramidFace face) { + switch (face) { + case PyramidFace::Base: + return { + Point{Real(0.15), Real(-0.2), Real(0)}, + Point{Real(-0.55), Real(0.35), Real(0)} + }; + case PyramidFace::South: + return { + Point{Real(-0.2), Real(-0.8), Real(0.2)}, + Point{Real(0.05), Real(-0.35), Real(0.65)} + }; + case PyramidFace::East: + return { + Point{Real(0.8), Real(-0.25), Real(0.2)}, + Point{Real(0.3), Real(0.08), Real(0.7)} + }; + case PyramidFace::North: + return { + Point{Real(0.25), Real(0.8), Real(0.2)}, + Point{Real(-0.08), Real(0.35), Real(0.65)} + }; + case PyramidFace::West: + return { + Point{Real(-0.8), Real(0.2), Real(0.2)}, + Point{Real(-0.3), Real(-0.05), Real(0.7)} + }; + } + return {}; +} + +bool is_on_pyramid_edge(const Point& point, + PyramidEdge edge, + Real tol = Real(1e-12)) { + const Real scale = Real(1) - point[2]; + switch (edge) { + case PyramidEdge::BaseSouth: + return std::abs(point[2]) <= tol && std::abs(point[1] + Real(1)) <= tol; + case PyramidEdge::BaseEast: + return std::abs(point[2]) <= tol && std::abs(point[0] - Real(1)) <= tol; + case PyramidEdge::BaseNorth: + return std::abs(point[2]) <= tol && std::abs(point[1] - Real(1)) <= tol; + case PyramidEdge::BaseWest: + return std::abs(point[2]) <= tol && 
std::abs(point[0] + Real(1)) <= tol; + case PyramidEdge::VerticalSW: + return std::abs(point[0] + scale) <= tol && std::abs(point[1] + scale) <= tol; + case PyramidEdge::VerticalSE: + return std::abs(point[0] - scale) <= tol && std::abs(point[1] + scale) <= tol; + case PyramidEdge::VerticalNE: + return std::abs(point[0] - scale) <= tol && std::abs(point[1] - scale) <= tol; + case PyramidEdge::VerticalNW: + return std::abs(point[0] + scale) <= tol && std::abs(point[1] - scale) <= tol; + } + return false; +} + +Point map_pyramid_edge_to_reference(PyramidEdge edge, + const Point& point) { + switch (edge) { + case PyramidEdge::BaseSouth: + case PyramidEdge::BaseNorth: + return Point{point[0], Real(0), Real(0)}; + case PyramidEdge::BaseEast: + case PyramidEdge::BaseWest: + return Point{point[1], Real(0), Real(0)}; + case PyramidEdge::VerticalSW: + case PyramidEdge::VerticalSE: + case PyramidEdge::VerticalNE: + case PyramidEdge::VerticalNW: + return Point{Real(2) * point[2] - Real(1), Real(0), Real(0)}; + } + return Point{}; +} + +std::vector sample_points_for_pyramid_edge(PyramidEdge edge) { + switch (edge) { + case PyramidEdge::BaseSouth: + return {Point{Real(-0.65), Real(-1), Real(0)}, Point{Real(0.35), Real(-1), Real(0)}}; + case PyramidEdge::BaseEast: + return {Point{Real(1), Real(-0.45), Real(0)}, Point{Real(1), Real(0.55), Real(0)}}; + case PyramidEdge::BaseNorth: + return {Point{Real(-0.55), Real(1), Real(0)}, Point{Real(0.45), Real(1), Real(0)}}; + case PyramidEdge::BaseWest: + return {Point{Real(-1), Real(-0.55), Real(0)}, Point{Real(-1), Real(0.45), Real(0)}}; + case PyramidEdge::VerticalSW: + return {Point{Real(-0.75), Real(-0.75), Real(0.25)}, Point{Real(-0.3), Real(-0.3), Real(0.7)}}; + case PyramidEdge::VerticalSE: + return {Point{Real(0.75), Real(-0.75), Real(0.25)}, Point{Real(0.3), Real(-0.3), Real(0.7)}}; + case PyramidEdge::VerticalNE: + return {Point{Real(0.75), Real(0.75), Real(0.25)}, Point{Real(0.3), Real(0.3), Real(0.7)}}; + case 
PyramidEdge::VerticalNW: + return {Point{Real(-0.75), Real(0.75), Real(0.25)}, Point{Real(-0.3), Real(0.3), Real(0.7)}}; + } + return {}; +} + +std::vector map_pyramid_nodes_to_lower_basis_nodes( + const std::vector& pyramid_nodes, + const std::vector& lower_basis_nodes, + const std::function& selector, + const std::function& mapper) { + std::vector mapping(pyramid_nodes.size(), -1); + std::size_t face_count = 0; + for (std::size_t i = 0; i < pyramid_nodes.size(); ++i) { + if (!selector(pyramid_nodes[i])) { + continue; + } + + ++face_count; + const Point mapped = mapper(pyramid_nodes[i]); + bool found = false; + for (std::size_t j = 0; j < lower_basis_nodes.size(); ++j) { + if (points_close(mapped, lower_basis_nodes[j])) { + mapping[i] = static_cast(j); + found = true; + break; + } + } + EXPECT_TRUE(found) + << "Failed to match pyramid trace node at (" << pyramid_nodes[i][0] << "," + << pyramid_nodes[i][1] << "," << pyramid_nodes[i][2] << ")"; + } + + EXPECT_EQ(face_count, lower_basis_nodes.size()); + return mapping; +} + +void expect_pyramid_face_trace_matches_lower_basis(int order, + PyramidFace face, + Real tol = Real(2e-10)) { + LagrangeBasis pyramid(ElementType::Pyramid5, order); + const bool base_face = face == PyramidFace::Base; + LagrangeBasis lower(base_face ? 
ElementType::Quad4 : ElementType::Triangle3, order); + + const auto mapping = map_pyramid_nodes_to_lower_basis_nodes( + pyramid.nodes(), + lower.nodes(), + [&](const Point& point) { return is_on_pyramid_face(point, face); }, + [&](const Point& point) { return map_pyramid_face_to_reference(face, point); }); + + for (const auto& face_point : sample_points_for_pyramid_face(face)) { + std::vector pyramid_values; + std::vector lower_values; + pyramid.evaluate_values(face_point, pyramid_values); + lower.evaluate_values(map_pyramid_face_to_reference(face, face_point), lower_values); + + ASSERT_EQ(pyramid_values.size(), pyramid.size()); + ASSERT_EQ(lower_values.size(), lower.size()); + + for (std::size_t i = 0; i < pyramid.size(); ++i) { + if (mapping[i] >= 0) { + EXPECT_NEAR(pyramid_values[i], lower_values[static_cast(mapping[i])], tol) + << "Face trace mismatch for order " << order + << ", face " << static_cast(face) + << ", basis " << i; + } else { + EXPECT_NEAR(pyramid_values[i], Real(0), tol) + << "Off-face pyramid basis should vanish on face for order " << order + << ", face " << static_cast(face) + << ", basis " << i; + } + } + } +} + +void expect_pyramid_edge_trace_matches_line_basis(int order, + PyramidEdge edge, + Real tol = Real(2e-10)) { + LagrangeBasis pyramid(ElementType::Pyramid5, order); + LagrangeBasis line(ElementType::Line2, order); + + const auto mapping = map_pyramid_nodes_to_lower_basis_nodes( + pyramid.nodes(), + line.nodes(), + [&](const Point& point) { return is_on_pyramid_edge(point, edge); }, + [&](const Point& point) { return map_pyramid_edge_to_reference(edge, point); }); + + for (const auto& edge_point : sample_points_for_pyramid_edge(edge)) { + std::vector pyramid_values; + std::vector line_values; + pyramid.evaluate_values(edge_point, pyramid_values); + line.evaluate_values(map_pyramid_edge_to_reference(edge, edge_point), line_values); + + ASSERT_EQ(pyramid_values.size(), pyramid.size()); + ASSERT_EQ(line_values.size(), line.size()); + + for 
(std::size_t i = 0; i < pyramid.size(); ++i) { + if (mapping[i] >= 0) { + EXPECT_NEAR(pyramid_values[i], line_values[static_cast(mapping[i])], tol) + << "Edge trace mismatch for order " << order + << ", edge " << static_cast(edge) + << ", basis " << i; + } else { + EXPECT_NEAR(pyramid_values[i], Real(0), tol) + << "Off-edge pyramid basis should vanish on edge for order " << order + << ", edge " << static_cast(edge) + << ", basis " << i; + } + } + } +} + +struct StridedOutputRequest { + bool values; + bool gradients; + bool hessians; +}; + +void expect_strided_matches_pointwise(ElementType type, + int order, + const StridedOutputRequest& request) { + LagrangeBasis basis(type, order); + const auto points = dense_sample_points_for(type); + const std::size_t stride = points.size() + 3u; + constexpr Real sentinel = Real(-12345.25); + + std::vector values(request.values ? basis.size() * stride : 0u, sentinel); + std::vector gradients(request.gradients ? basis.size() * 3u * stride : 0u, sentinel); + std::vector hessians(request.hessians ? basis.size() * 9u * stride : 0u, sentinel); + + basis.evaluate_at_quadrature_points_strided( + points, + stride, + request.values ? values.data() : nullptr, + request.gradients ? gradients.data() : nullptr, + request.hessians ? hessians.data() : nullptr); + + const Real tol = (type == ElementType::Pyramid5 || type == ElementType::Pyramid14) + ? 
Real(5e-10) + : Real(1e-12); + + for (std::size_t q = 0; q < points.size(); ++q) { + if (request.values) { + std::vector expected; + basis.evaluate_values(points[q], expected); + ASSERT_EQ(expected.size(), basis.size()); + for (std::size_t d = 0; d < basis.size(); ++d) { + EXPECT_NEAR(values[d * stride + q], expected[d], tol) + << "type=" << static_cast(type) + << ", order=" << order + << ", dof=" << d + << ", q=" << q; + } + } + + if (request.gradients) { + std::vector expected; + basis.evaluate_gradients(points[q], expected); + ASSERT_EQ(expected.size(), basis.size()); + for (std::size_t d = 0; d < basis.size(); ++d) { + for (std::size_t c = 0; c < 3u; ++c) { + EXPECT_NEAR(gradients[(d * 3u + c) * stride + q], expected[d][c], tol) + << "type=" << static_cast(type) + << ", order=" << order + << ", dof=" << d + << ", component=" << c + << ", q=" << q; + } + } + } + + if (request.hessians) { + std::vector expected; + basis.evaluate_hessians(points[q], expected); + ASSERT_EQ(expected.size(), basis.size()); + for (std::size_t d = 0; d < basis.size(); ++d) { + for (std::size_t r = 0; r < 3u; ++r) { + for (std::size_t c = 0; c < 3u; ++c) { + EXPECT_NEAR(hessians[(d * 9u + r * 3u + c) * stride + q], + expected[d](r, c), + Real(4) * tol) + << "type=" << static_cast(type) + << ", order=" << order + << ", dof=" << d + << ", hessian=(" << r << "," << c << ")" + << ", q=" << q; + } + } + } + } + } + + const auto expect_padding_untouched = [&](const std::vector& buffer, + std::size_t rows) { + for (std::size_t row = 0; row < rows; ++row) { + for (std::size_t q = points.size(); q < stride; ++q) { + EXPECT_EQ(buffer[row * stride + q], sentinel) + << "type=" << static_cast(type) + << ", order=" << order + << ", row=" << row + << ", padding q=" << q; + } + } + }; + + if (request.values) { + expect_padding_untouched(values, basis.size()); + } + if (request.gradients) { + expect_padding_untouched(gradients, basis.size() * 3u); + } + if (request.hessians) { + 
expect_padding_untouched(hessians, basis.size() * 9u); + } +} + +void expect_raw_to_matches_vector_evaluation(ElementType type, int order) { + LagrangeBasis basis(type, order); + const Real tol = (type == ElementType::Pyramid5 || type == ElementType::Pyramid14) + ? Real(5e-10) + : Real(1e-12); + + for (const auto& point : sample_points_for(type)) { + std::vector values; + std::vector gradients; + std::vector hessians; + basis.evaluate_all(point, values, gradients, hessians); + + std::vector raw_values(basis.size()); + std::vector raw_gradients(basis.size() * 3u); + std::vector raw_hessians(basis.size() * 9u); + basis.evaluate_values_to(point, raw_values.data()); + basis.evaluate_gradients_to(point, raw_gradients.data()); + basis.evaluate_hessians_to(point, raw_hessians.data()); + + for (std::size_t i = 0; i < basis.size(); ++i) { + EXPECT_NEAR(raw_values[i], values[i], tol) + << "type=" << static_cast(type) << ", order=" << order << ", dof=" << i; + for (std::size_t c = 0; c < 3u; ++c) { + EXPECT_NEAR(raw_gradients[i * 3u + c], gradients[i][c], tol) + << "type=" << static_cast(type) + << ", order=" << order + << ", dof=" << i + << ", gradient component=" << c; + } + for (std::size_t r = 0; r < 3u; ++r) { + for (std::size_t c = 0; c < 3u; ++c) { + EXPECT_NEAR(raw_hessians[i * 9u + r * 3u + c], hessians[i](r, c), Real(4) * tol) + << "type=" << static_cast(type) + << ", order=" << order + << ", dof=" << i + << ", hessian=(" << r << "," << c << ")"; + } + } + } + } +} + +} // namespace + +TEST(SolverBasisAdapter, ShapeValuesGradientsAndMeshOverloadMatchLegacy) { + constexpr double tol = 2e-12; + + for (const auto& c : solver_basis_adapter_cases()) { + SCOPED_TRACE("element=" + std::to_string(static_cast(c.type))); + Vector weights; + Array xi; + fill_legacy_quadrature(c, weights, xi); + + Array legacy_N(c.eNoN, c.nG); + Array adapter_N(c.eNoN, c.nG); + Array3 legacy_Nx(c.insd, c.eNoN, c.nG); + Array3 adapter_Nx(c.insd, c.eNoN, c.nG); + auto legacy_shape = 
legacy_solver_nn::get_element_shape_data.find(c.type); + + faceType legacy_face; + if (legacy_shape == legacy_solver_nn::get_element_shape_data.end()) { + ASSERT_EQ(c.type, consts::ElementType::QUD8); + legacy_face.eType = c.type; + legacy_face.eNoN = c.eNoN; + legacy_face.nG = c.nG; + legacy_face.xi = xi; + legacy_face.N.resize(c.eNoN, c.nG); + legacy_face.Nx.resize(c.insd, c.eNoN, c.nG); + } + + for (int g = 0; g < c.nG; ++g) { + if (legacy_shape != legacy_solver_nn::get_element_shape_data.end()) { + legacy_shape->second(c.insd, c.eNoN, g, xi, legacy_N, legacy_Nx); + } else { + legacy_solver_nn::set_face_shape_data.at(c.type)(g, legacy_face); + } + nn::get_gnn(c.insd, c.type, c.eNoN, g, xi, adapter_N, adapter_Nx); + + double partition = 0.0; + std::array gradient_sum{0.0, 0.0, 0.0}; + for (int a = 0; a < c.eNoN; ++a) { + partition += adapter_N(a, g); + for (int d = 0; d < c.insd; ++d) { + gradient_sum[static_cast(d)] += adapter_Nx(d, a, g); + } + } + + EXPECT_NEAR(partition, 1.0, tol) + << "element=" << static_cast(c.type) << ", g=" << g; + for (int d = 0; d < c.insd; ++d) { + EXPECT_NEAR(gradient_sum[static_cast(d)], 0.0, tol) + << "element=" << static_cast(c.type) << ", d=" << d << ", g=" << g; + } + } + + if (legacy_shape == legacy_solver_nn::get_element_shape_data.end()) { + legacy_N = legacy_face.N; + legacy_Nx = legacy_face.Nx; + } + + expect_arrays_near(adapter_N, legacy_N, tol); + expect_array3_near(adapter_Nx, legacy_Nx, tol); + + mshType mesh; + mesh.eType = c.type; + mesh.eNoN = c.eNoN; + mesh.nG = c.nG; + mesh.xi = xi; + mesh.N.resize(c.eNoN, c.nG); + mesh.Nx.resize(c.insd, c.eNoN, c.nG); + for (int g = 0; g < c.nG; ++g) { + nn::get_gnn(g, mesh); + } + + expect_arrays_near(mesh.N, legacy_N, tol); + expect_array3_near(mesh.Nx, legacy_Nx, tol); + } +} + +TEST(SolverFaceBasisAdapter, ShapeValuesGradientsAndDispatchMatchLegacyFaceTable) { + constexpr double tol = 2e-12; + + int covered = 0; + for (const auto& c : solver_face_basis_adapter_cases()) { + 
SCOPED_TRACE("face element=" + std::to_string(static_cast(c.type))); + + faceType legacy_face = initialized_face_for_case(c); + faceType basis_face = initialized_face_for_case(c); + + for (int g = 0; g < c.nG; ++g) { + legacy_solver_nn::set_face_shape_data.at(c.type)(g, legacy_face); + nn::get_gnn(nullptr, g, basis_face); + expect_face_partition_identities(c, basis_face, g, tol); + } + + expect_arrays_near(basis_face.N, legacy_face.N, tol); + expect_array3_near(basis_face.Nx, legacy_face.Nx, tol); + ++covered; + } + + EXPECT_EQ(covered, 7); +} + +TEST(SolverFaceBasisAdapter, MappedFacesFailClosedWithoutLegacyFallback) { + using consts::ElementType; + + SolverBasisAdapterCase c{ElementType::LIN1, ElementType::LIN1, 1, 3, 2}; + faceType face = initialized_face_for_case(c); + + try { + nn::get_gnn(nullptr, 0, face); + FAIL() << "Expected mapped face dispatch to reject mismatched eNoN"; + } catch (const svmp::FE::basis::BasisEvaluationException& exception) { + const std::string message = exception.what(); + EXPECT_NE(message.find("legacy fallback was not attempted"), std::string::npos) + << message; + } +} + +TEST(SolverFaceBasisAdapter, PointFaceRemainsLegacyValuePath) { + faceType face; + face.eType = consts::ElementType::PNT; + face.eNoN = 1; + face.nG = 1; + face.N.resize(1, 1); + face.Nx.resize(1, 1, 1); + face.N(0, 0) = -7.0; + face.Nx(0, 0, 0) = 42.0; + + nn::get_gnn(nullptr, 0, face); + + EXPECT_DOUBLE_EQ(face.N(0, 0), 1.0); + EXPECT_DOUBLE_EQ(face.Nx(0, 0, 0), 42.0); +} + +TEST(SolverFaceBasisAdapter, UnsupportedFacesThrowClearErrors) { + faceType nrb_face; + nrb_face.eType = consts::ElementType::NRB; + nrb_face.eNoN = 1; + nrb_face.nG = 1; + nrb_face.N.resize(1, 1); + nrb_face.Nx.resize(1, 1, 1); + EXPECT_THROW(nn::get_gnn(nullptr, 0, nrb_face), svmp::FE::NotImplementedException); + + faceType unknown_face; + unknown_face.eType = consts::ElementType::NA; + unknown_face.eNoN = 1; + unknown_face.nG = 1; + unknown_face.N.resize(1, 1); + unknown_face.Nx.resize(1, 
1, 1); + EXPECT_THROW(nn::get_gnn(nullptr, 0, unknown_face), svmp::FE::InvalidElementException); +} + +TEST(SolverBasisAdapter, QuadraturePathsRemainLegacyCompatible) { + constexpr double tol = 0.0; + + for (const auto& c : solver_basis_adapter_cases()) { + auto mesh_it = legacy_solver_nn::set_element_gauss_int_data.find(c.type); + if (mesh_it != legacy_solver_nn::set_element_gauss_int_data.end()) { + mshType legacy_mesh; + legacy_mesh.eType = c.type; + legacy_mesh.eNoN = c.eNoN; + legacy_mesh.nG = c.nG; + legacy_mesh.w.resize(c.nG); + legacy_mesh.xi.resize(c.insd, c.nG); + mesh_it->second(legacy_mesh); + + mshType adapter_mesh; + adapter_mesh.eType = c.type; + adapter_mesh.eNoN = c.eNoN; + adapter_mesh.nG = c.nG; + adapter_mesh.w.resize(c.nG); + adapter_mesh.xi.resize(c.insd, c.nG); + nn::get_gip(adapter_mesh); + + expect_vectors_near(adapter_mesh.w, legacy_mesh.w, tol); + expect_arrays_near(adapter_mesh.xi, legacy_mesh.xi, tol); + } + + auto scalar_it = legacy_solver_nn::get_element_gauss_int_data.find(c.type); + if (scalar_it != legacy_solver_nn::get_element_gauss_int_data.end()) { + Vector legacy_w(c.nG); + Vector adapter_w(c.nG); + Array legacy_xi(c.insd, c.nG); + Array adapter_xi(c.insd, c.nG); + + scalar_it->second(c.insd, c.nG, legacy_w, legacy_xi); + nn::get_gip(c.insd, c.type, c.nG, adapter_w, adapter_xi); + + expect_vectors_near(adapter_w, legacy_w, tol); + expect_arrays_near(adapter_xi, legacy_xi, tol); + } + } + + mshType legacy_tet; + legacy_tet.eType = consts::ElementType::TET4; + legacy_tet.eNoN = 4; + legacy_tet.nG = 4; + legacy_tet.qmTET4 = 0.25; + legacy_tet.w.resize(4); + legacy_tet.xi.resize(3, 4); + legacy_solver_nn::set_element_gauss_int_data.at(consts::ElementType::TET4)(legacy_tet); + + mshType adapter_tet; + adapter_tet.eType = consts::ElementType::TET4; + adapter_tet.eNoN = 4; + adapter_tet.nG = 4; + adapter_tet.qmTET4 = 0.25; + adapter_tet.w.resize(4); + adapter_tet.xi.resize(3, 4); + nn::get_gip(adapter_tet); + + 
expect_vectors_near(adapter_tet.w, legacy_tet.w, tol); + expect_arrays_near(adapter_tet.xi, legacy_tet.xi, tol); +} + +TEST(SolverBasisAdapter, HessiansCoverEveryMappedScalarVolumeElement) { + constexpr double partition_tol = 2e-10; + constexpr double finite_difference_tol = 2e-5; + constexpr double zero_tol = 2e-12; + + int covered = 0; + for (const auto& c : solver_hessian_adapter_cases()) { + SCOPED_TRACE("element=" + std::to_string(static_cast(c.type))); + Vector weights; + Array xi; + fill_legacy_quadrature(c, weights, xi); + + const int ind2 = packed_hessian_components(c.insd); + Array3 adapter_Nxx(ind2, c.eNoN, c.nG); + fill_array3(adapter_Nxx, std::numeric_limits::quiet_NaN()); + + for (int g = 0; g < c.nG; ++g) { + nn::get_gn_nxx(c.insd, ind2, c.type, c.eNoN, g, xi, adapter_Nxx); + expect_partition_hessian_identity(c, adapter_Nxx, g, partition_tol); + expect_solver_hessian_matches_gradient_finite_difference( + c, xi, g, adapter_Nxx, finite_difference_tol); + + if (c.type == consts::ElementType::LIN1 || + c.type == consts::ElementType::TRI3 || + c.type == consts::ElementType::TET4) { + expect_all_hessians_zero(c, adapter_Nxx, g, zero_tol); + } + } + + if (c.type == consts::ElementType::QUD4) { + EXPECT_TRUE(array3_has_nonzero_component(adapter_Nxx, 2, zero_tol)); + } else if (c.type == consts::ElementType::HEX8) { + EXPECT_TRUE(array3_has_nonzero_component(adapter_Nxx, 3, zero_tol)); + EXPECT_TRUE(array3_has_nonzero_component(adapter_Nxx, 4, zero_tol)); + EXPECT_TRUE(array3_has_nonzero_component(adapter_Nxx, 5, zero_tol)); + } else if (c.type == consts::ElementType::WDG) { + EXPECT_TRUE(array3_has_nonzero_component(adapter_Nxx, 5, zero_tol)); + } + ++covered; + } + + EXPECT_EQ(covered, 13); +} + +TEST(SolverBasisAdapter, HessianPackingMatchesLegacyWhereLegacyIsApproved) { + constexpr double tol = 2e-12; + + for (const auto& c : solver_legacy_hessian_parity_cases()) { + Vector weights; + Array xi; + fill_legacy_quadrature(c, weights, xi); + + const int ind2 
= packed_hessian_components(c.insd); + Array3 legacy_Nxx(ind2, c.eNoN, c.nG); + Array3 adapter_Nxx(ind2, c.eNoN, c.nG); + + for (int g = 0; g < c.nG; ++g) { + legacy_solver_nn::get_element_2nd_derivs.at(c.type)( + c.insd, ind2, c.eNoN, g, xi, legacy_Nxx); + nn::get_gn_nxx(c.insd, ind2, c.type, c.eNoN, g, xi, adapter_Nxx); + } + + expect_array3_near(adapter_Nxx, legacy_Nxx, tol); + } +} + +TEST(SolverBasisAdapter, Qud8HessiansDoNotUseLegacyFallback) { + using consts::ElementType; + SolverBasisAdapterCase c{ElementType::QUD8, ElementType::QUD9, 2, 8, 9}; + + Vector weights; + Array xi; + fill_legacy_quadrature(c, weights, xi); + + const int ind2 = packed_hessian_components(c.insd); + Array3 legacy_Nxx(ind2, c.eNoN, c.nG); + Array3 adapter_Nxx(ind2, c.eNoN, c.nG); + fill_array3(legacy_Nxx, 0.0); + fill_array3(adapter_Nxx, 0.0); + + for (int g = 0; g < c.nG; ++g) { + legacy_solver_nn::get_element_2nd_derivs.at(c.type)( + c.insd, ind2, c.eNoN, g, xi, legacy_Nxx); + nn::get_gn_nxx(c.insd, ind2, c.type, c.eNoN, g, xi, adapter_Nxx); + } + + double max_abs_difference = 0.0; + for (int g = 0; g < c.nG; ++g) { + for (int a = 0; a < c.eNoN; ++a) { + for (int row = 0; row < ind2; ++row) { + max_abs_difference = std::max( + max_abs_difference, + std::abs(adapter_Nxx(row, a, g) - legacy_Nxx(row, a, g))); + } + } + } + + EXPECT_GT(max_abs_difference, 1e-8); +} + +TEST(SolverBasisAdapter, UnsupportedHessianFamiliesRemainNoOp) { + Array xi(1, 1); + xi(0, 0) = 0.0; + Array3 Nxx(1, 1, 1); + + for (const auto unsupported : {consts::ElementType::NRB, consts::ElementType::PNT}) { + fill_array3(Nxx, 42.0); + nn::get_gn_nxx(1, 1, unsupported, 1, 0, xi, Nxx); + EXPECT_DOUBLE_EQ(Nxx(0, 0, 0), 42.0) + << "element=" << static_cast(unsupported); + } +} + +TEST(SolverBasisAdapter, InitFsMshPopulatesMappedHessiansWithoutLShpFGate) { + using consts::ElementType; + const SolverBasisAdapterCase cases[] = { + {ElementType::QUD4, ElementType::QUD4, 2, 4, 4}, + {ElementType::HEX8, ElementType::HEX8, 3, 
8, 8}, + {ElementType::HEX20, ElementType::HEX20, 3, 20, 27}, + {ElementType::HEX27, ElementType::HEX27, 3, 27, 27}, + {ElementType::WDG, ElementType::WDG, 3, 6, 6}, + }; + + for (const auto& c : cases) { + SCOPED_TRACE("element=" + std::to_string(static_cast(c.type))); + ComMod com_mod; + com_mod.nsd = c.insd; + mshType mesh = initialized_mesh_for_case(c, true); + + fs::init_fs_msh(com_mod, mesh); + + ASSERT_EQ(mesh.fs.size(), 1u); + ASSERT_EQ(mesh.fs[0].Nxx.nrows(), packed_hessian_components(c.insd)); + if (c.type == ElementType::QUD4) { + EXPECT_TRUE(array3_has_nonzero_component(mesh.fs[0].Nxx, 2, 2e-12)); + } else if (c.type == ElementType::HEX8) { + EXPECT_TRUE(array3_has_nonzero_component(mesh.fs[0].Nxx, 3, 2e-12)); + } else if (c.type == ElementType::WDG) { + EXPECT_TRUE(array3_has_nonzero_component(mesh.fs[0].Nxx, 5, 2e-12)); + } else { + bool has_nonzero = false; + for (int row = 0; row < mesh.fs[0].Nxx.nrows(); ++row) { + has_nonzero = has_nonzero || + array3_has_nonzero_component(mesh.fs[0].Nxx, row, 2e-12); + } + EXPECT_TRUE(has_nonzero); + } + } +} + +TEST(LagrangeBasis, QuadPartitionOfUnity) { + LagrangeBasis basis(ElementType::Quad4, 1); + svmp::FE::math::Vector xi{0.2, -0.3, 0.0}; + + std::vector values; + basis.evaluate_values(xi, values); + + double sum = std::accumulate(values.begin(), values.end(), 0.0); + EXPECT_NEAR(sum, 1.0, 1e-12); +} + +TEST(LagrangeBasis, LineGradientLinear) { + LagrangeBasis basis(ElementType::Line2, 1); + svmp::FE::math::Vector xi{0.0, 0.0, 0.0}; + std::vector grad; + basis.evaluate_gradients(xi, grad); + + ASSERT_EQ(grad.size(), 2u); + EXPECT_NEAR(grad[0][0], -0.5, 1e-12); + EXPECT_NEAR(grad[1][0], 0.5, 1e-12); +} + +TEST(LagrangeBasis, TrianglePartitionOfUnity) { + LagrangeBasis basis(ElementType::Triangle3, 1); + svmp::FE::math::Vector xi{0.2, 0.3, 0.0}; + std::vector values; + basis.evaluate_values(xi, values); + + double sum = std::accumulate(values.begin(), values.end(), 0.0); + EXPECT_NEAR(sum, 1.0, 1e-12); +} + 
+TEST(LagrangeBasis, SizeFormulasPerElement) { + for (int order = 0; order <= 3; ++order) { + { + LagrangeBasis line(ElementType::Line2, order); + EXPECT_EQ(line.size(), static_cast(order + 1)); + } + { + LagrangeBasis quad(ElementType::Quad4, order); + const std::size_t n1d = static_cast(order + 1); + EXPECT_EQ(quad.size(), n1d * n1d); + } + { + LagrangeBasis hex(ElementType::Hex8, order); + const std::size_t n1d = static_cast(order + 1); + EXPECT_EQ(hex.size(), n1d * n1d * n1d); + } + { + LagrangeBasis tri(ElementType::Triangle3, order); + const std::size_t expected = + static_cast(order + 1) * + static_cast(order + 2) / 2; + EXPECT_EQ(tri.size(), expected); + } + { + LagrangeBasis tet(ElementType::Tetra4, order); + const std::size_t expected = + static_cast(order + 1) * + static_cast(order + 2) * + static_cast(order + 3) / 6; + EXPECT_EQ(tet.size(), expected); + } + } +} + +TEST(LagrangeBasis, KroneckerDeltaAtNodes) { + const std::vector> cases = { + {ElementType::Line2, 1}, + {ElementType::Quad4, 1}, + {ElementType::Triangle3, 1}, + {ElementType::Tetra4, 1}, + {ElementType::Hex8, 1}, + {ElementType::Triangle3, 2}, + {ElementType::Tetra4, 2}, + {ElementType::Quad4, 2}, + {ElementType::Hex8, 2}, + {ElementType::Wedge6, 2} + }; + + for (const auto& c : cases) { + LagrangeBasis basis(c.first, c.second); + const auto& nodes = basis.nodes(); + ASSERT_EQ(nodes.size(), basis.size()); + + for (std::size_t i = 0; i < nodes.size(); ++i) { + std::vector vals; + basis.evaluate_values(nodes[i], vals); + ASSERT_EQ(vals.size(), nodes.size()); + for (std::size_t j = 0; j < nodes.size(); ++j) { + if (i == j) { + EXPECT_NEAR(vals[j], 1.0, 1e-12); + } else { + EXPECT_NEAR(vals[j], 0.0, 1e-12); + } + } + } + } +} + +TEST(LagrangeBasis, MatchesNodeOrderingConventionsForLinearAndQuadratic) { + // Tensor-product elements + expect_nodes_match_node_ordering(ElementType::Line2, 1, ElementType::Line2); + expect_nodes_match_node_ordering(ElementType::Line2, 2, ElementType::Line3); + 
expect_nodes_match_node_ordering(ElementType::Quad4, 1, ElementType::Quad4); + expect_nodes_match_node_ordering(ElementType::Quad4, 2, ElementType::Quad9); + expect_nodes_match_node_ordering(ElementType::Hex8, 1, ElementType::Hex8); + expect_nodes_match_node_ordering(ElementType::Hex8, 2, ElementType::Hex27); + + // Simplex elements + expect_nodes_match_node_ordering(ElementType::Triangle3, 1, ElementType::Triangle3); + expect_nodes_match_node_ordering(ElementType::Triangle3, 2, ElementType::Triangle6); + expect_nodes_match_node_ordering(ElementType::Tetra4, 1, ElementType::Tetra4); + expect_nodes_match_node_ordering(ElementType::Tetra4, 2, ElementType::Tetra10); + + // Mixed topology + expect_nodes_match_node_ordering(ElementType::Wedge6, 1, ElementType::Wedge6); + expect_nodes_match_node_ordering(ElementType::Wedge6, 2, ElementType::Wedge18); + + // Pyramid + expect_nodes_match_node_ordering(ElementType::Pyramid5, 1, ElementType::Pyramid5); + expect_nodes_match_node_ordering(ElementType::Pyramid14, 2, ElementType::Pyramid14); +} + +TEST(LagrangeBasis, WedgeAndPyramidPartitionOfUnity) { + { + LagrangeBasis wedge(ElementType::Wedge6, 1); + svmp::FE::math::Vector xi{Real(0.2), Real(0.1), Real(0.3)}; + std::vector vals; + wedge.evaluate_values(xi, vals); + const double sum = std::accumulate(vals.begin(), vals.end(), 0.0); + EXPECT_NEAR(sum, 1.0, 1e-12); + } + + { + LagrangeBasis wedge_q(ElementType::Wedge18, 2); + svmp::FE::math::Vector xi{Real(0.2), Real(0.1), Real(-0.25)}; + std::vector vals; + wedge_q.evaluate_values(xi, vals); + const double sum = std::accumulate(vals.begin(), vals.end(), 0.0); + EXPECT_NEAR(sum, 1.0, 1e-12); + + // Wedge18 should report 18 nodes in ReferenceNodeLayout + EXPECT_EQ(ReferenceNodeLayout::num_nodes(ElementType::Wedge18), 18u); + // Corner nodes should match Wedge6 vertices + auto v0 = ReferenceNodeLayout::get_node_coords(ElementType::Wedge18, 0); + auto v1 = ReferenceNodeLayout::get_node_coords(ElementType::Wedge18, 1); + auto v2 = 
ReferenceNodeLayout::get_node_coords(ElementType::Wedge18, 2); + EXPECT_NEAR(v0[0], Real(0), 1e-14); + EXPECT_NEAR(v0[1], Real(0), 1e-14); + EXPECT_NEAR(v0[2], Real(-1), 1e-14); + EXPECT_NEAR(v1[0], Real(1), 1e-14); + EXPECT_NEAR(v1[1], Real(0), 1e-14); + EXPECT_NEAR(v1[2], Real(-1), 1e-14); + EXPECT_NEAR(v2[0], Real(0), 1e-14); + EXPECT_NEAR(v2[1], Real(1), 1e-14); + EXPECT_NEAR(v2[2], Real(-1), 1e-14); + } + + { + LagrangeBasis pyr(ElementType::Pyramid5, 1); + svmp::FE::math::Vector xi{Real(0.1), Real(-0.2), Real(0.4)}; + std::vector vals; + pyr.evaluate_values(xi, vals); + const double sum = std::accumulate(vals.begin(), vals.end(), 0.0); + EXPECT_NEAR(sum, 1.0, 1e-12); + } +} + +TEST(LagrangeBasis, NonTensorStridedEvaluationMatchesPointwise) { + const std::vector> cases = { + {ElementType::Triangle3, 3}, + {ElementType::Tetra4, 3}, + {ElementType::Wedge6, 3}, + {ElementType::Pyramid5, 3}, + }; + const std::vector requests = { + {true, false, false}, + {false, true, false}, + {false, false, true}, + {true, true, false}, + {true, false, true}, + {false, true, true}, + {true, true, true}, + }; + + for (const auto& [type, order] : cases) { + for (const auto& request : requests) { + SCOPED_TRACE(static_cast(type)); + SCOPED_TRACE(order); + SCOPED_TRACE(request.values ? "values" : "no values"); + SCOPED_TRACE(request.gradients ? "gradients" : "no gradients"); + SCOPED_TRACE(request.hessians ? 
"hessians" : "no hessians"); + expect_strided_matches_pointwise(type, order, request); + } + } +} + +TEST(LagrangeBasis, RawOutputSinksMatchVectorEvaluationAcrossTopologies) { + const std::vector> cases = { + {ElementType::Line2, 4}, + {ElementType::Quad4, 3}, + {ElementType::Hex8, 3}, + {ElementType::Triangle3, 4}, + {ElementType::Tetra4, 3}, + {ElementType::Wedge6, 3}, + {ElementType::Pyramid5, 3}, + }; + + for (const auto& [type, order] : cases) { + SCOPED_TRACE(static_cast(type)); + SCOPED_TRACE(order); + expect_raw_to_matches_vector_evaluation(type, order); + } +} + +TEST(LagrangeBasis, CanonicalConstructorsSupportArbitraryOrders) { + const struct Case { + ElementType type; + int max_order; + } cases[] = { + {ElementType::Line2, 8}, + {ElementType::Triangle3, 6}, + {ElementType::Quad4, 6}, + {ElementType::Tetra4, 5}, + {ElementType::Hex8, 5}, + {ElementType::Wedge6, 5}, + {ElementType::Pyramid5, 5}, + }; + + for (const auto& c : cases) { + for (int order = 0; order <= c.max_order; ++order) { + LagrangeBasis basis(c.type, order); + EXPECT_EQ(basis.element_type(), c.type); + EXPECT_EQ(basis.order(), order); + EXPECT_EQ(basis.dimension(), expected_dimension(c.type)); + EXPECT_EQ(basis.size(), expected_lagrange_size(c.type, order)); + EXPECT_EQ(basis.nodes().size(), basis.size()); + } + } +} + +TEST(LagrangeBasis, AliasVariantsNormalizeToCanonicalPaths) { + expect_alias_matches_canonical( + ElementType::Line3, ElementType::Line2, 2, sample_points_for(ElementType::Line2)); + expect_alias_matches_canonical( + ElementType::Triangle6, ElementType::Triangle3, 2, sample_points_for(ElementType::Triangle3)); + expect_alias_matches_canonical( + ElementType::Quad9, ElementType::Quad4, 2, sample_points_for(ElementType::Quad4)); + expect_alias_matches_canonical( + ElementType::Tetra10, ElementType::Tetra4, 2, sample_points_for(ElementType::Tetra4)); + expect_alias_matches_canonical( + ElementType::Hex27, ElementType::Hex8, 2, sample_points_for(ElementType::Hex8)); + 
expect_alias_matches_canonical( + ElementType::Wedge18, ElementType::Wedge6, 2, sample_points_for(ElementType::Wedge6)); + expect_alias_matches_canonical( + ElementType::Pyramid14, ElementType::Pyramid5, 2, sample_points_for(ElementType::Pyramid5), + Real(2e-10)); +} + +TEST(LagrangeBasis, SerendipityVariantsRemainRejected) { + EXPECT_THROW((void)LagrangeBasis(ElementType::Quad8, 2), svmp::FE::FEException); + EXPECT_THROW((void)LagrangeBasis(ElementType::Hex20, 2), svmp::FE::FEException); + EXPECT_THROW((void)LagrangeBasis(ElementType::Wedge15, 2), svmp::FE::FEException); + EXPECT_THROW((void)LagrangeBasis(ElementType::Pyramid13, 2), svmp::FE::FEException); +} + +TEST(LagrangeBasis, GeneratedNodeOrderingIsDeterministicAcrossOrders) { + const struct Case { + ElementType type; + int max_order; + } cases[] = { + {ElementType::Line2, 8}, + {ElementType::Triangle3, 6}, + {ElementType::Quad4, 6}, + {ElementType::Tetra4, 5}, + {ElementType::Hex8, 5}, + {ElementType::Wedge6, 5}, + {ElementType::Pyramid5, 5}, + }; + + for (const auto& c : cases) { + for (int order = 0; order <= c.max_order; ++order) { + const auto generated_a = ReferenceNodeLayout::get_lagrange_node_coords(c.type, order); + const auto generated_b = ReferenceNodeLayout::get_lagrange_node_coords(c.type, order); + ASSERT_EQ(generated_a.size(), expected_lagrange_size(c.type, order)); + ASSERT_EQ(generated_a.size(), generated_b.size()); + for (std::size_t i = 0; i < generated_a.size(); ++i) { + EXPECT_TRUE(points_close(generated_a[i], generated_b[i])); + } + } + } +} + +TEST(LagrangeBasis, NodeOrderingMatchesReferenceCoordinateOracles) { + const std::array cases = { + ElementType::Line2, ElementType::Line3, + ElementType::Triangle3, ElementType::Triangle6, + ElementType::Quad4, ElementType::Quad8, ElementType::Quad9, + ElementType::Tetra4, ElementType::Tetra10, + ElementType::Hex8, ElementType::Hex20, ElementType::Hex27, + ElementType::Wedge6, ElementType::Wedge15, ElementType::Wedge18, + ElementType::Pyramid5, 
ElementType::Pyramid13, ElementType::Pyramid14, + }; + + for (ElementType type : cases) { + const auto expected = reference_node_coords(type); + ASSERT_FALSE(expected.empty()); + ASSERT_EQ(ReferenceNodeLayout::num_nodes(type), expected.size()); + for (std::size_t i = 0; i < expected.size(); ++i) { + const auto actual = ReferenceNodeLayout::get_node_coords(type, i); + EXPECT_TRUE(points_close(actual, expected[i])) + << "Element type " << static_cast(type) + << ", node " << i; + } + } +} + +TEST(LagrangeBasis, GeneratedLowOrderOrderingMatchesPublicAliasPaths) { + const struct Case { + ElementType type; + int order; + ElementType public_alias; + } cases[] = { + {ElementType::Line2, 1, ElementType::Line2}, + {ElementType::Line2, 2, ElementType::Line3}, + {ElementType::Triangle3, 1, ElementType::Triangle3}, + {ElementType::Triangle3, 2, ElementType::Triangle6}, + {ElementType::Quad4, 1, ElementType::Quad4}, + {ElementType::Quad4, 2, ElementType::Quad9}, + {ElementType::Tetra4, 1, ElementType::Tetra4}, + {ElementType::Tetra4, 2, ElementType::Tetra10}, + {ElementType::Hex8, 1, ElementType::Hex8}, + {ElementType::Hex8, 2, ElementType::Hex27}, + {ElementType::Wedge6, 1, ElementType::Wedge6}, + {ElementType::Wedge6, 2, ElementType::Wedge18}, + {ElementType::Pyramid5, 1, ElementType::Pyramid5}, + {ElementType::Pyramid5, 2, ElementType::Pyramid14}, + }; + + for (const auto& c : cases) { + const auto generated = ReferenceNodeLayout::get_lagrange_node_coords(c.type, c.order); + ASSERT_EQ(generated.size(), ReferenceNodeLayout::num_nodes(c.public_alias)); + for (std::size_t i = 0; i < generated.size(); ++i) { + const auto public_alias = ReferenceNodeLayout::get_node_coords(c.public_alias, i); + EXPECT_TRUE(points_close(generated[i], public_alias)); + } + } +} + +TEST(LagrangeBasis, KroneckerDeltaAcrossCanonicalTopologiesAndOrders) { + const struct Case { + ElementType type; + int max_order; + } cases[] = { + {ElementType::Line2, 8}, + {ElementType::Triangle3, 6}, + 
{ElementType::Quad4, 6}, + {ElementType::Tetra4, 5}, + {ElementType::Hex8, 5}, + {ElementType::Wedge6, 5}, + {ElementType::Pyramid5, 5}, + }; + + for (const auto& c : cases) { + for (int order = 0; order <= c.max_order; ++order) { + LagrangeBasis basis(c.type, order); + ASSERT_EQ(basis.size(), expected_lagrange_size(c.type, order)); + + std::vector values; + for (std::size_t node_i = 0; node_i < basis.size(); ++node_i) { + basis.evaluate_values(basis.nodes()[node_i], values); + ASSERT_EQ(values.size(), basis.size()); + for (std::size_t basis_i = 0; basis_i < basis.size(); ++basis_i) { + EXPECT_NEAR(values[basis_i], basis_i == node_i ? Real(1) : Real(0), Real(2e-10)) + << "Element type " << static_cast(c.type) + << ", order " << order + << ", node " << node_i + << ", basis " << basis_i; + } + } + } + } +} + +TEST(LagrangeBasis, PartitionGradientAndHessianSumsAcrossCanonicalTopologiesAndOrders) { + const struct Case { + ElementType type; + int max_order; + Real tol; + } cases[] = { + {ElementType::Line2, 8, Real(1e-11)}, + {ElementType::Triangle3, 6, Real(1e-10)}, + {ElementType::Quad4, 6, Real(1e-10)}, + {ElementType::Tetra4, 5, Real(2e-10)}, + {ElementType::Hex8, 5, Real(2e-10)}, + {ElementType::Wedge6, 5, Real(5e-10)}, + {ElementType::Pyramid5, 5, Real(5e-7)}, + }; + + for (const auto& c : cases) { + for (int order = 0; order <= c.max_order; ++order) { + LagrangeBasis basis(c.type, order); + expect_partition_gradient_hessian_sums(basis, dense_sample_points_for(c.type), c.tol, c.tol); + } + } +} + +TEST(LagrangeBasis, SimplexAxisScratchDynamicFallbackForHighOrder) { + const struct Case { + ElementType type; + int order; + Point point; + Real tolerance; + } cases[] = { + {ElementType::Triangle3, 13, Point{Real(0.19), Real(0.31), Real(0)}, Real(1e-8)}, + {ElementType::Tetra4, 13, Point{Real(0.13), Real(0.17), Real(0.19)}, Real(1e-7)}, + }; + + for (const auto& c : cases) { + LagrangeBasis basis(c.type, c.order); + std::vector values; + std::vector gradients; + 
std::vector hessians; + basis.evaluate_all(c.point, values, gradients, hessians); + + ASSERT_EQ(values.size(), basis.size()); + ASSERT_EQ(gradients.size(), basis.size()); + ASSERT_EQ(hessians.size(), basis.size()); + + Real value_sum = Real(0); + Gradient gradient_sum{}; + Hessian hessian_sum{}; + for (std::size_t i = 0; i < basis.size(); ++i) { + value_sum += values[i]; + for (std::size_t d = 0; d < 3u; ++d) { + gradient_sum[d] += gradients[i][d]; + for (std::size_t e = 0; e < 3u; ++e) { + hessian_sum(d, e) += hessians[i](d, e); + } + } + } + + EXPECT_NEAR(value_sum, Real(1), c.tolerance); + for (std::size_t d = 0; d < 3u; ++d) { + EXPECT_NEAR(gradient_sum[d], Real(0), c.tolerance); + for (std::size_t e = 0; e < 3u; ++e) { + EXPECT_NEAR(hessian_sum(d, e), Real(0), Real(10) * c.tolerance); + } + } + } +} + +TEST(LagrangeBasis, HighOrderAxisNearNodeMaintainsPartitionAndDerivativeSums) { + const int order = 16; + const LagrangeBasis basis(ElementType::Line2, order); + const Real node = Real(-1) + Real(2 * 5) / static_cast(order); + const Point point{node + Real(1e-7), Real(0), Real(0)}; + + std::vector values; + std::vector gradients; + std::vector hessians; + basis.evaluate_all(point, values, gradients, hessians); + ASSERT_EQ(values.size(), basis.size()); + + Real value_sum = Real(0); + Real gradient_sum = Real(0); + Real hessian_sum = Real(0); + for (std::size_t i = 0; i < basis.size(); ++i) { + value_sum += values[i]; + gradient_sum += gradients[i][0]; + hessian_sum += hessians[i](0, 0); + } + + EXPECT_NEAR(value_sum, Real(1), Real(1e-12)); + EXPECT_NEAR(gradient_sum, Real(0), Real(1e-8)); + EXPECT_NEAR(hessian_sum, Real(0), Real(1e-5)); +} + +TEST(LagrangeBasis, PyramidFaceTracesMatchLowerDimensionalLagrangeBases) { + const PyramidFace faces[] = { + PyramidFace::Base, + PyramidFace::South, + PyramidFace::East, + PyramidFace::North, + PyramidFace::West, + }; + + for (int order = 1; order <= 5; ++order) { + for (const auto face : faces) { + 
expect_pyramid_face_trace_matches_lower_basis( + order, face, face == PyramidFace::Base ? Real(2e-10) : Real(5e-10)); + } + } +} + +TEST(LagrangeBasis, PyramidEdgeTracesMatchLineLagrangeBasis) { + const PyramidEdge edges[] = { + PyramidEdge::BaseSouth, + PyramidEdge::BaseEast, + PyramidEdge::BaseNorth, + PyramidEdge::BaseWest, + PyramidEdge::VerticalSW, + PyramidEdge::VerticalSE, + PyramidEdge::VerticalNE, + PyramidEdge::VerticalNW, + }; + + for (int order = 1; order <= 5; ++order) { + for (const auto edge : edges) { + expect_pyramid_edge_trace_matches_line_basis(order, edge, Real(5e-10)); + } + } +} + +TEST(LagrangeBasis, Pyramid14RationalNodalAndPartition) { + using svmp::FE::basis::ReferenceNodeLayout; + + LagrangeBasis basis(ElementType::Pyramid14, 2); + EXPECT_EQ(basis.dimension(), 3); + EXPECT_EQ(basis.size(), 14u); + + // Kronecker nodal property at all Pyramid14 nodes + for (std::size_t i = 0; i < basis.size(); ++i) { + auto xi = ReferenceNodeLayout::get_node_coords(ElementType::Pyramid14, i); + std::vector vals; + basis.evaluate_values(xi, vals); + ASSERT_EQ(vals.size(), basis.size()); + for (std::size_t j = 0; j < basis.size(); ++j) { + const double expected = (i == j) ? 
1.0 : 0.0; + EXPECT_NEAR(vals[j], expected, 1e-12); + } + } + + // Partition of unity at an interior point + svmp::FE::math::Vector xi{Real(0.1), Real(-0.2), Real(0.3)}; + std::vector vals; + basis.evaluate_values(xi, vals); + const double sum = std::accumulate(vals.begin(), vals.end(), 0.0); + EXPECT_NEAR(sum, 1.0, 1e-12); +} + +TEST(LagrangeBasis, Pyramid14GradientSumZero) { + LagrangeBasis basis(ElementType::Pyramid14, 2); + svmp::FE::math::Vector xi{Real(0.15), Real(-0.1), Real(0.3)}; + + std::vector grads; + basis.evaluate_gradients(xi, grads); + ASSERT_EQ(grads.size(), basis.size()); + + Gradient sum{}; + for (const auto& g : grads) { + sum[0] += g[0]; + sum[1] += g[1]; + sum[2] += g[2]; + } + EXPECT_NEAR(sum[0], 0.0, 1e-8); + EXPECT_NEAR(sum[1], 0.0, 1e-8); + EXPECT_NEAR(sum[2], 0.0, 1e-8); +} + +TEST(LagrangeBasis, HigherOrderP4KroneckerAndPartition) { + struct Case { + ElementType type; + int order; + svmp::FE::math::Vector xi; + }; + + const std::vector cases = { + {ElementType::Line2, 4, {Real(0.11), Real(0), Real(0)}}, + {ElementType::Quad4, 4, {Real(0.2), Real(-0.3), Real(0)}}, + {ElementType::Triangle3, 4, {Real(0.2), Real(0.3), Real(0)}}, + {ElementType::Hex8, 4, {Real(0.2), Real(-0.3), Real(0.4)}}, + }; + + for (const auto& c : cases) { + LagrangeBasis basis(c.type, c.order); + + // Partition of unity at an interior point + std::vector values; + basis.evaluate_values(c.xi, values); + const double sum = std::accumulate(values.begin(), values.end(), 0.0); + EXPECT_NEAR(sum, 1.0, 1e-12); + + // Kronecker delta property at all nodes + const auto& nodes = basis.nodes(); + ASSERT_EQ(nodes.size(), basis.size()); + for (std::size_t i = 0; i < nodes.size(); ++i) { + basis.evaluate_values(nodes[i], values); + ASSERT_EQ(values.size(), nodes.size()); + for (std::size_t j = 0; j < nodes.size(); ++j) { + const double expected = (i == j) ? 
1.0 : 0.0; + EXPECT_NEAR(values[j], expected, 1e-12); + } + } + } +} + +TEST(LagrangeBasis, Pyramid14InterpolatesQuadraticPolynomials) { + using svmp::FE::basis::ReferenceNodeLayout; + + LagrangeBasis basis(ElementType::Pyramid14, 2); + const std::size_t n = basis.size(); + + // Precompute nodal coordinates + std::vector> nodes; + nodes.reserve(n); + for (std::size_t i = 0; i < n; ++i) { + nodes.push_back(ReferenceNodeLayout::get_node_coords(ElementType::Pyramid14, i)); + } + + auto interpolate_and_check = [&](auto f, Real tol) { + // Nodal coefficients + std::vector coeffs(n); + for (std::size_t i = 0; i < n; ++i) { + const auto& x = nodes[i]; + coeffs[i] = f(x[0], x[1], x[2]); + } + + // Test at a few interior points + const svmp::FE::math::Vector test_pts[] = { + {Real(0.1), Real(-0.2), Real(0.2)}, + {Real(-0.2), Real(0.15), Real(0.4)}, + {Real(0.05), Real(0.05), Real(0.3)} + }; + + for (const auto& xi : test_pts) { + std::vector vals; + basis.evaluate_values(xi, vals); + ASSERT_EQ(vals.size(), n); + + Real u_interp = Real(0); + for (std::size_t i = 0; i < n; ++i) { + u_interp += coeffs[i] * vals[i]; + } + + const Real u_exact = f(xi[0], xi[1], xi[2]); + EXPECT_NEAR(u_interp, u_exact, tol); + } + }; + + // Constant, linear and quadratic monomials + interpolate_and_check([](Real, Real, Real) { return Real(1); }, Real(1e-12)); + interpolate_and_check([](Real x, Real, Real) { return x; }, Real(1e-11)); + interpolate_and_check([](Real, Real y, Real) { return y; }, Real(1e-11)); + interpolate_and_check([](Real, Real, Real z) { return z; }, Real(1e-11)); + interpolate_and_check([](Real x, Real y, Real) { return x * y; }, Real(1e-10)); + interpolate_and_check([](Real x, Real, Real z) { return x * z; }, Real(1e-10)); + interpolate_and_check([](Real, Real y, Real z) { return y * z; }, Real(1e-10)); + interpolate_and_check([](Real x, Real, Real) { return x * x; }, Real(1e-10)); + interpolate_and_check([](Real, Real y, Real) { return y * y; }, Real(1e-10)); + 
interpolate_and_check([](Real, Real, Real z) { return z * z; }, Real(1e-10)); +} + +TEST(LagrangeBasis, Pyramid14GradientMatchesLinearFunctionGradient) { + using svmp::FE::basis::ReferenceNodeLayout; + + LagrangeBasis basis(ElementType::Pyramid14, 2); + const std::size_t n = basis.size(); + + // Nodal coordinates and coefficients for f(x,y,z) = ax + by + cz + const Real a = Real(1.2); + const Real b = Real(-0.7); + const Real c = Real(0.5); + + std::vector coeffs(n); + for (std::size_t i = 0; i < n; ++i) { + const auto x = ReferenceNodeLayout::get_node_coords(ElementType::Pyramid14, i); + coeffs[i] = a * x[0] + b * x[1] + c * x[2]; + } + + const svmp::FE::math::Vector xi{Real(0.1), Real(-0.15), Real(0.35)}; + + std::vector grads; + basis.evaluate_gradients(xi, grads); + ASSERT_EQ(grads.size(), n); + + Gradient g_interp{}; + for (std::size_t i = 0; i < n; ++i) { + g_interp[0] += coeffs[i] * grads[i][0]; + g_interp[1] += coeffs[i] * grads[i][1]; + g_interp[2] += coeffs[i] * grads[i][2]; + } + + EXPECT_NEAR(g_interp[0], a, 1e-6); + EXPECT_NEAR(g_interp[1], b, 1e-6); + EXPECT_NEAR(g_interp[2], c, 1e-6); +} + +TEST(LagrangeBasis, PyramidApexValuesRemainExactAcrossRepresentativeOrders) { + const struct Case { + ElementType type; + int order; + } cases[] = { + {ElementType::Pyramid5, 1}, + {ElementType::Pyramid14, 2}, + {ElementType::Pyramid5, 4}, + }; + + const svmp::FE::math::Vector apex{Real(0), Real(0), Real(1)}; + for (const auto& c : cases) { + LagrangeBasis basis(c.type, c.order); + std::vector values; + basis.evaluate_values(apex, values); + ASSERT_EQ(values.size(), basis.size()); + + const auto& nodes = basis.nodes(); + auto apex_it = std::find_if( + nodes.begin(), nodes.end(), + [](const auto& node) { + return std::abs(node[0]) <= Real(1e-14) && + std::abs(node[1]) <= Real(1e-14) && + std::abs(node[2] - Real(1)) <= Real(1e-14); + }); + ASSERT_NE(apex_it, nodes.end()); + const std::size_t apex_index = static_cast( + std::distance(nodes.begin(), apex_it)); + + 
Real sum = Real(0); + for (std::size_t i = 0; i < values.size(); ++i) { + EXPECT_TRUE(std::isfinite(static_cast(values[i]))); + sum += values[i]; + const Real expected = (i == apex_index) ? Real(1) : Real(0); + EXPECT_NEAR(values[i], expected, 1e-12) + << "order " << c.order << ", basis " << i; + } + EXPECT_NEAR(sum, Real(1), 1e-12); + } +} + +TEST(LagrangeBasis, PyramidGradientAtExactApexThrowsWhenLimitIsNotUnique) { + const struct Case { + ElementType type; + int order; + } cases[] = { + {ElementType::Pyramid5, 1}, + {ElementType::Pyramid14, 2}, + {ElementType::Pyramid5, 4}, + }; + + const svmp::FE::math::Vector apex{Real(0), Real(0), Real(1)}; + for (const auto& c : cases) { + LagrangeBasis basis(c.type, c.order); + std::vector gradients; + EXPECT_THROW(basis.evaluate_gradients(apex, gradients), svmp::FE::basis::BasisEvaluationException) + << "order " << c.order; + } +} + +TEST(LagrangeBasis, PyramidApexValuesMatchDirectionalNearApexLimits) { + const struct Case { + ElementType type; + int order; + Real tol; + } cases[] = { + {ElementType::Pyramid5, 1, Real(3e-6)}, + {ElementType::Pyramid14, 2, Real(4e-6)}, + {ElementType::Pyramid5, 4, Real(1e-5)}, + }; + + const std::array, 4> directions = {{ + {Real(0), Real(0)}, + {Real(0.35), Real(-0.25)}, + {Real(-0.50), Real(0.45)}, + {Real(0.20), Real(0.60)}, + }}; + const Real t = Real(1e-6); + const svmp::FE::math::Vector apex{Real(0), Real(0), Real(1)}; + + for (const auto& c : cases) { + LagrangeBasis basis(c.type, c.order); + std::vector apex_values; + basis.evaluate_values(apex, apex_values); + + for (const auto& direction : directions) { + const svmp::FE::math::Vector xi{ + t * direction[0], + t * direction[1], + Real(1) - t + }; + + std::vector values; + basis.evaluate_values(xi, values); + ASSERT_EQ(values.size(), apex_values.size()); + + for (std::size_t i = 0; i < values.size(); ++i) { + EXPECT_NEAR(values[i], apex_values[i], c.tol) + << "order " << c.order + << ", basis " << i + << ", direction (" << 
direction[0] << ", " << direction[1] << ")"; + } + } + } +} + +TEST(LagrangeBasis, PyramidNearApexGradientShowsDirectionalSpread) { + const struct Case { + ElementType type; + int order; + Real min_spread; + } cases[] = { + {ElementType::Pyramid5, 1, Real(5e-2)}, + {ElementType::Pyramid14, 2, Real(5e-2)}, + }; + + const std::array, 4> directions = {{ + {Real(0), Real(0)}, + {Real(0.45), Real(-0.30)}, + {Real(-0.35), Real(0.40)}, + {Real(0.25), Real(0.55)}, + }}; + const Real t = Real(1e-6); + + for (const auto& c : cases) { + LagrangeBasis basis(c.type, c.order); + double max_spread = 0.0; + + std::vector> directional_gradients; + directional_gradients.reserve(directions.size()); + for (const auto& direction : directions) { + const svmp::FE::math::Vector xi{ + t * direction[0], + t * direction[1], + Real(1) - t + }; + + std::vector gradients; + basis.evaluate_gradients(xi, gradients); + directional_gradients.push_back(std::move(gradients)); + } + + for (std::size_t i = 0; i < basis.size(); ++i) { + for (int d = 0; d < 3; ++d) { + double min_value = std::numeric_limits::infinity(); + double max_value = -std::numeric_limits::infinity(); + for (const auto& gradients : directional_gradients) { + const double value = static_cast(gradients[i][static_cast(d)]); + min_value = std::min(min_value, value); + max_value = std::max(max_value, value); + } + max_spread = std::max(max_spread, max_value - min_value); + } + } + + EXPECT_GT(max_spread, static_cast(c.min_spread)) + << "order " << c.order; + } +} + +TEST(LagrangeBasis, GradientSumZeroQuadAndTet) { + const std::vector>> cases = { + {ElementType::Quad4, svmp::FE::math::Vector{Real(0.2), Real(-0.1), Real(0)}}, + {ElementType::Tetra4, svmp::FE::math::Vector{Real(0.1), Real(0.2), Real(0.1)}} + }; + + for (const auto& c : cases) { + LagrangeBasis basis(c.first, 1); + std::vector grads; + basis.evaluate_gradients(c.second, grads); + + ASSERT_EQ(grads.size(), basis.size()); + Gradient sum{}; + for (const auto& g : grads) { + 
sum[0] += g[0]; + sum[1] += g[1]; + sum[2] += g[2]; + } + EXPECT_NEAR(sum[0], 0.0, 1e-12); + EXPECT_NEAR(sum[1], 0.0, 1e-12); + EXPECT_NEAR(sum[2], 0.0, 1e-12); + } +} + +TEST(LagrangeBasis, HexPartitionAndGradientSumZeroOrderThree) { + LagrangeBasis basis(ElementType::Hex8, 3); + svmp::FE::math::Vector xi{Real(0.1), Real(-0.2), Real(0.25)}; + + std::vector values; + basis.evaluate_values(xi, values); + const double sum = std::accumulate(values.begin(), values.end(), 0.0); + EXPECT_NEAR(sum, 1.0, 1e-12); + + std::vector grads; + basis.evaluate_gradients(xi, grads); + Gradient gsum{}; + for (const auto& g : grads) { + gsum[0] += g[0]; + gsum[1] += g[1]; + gsum[2] += g[2]; + } + EXPECT_NEAR(gsum[0], 0.0, 1e-10); + EXPECT_NEAR(gsum[1], 0.0, 1e-10); + EXPECT_NEAR(gsum[2], 0.0, 1e-10); +} + +TEST(LagrangeBasis, OracleLine3ValuesGradientsAndHessians) { + LagrangeBasis basis(ElementType::Line3, 2); + const Point xi{Real(0.2), Real(0), Real(0)}; + + std::vector values; + std::vector gradients; + std::vector hessians; + basis.evaluate_values(xi, values); + basis.evaluate_gradients(xi, gradients); + basis.evaluate_hessians(xi, hessians); + + ASSERT_EQ(values.size(), 3u); + ASSERT_EQ(gradients.size(), 3u); + ASSERT_EQ(hessians.size(), 3u); + + const Real expected_values[] = {Real(-2) / Real(25), Real(3) / Real(25), Real(24) / Real(25)}; + const Real expected_gradients[] = {Real(-3) / Real(10), Real(7) / Real(10), Real(-2) / Real(5)}; + const Real expected_hessians[] = {Real(1), Real(1), Real(-2)}; + + for (std::size_t i = 0; i < 3; ++i) { + EXPECT_NEAR(values[i], expected_values[i], 1e-14); + EXPECT_NEAR(gradients[i][0], expected_gradients[i], 1e-14); + EXPECT_NEAR(hessians[i](0, 0), expected_hessians[i], 1e-14); + } +} + +TEST(LagrangeBasis, OracleTriangle3ValuesGradientsAndHessians) { + LagrangeBasis basis(ElementType::Triangle3, 1); + const Point xi{Real(0.2), Real(0.3), Real(0)}; + + std::vector values; + std::vector gradients; + std::vector hessians; + 
basis.evaluate_values(xi, values); + basis.evaluate_gradients(xi, gradients); + basis.evaluate_hessians(xi, hessians); + + ASSERT_EQ(values.size(), 3u); + const Point expected_gradients[] = { + Point{Real(-1), Real(-1), Real(0)}, + Point{Real(1), Real(0), Real(0)}, + Point{Real(0), Real(1), Real(0)} + }; + const Real expected_values[] = {Real(0.5), Real(0.2), Real(0.3)}; + + for (std::size_t i = 0; i < 3; ++i) { + EXPECT_NEAR(values[i], expected_values[i], 1e-14); + EXPECT_NEAR(gradients[i][0], expected_gradients[i][0], 1e-14); + EXPECT_NEAR(gradients[i][1], expected_gradients[i][1], 1e-14); + for (int a = 0; a < 2; ++a) { + for (int b = 0; b < 2; ++b) { + EXPECT_NEAR(hessians[i](static_cast(a), static_cast(b)), + Real(0), 1e-14); + } + } + } +} + +TEST(LagrangeBasis, OracleQuad4ValuesGradientsAndHessians) { + LagrangeBasis basis(ElementType::Quad4, 1); + const Point xi{Real(0.2), Real(-0.4), Real(0)}; + + std::vector values; + std::vector gradients; + std::vector hessians; + basis.evaluate_values(xi, values); + basis.evaluate_gradients(xi, gradients); + basis.evaluate_hessians(xi, hessians); + + ASSERT_EQ(values.size(), 4u); + const Real expected_values[] = {Real(7) / Real(25), Real(21) / Real(50), + Real(9) / Real(50), Real(3) / Real(25)}; + const Point expected_gradients[] = { + Point{Real(-7) / Real(20), Real(-1) / Real(5), Real(0)}, + Point{Real(7) / Real(20), Real(-3) / Real(10), Real(0)}, + Point{Real(3) / Real(20), Real(3) / Real(10), Real(0)}, + Point{Real(-3) / Real(20), Real(1) / Real(5), Real(0)} + }; + const Real expected_hxy[] = {Real(1) / Real(4), Real(-1) / Real(4), + Real(1) / Real(4), Real(-1) / Real(4)}; + + for (std::size_t i = 0; i < 4; ++i) { + EXPECT_NEAR(values[i], expected_values[i], 1e-14); + EXPECT_NEAR(gradients[i][0], expected_gradients[i][0], 1e-14); + EXPECT_NEAR(gradients[i][1], expected_gradients[i][1], 1e-14); + EXPECT_NEAR(hessians[i](0, 0), Real(0), 1e-14); + EXPECT_NEAR(hessians[i](1, 1), Real(0), 1e-14); + 
EXPECT_NEAR(hessians[i](0, 1), expected_hxy[i], 1e-14); + EXPECT_NEAR(hessians[i](1, 0), expected_hxy[i], 1e-14); + } +} + +TEST(LagrangeBasis, OracleWedge6ValuesGradientsAndHessians) { + LagrangeBasis basis(ElementType::Wedge6, 1); + const Point xi{Real(0.2), Real(0.25), Real(-0.3)}; + + std::vector values; + std::vector gradients; + std::vector hessians; + basis.evaluate_values(xi, values); + basis.evaluate_gradients(xi, gradients); + basis.evaluate_hessians(xi, hessians); + + ASSERT_EQ(values.size(), 6u); + const Real expected_values[] = { + Real(143) / Real(400), Real(13) / Real(100), Real(13) / Real(80), + Real(77) / Real(400), Real(7) / Real(100), Real(7) / Real(80) + }; + const Point expected_gradients[] = { + Point{Real(-13) / Real(20), Real(-13) / Real(20), Real(-11) / Real(40)}, + Point{Real(13) / Real(20), Real(0), Real(-1) / Real(10)}, + Point{Real(0), Real(13) / Real(20), Real(-1) / Real(8)}, + Point{Real(-7) / Real(20), Real(-7) / Real(20), Real(11) / Real(40)}, + Point{Real(7) / Real(20), Real(0), Real(1) / Real(10)}, + Point{Real(0), Real(7) / Real(20), Real(1) / Real(8)} + }; + const Point expected_hxz[] = { + Point{Real(1) / Real(2), Real(1) / Real(2), Real(0)}, + Point{Real(-1) / Real(2), Real(0), Real(0)}, + Point{Real(0), Real(-1) / Real(2), Real(0)}, + Point{Real(-1) / Real(2), Real(-1) / Real(2), Real(0)}, + Point{Real(1) / Real(2), Real(0), Real(0)}, + Point{Real(0), Real(1) / Real(2), Real(0)} + }; + + for (std::size_t i = 0; i < 6; ++i) { + EXPECT_NEAR(values[i], expected_values[i], 1e-14); + EXPECT_NEAR(gradients[i][0], expected_gradients[i][0], 1e-14); + EXPECT_NEAR(gradients[i][1], expected_gradients[i][1], 1e-14); + EXPECT_NEAR(gradients[i][2], expected_gradients[i][2], 1e-14); + EXPECT_NEAR(hessians[i](0, 0), Real(0), 1e-14); + EXPECT_NEAR(hessians[i](1, 1), Real(0), 1e-14); + EXPECT_NEAR(hessians[i](2, 2), Real(0), 1e-14); + EXPECT_NEAR(hessians[i](0, 1), Real(0), 1e-14); + EXPECT_NEAR(hessians[i](1, 0), Real(0), 1e-14); + 
EXPECT_NEAR(hessians[i](0, 2), expected_hxz[i][0], 1e-14); + EXPECT_NEAR(hessians[i](2, 0), expected_hxz[i][0], 1e-14); + EXPECT_NEAR(hessians[i](1, 2), expected_hxz[i][1], 1e-14); + EXPECT_NEAR(hessians[i](2, 1), expected_hxz[i][1], 1e-14); + } +} + +TEST(LagrangeBasis, DeterministicBoundarySweepMaintainsPartitionAndFiniteDerivatives) { + const std::vector> cases = { + {ElementType::Line2, 1}, + {ElementType::Line3, 2}, + {ElementType::Triangle3, 1}, + {ElementType::Triangle6, 2}, + {ElementType::Quad4, 1}, + {ElementType::Quad9, 2}, + {ElementType::Tetra4, 1}, + {ElementType::Tetra10, 2}, + {ElementType::Hex8, 1}, + {ElementType::Hex27, 2}, + {ElementType::Wedge6, 1}, + {ElementType::Wedge18, 2}, + {ElementType::Pyramid5, 1}, + {ElementType::Pyramid14, 2}, + }; + + for (const auto& [type, order] : cases) { + LagrangeBasis basis(type, order); + for (const auto& xi : boundary_stress_points_for(type)) { + std::vector values; + std::vector gradients; + std::vector hessians; + basis.evaluate_values(xi, values); + basis.evaluate_gradients(xi, gradients); + basis.evaluate_hessians(xi, hessians); + + ASSERT_EQ(values.size(), basis.size()); + ASSERT_EQ(gradients.size(), basis.size()); + ASSERT_EQ(hessians.size(), basis.size()); + + Real sum = Real(0); + for (Real value : values) { + EXPECT_TRUE(std::isfinite(value)); + sum += value; + } + expect_all_finite(gradients); + expect_hessians_finite(hessians, basis.dimension()); + EXPECT_NEAR(sum, Real(1), type == ElementType::Pyramid5 || type == ElementType::Pyramid14 + ? 
Real(1e-8) + : Real(1e-12)) + << "type=" << static_cast(type) + << ", order=" << order + << ", xi=(" << xi[0] << "," << xi[1] << "," << xi[2] << ")"; + } + } +} + +TEST(LagrangeBasis, FiniteDifferenceGradientsAcrossSupportedLinearShapes) { + const std::vector cases = { + {ElementType::Line2, 1, sample_points_for(ElementType::Line2)}, + {ElementType::Triangle3, 1, sample_points_for(ElementType::Triangle3)}, + {ElementType::Quad4, 1, sample_points_for(ElementType::Quad4)}, + {ElementType::Tetra4, 1, sample_points_for(ElementType::Tetra4)}, + {ElementType::Hex8, 1, sample_points_for(ElementType::Hex8)}, + {ElementType::Wedge6, 1, sample_points_for(ElementType::Wedge6)}, + {ElementType::Pyramid5, 1, sample_points_for(ElementType::Pyramid5)}, + }; + + for (const auto& c : cases) { + expect_gradients_match_finite_difference(c, Real(1e-6), Real(1e-6)); + } +} + +TEST(LagrangeBasis, FiniteDifferenceGradientsAcrossSupportedQuadraticShapes) { + const std::vector cases = { + {ElementType::Line3, 2, sample_points_for(ElementType::Line3)}, + {ElementType::Triangle6, 2, sample_points_for(ElementType::Triangle6)}, + {ElementType::Quad9, 2, sample_points_for(ElementType::Quad9)}, + {ElementType::Tetra10, 2, sample_points_for(ElementType::Tetra10)}, + {ElementType::Hex27, 2, sample_points_for(ElementType::Hex27)}, + {ElementType::Wedge18, 2, sample_points_for(ElementType::Wedge18)}, + {ElementType::Pyramid14, 2, sample_points_for(ElementType::Pyramid14)}, + }; + + for (const auto& c : cases) { + expect_gradients_match_finite_difference(c, Real(1e-6), Real(2e-6)); + } +} + +TEST(LagrangeBasis, LinearPolynomialReproductionAcrossSupportedLinearShapes) { + const std::vector cases = { + {ElementType::Line2, 1, sample_points_for(ElementType::Line2)}, + {ElementType::Triangle3, 1, sample_points_for(ElementType::Triangle3)}, + {ElementType::Quad4, 1, sample_points_for(ElementType::Quad4)}, + {ElementType::Tetra4, 1, sample_points_for(ElementType::Tetra4)}, + {ElementType::Hex8, 1, 
sample_points_for(ElementType::Hex8)}, + {ElementType::Wedge6, 1, sample_points_for(ElementType::Wedge6)}, + {ElementType::Pyramid5, 1, sample_points_for(ElementType::Pyramid5)}, + }; + + const std::vector> exponents = { + {0, 0, 0}, + {1, 0, 0}, + {0, 1, 0}, + {0, 0, 1}, + }; + + for (const auto& c : cases) { + const std::vector> relevant( + exponents.begin(), + exponents.begin() + static_cast(c.type == ElementType::Line2 ? 2 : + (c.type == ElementType::Triangle3 || + c.type == ElementType::Quad4) ? 3 : 4)); + expect_polynomial_reproduction(c, relevant, Real(1e-12)); + } +} + +TEST(LagrangeBasis, QuadraticPolynomialReproductionAcrossSupportedQuadraticShapes) { + const std::vector cases = { + {ElementType::Line3, 2, sample_points_for(ElementType::Line3)}, + {ElementType::Triangle6, 2, sample_points_for(ElementType::Triangle6)}, + {ElementType::Quad9, 2, sample_points_for(ElementType::Quad9)}, + {ElementType::Tetra10, 2, sample_points_for(ElementType::Tetra10)}, + {ElementType::Hex27, 2, sample_points_for(ElementType::Hex27)}, + {ElementType::Wedge18, 2, sample_points_for(ElementType::Wedge18)}, + {ElementType::Pyramid14, 2, sample_points_for(ElementType::Pyramid14)}, + }; + + const std::vector> line_exponents = { + {0, 0, 0}, {1, 0, 0}, {2, 0, 0} + }; + const std::vector> surface_exponents = { + {0, 0, 0}, {1, 0, 0}, {0, 1, 0}, + {2, 0, 0}, {1, 1, 0}, {0, 2, 0} + }; + const std::vector> volume_exponents = { + {0, 0, 0}, {1, 0, 0}, {0, 1, 0}, {0, 0, 1}, + {2, 0, 0}, {1, 1, 0}, {0, 2, 0}, + {1, 0, 1}, {0, 1, 1}, {0, 0, 2} + }; + + for (const auto& c : cases) { + if (c.type == ElementType::Line3) { + expect_polynomial_reproduction(c, line_exponents, Real(1e-12)); + } else if (c.type == ElementType::Triangle6 || c.type == ElementType::Quad9) { + expect_polynomial_reproduction(c, surface_exponents, Real(1e-11)); + } else { + expect_polynomial_reproduction(c, volume_exponents, Real(2e-10)); + } + } +} + +TEST(LagrangeBasis, 
HighOrderTensorLagrangeMaintainsPartitionAndDerivativeSums) { + const std::vector cases = { + {ElementType::Line2, 8, {Point{-0.875, 0, 0}, Point{0.125, 0, 0}, Point{1, 0, 0}}}, + {ElementType::Quad4, 7, {Point{0.2, -0.35, 0}, Point{-1, 0.5, 0}, Point{0.5, 1, 0}}}, + {ElementType::Hex8, 6, {Point{0.1, -0.2, 0.3}, Point{-1, 0.5, 1}, Point{0.75, -1, -0.5}}}, + }; + + for (const auto& c : cases) { + LagrangeBasis basis(c.type, c.order); + expect_partition_gradient_hessian_sums(basis, c.points, Real(2e-12), Real(2e-8)); + } +} + +TEST(LagrangeBasis, HighOrderTensorLagrangeReproducesTensorPolynomials) { + const LagrangeAccuracyCase line{ElementType::Line2, + 8, + {Point{-0.73, 0, 0}, Point{-0.1, 0, 0}, Point{0.64, 0, 0}}}; + expect_polynomial_reproduction(line, + {{0, 0, 0}, {1, 0, 0}, {4, 0, 0}, {8, 0, 0}}, + Real(1e-11)); + + const LagrangeAccuracyCase quad{ElementType::Quad4, + 7, + {Point{-0.6, -0.2, 0}, Point{0.15, 0.45, 0}, Point{0.8, -0.55, 0}}}; + expect_polynomial_reproduction(quad, + {{0, 0, 0}, {7, 0, 0}, {0, 7, 0}, {4, 3, 0}}, + Real(5e-10)); + + const LagrangeAccuracyCase hex{ElementType::Hex8, + 6, + {Point{-0.4, 0.2, -0.3}, Point{0.35, -0.55, 0.25}, Point{0.75, 0.4, -0.65}}}; + expect_polynomial_reproduction(hex, + {{0, 0, 0}, {6, 0, 0}, {0, 6, 0}, {0, 0, 6}, {3, 2, 4}}, + Real(2e-9)); +} diff --git a/tests/unitTests/FE/Basis/test_SerendipityTensorModal.cpp b/tests/unitTests/FE/Basis/test_SerendipityTensorModal.cpp new file mode 100644 index 000000000..9f2bf8be5 --- /dev/null +++ b/tests/unitTests/FE/Basis/test_SerendipityTensorModal.cpp @@ -0,0 +1,116 @@ +/** + * @file test_SerendipityTensorModal.cpp + * @brief Tests for the migrated Serendipity basis subset. 
+ */ + +#include + +#include "FE/Basis/NodeOrderingConventions.h" +#include "FE/Basis/SerendipityBasis.h" + +#include + +using namespace svmp::FE; +using namespace svmp::FE::basis; + +namespace { + +void expect_partition_of_unity(const SerendipityBasis& basis, + const math::Vector& xi, + Real tolerance = Real(1e-10)) +{ + std::vector values; + std::vector gradients; + basis.evaluate_values(xi, values); + basis.evaluate_gradients(xi, gradients); + + Real value_sum = Real(0); + Gradient gradient_sum{}; + for (std::size_t i = 0; i < values.size(); ++i) { + value_sum += values[i]; + for (std::size_t component = 0; component < 3u; ++component) { + gradient_sum[component] += gradients[i][component]; + } + } + + EXPECT_NEAR(value_sum, Real(1), tolerance); + for (int component = 0; component < basis.dimension(); ++component) { + EXPECT_NEAR(gradient_sum[static_cast(component)], + Real(0), + tolerance); + } +} + +void expect_nodal_delta(const SerendipityBasis& basis, + const std::vector>& nodes, + Real tolerance) +{ + ASSERT_EQ(nodes.size(), basis.size()); + for (std::size_t node = 0; node < nodes.size(); ++node) { + std::vector values; + basis.evaluate_values(nodes[node], values); + ASSERT_EQ(values.size(), basis.size()); + for (std::size_t dof = 0; dof < values.size(); ++dof) { + EXPECT_NEAR(values[dof], dof == node ? 
Real(1) : Real(0), tolerance) + << "node=" << node << " dof=" << dof; + } + } +} + +std::vector> reference_nodes(ElementType type, + std::size_t count) +{ + std::vector> nodes; + nodes.reserve(count); + for (std::size_t i = 0; i < count; ++i) { + nodes.push_back(ReferenceNodeLayout::get_node_coords(type, i)); + } + return nodes; +} + +} // namespace + +TEST(SerendipityBasis, Quad8IsNodalAndPartitionsUnity) { + SerendipityBasis basis(ElementType::Quad8, 2); + + EXPECT_EQ(basis.size(), 8u); + expect_nodal_delta(basis, basis.nodes(), Real(1e-10)); + expect_partition_of_unity(basis, {Real(0.17), Real(-0.31), Real(0)}); +} + +TEST(SerendipityBasis, Hex20IsNodalAndPartitionsUnity) { + SerendipityBasis basis(ElementType::Hex20, 2); + + EXPECT_EQ(basis.size(), 20u); + expect_nodal_delta(basis, + reference_nodes(ElementType::Hex20, basis.size()), + Real(1e-10)); + expect_partition_of_unity(basis, {Real(0.2), Real(-0.1), Real(0.3)}); +} + +TEST(SerendipityBasis, Wedge15IsNodalAndPartitionsUnity) { + SerendipityBasis basis(ElementType::Wedge15, 2); + + EXPECT_EQ(basis.size(), 15u); + expect_nodal_delta(basis, + reference_nodes(ElementType::Wedge15, basis.size()), + Real(1e-9)); + expect_partition_of_unity(basis, {Real(0.2), Real(0.3), Real(0.1)}); +} + +TEST(SerendipityBasis, Pyramid13IsNodalAndPartitionsUnity) { + SerendipityBasis basis(ElementType::Pyramid13, 2); + + EXPECT_EQ(basis.size(), 13u); + expect_nodal_delta(basis, + reference_nodes(ElementType::Pyramid13, basis.size()), + Real(1e-8)); + expect_partition_of_unity(basis, {Real(0.1), Real(-0.2), Real(0.4)}); +} + +TEST(SerendipityBasis, RejectsUnsupportedSerendipityAliases) { + EXPECT_THROW(SerendipityBasis(ElementType::Quad9, 2), FEException); + EXPECT_THROW(SerendipityBasis(ElementType::Pyramid14, 2), FEException); + EXPECT_THROW(SerendipityBasis(ElementType::Quad8, 3), FEException); +} + diff --git a/tests/unitTests/FE/Math/test_DenseLinearAlgebra.cpp b/tests/unitTests/FE/Math/test_DenseLinearAlgebra.cpp new file 
mode 100644 index 000000000..2b44ad2bf --- /dev/null +++ b/tests/unitTests/FE/Math/test_DenseLinearAlgebra.cpp @@ -0,0 +1,265 @@ +/** + * @file test_DenseLinearAlgebra.cpp + * @brief Tests for shared dense linear algebra utilities. + */ + +#include + +#include "FE/Common/FEException.h" +#include "FE/Math/DenseLinearAlgebra.h" + +#include +#include +#include + +using namespace svmp::FE; +using namespace svmp::FE::math; + +namespace { + +Real multiply_entry(const std::vector& A, + const std::vector& B, + std::size_t n, + std::size_t row, + std::size_t col) { + Real sum = Real(0); + for (std::size_t k = 0; k < n; ++k) { + sum += A[row * n + k] * B[k * n + col]; + } + return sum; +} + +} // namespace + +TEST(DenseLinearAlgebra, InvertsScaledMatrix) { + const std::vector A{ + Real(1.0e9), Real(2.0e6), + Real(3.0e3), Real(4.0) + }; + + const auto inv = invert_dense_matrix(A, 2u, "scaled 2x2"); + for (std::size_t row = 0; row < 2u; ++row) { + for (std::size_t col = 0; col < 2u; ++col) { + const Real expected = (row == col) ? 
Real(1) : Real(0); + EXPECT_NEAR(multiply_entry(A, inv, 2u, row, col), expected, Real(1.0e-10)); + } + } +} + +TEST(DenseLinearAlgebra, FactorizationSolvesMultipleRightHandSides) { + const std::vector A{ + Real(4), Real(2), Real(0), + Real(2), Real(5), Real(1), + Real(0), Real(1), Real(3) + }; + + const auto solver = factor_dense_matrix(A, 3u, "symmetric 3x3"); + EXPECT_EQ(solver.diagnostics.rank, 3u); + + const std::vector rhs{Real(2), Real(4), Real(6)}; + const auto x = solver.solve(std::span(rhs.data(), rhs.size())); + ASSERT_EQ(x.size(), 3u); + + for (std::size_t row = 0; row < 3u; ++row) { + Real ax = Real(0); + for (std::size_t col = 0; col < 3u; ++col) { + ax += A[row * 3u + col] * x[col]; + } + EXPECT_NEAR(ax, rhs[row], Real(1.0e-12)); + } + + std::vector second_rhs{Real(1), Real(-2), Real(0.5)}; + const auto original_second_rhs = second_rhs; + solver.solve_in_place(std::span(second_rhs.data(), second_rhs.size())); + for (std::size_t row = 0; row < 3u; ++row) { + Real ax = Real(0); + for (std::size_t col = 0; col < 3u; ++col) { + ax += A[row * 3u + col] * second_rhs[col]; + } + EXPECT_NEAR(ax, original_second_rhs[row], Real(1.0e-12)); + } +} + +TEST(DenseLinearAlgebra, FactorizationSolvesDenseRightHandSideBlock) { + const std::vector A{ + Real(4), Real(2), Real(0), + Real(2), Real(5), Real(1), + Real(0), Real(1), Real(3) + }; + + const auto solver = factor_dense_matrix(A, 3u, "symmetric 3x3 block"); + + std::vector rhs{ + Real(2), Real(1), + Real(4), Real(-2), + Real(6), Real(0.5) + }; + const auto original_rhs = rhs; + solver.solve_in_place(std::span(rhs.data(), rhs.size()), 2u); + + for (std::size_t rhs_col = 0; rhs_col < 2u; ++rhs_col) { + for (std::size_t row = 0; row < 3u; ++row) { + Real ax = Real(0); + for (std::size_t col = 0; col < 3u; ++col) { + ax += A[row * 3u + col] * rhs[col * 2u + rhs_col]; + } + EXPECT_NEAR(ax, original_rhs[row * 2u + rhs_col], Real(1.0e-12)); + } + } +} + +TEST(DenseLinearAlgebra, HighConditionInverseUsesSvdFallback) { + 
const std::vector high_condition{ + Real(1), Real(0), + Real(0), Real(1.0e-13) + }; + + const auto result = + invert_dense_matrix_with_diagnostics(high_condition, 2u, "high-condition diagonal"); + EXPECT_EQ(result.diagnostics.rank, 2u); +#if defined(FE_HAS_EIGEN) && FE_HAS_EIGEN + EXPECT_GT(result.diagnostics.condition_estimate, + dense_matrix_condition_fallback_threshold()); + EXPECT_TRUE(result.used_svd_fallback); +#else + EXPECT_FALSE(result.used_svd_fallback); +#endif + + for (std::size_t row = 0; row < 2u; ++row) { + for (std::size_t col = 0; col < 2u; ++col) { + const Real expected = (row == col) ? Real(1) : Real(0); + EXPECT_NEAR(multiply_entry(high_condition, result.inverse, 2u, row, col), + expected, + Real(1.0e-12)); + } + } +} + +TEST(DenseLinearAlgebra, DiagnosticValidationRejectsUnsupportedCondition) { +#if !(defined(FE_HAS_EIGEN) && FE_HAS_EIGEN) + GTEST_SKIP() << "condition rejection requires FE_ENABLE_EIGEN diagnostics"; +#endif + DenseInverseResult result; + result.diagnostics.rank = 2u; + result.diagnostics.condition_estimate = + dense_matrix_condition_error_threshold() * Real(10); + + EXPECT_GT(result.diagnostics.condition_estimate, + dense_matrix_condition_error_threshold()); + EXPECT_THROW(validate_dense_inverse_diagnostics( + result, 2u, "excessive-condition diagonal"), + FEException); +} + +TEST(DenseLinearAlgebra, ThrowsForScaleAwareSingularPivot) { + const std::vector singular{ + Real(1.0e12), Real(2.0e12), + Real(0.5e12), Real(1.0e12) + }; + + EXPECT_THROW((void)invert_dense_matrix(singular, 2u, "singular 2x2"), + FEException); +} + +TEST(DenseLinearAlgebra, FactorizationThrowsForRankDeficientMatrix) { + const std::vector singular{ + Real(1), Real(2), + Real(2), Real(4) + }; + + EXPECT_THROW((void)factor_dense_matrix(singular, 2u, "rank-one 2x2"), + FEException); +} + +TEST(DenseLinearAlgebra, RankUsesScaleAwareTolerance) { + const std::vector rank_one{ + Real(1.0e8), Real(2.0e8), + Real(3.0e8), Real(6.0e8) + }; + 
EXPECT_EQ(dense_matrix_rank(rank_one, 2u, 2u), 1u); + + const std::vector full_rank{ + Real(1.0e8), Real(2.0e8), + Real(3.0e8), Real(6.1e8) + }; + EXPECT_EQ(dense_matrix_rank(full_rank, 2u, 2u), 2u); +} + +TEST(DenseLinearAlgebra, DiagnosticsReportRankAndConditionEstimate) { + const std::vector diagonal{ + Real(4), Real(0), + Real(0), Real(0.5) + }; + const auto full = + dense_matrix_diagnostics(diagonal, 2u, 2u, "diagonal 2x2"); + EXPECT_EQ(full.rank, 2u); +#if defined(FE_HAS_EIGEN) && FE_HAS_EIGEN + EXPECT_NEAR(full.largest_singular_value, Real(4), Real(1.0e-14)); + EXPECT_NEAR(full.smallest_retained_singular_value, Real(0.5), Real(1.0e-14)); + EXPECT_NEAR(full.condition_estimate, Real(8), Real(1.0e-14)); +#else + EXPECT_TRUE(std::isinf(full.condition_estimate)); +#endif + + const std::vector rank_one{ + Real(1), Real(2), + Real(2), Real(4) + }; + const auto deficient = + dense_matrix_diagnostics(rank_one, 2u, 2u, "rank-one 2x2"); + EXPECT_EQ(deficient.rank, 1u); + EXPECT_TRUE(std::isinf(deficient.condition_estimate)); +} + +TEST(DenseLinearAlgebra, PseudoInverseHandlesSingularMatrixWithoutNormalEquations) { +#if !(defined(FE_HAS_EIGEN) && FE_HAS_EIGEN) + GTEST_SKIP() << "rank-revealing pseudo-inverse requires FE_ENABLE_EIGEN"; +#endif + const std::vector rank_one{ + Real(1), Real(2), + Real(2), Real(4) + }; + + const auto pinv = + rank_revealing_pseudo_inverse(rank_one, 2u, 2u, "rank-one 2x2"); + EXPECT_EQ(pinv.rank, 1u); + EXPECT_NEAR(pinv.inverse[0], Real(0.04), Real(1.0e-13)); + EXPECT_NEAR(pinv.inverse[1], Real(0.08), Real(1.0e-13)); + EXPECT_NEAR(pinv.inverse[2], Real(0.08), Real(1.0e-13)); + EXPECT_NEAR(pinv.inverse[3], Real(0.16), Real(1.0e-13)); + + std::vector projection(4u, Real(0)); + for (std::size_t row = 0; row < 2u; ++row) { + for (std::size_t col = 0; col < 2u; ++col) { + for (std::size_t a = 0; a < 2u; ++a) { + for (std::size_t b = 0; b < 2u; ++b) { + projection[row * 2u + col] += + rank_one[row * 2u + a] * pinv.inverse[a * 2u + b] * + 
rank_one[b * 2u + col]; + } + } + EXPECT_NEAR(projection[row * 2u + col], + rank_one[row * 2u + col], + Real(1.0e-12)); + } + } +} + +TEST(DenseLinearAlgebra, PseudoInverseDropsNearZeroSingularValues) { +#if !(defined(FE_HAS_EIGEN) && FE_HAS_EIGEN) + GTEST_SKIP() << "rank-revealing pseudo-inverse requires FE_ENABLE_EIGEN"; +#endif + const std::vector near_singular{ + Real(1), Real(0), + Real(0), Real(1.0e-18) + }; + + const auto pinv = + rank_revealing_pseudo_inverse(near_singular, 2u, 2u, "near-singular 2x2"); + EXPECT_EQ(pinv.rank, 1u); + EXPECT_GT(pinv.tolerance, Real(1.0e-18)); + EXPECT_NEAR(pinv.inverse[0], Real(1), Real(1.0e-14)); + EXPECT_NEAR(pinv.inverse[1], Real(0), Real(1.0e-14)); + EXPECT_NEAR(pinv.inverse[2], Real(0), Real(1.0e-14)); + EXPECT_NEAR(pinv.inverse[3], Real(0), Real(1.0e-14)); +} diff --git a/tests/unitTests/FE/Math/test_ExpressionOps.cpp b/tests/unitTests/FE/Math/test_ExpressionOps.cpp new file mode 100644 index 000000000..307b308a1 --- /dev/null +++ b/tests/unitTests/FE/Math/test_ExpressionOps.cpp @@ -0,0 +1,509 @@ +/** + * @file test_ExpressionOps.cpp + * @brief Unit tests for ExpressionOps.h - expression template operators + */ + +#include +#include "FE/Math/ExpressionOps.h" +#include "FE/Math/Vector.h" +#include "FE/Math/Matrix.h" +#include "FE/Math/MathConstants.h" +#include +#include +#include +#include + +using namespace svmp::FE::math; +using namespace svmp::FE::math::detail::ops; + +// Test fixture for ExpressionOps tests +class ExpressionOpsTest : public ::testing::Test { +protected: + static constexpr double tolerance = 1e-14; + + void SetUp() override {} + void TearDown() override {} + + template + bool approx_equal(T a, T b, T tol = tolerance) { + return std::abs(a - b) <= tol; + } +}; + +// ============================================================================= +// Binary Operation Tests +// ============================================================================= + +TEST_F(ExpressionOpsTest, AddOperator) { + Add 
op; + + // Integer addition + EXPECT_EQ(op(5, 3), 8); + EXPECT_EQ(op(-5, 3), -2); + EXPECT_EQ(op(-5, -3), -8); + + // Floating point addition + EXPECT_DOUBLE_EQ(op(3.14, 2.86), 6.0); + EXPECT_DOUBLE_EQ(op(-1.5, 2.5), 1.0); + + // Mixed types + auto result = op(3, 2.5); + EXPECT_TRUE((std::is_same_v)); + EXPECT_DOUBLE_EQ(result, 5.5); +} + +TEST_F(ExpressionOpsTest, SubOperator) { + Sub op; + + // Integer subtraction + EXPECT_EQ(op(5, 3), 2); + EXPECT_EQ(op(3, 5), -2); + EXPECT_EQ(op(-5, -3), -2); + + // Floating point subtraction + EXPECT_DOUBLE_EQ(op(5.5, 2.5), 3.0); + EXPECT_DOUBLE_EQ(op(2.5, 5.5), -3.0); + + // Mixed types + auto result = op(5.5, 2); + EXPECT_TRUE((std::is_same_v)); + EXPECT_DOUBLE_EQ(result, 3.5); +} + +TEST_F(ExpressionOpsTest, MulOperator) { + Mul op; + + // Integer multiplication + EXPECT_EQ(op(5, 3), 15); + EXPECT_EQ(op(-5, 3), -15); + EXPECT_EQ(op(-5, -3), 15); + + // Floating point multiplication + EXPECT_DOUBLE_EQ(op(2.5, 4.0), 10.0); + EXPECT_DOUBLE_EQ(op(-2.5, 4.0), -10.0); + + // Zero multiplication + EXPECT_EQ(op(0, 100), 0); + EXPECT_DOUBLE_EQ(op(0.0, 3.14), 0.0); + + // Mixed types + auto result = op(3, 2.5); + EXPECT_TRUE((std::is_same_v)); + EXPECT_DOUBLE_EQ(result, 7.5); +} + +TEST_F(ExpressionOpsTest, DivOperator) { + Div op; + + // Integer division + EXPECT_EQ(op(10, 2), 5); + EXPECT_EQ(op(10, 3), 3); // Integer division truncates + EXPECT_EQ(op(-10, 2), -5); + + // Floating point division + EXPECT_DOUBLE_EQ(op(10.0, 2.0), 5.0); + EXPECT_DOUBLE_EQ(op(10.0, 3.0), 10.0/3.0); + EXPECT_DOUBLE_EQ(op(-10.0, 2.0), -5.0); + + // Mixed types + auto result = op(10.0, 3); + EXPECT_TRUE((std::is_same_v)); + EXPECT_DOUBLE_EQ(result, 10.0/3.0); +} + +// ============================================================================= +// Unary Operation Tests +// ============================================================================= + +TEST_F(ExpressionOpsTest, NegateOperator) { + Negate op; + + // Integer negation + EXPECT_EQ(op(5), 
-5); + EXPECT_EQ(op(-5), 5); + EXPECT_EQ(op(0), 0); + + // Floating point negation + EXPECT_DOUBLE_EQ(op(3.14), -3.14); + EXPECT_DOUBLE_EQ(op(-2.71), 2.71); + EXPECT_DOUBLE_EQ(op(0.0), 0.0); + + // Type preservation + auto int_result = op(5); + EXPECT_TRUE((std::is_same_v)); + + auto double_result = op(5.0); + EXPECT_TRUE((std::is_same_v)); +} + +TEST_F(ExpressionOpsTest, AbsOperator) { + Abs op; + + // Integer absolute value + EXPECT_EQ(op(5), 5); + EXPECT_EQ(op(-5), 5); + EXPECT_EQ(op(0), 0); + + // Floating point absolute value + EXPECT_DOUBLE_EQ(op(3.14), 3.14); + EXPECT_DOUBLE_EQ(op(-3.14), 3.14); + EXPECT_DOUBLE_EQ(op(0.0), 0.0); + + // Special cases + EXPECT_DOUBLE_EQ(op(-0.0), 0.0); + + // Type preservation + auto int_result = op(-5); + EXPECT_TRUE((std::is_same_v)); + + auto double_result = op(-5.0); + EXPECT_TRUE((std::is_same_v)); +} + +TEST_F(ExpressionOpsTest, SqrtOperator) { + Sqrt op; + + // Perfect squares + EXPECT_DOUBLE_EQ(op(4.0), 2.0); + EXPECT_DOUBLE_EQ(op(9.0), 3.0); + EXPECT_DOUBLE_EQ(op(16.0), 4.0); + EXPECT_DOUBLE_EQ(op(25.0), 5.0); + + // Non-perfect squares + EXPECT_DOUBLE_EQ(op(2.0), std::sqrt(2.0)); + EXPECT_DOUBLE_EQ(op(3.0), std::sqrt(3.0)); + + // Special cases + EXPECT_DOUBLE_EQ(op(0.0), 0.0); + EXPECT_DOUBLE_EQ(op(1.0), 1.0); + + // Type conversion + auto result = op(4); // Integer input + EXPECT_DOUBLE_EQ(result, 2.0); +} + +// ============================================================================= +// Constexpr Tests +// ============================================================================= + +TEST_F(ExpressionOpsTest, ConstexprOperators) { + // Test that operators can be used in constexpr contexts + constexpr Add add_op; + constexpr Sub sub_op; + constexpr Mul mul_op; + constexpr Div div_op; + constexpr Negate neg_op; + + // Compile-time evaluation + constexpr auto sum = add_op(3, 4); + constexpr auto diff = sub_op(7, 3); + constexpr auto prod = mul_op(3, 4); + constexpr auto quot = div_op(12, 3); + constexpr auto 
neg = neg_op(5); + + EXPECT_EQ(sum, 7); + EXPECT_EQ(diff, 4); + EXPECT_EQ(prod, 12); + EXPECT_EQ(quot, 4); + EXPECT_EQ(neg, -5); + + // Static assertions to verify compile-time evaluation + static_assert(add_op(2, 3) == 5); + static_assert(sub_op(5, 2) == 3); + static_assert(mul_op(3, 4) == 12); + static_assert(div_op(10, 2) == 5); + static_assert(neg_op(3) == -3); +} + +// ============================================================================= +// Type Deduction Tests +// ============================================================================= + +TEST_F(ExpressionOpsTest, TypeDeduction) { + Add add_op; + Sub sub_op; + Mul mul_op; + Div div_op; + + // int + int -> int + auto int_result = add_op(3, 4); + EXPECT_TRUE((std::is_same_v)); + + // double + double -> double + auto double_result = add_op(3.0, 4.0); + EXPECT_TRUE((std::is_same_v)); + + // int + double -> double + auto mixed_result1 = add_op(3, 4.0); + EXPECT_TRUE((std::is_same_v)); + + // double + int -> double + auto mixed_result2 = add_op(3.0, 4); + EXPECT_TRUE((std::is_same_v)); + + // float + double -> double + auto float_double_result = add_op(3.0f, 4.0); + EXPECT_TRUE((std::is_same_v)); +} + +// ============================================================================= +// Complex Expression Tests +// ============================================================================= + +TEST_F(ExpressionOpsTest, ChainedOperations) { + Add add_op; + Sub sub_op; + Mul mul_op; + Div div_op; + Negate neg_op; + + // Simulate complex expression: -(a + b) * c / d + double a = 2.0, b = 3.0, c = 4.0, d = 2.0; + + auto sum = add_op(a, b); // 5.0 + auto negated = neg_op(sum); // -5.0 + auto product = mul_op(negated, c); // -20.0 + auto result = div_op(product, d); // -10.0 + + EXPECT_DOUBLE_EQ(result, -10.0); +} + +TEST_F(ExpressionOpsTest, MixedPrecisionChain) { + Add add_op; + Mul mul_op; + + // Mixed precision chain + int a = 2; + float b = 3.5f; + double c = 1.5; + + auto step1 = add_op(a, b); // int 
+ float -> float (5.5f) + auto step2 = mul_op(step1, c); // float + double -> double (8.25) + + EXPECT_TRUE((std::is_same_v)); + EXPECT_DOUBLE_EQ(step2, 8.25); +} + +// ============================================================================= +// Operator Integration with Vector/Matrix Tests +// ============================================================================= + +TEST_F(ExpressionOpsTest, VectorIntegration) { + Vector v1{1.0, 2.0, 3.0}; + Vector v2{4.0, 5.0, 6.0}; + + // Test that operators work correctly in vector expressions + Vector sum = v1 + v2; + Vector diff = v1 - v2; + Vector neg = -v1; + Vector scaled = v1 * 2.0; + + EXPECT_DOUBLE_EQ(sum[0], 5.0); + EXPECT_DOUBLE_EQ(sum[1], 7.0); + EXPECT_DOUBLE_EQ(sum[2], 9.0); + + EXPECT_DOUBLE_EQ(diff[0], -3.0); + EXPECT_DOUBLE_EQ(diff[1], -3.0); + EXPECT_DOUBLE_EQ(diff[2], -3.0); + + EXPECT_DOUBLE_EQ(neg[0], -1.0); + EXPECT_DOUBLE_EQ(neg[1], -2.0); + EXPECT_DOUBLE_EQ(neg[2], -3.0); + + EXPECT_DOUBLE_EQ(scaled[0], 2.0); + EXPECT_DOUBLE_EQ(scaled[1], 4.0); + EXPECT_DOUBLE_EQ(scaled[2], 6.0); +} + +TEST_F(ExpressionOpsTest, MatrixIntegration) { + Matrix m1{{1.0, 2.0}, {3.0, 4.0}}; + Matrix m2{{5.0, 6.0}, {7.0, 8.0}}; + + // Test that operators work correctly in matrix expressions + Matrix sum = m1 + m2; + Matrix diff = m1 - m2; + Matrix neg = -m1; + Matrix scaled = m1 * 2.0; + + EXPECT_DOUBLE_EQ(sum(0, 0), 6.0); + EXPECT_DOUBLE_EQ(sum(0, 1), 8.0); + EXPECT_DOUBLE_EQ(sum(1, 0), 10.0); + EXPECT_DOUBLE_EQ(sum(1, 1), 12.0); + + EXPECT_DOUBLE_EQ(diff(0, 0), -4.0); + EXPECT_DOUBLE_EQ(diff(0, 1), -4.0); + EXPECT_DOUBLE_EQ(diff(1, 0), -4.0); + EXPECT_DOUBLE_EQ(diff(1, 1), -4.0); + + EXPECT_DOUBLE_EQ(neg(0, 0), -1.0); + EXPECT_DOUBLE_EQ(neg(0, 1), -2.0); + EXPECT_DOUBLE_EQ(neg(1, 0), -3.0); + EXPECT_DOUBLE_EQ(neg(1, 1), -4.0); + + EXPECT_DOUBLE_EQ(scaled(0, 0), 2.0); + EXPECT_DOUBLE_EQ(scaled(0, 1), 4.0); + EXPECT_DOUBLE_EQ(scaled(1, 0), 6.0); + EXPECT_DOUBLE_EQ(scaled(1, 1), 8.0); +} + +// 
============================================================================= +// Edge Cases and Special Values Tests +// ============================================================================= + +TEST_F(ExpressionOpsTest, SpecialFloatingPointValues) { + Add add_op; + Sub sub_op; + Mul mul_op; + Div div_op; + Abs abs_op; + Negate neg_op; + + // Infinity handling + double inf = std::numeric_limits::infinity(); + EXPECT_DOUBLE_EQ(add_op(inf, 1.0), inf); + EXPECT_DOUBLE_EQ(sub_op(inf, 1.0), inf); + EXPECT_DOUBLE_EQ(mul_op(inf, 2.0), inf); + EXPECT_DOUBLE_EQ(div_op(inf, 2.0), inf); + EXPECT_DOUBLE_EQ(abs_op(inf), inf); + EXPECT_DOUBLE_EQ(neg_op(inf), -inf); + + // NaN handling + double nan = std::numeric_limits::quiet_NaN(); + EXPECT_TRUE(std::isnan(add_op(nan, 1.0))); + EXPECT_TRUE(std::isnan(sub_op(nan, 1.0))); + EXPECT_TRUE(std::isnan(mul_op(nan, 2.0))); + EXPECT_TRUE(std::isnan(div_op(nan, 2.0))); + EXPECT_TRUE(std::isnan(abs_op(nan))); + EXPECT_TRUE(std::isnan(neg_op(nan))); + + // Division by zero + EXPECT_DOUBLE_EQ(div_op(1.0, 0.0), inf); + EXPECT_DOUBLE_EQ(div_op(-1.0, 0.0), -inf); + EXPECT_TRUE(std::isnan(div_op(0.0, 0.0))); +} + +TEST_F(ExpressionOpsTest, LargeAndSmallValues) { + Add add_op; + Mul mul_op; + + // Large values + double large = 1e308; + double result = add_op(large, large); + EXPECT_TRUE(std::isinf(result)); // Overflow to infinity + + // Small values + double tiny = std::numeric_limits::min(); + double tiny_result = mul_op(tiny, 0.5); + EXPECT_GT(tiny_result, 0.0); // Should still be positive + EXPECT_LT(tiny_result, tiny); // But smaller + + // Denormalized numbers + double denorm = std::numeric_limits::denorm_min(); + double denorm_result = add_op(denorm, denorm); + EXPECT_EQ(denorm_result, 2.0 * denorm); +} + +// ============================================================================= +// SFINAE and Compile-time Constraint Tests +// ============================================================================= + 
+TEST_F(ExpressionOpsTest, SFINAECompatibility) {
+    // Add must accept every built-in arithmetic type; one representative sum is checked per type
+    Add add_op;
+
+    // Signed integer widths (8/16/32/64 bit)
+    EXPECT_EQ(add_op(int8_t(3), int8_t(4)), 7);
+    EXPECT_EQ(add_op(int16_t(100), int16_t(200)), 300);
+    EXPECT_EQ(add_op(int32_t(1000), int32_t(2000)), 3000);
+    EXPECT_EQ(add_op(int64_t(10000), int64_t(20000)), 30000);
+
+    // Unsigned widths. NOTE(review): uint8_t/uint16_t operands promote to int in a + b; if Add returns that type, comparing with 7u/300u mixes signedness - confirm
+    EXPECT_EQ(add_op(uint8_t(3), uint8_t(4)), 7u);
+    EXPECT_EQ(add_op(uint16_t(100), uint16_t(200)), 300u);
+    EXPECT_EQ(add_op(uint32_t(1000), uint32_t(2000)), 3000u);
+
+    // Floating point types, each compared with the matching-precision assertion
+    EXPECT_FLOAT_EQ(add_op(3.0f, 4.0f), 7.0f);
+    EXPECT_DOUBLE_EQ(add_op(3.0, 4.0), 7.0);
+
+    // Long double. NOTE(review): EXPECT_DOUBLE_EQ compares double values, so this check is presumably carried out in double precision - confirm
+    long double ld1 = 3.0L;
+    long double ld2 = 4.0L;
+    EXPECT_DOUBLE_EQ(add_op(ld1, ld2), 7.0L);
+}
+
+// =============================================================================
+// Template Instantiation Tests
+// =============================================================================
+
+TEST_F(ExpressionOpsTest, TemplateInstantiations) {
+    // The operator objects must be usable with a user-defined type, not only built-in arithmetic types
+    Add add_op;
+    Sub sub_op;
+    Mul mul_op;
+    Div div_op;
+    Abs abs_op;    // declared only; not invoked in this test
+    Sqrt sqrt_op;  // declared only; not invoked in this test
+    Negate neg_op;
+
+    // Minimal user-defined number: wraps a double and provides +, -, *, /, unary - and ==
+    struct CustomNumber {
+        double value;
+        CustomNumber(double v) : value(v) {}
+        CustomNumber operator+(const CustomNumber& other) const { return CustomNumber(value + other.value); }
+        CustomNumber operator-(const CustomNumber& other) const { return CustomNumber(value - other.value); }
+        CustomNumber operator*(const CustomNumber& other) const { return CustomNumber(value * other.value); }
+        CustomNumber operator/(const CustomNumber& other) const { return CustomNumber(value / other.value); }
+        CustomNumber operator-() const { return CustomNumber(-value); }
+        bool operator==(const CustomNumber& other) const { return value == other.value; }
+    };
+
+    CustomNumber cn1(3.0);
+    CustomNumber cn2(4.0);
+
+    auto cn_sum = add_op(cn1, cn2);
+    EXPECT_EQ(cn_sum.value, 7.0);
+
+    auto cn_diff = sub_op(cn1, cn2);
+    EXPECT_EQ(cn_diff.value, -1.0);
+
+    auto cn_prod = mul_op(cn1, cn2);
+    EXPECT_EQ(cn_prod.value, 12.0);
+
+    auto cn_quot = div_op(cn1, cn2);
+    EXPECT_EQ(cn_quot.value, 0.75);
+
+    auto cn_neg = neg_op(cn1);
+    EXPECT_EQ(cn_neg.value, -3.0);
+}
+
+// =============================================================================
+// Complex Number Support Tests
+// =============================================================================
+
+TEST_F(ExpressionOpsTest, ComplexNumberSupport) {
+    Add add_op;
+    Sub sub_op;
+    Mul mul_op;
+    Div div_op;  // NOTE(review): declared, but complex division is not asserted below
+    Negate neg_op;
+
+    std::complex c1(3.0, 4.0);  // NOTE(review): element type is not spelled out in this copy; the arguments are double
+    std::complex c2(1.0, 2.0);
+
+    auto c_sum = add_op(c1, c2);
+    EXPECT_DOUBLE_EQ(c_sum.real(), 4.0);
+    EXPECT_DOUBLE_EQ(c_sum.imag(), 6.0);
+
+    auto c_diff = sub_op(c1, c2);
+    EXPECT_DOUBLE_EQ(c_diff.real(), 2.0);
+    EXPECT_DOUBLE_EQ(c_diff.imag(), 2.0);
+
+    auto c_prod = mul_op(c1, c2);
+    EXPECT_DOUBLE_EQ(c_prod.real(), -5.0); // (3+4i)(1+2i) = 3+6i+4i+8i² = 3+10i-8 = -5+10i
+    EXPECT_DOUBLE_EQ(c_prod.imag(), 10.0);
+
+    auto c_neg = neg_op(c1);
+    EXPECT_DOUBLE_EQ(c_neg.real(), -3.0);
+    EXPECT_DOUBLE_EQ(c_neg.imag(), -4.0);
+}
diff --git a/tests/unitTests/FE/Math/test_MathConstants.cpp b/tests/unitTests/FE/Math/test_MathConstants.cpp
new file mode 100644
index 000000000..5619690ed
--- /dev/null
+++ b/tests/unitTests/FE/Math/test_MathConstants.cpp
@@ -0,0 +1,341 @@
+/**
+ * @file test_MathConstants.cpp
+ * @brief Unit tests for MathConstants.h - mathematical constants and tolerances
+ */
+
+#include  // NOTE(review): header name missing in this copy of the patch - restore from the original commit
+#include "FE/Math/MathConstants.h"
+#include  // NOTE(review): header name missing in this copy of the patch
+#include  // NOTE(review): header name missing in this copy of the patch
+#include  // NOTE(review): header name missing in this copy of the patch
+
+using namespace svmp::FE::math;
+
+// Fixture shared by all MathConstants tests; it holds no per-test state
+class MathConstantsTest : public ::testing::Test {
+protected:
+    void SetUp() override {}
+    void TearDown() override {}
+};
+
+// =============================================================================
+// Mathematical Constants Tests
+// 
=============================================================================
+
+TEST_F(MathConstantsTest, PiConstants) {
+    // PI is pinned to a reference literal; every derived constant below must match the value recomputed from PI
+    EXPECT_NEAR(constants::PI, 3.14159265358979323846, 1e-15);
+
+    // PI_2 is PI / 2
+    EXPECT_NEAR(constants::PI_2, constants::PI / 2.0, 1e-15);
+
+    // PI_4 is PI / 4
+    EXPECT_NEAR(constants::PI_4, constants::PI / 4.0, 1e-15);
+
+    // TWO_PI is 2 * PI
+    EXPECT_NEAR(constants::TWO_PI, 2.0 * constants::PI, 1e-15);
+
+    // INV_PI is 1 / PI
+    EXPECT_NEAR(constants::INV_PI, 1.0 / constants::PI, 1e-15);
+
+    // SQRT_PI is sqrt(PI)
+    EXPECT_NEAR(constants::SQRT_PI, std::sqrt(constants::PI), 1e-15);
+}
+
+TEST_F(MathConstantsTest, EulerConstant) {
+    // E is Euler's number e; it and the logarithm constants are checked against the <cmath> functions
+    EXPECT_NEAR(constants::E, std::exp(1.0), 1e-15);
+
+    // LN_2 is ln(2)
+    EXPECT_NEAR(constants::LN_2, std::log(2.0), 1e-15);
+
+    // LN_10 is ln(10)
+    EXPECT_NEAR(constants::LN_10, std::log(10.0), 1e-15);
+
+    // LOG10_E is log10(e), computed here from constants::E
+    EXPECT_NEAR(constants::LOG10_E, std::log10(constants::E), 1e-15);
+
+    // LOG2_E is log2(e), computed here from constants::E
+    EXPECT_NEAR(constants::LOG2_E, std::log2(constants::E), 1e-15);
+}
+
+TEST_F(MathConstantsTest, SquareRootConstants) {
+    // Each square-root constant is checked against std::sqrt; SQRT_2 is sqrt(2)
+    EXPECT_NEAR(constants::SQRT_2, std::sqrt(2.0), 1e-15);
+
+    // SQRT_3 is sqrt(3)
+    EXPECT_NEAR(constants::SQRT_3, std::sqrt(3.0), 1e-15);
+
+    // SQRT_5 is sqrt(5)
+    EXPECT_NEAR(constants::SQRT_5, std::sqrt(5.0), 1e-15);
+
+    // INV_SQRT_2 is 1 / sqrt(2)
+    EXPECT_NEAR(constants::INV_SQRT_2, 1.0 / std::sqrt(2.0), 1e-15);
+
+    // INV_SQRT_3 is 1 / sqrt(3)
+    EXPECT_NEAR(constants::INV_SQRT_3, 1.0 / std::sqrt(3.0), 1e-15);
+}
+
+TEST_F(MathConstantsTest, GoldenRatio) {
+    // PHI is the golden ratio φ = (1 + sqrt(5))/2
+    EXPECT_NEAR(constants::PHI, (1.0 + std::sqrt(5.0)) / 2.0, 1e-15);
+
+    // Defining property φ² = φ + 1 (tolerance relaxed to 1e-14 for this identity)
+    EXPECT_NEAR(constants::PHI * constants::PHI, constants::PHI + 1.0, 1e-14);
+
+    // Defining property 1/φ = φ - 1 (tolerance relaxed to 1e-14 for this identity)
+    EXPECT_NEAR(1.0 / constants::PHI, constants::PHI - 1.0, 1e-14);
+}
+
+// =============================================================================
+// Angle Conversion Tests
+// 
============================================================================= + +TEST_F(MathConstantsTest, DegreesToRadians) { + // Test common conversions + EXPECT_NEAR(constants::deg_to_rad(0.0), 0.0, 1e-15); + EXPECT_NEAR(constants::deg_to_rad(90.0), constants::PI_2, 1e-15); + EXPECT_NEAR(constants::deg_to_rad(180.0), constants::PI, 1e-15); + EXPECT_NEAR(constants::deg_to_rad(270.0), 3.0 * constants::PI_2, 1e-15); + EXPECT_NEAR(constants::deg_to_rad(360.0), constants::TWO_PI, 1e-15); + + // Test negative angles + EXPECT_NEAR(constants::deg_to_rad(-90.0), -constants::PI_2, 1e-15); + EXPECT_NEAR(constants::deg_to_rad(-180.0), -constants::PI, 1e-15); + + // Test arbitrary angle + EXPECT_NEAR(constants::deg_to_rad(45.0), constants::PI_4, 1e-15); + EXPECT_NEAR(constants::deg_to_rad(30.0), constants::PI / 6.0, 1e-15); + EXPECT_NEAR(constants::deg_to_rad(60.0), constants::PI / 3.0, 1e-15); +} + +TEST_F(MathConstantsTest, RadiansToDegrees) { + // Test common conversions + EXPECT_NEAR(constants::rad_to_deg(0.0), 0.0, 1e-13); + EXPECT_NEAR(constants::rad_to_deg(constants::PI_2), 90.0, 1e-13); + EXPECT_NEAR(constants::rad_to_deg(constants::PI), 180.0, 1e-13); + EXPECT_NEAR(constants::rad_to_deg(constants::TWO_PI), 360.0, 1e-13); + + // Test negative angles + EXPECT_NEAR(constants::rad_to_deg(-constants::PI), -180.0, 1e-13); + + // Test round-trip conversion + double angle_deg = 123.456; + double angle_rad = constants::deg_to_rad(angle_deg); + double back_to_deg = constants::rad_to_deg(angle_rad); + EXPECT_NEAR(back_to_deg, angle_deg, 1e-13); +} + +// ============================================================================= +// Machine Precision Tests +// ============================================================================= + +TEST_F(MathConstantsTest, MachineEpsilon) { + // Test double precision epsilon + EXPECT_EQ(constants::EPSILON, std::numeric_limits::epsilon()); + + // Test float precision epsilon + EXPECT_EQ(constants::EPSILON_F, 
std::numeric_limits::epsilon()); + + // Verify epsilon is the smallest value such that 1.0 + epsilon != 1.0 + double one_plus_eps = 1.0 + constants::EPSILON; + double one_plus_half_eps = 1.0 + constants::EPSILON / 2.0; + + EXPECT_NE(one_plus_eps, 1.0); + EXPECT_EQ(one_plus_half_eps, 1.0); +} + +TEST_F(MathConstantsTest, NumericalLimits) { + // Test infinity + EXPECT_TRUE(std::isinf(constants::INF_VALUE)); + EXPECT_GT(constants::INF_VALUE, std::numeric_limits::max()); + + // Test NaN + EXPECT_TRUE(std::isnan(constants::NOT_A_NUMBER)); + EXPECT_NE(constants::NOT_A_NUMBER, constants::NOT_A_NUMBER); // NaN != NaN + + // Test max/min values + EXPECT_EQ(constants::MAX_DOUBLE, std::numeric_limits::max()); + EXPECT_EQ(constants::MIN_DOUBLE, std::numeric_limits::min()); + EXPECT_EQ(constants::LOWEST_DOUBLE, std::numeric_limits::lowest()); +} + +// ============================================================================= +// Tolerance Tests +// ============================================================================= + +TEST_F(MathConstantsTest, DefaultTolerances) { + // Test default absolute tolerance + EXPECT_GT(constants::DEFAULT_TOLERANCE, 0.0); + EXPECT_LT(constants::DEFAULT_TOLERANCE, 1e-10); + + // Test default relative tolerance + EXPECT_GT(constants::DEFAULT_REL_TOLERANCE, 0.0); + EXPECT_LT(constants::DEFAULT_REL_TOLERANCE, 1e-10); + + // Test solver tolerance + EXPECT_GT(constants::SOLVER_TOLERANCE, 0.0); + EXPECT_LE(constants::SOLVER_TOLERANCE, constants::DEFAULT_TOLERANCE); + + // Test geometry tolerance (typically larger) + EXPECT_GT(constants::GEOMETRY_TOLERANCE, 0.0); + EXPECT_GE(constants::GEOMETRY_TOLERANCE, constants::DEFAULT_TOLERANCE); +} + +TEST_F(MathConstantsTest, ToleranceComparison) { + double a = 1.0; + double b = 1.0 + constants::DEFAULT_TOLERANCE / 2.0; + double c = 1.0 + constants::DEFAULT_TOLERANCE * 2.0; + + // Values within tolerance should be considered equal + EXPECT_TRUE(constants::near(a, b, constants::DEFAULT_TOLERANCE)); + + // 
Values outside tolerance should not be equal + EXPECT_FALSE(constants::near(a, c, constants::DEFAULT_TOLERANCE)); + + // Test relative tolerance + double large_a = 1e10; + double large_b = large_a * (1.0 + constants::DEFAULT_REL_TOLERANCE / 2.0); + double large_c = large_a * (1.0 + constants::DEFAULT_REL_TOLERANCE * 2.0); + + EXPECT_TRUE(constants::near_relative(large_a, large_b, constants::DEFAULT_REL_TOLERANCE)); + EXPECT_FALSE(constants::near_relative(large_a, large_c, constants::DEFAULT_REL_TOLERANCE)); +} + +TEST_F(MathConstantsTest, ZeroComparison) { + // Test near zero detection + EXPECT_TRUE(constants::is_zero(0.0)); + EXPECT_TRUE(constants::is_zero(constants::DEFAULT_TOLERANCE / 2.0)); + EXPECT_FALSE(constants::is_zero(constants::DEFAULT_TOLERANCE * 2.0)); + + // Test with negative values + EXPECT_TRUE(constants::is_zero(-constants::DEFAULT_TOLERANCE / 2.0)); + EXPECT_FALSE(constants::is_zero(-constants::DEFAULT_TOLERANCE * 2.0)); +} + +// ============================================================================= +// Physical Constants Tests +// ============================================================================= + +TEST_F(MathConstantsTest, PhysicalConstants) { + // Test speed of light (m/s) + EXPECT_NEAR(constants::SPEED_OF_LIGHT, 299792458.0, 1.0); + + // Test gravitational constant (m³/kg/s²) + EXPECT_NEAR(constants::GRAVITATIONAL_CONSTANT, 6.67430e-11, 1e-16); + + // Test standard gravity (m/s²) + EXPECT_NEAR(constants::STANDARD_GRAVITY, 9.80665, 1e-10); + + // Test Planck constant (J⋅s) + EXPECT_NEAR(constants::PLANCK_CONSTANT, 6.62607015e-34, 1e-42); + + // Test Boltzmann constant (J/K) + EXPECT_NEAR(constants::BOLTZMANN_CONSTANT, 1.380649e-23, 1e-29); + + // Test Avogadro's number (1/mol) + EXPECT_NEAR(constants::AVOGADRO_NUMBER, 6.02214076e23, 1e15); +} + +// ============================================================================= +// Compile-Time Constants Tests +// 
============================================================================= + +TEST_F(MathConstantsTest, CompileTimeConstants) { + // Test that constants are constexpr (compile-time) + constexpr double pi = constants::PI; + constexpr double e = constants::E; + constexpr double sqrt2 = constants::SQRT_2; + + EXPECT_EQ(pi, constants::PI); + EXPECT_EQ(e, constants::E); + EXPECT_EQ(sqrt2, constants::SQRT_2); + + // Test compile-time functions + constexpr double angle_rad = constants::deg_to_rad(90.0); + EXPECT_NEAR(angle_rad, constants::PI_2, 1e-15); + + constexpr double angle_deg = constants::rad_to_deg(constants::PI); + EXPECT_NEAR(angle_deg, 180.0, 1e-13); +} + +// ============================================================================= +// Type Traits Tests +// ============================================================================= + +TEST_F(MathConstantsTest, TypedConstants) { + // Test float versions + EXPECT_NEAR(constants::PI_F, static_cast(constants::PI), 1e-7f); + EXPECT_NEAR(constants::E_F, static_cast(constants::E), 1e-7f); + EXPECT_NEAR(constants::SQRT_2_F, static_cast(constants::SQRT_2), 1e-7f); + + // Test long double versions + EXPECT_NEAR(constants::PI_L, static_cast(constants::PI), 1e-18L); + EXPECT_NEAR(constants::E_L, static_cast(constants::E), 1e-18L); +} + +// ============================================================================= +// Special Functions Tests +// ============================================================================= + +TEST_F(MathConstantsTest, SignFunction) { + // Test sign function + EXPECT_EQ(constants::sign(5.0), 1); + EXPECT_EQ(constants::sign(-5.0), -1); + EXPECT_EQ(constants::sign(0.0), 0); + + // Test with very small values + EXPECT_EQ(constants::sign(constants::EPSILON), 1); + EXPECT_EQ(constants::sign(-constants::EPSILON), -1); + + // Test with infinity + EXPECT_EQ(constants::sign(constants::INF_VALUE), 1); + EXPECT_EQ(constants::sign(-constants::INF_VALUE), -1); +} + +TEST_F(MathConstantsTest, 
SafeDivision) { + // Test safe division + EXPECT_NEAR(constants::safe_divide(10.0, 2.0), 5.0, 1e-15); + EXPECT_NEAR(constants::safe_divide(1.0, 3.0), 1.0/3.0, 1e-15); + + // Test division by zero returns default + EXPECT_EQ(constants::safe_divide(1.0, 0.0, 999.0), 999.0); + EXPECT_EQ(constants::safe_divide(1.0, constants::EPSILON/2.0, -1.0), -1.0); + + // Test division by near-zero + double tiny = constants::DEFAULT_TOLERANCE / 10.0; + EXPECT_EQ(constants::safe_divide(1.0, tiny, 0.0), 0.0); +} + +// ============================================================================= +// Utility Functions Tests +// ============================================================================= + +TEST_F(MathConstantsTest, ClampFunction) { + // Test clamping + EXPECT_EQ(constants::clamp(5.0, 0.0, 10.0), 5.0); + EXPECT_EQ(constants::clamp(-5.0, 0.0, 10.0), 0.0); + EXPECT_EQ(constants::clamp(15.0, 0.0, 10.0), 10.0); + + // Test with same min/max + EXPECT_EQ(constants::clamp(5.0, 3.0, 3.0), 3.0); + + // Test with infinity + EXPECT_EQ(constants::clamp(constants::INF_VALUE, 0.0, 10.0), 10.0); + EXPECT_EQ(constants::clamp(-constants::INF_VALUE, 0.0, 10.0), 0.0); +} + +TEST_F(MathConstantsTest, LerpFunction) { + // Test linear interpolation + EXPECT_NEAR(constants::lerp(0.0, 10.0, 0.0), 0.0, 1e-15); + EXPECT_NEAR(constants::lerp(0.0, 10.0, 1.0), 10.0, 1e-15); + EXPECT_NEAR(constants::lerp(0.0, 10.0, 0.5), 5.0, 1e-15); + EXPECT_NEAR(constants::lerp(0.0, 10.0, 0.25), 2.5, 1e-15); + + // Test extrapolation + EXPECT_NEAR(constants::lerp(0.0, 10.0, -0.5), -5.0, 1e-15); + EXPECT_NEAR(constants::lerp(0.0, 10.0, 1.5), 15.0, 1e-15); + + // Test with negative range + EXPECT_NEAR(constants::lerp(-10.0, -5.0, 0.5), -7.5, 1e-15); +} diff --git a/tests/unitTests/FE/Math/test_Matrix.cpp b/tests/unitTests/FE/Math/test_Matrix.cpp new file mode 100644 index 000000000..c186c26ee --- /dev/null +++ b/tests/unitTests/FE/Math/test_Matrix.cpp @@ -0,0 +1,594 @@ +/** + * @file test_Matrix.cpp + * @brief Unit 
tests for Matrix.h - fixed-size matrices with expression templates + */ + +#include +#include "FE/Math/Matrix.h" +#include "FE/Math/Vector.h" +#include "FE/Math/MatrixExpr.h" +#include "FE/Math/MathConstants.h" +#include +#include +#include +#include + +using namespace svmp::FE::math; + +// Test fixture for Matrix tests +class MatrixTest : public ::testing::Test { +protected: + static constexpr double tolerance = 1e-14; + + void SetUp() override {} + void TearDown() override {} + + // Helper function to check if two values are approximately equal + template + bool approx_equal(T a, T b, T tol = tolerance) { + return std::abs(a - b) <= tol; + } +}; + +// ============================================================================= +// Construction and Initialization Tests +// ============================================================================= + +TEST_F(MatrixTest, DefaultConstruction) { + Matrix m; + for (size_t i = 0; i < 3; ++i) { + for (size_t j = 0; j < 3; ++j) { + EXPECT_EQ(m(i, j), 0.0); + } + } +} + +TEST_F(MatrixTest, FillConstruction) { + Matrix m(5.0); + for (size_t i = 0; i < 2; ++i) { + for (size_t j = 0; j < 3; ++j) { + EXPECT_EQ(m(i, j), 5.0); + } + } +} + +TEST_F(MatrixTest, InitializerListConstruction) { + Matrix m{{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}}; + + EXPECT_EQ(m(0, 0), 1.0); + EXPECT_EQ(m(0, 1), 2.0); + EXPECT_EQ(m(0, 2), 3.0); + EXPECT_EQ(m(1, 0), 4.0); + EXPECT_EQ(m(1, 1), 5.0); + EXPECT_EQ(m(1, 2), 6.0); +} + +TEST_F(MatrixTest, CopyConstruction) { + Matrix m1{{1.0, 2.0}, + {3.0, 4.0}}; + Matrix m2(m1); + + EXPECT_EQ(m2(0, 0), 1.0); + EXPECT_EQ(m2(0, 1), 2.0); + EXPECT_EQ(m2(1, 0), 3.0); + EXPECT_EQ(m2(1, 1), 4.0); + + // Ensure deep copy + m2(0, 0) = 10.0; + EXPECT_EQ(m1(0, 0), 1.0); + EXPECT_EQ(m2(0, 0), 10.0); +} + +TEST_F(MatrixTest, MoveConstruction) { + Matrix m1{{1.0, 2.0}, + {3.0, 4.0}}; + Matrix m2(std::move(m1)); + + EXPECT_EQ(m2(0, 0), 1.0); + EXPECT_EQ(m2(0, 1), 2.0); + EXPECT_EQ(m2(1, 0), 3.0); + EXPECT_EQ(m2(1, 1), 
4.0); +} + +// ============================================================================= +// Element Access Tests +// ============================================================================= + +TEST_F(MatrixTest, ElementAccess) { + Matrix m{{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}}; + + // Non-const access using operator() + EXPECT_EQ(m(0, 0), 1.0); + EXPECT_EQ(m(0, 2), 3.0); + EXPECT_EQ(m(1, 1), 5.0); + + // Modification + m(1, 2) = 7.0; + EXPECT_EQ(m(1, 2), 7.0); + + // Const access + const Matrix cm{{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}}; + EXPECT_EQ(cm(0, 1), 2.0); + EXPECT_EQ(cm(1, 0), 4.0); +} + +TEST_F(MatrixTest, ElementAccessBounds) { + Matrix m{{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}}; + + // at() with bounds checking + EXPECT_EQ(m.at(0, 0), 1.0); + EXPECT_EQ(m.at(1, 2), 6.0); + + // Test out of bounds throws + EXPECT_THROW(m.at(2, 0), std::out_of_range); + EXPECT_THROW(m.at(0, 3), std::out_of_range); + EXPECT_THROW(m.at(10, 10), std::out_of_range); +} + +TEST_F(MatrixTest, RowColumnAccess) { + Matrix m{{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}}; + + // Get row + auto row1 = m.row(1); + EXPECT_EQ(row1[0], 4.0); + EXPECT_EQ(row1[1], 5.0); + EXPECT_EQ(row1[2], 6.0); + + // Get column + auto col2 = m.col(2); + EXPECT_EQ(col2[0], 3.0); + EXPECT_EQ(col2[1], 6.0); + EXPECT_EQ(col2[2], 9.0); + + // Set row + Vector new_row{10.0, 11.0, 12.0}; + m.set_row(0, new_row); + EXPECT_EQ(m(0, 0), 10.0); + EXPECT_EQ(m(0, 1), 11.0); + EXPECT_EQ(m(0, 2), 12.0); + + // Set column + Vector new_col{20.0, 21.0, 22.0}; + m.set_col(1, new_col); + EXPECT_EQ(m(0, 1), 20.0); + EXPECT_EQ(m(1, 1), 21.0); + EXPECT_EQ(m(2, 1), 22.0); +} + +// ============================================================================= +// Arithmetic Operations Tests +// ============================================================================= + +TEST_F(MatrixTest, Addition) { + Matrix a{{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}}; + Matrix b{{7.0, 8.0, 9.0}, + {10.0, 11.0, 12.0}}; + + Matrix c = a + 
b; + EXPECT_EQ(c(0, 0), 8.0); + EXPECT_EQ(c(0, 1), 10.0); + EXPECT_EQ(c(0, 2), 12.0); + EXPECT_EQ(c(1, 0), 14.0); + EXPECT_EQ(c(1, 1), 16.0); + EXPECT_EQ(c(1, 2), 18.0); +} + +TEST_F(MatrixTest, Subtraction) { + Matrix a{{8.0, 10.0, 12.0}, + {14.0, 16.0, 18.0}}; + Matrix b{{7.0, 8.0, 9.0}, + {10.0, 11.0, 12.0}}; + + Matrix c = a - b; + EXPECT_EQ(c(0, 0), 1.0); + EXPECT_EQ(c(0, 1), 2.0); + EXPECT_EQ(c(0, 2), 3.0); + EXPECT_EQ(c(1, 0), 4.0); + EXPECT_EQ(c(1, 1), 5.0); + EXPECT_EQ(c(1, 2), 6.0); +} + +TEST_F(MatrixTest, ScalarMultiplication) { + Matrix a{{1.0, 2.0}, + {3.0, 4.0}}; + + Matrix b = 2.0 * a; + EXPECT_EQ(b(0, 0), 2.0); + EXPECT_EQ(b(0, 1), 4.0); + EXPECT_EQ(b(1, 0), 6.0); + EXPECT_EQ(b(1, 1), 8.0); + + Matrix c = a * 3.0; + EXPECT_EQ(c(0, 0), 3.0); + EXPECT_EQ(c(0, 1), 6.0); + EXPECT_EQ(c(1, 0), 9.0); + EXPECT_EQ(c(1, 1), 12.0); +} + +TEST_F(MatrixTest, ScalarDivision) { + Matrix a{{2.0, 4.0}, + {6.0, 8.0}}; + + Matrix b = a / 2.0; + EXPECT_EQ(b(0, 0), 1.0); + EXPECT_EQ(b(0, 1), 2.0); + EXPECT_EQ(b(1, 0), 3.0); + EXPECT_EQ(b(1, 1), 4.0); +} + +TEST_F(MatrixTest, MatrixMultiplication) { + Matrix a{{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}}; + Matrix b{{7.0, 8.0}, + {9.0, 10.0}, + {11.0, 12.0}}; + + Matrix c = a * b; + EXPECT_EQ(c(0, 0), 58.0); // 1*7 + 2*9 + 3*11 + EXPECT_EQ(c(0, 1), 64.0); // 1*8 + 2*10 + 3*12 + EXPECT_EQ(c(1, 0), 139.0); // 4*7 + 5*9 + 6*11 + EXPECT_EQ(c(1, 1), 154.0); // 4*8 + 5*10 + 6*12 +} + +TEST_F(MatrixTest, MatrixVectorMultiplication) { + Matrix m{{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}}; + Vector v{1.0, 2.0, 3.0}; + + Vector result = m * v; + EXPECT_EQ(result[0], 14.0); // 1*1 + 2*2 + 3*3 + EXPECT_EQ(result[1], 32.0); // 4*1 + 5*2 + 6*3 + EXPECT_EQ(result[2], 50.0); // 7*1 + 8*2 + 9*3 +} + +// ============================================================================= +// Special Matrix Operations Tests +// ============================================================================= + +TEST_F(MatrixTest, Transpose) { + 
Matrix m{{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}}; + + Matrix mt = m.transpose(); + EXPECT_EQ(mt(0, 0), 1.0); + EXPECT_EQ(mt(0, 1), 4.0); + EXPECT_EQ(mt(1, 0), 2.0); + EXPECT_EQ(mt(1, 1), 5.0); + EXPECT_EQ(mt(2, 0), 3.0); + EXPECT_EQ(mt(2, 1), 6.0); +} + +TEST_F(MatrixTest, Determinant2x2) { + Matrix m{{1.0, 2.0}, + {3.0, 4.0}}; + + double det = m.determinant(); + EXPECT_EQ(det, -2.0); // 1*4 - 2*3 = 4 - 6 = -2 +} + +TEST_F(MatrixTest, Determinant3x3) { + Matrix m{{1.0, 2.0, 3.0}, + {0.0, 1.0, 4.0}, + {5.0, 6.0, 0.0}}; + + double det = m.determinant(); + EXPECT_EQ(det, 1.0); // Using Sarrus rule +} + +TEST_F(MatrixTest, Determinant4x4) { + Matrix m{{1, 0, 0, 0}, + {0, 2, 0, 0}, + {0, 0, 3, 0}, + {0, 0, 0, 4}}; + + double det = m.determinant(); + EXPECT_EQ(det, 24.0); // 1*2*3*4 = 24 (diagonal matrix) +} + +TEST_F(MatrixTest, Inverse2x2) { + Matrix m{{1.0, 2.0}, + {3.0, 4.0}}; + + Matrix inv = m.inverse(); + + // Check inverse properties + EXPECT_NEAR(inv(0, 0), -2.0, tolerance); + EXPECT_NEAR(inv(0, 1), 1.0, tolerance); + EXPECT_NEAR(inv(1, 0), 1.5, tolerance); + EXPECT_NEAR(inv(1, 1), -0.5, tolerance); + + // Verify M * M^-1 = I + Matrix identity = m * inv; + EXPECT_NEAR(identity(0, 0), 1.0, tolerance); + EXPECT_NEAR(identity(0, 1), 0.0, tolerance); + EXPECT_NEAR(identity(1, 0), 0.0, tolerance); + EXPECT_NEAR(identity(1, 1), 1.0, tolerance); +} + +TEST_F(MatrixTest, Inverse3x3) { + Matrix m{{1.0, 2.0, 3.0}, + {0.0, 1.0, 4.0}, + {5.0, 6.0, 0.0}}; + + Matrix inv = m.inverse(); + + // Verify M * M^-1 = I + Matrix identity = m * inv; + for (size_t i = 0; i < 3; ++i) { + for (size_t j = 0; j < 3; ++j) { + double expected = (i == j) ? 
1.0 : 0.0; + EXPECT_NEAR(identity(i, j), expected, tolerance); + } + } +} + +TEST_F(MatrixTest, Trace) { + Matrix m{{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}}; + + double trace = m.trace(); + EXPECT_EQ(trace, 15.0); // 1 + 5 + 9 = 15 +} + +// ============================================================================= +// Special Matrix Types Tests +// ============================================================================= + +TEST_F(MatrixTest, IdentityMatrix) { + Matrix I = Matrix::identity(); + + for (size_t i = 0; i < 3; ++i) { + for (size_t j = 0; j < 3; ++j) { + double expected = (i == j) ? 1.0 : 0.0; + EXPECT_EQ(I(i, j), expected); + } + } + + // Test identity property + Matrix m{{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}}; + Matrix result = m * I; + + for (size_t i = 0; i < 3; ++i) { + for (size_t j = 0; j < 3; ++j) { + EXPECT_EQ(result(i, j), m(i, j)); + } + } +} + +TEST_F(MatrixTest, ZeroMatrix) { + Matrix Z = Matrix::zero(); + + for (size_t i = 0; i < 2; ++i) { + for (size_t j = 0; j < 3; ++j) { + EXPECT_EQ(Z(i, j), 0.0); + } + } +} + +TEST_F(MatrixTest, DiagonalMatrix) { + Vector diag{1.0, 2.0, 3.0}; + Matrix D = Matrix::diagonal(diag); + + EXPECT_EQ(D(0, 0), 1.0); + EXPECT_EQ(D(1, 1), 2.0); + EXPECT_EQ(D(2, 2), 3.0); + + // Off-diagonal elements should be zero + EXPECT_EQ(D(0, 1), 0.0); + EXPECT_EQ(D(0, 2), 0.0); + EXPECT_EQ(D(1, 0), 0.0); + EXPECT_EQ(D(1, 2), 0.0); + EXPECT_EQ(D(2, 0), 0.0); + EXPECT_EQ(D(2, 1), 0.0); +} + +// ============================================================================= +// Expression Template Tests +// ============================================================================= + +TEST_F(MatrixTest, ExpressionTemplatesNoTemporaries) { + Matrix a{{1, 2}, {3, 4}}; + Matrix b{{5, 6}, {7, 8}}; + Matrix c{{9, 10}, {11, 12}}; + + // Complex expression should create no temporaries + Matrix result = a + b - c; + + EXPECT_EQ(result(0, 0), -3.0); // 1 + 5 - 9 + EXPECT_EQ(result(0, 1), -2.0); // 2 
+ 6 - 10 + EXPECT_EQ(result(1, 0), -1.0); // 3 + 7 - 11 + EXPECT_EQ(result(1, 1), 0.0); // 4 + 8 - 12 +} + +TEST_F(MatrixTest, LazyEvaluation) { + Matrix a{{1, 2}, {3, 4}}; + Matrix b{{5, 6}, {7, 8}}; + + // Expression should not be evaluated until assignment + auto expr = a + b; // No computation yet + + Matrix result = expr; // Evaluation happens here + EXPECT_EQ(result(0, 0), 6.0); + EXPECT_EQ(result(0, 1), 8.0); +} + +// ============================================================================= +// Edge Cases and Error Handling Tests +// ============================================================================= + +TEST_F(MatrixTest, SingularMatrixInverse) { + Matrix singular{{1.0, 2.0}, + {2.0, 4.0}}; // det = 0 + + EXPECT_THROW(singular.inverse(), std::runtime_error); +} + +TEST_F(MatrixTest, DivisionByZero) { + Matrix m{{1.0, 2.0}, + {3.0, 4.0}}; + + Matrix result = m / 0.0; + EXPECT_TRUE(std::isinf(result(0, 0))); + EXPECT_TRUE(std::isinf(result(0, 1))); +} + +TEST_F(MatrixTest, ExtremeLargeValues) { + double large = 1e308; + Matrix m{{large, 0}, {0, large}}; + + Matrix half = m / 2.0; + EXPECT_FALSE(std::isinf(half(0, 0))); + EXPECT_EQ(half(0, 0), large / 2.0); +} + +// ============================================================================= +// Numerical Precision Tests +// ============================================================================= + +TEST_F(MatrixTest, NumericalStability) { + // Test near-singular matrix + double eps = 1e-15; + Matrix m{{1.0, 1.0}, + {1.0, 1.0 + eps}}; + + double det = m.determinant(); + // Relax tolerance due to floating-point arithmetic in determinant calculation + EXPECT_NEAR(det, eps, 1e-14); +} + +TEST_F(MatrixTest, OrthogonalMatrixProperties) { + // Create rotation matrix (orthogonal) + double angle = M_PI / 4; + Matrix R{{cos(angle), -sin(angle)}, + {sin(angle), cos(angle)}}; + + // Check orthogonality: R * R^T = I + Matrix RRt = R * R.transpose(); + EXPECT_NEAR(RRt(0, 0), 1.0, tolerance); + 
EXPECT_NEAR(RRt(0, 1), 0.0, tolerance); + EXPECT_NEAR(RRt(1, 0), 0.0, tolerance); + EXPECT_NEAR(RRt(1, 1), 1.0, tolerance); + + // Check determinant = ±1 + EXPECT_NEAR(std::abs(R.determinant()), 1.0, tolerance); +} + +// ============================================================================= +// Matrix Properties Tests +// ============================================================================= + +TEST_F(MatrixTest, IsSymmetric) { + Matrix sym{{1, 2, 3}, + {2, 4, 5}, + {3, 5, 6}}; + EXPECT_TRUE(sym.is_symmetric(tolerance)); + + Matrix nonsym{{1, 2, 3}, + {4, 5, 6}, + {7, 8, 9}}; + EXPECT_FALSE(nonsym.is_symmetric(tolerance)); +} + +TEST_F(MatrixTest, IsSkewSymmetric) { + Matrix skew{{0, -1, 2}, + {1, 0, -3}, + {-2, 3, 0}}; + EXPECT_TRUE(skew.is_skew_symmetric(tolerance)); + + Matrix nonskew{{1, 2, 3}, + {4, 5, 6}, + {7, 8, 9}}; + EXPECT_FALSE(nonskew.is_skew_symmetric(tolerance)); +} + +TEST_F(MatrixTest, IsDiagonal) { + Matrix diag{{1, 0, 0}, + {0, 2, 0}, + {0, 0, 3}}; + EXPECT_TRUE(diag.is_diagonal(tolerance)); + + Matrix nondiag{{1, 0.1, 0}, + {0, 2, 0}, + {0, 0, 3}}; + EXPECT_FALSE(nondiag.is_diagonal(tolerance)); +} + +// ============================================================================= +// Thread Safety Tests +// ============================================================================= + +TEST_F(MatrixTest, ThreadSafetyReadOnly) { + Matrix m{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}; + + std::vector threads; + std::vector results(10); + + for (int i = 0; i < 10; ++i) { + threads.emplace_back([&m, &results, i]() { + results[static_cast(i)] = m.trace(); + }); + } + + for (auto& t : threads) { + t.join(); + } + + for (double r : results) { + EXPECT_EQ(r, 15.0); + } +} + +// ============================================================================= +// Memory Alignment Tests +// ============================================================================= + +TEST_F(MatrixTest, MemoryAlignment) { + Matrix m; + + std::uintptr_t addr = 
reinterpret_cast(m.data()); + EXPECT_EQ(addr % 32, 0) << "Matrix data should be 32-byte aligned for AVX"; +} + +// ============================================================================= +// Utility Function Tests +// ============================================================================= + +TEST_F(MatrixTest, Norms) { + Matrix m{{1, 2}, {3, 4}}; + + // Frobenius norm: sqrt(1^2 + 2^2 + 3^2 + 4^2) = sqrt(30) + EXPECT_NEAR(m.frobenius_norm(), std::sqrt(30.0), tolerance); + + // Infinity norm (max row sum) + EXPECT_EQ(m.infinity_norm(), 7.0); // max(|1|+|2|, |3|+|4|) = max(3, 7) + + // One norm (max column sum) + EXPECT_EQ(m.one_norm(), 6.0); // max(|1|+|3|, |2|+|4|) = max(4, 6) +} + +TEST_F(MatrixTest, MinMaxElements) { + Matrix m{{3, -1, 4}, {1, -2, 5}}; + + EXPECT_EQ(m.min(), -2.0); + EXPECT_EQ(m.max(), 5.0); +} + +TEST_F(MatrixTest, ToString) { + Matrix m{{1, 2}, {3, 4}}; + std::stringstream ss; + ss << m; + + std::string expected = "[[1, 2]\n [3, 4]]"; + EXPECT_EQ(ss.str(), expected); +} diff --git a/tests/unitTests/FE/Math/test_MatrixExpr.cpp b/tests/unitTests/FE/Math/test_MatrixExpr.cpp new file mode 100644 index 000000000..9486f409c --- /dev/null +++ b/tests/unitTests/FE/Math/test_MatrixExpr.cpp @@ -0,0 +1,528 @@ +/** + * @file test_MatrixExpr.cpp + * @brief Unit tests for MatrixExpr.h - matrix expression templates + */ + +#include +#include "FE/Math/Matrix.h" +#include "FE/Math/MatrixExpr.h" +#include "FE/Math/Vector.h" +#include "FE/Math/MathConstants.h" +#include +#include +#include +#include +#include + +using namespace svmp::FE::math; + +// Test fixture for MatrixExpr tests +class MatrixExprTest : public ::testing::Test { +protected: + static constexpr double tolerance = 1e-14; + + // Custom allocator to track memory allocations + template + class TrackingAllocator { + public: + using value_type = T; + + static std::atomic allocations; + static std::atomic deallocations; + static std::atomic bytes_allocated; + + TrackingAllocator() = default; 
+ + template + TrackingAllocator(const TrackingAllocator&) {} + + T* allocate(size_t n) { + allocations.fetch_add(1); + bytes_allocated.fetch_add(n * sizeof(T)); + return static_cast(::operator new(n * sizeof(T))); + } + + void deallocate(T* p, size_t n) { + deallocations.fetch_add(1); + ::operator delete(p); + } + + static void reset() { + allocations = 0; + deallocations = 0; + bytes_allocated = 0; + } + }; + + void SetUp() override { + TrackingAllocator::reset(); + } + + void TearDown() override {} + + template + bool approx_equal(T a, T b, T tol = tolerance) { + return std::abs(a - b) <= tol; + } +}; + +template +std::atomic MatrixExprTest::TrackingAllocator::allocations{0}; +template +std::atomic MatrixExprTest::TrackingAllocator::deallocations{0}; +template +std::atomic MatrixExprTest::TrackingAllocator::bytes_allocated{0}; + +// ============================================================================= +// Lazy Evaluation Tests +// ============================================================================= + +TEST_F(MatrixExprTest, LazyEvaluationNoTemporaries) { + // Expression templates should not create temporary matrices + Matrix A{{1.0, 2.0}, {3.0, 4.0}}; + Matrix B{{5.0, 6.0}, {7.0, 8.0}}; + Matrix C{{9.0, 10.0}, {11.0, 12.0}}; + + // Build expression without evaluation + auto expr = A + B - C; + + // Expression type should not be Matrix, but an expression type + using ExprType = decltype(expr); + EXPECT_FALSE((std::is_same_v>)); + + // Now evaluate + Matrix result = expr; + EXPECT_DOUBLE_EQ(result(0, 0), -3.0); + EXPECT_DOUBLE_EQ(result(0, 1), -2.0); + EXPECT_DOUBLE_EQ(result(1, 0), -1.0); + EXPECT_DOUBLE_EQ(result(1, 1), 0.0); +} + +TEST_F(MatrixExprTest, LazyEvaluationAccessPattern) { + Matrix A; + Matrix B; + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + A(i, j) = i * 3 + j + 1; + B(i, j) = (i * 3 + j + 1) * 2; + } + } + + auto expr = A + B; + + // Access individual elements without full evaluation + EXPECT_DOUBLE_EQ(expr(0, 
0), 3.0); + EXPECT_DOUBLE_EQ(expr(1, 1), 15.0); + EXPECT_DOUBLE_EQ(expr(2, 2), 27.0); + + // Size should be accessible + EXPECT_EQ(expr.rows(), 3u); + EXPECT_EQ(expr.cols(), 3u); +} + +// ============================================================================= +// Matrix Multiplication Tests +// ============================================================================= + +TEST_F(MatrixExprTest, MatrixMultiplicationExpression) { + Matrix A{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}; + Matrix B{{7.0, 8.0}, {9.0, 10.0}, {11.0, 12.0}}; + + // Matrix multiplication should produce 2x2 result + Matrix C = A * B; + + // Verify results + EXPECT_DOUBLE_EQ(C(0, 0), 58.0); // 1*7 + 2*9 + 3*11 + EXPECT_DOUBLE_EQ(C(0, 1), 64.0); // 1*8 + 2*10 + 3*12 + EXPECT_DOUBLE_EQ(C(1, 0), 139.0); // 4*7 + 5*9 + 6*11 + EXPECT_DOUBLE_EQ(C(1, 1), 154.0); // 4*8 + 5*10 + 6*12 +} + +TEST_F(MatrixExprTest, ChainedMatrixMultiplication) { + Matrix A{{1.0, 2.0}, {3.0, 4.0}}; + Matrix B{{5.0, 6.0}, {7.0, 8.0}}; + Matrix C{{9.0, 10.0}, {11.0, 12.0}}; + + // Chain matrix multiplications: (A * B) * C + Matrix result = A * B * C; + + // First compute A * B + Matrix AB = A * B; + EXPECT_DOUBLE_EQ(AB(0, 0), 19.0); // 1*5 + 2*7 + EXPECT_DOUBLE_EQ(AB(0, 1), 22.0); // 1*6 + 2*8 + EXPECT_DOUBLE_EQ(AB(1, 0), 43.0); // 3*5 + 4*7 + EXPECT_DOUBLE_EQ(AB(1, 1), 50.0); // 3*6 + 4*8 + + // Then (A * B) * C + EXPECT_DOUBLE_EQ(result(0, 0), 413.0); // 19*9 + 22*11 + EXPECT_DOUBLE_EQ(result(0, 1), 454.0); // 19*10 + 22*12 + EXPECT_DOUBLE_EQ(result(1, 0), 937.0); // 43*9 + 50*11 + EXPECT_DOUBLE_EQ(result(1, 1), 1030.0); // 43*10 + 50*12 +} + +// ============================================================================= +// Mixed Operations Tests +// ============================================================================= + +TEST_F(MatrixExprTest, MixedMatrixOperations) { + Matrix A, B, C, D; + + // Initialize matrices + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + A(i, j) = i + j + 1; + B(i, j) = 
(i + 1) * (j + 1); + C(i, j) = i * j + 1; + D(i, j) = 1.0; + } + } + + // Complex expression: A * B + C * D + Matrix result = A * B + C * D; + + // Verify a few key elements + Matrix AB = A * B; + Matrix CD = C * D; + + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + EXPECT_DOUBLE_EQ(result(i, j), AB(i, j) + CD(i, j)); + } + } +} + +TEST_F(MatrixExprTest, ScalarMultiplicationInExpression) { + Matrix A{{1.0, 2.0}, {3.0, 4.0}}; + Matrix B{{5.0, 6.0}, {7.0, 8.0}}; + + Matrix result = 2.0 * (A + B) / 3.0; + + EXPECT_TRUE(approx_equal(result(0, 0), 4.0)); + EXPECT_TRUE(approx_equal(result(0, 1), 16.0/3.0)); + EXPECT_TRUE(approx_equal(result(1, 0), 20.0/3.0)); + EXPECT_TRUE(approx_equal(result(1, 1), 8.0)); +} + +// ============================================================================= +// Transpose Tests +// ============================================================================= + +TEST_F(MatrixExprTest, TransposeExpression) { + Matrix A{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}; + + auto AT = transpose(A); + + // Check dimensions + EXPECT_EQ(AT.rows(), 3u); + EXPECT_EQ(AT.cols(), 2u); + + // Check values + EXPECT_DOUBLE_EQ(AT(0, 0), 1.0); + EXPECT_DOUBLE_EQ(AT(0, 1), 4.0); + EXPECT_DOUBLE_EQ(AT(1, 0), 2.0); + EXPECT_DOUBLE_EQ(AT(1, 1), 5.0); + EXPECT_DOUBLE_EQ(AT(2, 0), 3.0); + EXPECT_DOUBLE_EQ(AT(2, 1), 6.0); +} + +TEST_F(MatrixExprTest, TransposeInExpression) { + Matrix A{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}; + Matrix B{{7.0, 8.0}, {9.0, 10.0}, {11.0, 12.0}}; + + // Compute A^T * B (should be 2x2) + Matrix result = transpose(A) * B; + + EXPECT_DOUBLE_EQ(result(0, 0), 89.0); // 1*7 + 3*9 + 5*11 + EXPECT_DOUBLE_EQ(result(0, 1), 98.0); // 1*8 + 3*10 + 5*12 + EXPECT_DOUBLE_EQ(result(1, 0), 116.0); // 2*7 + 4*9 + 6*11 + EXPECT_DOUBLE_EQ(result(1, 1), 128.0); // 2*8 + 4*10 + 6*12 +} + +// ============================================================================= +// Unary Operations Tests +// 
============================================================================= + +TEST_F(MatrixExprTest, NegationInExpression) { + Matrix A{{1.0, -2.0}, {3.0, -4.0}}; + Matrix B{{5.0, 6.0}, {-7.0, 8.0}}; + + Matrix result = -A + (-B); + + EXPECT_DOUBLE_EQ(result(0, 0), -6.0); + EXPECT_DOUBLE_EQ(result(0, 1), -4.0); + EXPECT_DOUBLE_EQ(result(1, 0), 4.0); + EXPECT_DOUBLE_EQ(result(1, 1), -4.0); +} + +TEST_F(MatrixExprTest, AbsoluteValueExpression) { + Matrix M{{-1.5, 2.3, -4.7}, {0.0, -3.2, 5.1}}; + + Matrix result = abs(M); + + EXPECT_DOUBLE_EQ(result(0, 0), 1.5); + EXPECT_DOUBLE_EQ(result(0, 1), 2.3); + EXPECT_DOUBLE_EQ(result(0, 2), 4.7); + EXPECT_DOUBLE_EQ(result(1, 0), 0.0); + EXPECT_DOUBLE_EQ(result(1, 1), 3.2); + EXPECT_DOUBLE_EQ(result(1, 2), 5.1); +} + +TEST_F(MatrixExprTest, SqrtExpression) { + Matrix M{{4.0, 9.0}, {16.0, 25.0}}; + + Matrix result = sqrt(M); + + EXPECT_DOUBLE_EQ(result(0, 0), 2.0); + EXPECT_DOUBLE_EQ(result(0, 1), 3.0); + EXPECT_DOUBLE_EQ(result(1, 0), 4.0); + EXPECT_DOUBLE_EQ(result(1, 1), 5.0); +} + +// ============================================================================= +// Element-wise Operations Tests +// ============================================================================= + +TEST_F(MatrixExprTest, HadamardProductExpression) { + Matrix A{{2.0, 3.0, 4.0}, {5.0, 6.0, 7.0}}; + Matrix B{{8.0, 9.0, 10.0}, {11.0, 12.0, 13.0}}; + + Matrix result = hadamard(A, B); + + EXPECT_DOUBLE_EQ(result(0, 0), 16.0); + EXPECT_DOUBLE_EQ(result(0, 1), 27.0); + EXPECT_DOUBLE_EQ(result(0, 2), 40.0); + EXPECT_DOUBLE_EQ(result(1, 0), 55.0); + EXPECT_DOUBLE_EQ(result(1, 1), 72.0); + EXPECT_DOUBLE_EQ(result(1, 2), 91.0); +} + +TEST_F(MatrixExprTest, HadamardDivisionExpression) { + Matrix A{{10.0, 18.0}, {28.0, 36.0}}; + Matrix B{{2.0, 3.0}, {4.0, 6.0}}; + + Matrix result = hadamard_div(A, B); + + EXPECT_DOUBLE_EQ(result(0, 0), 5.0); + EXPECT_DOUBLE_EQ(result(0, 1), 6.0); + EXPECT_DOUBLE_EQ(result(1, 0), 7.0); + EXPECT_DOUBLE_EQ(result(1, 1), 
6.0); +} + +// ============================================================================= +// Norm and Trace Tests +// ============================================================================= + +TEST_F(MatrixExprTest, FrobeniusNormOfExpression) { + Matrix A{{1.0, 2.0}, {3.0, 4.0}}; + Matrix B{{2.0, 2.0}, {2.0, 2.0}}; + + double norm_sq = frobenius_norm_squared(A - B); + double norm = frobenius_norm(A - B); + + // (A - B) = [[-1, 0], [1, 2]] + // norm_squared = 1 + 0 + 1 + 4 = 6 + EXPECT_DOUBLE_EQ(norm_sq, 6.0); + EXPECT_DOUBLE_EQ(norm, std::sqrt(6.0)); +} + +TEST_F(MatrixExprTest, TraceOfExpression) { + Matrix A; + Matrix B; + + // Initialize as diagonal matrices + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + A(i, j) = (i == j) ? (i + 1) : 0.0; // diag(1, 2, 3) + B(i, j) = (i == j) ? (i + 4) : 0.0; // diag(4, 5, 6) + } + } + + double tr = trace(A + B); + + // trace(A + B) = trace(diag(5, 7, 9)) = 21 + EXPECT_DOUBLE_EQ(tr, 21.0); +} + +// ============================================================================= +// Type Deduction Tests +// ============================================================================= + +TEST_F(MatrixExprTest, TypeDeductionCorrectness) { + Matrix Mf{{1.0f, 2.0f}, {3.0f, 4.0f}}; + Matrix Md{{5.0, 6.0}, {7.0, 8.0}}; + + // Float expression + auto expr = Mf + Mf; + using ExprType = decltype(expr(0, 0)); + EXPECT_TRUE((std::is_same_v)); + + // Test that expression evaluates correctly + Matrix result = expr; + EXPECT_FLOAT_EQ(result(0, 0), 2.0f); + EXPECT_FLOAT_EQ(result(1, 1), 8.0f); +} + +// ============================================================================= +// SFINAE and Compile-time Tests +// ============================================================================= + +TEST_F(MatrixExprTest, SFINAEConstraints) { + // Test that MatrixExpr operators only work with MatrixExpr types + Matrix M1{{1.0, 2.0}, {3.0, 4.0}}; + Matrix M2{{5.0, 6.0}, {7.0, 8.0}}; + + // This should compile + auto expr = 
M1 + M2; + Matrix result = expr; + + // Verify the constraint checking + EXPECT_TRUE((std::is_base_of_v>, Matrix>)); +} + +// ============================================================================= +// Aliasing and Self-Assignment Tests +// ============================================================================= + +TEST_F(MatrixExprTest, SelfAssignmentWithExpression) { + Matrix A{{1.0, 2.0}, {3.0, 4.0}}; + Matrix B{{5.0, 6.0}, {7.0, 8.0}}; + + // Self-assignment through expression + A = A + B; + + EXPECT_DOUBLE_EQ(A(0, 0), 6.0); + EXPECT_DOUBLE_EQ(A(0, 1), 8.0); + EXPECT_DOUBLE_EQ(A(1, 0), 10.0); + EXPECT_DOUBLE_EQ(A(1, 1), 12.0); +} + +TEST_F(MatrixExprTest, AliasingInExpression) { + Matrix A{{2.0, 3.0}, {4.0, 5.0}}; + Matrix B{{1.0, 1.0}, {1.0, 1.0}}; + + // A appears on both sides + A = B + A; + + EXPECT_DOUBLE_EQ(A(0, 0), 3.0); + EXPECT_DOUBLE_EQ(A(0, 1), 4.0); + EXPECT_DOUBLE_EQ(A(1, 0), 5.0); + EXPECT_DOUBLE_EQ(A(1, 1), 6.0); +} + +// ============================================================================= +// Edge Cases Tests +// ============================================================================= + +TEST_F(MatrixExprTest, SingleElementMatrix) { + Matrix A{{5.0}}; + Matrix B{{3.0}}; + + Matrix result = A + B - A * 0.5; + + EXPECT_DOUBLE_EQ(result(0, 0), 5.5); +} + +TEST_F(MatrixExprTest, NonSquareMatrixOperations) { + Matrix A; + Matrix B; + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 4; ++j) { + A(i, j) = i * 4 + j + 1; + B(i, j) = (i * 4 + j + 1) * 2; + } + } + + Matrix result = A + B - A * 0.5; + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 4; ++j) { + double expected = A(i, j) + B(i, j) - A(i, j) * 0.5; + EXPECT_DOUBLE_EQ(result(i, j), expected); + } + } +} + +// ============================================================================= +// Diagonal Matrix Tests +// ============================================================================= + +TEST_F(MatrixExprTest, DiagonalMatrixExpression) { + Vector 
v{1.0, 2.0, 3.0}; + + auto diag = DiagonalExpr>(v); + + // Check dimensions + EXPECT_EQ(diag.rows(), 3u); + EXPECT_EQ(diag.cols(), 3u); + + // Check values + EXPECT_DOUBLE_EQ(diag(0, 0), 1.0); + EXPECT_DOUBLE_EQ(diag(1, 1), 2.0); + EXPECT_DOUBLE_EQ(diag(2, 2), 3.0); + + // Off-diagonal should be zero + EXPECT_DOUBLE_EQ(diag(0, 1), 0.0); + EXPECT_DOUBLE_EQ(diag(1, 0), 0.0); +} + +TEST_F(MatrixExprTest, DiagonalMatrixInExpression) { + Vector v{2.0, 3.0}; + Matrix A{{1.0, 1.0}, {1.0, 1.0}}; + + auto diag = DiagonalExpr>(v); + Matrix result = A + diag; + + EXPECT_DOUBLE_EQ(result(0, 0), 3.0); + EXPECT_DOUBLE_EQ(result(0, 1), 1.0); + EXPECT_DOUBLE_EQ(result(1, 0), 1.0); + EXPECT_DOUBLE_EQ(result(1, 1), 4.0); +} + +// ============================================================================= +// Complex Expression Pattern Tests +// ============================================================================= + +TEST_F(MatrixExprTest, ComplexNestedExpression) { + Matrix A{{1.0, 2.0}, {3.0, 4.0}}; + Matrix B{{5.0, 6.0}, {7.0, 8.0}}; + Matrix C{{9.0, 10.0}, {11.0, 12.0}}; + + // Complex expression with multiple operation types + Matrix result = 2.0 * abs(A - B) + sqrt(hadamard(C, C)) / 3.0; + + // |A - B| = |[-4, -4], [-4, -4]| = [4, 4], [4, 4] + // 2 * [4, 4], [4, 4] = [8, 8], [8, 8] + // C * C (element-wise) = [81, 100], [121, 144] + // sqrt(C * C) = [9, 10], [11, 12] + // sqrt(C * C) / 3 = [3, 10/3], [11/3, 4] + // result = [11, 34/3], [35/3, 12] + + EXPECT_DOUBLE_EQ(result(0, 0), 11.0); + EXPECT_TRUE(approx_equal(result(0, 1), 34.0/3.0)); + EXPECT_TRUE(approx_equal(result(1, 0), 35.0/3.0)); + EXPECT_DOUBLE_EQ(result(1, 1), 12.0); +} + +TEST_F(MatrixExprTest, MatrixVectorMixedExpression) { + Matrix A; + Vector v{1.0, 2.0, 3.0}; + + // Create identity matrix + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + A(i, j) = (i == j) ? 
1.0 : 0.0; + } + } + + // Create diagonal from vector and add to identity + auto diag = DiagonalExpr>(v); + Matrix result = A + diag; + + // Result should be diag(2, 3, 4) + EXPECT_DOUBLE_EQ(result(0, 0), 2.0); + EXPECT_DOUBLE_EQ(result(1, 1), 3.0); + EXPECT_DOUBLE_EQ(result(2, 2), 4.0); + EXPECT_DOUBLE_EQ(result(0, 1), 0.0); + EXPECT_DOUBLE_EQ(result(1, 0), 0.0); +} diff --git a/tests/unitTests/FE/Math/test_Vector.cpp b/tests/unitTests/FE/Math/test_Vector.cpp new file mode 100644 index 000000000..a38a71727 --- /dev/null +++ b/tests/unitTests/FE/Math/test_Vector.cpp @@ -0,0 +1,589 @@ +/** + * @file test_Vector.cpp + * @brief Unit tests for Vector.h - fixed-size vectors with expression templates + */ + +#include +#include "FE/Math/Vector.h" +#include "FE/Math/VectorExpr.h" +#include "FE/Math/MathConstants.h" +#include +#include +#include +#include +#include + +using namespace svmp::FE::math; + +// Test fixture for Vector tests +class VectorTest : public ::testing::Test { +protected: + static constexpr double tolerance = 1e-14; + + void SetUp() override {} + void TearDown() override {} + + // Helper function to check if two values are approximately equal + template + bool approx_equal(T a, T b, T tol = tolerance) { + return std::abs(a - b) <= tol; + } +}; + +// ============================================================================= +// Construction and Initialization Tests +// ============================================================================= + +TEST_F(VectorTest, DefaultConstruction) { + Vector v; + EXPECT_EQ(v[0], 0.0); + EXPECT_EQ(v[1], 0.0); + EXPECT_EQ(v[2], 0.0); + + Vector vf; + for (size_t i = 0; i < 4; ++i) { + EXPECT_EQ(vf[i], 0.0f); + } +} + +TEST_F(VectorTest, FillConstruction) { + Vector v(5.0); + EXPECT_EQ(v[0], 5.0); + EXPECT_EQ(v[1], 5.0); + EXPECT_EQ(v[2], 5.0); + + Vector vi(-3); + for (size_t i = 0; i < 10; ++i) { + EXPECT_EQ(vi[i], -3); + } +} + +TEST_F(VectorTest, InitializerListConstruction) { + Vector v{1.0, 2.0, 3.0}; + 
EXPECT_EQ(v[0], 1.0); + EXPECT_EQ(v[1], 2.0); + EXPECT_EQ(v[2], 3.0); + + // Partial initialization + Vector v2{1.0, 2.0}; + EXPECT_EQ(v2[0], 1.0); + EXPECT_EQ(v2[1], 2.0); + EXPECT_EQ(v2[2], 0.0); + EXPECT_EQ(v2[3], 0.0); + EXPECT_EQ(v2[4], 0.0); +} + +TEST_F(VectorTest, CopyConstruction) { + Vector v1{1.0, 2.0, 3.0}; + Vector v2(v1); + + EXPECT_EQ(v2[0], 1.0); + EXPECT_EQ(v2[1], 2.0); + EXPECT_EQ(v2[2], 3.0); + + // Ensure deep copy + v2[0] = 10.0; + EXPECT_EQ(v1[0], 1.0); + EXPECT_EQ(v2[0], 10.0); +} + +TEST_F(VectorTest, MoveConstruction) { + Vector v1{1.0, 2.0, 3.0}; + Vector v2(std::move(v1)); + + EXPECT_EQ(v2[0], 1.0); + EXPECT_EQ(v2[1], 2.0); + EXPECT_EQ(v2[2], 3.0); +} + +// ============================================================================= +// Element Access Tests +// ============================================================================= + +TEST_F(VectorTest, ElementAccess) { + Vector v{1.0, 2.0, 3.0}; + + // Non-const access + EXPECT_EQ(v[0], 1.0); + EXPECT_EQ(v[1], 2.0); + EXPECT_EQ(v[2], 3.0); + + // Modification + v[1] = 5.0; + EXPECT_EQ(v[1], 5.0); + + // Const access + const Vector cv{4.0, 5.0, 6.0}; + EXPECT_EQ(cv[0], 4.0); + EXPECT_EQ(cv[1], 5.0); + EXPECT_EQ(cv[2], 6.0); +} + +TEST_F(VectorTest, ElementAccessBounds) { + Vector v{1.0, 2.0, 3.0}; + + // at() with bounds checking + EXPECT_EQ(v.at(0), 1.0); + EXPECT_EQ(v.at(1), 2.0); + EXPECT_EQ(v.at(2), 3.0); + + // Test out of bounds throws + EXPECT_THROW(v.at(3), std::out_of_range); + EXPECT_THROW(v.at(100), std::out_of_range); +} + +TEST_F(VectorTest, DataPointerAccess) { + Vector v{1.0, 2.0, 3.0}; + + double* data = v.data(); + EXPECT_EQ(data[0], 1.0); + EXPECT_EQ(data[1], 2.0); + EXPECT_EQ(data[2], 3.0); + + // Const data access + const Vector cv{4.0, 5.0, 6.0}; + const double* cdata = cv.data(); + EXPECT_EQ(cdata[0], 4.0); + EXPECT_EQ(cdata[1], 5.0); + EXPECT_EQ(cdata[2], 6.0); +} + +// ============================================================================= +// 
Arithmetic Operations Tests +// ============================================================================= + +TEST_F(VectorTest, Addition) { + Vector a{1.0, 2.0, 3.0}; + Vector b{4.0, 5.0, 6.0}; + + Vector c = a + b; + EXPECT_EQ(c[0], 5.0); + EXPECT_EQ(c[1], 7.0); + EXPECT_EQ(c[2], 9.0); +} + +TEST_F(VectorTest, Subtraction) { + Vector a{5.0, 7.0, 9.0}; + Vector b{4.0, 5.0, 6.0}; + + Vector c = a - b; + EXPECT_EQ(c[0], 1.0); + EXPECT_EQ(c[1], 2.0); + EXPECT_EQ(c[2], 3.0); +} + +TEST_F(VectorTest, ScalarMultiplication) { + Vector a{1.0, 2.0, 3.0}; + + // Scalar * Vector + Vector b = 2.0 * a; + EXPECT_EQ(b[0], 2.0); + EXPECT_EQ(b[1], 4.0); + EXPECT_EQ(b[2], 6.0); + + // Vector * Scalar + Vector c = a * 3.0; + EXPECT_EQ(c[0], 3.0); + EXPECT_EQ(c[1], 6.0); + EXPECT_EQ(c[2], 9.0); +} + +TEST_F(VectorTest, ScalarDivision) { + Vector a{2.0, 4.0, 6.0}; + + Vector b = a / 2.0; + EXPECT_EQ(b[0], 1.0); + EXPECT_EQ(b[1], 2.0); + EXPECT_EQ(b[2], 3.0); +} + +TEST_F(VectorTest, UnaryNegation) { + Vector a{1.0, -2.0, 3.0}; + + Vector b = -a; + EXPECT_EQ(b[0], -1.0); + EXPECT_EQ(b[1], 2.0); + EXPECT_EQ(b[2], -3.0); +} + +TEST_F(VectorTest, CompoundAssignment) { + Vector a{1.0, 2.0, 3.0}; + Vector b{4.0, 5.0, 6.0}; + + // += + a += b; + EXPECT_EQ(a[0], 5.0); + EXPECT_EQ(a[1], 7.0); + EXPECT_EQ(a[2], 9.0); + + // -= + a -= b; + EXPECT_EQ(a[0], 1.0); + EXPECT_EQ(a[1], 2.0); + EXPECT_EQ(a[2], 3.0); + + // *= + a *= 2.0; + EXPECT_EQ(a[0], 2.0); + EXPECT_EQ(a[1], 4.0); + EXPECT_EQ(a[2], 6.0); + + // /= + a /= 2.0; + EXPECT_EQ(a[0], 1.0); + EXPECT_EQ(a[1], 2.0); + EXPECT_EQ(a[2], 3.0); +} + +// ============================================================================= +// Vector Operations Tests +// ============================================================================= + +TEST_F(VectorTest, DotProduct) { + Vector a{1.0, 2.0, 3.0}; + Vector b{4.0, 5.0, 6.0}; + + double dot = a.dot(b); + EXPECT_EQ(dot, 32.0); // 1*4 + 2*5 + 3*6 = 4 + 10 + 18 = 32 + + // Test commutativity + 
EXPECT_EQ(b.dot(a), dot); + + // Test orthogonal vectors + Vector x{1.0, 0.0, 0.0}; + Vector y{0.0, 1.0, 0.0}; + EXPECT_EQ(x.dot(y), 0.0); +} + +TEST_F(VectorTest, CrossProduct3D) { + Vector x{1.0, 0.0, 0.0}; + Vector y{0.0, 1.0, 0.0}; + Vector z{0.0, 0.0, 1.0}; + + // Test basis vector cross products + Vector xy = x.cross(y); + EXPECT_EQ(xy[0], 0.0); + EXPECT_EQ(xy[1], 0.0); + EXPECT_EQ(xy[2], 1.0); + + Vector yx = y.cross(x); + EXPECT_EQ(yx[0], 0.0); + EXPECT_EQ(yx[1], 0.0); + EXPECT_EQ(yx[2], -1.0); + + // General cross product + Vector a{1.0, 2.0, 3.0}; + Vector b{4.0, 5.0, 6.0}; + Vector c = a.cross(b); + + EXPECT_EQ(c[0], -3.0); // 2*6 - 3*5 = 12 - 15 = -3 + EXPECT_EQ(c[1], 6.0); // 3*4 - 1*6 = 12 - 6 = 6 + EXPECT_EQ(c[2], -3.0); // 1*5 - 2*4 = 5 - 8 = -3 +} + +TEST_F(VectorTest, Norm) { + Vector v{3.0, 4.0, 0.0}; + EXPECT_EQ(v.norm(), 5.0); + + Vector unit{1.0, 0.0, 0.0}; + EXPECT_EQ(unit.norm(), 1.0); + + Vector zero{0.0, 0.0, 0.0}; + EXPECT_EQ(zero.norm(), 0.0); +} + +TEST_F(VectorTest, NormSquared) { + Vector v{3.0, 4.0, 0.0}; + EXPECT_EQ(v.norm_squared(), 25.0); + + Vector a{1.0, 2.0, 3.0}; + EXPECT_EQ(a.norm_squared(), 14.0); // 1 + 4 + 9 = 14 +} + +TEST_F(VectorTest, Normalize) { + Vector v{3.0, 4.0, 0.0}; + Vector n = v.normalized(); + + EXPECT_NEAR(n[0], 0.6, tolerance); + EXPECT_NEAR(n[1], 0.8, tolerance); + EXPECT_NEAR(n[2], 0.0, tolerance); + EXPECT_NEAR(n.norm(), 1.0, tolerance); + + // In-place normalization + v.normalize(); + EXPECT_NEAR(v[0], 0.6, tolerance); + EXPECT_NEAR(v[1], 0.8, tolerance); + EXPECT_NEAR(v.norm(), 1.0, tolerance); +} + +// ============================================================================= +// Expression Template Tests +// ============================================================================= + +TEST_F(VectorTest, ExpressionTemplatesNoTemporaries) { + Vector a{1.0, 2.0, 3.0}; + Vector b{4.0, 5.0, 6.0}; + Vector c{7.0, 8.0, 9.0}; + Vector d{10.0, 11.0, 12.0}; + + // Complex expression should create no 
temporaries + Vector result = a + b - c + d; + + EXPECT_EQ(result[0], 8.0); // 1 + 4 - 7 + 10 + EXPECT_EQ(result[1], 10.0); // 2 + 5 - 8 + 11 + EXPECT_EQ(result[2], 12.0); // 3 + 6 - 9 + 12 +} + +TEST_F(VectorTest, LazyEvaluation) { + Vector a{1.0, 2.0, 3.0}; + Vector b{4.0, 5.0, 6.0}; + + // Expression should not be evaluated until assignment + auto expr = a + b; // No computation yet + + Vector result = expr; // Evaluation happens here + EXPECT_EQ(result[0], 5.0); + EXPECT_EQ(result[1], 7.0); + EXPECT_EQ(result[2], 9.0); +} + +TEST_F(VectorTest, MixedExpressions) { + Vector a{1.0, 2.0, 3.0}; + Vector b{4.0, 5.0, 6.0}; + double scalar = 2.0; + + // Complex mixed expression + Vector result = scalar * (a + b) - a / scalar; + + EXPECT_NEAR(result[0], 9.5, tolerance); // 2*(1+4) - 1/2 + EXPECT_NEAR(result[1], 13.0, tolerance); // 2*(2+5) - 2/2 + EXPECT_NEAR(result[2], 16.5, tolerance); // 2*(3+6) - 3/2 +} + +// ============================================================================= +// Special Values Tests +// ============================================================================= + +TEST_F(VectorTest, ZeroVector) { + Vector zero = Vector::zero(); + EXPECT_EQ(zero[0], 0.0); + EXPECT_EQ(zero[1], 0.0); + EXPECT_EQ(zero[2], 0.0); + EXPECT_EQ(zero.norm(), 0.0); +} + +TEST_F(VectorTest, OnesVector) { + Vector ones = Vector::ones(); + EXPECT_EQ(ones[0], 1.0); + EXPECT_EQ(ones[1], 1.0); + EXPECT_EQ(ones[2], 1.0); +} + +TEST_F(VectorTest, BasisVectors) { + auto e0 = Vector::basis(0); + EXPECT_EQ(e0[0], 1.0); + EXPECT_EQ(e0[1], 0.0); + EXPECT_EQ(e0[2], 0.0); + + auto e1 = Vector::basis(1); + EXPECT_EQ(e1[0], 0.0); + EXPECT_EQ(e1[1], 1.0); + EXPECT_EQ(e1[2], 0.0); + + auto e2 = Vector::basis(2); + EXPECT_EQ(e2[0], 0.0); + EXPECT_EQ(e2[1], 0.0); + EXPECT_EQ(e2[2], 1.0); +} + +// ============================================================================= +// Edge Cases and Error Handling Tests +// 
============================================================================= + +TEST_F(VectorTest, DivisionByZero) { + Vector v{1.0, 2.0, 3.0}; + + // Division by zero should produce inf + Vector result = v / 0.0; + EXPECT_TRUE(std::isinf(result[0])); + EXPECT_TRUE(std::isinf(result[1])); + EXPECT_TRUE(std::isinf(result[2])); +} + +TEST_F(VectorTest, NormalizeZeroVector) { + Vector zero{0.0, 0.0, 0.0}; + + // Normalizing zero vector should handle gracefully + Vector n = zero.normalized(); + EXPECT_TRUE(std::isnan(n[0]) || n[0] == 0.0); +} + +TEST_F(VectorTest, ExtremeLargeValues) { + double large = 1e308; // Near double max + Vector v{large, large, large}; + + // Operations should not overflow + Vector half = v / 2.0; + EXPECT_FALSE(std::isinf(half[0])); + EXPECT_EQ(half[0], large / 2.0); +} + +TEST_F(VectorTest, ExtremeSmallValues) { + double tiny = 1e-308; // Subnormal for IEEE-754 double: below the smallest normal value (~2.2e-308) + Vector v{tiny, tiny, tiny}; + + // Operations should maintain precision + Vector doubled = v * 2.0; + EXPECT_EQ(doubled[0], tiny * 2.0); +} + +// ============================================================================= +// Numerical Precision Tests +// ============================================================================= + +TEST_F(VectorTest, NumericalStability) { + // Element-wise addition across widely separated magnitudes (no Kahan summation is performed here) + Vector v{1e16, 1.0, -1e16, 1.0}; + // Computed for future validation - demonstrates numerical precision issues + [[maybe_unused]] double sum = v[0] + v[1] + v[2] + v[3]; + + // Direct summation might lose precision + // But vector operations should maintain it + Vector a{1e16, 0.0, -1e16, 0.0}; + Vector b{0.0, 1.0, 0.0, 1.0}; + Vector c = a + b; + + EXPECT_EQ(c[0], 1e16); + EXPECT_EQ(c[1], 1.0); + EXPECT_EQ(c[2], -1e16); + EXPECT_EQ(c[3], 1.0); +} + +TEST_F(VectorTest, OrthogonalityPreservation) { + // Create nearly orthogonal vectors + Vector a{1.0, 1e-15, 0.0}; + Vector b{0.0, 1.0, 0.0}; + + double dot = a.dot(b); + EXPECT_NEAR(dot, 1e-15, 1e-16); +} + +// 
============================================================================= +// Comparison Operations Tests +// ============================================================================= + +TEST_F(VectorTest, Equality) { + Vector a{1.0, 2.0, 3.0}; + Vector b{1.0, 2.0, 3.0}; + Vector c{1.0, 2.0, 3.1}; + + EXPECT_TRUE(a == b); + EXPECT_FALSE(a == c); + EXPECT_FALSE(a != b); + EXPECT_TRUE(a != c); +} + +TEST_F(VectorTest, ApproximateEquality) { + Vector a{1.0, 2.0, 3.0}; + Vector b{1.0 + 1e-15, 2.0 - 1e-15, 3.0 + 1e-15}; + + EXPECT_TRUE(a.approx_equal(b, 1e-14)); + EXPECT_FALSE(a.approx_equal(b, 1e-16)); +} + +// ============================================================================= +// Thread Safety Tests +// ============================================================================= + +TEST_F(VectorTest, ThreadSafetyReadOnly) { + Vector v{1.0, 2.0, 3.0}; + + // Multiple threads reading should be safe + std::vector threads; + std::vector results(10); + + for (int i = 0; i < 10; ++i) { + threads.emplace_back([&v, &results, i]() { + results[static_cast(i)] = v.norm(); + }); + } + + for (auto& t : threads) { + t.join(); + } + + // All threads should get same result + double expected = v.norm(); + for (double r : results) { + EXPECT_EQ(r, expected); + } +} + +TEST_F(VectorTest, ThreadSafetyIsolated) { + // Each thread works on its own vector + std::vector threads; + std::vector> results(10); + + for (int i = 0; i < 10; ++i) { + threads.emplace_back([&results, i]() { + Vector local{static_cast(i), 0.0, 0.0}; + results[static_cast(i)] = local * 2.0; + }); + } + + for (auto& t : threads) { + t.join(); + } + + // Check each thread computed correctly + for (int i = 0; i < 10; ++i) { + EXPECT_EQ(results[static_cast(i)][0], 2.0 * i); + } +} + +// ============================================================================= +// Memory Alignment Tests +// ============================================================================= + +TEST_F(VectorTest, 
MemoryAlignment) { + Vector v; + + // Check that data is properly aligned for SIMD + std::uintptr_t addr = reinterpret_cast(v.data()); + EXPECT_EQ(addr % 32, 0) << "Vector data should be 32-byte aligned for AVX"; +} + +// ============================================================================= +// Utility Function Tests +// ============================================================================= + +TEST_F(VectorTest, MinMaxElements) { + Vector v{3.0, -1.0, 4.0, 1.0, -2.0}; + + EXPECT_EQ(v.min(), -2.0); + EXPECT_EQ(v.max(), 4.0); + EXPECT_EQ(v.min_index(), 4); + EXPECT_EQ(v.max_index(), 2); +} + +TEST_F(VectorTest, Sum) { + Vector v{1.0, 2.0, 3.0, 4.0}; + EXPECT_EQ(v.sum(), 10.0); + + Vector zero{0.0, 0.0, 0.0}; + EXPECT_EQ(zero.sum(), 0.0); +} + +TEST_F(VectorTest, Mean) { + Vector v{1.0, 2.0, 3.0, 4.0}; + EXPECT_EQ(v.mean(), 2.5); +} + +TEST_F(VectorTest, ToString) { + Vector v{1.0, 2.0, 3.0}; + std::stringstream ss; + ss << v; + + std::string expected = "[1, 2, 3]"; + EXPECT_EQ(ss.str(), expected); +} diff --git a/tests/unitTests/FE/Math/test_VectorExpr.cpp b/tests/unitTests/FE/Math/test_VectorExpr.cpp new file mode 100644 index 000000000..bd6d85d51 --- /dev/null +++ b/tests/unitTests/FE/Math/test_VectorExpr.cpp @@ -0,0 +1,409 @@ +/** + * @file test_VectorExpr.cpp + * @brief Unit tests for VectorExpr.h - vector expression templates + */ + +#include +#include "FE/Math/Vector.h" +#include "FE/Math/VectorExpr.h" +#include "FE/Math/MathConstants.h" +#include +#include +#include +#include +#include + +using namespace svmp::FE::math; + +// Test fixture for VectorExpr tests +class VectorExprTest : public ::testing::Test { +protected: + static constexpr double tolerance = 1e-14; + + // Custom allocator to track memory allocations + template + class TrackingAllocator { + public: + using value_type = T; + + static std::atomic allocations; + static std::atomic deallocations; + static std::atomic bytes_allocated; + + TrackingAllocator() = default; + + template + 
TrackingAllocator(const TrackingAllocator&) {} + + T* allocate(size_t n) { + allocations.fetch_add(1); + bytes_allocated.fetch_add(n * sizeof(T)); + return static_cast(::operator new(n * sizeof(T))); + } + + void deallocate(T* p, size_t n) { + deallocations.fetch_add(1); + ::operator delete(p); + } + + static void reset() { + allocations = 0; + deallocations = 0; + bytes_allocated = 0; + } + }; + + void SetUp() override { + TrackingAllocator::reset(); + } + + void TearDown() override {} + + template + bool approx_equal(T a, T b, T tol = tolerance) { + return std::abs(a - b) <= tol; + } +}; + +template +std::atomic VectorExprTest::TrackingAllocator::allocations{0}; +template +std::atomic VectorExprTest::TrackingAllocator::deallocations{0}; +template +std::atomic VectorExprTest::TrackingAllocator::bytes_allocated{0}; + +// ============================================================================= +// Lazy Evaluation Tests +// ============================================================================= + +TEST_F(VectorExprTest, LazyEvaluationNoTemporaries) { + // Expression templates should not create temporary vectors + Vector a{1.0, 2.0, 3.0}; + Vector b{4.0, 5.0, 6.0}; + Vector c{7.0, 8.0, 9.0}; + + // Build expression without evaluation + auto expr = a + b - c; + + // Expression type should not be Vector, but an expression type + using ExprType = decltype(expr); + EXPECT_FALSE((std::is_same_v>)); + + // Now evaluate + Vector result = expr; + EXPECT_DOUBLE_EQ(result[0], -2.0); + EXPECT_DOUBLE_EQ(result[1], -1.0); + EXPECT_DOUBLE_EQ(result[2], 0.0); +} + +TEST_F(VectorExprTest, LazyEvaluationAccessPattern) { + Vector a{1.0, 2.0, 3.0, 4.0}; + Vector b{5.0, 6.0, 7.0, 8.0}; + + auto expr = a + b; + + // Access individual elements without full evaluation + EXPECT_DOUBLE_EQ(expr[0], 6.0); + EXPECT_DOUBLE_EQ(expr[2], 10.0); + + // Size should be accessible + EXPECT_EQ(expr.size(), 4u); +} + +// 
============================================================================= +// Expression Chaining Tests +// ============================================================================= + +TEST_F(VectorExprTest, ChainedAdditionSubtraction) { + Vector a{1.0, 2.0, 3.0}; + Vector b{4.0, 5.0, 6.0}; + Vector c{2.0, 3.0, 4.0}; + Vector d{1.0, 1.0, 1.0}; + + // Chain multiple operations + Vector result = a + b - c + d; + + EXPECT_DOUBLE_EQ(result[0], 4.0); + EXPECT_DOUBLE_EQ(result[1], 5.0); + EXPECT_DOUBLE_EQ(result[2], 6.0); +} + +TEST_F(VectorExprTest, DeepExpressionNesting) { + Vector v1{1.0, 2.0}; + Vector v2{3.0, 4.0}; + Vector v3{5.0, 6.0}; + Vector v4{7.0, 8.0}; + Vector v5{9.0, 10.0}; + + // Deep nesting + Vector result = ((v1 + v2) - (v3 - v4)) + v5; + + EXPECT_DOUBLE_EQ(result[0], 15.0); + EXPECT_DOUBLE_EQ(result[1], 18.0); +} + +// ============================================================================= +// Mixed Operations Tests +// ============================================================================= + +TEST_F(VectorExprTest, ScalarMultiplicationInExpression) { + Vector a{1.0, 2.0, 3.0}; + Vector b{4.0, 5.0, 6.0}; + + Vector result = 2.0 * (a + b) / 3.0; + + EXPECT_TRUE(approx_equal(result[0], 10.0/3.0)); + EXPECT_TRUE(approx_equal(result[1], 14.0/3.0)); + EXPECT_TRUE(approx_equal(result[2], 6.0)); +} + +TEST_F(VectorExprTest, MixedScalarVectorOperations) { + Vector v{2.0, 4.0, 6.0, 8.0}; + + // Complex mixed expression + Vector result = 3.0 * v / 2.0 + v * 0.5 - 1.0 * v; + + EXPECT_DOUBLE_EQ(result[0], 2.0); + EXPECT_DOUBLE_EQ(result[1], 4.0); + EXPECT_DOUBLE_EQ(result[2], 6.0); + EXPECT_DOUBLE_EQ(result[3], 8.0); +} + +// ============================================================================= +// Unary Operations Tests +// ============================================================================= + +TEST_F(VectorExprTest, NegationInExpression) { + Vector a{1.0, -2.0, 3.0}; + Vector b{4.0, 5.0, -6.0}; + + Vector result = -a + (-b); 
+ + EXPECT_DOUBLE_EQ(result[0], -5.0); + EXPECT_DOUBLE_EQ(result[1], -3.0); + EXPECT_DOUBLE_EQ(result[2], 3.0); +} + +TEST_F(VectorExprTest, AbsoluteValueExpression) { + Vector v{-1.5, 2.3, -4.7, 0.0}; + + Vector result = abs(v); + + EXPECT_DOUBLE_EQ(result[0], 1.5); + EXPECT_DOUBLE_EQ(result[1], 2.3); + EXPECT_DOUBLE_EQ(result[2], 4.7); + EXPECT_DOUBLE_EQ(result[3], 0.0); +} + +TEST_F(VectorExprTest, SqrtExpression) { + Vector v{4.0, 9.0, 16.0}; + + Vector result = sqrt(v); + + EXPECT_DOUBLE_EQ(result[0], 2.0); + EXPECT_DOUBLE_EQ(result[1], 3.0); + EXPECT_DOUBLE_EQ(result[2], 4.0); +} + +// ============================================================================= +// Element-wise Operations Tests +// ============================================================================= + +TEST_F(VectorExprTest, HadamardProductExpression) { + Vector a{2.0, 3.0, 4.0}; + Vector b{5.0, 6.0, 7.0}; + + Vector result = hadamard(a, b); + + EXPECT_DOUBLE_EQ(result[0], 10.0); + EXPECT_DOUBLE_EQ(result[1], 18.0); + EXPECT_DOUBLE_EQ(result[2], 28.0); +} + +TEST_F(VectorExprTest, HadamardDivisionExpression) { + Vector a{10.0, 18.0, 28.0}; + Vector b{2.0, 3.0, 4.0}; + + Vector result = hadamard_div(a, b); + + EXPECT_DOUBLE_EQ(result[0], 5.0); + EXPECT_DOUBLE_EQ(result[1], 6.0); + EXPECT_DOUBLE_EQ(result[2], 7.0); +} + +// ============================================================================= +// Dot Product and Norm Tests +// ============================================================================= + +TEST_F(VectorExprTest, DotProductOfExpressions) { + Vector a{1.0, 2.0, 3.0}; + Vector b{4.0, 5.0, 6.0}; + Vector c{2.0, 2.0, 2.0}; + + // Dot product of expressions + double result = dot(a + b, c); + + EXPECT_DOUBLE_EQ(result, 42.0); +} + +TEST_F(VectorExprTest, NormOfExpression) { + Vector a{3.0, 0.0}; + Vector b{0.0, 4.0}; + + double result = norm(a + b); + + EXPECT_DOUBLE_EQ(result, 5.0); // norm of (3,4) = 5 +} + +TEST_F(VectorExprTest, NormalizeExpression) { + Vector 
v{3.0, 0.0, 4.0}; + + Vector result = normalize(v); + + EXPECT_DOUBLE_EQ(result[0], 0.6); + EXPECT_DOUBLE_EQ(result[1], 0.0); + EXPECT_DOUBLE_EQ(result[2], 0.8); +} + +// ============================================================================= +// Type Deduction Tests +// ============================================================================= + +TEST_F(VectorExprTest, TypeDeductionCorrectness) { + Vector vf{1.0f, 2.0f, 3.0f}; + Vector vd{4.0, 5.0, 6.0}; + + // Same-type case only: float + float must keep a float element type (vd is unused; no mixed-type promotion is exercised) + auto expr = vf + vf; // float expression + using ExprType = decltype(expr[0]); + EXPECT_TRUE((std::is_same_v)); + + // Test that expression evaluates correctly + Vector result = expr; + EXPECT_FLOAT_EQ(result[0], 2.0f); + EXPECT_FLOAT_EQ(result[1], 4.0f); + EXPECT_FLOAT_EQ(result[2], 6.0f); +} + +// ============================================================================= +// SFINAE and Compile-time Tests +// ============================================================================= + +TEST_F(VectorExprTest, SFINAEConstraints) { + // Test that VectorExpr operators only work with VectorExpr types + Vector v1{1.0, 2.0, 3.0}; + Vector v2{4.0, 5.0, 6.0}; + + // This should compile + auto expr = v1 + v2; + Vector result = expr; + + // Verify the constraint checking + EXPECT_TRUE((std::is_base_of_v>, Vector>)); +} + +// ============================================================================= +// Aliasing and Self-Assignment Tests +// ============================================================================= + +TEST_F(VectorExprTest, SelfAssignmentWithExpression) { + Vector a{1.0, 2.0, 3.0}; + Vector b{4.0, 5.0, 6.0}; + + // Self-assignment through expression + a = a + b; + + EXPECT_DOUBLE_EQ(a[0], 5.0); + EXPECT_DOUBLE_EQ(a[1], 7.0); + EXPECT_DOUBLE_EQ(a[2], 9.0); +} + +TEST_F(VectorExprTest, AliasingInExpression) { + Vector a{2.0, 3.0, 4.0}; + Vector b{1.0, 1.0, 1.0}; + + // a appears on both sides + a = b + a; + + 
EXPECT_DOUBLE_EQ(a[0], 3.0); + EXPECT_DOUBLE_EQ(a[1], 4.0); + EXPECT_DOUBLE_EQ(a[2], 5.0); +} + +// ============================================================================= +// Edge Cases Tests +// ============================================================================= + +TEST_F(VectorExprTest, SingleElementVector) { + Vector a{5.0}; + Vector b{3.0}; + + Vector result = a + b - a * 0.5; + + EXPECT_DOUBLE_EQ(result[0], 5.5); +} + +TEST_F(VectorExprTest, EmptyExpression) { + Vector v{1.0, 2.0, 3.0}; + + // Expression that evaluates to identity + Vector result = v + v * 0.0; + + EXPECT_DOUBLE_EQ(result[0], 1.0); + EXPECT_DOUBLE_EQ(result[1], 2.0); + EXPECT_DOUBLE_EQ(result[2], 3.0); +} + +TEST_F(VectorExprTest, LargeVectorExpression) { + const size_t N = 100; + Vector a, b, c; + + for (size_t i = 0; i < N; ++i) { + a[i] = static_cast(i); + b[i] = static_cast(i * 2); + c[i] = static_cast(i * 3); + } + + Vector result = a + b - c / 2.0; + + for (size_t i = 0; i < N; ++i) { + EXPECT_DOUBLE_EQ(result[i], i + 2.0 * i - 1.5 * i); + } +} + +// ============================================================================= +// Complex Expression Pattern Tests +// ============================================================================= + +TEST_F(VectorExprTest, ComplexNestedExpression) { + Vector a{1.0, 2.0, 3.0}; + Vector b{4.0, 5.0, 6.0}; + Vector c{7.0, 8.0, 9.0}; + + // Complex expression with multiple operation types + Vector result = 2.0 * abs(a - b) + sqrt(hadamard(c, c)) / 3.0; + + // Verify each component + // |a - b| = |(-3, -3, -3)| = (3, 3, 3) + // 2 * (3, 3, 3) = (6, 6, 6) + // c * c = (49, 64, 81) + // sqrt(c * c) = (7, 8, 9) + // sqrt(c * c) / 3 = (7/3, 8/3, 3) + // result = (6 + 7/3, 6 + 8/3, 6 + 3) = (25/3, 26/3, 9) + + EXPECT_TRUE(approx_equal(result[0], 25.0/3.0)); + EXPECT_TRUE(approx_equal(result[1], 26.0/3.0)); + EXPECT_DOUBLE_EQ(result[2], 9.0); +} + +TEST_F(VectorExprTest, ChainedUnaryOperations) { + Vector v{-4.0, -9.0, -16.0, -25.0}; + + 
// Chain of unary operations + Vector result = sqrt(abs(-v)); + + EXPECT_DOUBLE_EQ(result[0], 2.0); + EXPECT_DOUBLE_EQ(result[1], 3.0); + EXPECT_DOUBLE_EQ(result[2], 4.0); + EXPECT_DOUBLE_EQ(result[3], 5.0); +} From dfdeead1edd2813a4a24bdba45fc13b883c6a919 Mon Sep 17 00:00:00 2001 From: Zachary Sexton Date: Fri, 5 Jun 2026 12:44:00 -0700 Subject: [PATCH 02/22] Update FSI HEX8 FE Basis reference results Regenerate affected FSI and FSI-ustruct HEX8 result_005.vtu references for the FE Basis path with nonzero HEX8 Hessian contributions. Update the pipe_3d PETSc and Trilinos references to match the base pipe_3d reference, preserving the existing shared-reference pattern across linear algebra variants. --- tests/cases/fsi/pipe_3d/result_005.vtu | 4 ++-- tests/cases/fsi/pipe_3d_petsc/result_005.vtu | 4 ++-- tests/cases/fsi/pipe_3d_trilinos_bj/result_005.vtu | 4 ++-- tests/cases/fsi/pipe_3d_trilinos_ml/result_005.vtu | 4 ++-- tests/cases/fsi/pipe_RCR_3d/result_005.vtu | 4 ++-- tests/cases/fsi_ustruct/pipe_3d/result_005.vtu | 4 ++-- tests/cases/fsi_ustruct/pipe_RCR_3d/result_005.vtu | 4 ++-- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/cases/fsi/pipe_3d/result_005.vtu b/tests/cases/fsi/pipe_3d/result_005.vtu index b78ea6500..a7ca69daf 100644 --- a/tests/cases/fsi/pipe_3d/result_005.vtu +++ b/tests/cases/fsi/pipe_3d/result_005.vtu @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:54ac116931be9b2a7d5024de8359f9ea09cae964e9bd34ba949f4bfb9312c8af -size 210065 +oid sha256:b13d09a343a3fd8d033b0e3ecaf2cd94ce68e2ee8665144f7a53cca201db4266 +size 227356 diff --git a/tests/cases/fsi/pipe_3d_petsc/result_005.vtu b/tests/cases/fsi/pipe_3d_petsc/result_005.vtu index b78ea6500..a7ca69daf 100644 --- a/tests/cases/fsi/pipe_3d_petsc/result_005.vtu +++ b/tests/cases/fsi/pipe_3d_petsc/result_005.vtu @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:54ac116931be9b2a7d5024de8359f9ea09cae964e9bd34ba949f4bfb9312c8af -size 210065 
+oid sha256:b13d09a343a3fd8d033b0e3ecaf2cd94ce68e2ee8665144f7a53cca201db4266 +size 227356 diff --git a/tests/cases/fsi/pipe_3d_trilinos_bj/result_005.vtu b/tests/cases/fsi/pipe_3d_trilinos_bj/result_005.vtu index b78ea6500..a7ca69daf 100644 --- a/tests/cases/fsi/pipe_3d_trilinos_bj/result_005.vtu +++ b/tests/cases/fsi/pipe_3d_trilinos_bj/result_005.vtu @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:54ac116931be9b2a7d5024de8359f9ea09cae964e9bd34ba949f4bfb9312c8af -size 210065 +oid sha256:b13d09a343a3fd8d033b0e3ecaf2cd94ce68e2ee8665144f7a53cca201db4266 +size 227356 diff --git a/tests/cases/fsi/pipe_3d_trilinos_ml/result_005.vtu b/tests/cases/fsi/pipe_3d_trilinos_ml/result_005.vtu index b78ea6500..a7ca69daf 100644 --- a/tests/cases/fsi/pipe_3d_trilinos_ml/result_005.vtu +++ b/tests/cases/fsi/pipe_3d_trilinos_ml/result_005.vtu @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:54ac116931be9b2a7d5024de8359f9ea09cae964e9bd34ba949f4bfb9312c8af -size 210065 +oid sha256:b13d09a343a3fd8d033b0e3ecaf2cd94ce68e2ee8665144f7a53cca201db4266 +size 227356 diff --git a/tests/cases/fsi/pipe_RCR_3d/result_005.vtu b/tests/cases/fsi/pipe_RCR_3d/result_005.vtu index 79eaced8c..6945fd005 100644 --- a/tests/cases/fsi/pipe_RCR_3d/result_005.vtu +++ b/tests/cases/fsi/pipe_RCR_3d/result_005.vtu @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f194a3c364de0bf1a6cc79ba542306469e151de36275a06564022730c3f2c84c -size 209865 +oid sha256:25a08e99ae0163800e73ea54720557d742548fe75a0eb6b68461d8bdb366972f +size 227320 diff --git a/tests/cases/fsi_ustruct/pipe_3d/result_005.vtu b/tests/cases/fsi_ustruct/pipe_3d/result_005.vtu index c838c9c3f..8b5f73c2a 100644 --- a/tests/cases/fsi_ustruct/pipe_3d/result_005.vtu +++ b/tests/cases/fsi_ustruct/pipe_3d/result_005.vtu @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:262ffb4d7b644280f15bb2e32c8e5fc5ddade7fa5cabd845c31fe3803e9ef0a0 -size 207864 +oid 
sha256:16f0f2b2ea6a133f54db03954e76ea7586b0fb56d36e2e350ccd21ebadaf4bfb +size 228764 diff --git a/tests/cases/fsi_ustruct/pipe_RCR_3d/result_005.vtu b/tests/cases/fsi_ustruct/pipe_RCR_3d/result_005.vtu index e9e051d73..7d6c64d9b 100644 --- a/tests/cases/fsi_ustruct/pipe_RCR_3d/result_005.vtu +++ b/tests/cases/fsi_ustruct/pipe_RCR_3d/result_005.vtu @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7dec176a56b610ed6b754f66e532a15ac1563b72c25198f49a0bc53adc6e4552 -size 207628 +oid sha256:5c00d715542a495f37a6ea1cd514cc654d3215360170a06c3af1440b71f7d093 +size 228708 From 8b47802fbcaf83ec07a4636de7ac6e6084db1364 Mon Sep 17 00:00:00 2001 From: Zachary Sexton Date: Sun, 7 Jun 2026 23:21:16 -0700 Subject: [PATCH 03/22] fixing temporary A + B expression in matrix and vector objects --- Code/Source/solver/FE/Math/MatrixExpr.h | 20 ++++++++++---------- Code/Source/solver/FE/Math/VectorExpr.h | 12 ++++++------ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/Code/Source/solver/FE/Math/MatrixExpr.h b/Code/Source/solver/FE/Math/MatrixExpr.h index da2f8c8d6..097f35361 100644 --- a/Code/Source/solver/FE/Math/MatrixExpr.h +++ b/Code/Source/solver/FE/Math/MatrixExpr.h @@ -82,8 +82,8 @@ class MatrixExpr { template class MatrixBinaryExpr : public MatrixExpr> { private: - const LHS& lhs_; - const RHS& rhs_; + LHS lhs_; + RHS rhs_; Op op_; public: @@ -131,7 +131,7 @@ class MatrixBinaryExpr : public MatrixExpr> { template class MatrixUnaryExpr : public MatrixExpr> { private: - const Expr& expr_; + Expr expr_; Op op_; public: @@ -178,7 +178,7 @@ class MatrixUnaryExpr : public MatrixExpr> { template class MatrixScalarExpr : public MatrixExpr> { private: - const Expr& expr_; + Expr expr_; Scalar scalar_; public: @@ -225,7 +225,7 @@ class MatrixScalarExpr : public MatrixExpr> { template class MatrixScalarDivExpr : public MatrixExpr> { private: - const Expr& expr_; + Expr expr_; Scalar scalar_; public: @@ -274,8 +274,8 @@ class MatrixScalarDivExpr : public 
MatrixExpr> template class MatrixMulExpr : public MatrixExpr> { private: - const LHS& lhs_; - const RHS& rhs_; + LHS lhs_; + RHS rhs_; public: /** @@ -326,7 +326,7 @@ class MatrixMulExpr : public MatrixExpr> { template class TransposeExpr : public MatrixExpr> { private: - const Expr& expr_; + Expr expr_; public: /** @@ -370,7 +370,7 @@ class TransposeExpr : public MatrixExpr> { template class DiagonalExpr : public MatrixExpr> { private: - const VecExpr& vec_; + VecExpr vec_; std::size_t n_; public: @@ -623,4 +623,4 @@ constexpr auto trace(const MatrixExpr& expr) { } // namespace FE } // namespace svmp -#endif // SVMP_FE_MATH_MATRIX_EXPR_H \ No newline at end of file +#endif // SVMP_FE_MATH_MATRIX_EXPR_H diff --git a/Code/Source/solver/FE/Math/VectorExpr.h b/Code/Source/solver/FE/Math/VectorExpr.h index 8b9c8e382..627d2fd88 100644 --- a/Code/Source/solver/FE/Math/VectorExpr.h +++ b/Code/Source/solver/FE/Math/VectorExpr.h @@ -72,8 +72,8 @@ class VectorExpr { template class VectorBinaryExpr : public VectorExpr> { private: - const LHS& lhs_; - const RHS& rhs_; + LHS lhs_; + RHS rhs_; Op op_; public: @@ -112,7 +112,7 @@ class VectorBinaryExpr : public VectorExpr> { template class VectorUnaryExpr : public VectorExpr> { private: - const Expr& expr_; + Expr expr_; Op op_; public: @@ -150,7 +150,7 @@ class VectorUnaryExpr : public VectorExpr> { template class VectorScalarExpr : public VectorExpr> { private: - const Expr& expr_; + Expr expr_; Scalar scalar_; public: @@ -188,7 +188,7 @@ class VectorScalarExpr : public VectorExpr> { template class VectorScalarDivExpr : public VectorExpr> { private: - const Expr& expr_; + Expr expr_; Scalar scalar_; public: @@ -415,4 +415,4 @@ constexpr auto normalize(const VectorExpr& expr) { } // namespace FE } // namespace svmp -#endif // SVMP_FE_MATH_VECTOR_EXPR_H \ No newline at end of file +#endif // SVMP_FE_MATH_VECTOR_EXPR_H From 4d6baaa57f809a9ee4d6261069a84aec6efc9806 Mon Sep 17 00:00:00 2001 From: Zachary Sexton Date: Mon, 8 Jun 2026 
00:41:06 -0700 Subject: [PATCH 04/22] fixing fetch content for google tests --- Code/Source/solver/CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Code/Source/solver/CMakeLists.txt b/Code/Source/solver/CMakeLists.txt index e42391862..4f317cf79 100644 --- a/Code/Source/solver/CMakeLists.txt +++ b/Code/Source/solver/CMakeLists.txt @@ -348,11 +348,11 @@ if(ENABLE_UNIT_TEST) # install Google Test #if(NOT TARGET gtest_main AND NOT TARGET gtest) - include(FetchContent) FetchContent_Declare( - googletest - URL https://github.com/google/googletest/archive/refs/heads/main.zip - DOWNLOAD_EXTRACT_TIMESTAMP TRUE + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG v1.17.0 + DOWNLOAD_EXTRACT_TIMESTAMP TRUE ) FetchContent_MakeAvailable(googletest) #endif() From 81cad5461e14d754b42cd44b89d2abba10993d71 Mon Sep 17 00:00:00 2001 From: Zachary Sexton Date: Mon, 8 Jun 2026 00:51:47 -0700 Subject: [PATCH 05/22] adding fetch content to include for enabled unit tests --- Code/Source/solver/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Code/Source/solver/CMakeLists.txt b/Code/Source/solver/CMakeLists.txt index 4f317cf79..1adc6be78 100644 --- a/Code/Source/solver/CMakeLists.txt +++ b/Code/Source/solver/CMakeLists.txt @@ -345,7 +345,7 @@ if(ENABLE_UNIT_TEST) # link pthread on ubuntu20 find_package(Threads REQUIRED) - + include(FetchContent) # install Google Test #if(NOT TARGET gtest_main AND NOT TARGET gtest) FetchContent_Declare( From 004e678ca0e830d5a54f93b2a468fe347bf0456c Mon Sep 17 00:00:00 2001 From: Zachary Sexton Date: Mon, 8 Jun 2026 12:20:53 -0700 Subject: [PATCH 06/22] removing basis optimizations, caching, pyramid support, manual/static reference tables and related unit tests --- Code/Source/solver/CMakeLists.txt | 8 + Code/Source/solver/FE/Basis/BasisCache.cpp | 309 - Code/Source/solver/FE/Basis/BasisCache.h | 456 - Code/Source/solver/FE/Basis/BasisFactory.cpp | 81 +- 
Code/Source/solver/FE/Basis/BasisFactory.h | 6 - Code/Source/solver/FE/Basis/BasisFunction.cpp | 262 +- Code/Source/solver/FE/Basis/BasisFunction.h | 367 +- Code/Source/solver/FE/Basis/BasisTolerance.h | 52 - Code/Source/solver/FE/Basis/BasisTraits.h | 55 +- Code/Source/solver/FE/Basis/LagrangeBasis.cpp | 8661 +---------------- Code/Source/solver/FE/Basis/LagrangeBasis.h | 123 +- .../solver/FE/Basis/LagrangeBasisFast.h | 1378 --- .../solver/FE/Basis/LagrangeBasisPyramid.cpp | 2069 ---- .../solver/FE/Basis/LagrangeBasisPyramid.h | 67 - .../solver/FE/Basis/LagrangeBasisSimplex.cpp | 2457 ----- .../solver/FE/Basis/LagrangeBasisSimplex.h | 78 - .../solver/FE/Basis/LagrangeBasisUtility.h | 25 - .../FE/Basis/NodeOrderingConventions.cpp | 580 +- .../solver/FE/Basis/NodeOrderingConventions.h | 508 +- .../solver/FE/Basis/PyramidModalBasis.h | 265 - .../solver/FE/Basis/SerendipityBasis.cpp | 74 +- .../Source/solver/FE/Basis/SerendipityBasis.h | 7 - Code/Source/solver/FE/Basis/VectorBasis.h | 255 - .../FE/Basis/VectorBasisEvaluationHelpers.cpp | 593 -- .../FE/Basis/VectorBasisEvaluationHelpers.h | 751 -- .../FE/Basis/VectorBasisModalPolynomial.h | 77 - Code/Source/solver/FE/Common/Alignment.h | 23 - Code/Source/solver/FE/Common/Types.h | 9 +- Code/Source/solver/FE/Math/Matrix.h | 2 +- Code/Source/solver/FE/Math/Vector.h | 2 +- .../solver/FE/Quadrature/QuadratureRule.h | 237 - Code/Source/solver/Timer.h | 21 +- Code/Source/solver/load_msh.cpp | 2 - Code/Source/solver/utils.cpp | 14 +- .../eigen3/unsupported/Eigen/CXX11/Tensor | 2 + .../FE/Basis/test_BasisCacheFactory.cpp | 256 - .../FE/Basis/test_BasisErrorPaths.cpp | 60 +- .../unitTests/FE/Basis/test_BasisHessians.cpp | 32 - .../FE/Basis/test_ConstexprBasis.cpp | 135 +- ...ePyramid.cpp => test_HigherOrderWedge.cpp} | 66 +- .../unitTests/FE/Basis/test_LagrangeBasis.cpp | 3198 +----- .../FE/Basis/test_SerendipityTensorModal.cpp | 12 +- tests/unitTests/test_common.h | 3 +- 43 files changed, 1060 insertions(+), 22578 deletions(-) 
delete mode 100644 Code/Source/solver/FE/Basis/BasisCache.cpp delete mode 100644 Code/Source/solver/FE/Basis/BasisCache.h delete mode 100644 Code/Source/solver/FE/Basis/BasisTolerance.h delete mode 100644 Code/Source/solver/FE/Basis/LagrangeBasisFast.h delete mode 100644 Code/Source/solver/FE/Basis/LagrangeBasisPyramid.cpp delete mode 100644 Code/Source/solver/FE/Basis/LagrangeBasisPyramid.h delete mode 100644 Code/Source/solver/FE/Basis/LagrangeBasisSimplex.cpp delete mode 100644 Code/Source/solver/FE/Basis/LagrangeBasisSimplex.h delete mode 100644 Code/Source/solver/FE/Basis/LagrangeBasisUtility.h delete mode 100644 Code/Source/solver/FE/Basis/PyramidModalBasis.h delete mode 100644 Code/Source/solver/FE/Basis/VectorBasis.h delete mode 100644 Code/Source/solver/FE/Basis/VectorBasisEvaluationHelpers.cpp delete mode 100644 Code/Source/solver/FE/Basis/VectorBasisEvaluationHelpers.h delete mode 100644 Code/Source/solver/FE/Basis/VectorBasisModalPolynomial.h delete mode 100644 Code/Source/solver/FE/Common/Alignment.h delete mode 100644 Code/Source/solver/FE/Quadrature/QuadratureRule.h delete mode 100644 tests/unitTests/FE/Basis/test_BasisCacheFactory.cpp rename tests/unitTests/FE/Basis/{test_HigherOrderWedgePyramid.cpp => test_HigherOrderWedge.cpp} (64%) diff --git a/Code/Source/solver/CMakeLists.txt b/Code/Source/solver/CMakeLists.txt index 1adc6be78..bdebc4a52 100644 --- a/Code/Source/solver/CMakeLists.txt +++ b/Code/Source/solver/CMakeLists.txt @@ -355,6 +355,14 @@ if(ENABLE_UNIT_TEST) DOWNLOAD_EXTRACT_TIMESTAMP TRUE ) FetchContent_MakeAvailable(googletest) + + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_STANDARD GREATER_EQUAL 20) + foreach(GTEST_TARGET gtest gtest_main gmock gmock_main) + if(TARGET ${GTEST_TARGET}) + target_compile_options(${GTEST_TARGET} PRIVATE -std=gnu++17) + endif() + endforeach() + endif() #endif() enable_testing() diff --git a/Code/Source/solver/FE/Basis/BasisCache.cpp b/Code/Source/solver/FE/Basis/BasisCache.cpp deleted file mode 
100644 index 6d8a4ede3..000000000 --- a/Code/Source/solver/FE/Basis/BasisCache.cpp +++ /dev/null @@ -1,309 +0,0 @@ -/* Copyright (c) Stanford University, The Regents of the University of California, and others. - * - * All Rights Reserved. - * - * See License file. - */ - -#include "BasisCache.h" -#include - -namespace svmp { -namespace FE { -namespace basis { - -namespace { - -QuadratureCacheKey make_quadrature_cache_key(const quadrature::QuadratureRule& quad) noexcept { - const auto fingerprint = quad.point_fingerprint(); - return QuadratureCacheKey{fingerprint.dimension, - fingerprint.num_points, - fingerprint.points_hash_a, - fingerprint.points_hash_b}; -} - -void mix_hash_word(std::uint64_t word, - std::uint64_t& hash_a, - std::uint64_t& hash_b) noexcept { - hash_a ^= word + 0x9e3779b97f4a7c15ULL + (hash_a << 6u) + (hash_a >> 2u); - hash_b ^= (word + 0xbf58476d1ce4e5b9ULL) + (hash_b << 7u) + (hash_b >> 3u); -} - -std::pair -identity_fingerprint(const std::string& identity) noexcept { - std::uint64_t hash_a = 0xa4093822299f31d0ULL; - std::uint64_t hash_b = 0x082efa98ec4e6c89ULL; - mix_hash_word(static_cast(identity.size()), hash_a, hash_b); - for (const char c : identity) { - mix_hash_word(static_cast(static_cast(c)), hash_a, hash_b); - } - return {hash_a, hash_b}; -} - -BasisCacheKey make_basis_cache_key(const BasisFunction& basis, - const quadrature::QuadratureRule& quad, - bool gradients, - bool hessians) { - StructuralBasisKey structural_key{ - basis.basis_type(), - basis.element_type(), - basis.dimension(), - basis.order(), - basis.size(), - basis.is_vector_valued(), - make_quadrature_cache_key(quad), - gradients, - hessians - }; - - BasisCacheKey key; - const bool uses_basis_identity = !basis.cache_identity_is_structural(); - if (!uses_basis_identity) { - key.value = structural_key; - return key; - } - - std::vector basis_identity_words; - const bool uses_structured_identity = basis.cache_identity_words(basis_identity_words); - if 
(!uses_structured_identity) { - basis_identity_words.clear(); - } - const std::string basis_identity = - uses_structured_identity ? std::string{} : basis.cache_identity(); - BasisIdentityFingerprint cached_identity_hash{}; - const bool has_cached_identity_hash = - uses_structured_identity && - basis.cache_identity_fingerprint(cached_identity_hash.hash_a, - cached_identity_hash.hash_b); - const auto identity_hash = uses_structured_identity - ? has_cached_identity_hash - ? std::pair{ - cached_identity_hash.hash_a, - cached_identity_hash.hash_b} - : [&basis_identity_words] { - const auto fingerprint = - compute_basis_identity_fingerprint(basis_identity_words); - return std::pair{ - fingerprint.hash_a, - fingerprint.hash_b}; - }() - : identity_fingerprint(basis_identity); - key.value = ParameterizedBasisKey{ - structural_key, - uses_structured_identity, - identity_hash.first, - identity_hash.second, - std::move(basis_identity_words), - basis_identity - }; - return key; -} - -} // namespace - -BasisCache& BasisCache::instance() { - static BasisCache cache; - return cache; -} - -const BasisCacheEntry& BasisCache::get_or_compute( - const BasisFunction& basis, - const quadrature::QuadratureRule& quad, - bool gradients, - bool hessians) { - return *get_or_compute_shared(basis, quad, gradients, hessians); -} - -std::shared_ptr BasisCache::get_or_compute_shared( - const BasisFunction& basis, - const quadrature::QuadratureRule& quad, - bool gradients, - bool hessians) { - const BasisCacheKey key = make_basis_cache_key(basis, quad, gradients, hessians); - - // Warm path: shared (reader) lock allows concurrent cache hits. 
- { - std::shared_lock read_lock(mutex_); - auto it = slots_.find(key); - if (it != slots_.end() && it->second.entry) { - return it->second.entry; - } - } - - std::shared_ptr in_flight; - bool owner = false; - { - std::unique_lock write_lock(mutex_); - auto& slot = slots_[key]; - if (slot.entry) { - return slot.entry; - } - - if (!slot.pending) { - in_flight = std::make_shared(); - slot.pending = in_flight; - owner = true; - } else { - in_flight = slot.pending; - } - } - - if (!owner) { - std::unique_lock wait_lock(in_flight->mutex); - in_flight->ready_cv.wait(wait_lock, [&in_flight] { return in_flight->ready; }); - if (in_flight->exception) { - std::rethrow_exception(in_flight->exception); - } - return in_flight->entry; - } - - try { - auto entry = std::make_shared(compute(basis, quad, gradients, hessians)); - { - std::unique_lock write_lock(mutex_); - auto slot_it = slots_.find(key); - if (slot_it == slots_.end()) { - slot_it = slots_.emplace(key, CacheSlot{}).first; - } - auto& slot = slot_it->second; - if (slot.entry) { - entry = slot.entry; - } else { - slot.entry = entry; - } - if (slot.pending == in_flight) { - slot.pending.reset(); - } - } - { - std::lock_guard ready_lock(in_flight->mutex); - in_flight->entry = entry; - in_flight->ready = true; - } - in_flight->ready_cv.notify_all(); - return entry; - } catch (...) 
{ - { - std::lock_guard ready_lock(in_flight->mutex); - in_flight->exception = std::current_exception(); - in_flight->ready = true; - } - { - std::unique_lock write_lock(mutex_); - auto slot_it = slots_.find(key); - if (slot_it != slots_.end() && slot_it->second.pending == in_flight) { - slot_it->second.pending.reset(); - if (!slot_it->second.entry) { - slots_.erase(slot_it); - } - } - } - in_flight->ready_cv.notify_all(); - throw; - } -} - -const BasisCacheEntry& BasisCache::prewarm( - const BasisFunction& basis, - const quadrature::QuadratureRule& quad, - bool gradients, - bool hessians) { - return get_or_compute(basis, quad, gradients, hessians); -} - -BasisCacheHandle BasisCache::prewarm_handle( - const BasisFunction& basis, - const quadrature::QuadratureRule& quad, - bool gradients, - bool hessians) { - return BasisCacheHandle(get_or_compute_shared(basis, quad, gradients, hessians)); -} - -BasisCacheEntry BasisCache::compute_uncached( - const BasisFunction& basis, - const quadrature::QuadratureRule& quad, - bool gradients, - bool hessians) const { - return compute(basis, quad, gradients, hessians); -} - -void BasisCache::clear() { - std::unique_lock lock(mutex_); - for (auto it = slots_.begin(); it != slots_.end();) { - if (it->second.pending) { - it->second.entry.reset(); - ++it; - } else { - it = slots_.erase(it); - } - } -} - -std::size_t BasisCache::size() const { - std::shared_lock lock(mutex_); - std::size_t completed = 0; - for (const auto& [key, slot] : slots_) { - (void)key; - if (slot.entry) { - ++completed; - } - } - return completed; -} - -BasisCacheEntry BasisCache::compute(const BasisFunction& basis, - const quadrature::QuadratureRule& quad, - bool gradients, - bool hessians) const { - BasisCacheEntry entry; - const auto& points = quad.points(); - entry.num_qpts = points.size(); - entry.num_dofs = basis.size(); - - const bool vector_basis = basis.is_vector_valued(); - if (!vector_basis) { - entry.scalar_values.assign(entry.num_dofs * 
entry.num_qpts, Real(0)); - if (gradients) { - entry.gradients.assign(entry.num_dofs * 3u * entry.num_qpts, Real(0)); - } - if (hessians) { - entry.hessians.assign(entry.num_dofs * 9u * entry.num_qpts, Real(0)); - } - } else { - entry.vector_values_xyz.assign(entry.num_dofs * 3u * entry.num_qpts, Real(0)); - if (gradients && basis.supports_vector_jacobians()) { - entry.vector_jacobians.assign(entry.num_dofs * 9u * entry.num_qpts, Real(0)); - } - if (gradients && basis.supports_curl()) { - entry.vector_curls_xyz.assign(entry.num_dofs * 3u * entry.num_qpts, Real(0)); - } - if (gradients && basis.supports_divergence()) { - entry.vector_divergence.assign(entry.num_dofs * entry.num_qpts, Real(0)); - } - } - - if (vector_basis) { - if (entry.num_dofs > 0 && entry.num_qpts > 0) { - basis.evaluate_vector_at_quadrature_points( - points, - entry.vector_values_xyz.data(), - entry.vector_jacobians.empty() ? nullptr : entry.vector_jacobians.data(), - entry.vector_curls_xyz.empty() ? nullptr : entry.vector_curls_xyz.data(), - entry.vector_divergence.empty() ? nullptr : entry.vector_divergence.data()); - } - return entry; - } - - if (entry.num_dofs > 0 && entry.num_qpts > 0) { - basis.fill_scalar_cache_entry(points, - entry.num_qpts, - entry.scalar_values.data(), - gradients ? entry.gradients.data() : nullptr, - hessians ? entry.hessians.data() : nullptr); - } - - return entry; -} -} // namespace basis -} // namespace FE -} // namespace svmp diff --git a/Code/Source/solver/FE/Basis/BasisCache.h b/Code/Source/solver/FE/Basis/BasisCache.h deleted file mode 100644 index a84c0e87a..000000000 --- a/Code/Source/solver/FE/Basis/BasisCache.h +++ /dev/null @@ -1,456 +0,0 @@ -/* Copyright (c) Stanford University, The Regents of the University of California, and others. - * - * All Rights Reserved. - * - * See License file. 
- */ - -#ifndef SVMP_FE_BASIS_BASISCACHE_H -#define SVMP_FE_BASIS_BASISCACHE_H - -/** - * @file BasisCache.h - * @brief Cache for basis evaluations at quadrature points - */ - -#include "BasisFunction.h" -#include "Quadrature/QuadratureRule.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace svmp { -namespace FE { -namespace basis { - -struct QuadratureCacheKey { - int dimension{0}; - std::size_t num_points{0}; - // Quadrature coordinates are intentionally fingerprinted from their exact - // Real bit patterns. Values such as -0.0 and +0.0 therefore produce - // distinct cache keys unless a future API explicitly normalizes them. The - // key intentionally ignores weights and rule class because basis values only - // depend on reference coordinates; bit-identical point sets share entries. - std::uint64_t points_hash_a{0}; - std::uint64_t points_hash_b{0}; - - bool operator==(const QuadratureCacheKey& other) const noexcept { - return dimension == other.dimension && - num_points == other.num_points && - points_hash_a == other.points_hash_a && - points_hash_b == other.points_hash_b; - } -}; - -struct StructuralBasisKey { - BasisType basis_type{BasisType::Custom}; - ElementType element_type{ElementType::Unknown}; - int dimension{0}; - int order{0}; - std::size_t num_dofs{0}; - bool vector_valued{false}; - QuadratureCacheKey quadrature; - bool with_gradients{false}; - bool with_hessians{false}; - - bool operator==(const StructuralBasisKey& other) const noexcept { - return basis_type == other.basis_type && - element_type == other.element_type && - dimension == other.dimension && - order == other.order && - num_dofs == other.num_dofs && - vector_valued == other.vector_valued && - quadrature == other.quadrature && - with_gradients == other.with_gradients && - with_hessians == other.with_hessians; - } -}; - -struct ParameterizedBasisKey { - StructuralBasisKey 
structural; - bool uses_structured_identity{false}; - std::uint64_t identity_hash_a{0}; - std::uint64_t identity_hash_b{0}; - std::vector basis_identity_words; - std::string basis_identity; - - bool operator==(const ParameterizedBasisKey& other) const noexcept { - return structural == other.structural && - uses_structured_identity == other.uses_structured_identity && - identity_hash_a == other.identity_hash_a && - identity_hash_b == other.identity_hash_b && - basis_identity_words == other.basis_identity_words && - basis_identity == other.basis_identity; - } -}; - -struct BasisCacheKey { - std::variant value; - - bool operator==(const BasisCacheKey& other) const noexcept { - return value == other.value; - } -}; - -struct BasisCacheKeyHash { - std::size_t operator()(const BasisCacheKey& key) const noexcept { - std::size_t seed = 0; - auto combine = [&seed](std::size_t value) noexcept { - seed ^= value + 0x9e3779b97f4a7c15ULL + (seed << 6u) + (seed >> 2u); - }; - - auto hash_structural = [&](const StructuralBasisKey& structural) noexcept { - combine(std::hash()(structural.quadrature.dimension)); - combine(std::hash()(structural.quadrature.num_points)); - combine(std::hash()(structural.quadrature.points_hash_a)); - combine(std::hash()(structural.quadrature.points_hash_b)); - combine(std::hash()(static_cast(structural.basis_type))); - combine(std::hash()(static_cast(structural.element_type))); - combine(std::hash()(structural.dimension)); - combine(std::hash()(structural.order)); - combine(std::hash()(structural.num_dofs)); - unsigned flags = 0u; - flags |= structural.vector_valued ? 1u : 0u; - flags |= structural.with_gradients ? 2u : 0u; - flags |= structural.with_hessians ? 
4u : 0u; - combine(std::hash()(flags)); - }; - - std::visit([&](const auto& active_key) { - using ActiveKey = std::decay_t; - if constexpr (std::is_same_v) { - combine(0x5354525543544b45ULL); - hash_structural(active_key); - } else { - combine(0x504152414d4b4559ULL); - hash_structural(active_key.structural); - combine(active_key.uses_structured_identity ? 1u : 0u); - combine(std::hash()(active_key.identity_hash_a)); - combine(std::hash()(active_key.identity_hash_b)); - } - }, key.value); - return seed; - } -}; - -struct BasisCacheEntry { - std::size_t num_qpts{0}; - std::size_t num_dofs{0}; - // Scalar basis values in dof-major SoA layout: [dof * num_qpts + qp]. - std::vector scalar_values; - // Scalar reference gradients in dof/component/qpt SoA layout: - // [(dof * 3 + component) * num_qpts + qp]. - std::vector gradients; - // Scalar reference Hessians in dof/component/qpt SoA layout: - // [(dof * 9 + row * 3 + col) * num_qpts + qp]. - std::vector hessians; - - // Vector basis values in dof/component/qpt SoA layout: - // [(dof * 3 + component) * num_qpts + qp]. - std::vector vector_values_xyz; - // Vector basis reference Jacobians in dof/component/derivative/qpt layout: - // [(dof * 9 + component * 3 + derivative) * num_qpts + qp]. - std::vector vector_jacobians; - // Vector basis curls in dof/component/qpt SoA layout. - std::vector vector_curls_xyz; - // Vector basis divergences in dof/qpt SoA layout. - std::vector vector_divergence; - - // The object-returning accessors below are convenience helpers for tests, - // diagnostics, and occasional scalar use. Hot loops should prefer the SoA - // span accessors so they do not reconstruct Gradient, Hessian, or matrix - // objects per DOF and quadrature point. 
- - [[nodiscard]] Real scalarValue(std::size_t dof, std::size_t qp) const noexcept { - return scalar_values[dof * num_qpts + qp]; - } - - [[nodiscard]] std::span scalarValuesForDof(std::size_t dof) const noexcept { - if (num_qpts == 0) return {}; - return std::span(scalar_values.data() + dof * num_qpts, num_qpts); - } - - [[nodiscard]] Real gradientValue(std::size_t dof, - std::size_t component, - std::size_t qp) const noexcept { - return gradients[(dof * 3u + component) * num_qpts + qp]; - } - - [[nodiscard]] Gradient gradientVector(std::size_t dof, std::size_t qp) const noexcept { - Gradient out{}; - for (std::size_t component = 0; component < 3u; ++component) { - out[component] = gradientValue(dof, component, qp); - } - return out; - } - - [[nodiscard]] std::span gradientsForDofComponent(std::size_t dof, - std::size_t component) const noexcept { - if (num_qpts == 0) return {}; - return std::span(gradients.data() + (dof * 3u + component) * num_qpts, num_qpts); - } - - [[nodiscard]] std::span gradientsForDof(std::size_t dof) const noexcept { - if (num_qpts == 0) return {}; - return std::span(gradients.data() + dof * 3u * num_qpts, 3u * num_qpts); - } - - [[nodiscard]] Real hessianValue(std::size_t dof, - std::size_t row, - std::size_t col, - std::size_t qp) const noexcept { - return hessians[(dof * 9u + row * 3u + col) * num_qpts + qp]; - } - - [[nodiscard]] Hessian hessianMatrix(std::size_t dof, std::size_t qp) const noexcept { - Hessian out{}; - for (std::size_t row = 0; row < 3u; ++row) { - for (std::size_t col = 0; col < 3u; ++col) { - out(row, col) = hessianValue(dof, row, col, qp); - } - } - return out; - } - - [[nodiscard]] std::span hessiansForDofComponent(std::size_t dof, - std::size_t row, - std::size_t col) const noexcept { - if (num_qpts == 0) return {}; - return std::span(hessians.data() + (dof * 9u + row * 3u + col) * num_qpts, num_qpts); - } - - [[nodiscard]] std::span hessiansForDof(std::size_t dof) const noexcept { - if (num_qpts == 0) return {}; 
- return std::span(hessians.data() + dof * 9u * num_qpts, 9u * num_qpts); - } - - [[nodiscard]] Real vectorValue(std::size_t dof, - std::size_t component, - std::size_t qp) const noexcept { - return vector_values_xyz[(dof * 3u + component) * num_qpts + qp]; - } - - [[nodiscard]] math::Vector vectorValue(std::size_t dof, - std::size_t qp) const noexcept { - math::Vector out{}; - for (std::size_t component = 0; component < 3u; ++component) { - out[component] = vectorValue(dof, component, qp); - } - return out; - } - - [[nodiscard]] std::span vectorValuesForDofComponent(std::size_t dof, - std::size_t component) const noexcept { - if (num_qpts == 0) return {}; - return std::span(vector_values_xyz.data() + (dof * 3u + component) * num_qpts, num_qpts); - } - - [[nodiscard]] std::span vectorValuesForDof(std::size_t dof) const noexcept { - if (num_qpts == 0 || vector_values_xyz.empty()) return {}; - return std::span(vector_values_xyz.data() + dof * 3u * num_qpts, 3u * num_qpts); - } - - [[nodiscard]] Real vectorJacobianValue(std::size_t dof, - std::size_t component, - std::size_t derivative, - std::size_t qp) const noexcept { - return vector_jacobians[(dof * 9u + component * 3u + derivative) * num_qpts + qp]; - } - - [[nodiscard]] VectorJacobian vectorJacobianMatrix(std::size_t dof, - std::size_t qp) const noexcept { - VectorJacobian out{}; - for (std::size_t component = 0; component < 3u; ++component) { - for (std::size_t derivative = 0; derivative < 3u; ++derivative) { - out(component, derivative) = - vectorJacobianValue(dof, component, derivative, qp); - } - } - return out; - } - - [[nodiscard]] std::span vectorJacobiansForDofComponentDerivative( - std::size_t dof, - std::size_t component, - std::size_t derivative) const noexcept { - if (num_qpts == 0 || vector_jacobians.empty()) return {}; - return std::span( - vector_jacobians.data() + (dof * 9u + component * 3u + derivative) * num_qpts, - num_qpts); - } - - [[nodiscard]] std::span vectorJacobiansForDof(std::size_t 
dof) const noexcept { - if (num_qpts == 0 || vector_jacobians.empty()) return {}; - return std::span(vector_jacobians.data() + dof * 9u * num_qpts, 9u * num_qpts); - } - - [[nodiscard]] Real vectorCurlValue(std::size_t dof, - std::size_t component, - std::size_t qp) const noexcept { - return vector_curls_xyz[(dof * 3u + component) * num_qpts + qp]; - } - - [[nodiscard]] math::Vector vectorCurl(std::size_t dof, - std::size_t qp) const noexcept { - math::Vector out{}; - for (std::size_t component = 0; component < 3u; ++component) { - out[component] = vectorCurlValue(dof, component, qp); - } - return out; - } - - [[nodiscard]] std::span vectorCurlsForDofComponent(std::size_t dof, - std::size_t component) const noexcept { - if (num_qpts == 0 || vector_curls_xyz.empty()) return {}; - return std::span(vector_curls_xyz.data() + (dof * 3u + component) * num_qpts, num_qpts); - } - - [[nodiscard]] std::span vectorCurlsForDof(std::size_t dof) const noexcept { - if (num_qpts == 0 || vector_curls_xyz.empty()) return {}; - return std::span(vector_curls_xyz.data() + dof * 3u * num_qpts, 3u * num_qpts); - } - - [[nodiscard]] Real vectorDivergenceValue(std::size_t dof, - std::size_t qp) const noexcept { - return vector_divergence[dof * num_qpts + qp]; - } - - [[nodiscard]] std::span vectorDivergenceForDof(std::size_t dof) const noexcept { - if (num_qpts == 0 || vector_divergence.empty()) return {}; - return std::span(vector_divergence.data() + dof * num_qpts, num_qpts); - } -}; - -class BasisCacheHandle { -public: - BasisCacheHandle() = default; - - [[nodiscard]] const BasisCacheEntry& entry() const { - BASIS_CHECK_CONFIG(entry_ != nullptr, - "BasisCacheHandle: attempted to access an empty handle"); - return *entry_; - } - - [[nodiscard]] bool valid() const noexcept { return entry_ != nullptr; } - explicit operator bool() const noexcept { return valid(); } - -private: - friend class BasisCache; - - explicit BasisCacheHandle(std::shared_ptr entry) - : entry_(std::move(entry)) {} - - 
std::shared_ptr entry_; -}; - -class BasisCache { -public: - static BasisCache& instance(); - - const BasisCacheEntry& get_or_compute( - const BasisFunction& basis, - const quadrature::QuadratureRule& quad, - bool gradients = true, - bool hessians = false); - - /** - * @brief Compute an entry without consulting, publishing to, or waiting on - * the shared cache. - */ - BasisCacheEntry compute_uncached( - const BasisFunction& basis, - const quadrature::QuadratureRule& quad, - bool gradients = true, - bool hessians = false) const; - - /** - * @brief Eagerly populate the cache for the given (basis, quadrature) key - * - * Pays the compute cost up front so that subsequent get_or_compute calls - * for the same key hit the warm-cache path immediately. Equivalent to - * calling get_or_compute and discarding the return value. - * - * Returns the inserted (or pre-existing) entry for convenience. - */ - const BasisCacheEntry& prewarm( - const BasisFunction& basis, - const quadrature::QuadratureRule& quad, - bool gradients = true, - bool hessians = false); - - /** - * @brief Eagerly populate the cache and return a hot-loop handle. - * - * The returned handle owns a shared reference to the completed entry. Access - * through BasisCacheHandle::entry() performs no key construction, hashing, - * map lookup, or cache mutex acquisition. Calling clear() removes the entry - * from the global lookup map but does not invalidate existing handles. - */ - BasisCacheHandle prewarm_handle( - const BasisFunction& basis, - const quadrature::QuadratureRule& quad, - bool gradients = true, - bool hessians = false); - - /** - * @brief Remove completed cache entries. - * - * This is a soft clear: computations that were already in flight before - * clear() was called are allowed to publish their completed entry afterward. 
- * This preserves the returned-reference lifetime contract for concurrent - * get_or_compute() callers while still dropping all entries that had already - * completed at the time of the call. - */ - void clear(); - std::size_t size() const; - -private: - struct InFlightComputation { - std::mutex mutex; - std::condition_variable ready_cv; - bool ready{false}; - std::shared_ptr entry; - std::exception_ptr exception; - }; - - struct CacheSlot { - std::shared_ptr entry; - std::shared_ptr pending; - }; - - BasisCache() = default; - BasisCache(const BasisCache&) = delete; - BasisCache& operator=(const BasisCache&) = delete; - - BasisCacheEntry compute(const BasisFunction& basis, - const quadrature::QuadratureRule& quad, - bool gradients, - bool hessians) const; - - std::shared_ptr get_or_compute_shared( - const BasisFunction& basis, - const quadrature::QuadratureRule& quad, - bool gradients, - bool hessians); - - mutable std::shared_mutex mutex_; - std::unordered_map slots_; -}; - -} // namespace basis -} // namespace FE -} // namespace svmp - -#endif // SVMP_FE_BASIS_BASISCACHE_H diff --git a/Code/Source/solver/FE/Basis/BasisFactory.cpp b/Code/Source/solver/FE/Basis/BasisFactory.cpp index dddbd4c5c..9f0867959 100644 --- a/Code/Source/solver/FE/Basis/BasisFactory.cpp +++ b/Code/Source/solver/FE/Basis/BasisFactory.cpp @@ -10,29 +10,12 @@ #include "LagrangeBasis.h" #include "SerendipityBasis.h" -#include -#include -#include - namespace svmp { namespace FE { namespace basis { namespace { -using CustomRegistryMap = - std::unordered_map; - -CustomRegistryMap& custom_registry() { - static CustomRegistryMap registry; - return registry; -} - -std::mutex& custom_registry_mutex() { - static std::mutex mutex; - return mutex; -} - int require_basis_order(const BasisRequest& req, const char* missing_message, const char* negative_message) { @@ -50,12 +33,12 @@ int require_basis_order(const BasisRequest& req, void require_scalar_c0_request(const BasisRequest& req) { if (req.field_type 
!= FieldType::Scalar) { throw BasisConfigurationException( - "BasisFactory: Lagrange/Serendipity bases currently support scalar fields only", + "BasisFactory: Lagrange/Serendipity bases support scalar fields only", __FILE__, __LINE__, __func__); } if (req.continuity != Continuity::C0) { throw BasisConfigurationException( - "BasisFactory: migrated Lagrange/Serendipity scope supports C0 continuity only", + "BasisFactory: Lagrange/Serendipity bases support C0 continuity only", __FILE__, __LINE__, __func__); } } @@ -78,36 +61,6 @@ std::shared_ptr create_serendipity(const BasisRequest& req) { return std::make_shared(req.element_type, order); } -std::shared_ptr create_custom(const BasisRequest& req) { - if (req.custom_id.empty()) { - throw BasisConfigurationException( - "BasisFactory: custom basis requests require custom_id", - __FILE__, __LINE__, __func__); - } - - basis_factory::CustomFactory factory; - { - std::lock_guard lock(custom_registry_mutex()); - const auto it = custom_registry().find(req.custom_id); - if (it == custom_registry().end()) { - throw BasisConfigurationException( - "BasisFactory: no custom basis factory registered for id '" + - req.custom_id + "'", - __FILE__, __LINE__, __func__); - } - factory = it->second; - } - - auto basis = factory(req); - if (!basis) { - throw BasisConstructionException( - "BasisFactory: custom factory returned null basis for id '" + - req.custom_id + "'", - __FILE__, __LINE__, __func__); - } - return basis; -} - } // namespace namespace basis_factory { @@ -118,41 +71,13 @@ std::shared_ptr create(const BasisRequest& req) { return create_lagrange(req); case BasisType::Serendipity: return create_serendipity(req); - case BasisType::Custom: - return create_custom(req); default: throw BasisConfigurationException( - "BasisFactory: requested basis family is outside the migrated Lagrange/Serendipity scope", + "BasisFactory: requested basis family is outside the scalar Lagrange/Serendipity scope", __FILE__, __LINE__, __func__); } } 
-void register_custom(std::string custom_id, CustomFactory factory) { - if (custom_id.empty()) { - throw BasisConfigurationException( - "BasisFactory: custom factory id must not be empty", - __FILE__, __LINE__, __func__); - } - if (!factory) { - throw BasisConfigurationException( - "BasisFactory: custom factory must be callable", - __FILE__, __LINE__, __func__); - } - - std::lock_guard lock(custom_registry_mutex()); - custom_registry()[std::move(custom_id)] = std::move(factory); -} - -void unregister_custom(const std::string& custom_id) { - std::lock_guard lock(custom_registry_mutex()); - custom_registry().erase(custom_id); -} - -void clear_custom_registry_for_tests() { - std::lock_guard lock(custom_registry_mutex()); - custom_registry().clear(); -} - } // namespace basis_factory } // namespace basis diff --git a/Code/Source/solver/FE/Basis/BasisFactory.h b/Code/Source/solver/FE/Basis/BasisFactory.h index cedf1ba6d..c937dd4a0 100644 --- a/Code/Source/solver/FE/Basis/BasisFactory.h +++ b/Code/Source/solver/FE/Basis/BasisFactory.h @@ -14,7 +14,6 @@ */ #include "BasisFunction.h" -#include #include #include #include @@ -41,12 +40,7 @@ struct BasisRequest { namespace basis_factory { -using CustomFactory = std::function(const BasisRequest&)>; - [[nodiscard]] std::shared_ptr create(const BasisRequest& req); -void register_custom(std::string custom_id, CustomFactory factory); -void unregister_custom(const std::string& custom_id); -void clear_custom_registry_for_tests(); } // namespace basis_factory diff --git a/Code/Source/solver/FE/Basis/BasisFunction.cpp b/Code/Source/solver/FE/Basis/BasisFunction.cpp index 49c8d8763..2a1d4f6b0 100644 --- a/Code/Source/solver/FE/Basis/BasisFunction.cpp +++ b/Code/Source/solver/FE/Basis/BasisFunction.cpp @@ -6,11 +6,8 @@ */ #include "BasisFunction.h" -#include "VectorBasisEvaluationHelpers.h" + #include -#include -#include -#include namespace svmp { namespace FE { @@ -19,81 +16,26 @@ namespace basis { namespace { struct 
BasisFunctionScratch { - std::vector scalar_values; - std::vector scalar_gradients; - std::vector scalar_hessians; - std::vector> vector_values; - std::vector vector_jacobians; - std::vector> vector_curls; - std::vector vector_divergences; + std::vector values; + std::vector gradients; + std::vector hessians; void prewarm(std::size_t max_size) { - scalar_values.reserve(max_size); - scalar_gradients.reserve(max_size); - scalar_hessians.reserve(max_size); - vector_values.reserve(max_size); - vector_jacobians.reserve(max_size); - vector_curls.reserve(max_size); - vector_divergences.reserve(max_size); + values.reserve(max_size); + gradients.reserve(max_size); + hessians.reserve(max_size); } }; -BasisFunctionScratch& basis_function_scratch() { - // Scratch is intentionally thread-local: production assembly uses a - // persistent worker-thread team, so buffers stay warm on each worker. - static thread_local BasisFunctionScratch scratch; - return scratch; -} - -void mix_identity_hash_word(std::uint64_t word, - std::uint64_t& hash_a, - std::uint64_t& hash_b) noexcept { - hash_a ^= word + 0x9e3779b97f4a7c15ULL + (hash_a << 6u) + (hash_a >> 2u); - hash_b ^= (word + 0xbf58476d1ce4e5b9ULL) + (hash_b << 7u) + (hash_b >> 3u); +BasisFunctionScratch& scratch() { + static thread_local BasisFunctionScratch data; + return data; } } // namespace -BasisIdentityFingerprint -compute_basis_identity_fingerprint(std::span words) noexcept { - BasisIdentityFingerprint fingerprint{0x243f6a8885a308d3ULL, - 0x13198a2e03707344ULL}; - mix_identity_hash_word(static_cast(words.size()), - fingerprint.hash_a, - fingerprint.hash_b); - for (const auto word : words) { - mix_identity_hash_word(word, fingerprint.hash_a, fingerprint.hash_b); - } - return fingerprint; -} - -std::string BasisFunction::cache_identity() const { - std::ostringstream oss; - oss << "basis=" << static_cast(basis_type()) - << "|elem=" << static_cast(element_type()) - << "|dim=" << dimension() - << "|order=" << order() - << "|size=" 
<< size() - << "|vector=" << is_vector_valued(); - return oss.str(); -} - -bool BasisFunction::cache_identity_words(std::vector& words) const { - (void)words; - return false; -} - -bool BasisFunction::cache_identity_fingerprint(std::uint64_t& hash_a, - std::uint64_t& hash_b) const { - (void)hash_a; - (void)hash_b; - return false; -} - -void prewarm_basis_function_scratch(std::size_t max_size, - std::size_t max_qpts) { - (void)max_qpts; - basis_function_scratch().prewarm(max_size); +void prewarm_basis_function_scratch(std::size_t max_size) { + scratch().prewarm(max_size); } void BasisFunction::evaluate_gradients(const math::Vector& xi, @@ -123,7 +65,7 @@ void BasisFunction::evaluate_all(const math::Vector& xi, void BasisFunction::evaluate_values_to(const math::Vector& xi, Real* SVMP_RESTRICT values_out) const { - auto& tmp = basis_function_scratch().scalar_values; + auto& tmp = scratch().values; tmp.resize(size()); evaluate_values(xi, tmp); std::copy_n(tmp.data(), tmp.size(), values_out); @@ -131,7 +73,7 @@ void BasisFunction::evaluate_values_to(const math::Vector& xi, void BasisFunction::evaluate_gradients_to(const math::Vector& xi, Real* SVMP_RESTRICT gradients_out) const { - auto& tmp = basis_function_scratch().scalar_gradients; + auto& tmp = scratch().gradients; tmp.resize(size()); evaluate_gradients(xi, tmp); for (std::size_t i = 0; i < tmp.size(); ++i) { @@ -143,7 +85,7 @@ void BasisFunction::evaluate_gradients_to(const math::Vector& xi, void BasisFunction::evaluate_hessians_to(const math::Vector& xi, Real* SVMP_RESTRICT hessians_out) const { - auto& tmp = basis_function_scratch().scalar_hessians; + auto& tmp = scratch().hessians; tmp.resize(size()); evaluate_hessians(xi, tmp); for (std::size_t i = 0; i < tmp.size(); ++i) { @@ -151,165 +93,6 @@ void BasisFunction::evaluate_hessians_to(const math::Vector& xi, } } -void BasisFunction::evaluate_at_quadrature_points( - const std::vector>& points, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT 
gradients_out, - Real* SVMP_RESTRICT hessians_out) const { - evaluate_at_quadrature_points_strided( - points, points.size(), values_out, gradients_out, hessians_out); -} - -void BasisFunction::evaluate_at_quadrature_points_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) const { - const std::size_t num_qpts = points.size(); - const std::size_t num_dofs = size(); - if (output_stride < num_qpts) { - throw BasisConfigurationException( - "BasisFunction strided evaluation requires output_stride >= points.size()", - __FILE__, __LINE__, __func__); - } - - auto& scratch = basis_function_scratch(); - auto& v_tmp = scratch.scalar_values; - auto& g_tmp = scratch.scalar_gradients; - auto& h_tmp = scratch.scalar_hessians; - if (values_out) v_tmp.resize(num_dofs); - if (gradients_out) g_tmp.resize(num_dofs); - if (hessians_out) h_tmp.resize(num_dofs); - - for (std::size_t q = 0; q < num_qpts; ++q) { - if (values_out && gradients_out && hessians_out) { - evaluate_all(points[q], v_tmp, g_tmp, h_tmp); - } else { - if (values_out) evaluate_values(points[q], v_tmp); - if (gradients_out) evaluate_gradients(points[q], g_tmp); - if (hessians_out) evaluate_hessians(points[q], h_tmp); - } - - if (values_out) { - for (std::size_t dof = 0; dof < num_dofs; ++dof) { - values_out[dof * output_stride + q] = v_tmp[dof]; - } - } - if (gradients_out) { - for (std::size_t dof = 0; dof < num_dofs; ++dof) { - for (std::size_t component = 0; component < 3u; ++component) { - gradients_out[(dof * 3u + component) * output_stride + q] = - g_tmp[dof][component]; - } - } - } - if (hessians_out) { - for (std::size_t dof = 0; dof < num_dofs; ++dof) { - store_hessian_strided( - h_tmp[dof], hessians_out + dof * 9u * output_stride, output_stride, q); - } - } - } -} - -void BasisFunction::fill_scalar_cache_entry( - const std::vector>& points, - std::size_t output_stride, - Real* 
SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) const { - evaluate_at_quadrature_points_strided( - points, output_stride, values_out, gradients_out, hessians_out); -} - -void BasisFunction::evaluate_vector_at_quadrature_points( - const std::vector>& points, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT jacobians_out, - Real* SVMP_RESTRICT curls_out, - Real* SVMP_RESTRICT divergence_out) const { - evaluate_vector_at_quadrature_points_strided( - points, points.size(), values_out, jacobians_out, curls_out, divergence_out); -} - -void BasisFunction::evaluate_vector_at_quadrature_points_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT jacobians_out, - Real* SVMP_RESTRICT curls_out, - Real* SVMP_RESTRICT divergence_out) const { - const std::size_t num_qpts = points.size(); - const std::size_t num_dofs = size(); - detail::vector_common::validate_vector_strided_outputs( - num_qpts, output_stride, "BasisFunction"); - - auto& scratch = basis_function_scratch(); - auto& v_tmp = scratch.vector_values; - auto& j_tmp = scratch.vector_jacobians; - auto& c_tmp = scratch.vector_curls; - auto& d_tmp = scratch.vector_divergences; - if (values_out) v_tmp.resize(num_dofs); - if (jacobians_out) j_tmp.resize(num_dofs); - if (curls_out) c_tmp.resize(num_dofs); - if (divergence_out) d_tmp.resize(num_dofs); - - for (std::size_t q = 0; q < num_qpts; ++q) { - if (values_out) { - evaluate_vector_values(points[q], v_tmp); - detail::vector_common::write_vector_values_strided( - v_tmp, num_dofs, output_stride, q, values_out); - } - - if (jacobians_out) { - evaluate_vector_jacobians(points[q], j_tmp); - detail::vector_common::write_vector_jacobians_strided( - j_tmp, num_dofs, output_stride, q, jacobians_out); - } - - if (curls_out) { - evaluate_curl(points[q], c_tmp); - detail::vector_common::write_vector_curl_strided( - c_tmp, num_dofs, output_stride, q, 
curls_out); - } - - if (divergence_out) { - evaluate_divergence(points[q], d_tmp); - detail::vector_common::write_vector_divergence_strided( - d_tmp, num_dofs, output_stride, q, divergence_out); - } - } -} - -void BasisFunction::evaluate_vector_values( - const math::Vector&, - std::vector>&) const { - throw BasisEvaluationException("Vector-valued evaluation requested on scalar basis", - __FILE__, __LINE__, __func__); -} - -void BasisFunction::evaluate_vector_jacobians( - const math::Vector&, - std::vector&) const { - throw BasisEvaluationException("Vector-basis Jacobian evaluation requested on scalar basis", - __FILE__, __LINE__, __func__); -} - -void BasisFunction::evaluate_divergence( - const math::Vector&, - std::vector&) const { - throw BasisEvaluationException("Divergence requested on scalar basis", - __FILE__, __LINE__, __func__); -} - -void BasisFunction::evaluate_curl( - const math::Vector&, - std::vector>&) const { - throw BasisEvaluationException("Curl requested on scalar basis", - __FILE__, __LINE__, __func__); -} - void BasisFunction::numerical_gradient(const math::Vector& xi, std::vector& gradients, Real eps) const { @@ -320,11 +103,12 @@ void BasisFunction::numerical_gradient(const math::Vector& xi, for (int d = 0; d < dimension(); ++d) { math::Vector forward = xi; math::Vector backward = xi; - const std::size_t idx = static_cast(d); + const auto idx = static_cast(d); forward[idx] += eps; backward[idx] -= eps; - std::vector fwd, bwd; + std::vector fwd; + std::vector bwd; evaluate_values(forward, fwd); evaluate_values(backward, bwd); @@ -344,18 +128,20 @@ void BasisFunction::numerical_hessian(const math::Vector& xi, for (int d = 0; d < dimension(); ++d) { math::Vector forward = xi; math::Vector backward = xi; - const std::size_t col = static_cast(d); + const auto col = static_cast(d); forward[col] += eps; backward[col] -= eps; - std::vector g_forward, g_backward; + std::vector g_forward; + std::vector g_backward; evaluate_gradients(forward, g_forward); 
evaluate_gradients(backward, g_backward); for (std::size_t i = 0; i < base_grad.size(); ++i) { for (int k = 0; k < dimension(); ++k) { - const std::size_t row = static_cast(k); - hessians[i](row, col) = (g_forward[i][row] - g_backward[i][row]) / (Real(2) * eps); + const auto row = static_cast(k); + hessians[i](row, col) = + (g_forward[i][row] - g_backward[i][row]) / (Real(2) * eps); } } } diff --git a/Code/Source/solver/FE/Basis/BasisFunction.h b/Code/Source/solver/FE/Basis/BasisFunction.h index ee38a5b19..dbabf7061 100644 --- a/Code/Source/solver/FE/Basis/BasisFunction.h +++ b/Code/Source/solver/FE/Basis/BasisFunction.h @@ -8,24 +8,12 @@ #ifndef SVMP_FE_BASIS_BASISFUNCTION_H #define SVMP_FE_BASIS_BASISFUNCTION_H -/** - * @file BasisFunction.h - * @brief Abstract interface for basis function evaluation on reference elements - * - * The Basis module operates purely on reference elements and is independent of - * mesh-specific data structures. Implementations may leverage Math and - * Quadrature utilities but must not read mesh connectivity or geometry. 
- */ - -#include "Types.h" #include "BasisExceptions.h" -#include "Math/Vector.h" #include "Math/Matrix.h" -#include -#include -#include -#include -#include +#include "Math/Vector.h" +#include "Types.h" + +#include #include namespace svmp { @@ -34,18 +22,8 @@ namespace basis { using Gradient = math::Vector; using Hessian = math::Matrix; -using VectorJacobian = math::Matrix; - -struct BasisIdentityFingerprint { - std::uint64_t hash_a{0}; - std::uint64_t hash_b{0}; -}; - -[[nodiscard]] BasisIdentityFingerprint -compute_basis_identity_fingerprint(std::span words) noexcept; -void prewarm_basis_function_scratch(std::size_t max_size, - std::size_t max_qpts = 0); +void prewarm_basis_function_scratch(std::size_t max_size); [[nodiscard]] inline Hessian make_symmetric_hessian(Real xx, Real yy, @@ -57,363 +35,80 @@ void prewarm_basis_function_scratch(std::size_t max_size, hessian(0, 0) = xx; hessian(1, 1) = yy; hessian(2, 2) = zz; - hessian(0, 1) = xy; - hessian(1, 0) = xy; - hessian(0, 2) = xz; - hessian(2, 0) = xz; - hessian(1, 2) = yz; - hessian(2, 1) = yz; + hessian(0, 1) = hessian(1, 0) = xy; + hessian(0, 2) = hessian(2, 0) = xz; + hessian(1, 2) = hessian(2, 1) = yz; return hessian; } -// Raw Hessian buffers use row-major 3x3 blocks: -// dst[row * 3 + col] = H(row, col). 
inline void store_hessian(const Hessian& hessian, Real* dst) noexcept { - dst[0u] = hessian(0u, 0u); - dst[1u] = hessian(0u, 1u); - dst[2u] = hessian(0u, 2u); - dst[3u] = hessian(1u, 0u); - dst[4u] = hessian(1u, 1u); - dst[5u] = hessian(1u, 2u); - dst[6u] = hessian(2u, 0u); - dst[7u] = hessian(2u, 1u); - dst[8u] = hessian(2u, 2u); -} - -inline void store_hessian_strided(const Hessian& hessian, - Real* dst, - std::size_t stride, - std::size_t offset) noexcept { - dst[0u * stride + offset] = hessian(0u, 0u); - dst[1u * stride + offset] = hessian(0u, 1u); - dst[2u * stride + offset] = hessian(0u, 2u); - dst[3u * stride + offset] = hessian(1u, 0u); - dst[4u * stride + offset] = hessian(1u, 1u); - dst[5u * stride + offset] = hessian(1u, 2u); - dst[6u * stride + offset] = hessian(2u, 0u); - dst[7u * stride + offset] = hessian(2u, 1u); - dst[8u * stride + offset] = hessian(2u, 2u); -} - -inline void scatter_hessian_components_strided(const Real* src, - Real* dst, - std::size_t stride, - std::size_t offset) noexcept { - dst[0u * stride + offset] = src[0u]; - dst[1u * stride + offset] = src[1u]; - dst[2u * stride + offset] = src[2u]; - dst[3u * stride + offset] = src[3u]; - dst[4u * stride + offset] = src[4u]; - dst[5u * stride + offset] = src[5u]; - dst[6u * stride + offset] = src[6u]; - dst[7u * stride + offset] = src[7u]; - dst[8u * stride + offset] = src[8u]; + dst[0] = hessian(0, 0); + dst[1] = hessian(0, 1); + dst[2] = hessian(0, 2); + dst[3] = hessian(1, 0); + dst[4] = hessian(1, 1); + dst[5] = hessian(1, 2); + dst[6] = hessian(2, 0); + dst[7] = hessian(2, 1); + dst[8] = hessian(2, 2); } [[nodiscard]] inline Hessian load_hessian(const Real* src) noexcept { Hessian hessian{}; - hessian(0u, 0u) = src[0u]; - hessian(0u, 1u) = src[1u]; - hessian(0u, 2u) = src[2u]; - hessian(1u, 0u) = src[3u]; - hessian(1u, 1u) = src[4u]; - hessian(1u, 2u) = src[5u]; - hessian(2u, 0u) = src[6u]; - hessian(2u, 1u) = src[7u]; - hessian(2u, 2u) = src[8u]; + hessian(0, 0) = src[0]; + 
hessian(0, 1) = src[1]; + hessian(0, 2) = src[2]; + hessian(1, 0) = src[3]; + hessian(1, 1) = src[4]; + hessian(1, 2) = src[5]; + hessian(2, 0) = src[6]; + hessian(2, 1) = src[7]; + hessian(2, 2) = src[8]; return hessian; } inline void add_scaled_hessian(Hessian& target, const Hessian& source, Real scale) noexcept { - target(0u, 0u) += scale * source(0u, 0u); - target(0u, 1u) += scale * source(0u, 1u); - target(0u, 2u) += scale * source(0u, 2u); - target(1u, 0u) += scale * source(1u, 0u); - target(1u, 1u) += scale * source(1u, 1u); - target(1u, 2u) += scale * source(1u, 2u); - target(2u, 0u) += scale * source(2u, 0u); - target(2u, 1u) += scale * source(2u, 1u); - target(2u, 2u) += scale * source(2u, 2u); + for (std::size_t r = 0; r < 3u; ++r) { + for (std::size_t c = 0; c < 3u; ++c) { + target(r, c) += scale * source(r, c); + } + } } -/** - * @brief Base interface for scalar and vector-valued basis families - * - * All basis implementations operate in reference space. Physical mappings are - * handled by the Geometry module. Derivatives are returned with unused - * components set to zero for lower dimensional elements. - */ class BasisFunction { public: virtual ~BasisFunction() = default; - /// Basis family identifier virtual BasisType basis_type() const noexcept = 0; - - /// Underlying element type on the reference domain virtual ElementType element_type() const noexcept = 0; - - /// Reference dimensionality (1, 2, or 3) virtual int dimension() const noexcept = 0; - - /// Polynomial order (modal/nodal definition dependent) virtual int order() const noexcept = 0; - - /// Number of basis functions (scalar or vector-valued) virtual std::size_t size() const noexcept = 0; - /** - * @brief Whether BasisCache can key this basis from common structural fields. - * - * Return true only when basis_type/element_type/dimension/order/size and - * vector-valued status fully determine evaluation behavior. 
Parameterized - * bases such as splines and custom user bases should keep the default false - * so BasisCache includes cache_identity() in the key. - */ - virtual bool cache_identity_is_structural() const noexcept { return false; } - - /// Whether the basis is vector-valued (H(div)/H(curl)) - virtual bool is_vector_valued() const noexcept { return false; } - - /// Whether vector-valued basis Jacobians are available. - virtual bool supports_vector_jacobians() const noexcept { return false; } - - /// Whether vector-valued basis curls are available. - virtual bool supports_curl() const noexcept { return false; } - - /// Whether vector-valued basis divergences are available. - virtual bool supports_divergence() const noexcept { return false; } - - /** - * @brief Stable semantic identity used by BasisCache - * - * Derived classes should override this when evaluation depends on - * additional state beyond basis family / element / order metadata. - */ - virtual std::string cache_identity() const; - - /** - * @brief Optional exact structured identity payload for BasisCache keys. - * - * Parameterized bases may append stable integer/bit-pattern words and - * return true to let BasisCache avoid using cache_identity() as the exact - * key payload. The human-readable cache_identity() remains available for - * diagnostics and for custom bases that do not implement this path. - */ - virtual bool cache_identity_words(std::vector& words) const; - - /** - * @brief Optional cached fingerprint for structured identity words. - * - * Implementations that precompute cache_identity_words() may also cache the - * corresponding fingerprint. BasisCache still retains exact identity words - * for equality after hash matches. 
- */ - virtual bool cache_identity_fingerprint(std::uint64_t& hash_a, - std::uint64_t& hash_b) const; - - /** - * @brief Evaluate scalar basis values at a reference point - * @param xi Reference coordinates (unused entries are ignored) - * @param[out] values Output array resized to size() - */ virtual void evaluate_values(const math::Vector& xi, std::vector& values) const = 0; - - /** - * @brief Evaluate gradients of scalar basis functions - * - * Production bases must override this with analytic derivatives. - * Use numerical_gradient explicitly in tests or diagnostics when a finite - * difference approximation is intended. - */ virtual void evaluate_gradients(const math::Vector& xi, std::vector& gradients) const; - - /** - * @brief Evaluate Hessians of scalar basis functions - * - * Production bases must override this with analytic second derivatives. - * Use numerical_hessian explicitly in tests or diagnostics when a finite - * difference approximation is intended. - */ virtual void evaluate_hessians(const math::Vector& xi, std::vector& hessians) const; - - /** - * @brief Fused evaluation of values, gradients, and Hessians at one point - * - * Default implementation calls evaluate_values, evaluate_gradients, and - * evaluate_hessians in sequence. Bases that share intermediate - * computations (e.g., LagrangeBasis sharing per-axis 1D evaluations) - * should override this to avoid redundant work. - */ virtual void evaluate_all(const math::Vector& xi, std::vector& values, std::vector& gradients, std::vector& hessians) const; - /** - * @brief Fill SoA buffers with basis evaluations at all quadrature points - * - * Outputs are written directly to caller-provided strided buffers in - * DOF-major SoA layout — no scratch+transpose required by the caller. - * Pass `nullptr` for any output that is not needed. 
- * - * values_out: size num_dofs * num_qpts; element [d * num_qpts + q] - * gradients_out: size num_dofs * 3 * num_qpts; element [(d*3 + c) * num_qpts + q] - * hessians_out: size num_dofs * 9 * num_qpts; element [(d*9 + r*3 + c) * num_qpts + q] - * - * Non-null output ranges must not overlap each other. Implementations may - * fill requested quantities in any order that is efficient for the basis. - * - * Default implementation calls evaluate_all (or evaluate_values/gradients/ - * hessians as appropriate) per QP, materializing into temp buffers then - * scatter-writing to the output. Performance-sensitive bases must override - * this path so batched assembly does not fall back to Q virtual point - * evaluations. Unit coverage keeps an explicit list of hot bases that are - * expected to provide a direct strided implementation. - */ - virtual void evaluate_at_quadrature_points( - const std::vector>& points, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) const; - - /** - * @brief Fill strided SoA buffers with basis evaluations at quadrature points - * - * Same component layout as evaluate_at_quadrature_points, but each - * dof/component row advances by `output_stride` rather than `points.size()`. - * This lets padded SIMD cache storage be filled directly. Non-null output - * ranges have the same non-overlap requirement. - */ - virtual void evaluate_at_quadrature_points_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) const; - - /** - * @brief Fill zero-initialized scalar cache storage. - * - * BasisCache allocates and zero-initializes its scalar SoA buffers before - * calling this hook. The default implementation overwrites all requested - * entries through the public strided evaluator. 
Sparse-support bases may - * override this and write only active entries, relying on the caller's - * zero-initialization for inactive DOFs and unused derivative components. - */ - virtual void fill_scalar_cache_entry( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) const; - - /** - * @brief Fill SoA buffers with vector-basis evaluations at all quadrature points - * - * Outputs are written in DOF-major SoA layout. Pass `nullptr` for any - * quantity that is not needed. - * - * values_out: size num_dofs * 3 * num_qpts; element [(d*3 + c) * num_qpts + q] - * jacobians_out: size num_dofs * 9 * num_qpts; element [(d*9 + c*3 + r) * num_qpts + q] - * curls_out: size num_dofs * 3 * num_qpts; element [(d*3 + c) * num_qpts + q] - * divergence_out: size num_dofs * num_qpts; element [d * num_qpts + q] - * - * Non-null output ranges must not overlap each other. Implementations may - * fill requested quantities in any order that is efficient for the basis. - */ - virtual void evaluate_vector_at_quadrature_points( - const std::vector>& points, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT jacobians_out, - Real* SVMP_RESTRICT curls_out, - Real* SVMP_RESTRICT divergence_out) const; - - /** - * @brief Fill strided SoA buffers with vector-basis evaluations - * - * Same component layout as evaluate_vector_at_quadrature_points, but each - * dof/component row advances by `output_stride` rather than `points.size()`. - * Non-null output ranges have the same non-overlap requirement. - * - * The base fallback loops over quadrature points through virtual point - * evaluation. H(div)/H(curl) bases used in assembly should override this - * method directly, and tests track the current hot vector families. 
- */ - virtual void evaluate_vector_at_quadrature_points_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT jacobians_out, - Real* SVMP_RESTRICT curls_out, - Real* SVMP_RESTRICT divergence_out) const; - - /** - * @brief Evaluate scalar basis values into a caller-provided raw buffer - * - * Caller is responsible for providing a buffer of at least size() Real - * entries. This avoids the per-call std::vector::resize() cost of the - * vector-output overload. Default implementation forwards through a temp - * vector; bases should override for direct write. - */ virtual void evaluate_values_to(const math::Vector& xi, Real* SVMP_RESTRICT values_out) const; - - /** - * @brief Evaluate gradients into a flat caller-provided buffer - * - * Layout: gradients_out[i * 3 + c] = component c of gradient of basis i. - * Caller provides a buffer of size() * 3 Real entries. - */ virtual void evaluate_gradients_to(const math::Vector& xi, Real* SVMP_RESTRICT gradients_out) const; - - /** - * @brief Evaluate Hessians into a flat caller-provided buffer - * - * Layout: hessians_out[i * 9 + r * 3 + c] = H_i(r, c). - */ virtual void evaluate_hessians_to(const math::Vector& xi, Real* SVMP_RESTRICT hessians_out) const; - /** - * @brief Evaluate vector-valued basis functions (H(div)/H(curl)) - * - * Default implementation throws; vector bases must override. - */ - virtual void evaluate_vector_values(const math::Vector& xi, - std::vector>& values) const; - - /** - * @brief Evaluate reference-space Jacobians of vector-valued basis functions - * - * The returned matrix for basis function `i` has entries - * `jacobians[i](component, derivative_direction) = d phi_i_component / d xi_direction`. - * Unused rows/columns are zero-filled for lower-dimensional elements. 
- */ - virtual void evaluate_vector_jacobians(const math::Vector& xi, - std::vector& jacobians) const; - - /// Evaluate divergence of vector-valued basis functions (if applicable) - virtual void evaluate_divergence(const math::Vector& xi, - std::vector& divergence) const; - - /// Evaluate curl of vector-valued basis functions (if applicable) - virtual void evaluate_curl(const math::Vector& xi, - std::vector>& curl) const; - protected: - /// Finite-difference helper for gradients of scalar bases void numerical_gradient(const math::Vector& xi, std::vector& gradients, Real eps = Real(1e-6)) const; - - /// Finite-difference helper for Hessians of scalar bases void numerical_hessian(const math::Vector& xi, std::vector& hessians, Real eps = Real(1e-5)) const; diff --git a/Code/Source/solver/FE/Basis/BasisTolerance.h b/Code/Source/solver/FE/Basis/BasisTolerance.h deleted file mode 100644 index 423551f09..000000000 --- a/Code/Source/solver/FE/Basis/BasisTolerance.h +++ /dev/null @@ -1,52 +0,0 @@ -/* Copyright (c) Stanford University, The Regents of the University of California, and others. - * - * All Rights Reserved. - * - * See License file. - */ - -#ifndef SVMP_FE_BASIS_BASISTOLERANCE_H -#define SVMP_FE_BASIS_BASISTOLERANCE_H - -#include "Types.h" - -#include - -namespace svmp { -namespace FE { -namespace basis { -namespace detail { - -[[nodiscard]] constexpr Real basis_abs(Real value) noexcept { - return value < Real(0) ? -value : value; -} - -[[nodiscard]] constexpr Real basis_max(Real lhs, Real rhs) noexcept { - return lhs < rhs ? 
rhs : lhs; -} - -[[nodiscard]] constexpr Real basis_scaled_tolerance(Real scale = Real(1), - Real multiplier = Real(64)) noexcept { - return multiplier * std::numeric_limits::epsilon() * - basis_max(Real(1), basis_abs(scale)); -} - -[[nodiscard]] constexpr bool basis_near_zero(Real value, - Real scale = Real(1), - Real multiplier = Real(64)) noexcept { - return basis_abs(value) <= basis_scaled_tolerance(scale, multiplier); -} - -[[nodiscard]] constexpr bool basis_nearly_equal(Real a, - Real b, - Real multiplier = Real(64)) noexcept { - const Real scale = basis_max(Real(1), basis_max(basis_abs(a), basis_abs(b))); - return basis_abs(a - b) <= basis_scaled_tolerance(scale, multiplier); -} - -} // namespace detail -} // namespace basis -} // namespace FE -} // namespace svmp - -#endif // SVMP_FE_BASIS_BASISTOLERANCE_H diff --git a/Code/Source/solver/FE/Basis/BasisTraits.h b/Code/Source/solver/FE/Basis/BasisTraits.h index 835dfe705..d97b59f1f 100644 --- a/Code/Source/solver/FE/Basis/BasisTraits.h +++ b/Code/Source/solver/FE/Basis/BasisTraits.h @@ -11,6 +11,7 @@ #include "Types.h" #include +#include namespace svmp { namespace FE { @@ -25,9 +26,39 @@ enum class BasisTopology { Tetrahedron, Hexahedron, Wedge, - Pyramid, }; +namespace detail { + +[[nodiscard]] constexpr Real basis_abs(Real value) noexcept { + return value < Real(0) ? -value : value; +} + +[[nodiscard]] constexpr Real basis_max(Real lhs, Real rhs) noexcept { + return lhs < rhs ? 
rhs : lhs; +} + +[[nodiscard]] constexpr Real basis_scaled_tolerance(Real scale = Real(1), + Real multiplier = Real(64)) noexcept { + return multiplier * std::numeric_limits::epsilon() * + basis_max(Real(1), basis_abs(scale)); +} + +[[nodiscard]] constexpr bool basis_near_zero(Real value, + Real scale = Real(1), + Real multiplier = Real(64)) noexcept { + return basis_abs(value) <= basis_scaled_tolerance(scale, multiplier); +} + +[[nodiscard]] constexpr bool basis_nearly_equal(Real a, + Real b, + Real multiplier = Real(64)) noexcept { + const Real scale = basis_max(Real(1), basis_max(basis_abs(a), basis_abs(b))); + return basis_abs(a - b) <= basis_scaled_tolerance(scale, multiplier); +} + +} // namespace detail + [[nodiscard]] constexpr bool is_point(ElementType type) noexcept { return type == ElementType::Point1; } @@ -60,8 +91,8 @@ enum class BasisTopology { } [[nodiscard]] constexpr bool is_pyramid(ElementType type) noexcept { - return type == ElementType::Pyramid5 || type == ElementType::Pyramid13 || - type == ElementType::Pyramid14; + (void)type; + return false; } [[nodiscard]] constexpr bool is_simplex(ElementType type) noexcept { @@ -98,9 +129,6 @@ enum class BasisTopology { if (is_wedge(type)) { return BasisTopology::Wedge; } - if (is_pyramid(type)) { - return BasisTopology::Pyramid; - } return BasisTopology::Unknown; } @@ -124,9 +152,6 @@ enum class BasisTopology { case ElementType::Wedge6: case ElementType::Wedge18: return ElementType::Wedge6; - case ElementType::Pyramid5: - case ElementType::Pyramid14: - return ElementType::Pyramid5; default: return type; } @@ -140,7 +165,6 @@ enum class BasisTopology { case ElementType::Tetra4: case ElementType::Hex8: case ElementType::Wedge6: - case ElementType::Pyramid5: return 1; case ElementType::Line3: case ElementType::Triangle6: @@ -148,7 +172,6 @@ enum class BasisTopology { case ElementType::Tetra10: case ElementType::Hex27: case ElementType::Wedge18: - case ElementType::Pyramid14: return 2; default: return -1; 
@@ -179,14 +202,6 @@ enum class BasisTopology { return triangle_lagrange_size(order) * line_lagrange_size(order); } -[[nodiscard]] constexpr std::size_t pyramid_lagrange_size(int order) noexcept { - if (order < 0) { - return 0u; - } - const std::size_t p = static_cast(order); - return (p + 1u) * (p + 2u) * (2u * p + 3u) / 6u; -} - [[nodiscard]] constexpr std::size_t complete_lagrange_alias_size(ElementType type) noexcept { const int order = complete_lagrange_alias_order(type); switch (canonical_lagrange_type(type)) { @@ -204,8 +219,6 @@ enum class BasisTopology { return hex_lagrange_size(order); case ElementType::Wedge6: return wedge_lagrange_size(order); - case ElementType::Pyramid5: - return pyramid_lagrange_size(order); default: return 0u; } diff --git a/Code/Source/solver/FE/Basis/LagrangeBasis.cpp b/Code/Source/solver/FE/Basis/LagrangeBasis.cpp index 63b947516..7516d514a 100644 --- a/Code/Source/solver/FE/Basis/LagrangeBasis.cpp +++ b/Code/Source/solver/FE/Basis/LagrangeBasis.cpp @@ -6,16 +6,11 @@ */ #include "LagrangeBasis.h" -#include "BasisTraits.h" -#include "BasisTolerance.h" -#include "LagrangeBasisFast.h" #include "NodeOrderingConventions.h" -#include "LagrangeBasisPyramid.h" -#include "LagrangeBasisSimplex.h" -#include "LagrangeBasisUtility.h" + #include +#include #include -#include namespace svmp { namespace FE { @@ -23,8299 +18,597 @@ namespace basis { namespace { -using LagrangeTopology = BasisTopology; - -#if defined(_MSC_VER) -#define SVMP_LAGRANGE_NOINLINE __declspec(noinline) -#define SVMP_LAGRANGE_ALIGN64 -#elif defined(__GNUC__) || defined(__clang__) -#define SVMP_LAGRANGE_NOINLINE __attribute__((noinline)) -#define SVMP_LAGRANGE_ALIGN64 __attribute__((aligned(64))) -#else -#define SVMP_LAGRANGE_NOINLINE -#define SVMP_LAGRANGE_ALIGN64 -#endif +using Vec3 = math::Vector; -#ifndef FE_ALWAYS_INLINE -#if defined(_MSC_VER) -#define FE_ALWAYS_INLINE __forceinline -#elif defined(__GNUC__) || defined(__clang__) -#define FE_ALWAYS_INLINE 
__attribute__((always_inline)) inline -#else -#define FE_ALWAYS_INLINE inline -#endif -#endif - -SVMP_LAGRANGE_NOINLINE void evaluate_triangle_order1_gradients_strided( - std::size_t num_qpts, - std::size_t output_stride, - Real* SVMP_RESTRICT gradients_out); +inline constexpr Real equispaced_pm_one_coord(int i, int order) { + if (order <= 0) { + return Real(0); + } + return Real(-1) + Real(2) * static_cast(i) / static_cast(order); +} -struct LagrangeTopologyTraits { - LagrangeTopology topology; - int dimension; +struct AxisEval { + std::vector value; + std::vector first; + std::vector second; }; -struct SimplexExponentHash { - std::size_t operator()(const std::array& exponents) const noexcept { - std::size_t seed = 0x9e3779b97f4a7c15ull; - for (const int exponent : exponents) { - const auto value = static_cast(exponent); - seed ^= value + 0x9e3779b97f4a7c15ull + (seed << 6u) + (seed >> 2u); - } - return seed; - } +struct SimplexEval { + std::vector value; + std::vector gradient; + std::vector hessian; }; -template -void assign_array(std::vector& out, const std::array& values) { - out.assign(values.begin(), values.end()); -} - -bool coordinate_matches_expected(Real coord, Real expected) noexcept { - return detail::basis_nearly_equal(coord, expected); -} +struct NormalizedLagrangeRequest { + ElementType element_type; + int order; +}; -template -void evaluate_fast_outputs(const math::Vector& xi, - std::vector* values, - std::vector* gradients, - std::vector* hessians) { - if (values != nullptr) { - std::array fast_values{}; - FastBasis::evaluate(xi, fast_values); - assign_array(*values, fast_values); - } - if (gradients != nullptr) { - std::array fast_gradients{}; - FastBasis::evaluate_gradients(xi, fast_gradients); - assign_array(*gradients, fast_gradients); - } - if (hessians != nullptr) { - std::array fast_hessians{}; - FastBasis::evaluate_hessians(xi, fast_hessians); - assign_array(*hessians, fast_hessians); +BasisTopology supported_lagrange_topology(ElementType 
type) { + const BasisTopology top = topology(type); + if (top == BasisTopology::Unknown) { + throw BasisElementCompatibilityException("LagrangeBasis: unsupported element type", + __FILE__, __LINE__, __func__); } + return top; } -template -void evaluate_fast_outputs_to(const math::Vector& xi, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - if (values_out != nullptr) { - std::array fast_values{}; - FastBasis::evaluate(xi, fast_values); - for (std::size_t i = 0; i < fast_values.size(); ++i) { - values_out[i] = fast_values[i]; - } - } - if (gradients_out != nullptr) { - std::array fast_gradients{}; - FastBasis::evaluate_gradients(xi, fast_gradients); - for (std::size_t i = 0; i < fast_gradients.size(); ++i) { - gradients_out[i * 3u + 0u] = fast_gradients[i][0]; - gradients_out[i * 3u + 1u] = fast_gradients[i][1]; - gradients_out[i * 3u + 2u] = fast_gradients[i][2]; - } - } - if (hessians_out != nullptr) { - std::array fast_hessians{}; - FastBasis::evaluate_hessians(xi, fast_hessians); - for (std::size_t i = 0; i < fast_hessians.size(); ++i) { - store_hessian(fast_hessians[i], hessians_out + i * 9u); - } +NormalizedLagrangeRequest normalize_lagrange_request(ElementType element_type, int order) { + switch (element_type) { + case ElementType::Line3: + return {ElementType::Line2, std::max(order, 2)}; + case ElementType::Triangle6: + return {ElementType::Triangle3, std::max(order, 2)}; + case ElementType::Quad9: + return {ElementType::Quad4, std::max(order, 2)}; + case ElementType::Tetra10: + return {ElementType::Tetra4, std::max(order, 2)}; + case ElementType::Hex27: + return {ElementType::Hex8, std::max(order, 2)}; + case ElementType::Wedge18: + return {ElementType::Wedge6, std::max(order, 2)}; + case ElementType::Quad8: + throw BasisElementCompatibilityException( + "LagrangeBasis: Quad8 is serendipity; use SerendipityBasis", + __FILE__, __LINE__, __func__); + case ElementType::Hex20: + throw 
BasisElementCompatibilityException( + "LagrangeBasis: Hex20 is serendipity; use SerendipityBasis", + __FILE__, __LINE__, __func__); + case ElementType::Wedge15: + throw BasisElementCompatibilityException( + "LagrangeBasis: Wedge15 is serendipity; use SerendipityBasis", + __FILE__, __LINE__, __func__); + case ElementType::Pyramid5: + case ElementType::Pyramid13: + case ElementType::Pyramid14: + throw BasisElementCompatibilityException( + "LagrangeBasis: pyramid support has been removed from the current solver basis scope", + __FILE__, __LINE__, __func__); + default: + return {element_type, order}; } } -template -void evaluate_fast_outputs_strided(const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - for (std::size_t q = 0; q < points.size(); ++q) { - const auto& xi = points[q]; - if (values_out != nullptr) { - std::array fast_values{}; - FastBasis::evaluate(xi, fast_values); - for (std::size_t i = 0; i < fast_values.size(); ++i) { - values_out[i * output_stride + q] = fast_values[i]; - } - } - if (gradients_out != nullptr) { - std::array fast_gradients{}; - FastBasis::evaluate_gradients(xi, fast_gradients); - for (std::size_t i = 0; i < fast_gradients.size(); ++i) { - Real* g = gradients_out + i * 3u * output_stride; - g[0u * output_stride + q] = fast_gradients[i][0]; - g[1u * output_stride + q] = fast_gradients[i][1]; - g[2u * output_stride + q] = fast_gradients[i][2]; - } - } - if (hessians_out != nullptr) { - std::array fast_hessians{}; - FastBasis::evaluate_hessians(xi, fast_hessians); - for (std::size_t i = 0; i < fast_hessians.size(); ++i) { - const Hessian& hessian = fast_hessians[i]; - Real* H = hessians_out + i * 9u * output_stride; - H[0u * output_stride + q] = hessian(0, 0); - H[1u * output_stride + q] = hessian(0, 1); - H[2u * output_stride + q] = hessian(0, 2); - H[3u * output_stride + q] = hessian(1, 0); - H[4u * output_stride + q] = 
hessian(1, 1); - H[5u * output_stride + q] = hessian(1, 2); - H[6u * output_stride + q] = hessian(2, 0); - H[7u * output_stride + q] = hessian(2, 1); - H[8u * output_stride + q] = hessian(2, 2); - } - } +std::size_t axis_index_pm_one(Real coord, int order) { + if (order <= 0) { + return 0u; } + const Real scaled = (coord + Real(1)) * Real(order) / Real(2); + return static_cast(std::llround(scaled)); } -template -bool evaluate_fixed_lagrange_fast_order(LagrangeTopology topology, - const math::Vector& xi, - std::vector* values, - std::vector* gradients, - std::vector* hessians) { - switch (topology) { - case LagrangeTopology::Line: - evaluate_fast_outputs>(xi, values, gradients, hessians); - return true; - case LagrangeTopology::Quadrilateral: - evaluate_fast_outputs>(xi, values, gradients, hessians); - return true; - case LagrangeTopology::Hexahedron: - evaluate_fast_outputs>(xi, values, gradients, hessians); - return true; - case LagrangeTopology::Triangle: - evaluate_fast_outputs>(xi, values, gradients, hessians); - return true; - case LagrangeTopology::Tetrahedron: - evaluate_fast_outputs>(xi, values, gradients, hessians); - return true; - default: - return false; +int simplex_lattice_index(Real value, int order) { + if (order <= 0) { + return 0; } + return static_cast(std::llround(value * Real(order))); } -template -bool evaluate_fixed_lagrange_fast_to_order(LagrangeTopology topology, - const math::Vector& xi, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - switch (topology) { - case LagrangeTopology::Line: - evaluate_fast_outputs_to>(xi, values_out, gradients_out, hessians_out); - return true; - case LagrangeTopology::Quadrilateral: - evaluate_fast_outputs_to>(xi, values_out, gradients_out, hessians_out); - return true; - case LagrangeTopology::Hexahedron: - evaluate_fast_outputs_to>(xi, values_out, gradients_out, hessians_out); - return true; - case LagrangeTopology::Triangle: - 
evaluate_fast_outputs_to>(xi, values_out, gradients_out, hessians_out); - return true; - case LagrangeTopology::Tetrahedron: - evaluate_fast_outputs_to>(xi, values_out, gradients_out, hessians_out); - return true; - default: - return false; +LagrangeBasis::SimplexExponent simplex_exponent_from_point(const Vec3& p, + BasisTopology top, + int order) { + LagrangeBasis::SimplexExponent e{0, 0, 0, 0}; + if (order <= 0) { + return e; } -} - -template -bool evaluate_fixed_lagrange_fast_strided_order( - LagrangeTopology topology, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - switch (topology) { - case LagrangeTopology::Line: - evaluate_fast_outputs_strided>( - points, output_stride, values_out, gradients_out, hessians_out); - return true; - case LagrangeTopology::Quadrilateral: - evaluate_fast_outputs_strided>( - points, output_stride, values_out, gradients_out, hessians_out); - return true; - case LagrangeTopology::Hexahedron: - evaluate_fast_outputs_strided>( - points, output_stride, values_out, gradients_out, hessians_out); - return true; - case LagrangeTopology::Triangle: - evaluate_fast_outputs_strided>( - points, output_stride, values_out, gradients_out, hessians_out); - return true; - case LagrangeTopology::Tetrahedron: - evaluate_fast_outputs_strided>( - points, output_stride, values_out, gradients_out, hessians_out); - return true; - default: - return false; + if (top == BasisTopology::Triangle) { + e[1] = simplex_lattice_index(p[0], order); + e[2] = simplex_lattice_index(p[1], order); + e[0] = order - e[1] - e[2]; + } else { + e[1] = simplex_lattice_index(p[0], order); + e[2] = simplex_lattice_index(p[1], order); + e[3] = simplex_lattice_index(p[2], order); + e[0] = order - e[1] - e[2] - e[3]; } + return e; } -void evaluate_triangle_order3_values_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT 
values_out) { - Real* row0 = values_out + 0u * output_stride; - Real* row1 = values_out + 1u * output_stride; - Real* row2 = values_out + 2u * output_stride; - Real* row3 = values_out + 3u * output_stride; - Real* row4 = values_out + 4u * output_stride; - Real* row5 = values_out + 5u * output_stride; - Real* row6 = values_out + 6u * output_stride; - Real* row7 = values_out + 7u * output_stride; - Real* row8 = values_out + 8u * output_stride; - Real* row9 = values_out + 9u * output_stride; - - if (points.size() == 4u && output_stride == 4u) { - Real p10[4]; - Real p11[4]; - Real p12[4]; - Real p20[4]; - Real p21[4]; - Real p22[4]; - Real p30[4]; - Real p31[4]; - Real p32[4]; - - for (std::size_t q = 0; q < 4u; ++q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l0 = Real(1) - l1 - l2; - - p10[q] = Real(3) * l0; - p11[q] = Real(3) * l1; - p12[q] = Real(3) * l2; - p20[q] = Real(0.5) * p10[q] * (p10[q] - Real(1)); - p21[q] = Real(0.5) * p11[q] * (p11[q] - Real(1)); - p22[q] = Real(0.5) * p12[q] * (p12[q] - Real(1)); - p30[q] = (p10[q] * (p10[q] - Real(1)) * (p10[q] - Real(2))) / Real(6); - p31[q] = (p11[q] * (p11[q] - Real(1)) * (p11[q] - Real(2))) / Real(6); - p32[q] = (p12[q] * (p12[q] - Real(1)) * (p12[q] - Real(2))) / Real(6); - } +void evaluate_1d_lagrange(Real x, const std::vector& nodes, AxisEval& out) { + const std::size_t n = nodes.size(); + out.value.assign(n, Real(0)); + out.first.assign(n, Real(0)); + out.second.assign(n, Real(0)); - row0[0] = p30[0]; row0[1] = p30[1]; row0[2] = p30[2]; row0[3] = p30[3]; - row1[0] = p31[0]; row1[1] = p31[1]; row1[2] = p31[2]; row1[3] = p31[3]; - row2[0] = p32[0]; row2[1] = p32[1]; row2[2] = p32[2]; row2[3] = p32[3]; - row3[0] = p20[0] * p11[0]; - row3[1] = p20[1] * p11[1]; - row3[2] = p20[2] * p11[2]; - row3[3] = p20[3] * p11[3]; - row4[0] = p10[0] * p21[0]; - row4[1] = p10[1] * p21[1]; - row4[2] = p10[2] * p21[2]; - row4[3] = p10[3] * p21[3]; - row5[0] = p21[0] * p12[0]; - 
row5[1] = p21[1] * p12[1]; - row5[2] = p21[2] * p12[2]; - row5[3] = p21[3] * p12[3]; - row6[0] = p11[0] * p22[0]; - row6[1] = p11[1] * p22[1]; - row6[2] = p11[2] * p22[2]; - row6[3] = p11[3] * p22[3]; - row7[0] = p10[0] * p22[0]; - row7[1] = p10[1] * p22[1]; - row7[2] = p10[2] * p22[2]; - row7[3] = p10[3] * p22[3]; - row8[0] = p20[0] * p12[0]; - row8[1] = p20[1] * p12[1]; - row8[2] = p20[2] * p12[2]; - row8[3] = p20[3] * p12[3]; - row9[0] = p10[0] * p11[0] * p12[0]; - row9[1] = p10[1] * p11[1] * p12[1]; - row9[2] = p10[2] * p11[2] * p12[2]; - row9[3] = p10[3] * p11[3] * p12[3]; + if (n == 1u) { + out.value[0] = Real(1); return; } - for (std::size_t q = 0; q < points.size(); ++q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l0 = Real(1) - l1 - l2; - - const Real p10 = Real(3) * l0; - const Real p11 = Real(3) * l1; - const Real p12 = Real(3) * l2; - const Real p20 = Real(0.5) * p10 * (p10 - Real(1)); - const Real p21 = Real(0.5) * p11 * (p11 - Real(1)); - const Real p22 = Real(0.5) * p12 * (p12 - Real(1)); - const Real p30 = (p10 * (p10 - Real(1)) * (p10 - Real(2))) / Real(6); - const Real p31 = (p11 * (p11 - Real(1)) * (p11 - Real(2))) / Real(6); - const Real p32 = (p12 * (p12 - Real(1)) * (p12 - Real(2))) / Real(6); - - row0[q] = p30; - row1[q] = p31; - row2[q] = p32; - row3[q] = p20 * p11; - row4[q] = p10 * p21; - row5[q] = p21 * p12; - row6[q] = p11 * p22; - row7[q] = p10 * p22; - row8[q] = p20 * p12; - row9[q] = p10 * p11 * p12; - } -} + for (std::size_t i = 0; i < n; ++i) { + Real denom = Real(1); + for (std::size_t j = 0; j < n; ++j) { + if (j != i) { + denom *= nodes[i] - nodes[j]; + } + } -void evaluate_triangle_order2_values_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out) { - Real* row0 = values_out + 0u * output_stride; - Real* row1 = values_out + 1u * output_stride; - Real* row2 = values_out + 2u * output_stride; - Real* row3 = values_out + 3u * 
output_stride; - Real* row4 = values_out + 4u * output_stride; - Real* row5 = values_out + 5u * output_stride; + Real value = Real(1); + for (std::size_t j = 0; j < n; ++j) { + if (j != i) { + value *= x - nodes[j]; + } + } + out.value[i] = value / denom; - if (points.size() == 4u && output_stride == 4u) { - Real l0[4]; - Real l1[4]; - Real l2[4]; - for (std::size_t q = 0; q < 4u; ++q) { - const auto& xi = points[q]; - l1[q] = xi[0]; - l2[q] = xi[1]; - l0[q] = Real(1) - l1[q] - l2[q]; + Real first = Real(0); + for (std::size_t m = 0; m < n; ++m) { + if (m == i) { + continue; + } + Real product = Real(1); + for (std::size_t j = 0; j < n; ++j) { + if (j != i && j != m) { + product *= x - nodes[j]; + } + } + first += product; } + out.first[i] = first / denom; - row0[0] = l0[0] * (Real(2) * l0[0] - Real(1)); - row0[1] = l0[1] * (Real(2) * l0[1] - Real(1)); - row0[2] = l0[2] * (Real(2) * l0[2] - Real(1)); - row0[3] = l0[3] * (Real(2) * l0[3] - Real(1)); - row1[0] = l1[0] * (Real(2) * l1[0] - Real(1)); - row1[1] = l1[1] * (Real(2) * l1[1] - Real(1)); - row1[2] = l1[2] * (Real(2) * l1[2] - Real(1)); - row1[3] = l1[3] * (Real(2) * l1[3] - Real(1)); - row2[0] = l2[0] * (Real(2) * l2[0] - Real(1)); - row2[1] = l2[1] * (Real(2) * l2[1] - Real(1)); - row2[2] = l2[2] * (Real(2) * l2[2] - Real(1)); - row2[3] = l2[3] * (Real(2) * l2[3] - Real(1)); - row3[0] = Real(4) * l0[0] * l1[0]; - row3[1] = Real(4) * l0[1] * l1[1]; - row3[2] = Real(4) * l0[2] * l1[2]; - row3[3] = Real(4) * l0[3] * l1[3]; - row4[0] = Real(4) * l1[0] * l2[0]; - row4[1] = Real(4) * l1[1] * l2[1]; - row4[2] = Real(4) * l1[2] * l2[2]; - row4[3] = Real(4) * l1[3] * l2[3]; - row5[0] = Real(4) * l0[0] * l2[0]; - row5[1] = Real(4) * l0[1] * l2[1]; - row5[2] = Real(4) * l0[2] * l2[2]; - row5[3] = Real(4) * l0[3] * l2[3]; - return; + Real second = Real(0); + for (std::size_t m = 0; m < n; ++m) { + if (m == i) { + continue; + } + for (std::size_t l = 0; l < n; ++l) { + if (l == i || l == m) { + continue; + } + Real 
product = Real(1); + for (std::size_t j = 0; j < n; ++j) { + if (j != i && j != m && j != l) { + product *= x - nodes[j]; + } + } + second += product; + } + } + out.second[i] = second / denom; } +} - auto write_q = [&](std::size_t q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l0 = Real(1) - l1 - l2; - row0[q] = l0 * (Real(2) * l0 - Real(1)); - row1[q] = l1 * (Real(2) * l1 - Real(1)); - row2[q] = l2 * (Real(2) * l2 - Real(1)); - row3[q] = Real(4) * l0 * l1; - row4[q] = Real(4) * l1 * l2; - row5[q] = Real(4) * l0 * l2; - }; +std::array simplex_factor(int alpha, Real lambda, int order) { + Real value = Real(1); + Real first = Real(0); + Real second = Real(0); - if (points.size() == 4u) { - write_q(0u); - write_q(1u); - write_q(2u); - write_q(3u); - return; + for (int m = 0; m < alpha; ++m) { + const Real factor = Real(order) * lambda - Real(m); + const Real inv = Real(1) / Real(m + 1); + const Real old_value = value; + const Real old_first = first; + const Real old_second = second; + value = old_value * factor * inv; + first = (old_first * factor + old_value * Real(order)) * inv; + second = (old_second * factor + Real(2) * old_first * Real(order)) * inv; } - for (std::size_t q = 0; q < points.size(); ++q) { - write_q(q); - } + return {value, first, second}; } -void evaluate_triangle_order1_values_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out) { - Real* row0 = values_out + 0u * output_stride; - Real* row1 = values_out + 1u * output_stride; - Real* row2 = values_out + 2u * output_stride; +void evaluate_simplex(const Vec3& xi, + BasisTopology top, + int order, + const std::vector& exponents, + SimplexEval& out) { + const std::size_t n = exponents.size(); + out.value.assign(n, Real(0)); + out.gradient.assign(n, Gradient{}); + out.hessian.assign(n, Hessian{}); - for (std::size_t q = 0; q < points.size(); ++q) { - const auto& xi = points[q]; - row0[q] = Real(1) - xi[0] 
- xi[1]; - row1[q] = xi[0]; - row2[q] = xi[1]; + if (n == 1u && order == 0) { + out.value[0] = Real(1); + return; } -} -void evaluate_triangle_order2_gradients_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT gradients_out) { - Real* row0 = gradients_out + 0u * 3u * output_stride; - Real* row1 = gradients_out + 1u * 3u * output_stride; - Real* row2 = gradients_out + 2u * 3u * output_stride; - Real* row3 = gradients_out + 3u * 3u * output_stride; - Real* row4 = gradients_out + 4u * 3u * output_stride; - Real* row5 = gradients_out + 5u * 3u * output_stride; + const int bary_count = top == BasisTopology::Triangle ? 3 : 4; + std::array lambda{Real(0), Real(0), Real(0), Real(0)}; + std::array lambda_grad{}; - for (std::size_t q = 0; q < points.size(); ++q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l0 = Real(1) - l1 - l2; - const Real g0 = Real(1) - Real(4) * l0; - row0[0u * output_stride + q] = g0; - row0[1u * output_stride + q] = g0; - row0[2u * output_stride + q] = Real(0); - row1[0u * output_stride + q] = Real(4) * l1 - Real(1); - row1[1u * output_stride + q] = Real(0); - row1[2u * output_stride + q] = Real(0); - row2[0u * output_stride + q] = Real(0); - row2[1u * output_stride + q] = Real(4) * l2 - Real(1); - row2[2u * output_stride + q] = Real(0); - row3[0u * output_stride + q] = Real(4) * (l0 - l1); - row3[1u * output_stride + q] = Real(-4) * l1; - row3[2u * output_stride + q] = Real(0); - row4[0u * output_stride + q] = Real(4) * l2; - row4[1u * output_stride + q] = Real(4) * l1; - row4[2u * output_stride + q] = Real(0); - row5[0u * output_stride + q] = Real(-4) * l2; - row5[1u * output_stride + q] = Real(4) * (l0 - l2); - row5[2u * output_stride + q] = Real(0); + lambda[1] = xi[0]; + lambda[2] = xi[1]; + lambda_grad[1][0] = Real(1); + lambda_grad[2][1] = Real(1); + if (top == BasisTopology::Triangle) { + lambda[0] = Real(1) - xi[0] - xi[1]; + lambda_grad[0][0] = 
Real(-1); + lambda_grad[0][1] = Real(-1); + } else { + lambda[3] = xi[2]; + lambda[0] = Real(1) - xi[0] - xi[1] - xi[2]; + lambda_grad[0][0] = Real(-1); + lambda_grad[0][1] = Real(-1); + lambda_grad[0][2] = Real(-1); + lambda_grad[3][2] = Real(1); } -} -inline void write_constant_hessian_q4(Real* SVMP_RESTRICT row, - std::size_t output_stride, - Real h00, - Real h01, - Real h02, - Real h10, - Real h11, - Real h12, - Real h20, - Real h21, - Real h22) { - Real* c0 = row + 0u * output_stride; - Real* c1 = row + 1u * output_stride; - Real* c2 = row + 2u * output_stride; - Real* c3 = row + 3u * output_stride; - Real* c4 = row + 4u * output_stride; - Real* c5 = row + 5u * output_stride; - Real* c6 = row + 6u * output_stride; - Real* c7 = row + 7u * output_stride; - Real* c8 = row + 8u * output_stride; + for (std::size_t i = 0; i < n; ++i) { + std::array, 4> f{}; + for (int a = 0; a < bary_count; ++a) { + f[static_cast(a)] = + simplex_factor(exponents[i][static_cast(a)], + lambda[static_cast(a)], + order); + } - c0[0] = h00; c0[1] = h00; c0[2] = h00; c0[3] = h00; - c1[0] = h01; c1[1] = h01; c1[2] = h01; c1[3] = h01; - c2[0] = h02; c2[1] = h02; c2[2] = h02; c2[3] = h02; - c3[0] = h10; c3[1] = h10; c3[2] = h10; c3[3] = h10; - c4[0] = h11; c4[1] = h11; c4[2] = h11; c4[3] = h11; - c5[0] = h12; c5[1] = h12; c5[2] = h12; c5[3] = h12; - c6[0] = h20; c6[1] = h20; c6[2] = h20; c6[3] = h20; - c7[0] = h21; c7[1] = h21; c7[2] = h21; c7[3] = h21; - c8[0] = h22; c8[1] = h22; c8[2] = h22; c8[3] = h22; -} + Real value = Real(1); + for (int a = 0; a < bary_count; ++a) { + value *= f[static_cast(a)][0]; + } + out.value[i] = value; -SVMP_LAGRANGE_NOINLINE void evaluate_triangle_order2_hessians_q4( - std::size_t output_stride, - Real* SVMP_RESTRICT hessians_out) { - write_constant_hessian_q4(hessians_out + 0u * 9u * output_stride, - output_stride, - Real(4), Real(4), Real(0), - Real(4), Real(4), Real(0), - Real(0), Real(0), Real(0)); - write_constant_hessian_q4(hessians_out + 1u * 9u * 
output_stride, - output_stride, - Real(4), Real(0), Real(0), - Real(0), Real(0), Real(0), - Real(0), Real(0), Real(0)); - write_constant_hessian_q4(hessians_out + 2u * 9u * output_stride, - output_stride, - Real(0), Real(0), Real(0), - Real(0), Real(4), Real(0), - Real(0), Real(0), Real(0)); - write_constant_hessian_q4(hessians_out + 3u * 9u * output_stride, - output_stride, - Real(-8), Real(-4), Real(0), - Real(-4), Real(0), Real(0), - Real(0), Real(0), Real(0)); - write_constant_hessian_q4(hessians_out + 4u * 9u * output_stride, - output_stride, - Real(0), Real(4), Real(0), - Real(4), Real(0), Real(0), - Real(0), Real(0), Real(0)); - write_constant_hessian_q4(hessians_out + 5u * 9u * output_stride, - output_stride, - Real(0), Real(-4), Real(0), - Real(-4), Real(-8), Real(0), - Real(0), Real(0), Real(0)); -} + for (int a = 0; a < bary_count; ++a) { + Real product = f[static_cast(a)][1]; + for (int b = 0; b < bary_count; ++b) { + if (b != a) { + product *= f[static_cast(b)][0]; + } + } + for (std::size_t c = 0; c < 3u; ++c) { + out.gradient[i][c] += product * lambda_grad[static_cast(a)][c]; + } + } -SVMP_LAGRANGE_NOINLINE void evaluate_tet_order2_hessians_q4( - std::size_t output_stride, - Real* SVMP_RESTRICT hessians_out) { - write_constant_hessian_q4(hessians_out + 0u * 9u * output_stride, - output_stride, - Real(4), Real(4), Real(4), - Real(4), Real(4), Real(4), - Real(4), Real(4), Real(4)); - write_constant_hessian_q4(hessians_out + 1u * 9u * output_stride, - output_stride, - Real(4), Real(0), Real(0), - Real(0), Real(0), Real(0), - Real(0), Real(0), Real(0)); - write_constant_hessian_q4(hessians_out + 2u * 9u * output_stride, - output_stride, - Real(0), Real(0), Real(0), - Real(0), Real(4), Real(0), - Real(0), Real(0), Real(0)); - write_constant_hessian_q4(hessians_out + 3u * 9u * output_stride, - output_stride, - Real(0), Real(0), Real(0), - Real(0), Real(0), Real(0), - Real(0), Real(0), Real(4)); - write_constant_hessian_q4(hessians_out + 4u * 9u * 
output_stride, - output_stride, - Real(-8), Real(-4), Real(-4), - Real(-4), Real(0), Real(0), - Real(-4), Real(0), Real(0)); - write_constant_hessian_q4(hessians_out + 5u * 9u * output_stride, - output_stride, - Real(0), Real(4), Real(0), - Real(4), Real(0), Real(0), - Real(0), Real(0), Real(0)); - write_constant_hessian_q4(hessians_out + 6u * 9u * output_stride, - output_stride, - Real(0), Real(-4), Real(0), - Real(-4), Real(-8), Real(-4), - Real(0), Real(-4), Real(0)); - write_constant_hessian_q4(hessians_out + 7u * 9u * output_stride, - output_stride, - Real(0), Real(0), Real(-4), - Real(0), Real(0), Real(-4), - Real(-4), Real(-4), Real(-8)); - write_constant_hessian_q4(hessians_out + 8u * 9u * output_stride, - output_stride, - Real(0), Real(0), Real(4), - Real(0), Real(0), Real(0), - Real(4), Real(0), Real(0)); - write_constant_hessian_q4(hessians_out + 9u * 9u * output_stride, - output_stride, - Real(0), Real(0), Real(0), - Real(0), Real(0), Real(4), - Real(0), Real(4), Real(0)); + for (int a = 0; a < bary_count; ++a) { + for (int b = 0; b < bary_count; ++b) { + Real product = (a == b) + ? 
f[static_cast(a)][2] + : f[static_cast(a)][1] * + f[static_cast(b)][1]; + for (int c = 0; c < bary_count; ++c) { + if (c != a && c != b) { + product *= f[static_cast(c)][0]; + } + } + for (std::size_t r = 0; r < 3u; ++r) { + for (std::size_t c = 0; c < 3u; ++c) { + out.hessian[i](r, c) += + product * + lambda_grad[static_cast(a)][r] * + lambda_grad[static_cast(b)][c]; + } + } + } + } + } } -void evaluate_tet_order1_values_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out) { - Real* row0 = values_out + 0u * output_stride; - Real* row1 = values_out + 1u * output_stride; - Real* row2 = values_out + 2u * output_stride; - Real* row3 = values_out + 3u * output_stride; - - for (std::size_t q = 0; q < points.size(); ++q) { - const auto& xi = points[q]; - row0[q] = Real(1) - xi[0] - xi[1] - xi[2]; - row1[q] = xi[0]; - row2[q] = xi[1]; - row3[q] = xi[2]; - } +void store_gradient(const Gradient& gradient, Real* dst) { + dst[0] = gradient[0]; + dst[1] = gradient[1]; + dst[2] = gradient[2]; } -void evaluate_tet_order1_gradients_strided( - std::size_t num_qpts, - std::size_t output_stride, - Real* SVMP_RESTRICT gradients_out) { - Real* row0 = gradients_out + 0u * 3u * output_stride; - Real* row1 = gradients_out + 1u * 3u * output_stride; - Real* row2 = gradients_out + 2u * 3u * output_stride; - Real* row3 = gradients_out + 3u * 3u * output_stride; +} // namespace - for (std::size_t q = 0; q < num_qpts; ++q) { - row0[0u * output_stride + q] = Real(-1); - row0[1u * output_stride + q] = Real(-1); - row0[2u * output_stride + q] = Real(-1); - row1[0u * output_stride + q] = Real(1); - row1[1u * output_stride + q] = Real(0); - row1[2u * output_stride + q] = Real(0); - row2[0u * output_stride + q] = Real(0); - row2[1u * output_stride + q] = Real(1); - row2[2u * output_stride + q] = Real(0); - row3[0u * output_stride + q] = Real(0); - row3[1u * output_stride + q] = Real(0); - row3[2u * output_stride + q] = Real(1); - } +void 
prewarm_lagrange_basis_scratch(int max_order, std::size_t max_qpts) { + const auto n = static_cast(std::max(0, max_order) + 1); + prewarm_basis_function_scratch(std::max(n * n * n, max_qpts)); } -void evaluate_zero_hessians_strided( - std::size_t num_nodes, - std::size_t num_qpts, - std::size_t output_stride, - Real* SVMP_RESTRICT hessians_out) { - if (num_qpts == 4u) { - for (std::size_t node = 0; node < num_nodes; ++node) { - write_constant_hessian_q4(hessians_out + node * 9u * output_stride, - output_stride, - Real(0), Real(0), Real(0), - Real(0), Real(0), Real(0), - Real(0), Real(0), Real(0)); - } - return; +LagrangeBasis::LagrangeBasis(ElementType type, int order) + : element_type_(type), order_(order) { + const auto normalized = normalize_lagrange_request(element_type_, order_); + element_type_ = normalized.element_type; + order_ = normalized.order; + if (order_ < 0) { + throw BasisConfigurationException("LagrangeBasis requires non-negative polynomial order", + __FILE__, __LINE__, __func__); } - for (std::size_t node = 0; node < num_nodes; ++node) { - Real* row = hessians_out + node * 9u * output_stride; - for (std::size_t q = 0; q < num_qpts; ++q) { - row[0u * output_stride + q] = Real(0); - row[1u * output_stride + q] = Real(0); - row[2u * output_stride + q] = Real(0); - row[3u * output_stride + q] = Real(0); - row[4u * output_stride + q] = Real(0); - row[5u * output_stride + q] = Real(0); - row[6u * output_stride + q] = Real(0); - row[7u * output_stride + q] = Real(0); - row[8u * output_stride + q] = Real(0); - } - } + topology_ = supported_lagrange_topology(element_type_); + dimension_ = reference_dimension(element_type_); + init_nodes(); } -void evaluate_tet_order2_values_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out) { - Real* row0 = values_out + 0u * output_stride; - Real* row1 = values_out + 1u * output_stride; - Real* row2 = values_out + 2u * output_stride; - Real* row3 = values_out + 3u * 
output_stride; - Real* row4 = values_out + 4u * output_stride; - Real* row5 = values_out + 5u * output_stride; - Real* row6 = values_out + 6u * output_stride; - Real* row7 = values_out + 7u * output_stride; - Real* row8 = values_out + 8u * output_stride; - Real* row9 = values_out + 9u * output_stride; - - if (points.size() == 4u && output_stride == 4u) { - Real l0[4]; - Real l1[4]; - Real l2[4]; - Real l3[4]; - for (std::size_t q = 0; q < 4u; ++q) { - const auto& xi = points[q]; - l1[q] = xi[0]; - l2[q] = xi[1]; - l3[q] = xi[2]; - l0[q] = Real(1) - l1[q] - l2[q] - l3[q]; - } - - row0[0] = l0[0] * (Real(2) * l0[0] - Real(1)); - row0[1] = l0[1] * (Real(2) * l0[1] - Real(1)); - row0[2] = l0[2] * (Real(2) * l0[2] - Real(1)); - row0[3] = l0[3] * (Real(2) * l0[3] - Real(1)); - row1[0] = l1[0] * (Real(2) * l1[0] - Real(1)); - row1[1] = l1[1] * (Real(2) * l1[1] - Real(1)); - row1[2] = l1[2] * (Real(2) * l1[2] - Real(1)); - row1[3] = l1[3] * (Real(2) * l1[3] - Real(1)); - row2[0] = l2[0] * (Real(2) * l2[0] - Real(1)); - row2[1] = l2[1] * (Real(2) * l2[1] - Real(1)); - row2[2] = l2[2] * (Real(2) * l2[2] - Real(1)); - row2[3] = l2[3] * (Real(2) * l2[3] - Real(1)); - row3[0] = l3[0] * (Real(2) * l3[0] - Real(1)); - row3[1] = l3[1] * (Real(2) * l3[1] - Real(1)); - row3[2] = l3[2] * (Real(2) * l3[2] - Real(1)); - row3[3] = l3[3] * (Real(2) * l3[3] - Real(1)); - row4[0] = Real(4) * l0[0] * l1[0]; - row4[1] = Real(4) * l0[1] * l1[1]; - row4[2] = Real(4) * l0[2] * l1[2]; - row4[3] = Real(4) * l0[3] * l1[3]; - row5[0] = Real(4) * l1[0] * l2[0]; - row5[1] = Real(4) * l1[1] * l2[1]; - row5[2] = Real(4) * l1[2] * l2[2]; - row5[3] = Real(4) * l1[3] * l2[3]; - row6[0] = Real(4) * l0[0] * l2[0]; - row6[1] = Real(4) * l0[1] * l2[1]; - row6[2] = Real(4) * l0[2] * l2[2]; - row6[3] = Real(4) * l0[3] * l2[3]; - row7[0] = Real(4) * l0[0] * l3[0]; - row7[1] = Real(4) * l0[1] * l3[1]; - row7[2] = Real(4) * l0[2] * l3[2]; - row7[3] = Real(4) * l0[3] * l3[3]; - row8[0] = Real(4) * l1[0] * l3[0]; - 
row8[1] = Real(4) * l1[1] * l3[1]; - row8[2] = Real(4) * l1[2] * l3[2]; - row8[3] = Real(4) * l1[3] * l3[3]; - row9[0] = Real(4) * l2[0] * l3[0]; - row9[1] = Real(4) * l2[1] * l3[1]; - row9[2] = Real(4) * l2[2] * l3[2]; - row9[3] = Real(4) * l2[3] * l3[3]; - return; +void LagrangeBasis::init_equispaced_1d_nodes() { + nodes_1d_.resize(static_cast(order_ + 1)); + for (int i = 0; i <= order_; ++i) { + nodes_1d_[static_cast(i)] = + equispaced_pm_one_coord(i, order_); } +} - auto write_q = [&](std::size_t q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l3 = xi[2]; - const Real l0 = Real(1) - l1 - l2 - l3; - row0[q] = l0 * (Real(2) * l0 - Real(1)); - row1[q] = l1 * (Real(2) * l1 - Real(1)); - row2[q] = l2 * (Real(2) * l2 - Real(1)); - row3[q] = l3 * (Real(2) * l3 - Real(1)); - row4[q] = Real(4) * l0 * l1; - row5[q] = Real(4) * l1 * l2; - row6[q] = Real(4) * l0 * l2; - row7[q] = Real(4) * l0 * l3; - row8[q] = Real(4) * l1 * l3; - row9[q] = Real(4) * l2 * l3; - }; +void LagrangeBasis::init_nodes() { + nodes_.clear(); + nodes_1d_.clear(); + tensor_indices_.clear(); + simplex_exponents_.clear(); + wedge_indices_.clear(); - if (points.size() == 4u) { - write_q(0u); - write_q(1u); - write_q(2u); - write_q(3u); - return; + switch (topology_) { + case BasisTopology::Point: + build_point_nodes(); + return; + case BasisTopology::Line: + build_tensor_product_nodes(1); + return; + case BasisTopology::Quadrilateral: + build_tensor_product_nodes(2); + return; + case BasisTopology::Hexahedron: + build_tensor_product_nodes(3); + return; + case BasisTopology::Triangle: + case BasisTopology::Tetrahedron: + build_simplex_nodes(); + return; + case BasisTopology::Wedge: + build_wedge_nodes(); + return; + default: + break; } - for (std::size_t q = 0; q < points.size(); ++q) { - write_q(q); - } + throw BasisElementCompatibilityException("Unsupported element type in LagrangeBasis::init_nodes", + __FILE__, __LINE__, __func__); } -inline void 
write_tet_order2_gradient_q(Real* SVMP_RESTRICT row, - std::size_t output_stride, - std::size_t q, - Real gx, - Real gy, - Real gz) { - row[0u * output_stride + q] = gx; - row[1u * output_stride + q] = gy; - row[2u * output_stride + q] = gz; +void LagrangeBasis::build_point_nodes() { + nodes_.push_back(Vec3{Real(0), Real(0), Real(0)}); } -SVMP_LAGRANGE_NOINLINE void evaluate_tet_order2_gradients_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT gradients_out) { - Real* row0 = gradients_out + 0u * 3u * output_stride; - Real* row1 = gradients_out + 1u * 3u * output_stride; - Real* row2 = gradients_out + 2u * 3u * output_stride; - Real* row3 = gradients_out + 3u * 3u * output_stride; - Real* row4 = gradients_out + 4u * 3u * output_stride; - Real* row5 = gradients_out + 5u * 3u * output_stride; - Real* row6 = gradients_out + 6u * 3u * output_stride; - Real* row7 = gradients_out + 7u * 3u * output_stride; - Real* row8 = gradients_out + 8u * 3u * output_stride; - Real* row9 = gradients_out + 9u * 3u * output_stride; - - auto write_q = [&](std::size_t q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l3 = xi[2]; - const Real l0 = Real(1) - l1 - l2 - l3; - const Real four = Real(4); - const Real g0 = Real(1) - four * l0; - - write_tet_order2_gradient_q(row0, output_stride, q, g0, g0, g0); - write_tet_order2_gradient_q(row1, output_stride, q, four * l1 - Real(1), Real(0), Real(0)); - write_tet_order2_gradient_q(row2, output_stride, q, Real(0), four * l2 - Real(1), Real(0)); - write_tet_order2_gradient_q(row3, output_stride, q, Real(0), Real(0), four * l3 - Real(1)); - write_tet_order2_gradient_q(row4, output_stride, q, four * (l0 - l1), -four * l1, -four * l1); - write_tet_order2_gradient_q(row5, output_stride, q, four * l2, four * l1, Real(0)); - write_tet_order2_gradient_q(row6, output_stride, q, -four * l2, four * (l0 - l2), -four * l2); - write_tet_order2_gradient_q(row7, 
output_stride, q, -four * l3, -four * l3, four * (l0 - l3)); - write_tet_order2_gradient_q(row8, output_stride, q, four * l3, Real(0), four * l1); - write_tet_order2_gradient_q(row9, output_stride, q, Real(0), four * l3, four * l2); - }; - - if (points.size() == 4u) { - write_q(0u); - write_q(1u); - write_q(2u); - write_q(3u); - return; - } - - for (std::size_t q = 0; q < points.size(); ++q) { - write_q(q); +void LagrangeBasis::build_tensor_product_nodes(int dimensions) { + init_equispaced_1d_nodes(); + nodes_ = ReferenceNodeLayout::get_lagrange_node_coords(element_type_, order_); + tensor_indices_.reserve(nodes_.size()); + for (const auto& node : nodes_) { + TensorNodeIndex idx{0u, 0u, 0u}; + idx[0] = axis_index_pm_one(node[0], order_); + if (dimensions >= 2) { + idx[1] = axis_index_pm_one(node[1], order_); + } + if (dimensions >= 3) { + idx[2] = axis_index_pm_one(node[2], order_); + } + tensor_indices_.push_back(idx); } } -inline void fill_simplex_order3_factor_values(Real lambda, Real* SVMP_RESTRICT phi) { - const Real t = Real(3) * lambda; - phi[0] = Real(1); - phi[1] = t; - phi[2] = phi[1] * (t - Real(1)) * Real(0.5); - phi[3] = phi[2] * (t - Real(2)) / Real(3); -} - -void evaluate_tet_order3_values_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out) { - Real* row0 = values_out + 0u * output_stride; - Real* row1 = values_out + 1u * output_stride; - Real* row2 = values_out + 2u * output_stride; - Real* row3 = values_out + 3u * output_stride; - Real* row4 = values_out + 4u * output_stride; - Real* row5 = values_out + 5u * output_stride; - Real* row6 = values_out + 6u * output_stride; - Real* row7 = values_out + 7u * output_stride; - Real* row8 = values_out + 8u * output_stride; - Real* row9 = values_out + 9u * output_stride; - Real* row10 = values_out + 10u * output_stride; - Real* row11 = values_out + 11u * output_stride; - Real* row12 = values_out + 12u * output_stride; - Real* row13 = values_out + 13u * 
output_stride; - Real* row14 = values_out + 14u * output_stride; - Real* row15 = values_out + 15u * output_stride; - Real* row16 = values_out + 16u * output_stride; - Real* row17 = values_out + 17u * output_stride; - Real* row18 = values_out + 18u * output_stride; - Real* row19 = values_out + 19u * output_stride; - - for (std::size_t q = 0; q < points.size(); ++q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l3 = xi[2]; - const Real l0 = Real(1) - l1 - l2 - l3; - Real p0[4]; - Real p1[4]; - Real p2[4]; - Real p3[4]; - fill_simplex_order3_factor_values(l0, p0); - fill_simplex_order3_factor_values(l1, p1); - fill_simplex_order3_factor_values(l2, p2); - fill_simplex_order3_factor_values(l3, p3); - - row0[q] = p0[3]; - row1[q] = p1[3]; - row2[q] = p2[3]; - row3[q] = p3[3]; - row4[q] = p0[2] * p1[1]; - row5[q] = p0[1] * p1[2]; - row6[q] = p1[2] * p2[1]; - row7[q] = p1[1] * p2[2]; - row8[q] = p0[1] * p2[2]; - row9[q] = p0[2] * p2[1]; - row10[q] = p0[2] * p3[1]; - row11[q] = p0[1] * p3[2]; - row12[q] = p1[2] * p3[1]; - row13[q] = p1[1] * p3[2]; - row14[q] = p2[2] * p3[1]; - row15[q] = p2[1] * p3[2]; - row16[q] = p0[1] * p1[1] * p2[1]; - row17[q] = p0[1] * p1[1] * p3[1]; - row18[q] = p1[1] * p2[1] * p3[1]; - row19[q] = p0[1] * p2[1] * p3[1]; +void LagrangeBasis::build_simplex_nodes() { + nodes_ = ReferenceNodeLayout::get_lagrange_node_coords(element_type_, order_); + simplex_exponents_.reserve(nodes_.size()); + for (const auto& node : nodes_) { + simplex_exponents_.push_back(simplex_exponent_from_point(node, topology_, order_)); } } -void evaluate_triangle_order3_gradients_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT gradients_out) { - Real* rows[10] = { - gradients_out + 0u * 3u * output_stride, - gradients_out + 1u * 3u * output_stride, - gradients_out + 2u * 3u * output_stride, - gradients_out + 3u * 3u * output_stride, - gradients_out + 4u * 3u * output_stride, - 
gradients_out + 5u * 3u * output_stride, - gradients_out + 6u * 3u * output_stride, - gradients_out + 7u * 3u * output_stride, - gradients_out + 8u * 3u * output_stride, - gradients_out + 9u * 3u * output_stride, - }; - - for (std::size_t q = 0; q < points.size(); ++q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l0 = Real(1) - l1 - l2; - - const Real p10 = Real(3) * l0; - const Real p11 = Real(3) * l1; - const Real p12 = Real(3) * l2; - const Real p20 = Real(0.5) * p10 * (p10 - Real(1)); - const Real p21 = Real(0.5) * p11 * (p11 - Real(1)); - const Real p22 = Real(0.5) * p12 * (p12 - Real(1)); - const Real d10 = Real(3); - const Real d11 = Real(3); - const Real d12 = Real(3); - const Real d20 = Real(3) * p10 - Real(1.5); - const Real d21 = Real(3) * p11 - Real(1.5); - const Real d22 = Real(3) * p12 - Real(1.5); - const Real d30 = Real(1.5) * p10 * p10 - Real(3) * p10 + Real(1); - const Real d31 = Real(1.5) * p11 * p11 - Real(3) * p11 + Real(1); - const Real d32 = Real(1.5) * p12 * p12 - Real(3) * p12 + Real(1); - - const Real dl0[10] = { - d30, - Real(0), - Real(0), - d20 * p11, - d10 * p21, - Real(0), - Real(0), - d10 * p22, - d20 * p12, - d10 * p11 * p12, - }; - const Real dl1[10] = { - Real(0), - d31, - Real(0), - p20 * d11, - p10 * d21, - d21 * p12, - d11 * p22, - Real(0), - Real(0), - p10 * d11 * p12, - }; - const Real dl2[10] = { - Real(0), - Real(0), - d32, - Real(0), - Real(0), - p21 * d12, - p11 * d22, - p10 * d22, - p20 * d12, - p10 * p11 * d12, - }; +void LagrangeBasis::build_wedge_nodes() { + init_equispaced_1d_nodes(); + nodes_ = ReferenceNodeLayout::get_lagrange_node_coords(element_type_, order_); + const auto tri_nodes = + ReferenceNodeLayout::get_lagrange_node_coords(ElementType::Triangle3, order_); + simplex_exponents_.reserve(tri_nodes.size()); + for (const auto& tri_node : tri_nodes) { + simplex_exponents_.push_back( + simplex_exponent_from_point(tri_node, BasisTopology::Triangle, order_)); + } - 
for (std::size_t node = 0; node < 10u; ++node) { - Real* g = rows[node]; - g[0u * output_stride + q] = dl1[node] - dl0[node]; - g[1u * output_stride + q] = dl2[node] - dl0[node]; - g[2u * output_stride + q] = Real(0); + wedge_indices_.reserve(nodes_.size()); + for (const auto& node : nodes_) { + const auto tri_exp = + simplex_exponent_from_point(node, BasisTopology::Triangle, order_); + auto it = std::find(simplex_exponents_.begin(), simplex_exponents_.end(), tri_exp); + if (it == simplex_exponents_.end()) { + throw BasisConstructionException("LagrangeBasis: wedge node triangle index lookup failed", + __FILE__, __LINE__, __func__); } + const std::size_t tri_index = + static_cast(std::distance(simplex_exponents_.begin(), it)); + wedge_indices_.push_back({tri_index, axis_index_pm_one(node[2], order_)}); } } -void evaluate_hex_order1_values_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out) { - Real* row0 = values_out + 0u * output_stride; - Real* row1 = values_out + 1u * output_stride; - Real* row2 = values_out + 2u * output_stride; - Real* row3 = values_out + 3u * output_stride; - Real* row4 = values_out + 4u * output_stride; - Real* row5 = values_out + 5u * output_stride; - Real* row6 = values_out + 6u * output_stride; - Real* row7 = values_out + 7u * output_stride; - - const auto write_q = [&](std::size_t q) { - const auto& xi = points[q]; - const Real lx = (Real(1) - xi[0]) * Real(0.5); - const Real ly = (Real(1) - xi[1]) * Real(0.5); - const Real lz = (Real(1) - xi[2]) * Real(0.5); - const Real ux = (Real(1) + xi[0]) * Real(0.5); - const Real uy = (Real(1) + xi[1]) * Real(0.5); - const Real uz = (Real(1) + xi[2]) * Real(0.5); - const Real lxly = lx * ly; - const Real uxly = ux * ly; - const Real uxuy = ux * uy; - const Real lxuy = lx * uy; - row0[q] = lxly * lz; - row1[q] = uxly * lz; - row2[q] = uxuy * lz; - row3[q] = lxuy * lz; - row4[q] = lxly * uz; - row5[q] = uxly * uz; - row6[q] = uxuy * uz; - row7[q] = 
lxuy * uz; - }; - if (points.size() == 4u) { - write_q(0u); - write_q(1u); - write_q(2u); - write_q(3u); +void LagrangeBasis::evaluate_all_to(const Vec3& xi, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) const { + if (topology_ == BasisTopology::Point) { + if (values_out) { + values_out[0] = Real(1); + } + if (gradients_out) { + gradients_out[0] = gradients_out[1] = gradients_out[2] = Real(0); + } + if (hessians_out) { + std::fill_n(hessians_out, 9u, Real(0)); + } return; } - for (std::size_t q = 0; q < points.size(); ++q) { - write_q(q); - } -} - -template -void evaluate_hex_order1_outputs_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - constexpr Real half = Real(0.5); - constexpr std::array dx{{-half, half, half, -half, -half, half, half, -half}}; - constexpr std::array dy{{-half, -half, half, half, -half, -half, half, half}}; - constexpr std::array dz{{-half, -half, -half, -half, half, half, half, half}}; + if (topology_ == BasisTopology::Line || + topology_ == BasisTopology::Quadrilateral || + topology_ == BasisTopology::Hexahedron) { + AxisEval ax; + AxisEval ay; + AxisEval az; + evaluate_1d_lagrange(xi[0], nodes_1d_, ax); + if (dimension_ >= 2) { + evaluate_1d_lagrange(xi[1], nodes_1d_, ay); + } + if (dimension_ >= 3) { + evaluate_1d_lagrange(xi[2], nodes_1d_, az); + } - for (std::size_t q = 0; q < points.size(); ++q) { - const auto& xi = points[q]; - const Real lx = (Real(1) - xi[0]) * half; - const Real ly = (Real(1) - xi[1]) * half; - const Real lz = (Real(1) - xi[2]) * half; - const Real ux = (Real(1) + xi[0]) * half; - const Real uy = (Real(1) + xi[1]) * half; - const Real uz = (Real(1) + xi[2]) * half; - const Real xval[8] = {lx, ux, ux, lx, lx, ux, ux, lx}; - const Real yval[8] = {ly, ly, uy, uy, ly, ly, uy, uy}; - const Real zval[8] = {lz, lz, lz, lz, uz, uz, uz, 
uz}; + for (std::size_t node = 0; node < tensor_indices_.size(); ++node) { + const auto& idx = tensor_indices_[node]; + const Real vx = ax.value[idx[0]]; + const Real dx = ax.first[idx[0]]; + const Real d2x = ax.second[idx[0]]; + const Real vy = dimension_ >= 2 ? ay.value[idx[1]] : Real(1); + const Real dy = dimension_ >= 2 ? ay.first[idx[1]] : Real(0); + const Real d2y = dimension_ >= 2 ? ay.second[idx[1]] : Real(0); + const Real vz = dimension_ >= 3 ? az.value[idx[2]] : Real(1); + const Real dz = dimension_ >= 3 ? az.first[idx[2]] : Real(0); + const Real d2z = dimension_ >= 3 ? az.second[idx[2]] : Real(0); - for (std::size_t node = 0; node < 8u; ++node) { - if constexpr (NeedValues) { - values_out[node * output_stride + q] = - xval[node] * yval[node] * zval[node]; + if (values_out) { + values_out[node] = vx * vy * vz; } - if constexpr (NeedGradients) { - Real* SVMP_RESTRICT g = gradients_out + node * 3u * output_stride; - g[0u * output_stride + q] = dx[node] * yval[node] * zval[node]; - g[1u * output_stride + q] = xval[node] * dy[node] * zval[node]; - g[2u * output_stride + q] = xval[node] * yval[node] * dz[node]; + if (gradients_out) { + Real* g = gradients_out + node * 3u; + g[0] = dx * vy * vz; + g[1] = vx * dy * vz; + g[2] = vx * vy * dz; } - if constexpr (NeedHessians) { - Real* SVMP_RESTRICT H = hessians_out + node * 9u * output_stride; - const Real hxy = dx[node] * dy[node] * zval[node]; - const Real hxz = dx[node] * yval[node] * dz[node]; - const Real hyz = xval[node] * dy[node] * dz[node]; - H[0u * output_stride + q] = Real(0); - H[1u * output_stride + q] = hxy; - H[2u * output_stride + q] = hxz; - H[3u * output_stride + q] = hxy; - H[4u * output_stride + q] = Real(0); - H[5u * output_stride + q] = hyz; - H[6u * output_stride + q] = hxz; - H[7u * output_stride + q] = hyz; - H[8u * output_stride + q] = Real(0); + if (hessians_out) { + Real* h = hessians_out + node * 9u; + h[0] = d2x * vy * vz; + h[1] = dx * dy * vz; + h[2] = dx * vy * dz; + h[3] = h[1]; + 
h[4] = vx * d2y * vz; + h[5] = vx * dy * dz; + h[6] = h[2]; + h[7] = h[5]; + h[8] = vx * vy * d2z; } } + return; } -} - -void evaluate_quad_order1_values_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out) { - Real* row0 = values_out + 0u * output_stride; - Real* row1 = values_out + 1u * output_stride; - Real* row2 = values_out + 2u * output_stride; - Real* row3 = values_out + 3u * output_stride; - if (points.size() == 4u && output_stride == 4u) { - Real lx[4]; - Real ux[4]; - Real ly[4]; - Real uy[4]; - for (std::size_t q = 0; q < 4u; ++q) { - const auto& xi = points[q]; - lx[q] = (Real(1) - xi[0]) * Real(0.5); - ux[q] = (Real(1) + xi[0]) * Real(0.5); - ly[q] = (Real(1) - xi[1]) * Real(0.5); - uy[q] = (Real(1) + xi[1]) * Real(0.5); + if (topology_ == BasisTopology::Triangle || topology_ == BasisTopology::Tetrahedron) { + SimplexEval simplex; + evaluate_simplex(xi, topology_, order_, simplex_exponents_, simplex); + for (std::size_t i = 0; i < simplex.value.size(); ++i) { + if (values_out) { + values_out[i] = simplex.value[i]; + } + if (gradients_out) { + store_gradient(simplex.gradient[i], gradients_out + i * 3u); + } + if (hessians_out) { + store_hessian(simplex.hessian[i], hessians_out + i * 9u); + } } - row0[0] = lx[0] * ly[0]; - row0[1] = lx[1] * ly[1]; - row0[2] = lx[2] * ly[2]; - row0[3] = lx[3] * ly[3]; - row1[0] = ux[0] * ly[0]; - row1[1] = ux[1] * ly[1]; - row1[2] = ux[2] * ly[2]; - row1[3] = ux[3] * ly[3]; - row2[0] = ux[0] * uy[0]; - row2[1] = ux[1] * uy[1]; - row2[2] = ux[2] * uy[2]; - row2[3] = ux[3] * uy[3]; - row3[0] = lx[0] * uy[0]; - row3[1] = lx[1] * uy[1]; - row3[2] = lx[2] * uy[2]; - row3[3] = lx[3] * uy[3]; return; } - for (std::size_t q = 0; q < points.size(); ++q) { - const auto& xi = points[q]; - const Real lx = (Real(1) - xi[0]) * Real(0.5); - const Real ly = (Real(1) - xi[1]) * Real(0.5); - const Real ux = (Real(1) + xi[0]) * Real(0.5); - const Real uy = (Real(1) + xi[1]) * Real(0.5); - 
row0[q] = lx * ly; - row1[q] = ux * ly; - row2[q] = ux * uy; - row3[q] = lx * uy; - } -} + if (topology_ == BasisTopology::Wedge) { + SimplexEval tri; + AxisEval z_axis; + evaluate_simplex(xi, BasisTopology::Triangle, order_, simplex_exponents_, tri); + evaluate_1d_lagrange(xi[2], nodes_1d_, z_axis); -void evaluate_quad_order1_gradients_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT gradients_out) { - Real* row0 = gradients_out + 0u * 3u * output_stride; - Real* row1 = gradients_out + 1u * 3u * output_stride; - Real* row2 = gradients_out + 2u * 3u * output_stride; - Real* row3 = gradients_out + 3u * 3u * output_stride; + for (std::size_t node = 0; node < wedge_indices_.size(); ++node) { + const auto [tri_idx, z_idx] = wedge_indices_[node]; + const Real tv = tri.value[tri_idx]; + const Real zv = z_axis.value[z_idx]; + const Real dz = z_axis.first[z_idx]; + const Real d2z = z_axis.second[z_idx]; - if (points.size() == 4u) { - Real lx[4]; - Real ly[4]; - Real ux[4]; - Real uy[4]; - for (std::size_t q = 0; q < 4u; ++q) { - const auto& xi = points[q]; - lx[q] = (Real(1) - xi[0]) * Real(0.5); - ly[q] = (Real(1) - xi[1]) * Real(0.5); - ux[q] = (Real(1) + xi[0]) * Real(0.5); - uy[q] = (Real(1) + xi[1]) * Real(0.5); + if (values_out) { + values_out[node] = tv * zv; + } + if (gradients_out) { + Real* g = gradients_out + node * 3u; + g[0] = tri.gradient[tri_idx][0] * zv; + g[1] = tri.gradient[tri_idx][1] * zv; + g[2] = tv * dz; + } + if (hessians_out) { + Real* h = hessians_out + node * 9u; + const Hessian& th = tri.hessian[tri_idx]; + const Gradient& tg = tri.gradient[tri_idx]; + h[0] = th(0, 0) * zv; + h[1] = th(0, 1) * zv; + h[2] = tg[0] * dz; + h[3] = h[1]; + h[4] = th(1, 1) * zv; + h[5] = tg[1] * dz; + h[6] = h[2]; + h[7] = h[5]; + h[8] = tv * d2z; + } } - - auto write_component = [](Real* SVMP_RESTRICT row, - Real a0, - Real a1, - Real a2, - Real a3) { - row[0] = a0; - row[1] = a1; - row[2] = a2; - row[3] = a3; - }; - - 
write_component(row0, Real(-0.5) * ly[0], Real(-0.5) * ly[1], - Real(-0.5) * ly[2], Real(-0.5) * ly[3]); - write_component(row0 + output_stride, Real(-0.5) * lx[0], Real(-0.5) * lx[1], - Real(-0.5) * lx[2], Real(-0.5) * lx[3]); - write_component(row0 + 2u * output_stride, Real(0), Real(0), Real(0), Real(0)); - - write_component(row1, Real(0.5) * ly[0], Real(0.5) * ly[1], - Real(0.5) * ly[2], Real(0.5) * ly[3]); - write_component(row1 + output_stride, Real(-0.5) * ux[0], Real(-0.5) * ux[1], - Real(-0.5) * ux[2], Real(-0.5) * ux[3]); - write_component(row1 + 2u * output_stride, Real(0), Real(0), Real(0), Real(0)); - - write_component(row2, Real(0.5) * uy[0], Real(0.5) * uy[1], - Real(0.5) * uy[2], Real(0.5) * uy[3]); - write_component(row2 + output_stride, Real(0.5) * ux[0], Real(0.5) * ux[1], - Real(0.5) * ux[2], Real(0.5) * ux[3]); - write_component(row2 + 2u * output_stride, Real(0), Real(0), Real(0), Real(0)); - - write_component(row3, Real(-0.5) * uy[0], Real(-0.5) * uy[1], - Real(-0.5) * uy[2], Real(-0.5) * uy[3]); - write_component(row3 + output_stride, Real(0.5) * lx[0], Real(0.5) * lx[1], - Real(0.5) * lx[2], Real(0.5) * lx[3]); - write_component(row3 + 2u * output_stride, Real(0), Real(0), Real(0), Real(0)); return; } - for (std::size_t q = 0; q < points.size(); ++q) { - const auto& xi = points[q]; - const Real lx = (Real(1) - xi[0]) * Real(0.5); - const Real ly = (Real(1) - xi[1]) * Real(0.5); - const Real ux = (Real(1) + xi[0]) * Real(0.5); - const Real uy = (Real(1) + xi[1]) * Real(0.5); - row0[0u * output_stride + q] = Real(-0.5) * ly; - row0[1u * output_stride + q] = Real(-0.5) * lx; - row0[2u * output_stride + q] = Real(0); - row1[0u * output_stride + q] = Real( 0.5) * ly; - row1[1u * output_stride + q] = Real(-0.5) * ux; - row1[2u * output_stride + q] = Real(0); - row2[0u * output_stride + q] = Real( 0.5) * uy; - row2[1u * output_stride + q] = Real( 0.5) * ux; - row2[2u * output_stride + q] = Real(0); - row3[0u * output_stride + q] = Real(-0.5) * uy; 
- row3[1u * output_stride + q] = Real( 0.5) * lx; - row3[2u * output_stride + q] = Real(0); - } + throw BasisEvaluationException("Unsupported element in LagrangeBasis evaluation", + __FILE__, __LINE__, __func__); } -inline void write_quad_order1_hessian_q( - Real* SVMP_RESTRICT row, - std::size_t output_stride, - std::size_t q, - Real xy) { - row[0u * output_stride + q] = Real(0); - row[1u * output_stride + q] = xy; - row[2u * output_stride + q] = Real(0); - row[3u * output_stride + q] = xy; - row[4u * output_stride + q] = Real(0); - row[5u * output_stride + q] = Real(0); - row[6u * output_stride + q] = Real(0); - row[7u * output_stride + q] = Real(0); - row[8u * output_stride + q] = Real(0); +void LagrangeBasis::evaluate_values(const Vec3& xi, + std::vector& values) const { + values.resize(size()); + evaluate_values_to(xi, values.data()); } -void evaluate_quad_order1_hessians_strided( - std::size_t num_qpts, - std::size_t output_stride, - Real* SVMP_RESTRICT hessians_out) { - Real* row0 = hessians_out + 0u * 9u * output_stride; - Real* row1 = hessians_out + 1u * 9u * output_stride; - Real* row2 = hessians_out + 2u * 9u * output_stride; - Real* row3 = hessians_out + 3u * 9u * output_stride; - - constexpr Real positive = Real(0.25); - constexpr Real negative = Real(-0.25); - for (std::size_t q = 0; q < num_qpts; ++q) { - write_quad_order1_hessian_q(row0, output_stride, q, positive); - write_quad_order1_hessian_q(row1, output_stride, q, negative); - write_quad_order1_hessian_q(row2, output_stride, q, positive); - write_quad_order1_hessian_q(row3, output_stride, q, negative); +void LagrangeBasis::evaluate_gradients(const Vec3& xi, + std::vector& gradients) const { + gradients.resize(size()); + std::vector flat(size() * 3u, Real(0)); + evaluate_gradients_to(xi, flat.data()); + for (std::size_t i = 0; i < size(); ++i) { + gradients[i][0] = flat[i * 3u + 0u]; + gradients[i][1] = flat[i * 3u + 1u]; + gradients[i][2] = flat[i * 3u + 2u]; } } -template -inline void 
write_quad_order1_all_q4( - std::size_t output_stride, - std::size_t i, - std::size_t j, - const Real lx[4][2], - const Real ly[4][2], - Real* SVMP_RESTRICT value_row, - Real* SVMP_RESTRICT grad_row, - Real* SVMP_RESTRICT hess_row) { - const Real xv = lx[Q][i]; - const Real yv = ly[Q][j]; - const Real xd = (i == 0u) ? Real(-0.5) : Real(0.5); - const Real yd = (j == 0u) ? Real(-0.5) : Real(0.5); - const Real hxy = xd * yd; - - value_row[Q] = xv * yv; - grad_row[0u * output_stride + Q] = xd * yv; - grad_row[1u * output_stride + Q] = xv * yd; - grad_row[2u * output_stride + Q] = Real(0); - hess_row[0u * output_stride + Q] = Real(0); - hess_row[4u * output_stride + Q] = Real(0); - hess_row[8u * output_stride + Q] = Real(0); - hess_row[1u * output_stride + Q] = hxy; - hess_row[3u * output_stride + Q] = hxy; - hess_row[2u * output_stride + Q] = Real(0); - hess_row[6u * output_stride + Q] = Real(0); - hess_row[5u * output_stride + Q] = Real(0); - hess_row[7u * output_stride + Q] = Real(0); -} - -void evaluate_quad_order1_all_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - constexpr auto node_axes = detail::make_quad_tensor_node_axes<1>(); - - Real lx[4][2]; - Real ly[4][2]; - for (std::size_t q = 0; q < 4u; ++q) { - const auto& xi = points[q]; - lx[q][0] = (Real(1) - xi[0]) * Real(0.5); - lx[q][1] = (Real(1) + xi[0]) * Real(0.5); - ly[q][0] = (Real(1) - xi[1]) * Real(0.5); - ly[q][1] = (Real(1) + xi[1]) * Real(0.5); - } - - for (std::size_t node = 0; node < node_axes.size(); ++node) { - const auto& axes = node_axes[node]; - const std::size_t i = axes[0]; - const std::size_t j = axes[1]; - Real* value_row = values_out + node * output_stride; - Real* grad_row = gradients_out + node * 3u * output_stride; - Real* hess_row = hessians_out + node * 9u * output_stride; - write_quad_order1_all_q4<0u>( - output_stride, i, j, lx, ly, value_row, grad_row, 
hess_row); - write_quad_order1_all_q4<1u>( - output_stride, i, j, lx, ly, value_row, grad_row, hess_row); - write_quad_order1_all_q4<2u>( - output_stride, i, j, lx, ly, value_row, grad_row, hess_row); - write_quad_order1_all_q4<3u>( - output_stride, i, j, lx, ly, value_row, grad_row, hess_row); +void LagrangeBasis::evaluate_hessians(const Vec3& xi, + std::vector& hessians) const { + hessians.resize(size()); + std::vector flat(size() * 9u, Real(0)); + evaluate_hessians_to(xi, flat.data()); + for (std::size_t i = 0; i < size(); ++i) { + hessians[i] = load_hessian(flat.data() + i * 9u); } } -void evaluate_quad_order2_values_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out) { - Real* row0 = values_out + 0u * output_stride; - Real* row1 = values_out + 1u * output_stride; - Real* row2 = values_out + 2u * output_stride; - Real* row3 = values_out + 3u * output_stride; - Real* row4 = values_out + 4u * output_stride; - Real* row5 = values_out + 5u * output_stride; - Real* row6 = values_out + 6u * output_stride; - Real* row7 = values_out + 7u * output_stride; - Real* row8 = values_out + 8u * output_stride; - - for (std::size_t q = 0; q < points.size(); ++q) { - const auto& xi = points[q]; - const Real x = xi[0]; - const Real y = xi[1]; - const Real x0 = x * (x - Real(1)) * Real(0.5); - const Real x1 = x * (x + Real(1)) * Real(0.5); - const Real x2 = Real(1) - x * x; - const Real y0 = y * (y - Real(1)) * Real(0.5); - const Real y1 = y * (y + Real(1)) * Real(0.5); - const Real y2 = Real(1) - y * y; - - row0[q] = x0 * y0; - row1[q] = x1 * y0; - row2[q] = x1 * y1; - row3[q] = x0 * y1; - row4[q] = x2 * y0; - row5[q] = x1 * y2; - row6[q] = x2 * y1; - row7[q] = x0 * y2; - row8[q] = x2 * y2; +void LagrangeBasis::evaluate_all(const Vec3& xi, + std::vector& values, + std::vector& gradients, + std::vector& hessians) const { + values.resize(size()); + gradients.resize(size()); + hessians.resize(size()); + std::vector flat_g(size() * 3u, 
Real(0)); + std::vector flat_h(size() * 9u, Real(0)); + evaluate_all_to(xi, values.data(), flat_g.data(), flat_h.data()); + for (std::size_t i = 0; i < size(); ++i) { + gradients[i][0] = flat_g[i * 3u + 0u]; + gradients[i][1] = flat_g[i * 3u + 1u]; + gradients[i][2] = flat_g[i * 3u + 2u]; + hessians[i] = load_hessian(flat_h.data() + i * 9u); } } -inline void write_quad_order2_gradient_q( - Real* SVMP_RESTRICT row, - std::size_t output_stride, - std::size_t q, - Real dx, - Real dy) { - row[0u * output_stride + q] = dx; - row[1u * output_stride + q] = dy; - row[2u * output_stride + q] = Real(0); +void LagrangeBasis::evaluate_values_to(const Vec3& xi, + Real* SVMP_RESTRICT values_out) const { + evaluate_all_to(xi, values_out, nullptr, nullptr); } -void evaluate_quad_order2_gradients_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT gradients_out) { - if (points.size() == 4u) { - Real xv[4][3]; - Real yv[4][3]; - Real xd[4][3]; - Real yd[4][3]; - for (std::size_t q = 0; q < 4u; ++q) { - const auto& xi = points[q]; - const Real x = xi[0]; - const Real y = xi[1]; - xv[q][0] = x * (x - Real(1)) * Real(0.5); - xv[q][1] = x * (x + Real(1)) * Real(0.5); - xv[q][2] = Real(1) - x * x; - yv[q][0] = y * (y - Real(1)) * Real(0.5); - yv[q][1] = y * (y + Real(1)) * Real(0.5); - yv[q][2] = Real(1) - y * y; - xd[q][0] = x - Real(0.5); - xd[q][1] = x + Real(0.5); - xd[q][2] = Real(-2) * x; - yd[q][0] = y - Real(0.5); - yd[q][1] = y + Real(0.5); - yd[q][2] = Real(-2) * y; - } - - auto write_node = [&](std::size_t node, std::size_t i, std::size_t j) { - Real* SVMP_RESTRICT row = gradients_out + node * 3u * output_stride; - row[0u] = xd[0][i] * yv[0][j]; - row[1u] = xd[1][i] * yv[1][j]; - row[2u] = xd[2][i] * yv[2][j]; - row[3u] = xd[3][i] * yv[3][j]; - row[output_stride + 0u] = xv[0][i] * yd[0][j]; - row[output_stride + 1u] = xv[1][i] * yd[1][j]; - row[output_stride + 2u] = xv[2][i] * yd[2][j]; - row[output_stride + 3u] = xv[3][i] * yd[3][j]; - 
row[2u * output_stride + 0u] = Real(0); - row[2u * output_stride + 1u] = Real(0); - row[2u * output_stride + 2u] = Real(0); - row[2u * output_stride + 3u] = Real(0); - }; - - write_node(0u, 0u, 0u); - write_node(1u, 1u, 0u); - write_node(2u, 1u, 1u); - write_node(3u, 0u, 1u); - write_node(4u, 2u, 0u); - write_node(5u, 1u, 2u); - write_node(6u, 2u, 1u); - write_node(7u, 0u, 2u); - write_node(8u, 2u, 2u); - return; - } - - Real* row0 = gradients_out + 0u * 3u * output_stride; - Real* row1 = gradients_out + 1u * 3u * output_stride; - Real* row2 = gradients_out + 2u * 3u * output_stride; - Real* row3 = gradients_out + 3u * 3u * output_stride; - Real* row4 = gradients_out + 4u * 3u * output_stride; - Real* row5 = gradients_out + 5u * 3u * output_stride; - Real* row6 = gradients_out + 6u * 3u * output_stride; - Real* row7 = gradients_out + 7u * 3u * output_stride; - Real* row8 = gradients_out + 8u * 3u * output_stride; - - for (std::size_t q = 0; q < points.size(); ++q) { - const auto& xi = points[q]; - const Real x = xi[0]; - const Real y = xi[1]; - const Real x0 = x * (x - Real(1)) * Real(0.5); - const Real x1 = x * (x + Real(1)) * Real(0.5); - const Real x2 = Real(1) - x * x; - const Real y0 = y * (y - Real(1)) * Real(0.5); - const Real y1 = y * (y + Real(1)) * Real(0.5); - const Real y2 = Real(1) - y * y; - const Real dx0 = x - Real(0.5); - const Real dx1 = x + Real(0.5); - const Real dx2 = Real(-2) * x; - const Real dy0 = y - Real(0.5); - const Real dy1 = y + Real(0.5); - const Real dy2 = Real(-2) * y; +void LagrangeBasis::evaluate_gradients_to(const Vec3& xi, + Real* SVMP_RESTRICT gradients_out) const { + evaluate_all_to(xi, nullptr, gradients_out, nullptr); +} - write_quad_order2_gradient_q(row0, output_stride, q, dx0 * y0, x0 * dy0); - write_quad_order2_gradient_q(row1, output_stride, q, dx1 * y0, x1 * dy0); - write_quad_order2_gradient_q(row2, output_stride, q, dx1 * y1, x1 * dy1); - write_quad_order2_gradient_q(row3, output_stride, q, dx0 * y1, x0 * dy1); - 
write_quad_order2_gradient_q(row4, output_stride, q, dx2 * y0, x2 * dy0); - write_quad_order2_gradient_q(row5, output_stride, q, dx1 * y2, x1 * dy2); - write_quad_order2_gradient_q(row6, output_stride, q, dx2 * y1, x2 * dy1); - write_quad_order2_gradient_q(row7, output_stride, q, dx0 * y2, x0 * dy2); - write_quad_order2_gradient_q(row8, output_stride, q, dx2 * y2, x2 * dy2); - } -} - -inline void write_quad_order2_hessian_q( - Real* SVMP_RESTRICT row, - std::size_t output_stride, - std::size_t q, - Real hxx, - Real hxy, - Real hyy) { - row[0u * output_stride + q] = hxx; - row[1u * output_stride + q] = hxy; - row[2u * output_stride + q] = Real(0); - row[3u * output_stride + q] = hxy; - row[4u * output_stride + q] = hyy; - row[5u * output_stride + q] = Real(0); - row[6u * output_stride + q] = Real(0); - row[7u * output_stride + q] = Real(0); - row[8u * output_stride + q] = Real(0); -} - -void evaluate_quad_order2_hessians_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT hessians_out) { - Real* row0 = hessians_out + 0u * 9u * output_stride; - Real* row1 = hessians_out + 1u * 9u * output_stride; - Real* row2 = hessians_out + 2u * 9u * output_stride; - Real* row3 = hessians_out + 3u * 9u * output_stride; - Real* row4 = hessians_out + 4u * 9u * output_stride; - Real* row5 = hessians_out + 5u * 9u * output_stride; - Real* row6 = hessians_out + 6u * 9u * output_stride; - Real* row7 = hessians_out + 7u * 9u * output_stride; - Real* row8 = hessians_out + 8u * 9u * output_stride; - - for (std::size_t q = 0; q < points.size(); ++q) { - const auto& xi = points[q]; - const Real x = xi[0]; - const Real y = xi[1]; - const Real x0 = x * (x - Real(1)) * Real(0.5); - const Real x1 = x * (x + Real(1)) * Real(0.5); - const Real x2 = Real(1) - x * x; - const Real y0 = y * (y - Real(1)) * Real(0.5); - const Real y1 = y * (y + Real(1)) * Real(0.5); - const Real y2 = Real(1) - y * y; - const Real dx0 = x - Real(0.5); - const Real dx1 = x + 
Real(0.5); - const Real dx2 = Real(-2) * x; - const Real dy0 = y - Real(0.5); - const Real dy1 = y + Real(0.5); - const Real dy2 = Real(-2) * y; - - write_quad_order2_hessian_q(row0, output_stride, q, y0, dx0 * dy0, x0); - write_quad_order2_hessian_q(row1, output_stride, q, y0, dx1 * dy0, x1); - write_quad_order2_hessian_q(row2, output_stride, q, y1, dx1 * dy1, x1); - write_quad_order2_hessian_q(row3, output_stride, q, y1, dx0 * dy1, x0); - write_quad_order2_hessian_q(row4, output_stride, q, Real(-2) * y0, dx2 * dy0, x2); - write_quad_order2_hessian_q(row5, output_stride, q, y2, dx1 * dy2, Real(-2) * x1); - write_quad_order2_hessian_q(row6, output_stride, q, Real(-2) * y1, dx2 * dy1, x2); - write_quad_order2_hessian_q(row7, output_stride, q, y2, dx0 * dy2, Real(-2) * x0); - write_quad_order2_hessian_q(row8, output_stride, q, Real(-2) * y2, dx2 * dy2, Real(-2) * x2); - } -} - -inline void fill_order3_axis_values(Real x, Real* SVMP_RESTRICT values) { - const Real x2 = x * x; - values[0] = Real(-9.0 / 16.0) * (x - Real(1)) * (x2 - Real(1.0 / 9.0)); - values[1] = Real( 9.0 / 16.0) * (x + Real(1)) * (x2 - Real(1.0 / 9.0)); - values[2] = Real(27.0 / 16.0) * (x2 - Real(1)) * (x - Real(1.0 / 3.0)); - values[3] = Real(-27.0 / 16.0) * (x2 - Real(1)) * (x + Real(1.0 / 3.0)); -} - -inline void fill_order3_axis_value_scalars(Real x, - Real& v0, - Real& v1, - Real& v2, - Real& v3) { - const Real x2 = x * x; - v0 = Real(-9.0 / 16.0) * (x - Real(1)) * (x2 - Real(1.0 / 9.0)); - v1 = Real( 9.0 / 16.0) * (x + Real(1)) * (x2 - Real(1.0 / 9.0)); - v2 = Real(27.0 / 16.0) * (x2 - Real(1)) * (x - Real(1.0 / 3.0)); - v3 = Real(-27.0 / 16.0) * (x2 - Real(1)) * (x + Real(1.0 / 3.0)); -} - -void evaluate_line_order1_values_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out) { - Real* row0 = values_out + 0u * output_stride; - Real* row1 = values_out + 1u * output_stride; - for (std::size_t q = 0; q < 4u; ++q) { - const Real x = points[q][0]; - 
row0[q] = (Real(1) - x) * Real(0.5); - row1[q] = (Real(1) + x) * Real(0.5); - } -} - -void evaluate_line_order2_values_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out) { - Real* row0 = values_out + 0u * output_stride; - Real* row1 = values_out + 1u * output_stride; - Real* row2 = values_out + 2u * output_stride; - for (std::size_t q = 0; q < 4u; ++q) { - const Real x = points[q][0]; - row0[q] = x * (x - Real(1)) * Real(0.5); - row1[q] = x * (x + Real(1)) * Real(0.5); - row2[q] = Real(1) - x * x; - } -} - -void evaluate_line_order3_values_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out) { - Real* row0 = values_out + 0u * output_stride; - Real* row1 = values_out + 1u * output_stride; - Real* row2 = values_out + 2u * output_stride; - Real* row3 = values_out + 3u * output_stride; - for (std::size_t q = 0; q < 4u; ++q) { - Real values[4]; - fill_order3_axis_values(points[q][0], values); - row0[q] = values[0]; - row1[q] = values[1]; - row2[q] = values[2]; - row3[q] = values[3]; - } -} - -inline void fill_order3_axis_values_first(Real x, - Real* SVMP_RESTRICT values, - Real* SVMP_RESTRICT first); - -inline void fill_order3_axis_values_first_second(Real x, - Real* SVMP_RESTRICT values, - Real* SVMP_RESTRICT first, - Real* SVMP_RESTRICT second); - -inline void write_line_gradient_q4_row(Real* SVMP_RESTRICT row, - std::size_t output_stride, - Real g0, - Real g1, - Real g2, - Real g3) { - row[0] = g0; - row[1] = g1; - row[2] = g2; - row[3] = g3; - row[output_stride + 0u] = Real(0); - row[output_stride + 1u] = Real(0); - row[output_stride + 2u] = Real(0); - row[output_stride + 3u] = Real(0); - row[2u * output_stride + 0u] = Real(0); - row[2u * output_stride + 1u] = Real(0); - row[2u * output_stride + 2u] = Real(0); - row[2u * output_stride + 3u] = Real(0); -} - -inline void write_line_hessian_q4_row(Real* SVMP_RESTRICT row, - std::size_t output_stride, - Real h0, - Real h1, - Real 
h2, - Real h3) { - row[0] = h0; - row[1] = h1; - row[2] = h2; - row[3] = h3; - for (std::size_t component = 1u; component < 9u; ++component) { - Real* slot = row + component * output_stride; - slot[0] = Real(0); - slot[1] = Real(0); - slot[2] = Real(0); - slot[3] = Real(0); - } -} - -SVMP_LAGRANGE_NOINLINE void evaluate_line_order1_gradients_q4( - std::size_t output_stride, - Real* SVMP_RESTRICT gradients_out) { - write_line_gradient_q4_row(gradients_out + 0u * 3u * output_stride, - output_stride, - Real(-0.5), Real(-0.5), Real(-0.5), Real(-0.5)); - write_line_gradient_q4_row(gradients_out + 1u * 3u * output_stride, - output_stride, - Real(0.5), Real(0.5), Real(0.5), Real(0.5)); -} - -SVMP_LAGRANGE_NOINLINE void evaluate_line_order1_hessians_q4( - std::size_t output_stride, - Real* SVMP_RESTRICT hessians_out) { - write_line_hessian_q4_row(hessians_out + 0u * 9u * output_stride, - output_stride, Real(0), Real(0), Real(0), Real(0)); - write_line_hessian_q4_row(hessians_out + 1u * 9u * output_stride, - output_stride, Real(0), Real(0), Real(0), Real(0)); -} - -SVMP_LAGRANGE_NOINLINE void evaluate_line_order1_all_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - evaluate_line_order1_values_q4(points, output_stride, values_out); - evaluate_line_order1_gradients_q4(output_stride, gradients_out); - evaluate_line_order1_hessians_q4(output_stride, hessians_out); -} - -SVMP_LAGRANGE_NOINLINE void evaluate_line_order2_gradients_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT gradients_out) { - const Real x0 = points[0][0]; - const Real x1 = points[1][0]; - const Real x2 = points[2][0]; - const Real x3 = points[3][0]; - write_line_gradient_q4_row(gradients_out + 0u * 3u * output_stride, - output_stride, - x0 - Real(0.5), x1 - Real(0.5), - x2 - Real(0.5), x3 - Real(0.5)); - write_line_gradient_q4_row(gradients_out + 1u * 
3u * output_stride, - output_stride, - x0 + Real(0.5), x1 + Real(0.5), - x2 + Real(0.5), x3 + Real(0.5)); - write_line_gradient_q4_row(gradients_out + 2u * 3u * output_stride, - output_stride, - Real(-2) * x0, Real(-2) * x1, - Real(-2) * x2, Real(-2) * x3); -} - -SVMP_LAGRANGE_NOINLINE void evaluate_line_order2_hessians_q4( - std::size_t output_stride, - Real* SVMP_RESTRICT hessians_out) { - write_line_hessian_q4_row(hessians_out + 0u * 9u * output_stride, - output_stride, Real(1), Real(1), Real(1), Real(1)); - write_line_hessian_q4_row(hessians_out + 1u * 9u * output_stride, - output_stride, Real(1), Real(1), Real(1), Real(1)); - write_line_hessian_q4_row(hessians_out + 2u * 9u * output_stride, - output_stride, Real(-2), Real(-2), Real(-2), Real(-2)); -} - -SVMP_LAGRANGE_NOINLINE void evaluate_line_order2_all_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - evaluate_line_order2_values_q4(points, output_stride, values_out); - evaluate_line_order2_gradients_q4(points, output_stride, gradients_out); - evaluate_line_order2_hessians_q4(output_stride, hessians_out); -} - -SVMP_LAGRANGE_NOINLINE void evaluate_line_order3_gradients_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT gradients_out) { - Real first[4][4]; - for (std::size_t q = 0; q < 4u; ++q) { - Real values[4]; - fill_order3_axis_values_first(points[q][0], values, first[q]); - } - for (std::size_t node = 0; node < 4u; ++node) { - write_line_gradient_q4_row(gradients_out + node * 3u * output_stride, - output_stride, - first[0][node], first[1][node], - first[2][node], first[3][node]); - } -} - -SVMP_LAGRANGE_NOINLINE void evaluate_line_order3_hessians_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT hessians_out) { - Real second[4][4]; - for (std::size_t q = 0; q < 4u; ++q) { - Real values[4]; - Real first[4]; - 
fill_order3_axis_values_first_second(points[q][0], values, first, second[q]); - } - for (std::size_t node = 0; node < 4u; ++node) { - write_line_hessian_q4_row(hessians_out + node * 9u * output_stride, - output_stride, - second[0][node], second[1][node], - second[2][node], second[3][node]); - } -} - -SVMP_LAGRANGE_NOINLINE void evaluate_line_order3_all_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - Real values[4][4]; - Real first[4][4]; - Real second[4][4]; - for (std::size_t q = 0; q < 4u; ++q) { - fill_order3_axis_values_first_second(points[q][0], values[q], first[q], second[q]); - } - for (std::size_t node = 0; node < 4u; ++node) { - Real* value_row = values_out + node * output_stride; - value_row[0] = values[0][node]; - value_row[1] = values[1][node]; - value_row[2] = values[2][node]; - value_row[3] = values[3][node]; - write_line_gradient_q4_row(gradients_out + node * 3u * output_stride, - output_stride, - first[0][node], first[1][node], - first[2][node], first[3][node]); - write_line_hessian_q4_row(hessians_out + node * 9u * output_stride, - output_stride, - second[0][node], second[1][node], - second[2][node], second[3][node]); - } -} - -inline void fill_order3_axis_values_first(Real x, - Real* SVMP_RESTRICT values, - Real* SVMP_RESTRICT first) { - fill_order3_axis_values(x, values); - const Real x2 = x * x; - first[0] = Real(-9.0 / 16.0) * (Real(3) * x2 - Real(2) * x - Real(1.0 / 9.0)); - first[1] = Real( 9.0 / 16.0) * (Real(3) * x2 + Real(2) * x - Real(1.0 / 9.0)); - first[2] = Real(27.0 / 16.0) * (Real(3) * x2 - Real(2.0 / 3.0) * x - Real(1)); - first[3] = Real(-27.0 / 16.0) * (Real(3) * x2 + Real(2.0 / 3.0) * x - Real(1)); -} - -inline void fill_order3_axis_values_first_second(Real x, - Real* SVMP_RESTRICT values, - Real* SVMP_RESTRICT first, - Real* SVMP_RESTRICT second) { - fill_order3_axis_values_first(x, values, first); - 
second[0] = Real(-9.0 / 16.0) * (Real(6) * x - Real(2)); - second[1] = Real( 9.0 / 16.0) * (Real(6) * x + Real(2)); - second[2] = Real(27.0 / 16.0) * (Real(6) * x - Real(2.0 / 3.0)); - second[3] = Real(-27.0 / 16.0) * (Real(6) * x + Real(2.0 / 3.0)); -} - -inline void write_quad_order3_value_row_q4(Real* SVMP_RESTRICT row, - const Real lx[4][4], - const Real ly[4][4], - std::size_t i, - std::size_t j) { - row[0] = lx[0][i] * ly[0][j]; - row[1] = lx[1][i] * ly[1][j]; - row[2] = lx[2][i] * ly[2][j]; - row[3] = lx[3][i] * ly[3][j]; -} - -void evaluate_quad_order3_values_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out) { - if (output_stride == 4u) { - Real* row0 = values_out + 0u * 4u; - Real* row1 = values_out + 1u * 4u; - Real* row2 = values_out + 2u * 4u; - Real* row3 = values_out + 3u * 4u; - Real* row4 = values_out + 4u * 4u; - Real* row5 = values_out + 5u * 4u; - Real* row6 = values_out + 6u * 4u; - Real* row7 = values_out + 7u * 4u; - Real* row8 = values_out + 8u * 4u; - Real* row9 = values_out + 9u * 4u; - Real* row10 = values_out + 10u * 4u; - Real* row11 = values_out + 11u * 4u; - Real* row12 = values_out + 12u * 4u; - Real* row13 = values_out + 13u * 4u; - Real* row14 = values_out + 14u * 4u; - Real* row15 = values_out + 15u * 4u; - - auto write_q = [&](std::size_t q) { - const auto& xi = points[q]; - Real x0; - Real x1; - Real x2; - Real x3; - Real y0; - Real y1; - Real y2; - Real y3; - fill_order3_axis_value_scalars(xi[0], x0, x1, x2, x3); - fill_order3_axis_value_scalars(xi[1], y0, y1, y2, y3); - row0[q] = x0 * y0; - row1[q] = x1 * y0; - row2[q] = x1 * y1; - row3[q] = x0 * y1; - row4[q] = x2 * y0; - row5[q] = x3 * y0; - row6[q] = x1 * y2; - row7[q] = x1 * y3; - row8[q] = x3 * y1; - row9[q] = x2 * y1; - row10[q] = x0 * y3; - row11[q] = x0 * y2; - row12[q] = x2 * y2; - row13[q] = x3 * y2; - row14[q] = x2 * y3; - row15[q] = x3 * y3; - }; - - write_q(0u); - write_q(1u); - write_q(2u); - write_q(3u); - return; - 
} - - Real lx[4][4]; - Real ly[4][4]; - for (std::size_t q = 0; q < 4u; ++q) { - const auto& xi = points[q]; - fill_order3_axis_values(xi[0], lx[q]); - fill_order3_axis_values(xi[1], ly[q]); - } - - write_quad_order3_value_row_q4(values_out + 0u * output_stride, lx, ly, 0u, 0u); - write_quad_order3_value_row_q4(values_out + 1u * output_stride, lx, ly, 1u, 0u); - write_quad_order3_value_row_q4(values_out + 2u * output_stride, lx, ly, 1u, 1u); - write_quad_order3_value_row_q4(values_out + 3u * output_stride, lx, ly, 0u, 1u); - write_quad_order3_value_row_q4(values_out + 4u * output_stride, lx, ly, 2u, 0u); - write_quad_order3_value_row_q4(values_out + 5u * output_stride, lx, ly, 3u, 0u); - write_quad_order3_value_row_q4(values_out + 6u * output_stride, lx, ly, 1u, 2u); - write_quad_order3_value_row_q4(values_out + 7u * output_stride, lx, ly, 1u, 3u); - write_quad_order3_value_row_q4(values_out + 8u * output_stride, lx, ly, 3u, 1u); - write_quad_order3_value_row_q4(values_out + 9u * output_stride, lx, ly, 2u, 1u); - write_quad_order3_value_row_q4(values_out + 10u * output_stride, lx, ly, 0u, 3u); - write_quad_order3_value_row_q4(values_out + 11u * output_stride, lx, ly, 0u, 2u); - write_quad_order3_value_row_q4(values_out + 12u * output_stride, lx, ly, 2u, 2u); - write_quad_order3_value_row_q4(values_out + 13u * output_stride, lx, ly, 3u, 2u); - write_quad_order3_value_row_q4(values_out + 14u * output_stride, lx, ly, 2u, 3u); - write_quad_order3_value_row_q4(values_out + 15u * output_stride, lx, ly, 3u, 3u); -} - -void evaluate_quad_order3_values_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out) { - if (points.size() == 4u) { - evaluate_quad_order3_values_q4(points, output_stride, values_out); - return; - } - - Real* row0 = values_out + 0u * output_stride; - Real* row1 = values_out + 1u * output_stride; - Real* row2 = values_out + 2u * output_stride; - Real* row3 = values_out + 3u * output_stride; - Real* row4 = 
values_out + 4u * output_stride; - Real* row5 = values_out + 5u * output_stride; - Real* row6 = values_out + 6u * output_stride; - Real* row7 = values_out + 7u * output_stride; - Real* row8 = values_out + 8u * output_stride; - Real* row9 = values_out + 9u * output_stride; - Real* row10 = values_out + 10u * output_stride; - Real* row11 = values_out + 11u * output_stride; - Real* row12 = values_out + 12u * output_stride; - Real* row13 = values_out + 13u * output_stride; - Real* row14 = values_out + 14u * output_stride; - Real* row15 = values_out + 15u * output_stride; - - for (std::size_t q = 0; q < points.size(); ++q) { - const auto& xi = points[q]; - Real lx[4]; - Real ly[4]; - fill_order3_axis_values(xi[0], lx); - fill_order3_axis_values(xi[1], ly); - row0[q] = lx[0] * ly[0]; - row1[q] = lx[1] * ly[0]; - row2[q] = lx[1] * ly[1]; - row3[q] = lx[0] * ly[1]; - row4[q] = lx[2] * ly[0]; - row5[q] = lx[3] * ly[0]; - row6[q] = lx[1] * ly[2]; - row7[q] = lx[1] * ly[3]; - row8[q] = lx[3] * ly[1]; - row9[q] = lx[2] * ly[1]; - row10[q] = lx[0] * ly[3]; - row11[q] = lx[0] * ly[2]; - row12[q] = lx[2] * ly[2]; - row13[q] = lx[3] * ly[2]; - row14[q] = lx[2] * ly[3]; - row15[q] = lx[3] * ly[3]; - } -} - -template -inline void write_quad_gradient_row_q4( - Real* SVMP_RESTRICT row, - std::size_t output_stride, - const Real (&lx)[4][N], - const Real (&ly)[4][N], - const Real (&dx)[4][N], - const Real (&dy)[4][N], - std::size_t i, - std::size_t j) { - row[0u] = dx[0][i] * ly[0][j]; - row[1u] = dx[1][i] * ly[1][j]; - row[2u] = dx[2][i] * ly[2][j]; - row[3u] = dx[3][i] * ly[3][j]; - row[output_stride + 0u] = lx[0][i] * dy[0][j]; - row[output_stride + 1u] = lx[1][i] * dy[1][j]; - row[output_stride + 2u] = lx[2][i] * dy[2][j]; - row[output_stride + 3u] = lx[3][i] * dy[3][j]; - row[2u * output_stride + 0u] = Real(0); - row[2u * output_stride + 1u] = Real(0); - row[2u * output_stride + 2u] = Real(0); - row[2u * output_stride + 3u] = Real(0); -} - -inline void 
fill_order4_axis_values_first(Real x, - Real* SVMP_RESTRICT values, - Real* SVMP_RESTRICT first) { - const Real r = (x + Real(1)) * Real(2); - const Real r2 = r * r; - const Real r3 = r2 * r; - const Real f0 = r; - const Real f1 = r - Real(1); - const Real f2 = r - Real(2); - const Real f3 = r - Real(3); - const Real f4 = r - Real(4); - const Real f01 = f0 * f1; - const Real f12 = f1 * f2; - const Real f23 = f2 * f3; - const Real f34 = f3 * f4; - - values[0] = (f12 * f34) / Real(24); - values[1] = (f01 * f23) / Real(24); - values[2] = -(f0 * f2 * f34) / Real(6); - values[3] = (f01 * f34) / Real(4); - values[4] = -(f01 * f2 * f4) / Real(6); - - first[0] = (Real(4) * r3 - Real(30) * r2 + Real(70) * r - Real(50)) / Real(12); - first[1] = (Real(4) * r3 - Real(18) * r2 + Real(22) * r - Real(6)) / Real(12); - first[2] = (-Real(4) * r3 + Real(27) * r2 - Real(52) * r + Real(24)) / Real(3); - first[3] = Real(2) * r3 - Real(12) * r2 + Real(19) * r - Real(6); - first[4] = (-Real(4) * r3 + Real(21) * r2 - Real(28) * r + Real(8)) / Real(3); -} - -SVMP_LAGRANGE_NOINLINE void evaluate_quad_order3_gradients_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT gradients_out) { - constexpr auto node_axes = detail::make_quad_tensor_node_axes<3>(); - - Real lx[4][4]; - Real ly[4][4]; - Real dx[4][4]; - Real dy[4][4]; - for (std::size_t q = 0; q < 4u; ++q) { - const auto& xi = points[q]; - fill_order3_axis_values_first(xi[0], lx[q], dx[q]); - fill_order3_axis_values_first(xi[1], ly[q], dy[q]); - } - - for (std::size_t node = 0; node < node_axes.size(); ++node) { - const auto& axes = node_axes[node]; - write_quad_gradient_row_q4( - gradients_out + node * 3u * output_stride, - output_stride, - lx, - ly, - dx, - dy, - axes[0], - axes[1]); - } -} - -SVMP_LAGRANGE_NOINLINE void evaluate_quad_order4_gradients_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT gradients_out) { - constexpr auto node_axes = 
detail::make_quad_tensor_node_axes<4>(); - - Real lx[4][5]; - Real ly[4][5]; - Real dx[4][5]; - Real dy[4][5]; - for (std::size_t q = 0; q < 4u; ++q) { - const auto& xi = points[q]; - fill_order4_axis_values_first(xi[0], lx[q], dx[q]); - fill_order4_axis_values_first(xi[1], ly[q], dy[q]); - } - - for (std::size_t node = 0; node < node_axes.size(); ++node) { - const auto& axes = node_axes[node]; - write_quad_gradient_row_q4( - gradients_out + node * 3u * output_stride, - output_stride, - lx, - ly, - dx, - dy, - axes[0], - axes[1]); - } -} - -void evaluate_quad_order3_gradients_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT gradients_out) { - if (points.size() == 4u) { - evaluate_quad_order3_gradients_q4(points, output_stride, gradients_out); - return; - } - - Real* row0 = gradients_out + 0u * 3u * output_stride; - Real* row1 = gradients_out + 1u * 3u * output_stride; - Real* row2 = gradients_out + 2u * 3u * output_stride; - Real* row3 = gradients_out + 3u * 3u * output_stride; - Real* row4 = gradients_out + 4u * 3u * output_stride; - Real* row5 = gradients_out + 5u * 3u * output_stride; - Real* row6 = gradients_out + 6u * 3u * output_stride; - Real* row7 = gradients_out + 7u * 3u * output_stride; - Real* row8 = gradients_out + 8u * 3u * output_stride; - Real* row9 = gradients_out + 9u * 3u * output_stride; - Real* row10 = gradients_out + 10u * 3u * output_stride; - Real* row11 = gradients_out + 11u * 3u * output_stride; - Real* row12 = gradients_out + 12u * 3u * output_stride; - Real* row13 = gradients_out + 13u * 3u * output_stride; - Real* row14 = gradients_out + 14u * 3u * output_stride; - Real* row15 = gradients_out + 15u * 3u * output_stride; - - for (std::size_t q = 0; q < points.size(); ++q) { - const auto& xi = points[q]; - Real lx[4]; - Real ly[4]; - Real dx[4]; - Real dy[4]; - fill_order3_axis_values_first(xi[0], lx, dx); - fill_order3_axis_values_first(xi[1], ly, dy); - write_quad_order2_gradient_q(row0, 
output_stride, q, dx[0] * ly[0], lx[0] * dy[0]); - write_quad_order2_gradient_q(row1, output_stride, q, dx[1] * ly[0], lx[1] * dy[0]); - write_quad_order2_gradient_q(row2, output_stride, q, dx[1] * ly[1], lx[1] * dy[1]); - write_quad_order2_gradient_q(row3, output_stride, q, dx[0] * ly[1], lx[0] * dy[1]); - write_quad_order2_gradient_q(row4, output_stride, q, dx[2] * ly[0], lx[2] * dy[0]); - write_quad_order2_gradient_q(row5, output_stride, q, dx[3] * ly[0], lx[3] * dy[0]); - write_quad_order2_gradient_q(row6, output_stride, q, dx[1] * ly[2], lx[1] * dy[2]); - write_quad_order2_gradient_q(row7, output_stride, q, dx[1] * ly[3], lx[1] * dy[3]); - write_quad_order2_gradient_q(row8, output_stride, q, dx[3] * ly[1], lx[3] * dy[1]); - write_quad_order2_gradient_q(row9, output_stride, q, dx[2] * ly[1], lx[2] * dy[1]); - write_quad_order2_gradient_q(row10, output_stride, q, dx[0] * ly[3], lx[0] * dy[3]); - write_quad_order2_gradient_q(row11, output_stride, q, dx[0] * ly[2], lx[0] * dy[2]); - write_quad_order2_gradient_q(row12, output_stride, q, dx[2] * ly[2], lx[2] * dy[2]); - write_quad_order2_gradient_q(row13, output_stride, q, dx[3] * ly[2], lx[3] * dy[2]); - write_quad_order2_gradient_q(row14, output_stride, q, dx[2] * ly[3], lx[2] * dy[3]); - write_quad_order2_gradient_q(row15, output_stride, q, dx[3] * ly[3], lx[3] * dy[3]); - } -} - -void evaluate_quad_order3_hessians_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT hessians_out) { - Real* row0 = hessians_out + 0u * 9u * output_stride; - Real* row1 = hessians_out + 1u * 9u * output_stride; - Real* row2 = hessians_out + 2u * 9u * output_stride; - Real* row3 = hessians_out + 3u * 9u * output_stride; - Real* row4 = hessians_out + 4u * 9u * output_stride; - Real* row5 = hessians_out + 5u * 9u * output_stride; - Real* row6 = hessians_out + 6u * 9u * output_stride; - Real* row7 = hessians_out + 7u * 9u * output_stride; - Real* row8 = hessians_out + 8u * 9u * output_stride; - Real* 
row9 = hessians_out + 9u * 9u * output_stride; - Real* row10 = hessians_out + 10u * 9u * output_stride; - Real* row11 = hessians_out + 11u * 9u * output_stride; - Real* row12 = hessians_out + 12u * 9u * output_stride; - Real* row13 = hessians_out + 13u * 9u * output_stride; - Real* row14 = hessians_out + 14u * 9u * output_stride; - Real* row15 = hessians_out + 15u * 9u * output_stride; - - for (std::size_t q = 0; q < points.size(); ++q) { - const auto& xi = points[q]; - Real lx[4]; - Real ly[4]; - Real dx[4]; - Real dy[4]; - Real hx[4]; - Real hy[4]; - fill_order3_axis_values_first_second(xi[0], lx, dx, hx); - fill_order3_axis_values_first_second(xi[1], ly, dy, hy); - write_quad_order2_hessian_q(row0, output_stride, q, hx[0] * ly[0], dx[0] * dy[0], lx[0] * hy[0]); - write_quad_order2_hessian_q(row1, output_stride, q, hx[1] * ly[0], dx[1] * dy[0], lx[1] * hy[0]); - write_quad_order2_hessian_q(row2, output_stride, q, hx[1] * ly[1], dx[1] * dy[1], lx[1] * hy[1]); - write_quad_order2_hessian_q(row3, output_stride, q, hx[0] * ly[1], dx[0] * dy[1], lx[0] * hy[1]); - write_quad_order2_hessian_q(row4, output_stride, q, hx[2] * ly[0], dx[2] * dy[0], lx[2] * hy[0]); - write_quad_order2_hessian_q(row5, output_stride, q, hx[3] * ly[0], dx[3] * dy[0], lx[3] * hy[0]); - write_quad_order2_hessian_q(row6, output_stride, q, hx[1] * ly[2], dx[1] * dy[2], lx[1] * hy[2]); - write_quad_order2_hessian_q(row7, output_stride, q, hx[1] * ly[3], dx[1] * dy[3], lx[1] * hy[3]); - write_quad_order2_hessian_q(row8, output_stride, q, hx[3] * ly[1], dx[3] * dy[1], lx[3] * hy[1]); - write_quad_order2_hessian_q(row9, output_stride, q, hx[2] * ly[1], dx[2] * dy[1], lx[2] * hy[1]); - write_quad_order2_hessian_q(row10, output_stride, q, hx[0] * ly[3], dx[0] * dy[3], lx[0] * hy[3]); - write_quad_order2_hessian_q(row11, output_stride, q, hx[0] * ly[2], dx[0] * dy[2], lx[0] * hy[2]); - write_quad_order2_hessian_q(row12, output_stride, q, hx[2] * ly[2], dx[2] * dy[2], lx[2] * hy[2]); - 
write_quad_order2_hessian_q(row13, output_stride, q, hx[3] * ly[2], dx[3] * dy[2], lx[3] * hy[2]); - write_quad_order2_hessian_q(row14, output_stride, q, hx[2] * ly[3], dx[2] * dy[3], lx[2] * hy[3]); - write_quad_order2_hessian_q(row15, output_stride, q, hx[3] * ly[3], dx[3] * dy[3], lx[3] * hy[3]); - } -} - -template -inline void write_quad_order3_all_q4( - std::size_t output_stride, - std::size_t i, - std::size_t j, - const Real lx[4][4], - const Real ly[4][4], - const Real dx[4][4], - const Real dy[4][4], - const Real hx[4][4], - const Real hy[4][4], - Real* SVMP_RESTRICT value_row, - Real* SVMP_RESTRICT grad_row, - Real* SVMP_RESTRICT hess_row) { - const Real xv = lx[Q][i]; - const Real yv = ly[Q][j]; - const Real xd = dx[Q][i]; - const Real yd = dy[Q][j]; - const Real hxy = xd * yd; - - value_row[Q] = xv * yv; - grad_row[0u * output_stride + Q] = xd * yv; - grad_row[1u * output_stride + Q] = xv * yd; - grad_row[2u * output_stride + Q] = Real(0); - hess_row[0u * output_stride + Q] = hx[Q][i] * yv; - hess_row[4u * output_stride + Q] = xv * hy[Q][j]; - hess_row[8u * output_stride + Q] = Real(0); - hess_row[1u * output_stride + Q] = hxy; - hess_row[3u * output_stride + Q] = hxy; - hess_row[2u * output_stride + Q] = Real(0); - hess_row[6u * output_stride + Q] = Real(0); - hess_row[5u * output_stride + Q] = Real(0); - hess_row[7u * output_stride + Q] = Real(0); -} - -void evaluate_quad_order3_all_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - constexpr auto node_axes = detail::make_quad_tensor_node_axes<3>(); - - Real lx[4][4]; - Real ly[4][4]; - Real dx[4][4]; - Real dy[4][4]; - Real hx[4][4]; - Real hy[4][4]; - for (std::size_t q = 0; q < 4u; ++q) { - const auto& xi = points[q]; - fill_order3_axis_values_first_second(xi[0], lx[q], dx[q], hx[q]); - fill_order3_axis_values_first_second(xi[1], ly[q], dy[q], hy[q]); - } - - for (std::size_t 
node = 0; node < node_axes.size(); ++node) { - const auto& axes = node_axes[node]; - const std::size_t i = axes[0]; - const std::size_t j = axes[1]; - Real* value_row = values_out + node * output_stride; - Real* grad_row = gradients_out + node * 3u * output_stride; - Real* hess_row = hessians_out + node * 9u * output_stride; - write_quad_order3_all_q4<0u>( - output_stride, i, j, lx, ly, dx, dy, hx, hy, value_row, grad_row, hess_row); - write_quad_order3_all_q4<1u>( - output_stride, i, j, lx, ly, dx, dy, hx, hy, value_row, grad_row, hess_row); - write_quad_order3_all_q4<2u>( - output_stride, i, j, lx, ly, dx, dy, hx, hy, value_row, grad_row, hess_row); - write_quad_order3_all_q4<3u>( - output_stride, i, j, lx, ly, dx, dy, hx, hy, value_row, grad_row, hess_row); - } -} - -void evaluate_hex_order3_values_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out) { - constexpr auto node_axes = detail::make_hex_tensor_node_axes<3>(); - - Real lx[4][4]; - Real ly[4][4]; - Real lz[4][4]; - for (std::size_t q = 0; q < 4u; ++q) { - const auto& xi = points[q]; - fill_order3_axis_values(xi[0], lx[q]); - fill_order3_axis_values(xi[1], ly[q]); - fill_order3_axis_values(xi[2], lz[q]); - } - - for (std::size_t node = 0; node < node_axes.size(); ++node) { - const auto& axes = node_axes[node]; - const std::size_t i = axes[0]; - const std::size_t j = axes[1]; - const std::size_t k = axes[2]; - Real* row = values_out + node * output_stride; - row[0] = lx[0][i] * ly[0][j] * lz[0][k]; - row[1] = lx[1][i] * ly[1][j] * lz[1][k]; - row[2] = lx[2][i] * ly[2][j] * lz[2][k]; - row[3] = lx[3][i] * ly[3][j] * lz[3][k]; - } -} - -void evaluate_hex_order3_gradients_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT gradients_out) { - constexpr auto node_axes = detail::make_hex_tensor_node_axes<3>(); - - Real lx[4][4]; - Real ly[4][4]; - Real lz[4][4]; - Real dx[4][4]; - Real dy[4][4]; - Real dz[4][4]; - for (std::size_t q = 
0; q < 4u; ++q) { - const auto& xi = points[q]; - fill_order3_axis_values_first(xi[0], lx[q], dx[q]); - fill_order3_axis_values_first(xi[1], ly[q], dy[q]); - fill_order3_axis_values_first(xi[2], lz[q], dz[q]); - } - - for (std::size_t node = 0; node < node_axes.size(); ++node) { - const auto& axes = node_axes[node]; - const std::size_t i = axes[0]; - const std::size_t j = axes[1]; - const std::size_t k = axes[2]; - Real* row = gradients_out + node * 3u * output_stride; - row[0] = dx[0][i] * ly[0][j] * lz[0][k]; - row[1] = dx[1][i] * ly[1][j] * lz[1][k]; - row[2] = dx[2][i] * ly[2][j] * lz[2][k]; - row[3] = dx[3][i] * ly[3][j] * lz[3][k]; - row[output_stride + 0u] = lx[0][i] * dy[0][j] * lz[0][k]; - row[output_stride + 1u] = lx[1][i] * dy[1][j] * lz[1][k]; - row[output_stride + 2u] = lx[2][i] * dy[2][j] * lz[2][k]; - row[output_stride + 3u] = lx[3][i] * dy[3][j] * lz[3][k]; - row[2u * output_stride + 0u] = lx[0][i] * ly[0][j] * dz[0][k]; - row[2u * output_stride + 1u] = lx[1][i] * ly[1][j] * dz[1][k]; - row[2u * output_stride + 2u] = lx[2][i] * ly[2][j] * dz[2][k]; - row[2u * output_stride + 3u] = lx[3][i] * ly[3][j] * dz[3][k]; - } -} - -template -inline void write_hex_order3_q4_hessian_outputs( - std::size_t output_stride, - std::size_t i, - std::size_t j, - std::size_t k, - const Real lx[4][4], - const Real ly[4][4], - const Real lz[4][4], - const Real dx[4][4], - const Real dy[4][4], - const Real dz[4][4], - const Real hx[4][4], - const Real hy[4][4], - const Real hz[4][4], - Real* SVMP_RESTRICT value_row, - Real* SVMP_RESTRICT grad_row, - Real* SVMP_RESTRICT hess_row) { - const Real xv = lx[Q][i]; - const Real yv = ly[Q][j]; - const Real zv = lz[Q][k]; - const Real yz = yv * zv; - - if constexpr (WriteValue) { - value_row[Q] = xv * yz; - } - - const Real xd = dx[Q][i]; - const Real yd = dy[Q][j]; - const Real zd = dz[Q][k]; - const Real yd_z = yd * zv; - const Real yv_zd = yv * zd; - - if constexpr (WriteGradient) { - grad_row[0u * output_stride + Q] = xd * yz; 
- grad_row[1u * output_stride + Q] = xv * yd_z; - grad_row[2u * output_stride + Q] = xv * yv_zd; - } - - const Real hxy = xd * yd_z; - const Real hxz = xd * yv_zd; - const Real hyz = xv * yd * zd; - hess_row[0u * output_stride + Q] = hx[Q][i] * yz; - hess_row[4u * output_stride + Q] = xv * hy[Q][j] * zv; - hess_row[8u * output_stride + Q] = xv * yv * hz[Q][k]; - hess_row[1u * output_stride + Q] = hxy; - hess_row[3u * output_stride + Q] = hxy; - hess_row[2u * output_stride + Q] = hxz; - hess_row[6u * output_stride + Q] = hxz; - hess_row[5u * output_stride + Q] = hyz; - hess_row[7u * output_stride + Q] = hyz; -} - -template -void evaluate_hex_order3_q4_hessian_outputs( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - constexpr auto node_axes = detail::make_hex_tensor_node_axes<3>(); - - Real lx[4][4]; - Real ly[4][4]; - Real lz[4][4]; - Real dx[4][4]; - Real dy[4][4]; - Real dz[4][4]; - Real hx[4][4]; - Real hy[4][4]; - Real hz[4][4]; - for (std::size_t q = 0; q < 4u; ++q) { - const auto& xi = points[q]; - fill_order3_axis_values_first_second(xi[0], lx[q], dx[q], hx[q]); - fill_order3_axis_values_first_second(xi[1], ly[q], dy[q], hy[q]); - fill_order3_axis_values_first_second(xi[2], lz[q], dz[q], hz[q]); - } - - for (std::size_t node = 0; node < node_axes.size(); ++node) { - const auto& axes = node_axes[node]; - const std::size_t i = axes[0]; - const std::size_t j = axes[1]; - const std::size_t k = axes[2]; - Real* value_row = values_out ? values_out + node * output_stride : nullptr; - Real* grad_row = gradients_out ? 
gradients_out + node * 3u * output_stride : nullptr; - Real* hess_row = hessians_out + node * 9u * output_stride; - write_hex_order3_q4_hessian_outputs<0u, WriteValue, WriteGradient>( - output_stride, i, j, k, lx, ly, lz, dx, dy, dz, hx, hy, hz, - value_row, grad_row, hess_row); - write_hex_order3_q4_hessian_outputs<1u, WriteValue, WriteGradient>( - output_stride, i, j, k, lx, ly, lz, dx, dy, dz, hx, hy, hz, - value_row, grad_row, hess_row); - write_hex_order3_q4_hessian_outputs<2u, WriteValue, WriteGradient>( - output_stride, i, j, k, lx, ly, lz, dx, dy, dz, hx, hy, hz, - value_row, grad_row, hess_row); - write_hex_order3_q4_hessian_outputs<3u, WriteValue, WriteGradient>( - output_stride, i, j, k, lx, ly, lz, dx, dy, dz, hx, hy, hz, - value_row, grad_row, hess_row); - } -} - -void evaluate_hex_order3_hessians_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT hessians_out) { - evaluate_hex_order3_q4_hessian_outputs( - points, output_stride, nullptr, nullptr, hessians_out); -} - -void evaluate_hex_order3_all_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - evaluate_hex_order3_q4_hessian_outputs( - points, output_stride, values_out, gradients_out, hessians_out); -} - -void evaluate_hex_order2_values_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out) { - Real* row0 = values_out + 0u * output_stride; - Real* row1 = values_out + 1u * output_stride; - Real* row2 = values_out + 2u * output_stride; - Real* row3 = values_out + 3u * output_stride; - Real* row4 = values_out + 4u * output_stride; - Real* row5 = values_out + 5u * output_stride; - Real* row6 = values_out + 6u * output_stride; - Real* row7 = values_out + 7u * output_stride; - Real* row8 = values_out + 8u * output_stride; - Real* row9 = values_out + 9u * output_stride; - Real* row10 = values_out + 10u * 
output_stride; - Real* row11 = values_out + 11u * output_stride; - Real* row12 = values_out + 12u * output_stride; - Real* row13 = values_out + 13u * output_stride; - Real* row14 = values_out + 14u * output_stride; - Real* row15 = values_out + 15u * output_stride; - Real* row16 = values_out + 16u * output_stride; - Real* row17 = values_out + 17u * output_stride; - Real* row18 = values_out + 18u * output_stride; - Real* row19 = values_out + 19u * output_stride; - Real* row20 = values_out + 20u * output_stride; - Real* row21 = values_out + 21u * output_stride; - Real* row22 = values_out + 22u * output_stride; - Real* row23 = values_out + 23u * output_stride; - Real* row24 = values_out + 24u * output_stride; - Real* row25 = values_out + 25u * output_stride; - Real* row26 = values_out + 26u * output_stride; - - for (std::size_t q = 0; q < points.size(); ++q) { - const auto& xi = points[q]; - const Real x = xi[0]; - const Real y = xi[1]; - const Real z = xi[2]; - const Real x0 = x * (x - Real(1)) * Real(0.5); - const Real x1 = x * (x + Real(1)) * Real(0.5); - const Real x2 = Real(1) - x * x; - const Real y0 = y * (y - Real(1)) * Real(0.5); - const Real y1 = y * (y + Real(1)) * Real(0.5); - const Real y2 = Real(1) - y * y; - const Real z0 = z * (z - Real(1)) * Real(0.5); - const Real z1 = z * (z + Real(1)) * Real(0.5); - const Real z2 = Real(1) - z * z; - const Real x0y0 = x0 * y0; - const Real x1y0 = x1 * y0; - const Real x1y1 = x1 * y1; - const Real x0y1 = x0 * y1; - const Real x2y0 = x2 * y0; - const Real x1y2 = x1 * y2; - const Real x2y1 = x2 * y1; - const Real x0y2 = x0 * y2; - const Real x2y2 = x2 * y2; - - row0[q] = x0y0 * z0; - row1[q] = x1y0 * z0; - row2[q] = x1y1 * z0; - row3[q] = x0y1 * z0; - row4[q] = x0y0 * z1; - row5[q] = x1y0 * z1; - row6[q] = x1y1 * z1; - row7[q] = x0y1 * z1; - row8[q] = x2y0 * z0; - row9[q] = x1y2 * z0; - row10[q] = x2y1 * z0; - row11[q] = x0y2 * z0; - row12[q] = x2y0 * z1; - row13[q] = x1y2 * z1; - row14[q] = x2y1 * z1; - row15[q] = 
x0y2 * z1; - row16[q] = x0y0 * z2; - row17[q] = x1y0 * z2; - row18[q] = x1y1 * z2; - row19[q] = x0y1 * z2; - row20[q] = x2y2 * z0; - row21[q] = x2y2 * z1; - row22[q] = x2y0 * z2; - row23[q] = x1y2 * z2; - row24[q] = x2y1 * z2; - row25[q] = x0y2 * z2; - row26[q] = x2y2 * z2; - } -} - -inline void fill_order2_axis_values_first(Real x, - Real* SVMP_RESTRICT values, - Real* SVMP_RESTRICT first) { - values[0] = x * (x - Real(1)) * Real(0.5); - values[1] = x * (x + Real(1)) * Real(0.5); - values[2] = Real(1) - x * x; - first[0] = x - Real(0.5); - first[1] = x + Real(0.5); - first[2] = Real(-2) * x; -} - -inline void fill_order2_axis_values_first_second(Real x, - Real* SVMP_RESTRICT values, - Real* SVMP_RESTRICT first, - Real* SVMP_RESTRICT second) { - fill_order2_axis_values_first(x, values, first); - second[0] = Real(1); - second[1] = Real(1); - second[2] = Real(-2); -} - -template -inline void write_hex_order2_hessian_q4( - std::size_t output_stride, - std::size_t i, - std::size_t j, - std::size_t k, - const Real lx[4][3], - const Real ly[4][3], - const Real lz[4][3], - const Real dx[4][3], - const Real dy[4][3], - const Real dz[4][3], - const Real hx[4][3], - const Real hy[4][3], - const Real hz[4][3], - Real* SVMP_RESTRICT hess_row) { - const Real xv = lx[Q][i]; - const Real yv = ly[Q][j]; - const Real zv = lz[Q][k]; - const Real yz = yv * zv; - const Real xd = dx[Q][i]; - const Real yd = dy[Q][j]; - const Real zd = dz[Q][k]; - const Real yd_z = yd * zv; - const Real yv_zd = yv * zd; - const Real hxy = xd * yd_z; - const Real hxz = xd * yv_zd; - const Real hyz = xv * yd * zd; - hess_row[0u * output_stride + Q] = hx[Q][i] * yz; - hess_row[4u * output_stride + Q] = xv * hy[Q][j] * zv; - hess_row[8u * output_stride + Q] = xv * yv * hz[Q][k]; - hess_row[1u * output_stride + Q] = hxy; - hess_row[3u * output_stride + Q] = hxy; - hess_row[2u * output_stride + Q] = hxz; - hess_row[6u * output_stride + Q] = hxz; - hess_row[5u * output_stride + Q] = hyz; - hess_row[7u * 
output_stride + Q] = hyz; -} - -void evaluate_hex_order2_hessians_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT hessians_out) { - constexpr auto node_axes = detail::make_hex_tensor_node_axes<2>(); - - Real lx[4][3]; - Real ly[4][3]; - Real lz[4][3]; - Real dx[4][3]; - Real dy[4][3]; - Real dz[4][3]; - Real hx[4][3]; - Real hy[4][3]; - Real hz[4][3]; - for (std::size_t q = 0; q < 4u; ++q) { - const auto& xi = points[q]; - fill_order2_axis_values_first_second(xi[0], lx[q], dx[q], hx[q]); - fill_order2_axis_values_first_second(xi[1], ly[q], dy[q], hy[q]); - fill_order2_axis_values_first_second(xi[2], lz[q], dz[q], hz[q]); - } - - for (std::size_t node = 0; node < node_axes.size(); ++node) { - const auto& axes = node_axes[node]; - const std::size_t i = axes[0]; - const std::size_t j = axes[1]; - const std::size_t k = axes[2]; - Real* hess_row = hessians_out + node * 9u * output_stride; - write_hex_order2_hessian_q4<0u>( - output_stride, i, j, k, lx, ly, lz, dx, dy, dz, hx, hy, hz, hess_row); - write_hex_order2_hessian_q4<1u>( - output_stride, i, j, k, lx, ly, lz, dx, dy, dz, hx, hy, hz, hess_row); - write_hex_order2_hessian_q4<2u>( - output_stride, i, j, k, lx, ly, lz, dx, dy, dz, hx, hy, hz, hess_row); - write_hex_order2_hessian_q4<3u>( - output_stride, i, j, k, lx, ly, lz, dx, dy, dz, hx, hy, hz, hess_row); - } -} - -template -inline void write_quad_order2_all_q4( - std::size_t output_stride, - std::size_t i, - std::size_t j, - const Real lx[4][3], - const Real ly[4][3], - const Real dx[4][3], - const Real dy[4][3], - const Real hx[4][3], - const Real hy[4][3], - Real* SVMP_RESTRICT value_row, - Real* SVMP_RESTRICT grad_row, - Real* SVMP_RESTRICT hess_row) { - const Real xv = lx[Q][i]; - const Real yv = ly[Q][j]; - const Real xd = dx[Q][i]; - const Real yd = dy[Q][j]; - const Real hxy = xd * yd; - - value_row[Q] = xv * yv; - grad_row[0u * output_stride + Q] = xd * yv; - grad_row[1u * output_stride + Q] = xv * yd; - grad_row[2u * 
output_stride + Q] = Real(0); - hess_row[0u * output_stride + Q] = hx[Q][i] * yv; - hess_row[4u * output_stride + Q] = xv * hy[Q][j]; - hess_row[8u * output_stride + Q] = Real(0); - hess_row[1u * output_stride + Q] = hxy; - hess_row[3u * output_stride + Q] = hxy; - hess_row[2u * output_stride + Q] = Real(0); - hess_row[6u * output_stride + Q] = Real(0); - hess_row[5u * output_stride + Q] = Real(0); - hess_row[7u * output_stride + Q] = Real(0); -} - -void evaluate_quad_order2_all_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - constexpr auto node_axes = detail::make_quad_tensor_node_axes<2>(); - - Real lx[4][3]; - Real ly[4][3]; - Real dx[4][3]; - Real dy[4][3]; - Real hx[4][3]; - Real hy[4][3]; - for (std::size_t q = 0; q < 4u; ++q) { - const auto& xi = points[q]; - fill_order2_axis_values_first_second(xi[0], lx[q], dx[q], hx[q]); - fill_order2_axis_values_first_second(xi[1], ly[q], dy[q], hy[q]); - } - - for (std::size_t node = 0; node < node_axes.size(); ++node) { - const auto& axes = node_axes[node]; - const std::size_t i = axes[0]; - const std::size_t j = axes[1]; - Real* value_row = values_out + node * output_stride; - Real* grad_row = gradients_out + node * 3u * output_stride; - Real* hess_row = hessians_out + node * 9u * output_stride; - write_quad_order2_all_q4<0u>( - output_stride, i, j, lx, ly, dx, dy, hx, hy, value_row, grad_row, hess_row); - write_quad_order2_all_q4<1u>( - output_stride, i, j, lx, ly, dx, dy, hx, hy, value_row, grad_row, hess_row); - write_quad_order2_all_q4<2u>( - output_stride, i, j, lx, ly, dx, dy, hx, hy, value_row, grad_row, hess_row); - write_quad_order2_all_q4<3u>( - output_stride, i, j, lx, ly, dx, dy, hx, hy, value_row, grad_row, hess_row); - } -} - -void evaluate_hex_order2_gradients_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT gradients_out) { - constexpr 
std::array, 27> node_axes = {{ - {{0u, 0u, 0u}}, {{1u, 0u, 0u}}, {{1u, 1u, 0u}}, {{0u, 1u, 0u}}, - {{0u, 0u, 1u}}, {{1u, 0u, 1u}}, {{1u, 1u, 1u}}, {{0u, 1u, 1u}}, - {{2u, 0u, 0u}}, {{1u, 2u, 0u}}, {{2u, 1u, 0u}}, {{0u, 2u, 0u}}, - {{2u, 0u, 1u}}, {{1u, 2u, 1u}}, {{2u, 1u, 1u}}, {{0u, 2u, 1u}}, - {{0u, 0u, 2u}}, {{1u, 0u, 2u}}, {{1u, 1u, 2u}}, {{0u, 1u, 2u}}, - {{2u, 2u, 0u}}, {{2u, 2u, 1u}}, {{2u, 0u, 2u}}, {{1u, 2u, 2u}}, - {{2u, 1u, 2u}}, {{0u, 2u, 2u}}, {{2u, 2u, 2u}}, - }}; - - Real lx[4][3]; - Real ly[4][3]; - Real lz[4][3]; - Real dx[4][3]; - Real dy[4][3]; - Real dz[4][3]; - for (std::size_t q = 0; q < 4u; ++q) { - const auto& xi = points[q]; - fill_order2_axis_values_first(xi[0], lx[q], dx[q]); - fill_order2_axis_values_first(xi[1], ly[q], dy[q]); - fill_order2_axis_values_first(xi[2], lz[q], dz[q]); - } - - for (std::size_t node = 0; node < node_axes.size(); ++node) { - const auto& axes = node_axes[node]; - const std::size_t i = axes[0]; - const std::size_t j = axes[1]; - const std::size_t k = axes[2]; - Real* row = gradients_out + node * 3u * output_stride; - row[0] = dx[0][i] * ly[0][j] * lz[0][k]; - row[1] = dx[1][i] * ly[1][j] * lz[1][k]; - row[2] = dx[2][i] * ly[2][j] * lz[2][k]; - row[3] = dx[3][i] * ly[3][j] * lz[3][k]; - row[output_stride + 0u] = lx[0][i] * dy[0][j] * lz[0][k]; - row[output_stride + 1u] = lx[1][i] * dy[1][j] * lz[1][k]; - row[output_stride + 2u] = lx[2][i] * dy[2][j] * lz[2][k]; - row[output_stride + 3u] = lx[3][i] * dy[3][j] * lz[3][k]; - row[2u * output_stride + 0u] = lx[0][i] * ly[0][j] * dz[0][k]; - row[2u * output_stride + 1u] = lx[1][i] * ly[1][j] * dz[1][k]; - row[2u * output_stride + 2u] = lx[2][i] * ly[2][j] * dz[2][k]; - row[2u * output_stride + 3u] = lx[3][i] * ly[3][j] * dz[3][k]; - } -} - -template -void evaluate_constant_fast_hessians_strided( - std::size_t num_qpts, - std::size_t output_stride, - Real* SVMP_RESTRICT hessians_out) { - std::array fast_hessians{}; - FastBasis::evaluate_hessians(math::Vector{}, 
fast_hessians); - for (std::size_t i = 0; i < fast_hessians.size(); ++i) { - const Hessian& hessian = fast_hessians[i]; - Real* H = hessians_out + i * 9u * output_stride; - const Real h00 = hessian(0, 0); - const Real h01 = hessian(0, 1); - const Real h02 = hessian(0, 2); - const Real h10 = hessian(1, 0); - const Real h11 = hessian(1, 1); - const Real h12 = hessian(1, 2); - const Real h20 = hessian(2, 0); - const Real h21 = hessian(2, 1); - const Real h22 = hessian(2, 2); - for (std::size_t q = 0; q < num_qpts; ++q) { - H[0u * output_stride + q] = h00; - H[1u * output_stride + q] = h01; - H[2u * output_stride + q] = h02; - H[3u * output_stride + q] = h10; - H[4u * output_stride + q] = h11; - H[5u * output_stride + q] = h12; - H[6u * output_stride + q] = h20; - H[7u * output_stride + q] = h21; - H[8u * output_stride + q] = h22; - } - } -} - -template -void evaluate_fast_outputs_with_constant_hessians_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - for (std::size_t q = 0; q < points.size(); ++q) { - const auto& xi = points[q]; - if (values_out != nullptr) { - std::array fast_values{}; - FastBasis::evaluate(xi, fast_values); - for (std::size_t i = 0; i < fast_values.size(); ++i) { - values_out[i * output_stride + q] = fast_values[i]; - } - } - if (gradients_out != nullptr) { - std::array fast_gradients{}; - FastBasis::evaluate_gradients(xi, fast_gradients); - for (std::size_t i = 0; i < fast_gradients.size(); ++i) { - Real* g = gradients_out + i * 3u * output_stride; - g[0u * output_stride + q] = fast_gradients[i][0]; - g[1u * output_stride + q] = fast_gradients[i][1]; - g[2u * output_stride + q] = fast_gradients[i][2]; - } - } - } - evaluate_constant_fast_hessians_strided( - points.size(), output_stride, hessians_out); -} - -template -void evaluate_wedge_fast_outputs_strided( - const std::vector>& wedge_indices, - const std::vector>& 
points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - static_assert(Order >= 1 && Order <= 2, - "wedge fast outputs rely on low-order public triangle ordering"); - using TriFast = LagrangeTriFast; - constexpr std::size_t axis_size = static_cast(Order + 1); - const bool need_grad = gradients_out != nullptr; - const bool need_hess = hessians_out != nullptr; - - for (std::size_t q = 0; q < points.size(); ++q) { - const auto& xi = points[q]; - std::array tri_values{}; - std::array tri_gradients{}; - std::array tri_hessians{}; - std::array z_values{}; - std::array z_first{}; - std::array z_second{}; - - TriFast::evaluate(xi, tri_values); - if (need_grad || need_hess) { - TriFast::evaluate_gradients(xi, tri_gradients); - } - if (need_hess) { - TriFast::evaluate_hessians(xi, tri_hessians); - detail::fill_axis_values_first_second(xi[2], z_values, z_first, z_second); - } else if (need_grad) { - detail::fill_axis_values_first(xi[2], z_values, z_first); - } else { - detail::fill_axis_values(xi[2], z_values); - } - - for (std::size_t node = 0; node < wedge_indices.size(); ++node) { - const auto& index = wedge_indices[node]; - const std::size_t tri = index[0]; - const std::size_t z = index[1]; - const Real tri_v = tri_values[tri]; - const Real zv = z_values[z]; - - if (values_out != nullptr) { - values_out[node * output_stride + q] = tri_v * zv; - } - - if (gradients_out != nullptr) { - Real* g = gradients_out + node * 3u * output_stride; - const Gradient& tri_g = tri_gradients[tri]; - g[0u * output_stride + q] = tri_g[0] * zv; - g[1u * output_stride + q] = tri_g[1] * zv; - g[2u * output_stride + q] = tri_v * z_first[z]; - } - - if (hessians_out != nullptr) { - Real* H = hessians_out + node * 9u * output_stride; - const Gradient& tri_g = tri_gradients[tri]; - const Hessian& tri_H = tri_hessians[tri]; - const Real zd = z_first[z]; - const Real hxz = tri_g[0] * zd; - const Real hxy = 
tri_H(0, 1) * zv; - const Real hyz = tri_g[1] * zd; - H[0u * output_stride + q] = tri_H(0, 0) * zv; - H[1u * output_stride + q] = hxy; - H[2u * output_stride + q] = hxz; - H[3u * output_stride + q] = hxy; - H[4u * output_stride + q] = tri_H(1, 1) * zv; - H[5u * output_stride + q] = hyz; - H[6u * output_stride + q] = hxz; - H[7u * output_stride + q] = hyz; - H[8u * output_stride + q] = tri_v * z_second[z]; - } - } - } -} - -template -inline void fill_triangle_simplex_product_factors(Real lambda, Real* SVMP_RESTRICT factors) { - const Real t = static_cast(Order) * lambda; - factors[0] = Real(1); - for (int a = 1; a <= Order; ++a) { - factors[a] = - factors[a - 1] * - (t - static_cast(a - 1)) / - static_cast(a); - } -} - -template -SVMP_LAGRANGE_NOINLINE SVMP_LAGRANGE_ALIGN64 bool evaluate_wedge_values_product_q4( - const std::vector>& simplex_exponents, - const std::vector>& wedge_indices, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out) { - constexpr std::size_t tri_count = - static_cast((Order + 1) * (Order + 2) / 2); - if (simplex_exponents.size() != tri_count || points.size() != 4u) { - return false; - } - - Real tri_values[4][tri_count]; - std::array z_values[4]; - for (std::size_t q = 0; q < 4u; ++q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l0 = Real(1) - l1 - l2; - Real f0[Order + 1]; - Real f1[Order + 1]; - Real f2[Order + 1]; - fill_triangle_simplex_product_factors(l0, f0); - fill_triangle_simplex_product_factors(l1, f1); - fill_triangle_simplex_product_factors(l2, f2); - detail::fill_axis_values(xi[2], z_values[q]); - - for (std::size_t tri = 0; tri < tri_count; ++tri) { - const auto& e = simplex_exponents[tri]; - tri_values[q][tri] = - f0[static_cast(e[0])] * - f1[static_cast(e[1])] * - f2[static_cast(e[2])]; - } - } - - for (std::size_t node = 0; node < wedge_indices.size(); ++node) { - const auto& index = wedge_indices[node]; - const std::size_t tri = 
index[0]; - const std::size_t z = index[1]; - Real* SVMP_RESTRICT row = values_out + node * output_stride; - row[0] = tri_values[0][tri] * z_values[0][z]; - row[1] = tri_values[1][tri] * z_values[1][z]; - row[2] = tri_values[2][tri] * z_values[2][z]; - row[3] = tri_values[3][tri] * z_values[3][z]; - } - return true; -} - -bool try_evaluate_wedge_values_product_q4( - const std::vector>& simplex_exponents, - const std::vector>& wedge_indices, - int order, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out) { - switch (order) { - case 4: - return evaluate_wedge_values_product_q4<4>( - simplex_exponents, wedge_indices, points, output_stride, values_out); - case 5: - return evaluate_wedge_values_product_q4<5>( - simplex_exponents, wedge_indices, points, output_stride, values_out); - case 6: - return evaluate_wedge_values_product_q4<6>( - simplex_exponents, wedge_indices, points, output_stride, values_out); - case 7: - return evaluate_wedge_values_product_q4<7>( - simplex_exponents, wedge_indices, points, output_stride, values_out); - case 8: - return evaluate_wedge_values_product_q4<8>( - simplex_exponents, wedge_indices, points, output_stride, values_out); - default: - return false; - } -} - -void evaluate_wedge_order1_values_q4( - const std::vector>& wedge_indices, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out) { - Real tri[4][3]; - Real axis[4][2]; - for (std::size_t q = 0; q < 4u; ++q) { - const auto& xi = points[q]; - tri[q][0] = Real(1) - xi[0] - xi[1]; - tri[q][1] = xi[0]; - tri[q][2] = xi[1]; - axis[q][0] = (Real(1) - xi[2]) * Real(0.5); - axis[q][1] = (Real(1) + xi[2]) * Real(0.5); - } - - for (std::size_t node = 0; node < wedge_indices.size(); ++node) { - const auto& index = wedge_indices[node]; - const std::size_t tri_node = index[0]; - const std::size_t axis_node = index[1]; - Real* row = values_out + node * output_stride; - row[0] = tri[0][tri_node] * axis[0][axis_node]; 
- row[1] = tri[1][tri_node] * axis[1][axis_node]; - row[2] = tri[2][tri_node] * axis[2][axis_node]; - row[3] = tri[3][tri_node] * axis[3][axis_node]; - } -} - -bool evaluate_wedge_fast_strided( - int order, - const std::vector>& wedge_indices, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - if (order == 3) { - return false; - } - if (order == 1 && - points.size() == 4u && - values_out != nullptr && - gradients_out == nullptr && - hessians_out == nullptr) { - evaluate_wedge_order1_values_q4(wedge_indices, points, output_stride, values_out); - return true; - } - - switch (order) { - case 1: - evaluate_wedge_fast_outputs_strided<1>( - wedge_indices, points, output_stride, values_out, gradients_out, hessians_out); - return true; - case 2: - evaluate_wedge_fast_outputs_strided<2>( - wedge_indices, points, output_stride, values_out, gradients_out, hessians_out); - return true; - default: - return false; - } -} - -bool evaluate_fixed_lagrange_fast(LagrangeTopology topology, - int order, - const math::Vector& xi, - std::vector* values, - std::vector* gradients, - std::vector* hessians) { - switch (order) { - case 1: - return evaluate_fixed_lagrange_fast_order<1>( - topology, xi, values, gradients, hessians); - case 2: - return evaluate_fixed_lagrange_fast_order<2>( - topology, xi, values, gradients, hessians); - case 3: - return evaluate_fixed_lagrange_fast_order<3>( - topology, xi, values, gradients, hessians); - default: - return false; - } -} - -bool evaluate_fixed_lagrange_fast_strided(LagrangeTopology topology, - int order, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - if (topology == LagrangeTopology::Line && - points.size() == 4u) { - const bool values_only = - values_out != nullptr && gradients_out == nullptr && 
hessians_out == nullptr; - const bool gradients_only = - values_out == nullptr && gradients_out != nullptr && hessians_out == nullptr; - const bool hessians_only = - values_out == nullptr && gradients_out == nullptr && hessians_out != nullptr; - const bool all_outputs = - values_out != nullptr && gradients_out != nullptr && hessians_out != nullptr; - if (values_only) { - if (order == 1) { - evaluate_line_order1_values_q4(points, output_stride, values_out); - return true; - } - if (order == 2) { - evaluate_line_order2_values_q4(points, output_stride, values_out); - return true; - } - if (order == 3) { - evaluate_line_order3_values_q4(points, output_stride, values_out); - return true; - } - } - if (order == 1) { - if (gradients_only) { - evaluate_line_order1_gradients_q4(output_stride, gradients_out); - return true; - } - if (hessians_only) { - evaluate_line_order1_hessians_q4(output_stride, hessians_out); - return true; - } - if (all_outputs) { - evaluate_line_order1_all_q4( - points, output_stride, values_out, gradients_out, hessians_out); - return true; - } - } - if (order == 2) { - if (gradients_only) { - evaluate_line_order2_gradients_q4(points, output_stride, gradients_out); - return true; - } - if (hessians_only) { - evaluate_line_order2_hessians_q4(output_stride, hessians_out); - return true; - } - if (all_outputs) { - evaluate_line_order2_all_q4( - points, output_stride, values_out, gradients_out, hessians_out); - return true; - } - } - if (order == 3) { - if (gradients_only) { - evaluate_line_order3_gradients_q4(points, output_stride, gradients_out); - return true; - } - if (hessians_only) { - evaluate_line_order3_hessians_q4(points, output_stride, hessians_out); - return true; - } - if (all_outputs) { - evaluate_line_order3_all_q4( - points, output_stride, values_out, gradients_out, hessians_out); - return true; - } - } - } - - if (topology == LagrangeTopology::Tetrahedron && - order == 3 && - (gradients_out != nullptr || hessians_out != nullptr)) { - 
return false; - } - if (topology == LagrangeTopology::Triangle && - order == 3 && - hessians_out != nullptr) { - return false; - } - if (topology == LagrangeTopology::Triangle && - order == 1 && - values_out != nullptr && - gradients_out == nullptr && - hessians_out == nullptr) { - evaluate_triangle_order1_values_strided(points, output_stride, values_out); - return true; - } - if (topology == LagrangeTopology::Triangle && - order == 1 && - values_out == nullptr && - gradients_out != nullptr && - hessians_out == nullptr) { - evaluate_triangle_order1_gradients_strided(points.size(), output_stride, gradients_out); - return true; - } - if (topology == LagrangeTopology::Tetrahedron && - order == 1 && - values_out != nullptr && - gradients_out == nullptr && - hessians_out == nullptr) { - evaluate_tet_order1_values_strided(points, output_stride, values_out); - return true; - } - if (topology == LagrangeTopology::Tetrahedron && - order == 1 && - values_out == nullptr && - gradients_out != nullptr && - hessians_out == nullptr) { - evaluate_tet_order1_gradients_strided(points.size(), output_stride, gradients_out); - return true; - } - if (topology == LagrangeTopology::Triangle && - order == 1 && - values_out == nullptr && - gradients_out == nullptr && - hessians_out != nullptr) { - evaluate_zero_hessians_strided(3u, points.size(), output_stride, hessians_out); - return true; - } - if (topology == LagrangeTopology::Tetrahedron && - order == 1 && - values_out == nullptr && - gradients_out == nullptr && - hessians_out != nullptr) { - evaluate_zero_hessians_strided(4u, points.size(), output_stride, hessians_out); - return true; - } - if (topology == LagrangeTopology::Triangle && - order == 1 && - values_out != nullptr && - gradients_out != nullptr && - hessians_out != nullptr) { - evaluate_triangle_order1_values_strided(points, output_stride, values_out); - evaluate_triangle_order1_gradients_strided(points.size(), output_stride, gradients_out); - 
evaluate_zero_hessians_strided(3u, points.size(), output_stride, hessians_out); - return true; - } - if (topology == LagrangeTopology::Tetrahedron && - order == 1 && - values_out != nullptr && - gradients_out != nullptr && - hessians_out != nullptr) { - evaluate_tet_order1_values_strided(points, output_stride, values_out); - evaluate_tet_order1_gradients_strided(points.size(), output_stride, gradients_out); - evaluate_zero_hessians_strided(4u, points.size(), output_stride, hessians_out); - return true; - } - if (topology == LagrangeTopology::Triangle && - order == 2 && - values_out != nullptr && - gradients_out == nullptr && - hessians_out == nullptr) { - evaluate_triangle_order2_values_strided(points, output_stride, values_out); - return true; - } - if (topology == LagrangeTopology::Triangle && - order == 2 && - values_out == nullptr && - gradients_out != nullptr && - hessians_out == nullptr) { - evaluate_triangle_order2_gradients_strided(points, output_stride, gradients_out); - return true; - } - if (topology == LagrangeTopology::Triangle && - order == 2 && - points.size() == 4u && - values_out == nullptr && - gradients_out == nullptr && - hessians_out != nullptr) { - evaluate_triangle_order2_hessians_q4(output_stride, hessians_out); - return true; - } - if (topology == LagrangeTopology::Triangle && - order == 2 && - points.size() == 4u && - values_out != nullptr && - gradients_out != nullptr && - hessians_out != nullptr) { - evaluate_triangle_order2_values_strided(points, output_stride, values_out); - evaluate_triangle_order2_gradients_strided(points, output_stride, gradients_out); - evaluate_triangle_order2_hessians_q4(output_stride, hessians_out); - return true; - } - if (topology == LagrangeTopology::Tetrahedron && - order == 2 && - values_out != nullptr && - gradients_out == nullptr && - hessians_out == nullptr) { - evaluate_tet_order2_values_strided(points, output_stride, values_out); - return true; - } - if (topology == LagrangeTopology::Tetrahedron && - 
order == 2 && - values_out == nullptr && - gradients_out != nullptr && - hessians_out == nullptr) { - evaluate_tet_order2_gradients_strided(points, output_stride, gradients_out); - return true; - } - if (topology == LagrangeTopology::Tetrahedron && - order == 2 && - points.size() == 4u && - values_out == nullptr && - gradients_out == nullptr && - hessians_out != nullptr) { - evaluate_tet_order2_hessians_q4(output_stride, hessians_out); - return true; - } - if (topology == LagrangeTopology::Tetrahedron && - order == 2 && - points.size() == 4u && - values_out != nullptr && - gradients_out != nullptr && - hessians_out != nullptr) { - evaluate_tet_order2_values_strided(points, output_stride, values_out); - evaluate_tet_order2_gradients_strided(points, output_stride, gradients_out); - evaluate_tet_order2_hessians_q4(output_stride, hessians_out); - return true; - } - if (topology == LagrangeTopology::Tetrahedron && - order == 3 && - values_out != nullptr && - gradients_out == nullptr && - hessians_out == nullptr) { - evaluate_tet_order3_values_strided(points, output_stride, values_out); - return true; - } - if (topology == LagrangeTopology::Triangle && - order == 3 && - values_out != nullptr && - gradients_out == nullptr && - hessians_out == nullptr) { - evaluate_triangle_order3_values_strided(points, output_stride, values_out); - return true; - } - if (topology == LagrangeTopology::Triangle && - order == 3 && - values_out == nullptr && - gradients_out != nullptr && - hessians_out == nullptr) { - evaluate_triangle_order3_gradients_strided(points, output_stride, gradients_out); - return true; - } - if (topology == LagrangeTopology::Hexahedron && - order == 1 && - values_out != nullptr && - gradients_out == nullptr && - hessians_out == nullptr) { - evaluate_hex_order1_values_strided(points, output_stride, values_out); - return true; - } - if (topology == LagrangeTopology::Hexahedron && - order == 1 && - values_out == nullptr && - gradients_out != nullptr && - hessians_out 
== nullptr) { - evaluate_hex_order1_outputs_strided( - points, output_stride, values_out, gradients_out, hessians_out); - return true; - } - if (topology == LagrangeTopology::Hexahedron && - order == 1 && - values_out == nullptr && - gradients_out == nullptr && - hessians_out != nullptr) { - evaluate_hex_order1_outputs_strided( - points, output_stride, values_out, gradients_out, hessians_out); - return true; - } - if (topology == LagrangeTopology::Hexahedron && - order == 1 && - values_out != nullptr && - gradients_out != nullptr && - hessians_out != nullptr) { - evaluate_hex_order1_outputs_strided( - points, output_stride, values_out, gradients_out, hessians_out); - return true; - } - if (topology == LagrangeTopology::Quadrilateral && - order == 1 && - values_out != nullptr && - gradients_out == nullptr && - hessians_out == nullptr) { - evaluate_quad_order1_values_strided(points, output_stride, values_out); - return true; - } - if (topology == LagrangeTopology::Quadrilateral && - order == 1 && - values_out == nullptr && - gradients_out != nullptr && - hessians_out == nullptr) { - evaluate_quad_order1_gradients_strided(points, output_stride, gradients_out); - return true; - } - if (topology == LagrangeTopology::Quadrilateral && - order == 1 && - values_out == nullptr && - gradients_out == nullptr && - hessians_out != nullptr) { - evaluate_quad_order1_hessians_strided(points.size(), output_stride, hessians_out); - return true; - } - if (topology == LagrangeTopology::Quadrilateral && - order == 1 && - points.size() == 4u && - values_out != nullptr && - gradients_out != nullptr && - hessians_out != nullptr) { - evaluate_quad_order1_all_q4(points, output_stride, values_out, gradients_out, hessians_out); - return true; - } - if (topology == LagrangeTopology::Quadrilateral && - order == 2 && - values_out != nullptr && - gradients_out == nullptr && - hessians_out == nullptr) { - evaluate_quad_order2_values_strided(points, output_stride, values_out); - return true; - } - 
if (topology == LagrangeTopology::Quadrilateral && - order == 2 && - values_out == nullptr && - gradients_out != nullptr && - hessians_out == nullptr) { - evaluate_quad_order2_gradients_strided(points, output_stride, gradients_out); - return true; - } - if (topology == LagrangeTopology::Quadrilateral && - order == 2 && - values_out == nullptr && - gradients_out == nullptr && - hessians_out != nullptr) { - evaluate_quad_order2_hessians_strided(points, output_stride, hessians_out); - return true; - } - if (topology == LagrangeTopology::Quadrilateral && - order == 2 && - points.size() == 4u && - values_out != nullptr && - gradients_out != nullptr && - hessians_out != nullptr) { - evaluate_quad_order2_all_q4(points, output_stride, values_out, gradients_out, hessians_out); - return true; - } - if (topology == LagrangeTopology::Quadrilateral && - order == 3 && - values_out != nullptr && - gradients_out == nullptr && - hessians_out == nullptr) { - evaluate_quad_order3_values_strided(points, output_stride, values_out); - return true; - } - if (topology == LagrangeTopology::Quadrilateral && - order == 3 && - values_out == nullptr && - gradients_out != nullptr && - hessians_out == nullptr) { - evaluate_quad_order3_gradients_strided(points, output_stride, gradients_out); - return true; - } - if (topology == LagrangeTopology::Quadrilateral && - order == 3 && - values_out == nullptr && - gradients_out == nullptr && - hessians_out != nullptr) { - evaluate_quad_order3_hessians_strided(points, output_stride, hessians_out); - return true; - } - if (topology == LagrangeTopology::Quadrilateral && - order == 3 && - points.size() == 4u && - values_out != nullptr && - gradients_out != nullptr && - hessians_out != nullptr) { - evaluate_quad_order3_all_q4(points, output_stride, values_out, gradients_out, hessians_out); - return true; - } - if (topology == LagrangeTopology::Hexahedron && - order == 2 && - values_out != nullptr && - gradients_out == nullptr && - hessians_out == nullptr) { - 
evaluate_hex_order2_values_strided(points, output_stride, values_out); - return true; - } - if (topology == LagrangeTopology::Hexahedron && - order == 2 && - points.size() == 4u && - values_out == nullptr && - gradients_out != nullptr && - hessians_out == nullptr) { - evaluate_hex_order2_gradients_q4(points, output_stride, gradients_out); - return true; - } - if (topology == LagrangeTopology::Hexahedron && - order == 2 && - points.size() == 4u && - values_out == nullptr && - gradients_out == nullptr && - hessians_out != nullptr) { - evaluate_hex_order2_hessians_q4(points, output_stride, hessians_out); - return true; - } - if (topology == LagrangeTopology::Hexahedron && - order == 2 && - points.size() == 4u && - values_out != nullptr && - gradients_out != nullptr && - hessians_out != nullptr) { - evaluate_hex_order2_values_strided(points, output_stride, values_out); - evaluate_hex_order2_gradients_q4(points, output_stride, gradients_out); - evaluate_hex_order2_hessians_q4(points, output_stride, hessians_out); - return true; - } - if (topology == LagrangeTopology::Hexahedron && - order == 3 && - points.size() == 4u && - output_stride == 4u && - hessians_out != nullptr) { - return false; - } - if (topology == LagrangeTopology::Hexahedron && - order == 3 && - points.size() == 4u && - values_out != nullptr && - gradients_out == nullptr && - hessians_out == nullptr) { - evaluate_hex_order3_values_q4(points, output_stride, values_out); - return true; - } - if (topology == LagrangeTopology::Hexahedron && - order == 3 && - points.size() == 4u && - values_out == nullptr && - gradients_out != nullptr && - hessians_out == nullptr) { - evaluate_hex_order3_gradients_q4(points, output_stride, gradients_out); - return true; - } - if (topology == LagrangeTopology::Hexahedron && - order == 3 && - points.size() == 4u && - values_out == nullptr && - gradients_out == nullptr && - hessians_out != nullptr) { - evaluate_hex_order3_hessians_q4(points, output_stride, hessians_out); - return 
true; - } - if (topology == LagrangeTopology::Hexahedron && - order == 3 && - points.size() == 4u && - values_out != nullptr && - gradients_out != nullptr && - hessians_out != nullptr) { - evaluate_hex_order3_all_q4(points, output_stride, values_out, gradients_out, hessians_out); - return true; - } - if (hessians_out != nullptr && order > 1 && - (topology == LagrangeTopology::Quadrilateral || - topology == LagrangeTopology::Hexahedron)) { - return false; - } - if (hessians_out != nullptr) { - const bool hessians_only = values_out == nullptr && gradients_out == nullptr; - if (order == 1) { - if (topology == LagrangeTopology::Triangle && hessians_only) { - evaluate_fast_outputs_with_constant_hessians_strided>( - points, output_stride, values_out, gradients_out, hessians_out); - return true; - } - if (topology == LagrangeTopology::Tetrahedron) { - evaluate_fast_outputs_with_constant_hessians_strided>( - points, output_stride, values_out, gradients_out, hessians_out); - return true; - } - } else if (order == 2) { - if (topology == LagrangeTopology::Triangle && hessians_only) { - evaluate_fast_outputs_with_constant_hessians_strided>( - points, output_stride, values_out, gradients_out, hessians_out); - return true; - } - if (topology == LagrangeTopology::Tetrahedron) { - evaluate_fast_outputs_with_constant_hessians_strided>( - points, output_stride, values_out, gradients_out, hessians_out); - return true; - } - } - } - - switch (order) { - case 1: - return evaluate_fixed_lagrange_fast_strided_order<1>( - topology, points, output_stride, values_out, gradients_out, hessians_out); - case 2: - return evaluate_fixed_lagrange_fast_strided_order<2>( - topology, points, output_stride, values_out, gradients_out, hessians_out); - case 3: - return evaluate_fixed_lagrange_fast_strided_order<3>( - topology, points, output_stride, values_out, gradients_out, hessians_out); - default: - return false; - } -} - -bool evaluate_fixed_lagrange_fast_to(LagrangeTopology topology, - int order, 
- const math::Vector& xi, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - switch (order) { - case 1: - return evaluate_fixed_lagrange_fast_to_order<1>( - topology, xi, values_out, gradients_out, hessians_out); - case 2: - return evaluate_fixed_lagrange_fast_to_order<2>( - topology, xi, values_out, gradients_out, hessians_out); - case 3: - return evaluate_fixed_lagrange_fast_to_order<3>( - topology, xi, values_out, gradients_out, hessians_out); - default: - return false; - } -} - -template -struct AxisMonomialCoefficientTable { - std::array values{}; - std::array 1 ? N - 1 : 0)> first{}; - std::array 2 ? N - 2 : 0)> second{}; -}; - -template -constexpr AxisMonomialCoefficientTable make_axis_monomial_coefficient_table() { - AxisMonomialCoefficientTable table{}; - std::array nodes{}; - constexpr int order = static_cast(N) - 1; - for (std::size_t i = 0; i < N; ++i) { - nodes[i] = detail::equispaced_pm_one_coord(static_cast(i), order); - } - - for (std::size_t i = 0; i < N; ++i) { - std::array coeffs{}; - std::array next{}; - coeffs[0] = Real(1); - std::size_t degree = 0; - for (std::size_t j = 0; j < N; ++j) { - if (j == i) { - continue; - } - next = {}; - for (std::size_t k = 0; k <= degree; ++k) { - next[k] -= nodes[j] * coeffs[k]; - next[k + 1] += coeffs[k]; - } - coeffs = next; - ++degree; - } - - Real denominator = Real(1); - for (std::size_t j = 0; j < N; ++j) { - if (j != i) { - denominator *= nodes[i] - nodes[j]; - } - } - const Real inv_denominator = Real(1) / denominator; - for (std::size_t k = 0; k < N; ++k) { - table.values[i * N + k] = coeffs[k] * inv_denominator; - } - if constexpr (N >= 2) { - for (std::size_t k = 1; k < N; ++k) { - table.first[i * (N - 1) + (k - 1)] = - static_cast(k) * table.values[i * N + k]; - } - } - if constexpr (N >= 3) { - for (std::size_t k = 2; k < N; ++k) { - table.second[i * (N - 2) + (k - 2)] = - static_cast(k * (k - 1)) * table.values[i * N + k]; - } - } - } - - 
return table; -} - -template -void assign_axis_coefficient_table(const AxisMonomialCoefficientTable& table, - std::vector& values, - std::vector& first, - std::vector& second) { - assign_array(values, table.values); - assign_array(first, table.first); - assign_array(second, table.second); -} - -bool assign_precomputed_axis_coefficients(int n_axis, - std::vector& values, - std::vector& first, - std::vector& second) { - static constexpr auto kAxisCoefficients1 = make_axis_monomial_coefficient_table<1>(); - static constexpr auto kAxisCoefficients2 = make_axis_monomial_coefficient_table<2>(); - static constexpr auto kAxisCoefficients3 = make_axis_monomial_coefficient_table<3>(); - static constexpr auto kAxisCoefficients4 = make_axis_monomial_coefficient_table<4>(); - static constexpr auto kAxisCoefficients5 = make_axis_monomial_coefficient_table<5>(); - - switch (n_axis) { - case 1: - assign_axis_coefficient_table(kAxisCoefficients1, values, first, second); - return true; - case 2: - assign_axis_coefficient_table(kAxisCoefficients2, values, first, second); - return true; - case 3: - assign_axis_coefficient_table(kAxisCoefficients3, values, first, second); - return true; - case 4: - assign_axis_coefficient_table(kAxisCoefficients4, values, first, second); - return true; - case 5: - assign_axis_coefficient_table(kAxisCoefficients5, values, first, second); - return true; - default: - return false; - } -} - -LagrangeTopologyTraits lagrange_topology_traits(ElementType type) { - const auto topo = topology(type); - if (topo != LagrangeTopology::Unknown) { - return {topo, reference_dimension(type)}; - } - - throw BasisElementCompatibilityException("Unsupported element type for LagrangeBasis", - __FILE__, __LINE__, __func__); -} - -std::size_t lattice_index_pm_one(Real coord, int order, const char* context) { - if (order <= 0) { - if (!coordinate_matches_expected(coord, Real(0))) { - throw BasisNodeOrderingException(context, __FILE__, __LINE__, __func__); - } - return 0; - } - 
- const Real scaled = (coord + Real(1)) * static_cast(order) / Real(2); - const long idx = std::lround(scaled); - if (idx < 0 || idx > order || - !coordinate_matches_expected( - coord, - detail::equispaced_pm_one_coord(static_cast(idx), order))) { - throw BasisNodeOrderingException(context, __FILE__, __LINE__, __func__); - } - return static_cast(idx); -} - -int simplex_lattice_index(Real coord, int order, const char* context) { - if (order <= 0) { - if (!coordinate_matches_expected(coord, Real(0)) && - !coordinate_matches_expected(coord, Real(0.25)) && - !coordinate_matches_expected(coord, Real(1) / Real(3))) { - throw BasisNodeOrderingException(context, __FILE__, __LINE__, __func__); - } - return 0; - } - - const Real scaled = coord * static_cast(order); - const long idx = std::lround(scaled); - const Real reconstructed = static_cast(idx) / static_cast(order); - if (idx < 0 || idx > order || !coordinate_matches_expected(coord, reconstructed)) { - throw BasisNodeOrderingException(context, __FILE__, __LINE__, __func__); - } - return static_cast(idx); -} - -std::array triangle_exponents_from_public_node(const math::Vector& node, - int order) { - if (order == 0) { - return {0, 0, 0, 0}; - } - - const int j = simplex_lattice_index(node[0], order, - "LagrangeBasis: invalid triangle node coordinate for public ordering"); - const int k = simplex_lattice_index(node[1], order, - "LagrangeBasis: invalid triangle node coordinate for public ordering"); - const int i = order - j - k; - if (i < 0) { - throw BasisNodeOrderingException("LagrangeBasis: invalid triangle barycentric coordinates for public ordering", - __FILE__, __LINE__, __func__); - } - return {i, j, k, 0}; -} - -std::array tetrahedron_exponents_from_public_node(const math::Vector& node, - int order) { - if (order == 0) { - return {0, 0, 0, 0}; - } - - const int j = simplex_lattice_index(node[0], order, - "LagrangeBasis: invalid tetrahedron node x-coordinate for public ordering"); - const int k = 
simplex_lattice_index(node[1], order, - "LagrangeBasis: invalid tetrahedron node y-coordinate for public ordering"); - const int l = simplex_lattice_index(node[2], order, - "LagrangeBasis: invalid tetrahedron node z-coordinate for public ordering"); - const int i = order - j - k - l; - if (i < 0) { - throw BasisNodeOrderingException("LagrangeBasis: invalid tetrahedron barycentric coordinates for public ordering", - __FILE__, __LINE__, __func__); - } - return {i, j, k, l}; -} - -struct NormalizedLagrangeRequest { - ElementType element_type; - int order; -}; - -// Non-owning view of the per-axis 1D Lagrange basis evaluations -// (values, first derivative, second derivative), each of length `size`. -struct AxisBasisEvaluations { - const Real* values; - const Real* first; - const Real* second; - std::size_t size; -}; - -AxisBasisEvaluations constant_axis_basis() { - static const Real kOne[1] = {Real(1)}; - static const Real kZero[1] = {Real(0)}; - return AxisBasisEvaluations{kOne, kZero, kZero, 1}; -} - -// Horner-form evaluator for the precomputed 1D Lagrange basis. -// -// Inputs are precomputed monomial coefficients of L_i(x), L_i'(x), L_i''(x) -// (built once at LagrangeBasis construction). Evaluation is purely -// multiply-add on the coefficients — no divisions and no node-position -// lookups in the hot path. Templated on N for compile-time loop unrolling -// and FMA-friendly straight-line code on the common Hex/Quad/Line orders. 
-// -// Layout: -// v_coeffs: N * N entries; row i holds [c_i0, c_i1, ..., c_i(N-1)] -// such that L_i(x) = sum_k c_ik * x^k -// d_coeffs: N * (N-1) entries; row i holds derivative coefficients of L_i'(x) -// d2_coeffs: N * (N-2) entries; row i holds coefficients of L_i''(x) -// (only valid when N >= 3) -template -inline void evaluate_1d_horner_impl(const Real* v_coeffs, - const Real* d_coeffs, - const Real* d2_coeffs, - Real xi, - Real* values, Real* first, Real* second) { - if constexpr (N == 1) { - values[0] = v_coeffs[0]; - if (first) first[0] = Real(0); - if (second) second[0] = Real(0); - return; - } else { - // Values: degree N-1 polynomials. - for (int i = 0; i < N; ++i) { - const Real* c = v_coeffs + i * N; - Real r = c[N - 1]; - for (int k = N - 1; k > 0; --k) { - r = r * xi + c[k - 1]; - } - values[i] = r; - } - - if (!first && !second) return; - - if (first) { - // First derivatives: degree N-2 polynomials (per row of d_coeffs). - for (int i = 0; i < N; ++i) { - const Real* c = d_coeffs + i * (N - 1); - Real r = c[N - 2]; - for (int k = N - 2; k > 0; --k) { - r = r * xi + c[k - 1]; - } - first[i] = r; - } - } - - if (!second) return; - - if constexpr (N <= 2) { - for (int i = 0; i < N; ++i) second[i] = Real(0); - } else { - // Second derivatives: degree N-3 polynomials (per row of d2_coeffs). - for (int i = 0; i < N; ++i) { - const Real* c = d2_coeffs + i * (N - 2); - Real r = c[N - 3]; - for (int k = N - 3; k > 0; --k) { - r = r * xi + c[k - 1]; - } - second[i] = r; - } - } - } -} - -void fill_equispaced_barycentric_weights(int n_axis, Real* weights) { - const int order = n_axis - 1; - Real weight = (order % 2 == 0) ? 
Real(1) : Real(-1); - Real max_abs = Real(0); - for (int i = 0; i < n_axis; ++i) { - weights[i] = weight; - max_abs = std::max(max_abs, std::abs(weight)); - if (i < order) { - weight *= -static_cast(order - i) / static_cast(i + 1); - } - } - - if (max_abs > Real(0)) { - const Real inv_scale = Real(1) / max_abs; - for (int i = 0; i < n_axis; ++i) { - weights[i] *= inv_scale; - } - } -} - -bool coordinate_matches_axis_node(Real xi, Real node) { - return coordinate_matches_expected(xi, node); -} - -struct CompensatedSum { - Real sum{Real(0)}; - Real compensation{Real(0)}; - - void add(Real value) noexcept { - const Real y = value - compensation; - const Real t = sum + y; - compensation = (t - sum) - y; - sum = t; - } -}; - -void distribute_residual_by_abs(int n_axis, Real* values, Real residual) { - if (values == nullptr || n_axis <= 0 || residual == Real(0)) { - return; - } - - CompensatedSum abs_sum; - int largest_index = 0; - Real largest_abs = Real(0); - for (int i = 0; i < n_axis; ++i) { - const Real magnitude = std::abs(values[i]); - abs_sum.add(magnitude); - if (magnitude > largest_abs) { - largest_abs = magnitude; - largest_index = i; - } - } - - if (abs_sum.sum <= Real(0)) { - values[0] += residual; - return; - } - - const Real inv_abs_sum = Real(1) / abs_sum.sum; - CompensatedSum applied; - for (int i = 0; i < n_axis; ++i) { - const Real correction = residual * std::abs(values[i]) * inv_abs_sum; - values[i] += correction; - applied.add(correction); - } - values[largest_index] += residual - applied.sum; -} - -void evaluate_1d_barycentric_runtime(int n_axis, - Real xi, - const Real* weights, - Real* values, - Real* first, - Real* second) { - const int order = n_axis - 1; - BASIS_CHECK_EVAL(weights != nullptr, - "LagrangeBasis: missing cached barycentric weights for runtime axis evaluation"); - - int node_index = -1; - for (int i = 0; i < n_axis; ++i) { - const Real node = detail::equispaced_pm_one_coord(i, order); - if (coordinate_matches_axis_node(xi, node)) 
{ - node_index = i; - break; - } - } - - if (node_index >= 0) { - std::fill(values, values + n_axis, Real(0)); - values[node_index] = Real(1); - if (!first && !second) { - return; - } - - const Real xk = detail::equispaced_pm_one_coord(node_index, order); - const Real wk = weights[static_cast(node_index)]; - Real reciprocal_sum = Real(0); - if (second) { - for (int m = 0; m < n_axis; ++m) { - if (m == node_index) { - continue; - } - const Real xm = detail::equispaced_pm_one_coord(m, order); - reciprocal_sum += Real(1) / (xk - xm); - } - } - - Real first_diagonal = Real(0); - Real second_diagonal = Real(0); - if (first) { - std::fill(first, first + n_axis, Real(0)); - } - if (second) { - std::fill(second, second + n_axis, Real(0)); - } - - for (int j = 0; j < n_axis; ++j) { - if (j == node_index) { - continue; - } - const Real xj = detail::equispaced_pm_one_coord(j, order); - const Real distance = xk - xj; - const Real offdiag_first = weights[static_cast(j)] / (wk * distance); - first_diagonal -= offdiag_first; - if (first) { - first[j] = offdiag_first; - } - if (second) { - const Real offdiag_second = - Real(2) * offdiag_first * (reciprocal_sum - Real(1) / distance); - second[j] = offdiag_second; - second_diagonal -= offdiag_second; - } - } - if (first) { - first[node_index] = first_diagonal; - } - if (second) { - second[node_index] = second_diagonal; - } - return; - } - - Real sum0 = Real(0); - Real sum1 = Real(0); - Real sum2 = Real(0); - for (int i = 0; i < n_axis; ++i) { - const Real node = detail::equispaced_pm_one_coord(i, order); - const Real inv_distance = Real(1) / (xi - node); - const Real weighted = weights[static_cast(i)] * inv_distance; - sum0 += weighted; - sum1 += weighted * inv_distance; - sum2 += weighted * inv_distance * inv_distance; - } - - const Real inv_sum0 = Real(1) / sum0; - const Real first_ratio = sum1 * inv_sum0; - const Real second_ratio = sum2 * inv_sum0; - const Real first_ratio_sq = first_ratio * first_ratio; - - CompensatedSum 
value_sum; - CompensatedSum first_sum; - CompensatedSum second_sum; - for (int i = 0; i < n_axis; ++i) { - const Real node = detail::equispaced_pm_one_coord(i, order); - const Real inv_distance = Real(1) / (xi - node); - const Real value = weights[static_cast(i)] * inv_distance * inv_sum0; - values[i] = value; - value_sum.add(value); - if (first || second) { - const Real derivative_factor = first_ratio - inv_distance; - if (first) { - first[i] = value * derivative_factor; - first_sum.add(first[i]); - } - if (second) { - second[i] = value * (derivative_factor * derivative_factor + - inv_distance * inv_distance - - Real(2) * second_ratio + - first_ratio_sq); - second_sum.add(second[i]); - } - } - } - - distribute_residual_by_abs(n_axis, values, Real(1) - value_sum.sum); - if (first) { - distribute_residual_by_abs(n_axis, first, -first_sum.sum); - } - if (second) { - distribute_residual_by_abs(n_axis, second, -second_sum.sum); - } -} - -// 1D Lagrange-basis evaluator. Writes n_axis entries to each non-null output -// buffer. Dispatches to compile-time Horner specializations for sizes 1..9 -// (orders 0..8 — the Lagrange performance sweep) and uses barycentric -// evaluation above that threshold to avoid high-order monomial conditioning -// issues. 
-void evaluate_1d_basis_to(const Real* v_coeffs, - const Real* d_coeffs, - const Real* d2_coeffs, - const Real* barycentric_weights, - int n_axis, Real xi, - Real* values, Real* first, Real* second) { - switch (n_axis) { - case 1: evaluate_1d_horner_impl<1>(v_coeffs, d_coeffs, d2_coeffs, xi, values, first, second); return; - case 2: evaluate_1d_horner_impl<2>(v_coeffs, d_coeffs, d2_coeffs, xi, values, first, second); return; - case 3: evaluate_1d_horner_impl<3>(v_coeffs, d_coeffs, d2_coeffs, xi, values, first, second); return; - case 4: evaluate_1d_horner_impl<4>(v_coeffs, d_coeffs, d2_coeffs, xi, values, first, second); return; - case 5: evaluate_1d_horner_impl<5>(v_coeffs, d_coeffs, d2_coeffs, xi, values, first, second); return; - case 6: evaluate_1d_horner_impl<6>(v_coeffs, d_coeffs, d2_coeffs, xi, values, first, second); return; - case 7: evaluate_1d_horner_impl<7>(v_coeffs, d_coeffs, d2_coeffs, xi, values, first, second); return; - case 8: evaluate_1d_horner_impl<8>(v_coeffs, d_coeffs, d2_coeffs, xi, values, first, second); return; - case 9: evaluate_1d_horner_impl<9>(v_coeffs, d_coeffs, d2_coeffs, xi, values, first, second); return; - default: - evaluate_1d_barycentric_runtime(n_axis, xi, barycentric_weights, values, first, second); - return; - } -} - -// Selects which derivative passes are computed by the 1D evaluator. -enum class AxisDeriv { - ValuesOnly, // skip first and second - ValuesAndFirst, // for gradients - ValuesAndFirstAndSecond, // for hessians or fused evaluate_all -}; - -// Per-axis storage (values, first derivative, second derivative). Backed by -// per-thread scratch that grows lazily; subsequent calls reuse capacity with no -// reallocation. 
-struct AxisScratch { - std::vector values; - std::vector first; - std::vector second; - - void reserveFor(std::size_t n) { - if (values.size() < n) values.resize(n); - if (first.size() < n) first.resize(n); - if (second.size() < n) second.resize(n); - } -}; - -struct AxisBatchScratch { - std::vector values; - std::vector first; - std::vector second; - - void resizeFor(std::size_t count, AxisDeriv level) { - if (values.size() < count) values.resize(count); - if (level != AxisDeriv::ValuesOnly && first.size() < count) first.resize(count); - if (level == AxisDeriv::ValuesAndFirstAndSecond && second.size() < count) second.resize(count); - } -}; - -template -inline void fill_simplex_factor_sequence_fixed(Real lambda, - Real* SVMP_RESTRICT phi, - Real* SVMP_RESTRICT dphi, - Real* SVMP_RESTRICT d2phi) { - static_assert(!NeedSecond || NeedFirst, - "second derivative factors require first-derivative recurrence state"); - phi[0] = Real(1); - if constexpr (NeedFirst) { - dphi[0] = Real(0); - } - if constexpr (NeedSecond) { - d2phi[0] = Real(0); - } - - const Real t = static_cast(Order) * lambda; - const Real dt_dlambda = static_cast(Order); - Real dphi_dt_prev = Real(0); - Real d2phi_dt2_prev = Real(0); - for (int a = 1; a <= Order; ++a) { - const std::size_t au = static_cast(a); - const Real inv_a = Real(1) / static_cast(a); - const Real s = (t - static_cast(a - 1)) * inv_a; - phi[au] = s * phi[au - 1]; - - if constexpr (NeedFirst) { - const Real dphi_dt_old = dphi_dt_prev; - const Real dphi_dt = inv_a * phi[au - 1] + s * dphi_dt_old; - dphi[au] = dt_dlambda * dphi_dt; - - if constexpr (NeedSecond) { - const Real d2phi_dt2 = Real(2) * inv_a * dphi_dt_old + s * d2phi_dt2_prev; - d2phi[au] = dt_dlambda * dt_dlambda * d2phi_dt2; - d2phi_dt2_prev = d2phi_dt2; - } - dphi_dt_prev = dphi_dt; - } - } -} - -template -inline void fill_triangle_factors_q4( - const std::vector>& points, - Real (&phi0)[4][Order + 1], - Real (&phi1)[4][Order + 1], - Real (&phi2)[4][Order + 1], - Real 
(&dphi0)[4][Order + 1], - Real (&dphi1)[4][Order + 1], - Real (&dphi2)[4][Order + 1], - Real (&d2phi0)[4][Order + 1], - Real (&d2phi1)[4][Order + 1], - Real (&d2phi2)[4][Order + 1]) { - for (std::size_t q = 0; q < 4u; ++q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l0 = Real(1) - l1 - l2; - if constexpr (NeedSecond) { - fill_simplex_factor_sequence_fixed( - l0, phi0[q], dphi0[q], d2phi0[q]); - fill_simplex_factor_sequence_fixed( - l1, phi1[q], dphi1[q], d2phi1[q]); - fill_simplex_factor_sequence_fixed( - l2, phi2[q], dphi2[q], d2phi2[q]); - } else { - fill_simplex_factor_sequence_fixed( - l0, phi0[q], dphi0[q], nullptr); - fill_simplex_factor_sequence_fixed( - l1, phi1[q], dphi1[q], nullptr); - fill_simplex_factor_sequence_fixed( - l2, phi2[q], dphi2[q], nullptr); - } - } -} - -template -inline void write_wedge_gradient_strided_q(std::size_t tri_stride, - std::size_t axis_stride, - std::size_t tri, - std::size_t z, - std::size_t output_stride, - const Real* SVMP_RESTRICT tri_values, - const Real* SVMP_RESTRICT tri_g, - const AxisBatchScratch& axis_batch, - Real* SVMP_RESTRICT g) { - const std::size_t tri_q = tri * tri_stride + Q; - const std::size_t z_q = Q * axis_stride + z; - const Real tri_v = tri_values[tri_q]; - const Real zv = axis_batch.values[z_q]; - g[0u * output_stride + Q] = tri_g[0u * tri_stride + Q] * zv; - g[1u * output_stride + Q] = tri_g[1u * tri_stride + Q] * zv; - g[2u * output_stride + Q] = tri_v * axis_batch.first[z_q]; -} - -template -inline void write_wedge_gradient_stride4_q(std::size_t tri_stride, - std::size_t axis_stride, - std::size_t tri, - std::size_t z, - const Real* SVMP_RESTRICT tri_values, - const Real* SVMP_RESTRICT tri_g, - const AxisBatchScratch& axis_batch, - Real* SVMP_RESTRICT g) { - const std::size_t tri_q = tri * tri_stride + Q; - const std::size_t z_q = Q * axis_stride + z; - const Real tri_v = tri_values[tri_q]; - const Real zv = axis_batch.values[z_q]; - g[Q] = tri_g[0u 
* tri_stride + Q] * zv; - g[4u + Q] = tri_g[1u * tri_stride + Q] * zv; - g[8u + Q] = tri_v * axis_batch.first[z_q]; -} - -template -inline void write_wedge_hessian_strided_q(std::size_t tri_stride, - std::size_t axis_stride, - std::size_t tri, - std::size_t z, - std::size_t output_stride, - const Real* SVMP_RESTRICT tri_values, - const Real* SVMP_RESTRICT tri_g, - const Real* SVMP_RESTRICT tri_H, - const AxisBatchScratch& axis_batch, - Real* SVMP_RESTRICT H) { - const std::size_t tri_q = tri * tri_stride + Q; - const std::size_t z_q = Q * axis_stride + z; - const Real tri_v = tri_values[tri_q]; - const Real zv = axis_batch.values[z_q]; - const Real zd = axis_batch.first[z_q]; - const Real tri_gx = tri_g[0u * tri_stride + Q]; - const Real tri_gy = tri_g[1u * tri_stride + Q]; - const Real tri_hxx = tri_H[0u * tri_stride + Q]; - const Real tri_hxy = tri_H[1u * tri_stride + Q]; - const Real tri_hyy = tri_H[2u * tri_stride + Q]; - const Real hxz = tri_gx * zd; - const Real hxy = tri_hxy * zv; - const Real hyz = tri_gy * zd; - - H[0u * output_stride + Q] = tri_hxx * zv; - H[1u * output_stride + Q] = hxy; - H[2u * output_stride + Q] = hxz; - H[3u * output_stride + Q] = hxy; - H[4u * output_stride + Q] = tri_hyy * zv; - H[5u * output_stride + Q] = hyz; - H[6u * output_stride + Q] = hxz; - H[7u * output_stride + Q] = hyz; - H[8u * output_stride + Q] = tri_v * axis_batch.second[z_q]; -} - -template -inline void write_wedge_hessian_stride4_q(std::size_t tri_stride, - std::size_t axis_stride, - std::size_t tri, - std::size_t z, - const Real* SVMP_RESTRICT tri_values, - const Real* SVMP_RESTRICT tri_g, - const Real* SVMP_RESTRICT tri_H, - const AxisBatchScratch& axis_batch, - Real* SVMP_RESTRICT H) { - const std::size_t tri_q = tri * tri_stride + Q; - const std::size_t z_q = Q * axis_stride + z; - const Real tri_v = tri_values[tri_q]; - const Real zv = axis_batch.values[z_q]; - const Real zd = axis_batch.first[z_q]; - const Real tri_gx = tri_g[0u * tri_stride + Q]; - const Real 
tri_gy = tri_g[1u * tri_stride + Q]; - const Real tri_hxx = tri_H[0u * tri_stride + Q]; - const Real tri_hxy = tri_H[1u * tri_stride + Q]; - const Real tri_hyy = tri_H[2u * tri_stride + Q]; - const Real hxz = tri_gx * zd; - const Real hxy = tri_hxy * zv; - const Real hyz = tri_gy * zd; - - H[Q] = tri_hxx * zv; - H[4u + Q] = hxy; - H[8u + Q] = hxz; - H[12u + Q] = hxy; - H[16u + Q] = tri_hyy * zv; - H[20u + Q] = hyz; - H[24u + Q] = hxz; - H[28u + Q] = hyz; - H[32u + Q] = tri_v * axis_batch.second[z_q]; -} - -template -inline void write_wedge_all_strided_q(std::size_t tri_stride, - std::size_t axis_stride, - std::size_t tri, - std::size_t z, - std::size_t output_stride, - const Real* SVMP_RESTRICT tri_values, - const Real* SVMP_RESTRICT tri_g, - const Real* SVMP_RESTRICT tri_H, - const AxisBatchScratch& axis_batch, - Real* SVMP_RESTRICT value_row, - Real* SVMP_RESTRICT g, - Real* SVMP_RESTRICT H) { - const std::size_t tri_q = tri * tri_stride + Q; - const std::size_t z_q = Q * axis_stride + z; - const Real tri_v = tri_values[tri_q]; - const Real zv = axis_batch.values[z_q]; - const Real zd = axis_batch.first[z_q]; - const Real tri_gx = tri_g[0u * tri_stride + Q]; - const Real tri_gy = tri_g[1u * tri_stride + Q]; - const Real tri_hxx = tri_H[0u * tri_stride + Q]; - const Real tri_hxy = tri_H[1u * tri_stride + Q]; - const Real tri_hyy = tri_H[2u * tri_stride + Q]; - const Real hxz = tri_gx * zd; - const Real hxy = tri_hxy * zv; - const Real hyz = tri_gy * zd; - - value_row[Q] = tri_v * zv; - g[0u * output_stride + Q] = tri_gx * zv; - g[1u * output_stride + Q] = tri_gy * zv; - g[2u * output_stride + Q] = tri_v * zd; - H[0u * output_stride + Q] = tri_hxx * zv; - H[1u * output_stride + Q] = hxy; - H[2u * output_stride + Q] = hxz; - H[3u * output_stride + Q] = hxy; - H[4u * output_stride + Q] = tri_hyy * zv; - H[5u * output_stride + Q] = hyz; - H[6u * output_stride + Q] = hxz; - H[7u * output_stride + Q] = hyz; - H[8u * output_stride + Q] = tri_v * axis_batch.second[z_q]; 
-} - -template -inline void write_wedge_all_stride4_q(std::size_t tri_stride, - std::size_t axis_stride, - std::size_t tri, - std::size_t z, - const Real* SVMP_RESTRICT tri_values, - const Real* SVMP_RESTRICT tri_g, - const Real* SVMP_RESTRICT tri_H, - const AxisBatchScratch& axis_batch, - Real* SVMP_RESTRICT value_row, - Real* SVMP_RESTRICT g, - Real* SVMP_RESTRICT H) { - const std::size_t tri_q = tri * tri_stride + Q; - const std::size_t z_q = Q * axis_stride + z; - const Real tri_v = tri_values[tri_q]; - const Real zv = axis_batch.values[z_q]; - const Real zd = axis_batch.first[z_q]; - const Real tri_gx = tri_g[0u * tri_stride + Q]; - const Real tri_gy = tri_g[1u * tri_stride + Q]; - const Real tri_hxx = tri_H[0u * tri_stride + Q]; - const Real tri_hxy = tri_H[1u * tri_stride + Q]; - const Real tri_hyy = tri_H[2u * tri_stride + Q]; - const Real hxz = tri_gx * zd; - const Real hxy = tri_hxy * zv; - const Real hyz = tri_gy * zd; - - value_row[Q] = tri_v * zv; - g[Q] = tri_gx * zv; - g[4u + Q] = tri_gy * zv; - g[8u + Q] = tri_v * zd; - H[Q] = tri_hxx * zv; - H[4u + Q] = hxy; - H[8u + Q] = hxz; - H[12u + Q] = hxy; - H[16u + Q] = tri_hyy * zv; - H[20u + Q] = hyz; - H[24u + Q] = hxz; - H[28u + Q] = hyz; - H[32u + Q] = tri_v * axis_batch.second[z_q]; -} - -template -bool evaluate_wedge_fused_stride4_q4( - const std::vector>& simplex_exponents, - const std::vector& wedge_node_by_tri_z, - const std::vector>& points, - const AxisBatchScratch& axis_batch, - int n_axis, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - static_assert(Order >= 3 && Order <= 8, "fused wedge q4 path covers orders 3..8"); - const std::size_t tri_count = simplex_exponents.size(); - const std::size_t z_count = static_cast(n_axis); - if (points.size() != 4u || - z_count != static_cast(Order + 1) || - wedge_node_by_tri_z.size() != tri_count * z_count) { - return false; - } - - Real phi0[4][Order + 1]; - Real phi1[4][Order + 1]; - Real 
phi2[4][Order + 1]; - Real dphi0[4][Order + 1]; - Real dphi1[4][Order + 1]; - Real dphi2[4][Order + 1]; - Real d2phi0[4][Order + 1]; - Real d2phi1[4][Order + 1]; - Real d2phi2[4][Order + 1]; - fill_triangle_factors_q4( - points, phi0, phi1, phi2, dphi0, dphi1, dphi2, d2phi0, d2phi1, d2phi2); - - for (std::size_t tri = 0; tri < tri_count; ++tri) { - const auto& e = simplex_exponents[tri]; - const std::size_t i0 = static_cast(e[0]); - const std::size_t i1 = static_cast(e[1]); - const std::size_t i2 = static_cast(e[2]); - - Real tri_v[4]; - Real tri_gx[4]; - Real tri_gy[4]; - Real tri_hxx[4]; - Real tri_hxy[4]; - Real tri_hyy[4]; - for (std::size_t q = 0; q < 4u; ++q) { - const Real v0 = phi0[q][i0]; - const Real v1 = phi1[q][i1]; - const Real v2 = phi2[q][i2]; - const Real D0 = dphi0[q][i0]; - const Real D1 = dphi1[q][i1]; - const Real D2 = dphi2[q][i2]; - const Real dl0 = D0 * v1 * v2; - tri_v[q] = v0 * v1 * v2; - tri_gx[q] = v0 * D1 * v2 - dl0; - tri_gy[q] = v0 * v1 * D2 - dl0; - - if constexpr (NeedHess) { - const Real DD0 = d2phi0[q][i0]; - const Real DD1 = d2phi1[q][i1]; - const Real DD2 = d2phi2[q][i2]; - const Real H00 = DD0 * v1 * v2; - const Real H11 = v0 * DD1 * v2; - const Real H22 = v0 * v1 * DD2; - const Real H01 = D0 * D1 * v2; - const Real H02 = D0 * v1 * D2; - const Real H12 = v0 * D1 * D2; - tri_hxx[q] = H00 - Real(2) * H01 + H11; - tri_hxy[q] = H00 - H01 - H02 + H12; - tri_hyy[q] = H00 - Real(2) * H02 + H22; - } - } - - for (std::size_t z = 0; z < z_count; ++z) { - const std::size_t node = wedge_node_by_tri_z[tri * z_count + z]; - Real* SVMP_RESTRICT value_row = - values_out != nullptr ? values_out + node * 4u : nullptr; - Real* SVMP_RESTRICT g = - gradients_out != nullptr ? gradients_out + node * 12u : nullptr; - Real* SVMP_RESTRICT H = - hessians_out != nullptr ? 
hessians_out + node * 36u : nullptr; - - const Real z0 = axis_batch.values[z]; - const Real z1 = axis_batch.values[z_count + z]; - const Real z2 = axis_batch.values[2u * z_count + z]; - const Real z3 = axis_batch.values[3u * z_count + z]; - const Real dz0 = axis_batch.first[z]; - const Real dz1 = axis_batch.first[z_count + z]; - const Real dz2 = axis_batch.first[2u * z_count + z]; - const Real dz3 = axis_batch.first[3u * z_count + z]; - - if (value_row != nullptr) { - value_row[0] = tri_v[0] * z0; - value_row[1] = tri_v[1] * z1; - value_row[2] = tri_v[2] * z2; - value_row[3] = tri_v[3] * z3; - } - if (g != nullptr) { - g[0] = tri_gx[0] * z0; - g[1] = tri_gx[1] * z1; - g[2] = tri_gx[2] * z2; - g[3] = tri_gx[3] * z3; - g[4] = tri_gy[0] * z0; - g[5] = tri_gy[1] * z1; - g[6] = tri_gy[2] * z2; - g[7] = tri_gy[3] * z3; - g[8] = tri_v[0] * dz0; - g[9] = tri_v[1] * dz1; - g[10] = tri_v[2] * dz2; - g[11] = tri_v[3] * dz3; - } - if constexpr (NeedHess) { - if (H != nullptr) { - const Real d2z0 = axis_batch.second[z]; - const Real d2z1 = axis_batch.second[z_count + z]; - const Real d2z2 = axis_batch.second[2u * z_count + z]; - const Real d2z3 = axis_batch.second[3u * z_count + z]; - const Real hxz0 = tri_gx[0] * dz0; - const Real hxz1 = tri_gx[1] * dz1; - const Real hxz2 = tri_gx[2] * dz2; - const Real hxz3 = tri_gx[3] * dz3; - const Real hyz0 = tri_gy[0] * dz0; - const Real hyz1 = tri_gy[1] * dz1; - const Real hyz2 = tri_gy[2] * dz2; - const Real hyz3 = tri_gy[3] * dz3; - H[0] = tri_hxx[0] * z0; - H[1] = tri_hxx[1] * z1; - H[2] = tri_hxx[2] * z2; - H[3] = tri_hxx[3] * z3; - H[4] = tri_hxy[0] * z0; - H[5] = tri_hxy[1] * z1; - H[6] = tri_hxy[2] * z2; - H[7] = tri_hxy[3] * z3; - H[8] = hxz0; - H[9] = hxz1; - H[10] = hxz2; - H[11] = hxz3; - H[12] = H[4]; - H[13] = H[5]; - H[14] = H[6]; - H[15] = H[7]; - H[16] = tri_hyy[0] * z0; - H[17] = tri_hyy[1] * z1; - H[18] = tri_hyy[2] * z2; - H[19] = tri_hyy[3] * z3; - H[20] = hyz0; - H[21] = hyz1; - H[22] = hyz2; - H[23] = hyz3; - H[24] 
= hxz0; - H[25] = hxz1; - H[26] = hxz2; - H[27] = hxz3; - H[28] = hyz0; - H[29] = hyz1; - H[30] = hyz2; - H[31] = hyz3; - H[32] = tri_v[0] * d2z0; - H[33] = tri_v[1] * d2z1; - H[34] = tri_v[2] * d2z2; - H[35] = tri_v[3] * d2z3; - } - } - } - } - return true; -} - -template -bool try_evaluate_wedge_fused_stride4_q4( - const std::vector>& simplex_exponents, - const std::vector& wedge_node_by_tri_z, - int order, - const std::vector>& points, - const AxisBatchScratch& axis_batch, - int n_axis, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - switch (order) { - case 3: - return evaluate_wedge_fused_stride4_q4<3, NeedHess>( - simplex_exponents, wedge_node_by_tri_z, points, axis_batch, n_axis, - values_out, gradients_out, hessians_out); - case 4: - return evaluate_wedge_fused_stride4_q4<4, NeedHess>( - simplex_exponents, wedge_node_by_tri_z, points, axis_batch, n_axis, - values_out, gradients_out, hessians_out); - case 5: - return evaluate_wedge_fused_stride4_q4<5, NeedHess>( - simplex_exponents, wedge_node_by_tri_z, points, axis_batch, n_axis, - values_out, gradients_out, hessians_out); - case 6: - return evaluate_wedge_fused_stride4_q4<6, NeedHess>( - simplex_exponents, wedge_node_by_tri_z, points, axis_batch, n_axis, - values_out, gradients_out, hessians_out); - case 7: - return evaluate_wedge_fused_stride4_q4<7, NeedHess>( - simplex_exponents, wedge_node_by_tri_z, points, axis_batch, n_axis, - values_out, gradients_out, hessians_out); - case 8: - return evaluate_wedge_fused_stride4_q4<8, NeedHess>( - simplex_exponents, wedge_node_by_tri_z, points, axis_batch, n_axis, - values_out, gradients_out, hessians_out); - default: - return false; - } -} - -struct TensorProductTableScratch { - std::vector vv; - std::vector dv; - std::vector vd; - std::vector d2v; - std::vector vd2; - std::vector dd; - - void resizeFor(std::size_t count) { - if (vv.size() < count) vv.resize(count); - if (dv.size() < count) 
dv.resize(count); - if (vd.size() < count) vd.resize(count); - if (d2v.size() < count) d2v.resize(count); - if (vd2.size() < count) vd2.resize(count); - if (dd.size() < count) dd.resize(count); - } -}; - -// Caller-provided scratch buffers used by tensor-product evaluation. Three -// independent axes plus reusable simplex/wedge intermediates. -struct LagrangeEvaluateScratch { - AxisScratch axis_x; - AxisScratch axis_y; - AxisScratch axis_z; - AxisBatchScratch axis_x_batch; - AxisBatchScratch axis_y_batch; - AxisBatchScratch axis_z_batch; - TensorProductTableScratch tensor_tables; - - std::vector tri_values; - std::vector tri_gradients; - std::vector tri_hessians; - std::vector tri_gradient_components; - std::vector tri_hessian_components; - std::vector wedge_tri_values_batch; - std::vector wedge_tri_gradient_batch; - std::vector wedge_tri_hessian_batch; - - std::vector strided_values_tmp; - std::vector strided_gradients_tmp; - std::vector strided_hessians_tmp; - - void prewarm(int max_order, std::size_t max_qpts) { - const int clamped_order = std::max(max_order, 0); - const std::size_t axis_size = static_cast(clamped_order) + 1u; - const std::size_t axis_batch_size = axis_size * max_qpts; - const std::size_t tensor_table_size = - axis_size * axis_size * std::max(max_qpts, 1u); - const std::size_t tensor_dofs = tensor_table_size * axis_size; - const std::size_t tri_count = axis_size * (axis_size + 1u) / 2u; - - axis_x.reserveFor(axis_size); - axis_y.reserveFor(axis_size); - axis_z.reserveFor(axis_size); - axis_x_batch.resizeFor(axis_batch_size, AxisDeriv::ValuesAndFirstAndSecond); - axis_y_batch.resizeFor(axis_batch_size, AxisDeriv::ValuesAndFirstAndSecond); - axis_z_batch.resizeFor(axis_batch_size, AxisDeriv::ValuesAndFirstAndSecond); - tensor_tables.resizeFor(tensor_table_size); - tri_values.reserve(tri_count); - tri_gradients.reserve(tri_count); - tri_hessians.reserve(tri_count); - tri_gradient_components.reserve(tri_count * 3u); - 
tri_hessian_components.reserve(tri_count * 9u); - wedge_tri_values_batch.reserve(tri_count * max_qpts); - wedge_tri_gradient_batch.reserve(tri_count * 3u * max_qpts); - wedge_tri_hessian_batch.reserve(tri_count * 9u * max_qpts); - strided_values_tmp.reserve(tensor_dofs); - strided_gradients_tmp.reserve(tensor_dofs * 3u); - strided_hessians_tmp.reserve(tensor_dofs * 9u); - } -}; - -LagrangeEvaluateScratch& evaluate_scratch() { - // Scratch is intentionally thread-local: assembly and benchmark callers run - // evaluation on persistent worker threads, so capacity is reused by thread. - static thread_local LagrangeEvaluateScratch s; - return s; -} - -// Fill axis scratch and return a non-owning view. Uncomputed slots still have -// valid pointers to scratch storage (they may hold stale data) — callers must -// only read the slots they requested via `level`. Common low orders use -// precomputed Horner coefficients; high orders use barycentric axis evaluation. -AxisBasisEvaluations fill_axis_scratch(AxisScratch& s, - const Real* v_coeffs, - const Real* d_coeffs, - const Real* d2_coeffs, - const Real* barycentric_weights, - int n_axis, Real xi, - AxisDeriv level) { - const std::size_t n = static_cast(n_axis); - s.reserveFor(n); - Real* first = (level == AxisDeriv::ValuesOnly) ? nullptr : s.first.data(); - Real* second = (level == AxisDeriv::ValuesAndFirstAndSecond) ? 
s.second.data() : nullptr; - evaluate_1d_basis_to(v_coeffs, d_coeffs, d2_coeffs, barycentric_weights, - n_axis, xi, s.values.data(), first, second); - return AxisBasisEvaluations{s.values.data(), s.first.data(), s.second.data(), n}; -} - -void fill_axis_batch(AxisBatchScratch& scratch, - const std::vector>& points, - std::size_t component, - const Real* v_coeffs, - const Real* d_coeffs, - const Real* d2_coeffs, - const Real* barycentric_weights, - int n_axis, - AxisDeriv level) { - const std::size_t count = points.size() * static_cast(n_axis); - scratch.resizeFor(count, level); - Real* first = (level == AxisDeriv::ValuesOnly) ? nullptr : scratch.first.data(); - Real* second = (level == AxisDeriv::ValuesAndFirstAndSecond) ? scratch.second.data() : nullptr; - const std::size_t axis_stride = static_cast(n_axis); - for (std::size_t q = 0; q < points.size(); ++q) { - evaluate_1d_basis_to(v_coeffs, d_coeffs, d2_coeffs, barycentric_weights, n_axis, - points[q][component], - scratch.values.data() + q * axis_stride, - first ? first + q * axis_stride : nullptr, - second ? second + q * axis_stride : nullptr); - } -} - -// Maximum yz-table footprint that fits comfortably on the stack for the -// Lagrange performance sweep. Order-8 hex q=4 needs 4*(9x9) entries per table. -// Higher orders fall back to thread_local heap buffers. 
-inline constexpr std::size_t kMaxStackYZ = 384; - -struct TensorProductVectorSink { - std::vector* values; - std::vector* gradients; - std::vector* hessians; - - bool wants_values() const noexcept { return values != nullptr; } - bool wants_gradients() const noexcept { return gradients != nullptr; } - bool wants_hessians() const noexcept { return hessians != nullptr; } - - void prepare(std::size_t n_nodes) const { - if (values) values->resize(n_nodes); - if (gradients) gradients->resize(n_nodes); - if (hessians) hessians->resize(n_nodes); - } - - void write_value(std::size_t n, Real value) const { - (*values)[n] = value; - } - - void write_gradient(std::size_t n, Real dx, Real dy, Real dz) const { - auto& g = (*gradients)[n]; - g[0] = dx; - g[1] = dy; - g[2] = dz; - } - - void write_hessian(std::size_t n, - Real xx, - Real yy, - Real zz, - Real xy, - Real xz, - Real yz) const { - (*hessians)[n] = make_symmetric_hessian(xx, yy, zz, xy, xz, yz); - } -}; - -struct TensorProductRawSink { - Real* values; - Real* gradients; - Real* hessians; - - bool wants_values() const noexcept { return values != nullptr; } - bool wants_gradients() const noexcept { return gradients != nullptr; } - bool wants_hessians() const noexcept { return hessians != nullptr; } - - void prepare(std::size_t) const {} - - void write_value(std::size_t n, Real value) const { - values[n] = value; - } - - void write_gradient(std::size_t n, Real dx, Real dy, Real dz) const { - Real* g = gradients + n * 3u; - g[0] = dx; - g[1] = dy; - g[2] = dz; - } - - void write_hessian(std::size_t n, - Real xx, - Real yy, - Real zz, - Real xy, - Real xz, - Real yz) const { - Real* H = hessians + n * 9u; - H[0] = xx; - H[4] = yy; - H[8] = zz; - H[1] = xy; H[3] = xy; - H[2] = xz; H[6] = xz; - H[5] = yz; H[7] = yz; - } -}; - -// Fused sum-factorized tensor-product evaluator. 
-// -// Precomputes one to six (ny x nz)-shaped tables of partial products -// `M_xy[j*nz + k]` so that the inner per-node loop performs at most one -// multiplication per output instead of two. With all three output buffers -// supplied, this is the fused values + gradients + hessians path that shares -// every per-axis evaluation. -// -// Per-node multiply count (vs. the unfactored variants): -// values only : 1 (was 2) -// gradients only : 3 (was 6) -// hessians only : 6 (was 12) -// all three : 10 (was 20) -// -// Dimensional scope: works uniformly for Line/Quadrilateral/Hexahedron with -// the unused axes' size folded to 1 via constant_axis_basis(). -template -void evaluate_tensor_product_factorized_impl( - const std::vector>& tensor_indices, - const AxisBasisEvaluations& x_axis, - const AxisBasisEvaluations& y_axis, - const AxisBasisEvaluations& z_axis, - const Sink& sink) { - const std::size_t ny = y_axis.size; - const std::size_t nz = z_axis.size; - const std::size_t nyz = ny * nz; - const bool need_values = sink.wants_values(); - const bool need_grad = sink.wants_gradients(); - const bool need_hess = sink.wants_hessians(); - - Real Mvv_stack[kMaxStackYZ]; - Real Mdv_stack[kMaxStackYZ]; - Real Mvd_stack[kMaxStackYZ]; - Real Md2v_stack[kMaxStackYZ]; - Real Mvd2_stack[kMaxStackYZ]; - Real Mdd_stack[kMaxStackYZ]; - - Real* Mvv; - Real* Mdv; - Real* Mvd; - Real* Md2v; - Real* Mvd2; - Real* Mdd; - if (nyz <= kMaxStackYZ) { - Mvv = Mvv_stack; - Mdv = Mdv_stack; - Mvd = Mvd_stack; - Md2v = Md2v_stack; - Mvd2 = Mvd2_stack; - Mdd = Mdd_stack; - } else { - auto& tables = evaluate_scratch().tensor_tables; - tables.resizeFor(nyz); - Mvv = tables.vv.data(); - Mdv = tables.dv.data(); - Mvd = tables.vd.data(); - Md2v = tables.d2v.data(); - Mvd2 = tables.vd2.data(); - Mdd = tables.dd.data(); - } - - // M_vv is required by every output (values, ∂ξ, ∂ξ²). 
- for (std::size_t j = 0; j < ny; ++j) { - const Real yv = y_axis.values[j]; - for (std::size_t k = 0; k < nz; ++k) { - Mvv[j * nz + k] = yv * z_axis.values[k]; - } - } - - if (need_grad || need_hess) { - for (std::size_t j = 0; j < ny; ++j) { - const Real yv = y_axis.values[j]; - const Real yd = y_axis.first[j]; - for (std::size_t k = 0; k < nz; ++k) { - Mdv[j * nz + k] = yd * z_axis.values[k]; - Mvd[j * nz + k] = yv * z_axis.first[k]; - } - } - } - - if (need_hess) { - for (std::size_t j = 0; j < ny; ++j) { - const Real yv = y_axis.values[j]; - const Real yd = y_axis.first[j]; - const Real yd2 = y_axis.second[j]; - for (std::size_t k = 0; k < nz; ++k) { - Md2v[j * nz + k] = yd2 * z_axis.values[k]; - Mvd2[j * nz + k] = yv * z_axis.second[k]; - Mdd[j * nz + k] = yd * z_axis.first[k]; - } - } - } - - const std::size_t n_nodes = tensor_indices.size(); - sink.prepare(n_nodes); - - for (std::size_t n = 0; n < n_nodes; ++n) { - const auto& idx = tensor_indices[n]; - const std::size_t i = idx[0]; - const std::size_t jk = idx[1] * nz + idx[2]; - - const Real Lx = x_axis.values[i]; - - if (need_values) { - sink.write_value(n, Lx * Mvv[jk]); - } - - if (need_grad) { - const Real dLx = x_axis.first[i]; - sink.write_gradient(n, - dLx * Mvv[jk], - Lx * Mdv[jk], - Lx * Mvd[jk]); - } - - if (need_hess) { - const Real dLx = x_axis.first[i]; - const Real d2Lx = x_axis.second[i]; - sink.write_hessian(n, - d2Lx * Mvv[jk], - Lx * Md2v[jk], - Lx * Mvd2[jk], - dLx * Mdv[jk], - dLx * Mvd[jk], - Lx * Mdd[jk]); - } - } -} - -void evaluate_tensor_product_factorized( - const std::vector>& tensor_indices, - const AxisBasisEvaluations& x_axis, - const AxisBasisEvaluations& y_axis, - const AxisBasisEvaluations& z_axis, - std::vector* values_out, - std::vector* gradients_out, - std::vector* hessians_out) { - const TensorProductVectorSink sink{values_out, gradients_out, hessians_out}; - evaluate_tensor_product_factorized_impl(tensor_indices, x_axis, y_axis, z_axis, sink); -} - -void 
evaluate_tensor_product_factorized_to( - const std::vector>& tensor_indices, - const AxisBasisEvaluations& x_axis, - const AxisBasisEvaluations& y_axis, - const AxisBasisEvaluations& z_axis, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - const TensorProductRawSink sink{values_out, gradients_out, hessians_out}; - evaluate_tensor_product_factorized_impl(tensor_indices, x_axis, y_axis, z_axis, sink); -} - -template -inline void write_tensor_product_value_strided_q( - std::size_t axis_stride, - std::size_t nyz, - std::size_t i, - std::size_t jk, - const AxisBatchScratch& x_batch, - const Real* SVMP_RESTRICT Mvv, - Real* SVMP_RESTRICT value_row) { - const std::size_t q_axis = Q * axis_stride; - const std::size_t slot = Q * nyz + jk; - value_row[Q] = x_batch.values[q_axis + i] * Mvv[slot]; -} - -template -inline void write_tensor_product_hessian_strided_q( - std::size_t axis_stride, - std::size_t nyz, - std::size_t i, - std::size_t jk, - std::size_t output_stride, - const AxisBatchScratch& x_batch, - const Real* SVMP_RESTRICT Mvv, - const Real* SVMP_RESTRICT Mdv, - const Real* SVMP_RESTRICT Mvd, - const Real* SVMP_RESTRICT Md2v, - const Real* SVMP_RESTRICT Mvd2, - const Real* SVMP_RESTRICT Mdd, - Real* SVMP_RESTRICT hess_row) { - const std::size_t q_axis = Q * axis_stride; - const std::size_t slot = Q * nyz + jk; - const Real xv = x_batch.values[q_axis + i]; - const Real xd = x_batch.first[q_axis + i]; - const Real x2 = x_batch.second[q_axis + i]; - const Real hxy = xd * Mdv[slot]; - const Real hxz = xd * Mvd[slot]; - const Real hyz = xv * Mdd[slot]; - hess_row[0u * output_stride + Q] = x2 * Mvv[slot]; - hess_row[4u * output_stride + Q] = xv * Md2v[slot]; - hess_row[8u * output_stride + Q] = xv * Mvd2[slot]; - hess_row[1u * output_stride + Q] = hxy; - hess_row[3u * output_stride + Q] = hxy; - hess_row[2u * output_stride + Q] = hxz; - hess_row[6u * output_stride + Q] = hxz; - hess_row[5u * output_stride + Q] 
= hyz; - hess_row[7u * output_stride + Q] = hyz; -} - -template -inline void write_tensor_product_hessian_stride4_q( - std::size_t axis_stride, - std::size_t nyz, - std::size_t i, - std::size_t jk, - const AxisBatchScratch& x_batch, - const Real* SVMP_RESTRICT Mvv, - const Real* SVMP_RESTRICT Mdv, - const Real* SVMP_RESTRICT Mvd, - const Real* SVMP_RESTRICT Md2v, - const Real* SVMP_RESTRICT Mvd2, - const Real* SVMP_RESTRICT Mdd, - Real* SVMP_RESTRICT hess_row) { - const std::size_t q_axis = Q * axis_stride; - const std::size_t slot = Q * nyz + jk; - const Real xv = x_batch.values[q_axis + i]; - const Real xd = x_batch.first[q_axis + i]; - const Real x2 = x_batch.second[q_axis + i]; - const Real hxy = xd * Mdv[slot]; - const Real hxz = xd * Mvd[slot]; - const Real hyz = xv * Mdd[slot]; - hess_row[Q] = x2 * Mvv[slot]; - hess_row[16u + Q] = xv * Md2v[slot]; - hess_row[32u + Q] = xv * Mvd2[slot]; - hess_row[4u + Q] = hxy; - hess_row[12u + Q] = hxy; - hess_row[8u + Q] = hxz; - hess_row[24u + Q] = hxz; - hess_row[20u + Q] = hyz; - hess_row[28u + Q] = hyz; -} - -template -inline void write_tensor_product_gradient_strided_q( - std::size_t axis_stride, - std::size_t nyz, - std::size_t i, - std::size_t jk, - std::size_t output_stride, - const AxisBatchScratch& x_batch, - const Real* SVMP_RESTRICT Mvv, - const Real* SVMP_RESTRICT Mdv, - const Real* SVMP_RESTRICT Mvd, - Real* SVMP_RESTRICT grad_row) { - const std::size_t q_axis = Q * axis_stride; - const std::size_t slot = Q * nyz + jk; - const Real xv = x_batch.values[q_axis + i]; - const Real xd = x_batch.first[q_axis + i]; - grad_row[0u * output_stride + Q] = xd * Mvv[slot]; - grad_row[1u * output_stride + Q] = xv * Mdv[slot]; - grad_row[2u * output_stride + Q] = xv * Mvd[slot]; -} - -template -inline void write_tensor_product_gradient_stride4_q( - std::size_t axis_stride, - std::size_t nyz, - std::size_t i, - std::size_t jk, - const AxisBatchScratch& x_batch, - const Real* SVMP_RESTRICT Mvv, - const Real* SVMP_RESTRICT 
Mdv, - const Real* SVMP_RESTRICT Mvd, - Real* SVMP_RESTRICT grad_row) { - const std::size_t q_axis = Q * axis_stride; - const std::size_t slot = Q * nyz + jk; - const Real xv = x_batch.values[q_axis + i]; - const Real xd = x_batch.first[q_axis + i]; - grad_row[Q] = xd * Mvv[slot]; - grad_row[4u + Q] = xv * Mdv[slot]; - grad_row[8u + Q] = xv * Mvd[slot]; -} - -template -inline void write_tensor_product_all_strided_q( - std::size_t axis_stride, - std::size_t nyz, - std::size_t i, - std::size_t jk, - std::size_t output_stride, - const AxisBatchScratch& x_batch, - const Real* SVMP_RESTRICT Mvv, - const Real* SVMP_RESTRICT Mdv, - const Real* SVMP_RESTRICT Mvd, - const Real* SVMP_RESTRICT Md2v, - const Real* SVMP_RESTRICT Mvd2, - const Real* SVMP_RESTRICT Mdd, - Real* SVMP_RESTRICT value_row, - Real* SVMP_RESTRICT grad_row, - Real* SVMP_RESTRICT hess_row) { - const std::size_t q_axis = Q * axis_stride; - const std::size_t slot = Q * nyz + jk; - const Real xv = x_batch.values[q_axis + i]; - const Real xd = x_batch.first[q_axis + i]; - value_row[Q] = xv * Mvv[slot]; - grad_row[0u * output_stride + Q] = xd * Mvv[slot]; - grad_row[1u * output_stride + Q] = xv * Mdv[slot]; - grad_row[2u * output_stride + Q] = xv * Mvd[slot]; - - const Real x2 = x_batch.second[q_axis + i]; - const Real hxy = xd * Mdv[slot]; - const Real hxz = xd * Mvd[slot]; - const Real hyz = xv * Mdd[slot]; - hess_row[0u * output_stride + Q] = x2 * Mvv[slot]; - hess_row[4u * output_stride + Q] = xv * Md2v[slot]; - hess_row[8u * output_stride + Q] = xv * Mvd2[slot]; - hess_row[1u * output_stride + Q] = hxy; - hess_row[3u * output_stride + Q] = hxy; - hess_row[2u * output_stride + Q] = hxz; - hess_row[6u * output_stride + Q] = hxz; - hess_row[5u * output_stride + Q] = hyz; - hess_row[7u * output_stride + Q] = hyz; -} - -template -inline void write_tensor_product_all_stride4_q( - std::size_t axis_stride, - std::size_t nyz, - std::size_t i, - std::size_t jk, - const AxisBatchScratch& x_batch, - const Real* 
SVMP_RESTRICT Mvv, - const Real* SVMP_RESTRICT Mdv, - const Real* SVMP_RESTRICT Mvd, - const Real* SVMP_RESTRICT Md2v, - const Real* SVMP_RESTRICT Mvd2, - const Real* SVMP_RESTRICT Mdd, - Real* SVMP_RESTRICT value_row, - Real* SVMP_RESTRICT grad_row, - Real* SVMP_RESTRICT hess_row) { - const std::size_t q_axis = Q * axis_stride; - const std::size_t slot = Q * nyz + jk; - const Real xv = x_batch.values[q_axis + i]; - const Real xd = x_batch.first[q_axis + i]; - value_row[Q] = xv * Mvv[slot]; - grad_row[Q] = xd * Mvv[slot]; - grad_row[4u + Q] = xv * Mdv[slot]; - grad_row[8u + Q] = xv * Mvd[slot]; - - const Real x2 = x_batch.second[q_axis + i]; - const Real hxy = xd * Mdv[slot]; - const Real hxz = xd * Mvd[slot]; - const Real hyz = xv * Mdd[slot]; - hess_row[Q] = x2 * Mvv[slot]; - hess_row[16u + Q] = xv * Md2v[slot]; - hess_row[32u + Q] = xv * Mvd2[slot]; - hess_row[4u + Q] = hxy; - hess_row[12u + Q] = hxy; - hess_row[8u + Q] = hxz; - hess_row[24u + Q] = hxz; - hess_row[20u + Q] = hyz; - hess_row[28u + Q] = hyz; -} - -SVMP_LAGRANGE_NOINLINE SVMP_LAGRANGE_ALIGN64 bool -evaluate_tensor_product_values_stride4_q4_transposed( - const std::vector>& tensor_indices, - std::size_t axis_stride, - const AxisBatchScratch& x_batch, - const AxisBatchScratch& y_batch, - const AxisBatchScratch& z_batch, - Real* SVMP_RESTRICT values_out) { - const std::size_t nyz = axis_stride * axis_stride; - const std::size_t table_count = 4u * nyz; - if (table_count > kMaxStackYZ || values_out == nullptr) { - return false; - } - - Real Mvv_stack[kMaxStackYZ]; - for (std::size_t j = 0; j < axis_stride; ++j) { - const Real yv0 = y_batch.values[j]; - const Real yv1 = y_batch.values[axis_stride + j]; - const Real yv2 = y_batch.values[2u * axis_stride + j]; - const Real yv3 = y_batch.values[3u * axis_stride + j]; - for (std::size_t k = 0; k < axis_stride; ++k) { - const std::size_t base = (j * axis_stride + k) * 4u; - Mvv_stack[base + 0u] = yv0 * z_batch.values[k]; - Mvv_stack[base + 1u] = yv1 * 
z_batch.values[axis_stride + k]; - Mvv_stack[base + 2u] = yv2 * z_batch.values[2u * axis_stride + k]; - Mvv_stack[base + 3u] = yv3 * z_batch.values[3u * axis_stride + k]; - } - } - - for (std::size_t node = 0; node < tensor_indices.size(); ++node) { - const auto& idx = tensor_indices[node]; - const std::size_t i = idx[0]; - const std::size_t jk = (idx[1] * axis_stride + idx[2]) * 4u; - Real* SVMP_RESTRICT value_row = values_out + node * 4u; - value_row[0u] = x_batch.values[i] * Mvv_stack[jk + 0u]; - value_row[1u] = x_batch.values[axis_stride + i] * Mvv_stack[jk + 1u]; - value_row[2u] = x_batch.values[2u * axis_stride + i] * Mvv_stack[jk + 2u]; - value_row[3u] = x_batch.values[3u * axis_stride + i] * Mvv_stack[jk + 3u]; - } - - return true; -} - -SVMP_LAGRANGE_NOINLINE SVMP_LAGRANGE_ALIGN64 bool -evaluate_tensor_product_gradients_stride4_q4_transposed( - const std::vector>& tensor_indices, - std::size_t axis_stride, - const AxisBatchScratch& x_batch, - const AxisBatchScratch& y_batch, - const AxisBatchScratch& z_batch, - Real* SVMP_RESTRICT gradients_out) { - const std::size_t nyz = axis_stride * axis_stride; - const std::size_t table_count = 4u * nyz; - if (table_count > kMaxStackYZ || gradients_out == nullptr) { - return false; - } - - Real Mvv_stack[kMaxStackYZ]; - Real Mdv_stack[kMaxStackYZ]; - Real Mvd_stack[kMaxStackYZ]; - for (std::size_t j = 0; j < axis_stride; ++j) { - const Real yv0 = y_batch.values[j]; - const Real yv1 = y_batch.values[axis_stride + j]; - const Real yv2 = y_batch.values[2u * axis_stride + j]; - const Real yv3 = y_batch.values[3u * axis_stride + j]; - const Real yd0 = y_batch.first[j]; - const Real yd1 = y_batch.first[axis_stride + j]; - const Real yd2 = y_batch.first[2u * axis_stride + j]; - const Real yd3 = y_batch.first[3u * axis_stride + j]; - for (std::size_t k = 0; k < axis_stride; ++k) { - const std::size_t base = (j * axis_stride + k) * 4u; - const Real zv0 = z_batch.values[k]; - const Real zv1 = z_batch.values[axis_stride + k]; - 
const Real zv2 = z_batch.values[2u * axis_stride + k]; - const Real zv3 = z_batch.values[3u * axis_stride + k]; - const Real zd0 = z_batch.first[k]; - const Real zd1 = z_batch.first[axis_stride + k]; - const Real zd2 = z_batch.first[2u * axis_stride + k]; - const Real zd3 = z_batch.first[3u * axis_stride + k]; - - Mvv_stack[base + 0u] = yv0 * zv0; - Mvv_stack[base + 1u] = yv1 * zv1; - Mvv_stack[base + 2u] = yv2 * zv2; - Mvv_stack[base + 3u] = yv3 * zv3; - Mdv_stack[base + 0u] = yd0 * zv0; - Mdv_stack[base + 1u] = yd1 * zv1; - Mdv_stack[base + 2u] = yd2 * zv2; - Mdv_stack[base + 3u] = yd3 * zv3; - Mvd_stack[base + 0u] = yv0 * zd0; - Mvd_stack[base + 1u] = yv1 * zd1; - Mvd_stack[base + 2u] = yv2 * zd2; - Mvd_stack[base + 3u] = yv3 * zd3; - } - } - - for (std::size_t node = 0; node < tensor_indices.size(); ++node) { - const auto& idx = tensor_indices[node]; - const std::size_t i = idx[0]; - const std::size_t jk = (idx[1] * axis_stride + idx[2]) * 4u; - - const Real xv0 = x_batch.values[i]; - const Real xv1 = x_batch.values[axis_stride + i]; - const Real xv2 = x_batch.values[2u * axis_stride + i]; - const Real xv3 = x_batch.values[3u * axis_stride + i]; - const Real xd0 = x_batch.first[i]; - const Real xd1 = x_batch.first[axis_stride + i]; - const Real xd2 = x_batch.first[2u * axis_stride + i]; - const Real xd3 = x_batch.first[3u * axis_stride + i]; - - Real* SVMP_RESTRICT grad_row = gradients_out + node * 12u; - grad_row[0u] = xd0 * Mvv_stack[jk + 0u]; - grad_row[1u] = xd1 * Mvv_stack[jk + 1u]; - grad_row[2u] = xd2 * Mvv_stack[jk + 2u]; - grad_row[3u] = xd3 * Mvv_stack[jk + 3u]; - grad_row[4u] = xv0 * Mdv_stack[jk + 0u]; - grad_row[5u] = xv1 * Mdv_stack[jk + 1u]; - grad_row[6u] = xv2 * Mdv_stack[jk + 2u]; - grad_row[7u] = xv3 * Mdv_stack[jk + 3u]; - grad_row[8u] = xv0 * Mvd_stack[jk + 0u]; - grad_row[9u] = xv1 * Mvd_stack[jk + 1u]; - grad_row[10u] = xv2 * Mvd_stack[jk + 2u]; - grad_row[11u] = xv3 * Mvd_stack[jk + 3u]; - } - - return true; -} - -template 
-SVMP_LAGRANGE_NOINLINE SVMP_LAGRANGE_ALIGN64 bool -evaluate_tensor_product_second_stride4_q4_transposed( - const std::vector>& tensor_indices, - std::size_t axis_stride, - const AxisBatchScratch& x_batch, - const AxisBatchScratch& y_batch, - const AxisBatchScratch& z_batch, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - const std::size_t nyz = axis_stride * axis_stride; - const std::size_t table_count = 4u * nyz; - if (table_count > kMaxStackYZ || hessians_out == nullptr) { - return false; - } - if constexpr (NeedAllOutputs) { - if (values_out == nullptr || gradients_out == nullptr) { - return false; - } - } - - Real Mvv_stack[kMaxStackYZ]; - Real Mdv_stack[kMaxStackYZ]; - Real Mvd_stack[kMaxStackYZ]; - Real Md2v_stack[kMaxStackYZ]; - Real Mvd2_stack[kMaxStackYZ]; - Real Mdd_stack[kMaxStackYZ]; - - for (std::size_t j = 0; j < axis_stride; ++j) { - const Real yv0 = y_batch.values[j]; - const Real yv1 = y_batch.values[axis_stride + j]; - const Real yv2 = y_batch.values[2u * axis_stride + j]; - const Real yv3 = y_batch.values[3u * axis_stride + j]; - const Real yd0 = y_batch.first[j]; - const Real yd1 = y_batch.first[axis_stride + j]; - const Real yd2 = y_batch.first[2u * axis_stride + j]; - const Real yd3 = y_batch.first[3u * axis_stride + j]; - const Real y20 = y_batch.second[j]; - const Real y21 = y_batch.second[axis_stride + j]; - const Real y22 = y_batch.second[2u * axis_stride + j]; - const Real y23 = y_batch.second[3u * axis_stride + j]; - - for (std::size_t k = 0; k < axis_stride; ++k) { - const std::size_t base = (j * axis_stride + k) * 4u; - const Real zv0 = z_batch.values[k]; - const Real zv1 = z_batch.values[axis_stride + k]; - const Real zv2 = z_batch.values[2u * axis_stride + k]; - const Real zv3 = z_batch.values[3u * axis_stride + k]; - const Real zd0 = z_batch.first[k]; - const Real zd1 = z_batch.first[axis_stride + k]; - const Real zd2 = z_batch.first[2u * axis_stride + k]; - const Real 
zd3 = z_batch.first[3u * axis_stride + k]; - const Real z20 = z_batch.second[k]; - const Real z21 = z_batch.second[axis_stride + k]; - const Real z22 = z_batch.second[2u * axis_stride + k]; - const Real z23 = z_batch.second[3u * axis_stride + k]; - - Mvv_stack[base + 0u] = yv0 * zv0; - Mvv_stack[base + 1u] = yv1 * zv1; - Mvv_stack[base + 2u] = yv2 * zv2; - Mvv_stack[base + 3u] = yv3 * zv3; - Mdv_stack[base + 0u] = yd0 * zv0; - Mdv_stack[base + 1u] = yd1 * zv1; - Mdv_stack[base + 2u] = yd2 * zv2; - Mdv_stack[base + 3u] = yd3 * zv3; - Mvd_stack[base + 0u] = yv0 * zd0; - Mvd_stack[base + 1u] = yv1 * zd1; - Mvd_stack[base + 2u] = yv2 * zd2; - Mvd_stack[base + 3u] = yv3 * zd3; - Md2v_stack[base + 0u] = y20 * zv0; - Md2v_stack[base + 1u] = y21 * zv1; - Md2v_stack[base + 2u] = y22 * zv2; - Md2v_stack[base + 3u] = y23 * zv3; - Mvd2_stack[base + 0u] = yv0 * z20; - Mvd2_stack[base + 1u] = yv1 * z21; - Mvd2_stack[base + 2u] = yv2 * z22; - Mvd2_stack[base + 3u] = yv3 * z23; - Mdd_stack[base + 0u] = yd0 * zd0; - Mdd_stack[base + 1u] = yd1 * zd1; - Mdd_stack[base + 2u] = yd2 * zd2; - Mdd_stack[base + 3u] = yd3 * zd3; - } - } - - for (std::size_t node = 0; node < tensor_indices.size(); ++node) { - const auto& idx = tensor_indices[node]; - const std::size_t i = idx[0]; - const std::size_t jk = (idx[1] * axis_stride + idx[2]) * 4u; - - const Real xv0 = x_batch.values[i]; - const Real xv1 = x_batch.values[axis_stride + i]; - const Real xv2 = x_batch.values[2u * axis_stride + i]; - const Real xv3 = x_batch.values[3u * axis_stride + i]; - const Real xd0 = x_batch.first[i]; - const Real xd1 = x_batch.first[axis_stride + i]; - const Real xd2 = x_batch.first[2u * axis_stride + i]; - const Real xd3 = x_batch.first[3u * axis_stride + i]; - const Real x20 = x_batch.second[i]; - const Real x21 = x_batch.second[axis_stride + i]; - const Real x22 = x_batch.second[2u * axis_stride + i]; - const Real x23 = x_batch.second[3u * axis_stride + i]; - - const Real mvv0 = Mvv_stack[jk + 0u]; - const 
Real mvv1 = Mvv_stack[jk + 1u]; - const Real mvv2 = Mvv_stack[jk + 2u]; - const Real mvv3 = Mvv_stack[jk + 3u]; - const Real mdv0 = Mdv_stack[jk + 0u]; - const Real mdv1 = Mdv_stack[jk + 1u]; - const Real mdv2 = Mdv_stack[jk + 2u]; - const Real mdv3 = Mdv_stack[jk + 3u]; - const Real mvd0 = Mvd_stack[jk + 0u]; - const Real mvd1 = Mvd_stack[jk + 1u]; - const Real mvd2 = Mvd_stack[jk + 2u]; - const Real mvd3 = Mvd_stack[jk + 3u]; - const Real md2v0 = Md2v_stack[jk + 0u]; - const Real md2v1 = Md2v_stack[jk + 1u]; - const Real md2v2 = Md2v_stack[jk + 2u]; - const Real md2v3 = Md2v_stack[jk + 3u]; - const Real mvd20 = Mvd2_stack[jk + 0u]; - const Real mvd21 = Mvd2_stack[jk + 1u]; - const Real mvd22 = Mvd2_stack[jk + 2u]; - const Real mvd23 = Mvd2_stack[jk + 3u]; - const Real mdd0 = Mdd_stack[jk + 0u]; - const Real mdd1 = Mdd_stack[jk + 1u]; - const Real mdd2 = Mdd_stack[jk + 2u]; - const Real mdd3 = Mdd_stack[jk + 3u]; - - if constexpr (NeedAllOutputs) { - Real* SVMP_RESTRICT value_row = values_out + node * 4u; - value_row[0u] = xv0 * mvv0; - value_row[1u] = xv1 * mvv1; - value_row[2u] = xv2 * mvv2; - value_row[3u] = xv3 * mvv3; - - Real* SVMP_RESTRICT grad_row = gradients_out + node * 12u; - grad_row[0u] = xd0 * mvv0; - grad_row[1u] = xd1 * mvv1; - grad_row[2u] = xd2 * mvv2; - grad_row[3u] = xd3 * mvv3; - grad_row[4u] = xv0 * mdv0; - grad_row[5u] = xv1 * mdv1; - grad_row[6u] = xv2 * mdv2; - grad_row[7u] = xv3 * mdv3; - grad_row[8u] = xv0 * mvd0; - grad_row[9u] = xv1 * mvd1; - grad_row[10u] = xv2 * mvd2; - grad_row[11u] = xv3 * mvd3; - } - - const Real hxy0 = xd0 * mdv0; - const Real hxy1 = xd1 * mdv1; - const Real hxy2 = xd2 * mdv2; - const Real hxy3 = xd3 * mdv3; - const Real hxz0 = xd0 * mvd0; - const Real hxz1 = xd1 * mvd1; - const Real hxz2 = xd2 * mvd2; - const Real hxz3 = xd3 * mvd3; - const Real hyz0 = xv0 * mdd0; - const Real hyz1 = xv1 * mdd1; - const Real hyz2 = xv2 * mdd2; - const Real hyz3 = xv3 * mdd3; - - Real* SVMP_RESTRICT hess_row = hessians_out + node 
* 36u; - hess_row[0u] = x20 * mvv0; - hess_row[1u] = x21 * mvv1; - hess_row[2u] = x22 * mvv2; - hess_row[3u] = x23 * mvv3; - hess_row[4u] = hxy0; - hess_row[5u] = hxy1; - hess_row[6u] = hxy2; - hess_row[7u] = hxy3; - hess_row[8u] = hxz0; - hess_row[9u] = hxz1; - hess_row[10u] = hxz2; - hess_row[11u] = hxz3; - hess_row[12u] = hxy0; - hess_row[13u] = hxy1; - hess_row[14u] = hxy2; - hess_row[15u] = hxy3; - hess_row[16u] = xv0 * md2v0; - hess_row[17u] = xv1 * md2v1; - hess_row[18u] = xv2 * md2v2; - hess_row[19u] = xv3 * md2v3; - hess_row[20u] = hyz0; - hess_row[21u] = hyz1; - hess_row[22u] = hyz2; - hess_row[23u] = hyz3; - hess_row[24u] = hxz0; - hess_row[25u] = hxz1; - hess_row[26u] = hxz2; - hess_row[27u] = hxz3; - hess_row[28u] = hyz0; - hess_row[29u] = hyz1; - hess_row[30u] = hyz2; - hess_row[31u] = hyz3; - hess_row[32u] = xv0 * mvd20; - hess_row[33u] = xv1 * mvd21; - hess_row[34u] = xv2 * mvd22; - hess_row[35u] = xv3 * mvd23; - } - - return true; -} - -template -constexpr std::size_t line_public_axis_index(std::size_t node) noexcept { - return node == 0u ? 0u : (node == 1u ? 
static_cast(N - 1) : node - 1u); -} - -template -constexpr std::array make_line_axis_inv_denoms() noexcept { - std::array inv_denoms{}; - for (int i = 0; i < N; ++i) { - Real denom = Real(1); - for (int j = 0; j < N; ++j) { - if (j != i) { - denom *= static_cast(i - j); - } - } - inv_denoms[static_cast(i)] = Real(1) / denom; - } - return inv_denoms; -} - -template -void fill_line_values_product(Real x, Real* SVMP_RESTRICT values) { - static constexpr auto inv_denoms = make_line_axis_inv_denoms(); - const Real p = static_cast(N - 1); - const Real r = (x + Real(1)) * p * Real(0.5); - Real prefix[N]; - Real suffix[N]; - prefix[0] = Real(1); - for (int i = 1; i < N; ++i) { - prefix[i] = prefix[i - 1] * (r - static_cast(i - 1)); - } - suffix[N - 1] = Real(1); - for (int i = N - 2; i >= 0; --i) { - suffix[i] = suffix[i + 1] * (r - static_cast(i + 1)); - } - for (int i = 0; i < N; ++i) { - const std::size_t slot = static_cast(i); - values[slot] = prefix[i] * suffix[i] * inv_denoms[slot]; - } -} - -template -void fill_line_values_product_derivatives(Real x, - Real* SVMP_RESTRICT values, - Real* SVMP_RESTRICT first, - Real* SVMP_RESTRICT second) { - static constexpr auto inv_denoms = make_line_axis_inv_denoms(); - const Real p = static_cast(N - 1); - const Real drdx = p * Real(0.5); - const Real d2rdx2 = drdx * drdx; - const Real r = (x + Real(1)) * drdx; - - Real prefix[N + 1]; - Real prefix_d1[N + 1]; - Real prefix_d2[N + 1]; - Real suffix[N + 1]; - Real suffix_d1[N + 1]; - Real suffix_d2[N + 1]; - - const bool need_second = second != nullptr; - - prefix[0] = Real(1); - prefix_d1[0] = Real(0); - if (need_second) { - prefix_d2[0] = Real(0); - } - for (int i = 0; i < N; ++i) { - const Real factor = r - static_cast(i); - prefix[i + 1] = prefix[i] * factor; - prefix_d1[i + 1] = prefix_d1[i] * factor + prefix[i]; - if (need_second) { - prefix_d2[i + 1] = prefix_d2[i] * factor + Real(2) * prefix_d1[i]; - } - } - - suffix[N] = Real(1); - suffix_d1[N] = Real(0); - if 
(need_second) { - suffix_d2[N] = Real(0); - } - for (int i = N - 1; i >= 0; --i) { - const Real factor = r - static_cast(i); - suffix[i] = suffix[i + 1] * factor; - suffix_d1[i] = suffix_d1[i + 1] * factor + suffix[i + 1]; - if (need_second) { - suffix_d2[i] = suffix_d2[i + 1] * factor + Real(2) * suffix_d1[i + 1]; - } - } - - for (int i = 0; i < N; ++i) { - const std::size_t slot = static_cast(i); - const Real inv = inv_denoms[slot]; - const Real pre = prefix[i]; - const Real suf = suffix[i + 1]; - const Real pre_d1 = prefix_d1[i]; - const Real suf_d1 = suffix_d1[i + 1]; - values[slot] = pre * suf * inv; - if (first != nullptr) { - first[slot] = (pre_d1 * suf + pre * suf_d1) * inv * drdx; - } - if (second != nullptr) { - const Real d2 = - prefix_d2[i] * suf + - Real(2) * pre_d1 * suf_d1 + - pre * suffix_d2[i + 1]; - second[slot] = d2 * inv * d2rdx2; - } - } -} - -template -void fill_axis_batch_product_q4( - AxisBatchScratch& scratch, - const std::vector>& points, - std::size_t component, - AxisDeriv level) { - constexpr std::size_t axis_stride = static_cast(N); - scratch.resizeFor(4u * axis_stride, level); - for (std::size_t q = 0; q < 4u; ++q) { - Real* values = scratch.values.data() + q * axis_stride; - if (level == AxisDeriv::ValuesOnly) { - fill_line_values_product(points[q][component], values); - } else { - Real* first = scratch.first.data() + q * axis_stride; - Real* second = level == AxisDeriv::ValuesAndFirstAndSecond - ? 
scratch.second.data() + q * axis_stride - : nullptr; - fill_line_values_product_derivatives( - points[q][component], values, first, second); - } - } -} - -bool try_fill_axis_batch_product_q4( - AxisBatchScratch& scratch, - const std::vector>& points, - std::size_t component, - int n_axis, - AxisDeriv level) { - switch (n_axis) { - case 5: - fill_axis_batch_product_q4<5>(scratch, points, component, level); - return true; - case 6: - fill_axis_batch_product_q4<6>(scratch, points, component, level); - return true; - case 7: - fill_axis_batch_product_q4<7>(scratch, points, component, level); - return true; - case 8: - fill_axis_batch_product_q4<8>(scratch, points, component, level); - return true; - case 9: - fill_axis_batch_product_q4<9>(scratch, points, component, level); - return true; - default: - return false; - } -} - -template -SVMP_LAGRANGE_NOINLINE SVMP_LAGRANGE_ALIGN64 void evaluate_line_values_product_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out) { - Real q0[N]; - Real q1[N]; - Real q2[N]; - Real q3[N]; - fill_line_values_product(points[0][0], q0); - fill_line_values_product(points[1][0], q1); - fill_line_values_product(points[2][0], q2); - fill_line_values_product(points[3][0], q3); - - for (std::size_t node = 0; node < static_cast(N); ++node) { - const std::size_t i = line_public_axis_index(node); - Real* row = values_out + node * output_stride; - row[0] = q0[i]; - row[1] = q1[i]; - row[2] = q2[i]; - row[3] = q3[i]; - } -} - -FE_ALWAYS_INLINE void write_line_order4_values_q( - Real x, - std::size_t q, - Real* SVMP_RESTRICT row0, - Real* SVMP_RESTRICT row1, - Real* SVMP_RESTRICT row2, - Real* SVMP_RESTRICT row3, - Real* SVMP_RESTRICT row4) { - const Real r = (x + Real(1)) * Real(2); - const Real f0 = r; - const Real f1 = r - Real(1); - const Real f2 = r - Real(2); - const Real f3 = r - Real(3); - const Real f4 = r - Real(4); - const Real f01 = f0 * f1; - const Real f12 = f1 * f2; - const Real f23 = f2 * f3; 
- const Real f34 = f3 * f4; - const Real v0 = (f12 * f34) / Real(24); - const Real v1 = -(f0 * f2 * f34) / Real(6); - const Real v2 = (f01 * f34) / Real(4); - const Real v3 = -(f01 * f2 * f4) / Real(6); - const Real v4 = (f01 * f23) / Real(24); - row0[q] = v0; - row1[q] = v4; - row2[q] = v1; - row3[q] = v2; - row4[q] = v3; -} - -SVMP_LAGRANGE_NOINLINE SVMP_LAGRANGE_ALIGN64 void evaluate_line_order4_values_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out) { - Real* row0 = values_out + 0u * output_stride; - Real* row1 = values_out + 1u * output_stride; - Real* row2 = values_out + 2u * output_stride; - Real* row3 = values_out + 3u * output_stride; - Real* row4 = values_out + 4u * output_stride; - write_line_order4_values_q(points[0][0], 0u, row0, row1, row2, row3, row4); - write_line_order4_values_q(points[1][0], 1u, row0, row1, row2, row3, row4); - write_line_order4_values_q(points[2][0], 2u, row0, row1, row2, row3, row4); - write_line_order4_values_q(points[3][0], 3u, row0, row1, row2, row3, row4); -} - -SVMP_LAGRANGE_NOINLINE void evaluate_triangle_order1_gradients_strided( - std::size_t num_qpts, - std::size_t output_stride, - Real* SVMP_RESTRICT gradients_out) { - Real* SVMP_RESTRICT row0 = gradients_out + 0u * 3u * output_stride; - Real* SVMP_RESTRICT row1 = gradients_out + 1u * 3u * output_stride; - Real* SVMP_RESTRICT row2 = gradients_out + 2u * 3u * output_stride; - - for (std::size_t q = 0; q < num_qpts; ++q) { - row0[0u * output_stride + q] = Real(-1); - row0[1u * output_stride + q] = Real(-1); - row0[2u * output_stride + q] = Real(0); - row1[0u * output_stride + q] = Real(1); - row1[1u * output_stride + q] = Real(0); - row1[2u * output_stride + q] = Real(0); - row2[0u * output_stride + q] = Real(0); - row2[1u * output_stride + q] = Real(1); - row2[2u * output_stride + q] = Real(0); - } -} - -template -SVMP_LAGRANGE_NOINLINE void evaluate_line_hessians_product_q4( - const std::vector>& points, - std::size_t 
output_stride, - Real* SVMP_RESTRICT hessians_out) { - Real values[4][N]; - Real second[4][N]; - for (std::size_t q = 0; q < 4u; ++q) { - fill_line_values_product_derivatives( - points[q][0], values[q], nullptr, second[q]); - } - for (std::size_t node = 0; node < static_cast(N); ++node) { - const std::size_t i = line_public_axis_index(node); - write_line_hessian_q4_row(hessians_out + node * 9u * output_stride, - output_stride, - second[0][i], second[1][i], - second[2][i], second[3][i]); - } -} - -template -SVMP_LAGRANGE_NOINLINE void evaluate_line_all_product_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - Real values[4][N]; - Real first[4][N]; - Real second[4][N]; - for (std::size_t q = 0; q < 4u; ++q) { - fill_line_values_product_derivatives( - points[q][0], values[q], first[q], second[q]); - } - for (std::size_t node = 0; node < static_cast(N); ++node) { - const std::size_t i = line_public_axis_index(node); - Real* value_row = values_out + node * output_stride; - value_row[0] = values[0][i]; - value_row[1] = values[1][i]; - value_row[2] = values[2][i]; - value_row[3] = values[3][i]; - write_line_gradient_q4_row(gradients_out + node * 3u * output_stride, - output_stride, - first[0][i], first[1][i], - first[2][i], first[3][i]); - write_line_hessian_q4_row(hessians_out + node * 9u * output_stride, - output_stride, - second[0][i], second[1][i], - second[2][i], second[3][i]); - } -} - -inline void write_quad_product_value_row_q4( - Real* SVMP_RESTRICT row, - const Real* SVMP_RESTRICT x0, - const Real* SVMP_RESTRICT x1, - const Real* SVMP_RESTRICT x2, - const Real* SVMP_RESTRICT x3, - const Real* SVMP_RESTRICT y0, - const Real* SVMP_RESTRICT y1, - const Real* SVMP_RESTRICT y2, - const Real* SVMP_RESTRICT y3, - std::size_t i, - std::size_t j) { - row[0] = x0[i] * y0[j]; - row[1] = x1[i] * y1[j]; - row[2] = x2[i] * y2[j]; - row[3] = x3[i] * 
y3[j]; -} - -template -void evaluate_quad_values_product_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out) { - Real x0[N]; - Real x1[N]; - Real x2[N]; - Real x3[N]; - Real y0[N]; - Real y1[N]; - Real y2[N]; - Real y3[N]; - fill_line_values_product(points[0][0], x0); - fill_line_values_product(points[1][0], x1); - fill_line_values_product(points[2][0], x2); - fill_line_values_product(points[3][0], x3); - fill_line_values_product(points[0][1], y0); - fill_line_values_product(points[1][1], y1); - fill_line_values_product(points[2][1], y2); - fill_line_values_product(points[3][1], y3); - - constexpr std::size_t p = static_cast(N - 1); - std::size_t node = 0u; - write_quad_product_value_row_q4(values_out + node++ * output_stride, - x0, x1, x2, x3, y0, y1, y2, y3, 0u, 0u); - write_quad_product_value_row_q4(values_out + node++ * output_stride, - x0, x1, x2, x3, y0, y1, y2, y3, p, 0u); - write_quad_product_value_row_q4(values_out + node++ * output_stride, - x0, x1, x2, x3, y0, y1, y2, y3, p, p); - write_quad_product_value_row_q4(values_out + node++ * output_stride, - x0, x1, x2, x3, y0, y1, y2, y3, 0u, p); - - for (std::size_t i = 1u; i < p; ++i) { - write_quad_product_value_row_q4(values_out + node++ * output_stride, - x0, x1, x2, x3, y0, y1, y2, y3, i, 0u); - } - for (std::size_t j = 1u; j < p; ++j) { - write_quad_product_value_row_q4(values_out + node++ * output_stride, - x0, x1, x2, x3, y0, y1, y2, y3, p, j); - } - for (std::size_t i = p - 1u; i > 0u; --i) { - write_quad_product_value_row_q4(values_out + node++ * output_stride, - x0, x1, x2, x3, y0, y1, y2, y3, i, p); - } - for (std::size_t j = p - 1u; j > 0u; --j) { - write_quad_product_value_row_q4(values_out + node++ * output_stride, - x0, x1, x2, x3, y0, y1, y2, y3, 0u, j); - } - for (std::size_t j = 1u; j < p; ++j) { - for (std::size_t i = 1u; i < p; ++i) { - write_quad_product_value_row_q4(values_out + node++ * output_stride, - x0, x1, x2, x3, y0, y1, y2, y3, i, j); 
- } - } -} - -template -void evaluate_quad_derivatives_product_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - const bool need_grad = gradients_out != nullptr; - const bool need_hess = hessians_out != nullptr; - Real xv[4][N]; - Real xd[4][N]; - Real x2[4][N]; - Real yv[4][N]; - Real yd[4][N]; - Real y2[4][N]; - - for (std::size_t q = 0; q < 4u; ++q) { - fill_line_values_product_derivatives( - points[q][0], xv[q], (need_grad || need_hess) ? xd[q] : nullptr, - need_hess ? x2[q] : nullptr); - fill_line_values_product_derivatives( - points[q][1], yv[q], (need_grad || need_hess) ? yd[q] : nullptr, - need_hess ? y2[q] : nullptr); - } - - constexpr std::size_t p = static_cast(N - 1); - std::size_t node = 0u; - auto write_node = [&](std::size_t i, std::size_t j) { - Real* value_row = values_out != nullptr ? values_out + node * output_stride : nullptr; - Real* grad_row = gradients_out != nullptr ? gradients_out + node * 3u * output_stride : nullptr; - Real* hess_row = hessians_out != nullptr ? 
hessians_out + node * 9u * output_stride : nullptr; - if (grad_row != nullptr) { - grad_row[2u * output_stride + 0u] = Real(0); - grad_row[2u * output_stride + 1u] = Real(0); - grad_row[2u * output_stride + 2u] = Real(0); - grad_row[2u * output_stride + 3u] = Real(0); - } - if (hess_row != nullptr) { - hess_row[2u * output_stride + 0u] = Real(0); - hess_row[2u * output_stride + 1u] = Real(0); - hess_row[2u * output_stride + 2u] = Real(0); - hess_row[2u * output_stride + 3u] = Real(0); - hess_row[5u * output_stride + 0u] = Real(0); - hess_row[5u * output_stride + 1u] = Real(0); - hess_row[5u * output_stride + 2u] = Real(0); - hess_row[5u * output_stride + 3u] = Real(0); - hess_row[6u * output_stride + 0u] = Real(0); - hess_row[6u * output_stride + 1u] = Real(0); - hess_row[6u * output_stride + 2u] = Real(0); - hess_row[6u * output_stride + 3u] = Real(0); - hess_row[7u * output_stride + 0u] = Real(0); - hess_row[7u * output_stride + 1u] = Real(0); - hess_row[7u * output_stride + 2u] = Real(0); - hess_row[7u * output_stride + 3u] = Real(0); - hess_row[8u * output_stride + 0u] = Real(0); - hess_row[8u * output_stride + 1u] = Real(0); - hess_row[8u * output_stride + 2u] = Real(0); - hess_row[8u * output_stride + 3u] = Real(0); - } - for (std::size_t q = 0; q < 4u; ++q) { - const Real x_value = xv[q][i]; - const Real y_value = yv[q][j]; - if (value_row != nullptr) { - value_row[q] = x_value * y_value; - } - if (grad_row != nullptr) { - grad_row[0u * output_stride + q] = xd[q][i] * y_value; - grad_row[1u * output_stride + q] = x_value * yd[q][j]; - } - if (hess_row != nullptr) { - const Real hxy = xd[q][i] * yd[q][j]; - hess_row[0u * output_stride + q] = x2[q][i] * y_value; - hess_row[1u * output_stride + q] = hxy; - hess_row[3u * output_stride + q] = hxy; - hess_row[4u * output_stride + q] = x_value * y2[q][j]; - } - } - ++node; - }; - - write_node(0u, 0u); - write_node(p, 0u); - write_node(p, p); - write_node(0u, p); - for (std::size_t i = 1u; i < p; ++i) { - 
write_node(i, 0u); - } - for (std::size_t j = 1u; j < p; ++j) { - write_node(p, j); - } - for (std::size_t i = p - 1u; i > 0u; --i) { - write_node(i, p); - } - for (std::size_t j = p - 1u; j > 0u; --j) { - write_node(0u, j); - } - for (std::size_t j = 1u; j < p; ++j) { - for (std::size_t i = 1u; i < p; ++i) { - write_node(i, j); - } - } -} - -SVMP_LAGRANGE_NOINLINE SVMP_LAGRANGE_ALIGN64 void evaluate_quad_order8_gradients_product_q4( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT gradients_out) { - constexpr int N = 9; - constexpr std::size_t p = 8u; - Real xv[4][N]; - Real xd[4][N]; - Real yv[4][N]; - Real yd[4][N]; - - for (std::size_t q = 0; q < 4u; ++q) { - fill_line_values_product_derivatives(points[q][0], xv[q], xd[q], nullptr); - fill_line_values_product_derivatives(points[q][1], yv[q], yd[q], nullptr); - } - - std::size_t node = 0u; - auto write_node = [&](std::size_t i, std::size_t j) { - Real* SVMP_RESTRICT row = gradients_out + node * 3u * output_stride; - row[0u] = xd[0][i] * yv[0][j]; - row[1u] = xd[1][i] * yv[1][j]; - row[2u] = xd[2][i] * yv[2][j]; - row[3u] = xd[3][i] * yv[3][j]; - row[output_stride + 0u] = xv[0][i] * yd[0][j]; - row[output_stride + 1u] = xv[1][i] * yd[1][j]; - row[output_stride + 2u] = xv[2][i] * yd[2][j]; - row[output_stride + 3u] = xv[3][i] * yd[3][j]; - row[2u * output_stride + 0u] = Real(0); - row[2u * output_stride + 1u] = Real(0); - row[2u * output_stride + 2u] = Real(0); - row[2u * output_stride + 3u] = Real(0); - ++node; - }; - - write_node(0u, 0u); - write_node(p, 0u); - write_node(p, p); - write_node(0u, p); - for (std::size_t i = 1u; i < p; ++i) { - write_node(i, 0u); - } - for (std::size_t j = 1u; j < p; ++j) { - write_node(p, j); - } - for (std::size_t i = p - 1u; i > 0u; --i) { - write_node(i, p); - } - for (std::size_t j = p - 1u; j > 0u; --j) { - write_node(0u, j); - } - for (std::size_t j = 1u; j < p; ++j) { - for (std::size_t i = 1u; i < p; ++i) { - write_node(i, j); - } - } -} - 
-template -void evaluate_line_gradients_horner_q4( - const std::vector>& points, - std::size_t output_stride, - const Real* SVMP_RESTRICT d_coeffs, - Real* SVMP_RESTRICT gradients_out) { - const Real x0 = points[0][0]; - const Real x1 = points[1][0]; - const Real x2 = points[2][0]; - const Real x3 = points[3][0]; - - for (std::size_t node = 0; node < static_cast(N); ++node) { - const std::size_t i = line_public_axis_index(node); - const Real* c = d_coeffs + i * static_cast(N - 1); - Real r0 = c[N - 2]; - Real r1 = c[N - 2]; - Real r2 = c[N - 2]; - Real r3 = c[N - 2]; - for (int k = N - 2; k > 0; --k) { - const Real ck = c[k - 1]; - r0 = r0 * x0 + ck; - r1 = r1 * x1 + ck; - r2 = r2 * x2 + ck; - r3 = r3 * x3 + ck; - } - Real* row = gradients_out + node * 3u * output_stride; - row[0] = r0; - row[1] = r1; - row[2] = r2; - row[3] = r3; - row[output_stride + 0u] = Real(0); - row[output_stride + 1u] = Real(0); - row[output_stride + 2u] = Real(0); - row[output_stride + 3u] = Real(0); - row[2u * output_stride + 0u] = Real(0); - row[2u * output_stride + 1u] = Real(0); - row[2u * output_stride + 2u] = Real(0); - row[2u * output_stride + 3u] = Real(0); - } -} - -bool try_evaluate_line_values_horner_q4( - const std::vector>& points, - std::size_t output_stride, - const Real* SVMP_RESTRICT v_coeffs, - int n_axis, - Real* SVMP_RESTRICT values_out) { - (void)v_coeffs; - switch (n_axis) { - case 5: - evaluate_line_order4_values_q4(points, output_stride, values_out); - return true; - case 6: - evaluate_line_values_product_q4<6>(points, output_stride, values_out); - return true; - case 7: - evaluate_line_values_product_q4<7>(points, output_stride, values_out); - return true; - case 8: - evaluate_line_values_product_q4<8>(points, output_stride, values_out); - return true; - case 9: - evaluate_line_values_product_q4<9>(points, output_stride, values_out); - return true; - default: - return false; - } -} - -bool try_evaluate_line_gradients_horner_q4( - const std::vector>& points, - 
std::size_t output_stride, - const Real* SVMP_RESTRICT d_coeffs, - int n_axis, - Real* SVMP_RESTRICT gradients_out) { - switch (n_axis) { - case 5: - evaluate_line_gradients_horner_q4<5>(points, output_stride, d_coeffs, gradients_out); - return true; - case 6: - evaluate_line_gradients_horner_q4<6>(points, output_stride, d_coeffs, gradients_out); - return true; - case 7: - evaluate_line_gradients_horner_q4<7>(points, output_stride, d_coeffs, gradients_out); - return true; - case 8: - evaluate_line_gradients_horner_q4<8>(points, output_stride, d_coeffs, gradients_out); - return true; - case 9: - evaluate_line_gradients_horner_q4<9>(points, output_stride, d_coeffs, gradients_out); - return true; - default: - return false; - } -} - -SVMP_LAGRANGE_NOINLINE bool try_evaluate_line_hessians_product_q4( - const std::vector>& points, - std::size_t output_stride, - int n_axis, - Real* SVMP_RESTRICT hessians_out) { - switch (n_axis) { - case 5: - evaluate_line_hessians_product_q4<5>(points, output_stride, hessians_out); - return true; - case 6: - evaluate_line_hessians_product_q4<6>(points, output_stride, hessians_out); - return true; - case 7: - evaluate_line_hessians_product_q4<7>(points, output_stride, hessians_out); - return true; - case 8: - evaluate_line_hessians_product_q4<8>(points, output_stride, hessians_out); - return true; - case 9: - evaluate_line_hessians_product_q4<9>(points, output_stride, hessians_out); - return true; - default: - return false; - } -} - -SVMP_LAGRANGE_NOINLINE bool try_evaluate_line_all_product_q4( - const std::vector>& points, - std::size_t output_stride, - int n_axis, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - switch (n_axis) { - case 5: - evaluate_line_all_product_q4<5>( - points, output_stride, values_out, gradients_out, hessians_out); - return true; - case 6: - evaluate_line_all_product_q4<6>( - points, output_stride, values_out, gradients_out, hessians_out); - return 
true; - case 7: - evaluate_line_all_product_q4<7>( - points, output_stride, values_out, gradients_out, hessians_out); - return true; - case 8: - evaluate_line_all_product_q4<8>( - points, output_stride, values_out, gradients_out, hessians_out); - return true; - case 9: - evaluate_line_all_product_q4<9>( - points, output_stride, values_out, gradients_out, hessians_out); - return true; - default: - return false; - } -} - -SVMP_LAGRANGE_NOINLINE bool try_evaluate_quad_values_product_q4( - const std::vector>& points, - std::size_t output_stride, - int n_axis, - Real* SVMP_RESTRICT values_out) { - switch (n_axis) { - case 5: - evaluate_quad_values_product_q4<5>(points, output_stride, values_out); - return true; - case 6: - evaluate_quad_values_product_q4<6>(points, output_stride, values_out); - return true; - case 7: - evaluate_quad_values_product_q4<7>(points, output_stride, values_out); - return true; - case 8: - evaluate_quad_values_product_q4<8>(points, output_stride, values_out); - return true; - case 9: - evaluate_quad_values_product_q4<9>(points, output_stride, values_out); - return true; - default: - return false; - } -} - -SVMP_LAGRANGE_NOINLINE bool try_evaluate_quad_derivatives_product_q4( - const std::vector>& points, - std::size_t output_stride, - int n_axis, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - switch (n_axis) { - case 5: - evaluate_quad_derivatives_product_q4<5>( - points, output_stride, values_out, gradients_out, hessians_out); - return true; - case 6: - evaluate_quad_derivatives_product_q4<6>( - points, output_stride, values_out, gradients_out, hessians_out); - return true; - case 7: - evaluate_quad_derivatives_product_q4<7>( - points, output_stride, values_out, gradients_out, hessians_out); - return true; - case 8: - evaluate_quad_derivatives_product_q4<8>( - points, output_stride, values_out, gradients_out, hessians_out); - return true; - case 9: - 
evaluate_quad_derivatives_product_q4<9>( - points, output_stride, values_out, gradients_out, hessians_out); - return true; - default: - return false; - } -} - /* Evaluates the tensor-product basis at every entry of points and writes the requested outputs; a null values_out / gradients_out / hessians_out skips that output. Layout used by the loops in this function: values_out[node * output_stride + q], gradients_out[(3 * node + c) * output_stride + q], hessians_out[(9 * node + 3 * r + c) * output_stride + q]; components that a lower-dimensional topology does not have are written as zero. Does nothing when points or tensor_indices is empty. */ -void evaluate_tensor_product_points_strided( - LagrangeTopology topology, - const std::vector>& tensor_indices, - const std::vector>& points, - std::size_t output_stride, - const Real* v_coeffs, - const Real* d_coeffs, - const Real* d2_coeffs, - const Real* barycentric_weights, - int n_axis, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - const std::size_t num_qpts = points.size(); - if (num_qpts == 0 || tensor_indices.empty()) { - return; - } - - const bool need_grad = gradients_out != nullptr; - const bool need_hess = hessians_out != nullptr; - const bool values_only = values_out != nullptr && !need_grad && !need_hess; - const bool gradients_only = values_out == nullptr && need_grad && !need_hess; - const bool hessians_only = values_out == nullptr && gradients_out == nullptr && need_hess; - const bool all_outputs = values_out != nullptr && need_grad && need_hess; - const AxisDeriv level = need_hess - ? AxisDeriv::ValuesAndFirstAndSecond - : (need_grad ? 
AxisDeriv::ValuesAndFirst : AxisDeriv::ValuesOnly); - - /* Specialised kernels for exactly four points come first; whenever a try_* helper returns false, evaluation falls through to the generic code further down. */ if (topology == LagrangeTopology::Line && num_qpts == 4u) { - if (values_only && - try_evaluate_line_values_horner_q4( - points, output_stride, v_coeffs, n_axis, values_out)) { - return; - } - if (gradients_only && - try_evaluate_line_gradients_horner_q4( - points, output_stride, d_coeffs, n_axis, gradients_out)) { - return; - } - if (hessians_only && - try_evaluate_line_hessians_product_q4( - points, output_stride, n_axis, hessians_out)) { - return; - } - if (all_outputs && - try_evaluate_line_all_product_q4( - points, output_stride, n_axis, values_out, gradients_out, hessians_out)) { - return; - } - } - if (topology == LagrangeTopology::Quadrilateral && - values_only && - num_qpts == 4u && - try_evaluate_quad_values_product_q4(points, output_stride, n_axis, values_out)) { - return; - } - if (topology == LagrangeTopology::Quadrilateral && - gradients_only && - num_qpts == 4u && - n_axis == 5) { - evaluate_quad_order4_gradients_q4(points, output_stride, gradients_out); - return; - } - if (topology == LagrangeTopology::Quadrilateral && - gradients_only && - num_qpts == 4u && - n_axis == 9) { - evaluate_quad_order8_gradients_product_q4(points, output_stride, gradients_out); - return; - } - if (topology == LagrangeTopology::Quadrilateral && - (gradients_only || hessians_only || all_outputs) && - num_qpts == 4u && - try_evaluate_quad_derivatives_product_q4( - points, output_stride, n_axis, values_out, gradients_out, hessians_out)) { - return; - } - - auto& scratch = evaluate_scratch(); - AxisBatchScratch& x_batch = scratch.axis_x_batch; - AxisBatchScratch& y_batch = scratch.axis_y_batch; - AxisBatchScratch& z_batch = scratch.axis_z_batch; - - const bool has_y = topology != LagrangeTopology::Line; - const bool has_z = topology == LagrangeTopology::Hexahedron; - const std::size_t axis_stride = static_cast(n_axis); - const bool use_product_axis_batch = - has_z && - gradients_only && - num_qpts == 4u && - n_axis >= 5 
&& - n_axis <= 9; - auto fill_tensor_axis_batch = [&](AxisBatchScratch& batch, std::size_t component) { - if (use_product_axis_batch && - try_fill_axis_batch_product_q4(batch, points, component, n_axis, level)) { - return; - } - fill_axis_batch(batch, points, component, v_coeffs, d_coeffs, d2_coeffs, - barycentric_weights, n_axis, level); - }; - - fill_tensor_axis_batch(x_batch, 0u); - /* Line topology: only the x batch contributes; the y/z gradient components and every hessian component other than index 0 are written as zero. */ if (!has_y) { - if (values_only) { - if (num_qpts == 4u) { - for (std::size_t node = 0; node < tensor_indices.size(); ++node) { - const std::size_t i = tensor_indices[node][0]; - Real* value_row = values_out + node * output_stride; - value_row[0] = x_batch.values[i]; - value_row[1] = x_batch.values[axis_stride + i]; - value_row[2] = x_batch.values[2u * axis_stride + i]; - value_row[3] = x_batch.values[3u * axis_stride + i]; - } - return; - } - for (std::size_t node = 0; node < tensor_indices.size(); ++node) { - const std::size_t i = tensor_indices[node][0]; - Real* value_row = values_out + node * output_stride; - - for (std::size_t q = 0; q < num_qpts; ++q) { - value_row[q] = x_batch.values[q * axis_stride + i]; - } - } - return; - } - - if (gradients_only) { - for (std::size_t node = 0; node < tensor_indices.size(); ++node) { - const std::size_t i = tensor_indices[node][0]; - Real* grad_row = gradients_out + node * 3u * output_stride; - - for (std::size_t q = 0; q < num_qpts; ++q) { - grad_row[0u * output_stride + q] = - x_batch.first[q * axis_stride + i]; - grad_row[1u * output_stride + q] = Real(0); - grad_row[2u * output_stride + q] = Real(0); - } - } - return; - } - - for (std::size_t node = 0; node < tensor_indices.size(); ++node) { - const std::size_t i = tensor_indices[node][0]; - Real* value_row = values_out ? values_out + node * output_stride : nullptr; - Real* grad_row = gradients_out ? gradients_out + node * 3u * output_stride : nullptr; - Real* hess_row = hessians_out ? 
hessians_out + node * 9u * output_stride : nullptr; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const std::size_t q_axis = q * axis_stride + i; - if (value_row != nullptr) { - value_row[q] = x_batch.values[q_axis]; - } - if (need_grad) { - grad_row[0u * output_stride + q] = x_batch.first[q_axis]; - grad_row[1u * output_stride + q] = Real(0); - grad_row[2u * output_stride + q] = Real(0); - } - if (need_hess) { - hess_row[0u * output_stride + q] = x_batch.second[q_axis]; - hess_row[1u * output_stride + q] = Real(0); - hess_row[2u * output_stride + q] = Real(0); - hess_row[3u * output_stride + q] = Real(0); - hess_row[4u * output_stride + q] = Real(0); - hess_row[5u * output_stride + q] = Real(0); - hess_row[6u * output_stride + q] = Real(0); - hess_row[7u * output_stride + q] = Real(0); - hess_row[8u * output_stride + q] = Real(0); - } - } - } - return; - } - /* Precomputed y*z tables are used for hexahedra; otherwise only when n_axis == 8 and the request is not hessians-only. */ const bool use_tensor_tables = - has_z || - (axis_stride == 8u && !(need_hess && values_out == nullptr && gradients_out == nullptr)); - /* NOTE(review): both branches below issue the same call, and has_y is always true here because the !has_y case returned above; a single unconditional call would be equivalent. */ if (use_tensor_tables) { - fill_tensor_axis_batch(y_batch, 1u); - } else if (has_y) { - fill_tensor_axis_batch(y_batch, 1u); - } - if (has_z) { - fill_tensor_axis_batch(z_batch, 2u); - } - - if (use_tensor_tables) { - const std::size_t ny = axis_stride; - const std::size_t nz = has_z ? 
axis_stride : 1u; - const std::size_t nyz = ny * nz; - const std::size_t table_count = num_qpts * nyz; - - if (has_z && num_qpts == 4u && output_stride == 4u) { - if (values_only && - evaluate_tensor_product_values_stride4_q4_transposed( - tensor_indices, axis_stride, x_batch, y_batch, z_batch, values_out)) { - return; - } - if (gradients_only && - evaluate_tensor_product_gradients_stride4_q4_transposed( - tensor_indices, axis_stride, x_batch, y_batch, z_batch, gradients_out)) { - return; - } - if (hessians_only && - evaluate_tensor_product_second_stride4_q4_transposed( - tensor_indices, axis_stride, x_batch, y_batch, z_batch, - nullptr, nullptr, hessians_out)) { - return; - } - if (all_outputs && - evaluate_tensor_product_second_stride4_q4_transposed( - tensor_indices, axis_stride, x_batch, y_batch, z_batch, - values_out, gradients_out, hessians_out)) { - return; - } - } - - /* Per-point tables indexed by q * nyz + j * nz + k: Mvv = y.values * z.values, Mdv = y.first * z.values, Mvd = y.values * z.first, Md2v = y.second * z.values, Mvd2 = y.values * z.second, Mdd = y.first * z.first. The stack arrays are used while num_qpts * ny * nz <= kMaxStackYZ, otherwise the scratch tables are resized. Without a z axis nz is 1, the z value is taken as 1 and its first/second entries as 0. */ Real Mvv_stack[kMaxStackYZ]; - Real Mdv_stack[kMaxStackYZ]; - Real Mvd_stack[kMaxStackYZ]; - Real Md2v_stack[kMaxStackYZ]; - Real Mvd2_stack[kMaxStackYZ]; - Real Mdd_stack[kMaxStackYZ]; - - Real* Mvv; - Real* Mdv; - Real* Mvd; - Real* Md2v; - Real* Mvd2; - Real* Mdd; - if (table_count <= kMaxStackYZ) { - Mvv = Mvv_stack; - Mdv = Mdv_stack; - Mvd = Mvd_stack; - Md2v = Md2v_stack; - Mvd2 = Mvd2_stack; - Mdd = Mdd_stack; - } else { - auto& tables = scratch.tensor_tables; - tables.resizeFor(table_count); - Mvv = tables.vv.data(); - Mdv = tables.dv.data(); - Mvd = tables.vd.data(); - Md2v = tables.d2v.data(); - Mvd2 = tables.vd2.data(); - Mdd = tables.dd.data(); - } - - for (std::size_t q = 0; q < num_qpts; ++q) { - const std::size_t q_axis = q * axis_stride; - const std::size_t q_table = q * nyz; - for (std::size_t j = 0; j < ny; ++j) { - const Real yv = y_batch.values[q_axis + j]; - const Real yd = (need_grad || need_hess) ? y_batch.first[q_axis + j] : Real(0); - const Real y2 = need_hess ? 
y_batch.second[q_axis + j] : Real(0); - for (std::size_t k = 0; k < nz; ++k) { - const std::size_t slot = q_table + j * nz + k; - const Real zv = has_z ? z_batch.values[q_axis + k] : Real(1); - Mvv[slot] = yv * zv; - if (need_grad || need_hess) { - const Real zd = has_z ? z_batch.first[q_axis + k] : Real(0); - Mdv[slot] = yd * zv; - Mvd[slot] = yv * zd; - } - if (need_hess) { - const Real zd = has_z ? z_batch.first[q_axis + k] : Real(0); - const Real z2 = has_z ? z_batch.second[q_axis + k] : Real(0); - Md2v[slot] = y2 * zv; - Mvd2[slot] = yv * z2; - Mdd[slot] = yd * zd; - } - } - } - } - - if (values_only) { - if (has_z && num_qpts == 4u) { - for (std::size_t node = 0; node < tensor_indices.size(); ++node) { - const auto& idx = tensor_indices[node]; - const std::size_t i = idx[0]; - const std::size_t jk = idx[1] * nz + idx[2]; - Real* value_row = values_out + node * output_stride; - - write_tensor_product_value_strided_q<0>( - axis_stride, nyz, i, jk, x_batch, Mvv, value_row); - write_tensor_product_value_strided_q<1>( - axis_stride, nyz, i, jk, x_batch, Mvv, value_row); - write_tensor_product_value_strided_q<2>( - axis_stride, nyz, i, jk, x_batch, Mvv, value_row); - write_tensor_product_value_strided_q<3>( - axis_stride, nyz, i, jk, x_batch, Mvv, value_row); - } - return; - } - for (std::size_t node = 0; node < tensor_indices.size(); ++node) { - const auto& idx = tensor_indices[node]; - const std::size_t i = idx[0]; - const std::size_t jk = idx[1] * nz + idx[2]; - Real* value_row = values_out + node * output_stride; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const std::size_t q_axis = q * axis_stride; - const std::size_t slot = q * nyz + jk; - value_row[q] = x_batch.values[q_axis + i] * Mvv[slot]; - } - } - return; - } - - if (gradients_only) { - if (has_z && num_qpts == 4u) { - if (output_stride == 4u) { - for (std::size_t node = 0; node < tensor_indices.size(); ++node) { - const auto& idx = tensor_indices[node]; - const std::size_t i = idx[0]; - const 
std::size_t jk = idx[1] * nz + idx[2]; - Real* grad_row = gradients_out + node * 3u * output_stride; - - write_tensor_product_gradient_stride4_q<0>( - axis_stride, nyz, i, jk, x_batch, Mvv, Mdv, Mvd, grad_row); - write_tensor_product_gradient_stride4_q<1>( - axis_stride, nyz, i, jk, x_batch, Mvv, Mdv, Mvd, grad_row); - write_tensor_product_gradient_stride4_q<2>( - axis_stride, nyz, i, jk, x_batch, Mvv, Mdv, Mvd, grad_row); - write_tensor_product_gradient_stride4_q<3>( - axis_stride, nyz, i, jk, x_batch, Mvv, Mdv, Mvd, grad_row); - } - } else { - for (std::size_t node = 0; node < tensor_indices.size(); ++node) { - const auto& idx = tensor_indices[node]; - const std::size_t i = idx[0]; - const std::size_t jk = idx[1] * nz + idx[2]; - Real* grad_row = gradients_out + node * 3u * output_stride; - - write_tensor_product_gradient_strided_q<0>( - axis_stride, nyz, i, jk, output_stride, x_batch, - Mvv, Mdv, Mvd, grad_row); - write_tensor_product_gradient_strided_q<1>( - axis_stride, nyz, i, jk, output_stride, x_batch, - Mvv, Mdv, Mvd, grad_row); - write_tensor_product_gradient_strided_q<2>( - axis_stride, nyz, i, jk, output_stride, x_batch, - Mvv, Mdv, Mvd, grad_row); - write_tensor_product_gradient_strided_q<3>( - axis_stride, nyz, i, jk, output_stride, x_batch, - Mvv, Mdv, Mvd, grad_row); - } - } - return; - } - - for (std::size_t node = 0; node < tensor_indices.size(); ++node) { - const auto& idx = tensor_indices[node]; - const std::size_t i = idx[0]; - const std::size_t jk = idx[1] * nz + idx[2]; - Real* grad_row = gradients_out + node * 3u * output_stride; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const std::size_t q_axis = q * axis_stride; - const std::size_t slot = q * nyz + jk; - const Real xv = x_batch.values[q_axis + i]; - const Real xd = x_batch.first[q_axis + i]; - grad_row[0u * output_stride + q] = xd * Mvv[slot]; - grad_row[1u * output_stride + q] = xv * Mdv[slot]; - grad_row[2u * output_stride + q] = xv * Mvd[slot]; - } - } - return; - } - - if 
(has_z && num_qpts == 4u && hessians_only) { - if (output_stride == 4u) { - for (std::size_t node = 0; node < tensor_indices.size(); ++node) { - const auto& idx = tensor_indices[node]; - const std::size_t i = idx[0]; - const std::size_t jk = idx[1] * nz + idx[2]; - Real* hess_row = hessians_out + node * 9u * output_stride; - - write_tensor_product_hessian_stride4_q<0>( - axis_stride, nyz, i, jk, x_batch, - Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, hess_row); - write_tensor_product_hessian_stride4_q<1>( - axis_stride, nyz, i, jk, x_batch, - Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, hess_row); - write_tensor_product_hessian_stride4_q<2>( - axis_stride, nyz, i, jk, x_batch, - Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, hess_row); - write_tensor_product_hessian_stride4_q<3>( - axis_stride, nyz, i, jk, x_batch, - Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, hess_row); - } - } else { - for (std::size_t node = 0; node < tensor_indices.size(); ++node) { - const auto& idx = tensor_indices[node]; - const std::size_t i = idx[0]; - const std::size_t jk = idx[1] * nz + idx[2]; - Real* hess_row = hessians_out + node * 9u * output_stride; - - write_tensor_product_hessian_strided_q<0>( - axis_stride, nyz, i, jk, output_stride, x_batch, - Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, hess_row); - write_tensor_product_hessian_strided_q<1>( - axis_stride, nyz, i, jk, output_stride, x_batch, - Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, hess_row); - write_tensor_product_hessian_strided_q<2>( - axis_stride, nyz, i, jk, output_stride, x_batch, - Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, hess_row); - write_tensor_product_hessian_strided_q<3>( - axis_stride, nyz, i, jk, output_stride, x_batch, - Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, hess_row); - } - } - return; - } - - if (has_z && num_qpts == 4u && all_outputs) { - if (output_stride == 4u) { - for (std::size_t node = 0; node < tensor_indices.size(); ++node) { - const auto& idx = tensor_indices[node]; - const std::size_t i = idx[0]; - const std::size_t jk = idx[1] * nz + idx[2]; - Real* value_row = values_out + node * 
output_stride; - Real* grad_row = gradients_out + node * 3u * output_stride; - Real* hess_row = hessians_out + node * 9u * output_stride; - - write_tensor_product_all_stride4_q<0>( - axis_stride, nyz, i, jk, x_batch, - Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, value_row, grad_row, hess_row); - write_tensor_product_all_stride4_q<1>( - axis_stride, nyz, i, jk, x_batch, - Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, value_row, grad_row, hess_row); - write_tensor_product_all_stride4_q<2>( - axis_stride, nyz, i, jk, x_batch, - Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, value_row, grad_row, hess_row); - write_tensor_product_all_stride4_q<3>( - axis_stride, nyz, i, jk, x_batch, - Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, value_row, grad_row, hess_row); - } - } else { - for (std::size_t node = 0; node < tensor_indices.size(); ++node) { - const auto& idx = tensor_indices[node]; - const std::size_t i = idx[0]; - const std::size_t jk = idx[1] * nz + idx[2]; - Real* value_row = values_out + node * output_stride; - Real* grad_row = gradients_out + node * 3u * output_stride; - Real* hess_row = hessians_out + node * 9u * output_stride; - - write_tensor_product_all_strided_q<0>( - axis_stride, nyz, i, jk, output_stride, x_batch, - Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, value_row, grad_row, hess_row); - write_tensor_product_all_strided_q<1>( - axis_stride, nyz, i, jk, output_stride, x_batch, - Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, value_row, grad_row, hess_row); - write_tensor_product_all_strided_q<2>( - axis_stride, nyz, i, jk, output_stride, x_batch, - Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, value_row, grad_row, hess_row); - write_tensor_product_all_strided_q<3>( - axis_stride, nyz, i, jk, output_stride, x_batch, - Mvv, Mdv, Mvd, Md2v, Mvd2, Mdd, value_row, grad_row, hess_row); - } - } - return; - } - - for (std::size_t node = 0; node < tensor_indices.size(); ++node) { - const auto& idx = tensor_indices[node]; - const std::size_t i = idx[0]; - const std::size_t jk = idx[1] * nz + idx[2]; - - Real* value_row = values_out ? 
values_out + node * output_stride : nullptr; - Real* grad_row = gradients_out ? gradients_out + node * 3u * output_stride : nullptr; - Real* hess_row = hessians_out ? hessians_out + node * 9u * output_stride : nullptr; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const std::size_t q_axis = q * axis_stride; - const std::size_t slot = q * nyz + jk; - const Real xv = x_batch.values[q_axis + i]; - - if (value_row != nullptr) { - value_row[q] = xv * Mvv[slot]; - } - - if (need_grad) { - const Real xd = x_batch.first[q_axis + i]; - grad_row[0u * output_stride + q] = xd * Mvv[slot]; - grad_row[1u * output_stride + q] = xv * Mdv[slot]; - grad_row[2u * output_stride + q] = xv * Mvd[slot]; - } - - if (need_hess) { - const Real xd = x_batch.first[q_axis + i]; - const Real x2 = x_batch.second[q_axis + i]; - const Real hxy = xd * Mdv[slot]; - const Real hxz = xd * Mvd[slot]; - const Real hyz = xv * Mdd[slot]; - hess_row[0u * output_stride + q] = x2 * Mvv[slot]; - hess_row[4u * output_stride + q] = xv * Md2v[slot]; - hess_row[8u * output_stride + q] = xv * Mvd2[slot]; - hess_row[1u * output_stride + q] = hxy; - hess_row[3u * output_stride + q] = hxy; - hess_row[2u * output_stride + q] = hxz; - hess_row[6u * output_stride + q] = hxz; - hess_row[5u * output_stride + q] = hyz; - hess_row[7u * output_stride + q] = hyz; - } - } - } - return; - } - - /* Two-axis fallback without tables: combines the x and y batches directly and writes zero for every z-related gradient and hessian component. */ for (std::size_t node = 0; node < tensor_indices.size(); ++node) { - const auto& idx = tensor_indices[node]; - const std::size_t i = idx[0]; - const std::size_t j = idx[1]; - - Real* value_row = values_out ? values_out + node * output_stride : nullptr; - Real* grad_row = gradients_out ? gradients_out + node * 3u * output_stride : nullptr; - Real* hess_row = hessians_out ? 
hessians_out + node * 9u * output_stride : nullptr; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const std::size_t q_axis = q * axis_stride; - const Real xv = x_batch.values[q_axis + i]; - const Real yv = y_batch.values[q_axis + j]; - - if (value_row != nullptr) { - value_row[q] = xv * yv; - } - - if (need_grad) { - const Real xd = x_batch.first[q_axis + i]; - const Real yd = y_batch.first[q_axis + j]; - grad_row[0u * output_stride + q] = xd * yv; - grad_row[1u * output_stride + q] = xv * yd; - grad_row[2u * output_stride + q] = Real(0); - } - - if (need_hess) { - const Real xd = x_batch.first[q_axis + i]; - const Real yd = y_batch.first[q_axis + j]; - const Real x2 = x_batch.second[q_axis + i]; - const Real y2 = y_batch.second[q_axis + j]; - const Real hxy = xd * yd; - - hess_row[0u * output_stride + q] = x2 * yv; - hess_row[4u * output_stride + q] = xv * y2; - hess_row[8u * output_stride + q] = Real(0); - hess_row[1u * output_stride + q] = hxy; - hess_row[3u * output_stride + q] = hxy; - hess_row[2u * output_stride + q] = Real(0); - hess_row[6u * output_stride + q] = Real(0); - hess_row[5u * output_stride + q] = Real(0); - hess_row[7u * output_stride + q] = Real(0); - } - } - } -} - -void evaluate_wedge_points_strided( - const std::vector>& simplex_exponents, - const std::vector>& wedge_indices, - const std::vector& wedge_node_by_tri_z, - int order, - const std::vector>& points, - std::size_t output_stride, - const Real* v_coeffs, - const Real* d_coeffs, - const Real* d2_coeffs, - const Real* barycentric_weights, - int n_axis, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - if (points.empty() || wedge_indices.empty()) { - return; - } - - const bool want_values = values_out != nullptr; - const bool need_grad = gradients_out != nullptr; - const bool need_hess = hessians_out != nullptr; - const bool values_only = want_values && !need_grad && !need_hess; - const bool gradients_only = !want_values && 
need_grad && !need_hess; - const bool hessians_only = !want_values && !need_grad && need_hess; - const bool all_outputs = want_values && need_grad && need_hess; - const bool use_batched_wedge = - (values_only && order <= 3) || - (gradients_only && order >= 2) || - (hessians_only && order >= 3) || - (all_outputs && order >= 3); - if (values_only && - order >= 4 && - order <= 8 && - try_evaluate_wedge_values_product_q4( - simplex_exponents, wedge_indices, order, points, output_stride, values_out)) { - return; - } - const AxisDeriv level = need_hess - ? AxisDeriv::ValuesAndFirstAndSecond - : (need_grad ? AxisDeriv::ValuesAndFirst : AxisDeriv::ValuesOnly); - - LagrangeEvaluateScratch& scratch = evaluate_scratch(); - const std::size_t tri_count = simplex_exponents.size(); - if (use_batched_wedge) { - const std::size_t num_qpts = points.size(); - const std::size_t tri_stride = num_qpts; - if (num_qpts == 4u && - output_stride == 4u && - (gradients_only || hessians_only || all_outputs) && - order >= 3 && - order <= 8 && - wedge_node_by_tri_z.size() == tri_count * static_cast(n_axis)) { - const bool use_product_axis_batch = - gradients_only && - n_axis >= 5 && - n_axis <= 9; - if (!use_product_axis_batch || - !try_fill_axis_batch_product_q4( - scratch.axis_z_batch, points, 2u, n_axis, level)) { - fill_axis_batch(scratch.axis_z_batch, - points, - 2u, - v_coeffs, - d_coeffs, - d2_coeffs, - barycentric_weights, - n_axis, - level); - } - if (need_hess) { - if (try_evaluate_wedge_fused_stride4_q4( - simplex_exponents, wedge_node_by_tri_z, order, points, - scratch.axis_z_batch, n_axis, values_out, gradients_out, hessians_out)) { - return; - } - } else if (try_evaluate_wedge_fused_stride4_q4( - simplex_exponents, wedge_node_by_tri_z, order, points, - scratch.axis_z_batch, n_axis, values_out, gradients_out, hessians_out)) { - return; - } - } - - const std::size_t tri_values_size = tri_count * tri_stride; - scratch.wedge_tri_values_batch.resize(tri_values_size); - if (need_grad || 
need_hess) { - scratch.wedge_tri_gradient_batch.resize(tri_count * 2u * tri_stride); - } - if (need_hess) { - scratch.wedge_tri_hessian_batch.resize(tri_count * 3u * tri_stride); - } - - detail::evaluate_triangle_simplex_basis_wedge_components_strided( - simplex_exponents, - order, - points, - tri_stride, - scratch.wedge_tri_values_batch.data(), - (need_grad || need_hess) ? scratch.wedge_tri_gradient_batch.data() : nullptr, - need_hess ? scratch.wedge_tri_hessian_batch.data() : nullptr); - - const bool use_product_axis_batch = - gradients_only && - points.size() == 4u && - n_axis >= 5 && - n_axis <= 9; - if (!use_product_axis_batch || - !try_fill_axis_batch_product_q4( - scratch.axis_z_batch, points, 2u, n_axis, level)) { - fill_axis_batch(scratch.axis_z_batch, - points, - 2u, - v_coeffs, - d_coeffs, - d2_coeffs, - barycentric_weights, - n_axis, - level); - } - - const std::size_t axis_stride = static_cast(n_axis); - if (all_outputs) { - if (num_qpts == 4u) { - if (output_stride == 4u) { - for (std::size_t node = 0; node < wedge_indices.size(); ++node) { - const auto& index = wedge_indices[node]; - const std::size_t tri = index[0]; - const std::size_t z = index[1]; - Real* value_row = values_out + node * output_stride; - Real* g = gradients_out + node * 3u * output_stride; - Real* H = hessians_out + node * 9u * output_stride; - const Real* tri_g = scratch.wedge_tri_gradient_batch.data() + tri * 2u * tri_stride; - const Real* tri_H = scratch.wedge_tri_hessian_batch.data() + tri * 3u * tri_stride; - const Real* tri_values = scratch.wedge_tri_values_batch.data(); - - write_wedge_all_stride4_q<0>( - tri_stride, axis_stride, tri, z, - tri_values, tri_g, tri_H, scratch.axis_z_batch, value_row, g, H); - write_wedge_all_stride4_q<1>( - tri_stride, axis_stride, tri, z, - tri_values, tri_g, tri_H, scratch.axis_z_batch, value_row, g, H); - write_wedge_all_stride4_q<2>( - tri_stride, axis_stride, tri, z, - tri_values, tri_g, tri_H, scratch.axis_z_batch, value_row, g, H); - 
write_wedge_all_stride4_q<3>( - tri_stride, axis_stride, tri, z, - tri_values, tri_g, tri_H, scratch.axis_z_batch, value_row, g, H); - } - } else { - for (std::size_t node = 0; node < wedge_indices.size(); ++node) { - const auto& index = wedge_indices[node]; - const std::size_t tri = index[0]; - const std::size_t z = index[1]; - Real* value_row = values_out + node * output_stride; - Real* g = gradients_out + node * 3u * output_stride; - Real* H = hessians_out + node * 9u * output_stride; - const Real* tri_g = scratch.wedge_tri_gradient_batch.data() + tri * 2u * tri_stride; - const Real* tri_H = scratch.wedge_tri_hessian_batch.data() + tri * 3u * tri_stride; - const Real* tri_values = scratch.wedge_tri_values_batch.data(); - - write_wedge_all_strided_q<0>( - tri_stride, axis_stride, tri, z, output_stride, - tri_values, tri_g, tri_H, scratch.axis_z_batch, value_row, g, H); - write_wedge_all_strided_q<1>( - tri_stride, axis_stride, tri, z, output_stride, - tri_values, tri_g, tri_H, scratch.axis_z_batch, value_row, g, H); - write_wedge_all_strided_q<2>( - tri_stride, axis_stride, tri, z, output_stride, - tri_values, tri_g, tri_H, scratch.axis_z_batch, value_row, g, H); - write_wedge_all_strided_q<3>( - tri_stride, axis_stride, tri, z, output_stride, - tri_values, tri_g, tri_H, scratch.axis_z_batch, value_row, g, H); - } - } - return; - } - - for (std::size_t node = 0; node < wedge_indices.size(); ++node) { - const auto& index = wedge_indices[node]; - const std::size_t tri = index[0]; - const std::size_t z = index[1]; - Real* value_row = values_out + node * output_stride; - Real* g = gradients_out + node * 3u * output_stride; - Real* H = hessians_out + node * 9u * output_stride; - const Real* tri_g = scratch.wedge_tri_gradient_batch.data() + tri * 2u * tri_stride; - const Real* tri_H = scratch.wedge_tri_hessian_batch.data() + tri * 3u * tri_stride; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const std::size_t tri_q = tri * tri_stride + q; - const std::size_t z_q 
= q * axis_stride + z; - const Real tri_v = scratch.wedge_tri_values_batch[tri_q]; - const Real zv = scratch.axis_z_batch.values[z_q]; - const Real zd = scratch.axis_z_batch.first[z_q]; - const Real tri_gx = tri_g[0u * tri_stride + q]; - const Real tri_gy = tri_g[1u * tri_stride + q]; - const Real tri_hxx = tri_H[0u * tri_stride + q]; - const Real tri_hxy = tri_H[1u * tri_stride + q]; - const Real tri_hyy = tri_H[2u * tri_stride + q]; - const Real hxz = tri_gx * zd; - const Real hxy = tri_hxy * zv; - const Real hyz = tri_gy * zd; - - value_row[q] = tri_v * zv; - g[0u * output_stride + q] = tri_gx * zv; - g[1u * output_stride + q] = tri_gy * zv; - g[2u * output_stride + q] = tri_v * zd; - H[0u * output_stride + q] = tri_hxx * zv; - H[1u * output_stride + q] = hxy; - H[2u * output_stride + q] = hxz; - H[3u * output_stride + q] = hxy; - H[4u * output_stride + q] = tri_hyy * zv; - H[5u * output_stride + q] = hyz; - H[6u * output_stride + q] = hxz; - H[7u * output_stride + q] = hyz; - H[8u * output_stride + q] = tri_v * scratch.axis_z_batch.second[z_q]; - } - } - return; - } - - if (hessians_only) { - if (num_qpts == 4u) { - if (output_stride == 4u) { - for (std::size_t node = 0; node < wedge_indices.size(); ++node) { - const auto& index = wedge_indices[node]; - const std::size_t tri = index[0]; - const std::size_t z = index[1]; - Real* H = hessians_out + node * 9u * output_stride; - const Real* tri_g = scratch.wedge_tri_gradient_batch.data() + tri * 2u * tri_stride; - const Real* tri_H = scratch.wedge_tri_hessian_batch.data() + tri * 3u * tri_stride; - const Real* tri_values = scratch.wedge_tri_values_batch.data(); - - write_wedge_hessian_stride4_q<0>( - tri_stride, axis_stride, tri, z, - tri_values, tri_g, tri_H, scratch.axis_z_batch, H); - write_wedge_hessian_stride4_q<1>( - tri_stride, axis_stride, tri, z, - tri_values, tri_g, tri_H, scratch.axis_z_batch, H); - write_wedge_hessian_stride4_q<2>( - tri_stride, axis_stride, tri, z, - tri_values, tri_g, tri_H, 
scratch.axis_z_batch, H); - write_wedge_hessian_stride4_q<3>( - tri_stride, axis_stride, tri, z, - tri_values, tri_g, tri_H, scratch.axis_z_batch, H); - } - } else { - for (std::size_t node = 0; node < wedge_indices.size(); ++node) { - const auto& index = wedge_indices[node]; - const std::size_t tri = index[0]; - const std::size_t z = index[1]; - Real* H = hessians_out + node * 9u * output_stride; - const Real* tri_g = scratch.wedge_tri_gradient_batch.data() + tri * 2u * tri_stride; - const Real* tri_H = scratch.wedge_tri_hessian_batch.data() + tri * 3u * tri_stride; - const Real* tri_values = scratch.wedge_tri_values_batch.data(); - - write_wedge_hessian_strided_q<0>( - tri_stride, axis_stride, tri, z, output_stride, - tri_values, tri_g, tri_H, scratch.axis_z_batch, H); - write_wedge_hessian_strided_q<1>( - tri_stride, axis_stride, tri, z, output_stride, - tri_values, tri_g, tri_H, scratch.axis_z_batch, H); - write_wedge_hessian_strided_q<2>( - tri_stride, axis_stride, tri, z, output_stride, - tri_values, tri_g, tri_H, scratch.axis_z_batch, H); - write_wedge_hessian_strided_q<3>( - tri_stride, axis_stride, tri, z, output_stride, - tri_values, tri_g, tri_H, scratch.axis_z_batch, H); - } - } - return; - } - - for (std::size_t node = 0; node < wedge_indices.size(); ++node) { - const auto& index = wedge_indices[node]; - const std::size_t tri = index[0]; - const std::size_t z = index[1]; - Real* H = hessians_out + node * 9u * output_stride; - const Real* tri_g = scratch.wedge_tri_gradient_batch.data() + tri * 2u * tri_stride; - const Real* tri_H = scratch.wedge_tri_hessian_batch.data() + tri * 3u * tri_stride; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const std::size_t tri_q = tri * tri_stride + q; - const std::size_t z_q = q * axis_stride + z; - const Real tri_v = scratch.wedge_tri_values_batch[tri_q]; - const Real zv = scratch.axis_z_batch.values[z_q]; - const Real zd = scratch.axis_z_batch.first[z_q]; - const Real tri_gx = tri_g[0u * tri_stride + q]; - 
const Real tri_gy = tri_g[1u * tri_stride + q]; - const Real tri_hxx = tri_H[0u * tri_stride + q]; - const Real tri_hxy = tri_H[1u * tri_stride + q]; - const Real tri_hyy = tri_H[2u * tri_stride + q]; - const Real hxz = tri_gx * zd; - const Real hxy = tri_hxy * zv; - const Real hyz = tri_gy * zd; - - H[0u * output_stride + q] = tri_hxx * zv; - H[1u * output_stride + q] = hxy; - H[2u * output_stride + q] = hxz; - H[3u * output_stride + q] = hxy; - H[4u * output_stride + q] = tri_hyy * zv; - H[5u * output_stride + q] = hyz; - H[6u * output_stride + q] = hxz; - H[7u * output_stride + q] = hyz; - H[8u * output_stride + q] = tri_v * scratch.axis_z_batch.second[z_q]; - } - } - return; - } - - if (gradients_only) { - if (num_qpts == 4u) { - if (output_stride == 4u) { - for (std::size_t node = 0; node < wedge_indices.size(); ++node) { - const auto& index = wedge_indices[node]; - const std::size_t tri = index[0]; - const std::size_t z = index[1]; - Real* g = gradients_out + node * 3u * output_stride; - const Real* tri_g = scratch.wedge_tri_gradient_batch.data() + tri * 2u * tri_stride; - const Real* tri_values = scratch.wedge_tri_values_batch.data(); - - write_wedge_gradient_stride4_q<0>( - tri_stride, axis_stride, tri, z, - tri_values, tri_g, scratch.axis_z_batch, g); - write_wedge_gradient_stride4_q<1>( - tri_stride, axis_stride, tri, z, - tri_values, tri_g, scratch.axis_z_batch, g); - write_wedge_gradient_stride4_q<2>( - tri_stride, axis_stride, tri, z, - tri_values, tri_g, scratch.axis_z_batch, g); - write_wedge_gradient_stride4_q<3>( - tri_stride, axis_stride, tri, z, - tri_values, tri_g, scratch.axis_z_batch, g); - } - } else { - for (std::size_t node = 0; node < wedge_indices.size(); ++node) { - const auto& index = wedge_indices[node]; - const std::size_t tri = index[0]; - const std::size_t z = index[1]; - Real* g = gradients_out + node * 3u * output_stride; - const Real* tri_g = scratch.wedge_tri_gradient_batch.data() + tri * 2u * tri_stride; - const Real* 
tri_values = scratch.wedge_tri_values_batch.data(); - - write_wedge_gradient_strided_q<0>( - tri_stride, axis_stride, tri, z, output_stride, - tri_values, tri_g, scratch.axis_z_batch, g); - write_wedge_gradient_strided_q<1>( - tri_stride, axis_stride, tri, z, output_stride, - tri_values, tri_g, scratch.axis_z_batch, g); - write_wedge_gradient_strided_q<2>( - tri_stride, axis_stride, tri, z, output_stride, - tri_values, tri_g, scratch.axis_z_batch, g); - write_wedge_gradient_strided_q<3>( - tri_stride, axis_stride, tri, z, output_stride, - tri_values, tri_g, scratch.axis_z_batch, g); - } - } - return; - } - - for (std::size_t node = 0; node < wedge_indices.size(); ++node) { - const auto& index = wedge_indices[node]; - const std::size_t tri = index[0]; - const std::size_t z = index[1]; - Real* g = gradients_out + node * 3u * output_stride; - const Real* tri_g = scratch.wedge_tri_gradient_batch.data() + tri * 2u * tri_stride; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const std::size_t tri_q = tri * tri_stride + q; - const std::size_t z_q = q * axis_stride + z; - const Real tri_v = scratch.wedge_tri_values_batch[tri_q]; - const Real zv = scratch.axis_z_batch.values[z_q]; - g[0u * output_stride + q] = tri_g[0u * tri_stride + q] * zv; - g[1u * output_stride + q] = tri_g[1u * tri_stride + q] * zv; - g[2u * output_stride + q] = tri_v * scratch.axis_z_batch.first[z_q]; - } - } - return; - } - - for (std::size_t node = 0; node < wedge_indices.size(); ++node) { - const auto& index = wedge_indices[node]; - const std::size_t tri = index[0]; - const std::size_t z = index[1]; - Real* value_row = values_out ? values_out + node * output_stride : nullptr; - Real* g = gradients_out ? gradients_out + node * 3u * output_stride : nullptr; - Real* H = hessians_out ? 
hessians_out + node * 9u * output_stride : nullptr; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const std::size_t tri_q = tri * tri_stride + q; - const std::size_t z_q = q * axis_stride + z; - const Real tri_v = scratch.wedge_tri_values_batch[tri_q]; - const Real zv = scratch.axis_z_batch.values[z_q]; - if (values_out != nullptr) { - value_row[q] = tri_v * zv; - } - - if (need_grad) { - const Real* tri_g = scratch.wedge_tri_gradient_batch.data() + tri * 2u * tri_stride; - g[0u * output_stride + q] = tri_g[0u * tri_stride + q] * zv; - g[1u * output_stride + q] = tri_g[1u * tri_stride + q] * zv; - g[2u * output_stride + q] = tri_v * scratch.axis_z_batch.first[z_q]; - } - - if (need_hess) { - const Real* tri_g = scratch.wedge_tri_gradient_batch.data() + tri * 2u * tri_stride; - const Real* tri_H = scratch.wedge_tri_hessian_batch.data() + tri * 3u * tri_stride; - const Real zd = scratch.axis_z_batch.first[z_q]; - const Real hxz = tri_g[0u * tri_stride + q] * zd; - const Real hxy = tri_H[1u * tri_stride + q] * zv; - const Real hyz = tri_g[1u * tri_stride + q] * zd; - H[0u * output_stride + q] = tri_H[0u * tri_stride + q] * zv; - H[1u * output_stride + q] = hxy; - H[2u * output_stride + q] = hxz; - H[3u * output_stride + q] = hxy; - H[4u * output_stride + q] = tri_H[2u * tri_stride + q] * zv; - H[5u * output_stride + q] = hyz; - H[6u * output_stride + q] = hxz; - H[7u * output_stride + q] = hyz; - H[8u * output_stride + q] = tri_v * scratch.axis_z_batch.second[z_q]; - } - } - } - - return; - } - - scratch.tri_values.resize(tri_count); - if (need_grad || need_hess) { - scratch.tri_gradient_components.resize(tri_count * 3u); - } - if (need_hess) { - scratch.tri_hessian_components.resize(tri_count * 9u); - } - - for (std::size_t q = 0; q < points.size(); ++q) { - const auto& xi = points[q]; - const AxisBasisEvaluations z_axis = - fill_axis_scratch(scratch.axis_z, - v_coeffs, - d_coeffs, - d2_coeffs, - barycentric_weights, - n_axis, - xi[2], - level); - 
detail::evaluate_triangle_simplex_basis_to( - simplex_exponents, - order, - xi, - scratch.tri_values.data(), - (need_grad || need_hess) ? scratch.tri_gradient_components.data() : nullptr, - need_hess ? scratch.tri_hessian_components.data() : nullptr); - - for (std::size_t node = 0; node < wedge_indices.size(); ++node) { - const auto& index = wedge_indices[node]; - const std::size_t tri = index[0]; - const std::size_t z = index[1]; - const Real tri_v = scratch.tri_values[tri]; - const Real zv = z_axis.values[z]; - - if (values_out != nullptr) { - values_out[node * output_stride + q] = tri_v * zv; - } - - if (need_grad) { - const Real* tri_g = scratch.tri_gradient_components.data() + tri * 3u; - Real* g = gradients_out + node * 3u * output_stride; - g[0u * output_stride + q] = tri_g[0] * zv; - g[1u * output_stride + q] = tri_g[1] * zv; - g[2u * output_stride + q] = tri_v * z_axis.first[z]; - } - - if (need_hess) { - const Real* tri_g = scratch.tri_gradient_components.data() + tri * 3u; - const Real* tri_H = scratch.tri_hessian_components.data() + tri * 9u; - const Real zd = z_axis.first[z]; - const Real hxz = tri_g[0] * zd; - const Real hxy = tri_H[1] * zv; - const Real hyz = tri_g[1] * zd; - Real* H = hessians_out + node * 9u * output_stride; - H[0u * output_stride + q] = tri_H[0] * zv; - H[1u * output_stride + q] = hxy; - H[2u * output_stride + q] = hxz; - H[3u * output_stride + q] = hxy; - H[4u * output_stride + q] = tri_H[4] * zv; - H[5u * output_stride + q] = hyz; - H[6u * output_stride + q] = hxz; - H[7u * output_stride + q] = hyz; - H[8u * output_stride + q] = tri_v * z_axis.second[z]; - } - } - } -} - -NormalizedLagrangeRequest normalize_lagrange_request(ElementType element_type, int order) { - switch (element_type) { - case ElementType::Line3: - return {ElementType::Line2, std::max(order, 2)}; - case ElementType::Triangle6: - return {ElementType::Triangle3, std::max(order, 2)}; - case ElementType::Quad9: - return {ElementType::Quad4, std::max(order, 2)}; - 
case ElementType::Quad8: - throw BasisElementCompatibilityException( - "Quad8 is a serendipity element; use SerendipityBasis for Quad8", - __FILE__, __LINE__, __func__); - case ElementType::Tetra10: - return {ElementType::Tetra4, std::max(order, 2)}; - case ElementType::Hex27: - return {ElementType::Hex8, std::max(order, 2)}; - case ElementType::Hex20: - throw BasisElementCompatibilityException( - "Hex20 is a serendipity element; use SerendipityBasis for Hex20", - __FILE__, __LINE__, __func__); - case ElementType::Wedge18: - return {ElementType::Wedge6, std::max(order, 2)}; - case ElementType::Wedge15: - throw BasisElementCompatibilityException( - "Wedge15 is a serendipity element; use SerendipityBasis for Wedge15", - __FILE__, __LINE__, __func__); - case ElementType::Pyramid13: - throw BasisElementCompatibilityException( - "Pyramid13 is a serendipity variant; use SerendipityBasis (Pyramid13) or the complete-family Lagrange path via LagrangeBasis (Pyramid5, order >= 2)", - __FILE__, __LINE__, __func__); - case ElementType::Pyramid14: - return {ElementType::Pyramid5, std::max(order, 2)}; - default: - return {element_type, order}; - } -} - -} // namespace - -void prewarm_lagrange_basis_scratch(int max_order, std::size_t max_qpts) { - evaluate_scratch().prewarm(max_order, max_qpts); -} - -LagrangeBasis::LagrangeBasis(ElementType type, int order) - : element_type_(type), dimension_(0), order_(order) { - const NormalizedLagrangeRequest normalized = normalize_lagrange_request(element_type_, order_); - element_type_ = normalized.element_type; - order_ = normalized.order; - - if (order_ < 0) { - throw BasisConfigurationException("LagrangeBasis requires non-negative polynomial order", - __FILE__, __LINE__, __func__); - } - - dimension_ = lagrange_topology_traits(element_type_).dimension; - - init_nodes(); - init_evaluation_dispatch(); -} - -void LagrangeBasis::init_nodes() { - nodes_.clear(); - nodes_1d_.clear(); - tensor_indices_.clear(); - simplex_exponents_.clear(); - 
wedge_indices_.clear(); - wedge_node_by_tri_z_.clear(); - axis_v_coeffs_.clear(); - axis_d_coeffs_.clear(); - axis_d2_coeffs_.clear(); - axis_barycentric_weights_.clear(); - const auto topology = lagrange_topology_traits(element_type_).topology; - topology_id_ = static_cast(topology); - switch (topology) { - case LagrangeTopology::Point: - build_point_nodes(); - return; - case LagrangeTopology::Line: - build_tensor_product_nodes(1); - compute_axis_monomial_coefficients(); - return; - case LagrangeTopology::Quadrilateral: - build_tensor_product_nodes(2); - compute_axis_monomial_coefficients(); - return; - case LagrangeTopology::Hexahedron: - build_tensor_product_nodes(3); - compute_axis_monomial_coefficients(); - return; - case LagrangeTopology::Triangle: - case LagrangeTopology::Tetrahedron: - build_simplex_nodes(); - return; - case LagrangeTopology::Wedge: - build_wedge_nodes(); - compute_axis_monomial_coefficients(); - return; - case LagrangeTopology::Pyramid: - build_pyramid_nodes(); - return; - case LagrangeTopology::Unknown: - break; - } - - throw BasisElementCompatibilityException("Unsupported element type in LagrangeBasis::init_nodes", - __FILE__, __LINE__, __func__); -} - -void LagrangeBasis::init_evaluation_dispatch() { - const auto topology = static_cast(topology_id_); - switch (topology) { - case LagrangeTopology::Point: - vector_evaluation_dispatch_ = &LagrangeBasis::evaluate_point_vectors; - return; - case LagrangeTopology::Line: - case LagrangeTopology::Quadrilateral: - case LagrangeTopology::Hexahedron: - vector_evaluation_dispatch_ = &LagrangeBasis::evaluate_tensor_product_vectors; - return; - case LagrangeTopology::Triangle: - vector_evaluation_dispatch_ = &LagrangeBasis::evaluate_triangle_vectors; - return; - case LagrangeTopology::Tetrahedron: - vector_evaluation_dispatch_ = &LagrangeBasis::evaluate_tetrahedron_vectors; - return; - case LagrangeTopology::Wedge: - vector_evaluation_dispatch_ = &LagrangeBasis::evaluate_wedge_vectors; - return; - 
case LagrangeTopology::Pyramid: - vector_evaluation_dispatch_ = &LagrangeBasis::evaluate_pyramid_vectors; - return; - case LagrangeTopology::Unknown: - break; - } - vector_evaluation_dispatch_ = &LagrangeBasis::evaluate_unsupported_vectors; -} - -void LagrangeBasis::compute_axis_monomial_coefficients() { - const int N = static_cast(nodes_1d_.size()); - if (N == 0) return; - - axis_barycentric_weights_.resize(static_cast(N)); - fill_equispaced_barycentric_weights(N, axis_barycentric_weights_.data()); - - if (assign_precomputed_axis_coefficients(N, axis_v_coeffs_, axis_d_coeffs_, axis_d2_coeffs_)) { - return; - } - - axis_v_coeffs_.assign(static_cast(N) * static_cast(N), Real(0)); - if (N >= 2) { - axis_d_coeffs_.assign(static_cast(N) * static_cast(N - 1), Real(0)); - } - if (N >= 3) { - axis_d2_coeffs_.assign(static_cast(N) * static_cast(N - 2), Real(0)); - } - - if (N == 1) { - axis_v_coeffs_[0] = Real(1); - return; - } - - // For each L_i, compute monomial coefficients of P_i(x) = prod_{j != i} (x - x_j), - // then divide by w_i = prod_{j != i} (x_i - x_j). - std::vector coeffs; - coeffs.reserve(static_cast(N)); - for (int i = 0; i < N; ++i) { - coeffs.assign(1, Real(1)); // start with constant polynomial 1 - for (int j = 0; j < N; ++j) { - if (j == i) continue; - // Multiply (x - x_j) into coeffs (in-place via temp). - std::vector next(coeffs.size() + 1, Real(0)); - for (std::size_t k = 0; k < coeffs.size(); ++k) { - next[k] -= nodes_1d_[static_cast(j)] * coeffs[k]; - next[k + 1] += coeffs[k]; - } - coeffs.swap(next); - } - // Divide by w_i. - Real denom = Real(1); - for (int j = 0; j < N; ++j) { - if (j == i) continue; - denom *= (nodes_1d_[static_cast(i)] - nodes_1d_[static_cast(j)]); - } - const Real inv_denom = Real(1) / denom; - for (int k = 0; k < N; ++k) { - axis_v_coeffs_[static_cast(i) * static_cast(N) + static_cast(k)] - = coeffs[static_cast(k)] * inv_denom; - } - - // First derivative coefficients: d/dx (sum_k c_ik * x^k) = sum_{k>=1} k*c_ik * x^(k-1). 
- if (N >= 2) { - for (int k = 1; k < N; ++k) { - axis_d_coeffs_[static_cast(i) * static_cast(N - 1) - + static_cast(k - 1)] - = static_cast(k) - * axis_v_coeffs_[static_cast(i) * static_cast(N) - + static_cast(k)]; - } - } - - // Second derivative coefficients: d^2/dx^2 = sum_{k>=2} k*(k-1)*c_ik * x^(k-2). - if (N >= 3) { - for (int k = 2; k < N; ++k) { - axis_d2_coeffs_[static_cast(i) * static_cast(N - 2) - + static_cast(k - 2)] - = static_cast(k * (k - 1)) - * axis_v_coeffs_[static_cast(i) * static_cast(N) - + static_cast(k)]; - } - } - } -} - -void LagrangeBasis::build_point_nodes() { - nodes_.push_back(math::Vector{Real(0), Real(0), Real(0)}); -} - -void LagrangeBasis::init_equispaced_1d_nodes() { - nodes_1d_.clear(); - for (int i = 0; i <= std::max(order_, 0); ++i) { - nodes_1d_.push_back(detail::equispaced_pm_one_coord(i, order_)); - } -} - -void LagrangeBasis::build_tensor_product_nodes(int dimensions) { - init_equispaced_1d_nodes(); - - if (dimensions < 1 || dimensions > 3) { - throw BasisConfigurationException("LagrangeBasis::build_tensor_product_nodes requires dimension 1, 2, or 3", - __FILE__, __LINE__, __func__); - } - - nodes_ = ReferenceNodeLayout::get_lagrange_node_coords(element_type_, order_); - tensor_indices_.resize(nodes_.size(), TensorNodeIndex{0u, 0u, 0u}); - for (std::size_t n = 0; n < nodes_.size(); ++n) { - tensor_indices_[n][0] = lattice_index_pm_one( - nodes_[n][0], order_, - "LagrangeBasis: invalid tensor-product x-coordinate in public node ordering"); - if (dimensions >= 2) { - tensor_indices_[n][1] = lattice_index_pm_one( - nodes_[n][1], order_, - "LagrangeBasis: invalid tensor-product y-coordinate in public node ordering"); - } - if (dimensions == 3) { - tensor_indices_[n][2] = lattice_index_pm_one( - nodes_[n][2], order_, - "LagrangeBasis: invalid tensor-product z-coordinate in public node ordering"); - } - } -} - -void LagrangeBasis::build_simplex_nodes() { - nodes_ = ReferenceNodeLayout::get_lagrange_node_coords(element_type_, 
order_); - const auto topology = static_cast(topology_id_); - simplex_exponents_.clear(); - simplex_exponents_.reserve(nodes_.size()); - for (const auto& node : nodes_) { - switch (topology) { - case LagrangeTopology::Triangle: - simplex_exponents_.push_back(triangle_exponents_from_public_node(node, order_)); - break; - case LagrangeTopology::Tetrahedron: - simplex_exponents_.push_back(tetrahedron_exponents_from_public_node(node, order_)); - break; - default: - throw BasisElementCompatibilityException("LagrangeBasis::build_simplex_nodes requires simplex topology", - __FILE__, __LINE__, __func__); - } - } -} - -void LagrangeBasis::build_wedge_nodes() { - init_equispaced_1d_nodes(); - const auto triangle_nodes = ReferenceNodeLayout::get_lagrange_node_coords(ElementType::Triangle3, order_); - simplex_exponents_.clear(); - simplex_exponents_.reserve(triangle_nodes.size()); - std::unordered_map, std::size_t, SimplexExponentHash> triangle_index_by_exponent; - triangle_index_by_exponent.reserve(triangle_nodes.size()); - for (std::size_t tri = 0; tri < triangle_nodes.size(); ++tri) { - const auto exponents = triangle_exponents_from_public_node(triangle_nodes[tri], order_); - simplex_exponents_.push_back(exponents); - const auto inserted = triangle_index_by_exponent.emplace(exponents, tri); - if (!inserted.second) { - throw BasisNodeOrderingException("LagrangeBasis: duplicate wedge triangle descriptor", - __FILE__, __LINE__, __func__); - } - } - - nodes_ = ReferenceNodeLayout::get_lagrange_node_coords(element_type_, order_); - wedge_indices_.clear(); - wedge_indices_.reserve(nodes_.size()); - const std::size_t z_count = static_cast(order_ + 1); - const std::size_t missing_node = nodes_.size(); - wedge_node_by_tri_z_.assign(triangle_nodes.size() * z_count, missing_node); - for (std::size_t node_index = 0; node_index < nodes_.size(); ++node_index) { - const auto& node = nodes_[node_index]; - const auto exponents = triangle_exponents_from_public_node(node, order_); - const 
auto found = triangle_index_by_exponent.find(exponents); - if (found == triangle_index_by_exponent.end()) { - throw BasisNodeOrderingException("LagrangeBasis: failed to resolve wedge triangle descriptor in public ordering", - __FILE__, __LINE__, __func__); - } - const std::size_t tri = found->second; - const std::size_t z = - lattice_index_pm_one(node[2], order_, - "LagrangeBasis: invalid wedge z-coordinate in public node ordering"); - wedge_indices_.push_back(WedgeNodeIndex{tri, z}); - wedge_node_by_tri_z_[tri * z_count + z] = node_index; - } - for (std::size_t entry = 0; entry < wedge_node_by_tri_z_.size(); ++entry) { - if (wedge_node_by_tri_z_[entry] == missing_node) { - throw BasisNodeOrderingException("LagrangeBasis: incomplete wedge tensor-product node map", - __FILE__, __LINE__, __func__); - } - } -} - -void LagrangeBasis::build_pyramid_nodes() { - nodes_ = detail::lagrange_pyramid::nodes(order_); -} - -void LagrangeBasis::evaluate_point_vectors(const math::Vector&, - std::vector* values, - std::vector* gradients, - std::vector* hessians) const { - if (values != nullptr) { - values->resize(1u); - (*values)[0] = Real(1); - } - if (gradients != nullptr) { - gradients->resize(1u); - (*gradients)[0] = Gradient{}; - } - if (hessians != nullptr) { - hessians->resize(1u); - (*hessians)[0] = Hessian{}; - } -} - -void LagrangeBasis::evaluate_tensor_product_vectors(const math::Vector& xi, - std::vector* values, - std::vector* gradients, - std::vector* hessians) const { - const auto topology = static_cast(topology_id_); - if (evaluate_fixed_lagrange_fast(topology, order_, xi, values, gradients, hessians)) { - return; - } - - const int n_axis = static_cast(nodes_1d_.size()); - const Real* vc = axis_v_coeffs_.data(); - const Real* dc = axis_d_coeffs_.data(); - const Real* d2c = axis_d2_coeffs_.data(); - const Real* bw = axis_barycentric_weights_.data(); - const AxisDeriv level = hessians != nullptr ? AxisDeriv::ValuesAndFirstAndSecond - : gradients != nullptr ? 
AxisDeriv::ValuesAndFirst - : AxisDeriv::ValuesOnly; - - LagrangeEvaluateScratch& scratch = evaluate_scratch(); - const AxisBasisEvaluations x_axis = - fill_axis_scratch(scratch.axis_x, vc, dc, d2c, bw, n_axis, xi[0], level); - AxisBasisEvaluations y_axis = constant_axis_basis(); - AxisBasisEvaluations z_axis = constant_axis_basis(); - - if (topology != LagrangeTopology::Line) { - y_axis = fill_axis_scratch(scratch.axis_y, vc, dc, d2c, bw, n_axis, xi[1], level); - } - if (topology == LagrangeTopology::Hexahedron) { - z_axis = fill_axis_scratch(scratch.axis_z, vc, dc, d2c, bw, n_axis, xi[2], level); - } - - evaluate_tensor_product_factorized(tensor_indices_, x_axis, y_axis, z_axis, - values, gradients, hessians); -} - -void LagrangeBasis::evaluate_triangle_vectors(const math::Vector& xi, - std::vector* values, - std::vector* gradients, - std::vector* hessians) const { - const auto topology = static_cast(topology_id_); - if (evaluate_fixed_lagrange_fast(topology, order_, xi, values, gradients, hessians)) { - return; - } - detail::evaluate_triangle_simplex_basis(simplex_exponents_, order_, xi, - values, gradients, hessians); -} - -void LagrangeBasis::evaluate_tetrahedron_vectors(const math::Vector& xi, - std::vector* values, - std::vector* gradients, - std::vector* hessians) const { - const auto topology = static_cast(topology_id_); - if (evaluate_fixed_lagrange_fast(topology, order_, xi, values, gradients, hessians)) { - return; - } - detail::evaluate_tetrahedron_simplex_basis(simplex_exponents_, order_, xi, - values, gradients, hessians); -} - -void LagrangeBasis::evaluate_wedge_vectors(const math::Vector& xi, - std::vector* values, - std::vector* gradients, - std::vector* hessians) const { - const int n_axis = static_cast(nodes_1d_.size()); - const Real* vc = axis_v_coeffs_.data(); - const Real* dc = axis_d_coeffs_.data(); - const Real* d2c = axis_d2_coeffs_.data(); - const Real* bw = axis_barycentric_weights_.data(); - const AxisDeriv level = hessians != nullptr ? 
AxisDeriv::ValuesAndFirstAndSecond - : gradients != nullptr ? AxisDeriv::ValuesAndFirst - : AxisDeriv::ValuesOnly; - - LagrangeEvaluateScratch& scratch = evaluate_scratch(); - const AxisBasisEvaluations z_axis = - fill_axis_scratch(scratch.axis_z, vc, dc, d2c, bw, n_axis, xi[2], level); - - if (hessians != nullptr) { - detail::evaluate_triangle_simplex_basis( - simplex_exponents_, order_, xi, - &scratch.tri_values, &scratch.tri_gradients, &scratch.tri_hessians); - } else if (gradients != nullptr) { - detail::evaluate_triangle_simplex_basis( - simplex_exponents_, order_, xi, - &scratch.tri_values, &scratch.tri_gradients, nullptr); - } else { - detail::evaluate_triangle_simplex_basis( - simplex_exponents_, order_, xi, - &scratch.tri_values, nullptr, nullptr); - } - - const std::size_t n_nodes = wedge_indices_.size(); - if (values != nullptr) { - values->resize(n_nodes); - } - if (gradients != nullptr) { - gradients->resize(n_nodes); - } - if (hessians != nullptr) { - hessians->resize(n_nodes); - } - - for (std::size_t n = 0; n < n_nodes; ++n) { - const auto& index = wedge_indices_[n]; - const std::size_t tri_idx = index[0]; - const std::size_t z_idx = index[1]; - const Real zv = z_axis.values[z_idx]; - const Real tri_v = scratch.tri_values[tri_idx]; - - if (values != nullptr) { - (*values)[n] = tri_v * zv; - } - if (gradients != nullptr) { - const Real zd = z_axis.first[z_idx]; - (*gradients)[n][0] = scratch.tri_gradients[tri_idx][0] * zv; - (*gradients)[n][1] = scratch.tri_gradients[tri_idx][1] * zv; - (*gradients)[n][2] = tri_v * zd; - } - if (hessians != nullptr) { - const Real zd = z_axis.first[z_idx]; - const Real zd2 = z_axis.second[z_idx]; - Hessian H{}; - H(0, 0) = scratch.tri_hessians[tri_idx](0, 0) * zv; - H(1, 1) = scratch.tri_hessians[tri_idx](1, 1) * zv; - H(0, 1) = scratch.tri_hessians[tri_idx](0, 1) * zv; - H(1, 0) = H(0, 1); - H(2, 2) = tri_v * zd2; - H(0, 2) = scratch.tri_gradients[tri_idx][0] * zd; - H(2, 0) = H(0, 2); - H(1, 2) = 
scratch.tri_gradients[tri_idx][1] * zd; - H(2, 1) = H(1, 2); - (*hessians)[n] = H; - } - } -} - -void LagrangeBasis::evaluate_pyramid_vectors(const math::Vector& xi, - std::vector* values, - std::vector* gradients, - std::vector* hessians) const { - if (values != nullptr && gradients != nullptr && hessians != nullptr) { - detail::lagrange_pyramid::evaluate_all(order_, xi, *values, *gradients, *hessians); - return; - } - if (values != nullptr) { - detail::lagrange_pyramid::evaluate_values(order_, xi, *values); - } - if (gradients != nullptr) { - detail::lagrange_pyramid::evaluate_gradients(order_, xi, *gradients); - } - if (hessians != nullptr) { - detail::lagrange_pyramid::evaluate_hessians(order_, xi, *hessians); - } -} - -void LagrangeBasis::evaluate_unsupported_vectors(const math::Vector&, - std::vector*, - std::vector*, - std::vector*) const { - throw BasisEvaluationException("Unsupported element in LagrangeBasis vector evaluation", - __FILE__, __LINE__, __func__); -} - -void LagrangeBasis::evaluate_values(const math::Vector& xi, - std::vector& values) const { - (this->*vector_evaluation_dispatch_)(xi, &values, nullptr, nullptr); -} - -void LagrangeBasis::evaluate_gradients(const math::Vector& xi, - std::vector& gradients) const { - (this->*vector_evaluation_dispatch_)(xi, nullptr, &gradients, nullptr); -} - -void LagrangeBasis::evaluate_hessians(const math::Vector& xi, - std::vector& hessians) const { - (this->*vector_evaluation_dispatch_)(xi, nullptr, nullptr, &hessians); -} - -void LagrangeBasis::evaluate_all(const math::Vector& xi, - std::vector& values, - std::vector& gradients, - std::vector& hessians) const { - (this->*vector_evaluation_dispatch_)(xi, &values, &gradients, &hessians); -} - -void LagrangeBasis::evaluate_values_to(const math::Vector& xi, - Real* SVMP_RESTRICT values_out) const { - const auto topology = static_cast(topology_id_); - if (evaluate_fixed_lagrange_fast_to(topology, order_, xi, values_out, nullptr, nullptr)) { - return; - } - - 
const int n_axis = static_cast(nodes_1d_.size()); - const Real* vc = axis_v_coeffs_.data(); - const Real* dc = axis_d_coeffs_.data(); - const Real* d2c = axis_d2_coeffs_.data(); - const Real* bw = axis_barycentric_weights_.data(); - switch (topology) { - case LagrangeTopology::Point: - values_out[0] = Real(1); - return; - case LagrangeTopology::Line: - case LagrangeTopology::Quadrilateral: - case LagrangeTopology::Hexahedron: { - LagrangeEvaluateScratch& scratch = evaluate_scratch(); - const AxisBasisEvaluations x_axis = - fill_axis_scratch(scratch.axis_x, vc, dc, d2c, bw, n_axis, xi[0], AxisDeriv::ValuesOnly); - AxisBasisEvaluations y_axis = constant_axis_basis(); - AxisBasisEvaluations z_axis = constant_axis_basis(); - if (topology != LagrangeTopology::Line) { - y_axis = fill_axis_scratch(scratch.axis_y, vc, dc, d2c, bw, n_axis, xi[1], AxisDeriv::ValuesOnly); - } - if (topology == LagrangeTopology::Hexahedron) { - z_axis = fill_axis_scratch(scratch.axis_z, vc, dc, d2c, bw, n_axis, xi[2], AxisDeriv::ValuesOnly); - } - evaluate_tensor_product_factorized_to(tensor_indices_, x_axis, y_axis, z_axis, - values_out, nullptr, nullptr); - return; - } - case LagrangeTopology::Triangle: - detail::evaluate_triangle_simplex_basis_to(simplex_exponents_, order_, xi, - values_out, nullptr, nullptr); - return; - case LagrangeTopology::Tetrahedron: - detail::evaluate_tetrahedron_simplex_basis_to(simplex_exponents_, order_, xi, - values_out, nullptr, nullptr); - return; - case LagrangeTopology::Wedge: { - LagrangeEvaluateScratch& scratch = evaluate_scratch(); - const AxisBasisEvaluations z_axis = - fill_axis_scratch(scratch.axis_z, vc, dc, d2c, bw, n_axis, xi[2], AxisDeriv::ValuesOnly); - scratch.tri_values.resize(simplex_exponents_.size()); - detail::evaluate_triangle_simplex_basis_to(simplex_exponents_, order_, xi, - scratch.tri_values.data(), nullptr, nullptr); - for (std::size_t n = 0; n < wedge_indices_.size(); ++n) { - const auto& index = wedge_indices_[n]; - values_out[n] = 
scratch.tri_values[index[0]] * z_axis.values[index[1]]; - } - return; - } - case LagrangeTopology::Pyramid: { - detail::lagrange_pyramid::evaluate_values_to(order_, xi, values_out); - return; - } - case LagrangeTopology::Unknown: - break; - } - - throw BasisEvaluationException("Unsupported element in evaluate_values_to", - __FILE__, __LINE__, __func__); -} - -void LagrangeBasis::evaluate_gradients_to(const math::Vector& xi, - Real* SVMP_RESTRICT gradients_out) const { - const auto topology = static_cast(topology_id_); - if (evaluate_fixed_lagrange_fast_to(topology, order_, xi, nullptr, gradients_out, nullptr)) { - return; - } - - const int n_axis = static_cast(nodes_1d_.size()); - const Real* vc = axis_v_coeffs_.data(); - const Real* dc = axis_d_coeffs_.data(); - const Real* d2c = axis_d2_coeffs_.data(); - const Real* bw = axis_barycentric_weights_.data(); - switch (topology) { - case LagrangeTopology::Point: - gradients_out[0] = Real(0); - gradients_out[1] = Real(0); - gradients_out[2] = Real(0); - return; - case LagrangeTopology::Line: - case LagrangeTopology::Quadrilateral: - case LagrangeTopology::Hexahedron: { - LagrangeEvaluateScratch& scratch = evaluate_scratch(); - const AxisBasisEvaluations x_axis = - fill_axis_scratch(scratch.axis_x, vc, dc, d2c, bw, n_axis, xi[0], AxisDeriv::ValuesAndFirst); - AxisBasisEvaluations y_axis = constant_axis_basis(); - AxisBasisEvaluations z_axis = constant_axis_basis(); - if (topology != LagrangeTopology::Line) { - y_axis = fill_axis_scratch(scratch.axis_y, vc, dc, d2c, bw, n_axis, xi[1], AxisDeriv::ValuesAndFirst); - } - if (topology == LagrangeTopology::Hexahedron) { - z_axis = fill_axis_scratch(scratch.axis_z, vc, dc, d2c, bw, n_axis, xi[2], AxisDeriv::ValuesAndFirst); - } - evaluate_tensor_product_factorized_to(tensor_indices_, x_axis, y_axis, z_axis, - nullptr, gradients_out, nullptr); - return; - } - case LagrangeTopology::Triangle: - detail::evaluate_triangle_simplex_basis_to(simplex_exponents_, order_, xi, - nullptr, 
gradients_out, nullptr); - return; - case LagrangeTopology::Tetrahedron: - detail::evaluate_tetrahedron_simplex_basis_to(simplex_exponents_, order_, xi, - nullptr, gradients_out, nullptr); - return; - case LagrangeTopology::Wedge: { - LagrangeEvaluateScratch& scratch = evaluate_scratch(); - const AxisBasisEvaluations z_axis = - fill_axis_scratch(scratch.axis_z, vc, dc, d2c, bw, n_axis, xi[2], AxisDeriv::ValuesAndFirst); - const std::size_t tri_count = simplex_exponents_.size(); - scratch.tri_values.resize(tri_count); - scratch.tri_gradient_components.resize(tri_count * 3u); - detail::evaluate_triangle_simplex_basis_to(simplex_exponents_, order_, xi, - scratch.tri_values.data(), - scratch.tri_gradient_components.data(), - nullptr); - for (std::size_t n = 0; n < wedge_indices_.size(); ++n) { - const auto& index = wedge_indices_[n]; - const std::size_t tri = index[0]; - const std::size_t z = index[1]; - const Real* tri_g = scratch.tri_gradient_components.data() + tri * 3u; - Real* g = gradients_out + n * 3u; - g[0] = tri_g[0] * z_axis.values[z]; - g[1] = tri_g[1] * z_axis.values[z]; - g[2] = scratch.tri_values[tri] * z_axis.first[z]; - } - return; - } - case LagrangeTopology::Pyramid: { - detail::lagrange_pyramid::evaluate_gradients_to(order_, xi, gradients_out); - return; - } - case LagrangeTopology::Unknown: - break; - } - - throw BasisEvaluationException("Unsupported element in evaluate_gradients_to", - __FILE__, __LINE__, __func__); -} - -void LagrangeBasis::evaluate_hessians_to(const math::Vector& xi, +void LagrangeBasis::evaluate_hessians_to(const Vec3& xi, Real* SVMP_RESTRICT hessians_out) const { - const auto topology = static_cast(topology_id_); - if (evaluate_fixed_lagrange_fast_to(topology, order_, xi, nullptr, nullptr, hessians_out)) { - return; - } - - const int n_axis = static_cast(nodes_1d_.size()); - const Real* vc = axis_v_coeffs_.data(); - const Real* dc = axis_d_coeffs_.data(); - const Real* d2c = axis_d2_coeffs_.data(); - const Real* bw = 
axis_barycentric_weights_.data(); - switch (topology) { - case LagrangeTopology::Point: - for (std::size_t i = 0; i < 9; ++i) { - hessians_out[i] = Real(0); - } - return; - case LagrangeTopology::Line: - case LagrangeTopology::Quadrilateral: - case LagrangeTopology::Hexahedron: { - LagrangeEvaluateScratch& scratch = evaluate_scratch(); - const AxisBasisEvaluations x_axis = - fill_axis_scratch(scratch.axis_x, vc, dc, d2c, bw, n_axis, xi[0], AxisDeriv::ValuesAndFirstAndSecond); - AxisBasisEvaluations y_axis = constant_axis_basis(); - AxisBasisEvaluations z_axis = constant_axis_basis(); - if (topology != LagrangeTopology::Line) { - y_axis = fill_axis_scratch(scratch.axis_y, vc, dc, d2c, bw, n_axis, xi[1], AxisDeriv::ValuesAndFirstAndSecond); - } - if (topology == LagrangeTopology::Hexahedron) { - z_axis = fill_axis_scratch(scratch.axis_z, vc, dc, d2c, bw, n_axis, xi[2], AxisDeriv::ValuesAndFirstAndSecond); - } - evaluate_tensor_product_factorized_to(tensor_indices_, x_axis, y_axis, z_axis, - nullptr, nullptr, hessians_out); - return; - } - case LagrangeTopology::Triangle: - detail::evaluate_triangle_simplex_basis_to(simplex_exponents_, order_, xi, - nullptr, nullptr, hessians_out); - return; - case LagrangeTopology::Tetrahedron: - detail::evaluate_tetrahedron_simplex_basis_to(simplex_exponents_, order_, xi, - nullptr, nullptr, hessians_out); - return; - case LagrangeTopology::Wedge: { - LagrangeEvaluateScratch& scratch = evaluate_scratch(); - const AxisBasisEvaluations z_axis = - fill_axis_scratch(scratch.axis_z, vc, dc, d2c, bw, n_axis, xi[2], AxisDeriv::ValuesAndFirstAndSecond); - const std::size_t tri_count = simplex_exponents_.size(); - scratch.tri_values.resize(tri_count); - scratch.tri_gradient_components.resize(tri_count * 3u); - scratch.tri_hessian_components.resize(tri_count * 9u); - detail::evaluate_triangle_simplex_basis_to(simplex_exponents_, order_, xi, - scratch.tri_values.data(), - scratch.tri_gradient_components.data(), - 
scratch.tri_hessian_components.data()); - for (std::size_t n = 0; n < wedge_indices_.size(); ++n) { - const auto& index = wedge_indices_[n]; - const std::size_t tri = index[0]; - const std::size_t z = index[1]; - const Real zv = z_axis.values[z]; - const Real zd = z_axis.first[z]; - const Real zd2 = z_axis.second[z]; - const Real* tri_g = scratch.tri_gradient_components.data() + tri * 3u; - const Real* tri_H = scratch.tri_hessian_components.data() + tri * 9u; - const Real hxy = tri_H[1] * zv; - const Real hxz = tri_g[0] * zd; - const Real hyz = tri_g[1] * zd; - Real* H = hessians_out + n * 9u; - H[0] = tri_H[0] * zv; - H[4] = tri_H[4] * zv; - H[1] = hxy; - H[3] = hxy; - H[8] = scratch.tri_values[tri] * zd2; - H[2] = hxz; - H[6] = hxz; - H[5] = hyz; - H[7] = hyz; - } - return; - } - case LagrangeTopology::Pyramid: { - detail::lagrange_pyramid::evaluate_hessians_to(order_, xi, hessians_out); - return; - } - case LagrangeTopology::Unknown: - break; - } - - throw BasisEvaluationException("Unsupported element in evaluate_hessians_to", - __FILE__, __LINE__, __func__); -} - -void LagrangeBasis::evaluate_all_to(const math::Vector& xi, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) const { - const auto topology = static_cast(topology_id_); - if (evaluate_fixed_lagrange_fast_to(topology, order_, xi, values_out, gradients_out, hessians_out)) { - return; - } - - const int n_axis = static_cast(nodes_1d_.size()); - const Real* vc = axis_v_coeffs_.data(); - const Real* dc = axis_d_coeffs_.data(); - const Real* d2c = axis_d2_coeffs_.data(); - const Real* bw = axis_barycentric_weights_.data(); - switch (topology) { - case LagrangeTopology::Point: - values_out[0] = Real(1); - gradients_out[0] = Real(0); - gradients_out[1] = Real(0); - gradients_out[2] = Real(0); - for (std::size_t i = 0; i < 9; ++i) { - hessians_out[i] = Real(0); - } - return; - case LagrangeTopology::Line: - case LagrangeTopology::Quadrilateral: - case 
LagrangeTopology::Hexahedron: { - LagrangeEvaluateScratch& scratch = evaluate_scratch(); - const AxisBasisEvaluations x_axis = - fill_axis_scratch(scratch.axis_x, vc, dc, d2c, bw, n_axis, xi[0], AxisDeriv::ValuesAndFirstAndSecond); - AxisBasisEvaluations y_axis = constant_axis_basis(); - AxisBasisEvaluations z_axis = constant_axis_basis(); - if (topology != LagrangeTopology::Line) { - y_axis = fill_axis_scratch(scratch.axis_y, vc, dc, d2c, bw, n_axis, xi[1], AxisDeriv::ValuesAndFirstAndSecond); - } - if (topology == LagrangeTopology::Hexahedron) { - z_axis = fill_axis_scratch(scratch.axis_z, vc, dc, d2c, bw, n_axis, xi[2], AxisDeriv::ValuesAndFirstAndSecond); - } - evaluate_tensor_product_factorized_to(tensor_indices_, x_axis, y_axis, z_axis, - values_out, gradients_out, hessians_out); - return; - } - case LagrangeTopology::Triangle: - detail::evaluate_triangle_simplex_basis_to(simplex_exponents_, order_, xi, - values_out, gradients_out, hessians_out); - return; - case LagrangeTopology::Tetrahedron: - detail::evaluate_tetrahedron_simplex_basis_to(simplex_exponents_, order_, xi, - values_out, gradients_out, hessians_out); - return; - case LagrangeTopology::Wedge: { - LagrangeEvaluateScratch& scratch = evaluate_scratch(); - const AxisBasisEvaluations z_axis = - fill_axis_scratch(scratch.axis_z, vc, dc, d2c, bw, n_axis, xi[2], AxisDeriv::ValuesAndFirstAndSecond); - const std::size_t tri_count = simplex_exponents_.size(); - scratch.tri_values.resize(tri_count); - scratch.tri_gradient_components.resize(tri_count * 3u); - scratch.tri_hessian_components.resize(tri_count * 9u); - detail::evaluate_triangle_simplex_basis_to(simplex_exponents_, order_, xi, - scratch.tri_values.data(), - scratch.tri_gradient_components.data(), - scratch.tri_hessian_components.data()); - for (std::size_t n = 0; n < wedge_indices_.size(); ++n) { - const auto& index = wedge_indices_[n]; - const std::size_t tri = index[0]; - const std::size_t z = index[1]; - const Real zv = z_axis.values[z]; - 
const Real zd = z_axis.first[z]; - const Real zd2 = z_axis.second[z]; - const Real tri_v = scratch.tri_values[tri]; - const Real* tri_g = scratch.tri_gradient_components.data() + tri * 3u; - const Real* tri_H = scratch.tri_hessian_components.data() + tri * 9u; - const Real hxy = tri_H[1] * zv; - const Real hxz = tri_g[0] * zd; - const Real hyz = tri_g[1] * zd; - - values_out[n] = tri_v * zv; - - Real* g = gradients_out + n * 3u; - g[0] = tri_g[0] * zv; - g[1] = tri_g[1] * zv; - g[2] = tri_v * zd; - - Real* H = hessians_out + n * 9u; - H[0] = tri_H[0] * zv; - H[4] = tri_H[4] * zv; - H[1] = hxy; - H[3] = hxy; - H[8] = tri_v * zd2; - H[2] = hxz; - H[6] = hxz; - H[5] = hyz; - H[7] = hyz; - } - return; - } - case LagrangeTopology::Pyramid: { - detail::lagrange_pyramid::evaluate_all_to( - order_, xi, values_out, gradients_out, hessians_out); - return; - } - case LagrangeTopology::Unknown: - break; - } - - throw BasisEvaluationException("Unsupported element in evaluate_all_to", - __FILE__, __LINE__, __func__); -} - -void LagrangeBasis::evaluate_at_quadrature_points( - const std::vector>& points, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) const { - evaluate_at_quadrature_points_strided(points, points.size(), values_out, gradients_out, hessians_out); -} - -void LagrangeBasis::evaluate_at_quadrature_points_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) const { - const std::size_t num_qpts = points.size(); - const std::size_t num_dofs = size(); - if (output_stride < num_qpts) { - throw BasisConfigurationException("LagrangeBasis strided evaluation requires output_stride >= points.size()", - __FILE__, __LINE__, __func__); - } - if (values_out == nullptr && gradients_out == nullptr && hessians_out == nullptr) { - return; - } - - const auto topology = static_cast(topology_id_); - if 
(evaluate_fixed_lagrange_fast_strided(topology, - order_, - points, - output_stride, - values_out, - gradients_out, - hessians_out)) { - return; - } - - if (topology == LagrangeTopology::Line || - topology == LagrangeTopology::Quadrilateral || - topology == LagrangeTopology::Hexahedron) { - evaluate_tensor_product_points_strided(topology, - tensor_indices_, - points, - output_stride, - axis_v_coeffs_.data(), - axis_d_coeffs_.data(), - axis_d2_coeffs_.data(), - axis_barycentric_weights_.data(), - static_cast(nodes_1d_.size()), - values_out, - gradients_out, - hessians_out); - return; - } - - if (topology == LagrangeTopology::Triangle) { - detail::evaluate_triangle_simplex_basis_strided( - simplex_exponents_, order_, points, output_stride, values_out, gradients_out, hessians_out); - return; - } - - if (topology == LagrangeTopology::Tetrahedron) { - detail::evaluate_tetrahedron_simplex_basis_strided( - simplex_exponents_, order_, points, output_stride, values_out, gradients_out, hessians_out); - return; - } - - if (topology == LagrangeTopology::Wedge && - evaluate_wedge_fast_strided(order_, - wedge_indices_, - points, - output_stride, - values_out, - gradients_out, - hessians_out)) { - return; - } - - const bool wedge_scalar_hessian_fallback = - topology == LagrangeTopology::Wedge && - values_out == nullptr && - gradients_out == nullptr && - hessians_out != nullptr && - order_ <= 2; - if (topology == LagrangeTopology::Wedge && !wedge_scalar_hessian_fallback) { - evaluate_wedge_points_strided(simplex_exponents_, - wedge_indices_, - wedge_node_by_tri_z_, - order_, - points, - output_stride, - axis_v_coeffs_.data(), - axis_d_coeffs_.data(), - axis_d2_coeffs_.data(), - axis_barycentric_weights_.data(), - static_cast(nodes_1d_.size()), - values_out, - gradients_out, - hessians_out); - return; - } - - if (topology == LagrangeTopology::Pyramid) { - detail::lagrange_pyramid::evaluate_at_quadrature_points_strided( - order_, points, output_stride, values_out, gradients_out, 
hessians_out); - return; - } - - auto& scratch = evaluate_scratch(); - auto& v_tmp = scratch.strided_values_tmp; - auto& g_tmp = scratch.strided_gradients_tmp; - auto& h_tmp = scratch.strided_hessians_tmp; - - if (values_out) v_tmp.resize(num_dofs); - if (gradients_out) g_tmp.resize(num_dofs * 3u); - if (hessians_out) h_tmp.resize(num_dofs * 9u); - - for (std::size_t q = 0; q < num_qpts; ++q) { - if (values_out && gradients_out && hessians_out) { - evaluate_all_to(points[q], v_tmp.data(), g_tmp.data(), h_tmp.data()); - } else { - if (values_out) evaluate_values_to(points[q], v_tmp.data()); - if (gradients_out) evaluate_gradients_to(points[q], g_tmp.data()); - if (hessians_out) evaluate_hessians_to(points[q], h_tmp.data()); - } - - if (values_out) { - for (std::size_t d = 0; d < num_dofs; ++d) { - values_out[d * output_stride + q] = v_tmp[d]; - } - } - if (gradients_out) { - for (std::size_t d = 0; d < num_dofs; ++d) { - gradients_out[(d * 3u + 0u) * output_stride + q] = g_tmp[d * 3u + 0u]; - gradients_out[(d * 3u + 1u) * output_stride + q] = g_tmp[d * 3u + 1u]; - gradients_out[(d * 3u + 2u) * output_stride + q] = g_tmp[d * 3u + 2u]; - } - } - if (hessians_out) { - for (std::size_t d = 0; d < num_dofs; ++d) { - scatter_hessian_components_strided( - h_tmp.data() + d * 9u, - hessians_out + d * 9u * output_stride, - output_stride, - q); - } - } - } + evaluate_all_to(xi, nullptr, nullptr, hessians_out); } } // namespace basis diff --git a/Code/Source/solver/FE/Basis/LagrangeBasis.h b/Code/Source/solver/FE/Basis/LagrangeBasis.h index 91f7e379c..a5fe8e0fa 100644 --- a/Code/Source/solver/FE/Basis/LagrangeBasis.h +++ b/Code/Source/solver/FE/Basis/LagrangeBasis.h @@ -8,12 +8,9 @@ #ifndef SVMP_FE_BASIS_LAGRANGEBASIS_H #define SVMP_FE_BASIS_LAGRANGEBASIS_H -/** - * @file LagrangeBasis.h - * @brief Nodal Lagrange polynomial basis on reference elements - */ - #include "BasisFunction.h" +#include "BasisTraits.h" + #include #include @@ -23,33 +20,12 @@ namespace basis { void 
prewarm_lagrange_basis_scratch(int max_order, std::size_t max_qpts = 0); -/** - * @brief Complete nodal H1 Lagrange basis on canonical reference topologies - * - * Supports arbitrary polynomial order on the canonical complete families: - * `Line2`, `Triangle3`, `Quad4`, `Tetra4`, `Hex8`, `Wedge6`, and `Pyramid5`. - * Low-order complete-family aliases (`Line3`, `Triangle6`, `Quad9`, - * `Tetra10`, `Hex27`, `Wedge18`, `Pyramid14`) normalize to their canonical - * topology plus order. Serendipity variants remain intentionally excluded. - * - * Node locations are generated on canonical reference elements using - * equispaced coordinates on tensor-product elements, barycentric grids on - * simplices, tensorized triangle-line grids on wedges, and a rational nodal - * pyramid construction on `Pyramid5`. - * - * The evaluator is numerically stabilized for those nodes, but the - * interpolation problem itself remains the equispaced Lagrange problem. For - * high-order interpolation, especially order >= 4, prefer `SpectralBasis` - * (GLL / Warp & Blend nodes) unless exact equispaced nodal placement is part - * of the requested discretization. - * - * For the rational pyramid family, basis values remain exact at the apex. - * Gradients and Hessians are analytic on the supported interior reference - * domain, but the exact-apex nodal derivative limit is not unique and those - * derivative queries throw at the exact apex. 
- */ class LagrangeBasis : public BasisFunction { public: + using TensorNodeIndex = std::array; + using SimplexExponent = std::array; + using WedgeNodeIndex = std::array; + LagrangeBasis(ElementType type, int order); BasisType basis_type() const noexcept override { return BasisType::Lagrange; } @@ -57,7 +33,6 @@ class LagrangeBasis : public BasisFunction { int dimension() const noexcept override { return dimension_; } int order() const noexcept override { return order_; } std::size_t size() const noexcept override { return nodes_.size(); } - bool cache_identity_is_structural() const noexcept override { return true; } const std::vector>& nodes() const noexcept { return nodes_; } @@ -72,96 +47,32 @@ class LagrangeBasis : public BasisFunction { std::vector& gradients, std::vector& hessians) const final; - void evaluate_at_quadrature_points( - const std::vector>& points, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) const final; - void evaluate_at_quadrature_points_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) const final; - - // Raw-pointer output API. Caller must pre-size buffers to size(). 
- void evaluate_values_to(const math::Vector& xi, Real* SVMP_RESTRICT values_out) const final; - void evaluate_gradients_to(const math::Vector& xi, Real* SVMP_RESTRICT gradients_out) const final; - void evaluate_hessians_to(const math::Vector& xi, Real* SVMP_RESTRICT hessians_out) const final; + void evaluate_values_to(const math::Vector& xi, + Real* SVMP_RESTRICT values_out) const final; + void evaluate_gradients_to(const math::Vector& xi, + Real* SVMP_RESTRICT gradients_out) const final; + void evaluate_hessians_to(const math::Vector& xi, + Real* SVMP_RESTRICT hessians_out) const final; private: - using TensorNodeIndex = std::array; - using WedgeNodeIndex = std::array; - using VectorEvaluationDispatch = void (LagrangeBasis::*)( - const math::Vector&, - std::vector*, - std::vector*, - std::vector*) const; - - // Cached topology encoded as int because the topology enum lives in - // the .cpp anon namespace. Set once in init_nodes. - int topology_id_ = 0; - ElementType element_type_; - int dimension_; - int order_; + BasisTopology topology_{BasisTopology::Unknown}; + int dimension_{0}; + int order_{0}; std::vector nodes_1d_; std::vector> nodes_; std::vector tensor_indices_; - std::vector> simplex_exponents_; + std::vector simplex_exponents_; std::vector wedge_indices_; - std::vector wedge_node_by_tri_z_; - - // Precomputed Horner-form coefficients of the 1D Lagrange basis. - // Layout per axis (n_axis = nodes_1d_.size() = order_+1): - // axis_v_coeffs_[i * n_axis + k] = coeff of x^k in L_i(x), 0 <= i,k < n_axis - // axis_d_coeffs_[i * (n_axis - 1) + k] = coeff of x^k in L_i'(x) - // axis_d2_coeffs_[i * (n_axis - 2) + k] = coeff of x^k in L_i''(x) (only if n_axis >= 3) - // Populated by build_tensor_product_nodes / build_wedge_nodes. 
- std::vector axis_v_coeffs_; - std::vector axis_d_coeffs_; - std::vector axis_d2_coeffs_; - std::vector axis_barycentric_weights_; - VectorEvaluationDispatch vector_evaluation_dispatch_{nullptr}; void init_nodes(); - void init_evaluation_dispatch(); void build_point_nodes(); void build_tensor_product_nodes(int dimensions); void build_simplex_nodes(); void build_wedge_nodes(); - void build_pyramid_nodes(); void init_equispaced_1d_nodes(); - void compute_axis_monomial_coefficients(); - void evaluate_point_vectors(const math::Vector& xi, - std::vector* values, - std::vector* gradients, - std::vector* hessians) const; - void evaluate_tensor_product_vectors(const math::Vector& xi, - std::vector* values, - std::vector* gradients, - std::vector* hessians) const; - void evaluate_triangle_vectors(const math::Vector& xi, - std::vector* values, - std::vector* gradients, - std::vector* hessians) const; - void evaluate_tetrahedron_vectors(const math::Vector& xi, - std::vector* values, - std::vector* gradients, - std::vector* hessians) const; - void evaluate_wedge_vectors(const math::Vector& xi, - std::vector* values, - std::vector* gradients, - std::vector* hessians) const; - void evaluate_pyramid_vectors(const math::Vector& xi, - std::vector* values, - std::vector* gradients, - std::vector* hessians) const; - void evaluate_unsupported_vectors(const math::Vector& xi, - std::vector* values, - std::vector* gradients, - std::vector* hessians) const; + void evaluate_all_to(const math::Vector& xi, Real* SVMP_RESTRICT values_out, Real* SVMP_RESTRICT gradients_out, diff --git a/Code/Source/solver/FE/Basis/LagrangeBasisFast.h b/Code/Source/solver/FE/Basis/LagrangeBasisFast.h deleted file mode 100644 index 5b9faae04..000000000 --- a/Code/Source/solver/FE/Basis/LagrangeBasisFast.h +++ /dev/null @@ -1,1378 +0,0 @@ -/* Copyright (c) Stanford University, The Regents of the University of California, and others. - * - * All Rights Reserved. - * - * See License file. 
- */ - -#ifndef SVMP_FE_BASIS_LAGRANGEBASISFAST_H -#define SVMP_FE_BASIS_LAGRANGEBASISFAST_H - -/** - * @file LagrangeBasisFast.h - * @brief Header-only zero-overhead specializations of the Lagrange basis - * - * Provides templated static methods for the common nodal Lagrange families - * with compile-time-known polynomial order. Callers that know their basis - * type and order at compile time use these directly — there is no virtual - * dispatch, no std::vector allocation, no scratch lookup, and no topology - * switch. The output buffers are stack-allocated std::array, sized at - * compile time. The compiler fully unrolls and constant-folds. - * - * These specializations are an alternative entry point to the runtime path - * provided by `LagrangeBasis`. The runtime path remains the canonical API - * for generic callers; these specializations serve hot loops that know the - * element type. - * - * Node orderings match `ReferenceNodeLayout::get_lagrange_node_coords(...)` (VTK). - */ - -#include "Types.h" -#include "Math/Vector.h" -#include "Math/Matrix.h" -#include -#include - -namespace svmp { -namespace FE { -namespace basis { - -using Gradient = math::Vector; -using Hessian = math::Matrix; - -namespace detail { - -constexpr Gradient scaled_gradient(const Gradient& gradient, Real scale) { - return Gradient{scale * gradient[0], scale * gradient[1], scale * gradient[2]}; -} - -constexpr Gradient p2_edge_gradient(Real left, - const Gradient& left_gradient, - Real right, - const Gradient& right_gradient) { - return Gradient{ - Real(4) * (left_gradient[0] * right + right_gradient[0] * left), - Real(4) * (left_gradient[1] * right + right_gradient[1] * left), - Real(4) * (left_gradient[2] * right + right_gradient[2] * left), - }; -} - -constexpr Hessian p2_vertex_hessian(const Gradient& gradient) { - Hessian hessian{}; - for (std::size_t row = 0; row < 3u; ++row) { - for (std::size_t col = 0; col < 3u; ++col) { - hessian(row, col) = Real(4) * gradient[row] * 
gradient[col]; - } - } - return hessian; -} - -constexpr Hessian p2_edge_hessian(const Gradient& left_gradient, - const Gradient& right_gradient) { - Hessian hessian{}; - for (std::size_t row = 0; row < 3u; ++row) { - for (std::size_t col = 0; col < 3u; ++col) { - hessian(row, col) = Real(4) * ( - left_gradient[row] * right_gradient[col] + - right_gradient[row] * left_gradient[col]); - } - } - return hessian; -} - -constexpr std::size_t public_axis_index(int lattice, int order) noexcept { - return lattice == 0 ? 0u : - lattice == order ? 1u : - static_cast(lattice + 1); -} - -template -constexpr Real public_axis_coord(std::size_t public_index) noexcept { - const int lattice = public_index == 0u ? 0 : - public_index == 1u ? Order : - static_cast(public_index) - 1; - return Real(-1) + Real(2) * static_cast(lattice) / static_cast(Order); -} - -template -constexpr std::array make_public_axis_nodes() { - std::array nodes{}; - for (std::size_t i = 0; i < nodes.size(); ++i) { - nodes[i] = public_axis_coord(i); - } - return nodes; -} - -template -constexpr std::array make_public_axis_inverse_denominators() { - constexpr auto nodes = make_public_axis_nodes(); - std::array inv_denominators{}; - for (std::size_t i = 0; i < nodes.size(); ++i) { - Real denominator = Real(1); - for (std::size_t j = 0; j < nodes.size(); ++j) { - if (j != i) { - denominator *= nodes[i] - nodes[j]; - } - } - inv_denominators[i] = Real(1) / denominator; - } - return inv_denominators; -} - -template -void fill_axis_lagrange(Real x, - std::array& values, - std::array* first, - std::array* second) { - constexpr auto nodes = make_public_axis_nodes(); - constexpr auto inv_denominators = make_public_axis_inverse_denominators(); - for (std::size_t i = 0; i < nodes.size(); ++i) { - Real product = Real(1); - for (std::size_t j = 0; j < nodes.size(); ++j) { - if (j != i) { - product *= x - nodes[j]; - } - } - values[i] = product * inv_denominators[i]; - - if constexpr (NeedFirst) { - Real derivative = 
Real(0); - for (std::size_t m = 0; m < nodes.size(); ++m) { - if (m == i) { - continue; - } - Real term = Real(1); - for (std::size_t j = 0; j < nodes.size(); ++j) { - if (j != i && j != m) { - term *= x - nodes[j]; - } - } - derivative += term; - } - (*first)[i] = derivative * inv_denominators[i]; - } - - if constexpr (NeedSecond) { - Real curvature = Real(0); - for (std::size_t m = 0; m < nodes.size(); ++m) { - if (m == i) { - continue; - } - for (std::size_t l = 0; l < nodes.size(); ++l) { - if (l == i || l == m) { - continue; - } - Real term = Real(1); - for (std::size_t j = 0; j < nodes.size(); ++j) { - if (j != i && j != m && j != l) { - term *= x - nodes[j]; - } - } - curvature += term; - } - } - (*second)[i] = curvature * inv_denominators[i]; - } - } -} - -template -void fill_axis_values(Real x, std::array& values) { - fill_axis_lagrange(x, values, nullptr, nullptr); -} - -template -void fill_axis_values_first(Real x, - std::array& values, - std::array& first) { - fill_axis_lagrange(x, values, &first, nullptr); -} - -template -void fill_axis_values_first_second(Real x, - std::array& values, - std::array& first, - std::array& second) { - fill_axis_lagrange(x, values, &first, &second); -} - -template -constexpr std::array, (Order + 1) * (Order + 1)> -make_quad_tensor_node_axes() { - std::array, (Order + 1) * (Order + 1)> axes{}; - std::size_t n = 0; - - axes[n++] = {{0u, 0u}}; - axes[n++] = {{1u, 0u}}; - axes[n++] = {{1u, 1u}}; - axes[n++] = {{0u, 1u}}; - - for (int i = 1; i < Order; ++i) { - axes[n++] = {{public_axis_index(i, Order), 0u}}; - } - for (int j = 1; j < Order; ++j) { - axes[n++] = {{1u, public_axis_index(j, Order)}}; - } - for (int i = Order - 1; i >= 1; --i) { - axes[n++] = {{public_axis_index(i, Order), 1u}}; - } - for (int j = Order - 1; j >= 1; --j) { - axes[n++] = {{0u, public_axis_index(j, Order)}}; - } - - for (int j = 1; j < Order; ++j) { - for (int i = 1; i < Order; ++i) { - axes[n++] = {{public_axis_index(i, Order), public_axis_index(j, 
Order)}}; - } - } - - return axes; -} - -template -constexpr std::array, (Order + 1) * (Order + 1) * (Order + 1)> -make_hex_tensor_node_axes() { - std::array, (Order + 1) * (Order + 1) * (Order + 1)> axes{}; - std::size_t n = 0; - - axes[n++] = {{0u, 0u, 0u}}; - axes[n++] = {{1u, 0u, 0u}}; - axes[n++] = {{1u, 1u, 0u}}; - axes[n++] = {{0u, 1u, 0u}}; - axes[n++] = {{0u, 0u, 1u}}; - axes[n++] = {{1u, 0u, 1u}}; - axes[n++] = {{1u, 1u, 1u}}; - axes[n++] = {{0u, 1u, 1u}}; - - for (int i = 1; i < Order; ++i) { - axes[n++] = {{public_axis_index(i, Order), 0u, 0u}}; - } - for (int j = 1; j < Order; ++j) { - axes[n++] = {{1u, public_axis_index(j, Order), 0u}}; - } - for (int i = Order - 1; i >= 1; --i) { - axes[n++] = {{public_axis_index(i, Order), 1u, 0u}}; - } - for (int j = Order - 1; j >= 1; --j) { - axes[n++] = {{0u, public_axis_index(j, Order), 0u}}; - } - for (int i = 1; i < Order; ++i) { - axes[n++] = {{public_axis_index(i, Order), 0u, 1u}}; - } - for (int j = 1; j < Order; ++j) { - axes[n++] = {{1u, public_axis_index(j, Order), 1u}}; - } - for (int i = Order - 1; i >= 1; --i) { - axes[n++] = {{public_axis_index(i, Order), 1u, 1u}}; - } - for (int j = Order - 1; j >= 1; --j) { - axes[n++] = {{0u, public_axis_index(j, Order), 1u}}; - } - for (int k = 1; k < Order; ++k) { - axes[n++] = {{0u, 0u, public_axis_index(k, Order)}}; - } - for (int k = 1; k < Order; ++k) { - axes[n++] = {{1u, 0u, public_axis_index(k, Order)}}; - } - for (int k = 1; k < Order; ++k) { - axes[n++] = {{1u, 1u, public_axis_index(k, Order)}}; - } - for (int k = 1; k < Order; ++k) { - axes[n++] = {{0u, 1u, public_axis_index(k, Order)}}; - } - - for (int j = 1; j < Order; ++j) { - for (int i = 1; i < Order; ++i) { - axes[n++] = {{public_axis_index(i, Order), public_axis_index(j, Order), 0u}}; - } - } - for (int j = 1; j < Order; ++j) { - for (int i = 1; i < Order; ++i) { - axes[n++] = {{public_axis_index(i, Order), public_axis_index(j, Order), 1u}}; - } - } - for (int k = 1; k < Order; ++k) { - for 
(int i = 1; i < Order; ++i) { - axes[n++] = {{public_axis_index(i, Order), 0u, public_axis_index(k, Order)}}; - } - } - for (int k = 1; k < Order; ++k) { - for (int j = 1; j < Order; ++j) { - axes[n++] = {{1u, public_axis_index(j, Order), public_axis_index(k, Order)}}; - } - } - for (int k = 1; k < Order; ++k) { - for (int i = Order - 1; i >= 1; --i) { - axes[n++] = {{public_axis_index(i, Order), 1u, public_axis_index(k, Order)}}; - } - } - for (int k = 1; k < Order; ++k) { - for (int j = Order - 1; j >= 1; --j) { - axes[n++] = {{0u, public_axis_index(j, Order), public_axis_index(k, Order)}}; - } - } - - for (int k = 1; k < Order; ++k) { - for (int j = 1; j < Order; ++j) { - for (int i = 1; i < Order; ++i) { - axes[n++] = {{public_axis_index(i, Order), - public_axis_index(j, Order), - public_axis_index(k, Order)}}; - } - } - } - - return axes; -} - -template -constexpr std::array, (Order + 1) * (Order + 2) / 2> -make_triangle_simplex_exponents() { - std::array, (Order + 1) * (Order + 2) / 2> exponents{}; - std::size_t n = 0; - - exponents[n++] = {{static_cast(Order), 0u, 0u}}; - exponents[n++] = {{0u, static_cast(Order), 0u}}; - exponents[n++] = {{0u, 0u, static_cast(Order)}}; - - for (int m = 1; m < Order; ++m) { - exponents[n++] = {{static_cast(Order - m), static_cast(m), 0u}}; - } - for (int m = 1; m < Order; ++m) { - exponents[n++] = {{0u, static_cast(Order - m), static_cast(m)}}; - } - for (int m = 1; m < Order; ++m) { - exponents[n++] = {{static_cast(m), 0u, static_cast(Order - m)}}; - } - - for (int c = 1; c <= Order - 2; ++c) { - for (int b = 1; b <= Order - c - 1; ++b) { - const int a = Order - b - c; - exponents[n++] = {{static_cast(a), - static_cast(b), - static_cast(c)}}; - } - } - - return exponents; -} - -template -constexpr std::array, (Order + 1) * (Order + 2) * (Order + 3) / 6> -make_tetrahedron_simplex_exponents() { - std::array, (Order + 1) * (Order + 2) * (Order + 3) / 6> exponents{}; - std::size_t n = 0; - - exponents[n++] = 
{{static_cast(Order), 0u, 0u, 0u}}; - exponents[n++] = {{0u, static_cast(Order), 0u, 0u}}; - exponents[n++] = {{0u, 0u, static_cast(Order), 0u}}; - exponents[n++] = {{0u, 0u, 0u, static_cast(Order)}}; - - constexpr int edges[6][2] = { - {0, 1}, {1, 2}, {2, 0}, {0, 3}, {1, 3}, {2, 3} - }; - for (const auto& edge : edges) { - for (int m = 1; m < Order; ++m) { - std::array e{}; - e[static_cast(edge[0])] = static_cast(Order - m); - e[static_cast(edge[1])] = static_cast(m); - exponents[n++] = e; - } - } - - constexpr int faces[4][3] = { - {0, 1, 2}, - {0, 1, 3}, - {1, 2, 3}, - {0, 2, 3}, - }; - for (const auto& face : faces) { - for (int c = 1; c <= Order - 2; ++c) { - for (int b = 1; b <= Order - c - 1; ++b) { - const int a = Order - b - c; - std::array e{}; - e[static_cast(face[0])] = static_cast(a); - e[static_cast(face[1])] = static_cast(b); - e[static_cast(face[2])] = static_cast(c); - exponents[n++] = e; - } - } - } - - for (int l = 1; l <= Order - 3; ++l) { - for (int k = 1; k <= Order - l - 2; ++k) { - for (int j = 1; j <= Order - l - k - 1; ++j) { - const int i = Order - j - k - l; - exponents[n++] = {{static_cast(i), - static_cast(j), - static_cast(k), - static_cast(l)}}; - } - } - } - - return exponents; -} - -template -void fill_simplex_factor_sequence(Real lambda, - std::array& phi, - std::array* dphi, - std::array* d2phi) { - phi[0] = Real(1); - if constexpr (NeedFirst) { - (*dphi)[0] = Real(0); - } - if constexpr (NeedSecond) { - (*d2phi)[0] = Real(0); - } - - const Real t = static_cast(Order) * lambda; - constexpr Real dt_dlambda = static_cast(Order); - Real dphi_dt_prev = Real(0); - Real d2phi_dt2_prev = Real(0); - - for (int a = 1; a <= Order; ++a) { - const std::size_t au = static_cast(a); - const Real inv_a = Real(1) / static_cast(a); - const Real s = (t - static_cast(a - 1)) * inv_a; - phi[au] = s * phi[au - 1]; - - if constexpr (NeedFirst) { - const Real dphi_dt = inv_a * phi[au - 1] + s * dphi_dt_prev; - (*dphi)[au] = dt_dlambda * dphi_dt; - - if 
constexpr (NeedSecond) { - const Real d2phi_dt2 = Real(2) * inv_a * dphi_dt_prev + s * d2phi_dt2_prev; - (*d2phi)[au] = dt_dlambda * dt_dlambda * d2phi_dt2; - d2phi_dt2_prev = d2phi_dt2; - } - - dphi_dt_prev = dphi_dt; - } - } -} - -template -void fill_simplex_factor_values(Real lambda, std::array& phi) { - fill_simplex_factor_sequence(lambda, phi, nullptr, nullptr); -} - -template -void fill_simplex_factor_values_first(Real lambda, - std::array& phi, - std::array& dphi) { - fill_simplex_factor_sequence(lambda, phi, &dphi, nullptr); -} - -template -void fill_simplex_factor_values_first_second(Real lambda, - std::array& phi, - std::array& dphi, - std::array& d2phi) { - fill_simplex_factor_sequence(lambda, phi, &dphi, &d2phi); -} - -} // namespace detail - -// --------------------------------------------------------------------------- -// LagrangeLineFast -// --------------------------------------------------------------------------- -template -struct LagrangeLineFast; - -template<> -struct LagrangeLineFast<1> { - static constexpr int n_dofs = 2; - - static constexpr void evaluate(const math::Vector& xi, std::array& out) { - out[0] = (Real(1) - xi[0]) * Real(0.5); - out[1] = (Real(1) + xi[0]) * Real(0.5); - } - - static constexpr void evaluate_gradients(const math::Vector& /*xi*/, - std::array& out) { - out[0] = Gradient{Real(-0.5), Real(0), Real(0)}; - out[1] = Gradient{Real( 0.5), Real(0), Real(0)}; - } - - static constexpr void evaluate_hessians(const math::Vector& /*xi*/, - std::array& out) { - out[0] = Hessian{}; - out[1] = Hessian{}; - } -}; - -template<> -struct LagrangeLineFast<2> { - static constexpr int n_dofs = 3; - - static constexpr void evaluate(const math::Vector& xi, std::array& out) { - const Real x = xi[0]; - out[0] = x * (x - Real(1)) * Real(0.5); - out[1] = x * (x + Real(1)) * Real(0.5); - out[2] = (Real(1) - x) * (Real(1) + x); - } - - static constexpr void evaluate_gradients(const math::Vector& xi, - std::array& out) { - const Real x = xi[0]; - 
out[0] = Gradient{x - Real(0.5), Real(0), Real(0)}; - out[1] = Gradient{x + Real(0.5), Real(0), Real(0)}; - out[2] = Gradient{Real(-2) * x, Real(0), Real(0)}; - } - - static constexpr void evaluate_hessians(const math::Vector& /*xi*/, - std::array& out) { - out[0] = Hessian{}; - out[1] = Hessian{}; - out[2] = Hessian{}; - out[0](0, 0) = Real(1); - out[1](0, 0) = Real(1); - out[2](0, 0) = Real(-2); - } -}; - -template<> -struct LagrangeLineFast<3> { - static constexpr int n_dofs = 4; - - static void evaluate(const math::Vector& xi, std::array& out) { - detail::fill_axis_values<3>(xi[0], out); - } - - static void evaluate_gradients(const math::Vector& xi, - std::array& out) { - std::array values{}; - std::array first{}; - detail::fill_axis_values_first<3>(xi[0], values, first); - for (std::size_t i = 0; i < first.size(); ++i) { - out[i] = Gradient{first[i], Real(0), Real(0)}; - } - } - - static void evaluate_hessians(const math::Vector& xi, - std::array& out) { - std::array values{}; - std::array first{}; - std::array second{}; - detail::fill_axis_values_first_second<3>(xi[0], values, first, second); - for (std::size_t i = 0; i < second.size(); ++i) { - Hessian H{}; - H(0, 0) = second[i]; - out[i] = H; - } - } -}; - -// --------------------------------------------------------------------------- -// LagrangeQuadFast -// --------------------------------------------------------------------------- -template -struct LagrangeQuadFast; - -template<> -struct LagrangeQuadFast<1> { - static constexpr int n_dofs = 4; - - // VTK Quad4 corner ordering: (-,-), (+,-), (+,+), (-,+). 
- static constexpr void evaluate(const math::Vector& xi, std::array& out) { - const Real lx = (Real(1) - xi[0]) * Real(0.5); - const Real ly = (Real(1) - xi[1]) * Real(0.5); - const Real ux = (Real(1) + xi[0]) * Real(0.5); - const Real uy = (Real(1) + xi[1]) * Real(0.5); - out[0] = lx * ly; - out[1] = ux * ly; - out[2] = ux * uy; - out[3] = lx * uy; - } - - static constexpr void evaluate_gradients(const math::Vector& xi, - std::array& out) { - const Real lx = (Real(1) - xi[0]) * Real(0.5); - const Real ly = (Real(1) - xi[1]) * Real(0.5); - const Real ux = (Real(1) + xi[0]) * Real(0.5); - const Real uy = (Real(1) + xi[1]) * Real(0.5); - out[0] = Gradient{Real(-0.5) * ly, Real(-0.5) * lx, Real(0)}; - out[1] = Gradient{Real( 0.5) * ly, Real(-0.5) * ux, Real(0)}; - out[2] = Gradient{Real( 0.5) * uy, Real( 0.5) * ux, Real(0)}; - out[3] = Gradient{Real(-0.5) * uy, Real( 0.5) * lx, Real(0)}; - } - - static constexpr void evaluate_hessians(const math::Vector& /*xi*/, - std::array& out) { - out[0] = Hessian{}; - out[1] = Hessian{}; - out[2] = Hessian{}; - out[3] = Hessian{}; - constexpr Real qrt = Real(0.25); - out[0](0, 1) = qrt; out[0](1, 0) = qrt; - out[1](0, 1) = -qrt; out[1](1, 0) = -qrt; - out[2](0, 1) = qrt; out[2](1, 0) = qrt; - out[3](0, 1) = -qrt; out[3](1, 0) = -qrt; - } -}; - -template<> -struct LagrangeQuadFast<2> { - static constexpr int n_dofs = 9; - - static constexpr std::array, n_dofs> node_axes = {{ - {{0u, 0u}}, {{1u, 0u}}, {{1u, 1u}}, {{0u, 1u}}, - {{2u, 0u}}, {{1u, 2u}}, {{2u, 1u}}, {{0u, 2u}}, - {{2u, 2u}}, - }}; - - static void evaluate(const math::Vector& xi, std::array& out) { - std::array::n_dofs> lx{}; - std::array::n_dofs> ly{}; - LagrangeLineFast<2>::evaluate({xi[0], Real(0), Real(0)}, lx); - LagrangeLineFast<2>::evaluate({xi[1], Real(0), Real(0)}, ly); - for (std::size_t n = 0; n < node_axes.size(); ++n) { - out[n] = lx[node_axes[n][0]] * ly[node_axes[n][1]]; - } - } - - static void evaluate_gradients(const math::Vector& xi, - std::array& out) 
{ - std::array::n_dofs> lx{}; - std::array::n_dofs> ly{}; - std::array::n_dofs> gx{}; - std::array::n_dofs> gy{}; - LagrangeLineFast<2>::evaluate({xi[0], Real(0), Real(0)}, lx); - LagrangeLineFast<2>::evaluate({xi[1], Real(0), Real(0)}, ly); - LagrangeLineFast<2>::evaluate_gradients({xi[0], Real(0), Real(0)}, gx); - LagrangeLineFast<2>::evaluate_gradients({xi[1], Real(0), Real(0)}, gy); - for (std::size_t n = 0; n < node_axes.size(); ++n) { - const auto i = node_axes[n][0]; - const auto j = node_axes[n][1]; - out[n] = Gradient{gx[i][0] * ly[j], lx[i] * gy[j][0], Real(0)}; - } - } - - static void evaluate_hessians(const math::Vector& xi, - std::array& out) { - std::array::n_dofs> lx{}; - std::array::n_dofs> ly{}; - std::array::n_dofs> gx{}; - std::array::n_dofs> gy{}; - std::array::n_dofs> hx{}; - std::array::n_dofs> hy{}; - LagrangeLineFast<2>::evaluate({xi[0], Real(0), Real(0)}, lx); - LagrangeLineFast<2>::evaluate({xi[1], Real(0), Real(0)}, ly); - LagrangeLineFast<2>::evaluate_gradients({xi[0], Real(0), Real(0)}, gx); - LagrangeLineFast<2>::evaluate_gradients({xi[1], Real(0), Real(0)}, gy); - LagrangeLineFast<2>::evaluate_hessians({xi[0], Real(0), Real(0)}, hx); - LagrangeLineFast<2>::evaluate_hessians({xi[1], Real(0), Real(0)}, hy); - for (std::size_t n = 0; n < node_axes.size(); ++n) { - const auto i = node_axes[n][0]; - const auto j = node_axes[n][1]; - Hessian H{}; - H(0, 0) = hx[i](0, 0) * ly[j]; - H(1, 1) = lx[i] * hy[j](0, 0); - H(0, 1) = gx[i][0] * gy[j][0]; - H(1, 0) = H(0, 1); - out[n] = H; - } - } -}; - -template<> -struct LagrangeQuadFast<3> { - static constexpr int n_dofs = 16; - - static constexpr std::array, n_dofs> node_axes = - detail::make_quad_tensor_node_axes<3>(); - - static void evaluate(const math::Vector& xi, std::array& out) { - std::array::n_dofs> lx{}; - std::array::n_dofs> ly{}; - detail::fill_axis_values<3>(xi[0], lx); - detail::fill_axis_values<3>(xi[1], ly); - for (std::size_t n = 0; n < node_axes.size(); ++n) { - out[n] = 
lx[node_axes[n][0]] * ly[node_axes[n][1]]; - } - } - - static void evaluate_gradients(const math::Vector& xi, - std::array& out) { - std::array::n_dofs> lx{}; - std::array::n_dofs> ly{}; - std::array::n_dofs> gx{}; - std::array::n_dofs> gy{}; - detail::fill_axis_values_first<3>(xi[0], lx, gx); - detail::fill_axis_values_first<3>(xi[1], ly, gy); - for (std::size_t n = 0; n < node_axes.size(); ++n) { - const auto i = node_axes[n][0]; - const auto j = node_axes[n][1]; - out[n] = Gradient{gx[i] * ly[j], lx[i] * gy[j], Real(0)}; - } - } - - static void evaluate_hessians(const math::Vector& xi, - std::array& out) { - std::array::n_dofs> lx{}; - std::array::n_dofs> ly{}; - std::array::n_dofs> gx{}; - std::array::n_dofs> gy{}; - std::array::n_dofs> hx{}; - std::array::n_dofs> hy{}; - detail::fill_axis_values_first_second<3>(xi[0], lx, gx, hx); - detail::fill_axis_values_first_second<3>(xi[1], ly, gy, hy); - for (std::size_t n = 0; n < node_axes.size(); ++n) { - const auto i = node_axes[n][0]; - const auto j = node_axes[n][1]; - Hessian H{}; - H(0, 0) = hx[i] * ly[j]; - H(1, 1) = lx[i] * hy[j]; - H(0, 1) = gx[i] * gy[j]; - H(1, 0) = H(0, 1); - out[n] = H; - } - } -}; - -// --------------------------------------------------------------------------- -// LagrangeHexFast -// --------------------------------------------------------------------------- -template -struct LagrangeHexFast; - -template<> -struct LagrangeHexFast<1> { - static constexpr int n_dofs = 8; - - // VTK Hex8 corner ordering: (-,-,-), (+,-,-), (+,+,-), (-,+,-), - // (-,-,+), (+,-,+), (+,+,+), (-,+,+). 
- static constexpr void evaluate(const math::Vector& xi, std::array& out) { - const Real lx = (Real(1) - xi[0]) * Real(0.5); - const Real ly = (Real(1) - xi[1]) * Real(0.5); - const Real lz = (Real(1) - xi[2]) * Real(0.5); - const Real ux = (Real(1) + xi[0]) * Real(0.5); - const Real uy = (Real(1) + xi[1]) * Real(0.5); - const Real uz = (Real(1) + xi[2]) * Real(0.5); - // Precompute z-plane partial products (sum factorization). - const Real lxly = lx * ly; - const Real uxly = ux * ly; - const Real uxuy = ux * uy; - const Real lxuy = lx * uy; - out[0] = lxly * lz; - out[1] = uxly * lz; - out[2] = uxuy * lz; - out[3] = lxuy * lz; - out[4] = lxly * uz; - out[5] = uxly * uz; - out[6] = uxuy * uz; - out[7] = lxuy * uz; - } - - static constexpr void evaluate_gradients(const math::Vector& xi, - std::array& out) { - const Real lx = (Real(1) - xi[0]) * Real(0.5); - const Real ly = (Real(1) - xi[1]) * Real(0.5); - const Real lz = (Real(1) - xi[2]) * Real(0.5); - const Real ux = (Real(1) + xi[0]) * Real(0.5); - const Real uy = (Real(1) + xi[1]) * Real(0.5); - const Real uz = (Real(1) + xi[2]) * Real(0.5); - // dL_0(x)/dx = -0.5, dL_1(x)/dx = +0.5 along each axis. 
- out[0] = Gradient{Real(-0.5) * ly * lz, Real(-0.5) * lx * lz, Real(-0.5) * lx * ly}; - out[1] = Gradient{Real( 0.5) * ly * lz, Real(-0.5) * ux * lz, Real(-0.5) * ux * ly}; - out[2] = Gradient{Real( 0.5) * uy * lz, Real( 0.5) * ux * lz, Real(-0.5) * ux * uy}; - out[3] = Gradient{Real(-0.5) * uy * lz, Real( 0.5) * lx * lz, Real(-0.5) * lx * uy}; - out[4] = Gradient{Real(-0.5) * ly * uz, Real(-0.5) * lx * uz, Real( 0.5) * lx * ly}; - out[5] = Gradient{Real( 0.5) * ly * uz, Real(-0.5) * ux * uz, Real( 0.5) * ux * ly}; - out[6] = Gradient{Real( 0.5) * uy * uz, Real( 0.5) * ux * uz, Real( 0.5) * ux * uy}; - out[7] = Gradient{Real(-0.5) * uy * uz, Real( 0.5) * lx * uz, Real( 0.5) * lx * uy}; - } - - static constexpr void evaluate_hessians(const math::Vector& xi, - std::array& out) { - const Real lx = (Real(1) - xi[0]) * Real(0.5); - const Real ly = (Real(1) - xi[1]) * Real(0.5); - const Real lz = (Real(1) - xi[2]) * Real(0.5); - const Real ux = (Real(1) + xi[0]) * Real(0.5); - const Real uy = (Real(1) + xi[1]) * Real(0.5); - const Real uz = (Real(1) + xi[2]) * Real(0.5); - const Real ax[8] = {lx, ux, ux, lx, lx, ux, ux, lx}; - const Real ay[8] = {ly, ly, uy, uy, ly, ly, uy, uy}; - const Real az[8] = {lz, lz, lz, lz, uz, uz, uz, uz}; - const int sx[8] = {-1, 1, 1, -1, -1, 1, 1, -1}; - const int sy[8] = {-1, -1, 1, 1, -1, -1, 1, 1}; - const int sz[8] = {-1, -1, -1, -1, 1, 1, 1, 1}; - constexpr Real qrt = Real(0.25); - for (std::size_t n = 0; n < static_cast(n_dofs); ++n) { - out[n] = Hessian{}; - out[n](0, 1) = static_cast(sx[n] * sy[n]) * qrt * az[n]; - out[n](1, 0) = out[n](0, 1); - out[n](0, 2) = static_cast(sx[n] * sz[n]) * qrt * ay[n]; - out[n](2, 0) = out[n](0, 2); - out[n](1, 2) = static_cast(sy[n] * sz[n]) * qrt * ax[n]; - out[n](2, 1) = out[n](1, 2); - } - } -}; - -template<> -struct LagrangeHexFast<2> { - static constexpr int n_dofs = 27; - - static constexpr std::array, n_dofs> node_axes = {{ - {{0u, 0u, 0u}}, {{1u, 0u, 0u}}, {{1u, 1u, 0u}}, {{0u, 1u, 0u}}, - 
{{0u, 0u, 1u}}, {{1u, 0u, 1u}}, {{1u, 1u, 1u}}, {{0u, 1u, 1u}}, - {{2u, 0u, 0u}}, {{1u, 2u, 0u}}, {{2u, 1u, 0u}}, {{0u, 2u, 0u}}, - {{2u, 0u, 1u}}, {{1u, 2u, 1u}}, {{2u, 1u, 1u}}, {{0u, 2u, 1u}}, - {{0u, 0u, 2u}}, {{1u, 0u, 2u}}, {{1u, 1u, 2u}}, {{0u, 1u, 2u}}, - {{2u, 2u, 0u}}, {{2u, 2u, 1u}}, {{2u, 0u, 2u}}, {{1u, 2u, 2u}}, - {{2u, 1u, 2u}}, {{0u, 2u, 2u}}, {{2u, 2u, 2u}}, - }}; - - static void evaluate(const math::Vector& xi, std::array& out) { - std::array::n_dofs> lx{}; - std::array::n_dofs> ly{}; - std::array::n_dofs> lz{}; - LagrangeLineFast<2>::evaluate({xi[0], Real(0), Real(0)}, lx); - LagrangeLineFast<2>::evaluate({xi[1], Real(0), Real(0)}, ly); - LagrangeLineFast<2>::evaluate({xi[2], Real(0), Real(0)}, lz); - for (std::size_t n = 0; n < node_axes.size(); ++n) { - out[n] = lx[node_axes[n][0]] * ly[node_axes[n][1]] * lz[node_axes[n][2]]; - } - } - - static void evaluate_gradients(const math::Vector& xi, - std::array& out) { - std::array::n_dofs> lx{}; - std::array::n_dofs> ly{}; - std::array::n_dofs> lz{}; - std::array::n_dofs> gx{}; - std::array::n_dofs> gy{}; - std::array::n_dofs> gz{}; - LagrangeLineFast<2>::evaluate({xi[0], Real(0), Real(0)}, lx); - LagrangeLineFast<2>::evaluate({xi[1], Real(0), Real(0)}, ly); - LagrangeLineFast<2>::evaluate({xi[2], Real(0), Real(0)}, lz); - LagrangeLineFast<2>::evaluate_gradients({xi[0], Real(0), Real(0)}, gx); - LagrangeLineFast<2>::evaluate_gradients({xi[1], Real(0), Real(0)}, gy); - LagrangeLineFast<2>::evaluate_gradients({xi[2], Real(0), Real(0)}, gz); - for (std::size_t n = 0; n < node_axes.size(); ++n) { - const auto i = node_axes[n][0]; - const auto j = node_axes[n][1]; - const auto k = node_axes[n][2]; - out[n] = Gradient{ - gx[i][0] * ly[j] * lz[k], - lx[i] * gy[j][0] * lz[k], - lx[i] * ly[j] * gz[k][0], - }; - } - } - - static void evaluate_hessians(const math::Vector& xi, - std::array& out) { - std::array::n_dofs> lx{}; - std::array::n_dofs> ly{}; - std::array::n_dofs> lz{}; - std::array::n_dofs> gx{}; - 
std::array::n_dofs> gy{}; - std::array::n_dofs> gz{}; - std::array::n_dofs> hx{}; - std::array::n_dofs> hy{}; - std::array::n_dofs> hz{}; - LagrangeLineFast<2>::evaluate({xi[0], Real(0), Real(0)}, lx); - LagrangeLineFast<2>::evaluate({xi[1], Real(0), Real(0)}, ly); - LagrangeLineFast<2>::evaluate({xi[2], Real(0), Real(0)}, lz); - LagrangeLineFast<2>::evaluate_gradients({xi[0], Real(0), Real(0)}, gx); - LagrangeLineFast<2>::evaluate_gradients({xi[1], Real(0), Real(0)}, gy); - LagrangeLineFast<2>::evaluate_gradients({xi[2], Real(0), Real(0)}, gz); - LagrangeLineFast<2>::evaluate_hessians({xi[0], Real(0), Real(0)}, hx); - LagrangeLineFast<2>::evaluate_hessians({xi[1], Real(0), Real(0)}, hy); - LagrangeLineFast<2>::evaluate_hessians({xi[2], Real(0), Real(0)}, hz); - for (std::size_t n = 0; n < node_axes.size(); ++n) { - const auto i = node_axes[n][0]; - const auto j = node_axes[n][1]; - const auto k = node_axes[n][2]; - Hessian H{}; - H(0, 0) = hx[i](0, 0) * ly[j] * lz[k]; - H(1, 1) = lx[i] * hy[j](0, 0) * lz[k]; - H(2, 2) = lx[i] * ly[j] * hz[k](0, 0); - H(0, 1) = gx[i][0] * gy[j][0] * lz[k]; - H(1, 0) = H(0, 1); - H(0, 2) = gx[i][0] * ly[j] * gz[k][0]; - H(2, 0) = H(0, 2); - H(1, 2) = lx[i] * gy[j][0] * gz[k][0]; - H(2, 1) = H(1, 2); - out[n] = H; - } - } -}; - -template<> -struct LagrangeHexFast<3> { - static constexpr int n_dofs = 64; - - static constexpr std::array, n_dofs> node_axes = - detail::make_hex_tensor_node_axes<3>(); - - static void evaluate(const math::Vector& xi, std::array& out) { - std::array::n_dofs> lx{}; - std::array::n_dofs> ly{}; - std::array::n_dofs> lz{}; - detail::fill_axis_values<3>(xi[0], lx); - detail::fill_axis_values<3>(xi[1], ly); - detail::fill_axis_values<3>(xi[2], lz); - for (std::size_t n = 0; n < node_axes.size(); ++n) { - out[n] = lx[node_axes[n][0]] * ly[node_axes[n][1]] * lz[node_axes[n][2]]; - } - } - - static void evaluate_gradients(const math::Vector& xi, - std::array& out) { - std::array::n_dofs> lx{}; - std::array::n_dofs> 
ly{}; - std::array::n_dofs> lz{}; - std::array::n_dofs> gx{}; - std::array::n_dofs> gy{}; - std::array::n_dofs> gz{}; - detail::fill_axis_values_first<3>(xi[0], lx, gx); - detail::fill_axis_values_first<3>(xi[1], ly, gy); - detail::fill_axis_values_first<3>(xi[2], lz, gz); - for (std::size_t n = 0; n < node_axes.size(); ++n) { - const auto i = node_axes[n][0]; - const auto j = node_axes[n][1]; - const auto k = node_axes[n][2]; - out[n] = Gradient{ - gx[i] * ly[j] * lz[k], - lx[i] * gy[j] * lz[k], - lx[i] * ly[j] * gz[k], - }; - } - } - - static void evaluate_hessians(const math::Vector& xi, - std::array& out) { - std::array::n_dofs> lx{}; - std::array::n_dofs> ly{}; - std::array::n_dofs> lz{}; - std::array::n_dofs> gx{}; - std::array::n_dofs> gy{}; - std::array::n_dofs> gz{}; - std::array::n_dofs> hx{}; - std::array::n_dofs> hy{}; - std::array::n_dofs> hz{}; - detail::fill_axis_values_first_second<3>(xi[0], lx, gx, hx); - detail::fill_axis_values_first_second<3>(xi[1], ly, gy, hy); - detail::fill_axis_values_first_second<3>(xi[2], lz, gz, hz); - for (std::size_t n = 0; n < node_axes.size(); ++n) { - const auto i = node_axes[n][0]; - const auto j = node_axes[n][1]; - const auto k = node_axes[n][2]; - Hessian H{}; - H(0, 0) = hx[i] * ly[j] * lz[k]; - H(1, 1) = lx[i] * hy[j] * lz[k]; - H(2, 2) = lx[i] * ly[j] * hz[k]; - H(0, 1) = gx[i] * gy[j] * lz[k]; - H(1, 0) = H(0, 1); - H(0, 2) = gx[i] * ly[j] * gz[k]; - H(2, 0) = H(0, 2); - H(1, 2) = lx[i] * gy[j] * gz[k]; - H(2, 1) = H(1, 2); - out[n] = H; - } - } -}; - -// --------------------------------------------------------------------------- -// LagrangeTriFast -// --------------------------------------------------------------------------- -template -struct LagrangeTriFast; - -template<> -struct LagrangeTriFast<1> { - static constexpr int n_dofs = 3; - - static constexpr void evaluate(const math::Vector& xi, std::array& out) { - out[0] = Real(1) - xi[0] - xi[1]; - out[1] = xi[0]; - out[2] = xi[1]; - } - - static 
constexpr void evaluate_gradients(const math::Vector& /*xi*/, - std::array& out) { - out[0] = Gradient{Real(-1), Real(-1), Real(0)}; - out[1] = Gradient{Real( 1), Real( 0), Real(0)}; - out[2] = Gradient{Real( 0), Real( 1), Real(0)}; - } - - static constexpr void evaluate_hessians(const math::Vector& /*xi*/, - std::array& out) { - out[0] = Hessian{}; - out[1] = Hessian{}; - out[2] = Hessian{}; - } -}; - -template<> -struct LagrangeTriFast<2> { - static constexpr int n_dofs = 6; - - static constexpr void evaluate(const math::Vector& xi, std::array& out) { - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l0 = Real(1) - l1 - l2; - - out[0] = l0 * (Real(2) * l0 - Real(1)); - out[1] = l1 * (Real(2) * l1 - Real(1)); - out[2] = l2 * (Real(2) * l2 - Real(1)); - out[3] = Real(4) * l0 * l1; - out[4] = Real(4) * l1 * l2; - out[5] = Real(4) * l0 * l2; - } - - static constexpr void evaluate_gradients(const math::Vector& xi, - std::array& out) { - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l0 = Real(1) - l1 - l2; - constexpr Gradient g0{Real(-1), Real(-1), Real(0)}; - constexpr Gradient g1{Real( 1), Real( 0), Real(0)}; - constexpr Gradient g2{Real( 0), Real( 1), Real(0)}; - - out[0] = detail::scaled_gradient(g0, Real(4) * l0 - Real(1)); - out[1] = detail::scaled_gradient(g1, Real(4) * l1 - Real(1)); - out[2] = detail::scaled_gradient(g2, Real(4) * l2 - Real(1)); - out[3] = detail::p2_edge_gradient(l0, g0, l1, g1); - out[4] = detail::p2_edge_gradient(l1, g1, l2, g2); - out[5] = detail::p2_edge_gradient(l0, g0, l2, g2); - } - - static constexpr void evaluate_hessians(const math::Vector& /*xi*/, - std::array& out) { - constexpr Gradient g0{Real(-1), Real(-1), Real(0)}; - constexpr Gradient g1{Real( 1), Real( 0), Real(0)}; - constexpr Gradient g2{Real( 0), Real( 1), Real(0)}; - - out[0] = detail::p2_vertex_hessian(g0); - out[1] = detail::p2_vertex_hessian(g1); - out[2] = detail::p2_vertex_hessian(g2); - out[3] = detail::p2_edge_hessian(g0, g1); - 
out[4] = detail::p2_edge_hessian(g1, g2); - out[5] = detail::p2_edge_hessian(g0, g2); - } -}; - -template<> -struct LagrangeTriFast<3> { - static constexpr int n_dofs = 10; - - static constexpr std::array, n_dofs> exponents = - detail::make_triangle_simplex_exponents<3>(); - - static void evaluate(const math::Vector& xi, std::array& out) { - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l0 = Real(1) - l1 - l2; - std::array phi0{}; - std::array phi1{}; - std::array phi2{}; - detail::fill_simplex_factor_values<3>(l0, phi0); - detail::fill_simplex_factor_values<3>(l1, phi1); - detail::fill_simplex_factor_values<3>(l2, phi2); - - for (std::size_t n = 0; n < exponents.size(); ++n) { - const auto& e = exponents[n]; - out[n] = phi0[e[0]] * phi1[e[1]] * phi2[e[2]]; - } - } - - static void evaluate_gradients(const math::Vector& xi, - std::array& out) { - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l0 = Real(1) - l1 - l2; - std::array phi0{}; - std::array phi1{}; - std::array phi2{}; - std::array dphi0{}; - std::array dphi1{}; - std::array dphi2{}; - detail::fill_simplex_factor_values_first<3>(l0, phi0, dphi0); - detail::fill_simplex_factor_values_first<3>(l1, phi1, dphi1); - detail::fill_simplex_factor_values_first<3>(l2, phi2, dphi2); - - for (std::size_t n = 0; n < exponents.size(); ++n) { - const auto& e = exponents[n]; - const Real v0 = phi0[e[0]]; - const Real v1 = phi1[e[1]]; - const Real v2 = phi2[e[2]]; - const Real dl0 = dphi0[e[0]] * v1 * v2; - const Real dl1 = v0 * dphi1[e[1]] * v2; - const Real dl2 = v0 * v1 * dphi2[e[2]]; - out[n] = Gradient{dl1 - dl0, dl2 - dl0, Real(0)}; - } - } - - static void evaluate_hessians(const math::Vector& xi, - std::array& out) { - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l0 = Real(1) - l1 - l2; - std::array phi0{}; - std::array phi1{}; - std::array phi2{}; - std::array dphi0{}; - std::array dphi1{}; - std::array dphi2{}; - std::array d2phi0{}; - std::array d2phi1{}; - std::array 
d2phi2{}; - detail::fill_simplex_factor_values_first_second<3>(l0, phi0, dphi0, d2phi0); - detail::fill_simplex_factor_values_first_second<3>(l1, phi1, dphi1, d2phi1); - detail::fill_simplex_factor_values_first_second<3>(l2, phi2, dphi2, d2phi2); - - for (std::size_t n = 0; n < exponents.size(); ++n) { - const auto& e = exponents[n]; - const Real v0 = phi0[e[0]]; - const Real v1 = phi1[e[1]]; - const Real v2 = phi2[e[2]]; - const Real D0 = dphi0[e[0]]; - const Real D1 = dphi1[e[1]]; - const Real D2 = dphi2[e[2]]; - const Real H00 = d2phi0[e[0]] * v1 * v2; - const Real H11 = v0 * d2phi1[e[1]] * v2; - const Real H22 = v0 * v1 * d2phi2[e[2]]; - const Real H01 = D0 * D1 * v2; - const Real H02 = D0 * v1 * D2; - const Real H12 = v0 * D1 * D2; - - Hessian H{}; - H(0, 0) = H00 - Real(2) * H01 + H11; - H(1, 1) = H00 - Real(2) * H02 + H22; - H(0, 1) = H00 - H01 - H02 + H12; - H(1, 0) = H(0, 1); - out[n] = H; - } - } -}; - -// --------------------------------------------------------------------------- -// LagrangeTetFast -// --------------------------------------------------------------------------- -template -struct LagrangeTetFast; - -template<> -struct LagrangeTetFast<1> { - static constexpr int n_dofs = 4; - - static constexpr void evaluate(const math::Vector& xi, std::array& out) { - out[0] = Real(1) - xi[0] - xi[1] - xi[2]; - out[1] = xi[0]; - out[2] = xi[1]; - out[3] = xi[2]; - } - - static constexpr void evaluate_gradients(const math::Vector& /*xi*/, - std::array& out) { - out[0] = Gradient{Real(-1), Real(-1), Real(-1)}; - out[1] = Gradient{Real( 1), Real( 0), Real( 0)}; - out[2] = Gradient{Real( 0), Real( 1), Real( 0)}; - out[3] = Gradient{Real( 0), Real( 0), Real( 1)}; - } - - static constexpr void evaluate_hessians(const math::Vector& /*xi*/, - std::array& out) { - out[0] = Hessian{}; - out[1] = Hessian{}; - out[2] = Hessian{}; - out[3] = Hessian{}; - } -}; - -template<> -struct LagrangeTetFast<2> { - static constexpr int n_dofs = 10; - - static constexpr void 
evaluate(const math::Vector& xi, std::array& out) { - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l3 = xi[2]; - const Real l0 = Real(1) - l1 - l2 - l3; - - out[0] = l0 * (Real(2) * l0 - Real(1)); - out[1] = l1 * (Real(2) * l1 - Real(1)); - out[2] = l2 * (Real(2) * l2 - Real(1)); - out[3] = l3 * (Real(2) * l3 - Real(1)); - out[4] = Real(4) * l0 * l1; - out[5] = Real(4) * l1 * l2; - out[6] = Real(4) * l0 * l2; - out[7] = Real(4) * l0 * l3; - out[8] = Real(4) * l1 * l3; - out[9] = Real(4) * l2 * l3; - } - - static constexpr void evaluate_gradients(const math::Vector& xi, - std::array& out) { - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l3 = xi[2]; - const Real l0 = Real(1) - l1 - l2 - l3; - constexpr Gradient g0{Real(-1), Real(-1), Real(-1)}; - constexpr Gradient g1{Real( 1), Real( 0), Real( 0)}; - constexpr Gradient g2{Real( 0), Real( 1), Real( 0)}; - constexpr Gradient g3{Real( 0), Real( 0), Real( 1)}; - - out[0] = detail::scaled_gradient(g0, Real(4) * l0 - Real(1)); - out[1] = detail::scaled_gradient(g1, Real(4) * l1 - Real(1)); - out[2] = detail::scaled_gradient(g2, Real(4) * l2 - Real(1)); - out[3] = detail::scaled_gradient(g3, Real(4) * l3 - Real(1)); - out[4] = detail::p2_edge_gradient(l0, g0, l1, g1); - out[5] = detail::p2_edge_gradient(l1, g1, l2, g2); - out[6] = detail::p2_edge_gradient(l0, g0, l2, g2); - out[7] = detail::p2_edge_gradient(l0, g0, l3, g3); - out[8] = detail::p2_edge_gradient(l1, g1, l3, g3); - out[9] = detail::p2_edge_gradient(l2, g2, l3, g3); - } - - static constexpr void evaluate_hessians(const math::Vector& /*xi*/, - std::array& out) { - constexpr Gradient g0{Real(-1), Real(-1), Real(-1)}; - constexpr Gradient g1{Real( 1), Real( 0), Real( 0)}; - constexpr Gradient g2{Real( 0), Real( 1), Real( 0)}; - constexpr Gradient g3{Real( 0), Real( 0), Real( 1)}; - - out[0] = detail::p2_vertex_hessian(g0); - out[1] = detail::p2_vertex_hessian(g1); - out[2] = detail::p2_vertex_hessian(g2); - out[3] = 
detail::p2_vertex_hessian(g3); - out[4] = detail::p2_edge_hessian(g0, g1); - out[5] = detail::p2_edge_hessian(g1, g2); - out[6] = detail::p2_edge_hessian(g0, g2); - out[7] = detail::p2_edge_hessian(g0, g3); - out[8] = detail::p2_edge_hessian(g1, g3); - out[9] = detail::p2_edge_hessian(g2, g3); - } -}; - -template<> -struct LagrangeTetFast<3> { - static constexpr int n_dofs = 20; - - static constexpr std::array, n_dofs> exponents = - detail::make_tetrahedron_simplex_exponents<3>(); - - static void evaluate(const math::Vector& xi, std::array& out) { - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l3 = xi[2]; - const Real l0 = Real(1) - l1 - l2 - l3; - std::array phi0{}; - std::array phi1{}; - std::array phi2{}; - std::array phi3{}; - detail::fill_simplex_factor_values<3>(l0, phi0); - detail::fill_simplex_factor_values<3>(l1, phi1); - detail::fill_simplex_factor_values<3>(l2, phi2); - detail::fill_simplex_factor_values<3>(l3, phi3); - - for (std::size_t n = 0; n < exponents.size(); ++n) { - const auto& e = exponents[n]; - out[n] = phi0[e[0]] * phi1[e[1]] * phi2[e[2]] * phi3[e[3]]; - } - } - - static void evaluate_gradients(const math::Vector& xi, - std::array& out) { - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l3 = xi[2]; - const Real l0 = Real(1) - l1 - l2 - l3; - std::array phi0{}; - std::array phi1{}; - std::array phi2{}; - std::array phi3{}; - std::array dphi0{}; - std::array dphi1{}; - std::array dphi2{}; - std::array dphi3{}; - detail::fill_simplex_factor_values_first<3>(l0, phi0, dphi0); - detail::fill_simplex_factor_values_first<3>(l1, phi1, dphi1); - detail::fill_simplex_factor_values_first<3>(l2, phi2, dphi2); - detail::fill_simplex_factor_values_first<3>(l3, phi3, dphi3); - - for (std::size_t n = 0; n < exponents.size(); ++n) { - const auto& e = exponents[n]; - const Real v0 = phi0[e[0]]; - const Real v1 = phi1[e[1]]; - const Real v2 = phi2[e[2]]; - const Real v3 = phi3[e[3]]; - const Real dl0 = dphi0[e[0]] * v1 * v2 * v3; 
- const Real dl1 = v0 * dphi1[e[1]] * v2 * v3; - const Real dl2 = v0 * v1 * dphi2[e[2]] * v3; - const Real dl3 = v0 * v1 * v2 * dphi3[e[3]]; - out[n] = Gradient{dl1 - dl0, dl2 - dl0, dl3 - dl0}; - } - } - - static void evaluate_hessians(const math::Vector& xi, - std::array& out) { - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l3 = xi[2]; - const Real l0 = Real(1) - l1 - l2 - l3; - std::array phi0{}; - std::array phi1{}; - std::array phi2{}; - std::array phi3{}; - std::array dphi0{}; - std::array dphi1{}; - std::array dphi2{}; - std::array dphi3{}; - std::array d2phi0{}; - std::array d2phi1{}; - std::array d2phi2{}; - std::array d2phi3{}; - detail::fill_simplex_factor_values_first_second<3>(l0, phi0, dphi0, d2phi0); - detail::fill_simplex_factor_values_first_second<3>(l1, phi1, dphi1, d2phi1); - detail::fill_simplex_factor_values_first_second<3>(l2, phi2, dphi2, d2phi2); - detail::fill_simplex_factor_values_first_second<3>(l3, phi3, dphi3, d2phi3); - - for (std::size_t n = 0; n < exponents.size(); ++n) { - const auto& e = exponents[n]; - const Real v0 = phi0[e[0]]; - const Real v1 = phi1[e[1]]; - const Real v2 = phi2[e[2]]; - const Real v3 = phi3[e[3]]; - const Real D0 = dphi0[e[0]]; - const Real D1 = dphi1[e[1]]; - const Real D2 = dphi2[e[2]]; - const Real D3 = dphi3[e[3]]; - - const Real H00 = d2phi0[e[0]] * v1 * v2 * v3; - const Real H11 = v0 * d2phi1[e[1]] * v2 * v3; - const Real H22 = v0 * v1 * d2phi2[e[2]] * v3; - const Real H33 = v0 * v1 * v2 * d2phi3[e[3]]; - const Real H01 = D0 * D1 * v2 * v3; - const Real H02 = D0 * v1 * D2 * v3; - const Real H03 = D0 * v1 * v2 * D3; - const Real H12 = v0 * D1 * D2 * v3; - const Real H13 = v0 * D1 * v2 * D3; - const Real H23 = v0 * v1 * D2 * D3; - - Hessian H{}; - H(0, 0) = H00 - Real(2) * H01 + H11; - H(1, 1) = H00 - Real(2) * H02 + H22; - H(2, 2) = H00 - Real(2) * H03 + H33; - H(0, 1) = H00 - H01 - H02 + H12; - H(1, 0) = H(0, 1); - H(0, 2) = H00 - H01 - H03 + H13; - H(2, 0) = H(0, 2); - H(1, 2) = H00 - 
H02 - H03 + H23; - H(2, 1) = H(1, 2); - out[n] = H; - } - } -}; - -} // namespace basis -} // namespace FE -} // namespace svmp - -#endif // SVMP_FE_BASIS_LAGRANGEBASISFAST_H diff --git a/Code/Source/solver/FE/Basis/LagrangeBasisPyramid.cpp b/Code/Source/solver/FE/Basis/LagrangeBasisPyramid.cpp deleted file mode 100644 index 4a332621e..000000000 --- a/Code/Source/solver/FE/Basis/LagrangeBasisPyramid.cpp +++ /dev/null @@ -1,2069 +0,0 @@ -#include "LagrangeBasisPyramid.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "Basis/BasisExceptions.h" -#include "BasisTolerance.h" -#include "Math/DenseLinearAlgebra.h" -#include "Math/DenseTransformKernels.h" -#include "LagrangeBasisUtility.h" -#include "PyramidModalBasis.h" - -namespace svmp { -namespace FE { -namespace basis { -namespace detail { - -class PyramidLagrangeCache { -public: - using ModalTerm = pyramid_modal::Term; - - struct UvPolynomial { - using Power = std::pair; - std::vector> coeffs; - - void add_term(int pu, int pv, Real coeff, Real tol = Real(1e-14)) { - if (std::abs(coeff) <= tol) { - return; - } - const auto key = std::make_pair(pu, pv); - const auto found = std::lower_bound( - coeffs.begin(), - coeffs.end(), - key, - [](const auto& entry, const Power& value) { return entry.first < value; }); - if (found == coeffs.end() || found->first != key) { - coeffs.insert(found, {key, coeff}); - return; - } - - found->second += coeff; - if (std::abs(found->second) <= tol) { - coeffs.erase(found); - } - } - - void add_scaled(const UvPolynomial& other, Real scale, Real tol = Real(1e-14)) { - if (std::abs(scale) <= tol) { - return; - } - for (const auto& [powers, coeff] : other.coeffs) { - add_term(powers.first, powers.second, scale * coeff, tol); - } - } - - bool empty(Real tol = Real(1e-12)) const { - for (const auto& [powers, coeff] : coeffs) { - (void)powers; - if (std::abs(coeff) > tol) { - return false; - } - } - return true; - } - - bool is_constant(Real 
tol = Real(1e-12)) const { - for (const auto& [powers, coeff] : coeffs) { - if (std::abs(coeff) <= tol) { - continue; - } - if (powers.first != 0 || powers.second != 0) { - return false; - } - } - return true; - } - - Real constant_value(Real tol = Real(1e-12)) const { - Real value = Real(0); - for (const auto& [powers, coeff] : coeffs) { - if (std::abs(coeff) <= tol) { - continue; - } - if (powers.first == 0 && powers.second == 0) { - value += coeff; - } - } - return value; - } - }; - - struct ApexSeries { - std::vector> by_power; - - void add_term(int beta, int pu, int pv, Real coeff, Real tol = Real(1e-14)) { - const auto found = find_or_insert(beta); - found->second.add_term(pu, pv, coeff, tol); - if (found->second.empty(tol)) { - by_power.erase(found); - } - } - - void add_scaled(const ApexSeries& other, Real scale, Real tol = Real(1e-14)) { - if (std::abs(scale) <= tol) { - return; - } - for (const auto& [beta, poly] : other.by_power) { - const auto found = find_or_insert(beta); - found->second.add_scaled(poly, scale, tol); - if (found->second.empty(tol)) { - by_power.erase(found); - } - } - } - - private: - std::vector>::iterator find_or_insert(int beta) { - const auto found = std::lower_bound( - by_power.begin(), - by_power.end(), - beta, - [](const auto& entry, int value) { return entry.first < value; }); - if (found != by_power.end() && found->first == beta) { - return found; - } - return by_power.insert(found, {beta, UvPolynomial{}}); - } - }; - - using GradientSeries = std::array; - using HessianSeries = std::array, 3>; - - enum class ApexLimitKind { - Constant, - DirectionDependent, - Singular, - }; - - enum class ApexRankStatus { - Exact, - DirectionDependent, - Singular, - }; - - struct ApexClassification { - ApexLimitKind kind{ApexLimitKind::Constant}; - Real constant_value{0}; - int leading_power{1}; - }; - - struct ApexData { - std::vector values; - std::vector gradients; - std::vector hessians; - ApexRankStatus 
gradient_status{ApexRankStatus::Exact}; - ApexRankStatus hessian_status{ApexRankStatus::Exact}; - }; - - struct OrderData { - int order{0}; - std::vector> nodes; - std::vector modal_terms; - std::vector modal_to_nodal; - ApexData apex; - }; - - struct EvaluationScratch { - std::vector modal_values; - std::vector modal_gradient_components; - std::vector modal_hessian_components; - std::vector modal_gradients; - std::vector modal_hessians; - pyramid_modal::EvaluationPoint modal_point; - - void prewarm(std::size_t max_size, std::size_t max_qpts) { - const std::size_t batched_size = max_size * std::max(max_qpts, 1u); - modal_values.reserve(batched_size); - modal_gradient_components.reserve(batched_size * 3u); - modal_hessian_components.reserve(batched_size * 9u); - modal_gradients.reserve(max_size); - modal_hessians.reserve(max_size); - } - }; - - static EvaluationScratch& evaluation_scratch() { - // Scratch is intentionally thread-local: production assembly uses a - // persistent worker-thread team, so buffers stay warm on each worker. 
- static thread_local EvaluationScratch scratch; - return scratch; - } - - static void prewarm_scratch(std::size_t max_size, std::size_t max_qpts) { - evaluation_scratch().prewarm(max_size, max_qpts); - } - - static bool is_apex_point(const math::Vector& xi) { - const Real tol = apex_coord_tolerance(); - return std::abs(xi[0]) <= tol && - std::abs(xi[1]) <= tol && - std::abs(Real(1) - xi[2]) <= tol; - } - - static bool on_degenerate_top_plane(const math::Vector& xi) { - return basis_near_zero(Real(1) - xi[2]); - } - - static void validate_top_plane_query(const math::Vector& xi) { - if (on_degenerate_top_plane(xi) && !is_apex_point(xi)) [[unlikely]] { - throw BasisEvaluationException( - "Pyramid reference evaluation on the degenerate z=1 plane is only defined at the apex", - __FILE__, __LINE__, __func__); - } - } - - static OrderData build_order_data(int order) { - OrderData data; - data.order = order; - - data.nodes = build_public_nodes(order); - data.modal_terms = pyramid_modal::build_terms(order); - - const std::size_t n = data.nodes.size(); - if (data.modal_terms.size() != n) { - throw BasisConstructionException("LagrangeBasis pyramid modal basis size mismatch", - __FILE__, __LINE__, __func__); - } - - std::vector vandermonde(n * n, Real(0)); - for (std::size_t row = 0; row < n; ++row) { - pyramid_modal::EvaluationPoint modal_point; - pyramid_modal::prepare_evaluation_point( - data.modal_terms, data.nodes[row], modal_point); - for (std::size_t col = 0; col < n; ++col) { - Real value = Real(0); - pyramid_modal::evaluate_term(data.modal_terms[col], modal_point, value); - vandermonde[row * n + col] = value; - } - } - - const auto inverse_result = math::invert_dense_matrix_with_diagnostics( - std::move(vandermonde), - n, - "LagrangeBasis pyramid Vandermonde"); - math::validate_dense_inverse_diagnostics( - inverse_result, - n, - "LagrangeBasis pyramid Vandermonde"); - const std::vector& inverse = inverse_result.inverse; - - data.modal_to_nodal.assign(n * n, Real(0)); 
- for (std::size_t basis_i = 0; basis_i < n; ++basis_i) { - for (std::size_t modal_j = 0; modal_j < n; ++modal_j) { - data.modal_to_nodal[basis_i * n + modal_j] = - inverse[modal_j * n + basis_i]; - } - } - data.apex = build_apex_data(data); - return data; - } - - static bool has_low_order_fast_modal_to_nodal(const OrderData& data) noexcept { - return data.order == 1 || data.order == 2; - } - - static const OrderData& get(int order) { - constexpr int kMaxOnceCachedOrder = 12; - if (order >= 0 && order <= kMaxOnceCachedOrder) { - static std::array flags; - static std::array, kMaxOnceCachedOrder + 1> cache; - const auto idx = static_cast(order); - std::call_once(flags[idx], [idx, order]() { - cache[idx] = std::make_unique(build_order_data(order)); - }); - return *cache[idx]; - } - - static std::mutex fallback_mutex; - static std::map> fallback_cache; - - std::lock_guard lock(fallback_mutex); - const auto found = fallback_cache.find(order); - if (found != fallback_cache.end()) { - return *found->second; - } - - auto data = std::make_unique(build_order_data(order)); - const auto [it, inserted] = fallback_cache.emplace(order, std::move(data)); - (void)inserted; - return *it->second; - } - - static void evaluate_values(const OrderData& data, - const math::Vector& xi, - std::vector& values) { - validate_top_plane_query(xi); - if (is_apex_point(xi)) { - values = data.apex.values; - return; - } - - auto& scratch = evaluation_scratch(); - auto& modal = scratch.modal_values; - auto& modal_point = scratch.modal_point; - modal.resize(data.modal_terms.size()); - pyramid_modal::prepare_evaluation_point(data.modal_terms, xi, modal_point); - for (std::size_t m = 0; m < data.modal_terms.size(); ++m) { - pyramid_modal::evaluate_term(data.modal_terms[m], modal_point, modal[m]); - } - if (has_low_order_fast_modal_to_nodal(data)) { - apply_sparse_basis_to_nodal(data, modal, values); - } else { - apply_modal_to_nodal(data, modal, values); - } - } - - static void evaluate_gradients(const 
OrderData& data, - const math::Vector& xi, - std::vector& gradients) { - validate_top_plane_query(xi); - if (is_apex_point(xi)) { - if (data.apex.gradient_status != ApexRankStatus::Exact) { - throw BasisEvaluationException( - apex_status_message("gradient", data.apex.gradient_status), - __FILE__, __LINE__, __func__); - } - gradients = data.apex.gradients; - return; - } - - auto& scratch = evaluation_scratch(); - auto& modal_gradients = scratch.modal_gradients; - auto& modal_point = scratch.modal_point; - modal_gradients.resize(data.modal_terms.size()); - pyramid_modal::prepare_evaluation_point(data.modal_terms, xi, modal_point); - for (std::size_t m = 0; m < data.modal_terms.size(); ++m) { - Real value = Real(0); - pyramid_modal::evaluate_term(data.modal_terms[m], modal_point, value, &modal_gradients[m]); - } - if (has_low_order_fast_modal_to_nodal(data)) { - apply_sparse_basis_to_nodal(data, modal_gradients, gradients); - } else { - apply_modal_to_nodal(data, modal_gradients, gradients); - } - } - - static void evaluate_hessians(const OrderData& data, - const math::Vector& xi, - std::vector& hessians) { - validate_top_plane_query(xi); - if (is_apex_point(xi)) { - if (data.apex.hessian_status != ApexRankStatus::Exact) { - throw BasisEvaluationException( - apex_status_message("Hessian", data.apex.hessian_status), - __FILE__, __LINE__, __func__); - } - hessians = data.apex.hessians; - return; - } - - auto& scratch = evaluation_scratch(); - auto& modal_hessians = scratch.modal_hessians; - auto& modal_point = scratch.modal_point; - modal_hessians.resize(data.modal_terms.size()); - pyramid_modal::prepare_evaluation_point(data.modal_terms, xi, modal_point); - for (std::size_t m = 0; m < data.modal_terms.size(); ++m) { - Real value = Real(0); - pyramid_modal::evaluate_term(data.modal_terms[m], modal_point, value, nullptr, &modal_hessians[m]); - } - if (has_low_order_fast_modal_to_nodal(data)) { - apply_sparse_basis_to_nodal(data, modal_hessians, hessians); - } else { - 
apply_modal_to_nodal(data, modal_hessians, hessians); - } - } - - static void evaluate_all(const OrderData& data, - const math::Vector& xi, - std::vector& values, - std::vector& gradients, - std::vector& hessians) { - validate_top_plane_query(xi); - if (is_apex_point(xi)) { - if (data.apex.gradient_status != ApexRankStatus::Exact) { - throw BasisEvaluationException( - apex_status_message("gradient", data.apex.gradient_status), - __FILE__, __LINE__, __func__); - } - if (data.apex.hessian_status != ApexRankStatus::Exact) { - throw BasisEvaluationException( - apex_status_message("Hessian", data.apex.hessian_status), - __FILE__, __LINE__, __func__); - } - values = data.apex.values; - gradients = data.apex.gradients; - hessians = data.apex.hessians; - return; - } - - const std::size_t n = data.modal_terms.size(); - auto& scratch = evaluation_scratch(); - auto& modal_values = scratch.modal_values; - auto& modal_gradients = scratch.modal_gradients; - auto& modal_hessians = scratch.modal_hessians; - auto& modal_point = scratch.modal_point; - modal_values.resize(n); - modal_gradients.resize(n); - modal_hessians.resize(n); - pyramid_modal::prepare_evaluation_point(data.modal_terms, xi, modal_point); - - for (std::size_t m = 0; m < n; ++m) { - pyramid_modal::evaluate_term( - data.modal_terms[m], modal_point, modal_values[m], &modal_gradients[m], &modal_hessians[m]); - } - - if (has_low_order_fast_modal_to_nodal(data)) { - apply_sparse_basis_to_nodal_all( - data, modal_values, modal_gradients, modal_hessians, values, gradients, hessians); - return; - } - - values.resize(n); - gradients.resize(n); - hessians.resize(n); - for (std::size_t basis_i = 0; basis_i < n; ++basis_i) { - const Real* row = data.modal_to_nodal.data() + basis_i * n; - Gradient gradient{}; - Hessian hessian{}; - Real value = Real(0); - for (std::size_t modal_j = 0; modal_j < n; ++modal_j) { - const Real coeff = row[modal_j]; - value += coeff * modal_values[modal_j]; - - const Real* modal_gradient = 
modal_gradients[modal_j].data(); - gradient[0] += coeff * modal_gradient[0]; - gradient[1] += coeff * modal_gradient[1]; - gradient[2] += coeff * modal_gradient[2]; - - const Real* modal_hessian = modal_hessians[modal_j].data(); - Real* hessian_data = hessian.data(); - hessian_data[0] += coeff * modal_hessian[0]; - hessian_data[1] += coeff * modal_hessian[1]; - hessian_data[2] += coeff * modal_hessian[2]; - hessian_data[4] += coeff * modal_hessian[4]; - hessian_data[5] += coeff * modal_hessian[5]; - hessian_data[8] += coeff * modal_hessian[8]; - } - values[basis_i] = value; - gradients[basis_i] = gradient; - Real* hessian_data = hessian.data(); - hessian_data[3] = hessian_data[1]; - hessian_data[6] = hessian_data[2]; - hessian_data[7] = hessian_data[5]; - hessians[basis_i] = hessian; - } - } - - static void evaluate_values_to(const OrderData& data, - const math::Vector& xi, - Real* SVMP_RESTRICT values_out) { - validate_top_plane_query(xi); - if (is_apex_point(xi)) { - std::copy(data.apex.values.begin(), data.apex.values.end(), values_out); - return; - } - - auto& scratch = evaluation_scratch(); - auto& modal = scratch.modal_values; - auto& modal_point = scratch.modal_point; - modal.resize(data.modal_terms.size()); - pyramid_modal::prepare_evaluation_point(data.modal_terms, xi, modal_point); - for (std::size_t m = 0; m < data.modal_terms.size(); ++m) { - pyramid_modal::evaluate_term(data.modal_terms[m], modal_point, modal[m]); - } - if (has_low_order_fast_modal_to_nodal(data)) { - apply_sparse_basis_to_nodal_to(data, modal, values_out); - } else { - apply_modal_to_nodal_to(data, modal, values_out); - } - } - - static void evaluate_gradients_to(const OrderData& data, - const math::Vector& xi, - Real* SVMP_RESTRICT gradients_out) { - validate_top_plane_query(xi); - if (is_apex_point(xi)) { - if (data.apex.gradient_status != ApexRankStatus::Exact) { - throw BasisEvaluationException( - apex_status_message("gradient", data.apex.gradient_status), - __FILE__, __LINE__, 
__func__); - } - for (std::size_t i = 0; i < data.apex.gradients.size(); ++i) { - gradients_out[i * 3u + 0u] = data.apex.gradients[i][0]; - gradients_out[i * 3u + 1u] = data.apex.gradients[i][1]; - gradients_out[i * 3u + 2u] = data.apex.gradients[i][2]; - } - return; - } - - auto& scratch = evaluation_scratch(); - auto& modal_gradients = scratch.modal_gradients; - auto& modal_point = scratch.modal_point; - modal_gradients.resize(data.modal_terms.size()); - pyramid_modal::prepare_evaluation_point(data.modal_terms, xi, modal_point); - for (std::size_t m = 0; m < data.modal_terms.size(); ++m) { - Real value = Real(0); - pyramid_modal::evaluate_term(data.modal_terms[m], modal_point, value, &modal_gradients[m]); - } - if (has_low_order_fast_modal_to_nodal(data)) { - apply_sparse_basis_to_nodal_to(data, modal_gradients, gradients_out); - } else { - apply_modal_to_nodal_to(data, modal_gradients, gradients_out); - } - } - - static void evaluate_hessians_to(const OrderData& data, - const math::Vector& xi, - Real* SVMP_RESTRICT hessians_out) { - validate_top_plane_query(xi); - if (is_apex_point(xi)) { - if (data.apex.hessian_status != ApexRankStatus::Exact) { - throw BasisEvaluationException( - apex_status_message("Hessian", data.apex.hessian_status), - __FILE__, __LINE__, __func__); - } - for (std::size_t i = 0; i < data.apex.hessians.size(); ++i) { - store_hessian(data.apex.hessians[i], hessians_out + i * 9u); - } - return; - } - - auto& scratch = evaluation_scratch(); - auto& modal_hessians = scratch.modal_hessians; - auto& modal_point = scratch.modal_point; - modal_hessians.resize(data.modal_terms.size()); - pyramid_modal::prepare_evaluation_point(data.modal_terms, xi, modal_point); - for (std::size_t m = 0; m < data.modal_terms.size(); ++m) { - Real value = Real(0); - pyramid_modal::evaluate_term(data.modal_terms[m], modal_point, value, nullptr, &modal_hessians[m]); - } - if (has_low_order_fast_modal_to_nodal(data)) { - apply_sparse_basis_to_nodal_to(data, 
modal_hessians, hessians_out); - } else { - apply_modal_to_nodal_to(data, modal_hessians, hessians_out); - } - } - - static void evaluate_all_to(const OrderData& data, - const math::Vector& xi, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - validate_top_plane_query(xi); - if (is_apex_point(xi)) { - if (data.apex.gradient_status != ApexRankStatus::Exact) { - throw BasisEvaluationException( - apex_status_message("gradient", data.apex.gradient_status), - __FILE__, __LINE__, __func__); - } - if (data.apex.hessian_status != ApexRankStatus::Exact) { - throw BasisEvaluationException( - apex_status_message("Hessian", data.apex.hessian_status), - __FILE__, __LINE__, __func__); - } - std::copy(data.apex.values.begin(), data.apex.values.end(), values_out); - for (std::size_t i = 0; i < data.apex.gradients.size(); ++i) { - gradients_out[i * 3u + 0u] = data.apex.gradients[i][0]; - gradients_out[i * 3u + 1u] = data.apex.gradients[i][1]; - gradients_out[i * 3u + 2u] = data.apex.gradients[i][2]; - } - for (std::size_t i = 0; i < data.apex.hessians.size(); ++i) { - const Real* hessian = data.apex.hessians[i].data(); - std::copy(hessian, hessian + 9u, hessians_out + i * 9u); - } - return; - } - - const std::size_t n = data.modal_terms.size(); - auto& scratch = evaluation_scratch(); - auto& modal_values = scratch.modal_values; - auto& modal_gradients = scratch.modal_gradients; - auto& modal_hessians = scratch.modal_hessians; - auto& modal_point = scratch.modal_point; - modal_values.resize(n); - modal_gradients.resize(n); - modal_hessians.resize(n); - pyramid_modal::prepare_evaluation_point(data.modal_terms, xi, modal_point); - - for (std::size_t m = 0; m < n; ++m) { - pyramid_modal::evaluate_term( - data.modal_terms[m], modal_point, modal_values[m], &modal_gradients[m], &modal_hessians[m]); - } - - if (has_low_order_fast_modal_to_nodal(data)) { - apply_sparse_basis_to_nodal_all_to( - data, modal_values, modal_gradients, 
modal_hessians, values_out, gradients_out, hessians_out); - return; - } - - for (std::size_t basis_i = 0; basis_i < n; ++basis_i) { - const Real* row = data.modal_to_nodal.data() + basis_i * n; - Real value = Real(0); - Real gradient[3] = {Real(0), Real(0), Real(0)}; - Real hessian[9] = {}; - for (std::size_t modal_j = 0; modal_j < n; ++modal_j) { - const Real coeff = row[modal_j]; - value += coeff * modal_values[modal_j]; - - const Real* modal_gradient = modal_gradients[modal_j].data(); - gradient[0] += coeff * modal_gradient[0]; - gradient[1] += coeff * modal_gradient[1]; - gradient[2] += coeff * modal_gradient[2]; - - const Real* modal_hessian = modal_hessians[modal_j].data(); - hessian[0] += coeff * modal_hessian[0]; - hessian[1] += coeff * modal_hessian[1]; - hessian[2] += coeff * modal_hessian[2]; - hessian[4] += coeff * modal_hessian[4]; - hessian[5] += coeff * modal_hessian[5]; - hessian[8] += coeff * modal_hessian[8]; - } - - values_out[basis_i] = value; - Real* gradient_out = gradients_out + basis_i * 3u; - gradient_out[0] = gradient[0]; - gradient_out[1] = gradient[1]; - gradient_out[2] = gradient[2]; - - Real* hessian_out = hessians_out + basis_i * 9u; - hessian_out[0] = hessian[0]; - hessian_out[1] = hessian[1]; - hessian_out[2] = hessian[2]; - hessian_out[3] = hessian[1]; - hessian_out[4] = hessian[4]; - hessian_out[5] = hessian[5]; - hessian_out[6] = hessian[2]; - hessian_out[7] = hessian[5]; - hessian_out[8] = hessian[8]; - } - } - - static void evaluate_at_quadrature_points_strided( - const OrderData& data, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - const unsigned mask = (values_out != nullptr ? 1u : 0u) | - (gradients_out != nullptr ? 2u : 0u) | - (hessians_out != nullptr ? 
4u : 0u); - switch (mask) { - case 0u: - validate_strided_points(points); - return; - case 1u: - evaluate_at_quadrature_points_strided_impl( - data, points, output_stride, values_out, gradients_out, hessians_out); - return; - case 2u: - evaluate_at_quadrature_points_strided_impl( - data, points, output_stride, values_out, gradients_out, hessians_out); - return; - case 3u: - evaluate_at_quadrature_points_strided_impl( - data, points, output_stride, values_out, gradients_out, hessians_out); - return; - case 4u: - evaluate_at_quadrature_points_strided_impl( - data, points, output_stride, values_out, gradients_out, hessians_out); - return; - case 5u: - evaluate_at_quadrature_points_strided_impl( - data, points, output_stride, values_out, gradients_out, hessians_out); - return; - case 6u: - evaluate_at_quadrature_points_strided_impl( - data, points, output_stride, values_out, gradients_out, hessians_out); - return; - case 7u: - evaluate_at_quadrature_points_strided_impl( - data, points, output_stride, values_out, gradients_out, hessians_out); - return; - default: - return; - } - } - -private: - static void validate_strided_points(const std::vector>& points) { - for (const auto& xi : points) { - validate_top_plane_query(xi); - } - } - - template - static void write_apex_strided(const OrderData& data, - std::size_t q, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - const std::size_t n = data.modal_terms.size(); - if constexpr (NeedValues) { - for (std::size_t basis_i = 0; basis_i < n; ++basis_i) { - values_out[basis_i * output_stride + q] = data.apex.values[basis_i]; - } - } - if constexpr (NeedGradients) { - if (data.apex.gradient_status != ApexRankStatus::Exact) { - throw BasisEvaluationException( - apex_status_message("gradient", data.apex.gradient_status), - __FILE__, __LINE__, __func__); - } - for (std::size_t basis_i = 0; basis_i < n; ++basis_i) { - Real* g = gradients_out + 
basis_i * 3u * output_stride; - g[0u * output_stride + q] = data.apex.gradients[basis_i][0]; - g[1u * output_stride + q] = data.apex.gradients[basis_i][1]; - g[2u * output_stride + q] = data.apex.gradients[basis_i][2]; - } - } - if constexpr (NeedHessians) { - if (data.apex.hessian_status != ApexRankStatus::Exact) { - throw BasisEvaluationException( - apex_status_message("Hessian", data.apex.hessian_status), - __FILE__, __LINE__, __func__); - } - for (std::size_t basis_i = 0; basis_i < n; ++basis_i) { - const Real* hessian = data.apex.hessians[basis_i].data(); - Real* H = hessians_out + basis_i * 9u * output_stride; - for (std::size_t component = 0; component < 9u; ++component) { - H[component * output_stride + q] = hessian[component]; - } - } - } - } - - template - static void fill_low_order_modal_jet(std::size_t modal_i, - const Real* SVMP_RESTRICT xp, - const Real* SVMP_RESTRICT yp, - const Real* SVMP_RESTRICT zp, - const Real* SVMP_RESTRICT inv_tp, - Real* SVMP_RESTRICT modal_values, - Real (*SVMP_RESTRICT modal_gradients)[3], - Real (*SVMP_RESTRICT modal_hessians)[9]) { - const Real xy_base = xp[Px] * yp[Py]; - const Real base = xy_base * zp[Pz]; - const Real inv_denom = inv_tp[DenomPower]; - const Real value = base * inv_denom; - - if constexpr (NeedValues) { - modal_values[modal_i] = value; - } - if constexpr (NeedGradients) { - Real* g = modal_gradients[modal_i]; - if constexpr (Px > 0) { - g[0] = static_cast(Px) * xp[Px - 1] * yp[Py] * zp[Pz] * inv_denom; - } else { - g[0] = Real(0); - } - if constexpr (Py > 0) { - g[1] = static_cast(Py) * xp[Px] * yp[Py - 1] * zp[Pz] * inv_denom; - } else { - g[1] = Real(0); - } - Real gz = Real(0); - if constexpr (Pz > 0) { - gz += static_cast(Pz) * xy_base * zp[Pz - 1] * inv_denom; - } - if constexpr (DenomPower > 0) { - gz += static_cast(DenomPower) * base * inv_tp[DenomPower + 1]; - } - g[2] = gz; - } - if constexpr (NeedHessians) { - Real* H = modal_hessians[modal_i]; - if constexpr (Px > 1) { - H[0] = static_cast(Px 
* (Px - 1)) * - xp[Px - 2] * yp[Py] * zp[Pz] * inv_denom; - } else { - H[0] = Real(0); - } - if constexpr (Py > 1) { - H[4] = static_cast(Py * (Py - 1)) * - xp[Px] * yp[Py - 2] * zp[Pz] * inv_denom; - } else { - H[4] = Real(0); - } - Real hxy = Real(0); - if constexpr (Px > 0 && Py > 0) { - hxy = static_cast(Px * Py) * - xp[Px - 1] * yp[Py - 1] * zp[Pz] * inv_denom; - } - H[1] = hxy; - H[3] = hxy; - - Real hxz = Real(0); - if constexpr (Px > 0) { - constexpr Real px_real = static_cast(Px); - const Real x_deriv_y = px_real * xp[Px - 1] * yp[Py]; - if constexpr (Pz > 0) { - hxz += x_deriv_y * static_cast(Pz) * - zp[Pz - 1] * inv_denom; - } - if constexpr (DenomPower > 0) { - hxz += x_deriv_y * static_cast(DenomPower) * - zp[Pz] * inv_tp[DenomPower + 1]; - } - } - H[2] = hxz; - H[6] = hxz; - - Real hyz = Real(0); - if constexpr (Py > 0) { - constexpr Real py_real = static_cast(Py); - const Real x_y_deriv = py_real * xp[Px] * yp[Py - 1]; - if constexpr (Pz > 0) { - hyz += x_y_deriv * static_cast(Pz) * - zp[Pz - 1] * inv_denom; - } - if constexpr (DenomPower > 0) { - hyz += x_y_deriv * static_cast(DenomPower) * - zp[Pz] * inv_tp[DenomPower + 1]; - } - } - H[5] = hyz; - H[7] = hyz; - - Real hzz = Real(0); - if constexpr (Pz > 1) { - hzz += static_cast(Pz * (Pz - 1)) * - xy_base * zp[Pz - 2] * inv_denom; - } - if constexpr (Pz > 0 && DenomPower > 0) { - hzz += static_cast(2 * Pz * DenomPower) * xy_base * - zp[Pz - 1] * inv_tp[DenomPower + 1]; - } - if constexpr (DenomPower > 0) { - hzz += static_cast(DenomPower * (DenomPower + 1)) * - base * inv_tp[DenomPower + 2]; - } - H[8] = hzz; - } - } - - template - static void evaluate_low_order_modal_jets(const OrderData& data, - const math::Vector& xi, - Real* SVMP_RESTRICT modal_values, - Real (*SVMP_RESTRICT modal_gradients)[3], - Real (*SVMP_RESTRICT modal_hessians)[9]) { - const Real x = xi[0]; - const Real y = xi[1]; - const Real z = xi[2]; - const Real inv_t = Real(1) / (Real(1) - z); - const Real xp[3] = {Real(1), x, x * 
x}; - const Real yp[3] = {Real(1), y, y * y}; - const Real zp[3] = {Real(1), z, z * z}; - Real inv_tp[5] = {Real(1), inv_t, Real(0), Real(0), Real(0)}; - inv_tp[2] = inv_tp[1] * inv_t; - inv_tp[3] = inv_tp[2] * inv_t; - inv_tp[4] = inv_tp[3] * inv_t; - - fill_low_order_modal_jet<0, 0, 0, 0, NeedValues, NeedGradients, NeedHessians>( - 0u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); - fill_low_order_modal_jet<1, 0, 0, 0, NeedValues, NeedGradients, NeedHessians>( - 1u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); - if (data.order == 1) { - fill_low_order_modal_jet<0, 1, 0, 0, NeedValues, NeedGradients, NeedHessians>( - 2u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); - fill_low_order_modal_jet<1, 1, 0, 1, NeedValues, NeedGradients, NeedHessians>( - 3u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); - fill_low_order_modal_jet<0, 0, 1, 0, NeedValues, NeedGradients, NeedHessians>( - 4u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); - return; - } - - fill_low_order_modal_jet<2, 0, 0, 0, NeedValues, NeedGradients, NeedHessians>( - 2u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); - fill_low_order_modal_jet<0, 1, 0, 0, NeedValues, NeedGradients, NeedHessians>( - 3u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); - fill_low_order_modal_jet<1, 1, 0, 1, NeedValues, NeedGradients, NeedHessians>( - 4u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); - fill_low_order_modal_jet<2, 1, 0, 1, NeedValues, NeedGradients, NeedHessians>( - 5u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); - fill_low_order_modal_jet<0, 2, 0, 0, NeedValues, NeedGradients, NeedHessians>( - 6u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); - fill_low_order_modal_jet<1, 2, 0, 1, NeedValues, NeedGradients, NeedHessians>( - 7u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); 
- fill_low_order_modal_jet<2, 2, 0, 2, NeedValues, NeedGradients, NeedHessians>( - 8u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); - fill_low_order_modal_jet<0, 0, 1, 0, NeedValues, NeedGradients, NeedHessians>( - 9u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); - fill_low_order_modal_jet<1, 0, 1, 0, NeedValues, NeedGradients, NeedHessians>( - 10u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); - fill_low_order_modal_jet<0, 1, 1, 0, NeedValues, NeedGradients, NeedHessians>( - 11u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); - fill_low_order_modal_jet<1, 1, 1, 1, NeedValues, NeedGradients, NeedHessians>( - 12u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); - fill_low_order_modal_jet<0, 0, 2, 0, NeedValues, NeedGradients, NeedHessians>( - 13u, xp, yp, zp, inv_tp, modal_values, modal_gradients, modal_hessians); - } - - template - static bool try_evaluate_low_order_strided( - const OrderData& data, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - if (!has_low_order_fast_modal_to_nodal(data)) { - return false; - } - for (const auto& xi : points) { - validate_top_plane_query(xi); - if (is_apex_point(xi)) { - return false; - } - } - - Real modal_values[14]; - Real modal_gradients[14][3]; - Real modal_hessians[14][9]; - for (std::size_t q = 0; q < points.size(); ++q) { - evaluate_low_order_modal_jets( - data, points[q], modal_values, modal_gradients, modal_hessians); - if constexpr (NeedValues) { - apply_low_order_combination( - data, - 1u, - [&](std::size_t modal_i, std::size_t) { - return modal_values[modal_i]; - }, - [&](std::size_t basis_i, std::size_t, Real value) { - values_out[basis_i * output_stride + q] = value; - }); - } - if constexpr (NeedGradients) { - apply_low_order_combination( - data, - 3u, - [&](std::size_t modal_i, 
std::size_t component) { - return modal_gradients[modal_i][component]; - }, - [&](std::size_t basis_i, std::size_t component, Real value) { - gradients_out[basis_i * 3u * output_stride + - component * output_stride + q] = value; - }); - } - if constexpr (NeedHessians) { - apply_low_order_combination( - data, - 9u, - [&](std::size_t modal_i, std::size_t component) { - return modal_hessians[modal_i][component]; - }, - [&](std::size_t basis_i, std::size_t component, Real value) { - hessians_out[basis_i * 9u * output_stride + - component * output_stride + q] = value; - }); - } - } - return true; - } - - template - static void evaluate_at_quadrature_points_strided_impl( - const OrderData& data, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - const std::size_t n = data.modal_terms.size(); - if (points.empty() || n == 0u) { - return; - } - if (try_evaluate_low_order_strided( - data, points, output_stride, values_out, gradients_out, hessians_out)) { - return; - } - - auto& scratch = evaluation_scratch(); - auto& modal_values = scratch.modal_values; - auto& modal_gradients = scratch.modal_gradients; - auto& modal_hessians = scratch.modal_hessians; - auto& modal_point = scratch.modal_point; - if constexpr (NeedValues) { - modal_values.resize(n); - } - if constexpr (NeedGradients) { - modal_gradients.resize(n); - } - if constexpr (NeedHessians) { - modal_hessians.resize(n); - } - const bool use_fast_modal_to_nodal = has_low_order_fast_modal_to_nodal(data); - - if (!use_fast_modal_to_nodal) { - bool has_apex_query = false; - for (const auto& xi : points) { - validate_top_plane_query(xi); - has_apex_query = has_apex_query || is_apex_point(xi); - } - - if (!has_apex_query) { - const std::size_t num_qpts = points.size(); - if constexpr (NeedValues) { - modal_values.resize(n * num_qpts); - } - if constexpr (NeedGradients) { - 
scratch.modal_gradient_components.resize(n * 3u * num_qpts); - } - if constexpr (NeedHessians) { - scratch.modal_hessian_components.resize(n * 9u * num_qpts); - } - - for (std::size_t q = 0; q < num_qpts; ++q) { - const auto& xi = points[q]; - pyramid_modal::prepare_evaluation_point(data.modal_terms, xi, modal_point); - for (std::size_t modal_j = 0; modal_j < n; ++modal_j) { - Real modal_value = Real(0); - Gradient modal_gradient{}; - Hessian modal_hessian{}; - pyramid_modal::evaluate_term( - data.modal_terms[modal_j], - modal_point, - modal_value, - NeedGradients ? &modal_gradient : nullptr, - NeedHessians ? &modal_hessian : nullptr); - if constexpr (NeedValues) { - modal_values[modal_j * num_qpts + q] = modal_value; - } - if constexpr (NeedGradients) { - for (std::size_t component = 0; component < 3u; ++component) { - scratch.modal_gradient_components[ - (modal_j * 3u + component) * num_qpts + q] = - modal_gradient[component]; - } - } - if constexpr (NeedHessians) { - for (std::size_t component = 0; component < 9u; ++component) { - scratch.modal_hessian_components[ - (modal_j * 9u + component) * num_qpts + q] = - modal_hessian.data()[component]; - } - } - } - } - - const Real* transform = data.modal_to_nodal.data(); - if constexpr (NeedValues) { - math::dense_transform_batched_row_major( - transform, - n, - n, - modal_values.data(), - num_qpts, - values_out, - output_stride, - num_qpts); - } - if constexpr (NeedGradients) { - for (std::size_t component = 0; component < 3u; ++component) { - math::dense_transform_batched_row_major( - transform, - n, - n, - scratch.modal_gradient_components.data() + component * num_qpts, - 3u * num_qpts, - gradients_out + component * output_stride, - 3u * output_stride, - num_qpts); - } - } - if constexpr (NeedHessians) { - for (std::size_t component = 0; component < 9u; ++component) { - math::dense_transform_batched_row_major( - transform, - n, - n, - scratch.modal_hessian_components.data() + component * num_qpts, - 9u * num_qpts, 
- hessians_out + component * output_stride, - 9u * output_stride, - num_qpts); - } - } - return; - } - } - - for (std::size_t q = 0; q < points.size(); ++q) { - const auto& xi = points[q]; - validate_top_plane_query(xi); - - if (is_apex_point(xi)) { - write_apex_strided( - data, q, output_stride, values_out, gradients_out, hessians_out); - continue; - } - - pyramid_modal::prepare_evaluation_point(data.modal_terms, xi, modal_point); - for (std::size_t modal_j = 0; modal_j < n; ++modal_j) { - Gradient* gradient_out = nullptr; - Hessian* hessian_out = nullptr; - if constexpr (NeedGradients) { - gradient_out = &modal_gradients[modal_j]; - } - if constexpr (NeedHessians) { - hessian_out = &modal_hessians[modal_j]; - } - if constexpr (NeedValues) { - pyramid_modal::evaluate_term( - data.modal_terms[modal_j], - modal_point, - modal_values[modal_j], - gradient_out, - hessian_out); - } else { - Real value = Real(0); - pyramid_modal::evaluate_term( - data.modal_terms[modal_j], - modal_point, - value, - gradient_out, - hessian_out); - } - } - - if (use_fast_modal_to_nodal) { - if constexpr (NeedValues) { - apply_low_order_combination( - data, - 1u, - [&](std::size_t modal_i, std::size_t) { - return modal_values[modal_i]; - }, - [&](std::size_t basis_i, std::size_t, Real value) { - values_out[basis_i * output_stride + q] = value; - }); - } - if constexpr (NeedGradients) { - apply_low_order_combination( - data, - 3u, - [&](std::size_t modal_i, std::size_t component) { - return modal_gradients[modal_i][component]; - }, - [&](std::size_t basis_i, std::size_t component, Real value) { - gradients_out[basis_i * 3u * output_stride + - component * output_stride + q] = value; - }); - } - if constexpr (NeedHessians) { - apply_low_order_combination( - data, - 9u, - [&](std::size_t modal_i, std::size_t component) { - return modal_hessians[modal_i].data()[component]; - }, - [&](std::size_t basis_i, std::size_t component, Real value) { - hessians_out[basis_i * 9u * output_stride + - 
component * output_stride + q] = value; - }); - } - continue; - } - - for (std::size_t basis_i = 0; basis_i < n; ++basis_i) { - const Real* matrix_row = data.modal_to_nodal.data() + basis_i * n; - [[maybe_unused]] Real value = Real(0); - [[maybe_unused]] std::array gradient{}; - [[maybe_unused]] std::array hessian{}; - - for (std::size_t modal_j = 0; modal_j < n; ++modal_j) { - const Real coeff = matrix_row[modal_j]; - if constexpr (NeedValues) { - value += coeff * modal_values[modal_j]; - } - if constexpr (NeedGradients) { - const Real* modal_gradient = modal_gradients[modal_j].data(); - gradient[0] += coeff * modal_gradient[0]; - gradient[1] += coeff * modal_gradient[1]; - gradient[2] += coeff * modal_gradient[2]; - } - if constexpr (NeedHessians) { - const Real* modal_hessian = modal_hessians[modal_j].data(); - for (std::size_t component = 0; component < 9u; ++component) { - hessian[component] += coeff * modal_hessian[component]; - } - } - } - - if constexpr (NeedValues) { - values_out[basis_i * output_stride + q] = value; - } - if constexpr (NeedGradients) { - Real* g = gradients_out + basis_i * 3u * output_stride; - g[0u * output_stride + q] = gradient[0]; - g[1u * output_stride + q] = gradient[1]; - g[2u * output_stride + q] = gradient[2]; - } - if constexpr (NeedHessians) { - Real* H = hessians_out + basis_i * 9u * output_stride; - for (std::size_t component = 0; component < 9u; ++component) { - H[component * output_stride + q] = hessian[component]; - } - } - } - } - } - - static Real apex_coord_tolerance() noexcept { - return basis_scaled_tolerance(); - } - - // Coefficient pruning for symbolic apex series, not a reference-coordinate - // roundoff test. Keep this strict and separate from BasisTolerance. 
- static constexpr Real kSeriesTolerance = Real(1e-12); - - static Real binomial_coeff(int n, int k) { - if (k < 0 || k > n) { - return Real(0); - } - if (k == 0 || k == n) { - return Real(1); - } - k = std::min(k, n - k); - Real coeff = Real(1); - for (int i = 1; i <= k; ++i) { - coeff *= static_cast(n - (k - i)); - coeff /= static_cast(i); - } - return coeff; - } - - static void add_z_expansion(ApexSeries& series, - int z_power, - int beta0, - int pu, - int pv, - Real coeff) { - for (int q = 0; q <= z_power; ++q) { - const Real z_coeff = coeff * binomial_coeff(z_power, q) * - ((q % 2 == 0) ? Real(1) : Real(-1)); - series.add_term(beta0 + q, pu, pv, z_coeff, kSeriesTolerance); - } - } - - static ApexSeries modal_value_asymptotic(const ModalTerm& term) { - ApexSeries series; - add_z_expansion(series, - term.pz, - term.px + term.py - term.denom_power, - term.px, - term.py, - Real(1)); - return series; - } - - static GradientSeries modal_gradient_asymptotic(const ModalTerm& term) { - GradientSeries gradient_series{}; - - if (term.px > 0) { - add_z_expansion(gradient_series[0], - term.pz, - term.px - 1 + term.py - term.denom_power, - term.px - 1, - term.py, - static_cast(term.px)); - } - - if (term.py > 0) { - add_z_expansion(gradient_series[1], - term.pz, - term.px + term.py - 1 - term.denom_power, - term.px, - term.py - 1, - static_cast(term.py)); - } - - if (term.pz > 0) { - add_z_expansion(gradient_series[2], - term.pz - 1, - term.px + term.py - term.denom_power, - term.px, - term.py, - static_cast(term.pz)); - } - if (term.denom_power > 0) { - add_z_expansion(gradient_series[2], - term.pz, - term.px + term.py - term.denom_power - 1, - term.px, - term.py, - static_cast(term.denom_power)); - } - - return gradient_series; - } - - static HessianSeries modal_hessian_asymptotic(const ModalTerm& term) { - HessianSeries hessian_series{}; - - if (term.px > 1) { - add_z_expansion(hessian_series[0][0], - term.pz, - term.px - 2 + term.py - term.denom_power, - term.px - 2, - 
term.py, - static_cast(term.px * (term.px - 1))); - } - - if (term.py > 1) { - add_z_expansion(hessian_series[1][1], - term.pz, - term.px + term.py - 2 - term.denom_power, - term.px, - term.py - 2, - static_cast(term.py * (term.py - 1))); - } - - if (term.px > 0 && term.py > 0) { - add_z_expansion(hessian_series[0][1], - term.pz, - term.px + term.py - 2 - term.denom_power, - term.px - 1, - term.py - 1, - static_cast(term.px * term.py)); - hessian_series[1][0] = hessian_series[0][1]; - } - - if (term.px > 0 && term.pz > 0) { - add_z_expansion(hessian_series[0][2], - term.pz - 1, - term.px - 1 + term.py - term.denom_power, - term.px - 1, - term.py, - static_cast(term.px * term.pz)); - } - if (term.px > 0 && term.denom_power > 0) { - add_z_expansion(hessian_series[0][2], - term.pz, - term.px - 1 + term.py - term.denom_power - 1, - term.px - 1, - term.py, - static_cast(term.px * term.denom_power)); - } - hessian_series[2][0] = hessian_series[0][2]; - - if (term.py > 0 && term.pz > 0) { - add_z_expansion(hessian_series[1][2], - term.pz - 1, - term.px + term.py - 1 - term.denom_power, - term.px, - term.py - 1, - static_cast(term.py * term.pz)); - } - if (term.py > 0 && term.denom_power > 0) { - add_z_expansion(hessian_series[1][2], - term.pz, - term.px + term.py - 1 - term.denom_power - 1, - term.px, - term.py - 1, - static_cast(term.py * term.denom_power)); - } - hessian_series[2][1] = hessian_series[1][2]; - - if (term.pz > 1) { - add_z_expansion(hessian_series[2][2], - term.pz - 2, - term.px + term.py - term.denom_power, - term.px, - term.py, - static_cast(term.pz * (term.pz - 1))); - } - if (term.pz > 0 && term.denom_power > 0) { - add_z_expansion(hessian_series[2][2], - term.pz - 1, - term.px + term.py - term.denom_power - 1, - term.px, - term.py, - static_cast(2 * term.pz * term.denom_power)); - } - if (term.denom_power > 0) { - add_z_expansion(hessian_series[2][2], - term.pz, - term.px + term.py - term.denom_power - 2, - term.px, - term.py, - 
static_cast(term.denom_power * (term.denom_power + 1))); - } - - return hessian_series; - } - - static ApexClassification classify_series(const ApexSeries& series) { - for (const auto& [beta, poly] : series.by_power) { - if (poly.empty(kSeriesTolerance)) { - continue; - } - if (beta < 0) { - return {ApexLimitKind::Singular, Real(0), beta}; - } - if (beta > 0) { - return {ApexLimitKind::Constant, Real(0), beta}; - } - if (poly.is_constant(kSeriesTolerance)) { - return {ApexLimitKind::Constant, poly.constant_value(kSeriesTolerance), beta}; - } - return {ApexLimitKind::DirectionDependent, Real(0), beta}; - } - return {ApexLimitKind::Constant, Real(0), 1}; - } - - static void accumulate_rank_status(ApexRankStatus& status, - const ApexClassification& classification) { - if (classification.kind == ApexLimitKind::Singular) { - status = ApexRankStatus::Singular; - return; - } - if (classification.kind == ApexLimitKind::DirectionDependent && - status != ApexRankStatus::Singular) { - status = ApexRankStatus::DirectionDependent; - } - } - - static std::string apex_status_message(const char* rank, - ApexRankStatus status) { - switch (status) { - case ApexRankStatus::DirectionDependent: - return std::string("Pyramid rational nodal ") + rank + - " at the exact apex is not uniquely defined under admissible interior approaches"; - case ApexRankStatus::Singular: - return std::string("Pyramid rational nodal ") + rank + - " at the exact apex is singular for this basis family"; - case ApexRankStatus::Exact: - return std::string("Pyramid rational nodal ") + rank + - " apex evaluation unexpectedly reported non-exact status"; - } - return std::string("Pyramid rational nodal ") + rank + - " apex evaluation is not available"; - } - - static ApexData build_apex_data(const OrderData& data) { - const std::size_t n = data.modal_terms.size(); - - std::vector modal_values(n); - std::vector modal_gradients(n); - std::vector modal_hessians(n); - for (std::size_t m = 0; m < n; ++m) { - 
modal_values[m] = modal_value_asymptotic(data.modal_terms[m]); - modal_gradients[m] = modal_gradient_asymptotic(data.modal_terms[m]); - modal_hessians[m] = modal_hessian_asymptotic(data.modal_terms[m]); - } - - std::vector nodal_values(n); - std::vector nodal_gradients(n); - std::vector nodal_hessians(n); - for (std::size_t i = 0; i < n; ++i) { - for (std::size_t m = 0; m < n; ++m) { - const Real coeff = data.modal_to_nodal[i * n + m]; - nodal_values[i].add_scaled(modal_values[m], coeff, kSeriesTolerance); - for (int d = 0; d < 3; ++d) { - nodal_gradients[i][static_cast(d)].add_scaled( - modal_gradients[m][static_cast(d)], coeff, kSeriesTolerance); - } - for (int r = 0; r < 3; ++r) { - for (int c = 0; c < 3; ++c) { - nodal_hessians[i][static_cast(r)][static_cast(c)] - .add_scaled( - modal_hessians[m][static_cast(r)][static_cast(c)], - coeff, - kSeriesTolerance); - } - } - } - } - - ApexData apex; - apex.values.assign(n, Real(0)); - apex.gradients.assign(n, Gradient{}); - apex.hessians.assign(n, Hessian{}); - - for (std::size_t i = 0; i < n; ++i) { - const ApexClassification value_class = classify_series(nodal_values[i]); - if (value_class.kind != ApexLimitKind::Constant) { - throw BasisConstructionException( - "Pyramid nodal value at apex is not uniquely defined for basis index " + - std::to_string(i), - __FILE__, __LINE__, __func__); - } - apex.values[i] = value_class.constant_value; - - for (int d = 0; d < 3; ++d) { - const ApexClassification grad_class = classify_series( - nodal_gradients[i][static_cast(d)]); - accumulate_rank_status(apex.gradient_status, grad_class); - if (grad_class.kind == ApexLimitKind::Constant) { - apex.gradients[i][static_cast(d)] = grad_class.constant_value; - } - } - - for (int r = 0; r < 3; ++r) { - for (int c = 0; c < 3; ++c) { - const ApexClassification hess_class = classify_series( - nodal_hessians[i][static_cast(r)][static_cast(c)]); - accumulate_rank_status(apex.hessian_status, hess_class); - if (hess_class.kind == 
ApexLimitKind::Constant) { - apex.hessians[i](static_cast(r), - static_cast(c)) = hess_class.constant_value; - } - } - } - } - - if (apex.gradient_status != ApexRankStatus::Exact) { - apex.gradients.clear(); - } - if (apex.hessian_status != ApexRankStatus::Exact) { - apex.hessians.clear(); - } - - return apex; - } - - static std::vector> build_public_nodes(int order) { - if (order == 0) { - return {math::Vector{Real(0), Real(0), Real(0.25)}}; - } - - std::vector> nodes; - nodes.reserve(static_cast((order + 1) * (order + 2) * (2 * order + 3) / 6)); - - nodes.push_back(math::Vector{Real(-1), Real(-1), Real(0)}); - nodes.push_back(math::Vector{Real(1), Real(-1), Real(0)}); - nodes.push_back(math::Vector{Real(1), Real(1), Real(0)}); - nodes.push_back(math::Vector{Real(-1), Real(1), Real(0)}); - nodes.push_back(math::Vector{Real(0), Real(0), Real(1)}); - - for (int m = 1; m < order; ++m) { - nodes.push_back(math::Vector{equispaced_pm_one_coord(m, order), Real(-1), Real(0)}); - } - for (int m = 1; m < order; ++m) { - nodes.push_back(math::Vector{Real(1), equispaced_pm_one_coord(m, order), Real(0)}); - } - for (int m = order - 1; m >= 1; --m) { - nodes.push_back(math::Vector{equispaced_pm_one_coord(m, order), Real(1), Real(0)}); - } - for (int m = order - 1; m >= 1; --m) { - nodes.push_back(math::Vector{Real(-1), equispaced_pm_one_coord(m, order), Real(0)}); - } - - for (int level = 1; level < order; ++level) { - const Real z = static_cast(level) / static_cast(order); - const Real scale = Real(1) - z; - nodes.push_back(math::Vector{-scale, -scale, z}); - nodes.push_back(math::Vector{scale, -scale, z}); - nodes.push_back(math::Vector{scale, scale, z}); - nodes.push_back(math::Vector{-scale, scale, z}); - } - - for (int j = 1; j < order; ++j) { - for (int i = 1; i < order; ++i) { - nodes.push_back(math::Vector{equispaced_pm_one_coord(i, order), - equispaced_pm_one_coord(j, order), - Real(0)}); - } - } - - for (int level = 1; level < order - 1; ++level) { - const int n = 
order - level; - const Real z = static_cast(level) / static_cast(order); - const Real scale = Real(1) - z; - - for (int m = 1; m < n; ++m) { - const Real s = equispaced_pm_one_coord(m, n) * scale; - nodes.push_back(math::Vector{s, -scale, z}); - } - for (int m = 1; m < n; ++m) { - const Real s = equispaced_pm_one_coord(m, n) * scale; - nodes.push_back(math::Vector{scale, s, z}); - } - for (int m = n - 1; m >= 1; --m) { - const Real s = equispaced_pm_one_coord(m, n) * scale; - nodes.push_back(math::Vector{s, scale, z}); - } - for (int m = n - 1; m >= 1; --m) { - const Real s = equispaced_pm_one_coord(m, n) * scale; - nodes.push_back(math::Vector{-scale, s, z}); - } - } - - for (int level = 1; level < order - 1; ++level) { - const int n = order - level; - const Real z = static_cast(level) / static_cast(order); - const Real scale = Real(1) - z; - for (int j = 1; j < n; ++j) { - for (int i = 1; i < n; ++i) { - nodes.push_back(math::Vector{equispaced_pm_one_coord(i, n) * scale, - equispaced_pm_one_coord(j, n) * scale, - z}); - } - } - } - - return nodes; - } - - struct VectorValueSink { - std::vector& output; - void resize(std::size_t n) const { output.resize(n); } - void write(std::size_t i, Real value) const { output[i] = value; } - }; - - struct RawValueSink { - Real* output; - void resize(std::size_t) const {} - void write(std::size_t i, Real value) const { output[i] = value; } - }; - - struct VectorGradientSink { - std::vector& output; - void resize(std::size_t n) const { output.resize(n); } - void write(std::size_t i, const Gradient& value) const { output[i] = value; } - }; - - struct RawGradientSink { - Real* output; - void resize(std::size_t) const {} - void write(std::size_t i, const Gradient& value) const { - Real* dst = output + i * 3u; - dst[0] = value[0]; - dst[1] = value[1]; - dst[2] = value[2]; - } - }; - - struct VectorHessianSink { - std::vector& output; - void resize(std::size_t n) const { output.resize(n); } - void write(std::size_t i, const Hessian& 
value) const { output[i] = value; } - }; - - struct RawHessianSink { - Real* output; - void resize(std::size_t) const {} - void write(std::size_t i, const Hessian& value) const { - store_hessian(value, output + i * 9u); - } - }; - - template - static void apply_order1_combination(std::size_t components, - const Get& get, - const Set& set) { - for (std::size_t c = 0; c < components; ++c) { - const Real m0 = get(0u, c); - const Real m1 = get(1u, c); - const Real m2 = get(2u, c); - const Real m3 = get(3u, c); - const Real m4 = get(4u, c); - set(0u, c, Real(0.25) * (m0 - m1 - m2 + m3 - m4)); - set(1u, c, Real(0.25) * (m0 + m1 - m2 - m3 - m4)); - set(2u, c, Real(0.25) * (m0 + m1 + m2 + m3 - m4)); - set(3u, c, Real(0.25) * (m0 - m1 + m2 - m3 - m4)); - set(4u, c, m4); - } - } - - template - static void apply_order2_combination(std::size_t components, - const Get& get, - const Set& set) { - for (std::size_t c = 0; c < components; ++c) { - const Real m0 = get(0u, c); - const Real m1 = get(1u, c); - const Real m2 = get(2u, c); - const Real m3 = get(3u, c); - const Real m4 = get(4u, c); - const Real m5 = get(5u, c); - const Real m6 = get(6u, c); - const Real m7 = get(7u, c); - const Real m8 = get(8u, c); - const Real m9 = get(9u, c); - const Real m10 = get(10u, c); - const Real m11 = get(11u, c); - const Real m12 = get(12u, c); - const Real m13 = get(13u, c); - set(0u, c, Real(0.25) * (m4 - m5 - m7 + m8 - m9 + m10 + m11 - Real(2) * m12 + m13)); - set(1u, c, Real(0.25) * (-m4 - m5 + m7 + m8 - m9 - m10 + m11 + Real(2) * m12 + m13)); - set(2u, c, Real(0.25) * (m4 + m5 + m7 + m8 - m9 - m10 - m11 - Real(2) * m12 + m13)); - set(3u, c, Real(0.25) * (-m4 + m5 - m7 + m8 - m9 + m10 - m11 + Real(2) * m12 + m13)); - set(4u, c, -m9 + Real(2) * m13); - set(5u, c, Real(0.5) * (-m3 + m5 + m6 - m8 + m11)); - set(6u, c, Real(0.5) * (m1 + m2 - m7 - m8 - m10)); - set(7u, c, Real(0.5) * (m3 - m5 + m6 - m8 - m11)); - set(8u, c, Real(0.5) * (-m1 + m2 + m7 - m8 + m10)); - set(9u, c, m9 - m10 - m11 + 
m12 - m13); - set(10u, c, m9 + m10 - m11 - m12 - m13); - set(11u, c, m9 + m10 + m11 + m12 - m13); - set(12u, c, m9 - m10 + m11 - m12 - m13); - set(13u, c, m0 - m2 - m6 + m8 - Real(2) * m9 + m13); - } - } - - template - static void apply_low_order_combination(const OrderData& data, - std::size_t components, - const Get& get, - const Set& set) { - if (data.order == 1) { - apply_order1_combination(components, get, set); - return; - } - apply_order2_combination(components, get, set); - } - - static void apply_sparse_basis_to_nodal(const OrderData& data, - const std::vector& modal_values, - std::vector& nodal_values) { - const std::size_t n = modal_values.size(); - nodal_values.resize(n); - apply_low_order_combination( - data, - 1u, - [&](std::size_t modal_i, std::size_t) { return modal_values[modal_i]; }, - [&](std::size_t basis_i, std::size_t, Real value) { nodal_values[basis_i] = value; }); - } - - static void apply_sparse_basis_to_nodal_to(const OrderData& data, - const std::vector& modal_values, - Real* SVMP_RESTRICT nodal_values) { - apply_low_order_combination( - data, - 1u, - [&](std::size_t modal_i, std::size_t) { return modal_values[modal_i]; }, - [&](std::size_t basis_i, std::size_t, Real value) { nodal_values[basis_i] = value; }); - } - - static void apply_sparse_basis_to_nodal(const OrderData& data, - const std::vector& modal_gradients, - std::vector& nodal_gradients) { - const std::size_t n = modal_gradients.size(); - nodal_gradients.resize(n); - apply_low_order_combination( - data, - 3u, - [&](std::size_t modal_i, std::size_t component) { - return modal_gradients[modal_i][component]; - }, - [&](std::size_t basis_i, std::size_t component, Real value) { - nodal_gradients[basis_i][component] = value; - }); - } - - static void apply_sparse_basis_to_nodal_to(const OrderData& data, - const std::vector& modal_gradients, - Real* SVMP_RESTRICT nodal_gradients) { - apply_low_order_combination( - data, - 3u, - [&](std::size_t modal_i, std::size_t component) { - 
return modal_gradients[modal_i][component]; - }, - [&](std::size_t basis_i, std::size_t component, Real value) { - nodal_gradients[basis_i * 3u + component] = value; - }); - } - - static void apply_sparse_basis_to_nodal(const OrderData& data, - const std::vector& modal_hessians, - std::vector& nodal_hessians) { - const std::size_t n = modal_hessians.size(); - nodal_hessians.resize(n); - apply_low_order_combination( - data, - 9u, - [&](std::size_t modal_i, std::size_t component) { - return modal_hessians[modal_i].data()[component]; - }, - [&](std::size_t basis_i, std::size_t component, Real value) { - nodal_hessians[basis_i].data()[component] = value; - }); - } - - static void apply_sparse_basis_to_nodal_to(const OrderData& data, - const std::vector& modal_hessians, - Real* SVMP_RESTRICT nodal_hessians) { - apply_low_order_combination( - data, - 9u, - [&](std::size_t modal_i, std::size_t component) { - return modal_hessians[modal_i].data()[component]; - }, - [&](std::size_t basis_i, std::size_t component, Real value) { - nodal_hessians[basis_i * 9u + component] = value; - }); - } - - static void apply_sparse_basis_to_nodal_all( - const OrderData& data, - const std::vector& modal_values, - const std::vector& modal_gradients, - const std::vector& modal_hessians, - std::vector& nodal_values, - std::vector& nodal_gradients, - std::vector& nodal_hessians) { - const std::size_t n = modal_values.size(); - nodal_values.resize(n); - nodal_gradients.resize(n); - nodal_hessians.resize(n); - apply_low_order_combination( - data, - 1u, - [&](std::size_t modal_i, std::size_t) { return modal_values[modal_i]; }, - [&](std::size_t basis_i, std::size_t, Real value) { nodal_values[basis_i] = value; }); - apply_low_order_combination( - data, - 3u, - [&](std::size_t modal_i, std::size_t component) { - return modal_gradients[modal_i][component]; - }, - [&](std::size_t basis_i, std::size_t component, Real value) { - nodal_gradients[basis_i][component] = value; - }); - 
apply_low_order_combination( - data, - 9u, - [&](std::size_t modal_i, std::size_t component) { - return modal_hessians[modal_i].data()[component]; - }, - [&](std::size_t basis_i, std::size_t component, Real value) { - nodal_hessians[basis_i].data()[component] = value; - }); - } - - static void apply_sparse_basis_to_nodal_all_to( - const OrderData& data, - const std::vector& modal_values, - const std::vector& modal_gradients, - const std::vector& modal_hessians, - Real* SVMP_RESTRICT nodal_values, - Real* SVMP_RESTRICT nodal_gradients, - Real* SVMP_RESTRICT nodal_hessians) { - apply_low_order_combination( - data, - 1u, - [&](std::size_t modal_i, std::size_t) { return modal_values[modal_i]; }, - [&](std::size_t basis_i, std::size_t, Real value) { nodal_values[basis_i] = value; }); - apply_low_order_combination( - data, - 3u, - [&](std::size_t modal_i, std::size_t component) { - return modal_gradients[modal_i][component]; - }, - [&](std::size_t basis_i, std::size_t component, Real value) { - nodal_gradients[basis_i * 3u + component] = value; - }); - apply_low_order_combination( - data, - 9u, - [&](std::size_t modal_i, std::size_t component) { - return modal_hessians[modal_i].data()[component]; - }, - [&](std::size_t basis_i, std::size_t component, Real value) { - nodal_hessians[basis_i * 9u + component] = value; - }); - } - - template - // Keep modal transform helpers free of forced-inline attributes unless - // compiler-versioned benchmarks and LLVM IR checks show a stable benefit. 
- static void apply_modal_values_to_nodal(const OrderData& data, - const std::vector& modal_values, - const Sink& sink) { - const std::size_t n = modal_values.size(); - sink.resize(n); - for (std::size_t basis_i = 0; basis_i < n; ++basis_i) { - const Real* row = data.modal_to_nodal.data() + basis_i * n; - Real value = Real(0); - for (std::size_t modal_j = 0; modal_j < n; ++modal_j) { - value += row[modal_j] * modal_values[modal_j]; - } - sink.write(basis_i, value); - } - } - - template - static void apply_modal_gradients_to_nodal(const OrderData& data, - const std::vector& modal_gradients, - const Sink& sink) { - const std::size_t n = modal_gradients.size(); - sink.resize(n); - for (std::size_t basis_i = 0; basis_i < n; ++basis_i) { - const Real* row = data.modal_to_nodal.data() + basis_i * n; - Gradient gradient{}; - for (std::size_t modal_j = 0; modal_j < n; ++modal_j) { - const Real coeff = row[modal_j]; - for (std::size_t component = 0; component < 3u; ++component) { - gradient[component] += coeff * modal_gradients[modal_j][component]; - } - } - sink.write(basis_i, gradient); - } - } - - template - static void apply_modal_hessians_to_nodal(const OrderData& data, - const std::vector& modal_hessians, - const Sink& sink) { - const std::size_t n = modal_hessians.size(); - sink.resize(n); - for (std::size_t basis_i = 0; basis_i < n; ++basis_i) { - const Real* matrix_row = data.modal_to_nodal.data() + basis_i * n; - Hessian hessian{}; - for (std::size_t modal_j = 0; modal_j < n; ++modal_j) { - const Real coeff = matrix_row[modal_j]; - for (std::size_t row = 0; row < 3u; ++row) { - for (std::size_t col = 0; col < 3u; ++col) { - hessian(row, col) += coeff * modal_hessians[modal_j](row, col); - } - } - } - sink.write(basis_i, hessian); - } - } - - static void apply_modal_to_nodal(const OrderData& data, - const std::vector& modal_values, - std::vector& nodal_values) { - apply_modal_values_to_nodal(data, modal_values, VectorValueSink{nodal_values}); - } - - static void 
apply_modal_to_nodal(const OrderData& data, - const std::vector& modal_gradients, - std::vector& nodal_gradients) { - apply_modal_gradients_to_nodal(data, modal_gradients, VectorGradientSink{nodal_gradients}); - } - - static void apply_modal_to_nodal(const OrderData& data, - const std::vector& modal_hessians, - std::vector& nodal_hessians) { - apply_modal_hessians_to_nodal(data, modal_hessians, VectorHessianSink{nodal_hessians}); - } - - static void apply_modal_to_nodal_to(const OrderData& data, - const std::vector& modal_values, - Real* nodal_values) { - apply_modal_values_to_nodal(data, modal_values, RawValueSink{nodal_values}); - } - - static void apply_modal_to_nodal_to(const OrderData& data, - const std::vector& modal_gradients, - Real* nodal_gradients) { - apply_modal_gradients_to_nodal(data, modal_gradients, RawGradientSink{nodal_gradients}); - } - - static void apply_modal_to_nodal_to(const OrderData& data, - const std::vector& modal_hessians, - Real* nodal_hessians) { - apply_modal_hessians_to_nodal(data, modal_hessians, RawHessianSink{nodal_hessians}); - } -}; - -namespace lagrange_pyramid { - -const std::vector>& nodes(int order) { - return PyramidLagrangeCache::get(order).nodes; -} - -void prewarm_scratch(int order, std::size_t max_qpts) { - const auto& data = PyramidLagrangeCache::get(order); - PyramidLagrangeCache::prewarm_scratch(data.modal_terms.size(), max_qpts); -} - -void evaluate_values(int order, - const math::Vector& xi, - std::vector& values) { - const auto& data = PyramidLagrangeCache::get(order); - PyramidLagrangeCache::evaluate_values(data, xi, values); -} - -void evaluate_gradients(int order, - const math::Vector& xi, - std::vector& gradients) { - const auto& data = PyramidLagrangeCache::get(order); - PyramidLagrangeCache::evaluate_gradients(data, xi, gradients); -} - -void evaluate_hessians(int order, - const math::Vector& xi, - std::vector& hessians) { - const auto& data = PyramidLagrangeCache::get(order); - 
PyramidLagrangeCache::evaluate_hessians(data, xi, hessians); -} - -void evaluate_all(int order, - const math::Vector& xi, - std::vector& values, - std::vector& gradients, - std::vector& hessians) { - const auto& data = PyramidLagrangeCache::get(order); - PyramidLagrangeCache::evaluate_all(data, xi, values, gradients, hessians); -} - -void evaluate_values_to(int order, - const math::Vector& xi, - Real* SVMP_RESTRICT values_out) { - const auto& data = PyramidLagrangeCache::get(order); - PyramidLagrangeCache::evaluate_values_to(data, xi, values_out); -} - -void evaluate_gradients_to(int order, - const math::Vector& xi, - Real* SVMP_RESTRICT gradients_out) { - const auto& data = PyramidLagrangeCache::get(order); - PyramidLagrangeCache::evaluate_gradients_to(data, xi, gradients_out); -} - -void evaluate_hessians_to(int order, - const math::Vector& xi, - Real* SVMP_RESTRICT hessians_out) { - const auto& data = PyramidLagrangeCache::get(order); - PyramidLagrangeCache::evaluate_hessians_to(data, xi, hessians_out); -} - -void evaluate_all_to(int order, - const math::Vector& xi, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - const auto& data = PyramidLagrangeCache::get(order); - PyramidLagrangeCache::evaluate_all_to(data, xi, values_out, gradients_out, hessians_out); -} - -void evaluate_at_quadrature_points_strided( - int order, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - const auto& data = PyramidLagrangeCache::get(order); - PyramidLagrangeCache::evaluate_at_quadrature_points_strided( - data, points, output_stride, values_out, gradients_out, hessians_out); -} - -} // namespace lagrange_pyramid - -} // namespace detail -} // namespace basis -} // namespace FE -} // namespace svmp diff --git a/Code/Source/solver/FE/Basis/LagrangeBasisPyramid.h 
b/Code/Source/solver/FE/Basis/LagrangeBasisPyramid.h deleted file mode 100644 index 76859501c..000000000 --- a/Code/Source/solver/FE/Basis/LagrangeBasisPyramid.h +++ /dev/null @@ -1,67 +0,0 @@ -#ifndef SVMP_FE_BASIS_LAGRANGEBASISPYRAMID_H -#define SVMP_FE_BASIS_LAGRANGEBASISPYRAMID_H - -// Private declarations for the rational pyramid Lagrange helper implemented in -// LagrangeBasisPyramid.cpp. This header is intentionally small so the large -// construction and apex-classification code stays out of LagrangeBasis.cpp. - -#include "BasisFunction.h" - -#include -#include - -namespace svmp { -namespace FE { -namespace basis { -namespace detail { -namespace lagrange_pyramid { - -const std::vector>& nodes(int order); - -void prewarm_scratch(int order, std::size_t max_qpts = 0); - -void evaluate_values(int order, - const math::Vector& xi, - std::vector& values); -void evaluate_gradients(int order, - const math::Vector& xi, - std::vector& gradients); -void evaluate_hessians(int order, - const math::Vector& xi, - std::vector& hessians); -void evaluate_all(int order, - const math::Vector& xi, - std::vector& values, - std::vector& gradients, - std::vector& hessians); - -void evaluate_values_to(int order, - const math::Vector& xi, - Real* SVMP_RESTRICT values_out); -void evaluate_gradients_to(int order, - const math::Vector& xi, - Real* SVMP_RESTRICT gradients_out); -void evaluate_hessians_to(int order, - const math::Vector& xi, - Real* SVMP_RESTRICT hessians_out); -void evaluate_all_to(int order, - const math::Vector& xi, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out); - -void evaluate_at_quadrature_points_strided( - int order, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out); - -} // namespace lagrange_pyramid -} // namespace detail -} // namespace basis -} // namespace FE -} // namespace svmp - 
-#endif // SVMP_FE_BASIS_LAGRANGEBASISPYRAMID_H diff --git a/Code/Source/solver/FE/Basis/LagrangeBasisSimplex.cpp b/Code/Source/solver/FE/Basis/LagrangeBasisSimplex.cpp deleted file mode 100644 index 36325576a..000000000 --- a/Code/Source/solver/FE/Basis/LagrangeBasisSimplex.cpp +++ /dev/null @@ -1,2457 +0,0 @@ -#include "LagrangeBasisSimplex.h" - -#include -#include - -namespace svmp { -namespace FE { -namespace basis { -namespace detail { - -// Falling-factorial (equispaced barycentric) Lagrange factors for simplex nodes. -// -// For a fixed polynomial order p and barycentric coordinate lambda in [0, 1], -// define -// phi_a(lambda) = product_{m=0}^{a-1} (p * lambda - m) / (a - m), a = 0..p -// Then for a multi-index (i0, i1, ..., id) with sum i_k = p, the simplex -// Lagrange basis function is product_k phi_{i_k}(lambda_k), nodal on the -// barycentric lattice. -// -// Output buffers must each be sized to at least p+1 entries; the function -// writes every output slot (no pre-zero required by the caller). 
-template -void simplex_lagrange_factor_sequence_impl(int p, - Real lambda, - Real* phi, - Real* dphi, - Real* d2phi) { - static_assert(!NeedSecond || NeedFirst, - "second derivative factors require first-derivative recurrence state"); - - phi[0] = Real(1); - if constexpr (NeedFirst) { - dphi[0] = Real(0); - } - if constexpr (NeedSecond) { - d2phi[0] = Real(0); - } - if (p == 0) { - return; - } - - const Real t = static_cast(p) * lambda; - const Real dt_dlambda = static_cast(p); - - Real dphi_dt_prev = Real(0); - Real d2phi_dt2_prev = Real(0); - - for (int a = 1; a <= p; ++a) { - const std::size_t au = static_cast(a); - const Real inv_a = Real(1) / static_cast(a); - const Real s = (t - static_cast(a - 1)) * inv_a; - - phi[au] = s * phi[au - 1]; - - if constexpr (NeedFirst) { - const Real dphi_dt_old = dphi_dt_prev; - const Real dphi_dt = inv_a * phi[au - 1] + s * dphi_dt_old; - dphi[au] = dt_dlambda * dphi_dt; - - if constexpr (NeedSecond) { - const Real d2phi_dt2 = Real(2) * inv_a * dphi_dt_old + s * d2phi_dt2_prev; - d2phi[au] = dt_dlambda * dt_dlambda * d2phi_dt2; - d2phi_dt2_prev = d2phi_dt2; - } - - dphi_dt_prev = dphi_dt; - } - } -} - -void simplex_lagrange_factor_sequence(int p, - Real lambda, - Real* phi, - Real* dphi, - Real* d2phi) { - if (d2phi != nullptr) { - simplex_lagrange_factor_sequence_impl(p, lambda, phi, dphi, d2phi); - } else if (dphi != nullptr) { - simplex_lagrange_factor_sequence_impl(p, lambda, phi, dphi, nullptr); - } else { - simplex_lagrange_factor_sequence_impl(p, lambda, phi, nullptr, nullptr); - } -} - -constexpr int kFixedSimplexAxisOrder = 12; -constexpr std::size_t kFixedSimplexAxisSize = - static_cast(kFixedSimplexAxisOrder + 1); -constexpr std::size_t kFixedSimplexBatchEntries = 512; - -template -inline void simplex_lagrange_factor_values_product(Real lambda, - Real* SVMP_RESTRICT values) { - static_assert(Order >= 0, "simplex order must be non-negative"); - values[0] = Real(1); - const Real t = static_cast(Order) * lambda; - for 
(int a = 1; a <= Order; ++a) { - const Real inv_a = Real(1) / static_cast(a); - values[a] = values[a - 1] * (t - static_cast(a - 1)) * inv_a; - } -} - -template -void evaluate_triangle_simplex_values_q4( - const std::vector>& simplex_exponents, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out) { - static_assert(Order >= 4 && Order <= 8, "specialized simplex path covers orders 4..8"); - - Real phi0[4][Order + 1]; - Real phi1[4][Order + 1]; - Real phi2[4][Order + 1]; - - for (std::size_t q = 0; q < 4u; ++q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l0 = Real(1) - l1 - l2; - simplex_lagrange_factor_values_product(l0, phi0[q]); - simplex_lagrange_factor_values_product(l1, phi1[q]); - simplex_lagrange_factor_values_product(l2, phi2[q]); - } - - const std::size_t num_nodes = simplex_exponents.size(); - for (std::size_t node = 0; node < num_nodes; ++node) { - const auto& e = simplex_exponents[node]; - const std::size_t i0 = static_cast(e[0]); - const std::size_t i1 = static_cast(e[1]); - const std::size_t i2 = static_cast(e[2]); - Real* SVMP_RESTRICT row = values_out + node * output_stride; - row[0] = phi0[0][i0] * phi1[0][i1] * phi2[0][i2]; - row[1] = phi0[1][i0] * phi1[1][i1] * phi2[1][i2]; - row[2] = phi0[2][i0] * phi1[2][i1] * phi2[2][i2]; - row[3] = phi0[3][i0] * phi1[3][i1] * phi2[3][i2]; - } -} - -bool try_evaluate_triangle_simplex_values_q4( - const std::vector>& simplex_exponents, - int order, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out) { - switch (order) { - case 4: - evaluate_triangle_simplex_values_q4<4>( - simplex_exponents, points, output_stride, values_out); - return true; - case 5: - evaluate_triangle_simplex_values_q4<5>( - simplex_exponents, points, output_stride, values_out); - return true; - case 6: - evaluate_triangle_simplex_values_q4<6>( - simplex_exponents, points, output_stride, values_out); - return 
true; - case 7: - evaluate_triangle_simplex_values_q4<7>( - simplex_exponents, points, output_stride, values_out); - return true; - case 8: - evaluate_triangle_simplex_values_q4<8>( - simplex_exponents, points, output_stride, values_out); - return true; - default: - return false; - } -} - -template -void evaluate_tetrahedron_simplex_values_q4( - const std::vector>& simplex_exponents, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out) { - static_assert(Order >= 4 && Order <= 8, "specialized simplex path covers orders 4..8"); - - Real phi0[4][Order + 1]; - Real phi1[4][Order + 1]; - Real phi2[4][Order + 1]; - Real phi3[4][Order + 1]; - - for (std::size_t q = 0; q < 4u; ++q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l3 = xi[2]; - const Real l0 = Real(1) - l1 - l2 - l3; - simplex_lagrange_factor_values_product(l0, phi0[q]); - simplex_lagrange_factor_values_product(l1, phi1[q]); - simplex_lagrange_factor_values_product(l2, phi2[q]); - simplex_lagrange_factor_values_product(l3, phi3[q]); - } - - const std::size_t num_nodes = simplex_exponents.size(); - for (std::size_t node = 0; node < num_nodes; ++node) { - const auto& e = simplex_exponents[node]; - const std::size_t i0 = static_cast(e[0]); - const std::size_t i1 = static_cast(e[1]); - const std::size_t i2 = static_cast(e[2]); - const std::size_t i3 = static_cast(e[3]); - Real* SVMP_RESTRICT row = values_out + node * output_stride; - row[0] = phi0[0][i0] * phi1[0][i1] * phi2[0][i2] * phi3[0][i3]; - row[1] = phi0[1][i0] * phi1[1][i1] * phi2[1][i2] * phi3[1][i3]; - row[2] = phi0[2][i0] * phi1[2][i1] * phi2[2][i2] * phi3[2][i3]; - row[3] = phi0[3][i0] * phi1[3][i1] * phi2[3][i2] * phi3[3][i3]; - } -} - -bool try_evaluate_tetrahedron_simplex_values_q4( - const std::vector>& simplex_exponents, - int order, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out) { - switch (order) { - case 4: - 
evaluate_tetrahedron_simplex_values_q4<4>( - simplex_exponents, points, output_stride, values_out); - return true; - case 5: - evaluate_tetrahedron_simplex_values_q4<5>( - simplex_exponents, points, output_stride, values_out); - return true; - case 6: - evaluate_tetrahedron_simplex_values_q4<6>( - simplex_exponents, points, output_stride, values_out); - return true; - case 7: - evaluate_tetrahedron_simplex_values_q4<7>( - simplex_exponents, points, output_stride, values_out); - return true; - case 8: - evaluate_tetrahedron_simplex_values_q4<8>( - simplex_exponents, points, output_stride, values_out); - return true; - default: - return false; - } -} - -template -void evaluate_tetrahedron_simplex_gradients_q4( - const std::vector>& simplex_exponents, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT gradients_out) { - static_assert(Order >= 3 && Order <= 8, - "specialized tetrahedron gradient path covers orders 3..8"); - - Real phi0[4][Order + 1]; - Real phi1[4][Order + 1]; - Real phi2[4][Order + 1]; - Real phi3[4][Order + 1]; - Real dphi0[4][Order + 1]; - Real dphi1[4][Order + 1]; - Real dphi2[4][Order + 1]; - Real dphi3[4][Order + 1]; - - for (std::size_t q = 0; q < 4u; ++q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l3 = xi[2]; - const Real l0 = Real(1) - l1 - l2 - l3; - simplex_lagrange_factor_sequence_impl( - Order, l0, phi0[q], dphi0[q], nullptr); - simplex_lagrange_factor_sequence_impl( - Order, l1, phi1[q], dphi1[q], nullptr); - simplex_lagrange_factor_sequence_impl( - Order, l2, phi2[q], dphi2[q], nullptr); - simplex_lagrange_factor_sequence_impl( - Order, l3, phi3[q], dphi3[q], nullptr); - } - - const std::size_t num_nodes = simplex_exponents.size(); - for (std::size_t node = 0; node < num_nodes; ++node) { - const auto& e = simplex_exponents[node]; - const std::size_t i0 = static_cast(e[0]); - const std::size_t i1 = static_cast(e[1]); - const std::size_t i2 = 
static_cast(e[2]); - const std::size_t i3 = static_cast(e[3]); - Real gx[4]; - Real gy[4]; - Real gz[4]; - - for (std::size_t q = 0; q < 4u; ++q) { - const Real v0 = phi0[q][i0]; - const Real v1 = phi1[q][i1]; - const Real v2 = phi2[q][i2]; - const Real v3 = phi3[q][i3]; - const Real D0 = dphi0[q][i0]; - const Real D1 = dphi1[q][i1]; - const Real D2 = dphi2[q][i2]; - const Real D3 = dphi3[q][i3]; - const Real v23 = v2 * v3; - const Real v01 = v0 * v1; - const Real dl0 = D0 * v1 * v23; - gx[q] = v0 * D1 * v23 - dl0; - gy[q] = v01 * D2 * v3 - dl0; - gz[q] = v01 * v2 * D3 - dl0; - } - - Real* SVMP_RESTRICT g = gradients_out + node * 3u * output_stride; - g[0u] = gx[0]; - g[1u] = gx[1]; - g[2u] = gx[2]; - g[3u] = gx[3]; - g[output_stride + 0u] = gy[0]; - g[output_stride + 1u] = gy[1]; - g[output_stride + 2u] = gy[2]; - g[output_stride + 3u] = gy[3]; - g[2u * output_stride + 0u] = gz[0]; - g[2u * output_stride + 1u] = gz[1]; - g[2u * output_stride + 2u] = gz[2]; - g[2u * output_stride + 3u] = gz[3]; - } -} - -template -void evaluate_triangle_simplex_gradients_q4( - const std::vector>& simplex_exponents, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT gradients_out) { - static_assert((Order == 2) || (Order >= 4 && Order <= 8), - "specialized simplex path covers order 2 and orders 4..8"); - - Real phi0[4][Order + 1]; - Real phi1[4][Order + 1]; - Real phi2[4][Order + 1]; - Real dphi0[4][Order + 1]; - Real dphi1[4][Order + 1]; - Real dphi2[4][Order + 1]; - - for (std::size_t q = 0; q < 4u; ++q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l0 = Real(1) - l1 - l2; - simplex_lagrange_factor_sequence_impl( - Order, l0, phi0[q], dphi0[q], nullptr); - simplex_lagrange_factor_sequence_impl( - Order, l1, phi1[q], dphi1[q], nullptr); - simplex_lagrange_factor_sequence_impl( - Order, l2, phi2[q], dphi2[q], nullptr); - } - - const std::size_t num_nodes = simplex_exponents.size(); - for (std::size_t node 
= 0; node < num_nodes; ++node) { - const auto& e = simplex_exponents[node]; - const std::size_t i0 = static_cast(e[0]); - const std::size_t i1 = static_cast(e[1]); - const std::size_t i2 = static_cast(e[2]); - Real* SVMP_RESTRICT g = gradients_out + node * 3u * output_stride; - - for (std::size_t q = 0; q < 4u; ++q) { - const Real v0 = phi0[q][i0]; - const Real v1 = phi1[q][i1]; - const Real v2 = phi2[q][i2]; - const Real D0 = dphi0[q][i0]; - const Real D1 = dphi1[q][i1]; - const Real D2 = dphi2[q][i2]; - const Real dl0 = D0 * v1 * v2; - g[0u * output_stride + q] = v0 * D1 * v2 - dl0; - g[1u * output_stride + q] = v0 * v1 * D2 - dl0; - g[2u * output_stride + q] = Real(0); - } - } -} - -bool try_evaluate_triangle_simplex_gradients_q4( - const std::vector>& simplex_exponents, - int order, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT gradients_out) { - switch (order) { - case 2: - evaluate_triangle_simplex_gradients_q4<2>( - simplex_exponents, points, output_stride, gradients_out); - return true; - case 4: - evaluate_triangle_simplex_gradients_q4<4>( - simplex_exponents, points, output_stride, gradients_out); - return true; - case 5: - evaluate_triangle_simplex_gradients_q4<5>( - simplex_exponents, points, output_stride, gradients_out); - return true; - case 6: - evaluate_triangle_simplex_gradients_q4<6>( - simplex_exponents, points, output_stride, gradients_out); - return true; - case 7: - evaluate_triangle_simplex_gradients_q4<7>( - simplex_exponents, points, output_stride, gradients_out); - return true; - case 8: - evaluate_triangle_simplex_gradients_q4<8>( - simplex_exponents, points, output_stride, gradients_out); - return true; - default: - return false; - } -} - -template -void evaluate_triangle_simplex_hessian_outputs_q4( - const std::vector>& simplex_exponents, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) 
{ - static_assert(Order >= 2 && Order <= 8, "specialized simplex path covers orders 2..8"); - - Real phi0[4][Order + 1]; - Real phi1[4][Order + 1]; - Real phi2[4][Order + 1]; - Real dphi0[4][Order + 1]; - Real dphi1[4][Order + 1]; - Real dphi2[4][Order + 1]; - Real d2phi0[4][Order + 1]; - Real d2phi1[4][Order + 1]; - Real d2phi2[4][Order + 1]; - - for (std::size_t q = 0; q < 4u; ++q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l0 = Real(1) - l1 - l2; - simplex_lagrange_factor_sequence_impl( - Order, l0, phi0[q], dphi0[q], d2phi0[q]); - simplex_lagrange_factor_sequence_impl( - Order, l1, phi1[q], dphi1[q], d2phi1[q]); - simplex_lagrange_factor_sequence_impl( - Order, l2, phi2[q], dphi2[q], d2phi2[q]); - } - - const std::size_t num_nodes = simplex_exponents.size(); - for (std::size_t node = 0; node < num_nodes; ++node) { - const auto& e = simplex_exponents[node]; - const std::size_t i0 = static_cast(e[0]); - const std::size_t i1 = static_cast(e[1]); - const std::size_t i2 = static_cast(e[2]); - Real* SVMP_RESTRICT value_row = values_out ? values_out + node * output_stride : nullptr; - Real* SVMP_RESTRICT g = gradients_out ? 
gradients_out + node * 3u * output_stride : nullptr; - Real* SVMP_RESTRICT H = hessians_out + node * 9u * output_stride; - H[2u * output_stride + 0u] = Real(0); - H[2u * output_stride + 1u] = Real(0); - H[2u * output_stride + 2u] = Real(0); - H[2u * output_stride + 3u] = Real(0); - H[5u * output_stride + 0u] = Real(0); - H[5u * output_stride + 1u] = Real(0); - H[5u * output_stride + 2u] = Real(0); - H[5u * output_stride + 3u] = Real(0); - H[6u * output_stride + 0u] = Real(0); - H[6u * output_stride + 1u] = Real(0); - H[6u * output_stride + 2u] = Real(0); - H[6u * output_stride + 3u] = Real(0); - H[7u * output_stride + 0u] = Real(0); - H[7u * output_stride + 1u] = Real(0); - H[7u * output_stride + 2u] = Real(0); - H[7u * output_stride + 3u] = Real(0); - H[8u * output_stride + 0u] = Real(0); - H[8u * output_stride + 1u] = Real(0); - H[8u * output_stride + 2u] = Real(0); - H[8u * output_stride + 3u] = Real(0); - - for (std::size_t q = 0; q < 4u; ++q) { - const Real v0 = phi0[q][i0]; - const Real v1 = phi1[q][i1]; - const Real v2 = phi2[q][i2]; - if (value_row != nullptr) { - value_row[q] = v0 * v1 * v2; - } - - const Real D0 = dphi0[q][i0]; - const Real D1 = dphi1[q][i1]; - const Real D2 = dphi2[q][i2]; - if (g != nullptr) { - const Real dl0 = D0 * v1 * v2; - g[0u * output_stride + q] = v0 * D1 * v2 - dl0; - g[1u * output_stride + q] = v0 * v1 * D2 - dl0; - g[2u * output_stride + q] = Real(0); - } - - const Real DD0 = d2phi0[q][i0]; - const Real DD1 = d2phi1[q][i1]; - const Real DD2 = d2phi2[q][i2]; - const Real H00 = DD0 * v1 * v2; - const Real H11 = v0 * DD1 * v2; - const Real H22 = v0 * v1 * DD2; - const Real H01 = D0 * D1 * v2; - const Real H02 = D0 * v1 * D2; - const Real H12 = v0 * D1 * D2; - const Real h01 = H00 - H01 - H02 + H12; - H[0u * output_stride + q] = H00 - Real(2) * H01 + H11; - H[1u * output_stride + q] = h01; - H[3u * output_stride + q] = h01; - H[4u * output_stride + q] = H00 - Real(2) * H02 + H22; - } - } -} - -bool 
try_evaluate_triangle_simplex_hessian_outputs_q4( - const std::vector>& simplex_exponents, - int order, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - switch (order) { - case 2: - evaluate_triangle_simplex_hessian_outputs_q4<2>( - simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); - return true; - case 3: - evaluate_triangle_simplex_hessian_outputs_q4<3>( - simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); - return true; - case 4: - evaluate_triangle_simplex_hessian_outputs_q4<4>( - simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); - return true; - case 5: - evaluate_triangle_simplex_hessian_outputs_q4<5>( - simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); - return true; - case 6: - evaluate_triangle_simplex_hessian_outputs_q4<6>( - simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); - return true; - case 7: - evaluate_triangle_simplex_hessian_outputs_q4<7>( - simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); - return true; - case 8: - evaluate_triangle_simplex_hessian_outputs_q4<8>( - simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); - return true; - default: - return false; - } -} - -template -inline void write_tetrahedron_simplex_hessian_q4( - const Real (&phi0)[4][Order + 1], - const Real (&phi1)[4][Order + 1], - const Real (&phi2)[4][Order + 1], - const Real (&phi3)[4][Order + 1], - const Real (&dphi0)[4][Order + 1], - const Real (&dphi1)[4][Order + 1], - const Real (&dphi2)[4][Order + 1], - const Real (&dphi3)[4][Order + 1], - const Real (&d2phi0)[4][Order + 1], - const Real (&d2phi1)[4][Order + 1], - const Real (&d2phi2)[4][Order + 1], - const Real (&d2phi3)[4][Order + 1], - std::size_t i0, - std::size_t 
i1, - std::size_t i2, - std::size_t i3, - std::size_t output_stride, - Real* SVMP_RESTRICT H) { - const Real v0 = phi0[Q][i0]; - const Real v1 = phi1[Q][i1]; - const Real v2 = phi2[Q][i2]; - const Real v3 = phi3[Q][i3]; - const Real D0 = dphi0[Q][i0]; - const Real D1 = dphi1[Q][i1]; - const Real D2 = dphi2[Q][i2]; - const Real D3 = dphi3[Q][i3]; - const Real DD0 = d2phi0[Q][i0]; - const Real DD1 = d2phi1[Q][i1]; - const Real DD2 = d2phi2[Q][i2]; - const Real DD3 = d2phi3[Q][i3]; - const Real H00 = DD0 * v1 * v2 * v3; - const Real H11 = v0 * DD1 * v2 * v3; - const Real H22 = v0 * v1 * DD2 * v3; - const Real H33 = v0 * v1 * v2 * DD3; - const Real H01 = D0 * D1 * v2 * v3; - const Real H02 = D0 * v1 * D2 * v3; - const Real H03 = D0 * v1 * v2 * D3; - const Real H12 = v0 * D1 * D2 * v3; - const Real H13 = v0 * D1 * v2 * D3; - const Real H23 = v0 * v1 * D2 * D3; - const Real h01 = H00 - H01 - H02 + H12; - const Real h02 = H00 - H01 - H03 + H13; - const Real h12 = H00 - H02 - H03 + H23; - H[0u * output_stride + Q] = H00 - Real(2) * H01 + H11; - H[1u * output_stride + Q] = h01; - H[2u * output_stride + Q] = h02; - H[3u * output_stride + Q] = h01; - H[4u * output_stride + Q] = H00 - Real(2) * H02 + H22; - H[5u * output_stride + Q] = h12; - H[6u * output_stride + Q] = h02; - H[7u * output_stride + Q] = h12; - H[8u * output_stride + Q] = H00 - Real(2) * H03 + H33; -} - -template -inline void write_tetrahedron_simplex_hessian_stride4_q( - const Real (&phi0)[4][Order + 1], - const Real (&phi1)[4][Order + 1], - const Real (&phi2)[4][Order + 1], - const Real (&phi3)[4][Order + 1], - const Real (&dphi0)[4][Order + 1], - const Real (&dphi1)[4][Order + 1], - const Real (&dphi2)[4][Order + 1], - const Real (&dphi3)[4][Order + 1], - const Real (&d2phi0)[4][Order + 1], - const Real (&d2phi1)[4][Order + 1], - const Real (&d2phi2)[4][Order + 1], - const Real (&d2phi3)[4][Order + 1], - std::size_t i0, - std::size_t i1, - std::size_t i2, - std::size_t i3, - Real* SVMP_RESTRICT H) { - const 
Real v0 = phi0[Q][i0]; - const Real v1 = phi1[Q][i1]; - const Real v2 = phi2[Q][i2]; - const Real v3 = phi3[Q][i3]; - const Real D0 = dphi0[Q][i0]; - const Real D1 = dphi1[Q][i1]; - const Real D2 = dphi2[Q][i2]; - const Real D3 = dphi3[Q][i3]; - const Real DD0 = d2phi0[Q][i0]; - const Real DD1 = d2phi1[Q][i1]; - const Real DD2 = d2phi2[Q][i2]; - const Real DD3 = d2phi3[Q][i3]; - const Real v12 = v1 * v2; - const Real v13 = v1 * v3; - const Real v23 = v2 * v3; - const Real v123 = v1 * v23; - const Real v023 = v0 * v23; - const Real v013 = v0 * v13; - const Real v012 = v0 * v12; - const Real H00 = DD0 * v123; - const Real H11 = DD1 * v023; - const Real H22 = DD2 * v013; - const Real H33 = DD3 * v012; - const Real H01 = D0 * D1 * v23; - const Real H02 = D0 * D2 * v13; - const Real H03 = D0 * D3 * v12; - const Real H12 = D1 * D2 * v0 * v3; - const Real H13 = D1 * D3 * v0 * v2; - const Real H23 = D2 * D3 * v0 * v1; - const Real h01 = H00 - H01 - H02 + H12; - const Real h02 = H00 - H01 - H03 + H13; - const Real h12 = H00 - H02 - H03 + H23; - H[Q] = H00 - Real(2) * H01 + H11; - H[4u + Q] = h01; - H[8u + Q] = h02; - H[12u + Q] = h01; - H[16u + Q] = H00 - Real(2) * H02 + H22; - H[20u + Q] = h12; - H[24u + Q] = h02; - H[28u + Q] = h12; - H[32u + Q] = H00 - Real(2) * H03 + H33; -} - -template -inline void write_tetrahedron_simplex_all_stride4_q( - const Real (&phi0)[4][Order + 1], - const Real (&phi1)[4][Order + 1], - const Real (&phi2)[4][Order + 1], - const Real (&phi3)[4][Order + 1], - const Real (&dphi0)[4][Order + 1], - const Real (&dphi1)[4][Order + 1], - const Real (&dphi2)[4][Order + 1], - const Real (&dphi3)[4][Order + 1], - const Real (&d2phi0)[4][Order + 1], - const Real (&d2phi1)[4][Order + 1], - const Real (&d2phi2)[4][Order + 1], - const Real (&d2phi3)[4][Order + 1], - std::size_t i0, - std::size_t i1, - std::size_t i2, - std::size_t i3, - Real* SVMP_RESTRICT value_row, - Real* SVMP_RESTRICT g, - Real* SVMP_RESTRICT H) { - const Real v0 = phi0[Q][i0]; - const 
Real v1 = phi1[Q][i1]; - const Real v2 = phi2[Q][i2]; - const Real v3 = phi3[Q][i3]; - const Real D0 = dphi0[Q][i0]; - const Real D1 = dphi1[Q][i1]; - const Real D2 = dphi2[Q][i2]; - const Real D3 = dphi3[Q][i3]; - const Real DD0 = d2phi0[Q][i0]; - const Real DD1 = d2phi1[Q][i1]; - const Real DD2 = d2phi2[Q][i2]; - const Real DD3 = d2phi3[Q][i3]; - const Real v12 = v1 * v2; - const Real v13 = v1 * v3; - const Real v23 = v2 * v3; - const Real v123 = v1 * v23; - const Real v023 = v0 * v23; - const Real v013 = v0 * v13; - const Real v012 = v0 * v12; - const Real dl0 = D0 * v123; - const Real H00 = DD0 * v123; - const Real H11 = DD1 * v023; - const Real H22 = DD2 * v013; - const Real H33 = DD3 * v012; - const Real H01 = D0 * D1 * v23; - const Real H02 = D0 * D2 * v13; - const Real H03 = D0 * D3 * v12; - const Real H12 = D1 * D2 * v0 * v3; - const Real H13 = D1 * D3 * v0 * v2; - const Real H23 = D2 * D3 * v0 * v1; - const Real h01 = H00 - H01 - H02 + H12; - const Real h02 = H00 - H01 - H03 + H13; - const Real h12 = H00 - H02 - H03 + H23; - - value_row[Q] = v0 * v123; - g[Q] = D1 * v023 - dl0; - g[4u + Q] = D2 * v013 - dl0; - g[8u + Q] = D3 * v012 - dl0; - H[Q] = H00 - Real(2) * H01 + H11; - H[4u + Q] = h01; - H[8u + Q] = h02; - H[12u + Q] = h01; - H[16u + Q] = H00 - Real(2) * H02 + H22; - H[20u + Q] = h12; - H[24u + Q] = h02; - H[28u + Q] = h12; - H[32u + Q] = H00 - Real(2) * H03 + H33; -} - -template -void evaluate_tetrahedron_simplex_hessian_outputs_q4( - const std::vector>& simplex_exponents, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - static_assert(Order >= 2 && Order <= 8, "specialized simplex path covers orders 2..8"); - - Real phi0[4][Order + 1]; - Real phi1[4][Order + 1]; - Real phi2[4][Order + 1]; - Real phi3[4][Order + 1]; - Real dphi0[4][Order + 1]; - Real dphi1[4][Order + 1]; - Real dphi2[4][Order + 1]; - Real dphi3[4][Order + 1]; - 
Real d2phi0[4][Order + 1]; - Real d2phi1[4][Order + 1]; - Real d2phi2[4][Order + 1]; - Real d2phi3[4][Order + 1]; - - for (std::size_t q = 0; q < 4u; ++q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l3 = xi[2]; - const Real l0 = Real(1) - l1 - l2 - l3; - simplex_lagrange_factor_sequence_impl( - Order, l0, phi0[q], dphi0[q], d2phi0[q]); - simplex_lagrange_factor_sequence_impl( - Order, l1, phi1[q], dphi1[q], d2phi1[q]); - simplex_lagrange_factor_sequence_impl( - Order, l2, phi2[q], dphi2[q], d2phi2[q]); - simplex_lagrange_factor_sequence_impl( - Order, l3, phi3[q], dphi3[q], d2phi3[q]); - } - - const std::size_t num_nodes = simplex_exponents.size(); - if (values_out == nullptr && gradients_out == nullptr) { - if (output_stride == 4u) { - for (std::size_t node = 0; node < num_nodes; ++node) { - const auto& e = simplex_exponents[node]; - const std::size_t i0 = static_cast(e[0]); - const std::size_t i1 = static_cast(e[1]); - const std::size_t i2 = static_cast(e[2]); - const std::size_t i3 = static_cast(e[3]); - Real* SVMP_RESTRICT H = hessians_out + node * 36u; - write_tetrahedron_simplex_hessian_stride4_q( - phi0, phi1, phi2, phi3, dphi0, dphi1, dphi2, dphi3, - d2phi0, d2phi1, d2phi2, d2phi3, i0, i1, i2, i3, H); - write_tetrahedron_simplex_hessian_stride4_q( - phi0, phi1, phi2, phi3, dphi0, dphi1, dphi2, dphi3, - d2phi0, d2phi1, d2phi2, d2phi3, i0, i1, i2, i3, H); - write_tetrahedron_simplex_hessian_stride4_q( - phi0, phi1, phi2, phi3, dphi0, dphi1, dphi2, dphi3, - d2phi0, d2phi1, d2phi2, d2phi3, i0, i1, i2, i3, H); - write_tetrahedron_simplex_hessian_stride4_q( - phi0, phi1, phi2, phi3, dphi0, dphi1, dphi2, dphi3, - d2phi0, d2phi1, d2phi2, d2phi3, i0, i1, i2, i3, H); - } - } else { - for (std::size_t node = 0; node < num_nodes; ++node) { - const auto& e = simplex_exponents[node]; - const std::size_t i0 = static_cast(e[0]); - const std::size_t i1 = static_cast(e[1]); - const std::size_t i2 = static_cast(e[2]); - const 
std::size_t i3 = static_cast(e[3]); - Real* SVMP_RESTRICT H = hessians_out + node * 9u * output_stride; - write_tetrahedron_simplex_hessian_q4( - phi0, phi1, phi2, phi3, dphi0, dphi1, dphi2, dphi3, - d2phi0, d2phi1, d2phi2, d2phi3, i0, i1, i2, i3, output_stride, H); - write_tetrahedron_simplex_hessian_q4( - phi0, phi1, phi2, phi3, dphi0, dphi1, dphi2, dphi3, - d2phi0, d2phi1, d2phi2, d2phi3, i0, i1, i2, i3, output_stride, H); - write_tetrahedron_simplex_hessian_q4( - phi0, phi1, phi2, phi3, dphi0, dphi1, dphi2, dphi3, - d2phi0, d2phi1, d2phi2, d2phi3, i0, i1, i2, i3, output_stride, H); - write_tetrahedron_simplex_hessian_q4( - phi0, phi1, phi2, phi3, dphi0, dphi1, dphi2, dphi3, - d2phi0, d2phi1, d2phi2, d2phi3, i0, i1, i2, i3, output_stride, H); - } - } - return; - } - - if (values_out != nullptr && gradients_out != nullptr) { - if (output_stride == 4u) { - for (std::size_t node = 0; node < num_nodes; ++node) { - const auto& e = simplex_exponents[node]; - const std::size_t i0 = static_cast(e[0]); - const std::size_t i1 = static_cast(e[1]); - const std::size_t i2 = static_cast(e[2]); - const std::size_t i3 = static_cast(e[3]); - Real* SVMP_RESTRICT value_row = values_out + node * output_stride; - Real* SVMP_RESTRICT g = gradients_out + node * 3u * output_stride; - Real* SVMP_RESTRICT H = hessians_out + node * 9u * output_stride; - write_tetrahedron_simplex_all_stride4_q( - phi0, phi1, phi2, phi3, dphi0, dphi1, dphi2, dphi3, - d2phi0, d2phi1, d2phi2, d2phi3, i0, i1, i2, i3, value_row, g, H); - write_tetrahedron_simplex_all_stride4_q( - phi0, phi1, phi2, phi3, dphi0, dphi1, dphi2, dphi3, - d2phi0, d2phi1, d2phi2, d2phi3, i0, i1, i2, i3, value_row, g, H); - write_tetrahedron_simplex_all_stride4_q( - phi0, phi1, phi2, phi3, dphi0, dphi1, dphi2, dphi3, - d2phi0, d2phi1, d2phi2, d2phi3, i0, i1, i2, i3, value_row, g, H); - write_tetrahedron_simplex_all_stride4_q( - phi0, phi1, phi2, phi3, dphi0, dphi1, dphi2, dphi3, - d2phi0, d2phi1, d2phi2, d2phi3, i0, i1, i2, i3, 
value_row, g, H); - } - return; - } - - for (std::size_t node = 0; node < num_nodes; ++node) { - const auto& e = simplex_exponents[node]; - const std::size_t i0 = static_cast(e[0]); - const std::size_t i1 = static_cast(e[1]); - const std::size_t i2 = static_cast(e[2]); - const std::size_t i3 = static_cast(e[3]); - Real* SVMP_RESTRICT value_row = values_out + node * output_stride; - Real* SVMP_RESTRICT g = gradients_out + node * 3u * output_stride; - Real* SVMP_RESTRICT H = hessians_out + node * 9u * output_stride; - - for (std::size_t q = 0; q < 4u; ++q) { - const Real v0 = phi0[q][i0]; - const Real v1 = phi1[q][i1]; - const Real v2 = phi2[q][i2]; - const Real v3 = phi3[q][i3]; - const Real D0 = dphi0[q][i0]; - const Real D1 = dphi1[q][i1]; - const Real D2 = dphi2[q][i2]; - const Real D3 = dphi3[q][i3]; - const Real DD0 = d2phi0[q][i0]; - const Real DD1 = d2phi1[q][i1]; - const Real DD2 = d2phi2[q][i2]; - const Real DD3 = d2phi3[q][i3]; - const Real v12 = v1 * v2; - const Real v13 = v1 * v3; - const Real v23 = v2 * v3; - const Real v123 = v1 * v23; - const Real v023 = v0 * v23; - const Real v013 = v0 * v13; - const Real v012 = v0 * v12; - const Real dl0 = D0 * v123; - const Real H00 = DD0 * v123; - const Real H11 = DD1 * v023; - const Real H22 = DD2 * v013; - const Real H33 = DD3 * v012; - const Real H01 = D0 * D1 * v23; - const Real H02 = D0 * D2 * v13; - const Real H03 = D0 * D3 * v12; - const Real H12 = D1 * D2 * v0 * v3; - const Real H13 = D1 * D3 * v0 * v2; - const Real H23 = D2 * D3 * v0 * v1; - const Real h01 = H00 - H01 - H02 + H12; - const Real h02 = H00 - H01 - H03 + H13; - const Real h12 = H00 - H02 - H03 + H23; - - value_row[q] = v0 * v123; - g[0u * output_stride + q] = D1 * v023 - dl0; - g[1u * output_stride + q] = D2 * v013 - dl0; - g[2u * output_stride + q] = D3 * v012 - dl0; - H[0u * output_stride + q] = H00 - Real(2) * H01 + H11; - H[1u * output_stride + q] = h01; - H[2u * output_stride + q] = h02; - H[3u * output_stride + q] = h01; - H[4u * 
output_stride + q] = H00 - Real(2) * H02 + H22; - H[5u * output_stride + q] = h12; - H[6u * output_stride + q] = h02; - H[7u * output_stride + q] = h12; - H[8u * output_stride + q] = H00 - Real(2) * H03 + H33; - } - } - return; - } - - for (std::size_t node = 0; node < num_nodes; ++node) { - const auto& e = simplex_exponents[node]; - const std::size_t i0 = static_cast(e[0]); - const std::size_t i1 = static_cast(e[1]); - const std::size_t i2 = static_cast(e[2]); - const std::size_t i3 = static_cast(e[3]); - Real* SVMP_RESTRICT value_row = values_out ? values_out + node * output_stride : nullptr; - Real* SVMP_RESTRICT g = gradients_out ? gradients_out + node * 3u * output_stride : nullptr; - Real* SVMP_RESTRICT H = hessians_out + node * 9u * output_stride; - - for (std::size_t q = 0; q < 4u; ++q) { - const Real v0 = phi0[q][i0]; - const Real v1 = phi1[q][i1]; - const Real v2 = phi2[q][i2]; - const Real v3 = phi3[q][i3]; - if (value_row != nullptr) { - value_row[q] = v0 * v1 * v2 * v3; - } - - const Real D0 = dphi0[q][i0]; - const Real D1 = dphi1[q][i1]; - const Real D2 = dphi2[q][i2]; - const Real D3 = dphi3[q][i3]; - if (g != nullptr) { - const Real dl0 = D0 * v1 * v2 * v3; - g[0u * output_stride + q] = v0 * D1 * v2 * v3 - dl0; - g[1u * output_stride + q] = v0 * v1 * D2 * v3 - dl0; - g[2u * output_stride + q] = v0 * v1 * v2 * D3 - dl0; - } - - const Real DD0 = d2phi0[q][i0]; - const Real DD1 = d2phi1[q][i1]; - const Real DD2 = d2phi2[q][i2]; - const Real DD3 = d2phi3[q][i3]; - const Real H00 = DD0 * v1 * v2 * v3; - const Real H11 = v0 * DD1 * v2 * v3; - const Real H22 = v0 * v1 * DD2 * v3; - const Real H33 = v0 * v1 * v2 * DD3; - const Real H01 = D0 * D1 * v2 * v3; - const Real H02 = D0 * v1 * D2 * v3; - const Real H03 = D0 * v1 * v2 * D3; - const Real H12 = v0 * D1 * D2 * v3; - const Real H13 = v0 * D1 * v2 * D3; - const Real H23 = v0 * v1 * D2 * D3; - const Real h01 = H00 - H01 - H02 + H12; - const Real h02 = H00 - H01 - H03 + H13; - const Real h12 = H00 - H02 - 
H03 + H23; - H[0u * output_stride + q] = H00 - Real(2) * H01 + H11; - H[1u * output_stride + q] = h01; - H[2u * output_stride + q] = h02; - H[3u * output_stride + q] = h01; - H[4u * output_stride + q] = H00 - Real(2) * H02 + H22; - H[5u * output_stride + q] = h12; - H[6u * output_stride + q] = h02; - H[7u * output_stride + q] = h12; - H[8u * output_stride + q] = H00 - Real(2) * H03 + H33; - } - } -} - -bool try_evaluate_tetrahedron_simplex_hessian_outputs_q4( - const std::vector>& simplex_exponents, - int order, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - switch (order) { - case 2: - evaluate_tetrahedron_simplex_hessian_outputs_q4<2>( - simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); - return true; - case 3: - evaluate_tetrahedron_simplex_hessian_outputs_q4<3>( - simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); - return true; - case 4: - evaluate_tetrahedron_simplex_hessian_outputs_q4<4>( - simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); - return true; - case 5: - evaluate_tetrahedron_simplex_hessian_outputs_q4<5>( - simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); - return true; - case 6: - evaluate_tetrahedron_simplex_hessian_outputs_q4<6>( - simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); - return true; - case 7: - evaluate_tetrahedron_simplex_hessian_outputs_q4<7>( - simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); - return true; - case 8: - evaluate_tetrahedron_simplex_hessian_outputs_q4<8>( - simplex_exponents, points, output_stride, values_out, gradients_out, hessians_out); - return true; - default: - return false; - } -} - -// Per-thread scratch space for simplex factor sequences. 
Common low orders use -// fixed storage; higher orders fall back to dynamic vectors. -struct SimplexAxisScratch { - std::size_t size{0}; - std::array phi_fixed{}; - std::array dphi_fixed{}; - std::array d2phi_fixed{}; - std::vector phi_dynamic; - std::vector dphi_dynamic; - std::vector d2phi_dynamic; - - void reserveFor(std::size_t n) { - size = n; - if (n <= kFixedSimplexAxisSize) { - return; - } - if (phi_dynamic.size() < n) phi_dynamic.resize(n); - if (dphi_dynamic.size() < n) dphi_dynamic.resize(n); - if (d2phi_dynamic.size() < n) d2phi_dynamic.resize(n); - } - - Real* phi() noexcept { - return size <= kFixedSimplexAxisSize ? phi_fixed.data() : phi_dynamic.data(); - } - - Real* dphi() noexcept { - return size <= kFixedSimplexAxisSize ? dphi_fixed.data() : dphi_dynamic.data(); - } - - Real* d2phi() noexcept { - return size <= kFixedSimplexAxisSize ? d2phi_fixed.data() : d2phi_dynamic.data(); - } - - const Real* phi() const noexcept { - return size <= kFixedSimplexAxisSize ? phi_fixed.data() : phi_dynamic.data(); - } - - const Real* dphi() const noexcept { - return size <= kFixedSimplexAxisSize ? dphi_fixed.data() : dphi_dynamic.data(); - } - - const Real* d2phi() const noexcept { - return size <= kFixedSimplexAxisSize ? 
d2phi_fixed.data() : d2phi_dynamic.data(); - } -}; - -SimplexAxisScratch& simplex_axis_scratch_slot(int slot) { - thread_local SimplexAxisScratch s[4]; - return s[slot]; -} - -struct SimplexVectorSink { - std::vector* values; - std::vector* gradients; - std::vector* hessians; - - bool wants_values() const noexcept { return values != nullptr; } - bool wants_gradients() const noexcept { return gradients != nullptr; } - bool wants_hessians() const noexcept { return hessians != nullptr; } - - void prepare(std::size_t n_nodes) const { - if (values) values->resize(n_nodes); - if (gradients) gradients->resize(n_nodes); - if (hessians) hessians->resize(n_nodes); - } - - void write_value(std::size_t n, Real value) const { - (*values)[n] = value; - } - - void write_gradient(std::size_t n, Real x, Real y, Real z) const { - auto& gradient = (*gradients)[n]; - gradient[0] = x; - gradient[1] = y; - gradient[2] = z; - } - - void write_hessian(std::size_t n, - Real xx, - Real yy, - Real zz, - Real xy, - Real xz, - Real yz) const { - Hessian hessian{}; - hessian(0, 0) = xx; - hessian(1, 1) = yy; - hessian(2, 2) = zz; - hessian(0, 1) = xy; hessian(1, 0) = xy; - hessian(0, 2) = xz; hessian(2, 0) = xz; - hessian(1, 2) = yz; hessian(2, 1) = yz; - (*hessians)[n] = hessian; - } -}; - -struct SimplexRawSink { - Real* values; - Real* gradients; - Real* hessians; - - bool wants_values() const noexcept { return values != nullptr; } - bool wants_gradients() const noexcept { return gradients != nullptr; } - bool wants_hessians() const noexcept { return hessians != nullptr; } - - void prepare(std::size_t) const {} - - void write_value(std::size_t n, Real value) const { - values[n] = value; - } - - void write_gradient(std::size_t n, Real x, Real y, Real z) const { - Real* gradient = gradients + n * 3u; - gradient[0] = x; - gradient[1] = y; - gradient[2] = z; - } - - void write_hessian(std::size_t n, - Real xx, - Real yy, - Real zz, - Real xy, - Real xz, - Real yz) const { - Real* hessian = 
hessians + n * 9u; - hessian[0] = xx; - hessian[1] = xy; - hessian[2] = xz; - hessian[3] = xy; - hessian[4] = yy; - hessian[5] = yz; - hessian[6] = xz; - hessian[7] = yz; - hessian[8] = zz; - } -}; - -template -void evaluate_triangle_simplex_basis_impl(const std::vector>& simplex_exponents, - int order, - const math::Vector& xi, - const Sink& sink) { - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l0 = Real(1) - l1 - l2; - - const std::size_t n = static_cast(order + 1); - SimplexAxisScratch& s0 = simplex_axis_scratch_slot(0); - SimplexAxisScratch& s1 = simplex_axis_scratch_slot(1); - SimplexAxisScratch& s2 = simplex_axis_scratch_slot(2); - s0.reserveFor(n); - s1.reserveFor(n); - s2.reserveFor(n); - - const std::size_t num_nodes = simplex_exponents.size(); - sink.prepare(num_nodes); - const bool need_values = sink.wants_values(); - const bool need_gradients = sink.wants_gradients(); - const bool need_hessians = sink.wants_hessians(); - Real* d0_out = (need_gradients || need_hessians) ? s0.dphi() : nullptr; - Real* d1_out = (need_gradients || need_hessians) ? s1.dphi() : nullptr; - Real* d2_out = (need_gradients || need_hessians) ? s2.dphi() : nullptr; - Real* d20_out = need_hessians ? s0.d2phi() : nullptr; - Real* d21_out = need_hessians ? s1.d2phi() : nullptr; - Real* d22_out = need_hessians ? 
s2.d2phi() : nullptr; - - simplex_lagrange_factor_sequence(order, l0, s0.phi(), d0_out, d20_out); - simplex_lagrange_factor_sequence(order, l1, s1.phi(), d1_out, d21_out); - simplex_lagrange_factor_sequence(order, l2, s2.phi(), d2_out, d22_out); - const Real* phi0 = s0.phi(); - const Real* phi1 = s1.phi(); - const Real* phi2 = s2.phi(); - const Real* dphi0 = s0.dphi(); - const Real* dphi1 = s1.dphi(); - const Real* dphi2 = s2.dphi(); - const Real* d2phi0 = s0.d2phi(); - const Real* d2phi1 = s1.d2phi(); - const Real* d2phi2 = s2.d2phi(); - - for (std::size_t n_idx = 0; n_idx < num_nodes; ++n_idx) { - const auto& e = simplex_exponents[n_idx]; - const std::size_t i0 = static_cast(e[0]); - const std::size_t i1 = static_cast(e[1]); - const std::size_t i2 = static_cast(e[2]); - - const Real v0 = phi0[i0]; - const Real v1 = phi1[i1]; - const Real v2 = phi2[i2]; - if (need_values) { - sink.write_value(n_idx, v0 * v1 * v2); - } - if (!need_gradients && !need_hessians) { - continue; - } - - const Real D0 = dphi0[i0]; - const Real D1 = dphi1[i1]; - const Real D2 = dphi2[i2]; - - if (need_gradients) { - const Real dl0 = D0 * v1 * v2; - const Real dl1 = v0 * D1 * v2; - const Real dl2 = v0 * v1 * D2; - sink.write_gradient(n_idx, dl1 - dl0, dl2 - dl0, Real(0)); - } - - if (need_hessians) { - const Real DD0 = d2phi0[i0]; - const Real DD1 = d2phi1[i1]; - const Real DD2 = d2phi2[i2]; - - const Real H00 = DD0 * v1 * v2; - const Real H11 = v0 * DD1 * v2; - const Real H22 = v0 * v1 * DD2; - const Real H01 = D0 * D1 * v2; - const Real H02 = D0 * v1 * D2; - const Real H12 = v0 * D1 * D2; - - sink.write_hessian(n_idx, - H00 - Real(2) * H01 + H11, - H00 - Real(2) * H02 + H22, - Real(0), - H00 - H01 - H02 + H12, - Real(0), - Real(0)); - } - } -} - -void evaluate_triangle_simplex_basis(const std::vector>& simplex_exponents, - int order, - const math::Vector& xi, - std::vector* values, - std::vector* gradients, - std::vector* hessians) { - const SimplexVectorSink sink{values, gradients, 
hessians}; - evaluate_triangle_simplex_basis_impl(simplex_exponents, order, xi, sink); -} - -void evaluate_triangle_simplex_basis_to(const std::vector>& simplex_exponents, - int order, - const math::Vector& xi, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - const SimplexRawSink sink{values_out, gradients_out, hessians_out}; - evaluate_triangle_simplex_basis_impl(simplex_exponents, order, xi, sink); -} - -void evaluate_triangle_simplex_basis_strided( - const std::vector>& simplex_exponents, - int order, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - const std::size_t num_nodes = simplex_exponents.size(); - if (points.empty() || num_nodes == 0u) { - return; - } - - const std::size_t sequence_size = static_cast(order + 1); - const std::size_t num_qpts = points.size(); - const bool need_gradients = gradients_out != nullptr; - const bool need_hessians = hessians_out != nullptr; - if (num_qpts == 4u && - values_out != nullptr && - !need_gradients && - !need_hessians && - try_evaluate_triangle_simplex_values_q4( - simplex_exponents, order, points, output_stride, values_out)) { - return; - } - if (num_qpts == 4u && - values_out == nullptr && - need_gradients && - !need_hessians && - try_evaluate_triangle_simplex_gradients_q4( - simplex_exponents, order, points, output_stride, gradients_out)) { - return; - } - if (num_qpts == 4u && - need_hessians && - try_evaluate_triangle_simplex_hessian_outputs_q4( - simplex_exponents, order, points, output_stride, - values_out, gradients_out, hessians_out)) { - return; - } - const std::size_t batch_entries = sequence_size * num_qpts; - if (batch_entries <= kFixedSimplexBatchEntries) { - if (values_out != nullptr && gradients_out == nullptr && hessians_out == nullptr) { - std::array phi0_batch; - std::array phi1_batch; - std::array phi2_batch; - - for 
(std::size_t q = 0; q < num_qpts; ++q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l0 = Real(1) - l1 - l2; - const std::size_t offset = q * sequence_size; - simplex_lagrange_factor_sequence( - order, l0, phi0_batch.data() + offset, nullptr, nullptr); - simplex_lagrange_factor_sequence( - order, l1, phi1_batch.data() + offset, nullptr, nullptr); - simplex_lagrange_factor_sequence( - order, l2, phi2_batch.data() + offset, nullptr, nullptr); - } - - for (std::size_t node = 0; node < num_nodes; ++node) { - const auto& e = simplex_exponents[node]; - const std::size_t i0 = static_cast(e[0]); - const std::size_t i1 = static_cast(e[1]); - const std::size_t i2 = static_cast(e[2]); - Real* value_row = values_out + node * output_stride; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const std::size_t offset = q * sequence_size; - value_row[q] = - phi0_batch[offset + i0] * - phi1_batch[offset + i1] * - phi2_batch[offset + i2]; - } - } - return; - } - - if (values_out == nullptr && gradients_out != nullptr && hessians_out == nullptr) { - std::array phi0_batch; - std::array phi1_batch; - std::array phi2_batch; - std::array dphi0_batch; - std::array dphi1_batch; - std::array dphi2_batch; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l0 = Real(1) - l1 - l2; - const std::size_t offset = q * sequence_size; - simplex_lagrange_factor_sequence( - order, l0, phi0_batch.data() + offset, dphi0_batch.data() + offset, nullptr); - simplex_lagrange_factor_sequence( - order, l1, phi1_batch.data() + offset, dphi1_batch.data() + offset, nullptr); - simplex_lagrange_factor_sequence( - order, l2, phi2_batch.data() + offset, dphi2_batch.data() + offset, nullptr); - } - - for (std::size_t node = 0; node < num_nodes; ++node) { - const auto& e = simplex_exponents[node]; - const std::size_t i0 = static_cast(e[0]); - const std::size_t i1 = 
static_cast(e[1]); - const std::size_t i2 = static_cast(e[2]); - Real* g = gradients_out + node * 3u * output_stride; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const std::size_t offset = q * sequence_size; - const Real v0 = phi0_batch[offset + i0]; - const Real v1 = phi1_batch[offset + i1]; - const Real v2 = phi2_batch[offset + i2]; - const Real D0 = dphi0_batch[offset + i0]; - const Real D1 = dphi1_batch[offset + i1]; - const Real D2 = dphi2_batch[offset + i2]; - const Real dl0 = D0 * v1 * v2; - g[0u * output_stride + q] = v0 * D1 * v2 - dl0; - g[1u * output_stride + q] = v0 * v1 * D2 - dl0; - g[2u * output_stride + q] = Real(0); - } - } - return; - } - - if (order >= 4 && - values_out == nullptr && - gradients_out == nullptr && - hessians_out != nullptr) { - std::array phi0_batch; - std::array phi1_batch; - std::array phi2_batch; - std::array dphi0_batch; - std::array dphi1_batch; - std::array dphi2_batch; - std::array d2phi0_batch; - std::array d2phi1_batch; - std::array d2phi2_batch; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l0 = Real(1) - l1 - l2; - const std::size_t offset = q * sequence_size; - simplex_lagrange_factor_sequence( - order, l0, phi0_batch.data() + offset, - dphi0_batch.data() + offset, d2phi0_batch.data() + offset); - simplex_lagrange_factor_sequence( - order, l1, phi1_batch.data() + offset, - dphi1_batch.data() + offset, d2phi1_batch.data() + offset); - simplex_lagrange_factor_sequence( - order, l2, phi2_batch.data() + offset, - dphi2_batch.data() + offset, d2phi2_batch.data() + offset); - } - - for (std::size_t node = 0; node < num_nodes; ++node) { - const auto& e = simplex_exponents[node]; - const std::size_t i0 = static_cast(e[0]); - const std::size_t i1 = static_cast(e[1]); - const std::size_t i2 = static_cast(e[2]); - Real* H = hessians_out + node * 9u * output_stride; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const 
std::size_t offset = q * sequence_size; - const Real v0 = phi0_batch[offset + i0]; - const Real v1 = phi1_batch[offset + i1]; - const Real v2 = phi2_batch[offset + i2]; - const Real D0 = dphi0_batch[offset + i0]; - const Real D1 = dphi1_batch[offset + i1]; - const Real D2 = dphi2_batch[offset + i2]; - const Real DD0 = d2phi0_batch[offset + i0]; - const Real DD1 = d2phi1_batch[offset + i1]; - const Real DD2 = d2phi2_batch[offset + i2]; - const Real H00 = DD0 * v1 * v2; - const Real H11 = v0 * DD1 * v2; - const Real H22 = v0 * v1 * DD2; - const Real H01 = D0 * D1 * v2; - const Real H02 = D0 * v1 * D2; - const Real H12 = v0 * D1 * D2; - const Real h01 = H00 - H01 - H02 + H12; - - H[0u * output_stride + q] = H00 - Real(2) * H01 + H11; - H[1u * output_stride + q] = h01; - H[2u * output_stride + q] = Real(0); - H[3u * output_stride + q] = h01; - H[4u * output_stride + q] = H00 - Real(2) * H02 + H22; - H[5u * output_stride + q] = Real(0); - H[6u * output_stride + q] = Real(0); - H[7u * output_stride + q] = Real(0); - H[8u * output_stride + q] = Real(0); - } - } - return; - } - - std::array phi0_batch; - std::array phi1_batch; - std::array phi2_batch; - std::array dphi0_batch; - std::array dphi1_batch; - std::array dphi2_batch; - std::array d2phi0_batch; - std::array d2phi1_batch; - std::array d2phi2_batch; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l0 = Real(1) - l1 - l2; - const std::size_t offset = q * sequence_size; - Real* d0_out = (need_gradients || need_hessians) ? dphi0_batch.data() + offset : nullptr; - Real* d1_out = (need_gradients || need_hessians) ? dphi1_batch.data() + offset : nullptr; - Real* d2_out = (need_gradients || need_hessians) ? dphi2_batch.data() + offset : nullptr; - Real* d20_out = need_hessians ? d2phi0_batch.data() + offset : nullptr; - Real* d21_out = need_hessians ? d2phi1_batch.data() + offset : nullptr; - Real* d22_out = need_hessians ? 
d2phi2_batch.data() + offset : nullptr; - simplex_lagrange_factor_sequence(order, l0, phi0_batch.data() + offset, d0_out, d20_out); - simplex_lagrange_factor_sequence(order, l1, phi1_batch.data() + offset, d1_out, d21_out); - simplex_lagrange_factor_sequence(order, l2, phi2_batch.data() + offset, d2_out, d22_out); - } - - for (std::size_t node = 0; node < num_nodes; ++node) { - const auto& e = simplex_exponents[node]; - const std::size_t i0 = static_cast(e[0]); - const std::size_t i1 = static_cast(e[1]); - const std::size_t i2 = static_cast(e[2]); - Real* value_row = values_out ? values_out + node * output_stride : nullptr; - Real* g = gradients_out ? gradients_out + node * 3u * output_stride : nullptr; - Real* H = hessians_out ? hessians_out + node * 9u * output_stride : nullptr; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const std::size_t offset = q * sequence_size; - const Real v0 = phi0_batch[offset + i0]; - const Real v1 = phi1_batch[offset + i1]; - const Real v2 = phi2_batch[offset + i2]; - if (value_row != nullptr) { - value_row[q] = v0 * v1 * v2; - } - if (!need_gradients && !need_hessians) { - continue; - } - - const Real D0 = dphi0_batch[offset + i0]; - const Real D1 = dphi1_batch[offset + i1]; - const Real D2 = dphi2_batch[offset + i2]; - - if (gradients_out != nullptr) { - const Real dl0 = D0 * v1 * v2; - const Real dl1 = v0 * D1 * v2; - const Real dl2 = v0 * v1 * D2; - g[0u * output_stride + q] = dl1 - dl0; - g[1u * output_stride + q] = dl2 - dl0; - g[2u * output_stride + q] = Real(0); - } - - if (hessians_out != nullptr) { - const Real DD0 = d2phi0_batch[offset + i0]; - const Real DD1 = d2phi1_batch[offset + i1]; - const Real DD2 = d2phi2_batch[offset + i2]; - const Real H00 = DD0 * v1 * v2; - const Real H11 = v0 * DD1 * v2; - const Real H22 = v0 * v1 * DD2; - const Real H01 = D0 * D1 * v2; - const Real H02 = D0 * v1 * D2; - const Real H12 = v0 * D1 * D2; - const Real h01 = H00 - H01 - H02 + H12; - H[0u * output_stride + q] = H00 - Real(2) * 
H01 + H11; - H[1u * output_stride + q] = h01; - H[2u * output_stride + q] = Real(0); - H[3u * output_stride + q] = h01; - H[4u * output_stride + q] = H00 - Real(2) * H02 + H22; - H[5u * output_stride + q] = Real(0); - H[6u * output_stride + q] = Real(0); - H[7u * output_stride + q] = Real(0); - H[8u * output_stride + q] = Real(0); - } - } - } - return; - } - - SimplexAxisScratch& s0 = simplex_axis_scratch_slot(0); - SimplexAxisScratch& s1 = simplex_axis_scratch_slot(1); - SimplexAxisScratch& s2 = simplex_axis_scratch_slot(2); - s0.reserveFor(sequence_size); - s1.reserveFor(sequence_size); - s2.reserveFor(sequence_size); - - for (std::size_t q = 0; q < points.size(); ++q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l0 = Real(1) - l1 - l2; - - Real* d0_out = (need_gradients || need_hessians) ? s0.dphi() : nullptr; - Real* d1_out = (need_gradients || need_hessians) ? s1.dphi() : nullptr; - Real* d2_out = (need_gradients || need_hessians) ? s2.dphi() : nullptr; - Real* d20_out = need_hessians ? s0.d2phi() : nullptr; - Real* d21_out = need_hessians ? s1.d2phi() : nullptr; - Real* d22_out = need_hessians ? 
s2.d2phi() : nullptr; - - simplex_lagrange_factor_sequence(order, l0, s0.phi(), d0_out, d20_out); - simplex_lagrange_factor_sequence(order, l1, s1.phi(), d1_out, d21_out); - simplex_lagrange_factor_sequence(order, l2, s2.phi(), d2_out, d22_out); - const Real* phi0 = s0.phi(); - const Real* phi1 = s1.phi(); - const Real* phi2 = s2.phi(); - const Real* dphi0 = s0.dphi(); - const Real* dphi1 = s1.dphi(); - const Real* dphi2 = s2.dphi(); - const Real* d2phi0 = s0.d2phi(); - const Real* d2phi1 = s1.d2phi(); - const Real* d2phi2 = s2.d2phi(); - - for (std::size_t node = 0; node < num_nodes; ++node) { - const auto& e = simplex_exponents[node]; - const std::size_t i0 = static_cast(e[0]); - const std::size_t i1 = static_cast(e[1]); - const std::size_t i2 = static_cast(e[2]); - - const Real v0 = phi0[i0]; - const Real v1 = phi1[i1]; - const Real v2 = phi2[i2]; - const Real value = v0 * v1 * v2; - if (values_out != nullptr) { - values_out[node * output_stride + q] = value; - } - if (!need_gradients && !need_hessians) { - continue; - } - - const Real D0 = dphi0[i0]; - const Real D1 = dphi1[i1]; - const Real D2 = dphi2[i2]; - - if (gradients_out != nullptr) { - const Real dl0 = D0 * v1 * v2; - const Real dl1 = v0 * D1 * v2; - const Real dl2 = v0 * v1 * D2; - Real* g = gradients_out + node * 3u * output_stride; - g[0u * output_stride + q] = dl1 - dl0; - g[1u * output_stride + q] = dl2 - dl0; - g[2u * output_stride + q] = Real(0); - } - - if (hessians_out != nullptr) { - const Real DD0 = d2phi0[i0]; - const Real DD1 = d2phi1[i1]; - const Real DD2 = d2phi2[i2]; - - const Real H00 = DD0 * v1 * v2; - const Real H11 = v0 * DD1 * v2; - const Real H22 = v0 * v1 * DD2; - const Real H01 = D0 * D1 * v2; - const Real H02 = D0 * v1 * D2; - const Real H12 = v0 * D1 * D2; - - Real* H = hessians_out + node * 9u * output_stride; - const Real h01 = H00 - H01 - H02 + H12; - H[0u * output_stride + q] = H00 - Real(2) * H01 + H11; - H[1u * output_stride + q] = h01; - H[2u * output_stride + q] = 
Real(0); - H[3u * output_stride + q] = h01; - H[4u * output_stride + q] = H00 - Real(2) * H02 + H22; - H[5u * output_stride + q] = Real(0); - H[6u * output_stride + q] = Real(0); - H[7u * output_stride + q] = Real(0); - H[8u * output_stride + q] = Real(0); - } - } - } -} - -void evaluate_triangle_simplex_basis_wedge_components_strided( - const std::vector>& simplex_exponents, - int order, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_xy_out, - Real* SVMP_RESTRICT hessians_xx_xy_yy_out) { - const std::size_t num_nodes = simplex_exponents.size(); - if (points.empty() || num_nodes == 0u) { - return; - } - - const std::size_t sequence_size = static_cast(order + 1); - const std::size_t num_qpts = points.size(); - const bool need_gradients = gradients_xy_out != nullptr; - const bool need_hessians = hessians_xx_xy_yy_out != nullptr; - const std::size_t batch_entries = sequence_size * num_qpts; - - if (batch_entries <= kFixedSimplexBatchEntries) { - if (values_out != nullptr && - gradients_xy_out != nullptr && - hessians_xx_xy_yy_out == nullptr) { - std::array phi0_batch; - std::array phi1_batch; - std::array phi2_batch; - std::array dphi0_batch; - std::array dphi1_batch; - std::array dphi2_batch; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l0 = Real(1) - l1 - l2; - const std::size_t offset = q * sequence_size; - simplex_lagrange_factor_sequence( - order, l0, phi0_batch.data() + offset, dphi0_batch.data() + offset, nullptr); - simplex_lagrange_factor_sequence( - order, l1, phi1_batch.data() + offset, dphi1_batch.data() + offset, nullptr); - simplex_lagrange_factor_sequence( - order, l2, phi2_batch.data() + offset, dphi2_batch.data() + offset, nullptr); - } - - for (std::size_t node = 0; node < num_nodes; ++node) { - const auto& e = simplex_exponents[node]; - const std::size_t i0 = static_cast(e[0]); 
- const std::size_t i1 = static_cast(e[1]); - const std::size_t i2 = static_cast(e[2]); - Real* value_row = values_out + node * output_stride; - Real* g = gradients_xy_out + node * 2u * output_stride; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const std::size_t offset = q * sequence_size; - const Real v0 = phi0_batch[offset + i0]; - const Real v1 = phi1_batch[offset + i1]; - const Real v2 = phi2_batch[offset + i2]; - const Real D0 = dphi0_batch[offset + i0]; - const Real D1 = dphi1_batch[offset + i1]; - const Real D2 = dphi2_batch[offset + i2]; - const Real dl0 = D0 * v1 * v2; - value_row[q] = v0 * v1 * v2; - g[0u * output_stride + q] = v0 * D1 * v2 - dl0; - g[1u * output_stride + q] = v0 * v1 * D2 - dl0; - } - } - return; - } - - if (values_out != nullptr && - gradients_xy_out != nullptr && - hessians_xx_xy_yy_out != nullptr) { - std::array phi0_batch; - std::array phi1_batch; - std::array phi2_batch; - std::array dphi0_batch; - std::array dphi1_batch; - std::array dphi2_batch; - std::array d2phi0_batch; - std::array d2phi1_batch; - std::array d2phi2_batch; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l0 = Real(1) - l1 - l2; - const std::size_t offset = q * sequence_size; - simplex_lagrange_factor_sequence_impl( - order, l0, phi0_batch.data() + offset, - dphi0_batch.data() + offset, d2phi0_batch.data() + offset); - simplex_lagrange_factor_sequence_impl( - order, l1, phi1_batch.data() + offset, - dphi1_batch.data() + offset, d2phi1_batch.data() + offset); - simplex_lagrange_factor_sequence_impl( - order, l2, phi2_batch.data() + offset, - dphi2_batch.data() + offset, d2phi2_batch.data() + offset); - } - - for (std::size_t node = 0; node < num_nodes; ++node) { - const auto& e = simplex_exponents[node]; - const std::size_t i0 = static_cast(e[0]); - const std::size_t i1 = static_cast(e[1]); - const std::size_t i2 = static_cast(e[2]); - Real* SVMP_RESTRICT value_row 
= values_out + node * output_stride; - Real* SVMP_RESTRICT g = gradients_xy_out + node * 2u * output_stride; - Real* SVMP_RESTRICT H = hessians_xx_xy_yy_out + node * 3u * output_stride; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const std::size_t offset = q * sequence_size; - const Real v0 = phi0_batch[offset + i0]; - const Real v1 = phi1_batch[offset + i1]; - const Real v2 = phi2_batch[offset + i2]; - const Real D0 = dphi0_batch[offset + i0]; - const Real D1 = dphi1_batch[offset + i1]; - const Real D2 = dphi2_batch[offset + i2]; - const Real dl0 = D0 * v1 * v2; - const Real dl1 = v0 * D1 * v2; - const Real dl2 = v0 * v1 * D2; - const Real DD0 = d2phi0_batch[offset + i0]; - const Real DD1 = d2phi1_batch[offset + i1]; - const Real DD2 = d2phi2_batch[offset + i2]; - const Real H00 = DD0 * v1 * v2; - const Real H11 = v0 * DD1 * v2; - const Real H22 = v0 * v1 * DD2; - const Real H01 = D0 * D1 * v2; - const Real H02 = D0 * v1 * D2; - const Real H12 = v0 * D1 * D2; - - value_row[q] = v0 * v1 * v2; - g[0u * output_stride + q] = dl1 - dl0; - g[1u * output_stride + q] = dl2 - dl0; - H[0u * output_stride + q] = H00 - Real(2) * H01 + H11; - H[1u * output_stride + q] = H00 - H01 - H02 + H12; - H[2u * output_stride + q] = H00 - Real(2) * H02 + H22; - } - } - return; - } - - std::array phi0_batch; - std::array phi1_batch; - std::array phi2_batch; - std::array dphi0_batch; - std::array dphi1_batch; - std::array dphi2_batch; - std::array d2phi0_batch; - std::array d2phi1_batch; - std::array d2phi2_batch; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l0 = Real(1) - l1 - l2; - const std::size_t offset = q * sequence_size; - Real* d0_out = (need_gradients || need_hessians) ? dphi0_batch.data() + offset : nullptr; - Real* d1_out = (need_gradients || need_hessians) ? dphi1_batch.data() + offset : nullptr; - Real* d2_out = (need_gradients || need_hessians) ? 
dphi2_batch.data() + offset : nullptr; - Real* d20_out = need_hessians ? d2phi0_batch.data() + offset : nullptr; - Real* d21_out = need_hessians ? d2phi1_batch.data() + offset : nullptr; - Real* d22_out = need_hessians ? d2phi2_batch.data() + offset : nullptr; - simplex_lagrange_factor_sequence(order, l0, phi0_batch.data() + offset, d0_out, d20_out); - simplex_lagrange_factor_sequence(order, l1, phi1_batch.data() + offset, d1_out, d21_out); - simplex_lagrange_factor_sequence(order, l2, phi2_batch.data() + offset, d2_out, d22_out); - } - - for (std::size_t node = 0; node < num_nodes; ++node) { - const auto& e = simplex_exponents[node]; - const std::size_t i0 = static_cast(e[0]); - const std::size_t i1 = static_cast(e[1]); - const std::size_t i2 = static_cast(e[2]); - Real* value_row = values_out ? values_out + node * output_stride : nullptr; - Real* g = gradients_xy_out ? gradients_xy_out + node * 2u * output_stride : nullptr; - Real* H = hessians_xx_xy_yy_out ? hessians_xx_xy_yy_out + node * 3u * output_stride : nullptr; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const std::size_t offset = q * sequence_size; - const Real v0 = phi0_batch[offset + i0]; - const Real v1 = phi1_batch[offset + i1]; - const Real v2 = phi2_batch[offset + i2]; - if (value_row != nullptr) { - value_row[q] = v0 * v1 * v2; - } - if (!need_gradients && !need_hessians) { - continue; - } - - const Real D0 = dphi0_batch[offset + i0]; - const Real D1 = dphi1_batch[offset + i1]; - const Real D2 = dphi2_batch[offset + i2]; - const Real dl0 = D0 * v1 * v2; - const Real dl1 = v0 * D1 * v2; - const Real dl2 = v0 * v1 * D2; - - if (gradients_xy_out != nullptr) { - g[0u * output_stride + q] = dl1 - dl0; - g[1u * output_stride + q] = dl2 - dl0; - } - - if (hessians_xx_xy_yy_out != nullptr) { - const Real DD0 = d2phi0_batch[offset + i0]; - const Real DD1 = d2phi1_batch[offset + i1]; - const Real DD2 = d2phi2_batch[offset + i2]; - const Real H00 = DD0 * v1 * v2; - const Real H11 = v0 * DD1 * v2; - 
const Real H22 = v0 * v1 * DD2; - const Real H01 = D0 * D1 * v2; - const Real H02 = D0 * v1 * D2; - const Real H12 = v0 * D1 * D2; - H[0u * output_stride + q] = H00 - Real(2) * H01 + H11; - H[1u * output_stride + q] = H00 - H01 - H02 + H12; - H[2u * output_stride + q] = H00 - Real(2) * H02 + H22; - } - } - } - return; - } - - SimplexAxisScratch& s0 = simplex_axis_scratch_slot(0); - SimplexAxisScratch& s1 = simplex_axis_scratch_slot(1); - SimplexAxisScratch& s2 = simplex_axis_scratch_slot(2); - s0.reserveFor(sequence_size); - s1.reserveFor(sequence_size); - s2.reserveFor(sequence_size); - - for (std::size_t q = 0; q < points.size(); ++q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l0 = Real(1) - l1 - l2; - - Real* d0_out = (need_gradients || need_hessians) ? s0.dphi() : nullptr; - Real* d1_out = (need_gradients || need_hessians) ? s1.dphi() : nullptr; - Real* d2_out = (need_gradients || need_hessians) ? s2.dphi() : nullptr; - Real* d20_out = need_hessians ? s0.d2phi() : nullptr; - Real* d21_out = need_hessians ? s1.d2phi() : nullptr; - Real* d22_out = need_hessians ? 
s2.d2phi() : nullptr; - simplex_lagrange_factor_sequence(order, l0, s0.phi(), d0_out, d20_out); - simplex_lagrange_factor_sequence(order, l1, s1.phi(), d1_out, d21_out); - simplex_lagrange_factor_sequence(order, l2, s2.phi(), d2_out, d22_out); - - const Real* phi0 = s0.phi(); - const Real* phi1 = s1.phi(); - const Real* phi2 = s2.phi(); - const Real* dphi0 = s0.dphi(); - const Real* dphi1 = s1.dphi(); - const Real* dphi2 = s2.dphi(); - const Real* d2phi0 = s0.d2phi(); - const Real* d2phi1 = s1.d2phi(); - const Real* d2phi2 = s2.d2phi(); - - for (std::size_t node = 0; node < num_nodes; ++node) { - const auto& e = simplex_exponents[node]; - const std::size_t i0 = static_cast(e[0]); - const std::size_t i1 = static_cast(e[1]); - const std::size_t i2 = static_cast(e[2]); - const Real v0 = phi0[i0]; - const Real v1 = phi1[i1]; - const Real v2 = phi2[i2]; - - if (values_out != nullptr) { - values_out[node * output_stride + q] = v0 * v1 * v2; - } - if (!need_gradients && !need_hessians) { - continue; - } - - const Real D0 = dphi0[i0]; - const Real D1 = dphi1[i1]; - const Real D2 = dphi2[i2]; - const Real dl0 = D0 * v1 * v2; - const Real dl1 = v0 * D1 * v2; - const Real dl2 = v0 * v1 * D2; - - if (gradients_xy_out != nullptr) { - Real* g = gradients_xy_out + node * 2u * output_stride; - g[0u * output_stride + q] = dl1 - dl0; - g[1u * output_stride + q] = dl2 - dl0; - } - - if (hessians_xx_xy_yy_out != nullptr) { - const Real DD0 = d2phi0[i0]; - const Real DD1 = d2phi1[i1]; - const Real DD2 = d2phi2[i2]; - const Real H00 = DD0 * v1 * v2; - const Real H11 = v0 * DD1 * v2; - const Real H22 = v0 * v1 * DD2; - const Real H01 = D0 * D1 * v2; - const Real H02 = D0 * v1 * D2; - const Real H12 = v0 * D1 * D2; - Real* H = hessians_xx_xy_yy_out + node * 3u * output_stride; - H[0u * output_stride + q] = H00 - Real(2) * H01 + H11; - H[1u * output_stride + q] = H00 - H01 - H02 + H12; - H[2u * output_stride + q] = H00 - Real(2) * H02 + H22; - } - } - } -} - -template -void 
evaluate_tetrahedron_simplex_basis_impl(const std::vector>& simplex_exponents, - int order, - const math::Vector& xi, - const Sink& sink) { - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l3 = xi[2]; - const Real l0 = Real(1) - l1 - l2 - l3; - - const std::size_t n = static_cast(order + 1); - SimplexAxisScratch& s0 = simplex_axis_scratch_slot(0); - SimplexAxisScratch& s1 = simplex_axis_scratch_slot(1); - SimplexAxisScratch& s2 = simplex_axis_scratch_slot(2); - SimplexAxisScratch& s3 = simplex_axis_scratch_slot(3); - s0.reserveFor(n); - s1.reserveFor(n); - s2.reserveFor(n); - s3.reserveFor(n); - - const std::size_t num_nodes = simplex_exponents.size(); - sink.prepare(num_nodes); - const bool need_values = sink.wants_values(); - const bool need_gradients = sink.wants_gradients(); - const bool need_hessians = sink.wants_hessians(); - Real* d0_out = (need_gradients || need_hessians) ? s0.dphi() : nullptr; - Real* d1_out = (need_gradients || need_hessians) ? s1.dphi() : nullptr; - Real* d2_out = (need_gradients || need_hessians) ? s2.dphi() : nullptr; - Real* d3_out = (need_gradients || need_hessians) ? s3.dphi() : nullptr; - Real* d20_out = need_hessians ? s0.d2phi() : nullptr; - Real* d21_out = need_hessians ? s1.d2phi() : nullptr; - Real* d22_out = need_hessians ? s2.d2phi() : nullptr; - Real* d23_out = need_hessians ? 
s3.d2phi() : nullptr; - - simplex_lagrange_factor_sequence(order, l0, s0.phi(), d0_out, d20_out); - simplex_lagrange_factor_sequence(order, l1, s1.phi(), d1_out, d21_out); - simplex_lagrange_factor_sequence(order, l2, s2.phi(), d2_out, d22_out); - simplex_lagrange_factor_sequence(order, l3, s3.phi(), d3_out, d23_out); - const Real* phi0 = s0.phi(); - const Real* phi1 = s1.phi(); - const Real* phi2 = s2.phi(); - const Real* phi3 = s3.phi(); - const Real* dphi0 = s0.dphi(); - const Real* dphi1 = s1.dphi(); - const Real* dphi2 = s2.dphi(); - const Real* dphi3 = s3.dphi(); - const Real* d2phi0 = s0.d2phi(); - const Real* d2phi1 = s1.d2phi(); - const Real* d2phi2 = s2.d2phi(); - const Real* d2phi3 = s3.d2phi(); - - for (std::size_t n_idx = 0; n_idx < num_nodes; ++n_idx) { - const auto& e = simplex_exponents[n_idx]; - const std::size_t i0 = static_cast(e[0]); - const std::size_t i1 = static_cast(e[1]); - const std::size_t i2 = static_cast(e[2]); - const std::size_t i3 = static_cast(e[3]); - - const Real v0 = phi0[i0]; - const Real v1 = phi1[i1]; - const Real v2 = phi2[i2]; - const Real v3 = phi3[i3]; - if (need_values) { - sink.write_value(n_idx, v0 * v1 * v2 * v3); - } - if (!need_gradients && !need_hessians) { - continue; - } - - const Real D0 = dphi0[i0]; - const Real D1 = dphi1[i1]; - const Real D2 = dphi2[i2]; - const Real D3 = dphi3[i3]; - - if (need_gradients) { - const Real dl0 = D0 * v1 * v2 * v3; - const Real dl1 = v0 * D1 * v2 * v3; - const Real dl2 = v0 * v1 * D2 * v3; - const Real dl3 = v0 * v1 * v2 * D3; - sink.write_gradient(n_idx, dl1 - dl0, dl2 - dl0, dl3 - dl0); - } - - if (need_hessians) { - const Real DD0 = d2phi0[i0]; - const Real DD1 = d2phi1[i1]; - const Real DD2 = d2phi2[i2]; - const Real DD3 = d2phi3[i3]; - - const Real H00 = DD0 * v1 * v2 * v3; - const Real H11 = v0 * DD1 * v2 * v3; - const Real H22 = v0 * v1 * DD2 * v3; - const Real H33 = v0 * v1 * v2 * DD3; - - const Real H01 = D0 * D1 * v2 * v3; - const Real H02 = D0 * v1 * D2 * v3; - const 
Real H03 = D0 * v1 * v2 * D3; - const Real H12 = v0 * D1 * D2 * v3; - const Real H13 = v0 * D1 * v2 * D3; - const Real H23 = v0 * v1 * D2 * D3; - - sink.write_hessian(n_idx, - H00 - Real(2) * H01 + H11, - H00 - Real(2) * H02 + H22, - H00 - Real(2) * H03 + H33, - H00 - H01 - H02 + H12, - H00 - H01 - H03 + H13, - H00 - H02 - H03 + H23); - } - } -} - -void evaluate_tetrahedron_simplex_basis(const std::vector>& simplex_exponents, - int order, - const math::Vector& xi, - std::vector* values, - std::vector* gradients, - std::vector* hessians) { - const SimplexVectorSink sink{values, gradients, hessians}; - evaluate_tetrahedron_simplex_basis_impl(simplex_exponents, order, xi, sink); -} - -void evaluate_tetrahedron_simplex_basis_to(const std::vector>& simplex_exponents, - int order, - const math::Vector& xi, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - const SimplexRawSink sink{values_out, gradients_out, hessians_out}; - evaluate_tetrahedron_simplex_basis_impl(simplex_exponents, order, xi, sink); -} - -void evaluate_tetrahedron_simplex_basis_strided( - const std::vector>& simplex_exponents, - int order, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) { - const std::size_t num_nodes = simplex_exponents.size(); - if (points.empty() || num_nodes == 0u) { - return; - } - - const std::size_t sequence_size = static_cast(order + 1); - const std::size_t num_qpts = points.size(); - const bool need_gradients = gradients_out != nullptr; - const bool need_hessians = hessians_out != nullptr; - if (num_qpts == 4u && - values_out != nullptr && - !need_gradients && - !need_hessians && - try_evaluate_tetrahedron_simplex_values_q4( - simplex_exponents, order, points, output_stride, values_out)) { - return; - } - if (num_qpts == 4u && - values_out == nullptr && - need_gradients && - !need_hessians) { - switch 
(order) { - case 3: - evaluate_tetrahedron_simplex_gradients_q4<3>( - simplex_exponents, points, output_stride, gradients_out); - return; - case 4: - evaluate_tetrahedron_simplex_gradients_q4<4>( - simplex_exponents, points, output_stride, gradients_out); - return; - case 5: - evaluate_tetrahedron_simplex_gradients_q4<5>( - simplex_exponents, points, output_stride, gradients_out); - return; - case 6: - evaluate_tetrahedron_simplex_gradients_q4<6>( - simplex_exponents, points, output_stride, gradients_out); - return; - case 7: - evaluate_tetrahedron_simplex_gradients_q4<7>( - simplex_exponents, points, output_stride, gradients_out); - return; - case 8: - evaluate_tetrahedron_simplex_gradients_q4<8>( - simplex_exponents, points, output_stride, gradients_out); - return; - default: - break; - } - } - if (num_qpts == 4u && - need_hessians && - try_evaluate_tetrahedron_simplex_hessian_outputs_q4( - simplex_exponents, order, points, output_stride, - values_out, gradients_out, hessians_out)) { - return; - } - const std::size_t batch_entries = sequence_size * num_qpts; - if (batch_entries <= kFixedSimplexBatchEntries) { - if (values_out != nullptr && gradients_out == nullptr && hessians_out == nullptr) { - std::array phi0_batch; - std::array phi1_batch; - std::array phi2_batch; - std::array phi3_batch; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l3 = xi[2]; - const Real l0 = Real(1) - l1 - l2 - l3; - const std::size_t offset = q * sequence_size; - simplex_lagrange_factor_sequence( - order, l0, phi0_batch.data() + offset, nullptr, nullptr); - simplex_lagrange_factor_sequence( - order, l1, phi1_batch.data() + offset, nullptr, nullptr); - simplex_lagrange_factor_sequence( - order, l2, phi2_batch.data() + offset, nullptr, nullptr); - simplex_lagrange_factor_sequence( - order, l3, phi3_batch.data() + offset, nullptr, nullptr); - } - - for (std::size_t node = 0; node < num_nodes; 
++node) { - const auto& e = simplex_exponents[node]; - const std::size_t i0 = static_cast(e[0]); - const std::size_t i1 = static_cast(e[1]); - const std::size_t i2 = static_cast(e[2]); - const std::size_t i3 = static_cast(e[3]); - Real* value_row = values_out + node * output_stride; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const std::size_t offset = q * sequence_size; - value_row[q] = - phi0_batch[offset + i0] * - phi1_batch[offset + i1] * - phi2_batch[offset + i2] * - phi3_batch[offset + i3]; - } - } - return; - } - - if (values_out == nullptr && gradients_out != nullptr && hessians_out == nullptr) { - std::array phi0_batch; - std::array phi1_batch; - std::array phi2_batch; - std::array phi3_batch; - std::array dphi0_batch; - std::array dphi1_batch; - std::array dphi2_batch; - std::array dphi3_batch; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l3 = xi[2]; - const Real l0 = Real(1) - l1 - l2 - l3; - const std::size_t offset = q * sequence_size; - simplex_lagrange_factor_sequence( - order, l0, phi0_batch.data() + offset, dphi0_batch.data() + offset, nullptr); - simplex_lagrange_factor_sequence( - order, l1, phi1_batch.data() + offset, dphi1_batch.data() + offset, nullptr); - simplex_lagrange_factor_sequence( - order, l2, phi2_batch.data() + offset, dphi2_batch.data() + offset, nullptr); - simplex_lagrange_factor_sequence( - order, l3, phi3_batch.data() + offset, dphi3_batch.data() + offset, nullptr); - } - - for (std::size_t node = 0; node < num_nodes; ++node) { - const auto& e = simplex_exponents[node]; - const std::size_t i0 = static_cast(e[0]); - const std::size_t i1 = static_cast(e[1]); - const std::size_t i2 = static_cast(e[2]); - const std::size_t i3 = static_cast(e[3]); - Real* g = gradients_out + node * 3u * output_stride; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const std::size_t offset = q * sequence_size; - const Real v0 = 
phi0_batch[offset + i0]; - const Real v1 = phi1_batch[offset + i1]; - const Real v2 = phi2_batch[offset + i2]; - const Real v3 = phi3_batch[offset + i3]; - const Real D0 = dphi0_batch[offset + i0]; - const Real D1 = dphi1_batch[offset + i1]; - const Real D2 = dphi2_batch[offset + i2]; - const Real D3 = dphi3_batch[offset + i3]; - const Real v23 = v2 * v3; - const Real dl0 = D0 * v1 * v23; - g[0u * output_stride + q] = v0 * D1 * v23 - dl0; - g[1u * output_stride + q] = v0 * v1 * D2 * v3 - dl0; - g[2u * output_stride + q] = v0 * v1 * v2 * D3 - dl0; - } - } - return; - } - - std::array phi0_batch; - std::array phi1_batch; - std::array phi2_batch; - std::array phi3_batch; - std::array dphi0_batch; - std::array dphi1_batch; - std::array dphi2_batch; - std::array dphi3_batch; - std::array d2phi0_batch; - std::array d2phi1_batch; - std::array d2phi2_batch; - std::array d2phi3_batch; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l3 = xi[2]; - const Real l0 = Real(1) - l1 - l2 - l3; - const std::size_t offset = q * sequence_size; - Real* d0_out = (need_gradients || need_hessians) ? dphi0_batch.data() + offset : nullptr; - Real* d1_out = (need_gradients || need_hessians) ? dphi1_batch.data() + offset : nullptr; - Real* d2_out = (need_gradients || need_hessians) ? dphi2_batch.data() + offset : nullptr; - Real* d3_out = (need_gradients || need_hessians) ? dphi3_batch.data() + offset : nullptr; - Real* d20_out = need_hessians ? d2phi0_batch.data() + offset : nullptr; - Real* d21_out = need_hessians ? d2phi1_batch.data() + offset : nullptr; - Real* d22_out = need_hessians ? d2phi2_batch.data() + offset : nullptr; - Real* d23_out = need_hessians ? 
d2phi3_batch.data() + offset : nullptr; - simplex_lagrange_factor_sequence(order, l0, phi0_batch.data() + offset, d0_out, d20_out); - simplex_lagrange_factor_sequence(order, l1, phi1_batch.data() + offset, d1_out, d21_out); - simplex_lagrange_factor_sequence(order, l2, phi2_batch.data() + offset, d2_out, d22_out); - simplex_lagrange_factor_sequence(order, l3, phi3_batch.data() + offset, d3_out, d23_out); - } - - for (std::size_t node = 0; node < num_nodes; ++node) { - const auto& e = simplex_exponents[node]; - const std::size_t i0 = static_cast(e[0]); - const std::size_t i1 = static_cast(e[1]); - const std::size_t i2 = static_cast(e[2]); - const std::size_t i3 = static_cast(e[3]); - Real* value_row = values_out ? values_out + node * output_stride : nullptr; - Real* g = gradients_out ? gradients_out + node * 3u * output_stride : nullptr; - Real* H = hessians_out ? hessians_out + node * 9u * output_stride : nullptr; - - for (std::size_t q = 0; q < num_qpts; ++q) { - const std::size_t offset = q * sequence_size; - const Real v0 = phi0_batch[offset + i0]; - const Real v1 = phi1_batch[offset + i1]; - const Real v2 = phi2_batch[offset + i2]; - const Real v3 = phi3_batch[offset + i3]; - if (value_row != nullptr) { - value_row[q] = v0 * v1 * v2 * v3; - } - if (!need_gradients && !need_hessians) { - continue; - } - - const Real D0 = dphi0_batch[offset + i0]; - const Real D1 = dphi1_batch[offset + i1]; - const Real D2 = dphi2_batch[offset + i2]; - const Real D3 = dphi3_batch[offset + i3]; - - if (gradients_out != nullptr) { - const Real dl0 = D0 * v1 * v2 * v3; - const Real dl1 = v0 * D1 * v2 * v3; - const Real dl2 = v0 * v1 * D2 * v3; - const Real dl3 = v0 * v1 * v2 * D3; - g[0u * output_stride + q] = dl1 - dl0; - g[1u * output_stride + q] = dl2 - dl0; - g[2u * output_stride + q] = dl3 - dl0; - } - - if (hessians_out != nullptr) { - const Real DD0 = d2phi0_batch[offset + i0]; - const Real DD1 = d2phi1_batch[offset + i1]; - const Real DD2 = d2phi2_batch[offset + i2]; - const 
Real DD3 = d2phi3_batch[offset + i3]; - const Real H00 = DD0 * v1 * v2 * v3; - const Real H11 = v0 * DD1 * v2 * v3; - const Real H22 = v0 * v1 * DD2 * v3; - const Real H33 = v0 * v1 * v2 * DD3; - const Real H01 = D0 * D1 * v2 * v3; - const Real H02 = D0 * v1 * D2 * v3; - const Real H03 = D0 * v1 * v2 * D3; - const Real H12 = v0 * D1 * D2 * v3; - const Real H13 = v0 * D1 * v2 * D3; - const Real H23 = v0 * v1 * D2 * D3; - const Real h01 = H00 - H01 - H02 + H12; - const Real h02 = H00 - H01 - H03 + H13; - const Real h12 = H00 - H02 - H03 + H23; - H[0u * output_stride + q] = H00 - Real(2) * H01 + H11; - H[1u * output_stride + q] = h01; - H[2u * output_stride + q] = h02; - H[3u * output_stride + q] = h01; - H[4u * output_stride + q] = H00 - Real(2) * H02 + H22; - H[5u * output_stride + q] = h12; - H[6u * output_stride + q] = h02; - H[7u * output_stride + q] = h12; - H[8u * output_stride + q] = H00 - Real(2) * H03 + H33; - } - } - } - return; - } - - SimplexAxisScratch& s0 = simplex_axis_scratch_slot(0); - SimplexAxisScratch& s1 = simplex_axis_scratch_slot(1); - SimplexAxisScratch& s2 = simplex_axis_scratch_slot(2); - SimplexAxisScratch& s3 = simplex_axis_scratch_slot(3); - s0.reserveFor(sequence_size); - s1.reserveFor(sequence_size); - s2.reserveFor(sequence_size); - s3.reserveFor(sequence_size); - - for (std::size_t q = 0; q < points.size(); ++q) { - const auto& xi = points[q]; - const Real l1 = xi[0]; - const Real l2 = xi[1]; - const Real l3 = xi[2]; - const Real l0 = Real(1) - l1 - l2 - l3; - - Real* d0_out = (need_gradients || need_hessians) ? s0.dphi() : nullptr; - Real* d1_out = (need_gradients || need_hessians) ? s1.dphi() : nullptr; - Real* d2_out = (need_gradients || need_hessians) ? s2.dphi() : nullptr; - Real* d3_out = (need_gradients || need_hessians) ? s3.dphi() : nullptr; - Real* d20_out = need_hessians ? s0.d2phi() : nullptr; - Real* d21_out = need_hessians ? s1.d2phi() : nullptr; - Real* d22_out = need_hessians ? 
s2.d2phi() : nullptr; - Real* d23_out = need_hessians ? s3.d2phi() : nullptr; - - simplex_lagrange_factor_sequence(order, l0, s0.phi(), d0_out, d20_out); - simplex_lagrange_factor_sequence(order, l1, s1.phi(), d1_out, d21_out); - simplex_lagrange_factor_sequence(order, l2, s2.phi(), d2_out, d22_out); - simplex_lagrange_factor_sequence(order, l3, s3.phi(), d3_out, d23_out); - const Real* phi0 = s0.phi(); - const Real* phi1 = s1.phi(); - const Real* phi2 = s2.phi(); - const Real* phi3 = s3.phi(); - const Real* dphi0 = s0.dphi(); - const Real* dphi1 = s1.dphi(); - const Real* dphi2 = s2.dphi(); - const Real* dphi3 = s3.dphi(); - const Real* d2phi0 = s0.d2phi(); - const Real* d2phi1 = s1.d2phi(); - const Real* d2phi2 = s2.d2phi(); - const Real* d2phi3 = s3.d2phi(); - - for (std::size_t node = 0; node < num_nodes; ++node) { - const auto& e = simplex_exponents[node]; - const std::size_t i0 = static_cast(e[0]); - const std::size_t i1 = static_cast(e[1]); - const std::size_t i2 = static_cast(e[2]); - const std::size_t i3 = static_cast(e[3]); - - const Real v0 = phi0[i0]; - const Real v1 = phi1[i1]; - const Real v2 = phi2[i2]; - const Real v3 = phi3[i3]; - if (values_out != nullptr) { - values_out[node * output_stride + q] = v0 * v1 * v2 * v3; - } - if (!need_gradients && !need_hessians) { - continue; - } - - const Real D0 = dphi0[i0]; - const Real D1 = dphi1[i1]; - const Real D2 = dphi2[i2]; - const Real D3 = dphi3[i3]; - - if (gradients_out != nullptr) { - const Real dl0 = D0 * v1 * v2 * v3; - const Real dl1 = v0 * D1 * v2 * v3; - const Real dl2 = v0 * v1 * D2 * v3; - const Real dl3 = v0 * v1 * v2 * D3; - Real* g = gradients_out + node * 3u * output_stride; - g[0u * output_stride + q] = dl1 - dl0; - g[1u * output_stride + q] = dl2 - dl0; - g[2u * output_stride + q] = dl3 - dl0; - } - - if (hessians_out != nullptr) { - const Real DD0 = d2phi0[i0]; - const Real DD1 = d2phi1[i1]; - const Real DD2 = d2phi2[i2]; - const Real DD3 = d2phi3[i3]; - - const Real H00 = DD0 * v1 * v2 
* v3; - const Real H11 = v0 * DD1 * v2 * v3; - const Real H22 = v0 * v1 * DD2 * v3; - const Real H33 = v0 * v1 * v2 * DD3; - - const Real H01 = D0 * D1 * v2 * v3; - const Real H02 = D0 * v1 * D2 * v3; - const Real H03 = D0 * v1 * v2 * D3; - const Real H12 = v0 * D1 * D2 * v3; - const Real H13 = v0 * D1 * v2 * D3; - const Real H23 = v0 * v1 * D2 * D3; - - const Real h01 = H00 - H01 - H02 + H12; - const Real h02 = H00 - H01 - H03 + H13; - const Real h12 = H00 - H02 - H03 + H23; - - Real* H = hessians_out + node * 9u * output_stride; - H[0u * output_stride + q] = H00 - Real(2) * H01 + H11; - H[1u * output_stride + q] = h01; - H[2u * output_stride + q] = h02; - H[3u * output_stride + q] = h01; - H[4u * output_stride + q] = H00 - Real(2) * H02 + H22; - H[5u * output_stride + q] = h12; - H[6u * output_stride + q] = h02; - H[7u * output_stride + q] = h12; - H[8u * output_stride + q] = H00 - Real(2) * H03 + H33; - } - } - } -} - -} // namespace detail -} // namespace basis -} // namespace FE -} // namespace svmp diff --git a/Code/Source/solver/FE/Basis/LagrangeBasisSimplex.h b/Code/Source/solver/FE/Basis/LagrangeBasisSimplex.h deleted file mode 100644 index 19cf725bd..000000000 --- a/Code/Source/solver/FE/Basis/LagrangeBasisSimplex.h +++ /dev/null @@ -1,78 +0,0 @@ -#ifndef SVMP_FE_BASIS_LAGRANGEBASISSIMPLEX_H -#define SVMP_FE_BASIS_LAGRANGEBASISSIMPLEX_H - -// Private declarations for simplex Lagrange evaluation helpers implemented in -// LagrangeBasisSimplex.cpp. 
- -#include "BasisFunction.h" - -#include -#include -#include - -namespace svmp { -namespace FE { -namespace basis { -namespace detail { - -void evaluate_triangle_simplex_basis(const std::vector>& simplex_exponents, - int order, - const math::Vector& xi, - std::vector* values, - std::vector* gradients, - std::vector* hessians); - -void evaluate_triangle_simplex_basis_to(const std::vector>& simplex_exponents, - int order, - const math::Vector& xi, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out); - -void evaluate_triangle_simplex_basis_strided( - const std::vector>& simplex_exponents, - int order, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out); - -void evaluate_triangle_simplex_basis_wedge_components_strided( - const std::vector>& simplex_exponents, - int order, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_xy_out, - Real* SVMP_RESTRICT hessians_xx_xy_yy_out); - -void evaluate_tetrahedron_simplex_basis(const std::vector>& simplex_exponents, - int order, - const math::Vector& xi, - std::vector* values, - std::vector* gradients, - std::vector* hessians); - -void evaluate_tetrahedron_simplex_basis_to(const std::vector>& simplex_exponents, - int order, - const math::Vector& xi, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out); - -void evaluate_tetrahedron_simplex_basis_strided( - const std::vector>& simplex_exponents, - int order, - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out); - -} // namespace detail -} // namespace basis -} // namespace FE -} // namespace svmp - -#endif // SVMP_FE_BASIS_LAGRANGEBASISSIMPLEX_H diff --git 
a/Code/Source/solver/FE/Basis/LagrangeBasisUtility.h b/Code/Source/solver/FE/Basis/LagrangeBasisUtility.h deleted file mode 100644 index e622de1c6..000000000 --- a/Code/Source/solver/FE/Basis/LagrangeBasisUtility.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef SVMP_FE_BASIS_LAGRANGEBASISUTILITY_H -#define SVMP_FE_BASIS_LAGRANGEBASISUTILITY_H - -// Private helper for LagrangeBasis internals. -// This header is only intended to be included after the FE basis scalar types -// are already available. - -namespace svmp { -namespace FE { -namespace basis { -namespace detail { - -inline constexpr Real equispaced_pm_one_coord(int i, int order) { - if (order <= 0) { - return Real(0); - } - return Real(-1) + Real(2) * static_cast(i) / static_cast(order); -} - -} // namespace detail -} // namespace basis -} // namespace FE -} // namespace svmp - -#endif // SVMP_FE_BASIS_LAGRANGEBASISUTILITY_H diff --git a/Code/Source/solver/FE/Basis/NodeOrderingConventions.cpp b/Code/Source/solver/FE/Basis/NodeOrderingConventions.cpp index 20f743916..ae3ea8ed3 100644 --- a/Code/Source/solver/FE/Basis/NodeOrderingConventions.cpp +++ b/Code/Source/solver/FE/Basis/NodeOrderingConventions.cpp @@ -6,8 +6,8 @@ */ #include "NodeOrderingConventions.h" -#include "Basis/BasisExceptions.h" -#include "Basis/BasisTraits.h" +#include "BasisExceptions.h" +#include "BasisTraits.h" #include @@ -18,160 +18,7 @@ namespace basis { namespace { using Point = math::Vector; -using RawPoint = std::array; -template -using NodeTable = std::array; - -struct NodeTableView { - const RawPoint* data{nullptr}; - std::size_t size{0}; -}; - -inline constexpr NodeTable<2> kLine2Nodes = {{ - {Real(-1), Real(0), Real(0)}, - {Real(1), Real(0), Real(0)}, -}}; - -inline constexpr NodeTable<3> kLine3Nodes = {{ - {Real(-1), Real(0), Real(0)}, - {Real(1), Real(0), Real(0)}, - {Real(0), Real(0), Real(0)}, -}}; - -inline constexpr NodeTable<3> kTriangle3Nodes = {{ - {Real(0), Real(0), Real(0)}, - {Real(1), Real(0), Real(0)}, - {Real(0), 
Real(1), Real(0)}, -}}; - -inline constexpr NodeTable<6> kTriangle6Nodes = {{ - {Real(0), Real(0), Real(0)}, - {Real(1), Real(0), Real(0)}, - {Real(0), Real(1), Real(0)}, - {Real(0.5), Real(0), Real(0)}, - {Real(0.5), Real(0.5), Real(0)}, - {Real(0), Real(0.5), Real(0)}, -}}; - -inline constexpr NodeTable<4> kQuad4Nodes = {{ - {Real(-1), Real(-1), Real(0)}, - {Real(1), Real(-1), Real(0)}, - {Real(1), Real(1), Real(0)}, - {Real(-1), Real(1), Real(0)}, -}}; - -inline constexpr NodeTable<9> kQuad9Nodes = {{ - {Real(-1), Real(-1), Real(0)}, - {Real(1), Real(-1), Real(0)}, - {Real(1), Real(1), Real(0)}, - {Real(-1), Real(1), Real(0)}, - {Real(0), Real(-1), Real(0)}, - {Real(1), Real(0), Real(0)}, - {Real(0), Real(1), Real(0)}, - {Real(-1), Real(0), Real(0)}, - {Real(0), Real(0), Real(0)}, -}}; - -inline constexpr NodeTable<8> kQuad8Nodes = {{ - {Real(-1), Real(-1), Real(0)}, - {Real(1), Real(-1), Real(0)}, - {Real(1), Real(1), Real(0)}, - {Real(-1), Real(1), Real(0)}, - {Real(0), Real(-1), Real(0)}, - {Real(1), Real(0), Real(0)}, - {Real(0), Real(1), Real(0)}, - {Real(-1), Real(0), Real(0)}, -}}; - -inline constexpr NodeTable<4> kTetra4Nodes = {{ - {Real(0), Real(0), Real(0)}, - {Real(1), Real(0), Real(0)}, - {Real(0), Real(1), Real(0)}, - {Real(0), Real(0), Real(1)}, -}}; - -inline constexpr NodeTable<10> kTetra10Nodes = {{ - {Real(0), Real(0), Real(0)}, - {Real(1), Real(0), Real(0)}, - {Real(0), Real(1), Real(0)}, - {Real(0), Real(0), Real(1)}, - {Real(0.5), Real(0), Real(0)}, - {Real(0.5), Real(0.5), Real(0)}, - {Real(0), Real(0.5), Real(0)}, - {Real(0), Real(0), Real(0.5)}, - {Real(0.5), Real(0), Real(0.5)}, - {Real(0), Real(0.5), Real(0.5)}, -}}; - -inline constexpr NodeTable<8> kHex8Nodes = {{ - {Real(-1), Real(-1), Real(-1)}, - {Real(1), Real(-1), Real(-1)}, - {Real(1), Real(1), Real(-1)}, - {Real(-1), Real(1), Real(-1)}, - {Real(-1), Real(-1), Real(1)}, - {Real(1), Real(-1), Real(1)}, - {Real(1), Real(1), Real(1)}, - {Real(-1), Real(1), Real(1)}, -}}; - -inline 
constexpr NodeTable<27> kHex27Nodes = {{ - {Real(-1), Real(-1), Real(-1)}, - {Real(1), Real(-1), Real(-1)}, - {Real(1), Real(1), Real(-1)}, - {Real(-1), Real(1), Real(-1)}, - {Real(-1), Real(-1), Real(1)}, - {Real(1), Real(-1), Real(1)}, - {Real(1), Real(1), Real(1)}, - {Real(-1), Real(1), Real(1)}, - {Real(0), Real(-1), Real(-1)}, - {Real(1), Real(0), Real(-1)}, - {Real(0), Real(1), Real(-1)}, - {Real(-1), Real(0), Real(-1)}, - {Real(0), Real(-1), Real(1)}, - {Real(1), Real(0), Real(1)}, - {Real(0), Real(1), Real(1)}, - {Real(-1), Real(0), Real(1)}, - {Real(-1), Real(-1), Real(0)}, - {Real(1), Real(-1), Real(0)}, - {Real(1), Real(1), Real(0)}, - {Real(-1), Real(1), Real(0)}, - {Real(0), Real(0), Real(-1)}, - {Real(0), Real(0), Real(1)}, - {Real(0), Real(-1), Real(0)}, - {Real(1), Real(0), Real(0)}, - {Real(0), Real(1), Real(0)}, - {Real(-1), Real(0), Real(0)}, - {Real(0), Real(0), Real(0)}, -}}; - -inline constexpr NodeTable<20> kHex20Nodes = {{ - {Real(-1), Real(-1), Real(-1)}, - {Real(1), Real(-1), Real(-1)}, - {Real(1), Real(1), Real(-1)}, - {Real(-1), Real(1), Real(-1)}, - {Real(-1), Real(-1), Real(1)}, - {Real(1), Real(-1), Real(1)}, - {Real(1), Real(1), Real(1)}, - {Real(-1), Real(1), Real(1)}, - {Real(0), Real(-1), Real(-1)}, - {Real(1), Real(0), Real(-1)}, - {Real(0), Real(1), Real(-1)}, - {Real(-1), Real(0), Real(-1)}, - {Real(0), Real(-1), Real(1)}, - {Real(1), Real(0), Real(1)}, - {Real(0), Real(1), Real(1)}, - {Real(-1), Real(0), Real(1)}, - {Real(-1), Real(-1), Real(0)}, - {Real(1), Real(-1), Real(0)}, - {Real(1), Real(1), Real(0)}, - {Real(-1), Real(1), Real(0)}, -}}; - -// Mesh uses conventional Hex20 ordering: corners first, then edge midpoints in -// {bottom, top, vertical} groups. The quadratic Hex20 serendipity polynomial -// table uses an axis-grouped edge order. This maps public mesh/reference index -// to the internal polynomial-table index. 
constexpr std::array kHex20MeshToBasisOrder = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 13, 10, 12, @@ -179,157 +26,6 @@ constexpr std::array kHex20MeshToBasisOrder = { 16, 17, 19, 18 }; -inline constexpr NodeTable<6> kWedge6Nodes = {{ - {Real(0), Real(0), Real(-1)}, - {Real(1), Real(0), Real(-1)}, - {Real(0), Real(1), Real(-1)}, - {Real(0), Real(0), Real(1)}, - {Real(1), Real(0), Real(1)}, - {Real(0), Real(1), Real(1)}, -}}; - -inline constexpr NodeTable<18> kWedge18Nodes = {{ - {Real(0), Real(0), Real(-1)}, - {Real(1), Real(0), Real(-1)}, - {Real(0), Real(1), Real(-1)}, - {Real(0), Real(0), Real(1)}, - {Real(1), Real(0), Real(1)}, - {Real(0), Real(1), Real(1)}, - {Real(0.5), Real(0), Real(-1)}, - {Real(0.5), Real(0.5), Real(-1)}, - {Real(0), Real(0.5), Real(-1)}, - {Real(0.5), Real(0), Real(1)}, - {Real(0.5), Real(0.5), Real(1)}, - {Real(0), Real(0.5), Real(1)}, - {Real(0), Real(0), Real(0)}, - {Real(1), Real(0), Real(0)}, - {Real(0), Real(1), Real(0)}, - {Real(0.5), Real(0), Real(0)}, - {Real(0.5), Real(0.5), Real(0)}, - {Real(0), Real(0.5), Real(0)}, -}}; - -inline constexpr NodeTable<15> kWedge15Nodes = {{ - {Real(0), Real(0), Real(-1)}, - {Real(1), Real(0), Real(-1)}, - {Real(0), Real(1), Real(-1)}, - {Real(0), Real(0), Real(1)}, - {Real(1), Real(0), Real(1)}, - {Real(0), Real(1), Real(1)}, - {Real(0.5), Real(0), Real(-1)}, - {Real(0.5), Real(0.5), Real(-1)}, - {Real(0), Real(0.5), Real(-1)}, - {Real(0.5), Real(0), Real(1)}, - {Real(0.5), Real(0.5), Real(1)}, - {Real(0), Real(0.5), Real(1)}, - {Real(0), Real(0), Real(0)}, - {Real(1), Real(0), Real(0)}, - {Real(0), Real(1), Real(0)}, -}}; - -inline constexpr NodeTable<5> kPyramid5Nodes = {{ - {Real(-1), Real(-1), Real(0)}, - {Real(1), Real(-1), Real(0)}, - {Real(1), Real(1), Real(0)}, - {Real(-1), Real(1), Real(0)}, - {Real(0), Real(0), Real(1)}, -}}; - -inline constexpr NodeTable<14> kPyramid14Nodes = {{ - {Real(-1), Real(-1), Real(0)}, - {Real(1), Real(-1), Real(0)}, - {Real(1), Real(1), Real(0)}, - {Real(-1), Real(1), 
Real(0)}, - {Real(0), Real(0), Real(1)}, - {Real(0), Real(-1), Real(0)}, - {Real(1), Real(0), Real(0)}, - {Real(0), Real(1), Real(0)}, - {Real(-1), Real(0), Real(0)}, - {Real(-0.5), Real(-0.5), Real(0.5)}, - {Real(0.5), Real(-0.5), Real(0.5)}, - {Real(0.5), Real(0.5), Real(0.5)}, - {Real(-0.5), Real(0.5), Real(0.5)}, - {Real(0), Real(0), Real(0)}, -}}; - -inline constexpr NodeTable<13> kPyramid13Nodes = {{ - {Real(-1), Real(-1), Real(0)}, - {Real(1), Real(-1), Real(0)}, - {Real(1), Real(1), Real(0)}, - {Real(-1), Real(1), Real(0)}, - {Real(0), Real(0), Real(1)}, - {Real(0), Real(-1), Real(0)}, - {Real(1), Real(0), Real(0)}, - {Real(0), Real(1), Real(0)}, - {Real(-1), Real(0), Real(0)}, - {Real(-0.5), Real(-0.5), Real(0.5)}, - {Real(0.5), Real(-0.5), Real(0.5)}, - {Real(0.5), Real(0.5), Real(0.5)}, - {Real(-0.5), Real(0.5), Real(0.5)}, -}}; - -template -constexpr NodeTableView view(const NodeTable& table) noexcept { - return NodeTableView{table.data(), table.size()}; -} - -Point to_point(const RawPoint& raw) { - return Point{raw[0], raw[1], raw[2]}; -} - -constexpr NodeTableView fixed_node_table(ElementType elem_type) noexcept { - switch (elem_type) { - case ElementType::Line2: return view(kLine2Nodes); - case ElementType::Line3: return view(kLine3Nodes); - case ElementType::Triangle3: return view(kTriangle3Nodes); - case ElementType::Triangle6: return view(kTriangle6Nodes); - case ElementType::Quad4: return view(kQuad4Nodes); - case ElementType::Quad8: return view(kQuad8Nodes); - case ElementType::Quad9: return view(kQuad9Nodes); - case ElementType::Tetra4: return view(kTetra4Nodes); - case ElementType::Tetra10: return view(kTetra10Nodes); - case ElementType::Hex8: return view(kHex8Nodes); - case ElementType::Hex20: return view(kHex20Nodes); - case ElementType::Hex27: return view(kHex27Nodes); - case ElementType::Wedge6: return view(kWedge6Nodes); - case ElementType::Wedge15: return view(kWedge15Nodes); - case ElementType::Wedge18: return view(kWedge18Nodes); - 
case ElementType::Pyramid5: return view(kPyramid5Nodes); - case ElementType::Pyramid13: return view(kPyramid13Nodes); - case ElementType::Pyramid14: return view(kPyramid14Nodes); - default: return {}; - } -} - -constexpr NodeTableView fixed_complete_lagrange_table(ElementType canonical_type, - int order) noexcept { - switch (canonical_type) { - case ElementType::Line2: - return order == 1 ? view(kLine2Nodes) : - order == 2 ? view(kLine3Nodes) : NodeTableView{}; - case ElementType::Triangle3: - return order == 1 ? view(kTriangle3Nodes) : - order == 2 ? view(kTriangle6Nodes) : NodeTableView{}; - case ElementType::Quad4: - return order == 1 ? view(kQuad4Nodes) : - order == 2 ? view(kQuad9Nodes) : NodeTableView{}; - case ElementType::Tetra4: - return order == 1 ? view(kTetra4Nodes) : - order == 2 ? view(kTetra10Nodes) : NodeTableView{}; - case ElementType::Hex8: - return order == 1 ? view(kHex8Nodes) : - order == 2 ? view(kHex27Nodes) : NodeTableView{}; - case ElementType::Wedge6: - return order == 1 ? view(kWedge6Nodes) : - order == 2 ? view(kWedge18Nodes) : NodeTableView{}; - case ElementType::Pyramid5: - return order == 1 ? view(kPyramid5Nodes) : - order == 2 ? 
view(kPyramid14Nodes) : NodeTableView{}; - default: - return {}; - } -} - Real line_coord_pm_one(int i, int order) { if (order <= 0) { return Real(0); @@ -352,10 +48,10 @@ void append_triangle_face_interior(std::vector& nodes, for (int c = 1; c <= order - 2; ++c) { for (int b = 1; b <= order - c - 1; ++b) { const int a = order - b - c; - const Real la = static_cast(a) / static_cast(order); - const Real lb = static_cast(b) / static_cast(order); - const Real lc = static_cast(c) / static_cast(order); - nodes.push_back(v0 * la + v1 * lb + v2 * lc); + const Real inv = Real(1) / Real(order); + nodes.push_back(v0 * (Real(a) * inv) + + v1 * (Real(b) * inv) + + v2 * (Real(c) * inv)); } } } @@ -382,7 +78,6 @@ std::vector generate_triangle_nodes(int order) { std::vector nodes; nodes.reserve(static_cast((order + 1) * (order + 2) / 2)); - nodes.push_back(Point{Real(0), Real(0), Real(0)}); nodes.push_back(Point{Real(1), Real(0), Real(0)}); nodes.push_back(Point{Real(0), Real(1), Real(0)}); @@ -398,13 +93,11 @@ std::vector generate_triangle_nodes(int order) { nodes.push_back(Point{Real(0), line_coord_zero_one(order - m, order), Real(0)}); } - append_triangle_face_interior( - nodes, - Point{Real(0), Real(0), Real(0)}, - Point{Real(1), Real(0), Real(0)}, - Point{Real(0), Real(1), Real(0)}, - order); - + append_triangle_face_interior(nodes, + Point{Real(0), Real(0), Real(0)}, + Point{Real(1), Real(0), Real(0)}, + Point{Real(0), Real(1), Real(0)}, + order); return nodes; } @@ -415,7 +108,6 @@ std::vector generate_quad_nodes(int order) { std::vector nodes; nodes.reserve(static_cast((order + 1) * (order + 1))); - nodes.push_back(Point{Real(-1), Real(-1), Real(0)}); nodes.push_back(Point{Real(1), Real(-1), Real(0)}); nodes.push_back(Point{Real(1), Real(1), Real(0)}); @@ -433,13 +125,12 @@ std::vector generate_quad_nodes(int order) { for (int j = order - 1; j >= 1; --j) { nodes.push_back(Point{Real(-1), line_coord_pm_one(j, order), Real(0)}); } - for (int j = 1; j < order; ++j) { for 
(int i = 1; i < order; ++i) { - nodes.push_back(Point{line_coord_pm_one(i, order), line_coord_pm_one(j, order), Real(0)}); + nodes.push_back(Point{line_coord_pm_one(i, order), + line_coord_pm_one(j, order), Real(0)}); } } - return nodes; } @@ -448,22 +139,20 @@ std::vector generate_tetra_nodes(int order) { return {Point{Real(0.25), Real(0.25), Real(0.25)}}; } - std::vector nodes; - nodes.reserve(static_cast((order + 1) * (order + 2) * (order + 3) / 6)); - const Point verts[] = { Point{Real(0), Real(0), Real(0)}, Point{Real(1), Real(0), Real(0)}, Point{Real(0), Real(1), Real(0)}, Point{Real(0), Real(0), Real(1)}, }; + + std::vector nodes; + nodes.reserve(static_cast((order + 1) * (order + 2) * (order + 3) / 6)); for (const auto& v : verts) { nodes.push_back(v); } - const int edges[6][2] = { - {0, 1}, {1, 2}, {2, 0}, {0, 3}, {1, 3}, {2, 3} - }; + const int edges[6][2] = {{0, 1}, {1, 2}, {2, 0}, {0, 3}, {1, 3}, {2, 3}}; for (const auto& edge : edges) { for (int m = 1; m < order; ++m) { const Real t = static_cast(m) / static_cast(order); @@ -471,32 +160,24 @@ std::vector generate_tetra_nodes(int order) { } } - const int faces[4][3] = { - {0, 1, 2}, - {0, 1, 3}, - {1, 2, 3}, - {0, 2, 3}, - }; + const int faces[4][3] = {{0, 1, 2}, {0, 1, 3}, {1, 2, 3}, {0, 2, 3}}; for (const auto& face : faces) { - append_triangle_face_interior( - nodes, - verts[face[0]], - verts[face[1]], - verts[face[2]], - order); + append_triangle_face_interior(nodes, + verts[face[0]], + verts[face[1]], + verts[face[2]], + order); } for (int l = 1; l <= order - 3; ++l) { for (int k = 1; k <= order - l - 2; ++k) { for (int j = 1; j <= order - l - k - 1; ++j) { - const Real x = static_cast(j) / static_cast(order); - const Real y = static_cast(k) / static_cast(order); - const Real z = static_cast(l) / static_cast(order); - nodes.push_back(Point{x, y, z}); + nodes.push_back(Point{Real(j) / Real(order), + Real(k) / Real(order), + Real(l) / Real(order)}); } } } - return nodes; } @@ -505,9 +186,6 @@ 
std::vector generate_hex_nodes(int order) { return {Point{Real(0), Real(0), Real(0)}}; } - std::vector nodes; - nodes.reserve(static_cast((order + 1) * (order + 1) * (order + 1))); - const Point verts[] = { Point{Real(-1), Real(-1), Real(-1)}, Point{Real(1), Real(-1), Real(-1)}, @@ -518,6 +196,9 @@ std::vector generate_hex_nodes(int order) { Point{Real(1), Real(1), Real(1)}, Point{Real(-1), Real(1), Real(1)}, }; + + std::vector nodes; + nodes.reserve(static_cast((order + 1) * (order + 1) * (order + 1))); for (const auto& v : verts) { nodes.push_back(v); } @@ -564,7 +245,6 @@ std::vector generate_hex_nodes(int order) { nodes.push_back(Point{Real(-1), line_coord_pm_one(j, order), line_coord_pm_one(k, order)}); } } - for (int k = 1; k < order; ++k) { for (int j = 1; j < order; ++j) { for (int i = 1; i < order; ++i) { @@ -574,7 +254,6 @@ std::vector generate_hex_nodes(int order) { } } } - return nodes; } @@ -583,9 +262,6 @@ std::vector generate_wedge_nodes(int order) { return {Point{Real(1) / Real(3), Real(1) / Real(3), Real(0)}}; } - std::vector nodes; - nodes.reserve(static_cast((order + 1) * (order + 1) * (order + 2) / 2)); - const Point verts[] = { Point{Real(0), Real(0), Real(-1)}, Point{Real(1), Real(0), Real(-1)}, @@ -594,6 +270,9 @@ std::vector generate_wedge_nodes(int order) { Point{Real(1), Real(0), Real(1)}, Point{Real(0), Real(1), Real(1)}, }; + + std::vector nodes; + nodes.reserve(static_cast((order + 1) * (order + 1) * (order + 2) / 2)); for (const auto& v : verts) { nodes.push_back(v); } @@ -610,10 +289,8 @@ std::vector generate_wedge_nodes(int order) { } } - append_triangle_face_interior( - nodes, verts[0], verts[1], verts[2], order); - append_triangle_face_interior( - nodes, verts[3], verts[4], verts[5], order); + append_triangle_face_interior(nodes, verts[0], verts[1], verts[2], order); + append_triangle_face_interior(nodes, verts[3], verts[4], verts[5], order); for (int r = 1; r < order; ++r) { const Real z = line_coord_pm_one(r, order); @@ -635,138 
+312,21 @@ std::vector generate_wedge_nodes(int order) { const Real z = line_coord_pm_one(r, order); for (int c = 1; c <= order - 2; ++c) { for (int b = 1; b <= order - c - 1; ++b) { - const Real x = static_cast(b) / static_cast(order); - const Real y = static_cast(c) / static_cast(order); - nodes.push_back(Point{x, y, z}); - } - } - } - - return nodes; -} - -std::vector generate_pyramid_nodes(int order) { - if (order == 0) { - return {Point{Real(0), Real(0), Real(0.25)}}; - } - - std::vector nodes; - nodes.reserve(static_cast((order + 1) * (order + 2) * (2 * order + 3) / 6)); - - nodes.push_back(Point{Real(-1), Real(-1), Real(0)}); - nodes.push_back(Point{Real(1), Real(-1), Real(0)}); - nodes.push_back(Point{Real(1), Real(1), Real(0)}); - nodes.push_back(Point{Real(-1), Real(1), Real(0)}); - nodes.push_back(Point{Real(0), Real(0), Real(1)}); - - for (int m = 1; m < order; ++m) { - nodes.push_back(Point{line_coord_pm_one(m, order), Real(-1), Real(0)}); - } - for (int m = 1; m < order; ++m) { - nodes.push_back(Point{Real(1), line_coord_pm_one(m, order), Real(0)}); - } - for (int m = order - 1; m >= 1; --m) { - nodes.push_back(Point{line_coord_pm_one(m, order), Real(1), Real(0)}); - } - for (int m = order - 1; m >= 1; --m) { - nodes.push_back(Point{Real(-1), line_coord_pm_one(m, order), Real(0)}); - } - - for (int level = 1; level < order; ++level) { - const Real z = static_cast(level) / static_cast(order); - const Real scale = Real(1) - z; - nodes.push_back(Point{-scale, -scale, z}); - nodes.push_back(Point{scale, -scale, z}); - nodes.push_back(Point{scale, scale, z}); - nodes.push_back(Point{-scale, scale, z}); - } - - for (int j = 1; j < order; ++j) { - for (int i = 1; i < order; ++i) { - nodes.push_back(Point{line_coord_pm_one(i, order), line_coord_pm_one(j, order), Real(0)}); - } - } - - for (int level = 1; level < order - 1; ++level) { - const int n = order - level; - const Real z = static_cast(level) / static_cast(order); - const Real scale = Real(1) - z; - - 
for (int m = 1; m < n; ++m) { - const Real s = line_coord_pm_one(m, n) * scale; - nodes.push_back(Point{s, -scale, z}); - } - for (int m = 1; m < n; ++m) { - const Real s = line_coord_pm_one(m, n) * scale; - nodes.push_back(Point{scale, s, z}); - } - for (int m = n - 1; m >= 1; --m) { - const Real s = line_coord_pm_one(m, n) * scale; - nodes.push_back(Point{s, scale, z}); - } - for (int m = n - 1; m >= 1; --m) { - const Real s = line_coord_pm_one(m, n) * scale; - nodes.push_back(Point{-scale, s, z}); - } - } - - for (int level = 1; level < order - 1; ++level) { - const int n = order - level; - const Real z = static_cast(level) / static_cast(order); - const Real scale = Real(1) - z; - for (int j = 1; j < n; ++j) { - for (int i = 1; i < n; ++i) { - nodes.push_back(Point{line_coord_pm_one(i, n) * scale, - line_coord_pm_one(j, n) * scale, + nodes.push_back(Point{Real(b) / Real(order), + Real(c) / Real(order), z}); } } } - return nodes; } -} // namespace - -math::Vector ReferenceNodeLayout::get_node_coords(ElementType elem_type, - std::size_t local_node) { - const auto table = fixed_node_table(elem_type); - if (table.data != nullptr && local_node < table.size) { - return to_point(table.data[local_node]); - } - - throw BasisNodeOrderingException("Invalid element type or node index in ReferenceNodeLayout::get_node_coords", - __FILE__, __LINE__, __func__); -} - -std::size_t ReferenceNodeLayout::num_nodes(ElementType elem_type) { - const auto table = fixed_node_table(elem_type); - if (table.data != nullptr) { - return table.size; - } - - throw BasisNodeOrderingException("Unknown element type in ReferenceNodeLayout::num_nodes", - __FILE__, __LINE__, __func__); -} - -std::vector> -ReferenceNodeLayout::get_lagrange_node_coords(ElementType canonical_type, int order) { +std::vector complete_lagrange_nodes(ElementType canonical_type, int order) { if (order < 0) { - throw BasisNodeOrderingException("ReferenceNodeLayout::get_lagrange_node_coords requires non-negative order", + 
throw BasisNodeOrderingException("ReferenceNodeLayout requires non-negative Lagrange order", __FILE__, __LINE__, __func__); } - const ElementType type = canonical_lagrange_type(canonical_type); - const auto fixed_table = fixed_complete_lagrange_table(type, order); - if (fixed_table.data != nullptr) { - std::vector nodes; - nodes.reserve(fixed_table.size); - for (std::size_t i = 0; i < fixed_table.size; ++i) { - nodes.push_back(to_point(fixed_table.data[i])); - } - return nodes; - } - switch (type) { case ElementType::Point1: return {Point{Real(0), Real(0), Real(0)}}; @@ -783,24 +343,70 @@ ReferenceNodeLayout::get_lagrange_node_coords(ElementType canonical_type, int or case ElementType::Wedge6: return generate_wedge_nodes(order); case ElementType::Pyramid5: - return generate_pyramid_nodes(order); - case ElementType::Quad8: - case ElementType::Hex20: - case ElementType::Wedge15: + throw BasisNodeOrderingException("ReferenceNodeLayout: pyramid node ordering is disabled", + __FILE__, __LINE__, __func__); + default: + throw BasisNodeOrderingException("ReferenceNodeLayout: unsupported Lagrange topology", + __FILE__, __LINE__, __func__); + } +} + +std::vector element_nodes(ElementType elem_type) { + const int order = complete_lagrange_alias_order(elem_type); + if (order >= 0) { + return complete_lagrange_nodes(elem_type, order); + } + + switch (elem_type) { + case ElementType::Quad8: { + auto nodes = generate_quad_nodes(2); + nodes.resize(8u); + return nodes; + } + case ElementType::Hex20: { + auto nodes = generate_hex_nodes(2); + nodes.resize(20u); + return nodes; + } + case ElementType::Wedge15: { + auto nodes = generate_wedge_nodes(2); + nodes.resize(15u); + return nodes; + } case ElementType::Pyramid13: - throw BasisNodeOrderingException("ReferenceNodeLayout::get_lagrange_node_coords does not support serendipity topologies", + throw BasisNodeOrderingException("ReferenceNodeLayout: pyramid node ordering is disabled", __FILE__, __LINE__, __func__); default: - throw 
BasisNodeOrderingException("ReferenceNodeLayout::get_lagrange_node_coords: unsupported topology", + throw BasisNodeOrderingException("ReferenceNodeLayout: unknown element type", __FILE__, __LINE__, __func__); } } +} // namespace + +math::Vector ReferenceNodeLayout::get_node_coords(ElementType elem_type, + std::size_t local_node) { + const auto nodes = element_nodes(elem_type); + if (local_node >= nodes.size()) { + throw BasisNodeOrderingException("ReferenceNodeLayout::get_node_coords: node index out of range", + __FILE__, __LINE__, __func__); + } + return nodes[local_node]; +} + +std::size_t ReferenceNodeLayout::num_nodes(ElementType elem_type) { + return element_nodes(elem_type).size(); +} + +std::vector> +ReferenceNodeLayout::get_lagrange_node_coords(ElementType canonical_type, int order) { + return complete_lagrange_nodes(canonical_type, order); +} + std::span ReferenceNodeLayout::mesh_to_basis_ordering(ElementType elem_type) { if (elem_type == ElementType::Hex20) { - return std::span( - kHex20MeshToBasisOrder.data(), - kHex20MeshToBasisOrder.size()); + return std::span(kHex20MeshToBasisOrder.data(), + kHex20MeshToBasisOrder.size()); } return {}; } diff --git a/Code/Source/solver/FE/Basis/NodeOrderingConventions.h b/Code/Source/solver/FE/Basis/NodeOrderingConventions.h index 52af4d932..8a43cc4e3 100644 --- a/Code/Source/solver/FE/Basis/NodeOrderingConventions.h +++ b/Code/Source/solver/FE/Basis/NodeOrderingConventions.h @@ -8,526 +8,28 @@ #ifndef SVMP_FE_BASIS_NODEORDERINGCONVENTIONS_H #define SVMP_FE_BASIS_NODEORDERINGCONVENTIONS_H -#include "Types.h" #include "Math/Vector.h" -#include -#include - -/** - * @file NodeOrderingConventions.h - * @brief Documentation of node ordering conventions for all element types - * - * This file provides comprehensive documentation of the node ordering - * conventions used throughout the FE library. These orderings are consistent - * with VTK conventions and must be matched exactly when interfacing with - * the Mesh library. 
- * - * IMPORTANT: The FE library (Basis, Quadrature, Geometry) uses "node" to refer - * to degrees of freedom locations on reference elements. The Mesh library uses - * "vertex" for geometry vertices and "cell" for mesh elements. When interfacing - * between the two, ensure consistent ordering. - * - * Reference Element Conventions: - * - Line: xi in [-1, 1] - * - Quad: (xi, eta) in [-1, 1] x [-1, 1] - * - Hex: (xi, eta, zeta) in [-1, 1]^3 - * - Triangle: (xi, eta) in simplex with vertices (0,0), (1,0), (0,1) - * - Tetrahedron: (xi, eta, zeta) in simplex with vertices - * (0,0,0), (1,0,0), (0,1,0), (0,0,1) - * - Wedge: Triangle base x line height, zeta in [-1, 1] - * - Pyramid: Quad base at z=0, apex at (0, 0, 1) - * - * - * ============================================================================= - * 1D ELEMENTS - * ============================================================================= - * - * Line2 (Linear Line) - * ------------------- - * 0---------1 - * | | - * xi=-1 xi=+1 - * - * Node 0: xi = -1 - * Node 1: xi = +1 - * - * - * Line3 (Quadratic Line) - * ---------------------- - * 0----2----1 - * | | | - * xi=-1 0 xi=+1 - * - * Node 0: xi = -1 - * Node 1: xi = +1 - * Node 2: xi = 0 (mid-edge) - * - * - * ============================================================================= - * 2D QUADRILATERAL ELEMENTS - * ============================================================================= - * - * Quad4 (Bilinear Quadrilateral) - * ------------------------------ - * - * 3-----------2 - * | | - * | | - * | | - * 0-----------1 - * - * Node 0: (xi, eta) = (-1, -1) - * Node 1: (xi, eta) = (+1, -1) - * Node 2: (xi, eta) = (+1, +1) - * Node 3: (xi, eta) = (-1, +1) - * - * - * Quad8 (Serendipity Quadrilateral) - * --------------------------------- - * - * 3-----6-----2 - * | | - * 7 5 - * | | - * 0-----4-----1 - * - * Corners (same as Quad4): - * Node 0: (-1, -1) - * Node 1: (+1, -1) - * Node 2: (+1, +1) - * Node 3: (-1, +1) - * - * Mid-edge nodes: - * 
Node 4: ( 0, -1) (edge 0-1) - * Node 5: (+1, 0) (edge 1-2) - * Node 6: ( 0, +1) (edge 2-3) - * Node 7: (-1, 0) (edge 3-0) - * - * - * Quad9 (Biquadratic Quadrilateral) - * --------------------------------- - * - * 3-----6-----2 - * | | - * 7 8 5 - * | | - * 0-----4-----1 - * - * Same as Quad8 plus: - * Node 8: (0, 0) (center) - * - * - * ============================================================================= - * 3D HEXAHEDRAL ELEMENTS - * ============================================================================= - * - * Hex8 (Trilinear Hexahedron) - * --------------------------- - * - * 7-----------6 - * /| /| - * / | / | - * 4-----------5 | - * | | | | - * | 3--------|--2 - * | / | / - * |/ |/ - * 0-----------1 - * - * Bottom face (zeta = -1): - * Node 0: (xi, eta, zeta) = (-1, -1, -1) - * Node 1: (xi, eta, zeta) = (+1, -1, -1) - * Node 2: (xi, eta, zeta) = (+1, +1, -1) - * Node 3: (xi, eta, zeta) = (-1, +1, -1) - * - * Top face (zeta = +1): - * Node 4: (xi, eta, zeta) = (-1, -1, +1) - * Node 5: (xi, eta, zeta) = (+1, -1, +1) - * Node 6: (xi, eta, zeta) = (+1, +1, +1) - * Node 7: (xi, eta, zeta) = (-1, +1, +1) - * - * - * Hex20 (Serendipity Hexahedron) - * ------------------------------ - * - * 7-----14-----6 - * /| /| - * 15 | 13 | - * / 19 / 18 - * 4-----12-----5 | - * | | | | - * | 3-----10-|---2 - * 16 / 17 / - * | 11 | 9 - * |/ |/ - * 0------8-----1 - * - * Corners (same as Hex8): Nodes 0-7 - * - * Mid-edge nodes on bottom face (zeta = -1): - * Node 8: ( 0, -1, -1) (edge 0-1) - * Node 9: (+1, 0, -1) (edge 1-2) - * Node 10: ( 0, +1, -1) (edge 2-3) - * Node 11: (-1, 0, -1) (edge 3-0) - * - * Mid-edge nodes on top face (zeta = +1): - * Node 12: ( 0, -1, +1) (edge 4-5) - * Node 13: (+1, 0, +1) (edge 5-6) - * Node 14: ( 0, +1, +1) (edge 6-7) - * Node 15: (-1, 0, +1) (edge 7-4) - * - * Mid-edge nodes on vertical edges: - * Node 16: (-1, -1, 0) (edge 0-4) - * Node 17: (+1, -1, 0) (edge 1-5) - * Node 18: (+1, +1, 0) (edge 2-6) - * Node 19: (-1, +1, 0) (edge 
3-7) - * - * - * Hex27 (Triquadratic Hexahedron) - * ------------------------------- - * Same as Hex20 plus face-center and body-center nodes: - * - * Face centers: - * Node 20: ( 0, 0, -1) (bottom face) - * Node 21: ( 0, 0, +1) (top face) - * Node 22: ( 0, -1, 0) (front face) - * Node 23: (+1, 0, 0) (right face) - * Node 24: ( 0, +1, 0) (back face) - * Node 25: (-1, 0, 0) (left face) - * - * Body center: - * Node 26: (0, 0, 0) - * - * - * ============================================================================= - * 2D TRIANGULAR ELEMENTS - * ============================================================================= - * - * Triangle3 (Linear Triangle) - * --------------------------- - * - * 2 - * |\ - * | \ - * | \ - * | \ - * 0----1 - * - * Reference: (xi, eta) simplex with vertices at: - * Node 0: (xi, eta) = (0, 0) - * Node 1: (xi, eta) = (1, 0) - * Node 2: (xi, eta) = (0, 1) - * - * - * Triangle6 (Quadratic Triangle) - * ------------------------------ - * - * 2 - * |\ - * | \ - * 5 4 - * | \ - * 0--3--1 - * - * Corners: Nodes 0-2 (same as Triangle3) - * - * Mid-edge nodes: - * Node 3: (0.5, 0) (edge 0-1) - * Node 4: (0.5, 0.5) (edge 1-2) - * Node 5: ( 0, 0.5) (edge 2-0) - * - * - * ============================================================================= - * 3D TETRAHEDRAL ELEMENTS - * ============================================================================= - * - * Tetrahedron4 (Linear Tetrahedron) - * --------------------------------- - * - * 3 - * /|\ - * / | \ - * / | \ - * / | \ - * / | \ - * 0-----|-----2 - * \ | / - * \ | / - * \ | / - * \ | / - * \|/ - * 1 - * - * Reference: (xi, eta, zeta) simplex with vertices at: - * Node 0: (0, 0, 0) - * Node 1: (1, 0, 0) - * Node 2: (0, 1, 0) - * Node 3: (0, 0, 1) - * - * - * Tetrahedron10 (Quadratic Tetrahedron) - * ------------------------------------- - * Corners: Nodes 0-3 (same as Tet4) - * - * Mid-edge nodes: - * Node 4: (0.5, 0, 0) (edge 0-1) - * Node 5: (0.5, 0.5, 0) (edge 1-2) - * Node 6: ( 
0, 0.5, 0) (edge 2-0) - * Node 7: ( 0, 0, 0.5) (edge 0-3) - * Node 8: (0.5, 0, 0.5) (edge 1-3) - * Node 9: ( 0, 0.5, 0.5) (edge 2-3) - * - * - * ============================================================================= - * 3D WEDGE (PRISM) ELEMENTS - * ============================================================================= - * - * Wedge6 (Linear Wedge) - * --------------------- - * - * 5 - * /|\ - * / | \ - * / | \ - * 3---|---4 - * | 2 | - * | / \ | - * | / \ | - * |/ \| - * 0-------1 - * - * Reference: Triangle base at zeta = -1, top at zeta = +1 - * - * Bottom face (zeta = -1): - * Node 0: (0, 0, -1) - * Node 1: (1, 0, -1) - * Node 2: (0, 1, -1) - * - * Top face (zeta = +1): - * Node 3: (0, 0, +1) - * Node 4: (1, 0, +1) - * Node 5: (0, 1, +1) - * - * - * Wedge15 (Quadratic Wedge) - * ------------------------- - * Corners: Nodes 0-5 (same as Wedge6) - * - * Mid-edge nodes on bottom face: - * Node 6: (0.5, 0, -1) (edge 0-1) - * Node 7: (0.5, 0.5, -1) (edge 1-2) - * Node 8: ( 0, 0.5, -1) (edge 2-0) - * - * Mid-edge nodes on top face: - * Node 9: (0.5, 0, +1) (edge 3-4) - * Node 10: (0.5, 0.5, +1) (edge 4-5) - * Node 11: ( 0, 0.5, +1) (edge 5-3) - * - * Mid-edge nodes on vertical edges: - * Node 12: (0, 0, 0) (edge 0-3) - * Node 13: (1, 0, 0) (edge 1-4) - * Node 14: (0, 1, 0) (edge 2-5) - * - * - * Wedge18 (Complete Quadratic Wedge) - * ---------------------------------- - * Corners and mid-edges: Nodes 0-14 (same as Wedge15) - * - * Face-center nodes on quadrilateral faces: - * Node 15: (0.5, 0.0, 0.0) (face with vertices 0-1-4-3, y = 0) - * Node 16: (0.5, 0.5, 0.0) (face with vertices 1-2-5-4, x + y = 1) - * Node 17: (0.0, 0.5, 0.0) (face with vertices 2-0-3-5, x = 0) - * - * - * ============================================================================= - * 3D PYRAMID ELEMENTS - * ============================================================================= - * - * Pyramid5 (Linear Pyramid) - * ------------------------- - * - * 4 - * /|\ - * / | \ - * 
/ | \ - * / | \ - * 3----|----2 - * | | | - * | + | (apex projects to center of base) - * | | - * 0---------1 - * - * Reference: Quad base in xi-eta plane at zeta = 0, apex at zeta = 1 - * - * Base (zeta = 0): - * Node 0: (-1, -1, 0) - * Node 1: (+1, -1, 0) - * Node 2: (+1, +1, 0) - * Node 3: (-1, +1, 0) - * - * Apex: - * Node 4: (0, 0, 1) - * - * - * Pyramid13 (Quadratic Pyramid) - * ----------------------------- - * Corners: Nodes 0-4 (same as Pyramid5) - * - * Mid-edge nodes on base: - * Node 5: ( 0, -1, 0) (edge 0-1) - * Node 6: (+1, 0, 0) (edge 1-2) - * Node 7: ( 0, +1, 0) (edge 2-3) - * Node 8: (-1, 0, 0) (edge 3-0) - * - * Mid-edge nodes to apex: - * Node 9: (-0.5, -0.5, 0.5) (edge 0-4) - * Node 10: (+0.5, -0.5, 0.5) (edge 1-4) - * Node 11: (+0.5, +0.5, 0.5) (edge 2-4) - * Node 12: (-0.5, +0.5, 0.5) (edge 3-4) - * - * - * Pyramid14 (Quadratic Rational Pyramid) - * -------------------------------------- - * - * This retained low-order compatibility layout matches the generated - * complete-family quadratic Lagrange ordering for the reference pyramid with - * base (-1,-1,0)..(1,1,0) and apex at (0,0,1). Nodes 0-12 coincide with the - * Pyramid13 layout; node 13 is the base center. 
- * - * Base corners (same as Pyramid5): - * Node 0: (-1, -1, 0) - * Node 1: (+1, -1, 0) - * Node 2: (+1, +1, 0) - * Node 3: (-1, +1, 0) - * - * Apex: - * Node 4: (0, 0, 1) - * - * Base mid-edges (same as Pyramid13): - * Node 5: ( 0, -1, 0) (edge 0-1) - * Node 6: (+1, 0, 0) (edge 1-2) - * Node 7: ( 0, +1, 0) (edge 2-3) - * Node 8: (-1, 0, 0) (edge 3-0) - * - * Mid-edges to apex (same as Pyramid13): - * Node 9: (-0.5, -0.5, 0.5) (edge 0-4) - * Node 10: (+0.5, -0.5, 0.5) (edge 1-4) - * Node 11: (+0.5, +0.5, 0.5) (edge 2-4) - * Node 12: (-0.5, +0.5, 0.5) (edge 3-4) - * - * Base center: - * Node 13: (0, 0, 0) - * - * - * ============================================================================= - * NOTES ON VTK COMPATIBILITY - * ============================================================================= - * - * The node orderings above are consistent with VTK cell types: - * - * VTK_LINE (3) -> Line2 - * VTK_QUADRATIC_EDGE (21) -> Line3 - * VTK_TRIANGLE (5) -> Triangle3 - * VTK_QUADRATIC_TRIANGLE (22) -> Triangle6 - * VTK_QUAD (9) -> Quad4 - * VTK_QUADRATIC_QUAD (23) -> Quad8 - * VTK_BIQUADRATIC_QUAD (28) -> Quad9 - * VTK_TETRA (10) -> Tetrahedron4 - * VTK_QUADRATIC_TETRA (24) -> Tetrahedron10 - * VTK_HEXAHEDRON (12) -> Hex8 - * VTK_QUADRATIC_HEXAHEDRON (25) -> Hex20 - * VTK_TRIQUADRATIC_HEXAHEDRON (29) -> Hex27 - * VTK_WEDGE (13) -> Wedge6 - * VTK_QUADRATIC_WEDGE (26) -> Wedge15 - * VTK_BIQUADRATIC_QUADRATIC_WEDGE (32) -> Wedge18 - * VTK_PYRAMID (14) -> Pyramid5 - * VTK_QUADRATIC_PYRAMID (27) -> Pyramid13 - * - * - * ============================================================================= - * BARYCENTRIC COORDINATES - * ============================================================================= - * - * For simplex elements, barycentric coordinates (lambda_0, ..., lambda_n) - * satisfy sum(lambda_i) = 1. 
- * - * Triangle: - * lambda_0 = 1 - xi - eta - * lambda_1 = xi - * lambda_2 = eta - * - * Tetrahedron: - * lambda_0 = 1 - xi - eta - zeta - * lambda_1 = xi - * lambda_2 = eta - * lambda_3 = zeta - * - */ +#include "Types.h" +#include #include +#include namespace svmp { namespace FE { namespace basis { -/** - * @brief Basis-side reference node coordinate queries - * - * This is intentionally named differently from `svmp::NodeOrdering` in Mesh, - * which handles mesh-format permutations rather than reference basis layouts. - */ class ReferenceNodeLayout { public: - /** - * @brief Get reference coordinates for a node - * @param elem_type Element type - * @param local_node Local node index (0-based) - * - * Complete-family low-order Lagrange aliases (`Line2/3`, `Triangle3/6`, - * `Quad4/9`, `Tetra4/10`, `Hex8/27`, `Wedge6/18`, `Pyramid5/14`) are - * served by the generated arbitrary-order Lagrange ordering path. Explicit - * hard-coded tables remain only for serendipity-only enums such as - * `Quad8`, `Hex20`, `Wedge15`, and `Pyramid13`. - * - * @return Reference coordinates (xi, eta, zeta) - */ - static math::Vector get_node_coords(ElementType elem_type, std::size_t local_node); - - /** - * @brief Get number of nodes for an element type - * - * The low-order complete-family Lagrange aliases share the same generated - * ordering path used by `get_node_coords`. - */ + static math::Vector get_node_coords(ElementType elem_type, + std::size_t local_node); static std::size_t num_nodes(ElementType elem_type); - /** - * @brief Generate complete-family Lagrange node coordinates for a canonical topology and order - * - * This covers arbitrary-order complete nodal Lagrange spaces on the - * canonical topologies `Line2`, `Triangle3`, `Quad4`, `Tetra4`, `Hex8`, - * `Wedge6`, and `Pyramid5`. Serendipity variants are intentionally - * excluded. 
- */ static std::vector> get_lagrange_node_coords(ElementType canonical_type, int order); - /** - * @brief Optional mapping from mesh/reference node order to internal basis order - * - * Returns an empty span when the public node order is already the basis - * table order or no special mapping is registered. - */ static std::span mesh_to_basis_ordering(ElementType elem_type); - - /** - * @brief Check if element is a simplex (triangle, tetrahedron) - */ static bool is_simplex(ElementType elem_type); - - /** - * @brief Check if element uses tensor-product topology - */ static bool is_tensor_product(ElementType elem_type); }; diff --git a/Code/Source/solver/FE/Basis/PyramidModalBasis.h b/Code/Source/solver/FE/Basis/PyramidModalBasis.h deleted file mode 100644 index 1ecdae282..000000000 --- a/Code/Source/solver/FE/Basis/PyramidModalBasis.h +++ /dev/null @@ -1,265 +0,0 @@ -#ifndef SVMP_FE_BASIS_PYRAMIDMODALBASIS_H -#define SVMP_FE_BASIS_PYRAMIDMODALBASIS_H - -// Shared rational/modal pyramid helpers for scalar complete-family and spectral -// pyramid bases. The degenerate z=1 top plane is evaluated by its apex limit; -// callers that reject non-apex top-plane queries must validate before calling. 
- -#include "BasisFunction.h" -#include "BasisTolerance.h" -#include -#include -#include - -namespace svmp { -namespace FE { -namespace basis { -namespace pyramid_modal { - -struct Term { - int px{0}; - int py{0}; - int pz{0}; - int denom_power{0}; -}; - -struct EvaluationPoint { - Real x{Real(0)}; - Real y{Real(0)}; - Real z{Real(0)}; - Real t{Real(1)}; - bool top_plane{false}; - std::vector x_powers; - std::vector y_powers; - std::vector z_powers; - std::vector t_powers; -}; - -inline std::vector build_terms(int order) { - std::vector terms; - terms.reserve(static_cast((order + 1) * (order + 2) * - (2 * order + 3) / 6)); - for (int pz = 0; pz <= order; ++pz) { - const int n = order - pz; - for (int py = 0; py <= n; ++py) { - for (int px = 0; px <= n; ++px) { - terms.push_back(Term{px, py, pz, std::min(px, py)}); - } - } - } - return terms; -} - -inline bool on_degenerate_top_plane(const math::Vector& xi, - Real tolerance = detail::basis_scaled_tolerance()) { - return std::abs(Real(1) - xi[2]) <= tolerance; -} - -inline void fill_powers(Real base, int max_power, std::vector& powers) { - powers.assign(static_cast(max_power + 1), Real(1)); - for (int p = 1; p <= max_power; ++p) { - powers[static_cast(p)] = - powers[static_cast(p - 1)] * base; - } -} - -inline void prepare_evaluation_point(const math::Vector& xi, - int max_px, - int max_py, - int max_pz, - int max_denom_power, - EvaluationPoint& point) { - point.x = xi[0]; - point.y = xi[1]; - point.z = xi[2]; - point.t = Real(1) - point.z; - point.top_plane = on_degenerate_top_plane(xi); - - fill_powers(point.x, std::max(max_px, 0), point.x_powers); - fill_powers(point.y, std::max(max_py, 0), point.y_powers); - fill_powers(point.z, std::max(max_pz, 0), point.z_powers); - if (point.top_plane) [[unlikely]] { - point.t_powers.assign(1u, Real(1)); - } else { - fill_powers(point.t, std::max(max_denom_power + 2, 0), point.t_powers); - } -} - -inline void prepare_evaluation_point(const std::vector& terms, - const 
math::Vector& xi, - EvaluationPoint& point) { - int max_px = 0; - int max_py = 0; - int max_pz = 0; - int max_denom_power = 0; - for (const Term& term : terms) { - max_px = std::max(max_px, term.px); - max_py = std::max(max_py, term.py); - max_pz = std::max(max_pz, term.pz); - max_denom_power = std::max(max_denom_power, term.denom_power); - } - prepare_evaluation_point(xi, max_px, max_py, max_pz, max_denom_power, point); -} - -inline void evaluate_term(const Term& term, - const EvaluationPoint& point, - Real& value, - Gradient* gradient = nullptr, - Hessian* hessian = nullptr) { - const auto pow_x = [&](int p) -> Real { - return point.x_powers[static_cast(p)]; - }; - const auto pow_y = [&](int p) -> Real { - return point.y_powers[static_cast(p)]; - }; - const auto pow_z = [&](int p) -> Real { - return point.z_powers[static_cast(p)]; - }; - const auto pow_t = [&](int p) -> Real { - return point.t_powers[static_cast(p)]; - }; - - if (point.top_plane) [[unlikely]] { - if (term.px == 0 && term.py == 0) { - value = pow_z(term.pz); - } else { - value = Real(0); - } - if (gradient != nullptr) { - *gradient = Gradient{}; - if (term.px == 0 && term.py == 0 && term.pz > 0) { - (*gradient)[2] = static_cast(term.pz) * pow_z(term.pz - 1); - } - } - if (hessian != nullptr) { - *hessian = Hessian{}; - if (term.px == 0 && term.py == 0 && term.pz > 1) { - (*hessian)(2, 2) = - static_cast(term.pz * (term.pz - 1)) * - pow_z(term.pz - 2); - } - } - return; - } - - const Real base = pow_x(term.px) * pow_y(term.py) * pow_z(term.pz); - const Real denom = pow_t(term.denom_power); - value = base / denom; - - if (gradient != nullptr) { - *gradient = Gradient{}; - if (term.px > 0) { - (*gradient)[0] = - static_cast(term.px) * pow_x(term.px - 1) * - pow_y(term.py) * pow_z(term.pz) / denom; - } - if (term.py > 0) { - (*gradient)[1] = - static_cast(term.py) * pow_x(term.px) * - pow_y(term.py - 1) * pow_z(term.pz) / denom; - } - - Real gz = Real(0); - if (term.pz > 0) { - gz += 
static_cast(term.pz) * pow_x(term.px) * - pow_y(term.py) * pow_z(term.pz - 1) / denom; - } - if (term.denom_power > 0) { - gz += static_cast(term.denom_power) * base / pow_t(term.denom_power + 1); - } - (*gradient)[2] = gz; - } - - if (hessian == nullptr) { - return; - } - - *hessian = Hessian{}; - if (term.px > 1) { - (*hessian)(0, 0) = - static_cast(term.px * (term.px - 1)) * - pow_x(term.px - 2) * pow_y(term.py) * pow_z(term.pz) / denom; - } - if (term.py > 1) { - (*hessian)(1, 1) = - static_cast(term.py * (term.py - 1)) * - pow_x(term.px) * pow_y(term.py - 2) * pow_z(term.pz) / denom; - } - if (term.px > 0 && term.py > 0) { - const Real hxy = - static_cast(term.px * term.py) * - pow_x(term.px - 1) * pow_y(term.py - 1) * pow_z(term.pz) / denom; - (*hessian)(0, 1) = hxy; - (*hessian)(1, 0) = hxy; - } - - if (term.px > 0) { - Real hxz = - static_cast(term.px) * pow_x(term.px - 1) * - pow_y(term.py) / denom; - if (term.pz > 0) { - hxz *= static_cast(term.pz) * pow_z(term.pz - 1); - } else { - hxz = Real(0); - } - if (term.denom_power > 0) { - hxz += static_cast(term.px * term.denom_power) * - pow_x(term.px - 1) * pow_y(term.py) * - pow_z(term.pz) / pow_t(term.denom_power + 1); - } - (*hessian)(0, 2) = hxz; - (*hessian)(2, 0) = hxz; - } - - if (term.py > 0) { - Real hyz = - static_cast(term.py) * pow_x(term.px) * - pow_y(term.py - 1) / denom; - if (term.pz > 0) { - hyz *= static_cast(term.pz) * pow_z(term.pz - 1); - } else { - hyz = Real(0); - } - if (term.denom_power > 0) { - hyz += static_cast(term.py * term.denom_power) * - pow_x(term.px) * pow_y(term.py - 1) * - pow_z(term.pz) / pow_t(term.denom_power + 1); - } - (*hessian)(1, 2) = hyz; - (*hessian)(2, 1) = hyz; - } - - Real hzz = Real(0); - if (term.pz > 1) { - hzz += static_cast(term.pz * (term.pz - 1)) * - pow_x(term.px) * pow_y(term.py) * pow_z(term.pz - 2) / denom; - } - if (term.pz > 0 && term.denom_power > 0) { - hzz += static_cast(2 * term.pz * term.denom_power) * - pow_x(term.px) * pow_y(term.py) * - 
pow_z(term.pz - 1) / pow_t(term.denom_power + 1); - } - if (term.denom_power > 0) { - hzz += static_cast(term.denom_power * (term.denom_power + 1)) * - base / pow_t(term.denom_power + 2); - } - (*hessian)(2, 2) = hzz; -} - -inline void evaluate_term(const Term& term, - const math::Vector& xi, - Real& value, - Gradient* gradient = nullptr, - Hessian* hessian = nullptr) { - EvaluationPoint point; - prepare_evaluation_point( - xi, term.px, term.py, term.pz, term.denom_power, point); - evaluate_term(term, point, value, gradient, hessian); -} - -} // namespace pyramid_modal -} // namespace basis -} // namespace FE -} // namespace svmp - -#endif // SVMP_FE_BASIS_PYRAMIDMODALBASIS_H diff --git a/Code/Source/solver/FE/Basis/SerendipityBasis.cpp b/Code/Source/solver/FE/Basis/SerendipityBasis.cpp index 309fd18be..e6395cee4 100644 --- a/Code/Source/solver/FE/Basis/SerendipityBasis.cpp +++ b/Code/Source/solver/FE/Basis/SerendipityBasis.cpp @@ -157,13 +157,6 @@ std::vector quad_serendipity_inverse_vandermonde( const std::string label = "Quad order " + std::to_string(order); return invert_dense_matrix(std::move(vandermonde), n, label.c_str()); } -constexpr std::array kPyramid13CenterRedistribution = { - Real(-0.25), Real(-0.25), Real(-0.25), Real(-0.25), - Real(0), - Real(0.5), Real(0.5), Real(0.5), Real(0.5), - Real(0), Real(0), Real(0), Real(0) -}; - constexpr std::array, 15> kWedge15MonomialExponents = {{ {{0, 0, 0}}, {{0, 0, 1}}, @@ -497,20 +490,8 @@ SerendipityBasis::SerendipityBasis(ElementType type, int order, bool geometry_mo "SerendipityBasis supports up to quadratic on wedge15", __FILE__, __LINE__, __func__); } - } else if (type == ElementType::Pyramid13) { - dimension_ = 3; - if (order_ < 2) { - order_ = 2; - } - if (order_ == 2) { - size_ = 13; - } else { - throw BasisConfigurationException( - "SerendipityBasis supports up to quadratic on pyramid13", - __FILE__, __LINE__, __func__); - } } else { - throw BasisElementCompatibilityException("SerendipityBasis supports 
Quad4/Quad8, Hex8/Hex20, Wedge15, and Pyramid13 elements", + throw BasisElementCompatibilityException("SerendipityBasis supports Quad4/Quad8, Hex8/Hex20, and Wedge15 elements", __FILE__, __LINE__, __func__); } @@ -522,17 +503,6 @@ SerendipityBasis::SerendipityBasis(ElementType type, int order, bool geometry_mo } } -bool SerendipityBasis::cache_identity_words(std::vector& words) const { - words.push_back(0x736572656e646970ULL); - words.push_back(static_cast(basis_type())); - words.push_back(static_cast(element_type_)); - words.push_back(static_cast(dimension_)); - words.push_back(static_cast(order_)); - words.push_back(static_cast(size_)); - words.push_back(geometry_mode_ ? 1u : 0u); - return true; -} - void SerendipityBasis::evaluate_values(const math::Vector& xi, std::vector& values) const { values.assign(size_, Real(0)); @@ -617,15 +587,6 @@ void SerendipityBasis::evaluate_values(const math::Vector& xi, return; } - if (element_type_ == ElementType::Pyramid13) { - static const LagrangeBasis parent(ElementType::Pyramid14, 2); - std::array parent_values{}; - parent.evaluate_values_to(xi, parent_values.data()); - for (std::size_t i = 0; i < 13; ++i) { - values[i] = parent_values[i] + kPyramid13CenterRedistribution[i] * parent_values[13]; - } - return; - } } void SerendipityBasis::evaluate_gradients(const math::Vector& xi, @@ -762,25 +723,6 @@ void SerendipityBasis::evaluate_gradients(const math::Vector& xi, return; } - if (element_type_ == ElementType::Pyramid13) { - static const LagrangeBasis parent(ElementType::Pyramid14, 2); - std::array parent_gradients{}; - // Pyramid13 inherits the complete-family pyramid apex contract from the - // parent basis rather than introducing a separate regularized path. 
- parent.evaluate_gradients_to(xi, parent_gradients.data()); - const auto parent_gradient = [&](std::size_t node, std::size_t component) { - return parent_gradients[node * 3u + component]; - }; - for (std::size_t i = 0; i < 13; ++i) { - for (std::size_t c = 0; c < 3u; ++c) { - gradients[i][c] = - parent_gradient(i, c) + - kPyramid13CenterRedistribution[i] * parent_gradient(13u, c); - } - } - return; - } - throw BasisEvaluationException("SerendipityBasis::evaluate_gradients: unsupported serendipity configuration", __FILE__, __LINE__, __func__); } @@ -859,20 +801,6 @@ void SerendipityBasis::evaluate_hessians(const math::Vector& xi, return; } - if (element_type_ == ElementType::Pyramid13) { - static const LagrangeBasis parent(ElementType::Pyramid14, 2); - std::array parent_hessians{}; - // Pyramid13 inherits the complete-family pyramid apex contract from the - // parent basis rather than introducing a separate regularized path. - parent.evaluate_hessians_to(xi, parent_hessians.data()); - const Hessian center_hessian = load_hessian(parent_hessians.data() + 13u * 9u); - for (std::size_t i = 0; i < 13; ++i) { - hessians[i] = load_hessian(parent_hessians.data() + i * 9u); - add_scaled_hessian(hessians[i], center_hessian, kPyramid13CenterRedistribution[i]); - } - return; - } - throw BasisEvaluationException("SerendipityBasis::evaluate_hessians: unsupported serendipity configuration", __FILE__, __LINE__, __func__); } diff --git a/Code/Source/solver/FE/Basis/SerendipityBasis.h b/Code/Source/solver/FE/Basis/SerendipityBasis.h index 98c01415a..10e426164 100644 --- a/Code/Source/solver/FE/Basis/SerendipityBasis.h +++ b/Code/Source/solver/FE/Basis/SerendipityBasis.h @@ -11,17 +11,11 @@ /** * @file SerendipityBasis.h * @brief Reduced-degree-of-freedom serendipity bases - * - * `Pyramid13` inherits its apex contract from the complete-family rational - * pyramid basis: values remain exact at the apex, while exact-apex gradient - * and Hessian queries throw because the inherited 
nodal derivative limit is - * not unique. */ #include "BasisFunction.h" #include -#include namespace svmp { namespace FE { @@ -37,7 +31,6 @@ class SerendipityBasis : public BasisFunction { int order() const noexcept override { return order_; } std::size_t size() const noexcept override { return size_; } const std::vector>& nodes() const noexcept { return nodes_; } - bool cache_identity_words(std::vector& words) const override; void evaluate_values(const math::Vector& xi, std::vector& values) const override; diff --git a/Code/Source/solver/FE/Basis/VectorBasis.h b/Code/Source/solver/FE/Basis/VectorBasis.h deleted file mode 100644 index d442c2160..000000000 --- a/Code/Source/solver/FE/Basis/VectorBasis.h +++ /dev/null @@ -1,255 +0,0 @@ -/* Copyright (c) Stanford University, The Regents of the University of California, and others. - * - * All Rights Reserved. - * - * See License file. - */ - -#ifndef SVMP_FE_BASIS_VECTORBASIS_H -#define SVMP_FE_BASIS_VECTORBASIS_H - -/** - * @file VectorBasis.h - * @brief Vector-valued bases for H(div) and H(curl) conforming spaces - */ - -#include "BasisFunction.h" -#include "VectorBasisModalPolynomial.h" -#include -#include -#include - -namespace svmp { -namespace FE { -namespace basis { - -/** - * @brief DOF entity type for vector-valued basis functions - */ -enum class DofEntity { - Vertex, ///< DOF associated with a vertex - Edge, ///< DOF associated with an edge (tangential moments for H(curl)) - Face, ///< DOF associated with a face (normal moments for H(div), tangential for H(curl)) - Interior ///< DOF associated with element interior -}; - -/** - * @brief DOF association metadata for a single DOF - */ -struct DofAssociation { - DofEntity entity_type{DofEntity::Interior}; - int entity_id{-1}; ///< Local index of the entity (edge/face/vertex) - int moment_index{0}; ///< Index within the entity's moment space -}; - -struct SparseModalCoefficientMatrix { - std::size_t rows{0}; - std::size_t cols{0}; - std::vector row_offsets; - 
std::vector dofs; - std::vector coefficients; -}; - -class VectorBasisFunction : public BasisFunction { -public: - bool is_vector_valued() const noexcept override { return true; } - bool supports_vector_jacobians() const noexcept override { return true; } - void evaluate_values(const math::Vector&, - std::vector&) const override { - throw BasisEvaluationException("Vector basis uses evaluate_vector_values", - __FILE__, __LINE__, __func__); - } - - void evaluate_vector_at_quadrature_points_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT jacobians_out, - Real* SVMP_RESTRICT curls_out, - Real* SVMP_RESTRICT divergence_out) const override; - - /** - * @brief Get DOF association metadata for all basis functions - * - * Returns a vector of size(), where each entry describes which - * geometric entity (vertex/edge/face/interior) the corresponding - * DOF is associated with. This is essential for orientation-aware - * assembly of H(div) and H(curl) spaces. 
- */ - virtual std::vector dof_associations() const { - // Default: all interior DOFs (subclasses should override) - std::vector result(size()); - for (std::size_t i = 0; i < size(); ++i) { - result[i].entity_type = DofEntity::Interior; - result[i].entity_id = 0; - result[i].moment_index = static_cast(i); - } - return result; - } -}; - -/** - * @brief Raviart-Thomas H(div) basis on supported element families - */ -class RaviartThomasBasis : public VectorBasisFunction { -public: - RaviartThomasBasis(ElementType type, int order = 0); - - BasisType basis_type() const noexcept override { return BasisType::RaviartThomas; } - ElementType element_type() const noexcept override { return element_type_; } - int dimension() const noexcept override { return dimension_; } - int order() const noexcept override { return order_; } - std::size_t size() const noexcept override { return size_; } - bool cache_identity_is_structural() const noexcept override { return true; } - - void evaluate_vector_values(const math::Vector& xi, - std::vector>& values) const override; - void evaluate_vector_jacobians(const math::Vector& xi, - std::vector& jacobians) const override; - void evaluate_divergence(const math::Vector& xi, - std::vector& divergence) const override; - bool supports_divergence() const noexcept override { return true; } - void evaluate_vector_at_quadrature_points_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT jacobians_out, - Real* SVMP_RESTRICT curls_out, - Real* SVMP_RESTRICT divergence_out) const override; - - /// Get DOF associations (face/edge DOFs for 2D, face DOFs for 3D H(div)) - std::vector dof_associations() const override; - -private: - using ModalTerm = VectorBasisModalTerm; - using ModalPolynomial = VectorBasisModalPolynomial; - using SeedJacobianEvaluator = void (*)( - const math::Vector&, - std::vector&); - - ElementType element_type_; - int dimension_; - int order_; - std::size_t 
size_{0}; - - bool nodal_generated_{false}; - bool use_transformed_direct_seed_{false}; ///< True for wedge/pyramid RT(k=1,2) transformed from direct seed functions - std::vector transformed_seed_indices_; - std::vector> transformed_monomial_candidates_; ///< {component, px, py, pz} - std::vector monomials_; - std::array modal_power_limits_{{0, 0, 0}}; - std::array transformed_power_limits_{{0, 0, 0}}; - SeedJacobianEvaluator transformed_seed_jacobian_evaluator_{nullptr}; - // Sparse coefficients for nodal basis in modal monomial basis: - // phi_j = sum_p c(p,j) * modal_p. - // Rows index modal functions; entries target nodal DOFs. - SparseModalCoefficientMatrix modal_sparse_coeffs_; - SparseModalCoefficientMatrix transformed_sparse_coeffs_; -}; - -/** - * @brief First-kind Nedelec H(curl) basis on supported element families - */ -class NedelecBasis : public VectorBasisFunction { -public: - NedelecBasis(ElementType type, int order = 0); - - BasisType basis_type() const noexcept override { return BasisType::Nedelec; } - ElementType element_type() const noexcept override { return element_type_; } - int dimension() const noexcept override { return dimension_; } - int order() const noexcept override { return order_; } - std::size_t size() const noexcept override { return size_; } - bool cache_identity_is_structural() const noexcept override { return true; } - - void evaluate_vector_values(const math::Vector& xi, - std::vector>& values) const override; - void evaluate_vector_jacobians(const math::Vector& xi, - std::vector& jacobians) const override; - void evaluate_curl(const math::Vector& xi, - std::vector>& curl) const override; - bool supports_curl() const noexcept override { return true; } - void evaluate_vector_at_quadrature_points_strided( - const std::vector>& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT jacobians_out, - Real* SVMP_RESTRICT curls_out, - Real* SVMP_RESTRICT divergence_out) const override; - - /// Get 
DOF associations (edge DOFs for H(curl), face DOFs for 3D interior) - std::vector dof_associations() const override; - -private: - using ModalTerm = VectorBasisModalTerm; - using ModalPolynomial = VectorBasisModalPolynomial; - using SeedJacobianEvaluator = void (*)( - const math::Vector&, - std::vector&); - - ElementType element_type_; - int dimension_; - int order_; - std::size_t size_{0}; - - bool nodal_generated_{false}; - bool use_transformed_direct_seed_{false}; ///< True for wedge/pyramid ND(k=1,2) transformed from direct seed/candidate functions - std::vector> transformed_monomial_candidates_; ///< {component, px, py, pz} - std::vector monomials_; - SparseModalCoefficientMatrix modal_sparse_coeffs_; - SparseModalCoefficientMatrix transformed_sparse_coeffs_; - std::array modal_power_limits_{{0, 0, 0}}; - std::array transformed_power_limits_{{0, 0, 0}}; - SeedJacobianEvaluator transformed_seed_jacobian_evaluator_{nullptr}; -}; - -/** - * @brief Brezzi-Douglas-Marini basis (simple linear variant) - */ -class BDMBasis : public VectorBasisFunction { -public: - BDMBasis(ElementType type, int order = 1); - - BasisType basis_type() const noexcept override { return BasisType::BDM; } - ElementType element_type() const noexcept override { return element_type_; } - int dimension() const noexcept override { return dimension_; } - int order() const noexcept override { return order_; } - std::size_t size() const noexcept override { return size_; } - bool cache_identity_is_structural() const noexcept override { return true; } - - void evaluate_vector_values(const math::Vector& xi, - std::vector>& values) const override; - void evaluate_vector_jacobians(const math::Vector& xi, - std::vector& jacobians) const override; - void evaluate_divergence(const math::Vector& xi, - std::vector& divergence) const override; - bool supports_divergence() const noexcept override { return true; } - void evaluate_vector_at_quadrature_points_strided( - const std::vector>& points, - std::size_t 
output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT jacobians_out, - Real* SVMP_RESTRICT curls_out, - Real* SVMP_RESTRICT divergence_out) const override; - - /// Get DOF associations (face/edge DOFs for H(div)) - std::vector dof_associations() const override; - -private: - using ModalTerm = VectorBasisModalTerm; - using ModalPolynomial = VectorBasisModalPolynomial; - - ElementType element_type_; - int dimension_; - int order_; - std::size_t size_{0}; - bool nodal_generated_{false}; - std::vector monomials_; - SparseModalCoefficientMatrix modal_sparse_coeffs_; - std::array modal_power_limits_{{0, 0, 0}}; -}; - -} // namespace basis -} // namespace FE -} // namespace svmp - -#endif // SVMP_FE_BASIS_VECTORBASIS_H diff --git a/Code/Source/solver/FE/Basis/VectorBasisEvaluationHelpers.cpp b/Code/Source/solver/FE/Basis/VectorBasisEvaluationHelpers.cpp deleted file mode 100644 index 7ec848633..000000000 --- a/Code/Source/solver/FE/Basis/VectorBasisEvaluationHelpers.cpp +++ /dev/null @@ -1,593 +0,0 @@ -/* Copyright (c) Stanford University, The Regents of the University of California, and others. - * - * All Rights Reserved. - * - * See License file. - */ - -#include "VectorBasisEvaluationHelpers.h" - -#include -#include -#include -#include - -namespace svmp { -namespace FE { -namespace basis { -namespace detail { -namespace vector_common { - -VectorBasisScratch& vector_basis_scratch() { - // Scratch is intentionally thread-local: production assembly uses a - // persistent worker-thread team, so buffers stay warm on each worker. 
- static thread_local VectorBasisScratch scratch; - return scratch; -} - -void prewarm_vector_basis_scratch(std::size_t max_size, std::size_t max_qpts) { - vector_basis_scratch().prewarm(max_size, max_qpts); -} - -void fill_powers(Real x, int max_p, std::vector& out) { - BASIS_CHECK_CONSTRUCTION(max_p >= 0, "powers: negative max_p"); - out.assign(static_cast(max_p + 1), Real(1)); - for (int i = 1; i <= max_p; ++i) { - out[static_cast(i)] = - out[static_cast(i - 1)] * x; - } -} - -void fill_power_tables(const Vec3& xi, - const std::array& limits, - VectorBasisScratch& scratch) { - fill_powers(xi[0], limits[0], scratch.px); - fill_powers(xi[1], limits[1], scratch.py); - fill_powers(xi[2], limits[2], scratch.pz); -} - -namespace { - -constexpr Real kSparseCoefficientRelativeTolerance = - Real(256) * std::numeric_limits::epsilon(); - -void fill_batched_axis_powers(const std::vector& points, - std::size_t axis, - int max_power, - std::vector& out) { - BASIS_CHECK_CONSTRUCTION(max_power >= 0, "batched powers: negative max_p"); - const std::size_t num_qpts = points.size(); - out.assign(static_cast(max_power + 1) * num_qpts, Real(1)); - if (num_qpts == 0 || max_power == 0) { - return; - } - - Real* first_power = out.data() + num_qpts; - for (std::size_t q = 0; q < num_qpts; ++q) { - first_power[q] = points[q][axis]; - } - for (int power = 2; power <= max_power; ++power) { - const Real* previous = - out.data() + static_cast(power - 1) * num_qpts; - Real* current = out.data() + static_cast(power) * num_qpts; - for (std::size_t q = 0; q < num_qpts; ++q) { - current[q] = previous[q] * points[q][axis]; - } - } -} - -} // namespace - -void fill_batched_power_tables(const std::vector& points, - const std::array& limits, - VectorBasisScratch& scratch) { - fill_batched_axis_powers(points, 0u, limits[0], scratch.batched_px); - fill_batched_axis_powers(points, 1u, limits[1], scratch.batched_py); - fill_batched_axis_powers(points, 2u, limits[2], scratch.batched_pz); -} - -void 
validate_vector_strided_outputs(std::size_t num_qpts, - std::size_t output_stride, - const char* family_name) { - if (output_stride < num_qpts) { - throw BasisConfigurationException( - std::string(family_name) + - " strided vector evaluation requires output_stride >= points.size()", - __FILE__, __LINE__, __func__); - } -} - -void zero_active_strided_rows(Real* output, - std::size_t rows, - std::size_t output_stride, - std::size_t num_qpts) { - for (std::size_t row = 0; row < rows; ++row) { - std::fill_n(output + row * output_stride, num_qpts, Real(0)); - } -} - -SparseModalCoefficientMatrix build_sparse_modal_coefficients( - const std::vector& dense_coefficients, - std::size_t rows, - std::size_t cols) { - BASIS_CHECK_CONSTRUCTION(dense_coefficients.size() == rows * cols, - "build_sparse_modal_coefficients: dense coefficient size mismatch"); - - SparseModalCoefficientMatrix sparse; - sparse.rows = rows; - sparse.cols = cols; - sparse.row_offsets.reserve(rows + 1u); - sparse.row_offsets.push_back(0u); - - Real max_abs = Real(0); - for (const Real coefficient : dense_coefficients) { - max_abs = std::max(max_abs, std::abs(coefficient)); - } - const Real prune_threshold = kSparseCoefficientRelativeTolerance * max_abs; - - for (std::size_t row = 0; row < rows; ++row) { - const Real* dense_row = dense_coefficients.data() + row * cols; - for (std::size_t col = 0; col < cols; ++col) { - const Real coefficient = dense_row[col]; - if (std::abs(coefficient) > prune_threshold) { - sparse.dofs.push_back(col); - sparse.coefficients.push_back(coefficient); - } - } - sparse.row_offsets.push_back(sparse.dofs.size()); - } - - return sparse; -} - -Vec3 curl_from_jacobian(const VectorJacobian& J) noexcept { - return Vec3{J(2u, 1u) - J(1u, 2u), - J(0u, 2u) - J(2u, 0u), - J(1u, 0u) - J(0u, 1u)}; -} - -Real divergence_from_jacobian(const VectorJacobian& J) noexcept { - return J(0u, 0u) + J(1u, 1u) + J(2u, 2u); -} - -void write_vector_values_strided(const std::vector& values, - 
std::size_t num_dofs, - std::size_t output_stride, - std::size_t q, - Real* SVMP_RESTRICT values_out) { - if (values_out == nullptr) { - return; - } - BASIS_CHECK_CONSTRUCTION(values.size() == num_dofs, - "vector value evaluation returned the wrong number of DOFs"); - for (std::size_t dof = 0; dof < num_dofs; ++dof) { - for (std::size_t component = 0; component < 3u; ++component) { - values_out[(dof * 3u + component) * output_stride + q] = - values[dof][component]; - } - } -} - -void write_vector_jacobians_strided(const std::vector& jacobians, - std::size_t num_dofs, - std::size_t output_stride, - std::size_t q, - Real* SVMP_RESTRICT jacobians_out) { - if (jacobians_out == nullptr) { - return; - } - BASIS_CHECK_CONSTRUCTION(jacobians.size() == num_dofs, - "vector Jacobian evaluation returned the wrong number of DOFs"); - for (std::size_t dof = 0; dof < num_dofs; ++dof) { - const auto& J = jacobians[dof]; - for (std::size_t component = 0; component < 3u; ++component) { - for (std::size_t derivative = 0; derivative < 3u; ++derivative) { - jacobians_out[(dof * 9u + component * 3u + derivative) * - output_stride + q] = J(component, derivative); - } - } - } -} - -void write_vector_curl_strided(const std::vector& curl, - std::size_t num_dofs, - std::size_t output_stride, - std::size_t q, - Real* SVMP_RESTRICT curls_out) { - if (curls_out == nullptr) { - return; - } - BASIS_CHECK_CONSTRUCTION(curl.size() == num_dofs, - "vector curl evaluation returned the wrong number of DOFs"); - for (std::size_t dof = 0; dof < num_dofs; ++dof) { - for (std::size_t component = 0; component < 3u; ++component) { - curls_out[(dof * 3u + component) * output_stride + q] = - curl[dof][component]; - } - } -} - -void write_vector_divergence_strided(const std::vector& divergence, - std::size_t num_dofs, - std::size_t output_stride, - std::size_t q, - Real* SVMP_RESTRICT divergence_out) { - if (divergence_out == nullptr) { - return; - } - BASIS_CHECK_CONSTRUCTION(divergence.size() == num_dofs, - 
"vector divergence evaluation returned the wrong number of DOFs"); - for (std::size_t dof = 0; dof < num_dofs; ++dof) { - divergence_out[dof * output_stride + q] = divergence[dof]; - } -} - -void write_curl_and_divergence_from_jacobians_strided( - const std::vector& jacobians, - std::size_t num_dofs, - std::size_t output_stride, - std::size_t q, - Real* SVMP_RESTRICT curls_out, - Real* SVMP_RESTRICT divergence_out) { - BASIS_CHECK_CONSTRUCTION(jacobians.size() == num_dofs, - "vector Jacobian evaluation returned the wrong number of DOFs"); - for (std::size_t dof = 0; dof < num_dofs; ++dof) { - const auto& J = jacobians[dof]; - if (curls_out != nullptr) { - const Vec3 curl = curl_from_jacobian(J); - for (std::size_t component = 0; component < 3u; ++component) { - curls_out[(dof * 3u + component) * output_stride + q] = - curl[component]; - } - } - if (divergence_out != nullptr) { - divergence_out[dof * output_stride + q] = divergence_from_jacobian(J); - } - } -} - -Vec3 lerp(const Vec3& a, const Vec3& b, Real s) { - const Real t = (s + Real(1)) * Real(0.5); - return a * (Real(1) - t) + b * t; -} - -Vec3 bilinear(const std::array& v, Real u, Real w) { - const Real N0 = Real(0.25) * (Real(1) - u) * (Real(1) - w); - const Real N1 = Real(0.25) * (Real(1) + u) * (Real(1) - w); - const Real N2 = Real(0.25) * (Real(1) + u) * (Real(1) + w); - const Real N3 = Real(0.25) * (Real(1) - u) * (Real(1) + w); - return v[0] * N0 + v[1] * N1 + v[2] * N2 + v[3] * N3; -} - -Vec3 bilinear_du(const std::array& v, Real u, Real w) { - (void)u; - const Real dN0 = -Real(0.25) * (Real(1) - w); - const Real dN1 = Real(0.25) * (Real(1) - w); - const Real dN2 = Real(0.25) * (Real(1) + w); - const Real dN3 = -Real(0.25) * (Real(1) + w); - return v[0] * dN0 + v[1] * dN1 + v[2] * dN2 + v[3] * dN3; -} - -Vec3 bilinear_dw(const std::array& v, Real u, Real w) { - (void)w; - const Real dN0 = -Real(0.25) * (Real(1) - u); - const Real dN1 = -Real(0.25) * (Real(1) + u); - const Real dN2 = Real(0.25) * 
(Real(1) + u); - const Real dN3 = Real(0.25) * (Real(1) - u); - return v[0] * dN0 + v[1] * dN1 + v[2] * dN2 + v[3] * dN3; -} - -Vec3 cross3(const Vec3& a, const Vec3& b) { - return Vec3{a[1] * b[2] - a[2] * b[1], - a[2] * b[0] - a[0] * b[2], - a[0] * b[1] - a[1] * b[0]}; -} - -Vec3 normalize3(const Vec3& v) { - const Real n = v.norm(); - BASIS_CHECK_CONSTRUCTION(n > std::numeric_limits::epsilon(), - "normalize3: zero-length vector"); - return v / n; -} - -std::array component_monomial_power_limits( - const std::vector>& candidates) { - std::array limits{{0, 0, 0}}; - for (const auto& mono : candidates) { - limits[0] = std::max(limits[0], mono[1]); - limits[1] = std::max(limits[1], mono[2]); - limits[2] = std::max(limits[2], mono[3]); - } - return limits; -} - -std::size_t triangle_poly_dim(std::size_t k) { - return (k + 1u) * (k + 2u) / 2u; -} - -std::size_t tetra_poly_dim(std::size_t k) { - return (k + 1u) * (k + 2u) * (k + 3u) / 6u; -} - -std::size_t rt_wedge_size(int order) { - const std::size_t k = static_cast(order); - const std::size_t face_dofs = - 2u * triangle_poly_dim(k) + 3u * (k + 1u) * (k + 1u); - const std::size_t interior_dofs = - (k >= 1u) ? (3u * k * (k + 1u) * (k + 1u) / 2u) : 0u; - return face_dofs + interior_dofs; -} - -std::size_t rt_pyramid_size(int order) { - const std::size_t k = static_cast(order); - const std::size_t face_dofs = (k + 1u) * (k + 1u) + 4u * triangle_poly_dim(k); - const std::size_t interior_dofs = (k >= 1u) ? (3u * k * k * k) : 0u; - return face_dofs + interior_dofs; -} - -std::size_t nd_wedge_size(int order) { - const std::size_t k = static_cast(order); - const std::size_t edge_dofs = 9u * (k + 1u); - const std::size_t face_dofs = (k >= 1u) ? (8u * k * (k + 1u)) : 0u; - const std::size_t interior_dofs = - (k >= 2u) ? 
(3u * k * (k - 1u) * (k + 1u) / 2u) : 0u; - return edge_dofs + face_dofs + interior_dofs; -} - -std::size_t nd_pyramid_size(int order) { - const std::size_t k = static_cast(order); - const std::size_t edge_dofs = 8u * (k + 1u); - const std::size_t face_dofs = (k >= 1u) ? (6u * k * (k + 1u)) : 0u; - const std::size_t interior_dofs = - (k >= 2u) ? (k * (k - 1u) * (k + 1u) / 2u) : 0u; - return edge_dofs + face_dofs + interior_dofs; -} - -void ensure_supported_hybrid_vector_order(ElementType type, - int order, - const char* family_name) { - (void)type; - (void)order; - (void)family_name; -} - -std::vector> make_component_monomial_candidates( - int max_total_degree) { - BASIS_CHECK_CONSTRUCTION(max_total_degree >= 0, - "make_component_monomial_candidates: negative total degree"); - - std::vector> candidates; - for (int component = 0; component < 3; ++component) { - for (int total = 0; total <= max_total_degree; ++total) { - for (int pz = 0; pz <= total; ++pz) { - for (int py = 0; py <= total - pz; ++py) { - const int px = total - py - pz; - candidates.push_back({component, px, py, pz}); - } - } - } - } - return candidates; -} - -std::vector> make_rt_extra_monomial_candidates(ElementType type, - int order) { - if (order >= 3) { - return make_component_monomial_candidates(3 * order); - } - - std::vector> candidates; - if (!is_pyramid(type) || order != 2) { - return candidates; - } - - for (int component = 0; component < 3; ++component) { - for (int pz = 0; pz <= 2; ++pz) { - for (int py = 0; py <= 2 - pz; ++py) { - for (int px = 0; px <= 2 - py - pz; ++px) { - candidates.push_back({component, px, py, pz}); - } - } - } - } - return candidates; -} - -Real eval_transformed_rt_monomial_scalar(const std::array& mono, - const std::vector& px, - const std::vector& py, - const std::vector& pz) { - return px[static_cast(mono[1])] * - py[static_cast(mono[2])] * - pz[static_cast(mono[3])]; -} - -Real eval_transformed_rt_monomial_divergence(const std::array& mono, - const 
std::vector& px, - const std::vector& py, - const std::vector& pz) { - const int component = mono[0]; - const int px_pow = mono[1]; - const int py_pow = mono[2]; - const int pz_pow = mono[3]; - - if (component == 0) { - if (px_pow == 0) { - return Real(0); - } - return Real(px_pow) * - px[static_cast(px_pow - 1)] * - py[static_cast(py_pow)] * - pz[static_cast(pz_pow)]; - } - if (component == 1) { - if (py_pow == 0) { - return Real(0); - } - return Real(py_pow) * - px[static_cast(px_pow)] * - py[static_cast(py_pow - 1)] * - pz[static_cast(pz_pow)]; - } - if (pz_pow == 0) { - return Real(0); - } - return Real(pz_pow) * - px[static_cast(px_pow)] * - py[static_cast(py_pow)] * - pz[static_cast(pz_pow - 1)]; -} - -void add_component_monomial_jacobian(VectorJacobian& J, - int component, - int px_pow, - int py_pow, - int pz_pow, - Real coefficient, - const std::vector& px, - const std::vector& py, - const std::vector& pz) { - const auto comp = static_cast(component); - if (px_pow > 0) { - J(comp, 0) += coefficient * Real(px_pow) * - px[static_cast(px_pow - 1)] * - py[static_cast(py_pow)] * - pz[static_cast(pz_pow)]; - } - if (py_pow > 0) { - J(comp, 1) += coefficient * Real(py_pow) * - px[static_cast(px_pow)] * - py[static_cast(py_pow - 1)] * - pz[static_cast(pz_pow)]; - } - if (pz_pow > 0) { - J(comp, 2) += coefficient * Real(pz_pow) * - px[static_cast(px_pow)] * - py[static_cast(py_pow)] * - pz[static_cast(pz_pow - 1)]; - } -} - -VectorJacobian eval_transformed_component_monomial_jacobian( - const std::array& mono, - const std::vector& px, - const std::vector& py, - const std::vector& pz) { - VectorJacobian J{}; - add_component_monomial_jacobian( - J, mono[0], mono[1], mono[2], mono[3], Real(1), px, py, pz); - return J; -} - -void add_component_monomial_curl(Vec3& curl, - int component, - int px_pow, - int py_pow, - int pz_pow, - Real coefficient, - const std::vector& px, - const std::vector& py, - const std::vector& pz) { - const Real dphidx = (px_pow == 0) - ? 
Real(0) - : coefficient * Real(px_pow) * - px[static_cast(px_pow - 1)] * - py[static_cast(py_pow)] * - pz[static_cast(pz_pow)]; - const Real dphidy = (py_pow == 0) - ? Real(0) - : coefficient * Real(py_pow) * - px[static_cast(px_pow)] * - py[static_cast(py_pow - 1)] * - pz[static_cast(pz_pow)]; - const Real dphidz = (pz_pow == 0) - ? Real(0) - : coefficient * Real(pz_pow) * - px[static_cast(px_pow)] * - py[static_cast(py_pow)] * - pz[static_cast(pz_pow - 1)]; - - if (component == 0) { - curl[1] += dphidz; - curl[2] -= dphidy; - } else if (component == 1) { - curl[0] -= dphidz; - curl[2] += dphidx; - } else { - curl[0] += dphidy; - curl[1] -= dphidx; - } -} - -std::vector> make_nd_extra_monomial_candidates(ElementType, - int order) { - if (order >= 3) { - return make_component_monomial_candidates(3 * order); - } - - std::vector> candidates; - const int max_total_degree = (order == 1) ? 4 : 5; - for (int component = 0; component < 3; ++component) { - for (int total = 0; total <= max_total_degree; ++total) { - for (int pz = 0; pz <= total; ++pz) { - for (int py = 0; py <= total - pz; ++py) { - const int px = total - py - pz; - candidates.push_back({component, px, py, pz}); - } - } - } - } - return candidates; -} - -Real eval_transformed_nd_monomial_scalar(const std::array& mono, - const std::vector& px, - const std::vector& py, - const std::vector& pz) { - return px[static_cast(mono[1])] * - py[static_cast(mono[2])] * - pz[static_cast(mono[3])]; -} - -Vec3 eval_transformed_nd_monomial_curl(const std::array& mono, - const std::vector& px, - const std::vector& py, - const std::vector& pz) { - const int component = mono[0]; - const int px_pow = mono[1]; - const int py_pow = mono[2]; - const int pz_pow = mono[3]; - - const Real dphidx = (px_pow == 0) - ? Real(0) - : Real(px_pow) * - px[static_cast(px_pow - 1)] * - py[static_cast(py_pow)] * - pz[static_cast(pz_pow)]; - const Real dphidy = (py_pow == 0) - ? 
Real(0) - : Real(py_pow) * - px[static_cast(px_pow)] * - py[static_cast(py_pow - 1)] * - pz[static_cast(pz_pow)]; - const Real dphidz = (pz_pow == 0) - ? Real(0) - : Real(pz_pow) * - px[static_cast(px_pow)] * - py[static_cast(py_pow)] * - pz[static_cast(pz_pow - 1)]; - - if (component == 0) { - return Vec3{Real(0), dphidz, -dphidy}; - } - if (component == 1) { - return Vec3{-dphidz, Real(0), dphidx}; - } - return Vec3{dphidy, -dphidx, Real(0)}; -} - -} // namespace vector_common -} // namespace detail -} // namespace basis -} // namespace FE -} // namespace svmp diff --git a/Code/Source/solver/FE/Basis/VectorBasisEvaluationHelpers.h b/Code/Source/solver/FE/Basis/VectorBasisEvaluationHelpers.h deleted file mode 100644 index e0e6daa10..000000000 --- a/Code/Source/solver/FE/Basis/VectorBasisEvaluationHelpers.h +++ /dev/null @@ -1,751 +0,0 @@ -/* Copyright (c) Stanford University, The Regents of the University of California, and others. - * - * All Rights Reserved. - * - * See License file. 
- */ - -#ifndef SVMP_FE_BASIS_VECTORBASISEVALUATIONHELPERS_H -#define SVMP_FE_BASIS_VECTORBASISEVALUATIONHELPERS_H - -#include "VectorBasis.h" -#include "Basis/BasisTraits.h" - -#include -#include -#include -#include -#include - -namespace svmp { -namespace FE { -namespace basis { -namespace detail { -namespace vector_common { - -using Vec3 = math::Vector; - -struct VectorBasisScratch { - std::vector px; - std::vector py; - std::vector pz; - std::vector batched_px; - std::vector batched_py; - std::vector batched_pz; - std::vector candidate_values; - std::vector candidate_dx; - std::vector candidate_dy; - std::vector candidate_dz; - std::vector modal_values_batched; - std::vector modal_jacobians_batched; - std::vector modal_curls_batched; - std::vector modal_divergence_batched; - std::vector vector_values; - std::vector vector_jacobians; - std::vector scalars; - std::vector api_values; - std::vector api_jacobians; - std::vector api_curl; - std::vector api_divergence; - - void prewarm(std::size_t max_size, std::size_t max_qpts) { - const std::size_t batched_size = max_size * std::max(max_qpts, 1u); - px.reserve(max_size); - py.reserve(max_size); - pz.reserve(max_size); - batched_px.reserve(batched_size); - batched_py.reserve(batched_size); - batched_pz.reserve(batched_size); - candidate_values.reserve(max_size); - candidate_dx.reserve(max_size); - candidate_dy.reserve(max_size); - candidate_dz.reserve(max_size); - modal_values_batched.reserve(batched_size * 3u); - modal_jacobians_batched.reserve(batched_size * 9u); - modal_curls_batched.reserve(batched_size * 3u); - modal_divergence_batched.reserve(batched_size); - vector_values.reserve(max_size); - vector_jacobians.reserve(max_size); - scalars.reserve(max_size); - api_values.reserve(max_size); - api_jacobians.reserve(max_size); - api_curl.reserve(max_size); - api_divergence.reserve(max_size); - } -}; - -VectorBasisScratch& vector_basis_scratch(); -void prewarm_vector_basis_scratch(std::size_t max_size, std::size_t 
max_qpts = 0); - -void fill_powers(Real x, int max_p, std::vector& out); -void fill_power_tables(const Vec3& xi, - const std::array& limits, - VectorBasisScratch& scratch); -void fill_batched_power_tables(const std::vector& points, - const std::array& limits, - VectorBasisScratch& scratch); -void validate_vector_strided_outputs(std::size_t num_qpts, - std::size_t output_stride, - const char* family_name); -void zero_active_strided_rows(Real* output, - std::size_t rows, - std::size_t output_stride, - std::size_t num_qpts); -SparseModalCoefficientMatrix build_sparse_modal_coefficients( - const std::vector& dense_coefficients, - std::size_t rows, - std::size_t cols); -Vec3 curl_from_jacobian(const VectorJacobian& J) noexcept; -Real divergence_from_jacobian(const VectorJacobian& J) noexcept; - -inline Real batched_power_product(const std::vector& px, - const std::vector& py, - const std::vector& pz, - std::size_t stride, - int px_pow, - int py_pow, - int pz_pow, - std::size_t q) noexcept { - return px[static_cast(px_pow) * stride + q] * - py[static_cast(py_pow) * stride + q] * - pz[static_cast(pz_pow) * stride + q]; -} - -inline Real batched_component_partial(const std::vector& px, - const std::vector& py, - const std::vector& pz, - std::size_t stride, - int px_pow, - int py_pow, - int pz_pow, - int derivative_axis, - std::size_t q) noexcept { - if (derivative_axis == 0) { - if (px_pow == 0) { - return Real(0); - } - return Real(px_pow) * - px[static_cast(px_pow - 1) * stride + q] * - py[static_cast(py_pow) * stride + q] * - pz[static_cast(pz_pow) * stride + q]; - } - if (derivative_axis == 1) { - if (py_pow == 0) { - return Real(0); - } - return Real(py_pow) * - px[static_cast(px_pow) * stride + q] * - py[static_cast(py_pow - 1) * stride + q] * - pz[static_cast(pz_pow) * stride + q]; - } - if (pz_pow == 0) { - return Real(0); - } - return Real(pz_pow) * - px[static_cast(px_pow) * stride + q] * - py[static_cast(py_pow) * stride + q] * - pz[static_cast(pz_pow - 1) * 
stride + q]; -} - -inline Vec3 curl_from_component_gradient(int component, - Real dphidx, - Real dphidy, - Real dphidz) noexcept { - if (component == 0) { - return Vec3{Real(0), dphidz, -dphidy}; - } - if (component == 1) { - return Vec3{-dphidz, Real(0), dphidx}; - } - return Vec3{dphidy, -dphidx, Real(0)}; -} - -inline void axpy_qpoints(Real* target, - const Real* source, - Real coefficient, - std::size_t num_qpts) noexcept { - for (std::size_t q = 0; q < num_qpts; ++q) { - target[q] += coefficient * source[q]; - } -} - -void write_vector_values_strided(const std::vector& values, - std::size_t num_dofs, - std::size_t output_stride, - std::size_t q, - Real* SVMP_RESTRICT values_out); -void write_vector_jacobians_strided(const std::vector& jacobians, - std::size_t num_dofs, - std::size_t output_stride, - std::size_t q, - Real* SVMP_RESTRICT jacobians_out); -void write_vector_curl_strided(const std::vector& curl, - std::size_t num_dofs, - std::size_t output_stride, - std::size_t q, - Real* SVMP_RESTRICT curls_out); -void write_vector_divergence_strided(const std::vector& divergence, - std::size_t num_dofs, - std::size_t output_stride, - std::size_t q, - Real* SVMP_RESTRICT divergence_out); -void write_curl_and_divergence_from_jacobians_strided( - const std::vector& jacobians, - std::size_t num_dofs, - std::size_t output_stride, - std::size_t q, - Real* SVMP_RESTRICT curls_out, - Real* SVMP_RESTRICT divergence_out); - -template -void evaluate_vector_public_api_strided( - const BasisLike& basis, - const std::vector& points, - std::size_t output_stride, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT jacobians_out, - Real* SVMP_RESTRICT curls_out, - Real* SVMP_RESTRICT divergence_out, - bool use_direct_curl, - bool use_direct_divergence, - const char* family_name) { - const std::size_t num_qpts = points.size(); - const std::size_t num_dofs = basis.size(); - validate_vector_strided_outputs(num_qpts, output_stride, family_name); - - auto& scratch = 
vector_basis_scratch(); - for (std::size_t q = 0; q < num_qpts; ++q) { - if (values_out != nullptr) { - basis.evaluate_vector_values(points[q], scratch.api_values); - write_vector_values_strided( - scratch.api_values, num_dofs, output_stride, q, values_out); - } - - const bool needs_jacobians = - jacobians_out != nullptr || - (curls_out != nullptr && !use_direct_curl) || - (divergence_out != nullptr && !use_direct_divergence); - - if (needs_jacobians) { - basis.evaluate_vector_jacobians(points[q], scratch.api_jacobians); - write_vector_jacobians_strided( - scratch.api_jacobians, num_dofs, output_stride, q, jacobians_out); - write_curl_and_divergence_from_jacobians_strided( - scratch.api_jacobians, - num_dofs, - output_stride, - q, - curls_out, - divergence_out); - continue; - } - - if (curls_out != nullptr) { - basis.evaluate_curl(points[q], scratch.api_curl); - write_vector_curl_strided( - scratch.api_curl, num_dofs, output_stride, q, curls_out); - } - if (divergence_out != nullptr) { - basis.evaluate_divergence(points[q], scratch.api_divergence); - write_vector_divergence_strided( - scratch.api_divergence, num_dofs, output_stride, q, divergence_out); - } - } -} - -Vec3 lerp(const Vec3& a, const Vec3& b, Real s); -Vec3 bilinear(const std::array& v, Real u, Real w); -Vec3 bilinear_du(const std::array& v, Real u, Real w); -Vec3 bilinear_dw(const std::array& v, Real u, Real w); -Vec3 cross3(const Vec3& a, const Vec3& b); -Vec3 normalize3(const Vec3& v); - -template -std::array modal_power_limits(const ModalPolynomials& monomials) { - std::array limits{{0, 0, 0}}; - for (const auto& poly : monomials) { - for (int t = 0; t < poly.num_terms; ++t) { - const auto& m = poly.terms[static_cast(t)]; - limits[0] = std::max(limits[0], m.px); - limits[1] = std::max(limits[1], m.py); - limits[2] = std::max(limits[2], m.pz); - } - } - return limits; -} - -std::array component_monomial_power_limits( - const std::vector>& candidates); -std::size_t triangle_poly_dim(std::size_t k); 
-std::size_t tetra_poly_dim(std::size_t k); -std::size_t rt_wedge_size(int order); -std::size_t rt_pyramid_size(int order); -std::size_t nd_wedge_size(int order); -std::size_t nd_pyramid_size(int order); -void ensure_supported_hybrid_vector_order(ElementType type, - int order, - const char* family_name); -std::vector> make_component_monomial_candidates(int max_total_degree); -std::vector> make_rt_extra_monomial_candidates(ElementType type, - int order); -Real eval_transformed_rt_monomial_scalar(const std::array& mono, - const std::vector& px, - const std::vector& py, - const std::vector& pz); -Real eval_transformed_rt_monomial_divergence(const std::array& mono, - const std::vector& px, - const std::vector& py, - const std::vector& pz); - -void add_component_monomial_jacobian(VectorJacobian& J, - int component, - int px_pow, - int py_pow, - int pz_pow, - Real coefficient, - const std::vector& px, - const std::vector& py, - const std::vector& pz); -VectorJacobian eval_transformed_component_monomial_jacobian( - const std::array& mono, - const std::vector& px, - const std::vector& py, - const std::vector& pz); -void add_component_monomial_curl(Vec3& curl, - int component, - int px_pow, - int py_pow, - int pz_pow, - Real coefficient, - const std::vector& px, - const std::vector& py, - const std::vector& pz); - -template -void evaluate_nodal_modal_vector_values_with_limits(const ModalPolynomials& monomials, - const SparseModalCoefficientMatrix& sparse_coeffs, - std::size_t n, - const Vec3& xi, - const std::array& power_limits, - std::vector& values) { - values.assign(n, Vec3{}); - - auto& scratch = vector_basis_scratch(); - fill_power_tables(xi, power_limits, scratch); - const auto& px = scratch.px; - const auto& py = scratch.py; - const auto& pz = scratch.pz; - - auto& modal_vals = scratch.vector_values; - modal_vals.assign(n, Vec3{}); - for (std::size_t p = 0; p < n; ++p) { - const auto& poly = monomials[p]; - auto& v = modal_vals[p]; - for (int t = 0; t < 
poly.num_terms; ++t) { - const auto& m = poly.terms[static_cast(t)]; - const Real mv = - px[static_cast(m.px)] * - py[static_cast(m.py)] * - pz[static_cast(m.pz)]; - v[static_cast(m.component)] += m.coefficient * mv; - } - } - - BASIS_CHECK_CONSTRUCTION(sparse_coeffs.rows == n && - sparse_coeffs.cols == n && - sparse_coeffs.row_offsets.size() == n + 1u, - "evaluate_nodal_modal_vector_values: sparse coefficient size mismatch"); - BASIS_CHECK_CONSTRUCTION(sparse_coeffs.dofs.size() == sparse_coeffs.coefficients.size(), - "evaluate_nodal_modal_vector_values: sparse coefficient entry mismatch"); - for (std::size_t p = 0; p < n; ++p) { - const Vec3& mv = modal_vals[p]; - const std::size_t row_begin = sparse_coeffs.row_offsets[p]; - const std::size_t row_end = sparse_coeffs.row_offsets[p + 1u]; - for (std::size_t entry = row_begin; entry < row_end; ++entry) { - const std::size_t dof = sparse_coeffs.dofs[entry]; - const Real c = sparse_coeffs.coefficients[entry]; - values[dof][0] += c * mv[0]; - values[dof][1] += c * mv[1]; - values[dof][2] += c * mv[2]; - } - } -} - -template -void evaluate_nodal_modal_vector_jacobians_with_limits(const ModalPolynomials& monomials, - const SparseModalCoefficientMatrix& sparse_coeffs, - std::size_t n, - const Vec3& xi, - const std::array& power_limits, - std::vector& jacobians) { - jacobians.assign(n, VectorJacobian{}); - - auto& scratch = vector_basis_scratch(); - fill_power_tables(xi, power_limits, scratch); - const auto& px = scratch.px; - const auto& py = scratch.py; - const auto& pz = scratch.pz; - - auto& modal_jacs = scratch.vector_jacobians; - modal_jacs.assign(n, VectorJacobian{}); - for (std::size_t p = 0; p < n; ++p) { - const auto& poly = monomials[p]; - auto& J = modal_jacs[p]; - for (int t = 0; t < poly.num_terms; ++t) { - const auto& m = poly.terms[static_cast(t)]; - add_component_monomial_jacobian(J, m.component, m.px, m.py, m.pz, m.coefficient, px, py, pz); - } - } - - BASIS_CHECK_CONSTRUCTION(sparse_coeffs.rows == n && - 
sparse_coeffs.cols == n && - sparse_coeffs.row_offsets.size() == n + 1u, - "evaluate_nodal_modal_vector_jacobians: sparse coefficient size mismatch"); - BASIS_CHECK_CONSTRUCTION(sparse_coeffs.dofs.size() == sparse_coeffs.coefficients.size(), - "evaluate_nodal_modal_vector_jacobians: sparse coefficient entry mismatch"); - for (std::size_t p = 0; p < n; ++p) { - const auto& Jp = modal_jacs[p]; - const std::size_t row_begin = sparse_coeffs.row_offsets[p]; - const std::size_t row_end = sparse_coeffs.row_offsets[p + 1u]; - for (std::size_t entry = row_begin; entry < row_end; ++entry) { - const std::size_t dof = sparse_coeffs.dofs[entry]; - const Real c = sparse_coeffs.coefficients[entry]; - for (std::size_t r = 0; r < 3; ++r) { - for (std::size_t col = 0; col < 3; ++col) { - jacobians[dof](r, col) += c * Jp(r, col); - } - } - } - } -} - -template -void evaluate_nodal_modal_vector_curl_with_limits(const ModalPolynomials& monomials, - const SparseModalCoefficientMatrix& sparse_coeffs, - std::size_t n, - const Vec3& xi, - const std::array& power_limits, - std::vector& curl) { - curl.assign(n, Vec3{}); - - auto& scratch = vector_basis_scratch(); - fill_power_tables(xi, power_limits, scratch); - const auto& px = scratch.px; - const auto& py = scratch.py; - const auto& pz = scratch.pz; - - auto& modal_curl = scratch.vector_values; - modal_curl.assign(n, Vec3{}); - for (std::size_t p = 0; p < n; ++p) { - const auto& poly = monomials[p]; - auto& c = modal_curl[p]; - for (int t = 0; t < poly.num_terms; ++t) { - const auto& m = poly.terms[static_cast(t)]; - add_component_monomial_curl(c, m.component, m.px, m.py, m.pz, m.coefficient, px, py, pz); - } - } - - BASIS_CHECK_CONSTRUCTION(sparse_coeffs.rows == n && - sparse_coeffs.cols == n && - sparse_coeffs.row_offsets.size() == n + 1u, - "evaluate_nodal_modal_vector_curl: sparse coefficient size mismatch"); - BASIS_CHECK_CONSTRUCTION(sparse_coeffs.dofs.size() == sparse_coeffs.coefficients.size(), - "evaluate_nodal_modal_vector_curl: 
sparse coefficient entry mismatch"); - for (std::size_t p = 0; p < n; ++p) { - const Vec3& cm = modal_curl[p]; - const std::size_t row_begin = sparse_coeffs.row_offsets[p]; - const std::size_t row_end = sparse_coeffs.row_offsets[p + 1u]; - for (std::size_t entry = row_begin; entry < row_end; ++entry) { - const std::size_t dof = sparse_coeffs.dofs[entry]; - const Real c = sparse_coeffs.coefficients[entry]; - curl[dof][0] += c * cm[0]; - curl[dof][1] += c * cm[1]; - curl[dof][2] += c * cm[2]; - } - } -} - -template -void evaluate_nodal_modal_divergence_with_limits(const ModalPolynomials& monomials, - const SparseModalCoefficientMatrix& sparse_coeffs, - std::size_t n, - const Vec3& xi, - const std::array& power_limits, - std::vector& divergence) { - divergence.assign(n, Real(0)); - - auto& scratch = vector_basis_scratch(); - fill_power_tables(xi, power_limits, scratch); - const auto& px = scratch.px; - const auto& py = scratch.py; - const auto& pz = scratch.pz; - - auto& modal_divergence = scratch.scalars; - modal_divergence.assign(n, Real(0)); - for (std::size_t p = 0; p < n; ++p) { - const auto& poly = monomials[p]; - Real div = Real(0); - for (int t = 0; t < poly.num_terms; ++t) { - const auto& m = poly.terms[static_cast(t)]; - if (m.component == 0 && m.px > 0) { - div += m.coefficient * Real(m.px) * - px[static_cast(m.px - 1)] * - py[static_cast(m.py)] * - pz[static_cast(m.pz)]; - } else if (m.component == 1 && m.py > 0) { - div += m.coefficient * Real(m.py) * - px[static_cast(m.px)] * - py[static_cast(m.py - 1)] * - pz[static_cast(m.pz)]; - } else if (m.component == 2 && m.pz > 0) { - div += m.coefficient * Real(m.pz) * - px[static_cast(m.px)] * - py[static_cast(m.py)] * - pz[static_cast(m.pz - 1)]; - } - } - modal_divergence[p] = div; - } - - BASIS_CHECK_CONSTRUCTION(sparse_coeffs.rows == n && - sparse_coeffs.cols == n && - sparse_coeffs.row_offsets.size() == n + 1u, - "evaluate_nodal_modal_divergence: sparse coefficient size mismatch"); - 
BASIS_CHECK_CONSTRUCTION(sparse_coeffs.dofs.size() == sparse_coeffs.coefficients.size(), - "evaluate_nodal_modal_divergence: sparse coefficient entry mismatch"); - for (std::size_t p = 0; p < n; ++p) { - const Real div = modal_divergence[p]; - if (div == Real(0)) { - continue; - } - const std::size_t row_begin = sparse_coeffs.row_offsets[p]; - const std::size_t row_end = sparse_coeffs.row_offsets[p + 1u]; - for (std::size_t entry = row_begin; entry < row_end; ++entry) { - divergence[sparse_coeffs.dofs[entry]] += - sparse_coeffs.coefficients[entry] * div; - } - } -} - -template -void evaluate_nodal_modal_vector_strided_with_limits( - const ModalPolynomials& monomials, - const SparseModalCoefficientMatrix& sparse_coeffs, - std::size_t n, - const std::vector& points, - std::size_t output_stride, - const std::array& power_limits, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT jacobians_out, - Real* SVMP_RESTRICT curls_out, - Real* SVMP_RESTRICT divergence_out, - const char* family_name) { - const std::size_t num_qpts = points.size(); - validate_vector_strided_outputs(num_qpts, output_stride, family_name); - BASIS_CHECK_CONSTRUCTION(sparse_coeffs.rows == n && - sparse_coeffs.cols == n && - sparse_coeffs.row_offsets.size() == n + 1u, - "evaluate_nodal_modal_vector_strided: sparse coefficient size mismatch"); - BASIS_CHECK_CONSTRUCTION(sparse_coeffs.dofs.size() == sparse_coeffs.coefficients.size(), - "evaluate_nodal_modal_vector_strided: sparse coefficient entry mismatch"); - - auto& scratch = vector_basis_scratch(); - const bool need_values = values_out != nullptr; - const bool need_jacobians = jacobians_out != nullptr; - const bool need_curls = curls_out != nullptr; - const bool need_divergence = divergence_out != nullptr; - - if (need_values) { - zero_active_strided_rows(values_out, n * 3u, output_stride, num_qpts); - } - if (need_jacobians) { - zero_active_strided_rows(jacobians_out, n * 9u, output_stride, num_qpts); - } - if (need_curls) { - 
zero_active_strided_rows(curls_out, n * 3u, output_stride, num_qpts); - } - if (need_divergence) { - zero_active_strided_rows(divergence_out, n, output_stride, num_qpts); - } - if (num_qpts == 0 || n == 0) { - return; - } - - fill_batched_power_tables(points, power_limits, scratch); - const auto& px = scratch.batched_px; - const auto& py = scratch.batched_py; - const auto& pz = scratch.batched_pz; - const std::size_t power_stride = num_qpts; - const bool need_modal_gradient = need_jacobians || need_curls || need_divergence; - - auto& modal_values = scratch.modal_values_batched; - auto& modal_jacobians = scratch.modal_jacobians_batched; - auto& modal_curls = scratch.modal_curls_batched; - auto& modal_divergence = scratch.modal_divergence_batched; - - for (std::size_t p = 0; p < n; ++p) { - if (need_values) { - modal_values.assign(3u * num_qpts, Real(0)); - } - if (need_jacobians) { - modal_jacobians.assign(9u * num_qpts, Real(0)); - } - if (need_curls) { - modal_curls.assign(3u * num_qpts, Real(0)); - } - if (need_divergence) { - modal_divergence.assign(num_qpts, Real(0)); - } - - const auto& poly = monomials[p]; - for (int term_index = 0; term_index < poly.num_terms; ++term_index) { - const auto& term = poly.terms[static_cast(term_index)]; - const std::size_t component = static_cast(term.component); - Real* modal_value_row = need_values - ? modal_values.data() + component * num_qpts - : nullptr; - Real* modal_jacobian_row = need_jacobians - ? modal_jacobians.data() + component * 3u * num_qpts - : nullptr; - Real* modal_curl_rows = need_curls ? modal_curls.data() : nullptr; - Real* modal_divergence_row = - need_divergence ? 
modal_divergence.data() : nullptr; - - if (need_values) { - for (std::size_t q = 0; q < num_qpts; ++q) { - modal_value_row[q] += - term.coefficient * - batched_power_product(px, - py, - pz, - power_stride, - term.px, - term.py, - term.pz, - q); - } - } - - if (need_modal_gradient) { - for (std::size_t q = 0; q < num_qpts; ++q) { - const Real dphidx = - term.coefficient * - batched_component_partial(px, - py, - pz, - power_stride, - term.px, - term.py, - term.pz, - 0, - q); - const Real dphidy = - term.coefficient * - batched_component_partial(px, - py, - pz, - power_stride, - term.px, - term.py, - term.pz, - 1, - q); - const Real dphidz = - term.coefficient * - batched_component_partial(px, - py, - pz, - power_stride, - term.px, - term.py, - term.pz, - 2, - q); - - if (need_jacobians) { - modal_jacobian_row[q] += dphidx; - modal_jacobian_row[num_qpts + q] += dphidy; - modal_jacobian_row[2u * num_qpts + q] += dphidz; - } - if (need_curls) { - const Vec3 curl = - curl_from_component_gradient(term.component, - dphidx, - dphidy, - dphidz); - modal_curl_rows[q] += curl[0]; - modal_curl_rows[num_qpts + q] += curl[1]; - modal_curl_rows[2u * num_qpts + q] += curl[2]; - } - if (need_divergence) { - const Real div = term.component == 0 ? dphidx - : term.component == 1 ? 
dphidy - : dphidz; - modal_divergence_row[q] += div; - } - } - } - } - - const std::size_t row_begin = sparse_coeffs.row_offsets[p]; - const std::size_t row_end = sparse_coeffs.row_offsets[p + 1u]; - for (std::size_t entry = row_begin; entry < row_end; ++entry) { - const std::size_t dof = sparse_coeffs.dofs[entry]; - const Real c = sparse_coeffs.coefficients[entry]; - if (need_values) { - for (std::size_t component = 0; component < 3u; ++component) { - axpy_qpoints(values_out + (dof * 3u + component) * output_stride, - modal_values.data() + component * num_qpts, - c, - num_qpts); - } - } - if (need_jacobians) { - for (std::size_t row = 0; row < 3u; ++row) { - for (std::size_t col = 0; col < 3u; ++col) { - axpy_qpoints(jacobians_out + - (dof * 9u + row * 3u + col) * output_stride, - modal_jacobians.data() + - (row * 3u + col) * num_qpts, - c, - num_qpts); - } - } - } - if (need_curls) { - for (std::size_t component = 0; component < 3u; ++component) { - axpy_qpoints(curls_out + (dof * 3u + component) * output_stride, - modal_curls.data() + component * num_qpts, - c, - num_qpts); - } - } - if (need_divergence) { - axpy_qpoints(divergence_out + dof * output_stride, - modal_divergence.data(), - c, - num_qpts); - } - } - } -} - -std::vector> make_nd_extra_monomial_candidates(ElementType type, - int order); -Real eval_transformed_nd_monomial_scalar(const std::array& mono, - const std::vector& px, - const std::vector& py, - const std::vector& pz); -Vec3 eval_transformed_nd_monomial_curl(const std::array& mono, - const std::vector& px, - const std::vector& py, - const std::vector& pz); - - -} // namespace vector_common -} // namespace detail -} // namespace basis -} // namespace FE -} // namespace svmp - -#endif // SVMP_FE_BASIS_VECTORBASISEVALUATIONHELPERS_H diff --git a/Code/Source/solver/FE/Basis/VectorBasisModalPolynomial.h b/Code/Source/solver/FE/Basis/VectorBasisModalPolynomial.h deleted file mode 100644 index 6e1a7202b..000000000 --- 
a/Code/Source/solver/FE/Basis/VectorBasisModalPolynomial.h +++ /dev/null @@ -1,77 +0,0 @@ -/* Copyright (c) Stanford University, The Regents of the University of California, and others. - * - * All Rights Reserved. - * - * See License file. - */ - -#ifndef SVMP_FE_BASIS_VECTORBASISMODALPOLYNOMIAL_H -#define SVMP_FE_BASIS_VECTORBASISMODALPOLYNOMIAL_H - -#include "Types.h" - -#include -#include -#include - -namespace svmp { -namespace FE { -namespace basis { - -struct VectorBasisModalTerm { - int component{0}; // 0=x, 1=y, 2=z - int px{0}; - int py{0}; - int pz{0}; - Real coefficient{Real(1)}; -}; - -struct VectorBasisModalPolynomial { - std::array terms{}; - int num_terms{0}; -}; - -inline bool modal_terms_equal(const VectorBasisModalTerm& lhs, - const VectorBasisModalTerm& rhs) noexcept { - return lhs.component == rhs.component && - lhs.px == rhs.px && - lhs.py == rhs.py && - lhs.pz == rhs.pz && - lhs.coefficient == rhs.coefficient; -} - -inline bool modal_polynomials_equal(const VectorBasisModalPolynomial& lhs, - const VectorBasisModalPolynomial& rhs) noexcept { - if (lhs.num_terms != rhs.num_terms) { - return false; - } - for (int term = 0; term < lhs.num_terms; ++term) { - const auto index = static_cast(term); - if (!modal_terms_equal(lhs.terms[index], rhs.terms[index])) { - return false; - } - } - return true; -} - -inline bool append_unique_modal_polynomial( - std::vector& polynomials, - const VectorBasisModalPolynomial& polynomial) { - const auto found = std::find_if( - polynomials.begin(), - polynomials.end(), - [&](const VectorBasisModalPolynomial& existing) { - return modal_polynomials_equal(existing, polynomial); - }); - if (found != polynomials.end()) { - return false; - } - polynomials.push_back(polynomial); - return true; -} - -} // namespace basis -} // namespace FE -} // namespace svmp - -#endif // SVMP_FE_BASIS_VECTORBASISMODALPOLYNOMIAL_H diff --git a/Code/Source/solver/FE/Common/Alignment.h b/Code/Source/solver/FE/Common/Alignment.h deleted file 
mode 100644 index 8d33a7a7a..000000000 --- a/Code/Source/solver/FE/Common/Alignment.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef SVMP_FE_CORE_ALIGNMENT_H -#define SVMP_FE_CORE_ALIGNMENT_H - -/** - * @file Alignment.h - * @brief Global alignment constants used across FE modules. - */ - -#include - -namespace svmp { -namespace FE { - -/// Preferred cache-line/SIMD alignment for performance-critical arrays. -inline constexpr std::size_t kFEPreferredAlignmentBytes = 64u; - -/// Alignment for small fixed-size math objects that are commonly passed by value. -inline constexpr std::size_t kFEFixedObjectAlignmentBytes = 32u; - -} // namespace FE -} // namespace svmp - -#endif // SVMP_FE_CORE_ALIGNMENT_H diff --git a/Code/Source/solver/FE/Common/Types.h b/Code/Source/solver/FE/Common/Types.h index 60312a524..bb3f23bca 100644 --- a/Code/Source/solver/FE/Common/Types.h +++ b/Code/Source/solver/FE/Common/Types.h @@ -70,8 +70,9 @@ enum class CellFamily { #endif } // namespace svmp #endif -#include #include +#include +#include #include #include #include @@ -174,6 +175,12 @@ constexpr BlockId INVALID_BLOCK_ID = std::numeric_limits::max(); */ constexpr FieldId CURRENT_SOLUTION_FIELD_ID = std::numeric_limits::max(); +/// Preferred cache-line/SIMD alignment for performance-critical arrays. +inline constexpr std::size_t kFEPreferredAlignmentBytes = 64u; + +/// Alignment for small fixed-size math objects that are commonly passed by value. 
+inline constexpr std::size_t kFEFixedObjectAlignmentBytes = 32u; + // ============================================================================ // Field Value Entry (for point evaluation of field-dependent expressions) // ============================================================================ diff --git a/Code/Source/solver/FE/Math/Matrix.h b/Code/Source/solver/FE/Math/Matrix.h index 0b80091f9..6058ab943 100644 --- a/Code/Source/solver/FE/Math/Matrix.h +++ b/Code/Source/solver/FE/Math/Matrix.h @@ -14,7 +14,7 @@ #include "MatrixExpr.h" #include "Vector.h" #include "MathConstants.h" -#include "../Common/Alignment.h" +#include "../Common/Types.h" #include #include #include diff --git a/Code/Source/solver/FE/Math/Vector.h b/Code/Source/solver/FE/Math/Vector.h index e272bd6dd..76c7be152 100644 --- a/Code/Source/solver/FE/Math/Vector.h +++ b/Code/Source/solver/FE/Math/Vector.h @@ -12,7 +12,7 @@ #include "VectorExpr.h" #include "MathConstants.h" -#include "../Common/Alignment.h" +#include "../Common/Types.h" #include #include #include diff --git a/Code/Source/solver/FE/Quadrature/QuadratureRule.h b/Code/Source/solver/FE/Quadrature/QuadratureRule.h deleted file mode 100644 index f7d186891..000000000 --- a/Code/Source/solver/FE/Quadrature/QuadratureRule.h +++ /dev/null @@ -1,237 +0,0 @@ -/* Copyright (c) Stanford University, The Regents of the University of California, and others. - * - * All Rights Reserved. - * - * See License file. - */ - -#ifndef SVMP_FE_QUADRATURE_RULE_H -#define SVMP_FE_QUADRATURE_RULE_H - -/** - * @file QuadratureRule.h - * @brief Abstracted quadrature rule representation for FE integration - * - * This header defines the base class for all quadrature rules used by the - * finite element infrastructure. Rules are expressed in reference element - * space only; mapping to physical space is handled by the Geometry module. 
- * - * The interface is intentionally lightweight and header-only to avoid coupling - * Quadrature to other modules while remaining compatible with the Mesh library - * through shared type aliases provided by FE/Common/Types.h. - */ - -#include "Types.h" -#include "FEException.h" -#include "Math/Vector.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace svmp { -namespace FE { -namespace quadrature { - -/// Convenience alias for quadrature point representation in reference space -using QuadPoint = math::Vector; - -struct QuadraturePointFingerprint { - int dimension{0}; - std::size_t num_points{0}; - std::uint64_t points_hash_a{0}; - std::uint64_t points_hash_b{0}; -}; - -/** - * @brief Base class for quadrature rules over reference elements - * - * Derived classes populate the point/weight data via the protected setters. - * The class performs lightweight consistency checks (size agreement, basic - * reference-measure validation) but leaves element-specific checks to callers. - */ -class QuadratureRule { -public: - virtual ~QuadratureRule() = default; - - /// Number of quadrature points - std::size_t num_points() const noexcept { return points_.size(); } - - /// Polynomial exactness degree reported by the rule - int order() const noexcept { return order_; } - - /// Spatial dimension of the reference domain - int dimension() const noexcept { return dimension_; } - - /// Cell family the rule integrates over (line, tri, quad, ...) 
- svmp::CellFamily cell_family() const noexcept { return cell_family_; } - - /// Access a single quadrature point (no bounds checking) - QuadPoint point(std::size_t i) const noexcept { return points_[i]; } - - /// Access a single quadrature weight (no bounds checking) - Real weight(std::size_t i) const noexcept { return weights_[i]; } - - /// Bulk accessors - const std::vector& points() const noexcept { return points_; } - const std::vector& weights() const noexcept { return weights_; } - - /// Cached coordinate-only fingerprint for consumers whose values depend on - /// reference points but not quadrature weights. - QuadraturePointFingerprint point_fingerprint() const noexcept { return point_fingerprint_; } - - /// Stable semantic identity used by BasisCache - virtual std::string cache_identity() const; - - /** - * @brief Validate rule data for basic consistency - * @param tol Relative tolerance for weight sum check - * @return True if rule passes size and weight checks - */ - virtual bool is_valid(Real tol = 1e-12) const; - - /** - * @brief Reference-domain measure for the element family - * - * Length/area/volume of the canonical reference element: - * - Line [-1,1] -> 2 - * - Quad [-1,1]^2 -> 4 - * - Hex [-1,1]^3 -> 8 - * - Tri (0,0)-(1,0)-(0,1) -> 0.5 - * - Tet simplex at origin -> 1/6 - * - Wedge (triangle x line) -> 1 - * - Pyramid (x,y in [-1,1], z in [0,1]) -> 4/3 - */ - Real reference_measure() const noexcept; - -protected: - QuadratureRule(svmp::CellFamily family, int dimension, int order = 0) - : cell_family_(family), dimension_(dimension), order_(order) {} - - /// Assign point and weight storage (sizes must match) - void set_data(std::vector pts, std::vector wts); - - /// Override computed order in derived classes - void set_order(int ord) noexcept { order_ = ord; } - -private: - std::string build_cache_identity() const; - QuadraturePointFingerprint build_point_fingerprint() const noexcept; - - svmp::CellFamily cell_family_; - int dimension_; - int 
order_; - std::vector points_; - std::vector weights_; - std::string cache_identity_; - QuadraturePointFingerprint point_fingerprint_; -}; - -// -------------------------------------------------------------------------------- -// Inline implementations -// -------------------------------------------------------------------------------- - -inline void QuadratureRule::set_data(std::vector pts, std::vector wts) { - if (pts.size() != wts.size()) { - throw FEException("QuadratureRule: points/weights size mismatch", - StatusCode::InvalidArgument, - __FILE__, __LINE__, __func__); - } - points_ = std::move(pts); - weights_ = std::move(wts); - point_fingerprint_ = build_point_fingerprint(); - cache_identity_ = build_cache_identity(); -} - -inline bool QuadratureRule::is_valid(Real tol) const { - if (points_.empty() || points_.size() != weights_.size()) { - return false; - } - Real sum_w = Real(0); - for (Real w : weights_) { - if (!std::isfinite(w)) { - return false; - } - sum_w += w; - } - const Real ref = reference_measure(); - const Real denom = std::max(Real(1), std::abs(ref)); - return std::abs(sum_w - ref) <= tol * denom; -} - -inline std::string QuadratureRule::cache_identity() const { - if (!cache_identity_.empty()) { - return cache_identity_; - } - return build_cache_identity(); -} - -inline std::string QuadratureRule::build_cache_identity() const { - std::ostringstream oss; - oss << "dim=" << dimension_ - << "|npts=" << points_.size(); - - oss << std::setprecision(std::numeric_limits::max_digits10); - for (const auto& pt : points_) { - oss << "|pt=" << pt[0] << ',' << pt[1] << ',' << pt[2]; - } - return oss.str(); -} - -inline QuadraturePointFingerprint QuadratureRule::build_point_fingerprint() const noexcept { - auto real_bits = [](Real value) noexcept { - static_assert(sizeof(Real) <= sizeof(std::uint64_t), - "Quadrature point fingerprints assume Real fits in 64 bits"); - std::uint64_t bits = 0; - std::memcpy(&bits, &value, sizeof(Real)); - return bits; - }; - 
auto mix_hash = [](std::uint64_t& seed, std::uint64_t value) noexcept { - seed ^= value + 0x9e3779b97f4a7c15ULL + (seed << 6u) + (seed >> 2u); - }; - - QuadraturePointFingerprint fingerprint; - fingerprint.dimension = dimension_; - fingerprint.num_points = points_.size(); - fingerprint.points_hash_a = 1469598103934665603ULL; - fingerprint.points_hash_b = 1099511628211ULL; - - mix_hash(fingerprint.points_hash_a, static_cast(fingerprint.dimension)); - mix_hash(fingerprint.points_hash_a, static_cast(fingerprint.num_points)); - mix_hash(fingerprint.points_hash_b, static_cast(fingerprint.num_points)); - mix_hash(fingerprint.points_hash_b, static_cast(fingerprint.dimension)); - for (const auto& point : points_) { - for (std::size_t component = 0; component < 3u; ++component) { - const std::uint64_t bits = real_bits(point[component]); - mix_hash(fingerprint.points_hash_a, bits); - mix_hash(fingerprint.points_hash_b, bits ^ (0xbf58476d1ce4e5b9ULL + component)); - } - } - return fingerprint; -} - -inline Real QuadratureRule::reference_measure() const noexcept { - switch (cell_family_) { - case svmp::CellFamily::Line: return Real(2); - case svmp::CellFamily::Quad: return Real(4); - case svmp::CellFamily::Hex: return Real(8); - case svmp::CellFamily::Triangle: return Real(0.5); - case svmp::CellFamily::Tetra: return Real(1.0 / 6.0); - case svmp::CellFamily::Wedge: return Real(1.0); // 0.5 area * length 2 - case svmp::CellFamily::Pyramid: return Real(4.0 / 3.0); - case svmp::CellFamily::Point: return Real(1.0); - default: return Real(1.0); - } -} - -} // namespace quadrature -} // namespace FE -} // namespace svmp - -#endif // SVMP_FE_QUADRATURE_RULE_H diff --git a/Code/Source/solver/Timer.h b/Code/Source/solver/Timer.h index 6810ae17c..b8ffa29df 100644 --- a/Code/Source/solver/Timer.h +++ b/Code/Source/solver/Timer.h @@ -4,28 +4,24 @@ #ifndef TIMER_H #define TIMER_H -#include -#include -#include +#include /// @brief Keep track of time class Timer { public: - double 
get_elapsed_time() + double get_elapsed_time() const { return get_time() - current_time; } - double get_time() + double get_time() const { - auto now = std::chrono::system_clock::now(); - auto now_ms = std::chrono::time_point_cast(now); - - auto value = now_ms.time_since_epoch(); - auto duration = value.count() / 1000.0; - return static_cast(duration); + timeval now{}; + gettimeofday(&now, nullptr); + return static_cast(now.tv_sec) + + static_cast(now.tv_usec) * 1.0e-6; } void set_time() @@ -33,8 +29,7 @@ class Timer current_time = get_time(); } - double current_time; + double current_time{0.0}; }; #endif - diff --git a/Code/Source/solver/load_msh.cpp b/Code/Source/solver/load_msh.cpp index c7c5a62ba..50d0ca858 100644 --- a/Code/Source/solver/load_msh.cpp +++ b/Code/Source/solver/load_msh.cpp @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -300,4 +299,3 @@ void read_sv(Simulation* simulation, mshType& mesh, const MeshParameters* mesh_p } } }; - diff --git a/Code/Source/solver/utils.cpp b/Code/Source/solver/utils.cpp index 4d5b847cd..233d35474 100644 --- a/Code/Source/solver/utils.cpp +++ b/Code/Source/solver/utils.cpp @@ -4,7 +4,6 @@ #include "utils.h" #include -#include #include #include @@ -13,6 +12,7 @@ #include #include #include +#include /* MacOS #include @@ -35,12 +35,10 @@ int CountBits(int n) double cput() { - auto now = std::chrono::system_clock::now(); - auto now_ms = std::chrono::time_point_cast(now); - - auto value = now_ms.time_since_epoch(); - auto duration = value.count() / 1000.0; - return static_cast(duration); + timeval now{}; + gettimeofday(&now, nullptr); + return static_cast(now.tv_sec) + + static_cast(now.tv_usec) * 1.0e-6; } Vector @@ -386,4 +384,4 @@ void find_loc(const Array& array, int value, std::array& ind) } } -}; \ No newline at end of file +}; diff --git a/Code/ThirdParty/eigen/include/eigen3/unsupported/Eigen/CXX11/Tensor b/Code/ThirdParty/eigen/include/eigen3/unsupported/Eigen/CXX11/Tensor index 
0938bb554..45b176fe7 100644 --- a/Code/ThirdParty/eigen/include/eigen3/unsupported/Eigen/CXX11/Tensor +++ b/Code/ThirdParty/eigen/include/eigen3/unsupported/Eigen/CXX11/Tensor @@ -34,7 +34,9 @@ */ #include +#ifdef EIGEN_USE_GPU #include +#endif #include #include #include diff --git a/tests/unitTests/FE/Basis/test_BasisCacheFactory.cpp b/tests/unitTests/FE/Basis/test_BasisCacheFactory.cpp deleted file mode 100644 index 216fd0401..000000000 --- a/tests/unitTests/FE/Basis/test_BasisCacheFactory.cpp +++ /dev/null @@ -1,256 +0,0 @@ -/** - * @file test_BasisCacheFactory.cpp - * @brief Tests for the migrated Basis cache and factory subset. - */ - -#include - -#include "FE/Basis/BasisCache.h" -#include "FE/Basis/BasisFactory.h" -#include "FE/Basis/LagrangeBasis.h" -#include "FE/Basis/SerendipityBasis.h" -#include "FE/Quadrature/QuadratureRule.h" - -#include -#include - -using namespace svmp::FE; -using namespace svmp::FE::basis; -using namespace svmp::FE::quadrature; - -namespace { - -class CustomQuadratureRule final : public QuadratureRule { -public: - CustomQuadratureRule(svmp::CellFamily family, - int dimension, - int order, - std::vector points, - std::vector weights) - : QuadratureRule(family, dimension, order) - { - set_data(std::move(points), std::move(weights)); - } -}; - -CustomQuadratureRule line_rule() { - return CustomQuadratureRule( - svmp::CellFamily::Line, 1, 3, - { - QuadPoint{Real(-0.5), Real(0), Real(0)}, - QuadPoint{Real(0.5), Real(0), Real(0)} - }, - {Real(1), Real(1)}); -} - -CustomQuadratureRule quad_rule(Real first_weight = Real(1)) { - return CustomQuadratureRule( - svmp::CellFamily::Quad, 2, 3, - { - QuadPoint{Real(-0.5), Real(-0.5), Real(0)}, - QuadPoint{Real(0.5), Real(-0.25), Real(0)}, - QuadPoint{Real(0.0), Real(0.5), Real(0)} - }, - {first_weight, Real(1), Real(2)}); -} - -class TestCustomScalarBasis final : public BasisFunction { -public: - explicit TestCustomScalarBasis(int tag) - : tag_(tag) - { - } - - BasisType basis_type() const noexcept 
override { return BasisType::Custom; } - ElementType element_type() const noexcept override { return ElementType::Line2; } - int dimension() const noexcept override { return 1; } - int order() const noexcept override { return 1; } - std::size_t size() const noexcept override { return 2u; } - - std::string cache_identity() const override { - return BasisFunction::cache_identity() + "|tag=" + std::to_string(tag_); - } - - void evaluate_values(const math::Vector& xi, - std::vector& values) const override - { - values.resize(2u); - const Real shift = Real(tag_) * Real(0.125); - values[0] = Real(0.5) * (Real(1) - xi[0]) + shift; - values[1] = Real(0.5) * (Real(1) + xi[0]) - shift; - } - - void evaluate_gradients(const math::Vector&, - std::vector& gradients) const override - { - gradients.assign(2u, Gradient{}); - gradients[0][0] = Real(-0.5); - gradients[1][0] = Real(0.5); - } - -private: - int tag_{0}; -}; - -class StructuredIdentityScalarBasis final : public BasisFunction { -public: - explicit StructuredIdentityScalarBasis(int tag) - : tag_(tag) - { - } - - BasisType basis_type() const noexcept override { return BasisType::Custom; } - ElementType element_type() const noexcept override { return ElementType::Line2; } - int dimension() const noexcept override { return 1; } - int order() const noexcept override { return 1; } - std::size_t size() const noexcept override { return 2u; } - - bool cache_identity_words(std::vector& words) const override { - words.push_back(0x7374727563746964ULL); - words.push_back(static_cast(tag_)); - return true; - } - - std::string cache_identity() const override { - ++string_identity_calls; - return BasisFunction::cache_identity() + "|structured-tag=" + std::to_string(tag_); - } - - void evaluate_values(const math::Vector& xi, - std::vector& values) const override - { - values.resize(2u); - values[0] = Real(1) - xi[0] + Real(tag_); - values[1] = xi[0] - Real(tag_); - } - - mutable std::size_t string_identity_calls{0}; - -private: - int 
tag_{0}; -}; - -} // namespace - -TEST(BasisFactory, CreatesLagrangeAndSerendipityBases) { - auto lagrange = basis_factory::create( - BasisRequest{ElementType::Line2, BasisType::Lagrange, 2}); - ASSERT_NE(lagrange, nullptr); - EXPECT_EQ(lagrange->basis_type(), BasisType::Lagrange); - EXPECT_EQ(lagrange->element_type(), ElementType::Line2); - EXPECT_EQ(lagrange->order(), 2); - - auto serendipity = basis_factory::create( - BasisRequest{ElementType::Quad8, BasisType::Serendipity, 2}); - ASSERT_NE(serendipity, nullptr); - EXPECT_EQ(serendipity->basis_type(), BasisType::Serendipity); - EXPECT_EQ(serendipity->element_type(), ElementType::Quad8); - EXPECT_EQ(serendipity->size(), 8u); -} - -TEST(BasisFactory, RejectsOutOfScopeAndInvalidRequests) { - EXPECT_THROW( - (void)basis_factory::create(BasisRequest{ElementType::Line2, BasisType::Lagrange}), - BasisConfigurationException); - EXPECT_THROW( - (void)basis_factory::create( - BasisRequest{ElementType::Line2, BasisType::Lagrange, -1}), - BasisConfigurationException); - EXPECT_THROW( - (void)basis_factory::create( - BasisRequest{ElementType::Line2, BasisType::Bernstein, 1}), - BasisConfigurationException); - EXPECT_THROW( - (void)basis_factory::create( - BasisRequest{ElementType::Line2, - BasisType::Lagrange, - 1, - Continuity::H_div, - FieldType::Vector}), - BasisConfigurationException); -} - -TEST(BasisFactory, SupportsCustomFactoryRegistration) { - basis_factory::clear_custom_registry_for_tests(); - basis_factory::register_custom( - "test-custom", - [](const BasisRequest& req) { - const int tag = req.order.value_or(0); - return std::make_shared(tag); - }); - - BasisRequest req{ElementType::Line2, BasisType::Custom, 7}; - req.custom_id = "test-custom"; - auto custom = basis_factory::create(req); - ASSERT_NE(custom, nullptr); - EXPECT_EQ(custom->basis_type(), BasisType::Custom); - EXPECT_EQ(custom->size(), 2u); - - basis_factory::unregister_custom("test-custom"); - EXPECT_THROW((void)basis_factory::create(req), 
BasisConfigurationException); - basis_factory::clear_custom_registry_for_tests(); -} - -TEST(BasisCache, ReusesEntriesForSameBasisAndQuadratureCoordinates) { - LagrangeBasis basis(ElementType::Line2, 2); - const auto quad = line_rule(); - - auto& cache = BasisCache::instance(); - cache.clear(); - const auto& entry1 = cache.get_or_compute(basis, quad, true, true); - const auto& entry2 = cache.get_or_compute(basis, quad, true, true); - - EXPECT_EQ(&entry1, &entry2); - EXPECT_EQ(entry1.num_qpts, quad.num_points()); - EXPECT_EQ(entry1.num_dofs, basis.size()); - ASSERT_EQ(entry1.scalar_values.size(), basis.size() * quad.num_points()); - ASSERT_EQ(entry1.gradients.size(), basis.size() * 3u * quad.num_points()); - ASSERT_EQ(entry1.hessians.size(), basis.size() * 9u * quad.num_points()); - EXPECT_EQ(cache.size(), 1u); -} - -TEST(BasisCache, ReusesCoordinateIdenticalQuadratureRulesIgnoringWeights) { - SerendipityBasis basis(ElementType::Quad8, 2); - const auto quad_a = quad_rule(Real(1)); - const auto quad_b = quad_rule(Real(0.25)); - - auto& cache = BasisCache::instance(); - cache.clear(); - const auto& entry_a = cache.get_or_compute(basis, quad_a, true, false); - const auto& entry_b = cache.get_or_compute(basis, quad_b, true, false); - - EXPECT_EQ(&entry_a, &entry_b); - EXPECT_EQ(cache.size(), 1u); -} - -TEST(BasisCache, SeparatesStringIdentityCustomBases) { - TestCustomScalarBasis custom_a(1); - TestCustomScalarBasis custom_b(2); - const auto quad = line_rule(); - - auto& cache = BasisCache::instance(); - cache.clear(); - const auto& entry_a = cache.get_or_compute(custom_a, quad, false, false); - const auto& entry_b = cache.get_or_compute(custom_b, quad, false, false); - - EXPECT_NE(&entry_a, &entry_b); - EXPECT_NE(entry_a.scalar_values, entry_b.scalar_values); - EXPECT_EQ(cache.size(), 2u); -} - -TEST(BasisCache, StructuredIdentityAvoidsStringFallbackAndSeparatesBases) { - StructuredIdentityScalarBasis custom_a(1); - StructuredIdentityScalarBasis custom_b(2); - const 
auto quad = line_rule(); - - auto& cache = BasisCache::instance(); - cache.clear(); - const auto& entry_a = cache.get_or_compute(custom_a, quad, false, false); - const auto& entry_b = cache.get_or_compute(custom_b, quad, false, false); - - EXPECT_NE(&entry_a, &entry_b); - EXPECT_EQ(custom_a.string_identity_calls, 0u); - EXPECT_EQ(custom_b.string_identity_calls, 0u); - EXPECT_EQ(cache.size(), 2u); -} - diff --git a/tests/unitTests/FE/Basis/test_BasisErrorPaths.cpp b/tests/unitTests/FE/Basis/test_BasisErrorPaths.cpp index 967f078aa..7838702b0 100644 --- a/tests/unitTests/FE/Basis/test_BasisErrorPaths.cpp +++ b/tests/unitTests/FE/Basis/test_BasisErrorPaths.cpp @@ -21,7 +21,7 @@ namespace { class MinimalScalarBasis : public BasisFunction { public: - BasisType basis_type() const noexcept override { return BasisType::Custom; } + BasisType basis_type() const noexcept override { return BasisType::Lagrange; } ElementType element_type() const noexcept override { return ElementType::Line2; } int dimension() const noexcept override { return 1; } int order() const noexcept override { return 1; } @@ -36,7 +36,7 @@ class MinimalScalarBasis : public BasisFunction { class CompleteFallbackBasis : public BasisFunction { public: - BasisType basis_type() const noexcept override { return BasisType::Custom; } + BasisType basis_type() const noexcept override { return BasisType::Lagrange; } ElementType element_type() const noexcept override { return ElementType::Triangle3; } int dimension() const noexcept override { return 2; } int order() const noexcept override { return 1; } @@ -90,6 +90,8 @@ TEST(BasisErrorPaths, SerendipityInvalidRequestsThrowBasisExceptions) { BasisElementCompatibilityException); EXPECT_THROW(SerendipityBasis(ElementType::Quad8, 3), BasisConfigurationException); + EXPECT_THROW(SerendipityBasis(ElementType::Pyramid13, 2), + BasisElementCompatibilityException); EXPECT_THROW(SerendipityBasis(ElementType::Pyramid14, 2), BasisElementCompatibilityException); } @@ -104,6 
+106,13 @@ TEST(BasisErrorPaths, BasisFactoryInvalidRequestsThrowBasisExceptions) { EXPECT_THROW((void)basis_factory::create( BasisRequest{ElementType::Line2, BasisType::Bernstein, 1}), BasisConfigurationException); + EXPECT_THROW((void)basis_factory::create( + BasisRequest{ElementType::Pyramid5, BasisType::Lagrange, 1}), + BasisElementCompatibilityException); + + BasisRequest vector_req{ElementType::Line2, BasisType::Lagrange, 1}; + vector_req.field_type = FieldType::Vector; + EXPECT_THROW((void)basis_factory::create(vector_req), BasisConfigurationException); auto serendipity = basis_factory::create( BasisRequest{ElementType::Quad8, BasisType::Serendipity, 2}); @@ -130,6 +139,8 @@ TEST(BasisErrorPaths, NodeOrderingInvalidNodeThrows) { BasisNodeOrderingException); EXPECT_THROW((void)ReferenceNodeLayout::get_lagrange_node_coords(ElementType::Quad8, 2), BasisNodeOrderingException); + EXPECT_THROW((void)ReferenceNodeLayout::num_nodes(ElementType::Pyramid5), + BasisNodeOrderingException); } TEST(BasisErrorPaths, BasisFunctionDefaultsThrowForMissingDerivatives) { @@ -142,25 +153,22 @@ TEST(BasisErrorPaths, BasisFunctionDefaultsThrowForMissingDerivatives) { EXPECT_THROW(basis.evaluate_hessians(xi, hessians), BasisEvaluationException); } -TEST(BasisErrorPaths, BasisFunctionFallbackWritesFlatAndStridedLayouts) { +TEST(BasisErrorPaths, BasisFunctionFallbackWritesRawLayouts) { CompleteFallbackBasis basis; - const std::vector> points = { - {Real(0.25), Real(0.5), Real(-0.25)}, - {Real(-0.5), Real(0.75), Real(0.125)} - }; - prewarm_basis_function_scratch(basis.size(), points.size()); + const math::Vector point{Real(0.25), Real(0.5), Real(-0.25)}; + prewarm_basis_function_scratch(basis.size()); std::vector flat_values(basis.size()); std::vector flat_gradients(basis.size() * 3u); std::vector flat_hessians(basis.size() * 9u); - basis.evaluate_values_to(points.front(), flat_values.data()); - basis.evaluate_gradients_to(points.front(), flat_gradients.data()); - 
basis.evaluate_hessians_to(points.front(), flat_hessians.data()); + basis.evaluate_values_to(point, flat_values.data()); + basis.evaluate_gradients_to(point, flat_gradients.data()); + basis.evaluate_hessians_to(point, flat_hessians.data()); std::vector expected_values; std::vector expected_gradients; std::vector expected_hessians; - basis.evaluate_all(points.front(), expected_values, expected_gradients, expected_hessians); + basis.evaluate_all(point, expected_values, expected_gradients, expected_hessians); for (std::size_t d = 0; d < basis.size(); ++d) { EXPECT_EQ(flat_values[d], expected_values[d]); for (std::size_t c = 0; c < 3u; ++c) { @@ -172,32 +180,4 @@ TEST(BasisErrorPaths, BasisFunctionFallbackWritesFlatAndStridedLayouts) { } } } - - constexpr std::size_t output_stride = 3u; - std::vector values(basis.size() * output_stride, Real(-99)); - std::vector gradients(basis.size() * 3u * output_stride, Real(-99)); - std::vector hessians(basis.size() * 9u * output_stride, Real(-99)); - basis.evaluate_at_quadrature_points_strided( - points, output_stride, values.data(), gradients.data(), hessians.data()); - - for (std::size_t q = 0; q < points.size(); ++q) { - basis.evaluate_all(points[q], expected_values, expected_gradients, expected_hessians); - for (std::size_t d = 0; d < basis.size(); ++d) { - EXPECT_EQ(values[d * output_stride + q], expected_values[d]); - for (std::size_t c = 0; c < 3u; ++c) { - EXPECT_EQ(gradients[(d * 3u + c) * output_stride + q], - expected_gradients[d][c]); - } - for (std::size_t r = 0; r < 3u; ++r) { - for (std::size_t c = 0; c < 3u; ++c) { - EXPECT_EQ(hessians[(d * 9u + r * 3u + c) * output_stride + q], - expected_hessians[d](r, c)); - } - } - } - } - - for (std::size_t d = 0; d < basis.size(); ++d) { - EXPECT_EQ(values[d * output_stride + 2u], Real(-99)); - } } diff --git a/tests/unitTests/FE/Basis/test_BasisHessians.cpp b/tests/unitTests/FE/Basis/test_BasisHessians.cpp index 0899ce358..f786b07cd 100644 --- 
a/tests/unitTests/FE/Basis/test_BasisHessians.cpp +++ b/tests/unitTests/FE/Basis/test_BasisHessians.cpp @@ -61,8 +61,6 @@ std::vector> sample_points_for(ElementType type) { return {{Real(0.1), Real(-0.2), Real(0.3)}, {Real(-0.35), Real(0.25), Real(-0.15)}}; case ElementType::Wedge6: return {{Real(0.18), Real(0.22), Real(-0.2)}, {Real(0.12), Real(0.16), Real(0.1)}}; - case ElementType::Pyramid5: - return {{Real(0.0), Real(0.0), Real(0.2)}, {Real(0.12), Real(-0.08), Real(0.24)}}; default: return {{Real(0), Real(0), Real(0)}}; } @@ -200,8 +198,6 @@ TEST(BasisHessians, LagrangeCanonicalTopologiesMatchNumericalHessians) { {ElementType::Tetra4, 2, Real(1e-6), Real(1e-5)}, {ElementType::Hex8, 2, Real(1e-6), Real(1e-5)}, {ElementType::Wedge6, 2, Real(1e-5), Real(1e-5)}, - {ElementType::Pyramid5, 1, Real(2e-6), Real(1e-5)}, - {ElementType::Pyramid5, 3, Real(4e-4), Real(2e-5)}, }; for (const auto& c : cases) { @@ -223,7 +219,6 @@ TEST(BasisHessians, LagrangeHessiansSumToZeroAndAreSymmetric) { {ElementType::Tetra4, 2, {Real(0.15), Real(0.2), Real(0.1)}, Real(1e-10)}, {ElementType::Hex8, 2, {Real(0.1), Real(-0.2), Real(0.3)}, Real(1e-12)}, {ElementType::Wedge6, 2, {Real(0.2), Real(0.15), Real(-0.3)}, Real(1e-10)}, - {ElementType::Pyramid5, 1, {Real(0.1), Real(-0.2), Real(0.3)}, Real(1e-8)}, }; for (const auto& c : cases) { @@ -233,25 +228,6 @@ TEST(BasisHessians, LagrangeHessiansSumToZeroAndAreSymmetric) { } } -TEST(BasisHessians, LagrangePyramidExactApexHessianThrows) { - const struct Case { - ElementType type; - int order; - } cases[] = { - {ElementType::Pyramid5, 1}, - {ElementType::Pyramid14, 2}, - {ElementType::Pyramid5, 4}, - }; - - const math::Vector apex{Real(0), Real(0), Real(1)}; - for (const auto& c : cases) { - LagrangeBasis basis(c.type, c.order); - std::vector hessians; - EXPECT_THROW(basis.evaluate_hessians(apex, hessians), BasisEvaluationException) - << "order " << c.order; - } -} - TEST(BasisHessians, SerendipityHessiansSumToZeroAndAreSymmetric) { const struct 
Case { ElementType type; @@ -262,7 +238,6 @@ TEST(BasisHessians, SerendipityHessiansSumToZeroAndAreSymmetric) { {ElementType::Quad8, 2, {Real(0.17), Real(-0.31), Real(0)}, Real(1e-10)}, {ElementType::Hex20, 2, {Real(0.2), Real(-0.1), Real(0.3)}, Real(1e-10)}, {ElementType::Wedge15, 2, {Real(0.2), Real(0.3), Real(0.1)}, Real(1e-10)}, - {ElementType::Pyramid13, 2, {Real(0.1), Real(-0.2), Real(0.4)}, Real(1e-8)}, }; for (const auto& c : cases) { @@ -272,13 +247,6 @@ TEST(BasisHessians, SerendipityHessiansSumToZeroAndAreSymmetric) { } } -TEST(BasisHessians, SerendipityPyramidExactApexHessianThrows) { - SerendipityBasis basis(ElementType::Pyramid13, 2); - std::vector hessians; - EXPECT_THROW(basis.evaluate_hessians({Real(0), Real(0), Real(1)}, hessians), - BasisEvaluationException); -} - TEST(BasisHessians, SolverMappedVolumeSelectionsSatisfyInvariants) { const struct Case { ElementType type; diff --git a/tests/unitTests/FE/Basis/test_ConstexprBasis.cpp b/tests/unitTests/FE/Basis/test_ConstexprBasis.cpp index a1031fa76..44e588fdc 100644 --- a/tests/unitTests/FE/Basis/test_ConstexprBasis.cpp +++ b/tests/unitTests/FE/Basis/test_ConstexprBasis.cpp @@ -1,21 +1,16 @@ /** * @file test_ConstexprBasis.cpp - * @brief Compile-time and lightweight runtime checks for migrated Basis helpers. + * @brief Compile-time and lightweight runtime checks for reduced Basis helpers. 
*/ -#include "FE/Basis/BasisTolerance.h" +#include "FE/Basis/BasisExceptions.h" #include "FE/Basis/BasisTraits.h" -#include "FE/Basis/LagrangeBasis.h" -#include "FE/Basis/LagrangeBasisFast.h" #include "FE/Basis/NodeOrderingConventions.h" #include -#include #include #include -#include -#include #include namespace svmp { @@ -30,26 +25,27 @@ static_assert(is_quadrilateral(ElementType::Quad8)); static_assert(is_tetrahedron(ElementType::Tetra10)); static_assert(is_hexahedron(ElementType::Hex20)); static_assert(is_wedge(ElementType::Wedge18)); -static_assert(is_pyramid(ElementType::Pyramid14)); +static_assert(!is_pyramid(ElementType::Pyramid5)); +static_assert(!is_pyramid(ElementType::Pyramid14)); static_assert(is_simplex(ElementType::Triangle3)); static_assert(is_simplex(ElementType::Tetra4)); static_assert(!is_simplex(ElementType::Wedge6)); static_assert(is_tensor_product(ElementType::Line2)); static_assert(is_tensor_product(ElementType::Quad9)); static_assert(is_tensor_product(ElementType::Hex27)); -static_assert(!is_tensor_product(ElementType::Pyramid5)); -static_assert(reference_dimension(ElementType::Pyramid14) == 3); +static_assert(!is_tensor_product(ElementType::Wedge6)); +static_assert(topology(ElementType::Pyramid5) == BasisTopology::Unknown); static_assert(canonical_lagrange_type(ElementType::Hex27) == ElementType::Hex8); static_assert(canonical_lagrange_type(ElementType::Pyramid13) == ElementType::Pyramid13); static_assert(complete_lagrange_alias_order(ElementType::Wedge18) == 2); -static_assert(complete_lagrange_alias_order(ElementType::Hex20) == -1); +static_assert(complete_lagrange_alias_order(ElementType::Pyramid14) == -1); static_assert(line_lagrange_size(2) == 3u); static_assert(triangle_lagrange_size(2) == 6u); static_assert(quad_lagrange_size(2) == 9u); static_assert(tetra_lagrange_size(2) == 10u); static_assert(hex_lagrange_size(2) == 27u); static_assert(wedge_lagrange_size(2) == 18u); -static_assert(pyramid_lagrange_size(2) == 14u); 
+static_assert(complete_lagrange_alias_size(ElementType::Pyramid14) == 0u); static_assert(detail::basis_abs(Real(-2)) == Real(2)); static_assert(detail::basis_max(Real(2), Real(3)) == Real(3)); static_assert(detail::basis_near_zero(std::numeric_limits::epsilon() * Real(32))); @@ -57,73 +53,7 @@ static_assert(detail::basis_nearly_equal( Real(1), Real(1) + std::numeric_limits::epsilon() * Real(32))); -constexpr auto kLineFastValues = [] { - math::Vector xi{Real(0), Real(0), Real(0)}; - std::array::n_dofs> values{}; - LagrangeLineFast<1>::evaluate(xi, values); - return values; -}(); -static_assert(kLineFastValues[0] == Real(0.5)); -static_assert(kLineFastValues[1] == Real(0.5)); - -constexpr auto kLineP2FastHessians = [] { - math::Vector xi{Real(0), Real(0), Real(0)}; - std::array::n_dofs> hessians{}; - LagrangeLineFast<2>::evaluate_hessians(xi, hessians); - return hessians; -}(); -static_assert(kLineP2FastHessians[0](0, 0) == Real(1)); -static_assert(kLineP2FastHessians[1](0, 0) == Real(1)); -static_assert(kLineP2FastHessians[2](0, 0) == Real(-2)); - -constexpr auto kTriP2FastValues = [] { - math::Vector xi{Real(0.25), Real(0.25), Real(0)}; - std::array::n_dofs> values{}; - LagrangeTriFast<2>::evaluate(xi, values); - return values; -}(); -static_assert(kTriP2FastValues[0] == Real(0)); -static_assert(kTriP2FastValues[3] == Real(0.5)); -static_assert(kTriP2FastValues[4] == Real(0.25)); - -template -constexpr bool overrides_scalar_strided_v = - !std::is_same_v; - -template -void expect_fast_matches_lagrange(ElementType type, - int order, - const std::vector>& points) -{ - LagrangeBasis basis(type, order); - for (const auto& xi : points) { - std::vector expected_values; - std::vector expected_gradients; - std::vector expected_hessians; - basis.evaluate_all(xi, expected_values, expected_gradients, expected_hessians); - - std::array values{}; - std::array gradients{}; - std::array hessians{}; - FastBasis::evaluate(xi, values); - FastBasis::evaluate_gradients(xi, 
gradients); - FastBasis::evaluate_hessians(xi, hessians); - - ASSERT_EQ(expected_values.size(), values.size()); - for (std::size_t i = 0; i < values.size(); ++i) { - EXPECT_NEAR(values[i], expected_values[i], Real(1e-14)); - for (std::size_t d = 0; d < 3u; ++d) { - EXPECT_NEAR(gradients[i][d], expected_gradients[i][d], Real(1e-14)); - for (std::size_t e = 0; e < 3u; ++e) { - EXPECT_NEAR(hessians[i](d, e), expected_hessians[i](d, e), Real(1e-14)); - } - } - } - } -} - -TEST(ConstexprBasis, FixedNodeTableSizes) { +TEST(ConstexprBasis, FixedNodeTableSizesForSupportedLayouts) { const std::vector> expected = { {ElementType::Line2, 2u}, {ElementType::Line3, 3u}, @@ -140,9 +70,6 @@ TEST(ConstexprBasis, FixedNodeTableSizes) { {ElementType::Wedge6, 6u}, {ElementType::Wedge15, 15u}, {ElementType::Wedge18, 18u}, - {ElementType::Pyramid5, 5u}, - {ElementType::Pyramid13, 13u}, - {ElementType::Pyramid14, 14u}, }; for (const auto& [type, size] : expected) { @@ -150,7 +77,7 @@ TEST(ConstexprBasis, FixedNodeTableSizes) { } } -TEST(ConstexprBasis, BasisToleranceScalesWithRealPrecision) { +TEST(ConstexprBasis, TraitToleranceScalesWithRealPrecision) { const Real eps = std::numeric_limits::epsilon(); EXPECT_GT(detail::basis_scaled_tolerance(), eps); EXPECT_TRUE(detail::basis_near_zero(eps * Real(32))); @@ -159,37 +86,6 @@ TEST(ConstexprBasis, BasisToleranceScalesWithRealPrecision) { EXPECT_FALSE(detail::basis_nearly_equal(Real(1), Real(1) + eps * Real(128))); } -TEST(ConstexprBasis, LagrangeOverridesStridedEvaluation) { - EXPECT_TRUE(overrides_scalar_strided_v); -} - -TEST(ConstexprBasis, FastSidecarsMatchRuntimeLagrangeBasis) { - expect_fast_matches_lagrange>( - ElementType::Line2, 1, - {{Real(-0.2), Real(0), Real(0)}, {Real(0.35), Real(0), Real(0)}}); - expect_fast_matches_lagrange>( - ElementType::Line2, 2, - {{Real(-0.2), Real(0), Real(0)}, {Real(0.35), Real(0), Real(0)}}); - expect_fast_matches_lagrange>( - ElementType::Quad4, 1, - {{Real(-0.2), Real(0.3), Real(0)}, {Real(0.35), 
Real(-0.45), Real(0)}}); - expect_fast_matches_lagrange>( - ElementType::Hex8, 1, - {{Real(-0.2), Real(0.3), Real(0.1)}, {Real(0.35), Real(-0.45), Real(0.25)}}); - expect_fast_matches_lagrange>( - ElementType::Triangle3, 1, - {{Real(0.2), Real(0.3), Real(0)}, {Real(0.1), Real(0.6), Real(0)}}); - expect_fast_matches_lagrange>( - ElementType::Triangle3, 2, - {{Real(0.2), Real(0.3), Real(0)}, {Real(0.1), Real(0.6), Real(0)}}); - expect_fast_matches_lagrange>( - ElementType::Tetra4, 1, - {{Real(0.2), Real(0.3), Real(0.1)}, {Real(0.1), Real(0.2), Real(0.4)}}); - expect_fast_matches_lagrange>( - ElementType::Tetra4, 2, - {{Real(0.2), Real(0.3), Real(0.1)}, {Real(0.1), Real(0.2), Real(0.4)}}); -} - TEST(ConstexprBasis, CompleteAliasTablesMatchGeneratedLagrangeNodes) { const std::vector> aliases = { {ElementType::Line2, ElementType::Line2, 1}, @@ -204,8 +100,6 @@ TEST(ConstexprBasis, CompleteAliasTablesMatchGeneratedLagrangeNodes) { {ElementType::Hex27, ElementType::Hex8, 2}, {ElementType::Wedge6, ElementType::Wedge6, 1}, {ElementType::Wedge18, ElementType::Wedge6, 2}, - {ElementType::Pyramid5, ElementType::Pyramid5, 1}, - {ElementType::Pyramid14, ElementType::Pyramid5, 2}, }; for (const auto& [alias, canonical_type, order] : aliases) { @@ -220,6 +114,15 @@ TEST(ConstexprBasis, CompleteAliasTablesMatchGeneratedLagrangeNodes) { } } +TEST(ConstexprBasis, PyramidNodeOrderingIsOutsideCurrentScope) { + EXPECT_THROW((void)ReferenceNodeLayout::num_nodes(ElementType::Pyramid5), + BasisNodeOrderingException); + EXPECT_THROW((void)ReferenceNodeLayout::num_nodes(ElementType::Pyramid13), + BasisNodeOrderingException); + EXPECT_THROW((void)ReferenceNodeLayout::get_lagrange_node_coords(ElementType::Pyramid5, 1), + BasisNodeOrderingException); +} + } // namespace } // namespace basis } // namespace FE diff --git a/tests/unitTests/FE/Basis/test_HigherOrderWedgePyramid.cpp b/tests/unitTests/FE/Basis/test_HigherOrderWedge.cpp similarity index 64% rename from 
tests/unitTests/FE/Basis/test_HigherOrderWedgePyramid.cpp rename to tests/unitTests/FE/Basis/test_HigherOrderWedge.cpp index 26efc4070..3faffd9e0 100644 --- a/tests/unitTests/FE/Basis/test_HigherOrderWedgePyramid.cpp +++ b/tests/unitTests/FE/Basis/test_HigherOrderWedge.cpp @@ -1,6 +1,6 @@ /** - * @file test_HigherOrderWedgePyramid.cpp - * @brief Focused higher-order wedge and pyramid checks for LagrangeBasis. + * @file test_HigherOrderWedge.cpp + * @brief Focused higher-order wedge checks for LagrangeBasis. */ #include @@ -9,8 +9,6 @@ #include "FE/Basis/NodeOrderingConventions.h" #include -#include -#include #include using namespace svmp::FE; @@ -107,28 +105,18 @@ void expect_all_entries_finite(const LagrangeBasis& basis, } // namespace -TEST(HigherOrderWedgePyramid, CompleteAliasesMatchGeneratedNodeLayouts) { - const std::vector> cases = { - {ElementType::Wedge18, ElementType::Wedge6, 2}, - {ElementType::Pyramid14, ElementType::Pyramid5, 2}, - }; - - for (const auto& [alias, canonical, order] : cases) { - LagrangeBasis alias_basis(alias, order); - const auto generated = ReferenceNodeLayout::get_lagrange_node_coords(canonical, order); - ASSERT_EQ(generated.size(), ReferenceNodeLayout::num_nodes(alias)); - expect_nodes_close(alias_basis.nodes(), generated, Real(1e-14)); - - for (std::size_t i = 0; i < generated.size(); ++i) { - const auto public_node = ReferenceNodeLayout::get_node_coords(alias, i); - EXPECT_NEAR(public_node[0], generated[i][0], Real(1e-14)) << "node " << i; - EXPECT_NEAR(public_node[1], generated[i][1], Real(1e-14)) << "node " << i; - EXPECT_NEAR(public_node[2], generated[i][2], Real(1e-14)) << "node " << i; - } - } +TEST(HigherOrderWedge, CompleteAliasMatchesGeneratedNodeLayout) { + LagrangeBasis alias_basis(ElementType::Wedge18, 1); + const auto generated = + ReferenceNodeLayout::get_lagrange_node_coords(ElementType::Wedge6, 2); + + ASSERT_EQ(generated.size(), ReferenceNodeLayout::num_nodes(ElementType::Wedge18)); + 
EXPECT_EQ(alias_basis.element_type(), ElementType::Wedge6); + EXPECT_EQ(alias_basis.order(), 2); + expect_nodes_close(alias_basis.nodes(), generated, Real(1e-14)); } -TEST(HigherOrderWedgePyramid, WedgeOrderThreeIsNodalAndPartitionsUnity) { +TEST(HigherOrderWedge, OrderThreeIsNodalAndPartitionsUnity) { LagrangeBasis wedge(ElementType::Wedge6, 3); expect_kronecker_at_nodes(wedge, Real(2e-10)); @@ -143,31 +131,9 @@ TEST(HigherOrderWedgePyramid, WedgeOrderThreeIsNodalAndPartitionsUnity) { Real(1e-9)); } -TEST(HigherOrderWedgePyramid, PyramidOrderThreeIsNodalAndPartitionsUnity) { - LagrangeBasis pyramid(ElementType::Pyramid5, 3); +TEST(HigherOrderWedge, OrderFourEvaluationsRemainFinite) { + LagrangeBasis wedge(ElementType::Wedge6, 4); - expect_kronecker_at_nodes(pyramid, Real(5e-8)); - expect_partition_gradient_hessian_sums( - pyramid, - { - {Real(0), Real(0), Real(0.2)}, - {Real(0.12), Real(-0.08), Real(0.24)}, - {Real(-0.08), Real(0.1), Real(0.55)}, - }, - Real(1e-11), - Real(5e-7)); -} - -TEST(HigherOrderWedgePyramid, PyramidNearApexDerivativeQueriesRemainFinite) { - const std::vector> cases = { - {ElementType::Pyramid5, 1}, - {ElementType::Pyramid14, 2}, - {ElementType::Pyramid5, 4}, - }; - - for (const auto& [type, order] : cases) { - LagrangeBasis basis(type, order); - expect_all_entries_finite(basis, {Real(0.01), Real(-0.005), Real(0.92)}); - expect_all_entries_finite(basis, {Real(-0.004), Real(0.007), Real(0.98)}); - } + expect_all_entries_finite(wedge, {Real(0.2), Real(0.1), Real(-0.6)}); + expect_all_entries_finite(wedge, {Real(0.05), Real(0.8), Real(0.3)}); } diff --git a/tests/unitTests/FE/Basis/test_LagrangeBasis.cpp b/tests/unitTests/FE/Basis/test_LagrangeBasis.cpp index a88d860e9..9d93f8931 100644 --- a/tests/unitTests/FE/Basis/test_LagrangeBasis.cpp +++ b/tests/unitTests/FE/Basis/test_LagrangeBasis.cpp @@ -1,2249 +1,98 @@ /** * @file test_LagrangeBasis.cpp - * @brief Unit tests for Lagrange basis functions + * @brief Unit tests for the reduced scalar 
Lagrange basis implementation. */ #include -#include "FE/Basis/BasisFactory.h" -#include "FE/Basis/LagrangeBasis.h" -#include "FE/Basis/NodeOrderingConventions.h" -#include "FE/Basis/SerendipityBasis.h" -#include "fs.h" -#include "nn.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace legacy_solver_nn { -using namespace consts; -#include "nn_elem_gip.h" -#include "nn_elem_gnn.h" -#include "nn_elem_gnnxx.h" -} // namespace legacy_solver_nn - -using svmp::FE::basis::LagrangeBasis; -using svmp::FE::ElementType; -using svmp::FE::Real; -using svmp::FE::basis::Gradient; -using svmp::FE::basis::Hessian; -using svmp::FE::basis::ReferenceNodeLayout; - -namespace { - -using Point = svmp::FE::math::Vector; - -struct SolverBasisAdapterCase { - consts::ElementType type; - consts::ElementType quadrature_type; - int insd; - int eNoN; - int nG; -}; - -std::vector solver_basis_adapter_cases() { - using consts::ElementType; - return { - {ElementType::LIN1, ElementType::LIN1, 1, 2, 2}, - {ElementType::LIN2, ElementType::LIN2, 1, 3, 3}, - {ElementType::TRI3, ElementType::TRI3, 2, 3, 3}, - {ElementType::TRI6, ElementType::TRI6, 2, 6, 7}, - {ElementType::QUD4, ElementType::QUD4, 2, 4, 4}, - {ElementType::QUD8, ElementType::QUD9, 2, 8, 9}, - {ElementType::QUD9, ElementType::QUD9, 2, 9, 9}, - {ElementType::TET4, ElementType::TET4, 3, 4, 4}, - {ElementType::TET10, ElementType::TET10, 3, 10, 15}, - {ElementType::HEX8, ElementType::HEX8, 3, 8, 8}, - {ElementType::HEX20, ElementType::HEX20, 3, 20, 27}, - {ElementType::HEX27, ElementType::HEX27, 3, 27, 27}, - {ElementType::WDG, ElementType::WDG, 3, 6, 6}, - }; -} - -std::vector solver_face_basis_adapter_cases() { - using consts::ElementType; - return { - {ElementType::LIN1, ElementType::LIN1, 1, 2, 2}, - {ElementType::LIN2, ElementType::LIN2, 1, 3, 3}, - {ElementType::TRI3, ElementType::TRI3, 2, 3, 3}, - {ElementType::TRI6, ElementType::TRI6, 2, 6, 7}, - {ElementType::QUD4, 
ElementType::QUD4, 2, 4, 4}, - {ElementType::QUD8, ElementType::QUD8, 2, 8, 9}, - {ElementType::QUD9, ElementType::QUD9, 2, 9, 9}, - }; -} - -std::vector solver_hessian_adapter_cases() { - return solver_basis_adapter_cases(); -} - -std::vector solver_legacy_hessian_parity_cases() { - using consts::ElementType; - return { - {ElementType::TRI6, ElementType::TRI6, 2, 6, 7}, - {ElementType::QUD9, ElementType::QUD9, 2, 9, 9}, - {ElementType::TET10, ElementType::TET10, 3, 10, 15}, - }; -} - -int packed_hessian_components(int insd) { - if (insd == 1) { - return 1; - } - if (insd == 2) { - return 3; - } - return 6; -} - -void fill_legacy_quadrature(const SolverBasisAdapterCase& c, - Vector& w, - Array& xi) { - mshType mesh; - mesh.eType = c.quadrature_type; - mesh.eNoN = c.eNoN; - mesh.nG = c.nG; - mesh.w.resize(c.nG); - mesh.xi.resize(c.insd, c.nG); - legacy_solver_nn::set_element_gauss_int_data.at(c.quadrature_type)(mesh); - w = mesh.w; - xi = mesh.xi; -} - -faceType initialized_face_for_case(const SolverBasisAdapterCase& c) { - faceType face; - face.eType = c.type; - face.eNoN = c.eNoN; - face.nG = c.nG; - face.w.resize(c.nG); - face.xi.resize(c.insd, c.nG); - legacy_solver_nn::set_face_gauss_int_data.at(c.quadrature_type)(face); - face.N.resize(c.eNoN, c.nG); - face.Nx.resize(c.insd, c.eNoN, c.nG); - return face; -} - -void expect_arrays_near(const Array& actual, - const Array& expected, - double tol) { - ASSERT_EQ(actual.nrows(), expected.nrows()); - ASSERT_EQ(actual.ncols(), expected.ncols()); - for (int col = 0; col < actual.ncols(); ++col) { - for (int row = 0; row < actual.nrows(); ++row) { - EXPECT_NEAR(actual(row, col), expected(row, col), tol) - << "row=" << row << ", col=" << col; - } - } -} - -void expect_vectors_near(const Vector& actual, - const Vector& expected, - double tol) { - ASSERT_EQ(actual.size(), expected.size()); - for (int i = 0; i < actual.size(); ++i) { - EXPECT_NEAR(actual(i), expected(i), tol) << "index=" << i; - } -} - -void 
expect_array3_near(const Array3& actual, - const Array3& expected, - double tol) { - ASSERT_EQ(actual.nrows(), expected.nrows()); - ASSERT_EQ(actual.ncols(), expected.ncols()); - ASSERT_EQ(actual.nslices(), expected.nslices()); - for (int slice = 0; slice < actual.nslices(); ++slice) { - for (int col = 0; col < actual.ncols(); ++col) { - for (int row = 0; row < actual.nrows(); ++row) { - EXPECT_NEAR(actual(row, col, slice), expected(row, col, slice), tol) - << "row=" << row << ", col=" << col << ", slice=" << slice; - } - } - } -} - -void fill_array3(Array3& values, double value) { - for (int slice = 0; slice < values.nslices(); ++slice) { - for (int col = 0; col < values.ncols(); ++col) { - for (int row = 0; row < values.nrows(); ++row) { - values(row, col, slice) = value; - } - } - } -} - -void expect_face_partition_identities(const SolverBasisAdapterCase& c, - const faceType& face, - int g, - double tol) { - double partition = 0.0; - std::array gradient_sum{0.0, 0.0, 0.0}; - - for (int a = 0; a < c.eNoN; ++a) { - EXPECT_TRUE(std::isfinite(face.N(a, g))) - << "element=" << static_cast(c.type) - << ", node=" << a - << ", g=" << g; - partition += face.N(a, g); - - for (int d = 0; d < c.insd; ++d) { - EXPECT_TRUE(std::isfinite(face.Nx(d, a, g))) - << "element=" << static_cast(c.type) - << ", d=" << d - << ", node=" << a - << ", g=" << g; - gradient_sum[static_cast(d)] += face.Nx(d, a, g); - } - } - - EXPECT_NEAR(partition, 1.0, tol) - << "element=" << static_cast(c.type) << ", g=" << g; - for (int d = 0; d < c.insd; ++d) { - EXPECT_NEAR(gradient_sum[static_cast(d)], 0.0, tol) - << "element=" << static_cast(c.type) << ", d=" << d << ", g=" << g; - } -} - -bool array3_has_nonzero_component(const Array3& values, - int row, - double tol) { - for (int slice = 0; slice < values.nslices(); ++slice) { - for (int col = 0; col < values.ncols(); ++col) { - if (std::abs(values(row, col, slice)) > tol) { - return true; - } - } - } - return false; -} - -Array 
single_point_xi(const SolverBasisAdapterCase& c, - const Array& xi, - int g) { - Array point(c.insd, 1); - for (int d = 0; d < c.insd; ++d) { - point(d, 0) = xi(d, g); - } - return point; -} - -std::vector finite_difference_solver_second_derivative( - const SolverBasisAdapterCase& c, - const Array& point, - int gradient_component, - int coordinate_component, - double eps) { - Array xi_plus = point; - Array xi_minus = point; - xi_plus(coordinate_component, 0) += eps; - xi_minus(coordinate_component, 0) -= eps; - - Array N_plus(c.eNoN, 1); - Array N_minus(c.eNoN, 1); - Array3 Nx_plus(c.insd, c.eNoN, 1); - Array3 Nx_minus(c.insd, c.eNoN, 1); - - nn::get_gnn(c.insd, c.type, c.eNoN, 0, xi_plus, N_plus, Nx_plus); - nn::get_gnn(c.insd, c.type, c.eNoN, 0, xi_minus, N_minus, Nx_minus); - - std::vector values(static_cast(c.eNoN)); - for (int a = 0; a < c.eNoN; ++a) { - values[static_cast(a)] = - (Nx_plus(gradient_component, a, 0) - Nx_minus(gradient_component, a, 0)) / - (2.0 * eps); - } - return values; -} - -void expect_packed_hessian_component_matches_finite_difference( - const SolverBasisAdapterCase& c, - const Array& point, - const Array3& Nxx, - int g, - int packed_row, - int first_derivative_component, - int second_derivative_component, - double tol) { - const double eps = 2e-6; - const auto numerical = finite_difference_solver_second_derivative( - c, point, first_derivative_component, second_derivative_component, eps); - for (int a = 0; a < c.eNoN; ++a) { - EXPECT_NEAR(Nxx(packed_row, a, g), numerical[static_cast(a)], tol) - << "element=" << static_cast(c.type) - << ", packed_row=" << packed_row - << ", node=" << a - << ", g=" << g; - } - - if (first_derivative_component != second_derivative_component) { - const auto symmetric_numerical = finite_difference_solver_second_derivative( - c, point, second_derivative_component, first_derivative_component, eps); - for (int a = 0; a < c.eNoN; ++a) { - EXPECT_NEAR(Nxx(packed_row, a, g), - symmetric_numerical[static_cast(a)], 
- tol) - << "element=" << static_cast(c.type) - << ", symmetry packed_row=" << packed_row - << ", node=" << a - << ", g=" << g; - } - } -} - -void expect_solver_hessian_matches_gradient_finite_difference( - const SolverBasisAdapterCase& c, - const Array& xi, - int g, - const Array3& Nxx, - double tol) { - const Array point = single_point_xi(c, xi, g); - - expect_packed_hessian_component_matches_finite_difference(c, point, Nxx, g, 0, 0, 0, tol); - if (c.insd >= 2) { - expect_packed_hessian_component_matches_finite_difference(c, point, Nxx, g, 1, 1, 1, tol); - } - if (c.insd == 2) { - expect_packed_hessian_component_matches_finite_difference(c, point, Nxx, g, 2, 0, 1, tol); - } else if (c.insd >= 3) { - expect_packed_hessian_component_matches_finite_difference(c, point, Nxx, g, 2, 2, 2, tol); - expect_packed_hessian_component_matches_finite_difference(c, point, Nxx, g, 3, 0, 1, tol); - expect_packed_hessian_component_matches_finite_difference(c, point, Nxx, g, 4, 1, 2, tol); - expect_packed_hessian_component_matches_finite_difference(c, point, Nxx, g, 5, 0, 2, tol); - } -} - -void expect_partition_hessian_identity(const SolverBasisAdapterCase& c, - const Array3& Nxx, - int g, - double tol) { - for (int row = 0; row < Nxx.nrows(); ++row) { - double sum = 0.0; - for (int a = 0; a < c.eNoN; ++a) { - sum += Nxx(row, a, g); - } - EXPECT_NEAR(sum, 0.0, tol) - << "element=" << static_cast(c.type) - << ", packed_row=" << row - << ", g=" << g; - } -} - -void expect_all_hessians_zero(const SolverBasisAdapterCase& c, - const Array3& Nxx, - int g, - double tol) { - for (int row = 0; row < Nxx.nrows(); ++row) { - for (int a = 0; a < c.eNoN; ++a) { - EXPECT_NEAR(Nxx(row, a, g), 0.0, tol) - << "element=" << static_cast(c.type) - << ", packed_row=" << row - << ", node=" << a - << ", g=" << g; - } - } -} - -mshType initialized_mesh_for_case(const SolverBasisAdapterCase& c, bool force_lShpF) { - mshType mesh; - mesh.nFs = 1; - mesh.eType = c.type; - mesh.eNoN = c.eNoN; - mesh.nG = 
c.nG; - mesh.lShpF = force_lShpF; - mesh.w.resize(c.nG); - mesh.xi.resize(c.insd, c.nG); - mesh.N.resize(c.eNoN, c.nG); - mesh.Nx.resize(c.insd, c.eNoN, c.nG); - mesh.xib.resize(2, c.insd); - mesh.Nb.resize(2, c.eNoN); - - nn::get_gip(c.insd, c.quadrature_type, c.nG, mesh.w, mesh.xi); - for (int g = 0; g < c.nG; ++g) { - nn::get_gnn(c.insd, c.type, c.eNoN, g, mesh.xi, mesh.N, mesh.Nx); - } - nn::get_nn_bnds(c.insd, c.type, c.eNoN, mesh.xib, mesh.Nb); - return mesh; -} - -enum class PyramidFace { - Base, - South, - East, - North, - West -}; - -enum class PyramidEdge { - BaseSouth, - BaseEast, - BaseNorth, - BaseWest, - VerticalSW, - VerticalSE, - VerticalNE, - VerticalNW -}; - -struct LagrangeAccuracyCase { - ElementType type; - int order; - std::vector points; -}; - -std::size_t expected_lagrange_size(ElementType type, int order) { - switch (type) { - case ElementType::Point1: - return 1u; - case ElementType::Line2: - case ElementType::Line3: - return static_cast(order + 1); - case ElementType::Triangle3: - case ElementType::Triangle6: - return static_cast(order + 1) * static_cast(order + 2) / 2; - case ElementType::Quad4: - case ElementType::Quad9: - return static_cast(order + 1) * static_cast(order + 1); - case ElementType::Tetra4: - case ElementType::Tetra10: - return static_cast(order + 1) * - static_cast(order + 2) * - static_cast(order + 3) / 6; - case ElementType::Hex8: - case ElementType::Hex27: - return static_cast(order + 1) * - static_cast(order + 1) * - static_cast(order + 1); - case ElementType::Wedge6: - case ElementType::Wedge18: - return static_cast(order + 1) * - static_cast(order + 1) * - static_cast(order + 2) / 2; - case ElementType::Pyramid5: - case ElementType::Pyramid14: - return static_cast(order + 1) * - static_cast(order + 2) * - static_cast(2 * order + 3) / 6; - default: - return 0u; - } -} - -int expected_dimension(ElementType type) { - switch (type) { - case ElementType::Point1: - return 0; - case ElementType::Line2: - case 
ElementType::Line3: - return 1; - case ElementType::Triangle3: - case ElementType::Triangle6: - case ElementType::Quad4: - case ElementType::Quad9: - return 2; - default: - return 3; - } -} - -bool points_close(const Point& a, - const Point& b, - Real tol = Real(1e-12)) { - return std::abs(a[0] - b[0]) <= tol && - std::abs(a[1] - b[1]) <= tol && - std::abs(a[2] - b[2]) <= tol; -} - -std::vector reference_node_coords(ElementType type) { - switch (type) { - case ElementType::Line2: - return { - Point{Real(-1), Real(0), Real(0)}, - Point{Real(1), Real(0), Real(0)}, - }; - case ElementType::Line3: - return { - Point{Real(-1), Real(0), Real(0)}, - Point{Real(1), Real(0), Real(0)}, - Point{Real(0), Real(0), Real(0)}, - }; - case ElementType::Triangle3: - return { - Point{Real(0), Real(0), Real(0)}, - Point{Real(1), Real(0), Real(0)}, - Point{Real(0), Real(1), Real(0)}, - }; - case ElementType::Triangle6: - return { - Point{Real(0), Real(0), Real(0)}, - Point{Real(1), Real(0), Real(0)}, - Point{Real(0), Real(1), Real(0)}, - Point{Real(0.5), Real(0), Real(0)}, - Point{Real(0.5), Real(0.5), Real(0)}, - Point{Real(0), Real(0.5), Real(0)}, - }; - case ElementType::Quad4: - return { - Point{Real(-1), Real(-1), Real(0)}, - Point{Real(1), Real(-1), Real(0)}, - Point{Real(1), Real(1), Real(0)}, - Point{Real(-1), Real(1), Real(0)}, - }; - case ElementType::Quad8: - return { - Point{Real(-1), Real(-1), Real(0)}, - Point{Real(1), Real(-1), Real(0)}, - Point{Real(1), Real(1), Real(0)}, - Point{Real(-1), Real(1), Real(0)}, - Point{Real(0), Real(-1), Real(0)}, - Point{Real(1), Real(0), Real(0)}, - Point{Real(0), Real(1), Real(0)}, - Point{Real(-1), Real(0), Real(0)}, - }; - case ElementType::Quad9: - return { - Point{Real(-1), Real(-1), Real(0)}, - Point{Real(1), Real(-1), Real(0)}, - Point{Real(1), Real(1), Real(0)}, - Point{Real(-1), Real(1), Real(0)}, - Point{Real(0), Real(-1), Real(0)}, - Point{Real(1), Real(0), Real(0)}, - Point{Real(0), Real(1), Real(0)}, - Point{Real(-1), 
Real(0), Real(0)}, - Point{Real(0), Real(0), Real(0)}, - }; - case ElementType::Tetra4: - return { - Point{Real(0), Real(0), Real(0)}, - Point{Real(1), Real(0), Real(0)}, - Point{Real(0), Real(1), Real(0)}, - Point{Real(0), Real(0), Real(1)}, - }; - case ElementType::Tetra10: - return { - Point{Real(0), Real(0), Real(0)}, - Point{Real(1), Real(0), Real(0)}, - Point{Real(0), Real(1), Real(0)}, - Point{Real(0), Real(0), Real(1)}, - Point{Real(0.5), Real(0), Real(0)}, - Point{Real(0.5), Real(0.5), Real(0)}, - Point{Real(0), Real(0.5), Real(0)}, - Point{Real(0), Real(0), Real(0.5)}, - Point{Real(0.5), Real(0), Real(0.5)}, - Point{Real(0), Real(0.5), Real(0.5)}, - }; - case ElementType::Hex8: - return { - Point{Real(-1), Real(-1), Real(-1)}, - Point{Real(1), Real(-1), Real(-1)}, - Point{Real(1), Real(1), Real(-1)}, - Point{Real(-1), Real(1), Real(-1)}, - Point{Real(-1), Real(-1), Real(1)}, - Point{Real(1), Real(-1), Real(1)}, - Point{Real(1), Real(1), Real(1)}, - Point{Real(-1), Real(1), Real(1)}, - }; - case ElementType::Hex20: - return { - Point{Real(-1), Real(-1), Real(-1)}, - Point{Real(1), Real(-1), Real(-1)}, - Point{Real(1), Real(1), Real(-1)}, - Point{Real(-1), Real(1), Real(-1)}, - Point{Real(-1), Real(-1), Real(1)}, - Point{Real(1), Real(-1), Real(1)}, - Point{Real(1), Real(1), Real(1)}, - Point{Real(-1), Real(1), Real(1)}, - Point{Real(0), Real(-1), Real(-1)}, - Point{Real(1), Real(0), Real(-1)}, - Point{Real(0), Real(1), Real(-1)}, - Point{Real(-1), Real(0), Real(-1)}, - Point{Real(0), Real(-1), Real(1)}, - Point{Real(1), Real(0), Real(1)}, - Point{Real(0), Real(1), Real(1)}, - Point{Real(-1), Real(0), Real(1)}, - Point{Real(-1), Real(-1), Real(0)}, - Point{Real(1), Real(-1), Real(0)}, - Point{Real(1), Real(1), Real(0)}, - Point{Real(-1), Real(1), Real(0)}, - }; - case ElementType::Hex27: - return { - Point{Real(-1), Real(-1), Real(-1)}, - Point{Real(1), Real(-1), Real(-1)}, - Point{Real(1), Real(1), Real(-1)}, - Point{Real(-1), Real(1), Real(-1)}, - 
Point{Real(-1), Real(-1), Real(1)}, - Point{Real(1), Real(-1), Real(1)}, - Point{Real(1), Real(1), Real(1)}, - Point{Real(-1), Real(1), Real(1)}, - Point{Real(0), Real(-1), Real(-1)}, - Point{Real(1), Real(0), Real(-1)}, - Point{Real(0), Real(1), Real(-1)}, - Point{Real(-1), Real(0), Real(-1)}, - Point{Real(0), Real(-1), Real(1)}, - Point{Real(1), Real(0), Real(1)}, - Point{Real(0), Real(1), Real(1)}, - Point{Real(-1), Real(0), Real(1)}, - Point{Real(-1), Real(-1), Real(0)}, - Point{Real(1), Real(-1), Real(0)}, - Point{Real(1), Real(1), Real(0)}, - Point{Real(-1), Real(1), Real(0)}, - Point{Real(0), Real(0), Real(-1)}, - Point{Real(0), Real(0), Real(1)}, - Point{Real(0), Real(-1), Real(0)}, - Point{Real(1), Real(0), Real(0)}, - Point{Real(0), Real(1), Real(0)}, - Point{Real(-1), Real(0), Real(0)}, - Point{Real(0), Real(0), Real(0)}, - }; - case ElementType::Wedge6: - return { - Point{Real(0), Real(0), Real(-1)}, - Point{Real(1), Real(0), Real(-1)}, - Point{Real(0), Real(1), Real(-1)}, - Point{Real(0), Real(0), Real(1)}, - Point{Real(1), Real(0), Real(1)}, - Point{Real(0), Real(1), Real(1)}, - }; - case ElementType::Wedge15: - return { - Point{Real(0), Real(0), Real(-1)}, - Point{Real(1), Real(0), Real(-1)}, - Point{Real(0), Real(1), Real(-1)}, - Point{Real(0), Real(0), Real(1)}, - Point{Real(1), Real(0), Real(1)}, - Point{Real(0), Real(1), Real(1)}, - Point{Real(0.5), Real(0), Real(-1)}, - Point{Real(0.5), Real(0.5), Real(-1)}, - Point{Real(0), Real(0.5), Real(-1)}, - Point{Real(0.5), Real(0), Real(1)}, - Point{Real(0.5), Real(0.5), Real(1)}, - Point{Real(0), Real(0.5), Real(1)}, - Point{Real(0), Real(0), Real(0)}, - Point{Real(1), Real(0), Real(0)}, - Point{Real(0), Real(1), Real(0)}, - }; - case ElementType::Wedge18: - return { - Point{Real(0), Real(0), Real(-1)}, - Point{Real(1), Real(0), Real(-1)}, - Point{Real(0), Real(1), Real(-1)}, - Point{Real(0), Real(0), Real(1)}, - Point{Real(1), Real(0), Real(1)}, - Point{Real(0), Real(1), Real(1)}, - Point{Real(0.5), 
Real(0), Real(-1)}, - Point{Real(0.5), Real(0.5), Real(-1)}, - Point{Real(0), Real(0.5), Real(-1)}, - Point{Real(0.5), Real(0), Real(1)}, - Point{Real(0.5), Real(0.5), Real(1)}, - Point{Real(0), Real(0.5), Real(1)}, - Point{Real(0), Real(0), Real(0)}, - Point{Real(1), Real(0), Real(0)}, - Point{Real(0), Real(1), Real(0)}, - Point{Real(0.5), Real(0), Real(0)}, - Point{Real(0.5), Real(0.5), Real(0)}, - Point{Real(0), Real(0.5), Real(0)}, - }; - case ElementType::Pyramid5: - return { - Point{Real(-1), Real(-1), Real(0)}, - Point{Real(1), Real(-1), Real(0)}, - Point{Real(1), Real(1), Real(0)}, - Point{Real(-1), Real(1), Real(0)}, - Point{Real(0), Real(0), Real(1)}, - }; - case ElementType::Pyramid13: - return { - Point{Real(-1), Real(-1), Real(0)}, - Point{Real(1), Real(-1), Real(0)}, - Point{Real(1), Real(1), Real(0)}, - Point{Real(-1), Real(1), Real(0)}, - Point{Real(0), Real(0), Real(1)}, - Point{Real(0), Real(-1), Real(0)}, - Point{Real(1), Real(0), Real(0)}, - Point{Real(0), Real(1), Real(0)}, - Point{Real(-1), Real(0), Real(0)}, - Point{Real(-0.5), Real(-0.5), Real(0.5)}, - Point{Real(0.5), Real(-0.5), Real(0.5)}, - Point{Real(0.5), Real(0.5), Real(0.5)}, - Point{Real(-0.5), Real(0.5), Real(0.5)}, - }; - case ElementType::Pyramid14: - return { - Point{Real(-1), Real(-1), Real(0)}, - Point{Real(1), Real(-1), Real(0)}, - Point{Real(1), Real(1), Real(0)}, - Point{Real(-1), Real(1), Real(0)}, - Point{Real(0), Real(0), Real(1)}, - Point{Real(0), Real(-1), Real(0)}, - Point{Real(1), Real(0), Real(0)}, - Point{Real(0), Real(1), Real(0)}, - Point{Real(-1), Real(0), Real(0)}, - Point{Real(-0.5), Real(-0.5), Real(0.5)}, - Point{Real(0.5), Real(-0.5), Real(0.5)}, - Point{Real(0.5), Real(0.5), Real(0.5)}, - Point{Real(-0.5), Real(0.5), Real(0.5)}, - Point{Real(0), Real(0), Real(0)}, - }; - default: - return {}; - } -} - -void expect_nodes_match_node_ordering(ElementType canonical_type, - int order, - ElementType node_ordering_type) { - LagrangeBasis basis(canonical_type, 
order); - const auto& nodes = basis.nodes(); - - ASSERT_EQ(nodes.size(), ReferenceNodeLayout::num_nodes(node_ordering_type)); - ASSERT_EQ(nodes.size(), basis.size()); - - for (std::size_t i = 0; i < nodes.size(); ++i) { - const auto expected = ReferenceNodeLayout::get_node_coords(node_ordering_type, i); - EXPECT_NEAR(nodes[i][0], expected[0], 1e-14); - EXPECT_NEAR(nodes[i][1], expected[1], 1e-14); - EXPECT_NEAR(nodes[i][2], expected[2], 1e-14); - - std::vector vals; - basis.evaluate_values(expected, vals); - ASSERT_EQ(vals.size(), nodes.size()); - for (std::size_t j = 0; j < vals.size(); ++j) { - const double expected_delta = (i == j) ? 1.0 : 0.0; - EXPECT_NEAR(vals[j], expected_delta, 1e-12); - } - } -} - -void expect_alias_matches_canonical(ElementType alias_type, - ElementType canonical_type, - int canonical_order, - const std::vector& points, - Real tol = Real(1e-12)) { - LagrangeBasis alias(alias_type, canonical_order); - LagrangeBasis canonical(canonical_type, canonical_order); - - ASSERT_EQ(alias.element_type(), canonical.element_type()); - ASSERT_EQ(alias.order(), canonical.order()); - ASSERT_EQ(alias.size(), canonical.size()); - ASSERT_EQ(alias.nodes().size(), canonical.nodes().size()); - - for (std::size_t i = 0; i < alias.nodes().size(); ++i) { - EXPECT_NEAR(alias.nodes()[i][0], canonical.nodes()[i][0], tol); - EXPECT_NEAR(alias.nodes()[i][1], canonical.nodes()[i][1], tol); - EXPECT_NEAR(alias.nodes()[i][2], canonical.nodes()[i][2], tol); - } - - for (const auto& xi : points) { - std::vector alias_values; - std::vector canonical_values; - std::vector alias_gradients; - std::vector canonical_gradients; - std::vector alias_hessians; - std::vector canonical_hessians; - - alias.evaluate_values(xi, alias_values); - canonical.evaluate_values(xi, canonical_values); - alias.evaluate_gradients(xi, alias_gradients); - canonical.evaluate_gradients(xi, canonical_gradients); - alias.evaluate_hessians(xi, alias_hessians); - canonical.evaluate_hessians(xi, 
canonical_hessians); - - ASSERT_EQ(alias_values.size(), canonical_values.size()); - ASSERT_EQ(alias_gradients.size(), canonical_gradients.size()); - ASSERT_EQ(alias_hessians.size(), canonical_hessians.size()); - - for (std::size_t i = 0; i < alias_values.size(); ++i) { - EXPECT_NEAR(alias_values[i], canonical_values[i], tol); - for (int d = 0; d < canonical.dimension(); ++d) { - const std::size_t sd = static_cast(d); - EXPECT_NEAR(alias_gradients[i][sd], canonical_gradients[i][sd], tol); - for (int e = 0; e < canonical.dimension(); ++e) { - const std::size_t se = static_cast(e); - EXPECT_NEAR(alias_hessians[i](sd, se), canonical_hessians[i](sd, se), Real(5) * tol); - } - } - } - } -} - -std::vector sample_points_for(ElementType type) { - switch (type) { - case ElementType::Line2: - case ElementType::Line3: - return { - Point{Real(-0.7), Real(0), Real(0)}, - Point{Real(0.1), Real(0), Real(0)}, - Point{Real(0.65), Real(0), Real(0)} - }; - case ElementType::Triangle3: - case ElementType::Triangle6: - return { - Point{Real(0.15), Real(0.2), Real(0)}, - Point{Real(0.25), Real(0.1), Real(0)}, - Point{Real(0.2), Real(0.3), Real(0)} - }; - case ElementType::Quad4: - case ElementType::Quad9: - return { - Point{Real(0.2), Real(-0.35), Real(0)}, - Point{Real(-0.4), Real(0.25), Real(0)}, - Point{Real(0.55), Real(0.1), Real(0)} - }; - case ElementType::Tetra4: - case ElementType::Tetra10: - return { - Point{Real(0.1), Real(0.2), Real(0.15)}, - Point{Real(0.2), Real(0.1), Real(0.25)}, - Point{Real(0.15), Real(0.15), Real(0.2)} - }; - case ElementType::Hex8: - case ElementType::Hex27: - return { - Point{Real(0.2), Real(-0.3), Real(0.25)}, - Point{Real(-0.5), Real(0.4), Real(-0.2)}, - Point{Real(0.1), Real(0.15), Real(0.6)} - }; - case ElementType::Wedge6: - case ElementType::Wedge18: - return { - Point{Real(0.2), Real(0.25), Real(0.0)}, - Point{Real(0.1), Real(0.2), Real(-0.45)}, - Point{Real(0.3), Real(0.15), Real(0.5)} - }; - case ElementType::Pyramid5: - case 
ElementType::Pyramid14: - return { - Point{Real(0.0), Real(0.0), Real(0.25)}, - Point{Real(0.15), Real(-0.1), Real(0.3)}, - Point{Real(-0.1), Real(0.2), Real(0.4)} - }; - default: - return {Point{Real(0), Real(0), Real(0)}}; - } -} - -std::vector boundary_stress_points_for(ElementType type); - -std::vector dense_sample_points_for(ElementType type) { - const auto interior = sample_points_for(type); - const auto boundary = boundary_stress_points_for(type); - - std::vector points; - points.reserve(interior.size() + boundary.size()); - points.insert(points.end(), interior.begin(), interior.end()); - points.insert(points.end(), boundary.begin(), boundary.end()); - - if (type == ElementType::Pyramid5 || type == ElementType::Pyramid14) { - points.push_back(Point{Real(0.0), Real(0.0), Real(0.85)}); - points.push_back(Point{Real(0.02), Real(-0.015), Real(0.95)}); - } - return points; -} - -std::vector boundary_stress_points_for(ElementType type) { - switch (type) { - case ElementType::Line2: - case ElementType::Line3: - return { - Point{Real(-0.999), Real(0), Real(0)}, - Point{Real(-0.75), Real(0), Real(0)}, - Point{Real(0.0), Real(0), Real(0)}, - Point{Real(0.8), Real(0), Real(0)}, - Point{Real(0.999), Real(0), Real(0)} - }; - case ElementType::Triangle3: - case ElementType::Triangle6: - return { - Point{Real(1e-6), Real(1e-6), Real(0)}, - Point{Real(0.98), Real(0.01), Real(0)}, - Point{Real(0.01), Real(0.98), Real(0)}, - Point{Real(0.25), Real(1e-4), Real(0)}, - Point{Real(0.49), Real(0.49), Real(0)} - }; - case ElementType::Quad4: - case ElementType::Quad9: - return { - Point{Real(-0.99), Real(-0.99), Real(0)}, - Point{Real(0.99), Real(-0.99), Real(0)}, - Point{Real(0.99), Real(0.99), Real(0)}, - Point{Real(-0.99), Real(0.99), Real(0)}, - Point{Real(0.0), Real(0.95), Real(0)} - }; - case ElementType::Tetra4: - case ElementType::Tetra10: - return { - Point{Real(1e-6), Real(1e-6), Real(1e-6)}, - Point{Real(0.97), Real(0.01), Real(0.01)}, - Point{Real(0.01), Real(0.97), 
Real(0.01)}, - Point{Real(0.01), Real(0.01), Real(0.97)}, - Point{Real(0.32), Real(0.33), Real(0.01)} - }; - case ElementType::Hex8: - case ElementType::Hex27: - return { - Point{Real(-0.99), Real(-0.99), Real(-0.99)}, - Point{Real(0.99), Real(-0.99), Real(0.99)}, - Point{Real(0.99), Real(0.99), Real(-0.99)}, - Point{Real(-0.99), Real(0.99), Real(0.99)}, - Point{Real(0.0), Real(0.0), Real(0.95)} - }; - case ElementType::Wedge6: - case ElementType::Wedge18: - return { - Point{Real(1e-6), Real(1e-6), Real(-0.99)}, - Point{Real(0.98), Real(0.01), Real(-0.99)}, - Point{Real(0.01), Real(0.98), Real(0.99)}, - Point{Real(0.49), Real(0.49), Real(0.0)}, - Point{Real(0.25), Real(1e-4), Real(0.95)} - }; - case ElementType::Pyramid5: - case ElementType::Pyramid14: - return { - Point{Real(0.0), Real(0.0), Real(0.95)}, - Point{Real(0.01), Real(-0.01), Real(0.98)}, - Point{Real(0.6), Real(-0.6), Real(0.2)}, - Point{Real(0.79), Real(0.0), Real(0.2)}, - Point{Real(0.0), Real(0.79), Real(0.2)} - }; - default: - return {Point{Real(0), Real(0), Real(0)}}; - } -} - -Real monomial_value(const Point& xi, int px, int py, int pz) { - return std::pow(xi[0], px) * std::pow(xi[1], py) * std::pow(xi[2], pz); -} - -void expect_gradients_match_finite_difference(const LagrangeAccuracyCase& c, - Real eps, - Real tol) { - LagrangeBasis basis(c.type, c.order); - - for (const auto& xi : c.points) { - std::vector gradients; - basis.evaluate_gradients(xi, gradients); - ASSERT_EQ(gradients.size(), basis.size()); - - for (int d = 0; d < basis.dimension(); ++d) { - Point xp = xi; - Point xm = xi; - xp[d] += eps; - xm[d] -= eps; - - std::vector values_p; - std::vector values_m; - basis.evaluate_values(xp, values_p); - basis.evaluate_values(xm, values_m); - - ASSERT_EQ(values_p.size(), basis.size()); - ASSERT_EQ(values_m.size(), basis.size()); - for (std::size_t i = 0; i < basis.size(); ++i) { - const Real fd = (values_p[i] - values_m[i]) / (Real(2) * eps); - EXPECT_NEAR(gradients[i][d], fd, tol) - << 
"type=" << static_cast(c.type) - << ", order=" << c.order - << ", dim=" << d - << ", basis_i=" << i - << ", xi=(" << xi[0] << "," << xi[1] << "," << xi[2] << ")"; - } - } - } -} - -void expect_polynomial_reproduction(const LagrangeAccuracyCase& c, - const std::vector>& exponents, - Real tol) { - LagrangeBasis basis(c.type, c.order); - const auto& nodes = basis.nodes(); - ASSERT_EQ(nodes.size(), basis.size()); - - for (const auto& exp : exponents) { - std::vector coeffs(basis.size(), Real(0)); - for (std::size_t i = 0; i < basis.size(); ++i) { - coeffs[i] = monomial_value(nodes[i], exp[0], exp[1], exp[2]); - } - - for (const auto& xi : c.points) { - std::vector values; - basis.evaluate_values(xi, values); - ASSERT_EQ(values.size(), basis.size()); - - Real interpolated = Real(0); - for (std::size_t i = 0; i < basis.size(); ++i) { - interpolated += coeffs[i] * values[i]; - } - - const Real exact = monomial_value(xi, exp[0], exp[1], exp[2]); - EXPECT_NEAR(interpolated, exact, tol) - << "type=" << static_cast(c.type) - << ", order=" << c.order - << ", monomial=(" << exp[0] << "," << exp[1] << "," << exp[2] << ")" - << ", xi=(" << xi[0] << "," << xi[1] << "," << xi[2] << ")"; - } - } -} - -template -void expect_all_finite(const Container& values) { - for (const auto& value : values) { - for (std::size_t d = 0; d < 3; ++d) { - EXPECT_TRUE(std::isfinite(value[d])); - } - } -} - -void expect_hessians_finite(const std::vector& hessians, - int dimension) { - for (const auto& H : hessians) { - for (int i = 0; i < dimension; ++i) { - for (int j = 0; j < dimension; ++j) { - EXPECT_TRUE(std::isfinite(H(static_cast(i), - static_cast(j)))); - } - } - } -} - -void expect_partition_gradient_hessian_sums(const LagrangeBasis& basis, - const std::vector& points, - Real value_tol, - Real derivative_tol) { - for (const auto& xi : points) { - std::vector values; - std::vector gradients; - std::vector hessians; - basis.evaluate_values(xi, values); - basis.evaluate_gradients(xi, gradients); 
- basis.evaluate_hessians(xi, hessians); - - ASSERT_EQ(values.size(), basis.size()); - ASSERT_EQ(gradients.size(), basis.size()); - ASSERT_EQ(hessians.size(), basis.size()); - - Real value_sum = Real(0); - Gradient gradient_sum{}; - Hessian hessian_sum{}; - for (std::size_t i = 0; i < basis.size(); ++i) { - value_sum += values[i]; - for (int d = 0; d < basis.dimension(); ++d) { - const std::size_t sd = static_cast(d); - gradient_sum[sd] += gradients[i][sd]; - for (int e = 0; e < basis.dimension(); ++e) { - const std::size_t se = static_cast(e); - hessian_sum(sd, se) += hessians[i](sd, se); - } - } - } - - EXPECT_NEAR(value_sum, Real(1), value_tol) - << "Element type " << static_cast(basis.element_type()) - << ", order " << basis.order() - << ", xi=(" << xi[0] << "," << xi[1] << "," << xi[2] << ")"; - - for (int d = 0; d < basis.dimension(); ++d) { - const std::size_t sd = static_cast(d); - EXPECT_NEAR(gradient_sum[sd], Real(0), derivative_tol) - << "Gradient sum mismatch for element type " << static_cast(basis.element_type()) - << ", order " << basis.order() - << ", dim " << d; - for (int e = 0; e < basis.dimension(); ++e) { - const std::size_t se = static_cast(e); - EXPECT_NEAR(hessian_sum(sd, se), Real(0), derivative_tol) - << "Hessian sum mismatch for element type " << static_cast(basis.element_type()) - << ", order " << basis.order() - << ", component (" << d << "," << e << ")"; - } - } - } -} - -bool is_on_pyramid_face(const Point& point, - PyramidFace face, - Real tol = Real(1e-12)) { - const Real scale = Real(1) - point[2]; - switch (face) { - case PyramidFace::Base: - return std::abs(point[2]) <= tol; - case PyramidFace::South: - return std::abs(point[1] + scale) <= tol; - case PyramidFace::East: - return std::abs(point[0] - scale) <= tol; - case PyramidFace::North: - return std::abs(point[1] - scale) <= tol; - case PyramidFace::West: - return std::abs(point[0] + scale) <= tol; - } - return false; -} - -Point map_pyramid_face_to_reference(PyramidFace face, 
- const Point& point) { - const Real scale = Real(1) - point[2]; - switch (face) { - case PyramidFace::Base: - return Point{point[0], point[1], Real(0)}; - case PyramidFace::South: - return Point{(scale - point[0]) / Real(2), point[2], Real(0)}; - case PyramidFace::East: - return Point{(scale + point[1]) / Real(2), point[2], Real(0)}; - case PyramidFace::North: - return Point{(scale + point[0]) / Real(2), point[2], Real(0)}; - case PyramidFace::West: - return Point{(scale - point[1]) / Real(2), point[2], Real(0)}; - } - return Point{}; -} - -std::vector sample_points_for_pyramid_face(PyramidFace face) { - switch (face) { - case PyramidFace::Base: - return { - Point{Real(0.15), Real(-0.2), Real(0)}, - Point{Real(-0.55), Real(0.35), Real(0)} - }; - case PyramidFace::South: - return { - Point{Real(-0.2), Real(-0.8), Real(0.2)}, - Point{Real(0.05), Real(-0.35), Real(0.65)} - }; - case PyramidFace::East: - return { - Point{Real(0.8), Real(-0.25), Real(0.2)}, - Point{Real(0.3), Real(0.08), Real(0.7)} - }; - case PyramidFace::North: - return { - Point{Real(0.25), Real(0.8), Real(0.2)}, - Point{Real(-0.08), Real(0.35), Real(0.65)} - }; - case PyramidFace::West: - return { - Point{Real(-0.8), Real(0.2), Real(0.2)}, - Point{Real(-0.3), Real(-0.05), Real(0.7)} - }; - } - return {}; -} - -bool is_on_pyramid_edge(const Point& point, - PyramidEdge edge, - Real tol = Real(1e-12)) { - const Real scale = Real(1) - point[2]; - switch (edge) { - case PyramidEdge::BaseSouth: - return std::abs(point[2]) <= tol && std::abs(point[1] + Real(1)) <= tol; - case PyramidEdge::BaseEast: - return std::abs(point[2]) <= tol && std::abs(point[0] - Real(1)) <= tol; - case PyramidEdge::BaseNorth: - return std::abs(point[2]) <= tol && std::abs(point[1] - Real(1)) <= tol; - case PyramidEdge::BaseWest: - return std::abs(point[2]) <= tol && std::abs(point[0] + Real(1)) <= tol; - case PyramidEdge::VerticalSW: - return std::abs(point[0] + scale) <= tol && std::abs(point[1] + scale) <= tol; - case 
PyramidEdge::VerticalSE: - return std::abs(point[0] - scale) <= tol && std::abs(point[1] + scale) <= tol; - case PyramidEdge::VerticalNE: - return std::abs(point[0] - scale) <= tol && std::abs(point[1] - scale) <= tol; - case PyramidEdge::VerticalNW: - return std::abs(point[0] + scale) <= tol && std::abs(point[1] - scale) <= tol; - } - return false; -} - -Point map_pyramid_edge_to_reference(PyramidEdge edge, - const Point& point) { - switch (edge) { - case PyramidEdge::BaseSouth: - case PyramidEdge::BaseNorth: - return Point{point[0], Real(0), Real(0)}; - case PyramidEdge::BaseEast: - case PyramidEdge::BaseWest: - return Point{point[1], Real(0), Real(0)}; - case PyramidEdge::VerticalSW: - case PyramidEdge::VerticalSE: - case PyramidEdge::VerticalNE: - case PyramidEdge::VerticalNW: - return Point{Real(2) * point[2] - Real(1), Real(0), Real(0)}; - } - return Point{}; -} - -std::vector sample_points_for_pyramid_edge(PyramidEdge edge) { - switch (edge) { - case PyramidEdge::BaseSouth: - return {Point{Real(-0.65), Real(-1), Real(0)}, Point{Real(0.35), Real(-1), Real(0)}}; - case PyramidEdge::BaseEast: - return {Point{Real(1), Real(-0.45), Real(0)}, Point{Real(1), Real(0.55), Real(0)}}; - case PyramidEdge::BaseNorth: - return {Point{Real(-0.55), Real(1), Real(0)}, Point{Real(0.45), Real(1), Real(0)}}; - case PyramidEdge::BaseWest: - return {Point{Real(-1), Real(-0.55), Real(0)}, Point{Real(-1), Real(0.45), Real(0)}}; - case PyramidEdge::VerticalSW: - return {Point{Real(-0.75), Real(-0.75), Real(0.25)}, Point{Real(-0.3), Real(-0.3), Real(0.7)}}; - case PyramidEdge::VerticalSE: - return {Point{Real(0.75), Real(-0.75), Real(0.25)}, Point{Real(0.3), Real(-0.3), Real(0.7)}}; - case PyramidEdge::VerticalNE: - return {Point{Real(0.75), Real(0.75), Real(0.25)}, Point{Real(0.3), Real(0.3), Real(0.7)}}; - case PyramidEdge::VerticalNW: - return {Point{Real(-0.75), Real(0.75), Real(0.25)}, Point{Real(-0.3), Real(0.3), Real(0.7)}}; - } - return {}; -} - -std::vector 
map_pyramid_nodes_to_lower_basis_nodes( - const std::vector& pyramid_nodes, - const std::vector& lower_basis_nodes, - const std::function& selector, - const std::function& mapper) { - std::vector mapping(pyramid_nodes.size(), -1); - std::size_t face_count = 0; - for (std::size_t i = 0; i < pyramid_nodes.size(); ++i) { - if (!selector(pyramid_nodes[i])) { - continue; - } - - ++face_count; - const Point mapped = mapper(pyramid_nodes[i]); - bool found = false; - for (std::size_t j = 0; j < lower_basis_nodes.size(); ++j) { - if (points_close(mapped, lower_basis_nodes[j])) { - mapping[i] = static_cast(j); - found = true; - break; - } - } - EXPECT_TRUE(found) - << "Failed to match pyramid trace node at (" << pyramid_nodes[i][0] << "," - << pyramid_nodes[i][1] << "," << pyramid_nodes[i][2] << ")"; - } - - EXPECT_EQ(face_count, lower_basis_nodes.size()); - return mapping; -} - -void expect_pyramid_face_trace_matches_lower_basis(int order, - PyramidFace face, - Real tol = Real(2e-10)) { - LagrangeBasis pyramid(ElementType::Pyramid5, order); - const bool base_face = face == PyramidFace::Base; - LagrangeBasis lower(base_face ? 
ElementType::Quad4 : ElementType::Triangle3, order); - - const auto mapping = map_pyramid_nodes_to_lower_basis_nodes( - pyramid.nodes(), - lower.nodes(), - [&](const Point& point) { return is_on_pyramid_face(point, face); }, - [&](const Point& point) { return map_pyramid_face_to_reference(face, point); }); - - for (const auto& face_point : sample_points_for_pyramid_face(face)) { - std::vector pyramid_values; - std::vector lower_values; - pyramid.evaluate_values(face_point, pyramid_values); - lower.evaluate_values(map_pyramid_face_to_reference(face, face_point), lower_values); - - ASSERT_EQ(pyramid_values.size(), pyramid.size()); - ASSERT_EQ(lower_values.size(), lower.size()); - - for (std::size_t i = 0; i < pyramid.size(); ++i) { - if (mapping[i] >= 0) { - EXPECT_NEAR(pyramid_values[i], lower_values[static_cast(mapping[i])], tol) - << "Face trace mismatch for order " << order - << ", face " << static_cast(face) - << ", basis " << i; - } else { - EXPECT_NEAR(pyramid_values[i], Real(0), tol) - << "Off-face pyramid basis should vanish on face for order " << order - << ", face " << static_cast(face) - << ", basis " << i; - } - } - } -} - -void expect_pyramid_edge_trace_matches_line_basis(int order, - PyramidEdge edge, - Real tol = Real(2e-10)) { - LagrangeBasis pyramid(ElementType::Pyramid5, order); - LagrangeBasis line(ElementType::Line2, order); - - const auto mapping = map_pyramid_nodes_to_lower_basis_nodes( - pyramid.nodes(), - line.nodes(), - [&](const Point& point) { return is_on_pyramid_edge(point, edge); }, - [&](const Point& point) { return map_pyramid_edge_to_reference(edge, point); }); - - for (const auto& edge_point : sample_points_for_pyramid_edge(edge)) { - std::vector pyramid_values; - std::vector line_values; - pyramid.evaluate_values(edge_point, pyramid_values); - line.evaluate_values(map_pyramid_edge_to_reference(edge, edge_point), line_values); - - ASSERT_EQ(pyramid_values.size(), pyramid.size()); - ASSERT_EQ(line_values.size(), line.size()); - - for 
(std::size_t i = 0; i < pyramid.size(); ++i) { - if (mapping[i] >= 0) { - EXPECT_NEAR(pyramid_values[i], line_values[static_cast(mapping[i])], tol) - << "Edge trace mismatch for order " << order - << ", edge " << static_cast(edge) - << ", basis " << i; - } else { - EXPECT_NEAR(pyramid_values[i], Real(0), tol) - << "Off-edge pyramid basis should vanish on edge for order " << order - << ", edge " << static_cast(edge) - << ", basis " << i; - } - } - } -} - -struct StridedOutputRequest { - bool values; - bool gradients; - bool hessians; -}; - -void expect_strided_matches_pointwise(ElementType type, - int order, - const StridedOutputRequest& request) { - LagrangeBasis basis(type, order); - const auto points = dense_sample_points_for(type); - const std::size_t stride = points.size() + 3u; - constexpr Real sentinel = Real(-12345.25); - - std::vector values(request.values ? basis.size() * stride : 0u, sentinel); - std::vector gradients(request.gradients ? basis.size() * 3u * stride : 0u, sentinel); - std::vector hessians(request.hessians ? basis.size() * 9u * stride : 0u, sentinel); - - basis.evaluate_at_quadrature_points_strided( - points, - stride, - request.values ? values.data() : nullptr, - request.gradients ? gradients.data() : nullptr, - request.hessians ? hessians.data() : nullptr); - - const Real tol = (type == ElementType::Pyramid5 || type == ElementType::Pyramid14) - ? 
Real(5e-10) - : Real(1e-12); - - for (std::size_t q = 0; q < points.size(); ++q) { - if (request.values) { - std::vector expected; - basis.evaluate_values(points[q], expected); - ASSERT_EQ(expected.size(), basis.size()); - for (std::size_t d = 0; d < basis.size(); ++d) { - EXPECT_NEAR(values[d * stride + q], expected[d], tol) - << "type=" << static_cast(type) - << ", order=" << order - << ", dof=" << d - << ", q=" << q; - } - } - - if (request.gradients) { - std::vector expected; - basis.evaluate_gradients(points[q], expected); - ASSERT_EQ(expected.size(), basis.size()); - for (std::size_t d = 0; d < basis.size(); ++d) { - for (std::size_t c = 0; c < 3u; ++c) { - EXPECT_NEAR(gradients[(d * 3u + c) * stride + q], expected[d][c], tol) - << "type=" << static_cast(type) - << ", order=" << order - << ", dof=" << d - << ", component=" << c - << ", q=" << q; - } - } - } - - if (request.hessians) { - std::vector expected; - basis.evaluate_hessians(points[q], expected); - ASSERT_EQ(expected.size(), basis.size()); - for (std::size_t d = 0; d < basis.size(); ++d) { - for (std::size_t r = 0; r < 3u; ++r) { - for (std::size_t c = 0; c < 3u; ++c) { - EXPECT_NEAR(hessians[(d * 9u + r * 3u + c) * stride + q], - expected[d](r, c), - Real(4) * tol) - << "type=" << static_cast(type) - << ", order=" << order - << ", dof=" << d - << ", hessian=(" << r << "," << c << ")" - << ", q=" << q; - } - } - } - } - } - - const auto expect_padding_untouched = [&](const std::vector& buffer, - std::size_t rows) { - for (std::size_t row = 0; row < rows; ++row) { - for (std::size_t q = points.size(); q < stride; ++q) { - EXPECT_EQ(buffer[row * stride + q], sentinel) - << "type=" << static_cast(type) - << ", order=" << order - << ", row=" << row - << ", padding q=" << q; - } - } - }; - - if (request.values) { - expect_padding_untouched(values, basis.size()); - } - if (request.gradients) { - expect_padding_untouched(gradients, basis.size() * 3u); - } - if (request.hessians) { - 
expect_padding_untouched(hessians, basis.size() * 9u); - } -} - -void expect_raw_to_matches_vector_evaluation(ElementType type, int order) { - LagrangeBasis basis(type, order); - const Real tol = (type == ElementType::Pyramid5 || type == ElementType::Pyramid14) - ? Real(5e-10) - : Real(1e-12); - - for (const auto& point : sample_points_for(type)) { - std::vector values; - std::vector gradients; - std::vector hessians; - basis.evaluate_all(point, values, gradients, hessians); - - std::vector raw_values(basis.size()); - std::vector raw_gradients(basis.size() * 3u); - std::vector raw_hessians(basis.size() * 9u); - basis.evaluate_values_to(point, raw_values.data()); - basis.evaluate_gradients_to(point, raw_gradients.data()); - basis.evaluate_hessians_to(point, raw_hessians.data()); - - for (std::size_t i = 0; i < basis.size(); ++i) { - EXPECT_NEAR(raw_values[i], values[i], tol) - << "type=" << static_cast(type) << ", order=" << order << ", dof=" << i; - for (std::size_t c = 0; c < 3u; ++c) { - EXPECT_NEAR(raw_gradients[i * 3u + c], gradients[i][c], tol) - << "type=" << static_cast(type) - << ", order=" << order - << ", dof=" << i - << ", gradient component=" << c; - } - for (std::size_t r = 0; r < 3u; ++r) { - for (std::size_t c = 0; c < 3u; ++c) { - EXPECT_NEAR(raw_hessians[i * 9u + r * 3u + c], hessians[i](r, c), Real(4) * tol) - << "type=" << static_cast(type) - << ", order=" << order - << ", dof=" << i - << ", hessian=(" << r << "," << c << ")"; - } - } - } - } -} - -} // namespace - -TEST(SolverBasisAdapter, ShapeValuesGradientsAndMeshOverloadMatchLegacy) { - constexpr double tol = 2e-12; - - for (const auto& c : solver_basis_adapter_cases()) { - SCOPED_TRACE("element=" + std::to_string(static_cast(c.type))); - Vector weights; - Array xi; - fill_legacy_quadrature(c, weights, xi); - - Array legacy_N(c.eNoN, c.nG); - Array adapter_N(c.eNoN, c.nG); - Array3 legacy_Nx(c.insd, c.eNoN, c.nG); - Array3 adapter_Nx(c.insd, c.eNoN, c.nG); - auto legacy_shape = 
legacy_solver_nn::get_element_shape_data.find(c.type); - - faceType legacy_face; - if (legacy_shape == legacy_solver_nn::get_element_shape_data.end()) { - ASSERT_EQ(c.type, consts::ElementType::QUD8); - legacy_face.eType = c.type; - legacy_face.eNoN = c.eNoN; - legacy_face.nG = c.nG; - legacy_face.xi = xi; - legacy_face.N.resize(c.eNoN, c.nG); - legacy_face.Nx.resize(c.insd, c.eNoN, c.nG); - } - - for (int g = 0; g < c.nG; ++g) { - if (legacy_shape != legacy_solver_nn::get_element_shape_data.end()) { - legacy_shape->second(c.insd, c.eNoN, g, xi, legacy_N, legacy_Nx); - } else { - legacy_solver_nn::set_face_shape_data.at(c.type)(g, legacy_face); - } - nn::get_gnn(c.insd, c.type, c.eNoN, g, xi, adapter_N, adapter_Nx); - - double partition = 0.0; - std::array gradient_sum{0.0, 0.0, 0.0}; - for (int a = 0; a < c.eNoN; ++a) { - partition += adapter_N(a, g); - for (int d = 0; d < c.insd; ++d) { - gradient_sum[static_cast(d)] += adapter_Nx(d, a, g); - } - } - - EXPECT_NEAR(partition, 1.0, tol) - << "element=" << static_cast(c.type) << ", g=" << g; - for (int d = 0; d < c.insd; ++d) { - EXPECT_NEAR(gradient_sum[static_cast(d)], 0.0, tol) - << "element=" << static_cast(c.type) << ", d=" << d << ", g=" << g; - } - } - - if (legacy_shape == legacy_solver_nn::get_element_shape_data.end()) { - legacy_N = legacy_face.N; - legacy_Nx = legacy_face.Nx; - } - - expect_arrays_near(adapter_N, legacy_N, tol); - expect_array3_near(adapter_Nx, legacy_Nx, tol); - - mshType mesh; - mesh.eType = c.type; - mesh.eNoN = c.eNoN; - mesh.nG = c.nG; - mesh.xi = xi; - mesh.N.resize(c.eNoN, c.nG); - mesh.Nx.resize(c.insd, c.eNoN, c.nG); - for (int g = 0; g < c.nG; ++g) { - nn::get_gnn(g, mesh); - } - - expect_arrays_near(mesh.N, legacy_N, tol); - expect_array3_near(mesh.Nx, legacy_Nx, tol); - } -} - -TEST(SolverFaceBasisAdapter, ShapeValuesGradientsAndDispatchMatchLegacyFaceTable) { - constexpr double tol = 2e-12; - - int covered = 0; - for (const auto& c : solver_face_basis_adapter_cases()) { - 
SCOPED_TRACE("face element=" + std::to_string(static_cast(c.type))); - - faceType legacy_face = initialized_face_for_case(c); - faceType basis_face = initialized_face_for_case(c); - - for (int g = 0; g < c.nG; ++g) { - legacy_solver_nn::set_face_shape_data.at(c.type)(g, legacy_face); - nn::get_gnn(nullptr, g, basis_face); - expect_face_partition_identities(c, basis_face, g, tol); - } - - expect_arrays_near(basis_face.N, legacy_face.N, tol); - expect_array3_near(basis_face.Nx, legacy_face.Nx, tol); - ++covered; - } - - EXPECT_EQ(covered, 7); -} - -TEST(SolverFaceBasisAdapter, MappedFacesFailClosedWithoutLegacyFallback) { - using consts::ElementType; - - SolverBasisAdapterCase c{ElementType::LIN1, ElementType::LIN1, 1, 3, 2}; - faceType face = initialized_face_for_case(c); - - try { - nn::get_gnn(nullptr, 0, face); - FAIL() << "Expected mapped face dispatch to reject mismatched eNoN"; - } catch (const svmp::FE::basis::BasisEvaluationException& exception) { - const std::string message = exception.what(); - EXPECT_NE(message.find("legacy fallback was not attempted"), std::string::npos) - << message; - } -} - -TEST(SolverFaceBasisAdapter, PointFaceRemainsLegacyValuePath) { - faceType face; - face.eType = consts::ElementType::PNT; - face.eNoN = 1; - face.nG = 1; - face.N.resize(1, 1); - face.Nx.resize(1, 1, 1); - face.N(0, 0) = -7.0; - face.Nx(0, 0, 0) = 42.0; - - nn::get_gnn(nullptr, 0, face); - - EXPECT_DOUBLE_EQ(face.N(0, 0), 1.0); - EXPECT_DOUBLE_EQ(face.Nx(0, 0, 0), 42.0); -} - -TEST(SolverFaceBasisAdapter, UnsupportedFacesThrowClearErrors) { - faceType nrb_face; - nrb_face.eType = consts::ElementType::NRB; - nrb_face.eNoN = 1; - nrb_face.nG = 1; - nrb_face.N.resize(1, 1); - nrb_face.Nx.resize(1, 1, 1); - EXPECT_THROW(nn::get_gnn(nullptr, 0, nrb_face), svmp::FE::NotImplementedException); - - faceType unknown_face; - unknown_face.eType = consts::ElementType::NA; - unknown_face.eNoN = 1; - unknown_face.nG = 1; - unknown_face.N.resize(1, 1); - unknown_face.Nx.resize(1, 
1, 1); - EXPECT_THROW(nn::get_gnn(nullptr, 0, unknown_face), svmp::FE::InvalidElementException); -} - -TEST(SolverBasisAdapter, QuadraturePathsRemainLegacyCompatible) { - constexpr double tol = 0.0; - - for (const auto& c : solver_basis_adapter_cases()) { - auto mesh_it = legacy_solver_nn::set_element_gauss_int_data.find(c.type); - if (mesh_it != legacy_solver_nn::set_element_gauss_int_data.end()) { - mshType legacy_mesh; - legacy_mesh.eType = c.type; - legacy_mesh.eNoN = c.eNoN; - legacy_mesh.nG = c.nG; - legacy_mesh.w.resize(c.nG); - legacy_mesh.xi.resize(c.insd, c.nG); - mesh_it->second(legacy_mesh); - - mshType adapter_mesh; - adapter_mesh.eType = c.type; - adapter_mesh.eNoN = c.eNoN; - adapter_mesh.nG = c.nG; - adapter_mesh.w.resize(c.nG); - adapter_mesh.xi.resize(c.insd, c.nG); - nn::get_gip(adapter_mesh); - - expect_vectors_near(adapter_mesh.w, legacy_mesh.w, tol); - expect_arrays_near(adapter_mesh.xi, legacy_mesh.xi, tol); - } - - auto scalar_it = legacy_solver_nn::get_element_gauss_int_data.find(c.type); - if (scalar_it != legacy_solver_nn::get_element_gauss_int_data.end()) { - Vector legacy_w(c.nG); - Vector adapter_w(c.nG); - Array legacy_xi(c.insd, c.nG); - Array adapter_xi(c.insd, c.nG); - - scalar_it->second(c.insd, c.nG, legacy_w, legacy_xi); - nn::get_gip(c.insd, c.type, c.nG, adapter_w, adapter_xi); - - expect_vectors_near(adapter_w, legacy_w, tol); - expect_arrays_near(adapter_xi, legacy_xi, tol); - } - } - - mshType legacy_tet; - legacy_tet.eType = consts::ElementType::TET4; - legacy_tet.eNoN = 4; - legacy_tet.nG = 4; - legacy_tet.qmTET4 = 0.25; - legacy_tet.w.resize(4); - legacy_tet.xi.resize(3, 4); - legacy_solver_nn::set_element_gauss_int_data.at(consts::ElementType::TET4)(legacy_tet); - - mshType adapter_tet; - adapter_tet.eType = consts::ElementType::TET4; - adapter_tet.eNoN = 4; - adapter_tet.nG = 4; - adapter_tet.qmTET4 = 0.25; - adapter_tet.w.resize(4); - adapter_tet.xi.resize(3, 4); - nn::get_gip(adapter_tet); - - 
expect_vectors_near(adapter_tet.w, legacy_tet.w, tol); - expect_arrays_near(adapter_tet.xi, legacy_tet.xi, tol); -} - -TEST(SolverBasisAdapter, HessiansCoverEveryMappedScalarVolumeElement) { - constexpr double partition_tol = 2e-10; - constexpr double finite_difference_tol = 2e-5; - constexpr double zero_tol = 2e-12; - - int covered = 0; - for (const auto& c : solver_hessian_adapter_cases()) { - SCOPED_TRACE("element=" + std::to_string(static_cast(c.type))); - Vector weights; - Array xi; - fill_legacy_quadrature(c, weights, xi); - - const int ind2 = packed_hessian_components(c.insd); - Array3 adapter_Nxx(ind2, c.eNoN, c.nG); - fill_array3(adapter_Nxx, std::numeric_limits::quiet_NaN()); - - for (int g = 0; g < c.nG; ++g) { - nn::get_gn_nxx(c.insd, ind2, c.type, c.eNoN, g, xi, adapter_Nxx); - expect_partition_hessian_identity(c, adapter_Nxx, g, partition_tol); - expect_solver_hessian_matches_gradient_finite_difference( - c, xi, g, adapter_Nxx, finite_difference_tol); - - if (c.type == consts::ElementType::LIN1 || - c.type == consts::ElementType::TRI3 || - c.type == consts::ElementType::TET4) { - expect_all_hessians_zero(c, adapter_Nxx, g, zero_tol); - } - } - - if (c.type == consts::ElementType::QUD4) { - EXPECT_TRUE(array3_has_nonzero_component(adapter_Nxx, 2, zero_tol)); - } else if (c.type == consts::ElementType::HEX8) { - EXPECT_TRUE(array3_has_nonzero_component(adapter_Nxx, 3, zero_tol)); - EXPECT_TRUE(array3_has_nonzero_component(adapter_Nxx, 4, zero_tol)); - EXPECT_TRUE(array3_has_nonzero_component(adapter_Nxx, 5, zero_tol)); - } else if (c.type == consts::ElementType::WDG) { - EXPECT_TRUE(array3_has_nonzero_component(adapter_Nxx, 5, zero_tol)); - } - ++covered; - } - - EXPECT_EQ(covered, 13); -} - -TEST(SolverBasisAdapter, HessianPackingMatchesLegacyWhereLegacyIsApproved) { - constexpr double tol = 2e-12; - - for (const auto& c : solver_legacy_hessian_parity_cases()) { - Vector weights; - Array xi; - fill_legacy_quadrature(c, weights, xi); - - const int ind2 
= packed_hessian_components(c.insd); - Array3 legacy_Nxx(ind2, c.eNoN, c.nG); - Array3 adapter_Nxx(ind2, c.eNoN, c.nG); - - for (int g = 0; g < c.nG; ++g) { - legacy_solver_nn::get_element_2nd_derivs.at(c.type)( - c.insd, ind2, c.eNoN, g, xi, legacy_Nxx); - nn::get_gn_nxx(c.insd, ind2, c.type, c.eNoN, g, xi, adapter_Nxx); - } - - expect_array3_near(adapter_Nxx, legacy_Nxx, tol); - } -} - -TEST(SolverBasisAdapter, Qud8HessiansDoNotUseLegacyFallback) { - using consts::ElementType; - SolverBasisAdapterCase c{ElementType::QUD8, ElementType::QUD9, 2, 8, 9}; - - Vector weights; - Array xi; - fill_legacy_quadrature(c, weights, xi); - - const int ind2 = packed_hessian_components(c.insd); - Array3 legacy_Nxx(ind2, c.eNoN, c.nG); - Array3 adapter_Nxx(ind2, c.eNoN, c.nG); - fill_array3(legacy_Nxx, 0.0); - fill_array3(adapter_Nxx, 0.0); - - for (int g = 0; g < c.nG; ++g) { - legacy_solver_nn::get_element_2nd_derivs.at(c.type)( - c.insd, ind2, c.eNoN, g, xi, legacy_Nxx); - nn::get_gn_nxx(c.insd, ind2, c.type, c.eNoN, g, xi, adapter_Nxx); - } - - double max_abs_difference = 0.0; - for (int g = 0; g < c.nG; ++g) { - for (int a = 0; a < c.eNoN; ++a) { - for (int row = 0; row < ind2; ++row) { - max_abs_difference = std::max( - max_abs_difference, - std::abs(adapter_Nxx(row, a, g) - legacy_Nxx(row, a, g))); - } - } - } - - EXPECT_GT(max_abs_difference, 1e-8); -} - -TEST(SolverBasisAdapter, UnsupportedHessianFamiliesRemainNoOp) { - Array xi(1, 1); - xi(0, 0) = 0.0; - Array3 Nxx(1, 1, 1); - - for (const auto unsupported : {consts::ElementType::NRB, consts::ElementType::PNT}) { - fill_array3(Nxx, 42.0); - nn::get_gn_nxx(1, 1, unsupported, 1, 0, xi, Nxx); - EXPECT_DOUBLE_EQ(Nxx(0, 0, 0), 42.0) - << "element=" << static_cast(unsupported); - } -} - -TEST(SolverBasisAdapter, InitFsMshPopulatesMappedHessiansWithoutLShpFGate) { - using consts::ElementType; - const SolverBasisAdapterCase cases[] = { - {ElementType::QUD4, ElementType::QUD4, 2, 4, 4}, - {ElementType::HEX8, ElementType::HEX8, 3, 
8, 8}, - {ElementType::HEX20, ElementType::HEX20, 3, 20, 27}, - {ElementType::HEX27, ElementType::HEX27, 3, 27, 27}, - {ElementType::WDG, ElementType::WDG, 3, 6, 6}, - }; - - for (const auto& c : cases) { - SCOPED_TRACE("element=" + std::to_string(static_cast(c.type))); - ComMod com_mod; - com_mod.nsd = c.insd; - mshType mesh = initialized_mesh_for_case(c, true); - - fs::init_fs_msh(com_mod, mesh); - - ASSERT_EQ(mesh.fs.size(), 1u); - ASSERT_EQ(mesh.fs[0].Nxx.nrows(), packed_hessian_components(c.insd)); - if (c.type == ElementType::QUD4) { - EXPECT_TRUE(array3_has_nonzero_component(mesh.fs[0].Nxx, 2, 2e-12)); - } else if (c.type == ElementType::HEX8) { - EXPECT_TRUE(array3_has_nonzero_component(mesh.fs[0].Nxx, 3, 2e-12)); - } else if (c.type == ElementType::WDG) { - EXPECT_TRUE(array3_has_nonzero_component(mesh.fs[0].Nxx, 5, 2e-12)); - } else { - bool has_nonzero = false; - for (int row = 0; row < mesh.fs[0].Nxx.nrows(); ++row) { - has_nonzero = has_nonzero || - array3_has_nonzero_component(mesh.fs[0].Nxx, row, 2e-12); - } - EXPECT_TRUE(has_nonzero); - } - } -} - -TEST(LagrangeBasis, QuadPartitionOfUnity) { - LagrangeBasis basis(ElementType::Quad4, 1); - svmp::FE::math::Vector xi{0.2, -0.3, 0.0}; - - std::vector values; - basis.evaluate_values(xi, values); - - double sum = std::accumulate(values.begin(), values.end(), 0.0); - EXPECT_NEAR(sum, 1.0, 1e-12); -} - -TEST(LagrangeBasis, LineGradientLinear) { - LagrangeBasis basis(ElementType::Line2, 1); - svmp::FE::math::Vector xi{0.0, 0.0, 0.0}; - std::vector grad; - basis.evaluate_gradients(xi, grad); - - ASSERT_EQ(grad.size(), 2u); - EXPECT_NEAR(grad[0][0], -0.5, 1e-12); - EXPECT_NEAR(grad[1][0], 0.5, 1e-12); -} - -TEST(LagrangeBasis, TrianglePartitionOfUnity) { - LagrangeBasis basis(ElementType::Triangle3, 1); - svmp::FE::math::Vector xi{0.2, 0.3, 0.0}; - std::vector values; - basis.evaluate_values(xi, values); - - double sum = std::accumulate(values.begin(), values.end(), 0.0); - EXPECT_NEAR(sum, 1.0, 1e-12); -} - 
-TEST(LagrangeBasis, SizeFormulasPerElement) { - for (int order = 0; order <= 3; ++order) { - { - LagrangeBasis line(ElementType::Line2, order); - EXPECT_EQ(line.size(), static_cast(order + 1)); - } - { - LagrangeBasis quad(ElementType::Quad4, order); - const std::size_t n1d = static_cast(order + 1); - EXPECT_EQ(quad.size(), n1d * n1d); - } - { - LagrangeBasis hex(ElementType::Hex8, order); - const std::size_t n1d = static_cast(order + 1); - EXPECT_EQ(hex.size(), n1d * n1d * n1d); - } - { - LagrangeBasis tri(ElementType::Triangle3, order); - const std::size_t expected = - static_cast(order + 1) * - static_cast(order + 2) / 2; - EXPECT_EQ(tri.size(), expected); - } - { - LagrangeBasis tet(ElementType::Tetra4, order); - const std::size_t expected = - static_cast(order + 1) * - static_cast(order + 2) * - static_cast(order + 3) / 6; - EXPECT_EQ(tet.size(), expected); - } - } -} - -TEST(LagrangeBasis, KroneckerDeltaAtNodes) { - const std::vector> cases = { - {ElementType::Line2, 1}, - {ElementType::Quad4, 1}, - {ElementType::Triangle3, 1}, - {ElementType::Tetra4, 1}, - {ElementType::Hex8, 1}, - {ElementType::Triangle3, 2}, - {ElementType::Tetra4, 2}, - {ElementType::Quad4, 2}, - {ElementType::Hex8, 2}, - {ElementType::Wedge6, 2} - }; - - for (const auto& c : cases) { - LagrangeBasis basis(c.first, c.second); - const auto& nodes = basis.nodes(); - ASSERT_EQ(nodes.size(), basis.size()); - - for (std::size_t i = 0; i < nodes.size(); ++i) { - std::vector vals; - basis.evaluate_values(nodes[i], vals); - ASSERT_EQ(vals.size(), nodes.size()); - for (std::size_t j = 0; j < nodes.size(); ++j) { - if (i == j) { - EXPECT_NEAR(vals[j], 1.0, 1e-12); - } else { - EXPECT_NEAR(vals[j], 0.0, 1e-12); - } - } - } - } -} - -TEST(LagrangeBasis, MatchesNodeOrderingConventionsForLinearAndQuadratic) { - // Tensor-product elements - expect_nodes_match_node_ordering(ElementType::Line2, 1, ElementType::Line2); - expect_nodes_match_node_ordering(ElementType::Line2, 2, ElementType::Line3); - 
expect_nodes_match_node_ordering(ElementType::Quad4, 1, ElementType::Quad4); - expect_nodes_match_node_ordering(ElementType::Quad4, 2, ElementType::Quad9); - expect_nodes_match_node_ordering(ElementType::Hex8, 1, ElementType::Hex8); - expect_nodes_match_node_ordering(ElementType::Hex8, 2, ElementType::Hex27); - - // Simplex elements - expect_nodes_match_node_ordering(ElementType::Triangle3, 1, ElementType::Triangle3); - expect_nodes_match_node_ordering(ElementType::Triangle3, 2, ElementType::Triangle6); - expect_nodes_match_node_ordering(ElementType::Tetra4, 1, ElementType::Tetra4); - expect_nodes_match_node_ordering(ElementType::Tetra4, 2, ElementType::Tetra10); - - // Mixed topology - expect_nodes_match_node_ordering(ElementType::Wedge6, 1, ElementType::Wedge6); - expect_nodes_match_node_ordering(ElementType::Wedge6, 2, ElementType::Wedge18); - - // Pyramid - expect_nodes_match_node_ordering(ElementType::Pyramid5, 1, ElementType::Pyramid5); - expect_nodes_match_node_ordering(ElementType::Pyramid14, 2, ElementType::Pyramid14); -} - -TEST(LagrangeBasis, WedgeAndPyramidPartitionOfUnity) { - { - LagrangeBasis wedge(ElementType::Wedge6, 1); - svmp::FE::math::Vector xi{Real(0.2), Real(0.1), Real(0.3)}; - std::vector vals; - wedge.evaluate_values(xi, vals); - const double sum = std::accumulate(vals.begin(), vals.end(), 0.0); - EXPECT_NEAR(sum, 1.0, 1e-12); - } - - { - LagrangeBasis wedge_q(ElementType::Wedge18, 2); - svmp::FE::math::Vector xi{Real(0.2), Real(0.1), Real(-0.25)}; - std::vector vals; - wedge_q.evaluate_values(xi, vals); - const double sum = std::accumulate(vals.begin(), vals.end(), 0.0); - EXPECT_NEAR(sum, 1.0, 1e-12); - - // Wedge18 should report 18 nodes in ReferenceNodeLayout - EXPECT_EQ(ReferenceNodeLayout::num_nodes(ElementType::Wedge18), 18u); - // Corner nodes should match Wedge6 vertices - auto v0 = ReferenceNodeLayout::get_node_coords(ElementType::Wedge18, 0); - auto v1 = ReferenceNodeLayout::get_node_coords(ElementType::Wedge18, 1); - auto v2 = 
ReferenceNodeLayout::get_node_coords(ElementType::Wedge18, 2); - EXPECT_NEAR(v0[0], Real(0), 1e-14); - EXPECT_NEAR(v0[1], Real(0), 1e-14); - EXPECT_NEAR(v0[2], Real(-1), 1e-14); - EXPECT_NEAR(v1[0], Real(1), 1e-14); - EXPECT_NEAR(v1[1], Real(0), 1e-14); - EXPECT_NEAR(v1[2], Real(-1), 1e-14); - EXPECT_NEAR(v2[0], Real(0), 1e-14); - EXPECT_NEAR(v2[1], Real(1), 1e-14); - EXPECT_NEAR(v2[2], Real(-1), 1e-14); - } - - { - LagrangeBasis pyr(ElementType::Pyramid5, 1); - svmp::FE::math::Vector xi{Real(0.1), Real(-0.2), Real(0.4)}; - std::vector vals; - pyr.evaluate_values(xi, vals); - const double sum = std::accumulate(vals.begin(), vals.end(), 0.0); - EXPECT_NEAR(sum, 1.0, 1e-12); - } -} - -TEST(LagrangeBasis, NonTensorStridedEvaluationMatchesPointwise) { - const std::vector> cases = { - {ElementType::Triangle3, 3}, - {ElementType::Tetra4, 3}, - {ElementType::Wedge6, 3}, - {ElementType::Pyramid5, 3}, - }; - const std::vector requests = { - {true, false, false}, - {false, true, false}, - {false, false, true}, - {true, true, false}, - {true, false, true}, - {false, true, true}, - {true, true, true}, - }; - - for (const auto& [type, order] : cases) { - for (const auto& request : requests) { - SCOPED_TRACE(static_cast(type)); - SCOPED_TRACE(order); - SCOPED_TRACE(request.values ? "values" : "no values"); - SCOPED_TRACE(request.gradients ? "gradients" : "no gradients"); - SCOPED_TRACE(request.hessians ? 
"hessians" : "no hessians"); - expect_strided_matches_pointwise(type, order, request); - } - } -} - -TEST(LagrangeBasis, RawOutputSinksMatchVectorEvaluationAcrossTopologies) { - const std::vector> cases = { - {ElementType::Line2, 4}, - {ElementType::Quad4, 3}, - {ElementType::Hex8, 3}, - {ElementType::Triangle3, 4}, - {ElementType::Tetra4, 3}, - {ElementType::Wedge6, 3}, - {ElementType::Pyramid5, 3}, - }; - - for (const auto& [type, order] : cases) { - SCOPED_TRACE(static_cast(type)); - SCOPED_TRACE(order); - expect_raw_to_matches_vector_evaluation(type, order); - } -} - -TEST(LagrangeBasis, CanonicalConstructorsSupportArbitraryOrders) { - const struct Case { - ElementType type; - int max_order; - } cases[] = { - {ElementType::Line2, 8}, - {ElementType::Triangle3, 6}, - {ElementType::Quad4, 6}, - {ElementType::Tetra4, 5}, - {ElementType::Hex8, 5}, - {ElementType::Wedge6, 5}, - {ElementType::Pyramid5, 5}, - }; - - for (const auto& c : cases) { - for (int order = 0; order <= c.max_order; ++order) { - LagrangeBasis basis(c.type, order); - EXPECT_EQ(basis.element_type(), c.type); - EXPECT_EQ(basis.order(), order); - EXPECT_EQ(basis.dimension(), expected_dimension(c.type)); - EXPECT_EQ(basis.size(), expected_lagrange_size(c.type, order)); - EXPECT_EQ(basis.nodes().size(), basis.size()); - } - } -} - -TEST(LagrangeBasis, AliasVariantsNormalizeToCanonicalPaths) { - expect_alias_matches_canonical( - ElementType::Line3, ElementType::Line2, 2, sample_points_for(ElementType::Line2)); - expect_alias_matches_canonical( - ElementType::Triangle6, ElementType::Triangle3, 2, sample_points_for(ElementType::Triangle3)); - expect_alias_matches_canonical( - ElementType::Quad9, ElementType::Quad4, 2, sample_points_for(ElementType::Quad4)); - expect_alias_matches_canonical( - ElementType::Tetra10, ElementType::Tetra4, 2, sample_points_for(ElementType::Tetra4)); - expect_alias_matches_canonical( - ElementType::Hex27, ElementType::Hex8, 2, sample_points_for(ElementType::Hex8)); - 
expect_alias_matches_canonical( - ElementType::Wedge18, ElementType::Wedge6, 2, sample_points_for(ElementType::Wedge6)); - expect_alias_matches_canonical( - ElementType::Pyramid14, ElementType::Pyramid5, 2, sample_points_for(ElementType::Pyramid5), - Real(2e-10)); -} -TEST(LagrangeBasis, SerendipityVariantsRemainRejected) { - EXPECT_THROW((void)LagrangeBasis(ElementType::Quad8, 2), svmp::FE::FEException); - EXPECT_THROW((void)LagrangeBasis(ElementType::Hex20, 2), svmp::FE::FEException); - EXPECT_THROW((void)LagrangeBasis(ElementType::Wedge15, 2), svmp::FE::FEException); - EXPECT_THROW((void)LagrangeBasis(ElementType::Pyramid13, 2), svmp::FE::FEException); -} +#include "FE/Basis/BasisExceptions.h" +#include "FE/Basis/BasisFactory.h" +#include "FE/Basis/LagrangeBasis.h" +#include "FE/Basis/NodeOrderingConventions.h" -TEST(LagrangeBasis, GeneratedNodeOrderingIsDeterministicAcrossOrders) { - const struct Case { - ElementType type; - int max_order; - } cases[] = { - {ElementType::Line2, 8}, - {ElementType::Triangle3, 6}, - {ElementType::Quad4, 6}, - {ElementType::Tetra4, 5}, - {ElementType::Hex8, 5}, - {ElementType::Wedge6, 5}, - {ElementType::Pyramid5, 5}, - }; +#include +#include +#include - for (const auto& c : cases) { - for (int order = 0; order <= c.max_order; ++order) { - const auto generated_a = ReferenceNodeLayout::get_lagrange_node_coords(c.type, order); - const auto generated_b = ReferenceNodeLayout::get_lagrange_node_coords(c.type, order); - ASSERT_EQ(generated_a.size(), expected_lagrange_size(c.type, order)); - ASSERT_EQ(generated_a.size(), generated_b.size()); - for (std::size_t i = 0; i < generated_a.size(); ++i) { - EXPECT_TRUE(points_close(generated_a[i], generated_b[i])); - } - } - } -} +using namespace svmp::FE; +using namespace svmp::FE::basis; -TEST(LagrangeBasis, NodeOrderingMatchesReferenceCoordinateOracles) { - const std::array cases = { - ElementType::Line2, ElementType::Line3, - ElementType::Triangle3, ElementType::Triangle6, - 
ElementType::Quad4, ElementType::Quad8, ElementType::Quad9, - ElementType::Tetra4, ElementType::Tetra10, - ElementType::Hex8, ElementType::Hex20, ElementType::Hex27, - ElementType::Wedge6, ElementType::Wedge15, ElementType::Wedge18, - ElementType::Pyramid5, ElementType::Pyramid13, ElementType::Pyramid14, - }; +namespace { - for (ElementType type : cases) { - const auto expected = reference_node_coords(type); - ASSERT_FALSE(expected.empty()); - ASSERT_EQ(ReferenceNodeLayout::num_nodes(type), expected.size()); - for (std::size_t i = 0; i < expected.size(); ++i) { - const auto actual = ReferenceNodeLayout::get_node_coords(type, i); - EXPECT_TRUE(points_close(actual, expected[i])) - << "Element type " << static_cast(type) - << ", node " << i; - } - } -} +using Point = math::Vector; -TEST(LagrangeBasis, GeneratedLowOrderOrderingMatchesPublicAliasPaths) { - const struct Case { - ElementType type; - int order; - ElementType public_alias; - } cases[] = { - {ElementType::Line2, 1, ElementType::Line2}, - {ElementType::Line2, 2, ElementType::Line3}, - {ElementType::Triangle3, 1, ElementType::Triangle3}, - {ElementType::Triangle3, 2, ElementType::Triangle6}, - {ElementType::Quad4, 1, ElementType::Quad4}, - {ElementType::Quad4, 2, ElementType::Quad9}, - {ElementType::Tetra4, 1, ElementType::Tetra4}, - {ElementType::Tetra4, 2, ElementType::Tetra10}, - {ElementType::Hex8, 1, ElementType::Hex8}, - {ElementType::Hex8, 2, ElementType::Hex27}, - {ElementType::Wedge6, 1, ElementType::Wedge6}, - {ElementType::Wedge6, 2, ElementType::Wedge18}, - {ElementType::Pyramid5, 1, ElementType::Pyramid5}, - {ElementType::Pyramid5, 2, ElementType::Pyramid14}, - }; +struct CanonicalCase { + ElementType type; + int order; + std::size_t size; + int dimension; + std::vector points; + Real derivative_tol; +}; - for (const auto& c : cases) { - const auto generated = ReferenceNodeLayout::get_lagrange_node_coords(c.type, c.order); - ASSERT_EQ(generated.size(), 
ReferenceNodeLayout::num_nodes(c.public_alias)); - for (std::size_t i = 0; i < generated.size(); ++i) { - const auto public_alias = ReferenceNodeLayout::get_node_coords(c.public_alias, i); - EXPECT_TRUE(points_close(generated[i], public_alias)); - } - } +const std::vector& canonical_cases() { + static const std::vector cases = { + {ElementType::Line2, 3, 4u, 1, + {{Real(-0.35), Real(0), Real(0)}, {Real(0.2), Real(0), Real(0)}}, + Real(1e-11)}, + {ElementType::Triangle3, 3, 10u, 2, + {{Real(0.15), Real(0.2), Real(0)}, {Real(0.25), Real(0.1), Real(0)}}, + Real(1e-9)}, + {ElementType::Quad4, 3, 16u, 2, + {{Real(0.2), Real(-0.3), Real(0)}, {Real(-0.45), Real(0.25), Real(0)}}, + Real(1e-11)}, + {ElementType::Tetra4, 2, 10u, 3, + {{Real(0.12), Real(0.18), Real(0.16)}, {Real(0.2), Real(0.1), Real(0.18)}}, + Real(1e-9)}, + {ElementType::Hex8, 2, 27u, 3, + {{Real(0.1), Real(-0.2), Real(0.3)}, {Real(-0.35), Real(0.25), Real(-0.15)}}, + Real(1e-10)}, + {ElementType::Wedge6, 2, 18u, 3, + {{Real(0.18), Real(0.22), Real(-0.2)}, {Real(0.12), Real(0.16), Real(0.1)}}, + Real(1e-9)}, + }; + return cases; } -TEST(LagrangeBasis, KroneckerDeltaAcrossCanonicalTopologiesAndOrders) { - const struct Case { - ElementType type; - int max_order; - } cases[] = { - {ElementType::Line2, 8}, - {ElementType::Triangle3, 6}, - {ElementType::Quad4, 6}, - {ElementType::Tetra4, 5}, - {ElementType::Hex8, 5}, - {ElementType::Wedge6, 5}, - {ElementType::Pyramid5, 5}, - }; - - for (const auto& c : cases) { - for (int order = 0; order <= c.max_order; ++order) { - LagrangeBasis basis(c.type, order); - ASSERT_EQ(basis.size(), expected_lagrange_size(c.type, order)); - - std::vector values; - for (std::size_t node_i = 0; node_i < basis.size(); ++node_i) { - basis.evaluate_values(basis.nodes()[node_i], values); - ASSERT_EQ(values.size(), basis.size()); - for (std::size_t basis_i = 0; basis_i < basis.size(); ++basis_i) { - EXPECT_NEAR(values[basis_i], basis_i == node_i ? 
Real(1) : Real(0), Real(2e-10)) - << "Element type " << static_cast(c.type) - << ", order " << order - << ", node " << node_i - << ", basis " << basis_i; - } - } +std::vector sample_points_for(ElementType type) { + for (const auto& c : canonical_cases()) { + if (c.type == type) { + return c.points; } } + return {}; } -TEST(LagrangeBasis, PartitionGradientAndHessianSumsAcrossCanonicalTopologiesAndOrders) { - const struct Case { - ElementType type; - int max_order; - Real tol; - } cases[] = { - {ElementType::Line2, 8, Real(1e-11)}, - {ElementType::Triangle3, 6, Real(1e-10)}, - {ElementType::Quad4, 6, Real(1e-10)}, - {ElementType::Tetra4, 5, Real(2e-10)}, - {ElementType::Hex8, 5, Real(2e-10)}, - {ElementType::Wedge6, 5, Real(5e-10)}, - {ElementType::Pyramid5, 5, Real(5e-7)}, - }; +void expect_kronecker_at_nodes(const LagrangeBasis& basis, Real tol) +{ + const auto& nodes = basis.nodes(); + ASSERT_EQ(nodes.size(), basis.size()); - for (const auto& c : cases) { - for (int order = 0; order <= c.max_order; ++order) { - LagrangeBasis basis(c.type, order); - expect_partition_gradient_hessian_sums(basis, dense_sample_points_for(c.type), c.tol, c.tol); + std::vector values; + for (std::size_t node = 0; node < nodes.size(); ++node) { + basis.evaluate_values(nodes[node], values); + ASSERT_EQ(values.size(), basis.size()); + for (std::size_t i = 0; i < values.size(); ++i) { + EXPECT_NEAR(values[i], i == node ? 
Real(1) : Real(0), tol) + << "node=" << node << " basis=" << i; } } } -TEST(LagrangeBasis, SimplexAxisScratchDynamicFallbackForHighOrder) { - const struct Case { - ElementType type; - int order; - Point point; - Real tolerance; - } cases[] = { - {ElementType::Triangle3, 13, Point{Real(0.19), Real(0.31), Real(0)}, Real(1e-8)}, - {ElementType::Tetra4, 13, Point{Real(0.13), Real(0.17), Real(0.19)}, Real(1e-7)}, - }; - - for (const auto& c : cases) { - LagrangeBasis basis(c.type, c.order); +void expect_partition_gradient_hessian_sums(const LagrangeBasis& basis, + const std::vector& points, + Real derivative_tol) +{ + for (const auto& xi : points) { std::vector values; std::vector gradients; std::vector hessians; - basis.evaluate_all(c.point, values, gradients, hessians); - - ASSERT_EQ(values.size(), basis.size()); - ASSERT_EQ(gradients.size(), basis.size()); - ASSERT_EQ(hessians.size(), basis.size()); + basis.evaluate_all(xi, values, gradients, hessians); Real value_sum = Real(0); Gradient gradient_sum{}; Hessian hessian_sum{}; - for (std::size_t i = 0; i < basis.size(); ++i) { + for (std::size_t i = 0; i < values.size(); ++i) { value_sum += values[i]; for (std::size_t d = 0; d < 3u; ++d) { gradient_sum[d] += gradients[i][d]; @@ -2253,776 +102,297 @@ TEST(LagrangeBasis, SimplexAxisScratchDynamicFallbackForHighOrder) { } } - EXPECT_NEAR(value_sum, Real(1), c.tolerance); - for (std::size_t d = 0; d < 3u; ++d) { - EXPECT_NEAR(gradient_sum[d], Real(0), c.tolerance); - for (std::size_t e = 0; e < 3u; ++e) { - EXPECT_NEAR(hessian_sum(d, e), Real(0), Real(10) * c.tolerance); + EXPECT_NEAR(value_sum, Real(1), Real(1e-12)); + for (int d = 0; d < basis.dimension(); ++d) { + EXPECT_NEAR(gradient_sum[static_cast(d)], Real(0), derivative_tol); + for (int e = 0; e < basis.dimension(); ++e) { + EXPECT_NEAR(hessian_sum(static_cast(d), + static_cast(e)), + Real(0), + derivative_tol); } } } } -TEST(LagrangeBasis, HighOrderAxisNearNodeMaintainsPartitionAndDerivativeSums) { - const int 
order = 16; - const LagrangeBasis basis(ElementType::Line2, order); - const Real node = Real(-1) + Real(2 * 5) / static_cast(order); - const Point point{node + Real(1e-7), Real(0), Real(0)}; - +void expect_raw_sinks_match_vector_evaluation(const LagrangeBasis& basis, + const Point& xi) +{ std::vector values; std::vector gradients; std::vector hessians; - basis.evaluate_all(point, values, gradients, hessians); - ASSERT_EQ(values.size(), basis.size()); - - Real value_sum = Real(0); - Real gradient_sum = Real(0); - Real hessian_sum = Real(0); - for (std::size_t i = 0; i < basis.size(); ++i) { - value_sum += values[i]; - gradient_sum += gradients[i][0]; - hessian_sum += hessians[i](0, 0); - } - - EXPECT_NEAR(value_sum, Real(1), Real(1e-12)); - EXPECT_NEAR(gradient_sum, Real(0), Real(1e-8)); - EXPECT_NEAR(hessian_sum, Real(0), Real(1e-5)); -} - -TEST(LagrangeBasis, PyramidFaceTracesMatchLowerDimensionalLagrangeBases) { - const PyramidFace faces[] = { - PyramidFace::Base, - PyramidFace::South, - PyramidFace::East, - PyramidFace::North, - PyramidFace::West, - }; - - for (int order = 1; order <= 5; ++order) { - for (const auto face : faces) { - expect_pyramid_face_trace_matches_lower_basis( - order, face, face == PyramidFace::Base ? 
Real(2e-10) : Real(5e-10)); - } - } -} - -TEST(LagrangeBasis, PyramidEdgeTracesMatchLineLagrangeBasis) { - const PyramidEdge edges[] = { - PyramidEdge::BaseSouth, - PyramidEdge::BaseEast, - PyramidEdge::BaseNorth, - PyramidEdge::BaseWest, - PyramidEdge::VerticalSW, - PyramidEdge::VerticalSE, - PyramidEdge::VerticalNE, - PyramidEdge::VerticalNW, - }; - - for (int order = 1; order <= 5; ++order) { - for (const auto edge : edges) { - expect_pyramid_edge_trace_matches_line_basis(order, edge, Real(5e-10)); - } - } -} + basis.evaluate_all(xi, values, gradients, hessians); -TEST(LagrangeBasis, Pyramid14RationalNodalAndPartition) { - using svmp::FE::basis::ReferenceNodeLayout; + std::vector flat_values(basis.size()); + std::vector flat_gradients(basis.size() * 3u); + std::vector flat_hessians(basis.size() * 9u); + basis.evaluate_values_to(xi, flat_values.data()); + basis.evaluate_gradients_to(xi, flat_gradients.data()); + basis.evaluate_hessians_to(xi, flat_hessians.data()); - LagrangeBasis basis(ElementType::Pyramid14, 2); - EXPECT_EQ(basis.dimension(), 3); - EXPECT_EQ(basis.size(), 14u); - - // Kronecker nodal property at all Pyramid14 nodes for (std::size_t i = 0; i < basis.size(); ++i) { - auto xi = ReferenceNodeLayout::get_node_coords(ElementType::Pyramid14, i); - std::vector vals; - basis.evaluate_values(xi, vals); - ASSERT_EQ(vals.size(), basis.size()); - for (std::size_t j = 0; j < basis.size(); ++j) { - const double expected = (i == j) ? 
1.0 : 0.0; - EXPECT_NEAR(vals[j], expected, 1e-12); + EXPECT_NEAR(flat_values[i], values[i], Real(1e-14)); + for (std::size_t d = 0; d < 3u; ++d) { + EXPECT_NEAR(flat_gradients[i * 3u + d], gradients[i][d], Real(1e-14)); + for (std::size_t e = 0; e < 3u; ++e) { + EXPECT_NEAR(flat_hessians[i * 9u + d * 3u + e], + hessians[i](d, e), + Real(1e-14)); + } } } - - // Partition of unity at an interior point - svmp::FE::math::Vector xi{Real(0.1), Real(-0.2), Real(0.3)}; - std::vector vals; - basis.evaluate_values(xi, vals); - const double sum = std::accumulate(vals.begin(), vals.end(), 0.0); - EXPECT_NEAR(sum, 1.0, 1e-12); } -TEST(LagrangeBasis, Pyramid14GradientSumZero) { - LagrangeBasis basis(ElementType::Pyramid14, 2); - svmp::FE::math::Vector xi{Real(0.15), Real(-0.1), Real(0.3)}; - - std::vector grads; - basis.evaluate_gradients(xi, grads); - ASSERT_EQ(grads.size(), basis.size()); - - Gradient sum{}; - for (const auto& g : grads) { - sum[0] += g[0]; - sum[1] += g[1]; - sum[2] += g[2]; +void expect_nodes_close(const std::vector& lhs, + const std::vector& rhs, + Real tol) +{ + ASSERT_EQ(lhs.size(), rhs.size()); + for (std::size_t i = 0; i < lhs.size(); ++i) { + EXPECT_NEAR(lhs[i][0], rhs[i][0], tol) << "node=" << i; + EXPECT_NEAR(lhs[i][1], rhs[i][1], tol) << "node=" << i; + EXPECT_NEAR(lhs[i][2], rhs[i][2], tol) << "node=" << i; } - EXPECT_NEAR(sum[0], 0.0, 1e-8); - EXPECT_NEAR(sum[1], 0.0, 1e-8); - EXPECT_NEAR(sum[2], 0.0, 1e-8); } -TEST(LagrangeBasis, HigherOrderP4KroneckerAndPartition) { - struct Case { - ElementType type; - int order; - svmp::FE::math::Vector xi; - }; - - const std::vector cases = { - {ElementType::Line2, 4, {Real(0.11), Real(0), Real(0)}}, - {ElementType::Quad4, 4, {Real(0.2), Real(-0.3), Real(0)}}, - {ElementType::Triangle3, 4, {Real(0.2), Real(0.3), Real(0)}}, - {ElementType::Hex8, 4, {Real(0.2), Real(-0.3), Real(0.4)}}, - }; - - for (const auto& c : cases) { - LagrangeBasis basis(c.type, c.order); - - // Partition of unity at an interior point 
- std::vector values; - basis.evaluate_values(c.xi, values); - const double sum = std::accumulate(values.begin(), values.end(), 0.0); - EXPECT_NEAR(sum, 1.0, 1e-12); +void expect_evaluations_match(const LagrangeBasis& lhs, + const LagrangeBasis& rhs, + const std::vector& points, + Real tol) +{ + ASSERT_EQ(lhs.size(), rhs.size()); - // Kronecker delta property at all nodes - const auto& nodes = basis.nodes(); - ASSERT_EQ(nodes.size(), basis.size()); - for (std::size_t i = 0; i < nodes.size(); ++i) { - basis.evaluate_values(nodes[i], values); - ASSERT_EQ(values.size(), nodes.size()); - for (std::size_t j = 0; j < nodes.size(); ++j) { - const double expected = (i == j) ? 1.0 : 0.0; - EXPECT_NEAR(values[j], expected, 1e-12); + for (const auto& xi : points) { + std::vector lhs_values; + std::vector rhs_values; + std::vector lhs_gradients; + std::vector rhs_gradients; + std::vector lhs_hessians; + std::vector rhs_hessians; + + lhs.evaluate_all(xi, lhs_values, lhs_gradients, lhs_hessians); + rhs.evaluate_all(xi, rhs_values, rhs_gradients, rhs_hessians); + + for (std::size_t i = 0; i < lhs.size(); ++i) { + EXPECT_NEAR(lhs_values[i], rhs_values[i], tol); + for (std::size_t d = 0; d < 3u; ++d) { + EXPECT_NEAR(lhs_gradients[i][d], rhs_gradients[i][d], tol); + for (std::size_t e = 0; e < 3u; ++e) { + EXPECT_NEAR(lhs_hessians[i](d, e), rhs_hessians[i](d, e), tol); + } } } } } -TEST(LagrangeBasis, Pyramid14InterpolatesQuadraticPolynomials) { - using svmp::FE::basis::ReferenceNodeLayout; - - LagrangeBasis basis(ElementType::Pyramid14, 2); - const std::size_t n = basis.size(); - - // Precompute nodal coordinates - std::vector> nodes; - nodes.reserve(n); - for (std::size_t i = 0; i < n; ++i) { - nodes.push_back(ReferenceNodeLayout::get_node_coords(ElementType::Pyramid14, i)); - } - - auto interpolate_and_check = [&](auto f, Real tol) { - // Nodal coefficients - std::vector coeffs(n); - for (std::size_t i = 0; i < n; ++i) { - const auto& x = nodes[i]; - coeffs[i] = f(x[0], x[1], 
x[2]); - } - - // Test at a few interior points - const svmp::FE::math::Vector test_pts[] = { - {Real(0.1), Real(-0.2), Real(0.2)}, - {Real(-0.2), Real(0.15), Real(0.4)}, - {Real(0.05), Real(0.05), Real(0.3)} - }; - - for (const auto& xi : test_pts) { - std::vector vals; - basis.evaluate_values(xi, vals); - ASSERT_EQ(vals.size(), n); - - Real u_interp = Real(0); - for (std::size_t i = 0; i < n; ++i) { - u_interp += coeffs[i] * vals[i]; - } - - const Real u_exact = f(xi[0], xi[1], xi[2]); - EXPECT_NEAR(u_interp, u_exact, tol); - } - }; - - // Constant, linear and quadratic monomials - interpolate_and_check([](Real, Real, Real) { return Real(1); }, Real(1e-12)); - interpolate_and_check([](Real x, Real, Real) { return x; }, Real(1e-11)); - interpolate_and_check([](Real, Real y, Real) { return y; }, Real(1e-11)); - interpolate_and_check([](Real, Real, Real z) { return z; }, Real(1e-11)); - interpolate_and_check([](Real x, Real y, Real) { return x * y; }, Real(1e-10)); - interpolate_and_check([](Real x, Real, Real z) { return x * z; }, Real(1e-10)); - interpolate_and_check([](Real, Real y, Real z) { return y * z; }, Real(1e-10)); - interpolate_and_check([](Real x, Real, Real) { return x * x; }, Real(1e-10)); - interpolate_and_check([](Real, Real y, Real) { return y * y; }, Real(1e-10)); - interpolate_and_check([](Real, Real, Real z) { return z * z; }, Real(1e-10)); +Real linear_function(const Point& p) { + return Real(2) + Real(3) * p[0] - Real(4) * p[1] + Real(5) * p[2]; } -TEST(LagrangeBasis, Pyramid14GradientMatchesLinearFunctionGradient) { - using svmp::FE::basis::ReferenceNodeLayout; - - LagrangeBasis basis(ElementType::Pyramid14, 2); - const std::size_t n = basis.size(); - - // Nodal coordinates and coefficients for f(x,y,z) = ax + by + cz - const Real a = Real(1.2); - const Real b = Real(-0.7); - const Real c = Real(0.5); - - std::vector coeffs(n); - for (std::size_t i = 0; i < n; ++i) { - const auto x = 
ReferenceNodeLayout::get_node_coords(ElementType::Pyramid14, i); - coeffs[i] = a * x[0] + b * x[1] + c * x[2]; - } - - const svmp::FE::math::Vector xi{Real(0.1), Real(-0.15), Real(0.35)}; - - std::vector grads; - basis.evaluate_gradients(xi, grads); - ASSERT_EQ(grads.size(), n); - - Gradient g_interp{}; - for (std::size_t i = 0; i < n; ++i) { - g_interp[0] += coeffs[i] * grads[i][0]; - g_interp[1] += coeffs[i] * grads[i][1]; - g_interp[2] += coeffs[i] * grads[i][2]; - } - - EXPECT_NEAR(g_interp[0], a, 1e-6); - EXPECT_NEAR(g_interp[1], b, 1e-6); - EXPECT_NEAR(g_interp[2], c, 1e-6); +Gradient linear_gradient() { + Gradient g{}; + g[0] = Real(3); + g[1] = Real(-4); + g[2] = Real(5); + return g; } -TEST(LagrangeBasis, PyramidApexValuesRemainExactAcrossRepresentativeOrders) { - const struct Case { - ElementType type; - int order; - } cases[] = { - {ElementType::Pyramid5, 1}, - {ElementType::Pyramid14, 2}, - {ElementType::Pyramid5, 4}, - }; - - const svmp::FE::math::Vector apex{Real(0), Real(0), Real(1)}; - for (const auto& c : cases) { - LagrangeBasis basis(c.type, c.order); - std::vector values; - basis.evaluate_values(apex, values); - ASSERT_EQ(values.size(), basis.size()); - - const auto& nodes = basis.nodes(); - auto apex_it = std::find_if( - nodes.begin(), nodes.end(), - [](const auto& node) { - return std::abs(node[0]) <= Real(1e-14) && - std::abs(node[1]) <= Real(1e-14) && - std::abs(node[2] - Real(1)) <= Real(1e-14); - }); - ASSERT_NE(apex_it, nodes.end()); - const std::size_t apex_index = static_cast( - std::distance(nodes.begin(), apex_it)); +Real quadratic_function(const Point& p) { + return Real(1) + Real(2) * p[0] - p[1] + Real(0.5) * p[2] + + p[0] * p[0] + Real(0.75) * p[1] * p[1] - Real(0.25) * p[2] * p[2] + + Real(0.2) * p[0] * p[1] - Real(0.3) * p[0] * p[2] + + Real(0.4) * p[1] * p[2]; +} - Real sum = Real(0); - for (std::size_t i = 0; i < values.size(); ++i) { - EXPECT_TRUE(std::isfinite(static_cast(values[i]))); - sum += values[i]; - const Real 
expected = (i == apex_index) ? Real(1) : Real(0); - EXPECT_NEAR(values[i], expected, 1e-12) - << "order " << c.order << ", basis " << i; - } - EXPECT_NEAR(sum, Real(1), 1e-12); +template +Real interpolate_value(const LagrangeBasis& basis, + const std::vector& values, + Function&& nodal_function) +{ + Real result = Real(0); + const auto& nodes = basis.nodes(); + for (std::size_t i = 0; i < values.size(); ++i) { + result += values[i] * nodal_function(nodes[i]); } + return result; } -TEST(LagrangeBasis, PyramidGradientAtExactApexThrowsWhenLimitIsNotUnique) { - const struct Case { - ElementType type; - int order; - } cases[] = { - {ElementType::Pyramid5, 1}, - {ElementType::Pyramid14, 2}, - {ElementType::Pyramid5, 4}, - }; +} // namespace - const svmp::FE::math::Vector apex{Real(0), Real(0), Real(1)}; - for (const auto& c : cases) { +TEST(LagrangeBasis, CanonicalTopologiesHaveExpectedSizesAndDimensions) { + for (const auto& c : canonical_cases()) { LagrangeBasis basis(c.type, c.order); - std::vector gradients; - EXPECT_THROW(basis.evaluate_gradients(apex, gradients), svmp::FE::basis::BasisEvaluationException) - << "order " << c.order; + EXPECT_EQ(basis.basis_type(), BasisType::Lagrange); + EXPECT_EQ(basis.element_type(), c.type); + EXPECT_EQ(basis.order(), c.order); + EXPECT_EQ(basis.size(), c.size); + EXPECT_EQ(basis.dimension(), c.dimension); } } -TEST(LagrangeBasis, PyramidApexValuesMatchDirectionalNearApexLimits) { - const struct Case { - ElementType type; - int order; - Real tol; - } cases[] = { - {ElementType::Pyramid5, 1, Real(3e-6)}, - {ElementType::Pyramid14, 2, Real(4e-6)}, - {ElementType::Pyramid5, 4, Real(1e-5)}, - }; - - const std::array, 4> directions = {{ - {Real(0), Real(0)}, - {Real(0.35), Real(-0.25)}, - {Real(-0.50), Real(0.45)}, - {Real(0.20), Real(0.60)}, - }}; - const Real t = Real(1e-6); - const svmp::FE::math::Vector apex{Real(0), Real(0), Real(1)}; - - for (const auto& c : cases) { +TEST(LagrangeBasis, 
CanonicalTopologiesAreNodalAndPartitionUnity) { + for (const auto& c : canonical_cases()) { LagrangeBasis basis(c.type, c.order); - std::vector apex_values; - basis.evaluate_values(apex, apex_values); - - for (const auto& direction : directions) { - const svmp::FE::math::Vector xi{ - t * direction[0], - t * direction[1], - Real(1) - t - }; - - std::vector values; - basis.evaluate_values(xi, values); - ASSERT_EQ(values.size(), apex_values.size()); - - for (std::size_t i = 0; i < values.size(); ++i) { - EXPECT_NEAR(values[i], apex_values[i], c.tol) - << "order " << c.order - << ", basis " << i - << ", direction (" << direction[0] << ", " << direction[1] << ")"; - } - } + expect_kronecker_at_nodes(basis, Real(2e-10)); + expect_partition_gradient_hessian_sums(basis, c.points, c.derivative_tol); } } -TEST(LagrangeBasis, PyramidNearApexGradientShowsDirectionalSpread) { - const struct Case { - ElementType type; - int order; - Real min_spread; - } cases[] = { - {ElementType::Pyramid5, 1, Real(5e-2)}, - {ElementType::Pyramid14, 2, Real(5e-2)}, - }; - - const std::array, 4> directions = {{ - {Real(0), Real(0)}, - {Real(0.45), Real(-0.30)}, - {Real(-0.35), Real(0.40)}, - {Real(0.25), Real(0.55)}, - }}; - const Real t = Real(1e-6); - - for (const auto& c : cases) { +TEST(LagrangeBasis, RawOutputSinksMatchVectorEvaluationAcrossTopologies) { + for (const auto& c : canonical_cases()) { LagrangeBasis basis(c.type, c.order); - double max_spread = 0.0; - - std::vector> directional_gradients; - directional_gradients.reserve(directions.size()); - for (const auto& direction : directions) { - const svmp::FE::math::Vector xi{ - t * direction[0], - t * direction[1], - Real(1) - t - }; - - std::vector gradients; - basis.evaluate_gradients(xi, gradients); - directional_gradients.push_back(std::move(gradients)); - } - - for (std::size_t i = 0; i < basis.size(); ++i) { - for (int d = 0; d < 3; ++d) { - double min_value = std::numeric_limits::infinity(); - double max_value = 
-std::numeric_limits::infinity(); - for (const auto& gradients : directional_gradients) { - const double value = static_cast(gradients[i][static_cast(d)]); - min_value = std::min(min_value, value); - max_value = std::max(max_value, value); - } - max_spread = std::max(max_spread, max_value - min_value); - } - } - - EXPECT_GT(max_spread, static_cast(c.min_spread)) - << "order " << c.order; + expect_raw_sinks_match_vector_evaluation(basis, c.points.front()); } } -TEST(LagrangeBasis, GradientSumZeroQuadAndTet) { - const std::vector>> cases = { - {ElementType::Quad4, svmp::FE::math::Vector{Real(0.2), Real(-0.1), Real(0)}}, - {ElementType::Tetra4, svmp::FE::math::Vector{Real(0.1), Real(0.2), Real(0.1)}} +TEST(LagrangeBasis, CompleteAliasesNormalizeToCanonicalBases) { + const std::vector> aliases = { + {ElementType::Line3, ElementType::Line2, 2}, + {ElementType::Triangle6, ElementType::Triangle3, 2}, + {ElementType::Quad9, ElementType::Quad4, 2}, + {ElementType::Tetra10, ElementType::Tetra4, 2}, + {ElementType::Hex27, ElementType::Hex8, 2}, + {ElementType::Wedge18, ElementType::Wedge6, 2}, }; - for (const auto& c : cases) { - LagrangeBasis basis(c.first, 1); - std::vector grads; - basis.evaluate_gradients(c.second, grads); - - ASSERT_EQ(grads.size(), basis.size()); - Gradient sum{}; - for (const auto& g : grads) { - sum[0] += g[0]; - sum[1] += g[1]; - sum[2] += g[2]; - } - EXPECT_NEAR(sum[0], 0.0, 1e-12); - EXPECT_NEAR(sum[1], 0.0, 1e-12); - EXPECT_NEAR(sum[2], 0.0, 1e-12); - } -} - -TEST(LagrangeBasis, HexPartitionAndGradientSumZeroOrderThree) { - LagrangeBasis basis(ElementType::Hex8, 3); - svmp::FE::math::Vector xi{Real(0.1), Real(-0.2), Real(0.25)}; - - std::vector values; - basis.evaluate_values(xi, values); - const double sum = std::accumulate(values.begin(), values.end(), 0.0); - EXPECT_NEAR(sum, 1.0, 1e-12); - - std::vector grads; - basis.evaluate_gradients(xi, grads); - Gradient gsum{}; - for (const auto& g : grads) { - gsum[0] += g[0]; - gsum[1] += g[1]; - 
gsum[2] += g[2]; - } - EXPECT_NEAR(gsum[0], 0.0, 1e-10); - EXPECT_NEAR(gsum[1], 0.0, 1e-10); - EXPECT_NEAR(gsum[2], 0.0, 1e-10); -} - -TEST(LagrangeBasis, OracleLine3ValuesGradientsAndHessians) { - LagrangeBasis basis(ElementType::Line3, 2); - const Point xi{Real(0.2), Real(0), Real(0)}; - - std::vector values; - std::vector gradients; - std::vector hessians; - basis.evaluate_values(xi, values); - basis.evaluate_gradients(xi, gradients); - basis.evaluate_hessians(xi, hessians); - - ASSERT_EQ(values.size(), 3u); - ASSERT_EQ(gradients.size(), 3u); - ASSERT_EQ(hessians.size(), 3u); - - const Real expected_values[] = {Real(-2) / Real(25), Real(3) / Real(25), Real(24) / Real(25)}; - const Real expected_gradients[] = {Real(-3) / Real(10), Real(7) / Real(10), Real(-2) / Real(5)}; - const Real expected_hessians[] = {Real(1), Real(1), Real(-2)}; + for (const auto& [alias, canonical, order] : aliases) { + LagrangeBasis alias_basis(alias, 1); + LagrangeBasis canonical_basis(canonical, order); + const auto generated = ReferenceNodeLayout::get_lagrange_node_coords(canonical, order); - for (std::size_t i = 0; i < 3; ++i) { - EXPECT_NEAR(values[i], expected_values[i], 1e-14); - EXPECT_NEAR(gradients[i][0], expected_gradients[i], 1e-14); - EXPECT_NEAR(hessians[i](0, 0), expected_hessians[i], 1e-14); + EXPECT_EQ(alias_basis.element_type(), canonical); + EXPECT_EQ(alias_basis.order(), order); + expect_nodes_close(alias_basis.nodes(), generated, Real(1e-14)); + expect_nodes_close(alias_basis.nodes(), canonical_basis.nodes(), Real(1e-14)); + expect_evaluations_match(alias_basis, + canonical_basis, + sample_points_for(canonical), + Real(1e-12)); } } -TEST(LagrangeBasis, OracleTriangle3ValuesGradientsAndHessians) { - LagrangeBasis basis(ElementType::Triangle3, 1); - const Point xi{Real(0.2), Real(0.3), Real(0)}; - - std::vector values; - std::vector gradients; - std::vector hessians; - basis.evaluate_values(xi, values); - basis.evaluate_gradients(xi, gradients); - 
basis.evaluate_hessians(xi, hessians); - - ASSERT_EQ(values.size(), 3u); - const Point expected_gradients[] = { - Point{Real(-1), Real(-1), Real(0)}, - Point{Real(1), Real(0), Real(0)}, - Point{Real(0), Real(1), Real(0)} +TEST(LagrangeBasis, NodeOrderingMatchesPublicAliasLayouts) { + const std::vector> aliases = { + {ElementType::Line2, ElementType::Line2, 1}, + {ElementType::Line3, ElementType::Line2, 2}, + {ElementType::Triangle3, ElementType::Triangle3, 1}, + {ElementType::Triangle6, ElementType::Triangle3, 2}, + {ElementType::Quad4, ElementType::Quad4, 1}, + {ElementType::Quad9, ElementType::Quad4, 2}, + {ElementType::Tetra4, ElementType::Tetra4, 1}, + {ElementType::Tetra10, ElementType::Tetra4, 2}, + {ElementType::Hex8, ElementType::Hex8, 1}, + {ElementType::Hex27, ElementType::Hex8, 2}, + {ElementType::Wedge6, ElementType::Wedge6, 1}, + {ElementType::Wedge18, ElementType::Wedge6, 2}, }; - const Real expected_values[] = {Real(0.5), Real(0.2), Real(0.3)}; - - for (std::size_t i = 0; i < 3; ++i) { - EXPECT_NEAR(values[i], expected_values[i], 1e-14); - EXPECT_NEAR(gradients[i][0], expected_gradients[i][0], 1e-14); - EXPECT_NEAR(gradients[i][1], expected_gradients[i][1], 1e-14); - for (int a = 0; a < 2; ++a) { - for (int b = 0; b < 2; ++b) { - EXPECT_NEAR(hessians[i](static_cast(a), static_cast(b)), - Real(0), 1e-14); - } - } - } -} - -TEST(LagrangeBasis, OracleQuad4ValuesGradientsAndHessians) { - LagrangeBasis basis(ElementType::Quad4, 1); - const Point xi{Real(0.2), Real(-0.4), Real(0)}; - - std::vector values; - std::vector gradients; - std::vector hessians; - basis.evaluate_values(xi, values); - basis.evaluate_gradients(xi, gradients); - basis.evaluate_hessians(xi, hessians); - ASSERT_EQ(values.size(), 4u); - const Real expected_values[] = {Real(7) / Real(25), Real(21) / Real(50), - Real(9) / Real(50), Real(3) / Real(25)}; - const Point expected_gradients[] = { - Point{Real(-7) / Real(20), Real(-1) / Real(5), Real(0)}, - Point{Real(7) / Real(20), Real(-3) / 
Real(10), Real(0)}, - Point{Real(3) / Real(20), Real(3) / Real(10), Real(0)}, - Point{Real(-3) / Real(20), Real(1) / Real(5), Real(0)} - }; - const Real expected_hxy[] = {Real(1) / Real(4), Real(-1) / Real(4), - Real(1) / Real(4), Real(-1) / Real(4)}; + for (const auto& [alias, canonical, order] : aliases) { + const auto generated = ReferenceNodeLayout::get_lagrange_node_coords(canonical, order); + ASSERT_EQ(generated.size(), ReferenceNodeLayout::num_nodes(alias)); - for (std::size_t i = 0; i < 4; ++i) { - EXPECT_NEAR(values[i], expected_values[i], 1e-14); - EXPECT_NEAR(gradients[i][0], expected_gradients[i][0], 1e-14); - EXPECT_NEAR(gradients[i][1], expected_gradients[i][1], 1e-14); - EXPECT_NEAR(hessians[i](0, 0), Real(0), 1e-14); - EXPECT_NEAR(hessians[i](1, 1), Real(0), 1e-14); - EXPECT_NEAR(hessians[i](0, 1), expected_hxy[i], 1e-14); - EXPECT_NEAR(hessians[i](1, 0), expected_hxy[i], 1e-14); + for (std::size_t i = 0; i < generated.size(); ++i) { + const auto public_node = ReferenceNodeLayout::get_node_coords(alias, i); + EXPECT_NEAR(public_node[0], generated[i][0], Real(1e-14)) << "node=" << i; + EXPECT_NEAR(public_node[1], generated[i][1], Real(1e-14)) << "node=" << i; + EXPECT_NEAR(public_node[2], generated[i][2], Real(1e-14)) << "node=" << i; + } } } -TEST(LagrangeBasis, OracleWedge6ValuesGradientsAndHessians) { - LagrangeBasis basis(ElementType::Wedge6, 1); - const Point xi{Real(0.2), Real(0.25), Real(-0.3)}; - - std::vector values; - std::vector gradients; - std::vector hessians; - basis.evaluate_values(xi, values); - basis.evaluate_gradients(xi, gradients); - basis.evaluate_hessians(xi, hessians); - - ASSERT_EQ(values.size(), 6u); - const Real expected_values[] = { - Real(143) / Real(400), Real(13) / Real(100), Real(13) / Real(80), - Real(77) / Real(400), Real(7) / Real(100), Real(7) / Real(80) - }; - const Point expected_gradients[] = { - Point{Real(-13) / Real(20), Real(-13) / Real(20), Real(-11) / Real(40)}, - Point{Real(13) / Real(20), Real(0), 
Real(-1) / Real(10)}, - Point{Real(0), Real(13) / Real(20), Real(-1) / Real(8)}, - Point{Real(-7) / Real(20), Real(-7) / Real(20), Real(11) / Real(40)}, - Point{Real(7) / Real(20), Real(0), Real(1) / Real(10)}, - Point{Real(0), Real(7) / Real(20), Real(1) / Real(8)} - }; - const Point expected_hxz[] = { - Point{Real(1) / Real(2), Real(1) / Real(2), Real(0)}, - Point{Real(-1) / Real(2), Real(0), Real(0)}, - Point{Real(0), Real(-1) / Real(2), Real(0)}, - Point{Real(-1) / Real(2), Real(-1) / Real(2), Real(0)}, - Point{Real(1) / Real(2), Real(0), Real(0)}, - Point{Real(0), Real(1) / Real(2), Real(0)} +TEST(LagrangeBasis, RemovedOrSerendipityFamiliesAreRejected) { + const std::array unsupported = { + ElementType::Quad8, + ElementType::Hex20, + ElementType::Wedge15, + ElementType::Pyramid5, + ElementType::Pyramid13, + ElementType::Pyramid14, }; - for (std::size_t i = 0; i < 6; ++i) { - EXPECT_NEAR(values[i], expected_values[i], 1e-14); - EXPECT_NEAR(gradients[i][0], expected_gradients[i][0], 1e-14); - EXPECT_NEAR(gradients[i][1], expected_gradients[i][1], 1e-14); - EXPECT_NEAR(gradients[i][2], expected_gradients[i][2], 1e-14); - EXPECT_NEAR(hessians[i](0, 0), Real(0), 1e-14); - EXPECT_NEAR(hessians[i](1, 1), Real(0), 1e-14); - EXPECT_NEAR(hessians[i](2, 2), Real(0), 1e-14); - EXPECT_NEAR(hessians[i](0, 1), Real(0), 1e-14); - EXPECT_NEAR(hessians[i](1, 0), Real(0), 1e-14); - EXPECT_NEAR(hessians[i](0, 2), expected_hxz[i][0], 1e-14); - EXPECT_NEAR(hessians[i](2, 0), expected_hxz[i][0], 1e-14); - EXPECT_NEAR(hessians[i](1, 2), expected_hxz[i][1], 1e-14); - EXPECT_NEAR(hessians[i](2, 1), expected_hxz[i][1], 1e-14); + for (const auto type : unsupported) { + EXPECT_THROW((void)LagrangeBasis(type, 2), BasisElementCompatibilityException) + << "element=" << static_cast(type); } } -TEST(LagrangeBasis, DeterministicBoundarySweepMaintainsPartitionAndFiniteDerivatives) { - const std::vector> cases = { - {ElementType::Line2, 1}, - {ElementType::Line3, 2}, - {ElementType::Triangle3, 
1}, - {ElementType::Triangle6, 2}, - {ElementType::Quad4, 1}, - {ElementType::Quad9, 2}, - {ElementType::Tetra4, 1}, - {ElementType::Tetra10, 2}, - {ElementType::Hex8, 1}, - {ElementType::Hex27, 2}, - {ElementType::Wedge6, 1}, - {ElementType::Wedge18, 2}, - {ElementType::Pyramid5, 1}, - {ElementType::Pyramid14, 2}, +TEST(LagrangeBasis, LinearPolynomialReproductionAcrossLinearTopologies) { + const std::vector> cases = { + {ElementType::Line2, {Real(-0.2), Real(0), Real(0)}}, + {ElementType::Triangle3, {Real(0.2), Real(0.3), Real(0)}}, + {ElementType::Quad4, {Real(0.25), Real(-0.4), Real(0)}}, + {ElementType::Tetra4, {Real(0.1), Real(0.2), Real(0.3)}}, + {ElementType::Hex8, {Real(0.15), Real(-0.2), Real(0.25)}}, + {ElementType::Wedge6, {Real(0.2), Real(0.15), Real(-0.3)}}, }; + const Gradient expected_gradient = linear_gradient(); - for (const auto& [type, order] : cases) { - LagrangeBasis basis(type, order); - for (const auto& xi : boundary_stress_points_for(type)) { - std::vector values; - std::vector gradients; - std::vector hessians; - basis.evaluate_values(xi, values); - basis.evaluate_gradients(xi, gradients); - basis.evaluate_hessians(xi, hessians); + for (const auto& [type, point] : cases) { + LagrangeBasis basis(type, 1); + std::vector values; + std::vector gradients; + basis.evaluate_values(point, values); + basis.evaluate_gradients(point, gradients); - ASSERT_EQ(values.size(), basis.size()); - ASSERT_EQ(gradients.size(), basis.size()); - ASSERT_EQ(hessians.size(), basis.size()); + const Real interpolated = + interpolate_value(basis, values, linear_function); + EXPECT_NEAR(interpolated, linear_function(point), Real(1e-12)); - Real sum = Real(0); - for (Real value : values) { - EXPECT_TRUE(std::isfinite(value)); - sum += value; + Gradient interpolated_gradient{}; + for (std::size_t i = 0; i < gradients.size(); ++i) { + const Real nodal_value = linear_function(basis.nodes()[i]); + for (int d = 0; d < basis.dimension(); ++d) { + 
interpolated_gradient[static_cast(d)] += + nodal_value * gradients[i][static_cast(d)]; } - expect_all_finite(gradients); - expect_hessians_finite(hessians, basis.dimension()); - EXPECT_NEAR(sum, Real(1), type == ElementType::Pyramid5 || type == ElementType::Pyramid14 - ? Real(1e-8) - : Real(1e-12)) - << "type=" << static_cast(type) - << ", order=" << order - << ", xi=(" << xi[0] << "," << xi[1] << "," << xi[2] << ")"; } - } -} - -TEST(LagrangeBasis, FiniteDifferenceGradientsAcrossSupportedLinearShapes) { - const std::vector cases = { - {ElementType::Line2, 1, sample_points_for(ElementType::Line2)}, - {ElementType::Triangle3, 1, sample_points_for(ElementType::Triangle3)}, - {ElementType::Quad4, 1, sample_points_for(ElementType::Quad4)}, - {ElementType::Tetra4, 1, sample_points_for(ElementType::Tetra4)}, - {ElementType::Hex8, 1, sample_points_for(ElementType::Hex8)}, - {ElementType::Wedge6, 1, sample_points_for(ElementType::Wedge6)}, - {ElementType::Pyramid5, 1, sample_points_for(ElementType::Pyramid5)}, - }; - - for (const auto& c : cases) { - expect_gradients_match_finite_difference(c, Real(1e-6), Real(1e-6)); - } -} - -TEST(LagrangeBasis, FiniteDifferenceGradientsAcrossSupportedQuadraticShapes) { - const std::vector cases = { - {ElementType::Line3, 2, sample_points_for(ElementType::Line3)}, - {ElementType::Triangle6, 2, sample_points_for(ElementType::Triangle6)}, - {ElementType::Quad9, 2, sample_points_for(ElementType::Quad9)}, - {ElementType::Tetra10, 2, sample_points_for(ElementType::Tetra10)}, - {ElementType::Hex27, 2, sample_points_for(ElementType::Hex27)}, - {ElementType::Wedge18, 2, sample_points_for(ElementType::Wedge18)}, - {ElementType::Pyramid14, 2, sample_points_for(ElementType::Pyramid14)}, - }; - - for (const auto& c : cases) { - expect_gradients_match_finite_difference(c, Real(1e-6), Real(2e-6)); - } -} - -TEST(LagrangeBasis, LinearPolynomialReproductionAcrossSupportedLinearShapes) { - const std::vector cases = { - {ElementType::Line2, 1, 
sample_points_for(ElementType::Line2)}, - {ElementType::Triangle3, 1, sample_points_for(ElementType::Triangle3)}, - {ElementType::Quad4, 1, sample_points_for(ElementType::Quad4)}, - {ElementType::Tetra4, 1, sample_points_for(ElementType::Tetra4)}, - {ElementType::Hex8, 1, sample_points_for(ElementType::Hex8)}, - {ElementType::Wedge6, 1, sample_points_for(ElementType::Wedge6)}, - {ElementType::Pyramid5, 1, sample_points_for(ElementType::Pyramid5)}, - }; - - const std::vector> exponents = { - {0, 0, 0}, - {1, 0, 0}, - {0, 1, 0}, - {0, 0, 1}, - }; - - for (const auto& c : cases) { - const std::vector> relevant( - exponents.begin(), - exponents.begin() + static_cast(c.type == ElementType::Line2 ? 2 : - (c.type == ElementType::Triangle3 || - c.type == ElementType::Quad4) ? 3 : 4)); - expect_polynomial_reproduction(c, relevant, Real(1e-12)); - } -} - -TEST(LagrangeBasis, QuadraticPolynomialReproductionAcrossSupportedQuadraticShapes) { - const std::vector cases = { - {ElementType::Line3, 2, sample_points_for(ElementType::Line3)}, - {ElementType::Triangle6, 2, sample_points_for(ElementType::Triangle6)}, - {ElementType::Quad9, 2, sample_points_for(ElementType::Quad9)}, - {ElementType::Tetra10, 2, sample_points_for(ElementType::Tetra10)}, - {ElementType::Hex27, 2, sample_points_for(ElementType::Hex27)}, - {ElementType::Wedge18, 2, sample_points_for(ElementType::Wedge18)}, - {ElementType::Pyramid14, 2, sample_points_for(ElementType::Pyramid14)}, - }; - - const std::vector> line_exponents = { - {0, 0, 0}, {1, 0, 0}, {2, 0, 0} - }; - const std::vector> surface_exponents = { - {0, 0, 0}, {1, 0, 0}, {0, 1, 0}, - {2, 0, 0}, {1, 1, 0}, {0, 2, 0} - }; - const std::vector> volume_exponents = { - {0, 0, 0}, {1, 0, 0}, {0, 1, 0}, {0, 0, 1}, - {2, 0, 0}, {1, 1, 0}, {0, 2, 0}, - {1, 0, 1}, {0, 1, 1}, {0, 0, 2} - }; - - for (const auto& c : cases) { - if (c.type == ElementType::Line3) { - expect_polynomial_reproduction(c, line_exponents, Real(1e-12)); - } else if (c.type == 
ElementType::Triangle6 || c.type == ElementType::Quad9) { - expect_polynomial_reproduction(c, surface_exponents, Real(1e-11)); - } else { - expect_polynomial_reproduction(c, volume_exponents, Real(2e-10)); + for (int d = 0; d < basis.dimension(); ++d) { + EXPECT_NEAR(interpolated_gradient[static_cast(d)], + expected_gradient[static_cast(d)], + Real(1e-12)); } } } -TEST(LagrangeBasis, HighOrderTensorLagrangeMaintainsPartitionAndDerivativeSums) { - const std::vector cases = { - {ElementType::Line2, 8, {Point{-0.875, 0, 0}, Point{0.125, 0, 0}, Point{1, 0, 0}}}, - {ElementType::Quad4, 7, {Point{0.2, -0.35, 0}, Point{-1, 0.5, 0}, Point{0.5, 1, 0}}}, - {ElementType::Hex8, 6, {Point{0.1, -0.2, 0.3}, Point{-1, 0.5, 1}, Point{0.75, -1, -0.5}}}, +TEST(LagrangeBasis, QuadraticPolynomialReproductionAcrossQuadraticAliases) { + const std::vector> cases = { + {ElementType::Line3, {Real(-0.2), Real(0), Real(0)}}, + {ElementType::Triangle6, {Real(0.2), Real(0.3), Real(0)}}, + {ElementType::Quad9, {Real(0.25), Real(-0.4), Real(0)}}, + {ElementType::Tetra10, {Real(0.1), Real(0.2), Real(0.3)}}, + {ElementType::Hex27, {Real(0.15), Real(-0.2), Real(0.25)}}, + {ElementType::Wedge18, {Real(0.2), Real(0.15), Real(-0.3)}}, }; - for (const auto& c : cases) { - LagrangeBasis basis(c.type, c.order); - expect_partition_gradient_hessian_sums(basis, c.points, Real(2e-12), Real(2e-8)); - } -} - -TEST(LagrangeBasis, HighOrderTensorLagrangeReproducesTensorPolynomials) { - const LagrangeAccuracyCase line{ElementType::Line2, - 8, - {Point{-0.73, 0, 0}, Point{-0.1, 0, 0}, Point{0.64, 0, 0}}}; - expect_polynomial_reproduction(line, - {{0, 0, 0}, {1, 0, 0}, {4, 0, 0}, {8, 0, 0}}, - Real(1e-11)); - - const LagrangeAccuracyCase quad{ElementType::Quad4, - 7, - {Point{-0.6, -0.2, 0}, Point{0.15, 0.45, 0}, Point{0.8, -0.55, 0}}}; - expect_polynomial_reproduction(quad, - {{0, 0, 0}, {7, 0, 0}, {0, 7, 0}, {4, 3, 0}}, - Real(5e-10)); - - const LagrangeAccuracyCase hex{ElementType::Hex8, - 6, - {Point{-0.4, 0.2, 
-0.3}, Point{0.35, -0.55, 0.25}, Point{0.75, 0.4, -0.65}}}; - expect_polynomial_reproduction(hex, - {{0, 0, 0}, {6, 0, 0}, {0, 6, 0}, {0, 0, 6}, {3, 2, 4}}, - Real(2e-9)); + for (const auto& [type, point] : cases) { + LagrangeBasis basis(type, 1); + std::vector values; + basis.evaluate_values(point, values); + + const Real interpolated = + interpolate_value(basis, values, quadratic_function); + EXPECT_NEAR(interpolated, quadratic_function(point), Real(5e-12)) + << "element=" << static_cast(type); + } +} + +TEST(LagrangeBasis, FactoryCreatesReducedScalarBasisFamilies) { + auto lagrange = + basis_factory::create(BasisRequest{ElementType::Hex27, BasisType::Lagrange, 1}); + ASSERT_NE(lagrange, nullptr); + EXPECT_EQ(lagrange->basis_type(), BasisType::Lagrange); + EXPECT_EQ(lagrange->element_type(), ElementType::Hex8); + EXPECT_EQ(lagrange->order(), 2); + + auto serendipity = + basis_factory::create(BasisRequest{ElementType::Quad8, BasisType::Serendipity, 2}); + ASSERT_NE(serendipity, nullptr); + EXPECT_EQ(serendipity->basis_type(), BasisType::Serendipity); + + EXPECT_THROW((void)basis_factory::create( + BasisRequest{ElementType::Pyramid5, BasisType::Lagrange, 1}), + BasisElementCompatibilityException); + EXPECT_THROW((void)basis_factory::create( + BasisRequest{ElementType::Pyramid13, BasisType::Serendipity, 2}), + BasisElementCompatibilityException); } diff --git a/tests/unitTests/FE/Basis/test_SerendipityTensorModal.cpp b/tests/unitTests/FE/Basis/test_SerendipityTensorModal.cpp index 9f2bf8be5..30f876420 100644 --- a/tests/unitTests/FE/Basis/test_SerendipityTensorModal.cpp +++ b/tests/unitTests/FE/Basis/test_SerendipityTensorModal.cpp @@ -98,19 +98,9 @@ TEST(SerendipityBasis, Wedge15IsNodalAndPartitionsUnity) { expect_partition_of_unity(basis, {Real(0.2), Real(0.3), Real(0.1)}); } -TEST(SerendipityBasis, Pyramid13IsNodalAndPartitionsUnity) { - SerendipityBasis basis(ElementType::Pyramid13, 2); - - EXPECT_EQ(basis.size(), 13u); - expect_nodal_delta(basis, - 
reference_nodes(ElementType::Pyramid13, basis.size()), - Real(1e-8)); - expect_partition_of_unity(basis, {Real(0.1), Real(-0.2), Real(0.4)}); -} - TEST(SerendipityBasis, RejectsUnsupportedSerendipityAliases) { EXPECT_THROW(SerendipityBasis(ElementType::Quad9, 2), FEException); + EXPECT_THROW(SerendipityBasis(ElementType::Pyramid13, 2), FEException); EXPECT_THROW(SerendipityBasis(ElementType::Pyramid14, 2), FEException); EXPECT_THROW(SerendipityBasis(ElementType::Quad8, 3), FEException); } - diff --git a/tests/unitTests/test_common.h b/tests/unitTests/test_common.h index 98709f600..7227b2beb 100644 --- a/tests/unitTests/test_common.h +++ b/tests/unitTests/test_common.h @@ -33,7 +33,6 @@ #include #include #include -#include #include "CepMod.h" #include "ComMod.h" #include "gtest/gtest.h" @@ -96,4 +95,4 @@ class TestBase { }; -#endif \ No newline at end of file +#endif From 3876ee1fb1c0cd3231a8a2fdf4ea79b10c1dac24 Mon Sep 17 00:00:00 2001 From: Zachary Sexton Date: Mon, 8 Jun 2026 13:11:04 -0700 Subject: [PATCH 07/22] removing prewarmed evaluations and switch to std library constants. 
removed associated unit tests for these changes --- Code/Source/solver/CMakeLists.txt | 6 - Code/Source/solver/FE/Basis/BasisFunction.cpp | 10 - Code/Source/solver/FE/Basis/BasisFunction.h | 2 - Code/Source/solver/FE/Basis/LagrangeBasis.cpp | 5 - Code/Source/solver/FE/Basis/LagrangeBasis.h | 2 - Code/Source/solver/FE/Math/MathConstants.h | 388 ------------------ Code/Source/solver/FE/Math/Matrix.h | 1 - Code/Source/solver/FE/Math/Vector.h | 19 +- Code/Source/solver/nn.cpp | 144 +++---- .../FE/Basis/test_BasisErrorPaths.cpp | 1 - .../unitTests/FE/Math/test_ExpressionOps.cpp | 1 - .../unitTests/FE/Math/test_MathConstants.cpp | 341 --------------- tests/unitTests/FE/Math/test_Matrix.cpp | 1 - tests/unitTests/FE/Math/test_MatrixExpr.cpp | 1 - tests/unitTests/FE/Math/test_Vector.cpp | 1 - tests/unitTests/FE/Math/test_VectorExpr.cpp | 1 - 16 files changed, 77 insertions(+), 847 deletions(-) delete mode 100644 Code/Source/solver/FE/Math/MathConstants.h delete mode 100644 tests/unitTests/FE/Math/test_MathConstants.cpp diff --git a/Code/Source/solver/CMakeLists.txt b/Code/Source/solver/CMakeLists.txt index bdebc4a52..eace4d0b2 100644 --- a/Code/Source/solver/CMakeLists.txt +++ b/Code/Source/solver/CMakeLists.txt @@ -258,17 +258,11 @@ file(GLOB SOLVER_FE_MATH_SRCS CONFIGURE_DEPENDS FE/Math/*.h ) -file(GLOB SOLVER_FE_QUADRATURE_SRCS CONFIGURE_DEPENDS - FE/Quadrature/*.cpp - FE/Quadrature/*.h -) - list(APPEND CSRCS ${SOLVER_CORE_SRCS} ${SOLVER_FE_COMMON_SRCS} ${SOLVER_FE_BASIS_SRCS} ${SOLVER_FE_MATH_SRCS} - ${SOLVER_FE_QUADRATURE_SRCS} ) # Set PETSc interace code. 
diff --git a/Code/Source/solver/FE/Basis/BasisFunction.cpp b/Code/Source/solver/FE/Basis/BasisFunction.cpp index 2a1d4f6b0..578c46c88 100644 --- a/Code/Source/solver/FE/Basis/BasisFunction.cpp +++ b/Code/Source/solver/FE/Basis/BasisFunction.cpp @@ -19,12 +19,6 @@ struct BasisFunctionScratch { std::vector values; std::vector gradients; std::vector hessians; - - void prewarm(std::size_t max_size) { - values.reserve(max_size); - gradients.reserve(max_size); - hessians.reserve(max_size); - } }; BasisFunctionScratch& scratch() { @@ -34,10 +28,6 @@ BasisFunctionScratch& scratch() { } // namespace -void prewarm_basis_function_scratch(std::size_t max_size) { - scratch().prewarm(max_size); -} - void BasisFunction::evaluate_gradients(const math::Vector& xi, std::vector& gradients) const { (void)xi; diff --git a/Code/Source/solver/FE/Basis/BasisFunction.h b/Code/Source/solver/FE/Basis/BasisFunction.h index dbabf7061..bf6ac5de7 100644 --- a/Code/Source/solver/FE/Basis/BasisFunction.h +++ b/Code/Source/solver/FE/Basis/BasisFunction.h @@ -23,8 +23,6 @@ namespace basis { using Gradient = math::Vector; using Hessian = math::Matrix; -void prewarm_basis_function_scratch(std::size_t max_size); - [[nodiscard]] inline Hessian make_symmetric_hessian(Real xx, Real yy, Real zz, diff --git a/Code/Source/solver/FE/Basis/LagrangeBasis.cpp b/Code/Source/solver/FE/Basis/LagrangeBasis.cpp index 7516d514a..372209722 100644 --- a/Code/Source/solver/FE/Basis/LagrangeBasis.cpp +++ b/Code/Source/solver/FE/Basis/LagrangeBasis.cpp @@ -303,11 +303,6 @@ void store_gradient(const Gradient& gradient, Real* dst) { } // namespace -void prewarm_lagrange_basis_scratch(int max_order, std::size_t max_qpts) { - const auto n = static_cast(std::max(0, max_order) + 1); - prewarm_basis_function_scratch(std::max(n * n * n, max_qpts)); -} - LagrangeBasis::LagrangeBasis(ElementType type, int order) : element_type_(type), order_(order) { const auto normalized = normalize_lagrange_request(element_type_, order_); diff 
--git a/Code/Source/solver/FE/Basis/LagrangeBasis.h b/Code/Source/solver/FE/Basis/LagrangeBasis.h index a5fe8e0fa..dae149872 100644 --- a/Code/Source/solver/FE/Basis/LagrangeBasis.h +++ b/Code/Source/solver/FE/Basis/LagrangeBasis.h @@ -18,8 +18,6 @@ namespace svmp { namespace FE { namespace basis { -void prewarm_lagrange_basis_scratch(int max_order, std::size_t max_qpts = 0); - class LagrangeBasis : public BasisFunction { public: using TensorNodeIndex = std::array; diff --git a/Code/Source/solver/FE/Math/MathConstants.h b/Code/Source/solver/FE/Math/MathConstants.h deleted file mode 100644 index 145520ab2..000000000 --- a/Code/Source/solver/FE/Math/MathConstants.h +++ /dev/null @@ -1,388 +0,0 @@ -#ifndef SVMP_FE_MATH_CONSTANTS_H -#define SVMP_FE_MATH_CONSTANTS_H - -/** - * @file MathConstants.h - * @brief Mathematical constants and numerical tolerances for FE computations - * - * This header provides mathematical constants (π, e, √2, etc.) and numerical - * tolerances used throughout the FE library. All constants are templated - * to support different precision types. 
- */ - -#include -#include -#include -#include - -namespace svmp { -namespace FE { -namespace math { - -/** - * @brief Mathematical constants templated by type - * @tparam T The numeric type (float, double, long double) - */ -template -struct Constants { - static_assert(std::is_floating_point_v, - "Constants only defined for floating-point types"); - - // Mathematical constants - static constexpr T pi = T(3.14159265358979323846264338327950288419716939937510L); - static constexpr T two_pi = T(6.28318530717958647692528676655900576839433879875021L); - static constexpr T half_pi = T(1.57079632679489661923132169163975144209858469968755L); - static constexpr T quarter_pi = T(0.78539816339744830961566084581987572104929234984378L); - static constexpr T inv_pi = T(0.31830988618379067153776752674502872406891929148091L); - static constexpr T inv_two_pi = T(0.15915494309189533576888376337251436203445964574046L); - - static constexpr T e = T(2.71828182845904523536028747135266249775724709369995L); - static constexpr T log2e = T(1.44269504088896340735992468100189213742664595415299L); - static constexpr T log10e = T(0.43429448190325182765112891891660508229439700580367L); - static constexpr T ln2 = T(0.69314718055994530941723212145817656807550013436026L); - static constexpr T ln10 = T(2.30258509299404568401799145468436420760110148862877L); - - static constexpr T sqrt2 = T(1.41421356237309504880168872420969807856967187537694L); - static constexpr T sqrt3 = T(1.73205080756887729352744634150587236694280525381038L); - static constexpr T inv_sqrt2 = T(0.70710678118654752440084436210484903928483593768847L); - static constexpr T inv_sqrt3 = T(0.57735026918962576450914878050195745564760175127013L); - - // Golden ratio - static constexpr T phi = T(1.61803398874989484820458683436563811772030917980576L); - - // Degrees to radians conversion - static constexpr T deg_to_rad = pi / T(180); - static constexpr T rad_to_deg = T(180) / pi; -}; - -/** - * @brief Numerical tolerances and machine 
epsilon - * @tparam T The numeric type - */ -template -struct Tolerances { - static_assert(std::is_floating_point_v, - "Tolerances only defined for floating-point types"); - - // Machine epsilon - static constexpr T epsilon = std::numeric_limits::epsilon(); - - // Default tolerance (1000 * machine epsilon) - static constexpr T tolerance = T(1000) * epsilon; - - // Strict tolerance (10 * machine epsilon) - static constexpr T strict = T(10) * epsilon; - - // Loose tolerance (10000 * machine epsilon) - static constexpr T loose = T(10000) * epsilon; - - // Square root of epsilon (useful for finite differences) - static inline const T sqrt_epsilon = std::sqrt(epsilon); - - // Cube root of epsilon (useful for numerical derivatives) - static inline const T cbrt_epsilon = std::cbrt(epsilon); - - // Smallest positive normalized value - static constexpr T min_positive = std::numeric_limits::min(); - - // Largest representable value - static constexpr T max_value = std::numeric_limits::max(); - - // Infinity - static constexpr T infinity = std::numeric_limits::infinity(); - - // Not-a-Number - static constexpr T nan = std::numeric_limits::quiet_NaN(); -}; - -/** - * @brief Convenient aliases for common types - */ -template inline constexpr T pi = Constants::pi; -template inline constexpr T two_pi = Constants::two_pi; -template inline constexpr T half_pi = Constants::half_pi; -template inline constexpr T quarter_pi = Constants::quarter_pi; -template inline constexpr T inv_pi = Constants::inv_pi; -template inline constexpr T inv_two_pi = Constants::inv_two_pi; - -template inline constexpr T e = Constants::e; -template inline constexpr T log2e = Constants::log2e; -template inline constexpr T log10e = Constants::log10e; -template inline constexpr T ln2 = Constants::ln2; -template inline constexpr T ln10 = Constants::ln10; - -template inline constexpr T sqrt2 = Constants::sqrt2; -template inline constexpr T sqrt3 = Constants::sqrt3; -template inline constexpr T inv_sqrt2 = 
Constants::inv_sqrt2; -template inline constexpr T inv_sqrt3 = Constants::inv_sqrt3; - -template inline constexpr T phi = Constants::phi; - -template inline constexpr T deg_to_rad = Constants::deg_to_rad; -template inline constexpr T rad_to_deg = Constants::rad_to_deg; - -template inline constexpr T epsilon = Tolerances::epsilon; -template inline constexpr T tolerance = Tolerances::tolerance; -template inline constexpr T strict_tol = Tolerances::strict; -template inline constexpr T loose_tol = Tolerances::loose; -template inline const T sqrt_epsilon = Tolerances::sqrt_epsilon; -template inline const T cbrt_epsilon = Tolerances::cbrt_epsilon; -template inline constexpr T min_positive = Tolerances::min_positive; -template inline constexpr T max_value = Tolerances::max_value; -template inline constexpr T infinity = Tolerances::infinity; - -/** - * @brief Comparison functions with tolerance - */ - -/** - * @brief Check if two values are approximately equal - * @param a First value - * @param b Second value - * @param tol Tolerance (default: 1000 * epsilon) - * @return true if |a - b| <= tol * max(|a|, |b|, 1) - */ -template -inline constexpr bool approx_equal(T a, T b, T tol = tolerance) { - static_assert(std::is_floating_point_v, - "approx_equal only defined for floating-point types"); - const T scale = std::max({std::abs(a), std::abs(b), T(1)}); - return std::abs(a - b) <= tol * scale; -} - -/** - * @brief Check if a value is approximately zero - * @param a Value to check - * @param tol Tolerance (default: 1000 * epsilon) - * @return true if |a| <= tol - */ -template -inline constexpr bool approx_zero(T a, T tol = tolerance) { - static_assert(std::is_floating_point_v, - "approx_zero only defined for floating-point types"); - return std::abs(a) <= tol; -} - -/** - * @brief Check if a value is positive (greater than tolerance) - * @param a Value to check - * @param tol Tolerance (default: 1000 * epsilon) - * @return true if a > tol - */ -template -inline constexpr bool 
is_positive(T a, T tol = tolerance) { - static_assert(std::is_floating_point_v, - "is_positive only defined for floating-point types"); - return a > tol; -} - -/** - * @brief Check if a value is negative (less than -tolerance) - * @param a Value to check - * @param tol Tolerance (default: 1000 * epsilon) - * @return true if a < -tol - */ -template -inline constexpr bool is_negative(T a, T tol = tolerance) { - static_assert(std::is_floating_point_v, - "is_negative only defined for floating-point types"); - return a < -tol; -} - -/** - * @brief Check if a value is finite (not infinite or NaN) - * @param a Value to check - * @return true if value is finite - */ -template -inline constexpr bool is_finite(T a) { - static_assert(std::is_floating_point_v, - "is_finite only defined for floating-point types"); - return std::isfinite(a); -} - -/** - * @brief Degrees to radians conversion - * @param degrees Angle in degrees - * @return Angle in radians - */ -template -inline constexpr T to_radians(T degrees) { - static_assert(std::is_floating_point_v, - "to_radians only defined for floating-point types"); - return degrees * deg_to_rad; -} - -/** - * @brief Radians to degrees conversion - * @param radians Angle in radians - * @return Angle in degrees - */ -template -inline constexpr T to_degrees(T radians) { - static_assert(std::is_floating_point_v, - "to_degrees only defined for floating-point types"); - return radians * rad_to_deg; -} - -// ============================================================================= -// Constants namespace for compatibility with test expectations -// ============================================================================= -namespace constants { - -// Mathematical constants (double precision defaults) -inline constexpr double PI = Constants::pi; -inline constexpr double PI_2 = Constants::half_pi; -inline constexpr double PI_4 = Constants::quarter_pi; -inline constexpr double TWO_PI = Constants::two_pi; -inline constexpr double INV_PI = 
Constants::inv_pi; - -inline constexpr double E = Constants::e; -inline constexpr double LN_2 = Constants::ln2; -inline constexpr double LN_10 = Constants::ln10; -inline constexpr double LOG10_E = Constants::log10e; -inline constexpr double LOG2_E = Constants::log2e; - -inline constexpr double SQRT_2 = Constants::sqrt2; -inline constexpr double SQRT_3 = Constants::sqrt3; -inline constexpr double SQRT_5 = 2.2360679774997896964091736687312L; -inline constexpr double INV_SQRT_2 = Constants::inv_sqrt2; -inline constexpr double INV_SQRT_3 = Constants::inv_sqrt3; - -inline constexpr double PHI = Constants::phi; - -// Angle conversion functions -template -inline constexpr T deg_to_rad(T degrees) { - return degrees * Constants::deg_to_rad; -} - -template -inline constexpr T rad_to_deg(T radians) { - return radians * Constants::rad_to_deg; -} - -// Templated tolerances -template -inline constexpr T tolerance() { - return Tolerances::tolerance; -} - -template -inline constexpr T machine_epsilon() { - return Tolerances::epsilon; -} - -// Additional constants and utility functions for tests -inline constexpr double DEFAULT_TOLERANCE = Tolerances::tolerance; -inline constexpr double DEFAULT_REL_TOLERANCE = 1e-12; -inline constexpr double GEOMETRY_TOLERANCE = 1e-10; -inline constexpr double SOLVER_TOLERANCE = Tolerances::strict; -inline constexpr double EPSILON = Tolerances::epsilon; -inline constexpr double INF_VALUE = Tolerances::infinity; // Renamed from INFINITY -inline constexpr double NOT_A_NUMBER = Tolerances::nan; // Renamed from NAN -inline constexpr double MAX_DOUBLE = Tolerances::max_value; -inline constexpr double MIN_DOUBLE = Tolerances::min_positive; -inline constexpr double LOWEST_DOUBLE = -Tolerances::max_value; - -// Physical constants -inline constexpr double SPEED_OF_LIGHT = 299792458.0; // m/s -inline constexpr double GRAVITATIONAL_CONSTANT = 6.67430e-11; // m³/(kg·s²) -inline constexpr double PLANCK_CONSTANT = 6.62607015e-34; // J·s -inline constexpr double 
AVOGADRO_NUMBER = 6.02214076e23; // mol⁻¹ -inline constexpr double BOLTZMANN_CONSTANT = 1.380649e-23; // J/K -inline constexpr double STANDARD_GRAVITY = 9.80665; // m/s² - -// Float and long double versions -inline constexpr float PI_F = static_cast(PI); -inline constexpr float E_F = static_cast(E); -inline constexpr float SQRT_2_F = static_cast(SQRT_2); -inline constexpr float EPSILON_F = Tolerances::epsilon; - -inline constexpr long double PI_L = static_cast(PI); -inline constexpr long double E_L = static_cast(E); -inline constexpr long double SQRT_2_L = static_cast(SQRT_2); -inline constexpr long double EPSILON_L = Tolerances::epsilon; - -// Additional mathematical constants -inline constexpr double SQRT_PI = 1.7724538509055160272981674833411L; - -// Utility functions -template -inline constexpr int sign(T value) { - return (T(0) < value) - (value < T(0)); -} - -template -inline constexpr bool is_zero(T value, T tol = DEFAULT_TOLERANCE) { - return std::abs(value) <= tol; -} - -template -inline bool near(T a, T b, T tol = DEFAULT_TOLERANCE) { - return std::abs(a - b) <= tol; -} - -template -inline bool near_relative(T a, T b, T rel_tol = DEFAULT_REL_TOLERANCE) { - T scale = std::max(std::abs(a), std::abs(b)); - return std::abs(a - b) <= rel_tol * scale; -} - -template -inline constexpr T clamp(T value, T min_val, T max_val) { - return value < min_val ? min_val : (value > max_val ? max_val : value); -} - -template -inline constexpr T lerp(T a, T b, T t) { - return a + t * (b - a); -} - -template -inline T safe_divide(T numerator, T denominator, T default_val = T(0)) { - return is_zero(denominator) ? 
default_val : numerator / denominator; -} - -template -inline bool isinf(T value) { - return std::isinf(value); -} - -template -inline bool isnan(T value) { - return std::isnan(value); -} - -} // namespace constants - -// Physical constants for FE analysis -namespace physical_constants { - -// Material properties (SI units) -inline constexpr double water_density = 1000.0; // kg/m³ -inline constexpr double steel_density = 7850.0; // kg/m³ -inline constexpr double aluminum_density = 2700.0; // kg/m³ - -inline constexpr double water_viscosity = 0.001; // Pa·s at 20°C -inline constexpr double air_viscosity = 1.81e-5; // Pa·s at 20°C - -inline constexpr double steel_youngs_modulus = 200e9; // Pa -inline constexpr double aluminum_youngs_modulus = 70e9; // Pa - -inline constexpr double steel_poisson_ratio = 0.3; // dimensionless -inline constexpr double aluminum_poisson_ratio = 0.33; // dimensionless - -// Physical constants -inline constexpr double gravity = 9.80665; // m/s² -inline constexpr double gas_constant = 8.314462618; // J/(mol·K) -inline constexpr double boltzmann = 1.380649e-23; // J/K -inline constexpr double avogadro = 6.02214076e23; // mol⁻¹ - -} // namespace physical_constants - -} // namespace math -} // namespace FE -} // namespace svmp - -#endif // SVMP_FE_MATH_CONSTANTS_H diff --git a/Code/Source/solver/FE/Math/Matrix.h b/Code/Source/solver/FE/Math/Matrix.h index 6058ab943..8cb28e5d5 100644 --- a/Code/Source/solver/FE/Math/Matrix.h +++ b/Code/Source/solver/FE/Math/Matrix.h @@ -13,7 +13,6 @@ #include "MatrixExpr.h" #include "Vector.h" -#include "MathConstants.h" #include "../Common/Types.h" #include #include diff --git a/Code/Source/solver/FE/Math/Vector.h b/Code/Source/solver/FE/Math/Vector.h index 76c7be152..777f9945b 100644 --- a/Code/Source/solver/FE/Math/Vector.h +++ b/Code/Source/solver/FE/Math/Vector.h @@ -11,12 +11,12 @@ */ #include "VectorExpr.h" -#include "MathConstants.h" #include "../Common/Types.h" #include #include #include #include 
+#include #include #include #include @@ -25,6 +25,23 @@ namespace svmp { namespace FE { namespace math { +template +inline constexpr T tolerance = + std::is_floating_point_v ? T(1000) * std::numeric_limits::epsilon() : T(0); + +template +inline bool approx_zero(T value, T tol = tolerance) { + using std::abs; + return abs(value) <= tol; +} + +template +inline bool approx_equal(T a, T b, T tol = tolerance) { + using std::abs; + const T scale = std::max({abs(a), abs(b), T(1)}); + return abs(a - b) <= tol * scale; +} + /** * @brief Fixed-size vector for element-level computations * @tparam T Scalar type (float, double) diff --git a/Code/Source/solver/nn.cpp b/Code/Source/solver/nn.cpp index 51c126708..a9e0aebc3 100644 --- a/Code/Source/solver/nn.cpp +++ b/Code/Source/solver/nn.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -152,36 +153,27 @@ std::string solver_element_name(consts::ElementType eType) std::optional to_basis_selection(consts::ElementType eType) { - switch (eType) { - case consts::ElementType::LIN1: - return BasisSelection{fe::ElementType::Line2, fe::BasisType::Lagrange, 1}; - case consts::ElementType::LIN2: - return BasisSelection{fe::ElementType::Line3, fe::BasisType::Lagrange, 2}; - case consts::ElementType::TRI3: - return BasisSelection{fe::ElementType::Triangle3, fe::BasisType::Lagrange, 1}; - case consts::ElementType::TRI6: - return BasisSelection{fe::ElementType::Triangle6, fe::BasisType::Lagrange, 2}; - case consts::ElementType::QUD4: - return BasisSelection{fe::ElementType::Quad4, fe::BasisType::Lagrange, 1}; - case consts::ElementType::QUD8: - return BasisSelection{fe::ElementType::Quad8, fe::BasisType::Serendipity, 2}; - case consts::ElementType::QUD9: - return BasisSelection{fe::ElementType::Quad9, fe::BasisType::Lagrange, 2}; - case consts::ElementType::TET4: - return BasisSelection{fe::ElementType::Tetra4, fe::BasisType::Lagrange, 1}; - case consts::ElementType::TET10: - return 
BasisSelection{fe::ElementType::Tetra10, fe::BasisType::Lagrange, 2}; - case consts::ElementType::HEX8: - return BasisSelection{fe::ElementType::Hex8, fe::BasisType::Lagrange, 1}; - case consts::ElementType::HEX20: - return BasisSelection{fe::ElementType::Hex20, fe::BasisType::Serendipity, 2}; - case consts::ElementType::HEX27: - return BasisSelection{fe::ElementType::Hex27, fe::BasisType::Lagrange, 2}; - case consts::ElementType::WDG: - return BasisSelection{fe::ElementType::Wedge6, fe::BasisType::Lagrange, 1}; - default: - return std::nullopt; - } + static constexpr std::array supported{ + BasisSelection{fe::ElementType::Line2, fe::BasisType::Lagrange, 1}, + BasisSelection{fe::ElementType::Line3, fe::BasisType::Lagrange, 2}, + BasisSelection{fe::ElementType::Triangle3, fe::BasisType::Lagrange, 1}, + BasisSelection{fe::ElementType::Triangle6, fe::BasisType::Lagrange, 2}, + BasisSelection{fe::ElementType::Quad4, fe::BasisType::Lagrange, 1}, + BasisSelection{fe::ElementType::Quad8, fe::BasisType::Serendipity, 2}, + BasisSelection{fe::ElementType::Quad9, fe::BasisType::Lagrange, 2}, + BasisSelection{fe::ElementType::Tetra4, fe::BasisType::Lagrange, 1}, + BasisSelection{fe::ElementType::Tetra10, fe::BasisType::Lagrange, 2}, + BasisSelection{fe::ElementType::Hex8, fe::BasisType::Lagrange, 1}, + BasisSelection{fe::ElementType::Hex20, fe::BasisType::Serendipity, 2}, + BasisSelection{fe::ElementType::Hex27, fe::BasisType::Lagrange, 2}, + BasisSelection{fe::ElementType::Wedge6, fe::BasisType::Lagrange, 1}, + }; + + const int index = static_cast(eType) - static_cast(consts::ElementType::LIN1); + if (index >= 0 && static_cast(index) < supported.size()) { + return supported[static_cast(index)]; + } + return std::nullopt; } bool use_basis_adapter_for(consts::ElementType eType) @@ -189,11 +181,6 @@ bool use_basis_adapter_for(consts::ElementType eType) return basis_mode_allows_fe_adapter() && to_basis_selection(eType).has_value(); } -bool 
supports_basis_hessian_adapter_for(consts::ElementType eType) -{ - return basis_mode_allows_fe_adapter() && to_basis_selection(eType).has_value(); -} - bool supports_face_basis_adapter_for(consts::ElementType eType) { if (!basis_mode_allows_fe_adapter()) { @@ -223,26 +210,36 @@ std::shared_ptr make_basis_for_solver_element(consts::El __FILE__, __LINE__, __func__); } - febasis::BasisRequest request; - request.element_type = selection->element; - request.basis_type = selection->basis; - request.order = selection->order; - return febasis::basis_factory::create(request); + return febasis::basis_factory::create( + {selection->element, selection->basis, selection->order}); } -template -std::size_t mapped_basis_index(const std::array& map, - consts::ElementType eType, - const int solver_node) +std::span solver_to_basis_node_map(consts::ElementType eType) { - if (solver_node < 0 || static_cast(solver_node) >= map.size()) { - throw febasis::BasisNodeOrderingException( - "Solver node " + std::to_string(solver_node) + - " is outside node map for " + solver_element_name(eType), - __FILE__, __LINE__, __func__); - } + static constexpr std::array tri3{1, 2, 0}; + static constexpr std::array tri6{1, 2, 0, 4, 5, 3}; + static constexpr std::array tet4{1, 2, 3, 0}; + static constexpr std::array tet10{1, 2, 3, 0, 5, 9, 8, 4, 6, 7}; + static constexpr std::array hex27{ + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 25, 23, 22, 24, 20, 21, 26}; - return map[static_cast(solver_node)]; + switch (eType) { + case consts::ElementType::TRI3: + return tri3; + case consts::ElementType::TRI6: + case consts::ElementType::WDG: + return tri6; + case consts::ElementType::TET4: + return tet4; + case consts::ElementType::TET10: + return tet10; + case consts::ElementType::HEX27: + return hex27; + default: + return {}; + } } std::size_t basis_index_for_solver_node(consts::ElementType eType, const int solver_node) @@ -255,40 +252,17 @@ std::size_t 
basis_index_for_solver_node(consts::ElementType eType, const int sol } const auto node = static_cast(solver_node); - - switch (eType) { - case consts::ElementType::TRI3: { - static constexpr std::array map{1, 2, 0}; - return mapped_basis_index(map, eType, solver_node); - } - case consts::ElementType::TRI6: { - static constexpr std::array map{1, 2, 0, 4, 5, 3}; - return mapped_basis_index(map, eType, solver_node); - } - case consts::ElementType::TET4: { - static constexpr std::array map{1, 2, 3, 0}; - return mapped_basis_index(map, eType, solver_node); - } - case consts::ElementType::TET10: { - static constexpr std::array map{1, 2, 3, 0, 5, 9, 8, 4, 6, 7}; - return mapped_basis_index(map, eType, solver_node); - } - case consts::ElementType::WDG: { - static constexpr std::array map{1, 2, 0, 4, 5, 3}; - return mapped_basis_index(map, eType, solver_node); - } - case consts::ElementType::HEX27: { - static constexpr std::array map{ - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, - 25, 23, 22, 24, 20, 21, 26 - }; - return mapped_basis_index(map, eType, solver_node); - } - default: - return node; + const auto map = solver_to_basis_node_map(eType); + if (map.empty()) { + return node; + } + if (node < map.size()) { + return map[node]; } + throw febasis::BasisNodeOrderingException( + "Solver node " + std::to_string(solver_node) + + " is outside node map for " + solver_element_name(eType), + __FILE__, __LINE__, __func__); } fe::math::Vector make_basis_point(const febasis::BasisFunction& basis, @@ -710,7 +684,7 @@ void get_gn_nxx(const int insd, const int ind2, consts::ElementType eType, const return; } - if (supports_basis_hessian_adapter_for(eType)) { + if (use_basis_adapter_for(eType)) { try { evaluate_basis_hessians(insd, ind2, eType, eNoN, gaus_pt, xi, Nxx); return; diff --git a/tests/unitTests/FE/Basis/test_BasisErrorPaths.cpp b/tests/unitTests/FE/Basis/test_BasisErrorPaths.cpp index 7838702b0..430390e54 100644 --- 
a/tests/unitTests/FE/Basis/test_BasisErrorPaths.cpp +++ b/tests/unitTests/FE/Basis/test_BasisErrorPaths.cpp @@ -156,7 +156,6 @@ TEST(BasisErrorPaths, BasisFunctionDefaultsThrowForMissingDerivatives) { TEST(BasisErrorPaths, BasisFunctionFallbackWritesRawLayouts) { CompleteFallbackBasis basis; const math::Vector point{Real(0.25), Real(0.5), Real(-0.25)}; - prewarm_basis_function_scratch(basis.size()); std::vector flat_values(basis.size()); std::vector flat_gradients(basis.size() * 3u); diff --git a/tests/unitTests/FE/Math/test_ExpressionOps.cpp b/tests/unitTests/FE/Math/test_ExpressionOps.cpp index 307b308a1..a368e345e 100644 --- a/tests/unitTests/FE/Math/test_ExpressionOps.cpp +++ b/tests/unitTests/FE/Math/test_ExpressionOps.cpp @@ -7,7 +7,6 @@ #include "FE/Math/ExpressionOps.h" #include "FE/Math/Vector.h" #include "FE/Math/Matrix.h" -#include "FE/Math/MathConstants.h" #include #include #include diff --git a/tests/unitTests/FE/Math/test_MathConstants.cpp b/tests/unitTests/FE/Math/test_MathConstants.cpp deleted file mode 100644 index 5619690ed..000000000 --- a/tests/unitTests/FE/Math/test_MathConstants.cpp +++ /dev/null @@ -1,341 +0,0 @@ -/** - * @file test_MathConstants.cpp - * @brief Unit tests for MathConstants.h - mathematical constants and tolerances - */ - -#include -#include "FE/Math/MathConstants.h" -#include -#include -#include - -using namespace svmp::FE::math; - -// Test fixture for MathConstants tests -class MathConstantsTest : public ::testing::Test { -protected: - void SetUp() override {} - void TearDown() override {} -}; - -// ============================================================================= -// Mathematical Constants Tests -// ============================================================================= - -TEST_F(MathConstantsTest, PiConstants) { - // Test PI value - EXPECT_NEAR(constants::PI, 3.14159265358979323846, 1e-15); - - // Test PI/2 - EXPECT_NEAR(constants::PI_2, constants::PI / 2.0, 1e-15); - - // Test PI/4 - 
EXPECT_NEAR(constants::PI_4, constants::PI / 4.0, 1e-15); - - // Test 2*PI - EXPECT_NEAR(constants::TWO_PI, 2.0 * constants::PI, 1e-15); - - // Test 1/PI - EXPECT_NEAR(constants::INV_PI, 1.0 / constants::PI, 1e-15); - - // Test sqrt(PI) - EXPECT_NEAR(constants::SQRT_PI, std::sqrt(constants::PI), 1e-15); -} - -TEST_F(MathConstantsTest, EulerConstant) { - // Test e (Euler's number) - EXPECT_NEAR(constants::E, std::exp(1.0), 1e-15); - - // Test ln(2) - EXPECT_NEAR(constants::LN_2, std::log(2.0), 1e-15); - - // Test ln(10) - EXPECT_NEAR(constants::LN_10, std::log(10.0), 1e-15); - - // Test log10(e) - EXPECT_NEAR(constants::LOG10_E, std::log10(constants::E), 1e-15); - - // Test log2(e) - EXPECT_NEAR(constants::LOG2_E, std::log2(constants::E), 1e-15); -} - -TEST_F(MathConstantsTest, SquareRootConstants) { - // Test sqrt(2) - EXPECT_NEAR(constants::SQRT_2, std::sqrt(2.0), 1e-15); - - // Test sqrt(3) - EXPECT_NEAR(constants::SQRT_3, std::sqrt(3.0), 1e-15); - - // Test sqrt(5) - EXPECT_NEAR(constants::SQRT_5, std::sqrt(5.0), 1e-15); - - // Test 1/sqrt(2) - EXPECT_NEAR(constants::INV_SQRT_2, 1.0 / std::sqrt(2.0), 1e-15); - - // Test 1/sqrt(3) - EXPECT_NEAR(constants::INV_SQRT_3, 1.0 / std::sqrt(3.0), 1e-15); -} - -TEST_F(MathConstantsTest, GoldenRatio) { - // Test golden ratio φ = (1 + sqrt(5))/2 - EXPECT_NEAR(constants::PHI, (1.0 + std::sqrt(5.0)) / 2.0, 1e-15); - - // Property: φ² = φ + 1 - EXPECT_NEAR(constants::PHI * constants::PHI, constants::PHI + 1.0, 1e-14); - - // Property: 1/φ = φ - 1 - EXPECT_NEAR(1.0 / constants::PHI, constants::PHI - 1.0, 1e-14); -} - -// ============================================================================= -// Angle Conversion Tests -// ============================================================================= - -TEST_F(MathConstantsTest, DegreesToRadians) { - // Test common conversions - EXPECT_NEAR(constants::deg_to_rad(0.0), 0.0, 1e-15); - EXPECT_NEAR(constants::deg_to_rad(90.0), constants::PI_2, 1e-15); - 
EXPECT_NEAR(constants::deg_to_rad(180.0), constants::PI, 1e-15); - EXPECT_NEAR(constants::deg_to_rad(270.0), 3.0 * constants::PI_2, 1e-15); - EXPECT_NEAR(constants::deg_to_rad(360.0), constants::TWO_PI, 1e-15); - - // Test negative angles - EXPECT_NEAR(constants::deg_to_rad(-90.0), -constants::PI_2, 1e-15); - EXPECT_NEAR(constants::deg_to_rad(-180.0), -constants::PI, 1e-15); - - // Test arbitrary angle - EXPECT_NEAR(constants::deg_to_rad(45.0), constants::PI_4, 1e-15); - EXPECT_NEAR(constants::deg_to_rad(30.0), constants::PI / 6.0, 1e-15); - EXPECT_NEAR(constants::deg_to_rad(60.0), constants::PI / 3.0, 1e-15); -} - -TEST_F(MathConstantsTest, RadiansToDegrees) { - // Test common conversions - EXPECT_NEAR(constants::rad_to_deg(0.0), 0.0, 1e-13); - EXPECT_NEAR(constants::rad_to_deg(constants::PI_2), 90.0, 1e-13); - EXPECT_NEAR(constants::rad_to_deg(constants::PI), 180.0, 1e-13); - EXPECT_NEAR(constants::rad_to_deg(constants::TWO_PI), 360.0, 1e-13); - - // Test negative angles - EXPECT_NEAR(constants::rad_to_deg(-constants::PI), -180.0, 1e-13); - - // Test round-trip conversion - double angle_deg = 123.456; - double angle_rad = constants::deg_to_rad(angle_deg); - double back_to_deg = constants::rad_to_deg(angle_rad); - EXPECT_NEAR(back_to_deg, angle_deg, 1e-13); -} - -// ============================================================================= -// Machine Precision Tests -// ============================================================================= - -TEST_F(MathConstantsTest, MachineEpsilon) { - // Test double precision epsilon - EXPECT_EQ(constants::EPSILON, std::numeric_limits::epsilon()); - - // Test float precision epsilon - EXPECT_EQ(constants::EPSILON_F, std::numeric_limits::epsilon()); - - // Verify epsilon is the smallest value such that 1.0 + epsilon != 1.0 - double one_plus_eps = 1.0 + constants::EPSILON; - double one_plus_half_eps = 1.0 + constants::EPSILON / 2.0; - - EXPECT_NE(one_plus_eps, 1.0); - EXPECT_EQ(one_plus_half_eps, 1.0); -} - 
-TEST_F(MathConstantsTest, NumericalLimits) { - // Test infinity - EXPECT_TRUE(std::isinf(constants::INF_VALUE)); - EXPECT_GT(constants::INF_VALUE, std::numeric_limits::max()); - - // Test NaN - EXPECT_TRUE(std::isnan(constants::NOT_A_NUMBER)); - EXPECT_NE(constants::NOT_A_NUMBER, constants::NOT_A_NUMBER); // NaN != NaN - - // Test max/min values - EXPECT_EQ(constants::MAX_DOUBLE, std::numeric_limits::max()); - EXPECT_EQ(constants::MIN_DOUBLE, std::numeric_limits::min()); - EXPECT_EQ(constants::LOWEST_DOUBLE, std::numeric_limits::lowest()); -} - -// ============================================================================= -// Tolerance Tests -// ============================================================================= - -TEST_F(MathConstantsTest, DefaultTolerances) { - // Test default absolute tolerance - EXPECT_GT(constants::DEFAULT_TOLERANCE, 0.0); - EXPECT_LT(constants::DEFAULT_TOLERANCE, 1e-10); - - // Test default relative tolerance - EXPECT_GT(constants::DEFAULT_REL_TOLERANCE, 0.0); - EXPECT_LT(constants::DEFAULT_REL_TOLERANCE, 1e-10); - - // Test solver tolerance - EXPECT_GT(constants::SOLVER_TOLERANCE, 0.0); - EXPECT_LE(constants::SOLVER_TOLERANCE, constants::DEFAULT_TOLERANCE); - - // Test geometry tolerance (typically larger) - EXPECT_GT(constants::GEOMETRY_TOLERANCE, 0.0); - EXPECT_GE(constants::GEOMETRY_TOLERANCE, constants::DEFAULT_TOLERANCE); -} - -TEST_F(MathConstantsTest, ToleranceComparison) { - double a = 1.0; - double b = 1.0 + constants::DEFAULT_TOLERANCE / 2.0; - double c = 1.0 + constants::DEFAULT_TOLERANCE * 2.0; - - // Values within tolerance should be considered equal - EXPECT_TRUE(constants::near(a, b, constants::DEFAULT_TOLERANCE)); - - // Values outside tolerance should not be equal - EXPECT_FALSE(constants::near(a, c, constants::DEFAULT_TOLERANCE)); - - // Test relative tolerance - double large_a = 1e10; - double large_b = large_a * (1.0 + constants::DEFAULT_REL_TOLERANCE / 2.0); - double large_c = large_a * (1.0 + 
constants::DEFAULT_REL_TOLERANCE * 2.0); - - EXPECT_TRUE(constants::near_relative(large_a, large_b, constants::DEFAULT_REL_TOLERANCE)); - EXPECT_FALSE(constants::near_relative(large_a, large_c, constants::DEFAULT_REL_TOLERANCE)); -} - -TEST_F(MathConstantsTest, ZeroComparison) { - // Test near zero detection - EXPECT_TRUE(constants::is_zero(0.0)); - EXPECT_TRUE(constants::is_zero(constants::DEFAULT_TOLERANCE / 2.0)); - EXPECT_FALSE(constants::is_zero(constants::DEFAULT_TOLERANCE * 2.0)); - - // Test with negative values - EXPECT_TRUE(constants::is_zero(-constants::DEFAULT_TOLERANCE / 2.0)); - EXPECT_FALSE(constants::is_zero(-constants::DEFAULT_TOLERANCE * 2.0)); -} - -// ============================================================================= -// Physical Constants Tests -// ============================================================================= - -TEST_F(MathConstantsTest, PhysicalConstants) { - // Test speed of light (m/s) - EXPECT_NEAR(constants::SPEED_OF_LIGHT, 299792458.0, 1.0); - - // Test gravitational constant (m³/kg/s²) - EXPECT_NEAR(constants::GRAVITATIONAL_CONSTANT, 6.67430e-11, 1e-16); - - // Test standard gravity (m/s²) - EXPECT_NEAR(constants::STANDARD_GRAVITY, 9.80665, 1e-10); - - // Test Planck constant (J⋅s) - EXPECT_NEAR(constants::PLANCK_CONSTANT, 6.62607015e-34, 1e-42); - - // Test Boltzmann constant (J/K) - EXPECT_NEAR(constants::BOLTZMANN_CONSTANT, 1.380649e-23, 1e-29); - - // Test Avogadro's number (1/mol) - EXPECT_NEAR(constants::AVOGADRO_NUMBER, 6.02214076e23, 1e15); -} - -// ============================================================================= -// Compile-Time Constants Tests -// ============================================================================= - -TEST_F(MathConstantsTest, CompileTimeConstants) { - // Test that constants are constexpr (compile-time) - constexpr double pi = constants::PI; - constexpr double e = constants::E; - constexpr double sqrt2 = constants::SQRT_2; - - EXPECT_EQ(pi, constants::PI); - 
EXPECT_EQ(e, constants::E); - EXPECT_EQ(sqrt2, constants::SQRT_2); - - // Test compile-time functions - constexpr double angle_rad = constants::deg_to_rad(90.0); - EXPECT_NEAR(angle_rad, constants::PI_2, 1e-15); - - constexpr double angle_deg = constants::rad_to_deg(constants::PI); - EXPECT_NEAR(angle_deg, 180.0, 1e-13); -} - -// ============================================================================= -// Type Traits Tests -// ============================================================================= - -TEST_F(MathConstantsTest, TypedConstants) { - // Test float versions - EXPECT_NEAR(constants::PI_F, static_cast(constants::PI), 1e-7f); - EXPECT_NEAR(constants::E_F, static_cast(constants::E), 1e-7f); - EXPECT_NEAR(constants::SQRT_2_F, static_cast(constants::SQRT_2), 1e-7f); - - // Test long double versions - EXPECT_NEAR(constants::PI_L, static_cast(constants::PI), 1e-18L); - EXPECT_NEAR(constants::E_L, static_cast(constants::E), 1e-18L); -} - -// ============================================================================= -// Special Functions Tests -// ============================================================================= - -TEST_F(MathConstantsTest, SignFunction) { - // Test sign function - EXPECT_EQ(constants::sign(5.0), 1); - EXPECT_EQ(constants::sign(-5.0), -1); - EXPECT_EQ(constants::sign(0.0), 0); - - // Test with very small values - EXPECT_EQ(constants::sign(constants::EPSILON), 1); - EXPECT_EQ(constants::sign(-constants::EPSILON), -1); - - // Test with infinity - EXPECT_EQ(constants::sign(constants::INF_VALUE), 1); - EXPECT_EQ(constants::sign(-constants::INF_VALUE), -1); -} - -TEST_F(MathConstantsTest, SafeDivision) { - // Test safe division - EXPECT_NEAR(constants::safe_divide(10.0, 2.0), 5.0, 1e-15); - EXPECT_NEAR(constants::safe_divide(1.0, 3.0), 1.0/3.0, 1e-15); - - // Test division by zero returns default - EXPECT_EQ(constants::safe_divide(1.0, 0.0, 999.0), 999.0); - EXPECT_EQ(constants::safe_divide(1.0, constants::EPSILON/2.0, -1.0), 
-1.0); - - // Test division by near-zero - double tiny = constants::DEFAULT_TOLERANCE / 10.0; - EXPECT_EQ(constants::safe_divide(1.0, tiny, 0.0), 0.0); -} - -// ============================================================================= -// Utility Functions Tests -// ============================================================================= - -TEST_F(MathConstantsTest, ClampFunction) { - // Test clamping - EXPECT_EQ(constants::clamp(5.0, 0.0, 10.0), 5.0); - EXPECT_EQ(constants::clamp(-5.0, 0.0, 10.0), 0.0); - EXPECT_EQ(constants::clamp(15.0, 0.0, 10.0), 10.0); - - // Test with same min/max - EXPECT_EQ(constants::clamp(5.0, 3.0, 3.0), 3.0); - - // Test with infinity - EXPECT_EQ(constants::clamp(constants::INF_VALUE, 0.0, 10.0), 10.0); - EXPECT_EQ(constants::clamp(-constants::INF_VALUE, 0.0, 10.0), 0.0); -} - -TEST_F(MathConstantsTest, LerpFunction) { - // Test linear interpolation - EXPECT_NEAR(constants::lerp(0.0, 10.0, 0.0), 0.0, 1e-15); - EXPECT_NEAR(constants::lerp(0.0, 10.0, 1.0), 10.0, 1e-15); - EXPECT_NEAR(constants::lerp(0.0, 10.0, 0.5), 5.0, 1e-15); - EXPECT_NEAR(constants::lerp(0.0, 10.0, 0.25), 2.5, 1e-15); - - // Test extrapolation - EXPECT_NEAR(constants::lerp(0.0, 10.0, -0.5), -5.0, 1e-15); - EXPECT_NEAR(constants::lerp(0.0, 10.0, 1.5), 15.0, 1e-15); - - // Test with negative range - EXPECT_NEAR(constants::lerp(-10.0, -5.0, 0.5), -7.5, 1e-15); -} diff --git a/tests/unitTests/FE/Math/test_Matrix.cpp b/tests/unitTests/FE/Math/test_Matrix.cpp index c186c26ee..3b2fe664a 100644 --- a/tests/unitTests/FE/Math/test_Matrix.cpp +++ b/tests/unitTests/FE/Math/test_Matrix.cpp @@ -7,7 +7,6 @@ #include "FE/Math/Matrix.h" #include "FE/Math/Vector.h" #include "FE/Math/MatrixExpr.h" -#include "FE/Math/MathConstants.h" #include #include #include diff --git a/tests/unitTests/FE/Math/test_MatrixExpr.cpp b/tests/unitTests/FE/Math/test_MatrixExpr.cpp index 9486f409c..b17bce928 100644 --- a/tests/unitTests/FE/Math/test_MatrixExpr.cpp +++ 
b/tests/unitTests/FE/Math/test_MatrixExpr.cpp @@ -7,7 +7,6 @@ #include "FE/Math/Matrix.h" #include "FE/Math/MatrixExpr.h" #include "FE/Math/Vector.h" -#include "FE/Math/MathConstants.h" #include #include #include diff --git a/tests/unitTests/FE/Math/test_Vector.cpp b/tests/unitTests/FE/Math/test_Vector.cpp index a38a71727..754ad819d 100644 --- a/tests/unitTests/FE/Math/test_Vector.cpp +++ b/tests/unitTests/FE/Math/test_Vector.cpp @@ -6,7 +6,6 @@ #include #include "FE/Math/Vector.h" #include "FE/Math/VectorExpr.h" -#include "FE/Math/MathConstants.h" #include #include #include diff --git a/tests/unitTests/FE/Math/test_VectorExpr.cpp b/tests/unitTests/FE/Math/test_VectorExpr.cpp index bd6d85d51..0e7363c64 100644 --- a/tests/unitTests/FE/Math/test_VectorExpr.cpp +++ b/tests/unitTests/FE/Math/test_VectorExpr.cpp @@ -6,7 +6,6 @@ #include #include "FE/Math/Vector.h" #include "FE/Math/VectorExpr.h" -#include "FE/Math/MathConstants.h" #include #include #include From 2a97fa0466796913614d22a7e9f4c089e1a1d257 Mon Sep 17 00:00:00 2001 From: Zachary Sexton Date: Mon, 8 Jun 2026 13:43:26 -0700 Subject: [PATCH 08/22] consolidating math support for integer functions and expression operations --- .../solver/FE/Basis/SerendipityBasis.cpp | 25 +- Code/Source/solver/FE/Math/ExpressionOps.h | 99 ---- Code/Source/solver/FE/Math/IntegerMath.h | 98 ---- Code/Source/solver/FE/Math/MatrixExpr.h | 5 +- Code/Source/solver/FE/Math/VectorExpr.h | 59 +- .../unitTests/FE/Math/test_ExpressionOps.cpp | 508 ------------------ 6 files changed, 75 insertions(+), 719 deletions(-) delete mode 100644 Code/Source/solver/FE/Math/ExpressionOps.h delete mode 100644 Code/Source/solver/FE/Math/IntegerMath.h delete mode 100644 tests/unitTests/FE/Math/test_ExpressionOps.cpp diff --git a/Code/Source/solver/FE/Basis/SerendipityBasis.cpp b/Code/Source/solver/FE/Basis/SerendipityBasis.cpp index e6395cee4..237f8c2ce 100644 --- a/Code/Source/solver/FE/Basis/SerendipityBasis.cpp +++ 
b/Code/Source/solver/FE/Basis/SerendipityBasis.cpp @@ -9,7 +9,6 @@ #include "LagrangeBasis.h" #include "NodeOrderingConventions.h" #include "Math/DenseLinearAlgebra.h" -#include "Math/IntegerMath.h" #include #include @@ -21,8 +20,6 @@ namespace svmp { namespace FE { namespace basis { -using math::pow_int; - namespace { using Vec3 = math::Vector; @@ -150,7 +147,7 @@ std::vector quad_serendipity_inverse_vandermonde( const Real y = nodes[static_cast(row)][1]; for (int col = 0; col < n; ++col) { const auto [ax, ay] = exponents[static_cast(col)]; - vandermonde[idx(row, col)] = pow_int(x, ax) * pow_int(y, ay); + vandermonde[idx(row, col)] = std::pow(x, ax) * std::pow(y, ay); } } @@ -521,7 +518,7 @@ void SerendipityBasis::evaluate_values(const math::Vector& xi, std::vector monomials(size_, Real(0)); for (std::size_t j = 0; j < size_; ++j) { const auto [ax, ay] = quad_monomial_exponents_[j]; - monomials[j] = pow_int(x, ax) * pow_int(y, ay); + monomials[j] = std::pow(x, ax) * std::pow(y, ay); } for (std::size_t i = 0; i < size_; ++i) { @@ -609,8 +606,10 @@ void SerendipityBasis::evaluate_gradients(const math::Vector& xi, std::vector dmon_dy(size_, Real(0)); for (std::size_t j = 0; j < size_; ++j) { const auto [ax, ay] = quad_monomial_exponents_[j]; - dmon_dx[j] = (ax > 0) ? Real(ax) * pow_int(x, ax - 1) * pow_int(y, ay) : Real(0); - dmon_dy[j] = (ay > 0) ? pow_int(x, ax) * Real(ay) * pow_int(y, ay - 1) : Real(0); + dmon_dx[j] = + (ax > 0) ? Real(ax) * std::pow(x, ax - 1) * std::pow(y, ay) : Real(0); + dmon_dy[j] = + (ay > 0) ? std::pow(x, ax) * Real(ay) * std::pow(y, ay - 1) : Real(0); } for (std::size_t i = 0; i < size_; ++i) { @@ -747,9 +746,15 @@ void SerendipityBasis::evaluate_hessians(const math::Vector& xi, std::vector dyy(size_, Real(0)); for (std::size_t j = 0; j < size_; ++j) { const auto [ax, ay] = quad_monomial_exponents_[j]; - dxx[j] = (ax > 1) ? Real(ax * (ax - 1)) * pow_int(x, ax - 2) * pow_int(y, ay) : Real(0); - dxy[j] = (ax > 0 && ay > 0) ? 
Real(ax * ay) * pow_int(x, ax - 1) * pow_int(y, ay - 1) : Real(0); - dyy[j] = (ay > 1) ? Real(ay * (ay - 1)) * pow_int(x, ax) * pow_int(y, ay - 2) : Real(0); + dxx[j] = (ax > 1) + ? Real(ax * (ax - 1)) * std::pow(x, ax - 2) * std::pow(y, ay) + : Real(0); + dxy[j] = (ax > 0 && ay > 0) + ? Real(ax * ay) * std::pow(x, ax - 1) * std::pow(y, ay - 1) + : Real(0); + dyy[j] = (ay > 1) + ? Real(ay * (ay - 1)) * std::pow(x, ax) * std::pow(y, ay - 2) + : Real(0); } for (std::size_t i = 0; i < size_; ++i) { diff --git a/Code/Source/solver/FE/Math/ExpressionOps.h b/Code/Source/solver/FE/Math/ExpressionOps.h deleted file mode 100644 index 96cea1037..000000000 --- a/Code/Source/solver/FE/Math/ExpressionOps.h +++ /dev/null @@ -1,99 +0,0 @@ -#ifndef SVMP_FE_MATH_EXPRESSION_OPS_H -#define SVMP_FE_MATH_EXPRESSION_OPS_H - -/** - * @file ExpressionOps.h - * @brief Common expression template operators for vector and matrix expressions - * - * This header provides shared operator functors used by both VectorExpr.h and - * MatrixExpr.h to avoid code duplication and namespace conflicts. All operators - * are defined in the detail::ops namespace for internal use by expression templates. 
- */ - -#include - -namespace svmp { -namespace FE { -namespace math { -namespace detail { -namespace ops { - -/** - * @brief Addition operator functor - */ -struct Add { - template - constexpr auto operator()(const T1& a, const T2& b) const { - return a + b; - } -}; - -/** - * @brief Subtraction operator functor - */ -struct Sub { - template - constexpr auto operator()(const T1& a, const T2& b) const { - return a - b; - } -}; - -/** - * @brief Multiplication operator functor - */ -struct Mul { - template - constexpr auto operator()(const T1& a, const T2& b) const { - return a * b; - } -}; - -/** - * @brief Division operator functor - */ -struct Div { - template - constexpr auto operator()(const T1& a, const T2& b) const { - return a / b; - } -}; - -/** - * @brief Negation operator functor - */ -struct Negate { - template - constexpr auto operator()(const T& a) const { - return -a; - } -}; - -/** - * @brief Absolute value operator functor - */ -struct Abs { - template - constexpr auto operator()(const T& a) const { - using std::abs; - return abs(a); - } -}; - -/** - * @brief Square root operator functor - */ -struct Sqrt { - template - constexpr auto operator()(const T& a) const { - using std::sqrt; - return sqrt(a); - } -}; - -} // namespace ops -} // namespace detail -} // namespace math -} // namespace FE -} // namespace svmp - -#endif // SVMP_FE_MATH_EXPRESSION_OPS_H diff --git a/Code/Source/solver/FE/Math/IntegerMath.h b/Code/Source/solver/FE/Math/IntegerMath.h deleted file mode 100644 index 52a50117f..000000000 --- a/Code/Source/solver/FE/Math/IntegerMath.h +++ /dev/null @@ -1,98 +0,0 @@ -/* Copyright (c) Stanford University, The Regents of the University of California, and others. - * - * All Rights Reserved. - * - * See License file. 
- */ - -#ifndef SVMP_FE_MATH_INTEGERMATH_H -#define SVMP_FE_MATH_INTEGERMATH_H - -#include "Types.h" - -#include -#include -#include -#include - -namespace svmp { -namespace FE { -namespace math { - -[[nodiscard]] constexpr Real pow_int_nonnegative(Real base, int exponent) noexcept { - Real result = Real(1); - Real factor = base; - int power = exponent; - while (power > 0) { - if ((power & 1) != 0) { - result *= factor; - } - power >>= 1; - if (power > 0) { - factor *= factor; - } - } - return result; -} - -[[nodiscard]] constexpr Real pow_int(Real base, int exponent) noexcept { - if (exponent < 0) { - return Real(1) / pow_int_nonnegative(base, -exponent); - } - return pow_int_nonnegative(base, exponent); -} - -[[nodiscard]] constexpr std::size_t binomial_size(int n, int k) { - if (n < 0 || k < 0 || k > n) { - return 0u; - } - if (k > n - k) { - k = n - k; - } - - std::size_t result = 1u; - for (int i = 1; i <= k; ++i) { - auto numerator = static_cast(n - (k - i)); - auto denominator = static_cast(i); - - const auto numerator_gcd = std::gcd(numerator, denominator); - numerator /= numerator_gcd; - denominator /= numerator_gcd; - - const auto result_gcd = std::gcd(result, denominator); - result /= result_gcd; - denominator /= result_gcd; - if (denominator != 1u) { - throw std::overflow_error( - "binomial_size: failed to reduce exact binomial factor"); - } - if (numerator != 0u && - result > std::numeric_limits::max() / numerator) { - throw std::overflow_error("binomial_size: result does not fit in size_t"); - } - result *= numerator; - } - return result; -} - -[[nodiscard]] constexpr Real binomial_real(int n, int k) noexcept { - if (k < 0 || k > n) { - return Real(0); - } - if (k > n - k) { - k = n - k; - } - - Real result = Real(1); - for (int i = 1; i <= k; ++i) { - result *= static_cast(n - (k - i)); - result /= static_cast(i); - } - return result; -} - -} // namespace math -} // namespace FE -} // namespace svmp - -#endif // SVMP_FE_MATH_INTEGERMATH_H diff --git 
a/Code/Source/solver/FE/Math/MatrixExpr.h b/Code/Source/solver/FE/Math/MatrixExpr.h index 097f35361..13010bddf 100644 --- a/Code/Source/solver/FE/Math/MatrixExpr.h +++ b/Code/Source/solver/FE/Math/MatrixExpr.h @@ -11,10 +11,11 @@ */ #include +#include #include #include -#include -#include "ExpressionOps.h" + +#include "VectorExpr.h" namespace svmp { namespace FE { diff --git a/Code/Source/solver/FE/Math/VectorExpr.h b/Code/Source/solver/FE/Math/VectorExpr.h index 627d2fd88..178b66b8a 100644 --- a/Code/Source/solver/FE/Math/VectorExpr.h +++ b/Code/Source/solver/FE/Math/VectorExpr.h @@ -10,14 +10,69 @@ * of assignment, eliminating intermediate allocations and improving performance. */ +#include #include #include -#include -#include "ExpressionOps.h" namespace svmp { namespace FE { namespace math { +namespace detail { +namespace ops { + +struct Add { + template + constexpr auto operator()(const T1& a, const T2& b) const { + return a + b; + } +}; + +struct Sub { + template + constexpr auto operator()(const T1& a, const T2& b) const { + return a - b; + } +}; + +struct Mul { + template + constexpr auto operator()(const T1& a, const T2& b) const { + return a * b; + } +}; + +struct Div { + template + constexpr auto operator()(const T1& a, const T2& b) const { + return a / b; + } +}; + +struct Negate { + template + constexpr auto operator()(const T& a) const { + return -a; + } +}; + +struct Abs { + template + constexpr auto operator()(const T& a) const { + using std::abs; + return abs(a); + } +}; + +struct Sqrt { + template + constexpr auto operator()(const T& a) const { + using std::sqrt; + return sqrt(a); + } +}; + +} // namespace ops +} // namespace detail /** * @brief Base class for all vector expressions using CRTP diff --git a/tests/unitTests/FE/Math/test_ExpressionOps.cpp b/tests/unitTests/FE/Math/test_ExpressionOps.cpp deleted file mode 100644 index a368e345e..000000000 --- a/tests/unitTests/FE/Math/test_ExpressionOps.cpp +++ /dev/null @@ -1,508 +0,0 @@ -/** - * 
@file test_ExpressionOps.cpp - * @brief Unit tests for ExpressionOps.h - expression template operators - */ - -#include -#include "FE/Math/ExpressionOps.h" -#include "FE/Math/Vector.h" -#include "FE/Math/Matrix.h" -#include -#include -#include -#include - -using namespace svmp::FE::math; -using namespace svmp::FE::math::detail::ops; - -// Test fixture for ExpressionOps tests -class ExpressionOpsTest : public ::testing::Test { -protected: - static constexpr double tolerance = 1e-14; - - void SetUp() override {} - void TearDown() override {} - - template - bool approx_equal(T a, T b, T tol = tolerance) { - return std::abs(a - b) <= tol; - } -}; - -// ============================================================================= -// Binary Operation Tests -// ============================================================================= - -TEST_F(ExpressionOpsTest, AddOperator) { - Add op; - - // Integer addition - EXPECT_EQ(op(5, 3), 8); - EXPECT_EQ(op(-5, 3), -2); - EXPECT_EQ(op(-5, -3), -8); - - // Floating point addition - EXPECT_DOUBLE_EQ(op(3.14, 2.86), 6.0); - EXPECT_DOUBLE_EQ(op(-1.5, 2.5), 1.0); - - // Mixed types - auto result = op(3, 2.5); - EXPECT_TRUE((std::is_same_v)); - EXPECT_DOUBLE_EQ(result, 5.5); -} - -TEST_F(ExpressionOpsTest, SubOperator) { - Sub op; - - // Integer subtraction - EXPECT_EQ(op(5, 3), 2); - EXPECT_EQ(op(3, 5), -2); - EXPECT_EQ(op(-5, -3), -2); - - // Floating point subtraction - EXPECT_DOUBLE_EQ(op(5.5, 2.5), 3.0); - EXPECT_DOUBLE_EQ(op(2.5, 5.5), -3.0); - - // Mixed types - auto result = op(5.5, 2); - EXPECT_TRUE((std::is_same_v)); - EXPECT_DOUBLE_EQ(result, 3.5); -} - -TEST_F(ExpressionOpsTest, MulOperator) { - Mul op; - - // Integer multiplication - EXPECT_EQ(op(5, 3), 15); - EXPECT_EQ(op(-5, 3), -15); - EXPECT_EQ(op(-5, -3), 15); - - // Floating point multiplication - EXPECT_DOUBLE_EQ(op(2.5, 4.0), 10.0); - EXPECT_DOUBLE_EQ(op(-2.5, 4.0), -10.0); - - // Zero multiplication - EXPECT_EQ(op(0, 100), 0); - EXPECT_DOUBLE_EQ(op(0.0, 
3.14), 0.0); - - // Mixed types - auto result = op(3, 2.5); - EXPECT_TRUE((std::is_same_v)); - EXPECT_DOUBLE_EQ(result, 7.5); -} - -TEST_F(ExpressionOpsTest, DivOperator) { - Div op; - - // Integer division - EXPECT_EQ(op(10, 2), 5); - EXPECT_EQ(op(10, 3), 3); // Integer division truncates - EXPECT_EQ(op(-10, 2), -5); - - // Floating point division - EXPECT_DOUBLE_EQ(op(10.0, 2.0), 5.0); - EXPECT_DOUBLE_EQ(op(10.0, 3.0), 10.0/3.0); - EXPECT_DOUBLE_EQ(op(-10.0, 2.0), -5.0); - - // Mixed types - auto result = op(10.0, 3); - EXPECT_TRUE((std::is_same_v)); - EXPECT_DOUBLE_EQ(result, 10.0/3.0); -} - -// ============================================================================= -// Unary Operation Tests -// ============================================================================= - -TEST_F(ExpressionOpsTest, NegateOperator) { - Negate op; - - // Integer negation - EXPECT_EQ(op(5), -5); - EXPECT_EQ(op(-5), 5); - EXPECT_EQ(op(0), 0); - - // Floating point negation - EXPECT_DOUBLE_EQ(op(3.14), -3.14); - EXPECT_DOUBLE_EQ(op(-2.71), 2.71); - EXPECT_DOUBLE_EQ(op(0.0), 0.0); - - // Type preservation - auto int_result = op(5); - EXPECT_TRUE((std::is_same_v)); - - auto double_result = op(5.0); - EXPECT_TRUE((std::is_same_v)); -} - -TEST_F(ExpressionOpsTest, AbsOperator) { - Abs op; - - // Integer absolute value - EXPECT_EQ(op(5), 5); - EXPECT_EQ(op(-5), 5); - EXPECT_EQ(op(0), 0); - - // Floating point absolute value - EXPECT_DOUBLE_EQ(op(3.14), 3.14); - EXPECT_DOUBLE_EQ(op(-3.14), 3.14); - EXPECT_DOUBLE_EQ(op(0.0), 0.0); - - // Special cases - EXPECT_DOUBLE_EQ(op(-0.0), 0.0); - - // Type preservation - auto int_result = op(-5); - EXPECT_TRUE((std::is_same_v)); - - auto double_result = op(-5.0); - EXPECT_TRUE((std::is_same_v)); -} - -TEST_F(ExpressionOpsTest, SqrtOperator) { - Sqrt op; - - // Perfect squares - EXPECT_DOUBLE_EQ(op(4.0), 2.0); - EXPECT_DOUBLE_EQ(op(9.0), 3.0); - EXPECT_DOUBLE_EQ(op(16.0), 4.0); - EXPECT_DOUBLE_EQ(op(25.0), 5.0); - - // Non-perfect squares - 
EXPECT_DOUBLE_EQ(op(2.0), std::sqrt(2.0)); - EXPECT_DOUBLE_EQ(op(3.0), std::sqrt(3.0)); - - // Special cases - EXPECT_DOUBLE_EQ(op(0.0), 0.0); - EXPECT_DOUBLE_EQ(op(1.0), 1.0); - - // Type conversion - auto result = op(4); // Integer input - EXPECT_DOUBLE_EQ(result, 2.0); -} - -// ============================================================================= -// Constexpr Tests -// ============================================================================= - -TEST_F(ExpressionOpsTest, ConstexprOperators) { - // Test that operators can be used in constexpr contexts - constexpr Add add_op; - constexpr Sub sub_op; - constexpr Mul mul_op; - constexpr Div div_op; - constexpr Negate neg_op; - - // Compile-time evaluation - constexpr auto sum = add_op(3, 4); - constexpr auto diff = sub_op(7, 3); - constexpr auto prod = mul_op(3, 4); - constexpr auto quot = div_op(12, 3); - constexpr auto neg = neg_op(5); - - EXPECT_EQ(sum, 7); - EXPECT_EQ(diff, 4); - EXPECT_EQ(prod, 12); - EXPECT_EQ(quot, 4); - EXPECT_EQ(neg, -5); - - // Static assertions to verify compile-time evaluation - static_assert(add_op(2, 3) == 5); - static_assert(sub_op(5, 2) == 3); - static_assert(mul_op(3, 4) == 12); - static_assert(div_op(10, 2) == 5); - static_assert(neg_op(3) == -3); -} - -// ============================================================================= -// Type Deduction Tests -// ============================================================================= - -TEST_F(ExpressionOpsTest, TypeDeduction) { - Add add_op; - Sub sub_op; - Mul mul_op; - Div div_op; - - // int + int -> int - auto int_result = add_op(3, 4); - EXPECT_TRUE((std::is_same_v)); - - // double + double -> double - auto double_result = add_op(3.0, 4.0); - EXPECT_TRUE((std::is_same_v)); - - // int + double -> double - auto mixed_result1 = add_op(3, 4.0); - EXPECT_TRUE((std::is_same_v)); - - // double + int -> double - auto mixed_result2 = add_op(3.0, 4); - EXPECT_TRUE((std::is_same_v)); - - // float + double -> double - auto 
float_double_result = add_op(3.0f, 4.0); - EXPECT_TRUE((std::is_same_v)); -} - -// ============================================================================= -// Complex Expression Tests -// ============================================================================= - -TEST_F(ExpressionOpsTest, ChainedOperations) { - Add add_op; - Sub sub_op; - Mul mul_op; - Div div_op; - Negate neg_op; - - // Simulate complex expression: -(a + b) * c / d - double a = 2.0, b = 3.0, c = 4.0, d = 2.0; - - auto sum = add_op(a, b); // 5.0 - auto negated = neg_op(sum); // -5.0 - auto product = mul_op(negated, c); // -20.0 - auto result = div_op(product, d); // -10.0 - - EXPECT_DOUBLE_EQ(result, -10.0); -} - -TEST_F(ExpressionOpsTest, MixedPrecisionChain) { - Add add_op; - Mul mul_op; - - // Mixed precision chain - int a = 2; - float b = 3.5f; - double c = 1.5; - - auto step1 = add_op(a, b); // int + float -> float (5.5f) - auto step2 = mul_op(step1, c); // float + double -> double (8.25) - - EXPECT_TRUE((std::is_same_v)); - EXPECT_DOUBLE_EQ(step2, 8.25); -} - -// ============================================================================= -// Operator Integration with Vector/Matrix Tests -// ============================================================================= - -TEST_F(ExpressionOpsTest, VectorIntegration) { - Vector v1{1.0, 2.0, 3.0}; - Vector v2{4.0, 5.0, 6.0}; - - // Test that operators work correctly in vector expressions - Vector sum = v1 + v2; - Vector diff = v1 - v2; - Vector neg = -v1; - Vector scaled = v1 * 2.0; - - EXPECT_DOUBLE_EQ(sum[0], 5.0); - EXPECT_DOUBLE_EQ(sum[1], 7.0); - EXPECT_DOUBLE_EQ(sum[2], 9.0); - - EXPECT_DOUBLE_EQ(diff[0], -3.0); - EXPECT_DOUBLE_EQ(diff[1], -3.0); - EXPECT_DOUBLE_EQ(diff[2], -3.0); - - EXPECT_DOUBLE_EQ(neg[0], -1.0); - EXPECT_DOUBLE_EQ(neg[1], -2.0); - EXPECT_DOUBLE_EQ(neg[2], -3.0); - - EXPECT_DOUBLE_EQ(scaled[0], 2.0); - EXPECT_DOUBLE_EQ(scaled[1], 4.0); - EXPECT_DOUBLE_EQ(scaled[2], 6.0); -} - -TEST_F(ExpressionOpsTest, 
MatrixIntegration) { - Matrix m1{{1.0, 2.0}, {3.0, 4.0}}; - Matrix m2{{5.0, 6.0}, {7.0, 8.0}}; - - // Test that operators work correctly in matrix expressions - Matrix sum = m1 + m2; - Matrix diff = m1 - m2; - Matrix neg = -m1; - Matrix scaled = m1 * 2.0; - - EXPECT_DOUBLE_EQ(sum(0, 0), 6.0); - EXPECT_DOUBLE_EQ(sum(0, 1), 8.0); - EXPECT_DOUBLE_EQ(sum(1, 0), 10.0); - EXPECT_DOUBLE_EQ(sum(1, 1), 12.0); - - EXPECT_DOUBLE_EQ(diff(0, 0), -4.0); - EXPECT_DOUBLE_EQ(diff(0, 1), -4.0); - EXPECT_DOUBLE_EQ(diff(1, 0), -4.0); - EXPECT_DOUBLE_EQ(diff(1, 1), -4.0); - - EXPECT_DOUBLE_EQ(neg(0, 0), -1.0); - EXPECT_DOUBLE_EQ(neg(0, 1), -2.0); - EXPECT_DOUBLE_EQ(neg(1, 0), -3.0); - EXPECT_DOUBLE_EQ(neg(1, 1), -4.0); - - EXPECT_DOUBLE_EQ(scaled(0, 0), 2.0); - EXPECT_DOUBLE_EQ(scaled(0, 1), 4.0); - EXPECT_DOUBLE_EQ(scaled(1, 0), 6.0); - EXPECT_DOUBLE_EQ(scaled(1, 1), 8.0); -} - -// ============================================================================= -// Edge Cases and Special Values Tests -// ============================================================================= - -TEST_F(ExpressionOpsTest, SpecialFloatingPointValues) { - Add add_op; - Sub sub_op; - Mul mul_op; - Div div_op; - Abs abs_op; - Negate neg_op; - - // Infinity handling - double inf = std::numeric_limits::infinity(); - EXPECT_DOUBLE_EQ(add_op(inf, 1.0), inf); - EXPECT_DOUBLE_EQ(sub_op(inf, 1.0), inf); - EXPECT_DOUBLE_EQ(mul_op(inf, 2.0), inf); - EXPECT_DOUBLE_EQ(div_op(inf, 2.0), inf); - EXPECT_DOUBLE_EQ(abs_op(inf), inf); - EXPECT_DOUBLE_EQ(neg_op(inf), -inf); - - // NaN handling - double nan = std::numeric_limits::quiet_NaN(); - EXPECT_TRUE(std::isnan(add_op(nan, 1.0))); - EXPECT_TRUE(std::isnan(sub_op(nan, 1.0))); - EXPECT_TRUE(std::isnan(mul_op(nan, 2.0))); - EXPECT_TRUE(std::isnan(div_op(nan, 2.0))); - EXPECT_TRUE(std::isnan(abs_op(nan))); - EXPECT_TRUE(std::isnan(neg_op(nan))); - - // Division by zero - EXPECT_DOUBLE_EQ(div_op(1.0, 0.0), inf); - EXPECT_DOUBLE_EQ(div_op(-1.0, 0.0), -inf); - 
EXPECT_TRUE(std::isnan(div_op(0.0, 0.0))); -} - -TEST_F(ExpressionOpsTest, LargeAndSmallValues) { - Add add_op; - Mul mul_op; - - // Large values - double large = 1e308; - double result = add_op(large, large); - EXPECT_TRUE(std::isinf(result)); // Overflow to infinity - - // Small values - double tiny = std::numeric_limits::min(); - double tiny_result = mul_op(tiny, 0.5); - EXPECT_GT(tiny_result, 0.0); // Should still be positive - EXPECT_LT(tiny_result, tiny); // But smaller - - // Denormalized numbers - double denorm = std::numeric_limits::denorm_min(); - double denorm_result = add_op(denorm, denorm); - EXPECT_EQ(denorm_result, 2.0 * denorm); -} - -// ============================================================================= -// SFINAE and Compile-time Constraint Tests -// ============================================================================= - -TEST_F(ExpressionOpsTest, SFINAECompatibility) { - // Test that operators work with any arithmetic types - Add add_op; - - // Various integer types - EXPECT_EQ(add_op(int8_t(3), int8_t(4)), 7); - EXPECT_EQ(add_op(int16_t(100), int16_t(200)), 300); - EXPECT_EQ(add_op(int32_t(1000), int32_t(2000)), 3000); - EXPECT_EQ(add_op(int64_t(10000), int64_t(20000)), 30000); - - // Unsigned types - EXPECT_EQ(add_op(uint8_t(3), uint8_t(4)), 7u); - EXPECT_EQ(add_op(uint16_t(100), uint16_t(200)), 300u); - EXPECT_EQ(add_op(uint32_t(1000), uint32_t(2000)), 3000u); - - // Floating point types - EXPECT_FLOAT_EQ(add_op(3.0f, 4.0f), 7.0f); - EXPECT_DOUBLE_EQ(add_op(3.0, 4.0), 7.0); - - // Long double - long double ld1 = 3.0L; - long double ld2 = 4.0L; - EXPECT_DOUBLE_EQ(add_op(ld1, ld2), 7.0L); -} - -// ============================================================================= -// Template Instantiation Tests -// ============================================================================= - -TEST_F(ExpressionOpsTest, TemplateInstantiations) { - // Test that operators can be instantiated with various types - Add add_op; - Sub 
sub_op; - Mul mul_op; - Div div_op; - Abs abs_op; - Sqrt sqrt_op; - Negate neg_op; - - // Custom types that support arithmetic operations - struct CustomNumber { - double value; - CustomNumber(double v) : value(v) {} - CustomNumber operator+(const CustomNumber& other) const { return CustomNumber(value + other.value); } - CustomNumber operator-(const CustomNumber& other) const { return CustomNumber(value - other.value); } - CustomNumber operator*(const CustomNumber& other) const { return CustomNumber(value * other.value); } - CustomNumber operator/(const CustomNumber& other) const { return CustomNumber(value / other.value); } - CustomNumber operator-() const { return CustomNumber(-value); } - bool operator==(const CustomNumber& other) const { return value == other.value; } - }; - - CustomNumber cn1(3.0); - CustomNumber cn2(4.0); - - auto cn_sum = add_op(cn1, cn2); - EXPECT_EQ(cn_sum.value, 7.0); - - auto cn_diff = sub_op(cn1, cn2); - EXPECT_EQ(cn_diff.value, -1.0); - - auto cn_prod = mul_op(cn1, cn2); - EXPECT_EQ(cn_prod.value, 12.0); - - auto cn_quot = div_op(cn1, cn2); - EXPECT_EQ(cn_quot.value, 0.75); - - auto cn_neg = neg_op(cn1); - EXPECT_EQ(cn_neg.value, -3.0); -} - -// ============================================================================= -// Complex Number Support Tests -// ============================================================================= - -TEST_F(ExpressionOpsTest, ComplexNumberSupport) { - Add add_op; - Sub sub_op; - Mul mul_op; - Div div_op; - Negate neg_op; - - std::complex c1(3.0, 4.0); - std::complex c2(1.0, 2.0); - - auto c_sum = add_op(c1, c2); - EXPECT_DOUBLE_EQ(c_sum.real(), 4.0); - EXPECT_DOUBLE_EQ(c_sum.imag(), 6.0); - - auto c_diff = sub_op(c1, c2); - EXPECT_DOUBLE_EQ(c_diff.real(), 2.0); - EXPECT_DOUBLE_EQ(c_diff.imag(), 2.0); - - auto c_prod = mul_op(c1, c2); - EXPECT_DOUBLE_EQ(c_prod.real(), -5.0); // (3+4i)(1+2i) = 3+6i+4i+8i² = 3+10i-8 = -5+10i - EXPECT_DOUBLE_EQ(c_prod.imag(), 10.0); - - auto c_neg = neg_op(c1); - 
EXPECT_DOUBLE_EQ(c_neg.real(), -3.0); - EXPECT_DOUBLE_EQ(c_neg.imag(), -4.0); -} From 7f2e0202de0896246f4a88b4d42ec38e60b72b3a Mon Sep 17 00:00:00 2001 From: Zachary Sexton Date: Mon, 8 Jun 2026 14:16:50 -0700 Subject: [PATCH 09/22] removing the previous basis functions so that we are not maintaining two basis function infrastructures --- Code/Source/solver/README.md | 2 +- Code/Source/solver/nn.cpp | 256 +---- Code/Source/solver/nn_elem_gnn.h | 1586 ---------------------------- Code/Source/solver/nn_elem_gnnxx.h | 139 --- 4 files changed, 32 insertions(+), 1951 deletions(-) delete mode 100644 Code/Source/solver/nn_elem_gnn.h delete mode 100644 Code/Source/solver/nn_elem_gnnxx.h diff --git a/Code/Source/solver/README.md b/Code/Source/solver/README.md index 252999e8f..d11378e35 100644 --- a/Code/Source/solver/README.md +++ b/Code/Source/solver/README.md @@ -601,7 +601,7 @@ A map type used to set element properties. Computes shape functions and derivatives at given natural coords. -- `set_face_shape_data[face.eType](gaus_pt, face)` +- FE Basis face evaluation for supported mapped face elements. diff --git a/Code/Source/solver/nn.cpp b/Code/Source/solver/nn.cpp index a9e0aebc3..1ec9984b6 100644 --- a/Code/Source/solver/nn.cpp +++ b/Code/Source/solver/nn.cpp @@ -1,7 +1,8 @@ // SPDX-FileCopyrightText: Copyright (c) Stanford University, The Regents of the University of California, and others. // SPDX-License-Identifier: BSD-3-Clause -// The functions defined here replicate the Fortran functions defined in NN.f. +// Solver-facing element setup, Gauss integration, FE Basis evaluation, and +// shape-function bounds. // // The functions are used to // @@ -25,13 +26,8 @@ #include "lapack_defs.h" -#include #include -#include -#include -#include #include -#include #include #include #include @@ -51,12 +47,6 @@ using namespace consts; // Define maps used to set element Gauss integration data. #include "nn_elem_gip.h" -// Define maps used to set element shape function data. 
-#include "nn_elem_gnn.h" - -// Define maps used to get element shape function 2nd derivative data. -#include "nn_elem_gnnxx.h" - // Define a map type used to set the bounds of element shape functions. #include "nn_elem_nn_bnds.h" @@ -71,77 +61,6 @@ struct BasisSelection { int order; }; -enum class BasisMode { - Auto, - Legacy, - Fe -}; - -std::string normalize_basis_mode_name(std::string value) -{ - std::transform(value.begin(), value.end(), value.begin(), - [](unsigned char c) { return static_cast(std::tolower(c)); }); - return value; -} - -BasisMode parse_basis_mode() -{ - const char* mode_env = std::getenv("SVMP_BASIS_MODE"); - if (mode_env == nullptr || *mode_env == '\0') { - return BasisMode::Auto; - } - - const std::string mode = normalize_basis_mode_name(mode_env); - if (mode == "auto") { - return BasisMode::Auto; - } - if (mode == "legacy") { - return BasisMode::Legacy; - } - if (mode == "fe") { - return BasisMode::Fe; - } - - throw febasis::BasisConfigurationException( - "Invalid SVMP_BASIS_MODE='" + std::string(mode_env) + - "'. 
Expected one of: auto, legacy, fe", - __FILE__, __LINE__, __func__); -} - -BasisMode active_basis_mode() -{ - static const BasisMode mode = parse_basis_mode(); - return mode; -} - -const char* basis_mode_name(BasisMode mode) -{ - switch (mode) { - case BasisMode::Auto: - return "auto"; - case BasisMode::Legacy: - return "legacy"; - case BasisMode::Fe: - return "fe"; - } - return "unknown"; -} - -void log_basis_mode_once() -{ - static const bool logged = []() { - std::cout << "[svMultiPhysics] SVMP_BASIS_MODE=" - << basis_mode_name(active_basis_mode()) << std::endl; - return true; - }(); - (void)logged; -} - -bool basis_mode_allows_fe_adapter() -{ - return active_basis_mode() != BasisMode::Legacy; -} - std::string solver_element_name(consts::ElementType eType) { auto it = consts::element_type_to_string.find(eType); @@ -178,15 +97,11 @@ std::optional to_basis_selection(consts::ElementType eType) bool use_basis_adapter_for(consts::ElementType eType) { - return basis_mode_allows_fe_adapter() && to_basis_selection(eType).has_value(); + return to_basis_selection(eType).has_value(); } bool supports_face_basis_adapter_for(consts::ElementType eType) { - if (!basis_mode_allows_fe_adapter()) { - return false; - } - switch (eType) { case consts::ElementType::LIN1: case consts::ElementType::LIN2: @@ -464,73 +379,20 @@ void evaluate_basis_hessians(const int insd, copy_basis_hessians_to_solver_nxx(eType, eNoN, gaus_pt, basis->dimension(), hessians, Nxx); } -void call_legacy_get_gnn(const int insd, - consts::ElementType eType, - const int eNoN, - const int g, - Array& xi, - Array& N, - Array3& Nx, - const std::string& basis_failure = "") +void set_point_face_shape_data(const int gaus_pt, faceType& face) { - try { - get_element_shape_data[eType](insd, eNoN, g, xi, N, Nx); - } catch (const std::bad_function_call&) { - std::string message = "[get_gnn] No FE Basis or legacy shape support for element " + - solver_element_name(eType) + "; legacy fallback was attempted"; - if 
(!basis_failure.empty()) { - message += " after FE Basis failure: " + basis_failure; + face.N(0, gaus_pt) = 1.0; + for (int row = 0; row < face.Nx.nrows(); ++row) { + for (int col = 0; col < face.Nx.ncols(); ++col) { + face.Nx(row, col, gaus_pt) = 0.0; } - throw fe::InvalidElementException(message, solver_element_name(eType), - __FILE__, __LINE__, __func__); } } -void call_legacy_get_gn_nxx(const int insd, - const int ind2, - consts::ElementType eType, - const int eNoN, - const int gaus_pt, - const Array& xi, - Array3& Nxx, - const std::string& basis_failure = "", - const bool allow_missing_legacy_table = false) -{ - try { - get_element_2nd_derivs[eType](insd, ind2, eNoN, gaus_pt, xi, Nxx); - } catch (const std::bad_function_call&) { - if (allow_missing_legacy_table) { - return; - } - - std::string message = "[get_gn_nxx] No FE Basis or legacy second-derivative support for element " + - solver_element_name(eType) + "; legacy fallback was attempted"; - if (!basis_failure.empty()) { - message += " after FE Basis failure: " + basis_failure; - } - throw fe::InvalidElementException(message, solver_element_name(eType), - __FILE__, __LINE__, __func__); - } -} - -void call_legacy_face_shape_data(const int gaus_pt, faceType& face) -{ - auto legacy_shape = set_face_shape_data.find(face.eType); - if (legacy_shape == set_face_shape_data.end()) { - throw fe::InvalidElementException( - "[get_gnn(face)] No FE Basis or legacy face shape support", - solver_element_name(face.eType), __FILE__, __LINE__, __func__); - } - - legacy_shape->second(gaus_pt, face); -} - } // namespace void get_gip(const int insd, consts::ElementType eType, const int nG, Vector& w, Array& xi) { - log_basis_mode_once(); - try { get_element_gauss_int_data[eType](insd, nG, w, xi); } catch (const std::bad_function_call& exception) { @@ -546,8 +408,6 @@ void get_gip(const int insd, consts::ElementType eType, const int nG, Vector& xi, Array& N, Array3& Nx) { - log_basis_mode_once(); - - if 
(use_basis_adapter_for(eType)) { - try { - evaluate_basis_values_and_gradients(insd, eType, eNoN, g, xi, N, Nx); - return; - } catch (const fe::NotImplementedException& exception) { - call_legacy_get_gnn(insd, eType, eNoN, g, xi, N, Nx, exception.what()); - return; - } catch (const std::exception& exception) { - throw febasis::BasisEvaluationException( - "[get_gnn] FE Basis adapter failed for element " + - solver_element_name(eType) + - "; legacy fallback was not attempted for this approved element: " + - exception.what(), - __FILE__, __LINE__, __func__); - } + if (!use_basis_adapter_for(eType)) { + throw febasis::BasisElementCompatibilityException( + "[get_gnn] FE Basis does not support solver element " + solver_element_name(eType), + __FILE__, __LINE__, __func__); } - call_legacy_get_gnn(insd, eType, eNoN, g, xi, N, Nx); + evaluate_basis_values_and_gradients(insd, eType, eNoN, g, xi, N, Nx); } -/// @brief A big fat hack because the Fortran GETNN() operates on primitive types but -/// the C++ version does not, uses Array and Vector objects. +/// @brief Adapter overload for vector-style callers. // void get_gnn(const int nsd, consts::ElementType eType, const int eNoN, Vector& xi, Vector& N, Array& Nx) @@ -625,86 +469,48 @@ void get_gnn(Simulation* simulation, int gaus_pt, faceType& face) { using consts::ElementType; - log_basis_mode_once(); - - if (active_basis_mode() == BasisMode::Legacy) { - call_legacy_face_shape_data(gaus_pt, face); - return; - } - if (face.eType == ElementType::NRB) { throw fe::NotImplementedException( - "[get_gnn(face)] NRB face shape functions remain unsupported by FE Basis and the legacy face table", + "[get_gnn(face)] NRB face shape functions are unsupported by FE Basis", __FILE__, __LINE__, __func__); } - if (supports_face_basis_adapter_for(face.eType)) { - try { - // FE Basis owns mapped face N/Nx formulas; faceType remains the solver-facing storage contract. 
- evaluate_face_basis_values_and_gradients(gaus_pt, face); - return; - } catch (const std::exception& exception) { - throw febasis::BasisEvaluationException( - "[get_gnn(face)] FE Basis face adapter failed for mapped face element " + - solver_element_name(face.eType) + "; legacy fallback was not attempted: " + - exception.what(), - __FILE__, __LINE__, __func__); - } + if (face.eType == ElementType::PNT) { + set_point_face_shape_data(gaus_pt, face); + return; } - if (face.eType == ElementType::PNT) { - // Point faces have no mapped FE Basis representation in this pass; keep the legacy scalar value path. - call_legacy_face_shape_data(gaus_pt, face); + if (supports_face_basis_adapter_for(face.eType)) { + // FE Basis owns mapped face N/Nx formulas; faceType remains the solver-facing storage contract. + evaluate_face_basis_values_and_gradients(gaus_pt, face); return; } - // The legacy face table is retained only for explicitly unsupported paths and future cleanup. - call_legacy_face_shape_data(gaus_pt, face); + throw febasis::BasisElementCompatibilityException( + "[get_gnn(face)] FE Basis does not support face element " + solver_element_name(face.eType), + __FILE__, __LINE__, __func__); } -/// @brief Returns second order derivatives at given natural coords -/// -/// Replicates 'SUBROUTINE GETGNNxx(insd, ind2, eType, eNoN, xi, Nxx)'. +/// @brief Returns second order derivatives at given natural coords. // void get_gn_nxx(const int insd, const int ind2, consts::ElementType eType, const int eNoN, const int gaus_pt, const Array& xi, Array3& Nxx) { using namespace consts; - log_basis_mode_once(); - // NRB/PNT and face-only Hessian paths remain intentionally unsupported here. 
if (eType == ElementType::NRB || eType == ElementType::PNT) { return; } - if (active_basis_mode() == BasisMode::Legacy) { - call_legacy_get_gn_nxx( - insd, ind2, eType, eNoN, gaus_pt, xi, Nxx, "", true); - return; - } - - if (use_basis_adapter_for(eType)) { - try { - evaluate_basis_hessians(insd, ind2, eType, eNoN, gaus_pt, xi, Nxx); - return; - } catch (const fe::NotImplementedException& exception) { - throw fe::NotImplementedException( - "[get_gn_nxx] FE Basis Hessian support is required for mapped volume element " + - solver_element_name(eType) + " but is not implemented: " + exception.what(), - __FILE__, __LINE__, __func__); - } catch (const std::exception& exception) { - throw febasis::BasisEvaluationException( - "[get_gn_nxx] FE Basis Hessian adapter failed for element " + - solver_element_name(eType) + - "; legacy fallback was not attempted for this approved element: " + - exception.what(), - __FILE__, __LINE__, __func__); - } + if (!use_basis_adapter_for(eType)) { + throw febasis::BasisElementCompatibilityException( + "[get_gn_nxx] FE Basis Hessian evaluation does not support solver element " + + solver_element_name(eType), + __FILE__, __LINE__, __func__); } - // Legacy Hessian tables are reserved for intentionally unsupported families. - call_legacy_get_gn_nxx(insd, ind2, eType, eNoN, gaus_pt, xi, Nxx); + evaluate_basis_hessians(insd, ind2, eType, eNoN, gaus_pt, xi, Nxx); } /// @brief Sets bounds on Gauss integration points in parametric space and diff --git a/Code/Source/solver/nn_elem_gnn.h b/Code/Source/solver/nn_elem_gnn.h deleted file mode 100644 index 33564d45b..000000000 --- a/Code/Source/solver/nn_elem_gnn.h +++ /dev/null @@ -1,1586 +0,0 @@ -// SPDX-FileCopyrightText: Copyright (c) Stanford University, The Regents of the University of California, and others. -// SPDX-License-Identifier: BSD-3-Clause - -/// @brief Define a map type used to set element shape function data. -/// -/// Reproduces the Fortran 'GETGNN' subroutine. 
-// -using GetElementShapeMapType = std::map&, Array&, Array3&)>>; - -GetElementShapeMapType get_element_shape_data = { - - {ElementType::HEX8, [](const int insd, const int eNoN, const int g, Array& xi, Array& N, - Array3& Nx) -> void - { - double lx = 1.0 - xi(0,g); - double ly = 1.0 - xi(1,g); - double lz = 1.0 - xi(2,g); - double ux = 1.0 + xi(0,g); - double uy = 1.0 + xi(1,g); - double uz = 1.0 + xi(2,g); - - N(0,g) = lx*ly*lz/8.0; - N(1,g) = ux*ly*lz/8.0; - N(2,g) = ux*uy*lz/8.0; - N(3,g) = lx*uy*lz/8.0; - N(4,g) = lx*ly*uz/8.0; - N(5,g) = ux*ly*uz/8.0; - N(6,g) = ux*uy*uz/8.0; - N(7,g) = lx*uy*uz/8.0; - - Nx(0,0,g) = -ly*lz/8.0; - Nx(1,0,g) = -lx*lz/8.0; - Nx(2,0,g) = -lx*ly/8.0; - - Nx(0,1,g) = ly*lz/8.0; - Nx(1,1,g) = -ux*lz/8.0; - Nx(2,1,g) = -ux*ly/8.0; - - Nx(0,2,g) = uy*lz/8.0; - Nx(1,2,g) = ux*lz/8.0; - Nx(2,2,g) = -ux*uy/8.0; - - Nx(0,3,g) = -uy*lz/8.0; - Nx(1,3,g) = lx*lz/8.0; - Nx(2,3,g) = -lx*uy/8.0; - - Nx(0,4,g) = -ly*uz/8.0; - Nx(1,4,g) = -lx*uz/8.0; - Nx(2,4,g) = lx*ly/8.0; - - Nx(0,5,g) = ly*uz/8.0; - Nx(1,5,g) = -ux*uz/8.0; - Nx(2,5,g) = ux*ly/8.0; - - Nx(0,6,g) = uy*uz/8.0; - Nx(1,6,g) = ux*uz/8.0; - Nx(2,6,g) = ux*uy/8.0; - - Nx(0,7,g) = -uy*uz/8.0; - Nx(1,7,g) = lx*uz/8.0; - Nx(2,7,g) = lx*uy/8.0; - } - }, - - {ElementType::HEX20, [](const int insd, const int eNoN, const int g, Array& xi, Array& N, - Array3& Nx) -> void - { - double lx = 1.0 - xi(0,g); - double ly = 1.0 - xi(1,g); - double lz = 1.0 - xi(2,g); - double ux = 1.0 + xi(0,g); - double uy = 1.0 + xi(1,g); - double uz = 1.0 + xi(2,g); - - double mx = lx*ux; - double my = ly*uy; - double mz = lz*uz; - - N(0, g) = lx*ly*lz*(lx+ly+lz-5.0)/8.0; - N(1, g) = ux*ly*lz*(ux+ly+lz-5.0)/8.0; - N(2, g) = ux*uy*lz*(ux+uy+lz-5.0)/8.0; - N(3, g) = lx*uy*lz*(lx+uy+lz-5.0)/8.0; - N(4, g) = lx*ly*uz*(lx+ly+uz-5.0)/8.0; - N(5, g) = ux*ly*uz*(ux+ly+uz-5.0)/8.0; - N(6, g) = ux*uy*uz*(ux+uy+uz-5.0)/8.0; - N(7, g) = lx*uy*uz*(lx+uy+uz-5.0)/8.0; - N(8, g) = mx*ly*lz/4.0; - N(9, g) = ux*my*lz/4.0; - 
N(10, g) = mx*uy*lz/4.0; - N(11, g) = lx*my*lz/4.0; - N(12, g) = mx*ly*uz/4.0; - N(13, g) = ux*my*uz/4.0; - N(14, g) = mx*uy*uz/4.0; - N(15, g) = lx*my*uz/4.0; - N(16, g) = lx*ly*mz/4.0; - N(17, g) = ux*ly*mz/4.0; - N(18, g) = ux*uy*mz/4.0; - N(19, g) = lx*uy*mz/4.0; - - // N(1) = lx*ly*lz*(lx+ly+lz-5.0)/8.0; - int n = 0; - Nx(0,n,g) = -ly*lz*(lx+ly+lz-5.0+lx)/8.0; - Nx(1,n,g) = -lx*lz*(lx+ly+lz-5.0+ly)/8.0; - Nx(2,n,g) = -lx*ly*(lx+ly+lz-5.0+lz)/8.0; - -//c N(n,g) = ux*ly*lz*(ux+ly+lz-5.0)/8.0; - n += 1; - Nx(0,n,g) = ly*lz*(ux+ly+lz-5.0+ux)/8.0; - Nx(1,n,g) = -ux*lz*(ux+ly+lz-5.0+ly)/8.0; - Nx(2,n,g) = -ux*ly*(ux+ly+lz-5.0+lz)/8.0; - -//c N(n,g) = ux*uy*lz*(ux+uy+lz-5.0)/8.0 - n += 1; - Nx(0,n,g) = uy*lz*(ux+uy+lz-5.0+ux)/8.0; - Nx(1,n,g) = ux*lz*(ux+uy+lz-5.0+uy)/8.0; - Nx(2,n,g) = -ux*uy*(ux+uy+lz-5.0+lz)/8.0; - -//c N(n,g) = lx*uy*lz*(lx+uy+lz-5.0)/8.0 - n += 1; - Nx(0,n,g) = -uy*lz*(lx+uy+lz-5.0+lx)/8.0; - Nx(1,n,g) = lx*lz*(lx+uy+lz-5.0+uy)/8.0; - Nx(2,n,g) = -lx*uy*(lx+uy+lz-5.0+lz)/8.0; - -//c N(n,g) = lx*ly*uz*(lx+ly+uz-5.0)/8.0 - n += 1; - Nx(0,n,g) = -ly*uz*(lx+ly+uz-5.0+lx)/8.0; - Nx(1,n,g) = -lx*uz*(lx+ly+uz-5.0+ly)/8.0; - Nx(2,n,g) = lx*ly*(lx+ly+uz-5.0+uz)/8.0; - -//c N(n,g) = ux*ly*uz*(ux+ly+uz-5.0)/8.0 - n += 1; - Nx(0,n,g) = ly*uz*(ux+ly+uz-5.0+ux)/8.0; - Nx(1,n,g) = -ux*uz*(ux+ly+uz-5.0+ly)/8.0; - Nx(2,n,g) = ux*ly*(ux+ly+uz-5.0+uz)/8.0; - -//c N(n,g) = ux*uy*uz*(ux+uy+uz-5.0)/8.0 - n += 1; - Nx(0,n,g) = uy*uz*(ux+uy+uz-5.0+ux)/8.0; - Nx(1,n,g) = ux*uz*(ux+uy+uz-5.0+uy)/8.0; - Nx(2,n,g) = ux*uy*(ux+uy+uz-5.0+uz)/8.0; - -//c N(n,g) = lx*uy*uz*(lx+uy+uz-5.0)/8.0 - n += 1; - Nx(0,n,g) = -uy*uz*(lx+uy+uz-5.0+lx)/8.0; - Nx(1,n,g) = lx*uz*(lx+uy+uz-5.0+uy)/8.0; - Nx(2,n,g) = lx*uy*(lx+uy+uz-5.0+uz)/8.0; - -//c N(n,g) = mx*ly*lz/4.0 - n += 1; - Nx(0,n,g) = (lx - ux)*ly*lz/4.0; - Nx(1,n,g) = -mx*lz/4.0; - Nx(2,n,g) = -mx*ly/4.0; - -//c N(0n,g) = ux*my*lz/4.0 - n += 1; - Nx(0,n,g) = my*lz/4.0; - Nx(1,n,g) = (ly - uy)*ux*lz/4.0; - Nx(2,n,g) = -ux*my/4.0; 
- -//c N(0n,g) = mx*uy*lz/4.0 - n += 1; - Nx(0,n,g) = (lx - ux)*uy*lz/4.0; - Nx(1,n,g) = mx*lz/4.0; - Nx(2,n,g) = -mx*uy/4.0; - -//c N(0n,g) = lx*my*lz/4.0 - n += 1; - Nx(0,n,g) = -my*lz/4.0; - Nx(1,n,g) = (ly - uy)*lx*lz/4.0; - Nx(2,n,g) = -lx*my/4.0; - -//c N(0n,g) = mx*ly*uz/4.0 - n += 1; - Nx(0,n,g) = (lx - ux)*ly*uz/4.0; - Nx(1,n,g) = -mx*uz/4.0; - Nx(2,n,g) = mx*ly/4.0; - -//c N(0n,g) = ux*my*uz/4.0 - n += 1; - Nx(0,n,g) = my*uz/4.0; - Nx(1,n,g) = (ly - uy)*ux*uz/4.0; - Nx(2,n,g) = ux*my/4.0; - -//c N(0n,g) = mx*uy*uz/4.0 - n += 1; - Nx(0,n,g) = (lx - ux)*uy*uz/4.0; - Nx(1,n,g) = mx*uz/4.0; - Nx(2,n,g) = mx*uy/4.0; - -//c N(0n,g) = lx*my*uz/4.0 - n += 1; - Nx(0,n,g) = -my*uz/4.0; - Nx(1,n,g) = (ly - uy)*lx*uz/4.0; - Nx(2,n,g) = lx*my/4.0; - -//c N(0n,g) = lx*ly*mz/4.0 - n += 1; - Nx(0,n,g) = -ly*mz/4.0; - Nx(1,n,g) = -lx*mz/4.0; - Nx(2,n,g) = (lz - uz)*lx*ly/4.0; - -//c N(0n,g) = ux*ly*mz/4.0 - n += 1; - Nx(0,n,g) = ly*mz/4.0; - Nx(1,n,g) = -ux*mz/4.0; - Nx(2,n,g) = (lz - uz)*ux*ly/4.0; - -//c N(0n,g) = ux*uy*mz/4.0 - n += 1; - Nx(0,n,g) = uy*mz/4.0; - Nx(1,n,g) = ux*mz/4.0; - Nx(2,n,g) = (lz - uz)*ux*uy/4.0; - -//c N(n,g) = lx*uy*mz/4.0 - n += 1; - Nx(0,n,g) = -uy*mz/4.0; - Nx(1,n,g) = lx*mz/4.0; - Nx(2,n,g) = (lz - uz)*lx*uy/4.0; - } - }, - - {ElementType::HEX27, [](const int insd, const int eNoN, const int g, Array& xi, Array& N, - Array3& Nx) -> void - { - double lx = 1.0 - xi(0,g); - double ly = 1.0 - xi(1,g); - double lz = 1.0 - xi(2,g); - double ux = 1.0 + xi(0,g); - double uy = 1.0 + xi(1,g); - double uz = 1.0 + xi(2,g); - - double mx = xi(0,g); - double my = xi(1,g); - double mz = xi(2,g); - - N(0,g) = -mx*lx*my*ly*mz*lz/8.0; - N(1,g) = mx*ux*my*ly*mz*lz/8.0; - N(2,g) = -mx*ux*my*uy*mz*lz/8.0; - N(3,g) = mx*lx*my*uy*mz*lz/8.0; - N(4,g) = mx*lx*my*ly*mz*uz/8.0; - N(5,g) = -mx*ux*my*ly*mz*uz/8.0; - N(6,g) = mx*ux*my*uy*mz*uz/8.0; - N(7,g) = -mx*lx*my*uy*mz*uz/8.0; - N(8,g) = lx*ux*my*ly*mz*lz/4.0; - N(9,g) = -mx*ux*ly*uy*mz*lz/4.0; - N(10,g) = 
-lx*ux*my*uy*mz*lz/4.0; - N(11,g) = mx*lx*ly*uy*mz*lz/4.0; - N(12,g) = -lx*ux*my*ly*mz*uz/4.0; - N(13,g) = mx*ux*ly*uy*mz*uz/4.0; - N(14,g) = lx*ux*my*uy*mz*uz/4.0; - N(15,g) = -mx*lx*ly*uy*mz*uz/4.0; - N(16,g) = mx*lx*my*ly*lz*uz/4.0; - N(17,g) = -mx*ux*my*ly*lz*uz/4.0; - N(18,g) = mx*ux*my*uy*lz*uz/4.0; - N(19,g) = -mx*lx*my*uy*lz*uz/4.0; - - N(20,g) = -mx*lx*ly*uy*lz*uz/2.0; - N(21,g) = mx*ux*ly*uy*lz*uz/2.0; - N(22,g) = -lx*ux*my*ly*lz*uz/2.0; - N(23,g) = lx*ux*my*uy*lz*uz/2.0; - N(24,g) = -lx*ux*ly*uy*mz*lz/2.0; - N(25,g) = lx*ux*ly*uy*mz*uz/2.0; - - N(26,g) = lx*ux*ly*uy*lz*uz; - - // N(0) = -mx*lx*my*ly*mz*lz/8.0 - int n = 0; - Nx(0,n,g) = -(lx - mx)*my*ly*mz*lz/8.0; - Nx(1,n,g) = -(ly - my)*mx*lx*mz*lz/8.0; - Nx(2,n,g) = -(lz - mz)*mx*lx*my*ly/8.0; - - // N(n,g) = mx*ux*my*ly*mz*lz/8.0 - n += 1; - Nx(0,n,g) = (mx + ux)*my*ly*mz*lz/8.0; - Nx(1,n,g) = (ly - my)*mx*ux*mz*lz/8.0; - Nx(2,n,g) = (lz - mz)*mx*ux*my*ly/8.0; - - // N(n,g) = -mx*ux*my*uy*mz*lz/8.0 - n += 1; - Nx(0,n,g) = -(mx + ux)*my*uy*mz*lz/8.0; - Nx(1,n,g) = -(my + uy)*mx*ux*mz*lz/8.0; - Nx(2,n,g) = -(lz - mz)*mx*ux*my*uy/8.0; - - // N(n,g) = mx*lx*my*uy*mz*lz/8.0 - n += 1; - Nx(0,n,g) = (lx - mx)*my*uy*mz*lz/8.0; - Nx(1,n,g) = (my + uy)*mx*lx*mz*lz/8.0; - Nx(2,n,g) = (lz - mz)*mx*lx*my*uy/8.0; - - // N(n,g) = mx*lx*my*ly*mz*uz/8.0 - n += 1; - Nx(0,n,g) = (lx - mx)*my*ly*mz*uz/8.0; - Nx(1,n,g) = (ly - my)*mx*lx*mz*uz/8.0; - Nx(2,n,g) = (mz + uz)*mx*lx*my*ly/8.0; - - // N(n,g) = -mx*ux*my*ly*mz*uz/8.0 - n += 1; - Nx(0,n,g) = -(mx + ux)*my*ly*mz*uz/8.0; - Nx(1,n,g) = -(ly - my)*mx*ux*mz*uz/8.0; - Nx(2,n,g) = -(mz + uz)*mx*ux*my*ly/8.0; - - // N(n,g) = mx*ux*my*uy*mz*uz/8.0 - n += 1; - Nx(0,n,g) = (mx + ux)*my*uy*mz*uz/8.0; - Nx(1,n,g) = (my + uy)*mx*ux*mz*uz/8.0; - Nx(2,n,g) = (mz + uz)*mx*ux*my*uy/8.0; - - // N(n,g) = -mx*lx*my*uy*mz*uz/8.0 - n += 1; - Nx(0,n,g) = -(lx - mx)*my*uy*mz*uz/8.0; - Nx(1,n,g) = -(my + uy)*mx*lx*mz*uz/8.0; - Nx(2,n,g) = -(mz + uz)*mx*lx*my*uy/8.0; - - // N(n,g) = 
lx*ux*my*ly*mz*lz/4.0 - n += 1; - Nx(0,n,g) = (lx - ux)*my*ly*mz*lz/4.0; - Nx(1,n,g) = (ly - my)*lx*ux*mz*lz/4.0; - Nx(2,n,g) = (lz - mz)*lx*ux*my*ly/4.0; - - // N(n,g) = -mx*ux*ly*uy*mz*lz/4.0 - n += 1; - Nx(0,n,g) = -(mx + ux)*ly*uy*mz*lz/4.0; - Nx(1,n,g) = -(ly - uy)*mx*ux*mz*lz/4.0; - Nx(2,n,g) = -(lz - mz)*mx*ux*ly*uy/4.0; - - // N(n,g) = -lx*ux*my*uy*mz*lz/4.0 - n += 1; - Nx(0,n,g) = -(lx - ux)*my*uy*mz*lz/4.0; - Nx(1,n,g) = -(my + uy)*lx*ux*mz*lz/4.0; - Nx(2,n,g) = -(lz - mz)*lx*ux*my*uy/4.0; - - // N(n,g) = mx*lx*ly*uy*mz*lz/4.0 - n += 1; - Nx(0,n,g) = (lx - mx)*ly*uy*mz*lz/4.0; - Nx(1,n,g) = (ly - uy)*mx*lx*mz*lz/4.0; - Nx(2,n,g) = (lz - mz)*mx*lx*ly*uy/4.0; - - // N(n,g) = -lx*ux*my*ly*mz*uz/4.0 - n += 1; - Nx(0,n,g) = -(lx - ux)*my*ly*mz*uz/4.0; - Nx(1,n,g) = -(ly - my)*lx*ux*mz*uz/4.0; - Nx(2,n,g) = -(mz + uz)*lx*ux*my*ly/4.0; - - // N(n,g) = mx*ux*ly*uy*mz*uz/4.0 - n += 1; - Nx(0,n,g) = (mx + ux)*ly*uy*mz*uz/4.0; - Nx(1,n,g) = (ly - uy)*mx*ux*mz*uz/4.0; - Nx(2,n,g) = (mz + uz)*mx*ux*ly*uy/4.0; - - // N(n,g) = lx*ux*my*uy*mz*uz/4.0 - n += 1; - Nx(0,n,g) = (lx - ux)*my*uy*mz*uz/4.0; - Nx(1,n,g) = (my + uy)*lx*ux*mz*uz/4.0; - Nx(2,n,g) = (mz + uz)*lx*ux*my*uy/4.0; - - // N(n,g) = -mx*lx*ly*uy*mz*uz/4.0 - n += 1; - Nx(0,n,g) = -(lx - mx)*ly*uy*mz*uz/4.0; - Nx(1,n,g) = -(ly - uy)*mx*lx*mz*uz/4.0; - Nx(2,n,g) = -(mz + uz)*mx*lx*ly*uy/4.0; - - // N(n,g) = mx*lx*my*ly*lz*uz/4.0 - n += 1; - Nx(0,n,g) = (lx - mx)*my*ly*lz*uz/4.0; - Nx(1,n,g) = (ly - my)*mx*lx*lz*uz/4.0; - Nx(2,n,g) = (lz - uz)*mx*lx*my*ly/4.0; - - // N(n,g) = -mx*ux*my*ly*lz*uz/4.0 - n += 1; - Nx(0,n,g) = -(mx + ux)*my*ly*lz*uz/4.0; - Nx(1,n,g) = -(ly - my)*mx*ux*lz*uz/4.0; - Nx(2,n,g) = -(lz - uz)*mx*ux*my*ly/4.0; - - // N(n,g) = mx*ux*my*uy*lz*uz/4.0 - n += 1; - Nx(0,n,g) = (mx + ux)*my*uy*lz*uz/4.0; - Nx(1,n,g) = (my + uy)*mx*ux*lz*uz/4.0; - Nx(2,n,g) = (lz - uz)*mx*ux*my*uy/4.0; - - // N(n,g) = -mx*lx*my*uy*lz*uz/4.0 - n += 1; - Nx(0,n,g) = -(lx - mx)*my*uy*lz*uz/4.0; - Nx(1,n,g) = -(my + 
uy)*mx*lx*lz*uz/4.0; - Nx(2,n,g) = -(lz - uz)*mx*lx*my*uy/4.0; - - // N(n,g) = -mx*lx*ly*uy*lz*uz/2.0 - n += 1; - Nx(0,n,g) = -(lx - mx)*ly*uy*lz*uz/2.0; - Nx(1,n,g) = -(ly - uy)*mx*lx*lz*uz/2.0; - Nx(2,n,g) = -(lz - uz)*mx*lx*ly*uy/2.0; - - // N(n,g) = mx*ux*ly*uy*lz*uz/2.0 - n += 1; - Nx(0,n,g) = (mx + ux)*ly*uy*lz*uz/2.0; - Nx(1,n,g) = (ly - uy)*mx*ux*lz*uz/2.0; - Nx(2,n,g) = (lz - uz)*mx*ux*ly*uy/2.0; - - // N(n,g) = -lx*ux*my*ly*lz*uz/2.0 - n += 1; - Nx(0,n,g) = -(lx - ux)*my*ly*lz*uz/2.0; - Nx(1,n,g) = -(ly - my)*lx*ux*lz*uz/2.0; - Nx(2,n,g) = -(lz - uz)*lx*ux*my*ly/2.0; - - // N(n,g) = lx*ux*my*uy*lz*uz/2.0 - n += 1; - Nx(0,n,g) = (lx - ux)*my*uy*lz*uz/2.0; - Nx(1,n,g) = (my + uy)*lx*ux*lz*uz/2.0; - Nx(2,n,g) = (lz - uz)*lx*ux*my*uy/2.0; - - // N(n,g) = -lx*ux*ly*uy*mz*lz/2.0 - n += 1; - Nx(0,n,g) = -(lx - ux)*ly*uy*mz*lz/2.0; - Nx(1,n,g) = -(ly - uy)*lx*ux*mz*lz/2.0; - Nx(2,n,g) = -(lz - mz)*lx*ux*ly*uy/2.0; - - // N(n,g) = lx*ux*ly*uy*mz*uz/2.0 - n += 1; - Nx(0,n,g) = (lx - ux)*ly*uy*mz*uz/2.0; - Nx(1,n,g) = (ly - uy)*lx*ux*mz*uz/2.0; - Nx(2,n,g) = (mz + uz)*lx*ux*ly*uy/2.0; - - // N(n,g) = lx*ux*ly*uy*lz*uz - n += 1; - Nx(0,n,g) = (lx - ux)*ly*uy*lz*uz; - Nx(1,n,g) = (ly - uy)*lx*ux*lz*uz; - Nx(2,n,g) = (lz - uz)*lx*ux*ly*uy; - } - }, - - {ElementType::LIN1, [](const int insd, const int eNoN, const int g, Array& xi, Array& N, - Array3& Nx) -> void - { - N(0,g) = (1.0 - xi(0,g))*0.5; - N(1,g) = (1.0 + xi(0,g))*0.5; - - Nx(0,0,g) = -0.5; - Nx(0,1,g) = 0.5; - } - }, - - {ElementType::LIN2, [](const int insd, const int eNoN, const int g, Array& xi, Array& N, - Array3& Nx) -> void - { - N(0,g) = -xi(0,g)*(1.0 - xi(0,g))*0.50; - N(1,g) = xi(0,g)*(1.0 + xi(0,g))*0.50; - N(2,g) = (1.0 - xi(0,g))*(1.0 + xi(0,g)); - - Nx(0,0,g) = -0.50 + xi(0,g); - Nx(0,1,g) = 0.50 + xi(0,g); - Nx(0,2,g) = -2.0*xi(0,g); - } - }, - - {ElementType::QUD4, [](const int insd, const int eNoN, const int g, Array& xi, Array& N, - Array3& Nx) -> void - { - double lx = 1.0 - xi(0,g); - 
double ly = 1.0 - xi(1,g); - double ux = 1.0 + xi(0,g); - double uy = 1.0 + xi(1,g); - - N(0,g) = lx*ly / 4.0; - N(1,g) = ux*ly / 4.0; - N(2,g) = ux*uy / 4.0; - N(3,g) = lx*uy / 4.0; - - Nx(0,0,g) = -ly / 4.0; - Nx(1,0,g) = -lx / 4.0; - Nx(0,1,g) = ly / 4.0; - Nx(1,1,g) = -ux / 4.0; - Nx(0,2,g) = uy / 4.0; - Nx(1,2,g) = ux / 4.0; - Nx(0,3,g) = -uy / 4.0; - Nx(1,3,g) = lx / 4.0; - } - }, - - {ElementType::QUD9, [](const int insd, const int eNoN, const int g, Array& xi, Array& N, - Array3& Nx) -> void - { - double lx = 1.0 - xi(0,g); - double ly = 1.0 - xi(1,g); - double ux = 1.0 + xi(0,g); - double uy = 1.0 + xi(1,g); - double mx = xi(0,g); - double my = xi(1,g); - - N(0,g) = mx*lx*my*ly/4.0; - N(1,g) = -mx*ux*my*ly/4.0; - N(2,g) = mx*ux*my*uy/4.0; - N(3,g) = -mx*lx*my*uy/4.0; - N(4,g) = -lx*ux*my*ly*0.50; - N(5,g) = mx*ux*ly*uy*0.50; - N(6,g) = lx*ux*my*uy*0.50; - N(7,g) = -mx*lx*ly*uy*0.50; - N(8,g) = lx*ux*ly*uy; - - Nx(0,0,g) = (lx - mx)*my*ly/4.0; - Nx(1,0,g) = (ly - my)*mx*lx/4.0; - Nx(0,1,g) = -(ux + mx)*my*ly/4.0; - Nx(1,1,g) = -(ly - my)*mx*ux/4.0; - Nx(0,2,g) = (ux + mx)*my*uy/4.0; - Nx(1,2,g) = (uy + my)*mx*ux/4.0; - Nx(0,3,g) = -(lx - mx)*my*uy/4.0; - Nx(1,3,g) = -(uy + my)*mx*lx/4.0; - Nx(0,4,g) = -(lx - ux)*my*ly*0.50; - Nx(1,4,g) = -(ly - my)*lx*ux*0.50; - Nx(0,5,g) = (ux + mx)*ly*uy*0.50; - Nx(1,5,g) = (ly - uy)*mx*ux*0.50; - Nx(0,6,g) = (lx - ux)*my*uy*0.50; - Nx(1,6,g) = (uy + my)*lx*ux*0.50; - Nx(0,7,g) = -(lx - mx)*ly*uy*0.50; - Nx(1,7,g) = -(ly - uy)*mx*lx*0.50; - Nx(0,8,g) = (lx - ux)*ly*uy; - Nx(1,8,g) = (ly - uy)*lx*ux; - } - }, - - {ElementType::TET4, [](const int insd, const int eNoN, const int g, Array& xi, Array& N, - Array3& Nx) -> void - { - //std::cout << "[get_element_shape_data] TET4 " << std::endl; - - N(0,g) = xi(0,g); - N(1,g) = xi(1,g); - N(2,g) = xi(2,g); - N(3,g) = 1.0 - xi(0,g) - xi(1,g) - xi(2,g); - - Nx(0,0,g) = 1.0; - Nx(1,0,g) = 0.0; - Nx(2,0,g) = 0.0; - Nx(0,1,g) = 0.0; - Nx(1,1,g) = 1.0; - Nx(2,1,g) = 0.0; - Nx(0,2,g) = 
0.0; - Nx(1,2,g) = 0.0; - Nx(2,2,g) = 1.0; - Nx(0,3,g) = -1.0; - Nx(1,3,g) = -1.0; - Nx(2,3,g) = -1.0; - } - }, - - {ElementType::TET10, [](const int insd, const int eNoN, const int g, Array& xi, Array& N, - Array3& Nx) -> void - { - double s = 1.0 - xi(0,g) - xi(1,g) - xi(2,g); - N(0,g) = xi(0,g)*(2.0*xi(0,g) - 1.0); - N(1,g) = xi(1,g)*(2.0*xi(1,g) - 1.0); - N(2,g) = xi(2,g)*(2.0*xi(2,g) - 1.0); - N(3,g) = s * (2.0*s - 1.0); - N(4,g) = 4.0*xi(0,g)*xi(1,g); - N(5,g) = 4.0*xi(1,g)*xi(2,g); - N(6,g) = 4.0*xi(0,g)*xi(2,g); - N(7,g) = 4.0*xi(0,g)*s; - N(8,g) = 4.0*xi(1,g)*s; - N(9,g) = 4.0*xi(2,g)*s; - - Nx(0,0,g) = 4.0*xi(0,g) - 1.0; - Nx(1,0,g) = 0.0; - Nx(2,0,g) = 0.0; - - Nx(0,1,g) = 0.0; - Nx(1,1,g) = 4.0*xi(1,g) - 1.0; - Nx(2,1,g) = 0.0; - - Nx(0,2,g) = 0.0; - Nx(1,2,g) = 0.0; - Nx(2,2,g) = 4.0*xi(2,g) - 1.0; - - Nx(0,3,g) = 1.0 - 4.0*s; - Nx(1,3,g) = 1.0 - 4.0*s; - Nx(2,3,g) = 1.0 - 4.0*s; - - Nx(0,4,g) = 4.0*xi(1,g); - Nx(1,4,g) = 4.0*xi(0,g); - Nx(2,4,g) = 0.0; - - Nx(0,5,g) = 0.0; - Nx(1,5,g) = 4.0*xi(2,g); - Nx(2,5,g) = 4.0*xi(1,g); - - Nx(0,6,g) = 4.0*xi(2,g); - Nx(1,6,g) = 0.0; - Nx(2,6,g) = 4.0*xi(0,g); - - Nx(0,7,g) = 4.0*( s - xi(0,g)); - Nx(1,7,g) = -4.0*xi(0,g); - Nx(2,7,g) = -4.0*xi(0,g); - - Nx(0,8,g) = -4.0*xi(1,g); - Nx(1,8,g) = 4.0*( s - xi(1,g)); - Nx(2,8,g) = -4.0*xi(1,g); - - Nx(0,9,g) = -4.0*xi(2,g); - Nx(1,9,g) = -4.0*xi(2,g); - Nx(2,9,g) = 4.0*( s - xi(2,g)); - } - }, - - {ElementType::TRI3, [](const int insd, const int eNoN, const int g, Array& xi, Array& N, - Array3& Nx) -> void - { - //std::cout << "[get_element_shape_data] TRI3 " << std::endl; - N(0,g) = xi(0,g); - N(1,g) = xi(1,g); - N(2,g) = 1.0 - xi(0,g) - xi(1,g); - - Nx(0,0,g) = 1.0; - Nx(1,0,g) = 0.0; - Nx(0,1,g) = 0.0; - Nx(1,1,g) = 1.0; - Nx(0,2,g) = -1.0; - Nx(1,2,g) = -1.0; - } - }, - - {ElementType::TRI6, [](const int insd, const int eNoN, const int g, Array& xi, Array& N, - Array3& Nx) -> void - { - double s = 1.0 - xi(0,g) - xi(1,g); - N(0,g) = xi(0,g) * (2.0*xi(0,g) - 
1.0); - N(1,g) = xi(1,g) * (2.0*xi(1,g) - 1.0); - N(2,g) = s * (2.0*s - 1.0); - N(3,g) = 4.0*xi(0,g)*xi(1,g); - N(4,g) = 4.0*xi(1,g)*s; - N(5,g) = 4.0*xi(0,g)*s; - - Nx(0,0,g) = 4.0*xi(0,g) - 1.0; - Nx(1,0,g) = 0.0; - - Nx(0,1,g) = 0.0; - Nx(1,1,g) = 4.0*xi(1,g) - 1.0; - - Nx(0,2,g) = 1.0 - 4.0*s; - Nx(1,2,g) = 1.0 - 4.0*s; - - Nx(0,3,g) = 4.0*xi(1,g); - Nx(1,3,g) = 4.0*xi(0,g); - - Nx(0,4,g) = -4.0*xi(1,g); - Nx(1,4,g) = 4.0*( s - xi(1,g) ); - - Nx(0,5,g) = 4.0*( s - xi(0,g) ); - Nx(1,5,g) = -4.0*xi(0,g); - } - }, - - {ElementType::WDG, [](const int insd, const int eNoN, const int g, Array& xi, Array& N, - Array3& Nx) -> void - { - double ux = xi(0,g); - double uy = xi(1,g); - double uz = 1.0 - ux - uy; - double s = (1.0 + xi(2,g))*0.5; - double t = (1.0 - xi(2,g))*0.5; - N(0,g) = ux*t; - N(1,g) = uy*t; - N(2,g) = uz*t; - N(3,g) = ux*s; - N(4,g) = uy*s; - N(5,g) = uz*s; - - Nx(0,0,g) = t; - Nx(1,0,g) = 0.0; - Nx(2,0,g) = -ux*0.50; - - Nx(0,1,g) = 0.0; - Nx(1,1,g) = t; - Nx(2,1,g) = -uy*0.50; - - Nx(0,2,g) = -t; - Nx(1,2,g) = -t; - Nx(2,2,g) = -uz*0.50; - - Nx(0,3,g) = s; - Nx(1,3,g) = 0.0; - Nx(2,3,g) = ux*0.50; - - Nx(0,4,g) = 0.0; - Nx(1,4,g) = s; - Nx(2,4,g) = uy*0.50; - - Nx(0,5,g) = -s; - Nx(1,5,g) = -s; - Nx(2,5,g) = uz*0.50; - } - }, - - - -}; - - -//------------------------ -// set_element_shape_data -//------------------------ -// Replicates 'SUBROUTINE GETGNN(insd, eType, eNoN, xi, N, Nxi)' defined in NN.f. 
-// -using SetElementShapeMapType = std::map>; - -SetElementShapeMapType set_element_shape_data = { - - {ElementType::HEX8, [](int g, mshType& mesh) -> void { - auto& xi = mesh.xi; - double lx = 1.0 - xi(0,g); - double ly = 1.0 - xi(1,g); - double lz = 1.0 - xi(2,g); - double ux = 1.0 + xi(0,g); - double uy = 1.0 + xi(1,g); - double uz = 1.0 + xi(2,g); - - auto& N = mesh.N; - N(0,g) = lx*ly*lz/8.0; - N(1,g) = ux*ly*lz/8.0; - N(2,g) = ux*uy*lz/8.0; - N(3,g) = lx*uy*lz/8.0; - N(4,g) = lx*ly*uz/8.0; - N(5,g) = ux*ly*uz/8.0; - N(6,g) = ux*uy*uz/8.0; - N(7,g) = lx*uy*uz/8.0; - - auto& Nx = mesh.Nx; - Nx(0,0,g) = -ly*lz/8.0; - Nx(1,0,g) = -lx*lz/8.0; - Nx(2,0,g) = -lx*ly/8.0; - - Nx(0,1,g) = ly*lz/8.0; - Nx(1,1,g) = -ux*lz/8.0; - Nx(2,1,g) = -ux*ly/8.0; - - Nx(0,2,g) = uy*lz/8.0; - Nx(1,2,g) = ux*lz/8.0; - Nx(2,2,g) = -ux*uy/8.0; - - Nx(0,3,g) = -uy*lz/8.0; - Nx(1,3,g) = lx*lz/8.0; - Nx(2,3,g) = -lx*uy/8.0; - - Nx(0,4,g) = -ly*uz/8.0; - Nx(1,4,g) = -lx*uz/8.0; - Nx(2,4,g) = lx*ly/8.0; - - Nx(0,5,g) = ly*uz/8.0; - Nx(1,5,g) = -ux*uz/8.0; - Nx(2,5,g) = ux*ly/8.0; - - Nx(0,6,g) = uy*uz/8.0; - Nx(1,6,g) = ux*uz/8.0; - Nx(2,6,g) = ux*uy/8.0; - - Nx(0,7,g) = -uy*uz/8.0; - Nx(1,7,g) = lx*uz/8.0; - Nx(2,7,g) = lx*uy/8.0; - } - }, - - {ElementType::HEX20, [](int g, mshType& mesh) -> void { - - auto& xi = mesh.xi; - double lx = 1.0 - xi(0,g); - double ly = 1.0 - xi(1,g); - double lz = 1.0 - xi(2,g); - double ux = 1.0 + xi(0,g); - double uy = 1.0 + xi(1,g); - double uz = 1.0 + xi(2,g); - - double mx = lx*ux; - double my = ly*uy; - double mz = lz*uz; - - auto& N = mesh.N; - N(0, g) = lx*ly*lz*(lx+ly+lz-5.0)/8.0; - N(1, g) = ux*ly*lz*(ux+ly+lz-5.0)/8.0; - N(2, g) = ux*uy*lz*(ux+uy+lz-5.0)/8.0; - N(3, g) = lx*uy*lz*(lx+uy+lz-5.0)/8.0; - N(4, g) = lx*ly*uz*(lx+ly+uz-5.0)/8.0; - N(5, g) = ux*ly*uz*(ux+ly+uz-5.0)/8.0; - N(6, g) = ux*uy*uz*(ux+uy+uz-5.0)/8.0; - N(7, g) = lx*uy*uz*(lx+uy+uz-5.0)/8.0; - N(8, g) = mx*ly*lz/4.0; - N(9, g) = ux*my*lz/4.0; - N(10, g) = mx*uy*lz/4.0; - N(11, g) 
= lx*my*lz/4.0; - N(12, g) = mx*ly*uz/4.0; - N(13, g) = ux*my*uz/4.0; - N(14, g) = mx*uy*uz/4.0; - N(15, g) = lx*my*uz/4.0; - N(16, g) = lx*ly*mz/4.0; - N(17, g) = ux*ly*mz/4.0; - N(18, g) = ux*uy*mz/4.0; - N(19, g) = lx*uy*mz/4.0; - - // N(1) = lx*ly*lz*(lx+ly+lz-5.0)/8.0; - auto& Nx = mesh.Nx; - int n = 0; - Nx(0,n,g) = -ly*lz*(lx+ly+lz-5.0+lx)/8.0; - Nx(1,n,g) = -lx*lz*(lx+ly+lz-5.0+ly)/8.0; - Nx(2,n,g) = -lx*ly*(lx+ly+lz-5.0+lz)/8.0; - -//c N(n,g) = ux*ly*lz*(ux+ly+lz-5.0)/8.0; - n += 1; - Nx(0,n,g) = ly*lz*(ux+ly+lz-5.0+ux)/8.0; - Nx(1,n,g) = -ux*lz*(ux+ly+lz-5.0+ly)/8.0; - Nx(2,n,g) = -ux*ly*(ux+ly+lz-5.0+lz)/8.0; - -//c N(n,g) = ux*uy*lz*(ux+uy+lz-5.0)/8.0 - n += 1; - Nx(0,n,g) = uy*lz*(ux+uy+lz-5.0+ux)/8.0; - Nx(1,n,g) = ux*lz*(ux+uy+lz-5.0+uy)/8.0; - Nx(2,n,g) = -ux*uy*(ux+uy+lz-5.0+lz)/8.0; - -//c N(n,g) = lx*uy*lz*(lx+uy+lz-5.0)/8.0 - n += 1; - Nx(0,n,g) = -uy*lz*(lx+uy+lz-5.0+lx)/8.0; - Nx(1,n,g) = lx*lz*(lx+uy+lz-5.0+uy)/8.0; - Nx(2,n,g) = -lx*uy*(lx+uy+lz-5.0+lz)/8.0; - -//c N(n,g) = lx*ly*uz*(lx+ly+uz-5.0)/8.0 - n += 1; - Nx(0,n,g) = -ly*uz*(lx+ly+uz-5.0+lx)/8.0; - Nx(1,n,g) = -lx*uz*(lx+ly+uz-5.0+ly)/8.0; - Nx(2,n,g) = lx*ly*(lx+ly+uz-5.0+uz)/8.0; - -//c N(n,g) = ux*ly*uz*(ux+ly+uz-5.0)/8.0 - n += 1; - Nx(0,n,g) = ly*uz*(ux+ly+uz-5.0+ux)/8.0; - Nx(1,n,g) = -ux*uz*(ux+ly+uz-5.0+ly)/8.0; - Nx(2,n,g) = ux*ly*(ux+ly+uz-5.0+uz)/8.0; - -//c N(n,g) = ux*uy*uz*(ux+uy+uz-5.0)/8.0 - n += 1; - Nx(0,n,g) = uy*uz*(ux+uy+uz-5.0+ux)/8.0; - Nx(1,n,g) = ux*uz*(ux+uy+uz-5.0+uy)/8.0; - Nx(2,n,g) = ux*uy*(ux+uy+uz-5.0+uz)/8.0; - -//c N(n,g) = lx*uy*uz*(lx+uy+uz-5.0)/8.0 - n += 1; - Nx(0,n,g) = -uy*uz*(lx+uy+uz-5.0+lx)/8.0; - Nx(1,n,g) = lx*uz*(lx+uy+uz-5.0+uy)/8.0; - Nx(2,n,g) = lx*uy*(lx+uy+uz-5.0+uz)/8.0; - -//c N(n,g) = mx*ly*lz/4.0 - n += 1; - Nx(0,n,g) = (lx - ux)*ly*lz/4.0; - Nx(1,n,g) = -mx*lz/4.0; - Nx(2,n,g) = -mx*ly/4.0; - -//c N(0n,g) = ux*my*lz/4.0 - n += 1; - Nx(0,n,g) = my*lz/4.0; - Nx(1,n,g) = (ly - uy)*ux*lz/4.0; - Nx(2,n,g) = -ux*my/4.0; - -//c N(0n,g) 
= mx*uy*lz/4.0 - n += 1; - Nx(0,n,g) = (lx - ux)*uy*lz/4.0; - Nx(1,n,g) = mx*lz/4.0; - Nx(2,n,g) = -mx*uy/4.0; - -//c N(0n,g) = lx*my*lz/4.0 - n += 1; - Nx(0,n,g) = -my*lz/4.0; - Nx(1,n,g) = (ly - uy)*lx*lz/4.0; - Nx(2,n,g) = -lx*my/4.0; - -//c N(0n,g) = mx*ly*uz/4.0 - n += 1; - Nx(0,n,g) = (lx - ux)*ly*uz/4.0; - Nx(1,n,g) = -mx*uz/4.0; - Nx(2,n,g) = mx*ly/4.0; - -//c N(0n,g) = ux*my*uz/4.0 - n += 1; - Nx(0,n,g) = my*uz/4.0; - Nx(1,n,g) = (ly - uy)*ux*uz/4.0; - Nx(2,n,g) = ux*my/4.0; - -//c N(0n,g) = mx*uy*uz/4.0 - n += 1; - Nx(0,n,g) = (lx - ux)*uy*uz/4.0; - Nx(1,n,g) = mx*uz/4.0; - Nx(2,n,g) = mx*uy/4.0; - -//c N(0n,g) = lx*my*uz/4.0 - n += 1; - Nx(0,n,g) = -my*uz/4.0; - Nx(1,n,g) = (ly - uy)*lx*uz/4.0; - Nx(2,n,g) = lx*my/4.0; - -//c N(0n,g) = lx*ly*mz/4.0 - n += 1; - Nx(0,n,g) = -ly*mz/4.0; - Nx(1,n,g) = -lx*mz/4.0; - Nx(2,n,g) = (lz - uz)*lx*ly/4.0; - -//c N(0n,g) = ux*ly*mz/4.0 - n += 1; - Nx(0,n,g) = ly*mz/4.0; - Nx(1,n,g) = -ux*mz/4.0; - Nx(2,n,g) = (lz - uz)*ux*ly/4.0; - -//c N(0n,g) = ux*uy*mz/4.0 - n += 1; - Nx(0,n,g) = uy*mz/4.0; - Nx(1,n,g) = ux*mz/4.0; - Nx(2,n,g) = (lz - uz)*ux*uy/4.0; - -//c N(n,g) = lx*uy*mz/4.0 - n += 1; - Nx(0,n,g) = -uy*mz/4.0; - Nx(1,n,g) = lx*mz/4.0; - Nx(2,n,g) = (lz - uz)*lx*uy/4.0; - } - }, - - {ElementType::HEX27, [](int g, mshType& mesh) -> void { - - auto& xi = mesh.xi; - double lx = 1.0 - xi(0,g); - double ly = 1.0 - xi(1,g); - double lz = 1.0 - xi(2,g); - double ux = 1.0 + xi(0,g); - double uy = 1.0 + xi(1,g); - double uz = 1.0 + xi(2,g); - - double mx = xi(0,g); - double my = xi(1,g); - double mz = xi(2,g); - - auto& N = mesh.N; - N(0,g) = -mx*lx*my*ly*mz*lz/8.0; - N(1,g) = mx*ux*my*ly*mz*lz/8.0; - N(2,g) = -mx*ux*my*uy*mz*lz/8.0; - N(3,g) = mx*lx*my*uy*mz*lz/8.0; - N(4,g) = mx*lx*my*ly*mz*uz/8.0; - N(5,g) = -mx*ux*my*ly*mz*uz/8.0; - N(6,g) = mx*ux*my*uy*mz*uz/8.0; - N(7,g) = -mx*lx*my*uy*mz*uz/8.0; - N(8,g) = lx*ux*my*ly*mz*lz/4.0; - N(9,g) = -mx*ux*ly*uy*mz*lz/4.0; - N(10,g) = -lx*ux*my*uy*mz*lz/4.0; - N(11,g) = 
mx*lx*ly*uy*mz*lz/4.0; - N(12,g) = -lx*ux*my*ly*mz*uz/4.0; - N(13,g) = mx*ux*ly*uy*mz*uz/4.0; - N(14,g) = lx*ux*my*uy*mz*uz/4.0; - N(15,g) = -mx*lx*ly*uy*mz*uz/4.0; - N(16,g) = mx*lx*my*ly*lz*uz/4.0; - N(17,g) = -mx*ux*my*ly*lz*uz/4.0; - N(18,g) = mx*ux*my*uy*lz*uz/4.0; - N(19,g) = -mx*lx*my*uy*lz*uz/4.0; - - N(20,g) = -mx*lx*ly*uy*lz*uz/2.0; - N(21,g) = mx*ux*ly*uy*lz*uz/2.0; - N(22,g) = -lx*ux*my*ly*lz*uz/2.0; - N(23,g) = lx*ux*my*uy*lz*uz/2.0; - N(24,g) = -lx*ux*ly*uy*mz*lz/2.0; - N(25,g) = lx*ux*ly*uy*mz*uz/2.0; - - N(26,g) = lx*ux*ly*uy*lz*uz; - - auto& Nxi = mesh.Nx; - int n = 0; - Nxi(0,n,g) = -(lx - mx)*my*ly*mz*lz/8.0; - Nxi(1,n,g) = -(ly - my)*mx*lx*mz*lz/8.0; - Nxi(2,n,g) = -(lz - mz)*mx*lx*my*ly/8.0; - - n += 1; - Nxi(0,n,g) = (mx + ux)*my*ly*mz*lz/8.0; - Nxi(1,n,g) = (ly - my)*mx*ux*mz*lz/8.0; - Nxi(2,n,g) = (lz - mz)*mx*ux*my*ly/8.0; - - n += 1; - Nxi(0,n,g) = -(mx + ux)*my*uy*mz*lz/8.0; - Nxi(1,n,g) = -(my + uy)*mx*ux*mz*lz/8.0; - Nxi(2,n,g) = -(lz - mz)*mx*ux*my*uy/8.0; - - n += 1; - Nxi(0,n,g) = (lx - mx)*my*uy*mz*lz/8.0; - Nxi(1,n,g) = (my + uy)*mx*lx*mz*lz/8.0; - Nxi(2,n,g) = (lz - mz)*mx*lx*my*uy/8.0; - - n += 1; - Nxi(0,n,g) = (lx - mx)*my*ly*mz*uz/8.0; - Nxi(1,n,g) = (ly - my)*mx*lx*mz*uz/8.0; - Nxi(2,n,g) = (mz + uz)*mx*lx*my*ly/8.0; - - n += 1; - Nxi(0,n,g) = -(mx + ux)*my*ly*mz*uz/8.0; - Nxi(1,n,g) = -(ly - my)*mx*ux*mz*uz/8.0; - Nxi(2,n,g) = -(mz + uz)*mx*ux*my*ly/8.0; - - n += 1; - Nxi(0,n,g) = (mx + ux)*my*uy*mz*uz/8.0; - Nxi(1,n,g) = (my + uy)*mx*ux*mz*uz/8.0; - Nxi(2,n,g) = (mz + uz)*mx*ux*my*uy/8.0; - - n += 1; - Nxi(0,n,g) = -(lx - mx)*my*uy*mz*uz/8.0; - Nxi(1,n,g) = -(my + uy)*mx*lx*mz*uz/8.0; - Nxi(2,n,g) = -(mz + uz)*mx*lx*my*uy/8.0; - - n += 1; - Nxi(0,n,g) = (lx - ux)*my*ly*mz*lz/4.0; - Nxi(1,n,g) = (ly - my)*lx*ux*mz*lz/4.0; - Nxi(2,n,g) = (lz - mz)*lx*ux*my*ly/4.0; - - n += 1; - Nxi(0,n,g) = -(mx + ux)*ly*uy*mz*lz/4.0; - Nxi(1,n,g) = -(ly - uy)*mx*ux*mz*lz/4.0; - Nxi(2,n,g) = -(lz - mz)*mx*ux*ly*uy/4.0; - - n += 1; - 
Nxi(0,n,g) = -(lx - ux)*my*uy*mz*lz/4.0; - Nxi(1,n,g) = -(my + uy)*lx*ux*mz*lz/4.0; - Nxi(2,n,g) = -(lz - mz)*lx*ux*my*uy/4.0; - - n += 1; - Nxi(0,n,g) = (lx - mx)*ly*uy*mz*lz/4.0; - Nxi(1,n,g) = (ly - uy)*mx*lx*mz*lz/4.0; - Nxi(2,n,g) = (lz - mz)*mx*lx*ly*uy/4.0; - - n += 1; - Nxi(0,n,g) = -(lx - ux)*my*ly*mz*uz/4.0; - Nxi(1,n,g) = -(ly - my)*lx*ux*mz*uz/4.0; - Nxi(2,n,g) = -(mz + uz)*lx*ux*my*ly/4.0; - - n += 1; - Nxi(0,n,g) = (mx + ux)*ly*uy*mz*uz/4.0; - Nxi(1,n,g) = (ly - uy)*mx*ux*mz*uz/4.0; - Nxi(2,n,g) = (mz + uz)*mx*ux*ly*uy/4.0; - - n += 1; - Nxi(0,n,g) = (lx - ux)*my*uy*mz*uz/4.0; - Nxi(1,n,g) = (my + uy)*lx*ux*mz*uz/4.0; - Nxi(2,n,g) = (mz + uz)*lx*ux*my*uy/4.0; - - n += 1; - Nxi(0,n,g) = -(lx - mx)*ly*uy*mz*uz/4.0; - Nxi(1,n,g) = -(ly - uy)*mx*lx*mz*uz/4.0; - Nxi(2,n,g) = -(mz + uz)*mx*lx*ly*uy/4.0; - - n += 1; - Nxi(0,n,g) = (lx - mx)*my*ly*lz*uz/4.0; - Nxi(1,n,g) = (ly - my)*mx*lx*lz*uz/4.0; - Nxi(2,n,g) = (lz - uz)*mx*lx*my*ly/4.0; - - n += 1; - Nxi(0,n,g) = -(mx + ux)*my*ly*lz*uz/4.0; - Nxi(1,n,g) = -(ly - my)*mx*ux*lz*uz/4.0; - Nxi(2,n,g) = -(lz - uz)*mx*ux*my*ly/4.0; - - n += 1; - Nxi(0,n,g) = (mx + ux)*my*uy*lz*uz/4.0; - Nxi(1,n,g) = (my + uy)*mx*ux*lz*uz/4.0; - Nxi(2,n,g) = (lz - uz)*mx*ux*my*uy/4.0; - - n += 1; - Nxi(0,n,g) = -(lx - mx)*my*uy*lz*uz/4.0; - Nxi(1,n,g) = -(my + uy)*mx*lx*lz*uz/4.0; - Nxi(2,n,g) = -(lz - uz)*mx*lx*my*uy/4.0; - - n += 1; - Nxi(0,n,g) = -(lx - mx)*ly*uy*lz*uz/2.0; - Nxi(1,n,g) = -(ly - uy)*mx*lx*lz*uz/2.0; - Nxi(2,n,g) = -(lz - uz)*mx*lx*ly*uy/2.0; - - n += 1; - Nxi(0,n,g) = (mx + ux)*ly*uy*lz*uz/2.0; - Nxi(1,n,g) = (ly - uy)*mx*ux*lz*uz/2.0; - Nxi(2,n,g) = (lz - uz)*mx*ux*ly*uy/2.0; - - n += 1; - Nxi(0,n,g) = -(lx - ux)*my*ly*lz*uz/2.0; - Nxi(1,n,g) = -(ly - my)*lx*ux*lz*uz/2.0; - Nxi(2,n,g) = -(lz - uz)*lx*ux*my*ly/2.0; - - n += 1; - Nxi(0,n,g) = (lx - ux)*my*uy*lz*uz/2.0; - Nxi(1,n,g) = (my + uy)*lx*ux*lz*uz/2.0; - Nxi(2,n,g) = (lz - uz)*lx*ux*my*uy/2.0; - - n += 1; - Nxi(0,n,g) = -(lx - ux)*ly*uy*mz*lz/2.0; - 
Nxi(1,n,g) = -(ly - uy)*lx*ux*mz*lz/2.0; - Nxi(2,n,g) = -(lz - mz)*lx*ux*ly*uy/2.0; - - n += 1; - Nxi(0,n,g) = (lx - ux)*ly*uy*mz*uz/2.0; - Nxi(1,n,g) = (ly - uy)*lx*ux*mz*uz/2.0; - Nxi(2,n,g) = (mz + uz)*lx*ux*ly*uy/2.0; - - n += 1; - Nxi(0,n,g) = (lx - ux)*ly*uy*lz*uz; - Nxi(1,n,g) = (ly - uy)*lx*ux*lz*uz; - Nxi(2,n,g) = (lz - uz)*lx*ux*ly*uy; - } - }, - - {ElementType::LIN1, [](int g, mshType& mesh) -> void { - //std::cout << "[set_element_shape_data] **************************" << std::endl; - //std::cout << "[set_element_shape_data] ERROR: LIN1 not supported." << std::endl; - //std::cout << "[set_element_shape_data] **************************" << std::endl; - auto& xi = mesh.xi; - auto& N = mesh.N; - N(0,g) = (1.0 - xi(0,g))*0.5; - N(1,g) = (1.0 + xi(0,g))*0.5; - - auto& Nx = mesh.Nx; - Nx(0,0,g) = -0.5; - Nx(0,1,g) = 0.5; - } - }, - - {ElementType::LIN2, [](int g, mshType& mesh) -> void { - auto& xi = mesh.xi; - auto& N = mesh.N; - N(0,g) = -xi(0,g)*(1.0 - xi(0,g))*0.50; - N(1,g) = xi(0,g)*(1.0 + xi(0,g))*0.50; - N(2,g) = (1.0 - xi(0,g))*(1.0 + xi(0,g)); - - auto& Nx = mesh.Nx; - Nx(0,0,g) = -0.50 + xi(0,g); - Nx(0,1,g) = 0.50 + xi(0,g); - Nx(0,2,g) = -2.0*xi(0,g); - } - }, - - {ElementType::QUD4, [](int g, mshType& mesh) -> void { - auto& xi = mesh.xi; - double lx = 1.0 - xi(0,g); - double ly = 1.0 - xi(1,g); - double ux = 1.0 + xi(0,g); - double uy = 1.0 + xi(1,g); - - auto& N = mesh.N; - N(0,g) = lx*ly / 4.0; - N(1,g) = ux*ly / 4.0; - N(2,g) = ux*uy / 4.0; - N(3,g) = lx*uy / 4.0; - - auto& Nx = mesh.Nx; - Nx(0,0,g) = -ly / 4.0; - Nx(1,0,g) = -lx / 4.0; - Nx(0,1,g) = ly / 4.0; - Nx(1,1,g) = -ux / 4.0; - Nx(0,2,g) = uy / 4.0; - Nx(1,2,g) = ux / 4.0; - Nx(0,3,g) = -uy / 4.0; - Nx(1,3,g) = lx / 4.0; - } - }, - - {ElementType::QUD9, [](int g, mshType& mesh) -> void { - auto& xi = mesh.xi; - double lx = 1.0 - xi(0,g); - double ly = 1.0 - xi(1,g); - double ux = 1.0 + xi(0,g); - double uy = 1.0 + xi(1,g); - double mx = xi(0,g); - double my = xi(1,g); - - auto& N = 
mesh.N; - N(0,g) = mx*lx*my*ly/4.0; - N(1,g) = -mx*ux*my*ly/4.0; - N(2,g) = mx*ux*my*uy/4.0; - N(3,g) = -mx*lx*my*uy/4.0; - N(4,g) = -lx*ux*my*ly*0.50; - N(5,g) = mx*ux*ly*uy*0.50; - N(6,g) = lx*ux*my*uy*0.50; - N(7,g) = -mx*lx*ly*uy*0.50; - N(8,g) = lx*ux*ly*uy; - - auto& Nx = mesh.Nx; - Nx(0,0,g) = (lx - mx)*my*ly/4.0; - Nx(1,0,g) = (ly - my)*mx*lx/4.0; - - Nx(0,1,g) = -(ux + mx)*my*ly/4.0; - Nx(1,1,g) = -(ly - my)*mx*ux/4.0; - - Nx(0,2,g) = (ux + mx)*my*uy/4.0; - Nx(1,2,g) = (uy + my)*mx*ux/4.0; - - Nx(0,3,g) = -(lx - mx)*my*uy/4.0; - Nx(1,3,g) = -(uy + my)*mx*lx/4.0; - - Nx(0,4,g) = -(lx - ux)*my*ly*0.50; - Nx(1,4,g) = -(ly - my)*lx*ux*0.50; - - Nx(0,5,g) = (ux + mx)*ly*uy*0.50; - Nx(1,5,g) = (ly - uy)*mx*ux*0.50; - - Nx(0,6,g) = (lx - ux)*my*uy*0.50; - Nx(1,6,g) = (uy + my)*lx*ux*0.50; - - Nx(0,7,g) = -(lx - mx)*ly*uy*0.50; - Nx(1,7,g) = -(ly - uy)*mx*lx*0.50; - - Nx(0,8,g) = (lx - ux)*ly*uy; - Nx(1,8,g) = (ly - uy)*lx*ux; - } - }, - - {ElementType::TET4, [](int g, mshType& mesh) -> void { - auto& xi = mesh.xi; - auto& N = mesh.N; - N(0,g) = xi(0,g); - N(1,g) = xi(1,g); - N(2,g) = xi(2,g); - N(3,g) = 1.0 - xi(0,g) - xi(1,g) - xi(2,g); - - auto& Nx = mesh.Nx; - Nx(0,0,g) = 1.0; - Nx(1,0,g) = 0.0; - Nx(2,0,g) = 0.0; - Nx(0,1,g) = 0.0; - Nx(1,1,g) = 1.0; - Nx(2,1,g) = 0.0; - Nx(0,2,g) = 0.0; - Nx(1,2,g) = 0.0; - Nx(2,2,g) = 1.0; - Nx(0,3,g) = -1.0; - Nx(1,3,g) = -1.0; - Nx(2,3,g) = -1.0; - } - }, - - {ElementType::TET10, [](int g, mshType& mesh) -> void { - auto& xi = mesh.xi; - auto& N = mesh.N; - double s = 1.0 - xi(0,g) - xi(1,g) - xi(2,g); - N(0,g) = xi(0,g)*(2.0*xi(0,g) - 1.0); - N(1,g) = xi(1,g)*(2.0*xi(1,g) - 1.0); - N(2,g) = xi(2,g)*(2.0*xi(2,g) - 1.0); - N(3,g) = s *(2.0*s - 1.0); - N(4,g) = 4.0*xi(0,g)*xi(1,g); - N(5,g) = 4.0*xi(1,g)*xi(2,g); - N(6,g) = 4.0*xi(0,g)*xi(2,g); - N(7,g) = 4.0*xi(0,g)*s; - N(8,g) = 4.0*xi(1,g)*s; - N(9,g) = 4.0*xi(2,g)*s; - - auto& Nx = mesh.Nx; - Nx(0,0,g) = 4.0*xi(0,g) - 1.0; - Nx(1,0,g) = 0.0; - Nx(2,0,g) = 0.0; - - 
Nx(0,1,g) = 0.0; - Nx(1,1,g) = 4.0*xi(1,g) - 1.0; - Nx(2,1,g) = 0.0; - - Nx(0,2,g) = 0.0; - Nx(1,2,g) = 0.0; - Nx(2,2,g) = 4.0*xi(2,g) - 1.0; - - Nx(0,3,g) = 1.0 - 4.0*s; - Nx(1,3,g) = 1.0 - 4.0*s; - Nx(2,3,g) = 1.0 - 4.0*s; - - Nx(0,4,g) = 4.0*xi(1,g); - Nx(1,4,g) = 4.0*xi(0,g); - Nx(2,4,g) = 0.0; - - Nx(0,5,g) = 0.0; - Nx(1,5,g) = 4.0*xi(2,g); - Nx(2,5,g) = 4.0*xi(1,g); - - Nx(0,6,g) = 4.0*xi(2,g); - Nx(1,6,g) = 0.0; - Nx(2,6,g) = 4.0*xi(0,g); - - Nx(0,7,g) = 4.0*( s - xi(0,g)); - Nx(1,7,g) = -4.0*xi(0,g); - Nx(2,7,g) = -4.0*xi(0,g); - - Nx(0,8,g) = -4.0*xi(1,g); - Nx(1,8,g) = 4.0*( s - xi(1,g)); - Nx(2,8,g) = -4.0*xi(1,g); - - Nx(0,9,g) = -4.0*xi(2,g); - Nx(1,9,g) = -4.0*xi(2,g); - Nx(2,9,g) = 4.0*( s - xi(2,g)); - } - }, - - {ElementType::TRI3, [](int g, mshType& mesh) -> void { - auto& xi = mesh.xi; - auto& N = mesh.N; - N(0,g) = xi(0,g); - N(1,g) = xi(1,g); - N(2,g) = 1.0 - xi(0,g) - xi(1,g); - - auto& Nxi = mesh.Nx; - Nxi(0,0,g) = 1.0; - Nxi(1,0,g) = 0.0; - Nxi(0,1,g) = 0.0; - Nxi(1,1,g) = 1.0; - Nxi(0,2,g) = -1.0; - Nxi(1,2,g) = -1.0; - } - }, - - {ElementType::TRI6, [](int g, mshType& mesh) -> void { - auto& xi = mesh.xi; - auto& N = mesh.N; - - double s = 1.0 - xi(0,g) - xi(1,g); - N(0,g) = xi(0,g)*( 2.0*xi(0,g) - 1.0 ); - N(1,g) = xi(1,g)*( 2.0*xi(1,g) - 1.0 ); - N(2,g) = s *( 2.0*s - 1.0 ); - N(3,g) = 4.0*xi(0,g)*xi(1,g); - N(4,g) = 4.0*xi(1,g)*s; - N(5,g) = 4.0*xi(0,g)*s; - - auto& Nxi = mesh.Nx; - Nxi(0,0,g) = 4.0*xi(0,g) - 1.0; - Nxi(1,0,g) = 0.0; - Nxi(0,1,g) = 0.0; - Nxi(1,1,g) = 4.0*xi(1,g) - 1.0; - Nxi(0,2,g) = 1.0 - 4.0*s; - Nxi(1,2,g) = 1.0 - 4.0*s; - Nxi(0,3,g) = 4.0*xi(1,g); - Nxi(1,3,g) = 4.0*xi(0,g); - Nxi(0,4,g) = -4.0*xi(1,g); - Nxi(1,4,g) = 4.0*( s - xi(1,g) ); - Nxi(0,5,g) = 4.0*( s - xi(0,g) ); - Nxi(1,5,g) = -4.0*xi(0,g); - } - }, - - {ElementType::WDG, [](int g, mshType& mesh) -> void - { - auto& xi = mesh.xi; - auto& N = mesh.N; - double ux = xi(0,g); - double uy = xi(1,g); - double uz = 1.0 - ux - uy; - double s = (1.0 + 
xi(2,g))*0.5; - double t = (1.0 - xi(2,g))*0.5; - N(0,g) = ux*t; - N(1,g) = uy*t; - N(2,g) = uz*t; - N(3,g) = ux*s; - N(4,g) = uy*s; - N(5,g) = uz*s; - - auto& Nxi = mesh.Nx; - Nxi(0,0,g) = t; - Nxi(1,0,g) = 0.0; - Nxi(2,0,g) = -ux*0.50; - - Nxi(0,1,g) = 0.0; - Nxi(1,1,g) = t; - Nxi(2,1,g) = -uy*0.50; - - Nxi(0,2,g) = -t; - Nxi(1,2,g) = -t; - Nxi(2,2,g) = -uz*0.50; - - Nxi(0,3,g) = s; - Nxi(1,3,g) = 0.0; - Nxi(2,3,g) = ux*0.50; - - Nxi(0,4,g) = 0.0; - Nxi(1,4,g) = s; - Nxi(2,4,g) = uy*0.50; - - Nxi(0,5,g) = -s; - Nxi(1,5,g) = -s; - Nxi(2,5,g) = uz*0.50; - } - }, - -}; - -//--------------------- -// set_face_shape_data -//--------------------- -// Define a map type used to face element shape function data. -// -// This reproduces 'SUBROUTINE GETGNN(insd, eType, eNoN, xi, N, Nxi)' in NN.f. -// -using SetFaceShapeMapType = std::map>; - -SetFaceShapeMapType set_face_shape_data = { - - {ElementType::PNT, [](int g, faceType& face) -> void - { - face.N(0,g) = 1.0; - } - }, - - {ElementType::QUD8, [](int g, faceType& face) -> void - { - auto& xi = face.xi; - double lx = 1.0 - xi(0,g); - double ly = 1.0 - xi(1,g); - double ux = 1.0 + xi(0,g); - double uy = 1.0 + xi(1,g); - double mx = lx*ux; - double my = ly*uy; - - auto& N = face.N; - N(0,g) = lx*ly*(lx+ly-3.0)/4.0; - N(1,g) = ux*ly*(ux+ly-3.0)/4.0; - N(2,g) = ux*uy*(ux+uy-3.0)/4.0; - N(3,g) = lx*uy*(lx+uy-3.0)/4.0; - N(4,g) = mx*ly*0.50; - N(5,g) = ux*my*0.50; - N(6,g) = mx*uy*0.50; - N(7,g) = lx*my*0.50; - - auto& Nxi = face.Nx; - Nxi(0,0,g) = -ly*(lx+ly-3.0+lx)/4.0; - Nxi(1,0,g) = -lx*(lx+ly-3.0+ly)/4.0; - - Nxi(0,1,g) = ly*(ux+ly-3.0+ux)/4.0; - Nxi(1,1,g) = -ux*(ux+ly-3.0+ly)/4.0; - - Nxi(0,2,g) = uy*(ux+uy-3.0+ux)/4.0; - Nxi(1,2,g) = ux*(ux+uy-3.0+uy)/4.0; - - Nxi(0,3,g) = -uy*(lx+uy-3.0+lx)/4.0; - Nxi(1,3,g) = lx*(lx+uy-3.0+uy)/4.0; - - Nxi(0,4,g) = (lx - ux)*ly*0.50; - Nxi(1,4,g) = -mx*0.50; - - Nxi(0,5,g) = my*0.50; - Nxi(1,5,g) = (ly - uy)*ux*0.50; - - Nxi(0,6,g) = (lx - ux)*uy*0.50; - Nxi(1,6,g) = mx*0.50; - - 
Nxi(0,7,g) = -my*0.50; - Nxi(1,7,g) = (ly - uy)*lx*0.50; - } - }, - - {ElementType::QUD9, [](int g, faceType& face) -> void - { - auto& xi = face.xi; - double lx = 1.0 - xi(0,g); - double ly = 1.0 - xi(1,g); - double ux = 1.0 + xi(0,g); - double uy = 1.0 + xi(1,g); - double mx = xi(0,g); - double my = xi(1,g); - - auto& N = face.N; - N(0,g) = mx*lx*my*ly/4.0; - N(1,g) = -mx*ux*my*ly/4.0; - N(2,g) = mx*ux*my*uy/4.0; - N(3,g) = -mx*lx*my*uy/4.0; - N(4,g) = -lx*ux*my*ly*0.50; - N(5,g) = mx*ux*ly*uy*0.50; - N(6,g) = lx*ux*my*uy*0.50; - N(7,g) = -mx*lx*ly*uy*0.50; - N(8,g) = lx*ux*ly*uy; - - auto& Nx = face.Nx; - Nx(0,0,g) = (lx - mx)*my*ly/4.0; - Nx(1,0,g) = (ly - my)*mx*lx/4.0; - Nx(0,1,g) = -(ux + mx)*my*ly/4.0; - Nx(1,1,g) = -(ly - my)*mx*ux/4.0; - Nx(0,2,g) = (ux + mx)*my*uy/4.0; - Nx(1,2,g) = (uy + my)*mx*ux/4.0; - Nx(0,3,g) = -(lx - mx)*my*uy/4.0; - Nx(1,3,g) = -(uy + my)*mx*lx/4.0; - Nx(0,4,g) = -(lx - ux)*my*ly*0.50; - Nx(1,4,g) = -(ly - my)*lx*ux*0.50; - Nx(0,5,g) = (ux + mx)*ly*uy*0.50; - Nx(1,5,g) = (ly - uy)*mx*ux*0.50; - Nx(0,6,g) = (lx - ux)*my*uy*0.50; - Nx(1,6,g) = (uy + my)*lx*ux*0.50; - Nx(0,7,g) = -(lx - mx)*ly*uy*0.50; - Nx(1,7,g) = -(ly - uy)*mx*lx*0.50; - Nx(0,8,g) = (lx - ux)*ly*uy; - Nx(1,8,g) = (ly - uy)*lx*ux; - } - }, - - {ElementType::LIN1, [](int g, faceType& face) -> void - { - face.N(0,g) = 0.5 * (1.0 - face.xi(0,g)); - face.N(1,g) = 0.5 * (1.0 + face.xi(0,g)); - - face.Nx(0,0,g) = -0.5; - face.Nx(0,1,g) = 0.5; - } - }, - - {ElementType::LIN2, [](int g, faceType& face) -> void - { - auto& xi = face.xi; - auto& N = face.N; - N(0,g) = -xi(0,g)*(1.0 - xi(0,g))*0.50; - N(1,g) = xi(0,g)*(1.0 + xi(0,g))*0.50; - N(2,g) = (1.0 - xi(0,g))*(1.0 + xi(0,g)); - - auto& Nx = face.Nx; - Nx(0,0,g) = -0.50 + xi(0,g); - Nx(0,1,g) = 0.50 + xi(0,g); - Nx(0,2,g) = -2.0*xi(0,g); - } - }, - - {ElementType::QUD4, [](int g, faceType& face) -> void { - auto& xi = face.xi; - double lx = 1.0 - xi(0,g); - double ly = 1.0 - xi(1,g); - double ux = 1.0 + xi(0,g); - 
double uy = 1.0 + xi(1,g); - - auto& N =face.N; - N(0,g) = lx*ly / 4.0; - N(1,g) = ux*ly / 4.0; - N(2,g) = ux*uy / 4.0; - N(3,g) = lx*uy / 4.0; - - auto& Nx = face.Nx; - Nx(0,0,g) = -ly / 4.0; - Nx(1,0,g) = -lx / 4.0; - Nx(0,1,g) = ly / 4.0; - Nx(1,1,g) = -ux / 4.0; - Nx(0,2,g) = uy / 4.0; - Nx(1,2,g) = ux / 4.0; - Nx(0,3,g) = -uy / 4.0; - Nx(1,3,g) = lx / 4.0; - } - }, - - {ElementType::TRI3, [](int g, faceType& face) -> void - { - face.N(0,g) = face.xi(0,g); - face.N(1,g) = face.xi(1,g); - face.N(2,g) = 1.0 - face.xi(0,g) - face.xi(1,g); - - face.Nx(0,0,g) = 1.0; - face.Nx(1,0,g) = 0.0; - - face.Nx(0,1,g) = 0.0; - face.Nx(1,1,g) = 1.0; - - face.Nx(0,2,g) = -1.0; - face.Nx(1,2,g) = -1.0; - } - }, - - {ElementType::TRI6, [](int g, faceType& face) -> void - { - auto& xi = face.xi; - auto& N = face.N; - - double s = 1.0 - xi(0,g) - xi(1,g); - N(0,g) = xi(0,g)*( 2.0*xi(0,g) - 1.0 ); - N(1,g) = xi(1,g)*( 2.0*xi(1,g) - 1.0 ); - N(2,g) = s *( 2.0*s - 1.0 ); - N(3,g) = 4.0*xi(0,g)*xi(1,g); - N(4,g) = 4.0*xi(1,g)*s; - N(5,g) = 4.0*xi(0,g)*s; - - auto& Nxi = face.Nx; - Nxi(0,0,g) = 4.0*xi(0,g) - 1.0; - Nxi(1,0,g) = 0.0; - - Nxi(0,1,g) = 0.0; - Nxi(1,1,g) = 4.0*xi(1,g) - 1.0; - - Nxi(0,2,g) = 1.0 - 4.0*s; - Nxi(1,2,g) = 1.0 - 4.0*s; - - Nxi(0,3,g) = 4.0*xi(1,g); - Nxi(1,3,g) = 4.0*xi(0,g); - - Nxi(0,4,g) = -4.0*xi(1,g); - Nxi(1,4,g) = 4.0*( s - xi(1,g) ); - - Nxi(0,5,g) = 4.0*( s - xi(0,g) ); - Nxi(1,5,g) = -4.0*xi(0,g); - } - }, - - -}; diff --git a/Code/Source/solver/nn_elem_gnnxx.h b/Code/Source/solver/nn_elem_gnnxx.h deleted file mode 100644 index 7b40a783b..000000000 --- a/Code/Source/solver/nn_elem_gnnxx.h +++ /dev/null @@ -1,139 +0,0 @@ -// SPDX-FileCopyrightText: Copyright (c) Stanford University, The Regents of the University of California, and others. -// SPDX-License-Identifier: BSD-3-Clause - -/// @brief Define a map type used to compute 2nd direivatives of element shape function data. 
-/// -/// Replicates 'SUBROUTINE GETGNNxx(insd, ind2, eType, eNoN, xi, Nxx)' -// -static double fp = 4.0; -static double fn = -4.0; -static double en = -8.0; -static double ze = 0.0; - -using GetElement2ndDerivMapType = std::map&, Array3&)>>; - -GetElement2ndDerivMapType get_element_2nd_derivs = { - - {ElementType::QUD8, [](const int insd, const int ind2, const int eNoN, const int g, const Array& xi, - Array3& Nxx) -> void { - - double lx = 1.0 - xi(0); - double ly = 1.0 - xi(1); - double ux = 1.0 + xi(0); - double uy = 1.0 + xi(1); - double mx = xi(0); - double my = xi(1); - - Nxx(0,0,g) = ly*0.50; - Nxx(1,0,g) = lx*0.50; - Nxx(2,0,g) = (lx+lx+ly+ly-3.0)/4.0; - - Nxx(0,1,g) = ly*0.50; - Nxx(1,1,g) = ux*0.50; - Nxx(2,1,g) = -(ux+ux+ly+ly-3.0)/4.0; - - Nxx(0,2,g) = uy*0.50; - Nxx(1,2,g) = ux*0.50; - Nxx(2,3,g) = (ux+ux+uy+uy-3.0)/4.0; - - Nxx(0,3,g) = uy*0.50; - Nxx(1,3,g) = lx*0.50; - Nxx(2,3,g) = -(lx+lx+uy+uy-3.0)/4.0; - - Nxx(0,4,g) = -ly; - Nxx(1,4,g) = 0.0; - Nxx(2,4,g) = mx; - - Nxx(0,5,g) = 0.0; - Nxx(1,5,g) = -ux; - Nxx(2,5,g) = -my; - - Nxx(0,6,g) = -uy; - Nxx(1,6,g) = 0.0; - Nxx(2,6,g) = -mx; - - Nxx(0,7,g) = 0.0; - Nxx(1,7,g) = -lx; - Nxx(2,7,g) = my; - } - }, - - {ElementType::QUD9, [](const int insd, const int ind2, const int eNoN, const int g, const Array& xi, - Array3& Nxx) -> void { - - double lx = 1.0 - xi(0,g); - double ly = 1.0 - xi(1,g); - double ux = 1.0 + xi(0,g); - double uy = 1.0 + xi(1,g); - double mx = xi(0,g); - double my = xi(1,g); - - Nxx(0,0,g) = -ly*my*0.5; - Nxx(1,0,g) = -lx*mx*0.5; - Nxx(2,0,g) = (lx-mx)*(ly-my)/4.0; - - Nxx(0,1,g) = -ly*my*0.5; - Nxx(1,1,g) = ux*mx*0.5; - Nxx(2,1,g) = -(ux+mx)*(ly-my)/4.0; - - Nxx(0,2,g) = uy*my*0.5; - Nxx(1,2,g) = ux*mx*0.5; - Nxx(2,2,g) = (ux+mx)*(uy+my)/4.0; - - Nxx(0,3,g) = uy*my*0.5; - Nxx(1,3,g) = -lx*mx*0.5; - Nxx(2,3,g) = -(lx-mx)*(uy+my)/4.0; - - Nxx(0,4,g) = ly*my; - Nxx(1,4,g) = lx*ux; - Nxx(2,4,g) = mx*(ly-my); - - Nxx(0,5,g) = ly*uy; - Nxx(1,5,g) = -ux*mx; - Nxx(2,5,g) = -(ux+mx)*my; - 
- Nxx(0,6,g) = -uy*my; - Nxx(1,6,g) = lx*ux; - Nxx(2,6,g) = -mx*(uy+my); - - Nxx(0,7,g) = ly*uy; - Nxx(1,7,g) = lx*mx; - Nxx(2,7,g) = (lx-mx)*my; - - Nxx(0,8,g) = -ly*uy*2.0; - Nxx(1,8,g) = -lx*ux*2.0; - Nxx(2,8,g) = mx*my*4.0; - } - }, - - {ElementType::TET10, [](const int insd, const int ind2, const int eNoN, const int g, const Array& xi, - Array3& Nxx) -> void { - Nxx.set_row(0, g, {fp, ze, ze, ze, ze, ze}); - Nxx.set_row(1, g, {ze, fp, ze, ze, ze, ze}); - Nxx.set_row(2, g, {ze, ze, fp, ze, ze, ze}); - Nxx.set_row(3, g, {fp, fp, fp, fp, fp, fp}); - Nxx.set_row(4, g, {ze, ze, ze, fp, ze, ze}); - Nxx.set_row(5, g, {ze, ze, ze, ze, fp, ze}); - Nxx.set_row(6, g, {ze, ze, ze, ze, ze, fp}); - Nxx.set_row(7, g, {en, ze, ze, fn, ze, fn}); - Nxx.set_row(8, g, {ze, en, ze, fn, fn, ze}); - Nxx.set_row(9, g, {ze, ze, en, ze, fn, fn}); - } - }, - - {ElementType::TRI6, [](const int insd, const int ind2, const int eNoN, const int g, const Array& xi, - Array3& Nxx) -> void { - - Nxx.set_row(0, g, {fp, ze, ze}); - Nxx.set_row(1, g, {ze, fp, ze}); - Nxx.set_row(2, g, {fp, fp, fp}); - Nxx.set_row(3, g, {ze, ze, fp}); - Nxx.set_row(4, g, {ze, en, fn}); - Nxx.set_row(5, g, {en, ze, fn}); - } - }, - -}; - - From 36046f8a2f0baa006dcd2fa896bd66498c7032b0 Mon Sep 17 00:00:00 2001 From: Zachary Sexton Date: Mon, 8 Jun 2026 14:25:44 -0700 Subject: [PATCH 10/22] fixing the licensing and copyright comments --- Code/Source/solver/FE/Basis/BasisExceptions.h | 8 ++--- Code/Source/solver/FE/Basis/BasisFactory.cpp | 8 ++--- Code/Source/solver/FE/Basis/BasisFactory.h | 8 ++--- Code/Source/solver/FE/Basis/BasisFunction.cpp | 8 ++--- Code/Source/solver/FE/Basis/BasisFunction.h | 8 ++--- Code/Source/solver/FE/Basis/BasisTraits.h | 8 ++--- Code/Source/solver/FE/Basis/LagrangeBasis.cpp | 8 ++--- Code/Source/solver/FE/Basis/LagrangeBasis.h | 8 ++--- .../FE/Basis/NodeOrderingConventions.cpp | 8 ++--- .../solver/FE/Basis/NodeOrderingConventions.h | 8 ++--- .../solver/FE/Basis/SerendipityBasis.cpp | 8 
++--- .../Source/solver/FE/Basis/SerendipityBasis.h | 8 ++--- Code/Source/solver/FE/Common/Types.h | 31 ++----------------- .../solver/FE/Math/DenseLinearAlgebra.cpp | 8 ++--- .../solver/FE/Math/DenseLinearAlgebra.h | 8 ++--- .../solver/FE/Math/DenseTransformKernels.h | 8 ++--- Code/Source/solver/FE/Math/Matrix.h | 3 ++ Code/Source/solver/FE/Math/MatrixExpr.h | 3 ++ Code/Source/solver/FE/Math/Vector.h | 3 ++ Code/Source/solver/FE/Math/VectorExpr.h | 3 ++ 20 files changed, 44 insertions(+), 119 deletions(-) diff --git a/Code/Source/solver/FE/Basis/BasisExceptions.h b/Code/Source/solver/FE/Basis/BasisExceptions.h index 8ee92a3dd..c1af17049 100644 --- a/Code/Source/solver/FE/Basis/BasisExceptions.h +++ b/Code/Source/solver/FE/Basis/BasisExceptions.h @@ -1,9 +1,5 @@ -/* Copyright (c) Stanford University, The Regents of the University of California, and others. - * - * All Rights Reserved. - * - * See License file. - */ +// SPDX-FileCopyrightText: Copyright (c) Stanford University, The Regents of the University of California, and others. +// SPDX-License-Identifier: BSD-3-Clause #ifndef SVMP_FE_BASIS_BASISEXCEPTIONS_H #define SVMP_FE_BASIS_BASISEXCEPTIONS_H diff --git a/Code/Source/solver/FE/Basis/BasisFactory.cpp b/Code/Source/solver/FE/Basis/BasisFactory.cpp index 9f0867959..bc01be0ed 100644 --- a/Code/Source/solver/FE/Basis/BasisFactory.cpp +++ b/Code/Source/solver/FE/Basis/BasisFactory.cpp @@ -1,9 +1,5 @@ -/* Copyright (c) Stanford University, The Regents of the University of California, and others. - * - * All Rights Reserved. - * - * See License file. - */ +// SPDX-FileCopyrightText: Copyright (c) Stanford University, The Regents of the University of California, and others. 
+// SPDX-License-Identifier: BSD-3-Clause #include "BasisFactory.h" diff --git a/Code/Source/solver/FE/Basis/BasisFactory.h b/Code/Source/solver/FE/Basis/BasisFactory.h index c937dd4a0..b188b3aa2 100644 --- a/Code/Source/solver/FE/Basis/BasisFactory.h +++ b/Code/Source/solver/FE/Basis/BasisFactory.h @@ -1,9 +1,5 @@ -/* Copyright (c) Stanford University, The Regents of the University of California, and others. - * - * All Rights Reserved. - * - * See License file. - */ +// SPDX-FileCopyrightText: Copyright (c) Stanford University, The Regents of the University of California, and others. +// SPDX-License-Identifier: BSD-3-Clause #ifndef SVMP_FE_BASIS_BASISFACTORY_H #define SVMP_FE_BASIS_BASISFACTORY_H diff --git a/Code/Source/solver/FE/Basis/BasisFunction.cpp b/Code/Source/solver/FE/Basis/BasisFunction.cpp index 578c46c88..3d95671f4 100644 --- a/Code/Source/solver/FE/Basis/BasisFunction.cpp +++ b/Code/Source/solver/FE/Basis/BasisFunction.cpp @@ -1,9 +1,5 @@ -/* Copyright (c) Stanford University, The Regents of the University of California, and others. - * - * All Rights Reserved. - * - * See License file. - */ +// SPDX-FileCopyrightText: Copyright (c) Stanford University, The Regents of the University of California, and others. +// SPDX-License-Identifier: BSD-3-Clause #include "BasisFunction.h" diff --git a/Code/Source/solver/FE/Basis/BasisFunction.h b/Code/Source/solver/FE/Basis/BasisFunction.h index bf6ac5de7..5ad65f35d 100644 --- a/Code/Source/solver/FE/Basis/BasisFunction.h +++ b/Code/Source/solver/FE/Basis/BasisFunction.h @@ -1,9 +1,5 @@ -/* Copyright (c) Stanford University, The Regents of the University of California, and others. - * - * All Rights Reserved. - * - * See License file. - */ +// SPDX-FileCopyrightText: Copyright (c) Stanford University, The Regents of the University of California, and others. 
+// SPDX-License-Identifier: BSD-3-Clause #ifndef SVMP_FE_BASIS_BASISFUNCTION_H #define SVMP_FE_BASIS_BASISFUNCTION_H diff --git a/Code/Source/solver/FE/Basis/BasisTraits.h b/Code/Source/solver/FE/Basis/BasisTraits.h index d97b59f1f..eca5c1c69 100644 --- a/Code/Source/solver/FE/Basis/BasisTraits.h +++ b/Code/Source/solver/FE/Basis/BasisTraits.h @@ -1,9 +1,5 @@ -/* Copyright (c) Stanford University, The Regents of the University of California, and others. - * - * All Rights Reserved. - * - * See License file. - */ +// SPDX-FileCopyrightText: Copyright (c) Stanford University, The Regents of the University of California, and others. +// SPDX-License-Identifier: BSD-3-Clause #ifndef SVMP_FE_BASIS_BASISTRAITS_H #define SVMP_FE_BASIS_BASISTRAITS_H diff --git a/Code/Source/solver/FE/Basis/LagrangeBasis.cpp b/Code/Source/solver/FE/Basis/LagrangeBasis.cpp index 372209722..ece2d9cb5 100644 --- a/Code/Source/solver/FE/Basis/LagrangeBasis.cpp +++ b/Code/Source/solver/FE/Basis/LagrangeBasis.cpp @@ -1,9 +1,5 @@ -/* Copyright (c) Stanford University, The Regents of the University of California, and others. - * - * All Rights Reserved. - * - * See License file. - */ +// SPDX-FileCopyrightText: Copyright (c) Stanford University, The Regents of the University of California, and others. +// SPDX-License-Identifier: BSD-3-Clause #include "LagrangeBasis.h" #include "NodeOrderingConventions.h" diff --git a/Code/Source/solver/FE/Basis/LagrangeBasis.h b/Code/Source/solver/FE/Basis/LagrangeBasis.h index dae149872..43304a263 100644 --- a/Code/Source/solver/FE/Basis/LagrangeBasis.h +++ b/Code/Source/solver/FE/Basis/LagrangeBasis.h @@ -1,9 +1,5 @@ -/* Copyright (c) Stanford University, The Regents of the University of California, and others. - * - * All Rights Reserved. - * - * See License file. - */ +// SPDX-FileCopyrightText: Copyright (c) Stanford University, The Regents of the University of California, and others. 
+// SPDX-License-Identifier: BSD-3-Clause #ifndef SVMP_FE_BASIS_LAGRANGEBASIS_H #define SVMP_FE_BASIS_LAGRANGEBASIS_H diff --git a/Code/Source/solver/FE/Basis/NodeOrderingConventions.cpp b/Code/Source/solver/FE/Basis/NodeOrderingConventions.cpp index ae3ea8ed3..76662abe1 100644 --- a/Code/Source/solver/FE/Basis/NodeOrderingConventions.cpp +++ b/Code/Source/solver/FE/Basis/NodeOrderingConventions.cpp @@ -1,9 +1,5 @@ -/* Copyright (c) Stanford University, The Regents of the University of California, and others. - * - * All Rights Reserved. - * - * See License file. - */ +// SPDX-FileCopyrightText: Copyright (c) Stanford University, The Regents of the University of California, and others. +// SPDX-License-Identifier: BSD-3-Clause #include "NodeOrderingConventions.h" #include "BasisExceptions.h" diff --git a/Code/Source/solver/FE/Basis/NodeOrderingConventions.h b/Code/Source/solver/FE/Basis/NodeOrderingConventions.h index 8a43cc4e3..4b11cca32 100644 --- a/Code/Source/solver/FE/Basis/NodeOrderingConventions.h +++ b/Code/Source/solver/FE/Basis/NodeOrderingConventions.h @@ -1,9 +1,5 @@ -/* Copyright (c) Stanford University, The Regents of the University of California, and others. - * - * All Rights Reserved. - * - * See License file. - */ +// SPDX-FileCopyrightText: Copyright (c) Stanford University, The Regents of the University of California, and others. +// SPDX-License-Identifier: BSD-3-Clause #ifndef SVMP_FE_BASIS_NODEORDERINGCONVENTIONS_H #define SVMP_FE_BASIS_NODEORDERINGCONVENTIONS_H diff --git a/Code/Source/solver/FE/Basis/SerendipityBasis.cpp b/Code/Source/solver/FE/Basis/SerendipityBasis.cpp index 237f8c2ce..d551419a8 100644 --- a/Code/Source/solver/FE/Basis/SerendipityBasis.cpp +++ b/Code/Source/solver/FE/Basis/SerendipityBasis.cpp @@ -1,9 +1,5 @@ -/* Copyright (c) Stanford University, The Regents of the University of California, and others. - * - * All Rights Reserved. - * - * See License file. 
- */ +// SPDX-FileCopyrightText: Copyright (c) Stanford University, The Regents of the University of California, and others. +// SPDX-License-Identifier: BSD-3-Clause #include "SerendipityBasis.h" #include "LagrangeBasis.h" diff --git a/Code/Source/solver/FE/Basis/SerendipityBasis.h b/Code/Source/solver/FE/Basis/SerendipityBasis.h index 10e426164..e0289f82d 100644 --- a/Code/Source/solver/FE/Basis/SerendipityBasis.h +++ b/Code/Source/solver/FE/Basis/SerendipityBasis.h @@ -1,9 +1,5 @@ -/* Copyright (c) Stanford University, The Regents of the University of California, and others. - * - * All Rights Reserved. - * - * See License file. - */ +// SPDX-FileCopyrightText: Copyright (c) Stanford University, The Regents of the University of California, and others. +// SPDX-License-Identifier: BSD-3-Clause #ifndef SVMP_FE_BASIS_SERENDIPITYBASIS_H #define SVMP_FE_BASIS_SERENDIPITYBASIS_H diff --git a/Code/Source/solver/FE/Common/Types.h b/Code/Source/solver/FE/Common/Types.h index bb3f23bca..e3d5a46e9 100644 --- a/Code/Source/solver/FE/Common/Types.h +++ b/Code/Source/solver/FE/Common/Types.h @@ -1,32 +1,5 @@ -/* Copyright (c) Stanford University, The Regents of the University of California, and others. - * - * All Rights Reserved. - * - * See Copyright-SimVascular.txt for additional details. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ +// SPDX-FileCopyrightText: Copyright (c) Stanford University, The Regents of the University of California, and others. +// SPDX-License-Identifier: BSD-3-Clause #ifndef SVMP_FE_TYPES_H #define SVMP_FE_TYPES_H diff --git a/Code/Source/solver/FE/Math/DenseLinearAlgebra.cpp b/Code/Source/solver/FE/Math/DenseLinearAlgebra.cpp index 7d909fa0c..8be9a7560 100644 --- a/Code/Source/solver/FE/Math/DenseLinearAlgebra.cpp +++ b/Code/Source/solver/FE/Math/DenseLinearAlgebra.cpp @@ -1,9 +1,5 @@ -/* Copyright (c) Stanford University, The Regents of the University of California, and others. - * - * All Rights Reserved. - * - * See License file. - */ +// SPDX-FileCopyrightText: Copyright (c) Stanford University, The Regents of the University of California, and others. 
+// SPDX-License-Identifier: BSD-3-Clause #include "DenseLinearAlgebra.h" diff --git a/Code/Source/solver/FE/Math/DenseLinearAlgebra.h b/Code/Source/solver/FE/Math/DenseLinearAlgebra.h index 7684439b5..6c81755f4 100644 --- a/Code/Source/solver/FE/Math/DenseLinearAlgebra.h +++ b/Code/Source/solver/FE/Math/DenseLinearAlgebra.h @@ -1,9 +1,5 @@ -/* Copyright (c) Stanford University, The Regents of the University of California, and others. - * - * All Rights Reserved. - * - * See License file. - */ +// SPDX-FileCopyrightText: Copyright (c) Stanford University, The Regents of the University of California, and others. +// SPDX-License-Identifier: BSD-3-Clause #ifndef SVMP_FE_MATH_DENSELINEARALGEBRA_H #define SVMP_FE_MATH_DENSELINEARALGEBRA_H diff --git a/Code/Source/solver/FE/Math/DenseTransformKernels.h b/Code/Source/solver/FE/Math/DenseTransformKernels.h index 8bf83ec0b..50f1002de 100644 --- a/Code/Source/solver/FE/Math/DenseTransformKernels.h +++ b/Code/Source/solver/FE/Math/DenseTransformKernels.h @@ -1,9 +1,5 @@ -/* Copyright (c) Stanford University, The Regents of the University of California, and others. - * - * All Rights Reserved. - * - * See License file. - */ +// SPDX-FileCopyrightText: Copyright (c) Stanford University, The Regents of the University of California, and others. +// SPDX-License-Identifier: BSD-3-Clause #ifndef SVMP_FE_MATH_DENSETRANSFORMKERNELS_H #define SVMP_FE_MATH_DENSETRANSFORMKERNELS_H diff --git a/Code/Source/solver/FE/Math/Matrix.h b/Code/Source/solver/FE/Math/Matrix.h index 8cb28e5d5..3f3a9d9b6 100644 --- a/Code/Source/solver/FE/Math/Matrix.h +++ b/Code/Source/solver/FE/Math/Matrix.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright (c) Stanford University, The Regents of the University of California, and others. 
+// SPDX-License-Identifier: BSD-3-Clause + #ifndef SVMP_FE_MATH_MATRIX_H #define SVMP_FE_MATH_MATRIX_H diff --git a/Code/Source/solver/FE/Math/MatrixExpr.h b/Code/Source/solver/FE/Math/MatrixExpr.h index 13010bddf..288bbc5ca 100644 --- a/Code/Source/solver/FE/Math/MatrixExpr.h +++ b/Code/Source/solver/FE/Math/MatrixExpr.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright (c) Stanford University, The Regents of the University of California, and others. +// SPDX-License-Identifier: BSD-3-Clause + #ifndef SVMP_FE_MATH_MATRIX_EXPR_H #define SVMP_FE_MATH_MATRIX_EXPR_H diff --git a/Code/Source/solver/FE/Math/Vector.h b/Code/Source/solver/FE/Math/Vector.h index 777f9945b..a1214f9aa 100644 --- a/Code/Source/solver/FE/Math/Vector.h +++ b/Code/Source/solver/FE/Math/Vector.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright (c) Stanford University, The Regents of the University of California, and others. +// SPDX-License-Identifier: BSD-3-Clause + #ifndef SVMP_FE_MATH_VECTOR_H #define SVMP_FE_MATH_VECTOR_H diff --git a/Code/Source/solver/FE/Math/VectorExpr.h b/Code/Source/solver/FE/Math/VectorExpr.h index 178b66b8a..aa712dd63 100644 --- a/Code/Source/solver/FE/Math/VectorExpr.h +++ b/Code/Source/solver/FE/Math/VectorExpr.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright (c) Stanford University, The Regents of the University of California, and others. 
+// SPDX-License-Identifier: BSD-3-Clause + #ifndef SVMP_FE_MATH_VECTOR_EXPR_H #define SVMP_FE_MATH_VECTOR_EXPR_H From 3691503eed8da410083633ed831ec7c350bd433f Mon Sep 17 00:00:00 2001 From: Zachary Sexton Date: Tue, 9 Jun 2026 10:00:33 -0700 Subject: [PATCH 11/22] including doxygen documentation for Basis and Math submodules --- .github/workflows/documentation.yml | 2 +- Code/Source/solver/FE/Basis/BasisFunction.h | 86 ++++++++++ Code/Source/solver/FE/Basis/LagrangeBasis.cpp | 19 ++- Code/Source/solver/FE/Basis/LagrangeBasis.h | 158 ++++++++++++++++++ .../Source/solver/FE/Basis/SerendipityBasis.h | 116 +++++++++++++ Code/Source/solver/FE/Math/Matrix.h | 27 ++- Code/Source/solver/FE/Math/Vector.h | 14 ++ Documentation/Doxyfile | 8 +- .../FE/Basis/test_BasisErrorPaths.cpp | 2 +- 9 files changed, 423 insertions(+), 9 deletions(-) diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index db67bbbdb..c1f8a3b5d 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -10,7 +10,7 @@ jobs: - uses: actions/checkout@v4 - name: Build doxygen documentation run: | - sudo apt install -y doxygen + sudo apt install -y doxygen graphviz doxygen Documentation/Doxyfile - name: Save documentation uses: actions/upload-artifact@v4 diff --git a/Code/Source/solver/FE/Basis/BasisFunction.h b/Code/Source/solver/FE/Basis/BasisFunction.h index 5ad65f35d..f8f78d7b6 100644 --- a/Code/Source/solver/FE/Basis/BasisFunction.h +++ b/Code/Source/solver/FE/Basis/BasisFunction.h @@ -12,11 +12,25 @@ #include #include +/// \defgroup FE FE Library +/// \brief Finite-element interfaces and utilities used by the solver. +/// +/// The FE library groups basis functions, math utilities, assembly interfaces, +/// and related support code that can be built and consumed as a coherent +/// finite-element component. 
+ +/// \defgroup FE_Basis Basis +/// \ingroup FE +/// \brief Basis-function interfaces, concrete basis families, and reference-node conventions. + namespace svmp { namespace FE { namespace basis { +/// \brief Gradient vector type used by basis evaluators. using Gradient = math::Vector; + +/// \brief Hessian matrix type used by basis evaluators. using Hessian = math::Matrix; [[nodiscard]] inline Hessian make_symmetric_hessian(Real xx, @@ -71,38 +85,110 @@ inline void add_scaled_hessian(Hessian& target, } } +/// \brief Abstract interface for finite-element basis-function families. +/// \ingroup FE_Basis +/// +/// BasisFunction defines the common query and evaluation API used by solver +/// code that does not need to know the concrete basis implementation. Derived +/// classes provide values at minimum and can override analytical gradients, +/// Hessians, combined evaluation, and flat-buffer output paths. class BasisFunction { public: + /// \brief Destroy a basis function through the abstract interface. virtual ~BasisFunction() = default; + /// \brief Return the concrete basis family. + /// \return Basis family identifier. virtual BasisType basis_type() const noexcept = 0; + + /// \brief Return the canonical element type represented by this basis. + /// \return Element type used for node layout and evaluation. virtual ElementType element_type() const noexcept = 0; + + /// \brief Return the reference-space dimension of the basis. + /// \return Reference dimension, from zero for points through three for volume elements. virtual int dimension() const noexcept = 0; + + /// \brief Return the polynomial order represented by this basis. + /// \return Effective polynomial order after any element-family normalization. virtual int order() const noexcept = 0; + + /// \brief Return the number of basis functions and reference nodes. + /// \return Basis function count. 
virtual std::size_t size() const noexcept = 0; + /// \brief Evaluate basis function values at a reference coordinate. + /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. + /// \param values Receives one value per basis function. virtual void evaluate_values(const math::Vector& xi, std::vector& values) const = 0; + + /// \brief Evaluate basis gradients at a reference coordinate. + /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. + /// \param gradients Receives one three-component gradient per basis function. + /// \throws BasisEvaluationException If gradients are not available for the basis. virtual void evaluate_gradients(const math::Vector& xi, std::vector& gradients) const; + + /// \brief Evaluate basis Hessians at a reference coordinate. + /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. + /// \param hessians Receives one 3-by-3 Hessian per basis function. + /// \throws BasisEvaluationException If Hessians are not available for the basis. virtual void evaluate_hessians(const math::Vector& xi, std::vector& hessians) const; + + /// \brief Evaluate values, gradients, and Hessians together. + /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. + /// \param values Receives one value per basis function. + /// \param gradients Receives one three-component gradient per basis function. + /// \param hessians Receives one 3-by-3 Hessian per basis function. virtual void evaluate_all(const math::Vector& xi, std::vector& values, std::vector& gradients, std::vector& hessians) const; + /// \brief Evaluate basis values into a flat caller-provided buffer. + /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. + /// \param values_out Output buffer with at least size() entries. 
virtual void evaluate_values_to(const math::Vector& xi, Real* SVMP_RESTRICT values_out) const; + + /// \brief Evaluate basis gradients into a flat caller-provided buffer. + /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. + /// \param gradients_out Output buffer with node-major layout: node * 3 + component. virtual void evaluate_gradients_to(const math::Vector& xi, Real* SVMP_RESTRICT gradients_out) const; + + /// \brief Evaluate basis Hessians into a flat caller-provided buffer. + /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. + /// \param hessians_out Output buffer with node-major row-major layout: node * 9 + row * 3 + col. virtual void evaluate_hessians_to(const math::Vector& xi, Real* SVMP_RESTRICT hessians_out) const; protected: + /// \brief Approximate gradients by centered finite differences of values. + /// + /// \details This helper exists as a development and fallback utility for + /// basis implementations that do not yet provide analytical gradients. It + /// is useful for prototyping new basis families and for checking analytical + /// derivative formulas in tests. Production element assembly should prefer + /// analytical gradients when available because finite differences introduce + /// truncation/roundoff sensitivity and require multiple value evaluations + /// per reference coordinate. void numerical_gradient(const math::Vector& xi, std::vector& gradients, Real eps = Real(1e-6)) const; + + /// \brief Approximate Hessians by centered finite differences of gradients. + /// + /// \details This helper exists for the same reason as numerical_gradient: + /// it provides a simple reference implementation for prototyping and + /// derivative verification when analytical second derivatives are not yet + /// implemented. It depends on evaluate_gradients(), so it is only available + /// for basis implementations that can already provide gradients. 
Analytical + /// Hessians should be used in performance-sensitive solver paths because + /// finite-difference Hessians amplify numerical error and require repeated + /// gradient evaluations. void numerical_hessian(const math::Vector& xi, std::vector& hessians, Real eps = Real(1e-5)) const; diff --git a/Code/Source/solver/FE/Basis/LagrangeBasis.cpp b/Code/Source/solver/FE/Basis/LagrangeBasis.cpp index ece2d9cb5..d777447cb 100644 --- a/Code/Source/solver/FE/Basis/LagrangeBasis.cpp +++ b/Code/Source/solver/FE/Basis/LagrangeBasis.cpp @@ -16,6 +16,7 @@ namespace { using Vec3 = math::Vector; +// Return the equispaced 1D reference coordinate in [-1, 1]. inline constexpr Real equispaced_pm_one_coord(int i, int order) { if (order <= 0) { return Real(0); @@ -40,6 +41,7 @@ struct NormalizedLagrangeRequest { int order; }; +// Validate and return the supported basis topology for a Lagrange element type. BasisTopology supported_lagrange_topology(ElementType type) { const BasisTopology top = topology(type); if (top == BasisTopology::Unknown) { @@ -49,6 +51,7 @@ BasisTopology supported_lagrange_topology(ElementType type) { return top; } +// Normalize named higher-order element requests to base Lagrange topologies. NormalizedLagrangeRequest normalize_lagrange_request(ElementType element_type, int order) { switch (element_type) { case ElementType::Line3: @@ -79,13 +82,14 @@ NormalizedLagrangeRequest normalize_lagrange_request(ElementType element_type, i case ElementType::Pyramid13: case ElementType::Pyramid14: throw BasisElementCompatibilityException( - "LagrangeBasis: pyramid support has been removed from the current solver basis scope", + "LagrangeBasis: pyramid support is not within the current solver basis scope", __FILE__, __LINE__, __func__); default: return {element_type, order}; } } +// Convert a coordinate on [-1, 1] to an equispaced axis node index. 
std::size_t axis_index_pm_one(Real coord, int order) { if (order <= 0) { return 0u; @@ -94,6 +98,7 @@ std::size_t axis_index_pm_one(Real coord, int order) { return static_cast(std::llround(scaled)); } +// Convert a simplex barycentric coordinate to a lattice index. int simplex_lattice_index(Real value, int order) { if (order <= 0) { return 0; @@ -101,6 +106,7 @@ int simplex_lattice_index(Real value, int order) { return static_cast(std::llround(value * Real(order))); } +// Compute simplex interpolation exponents from a reference node. LagrangeBasis::SimplexExponent simplex_exponent_from_point(const Vec3& p, BasisTopology top, int order) { @@ -121,6 +127,7 @@ LagrangeBasis::SimplexExponent simplex_exponent_from_point(const Vec3& p, return e; } +// Evaluate 1D Lagrange polynomials and derivatives at a point. void evaluate_1d_lagrange(Real x, const std::vector& nodes, AxisEval& out) { const std::size_t n = nodes.size(); out.value.assign(n, Real(0)); @@ -185,6 +192,7 @@ void evaluate_1d_lagrange(Real x, const std::vector& nodes, AxisEval& out) } } +// Evaluate one barycentric polynomial factor and derivatives. std::array simplex_factor(int alpha, Real lambda, int order) { Real value = Real(1); Real first = Real(0); @@ -204,6 +212,7 @@ std::array simplex_factor(int alpha, Real lambda, int order) { return {value, first, second}; } +// Evaluate simplex Lagrange basis functions and derivatives. void evaluate_simplex(const Vec3& xi, BasisTopology top, int order, @@ -291,6 +300,7 @@ void evaluate_simplex(const Vec3& xi, } } +// Store a gradient in the flat buffer layout used by fast evaluators. void store_gradient(const Gradient& gradient, Real* dst) { dst[0] = gradient[0]; dst[1] = gradient[1]; @@ -314,6 +324,7 @@ LagrangeBasis::LagrangeBasis(ElementType type, int order) init_nodes(); } +// Initialize equispaced 1D interpolation nodes for tensor-product axes. 
void LagrangeBasis::init_equispaced_1d_nodes() { nodes_1d_.resize(static_cast(order_ + 1)); for (int i = 0; i <= order_; ++i) { @@ -322,6 +333,7 @@ void LagrangeBasis::init_equispaced_1d_nodes() { } } +// Initialize reference nodes and topology-specific lookup data. void LagrangeBasis::init_nodes() { nodes_.clear(); nodes_1d_.clear(); @@ -357,10 +369,12 @@ void LagrangeBasis::init_nodes() { __FILE__, __LINE__, __func__); } +// Build the single reference node for a point basis. void LagrangeBasis::build_point_nodes() { nodes_.push_back(Vec3{Real(0), Real(0), Real(0)}); } +// Build nodes and axis indices for tensor-product elements. void LagrangeBasis::build_tensor_product_nodes(int dimensions) { init_equispaced_1d_nodes(); nodes_ = ReferenceNodeLayout::get_lagrange_node_coords(element_type_, order_); @@ -378,6 +392,7 @@ void LagrangeBasis::build_tensor_product_nodes(int dimensions) { } } +// Build nodes and barycentric exponents for simplex elements. void LagrangeBasis::build_simplex_nodes() { nodes_ = ReferenceNodeLayout::get_lagrange_node_coords(element_type_, order_); simplex_exponents_.reserve(nodes_.size()); @@ -386,6 +401,7 @@ void LagrangeBasis::build_simplex_nodes() { } } +// Build nodes and mixed triangle-axis lookup data for wedge elements. void LagrangeBasis::build_wedge_nodes() { init_equispaced_1d_nodes(); nodes_ = ReferenceNodeLayout::get_lagrange_node_coords(element_type_, order_); @@ -412,6 +428,7 @@ void LagrangeBasis::build_wedge_nodes() { } } +// Evaluate requested basis quantities into caller-provided flat buffers. 
void LagrangeBasis::evaluate_all_to(const Vec3& xi, Real* SVMP_RESTRICT values_out, Real* SVMP_RESTRICT gradients_out, diff --git a/Code/Source/solver/FE/Basis/LagrangeBasis.h b/Code/Source/solver/FE/Basis/LagrangeBasis.h index 43304a263..3bb1a5e74 100644 --- a/Code/Source/solver/FE/Basis/LagrangeBasis.h +++ b/Code/Source/solver/FE/Basis/LagrangeBasis.h @@ -14,37 +14,193 @@ namespace svmp { namespace FE { namespace basis { +/// \defgroup FE_LagrangeBasis LagrangeBasis +/// \ingroup FE_Basis +/// \brief Construction and evaluation API for nodal Lagrange finite-element bases. +/// +/// \details This group documents the complete nodal Lagrange basis evaluator +/// used by the FE library. The implementation covers tensor-product, +/// simplex, and wedge reference topologies with exact analytical first and +/// second derivatives in reference coordinates. +/// @{ + +/// \brief Nodal Lagrange basis on supported reference finite elements. +/// +/// \details LagrangeBasis represents the nodal interpolation basis associated +/// with an equispaced reference-node lattice. It supports point, line, +/// quadrilateral, hexahedron, triangle, tetrahedron, and wedge reference +/// elements. Named complete quadratic elements such as Line3, Triangle6, +/// Quad9, Tetra10, Hex27, and Wedge18 are normalized to their canonical +/// linear topology plus effective order 2. +/// +/// Tensor-product elements use the one-dimensional nodal polynomials +/// \f[ +/// l_i(x) = \prod_{j \ne i} \frac{x - x_j}{x_i - x_j} +/// \f] +/// on equispaced coordinates in \f$[-1, 1]\f$. Multi-dimensional basis +/// functions are products of the active axis polynomials, for example +/// \f$N_{ijk}(r,s,t) = l_i(r)l_j(s)l_k(t)\f$ on a hexahedron. +/// +/// Simplex elements use barycentric coordinates and integer lattice +/// exponents. 
For a node with exponent tuple \f$\alpha\f$, where +/// \f$\sum_a \alpha_a = p\f$, the basis is assembled from scaled +/// falling-factorial factors, +/// \f[ +/// N_\alpha(\lambda) = +/// \prod_a \prod_{m=0}^{\alpha_a-1} +/// \frac{p\lambda_a - m}{m + 1}. +/// \f] +/// Gradients and Hessians are evaluated analytically by differentiating these +/// factors and applying the barycentric-coordinate chain rule. +/// +/// Wedge elements are treated as a tensor product between a triangle simplex +/// basis and a one-dimensional through-axis basis: +/// \f$N_{a k}(r,s,t) = T_a(r,s)l_k(t)\f$. +/// +/// The vector-returning evaluators are convenient API wrappers. The `*_to` +/// methods write to caller-provided flat buffers and are intended for assembly +/// paths that avoid temporary allocations. class LagrangeBasis : public BasisFunction { public: + /// \brief Axis-index tuple for tensor-product reference nodes. using TensorNodeIndex = std::array; + + /// \brief Barycentric exponent tuple for simplex reference nodes. using SimplexExponent = std::array; + + /// \brief Triangle-node and axis-node tuple for wedge reference nodes. using WedgeNodeIndex = std::array; + /// \brief Construct a Lagrange basis for an element type and polynomial order. + /// + /// \details The constructor normalizes complete higher-order aliases to the + /// canonical topology and effective polynomial order, builds the reference + /// node coordinates, and precomputes topology-specific lookup data used by + /// evaluation. Tensor-product bases store per-axis node indices, simplex + /// bases store barycentric exponent tuples, and wedge bases store the + /// triangle-node/axis-node decomposition. + /// + /// \param type Element type used to determine topology and reference-node layout. + /// \param order Requested polynomial order. + /// \throws BasisConfigurationException If the effective order is negative. + /// \throws BasisElementCompatibilityException If the element type is unsupported. 
LagrangeBasis(ElementType type, int order); + /// \copydoc BasisFunction::basis_type() BasisType basis_type() const noexcept override { return BasisType::Lagrange; } + + /// \copydoc BasisFunction::element_type() ElementType element_type() const noexcept override { return element_type_; } + + /// \copydoc BasisFunction::dimension() int dimension() const noexcept override { return dimension_; } + + /// \copydoc BasisFunction::order() int order() const noexcept override { return order_; } + + /// \copydoc BasisFunction::size() std::size_t size() const noexcept override { return nodes_.size(); } + /// \brief Return the reference interpolation nodes in basis ordering. + /// + /// \details The returned node order matches the basis-function order used + /// by all evaluators. Coordinates are reference-element coordinates: + /// tensor-product axes use \f$[-1,1]\f$, triangles and tetrahedra use the + /// repository's simplex reference coordinates, and wedges combine triangle + /// reference coordinates with a \f$[-1,1]\f$ through-axis coordinate. + /// + /// \return Reference node coordinates, one per basis function. const std::vector>& nodes() const noexcept { return nodes_; } + /// \brief Evaluate Lagrange basis function values at a reference coordinate. + /// + /// \details Values satisfy the nodal interpolation property + /// \f$N_i(x_j)=\delta_{ij}\f$ at the basis nodes. Tensor-product values are + /// products of one-dimensional Lagrange polynomials. Simplex values are + /// products of barycentric falling-factorial factors. Wedge values are + /// products of triangle simplex values and through-axis Lagrange values. + /// + /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. + /// \param values Receives one value per basis function. void evaluate_values(const math::Vector& xi, std::vector& values) const final; + + /// \brief Evaluate analytical Lagrange basis gradients at a reference coordinate. 
+ /// + /// \details Gradients are derivatives with respect to reference + /// coordinates, not physical coordinates. Tensor-product gradients apply + /// the product rule to the active axis polynomials. Simplex gradients + /// differentiate the barycentric factors and multiply by the constant + /// gradients of the barycentric coordinates. Wedge gradients combine the + /// triangle gradient in the first two components with the through-axis + /// derivative in the third component. + /// + /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. + /// \param gradients Receives one three-component gradient per basis function. void evaluate_gradients(const math::Vector& xi, std::vector& gradients) const final; + + /// \brief Evaluate analytical Lagrange basis Hessians at a reference coordinate. + /// + /// \details Hessians are second derivatives in reference coordinates and + /// are stored as 3-by-3 matrices. Tensor-product Hessians contain pure + /// second axis derivatives on the diagonal and mixed product-rule terms + /// off diagonal. Simplex Hessians are assembled from first and second + /// derivatives of the barycentric factors. Wedge Hessians contain triangle + /// Hessian terms, through-axis second derivatives, and mixed + /// triangle/through-axis derivative products. + /// + /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. + /// \param hessians Receives one 3-by-3 Hessian per basis function. void evaluate_hessians(const math::Vector& xi, std::vector& hessians) const final; + + /// \brief Evaluate Lagrange values, gradients, and Hessians together. + /// + /// \details This is the allocation-friendly vector API for callers that + /// need all basis quantities at the same quadrature point. The underlying + /// evaluator computes only topology-local polynomial data once and then + /// fills all requested outputs. + /// + /// \param xi Reference coordinate. 
Lower-dimensional elements use the active prefix components. + /// \param values Receives one value per basis function. + /// \param gradients Receives one three-component gradient per basis function. + /// \param hessians Receives one 3-by-3 Hessian per basis function. void evaluate_all(const math::Vector& xi, std::vector& values, std::vector& gradients, std::vector& hessians) const final; + /// \brief Evaluate Lagrange basis values into a flat caller-provided buffer. + /// + /// \details This is the low-allocation API intended for element assembly + /// loops. The buffer is filled in basis-node order and no vector resizing + /// is performed. + /// + /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. + /// \param values_out Output buffer with at least size() entries. void evaluate_values_to(const math::Vector& xi, Real* SVMP_RESTRICT values_out) const final; + + /// \brief Evaluate Lagrange basis gradients into a flat caller-provided buffer. + /// + /// \details Gradients are written in node-major order with three + /// reference-coordinate components per node. For node \f$i\f$ and component + /// \f$c\f$, the entry is `gradients_out[i * 3 + c]`. + /// + /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. + /// \param gradients_out Output buffer with node-major layout: node * 3 + component. void evaluate_gradients_to(const math::Vector& xi, Real* SVMP_RESTRICT gradients_out) const final; + + /// \brief Evaluate Lagrange basis Hessians into a flat caller-provided buffer. + /// + /// \details Hessians are written in node-major row-major order. For node + /// \f$i\f$ and Hessian component \f$(r,c)\f$, the entry is + /// `hessians_out[i * 9 + r * 3 + c]`. + /// + /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. + /// \param hessians_out Output buffer with node-major row-major layout: node * 9 + row * 3 + col. 
void evaluate_hessians_to(const math::Vector& xi, Real* SVMP_RESTRICT hessians_out) const final; @@ -73,6 +229,8 @@ class LagrangeBasis : public BasisFunction { Real* SVMP_RESTRICT hessians_out) const; }; +/// @} + } // namespace basis } // namespace FE } // namespace svmp diff --git a/Code/Source/solver/FE/Basis/SerendipityBasis.h b/Code/Source/solver/FE/Basis/SerendipityBasis.h index e0289f82d..fc0b897cf 100644 --- a/Code/Source/solver/FE/Basis/SerendipityBasis.h +++ b/Code/Source/solver/FE/Basis/SerendipityBasis.h @@ -17,23 +17,137 @@ namespace svmp { namespace FE { namespace basis { +/// \defgroup FE_SerendipityBasis SerendipityBasis +/// \ingroup FE_Basis +/// \brief Construction and evaluation API for reduced serendipity finite-element bases. +/// +/// \details This group documents reduced degree-of-freedom basis families that +/// preserve nodal interpolation on supported element boundaries while omitting +/// selected interior tensor-product modes. These bases are used for standard +/// serendipity elements and geometry-mode mappings that intentionally use a +/// lower-order interpolation space. +/// @{ + +/// \brief Reduced-degree-of-freedom serendipity basis on supported reference elements. +/// +/// \details SerendipityBasis implements nodal bases for Quad4/Quad8, +/// Hex8/Hex20, and Wedge15. Compared with a complete tensor-product Lagrange +/// basis of the same nominal order, a serendipity basis removes selected +/// interior modes while retaining nodal interpolation on the supported node +/// layout. +/// +/// Quadrilateral serendipity bases are built from monomials +/// \f$x^{a_x}y^{a_y}\f$ whose superlinear degree is at most the requested +/// order. In this implementation the superlinear degree is +/// \f[ +/// sldeg(x^{a_x}y^{a_y}) = +/// \begin{cases} a_x, & a_x > 1 \\ 0, & a_x \le 1 \end{cases} +/// + +/// \begin{cases} a_y, & a_y > 1 \\ 0, & a_y \le 1 \end{cases}. 
+/// \f] +/// The nodal basis is recovered by inverting the Vandermonde interpolation +/// matrix at the selected reference nodes. Values, gradients, and Hessians are +/// then evaluated by differentiating the monomial vector and applying the +/// inverse Vandermonde coefficients. +/// +/// Hex8 uses the standard trilinear corner basis +/// \f$(1 \pm r)(1 \pm s)(1 \pm t)/8\f$. Hex20 and Wedge15 use tabulated +/// polynomial coefficient tables over monomial bases; analytical gradients and +/// Hessians are obtained by differentiating those monomials. Hex20 evaluation +/// is reordered through ReferenceNodeLayout so the output matches the public +/// basis ordering. +/// +/// When `geometry_mode` is enabled for Hex20, the basis uses the trilinear +/// Hex8 corner functions for geometry mapping and assigns zero contribution to +/// the quadratic edge nodes. This preserves the public Hex20 node count while +/// intentionally reducing the geometry interpolation order. class SerendipityBasis : public BasisFunction { public: + /// \brief Construct a serendipity basis for an element type and polynomial order. + /// + /// \details The constructor selects the topology-specific interpolation + /// space, computes the reference node coordinates, and initializes any + /// coefficient tables needed for evaluation. Quadrilateral bases build and + /// invert a Vandermonde matrix for the selected serendipity monomials. + /// Hex20 and Wedge15 use fixed coefficient tables. For hexahedra, only + /// linear Hex8 and quadratic Hex20 serendipity spaces are supported. For + /// wedges, only quadratic Wedge15 is supported. + /// + /// \param type Element type used to determine topology and reference-node layout. + /// \param order Requested polynomial order. + /// \param geometry_mode When true, allow reduced geometry-mapping behavior for supported elements. + /// \throws BasisConfigurationException If the requested order or mode is invalid. 
+ /// \throws BasisElementCompatibilityException If the element type is unsupported. SerendipityBasis(ElementType type, int order, bool geometry_mode = false); + /// \copydoc BasisFunction::basis_type() BasisType basis_type() const noexcept override { return BasisType::Serendipity; } + + /// \copydoc BasisFunction::element_type() ElementType element_type() const noexcept override { return element_type_; } + + /// \copydoc BasisFunction::dimension() int dimension() const noexcept override { return dimension_; } + + /// \copydoc BasisFunction::order() int order() const noexcept override { return order_; } + + /// \copydoc BasisFunction::size() std::size_t size() const noexcept override { return size_; } + + /// \brief Return the reference interpolation nodes in basis ordering. + /// + /// \details Node coordinates are the points at which the serendipity basis + /// satisfies the nodal interpolation property. Quadrilateral nodes are + /// placed first on the boundary and then, for higher order requests, at the + /// selected interior points needed to make the reduced monomial space + /// unisolvent. Hexahedral and wedge nodes are taken from + /// ReferenceNodeLayout. + /// + /// \return Reference node coordinates, one per basis function. const std::vector>& nodes() const noexcept { return nodes_; } + /// \brief Evaluate serendipity basis function values at a reference coordinate. + /// + /// \details For quadrilateral bases, this evaluates the serendipity + /// monomial vector and multiplies by the inverse Vandermonde matrix to + /// obtain nodal shape-function values. For Hex8, values are the standard + /// trilinear corner products. For Hex20 and Wedge15, values are evaluated + /// from the stored polynomial coefficient tables. In Hex20 geometry mode, + /// only the first eight corner values are nonzero and they match Hex8. + /// + /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. 
+ /// \param values Receives one value per basis function. void evaluate_values(const math::Vector& xi, std::vector& values) const override; + /// \brief Evaluate analytical serendipity basis gradients at a reference coordinate. + /// + /// \details Gradients are derivatives with respect to reference + /// coordinates. Quadrilateral gradients differentiate the monomial vector + /// before applying the inverse Vandermonde coefficients. Hex8 gradients are + /// direct derivatives of the trilinear corner products. Hex20 and Wedge15 + /// gradients are computed by differentiating the tabulated monomial + /// expansions. In Hex20 geometry mode, edge-node gradients are zero and the + /// corner gradients match Hex8. + /// + /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. + /// \param gradients Receives one three-component gradient per basis function. void evaluate_gradients(const math::Vector& xi, std::vector& gradients) const override; + /// \brief Evaluate analytical serendipity basis Hessians at a reference coordinate. + /// + /// \details Hessians are second derivatives in reference coordinates and + /// are stored as 3-by-3 matrices. Quadrilateral Hessians use second + /// derivatives of the monomial vector and inverse Vandermonde coefficients. + /// Hex8 Hessians are delegated to the linear Lagrange Hex8 basis. Hex20 and + /// Wedge15 Hessians are computed by differentiating their polynomial + /// coefficient tables twice. In Hex20 geometry mode, only the corner + /// Hessians from the Hex8 geometry mapping are populated. + /// + /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. + /// \param hessians Receives one 3-by-3 Hessian per basis function. 
void evaluate_hessians(const math::Vector& xi, std::vector& hessians) const override; @@ -52,6 +166,8 @@ class SerendipityBasis : public BasisFunction { bool geometry_mode_; }; +/// @} + } // namespace basis } // namespace FE } // namespace svmp diff --git a/Code/Source/solver/FE/Math/Matrix.h b/Code/Source/solver/FE/Math/Matrix.h index 3f3a9d9b6..f7432f38c 100644 --- a/Code/Source/solver/FE/Math/Matrix.h +++ b/Code/Source/solver/FE/Math/Matrix.h @@ -25,12 +25,21 @@ #include #include +/// \defgroup FE_MatrixMath Matrix +/// \ingroup FE_Math +/// \brief Fixed-size matrix types, matrix expressions, and small-matrix operations. +/// +/// \details The Matrix submodule contains row-major fixed-size matrices used +/// by FE kernels, expression-template support for matrix algebra, and direct +/// determinant/inverse implementations for common element-level sizes. + namespace svmp { namespace FE { namespace math { /** * @brief Fixed-size matrix for element-level computations + * @ingroup FE_MatrixMath * @tparam T Scalar type (float, double) * @tparam M Number of rows * @tparam N Number of columns @@ -770,7 +779,14 @@ inline Matrix inverse_3x3(const Matrix& m) { return adj * inv_det; } -// Template specializations for 2x2 Matrix determinant and inverse +/** + * @brief Specialized fixed-size 2-by-2 matrix for element-level computations. + * @ingroup FE_MatrixMath + * @tparam T Scalar type. + * + * This specialization preserves the Matrix API while using direct formulas for + * 2-by-2 determinant and inverse operations. + */ template class Matrix : public MatrixExpr> { static constexpr std::size_t M = 2; @@ -1006,7 +1022,14 @@ class Matrix : public MatrixExpr> { const T* end() const { return data_ + 4; } }; -// Template specialization for 3x3 Matrix +/** + * @brief Specialized fixed-size 3-by-3 matrix for element-level computations. + * @ingroup FE_MatrixMath + * @tparam T Scalar type. 
+ * + * This specialization preserves the Matrix API while using direct formulas for + * 3-by-3 determinant and inverse operations. + */ template class Matrix : public MatrixExpr> { static constexpr std::size_t M = 3; diff --git a/Code/Source/solver/FE/Math/Vector.h b/Code/Source/solver/FE/Math/Vector.h index a1214f9aa..0ec99c81f 100644 --- a/Code/Source/solver/FE/Math/Vector.h +++ b/Code/Source/solver/FE/Math/Vector.h @@ -24,6 +24,19 @@ #include #include +/// \defgroup FE_Math Math +/// \ingroup FE +/// \brief Fixed-size and dense linear algebra utilities for finite-element computations. +/// +/// \details The Math module provides small fixed-size vector and matrix types +/// used in element-level kernels, expression-template infrastructure for +/// allocation-free algebraic expressions, and dense linear algebra utilities +/// used by basis construction and local transforms. +/// +/// \defgroup FE_VectorMath Vector +/// \ingroup FE_Math +/// \brief Fixed-size vector types and vector expression utilities. 
+ namespace svmp { namespace FE { namespace math { @@ -47,6 +60,7 @@ inline bool approx_equal(T a, T b, T tol = tolerance) { /** * @brief Fixed-size vector for element-level computations + * @ingroup FE_VectorMath * @tparam T Scalar type (float, double) * @tparam N Vector dimension * diff --git a/Documentation/Doxyfile b/Documentation/Doxyfile index acd5ba21c..3c29a08f1 100644 --- a/Documentation/Doxyfile +++ b/Documentation/Doxyfile @@ -191,10 +191,10 @@ TREEVIEW_WIDTH = 250 EXT_LINKS_IN_WINDOW = NO FORMULA_FONTSIZE = 10 USE_MATHJAX = YES -MATHJAX_VERSION = MathJax_3 -MATHJAX_FORMAT = chtml -MATHJAX_RELPATH = https://cdn.jsdelivr.net/npm/mathjax@3 -MATHJAX_EXTENSIONS = ams +MATHJAX_VERSION = MathJax_2 +MATHJAX_FORMAT = HTML-CSS +MATHJAX_RELPATH = https://cdn.jsdelivr.net/npm/mathjax@2 +MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols MATHJAX_CODEFILE = SEARCHENGINE = YES SERVER_BASED_SEARCH = NO diff --git a/tests/unitTests/FE/Basis/test_BasisErrorPaths.cpp b/tests/unitTests/FE/Basis/test_BasisErrorPaths.cpp index 430390e54..d4bf1d6e5 100644 --- a/tests/unitTests/FE/Basis/test_BasisErrorPaths.cpp +++ b/tests/unitTests/FE/Basis/test_BasisErrorPaths.cpp @@ -1,6 +1,6 @@ /** * @file test_BasisErrorPaths.cpp - * @brief Error-path coverage for the migrated Lagrange-focused Basis subset. + * @brief Error-path coverage for the Lagrange-focused Basis subset. 
*/ #include From c53e0e06ef4165b3b8b4069f2fbec246bbd4ab54 Mon Sep 17 00:00:00 2001 From: Zachary Sexton Date: Tue, 9 Jun 2026 11:34:34 -0700 Subject: [PATCH 12/22] updating serendipity basis to be concrete terminal classes with `final` --- .../solver/FE/Basis/SerendipityBasis.cpp | 442 ++++++++---------- .../Source/solver/FE/Basis/SerendipityBasis.h | 56 ++- 2 files changed, 238 insertions(+), 260 deletions(-) diff --git a/Code/Source/solver/FE/Basis/SerendipityBasis.cpp b/Code/Source/solver/FE/Basis/SerendipityBasis.cpp index d551419a8..358e76123 100644 --- a/Code/Source/solver/FE/Basis/SerendipityBasis.cpp +++ b/Code/Source/solver/FE/Basis/SerendipityBasis.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: BSD-3-Clause #include "SerendipityBasis.h" -#include "LagrangeBasis.h" #include "NodeOrderingConventions.h" #include "Math/DenseLinearAlgebra.h" @@ -19,6 +18,61 @@ namespace basis { namespace { using Vec3 = math::Vector; +void store_gradient(const Gradient& gradient, Real* dst) { + dst[0] = gradient[0]; + dst[1] = gradient[1]; + dst[2] = gradient[2]; +} + +void evaluate_hex8_reference(Real r, + Real s, + Real t, + Real* values, + Real* gradients, + Real* hessians) { + static constexpr int signs[8][3] = { + {-1, -1, -1}, + { 1, -1, -1}, + { 1, 1, -1}, + {-1, 1, -1}, + {-1, -1, 1}, + { 1, -1, 1}, + { 1, 1, 1}, + {-1, 1, 1}, + }; + + for (std::size_t i = 0; i < 8u; ++i) { + const Real a = Real(signs[i][0]); + const Real b = Real(signs[i][1]); + const Real c = Real(signs[i][2]); + const Real ar = Real(1) + a * r; + const Real bs = Real(1) + b * s; + const Real ct = Real(1) + c * t; + + if (values) { + values[i] = Real(0.125) * ar * bs * ct; + } + if (gradients) { + Real* g = gradients + i * 3u; + g[0] = Real(0.125) * a * bs * ct; + g[1] = Real(0.125) * b * ar * ct; + g[2] = Real(0.125) * c * ar * bs; + } + if (hessians) { + Real* h = hessians + i * 9u; + h[0] = Real(0); + h[1] = Real(0.125) * a * b * ct; + h[2] = Real(0.125) * a * c * bs; + h[3] = h[1]; + h[4] = 
Real(0); + h[5] = Real(0.125) * b * c * ar; + h[6] = h[2]; + h[7] = h[5]; + h[8] = Real(0); + } + } +} + int quad_serendipity_superlinear_degree(int ax, int ay) { return (ax > 1 ? ax : 0) + (ay > 1 ? ay : 0); } @@ -496,96 +550,24 @@ SerendipityBasis::SerendipityBasis(ElementType type, int order, bool geometry_mo } } -void SerendipityBasis::evaluate_values(const math::Vector& xi, - std::vector& values) const { - values.assign(size_, Real(0)); - const Real x = xi[0]; - const Real y = xi[1]; - const Real z = xi[2]; - - if (dimension_ == 2) { - if (quad_monomial_exponents_.size() != size_ || - quad_inv_vandermonde_.size() != size_ * size_) { - throw BasisEvaluationException( - "SerendipityBasis: quadrilateral interpolation tables are not initialized for value evaluation", - __FILE__, __LINE__, __func__); - } - - std::vector monomials(size_, Real(0)); - for (std::size_t j = 0; j < size_; ++j) { - const auto [ax, ay] = quad_monomial_exponents_[j]; - monomials[j] = std::pow(x, ax) * std::pow(y, ay); - } - - for (std::size_t i = 0; i < size_; ++i) { - Real value = Real(0); - for (std::size_t j = 0; j < size_; ++j) { - value += monomials[j] * quad_inv_vandermonde_[j * size_ + i]; - } - values[i] = value; - } +void SerendipityBasis::evaluate_all_to(const math::Vector& xi, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) const { + if (!values_out && !gradients_out && !hessians_out) { return; } - if (dimension_ == 3 && order_ == 1) { - // Hex8 trilinear shape functions - const Real r = x; - const Real s = y; - const Real t = z; - values[0] = Real(0.125) * (Real(1) - r) * (Real(1) - s) * (Real(1) - t); - values[1] = Real(0.125) * (Real(1) + r) * (Real(1) - s) * (Real(1) - t); - values[2] = Real(0.125) * (Real(1) + r) * (Real(1) + s) * (Real(1) - t); - values[3] = Real(0.125) * (Real(1) - r) * (Real(1) + s) * (Real(1) - t); - values[4] = Real(0.125) * (Real(1) - r) * (Real(1) - s) * (Real(1) + t); - values[5] = 
Real(0.125) * (Real(1) + r) * (Real(1) - s) * (Real(1) + t); - values[6] = Real(0.125) * (Real(1) + r) * (Real(1) + s) * (Real(1) + t); - values[7] = Real(0.125) * (Real(1) - r) * (Real(1) + s) * (Real(1) + t); - return; + if (values_out) { + std::fill_n(values_out, size_, Real(0)); } - - const Real r = x; - const Real s = y; - const Real t = z; - - if (geometry_mode_ && element_type_ == ElementType::Hex20) { - // Hex20 geometry mode: use trilinear Hex8 shape functions on corners, edges zero. - values[0] = Real(0.125) * (Real(1) - r) * (Real(1) - s) * (Real(1) - t); - values[1] = Real(0.125) * (Real(1) + r) * (Real(1) - s) * (Real(1) - t); - values[2] = Real(0.125) * (Real(1) + r) * (Real(1) + s) * (Real(1) - t); - values[3] = Real(0.125) * (Real(1) - r) * (Real(1) + s) * (Real(1) - t); - values[4] = Real(0.125) * (Real(1) - r) * (Real(1) - s) * (Real(1) + t); - values[5] = Real(0.125) * (Real(1) + r) * (Real(1) - s) * (Real(1) + t); - values[6] = Real(0.125) * (Real(1) + r) * (Real(1) + s) * (Real(1) + t); - values[7] = Real(0.125) * (Real(1) - r) * (Real(1) + s) * (Real(1) + t); - for (std::size_t i = 8; i < 20; ++i) { - values[i] = Real(0); - } - return; + if (gradients_out) { + std::fill_n(gradients_out, size_ * 3u, Real(0)); } - - if (element_type_ == ElementType::Hex20) { - Real internal_vals[20]; - eval_hex20_internal(r, s, t, internal_vals); - const auto mesh_to_basis = ReferenceNodeLayout::mesh_to_basis_ordering(element_type_); - BASIS_CHECK_EVAL(mesh_to_basis.size() == size_, - "Hex20 mesh-to-basis ordering is not registered"); - for (std::size_t i = 0; i < 20; ++i) { - values[i] = internal_vals[mesh_to_basis[i]]; - } - return; + if (hessians_out) { + std::fill_n(hessians_out, size_ * 9u, Real(0)); } - if (element_type_ == ElementType::Wedge15) { - eval_wedge15_polynomial(r, s, t, values.data(), nullptr, nullptr); - return; - } - -} - -void SerendipityBasis::evaluate_gradients(const math::Vector& xi, - std::vector& gradients) const { - 
gradients.assign(size_, Gradient{}); - const Real x = xi[0]; const Real y = xi[1]; const Real z = xi[2]; @@ -594,216 +576,174 @@ void SerendipityBasis::evaluate_gradients(const math::Vector& xi, if (quad_monomial_exponents_.size() != size_ || quad_inv_vandermonde_.size() != size_ * size_) { throw BasisEvaluationException( - "SerendipityBasis: quadrilateral interpolation tables are not initialized for gradient evaluation", + "SerendipityBasis: quadrilateral interpolation tables are not initialized for value evaluation", __FILE__, __LINE__, __func__); } - std::vector dmon_dx(size_, Real(0)); - std::vector dmon_dy(size_, Real(0)); for (std::size_t j = 0; j < size_; ++j) { const auto [ax, ay] = quad_monomial_exponents_[j]; - dmon_dx[j] = + const Real value = std::pow(x, ax) * std::pow(y, ay); + const Real dx = (ax > 0) ? Real(ax) * std::pow(x, ax - 1) * std::pow(y, ay) : Real(0); - dmon_dy[j] = + const Real dy = (ay > 0) ? std::pow(x, ax) * Real(ay) * std::pow(y, ay - 1) : Real(0); - } + const Real dxx = + (ax > 1) ? Real(ax * (ax - 1)) * std::pow(x, ax - 2) * std::pow(y, ay) + : Real(0); + const Real dxy = + (ax > 0 && ay > 0) + ? Real(ax * ay) * std::pow(x, ax - 1) * std::pow(y, ay - 1) + : Real(0); + const Real dyy = + (ay > 1) ? 
Real(ay * (ay - 1)) * std::pow(x, ax) * std::pow(y, ay - 2) + : Real(0); - for (std::size_t i = 0; i < size_; ++i) { - Real gx = Real(0); - Real gy = Real(0); - for (std::size_t j = 0; j < size_; ++j) { + for (std::size_t i = 0; i < size_; ++i) { const Real coeff = quad_inv_vandermonde_[j * size_ + i]; - gx += dmon_dx[j] * coeff; - gy += dmon_dy[j] * coeff; + if (values_out) { + values_out[i] += value * coeff; + } + if (gradients_out) { + Real* g = gradients_out + i * 3u; + g[0] += dx * coeff; + g[1] += dy * coeff; + } + if (hessians_out) { + Real* h = hessians_out + i * 9u; + h[0] += dxx * coeff; + h[1] += dxy * coeff; + h[3] += dxy * coeff; + h[4] += dyy * coeff; + } } - gradients[i][0] = gx; - gradients[i][1] = gy; } return; } - // 3D linear hex (Hex8) if (dimension_ == 3 && order_ == 1) { - const Real r = x, s = y, t = z; - gradients[0][0] = -Real(0.125) * (Real(1) - s) * (Real(1) - t); - gradients[0][1] = -Real(0.125) * (Real(1) - r) * (Real(1) - t); - gradients[0][2] = -Real(0.125) * (Real(1) - r) * (Real(1) - s); - - gradients[1][0] = Real(0.125) * (Real(1) - s) * (Real(1) - t); - gradients[1][1] = -Real(0.125) * (Real(1) + r) * (Real(1) - t); - gradients[1][2] = -Real(0.125) * (Real(1) + r) * (Real(1) - s); - - gradients[2][0] = Real(0.125) * (Real(1) + s) * (Real(1) - t); - gradients[2][1] = Real(0.125) * (Real(1) + r) * (Real(1) - t); - gradients[2][2] = -Real(0.125) * (Real(1) + r) * (Real(1) + s); - - gradients[3][0] = -Real(0.125) * (Real(1) + s) * (Real(1) - t); - gradients[3][1] = Real(0.125) * (Real(1) - r) * (Real(1) - t); - gradients[3][2] = -Real(0.125) * (Real(1) - r) * (Real(1) + s); - - gradients[4][0] = -Real(0.125) * (Real(1) - s) * (Real(1) + t); - gradients[4][1] = -Real(0.125) * (Real(1) - r) * (Real(1) + t); - gradients[4][2] = Real(0.125) * (Real(1) - r) * (Real(1) - s); - - gradients[5][0] = Real(0.125) * (Real(1) - s) * (Real(1) + t); - gradients[5][1] = -Real(0.125) * (Real(1) + r) * (Real(1) + t); - gradients[5][2] = Real(0.125) * 
(Real(1) + r) * (Real(1) - s); - - gradients[6][0] = Real(0.125) * (Real(1) + s) * (Real(1) + t); - gradients[6][1] = Real(0.125) * (Real(1) + r) * (Real(1) + t); - gradients[6][2] = Real(0.125) * (Real(1) + r) * (Real(1) + s); - - gradients[7][0] = -Real(0.125) * (Real(1) + s) * (Real(1) + t); - gradients[7][1] = Real(0.125) * (Real(1) - r) * (Real(1) + t); - gradients[7][2] = Real(0.125) * (Real(1) - r) * (Real(1) + s); + evaluate_hex8_reference(x, y, z, values_out, gradients_out, hessians_out); return; } - // Hex20 geometry mode: use Hex8 gradients - if (dimension_ == 3 && order_ == 2 && geometry_mode_ && - (element_type_ == ElementType::Hex20 || element_type_ == ElementType::Quad8)) { - const Real r = x, s = y, t = z; - gradients[0][0] = -Real(0.125) * (Real(1) - s) * (Real(1) - t); - gradients[0][1] = -Real(0.125) * (Real(1) - r) * (Real(1) - t); - gradients[0][2] = -Real(0.125) * (Real(1) - r) * (Real(1) - s); - - gradients[1][0] = Real(0.125) * (Real(1) - s) * (Real(1) - t); - gradients[1][1] = -Real(0.125) * (Real(1) + r) * (Real(1) - t); - gradients[1][2] = -Real(0.125) * (Real(1) + r) * (Real(1) - s); - - gradients[2][0] = Real(0.125) * (Real(1) + s) * (Real(1) - t); - gradients[2][1] = Real(0.125) * (Real(1) + r) * (Real(1) - t); - gradients[2][2] = -Real(0.125) * (Real(1) + r) * (Real(1) + s); - - gradients[3][0] = -Real(0.125) * (Real(1) + s) * (Real(1) - t); - gradients[3][1] = Real(0.125) * (Real(1) - r) * (Real(1) - t); - gradients[3][2] = -Real(0.125) * (Real(1) - r) * (Real(1) + s); - - gradients[4][0] = -Real(0.125) * (Real(1) - s) * (Real(1) + t); - gradients[4][1] = -Real(0.125) * (Real(1) - r) * (Real(1) + t); - gradients[4][2] = Real(0.125) * (Real(1) - r) * (Real(1) - s); - - gradients[5][0] = Real(0.125) * (Real(1) - s) * (Real(1) + t); - gradients[5][1] = -Real(0.125) * (Real(1) + r) * (Real(1) + t); - gradients[5][2] = Real(0.125) * (Real(1) + r) * (Real(1) - s); - - gradients[6][0] = Real(0.125) * (Real(1) + s) * (Real(1) + t); - 
gradients[6][1] = Real(0.125) * (Real(1) + r) * (Real(1) + t); - gradients[6][2] = Real(0.125) * (Real(1) + r) * (Real(1) + s); - - gradients[7][0] = -Real(0.125) * (Real(1) + s) * (Real(1) + t); - gradients[7][1] = Real(0.125) * (Real(1) - r) * (Real(1) + t); - gradients[7][2] = Real(0.125) * (Real(1) - r) * (Real(1) + s); - // Edge-node gradients remain zero + if (geometry_mode_ && element_type_ == ElementType::Hex20) { + evaluate_hex8_reference(x, y, z, values_out, gradients_out, hessians_out); return; } - // Hex20 analytical gradients using monomial differentiation - if (element_type_ == ElementType::Hex20 && order_ == 2) { - const Real r = x, s = y, t = z; - Gradient internal_grads[20]; - eval_hex20_grad_internal(r, s, t, internal_grads); + if (element_type_ == ElementType::Hex20) { const auto mesh_to_basis = ReferenceNodeLayout::mesh_to_basis_ordering(element_type_); BASIS_CHECK_EVAL(mesh_to_basis.size() == size_, "Hex20 mesh-to-basis ordering is not registered"); - for (std::size_t i = 0; i < 20; ++i) { - gradients[i] = internal_grads[mesh_to_basis[i]]; + + if (values_out) { + Real internal_vals[20]; + eval_hex20_internal(x, y, z, internal_vals); + for (std::size_t i = 0; i < 20u; ++i) { + values_out[i] = internal_vals[mesh_to_basis[i]]; + } + } + if (gradients_out) { + Gradient internal_grads[20]; + eval_hex20_grad_internal(x, y, z, internal_grads); + for (std::size_t i = 0; i < 20u; ++i) { + store_gradient(internal_grads[mesh_to_basis[i]], gradients_out + i * 3u); + } + } + if (hessians_out) { + Hessian internal_hessians[20]; + eval_hex20_hess_internal(x, y, z, internal_hessians); + for (std::size_t i = 0; i < 20u; ++i) { + store_hessian(internal_hessians[mesh_to_basis[i]], hessians_out + i * 9u); + } } return; } - // Wedge15 analytical gradients using monomial differentiation - if (element_type_ == ElementType::Wedge15 && order_ == 2) { - eval_wedge15_polynomial(x, y, z, nullptr, gradients.data(), nullptr); + if (element_type_ == ElementType::Wedge15) { + 
std::array wedge_gradients{}; + std::array wedge_hessians{}; + eval_wedge15_polynomial(x, + y, + z, + values_out, + gradients_out ? wedge_gradients.data() : nullptr, + hessians_out ? wedge_hessians.data() : nullptr); + if (gradients_out) { + for (std::size_t i = 0; i < 15u; ++i) { + store_gradient(wedge_gradients[i], gradients_out + i * 3u); + } + } + if (hessians_out) { + for (std::size_t i = 0; i < 15u; ++i) { + store_hessian(wedge_hessians[i], hessians_out + i * 9u); + } + } return; } - throw BasisEvaluationException("SerendipityBasis::evaluate_gradients: unsupported serendipity configuration", + throw BasisEvaluationException("SerendipityBasis::evaluate_all_to: unsupported serendipity configuration", __FILE__, __LINE__, __func__); } -void SerendipityBasis::evaluate_hessians(const math::Vector& xi, - std::vector& hessians) const { - hessians.assign(size_, Hessian{}); - const Real x = xi[0]; - const Real y = xi[1]; - const Real z = xi[2]; - - if (dimension_ == 2) { - if (quad_monomial_exponents_.size() != size_ || - quad_inv_vandermonde_.size() != size_ * size_) { - throw BasisEvaluationException( - "SerendipityBasis: quadrilateral interpolation tables are not initialized for Hessian evaluation", - __FILE__, __LINE__, __func__); - } - - std::vector dxx(size_, Real(0)); - std::vector dxy(size_, Real(0)); - std::vector dyy(size_, Real(0)); - for (std::size_t j = 0; j < size_; ++j) { - const auto [ax, ay] = quad_monomial_exponents_[j]; - dxx[j] = (ax > 1) - ? Real(ax * (ax - 1)) * std::pow(x, ax - 2) * std::pow(y, ay) - : Real(0); - dxy[j] = (ax > 0 && ay > 0) - ? Real(ax * ay) * std::pow(x, ax - 1) * std::pow(y, ay - 1) - : Real(0); - dyy[j] = (ay > 1) - ? 
Real(ay * (ay - 1)) * std::pow(x, ax) * std::pow(y, ay - 2) - : Real(0); - } +void SerendipityBasis::evaluate_values(const math::Vector& xi, + std::vector& values) const { + values.resize(size_); + evaluate_values_to(xi, values.data()); +} - for (std::size_t i = 0; i < size_; ++i) { - for (std::size_t j = 0; j < size_; ++j) { - const Real coeff = quad_inv_vandermonde_[j * size_ + i]; - hessians[i](0, 0) += dxx[j] * coeff; - hessians[i](0, 1) += dxy[j] * coeff; - hessians[i](1, 1) += dyy[j] * coeff; - } - hessians[i](1, 0) = hessians[i](0, 1); - } - return; +void SerendipityBasis::evaluate_gradients(const math::Vector& xi, + std::vector& gradients) const { + gradients.resize(size_); + std::vector flat(size_ * 3u, Real(0)); + evaluate_gradients_to(xi, flat.data()); + for (std::size_t i = 0; i < size_; ++i) { + gradients[i][0] = flat[i * 3u + 0u]; + gradients[i][1] = flat[i * 3u + 1u]; + gradients[i][2] = flat[i * 3u + 2u]; } +} - if (element_type_ == ElementType::Hex8 && order_ == 1) { - static const LagrangeBasis parent(ElementType::Hex8, 1); - parent.evaluate_hessians(xi, hessians); - return; +void SerendipityBasis::evaluate_hessians(const math::Vector& xi, + std::vector& hessians) const { + hessians.resize(size_); + std::vector flat(size_ * 9u, Real(0)); + evaluate_hessians_to(xi, flat.data()); + for (std::size_t i = 0; i < size_; ++i) { + hessians[i] = load_hessian(flat.data() + i * 9u); } +} - if (geometry_mode_ && element_type_ == ElementType::Hex20) { - static const LagrangeBasis parent(ElementType::Hex8, 1); - std::array parent_hessians{}; - parent.evaluate_hessians_to(xi, parent_hessians.data()); - for (std::size_t i = 0; i < 8; ++i) { - for (std::size_t r = 0; r < 3; ++r) { - for (std::size_t c = 0; c < 3; ++c) { - hessians[i](r, c) = parent_hessians[i * 9u + r * 3u + c]; - } - } - } - return; +void SerendipityBasis::evaluate_all(const math::Vector& xi, + std::vector& values, + std::vector& gradients, + std::vector& hessians) const { + values.resize(size_); 
+ gradients.resize(size_); + hessians.resize(size_); + std::vector flat_gradients(size_ * 3u, Real(0)); + std::vector flat_hessians(size_ * 9u, Real(0)); + evaluate_all_to(xi, values.data(), flat_gradients.data(), flat_hessians.data()); + for (std::size_t i = 0; i < size_; ++i) { + gradients[i][0] = flat_gradients[i * 3u + 0u]; + gradients[i][1] = flat_gradients[i * 3u + 1u]; + gradients[i][2] = flat_gradients[i * 3u + 2u]; + hessians[i] = load_hessian(flat_hessians.data() + i * 9u); } +} - if (element_type_ == ElementType::Hex20 && order_ == 2) { - Hessian internal_hessians[20]; - eval_hex20_hess_internal(x, y, z, internal_hessians); - const auto mesh_to_basis = ReferenceNodeLayout::mesh_to_basis_ordering(element_type_); - BASIS_CHECK_EVAL(mesh_to_basis.size() == size_, - "Hex20 mesh-to-basis ordering is not registered"); - for (std::size_t i = 0; i < 20; ++i) { - hessians[i] = internal_hessians[mesh_to_basis[i]]; - } - return; - } +void SerendipityBasis::evaluate_values_to(const math::Vector& xi, + Real* SVMP_RESTRICT values_out) const { + evaluate_all_to(xi, values_out, nullptr, nullptr); +} - if (element_type_ == ElementType::Wedge15 && order_ == 2) { - eval_wedge15_polynomial(x, y, z, nullptr, nullptr, hessians.data()); - return; - } +void SerendipityBasis::evaluate_gradients_to(const math::Vector& xi, + Real* SVMP_RESTRICT gradients_out) const { + evaluate_all_to(xi, nullptr, gradients_out, nullptr); +} - throw BasisEvaluationException("SerendipityBasis::evaluate_hessians: unsupported serendipity configuration", - __FILE__, __LINE__, __func__); +void SerendipityBasis::evaluate_hessians_to(const math::Vector& xi, + Real* SVMP_RESTRICT hessians_out) const { + evaluate_all_to(xi, nullptr, nullptr, hessians_out); } } // namespace basis diff --git a/Code/Source/solver/FE/Basis/SerendipityBasis.h b/Code/Source/solver/FE/Basis/SerendipityBasis.h index fc0b897cf..9c55c8eec 100644 --- a/Code/Source/solver/FE/Basis/SerendipityBasis.h +++ 
b/Code/Source/solver/FE/Basis/SerendipityBasis.h @@ -61,7 +61,7 @@ namespace basis { /// Hex8 corner functions for geometry mapping and assigns zero contribution to /// the quadratic edge nodes. This preserves the public Hex20 node count while /// intentionally reducing the geometry interpolation order. -class SerendipityBasis : public BasisFunction { +class SerendipityBasis final : public BasisFunction { public: /// \brief Construct a serendipity basis for an element type and polynomial order. /// @@ -81,19 +81,19 @@ class SerendipityBasis : public BasisFunction { SerendipityBasis(ElementType type, int order, bool geometry_mode = false); /// \copydoc BasisFunction::basis_type() - BasisType basis_type() const noexcept override { return BasisType::Serendipity; } + BasisType basis_type() const noexcept final { return BasisType::Serendipity; } /// \copydoc BasisFunction::element_type() - ElementType element_type() const noexcept override { return element_type_; } + ElementType element_type() const noexcept final { return element_type_; } /// \copydoc BasisFunction::dimension() - int dimension() const noexcept override { return dimension_; } + int dimension() const noexcept final { return dimension_; } /// \copydoc BasisFunction::order() - int order() const noexcept override { return order_; } + int order() const noexcept final { return order_; } /// \copydoc BasisFunction::size() - std::size_t size() const noexcept override { return size_; } + std::size_t size() const noexcept final { return size_; } /// \brief Return the reference interpolation nodes in basis ordering. /// @@ -119,7 +119,7 @@ class SerendipityBasis : public BasisFunction { /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. /// \param values Receives one value per basis function. 
void evaluate_values(const math::Vector& xi, - std::vector& values) const override; + std::vector& values) const final; /// \brief Evaluate analytical serendipity basis gradients at a reference coordinate. /// @@ -134,7 +134,7 @@ class SerendipityBasis : public BasisFunction { /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. /// \param gradients Receives one three-component gradient per basis function. void evaluate_gradients(const math::Vector& xi, - std::vector& gradients) const override; + std::vector& gradients) const final; /// \brief Evaluate analytical serendipity basis Hessians at a reference coordinate. /// @@ -149,7 +149,40 @@ class SerendipityBasis : public BasisFunction { /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. /// \param hessians Receives one 3-by-3 Hessian per basis function. void evaluate_hessians(const math::Vector& xi, - std::vector& hessians) const override; + std::vector& hessians) const final; + + /// \brief Evaluate serendipity values, gradients, and Hessians together. + /// + /// \details This vector API is backed by the same flat-buffer evaluator as + /// the assembly-oriented `*_to` methods, so topology-specific polynomial + /// setup can be shared for a quadrature point. + /// + /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. + /// \param values Receives one value per basis function. + /// \param gradients Receives one three-component gradient per basis function. + /// \param hessians Receives one 3-by-3 Hessian per basis function. + void evaluate_all(const math::Vector& xi, + std::vector& values, + std::vector& gradients, + std::vector& hessians) const final; + + /// \brief Evaluate serendipity basis values into a flat caller-provided buffer. + /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. 
+ /// \param values_out Output buffer with at least size() entries. + void evaluate_values_to(const math::Vector& xi, + Real* SVMP_RESTRICT values_out) const final; + + /// \brief Evaluate serendipity basis gradients into a flat caller-provided buffer. + /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. + /// \param gradients_out Output buffer with node-major layout: node * 3 + component. + void evaluate_gradients_to(const math::Vector& xi, + Real* SVMP_RESTRICT gradients_out) const final; + + /// \brief Evaluate serendipity basis Hessians into a flat caller-provided buffer. + /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. + /// \param hessians_out Output buffer with node-major row-major layout: node * 9 + row * 3 + col. + void evaluate_hessians_to(const math::Vector& xi, + Real* SVMP_RESTRICT hessians_out) const final; private: ElementType element_type_; @@ -164,6 +197,11 @@ class SerendipityBasis : public BasisFunction { // When true, this basis is used purely for geometry mapping and may use // reduced polynomial order (e.g., Hex20 geometry as Hex8). bool geometry_mode_; + + void evaluate_all_to(const math::Vector& xi, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) const; }; /// @} From 1289c086f637cdc1544aff0bfe99eb78ad3b9f1c Mon Sep 17 00:00:00 2001 From: Zachary Sexton Date: Tue, 9 Jun 2026 16:33:06 -0700 Subject: [PATCH 13/22] adding switch cases for converting consts element types to fe element types. 
replaced custom math vector/matrix implementations for Eigen-backed implementations --- Code/Source/solver/FE/Basis/BasisFactory.cpp | 27 + Code/Source/solver/FE/Basis/BasisFactory.h | 24 + Code/Source/solver/FE/Basis/BasisFunction.cpp | 4 +- Code/Source/solver/FE/Basis/BasisFunction.h | 4 +- Code/Source/solver/FE/Basis/LagrangeBasis.cpp | 7 +- .../solver/FE/Basis/SerendipityBasis.cpp | 4 +- .../solver/FE/Math/DenseLinearAlgebra.cpp | 278 +--- .../solver/FE/Math/DenseLinearAlgebra.h | 9 +- .../solver/FE/Math/DenseTransformKernels.h | 70 +- Code/Source/solver/FE/Math/Matrix.h | 1472 +---------------- Code/Source/solver/FE/Math/MatrixExpr.h | 630 ------- Code/Source/solver/FE/Math/Vector.h | 826 +-------- Code/Source/solver/FE/Math/VectorExpr.h | 476 ------ Code/Source/solver/nn.cpp | 115 +- .../FE/Basis/test_BasisErrorPaths.cpp | 106 +- .../unitTests/FE/Basis/test_BasisHessians.cpp | 141 +- .../FE/Basis/test_HigherOrderWedge.cpp | 22 +- .../unitTests/FE/Basis/test_LagrangeBasis.cpp | 207 ++- .../FE/Basis/test_SerendipityTensorModal.cpp | 185 ++- .../FE/Math/test_DenseLinearAlgebra.cpp | 143 +- tests/unitTests/FE/Math/test_Matrix.cpp | 593 ------- tests/unitTests/FE/Math/test_MatrixExpr.cpp | 527 ------ tests/unitTests/FE/Math/test_Vector.cpp | 588 ------- tests/unitTests/FE/Math/test_VectorExpr.cpp | 408 ----- 24 files changed, 1038 insertions(+), 5828 deletions(-) delete mode 100644 Code/Source/solver/FE/Math/MatrixExpr.h delete mode 100644 Code/Source/solver/FE/Math/VectorExpr.h delete mode 100644 tests/unitTests/FE/Math/test_Matrix.cpp delete mode 100644 tests/unitTests/FE/Math/test_MatrixExpr.cpp delete mode 100644 tests/unitTests/FE/Math/test_Vector.cpp delete mode 100644 tests/unitTests/FE/Math/test_VectorExpr.cpp diff --git a/Code/Source/solver/FE/Basis/BasisFactory.cpp b/Code/Source/solver/FE/Basis/BasisFactory.cpp index bc01be0ed..b48e25536 100644 --- a/Code/Source/solver/FE/Basis/BasisFactory.cpp +++ b/Code/Source/solver/FE/Basis/BasisFactory.cpp @@ -3,6 
+3,7 @@ #include "BasisFactory.h" +#include "BasisTraits.h" #include "LagrangeBasis.h" #include "SerendipityBasis.h" @@ -74,6 +75,32 @@ std::shared_ptr create(const BasisRequest& req) { } } +BasisRequest default_basis_request(ElementType element_type) { + switch (element_type) { + // Reduced serendipity node layouts have no complete Lagrange basis at + // their node count; they always use the quadratic serendipity space. + case ElementType::Quad8: + case ElementType::Hex20: + case ElementType::Wedge15: + return BasisRequest{element_type, BasisType::Serendipity, 2}; + case ElementType::Point1: + return BasisRequest{element_type, BasisType::Lagrange, 0}; + default: { + const int order = complete_lagrange_alias_order(element_type); + if (order >= 0) { + return BasisRequest{element_type, BasisType::Lagrange, order}; + } + throw BasisElementCompatibilityException( + "BasisFactory: no default basis is defined for the requested element type", + __FILE__, __LINE__, __func__); + } + } +} + +std::shared_ptr create_default_for(ElementType element_type) { + return create(default_basis_request(element_type)); +} + } // namespace basis_factory } // namespace basis diff --git a/Code/Source/solver/FE/Basis/BasisFactory.h b/Code/Source/solver/FE/Basis/BasisFactory.h index b188b3aa2..3922d5ced 100644 --- a/Code/Source/solver/FE/Basis/BasisFactory.h +++ b/Code/Source/solver/FE/Basis/BasisFactory.h @@ -38,6 +38,30 @@ namespace basis_factory { [[nodiscard]] std::shared_ptr create(const BasisRequest& req); +/// \brief Return the default basis request (family and order) for an element type. +/// +/// \details This is the single source of truth for which basis family and +/// polynomial order a given element type uses by default: serendipity node +/// layouts (Quad8, Hex20, Wedge15) select the quadratic serendipity family, +/// and every complete Lagrange element selects the Lagrange family at the +/// order given by its node layout. 
Solver-facing adapters should translate +/// their element names to ElementType and delegate the basis choice here +/// rather than tabulating family/order themselves. +/// +/// \param element_type Element type to select a default basis for. +/// \return Basis request suitable for create(). +/// \throws BasisElementCompatibilityException If no default basis is defined +/// for the element type. +[[nodiscard]] BasisRequest default_basis_request(ElementType element_type); + +/// \brief Create the default basis for an element type. +/// +/// \details Equivalent to create(default_basis_request(element_type)). +/// +/// \param element_type Element type to create a default basis for. +/// \return Shared basis instance. +[[nodiscard]] std::shared_ptr create_default_for(ElementType element_type); + } // namespace basis_factory } // namespace basis diff --git a/Code/Source/solver/FE/Basis/BasisFunction.cpp b/Code/Source/solver/FE/Basis/BasisFunction.cpp index 3d95671f4..b98a36292 100644 --- a/Code/Source/solver/FE/Basis/BasisFunction.cpp +++ b/Code/Source/solver/FE/Basis/BasisFunction.cpp @@ -84,7 +84,7 @@ void BasisFunction::numerical_gradient(const math::Vector& xi, Real eps) const { std::vector base; evaluate_values(xi, base); - gradients.assign(base.size(), Gradient{}); + gradients.assign(base.size(), Gradient::Zero()); for (int d = 0; d < dimension(); ++d) { math::Vector forward = xi; @@ -109,7 +109,7 @@ void BasisFunction::numerical_hessian(const math::Vector& xi, Real eps) const { std::vector base_grad; evaluate_gradients(xi, base_grad); - hessians.assign(base_grad.size(), Hessian{}); + hessians.assign(base_grad.size(), Hessian::Zero()); for (int d = 0; d < dimension(); ++d) { math::Vector forward = xi; diff --git a/Code/Source/solver/FE/Basis/BasisFunction.h b/Code/Source/solver/FE/Basis/BasisFunction.h index f8f78d7b6..e7de2bf01 100644 --- a/Code/Source/solver/FE/Basis/BasisFunction.h +++ b/Code/Source/solver/FE/Basis/BasisFunction.h @@ -39,7 +39,7 @@ using Hessian 
= math::Matrix; Real xy, Real xz, Real yz) { - Hessian hessian{}; + Hessian hessian = Hessian::Zero(); hessian(0, 0) = xx; hessian(1, 1) = yy; hessian(2, 2) = zz; @@ -62,7 +62,7 @@ inline void store_hessian(const Hessian& hessian, Real* dst) noexcept { } [[nodiscard]] inline Hessian load_hessian(const Real* src) noexcept { - Hessian hessian{}; + Hessian hessian = Hessian::Zero(); hessian(0, 0) = src[0]; hessian(0, 1) = src[1]; hessian(0, 2) = src[2]; diff --git a/Code/Source/solver/FE/Basis/LagrangeBasis.cpp b/Code/Source/solver/FE/Basis/LagrangeBasis.cpp index d777447cb..4f8c15bb1 100644 --- a/Code/Source/solver/FE/Basis/LagrangeBasis.cpp +++ b/Code/Source/solver/FE/Basis/LagrangeBasis.cpp @@ -220,8 +220,8 @@ void evaluate_simplex(const Vec3& xi, SimplexEval& out) { const std::size_t n = exponents.size(); out.value.assign(n, Real(0)); - out.gradient.assign(n, Gradient{}); - out.hessian.assign(n, Hessian{}); + out.gradient.assign(n, Gradient::Zero()); + out.hessian.assign(n, Hessian::Zero()); if (n == 1u && order == 0) { out.value[0] = Real(1); @@ -230,7 +230,8 @@ void evaluate_simplex(const Vec3& xi, const int bary_count = top == BasisTopology::Triangle ? 
3 : 4; std::array lambda{Real(0), Real(0), Real(0), Real(0)}; - std::array lambda_grad{}; + std::array lambda_grad; + lambda_grad.fill(Gradient::Zero()); lambda[1] = xi[0]; lambda[2] = xi[1]; diff --git a/Code/Source/solver/FE/Basis/SerendipityBasis.cpp b/Code/Source/solver/FE/Basis/SerendipityBasis.cpp index 358e76123..30eac9c38 100644 --- a/Code/Source/solver/FE/Basis/SerendipityBasis.cpp +++ b/Code/Source/solver/FE/Basis/SerendipityBasis.cpp @@ -377,7 +377,7 @@ void eval_hex20_hess_internal(Real r, Real s, Real t, Hessian* internal_hessians } for (int i = 0; i < 20; ++i) { - Hessian H{}; + Hessian H = Hessian::Zero(); for (int j = 0; j < 20; ++j) { H(0, 0) += hex20_coeffs[j][i] * d2phi_drr[j]; H(1, 1) += hex20_coeffs[j][i] * d2phi_dss[j]; @@ -450,7 +450,7 @@ void eval_wedge15_polynomial(Real r, Real gr = Real(0); Real gs = Real(0); Real gt = Real(0); - Hessian H{}; + Hessian H = Hessian::Zero(); for (int j = 0; j < 15; ++j) { const Real coefficient = kWedge15Coefficients[static_cast(j)][static_cast(i)]; diff --git a/Code/Source/solver/FE/Math/DenseLinearAlgebra.cpp b/Code/Source/solver/FE/Math/DenseLinearAlgebra.cpp index 8be9a7560..fb27ad7bf 100644 --- a/Code/Source/solver/FE/Math/DenseLinearAlgebra.cpp +++ b/Code/Source/solver/FE/Math/DenseLinearAlgebra.cpp @@ -5,9 +5,7 @@ #include "FEException.h" -#if defined(FE_HAS_EIGEN) && FE_HAS_EIGEN #include -#endif #include #include @@ -24,16 +22,24 @@ namespace math { namespace { -constexpr std::size_t kDenseSolveRhsBlock = 32u; +using DenseMatrix = DenseLUSolver::DenseMatrix; +using RowMajorMatrix = + Eigen::Matrix; +using ConstRowMajorMap = Eigen::Map; + +ConstRowMajorMap map_row_major(std::span matrix, + std::size_t rows, + std::size_t cols) { + return ConstRowMajorMap(matrix.data(), + static_cast(rows), + static_cast(cols)); +} -void materialize_inverse_from_solver(const DenseLUSolver& solver, - std::vector& inverse) { - const std::size_t n = solver.n; - inverse.assign(n * n, Real(0)); - for (std::size_t diag = 0; 
diag < n; ++diag) { - inverse[diag * n + diag] = Real(1); - } - solver.solve_in_place(std::span(inverse.data(), inverse.size()), n); +void copy_to_row_major(const DenseMatrix& source, std::vector& dest) { + const auto rows = static_cast(source.rows()); + const auto cols = static_cast(source.cols()); + dest.resize(rows * cols); + Eigen::Map(dest.data(), source.rows(), source.cols()) = source; } } // namespace @@ -84,59 +90,18 @@ void DenseLUSolver::solve_in_place(std::span rhs, label + ": dense solve requires at least one right-hand side"); DENSE_LINALG_CHECK(rhs.size() == n * rhs_count, label + ": dense multi-RHS solve size mismatch"); - DENSE_LINALG_CHECK(lu.size() == n * n && pivots.size() == n, + DENSE_LINALG_CHECK(lu.rows() == static_cast(n), label + ": dense solver is not factorized"); - - for (std::size_t k = 0; k < n; ++k) { - if (pivots[k] != k) { - for (std::size_t block = 0; block < rhs_count; block += kDenseSolveRhsBlock) { - const std::size_t end = - std::min(rhs_count, block + kDenseSolveRhsBlock); - for (std::size_t r = block; r < end; ++r) { - std::swap(rhs[k * rhs_count + r], - rhs[pivots[k] * rhs_count + r]); - } - } - } - } - - for (std::size_t row = 0; row < n; ++row) { - for (std::size_t col = 0; col < row; ++col) { - const Real factor = lu[row * n + col]; - for (std::size_t block = 0; block < rhs_count; block += kDenseSolveRhsBlock) { - const std::size_t end = - std::min(rhs_count, block + kDenseSolveRhsBlock); - for (std::size_t r = block; r < end; ++r) { - rhs[row * rhs_count + r] -= factor * rhs[col * rhs_count + r]; - } - } - } + if (n == 0) { + return; } - for (std::size_t rev = 0; rev < n; ++rev) { - const std::size_t row = n - 1u - rev; - for (std::size_t col = row + 1u; col < n; ++col) { - const Real factor = lu[row * n + col]; - for (std::size_t block = 0; block < rhs_count; block += kDenseSolveRhsBlock) { - const std::size_t end = - std::min(rhs_count, block + kDenseSolveRhsBlock); - for (std::size_t r = block; r < end; ++r) { - 
rhs[row * rhs_count + r] -= factor * rhs[col * rhs_count + r]; - } - } - } - const Real pivot = lu[row * n + row]; - DENSE_LINALG_CHECK( - std::abs(pivot) > pivot_tolerance, - label + ": zero pivot during dense solve"); - for (std::size_t block = 0; block < rhs_count; block += kDenseSolveRhsBlock) { - const std::size_t end = - std::min(rhs_count, block + kDenseSolveRhsBlock); - for (std::size_t r = block; r < end; ++r) { - rhs[row * rhs_count + r] /= pivot; - } - } - } + Eigen::Map rhs_map(rhs.data(), + static_cast(n), + static_cast(rhs_count)); + // Evaluate into a temporary: lu.solve cannot alias its argument. + const DenseMatrix solution = lu.solve(rhs_map); + rhs_map = solution; } std::vector DenseLUSolver::solve(std::span rhs) const { @@ -155,14 +120,8 @@ DenseMatrixDiagnostics dense_matrix_diagnostics( DENSE_LINALG_CHECK(rows > 0 && cols > 0, std::string(label) + ": diagnostics require a nonempty matrix"); -#if defined(FE_HAS_EIGEN) && FE_HAS_EIGEN - using RowMajorMatrix = Eigen::Matrix; - using Matrix = Eigen::Matrix; - const Eigen::Map A(matrix.data(), - static_cast(rows), - static_cast(cols)); - const Matrix dense = A; - Eigen::JacobiSVD svd(dense); + const DenseMatrix dense = map_row_major(matrix, rows, cols); + Eigen::JacobiSVD svd(dense); DenseMatrixDiagnostics diagnostics; const auto& singular_values = svd.singularValues(); @@ -189,22 +148,6 @@ DenseMatrixDiagnostics dense_matrix_diagnostics( diagnostics.smallest_retained_singular_value; } return diagnostics; -#else - DenseMatrixDiagnostics diagnostics; - diagnostics.largest_singular_value = dense_matrix_max_abs(matrix); - diagnostics.tolerance = - dense_matrix_pivot_tolerance(rows, cols, diagnostics.largest_singular_value); - diagnostics.rank = - dense_matrix_rank(std::vector(matrix.begin(), matrix.end()), rows, cols); - const std::size_t full_rank = std::min(rows, cols); - if (diagnostics.rank == full_rank) { - diagnostics.smallest_retained_singular_value = diagnostics.tolerance; - } - // Exact 
condition estimates require SVD diagnostics. In Eigen-disabled - // builds this stays explicit instead of relying on a misleading estimate. - diagnostics.condition_estimate = std::numeric_limits::infinity(); - return diagnostics; -#endif } DenseLUSolver factor_dense_matrix(std::vector matrix, @@ -215,55 +158,28 @@ DenseLUSolver factor_dense_matrix(std::vector matrix, DenseLUSolver solver; solver.n = n; - solver.lu = std::move(matrix); - solver.pivots.resize(n); - const Real max_abs = dense_matrix_max_abs(solver.lu); - solver.pivot_tolerance = - dense_matrix_pivot_tolerance(n, n, max_abs); solver.label = std::string(label); + const Real max_abs = + dense_matrix_max_abs(std::span(matrix.data(), matrix.size())); + solver.pivot_tolerance = dense_matrix_pivot_tolerance(n, n, max_abs); + solver.lu.compute(map_row_major(matrix, n, n)); + + // Partial pivoting leaves the pivots on the diagonal of the packed LU + // factor; a pivot below the scale-aware tolerance marks rank deficiency. Real max_pivot_abs = Real(0); Real min_pivot_abs = std::numeric_limits::infinity(); - for (std::size_t col = 0; col < n; ++col) { - std::size_t pivot_row = col; - Real pivot_abs = std::abs(solver.lu[col * n + col]); - for (std::size_t row = col + 1; row < n; ++row) { - const Real candidate = std::abs(solver.lu[row * n + col]); - if (candidate > pivot_abs) { - pivot_abs = candidate; - pivot_row = row; - } - } - + const auto diagonal = solver.lu.matrixLU().diagonal(); + for (Eigen::Index col = 0; col < diagonal.size(); ++col) { + const Real pivot_magnitude = std::abs(diagonal[col]); DENSE_LINALG_CHECK( - pivot_abs > solver.pivot_tolerance, + pivot_magnitude > solver.pivot_tolerance, solver.label + ": rank-deficient matrix (rank " + std::to_string(col) + " of " + std::to_string(n) + ", pivot below scale-aware tolerance " + std::to_string(solver.pivot_tolerance) + ")"); - - solver.pivots[col] = pivot_row; - if (pivot_row != col) { - for (std::size_t j = 0; j < n; ++j) { - std::swap(solver.lu[col 
* n + j], solver.lu[pivot_row * n + j]); - } - } - - const Real pivot = solver.lu[col * n + col]; - DENSE_LINALG_CHECK( - std::abs(pivot) > solver.pivot_tolerance, - solver.label + ": zero pivot after row exchange"); - const Real pivot_magnitude = std::abs(pivot); max_pivot_abs = std::max(max_pivot_abs, pivot_magnitude); min_pivot_abs = std::min(min_pivot_abs, pivot_magnitude); - - for (std::size_t row = col + 1; row < n; ++row) { - const Real factor = solver.lu[row * n + col] / pivot; - solver.lu[row * n + col] = factor; - for (std::size_t j = col + 1; j < n; ++j) { - solver.lu[row * n + j] -= factor * solver.lu[col * n + j]; - } - } } solver.diagnostics.rank = n; @@ -293,20 +209,14 @@ DenseInverseResult invert_dense_matrix_with_diagnostics( dense_matrix_diagnostics(std::span(matrix.data(), matrix.size()), n, n, label); -#if defined(FE_HAS_EIGEN) && FE_HAS_EIGEN if (std::isfinite(solver.diagnostics.condition_estimate) && std::isfinite(result.diagnostics.condition_estimate) && result.diagnostics.condition_estimate > dense_matrix_condition_fallback_threshold()) { - using RowMajorMatrix = Eigen::Matrix; - using Matrix = Eigen::Matrix; - const Eigen::Map A(matrix.data(), - static_cast(n), - static_cast(n)); - const Matrix dense = A; - Eigen::JacobiSVD svd(dense, - Eigen::ComputeFullU | Eigen::ComputeFullV); - Matrix sigma_inverse = Matrix::Zero(static_cast(n), - static_cast(n)); + const DenseMatrix dense = map_row_major(matrix, n, n); + Eigen::JacobiSVD svd(dense, + Eigen::ComputeFullU | Eigen::ComputeFullV); + DenseMatrix sigma_inverse = DenseMatrix::Zero(static_cast(n), + static_cast(n)); const auto& singular_values = svd.singularValues(); for (Eigen::Index i = 0; i < singular_values.size(); ++i) { DENSE_LINALG_CHECK( @@ -314,20 +224,14 @@ DenseInverseResult invert_dense_matrix_with_diagnostics( std::string(label) + ": high-condition SVD fallback encountered a dropped singular value"); sigma_inverse(i, i) = Real(1) / singular_values[i]; } - const Matrix inverse = 
svd.matrixV() * sigma_inverse * svd.matrixU().transpose(); - result.inverse.assign(n * n, Real(0)); - for (std::size_t row = 0; row < n; ++row) { - for (std::size_t col = 0; col < n; ++col) { - result.inverse[row * n + col] = - inverse(static_cast(row), static_cast(col)); - } - } + const DenseMatrix inverse = svd.matrixV() * sigma_inverse * svd.matrixU().transpose(); + copy_to_row_major(inverse, result.inverse); result.used_svd_fallback = true; return result; } -#endif - materialize_inverse_from_solver(solver, result.inverse); + const DenseMatrix inverse = solver.lu.inverse(); + copy_to_row_major(inverse, result.inverse); return result; } @@ -357,9 +261,10 @@ std::vector invert_dense_matrix(std::vector matrix, std::size_t n, std::string_view label) { const DenseLUSolver solver = factor_dense_matrix(std::move(matrix), n, label); - std::vector inverse; - materialize_inverse_from_solver(solver, inverse); - return inverse; + const DenseMatrix inverse = solver.lu.inverse(); + std::vector result; + copy_to_row_major(inverse, result); + return result; } std::size_t dense_matrix_rank(std::vector matrix, @@ -367,46 +272,22 @@ std::size_t dense_matrix_rank(std::vector matrix, std::size_t cols) { DENSE_LINALG_CHECK(matrix.size() == rows * cols, "dense_matrix_rank: size mismatch"); - const Real tolerance = - dense_matrix_pivot_tolerance(rows, cols, dense_matrix_max_abs(matrix)); - std::size_t rank = 0; - std::size_t pivot_row = 0; - for (std::size_t col = 0; col < cols && pivot_row < rows; ++col) { - std::size_t best_row = pivot_row; - Real best_abs = std::abs(matrix[pivot_row * cols + col]); - for (std::size_t row = pivot_row + 1; row < rows; ++row) { - const Real candidate = std::abs(matrix[row * cols + col]); - if (candidate > best_abs) { - best_abs = candidate; - best_row = row; - } - } - if (best_abs <= tolerance) { - continue; - } + const DenseMatrix dense = + map_row_major(std::span(matrix.data(), matrix.size()), rows, cols); + Eigen::JacobiSVD svd(dense); - if 
(best_row != pivot_row) { - for (std::size_t c = col; c < cols; ++c) { - std::swap(matrix[pivot_row * cols + c], matrix[best_row * cols + c]); - } - } + const auto& singular_values = svd.singularValues(); + const Real largest = + (singular_values.size() > 0) ? singular_values[0] : Real(0); + const Real tolerance = + dense_matrix_singular_value_tolerance(rows, cols, largest); - const Real pivot = matrix[pivot_row * cols + col]; - for (std::size_t row = pivot_row + 1; row < rows; ++row) { - const Real factor = matrix[row * cols + col] / pivot; - if (std::abs(factor) <= tolerance) { - matrix[row * cols + col] = Real(0); - continue; - } - matrix[row * cols + col] = Real(0); - for (std::size_t c = col + 1; c < cols; ++c) { - matrix[row * cols + c] -= factor * matrix[pivot_row * cols + c]; - } + std::size_t rank = 0; + for (Eigen::Index i = 0; i < singular_values.size(); ++i) { + if (singular_values[i] > tolerance) { + ++rank; } - - ++rank; - ++pivot_row; } return rank; } @@ -421,17 +302,10 @@ DensePseudoInverseResult rank_revealing_pseudo_inverse( DENSE_LINALG_CHECK(rows > 0 && cols > 0, std::string(label) + ": pseudo-inverse requires a nonempty matrix"); -#if defined(FE_HAS_EIGEN) && FE_HAS_EIGEN - using RowMajorMatrix = Eigen::Matrix; - using Matrix = Eigen::Matrix; - const Eigen::Map A(matrix.data(), - static_cast(rows), - static_cast(cols)); - const Matrix dense = A; - Eigen::JacobiSVD svd(dense, Eigen::ComputeFullU | Eigen::ComputeFullV); + const DenseMatrix dense = map_row_major(matrix, rows, cols); + Eigen::JacobiSVD svd(dense, Eigen::ComputeFullU | Eigen::ComputeFullV); DensePseudoInverseResult result; - result.inverse.assign(cols * rows, Real(0)); const auto& singular_values = svd.singularValues(); result.largest_singular_value = @@ -439,8 +313,8 @@ DensePseudoInverseResult rank_revealing_pseudo_inverse( result.tolerance = dense_matrix_singular_value_tolerance(rows, cols, result.largest_singular_value); - Matrix sigma_inverse = Matrix::Zero(static_cast(cols), - 
static_cast(rows)); + DenseMatrix sigma_inverse = DenseMatrix::Zero(static_cast(cols), + static_cast(rows)); for (Eigen::Index i = 0; i < singular_values.size(); ++i) { const Real sigma = singular_values[i]; if (sigma <= result.tolerance) { @@ -451,22 +325,10 @@ DensePseudoInverseResult rank_revealing_pseudo_inverse( result.smallest_retained_singular_value = sigma; } - const Matrix pseudo_inverse = + const DenseMatrix pseudo_inverse = svd.matrixV() * sigma_inverse * svd.matrixU().transpose(); - for (std::size_t r = 0; r < cols; ++r) { - for (std::size_t c = 0; c < rows; ++c) { - result.inverse[r * rows + c] = - pseudo_inverse(static_cast(r), static_cast(c)); - } - } + copy_to_row_major(pseudo_inverse, result.inverse); return result; -#else - DENSE_LINALG_CHECK( - false, - std::string(label) + - ": rank-revealing pseudo-inverse requires FE_ENABLE_EIGEN"); - return {}; -#endif } } // namespace math diff --git a/Code/Source/solver/FE/Math/DenseLinearAlgebra.h b/Code/Source/solver/FE/Math/DenseLinearAlgebra.h index 6c81755f4..d322ef958 100644 --- a/Code/Source/solver/FE/Math/DenseLinearAlgebra.h +++ b/Code/Source/solver/FE/Math/DenseLinearAlgebra.h @@ -6,6 +6,8 @@ #include "Types.h" +#include + #include #include #include @@ -18,7 +20,7 @@ namespace FE { namespace math { // Dense solve, inverse, rank, and pseudo-inverse support for FE construction -// utilities. Matrices are row-major: matrix[row * cols + col]. +// utilities, backed by Eigen. Matrices are row-major: matrix[row * cols + col]. 
[[nodiscard]] Real dense_matrix_max_abs(std::span matrix) noexcept; [[nodiscard]] Real dense_matrix_pivot_tolerance(std::size_t rows, @@ -57,9 +59,10 @@ struct DenseInverseResult { [[nodiscard]] Real dense_matrix_condition_error_threshold() noexcept; struct DenseLUSolver { + using DenseMatrix = Eigen::Matrix; + std::size_t n{0}; - std::vector lu; - std::vector pivots; + Eigen::PartialPivLU lu; DenseMatrixDiagnostics diagnostics; Real pivot_tolerance{0}; std::string label; diff --git a/Code/Source/solver/FE/Math/DenseTransformKernels.h b/Code/Source/solver/FE/Math/DenseTransformKernels.h index 50f1002de..2ddb9cefa 100644 --- a/Code/Source/solver/FE/Math/DenseTransformKernels.h +++ b/Code/Source/solver/FE/Math/DenseTransformKernels.h @@ -6,17 +6,21 @@ #include "Types.h" -#include -#include +#include + #include namespace svmp { namespace FE { namespace math { -constexpr std::size_t dense_transform_blocked_min_rows() noexcept { return 32u; } -constexpr std::size_t dense_transform_blocked_min_rhs() noexcept { return 4u; } - +/// \brief Apply a row-major dense matrix to a batch of right-hand sides. +/// +/// Computes output = matrix * input where matrix is rows-by-cols (row-major), +/// input holds cols rows of rhs_count values each (row stride +/// input_row_stride), and output holds rows rows of rhs_count values each +/// (row stride output_row_stride). Strides may exceed rhs_count for padded +/// layouts; padding entries are left untouched. 
inline void dense_transform_batched_row_major( const Real* SVMP_RESTRICT matrix, std::size_t rows, @@ -30,41 +34,29 @@ inline void dense_transform_batched_row_major( return; } - if (rows < dense_transform_blocked_min_rows() || - rhs_count < dense_transform_blocked_min_rhs()) { - for (std::size_t row = 0; row < rows; ++row) { - const Real* matrix_row = matrix + row * cols; - Real* output_row = output + row * output_row_stride; - for (std::size_t rhs = 0; rhs < rhs_count; ++rhs) { - Real value = Real(0); - for (std::size_t col = 0; col < cols; ++col) { - value += matrix_row[col] * input[col * input_row_stride + rhs]; - } - output_row[rhs] = value; - } - } - return; - } + using RowMajorMatrix = + Eigen::Matrix; + using ConstMap = Eigen::Map; + using ConstStridedMap = + Eigen::Map>; + using StridedMap = + Eigen::Map>; - constexpr std::size_t kRhsBlock = 32u; - for (std::size_t row = 0; row < rows; ++row) { - const Real* matrix_row = matrix + row * cols; - Real* output_row = output + row * output_row_stride; - for (std::size_t rhs_base = 0; rhs_base < rhs_count; rhs_base += kRhsBlock) { - const std::size_t block_size = std::min(kRhsBlock, rhs_count - rhs_base); - std::array accum{}; - for (std::size_t col = 0; col < cols; ++col) { - const Real coeff = matrix_row[col]; - const Real* input_row = input + col * input_row_stride + rhs_base; - for (std::size_t rhs = 0; rhs < block_size; ++rhs) { - accum[rhs] += coeff * input_row[rhs]; - } - } - for (std::size_t rhs = 0; rhs < block_size; ++rhs) { - output_row[rhs_base + rhs] = accum[rhs]; - } - } - } + const ConstMap matrix_map(matrix, + static_cast(rows), + static_cast(cols)); + const ConstStridedMap input_map( + input, + static_cast(cols), + static_cast(rhs_count), + Eigen::OuterStride<>(static_cast(input_row_stride))); + StridedMap output_map( + output, + static_cast(rows), + static_cast(rhs_count), + Eigen::OuterStride<>(static_cast(output_row_stride))); + + output_map.noalias() = matrix_map * input_map; } } // namespace 
math diff --git a/Code/Source/solver/FE/Math/Matrix.h b/Code/Source/solver/FE/Math/Matrix.h index f7432f38c..ce1d4a612 100644 --- a/Code/Source/solver/FE/Math/Matrix.h +++ b/Code/Source/solver/FE/Math/Matrix.h @@ -6,32 +6,25 @@ /** * @file Matrix.h - * @brief Fixed-size matrices with expression templates and specializations for FE computations + * @brief Fixed-size matrix types for FE computations, backed by Eigen. * - * This header provides optimized fixed-size matrix operations for element-level - * computations. Includes specialized analytical formulas for 2x2 and 3x3 matrices - * (determinant, inverse using Cramer's rule) and Gauss elimination for larger matrices. - * All operations use expression templates to eliminate temporaries. + * The FE library standardizes on Eigen for linear algebra. These aliases give + * element-level code a stable vocabulary type without re-exporting all of + * Eigen. Storage is Eigen's default (column-major); element access through + * operator()(row, col) is unchanged. Note that, unlike the previous in-house + * implementation, Eigen types are NOT zero-initialized by default + * construction; use Matrix::Zero() where a zeroed value is required. */ -#include "MatrixExpr.h" #include "Vector.h" -#include "../Common/Types.h" -#include -#include -#include -#include -#include -#include -#include + +#include + +#include /// \defgroup FE_MatrixMath Matrix /// \ingroup FE_Math -/// \brief Fixed-size matrix types, matrix expressions, and small-matrix operations. -/// -/// \details The Matrix submodule contains row-major fixed-size matrices used -/// by FE kernels, expression-template support for matrix algebra, and direct -/// determinant/inverse implementations for common element-level sizes. +/// \brief Fixed-size matrix type aliases. 
namespace svmp { namespace FE { @@ -43,1266 +36,9 @@ namespace math { * @tparam T Scalar type (float, double) * @tparam M Number of rows * @tparam N Number of columns - * - * Storage is row-major for cache efficiency. Memory is aligned for SIMD operations. - * Specializations exist for 2x2, 3x3, 4x4 matrices with analytical algorithms. */ template -class Matrix : public MatrixExpr> { - static_assert(std::is_arithmetic_v, "T must be an arithmetic type"); - static_assert(M > 0 && N > 0, "Matrix dimensions must be positive"); - -private: - alignas(kFEFixedObjectAlignmentBytes) T data_[M * N]; // Row-major, SIMD-friendly storage - - // Helper to compute linear index from (i,j) - static constexpr std::size_t index(std::size_t i, std::size_t j) { - return i * N + j; - } - -public: - // Type definitions - using value_type = T; - using size_type = std::size_t; - using reference = T&; - using const_reference = const T&; - using pointer = T*; - using const_pointer = const T*; - - /** - * @brief Default constructor - zero initializes all elements - */ - constexpr Matrix() : data_{} {} - - /** - * @brief Fill constructor - initializes all elements with same value - * @param value Value to fill matrix with - */ - constexpr explicit Matrix(T value) { - for (size_type i = 0; i < M * N; ++i) { - data_[i] = value; - } - } - - /** - * @brief Initializer list constructor for row-wise initialization - * @param init Nested initializer lists {{row0}, {row1}, ...} - */ - constexpr Matrix(std::initializer_list> init) : data_{} { - size_type row = 0; - for (auto row_init : init) { - if (row >= M) break; - size_type col = 0; - for (auto val : row_init) { - if (col >= N) break; - (*this)(row, col) = val; - ++col; - } - ++row; - } - } - - /** - * @brief Constructor from expression template - * @tparam Expr Expression type - * @param expr Matrix expression to evaluate - */ - template - Matrix(const MatrixExpr& expr) { - const auto& e = expr.derived(); - for (size_type i = 0; i < M; ++i) { - 
for (size_type j = 0; j < N; ++j) { - (*this)(i, j) = e(i, j); - } - } - } - - /** - * @brief Copy constructor - */ - constexpr Matrix(const Matrix&) = default; - - /** - * @brief Move constructor - */ - constexpr Matrix(Matrix&&) noexcept = default; - - /** - * @brief Copy assignment - */ - Matrix& operator=(const Matrix&) = default; - - /** - * @brief Move assignment - */ - Matrix& operator=(Matrix&&) noexcept = default; - - /** - * @brief Assignment from expression template - * @tparam Expr Expression type - * @param expr Matrix expression to evaluate - * @return Reference to this - */ - template - Matrix& operator=(const MatrixExpr& expr) { - const auto& e = expr.derived(); - for (size_type i = 0; i < M; ++i) { - for (size_type j = 0; j < N; ++j) { - (*this)(i, j) = e(i, j); - } - } - return *this; - } - - /** - * @brief Get number of rows (compile-time constant) - * @return Number of rows - */ - static constexpr size_type rows() { return M; } - - /** - * @brief Get number of columns (compile-time constant) - * @return Number of columns - */ - static constexpr size_type cols() { return N; } - - /** - * @brief Get total number of elements - * @return M * N - */ - static constexpr size_type size() { return M * N; } - - /** - * @brief Element access (no bounds checking) - * @param i Row index - * @param j Column index - * @return Reference to element - */ - constexpr T& operator()(size_type i, size_type j) { - return data_[index(i, j)]; - } - - /** - * @brief Element access (no bounds checking) - const version - * @param i Row index - * @param j Column index - * @return Const reference to element - */ - constexpr const T& operator()(size_type i, size_type j) const { - return data_[index(i, j)]; - } - - /** - * @brief Element access with bounds checking - * @param i Row index - * @param j Column index - * @return Reference to element - * @throws std::out_of_range if indices are out of bounds - */ - T& at(size_type i, size_type j) { - if (i >= M || j >= N) { - throw 
std::out_of_range("Matrix::at: index out of range"); - } - return (*this)(i, j); - } - - /** - * @brief Element access with bounds checking - const version - * @param i Row index - * @param j Column index - * @return Const reference to element - * @throws std::out_of_range if indices are out of bounds - */ - const T& at(size_type i, size_type j) const { - if (i >= M || j >= N) { - throw std::out_of_range("Matrix::at: index out of range"); - } - return (*this)(i, j); - } - - /** - * @brief Get row as vector - * @param i Row index - * @return Vector containing row elements - */ - Vector row(size_type i) const { - Vector result; - for (size_type j = 0; j < N; ++j) { - result[j] = (*this)(i, j); - } - return result; - } - - /** - * @brief Get column as vector - * @param j Column index - * @return Vector containing column elements - */ - Vector column(size_type j) const { - Vector result; - for (size_type i = 0; i < M; ++i) { - result[i] = (*this)(i, j); - } - return result; - } - - /** - * @brief Get column as vector (alias for column) - * @param j Column index - * @return Vector containing column elements - */ - Vector col(size_type j) const { - return column(j); - } - - /** - * @brief Set row from vector - * @param i Row index - * @param v Vector of values - */ - void set_row(size_type i, const Vector& v) { - for (size_type j = 0; j < N; ++j) { - (*this)(i, j) = v[j]; - } - } - - /** - * @brief Set column from vector - * @param j Column index - * @param v Vector of values - */ - void set_column(size_type j, const Vector& v) { - for (size_type i = 0; i < M; ++i) { - (*this)(i, j) = v[i]; - } - } - - /** - * @brief Set column from vector (alias for set_column) - * @param j Column index - * @param v Vector of values - */ - void set_col(size_type j, const Vector& v) { - set_column(j, v); - } - - /** - * @brief Get pointer to underlying data - * @return Pointer to first element - */ - T* data() { return data_; } - const T* data() const { return data_; } - - /** - * @brief 
Fill matrix with value - * @param value Value to fill with - */ - void fill(T value) { - for (size_type i = 0; i < M * N; ++i) { - data_[i] = value; - } - } - - /** - * @brief Set all elements to zero - */ - void set_zero() { - fill(T{0}); - } - - // Arithmetic operators - - /** - * @brief In-place addition - * @param other Matrix to add - * @return Reference to this - */ - Matrix& operator+=(const Matrix& other) { - for (size_type i = 0; i < M * N; ++i) { - data_[i] += other.data_[i]; - } - return *this; - } - - /** - * @brief In-place subtraction - * @param other Matrix to subtract - * @return Reference to this - */ - Matrix& operator-=(const Matrix& other) { - for (size_type i = 0; i < M * N; ++i) { - data_[i] -= other.data_[i]; - } - return *this; - } - - /** - * @brief In-place scalar multiplication - * @param scalar Scalar to multiply by - * @return Reference to this - */ - Matrix& operator*=(T scalar) { - for (size_type i = 0; i < M * N; ++i) { - data_[i] *= scalar; - } - return *this; - } - - /** - * @brief In-place scalar division - * @param scalar Scalar to divide by - * @return Reference to this - */ - Matrix& operator/=(T scalar) { - const T inv = T(1) / scalar; - return (*this) *= inv; - } - - // Matrix operations - - /** - * @brief Compute transpose - * @return Transposed matrix - */ - Matrix transpose() const { - Matrix result; - for (size_type i = 0; i < M; ++i) { - for (size_type j = 0; j < N; ++j) { - result(j, i) = (*this)(i, j); - } - } - return result; - } - - /** - * @brief Compute trace (sum of diagonal elements) - * @return Trace (only valid for square matrices) - */ - template - std::enable_if_t trace() const { - T result = T(0); - for (size_type i = 0; i < M; ++i) { - result += (*this)(i, i); - } - return result; - } - - /** - * @brief Compute Frobenius norm squared - * @return Sum of squares of all elements - */ - T frobenius_norm_squared() const { - T result = T(0); - for (size_type i = 0; i < M * N; ++i) { - result += data_[i] * 
data_[i]; - } - return result; - } - - /** - * @brief Compute Frobenius norm - * @return Square root of sum of squares - */ - T frobenius_norm() const { - using std::sqrt; - return sqrt(frobenius_norm_squared()); - } - - /** - * @brief Compute infinity norm (maximum absolute row sum) - * @return Infinity norm - */ - T infinity_norm() const { - T max_row_sum = T(0); - for (size_type i = 0; i < M; ++i) { - T row_sum = T(0); - for (size_type j = 0; j < N; ++j) { - using std::abs; - row_sum += abs((*this)(i, j)); - } - max_row_sum = std::max(max_row_sum, row_sum); - } - return max_row_sum; - } - - /** - * @brief Compute one norm (maximum absolute column sum) - * @return One norm - */ - T one_norm() const { - T max_col_sum = T(0); - for (size_type j = 0; j < N; ++j) { - T col_sum = T(0); - for (size_type i = 0; i < M; ++i) { - using std::abs; - col_sum += abs((*this)(i, j)); - } - max_col_sum = std::max(max_col_sum, col_sum); - } - return max_col_sum; - } - - /** - * @brief Get minimum element - * @return Minimum value - */ - T min() const { - return *std::min_element(data_, data_ + M * N); - } - - /** - * @brief Get maximum element - * @return Maximum value - */ - T max() const { - return *std::max_element(data_, data_ + M * N); - } - - /** - * @brief Get sum of all elements - * @return Sum of elements - */ - T sum() const { - T result = T(0); - for (size_type i = 0; i < M * N; ++i) { - result += data_[i]; - } - return result; - } - - // Static factory functions - - /** - * @brief Create zero matrix - * @return Matrix with all elements zero - */ - static constexpr Matrix zeros() { - return Matrix(); - } - - /** - * @brief Create matrix with all elements one - * @return Matrix with all elements one - */ - static constexpr Matrix ones() { - return Matrix(T(1)); - } - - /** - * @brief Create identity matrix (only for square matrices) - * @return Identity matrix - */ - template - static std::enable_if_t identity() { - Matrix result; - for (size_type i = 0; i < M; ++i) { - 
result(i, i) = T(1); - } - return result; - } - - /** - * @brief Create diagonal matrix from vector (only for square matrices) - * @param diag Vector of diagonal elements - * @return Diagonal matrix - */ - template - static std::enable_if_t diagonal(const Vector& diag) { - Matrix result; - for (size_type i = 0; i < M; ++i) { - result(i, i) = diag[i]; - } - return result; - } - - /** - * @brief Create zero matrix (static factory) - * @return Zero matrix - */ - static Matrix zero() { - return zeros(); - } - - // Property checking methods - - /** - * @brief Check if matrix is symmetric (only for square matrices) - * @param tol Tolerance for comparison - * @return true if symmetric - */ - template - std::enable_if_t is_symmetric(T tol = tolerance) const { - for (size_type i = 0; i < M; ++i) { - for (size_type j = i + 1; j < N; ++j) { - using std::abs; - if (abs((*this)(i, j) - (*this)(j, i)) > tol) { - return false; - } - } - } - return true; - } - - /** - * @brief Check if matrix is skew-symmetric (only for square matrices) - * @param tol Tolerance for comparison - * @return true if skew-symmetric - */ - template - std::enable_if_t is_skew_symmetric(T tol = tolerance) const { - for (size_type i = 0; i < M; ++i) { - // Diagonal must be zero - using std::abs; - if (abs((*this)(i, i)) > tol) { - return false; - } - for (size_type j = i + 1; j < N; ++j) { - if (abs((*this)(i, j) + (*this)(j, i)) > tol) { - return false; - } - } - } - return true; - } - - /** - * @brief Check if matrix is diagonal (only for square matrices) - * @param tol Tolerance for comparison - * @return true if diagonal - */ - template - std::enable_if_t is_diagonal(T tol = tolerance) const { - for (size_type i = 0; i < M; ++i) { - for (size_type j = 0; j < N; ++j) { - if (i != j) { - using std::abs; - if (abs((*this)(i, j)) > tol) { - return false; - } - } - } - } - return true; - } - - // Determinant (general template, specialized for 2x2, 3x3) - /** - * @brief Compute determinant (only for square 
matrices) - * @return Determinant value - */ - template - std::enable_if_t determinant() const { - // For 4x4 and larger, use LU decomposition - return determinant_lu(); - } - - // Inverse (general template, specialized for 2x2, 3x3) - /** - * @brief Compute matrix inverse (only for square matrices) - * @return Inverse matrix - */ - template - std::enable_if_t inverse() const { - // For 4x4 and larger, use Gauss-Jordan elimination - return inverse_gauss_jordan(); - } - -private: - // LU decomposition for determinant (4x4 and larger) - T determinant_lu() const { - Matrix lu = *this; - T det = T(1); - - for (size_type k = 0; k < M - 1; ++k) { - // Find pivot - size_type pivot = k; - T max_val = std::abs(lu(k, k)); - for (size_type i = k + 1; i < M; ++i) { - T val = std::abs(lu(i, k)); - if (val > max_val) { - max_val = val; - pivot = i; - } - } - - // Swap rows if needed - if (pivot != k) { - for (size_type j = 0; j < M; ++j) { - std::swap(lu(k, j), lu(pivot, j)); - } - det = -det; // Row swap changes sign - } - - // Check for singularity - if (approx_zero(lu(k, k))) { - return T(0); - } - - // Eliminate column - for (size_type i = k + 1; i < M; ++i) { - T factor = lu(i, k) / lu(k, k); - for (size_type j = k + 1; j < M; ++j) { - lu(i, j) -= factor * lu(k, j); - } - } - - det *= lu(k, k); - } - det *= lu(M - 1, M - 1); - - return det; - } - - // Gauss-Jordan elimination for inverse (4x4 and larger) - Matrix inverse_gauss_jordan() const { - Matrix aug; // Augmented matrix [A | I] - Matrix result = Matrix::identity(); - - // Copy this matrix to augmented matrix - for (size_type i = 0; i < M; ++i) { - for (size_type j = 0; j < M; ++j) { - aug(i, j) = (*this)(i, j); - } - } - - // Forward elimination with partial pivoting - for (size_type k = 0; k < M; ++k) { - // Find pivot - size_type pivot = k; - T max_val = std::abs(aug(k, k)); - for (size_type i = k + 1; i < M; ++i) { - T val = std::abs(aug(i, k)); - if (val > max_val) { - max_val = val; - pivot = i; - } - } - - // 
Swap rows - if (pivot != k) { - for (size_type j = 0; j < M; ++j) { - std::swap(aug(k, j), aug(pivot, j)); - std::swap(result(k, j), result(pivot, j)); - } - } - - // Check for singularity - if (approx_zero(aug(k, k))) { - throw std::runtime_error("Matrix is singular"); - } - - // Scale pivot row - T pivot_val = aug(k, k); - for (size_type j = 0; j < M; ++j) { - aug(k, j) /= pivot_val; - result(k, j) /= pivot_val; - } - - // Eliminate column - for (size_type i = 0; i < M; ++i) { - if (i != k) { - T factor = aug(i, k); - for (size_type j = 0; j < M; ++j) { - aug(i, j) -= factor * aug(k, j); - result(i, j) -= factor * result(k, j); - } - } - } - } - - return result; - } - - // Iterators -public: - T* begin() { return data_; } - T* end() { return data_ + M * N; } - const T* begin() const { return data_; } - const T* end() const { return data_ + M * N; } - const T* cbegin() const { return data_; } - const T* cend() const { return data_ + M * N; } -}; - -// Specialization for 2x2 determinant (analytical formula) -template -inline T determinant_2x2(const Matrix& m) { - return m(0, 0) * m(1, 1) - m(0, 1) * m(1, 0); -} - -// Specialization for 2x2 inverse (Cramer's rule) -template -inline Matrix inverse_2x2(const Matrix& m) { - T det = determinant_2x2(m); - if (approx_zero(det)) { - throw std::runtime_error("Matrix is singular"); - } - - T inv_det = T(1) / det; - return Matrix{ - { m(1, 1) * inv_det, -m(0, 1) * inv_det}, - {-m(1, 0) * inv_det, m(0, 0) * inv_det} - }; -} - -// Specialization for 3x3 determinant (Sarrus rule) -template -inline T determinant_3x3(const Matrix& m) { - return m(0, 0) * (m(1, 1) * m(2, 2) - m(1, 2) * m(2, 1)) - - m(0, 1) * (m(1, 0) * m(2, 2) - m(1, 2) * m(2, 0)) - + m(0, 2) * (m(1, 0) * m(2, 1) - m(1, 1) * m(2, 0)); -} - -// Specialization for 3x3 inverse (Cramer's rule / adjugate method) -template -inline Matrix inverse_3x3(const Matrix& m) { - T det = determinant_3x3(m); - if (approx_zero(det)) { - throw std::runtime_error("Matrix is 
singular"); - } - - T inv_det = T(1) / det; - - // Compute adjugate matrix (transpose of cofactor matrix) - Matrix adj; - adj(0, 0) = (m(1, 1) * m(2, 2) - m(1, 2) * m(2, 1)); - adj(0, 1) = -(m(0, 1) * m(2, 2) - m(0, 2) * m(2, 1)); - adj(0, 2) = (m(0, 1) * m(1, 2) - m(0, 2) * m(1, 1)); - - adj(1, 0) = -(m(1, 0) * m(2, 2) - m(1, 2) * m(2, 0)); - adj(1, 1) = (m(0, 0) * m(2, 2) - m(0, 2) * m(2, 0)); - adj(1, 2) = -(m(0, 0) * m(1, 2) - m(0, 2) * m(1, 0)); - - adj(2, 0) = (m(1, 0) * m(2, 1) - m(1, 1) * m(2, 0)); - adj(2, 1) = -(m(0, 0) * m(2, 1) - m(0, 1) * m(2, 0)); - adj(2, 2) = (m(0, 0) * m(1, 1) - m(0, 1) * m(1, 0)); - - return adj * inv_det; -} - -/** - * @brief Specialized fixed-size 2-by-2 matrix for element-level computations. - * @ingroup FE_MatrixMath - * @tparam T Scalar type. - * - * This specialization preserves the Matrix API while using direct formulas for - * 2-by-2 determinant and inverse operations. - */ -template -class Matrix : public MatrixExpr> { - static constexpr std::size_t M = 2; - static constexpr std::size_t N = 2; - -private: - alignas(kFEFixedObjectAlignmentBytes) T data_[4]; - - static constexpr std::size_t index(std::size_t i, std::size_t j) { - return i * 2 + j; - } - -public: - using value_type = T; - using size_type = std::size_t; - - // Include all the same constructors and methods as the general template - constexpr Matrix() : data_{} {} - constexpr explicit Matrix(T value) { - for (size_type i = 0; i < 4; ++i) { - data_[i] = value; - } - } - constexpr Matrix(std::initializer_list> init) : data_{} { - size_type row = 0; - for (auto row_init : init) { - if (row >= 2) break; - size_type col = 0; - for (auto val : row_init) { - if (col >= 2) break; - (*this)(row, col) = val; - ++col; - } - ++row; - } - } - - template - Matrix(const MatrixExpr& expr) { - const auto& e = expr.derived(); - for (size_type i = 0; i < 2; ++i) { - for (size_type j = 0; j < 2; ++j) { - (*this)(i, j) = e(i, j); - } - } - } - - constexpr Matrix(const Matrix&) = 
default; - constexpr Matrix(Matrix&&) noexcept = default; - Matrix& operator=(const Matrix&) = default; - Matrix& operator=(Matrix&&) noexcept = default; - - template - Matrix& operator=(const MatrixExpr& expr) { - const auto& e = expr.derived(); - for (size_type i = 0; i < 2; ++i) { - for (size_type j = 0; j < 2; ++j) { - (*this)(i, j) = e(i, j); - } - } - return *this; - } - - static constexpr size_type rows() { return 2; } - static constexpr size_type cols() { return 2; } - static constexpr size_type size() { return 4; } - - constexpr T& operator()(size_type i, size_type j) { - return data_[index(i, j)]; - } - constexpr const T& operator()(size_type i, size_type j) const { - return data_[index(i, j)]; - } - - T* data() { return data_; } - const T* data() const { return data_; } - - void fill(T value) { - for (size_type i = 0; i < 4; ++i) { - data_[i] = value; - } - } - - void set_zero() { fill(T{0}); } - - void set_row(size_type i, const Vector& v) { - for (size_type j = 0; j < 2; ++j) { - (*this)(i, j) = v[j]; - } - } - - void set_column(size_type j, const Vector& v) { - for (size_type i = 0; i < 2; ++i) { - (*this)(i, j) = v[i]; - } - } - - void set_col(size_type j, const Vector& v) { - set_column(j, v); - } - - Vector col(size_type j) const { - return column(j); - } - - static Matrix zero() { - return zeros(); - } - - static Matrix diagonal(const Vector& diag) { - Matrix result; - result(0, 0) = diag[0]; - result(1, 1) = diag[1]; - return result; - } - - bool is_symmetric(T tol = tolerance) const { - using std::abs; - return abs((*this)(0, 1) - (*this)(1, 0)) <= tol; - } - - bool is_skew_symmetric(T tol = tolerance) const { - using std::abs; - // Diagonal must be zero - if (abs((*this)(0, 0)) > tol || abs((*this)(1, 1)) > tol) { - return false; - } - // Off-diagonal must be opposite - return abs((*this)(0, 1) + (*this)(1, 0)) <= tol; - } - - bool is_diagonal(T tol = tolerance) const { - using std::abs; - return abs((*this)(0, 1)) <= tol && abs((*this)(1, 0)) 
<= tol; - } - - T frobenius_norm() const { - using std::sqrt; - T sum = T(0); - for (size_type i = 0; i < 4; ++i) { - sum += data_[i] * data_[i]; - } - return sqrt(sum); - } - - T infinity_norm() const { - using std::abs; - T row0 = abs((*this)(0, 0)) + abs((*this)(0, 1)); - T row1 = abs((*this)(1, 0)) + abs((*this)(1, 1)); - return std::max(row0, row1); - } - - T one_norm() const { - using std::abs; - T col0 = abs((*this)(0, 0)) + abs((*this)(1, 0)); - T col1 = abs((*this)(0, 1)) + abs((*this)(1, 1)); - return std::max(col0, col1); - } - - Matrix& operator+=(const Matrix& other) { - for (size_type i = 0; i < 4; ++i) { - data_[i] += other.data_[i]; - } - return *this; - } - - Matrix& operator-=(const Matrix& other) { - for (size_type i = 0; i < 4; ++i) { - data_[i] -= other.data_[i]; - } - return *this; - } - - Matrix& operator*=(T scalar) { - for (size_type i = 0; i < 4; ++i) { - data_[i] *= scalar; - } - return *this; - } - - Matrix& operator/=(T scalar) { - const T inv = T(1) / scalar; - return (*this) *= inv; - } - - Matrix transpose() const { - return Matrix{ - {(*this)(0, 0), (*this)(1, 0)}, - {(*this)(0, 1), (*this)(1, 1)} - }; - } - - T trace() const { - return (*this)(0, 0) + (*this)(1, 1); - } - - static Matrix identity() { - Matrix result; - result(0, 0) = T(1); - result(1, 1) = T(1); - return result; - } - - static Matrix zeros() { - return Matrix(); - } - - static Matrix ones() { - return Matrix(T(1)); - } - - // Specialized 2x2 determinant - T determinant() const { - return determinant_2x2(*this); - } - - // Specialized 2x2 inverse - Matrix inverse() const { - return inverse_2x2(*this); - } - - Vector row(size_type i) const { - return Vector{(*this)(i, 0), (*this)(i, 1)}; - } - - Vector column(size_type j) const { - return Vector{(*this)(0, j), (*this)(1, j)}; - } - - T* begin() { return data_; } - T* end() { return data_ + 4; } - const T* begin() const { return data_; } - const T* end() const { return data_ + 4; } -}; - -/** - * @brief Specialized 
fixed-size 3-by-3 matrix for element-level computations. - * @ingroup FE_MatrixMath - * @tparam T Scalar type. - * - * This specialization preserves the Matrix API while using direct formulas for - * 3-by-3 determinant and inverse operations. - */ -template -class Matrix : public MatrixExpr> { - static constexpr std::size_t M = 3; - static constexpr std::size_t N = 3; - -private: - alignas(kFEFixedObjectAlignmentBytes) T data_[9]; - - static constexpr std::size_t index(std::size_t i, std::size_t j) { - return i * 3 + j; - } - -public: - using value_type = T; - using size_type = std::size_t; - - constexpr Matrix() : data_{} {} - constexpr explicit Matrix(T value) { - for (size_type i = 0; i < 9; ++i) { - data_[i] = value; - } - } - constexpr Matrix(std::initializer_list> init) : data_{} { - size_type row = 0; - for (auto row_init : init) { - if (row >= 3) break; - size_type col = 0; - for (auto val : row_init) { - if (col >= 3) break; - (*this)(row, col) = val; - ++col; - } - ++row; - } - } - - template - Matrix(const MatrixExpr& expr) { - const auto& e = expr.derived(); - for (size_type i = 0; i < 3; ++i) { - for (size_type j = 0; j < 3; ++j) { - (*this)(i, j) = e(i, j); - } - } - } - - constexpr Matrix(const Matrix&) = default; - constexpr Matrix(Matrix&&) noexcept = default; - Matrix& operator=(const Matrix&) = default; - Matrix& operator=(Matrix&&) noexcept = default; - - template - Matrix& operator=(const MatrixExpr& expr) { - const auto& e = expr.derived(); - for (size_type i = 0; i < 3; ++i) { - for (size_type j = 0; j < 3; ++j) { - (*this)(i, j) = e(i, j); - } - } - return *this; - } - - static constexpr size_type rows() { return 3; } - static constexpr size_type cols() { return 3; } - static constexpr size_type size() { return 9; } - - constexpr T& operator()(size_type i, size_type j) { - return data_[index(i, j)]; - } - constexpr const T& operator()(size_type i, size_type j) const { - return data_[index(i, j)]; - } - - T* data() { return data_; } - const 
T* data() const { return data_; } - - void fill(T value) { - for (size_type i = 0; i < 9; ++i) { - data_[i] = value; - } - } - - void set_zero() { fill(T{0}); } - - void set_row(size_type i, const Vector& v) { - for (size_type j = 0; j < 3; ++j) { - (*this)(i, j) = v[j]; - } - } - - void set_column(size_type j, const Vector& v) { - for (size_type i = 0; i < 3; ++i) { - (*this)(i, j) = v[i]; - } - } - - void set_col(size_type j, const Vector& v) { - set_column(j, v); - } - - Vector col(size_type j) const { - return column(j); - } - - static Matrix zero() { - return zeros(); - } - - static Matrix diagonal(const Vector& diag) { - Matrix result; - result(0, 0) = diag[0]; - result(1, 1) = diag[1]; - result(2, 2) = diag[2]; - return result; - } - - bool is_symmetric(T tol = tolerance) const { - using std::abs; - for (size_type i = 0; i < 3; ++i) { - for (size_type j = i + 1; j < 3; ++j) { - if (abs((*this)(i, j) - (*this)(j, i)) > tol) { - return false; - } - } - } - return true; - } - - bool is_skew_symmetric(T tol = tolerance) const { - using std::abs; - // Diagonal must be zero - for (size_type i = 0; i < 3; ++i) { - if (abs((*this)(i, i)) > tol) { - return false; - } - } - // Off-diagonal must be opposite - for (size_type i = 0; i < 3; ++i) { - for (size_type j = i + 1; j < 3; ++j) { - if (abs((*this)(i, j) + (*this)(j, i)) > tol) { - return false; - } - } - } - return true; - } - - bool is_diagonal(T tol = tolerance) const { - using std::abs; - for (size_type i = 0; i < 3; ++i) { - for (size_type j = 0; j < 3; ++j) { - if (i != j && abs((*this)(i, j)) > tol) { - return false; - } - } - } - return true; - } - - T frobenius_norm() const { - using std::sqrt; - T sum = T(0); - for (size_type i = 0; i < 9; ++i) { - sum += data_[i] * data_[i]; - } - return sqrt(sum); - } - - T infinity_norm() const { - using std::abs; - T max_row_sum = T(0); - for (size_type i = 0; i < 3; ++i) { - T row_sum = T(0); - for (size_type j = 0; j < 3; ++j) { - row_sum += abs((*this)(i, j)); - } 
- max_row_sum = std::max(max_row_sum, row_sum); - } - return max_row_sum; - } - - T one_norm() const { - using std::abs; - T max_col_sum = T(0); - for (size_type j = 0; j < 3; ++j) { - T col_sum = T(0); - for (size_type i = 0; i < 3; ++i) { - col_sum += abs((*this)(i, j)); - } - max_col_sum = std::max(max_col_sum, col_sum); - } - return max_col_sum; - } - - Matrix& operator+=(const Matrix& other) { - for (size_type i = 0; i < 9; ++i) { - data_[i] += other.data_[i]; - } - return *this; - } - - Matrix& operator-=(const Matrix& other) { - for (size_type i = 0; i < 9; ++i) { - data_[i] -= other.data_[i]; - } - return *this; - } - - Matrix& operator*=(T scalar) { - for (size_type i = 0; i < 9; ++i) { - data_[i] *= scalar; - } - return *this; - } - - Matrix& operator/=(T scalar) { - const T inv = T(1) / scalar; - return (*this) *= inv; - } - - Matrix transpose() const { - Matrix result; - for (size_type i = 0; i < 3; ++i) { - for (size_type j = 0; j < 3; ++j) { - result(j, i) = (*this)(i, j); - } - } - return result; - } - - T trace() const { - return (*this)(0, 0) + (*this)(1, 1) + (*this)(2, 2); - } - - static Matrix identity() { - Matrix result; - result(0, 0) = T(1); - result(1, 1) = T(1); - result(2, 2) = T(1); - return result; - } - - static Matrix zeros() { - return Matrix(); - } - - static Matrix ones() { - return Matrix(T(1)); - } - - // Specialized 3x3 determinant - T determinant() const { - return determinant_3x3(*this); - } - - // Specialized 3x3 inverse - Matrix inverse() const { - return inverse_3x3(*this); - } - - Vector row(size_type i) const { - return Vector{(*this)(i, 0), (*this)(i, 1), (*this)(i, 2)}; - } - - Vector column(size_type j) const { - return Vector{(*this)(0, j), (*this)(1, j), (*this)(2, j)}; - } - - T* begin() { return data_; } - T* end() { return data_ + 9; } - const T* begin() const { return data_; } - const T* end() const { return data_ + 9; } -}; +using Matrix = Eigen::Matrix(M), static_cast(N)>; // Type aliases for common matrix 
types template using Matrix2x2 = Matrix; @@ -1323,188 +59,6 @@ using Matrix2x2f = Matrix2x2; using Matrix3x3f = Matrix3x3; using Matrix4x4f = Matrix4x4; -// Matrix-vector multiplication -template -inline Vector operator*(const Matrix& A, const Vector& x) { - Vector result; - for (std::size_t i = 0; i < M; ++i) { - T sum = T(0); - for (std::size_t j = 0; j < N; ++j) { - sum += A(i, j) * x[j]; - } - result[i] = sum; - } - return result; -} - -// Vector-matrix multiplication (row vector * matrix) -template -inline Vector operator*(const Vector& x, const Matrix& A) { - Vector result; - for (std::size_t j = 0; j < N; ++j) { - T sum = T(0); - for (std::size_t i = 0; i < M; ++i) { - sum += x[i] * A(i, j); - } - result[j] = sum; - } - return result; -} - -// Matrix-matrix multiplication -template -inline Matrix operator*(const Matrix& A, const Matrix& B) { - Matrix result; - for (std::size_t i = 0; i < M; ++i) { - for (std::size_t k = 0; k < N; ++k) { - T a_ik = A(i, k); - for (std::size_t j = 0; j < P; ++j) { - result(i, j) += a_ik * B(k, j); - } - } - } - return result; -} - -// Free functions - -/** - * @brief Compute matrix transpose - */ -template -inline Matrix transpose(const Matrix& m) { - return m.transpose(); -} - -/** - * @brief Compute matrix trace - */ -template -inline T trace(const Matrix& m) { - return m.trace(); -} - -/** - * @brief Compute matrix determinant - */ -template -inline T determinant(const Matrix& m) { - return m.determinant(); -} - -/** - * @brief Compute matrix inverse - */ -template -inline Matrix inverse(const Matrix& m) { - return m.inverse(); -} - -/** - * @brief Compute Frobenius norm - */ -template -inline T frobenius_norm(const Matrix& m) { - return m.frobenius_norm(); -} - -/** - * @brief Component-wise absolute value - */ -template -inline Matrix abs(const Matrix& m) { - Matrix result; - for (std::size_t i = 0; i < M; ++i) { - for (std::size_t j = 0; j < N; ++j) { - using std::abs; - result(i, j) = abs(m(i, j)); - } - } - return 
result; -} - -/** - * @brief Component-wise minimum - */ -template -inline Matrix min(const Matrix& a, const Matrix& b) { - Matrix result; - for (std::size_t i = 0; i < M; ++i) { - for (std::size_t j = 0; j < N; ++j) { - result(i, j) = std::min(a(i, j), b(i, j)); - } - } - return result; -} - -/** - * @brief Component-wise maximum - */ -template -inline Matrix max(const Matrix& a, const Matrix& b) { - Matrix result; - for (std::size_t i = 0; i < M; ++i) { - for (std::size_t j = 0; j < N; ++j) { - result(i, j) = std::max(a(i, j), b(i, j)); - } - } - return result; -} - -/** - * @brief Outer product of two vectors - */ -template -inline Matrix outer_product(const Vector& u, const Vector& v) { - Matrix result; - for (std::size_t i = 0; i < M; ++i) { - for (std::size_t j = 0; j < N; ++j) { - result(i, j) = u[i] * v[j]; - } - } - return result; -} - -/** - * @brief Check if two matrices are approximately equal - */ -template -inline bool approx_equal(const Matrix& a, const Matrix& b, T tol = tolerance) { - for (std::size_t i = 0; i < M; ++i) { - for (std::size_t j = 0; j < N; ++j) { - if (!approx_equal(a(i, j), b(i, j), tol)) { - return false; - } - } - } - return true; -} - -/** - * @brief Stream output operator for matrices - * @tparam T Scalar type - * @tparam M Number of rows - * @tparam N Number of columns - * @param os Output stream - * @param m Matrix to output - * @return Reference to output stream - */ -template -inline std::ostream& operator<<(std::ostream& os, const Matrix& m) { - os << "["; - for (std::size_t i = 0; i < M; ++i) { - if (i > 0) os << "\n "; - os << "["; - for (std::size_t j = 0; j < N; ++j) { - if (j > 0) os << ", "; - os << m(i, j); - } - os << "]"; - } - os << "]"; - return os; -} - } // namespace math } // namespace FE } // namespace svmp diff --git a/Code/Source/solver/FE/Math/MatrixExpr.h b/Code/Source/solver/FE/Math/MatrixExpr.h deleted file mode 100644 index 288bbc5ca..000000000 --- a/Code/Source/solver/FE/Math/MatrixExpr.h +++ 
/dev/null @@ -1,630 +0,0 @@ -// SPDX-FileCopyrightText: Copyright (c) Stanford University, The Regents of the University of California, and others. -// SPDX-License-Identifier: BSD-3-Clause - -#ifndef SVMP_FE_MATH_MATRIX_EXPR_H -#define SVMP_FE_MATH_MATRIX_EXPR_H - -/** - * @file MatrixExpr.h - * @brief Expression template infrastructure for lazy evaluation of matrix operations - * - * This header provides expression templates that enable compound matrix operations - * without creating temporary objects. Operations are evaluated lazily at the point - * of assignment, eliminating intermediate allocations and improving performance. - */ - -#include -#include -#include -#include - -#include "VectorExpr.h" - -namespace svmp { -namespace FE { -namespace math { - -/** - * @brief Base class for all matrix expressions using CRTP - * @tparam Derived The derived expression type - * - * This uses the Curiously Recurring Template Pattern (CRTP) to provide - * static polymorphism for expression templates. 
- */ -template -class MatrixExpr { -public: - /** - * @brief Get the derived expression - * @return Reference to the derived type - */ - const Derived& derived() const { - return static_cast(*this); - } - - /** - * @brief Get the derived expression (non-const) - * @return Reference to the derived type - */ - Derived& derived() { - return static_cast(*this); - } - - /** - * @brief Access element by row and column indices - * @param i Row index - * @param j Column index - * @return Value at (i,j) - */ - auto operator()(std::size_t i, std::size_t j) const { - return derived()(i, j); - } - - /** - * @brief Get number of rows - * @return Number of rows - */ - std::size_t rows() const { - return derived().rows(); - } - - /** - * @brief Get number of columns - * @return Number of columns - */ - std::size_t cols() const { - return derived().cols(); - } -}; - -/** - * @brief Binary expression for element-wise operations between two matrix expressions - * @tparam LHS Left-hand side expression type - * @tparam RHS Right-hand side expression type - * @tparam Op Binary operation functor - */ -template -class MatrixBinaryExpr : public MatrixExpr> { -private: - LHS lhs_; - RHS rhs_; - Op op_; - -public: - /** - * @brief Construct binary expression - * @param lhs Left operand - * @param rhs Right operand - * @param op Operation to apply - */ - constexpr MatrixBinaryExpr(const LHS& lhs, const RHS& rhs, Op op = Op{}) - : lhs_(lhs), rhs_(rhs), op_(op) {} - - /** - * @brief Access element at (i,j) - * @param i Row index - * @param j Column index - * @return Result of operation on elements at (i,j) - */ - constexpr auto operator()(std::size_t i, std::size_t j) const { - return op_(lhs_(i, j), rhs_(i, j)); - } - - /** - * @brief Get number of rows - * @return Number of rows - */ - constexpr std::size_t rows() const { - return lhs_.rows(); - } - - /** - * @brief Get number of columns - * @return Number of columns - */ - constexpr std::size_t cols() const { - return lhs_.cols(); - } -}; - 
-/** - * @brief Unary expression for element-wise operations on a single matrix expression - * @tparam Expr Expression type - * @tparam Op Unary operation functor - */ -template -class MatrixUnaryExpr : public MatrixExpr> { -private: - Expr expr_; - Op op_; - -public: - /** - * @brief Construct unary expression - * @param expr Operand expression - * @param op Operation to apply - */ - constexpr MatrixUnaryExpr(const Expr& expr, Op op = Op{}) - : expr_(expr), op_(op) {} - - /** - * @brief Access element at (i,j) - * @param i Row index - * @param j Column index - * @return Result of operation on element at (i,j) - */ - constexpr auto operator()(std::size_t i, std::size_t j) const { - return op_(expr_(i, j)); - } - - /** - * @brief Get number of rows - * @return Number of rows - */ - constexpr std::size_t rows() const { - return expr_.rows(); - } - - /** - * @brief Get number of columns - * @return Number of columns - */ - constexpr std::size_t cols() const { - return expr_.cols(); - } -}; - -/** - * @brief Scalar multiplication expression - * @tparam Expr Matrix expression type - * @tparam Scalar Scalar type - */ -template -class MatrixScalarExpr : public MatrixExpr> { -private: - Expr expr_; - Scalar scalar_; - -public: - /** - * @brief Construct scalar multiplication expression - * @param expr Matrix expression - * @param scalar Scalar value - */ - constexpr MatrixScalarExpr(const Expr& expr, Scalar scalar) - : expr_(expr), scalar_(scalar) {} - - /** - * @brief Access element at (i,j) - * @param i Row index - * @param j Column index - * @return Element multiplied by scalar - */ - constexpr auto operator()(std::size_t i, std::size_t j) const { - return expr_(i, j) * scalar_; - } - - /** - * @brief Get number of rows - * @return Number of rows - */ - constexpr std::size_t rows() const { - return expr_.rows(); - } - - /** - * @brief Get number of columns - * @return Number of columns - */ - constexpr std::size_t cols() const { - return expr_.cols(); - } -}; - -/** - * 
@brief Scalar division expression - * @tparam Expr Matrix expression type - * @tparam Scalar Scalar type - */ -template -class MatrixScalarDivExpr : public MatrixExpr> { -private: - Expr expr_; - Scalar scalar_; - -public: - /** - * @brief Construct scalar division expression - * @param expr Matrix expression - * @param scalar Scalar divisor - */ - constexpr MatrixScalarDivExpr(const Expr& expr, Scalar scalar) - : expr_(expr), scalar_(scalar) {} - - /** - * @brief Access element at (i,j) - * @param i Row index - * @param j Column index - * @return Element divided by scalar - */ - constexpr auto operator()(std::size_t i, std::size_t j) const { - return expr_(i, j) / scalar_; - } - - /** - * @brief Get number of rows - * @return Number of rows - */ - constexpr std::size_t rows() const { - return expr_.rows(); - } - - /** - * @brief Get number of columns - * @return Number of columns - */ - constexpr std::size_t cols() const { - return expr_.cols(); - } -}; - -/** - * @brief Matrix multiplication expression (lazy evaluation) - * @tparam LHS Left matrix expression type - * @tparam RHS Right matrix expression type - * - * Computes matrix multiplication A*B lazily - */ -template -class MatrixMulExpr : public MatrixExpr> { -private: - LHS lhs_; - RHS rhs_; - -public: - /** - * @brief Construct matrix multiplication expression - * @param lhs Left matrix - * @param rhs Right matrix - */ - constexpr MatrixMulExpr(const LHS& lhs, const RHS& rhs) - : lhs_(lhs), rhs_(rhs) {} - - /** - * @brief Compute element at (i,j) - * @param i Row index - * @param j Column index - * @return Dot product of row i of lhs and column j of rhs - */ - constexpr auto operator()(std::size_t i, std::size_t j) const { - using result_type = decltype(lhs_(0, 0) * rhs_(0, 0)); - result_type sum = result_type{0}; - const auto n = lhs_.cols(); - for (std::size_t k = 0; k < n; ++k) { - sum += lhs_(i, k) * rhs_(k, j); - } - return sum; - } - - /** - * @brief Get number of rows (from left matrix) - * @return 
Number of rows - */ - constexpr std::size_t rows() const { - return lhs_.rows(); - } - - /** - * @brief Get number of columns (from right matrix) - * @return Number of columns - */ - constexpr std::size_t cols() const { - return rhs_.cols(); - } -}; - -/** - * @brief Transpose expression (lazy evaluation) - * @tparam Expr Matrix expression type - */ -template -class TransposeExpr : public MatrixExpr> { -private: - Expr expr_; - -public: - /** - * @brief Construct transpose expression - * @param expr Matrix expression to transpose - */ - constexpr explicit TransposeExpr(const Expr& expr) - : expr_(expr) {} - - /** - * @brief Access transposed element - * @param i Row index (becomes column in original) - * @param j Column index (becomes row in original) - * @return Element at (j,i) of original matrix - */ - constexpr auto operator()(std::size_t i, std::size_t j) const { - return expr_(j, i); - } - - /** - * @brief Get number of rows (columns of original) - * @return Number of rows - */ - constexpr std::size_t rows() const { - return expr_.cols(); - } - - /** - * @brief Get number of columns (rows of original) - * @return Number of columns - */ - constexpr std::size_t cols() const { - return expr_.rows(); - } -}; - -/** - * @brief Diagonal matrix expression (creates diagonal matrix from vector) - * @tparam VecExpr Vector expression type - */ -template -class DiagonalExpr : public MatrixExpr> { -private: - VecExpr vec_; - std::size_t n_; - -public: - /** - * @brief Construct diagonal matrix from vector - * @param vec Vector of diagonal elements - * @param n Matrix dimension (default: vector size) - */ - constexpr explicit DiagonalExpr(const VecExpr& vec, std::size_t n = 0) - : vec_(vec), n_(n > 0 ? 
n : vec.size()) {} - - /** - * @brief Access element - * @param i Row index - * @param j Column index - * @return Diagonal element if i==j, zero otherwise - */ - constexpr auto operator()(std::size_t i, std::size_t j) const { - using result_type = decltype(vec_[0]); - return (i == j && i < vec_.size()) ? vec_[i] : result_type{0}; - } - - /** - * @brief Get number of rows - * @return Number of rows - */ - constexpr std::size_t rows() const { - return n_; - } - - /** - * @brief Get number of columns - * @return Number of columns - */ - constexpr std::size_t cols() const { - return n_; - } -}; - -/** - * @brief Addition operator for matrix expressions - */ -template, LHS> && - std::is_base_of_v, RHS> - >> -constexpr auto operator+(const MatrixExpr& lhs, const MatrixExpr& rhs) { - return MatrixBinaryExpr( - lhs.derived(), rhs.derived(), detail::ops::Add{} - ); -} - -/** - * @brief Subtraction operator for matrix expressions - */ -template, LHS> && - std::is_base_of_v, RHS> - >> -constexpr auto operator-(const MatrixExpr& lhs, const MatrixExpr& rhs) { - return MatrixBinaryExpr( - lhs.derived(), rhs.derived(), detail::ops::Sub{} - ); -} - -/** - * @brief Matrix multiplication operator - */ -template, LHS> && - std::is_base_of_v, RHS> - >> -constexpr auto operator*(const MatrixExpr& lhs, const MatrixExpr& rhs) { - return MatrixMulExpr(lhs.derived(), rhs.derived()); -} - -/** - * @brief Element-wise multiplication (Hadamard product) - */ -template, LHS> && - std::is_base_of_v, RHS> - >> -constexpr auto hadamard(const MatrixExpr& lhs, const MatrixExpr& rhs) { - return MatrixBinaryExpr( - lhs.derived(), rhs.derived(), detail::ops::Mul{} - ); -} - -/** - * @brief Element-wise division - */ -template, LHS> && - std::is_base_of_v, RHS> - >> -constexpr auto hadamard_div(const MatrixExpr& lhs, const MatrixExpr& rhs) { - return MatrixBinaryExpr( - lhs.derived(), rhs.derived(), detail::ops::Div{} - ); -} - -/** - * @brief Negation operator for matrix expressions - */ -template, 
Expr> - >> -constexpr auto operator-(const MatrixExpr& expr) { - return MatrixUnaryExpr( - expr.derived(), detail::ops::Negate{} - ); -} - -/** - * @brief Scalar multiplication operator (matrix * scalar) - */ -template, Expr> && - std::is_arithmetic_v - >> -constexpr auto operator*(const MatrixExpr& expr, Scalar scalar) { - return MatrixScalarExpr(expr.derived(), scalar); -} - -/** - * @brief Scalar multiplication operator (scalar * matrix) - */ -template && - std::is_base_of_v, Expr> - >> -constexpr auto operator*(Scalar scalar, const MatrixExpr& expr) { - return MatrixScalarExpr(expr.derived(), scalar); -} - -/** - * @brief Scalar division operator (matrix / scalar) - */ -template, Expr> && - std::is_arithmetic_v - >> -constexpr auto operator/(const MatrixExpr& expr, Scalar scalar) { - return MatrixScalarDivExpr(expr.derived(), scalar); -} - -/** - * @brief Transpose function - */ -template, Expr> - >> -constexpr auto transpose(const MatrixExpr& expr) { - return TransposeExpr(expr.derived()); -} - -/** - * @brief Element-wise absolute value - */ -template, Expr> - >> -constexpr auto abs(const MatrixExpr& expr) { - return MatrixUnaryExpr(expr.derived(), detail::ops::Abs{}); -} - -/** - * @brief Element-wise square root - */ -template, Expr> - >> -constexpr auto sqrt(const MatrixExpr& expr) { - return MatrixUnaryExpr(expr.derived(), detail::ops::Sqrt{}); -} - -/** - * @brief Compute Frobenius norm squared of matrix expression - * @tparam Expr Matrix expression type - * @param expr Matrix expression - * @return Square of the Frobenius norm - */ -template, Expr> - >> -constexpr auto frobenius_norm_squared(const MatrixExpr& expr) { - using result_type = decltype(expr.derived()(0, 0) * expr.derived()(0, 0)); - result_type sum = result_type{0}; - const auto m = expr.rows(); - const auto n = expr.cols(); - for (std::size_t i = 0; i < m; ++i) { - for (std::size_t j = 0; j < n; ++j) { - auto val = expr.derived()(i, j); - sum += val * val; - } - } - return sum; -} - -/** - 
* @brief Compute Frobenius norm of matrix expression - * @tparam Expr Matrix expression type - * @param expr Matrix expression - * @return Frobenius norm - */ -template, Expr> - >> -constexpr auto frobenius_norm(const MatrixExpr& expr) { - using std::sqrt; - return sqrt(frobenius_norm_squared(expr)); -} - -/** - * @brief Compute trace of square matrix expression - * @tparam Expr Matrix expression type - * @param expr Matrix expression - * @return Sum of diagonal elements - */ -template, Expr> - >> -constexpr auto trace(const MatrixExpr& expr) { - using result_type = decltype(expr.derived()(0, 0)); - result_type sum = result_type{0}; - const auto n = std::min(expr.rows(), expr.cols()); - for (std::size_t i = 0; i < n; ++i) { - sum += expr.derived()(i, i); - } - return sum; -} - -} // namespace math -} // namespace FE -} // namespace svmp - -#endif // SVMP_FE_MATH_MATRIX_EXPR_H diff --git a/Code/Source/solver/FE/Math/Vector.h b/Code/Source/solver/FE/Math/Vector.h index 0ec99c81f..b234bac49 100644 --- a/Code/Source/solver/FE/Math/Vector.h +++ b/Code/Source/solver/FE/Math/Vector.h @@ -6,574 +6,43 @@ /** * @file Vector.h - * @brief Fixed-size vectors with expression templates for FE computations + * @brief Fixed-size vector types for FE computations, backed by Eigen. * - * This header provides optimized fixed-size vector operations for element-level - * computations. All operations use expression templates to eliminate temporaries - * and are header-only for maximum inlining. Memory is aligned for SIMD operations. + * The FE library standardizes on Eigen for linear algebra. These aliases give + * element-level code a stable vocabulary type without re-exporting all of + * Eigen. Note that, unlike the previous in-house implementation, Eigen types + * are NOT zero-initialized by default construction; use Vector::Zero() where a + * zeroed value is required. 
*/ -#include "VectorExpr.h" -#include "../Common/Types.h" -#include -#include -#include -#include -#include -#include -#include -#include +#include + +#include /// \defgroup FE_Math Math /// \ingroup FE -/// \brief Fixed-size and dense linear algebra utilities for finite-element computations. +/// \brief Linear algebra vocabulary types and dense utilities for finite-element computations. /// -/// \details The Math module provides small fixed-size vector and matrix types -/// used in element-level kernels, expression-template infrastructure for -/// allocation-free algebraic expressions, and dense linear algebra utilities -/// used by basis construction and local transforms. +/// \details The Math module defines the fixed-size vector and matrix types +/// used in element-level kernels (as aliases of Eigen types) and dense linear +/// algebra utilities used by basis construction and local transforms. /// /// \defgroup FE_VectorMath Vector /// \ingroup FE_Math -/// \brief Fixed-size vector types and vector expression utilities. +/// \brief Fixed-size vector type aliases. namespace svmp { namespace FE { namespace math { -template -inline constexpr T tolerance = - std::is_floating_point_v ? T(1000) * std::numeric_limits::epsilon() : T(0); - -template -inline bool approx_zero(T value, T tol = tolerance) { - using std::abs; - return abs(value) <= tol; -} - -template -inline bool approx_equal(T a, T b, T tol = tolerance) { - using std::abs; - const T scale = std::max({abs(a), abs(b), T(1)}); - return abs(a - b) <= tol * scale; -} - /** - * @brief Fixed-size vector for element-level computations + * @brief Fixed-size column vector for element-level computations * @ingroup FE_VectorMath * @tparam T Scalar type (float, double) * @tparam N Vector dimension - * - * This class provides small vector operations optimized for - * compile-time known dimensions. Memory is aligned for SIMD operations. 
*/ template -class Vector : public VectorExpr> { - static_assert(std::is_arithmetic_v, "T must be an arithmetic type"); - static_assert(N > 0, "Vector dimension must be positive"); - -private: - alignas(kFEFixedObjectAlignmentBytes) T data_[N]; // SIMD-friendly alignment - -public: - // Type definitions - using value_type = T; - using size_type = std::size_t; - using reference = T&; - using const_reference = const T&; - using pointer = T*; - using const_pointer = const T*; - - /** - * @brief Default constructor - zero initializes all components - */ - constexpr Vector() : data_{} {} - - /** - * @brief Fill constructor - initializes all components with same value - * @param value Value to fill vector with - */ - constexpr explicit Vector(T value) { - for (size_type i = 0; i < N; ++i) { - data_[i] = value; - } - } - - /** - * @brief Initializer list constructor - * @param init List of values - */ - constexpr Vector(std::initializer_list init) : data_{} { - auto it = init.begin(); - for (size_type i = 0; i < N && it != init.end(); ++i, ++it) { - data_[i] = *it; - } - } - - /** - * @brief Constructor from expression template - * @tparam Expr Expression type - * @param expr Vector expression to evaluate - */ - template - Vector(const VectorExpr& expr) { - const auto& e = expr.derived(); - for (size_type i = 0; i < N; ++i) { - data_[i] = e[i]; - } - } - - /** - * @brief Copy constructor - */ - constexpr Vector(const Vector&) = default; - - /** - * @brief Move constructor - */ - constexpr Vector(Vector&&) noexcept = default; - - /** - * @brief Copy assignment - */ - Vector& operator=(const Vector&) = default; - - /** - * @brief Move assignment - */ - Vector& operator=(Vector&&) noexcept = default; - - /** - * @brief Assignment from expression template - * @tparam Expr Expression type - * @param expr Vector expression to evaluate - * @return Reference to this - */ - template - Vector& operator=(const VectorExpr& expr) { - const auto& e = expr.derived(); - for (size_type i 
= 0; i < N; ++i) { - data_[i] = e[i]; - } - return *this; - } - - /** - * @brief Get vector size (compile-time constant) - * @return Number of elements - */ - static constexpr size_type size() { return N; } - - /** - * @brief Element access (no bounds checking) - * @param i Element index - * @return Reference to element - */ - constexpr T& operator[](size_type i) { - return data_[i]; - } - - /** - * @brief Element access (no bounds checking) - const version - * @param i Element index - * @return Const reference to element - */ - constexpr const T& operator[](size_type i) const { - return data_[i]; - } - - /** - * @brief Element access with bounds checking - * @param i Element index - * @return Reference to element - * @throws std::out_of_range if i >= N - */ - T& at(size_type i) { - if (i >= N) { - throw std::out_of_range("Vector::at: index out of range"); - } - return data_[i]; - } - - /** - * @brief Element access with bounds checking - const version - * @param i Element index - * @return Const reference to element - * @throws std::out_of_range if i >= N - */ - const T& at(size_type i) const { - if (i >= N) { - throw std::out_of_range("Vector::at: index out of range"); - } - return data_[i]; - } - - /** - * @brief Access first element - * @return Reference to first element - */ - T& front() { return data_[0]; } - const T& front() const { return data_[0]; } - - /** - * @brief Access last element - * @return Reference to last element - */ - T& back() { return data_[N-1]; } - const T& back() const { return data_[N-1]; } - - /** - * @brief Get pointer to underlying data - * @return Pointer to first element - */ - T* data() { return data_; } - const T* data() const { return data_; } - - /** - * @brief Fill vector with value - * @param value Value to fill with - */ - void fill(T value) { - for (size_type i = 0; i < N; ++i) { - data_[i] = value; - } - } - - /** - * @brief Set all components to zero - */ - void set_zero() { - fill(T{0}); - } - - // Arithmetic operators - 
- /** - * @brief In-place addition - * @param other Vector to add - * @return Reference to this - */ - Vector& operator+=(const Vector& other) { - for (size_type i = 0; i < N; ++i) { - data_[i] += other.data_[i]; - } - return *this; - } - - /** - * @brief In-place subtraction - * @param other Vector to subtract - * @return Reference to this - */ - Vector& operator-=(const Vector& other) { - for (size_type i = 0; i < N; ++i) { - data_[i] -= other.data_[i]; - } - return *this; - } - - /** - * @brief In-place scalar multiplication - * @param scalar Scalar to multiply by - * @return Reference to this - */ - Vector& operator*=(T scalar) { - for (size_type i = 0; i < N; ++i) { - data_[i] *= scalar; - } - return *this; - } - - /** - * @brief In-place scalar division - * @param scalar Scalar to divide by - * @return Reference to this - */ - Vector& operator/=(T scalar) { - const T inv = T(1) / scalar; - return (*this) *= inv; - } - - // Vector operations - - /** - * @brief Compute dot product - * @param other Other vector - * @return Dot product - */ - T dot(const Vector& other) const { - T result = T(0); - for (size_type i = 0; i < N; ++i) { - result += data_[i] * other.data_[i]; - } - return result; - } - - /** - * @brief Compute squared Euclidean norm - * @return Squared norm - */ - T norm_squared() const { - return dot(*this); - } - - /** - * @brief Compute Euclidean norm - * @return Norm - */ - T norm() const { - using std::sqrt; - return sqrt(norm_squared()); - } - - /** - * @brief Get normalized vector - * @return Unit vector in same direction - */ - Vector normalized() const { - const T n = norm(); - if (approx_zero(n)) { - return Vector(); // Return zero vector - } - return (*this) / n; - } - - /** - * @brief Normalize this vector in place - * @return Reference to this - */ - Vector& normalize() { - const T n = norm(); - if (!approx_zero(n)) { - (*this) /= n; - } - return *this; - } - - /** - * @brief Compute L1 norm (Manhattan norm) - * @return Sum of absolute 
values - */ - T norm_l1() const { - T result = T(0); - for (size_type i = 0; i < N; ++i) { - using std::abs; - result += abs(data_[i]); - } - return result; - } - - /** - * @brief Compute L-infinity norm (maximum norm) - * @return Maximum absolute value - */ - T norm_inf() const { - T result = T(0); - for (size_type i = 0; i < N; ++i) { - using std::abs; - result = std::max(result, abs(data_[i])); - } - return result; - } - - /** - * @brief Get minimum component - * @return Minimum value - */ - T min() const { - T result = data_[0]; - for (size_type i = 1; i < N; ++i) { - result = std::min(result, data_[i]); - } - return result; - } - - /** - * @brief Get maximum component - * @return Maximum value - */ - T max() const { - T result = data_[0]; - for (size_type i = 1; i < N; ++i) { - result = std::max(result, data_[i]); - } - return result; - } - - /** - * @brief Get sum of all components - * @return Sum of components - */ - T sum() const { - T result = T(0); - for (size_type i = 0; i < N; ++i) { - result += data_[i]; - } - return result; - } - - /** - * @brief Get product of all components - * @return Product of components - */ - T product() const { - T result = data_[0]; - for (size_type i = 1; i < N; ++i) { - result *= data_[i]; - } - return result; - } - - // Static factory functions - - /** - * @brief Create zero vector - * @return Vector with all components zero - */ - static constexpr Vector zeros() { - return Vector(); - } - - /** - * @brief Create vector with all components one - * @return Vector with all components one - */ - static constexpr Vector ones() { - return Vector(T(1)); - } - - /** - * @brief Create unit vector along axis - * @param axis Axis index (0-based) - * @return Unit vector - */ - static Vector unit(size_type axis) { - Vector v; - if (axis < N) { - v[axis] = T(1); - } - return v; - } - - /** - * @brief Create basis vector (alias for unit) - * @param i Axis index (0-based) - * @return Basis vector - */ - static Vector basis(size_type i) { 
- return unit(i); - } - - /** - * @brief Create zero vector (alias for zeros) - * @return Zero vector - */ - static constexpr Vector zero() { - return zeros(); - } - - /** - * @brief Get index of minimum element - * @return Index of minimum value - */ - size_type min_index() const { - size_type idx = 0; - T min_val = data_[0]; - for (size_type i = 1; i < N; ++i) { - if (data_[i] < min_val) { - min_val = data_[i]; - idx = i; - } - } - return idx; - } - - /** - * @brief Get index of maximum element - * @return Index of maximum value - */ - size_type max_index() const { - size_type idx = 0; - T max_val = data_[0]; - for (size_type i = 1; i < N; ++i) { - if (data_[i] > max_val) { - max_val = data_[i]; - idx = i; - } - } - return idx; - } - - /** - * @brief Compute mean of all components - * @return Average value - */ - T mean() const { - return sum() / static_cast(N); - } - - /** - * @brief Cross product for 3D vectors - * @param other Other vector - * @return Cross product - * @note Only available for 3D vectors - */ - template - std::enable_if_t> cross(const Vector& other) const { - return Vector{ - data_[1] * other[2] - data_[2] * other[1], - data_[2] * other[0] - data_[0] * other[2], - data_[0] * other[1] - data_[1] * other[0] - }; - } - - /** - * @brief Check if vectors are approximately equal - * @param other Other vector - * @param tol Tolerance - * @return true if equal within tolerance - */ - bool approx_equal(const Vector& other, T tol = tolerance) const { - for (size_type i = 0; i < N; ++i) { - using std::abs; - if (abs(data_[i] - other.data_[i]) > tol) { - return false; - } - } - return true; - } - - /** - * @brief Equality comparison - * @param other Other vector - * @return true if exactly equal - */ - bool operator==(const Vector& other) const { - for (size_type i = 0; i < N; ++i) { - if (data_[i] != other.data_[i]) { - return false; - } - } - return true; - } - - /** - * @brief Inequality comparison - * @param other Other vector - * @return true if not 
equal - */ - bool operator!=(const Vector& other) const { - return !(*this == other); - } - - // Iterators - T* begin() { return data_; } - T* end() { return data_ + N; } - const T* begin() const { return data_; } - const T* end() const { return data_ + N; } - const T* cbegin() const { return data_; } - const T* cend() const { return data_ + N; } -}; +using Vector = Eigen::Matrix(N), 1>; // Type aliases for common vector types template using Vector2 = Vector; @@ -595,269 +64,6 @@ using Vector2i = Vector2; using Vector3i = Vector3; using Vector4i = Vector4; -/** - * @brief 3D Cross product - * @tparam T Scalar type - * @param a First vector - * @param b Second vector - * @return Cross product a × b - */ -template -inline Vector3 cross(const Vector3& a, const Vector3& b) { - return Vector3{ - a[1] * b[2] - a[2] * b[1], - a[2] * b[0] - a[0] * b[2], - a[0] * b[1] - a[1] * b[0] - }; -} - -/** - * @brief 2D Cross product (returns scalar - z component of 3D cross) - * @tparam T Scalar type - * @param a First vector - * @param b Second vector - * @return Scalar cross product - */ -template -inline T cross(const Vector2& a, const Vector2& b) { - return a[0] * b[1] - a[1] * b[0]; -} - -/** - * @brief Triple scalar product (a · (b × c)) - * @tparam T Scalar type - * @param a First vector - * @param b Second vector - * @param c Third vector - * @return Scalar triple product - */ -template -inline T triple_product(const Vector3& a, const Vector3& b, const Vector3& c) { - return a.dot(cross(b, c)); -} - -// Free functions for common operations - -/** - * @brief Compute dot product - */ -template -inline T dot(const Vector& a, const Vector& b) { - return a.dot(b); -} - -/** - * @brief Compute Euclidean norm - */ -template -inline T norm(const Vector& v) { - return v.norm(); -} - -/** - * @brief Compute squared Euclidean norm - */ -template -inline T norm_squared(const Vector& v) { - return v.norm_squared(); -} - -/** - * @brief Get normalized vector - */ -template -inline Vector 
normalize(const Vector& v) { - return v.normalized(); -} - -/** - * @brief Component-wise absolute value - */ -template -inline Vector abs(const Vector& v) { - Vector result; - for (std::size_t i = 0; i < N; ++i) { - using std::abs; - result[i] = abs(v[i]); - } - return result; -} - -/** - * @brief Component-wise minimum - */ -template -inline Vector min(const Vector& a, const Vector& b) { - Vector result; - for (std::size_t i = 0; i < N; ++i) { - result[i] = std::min(a[i], b[i]); - } - return result; -} - -/** - * @brief Component-wise maximum - */ -template -inline Vector max(const Vector& a, const Vector& b) { - Vector result; - for (std::size_t i = 0; i < N; ++i) { - result[i] = std::max(a[i], b[i]); - } - return result; -} - -/** - * @brief Component-wise clamp - */ -template -inline Vector clamp(const Vector& v, const Vector& min_v, const Vector& max_v) { - Vector result; - for (std::size_t i = 0; i < N; ++i) { - result[i] = std::clamp(v[i], min_v[i], max_v[i]); - } - return result; -} - -/** - * @brief Linear interpolation between vectors - * @tparam T Scalar type - * @tparam N Vector dimension - * @param t Interpolation parameter [0, 1] - * @param a Start vector (at t=0) - * @param b End vector (at t=1) - * @return Interpolated vector - */ -template -inline Vector lerp(T t, const Vector& a, const Vector& b) { - return a + t * (b - a); -} - -/** - * @brief Spherical linear interpolation (for unit vectors) - * @tparam T Scalar type - * @param t Interpolation parameter [0, 1] - * @param a Start unit vector - * @param b End unit vector - * @return Interpolated unit vector - */ -template -inline Vector3 slerp(T t, const Vector3& a, const Vector3& b) { - T cos_angle = a.dot(b); - - // Handle numerical issues - cos_angle = std::clamp(cos_angle, T(-1), T(1)); - - // If vectors are nearly parallel, use linear interpolation - if (cos_angle > T(0.9995)) { - return normalize(lerp(t, a, b)); - } - - T angle = std::acos(cos_angle); - T sin_angle = std::sin(angle); - - T 
t0 = std::sin((T(1) - t) * angle) / sin_angle; - T t1 = std::sin(t * angle) / sin_angle; - - return t0 * a + t1 * b; -} - -/** - * @brief Reflect vector about normal - * @tparam T Scalar type - * @tparam N Vector dimension - * @param v Incident vector - * @param n Normal vector (should be unit) - * @return Reflected vector - */ -template -inline Vector reflect(const Vector& v, const Vector& n) { - return v - T(2) * dot(v, n) * n; -} - -/** - * @brief Project vector onto another vector - * @tparam T Scalar type - * @tparam N Vector dimension - * @param v Vector to project - * @param onto Vector to project onto - * @return Projection of v onto 'onto' - */ -template -inline Vector project(const Vector& v, const Vector& onto) { - T denom = onto.norm_squared(); - if (approx_zero(denom)) { - return Vector::zeros(); - } - return (dot(v, onto) / denom) * onto; -} - -/** - * @brief Get perpendicular component of vector - * @tparam T Scalar type - * @tparam N Vector dimension - * @param v Vector - * @param direction Direction to remove - * @return Component of v perpendicular to direction - */ -template -inline Vector perpendicular(const Vector& v, const Vector& direction) { - return v - project(v, direction); -} - -/** - * @brief Compute angle between two vectors - * @tparam T Scalar type - * @tparam N Vector dimension - * @param a First vector - * @param b Second vector - * @return Angle in radians [0, π] - */ -template -inline T angle(const Vector& a, const Vector& b) { - T cos_angle = dot(a, b) / (norm(a) * norm(b)); - cos_angle = std::clamp(cos_angle, T(-1), T(1)); - return std::acos(cos_angle); -} - -/** - * @brief Check if two vectors are approximately equal - * @tparam T Scalar type - * @tparam N Vector dimension - * @param a First vector - * @param b Second vector - * @param tol Tolerance - * @return true if vectors are equal within tolerance - */ -template -inline bool approx_equal(const Vector& a, const Vector& b, T tol = tolerance) { - for (std::size_t i = 0; i < 
N; ++i) { - if (!approx_equal(a[i], b[i], tol)) { - return false; - } - } - return true; -} - -/** - * @brief Stream output operator - * @tparam T Scalar type - * @tparam N Vector dimension - * @param os Output stream - * @param v Vector to output - * @return Reference to output stream - */ -template -inline std::ostream& operator<<(std::ostream& os, const Vector& v) { - os << "["; - for (std::size_t i = 0; i < N; ++i) { - if (i > 0) os << ", "; - os << v[i]; - } - os << "]"; - return os; -} - } // namespace math } // namespace FE } // namespace svmp diff --git a/Code/Source/solver/FE/Math/VectorExpr.h b/Code/Source/solver/FE/Math/VectorExpr.h deleted file mode 100644 index aa712dd63..000000000 --- a/Code/Source/solver/FE/Math/VectorExpr.h +++ /dev/null @@ -1,476 +0,0 @@ -// SPDX-FileCopyrightText: Copyright (c) Stanford University, The Regents of the University of California, and others. -// SPDX-License-Identifier: BSD-3-Clause - -#ifndef SVMP_FE_MATH_VECTOR_EXPR_H -#define SVMP_FE_MATH_VECTOR_EXPR_H - -/** - * @file VectorExpr.h - * @brief Expression template infrastructure for lazy evaluation of vector operations - * - * This header provides expression templates that enable compound vector operations - * without creating temporary objects. Operations are evaluated lazily at the point - * of assignment, eliminating intermediate allocations and improving performance. 
- */ - -#include -#include -#include - -namespace svmp { -namespace FE { -namespace math { -namespace detail { -namespace ops { - -struct Add { - template - constexpr auto operator()(const T1& a, const T2& b) const { - return a + b; - } -}; - -struct Sub { - template - constexpr auto operator()(const T1& a, const T2& b) const { - return a - b; - } -}; - -struct Mul { - template - constexpr auto operator()(const T1& a, const T2& b) const { - return a * b; - } -}; - -struct Div { - template - constexpr auto operator()(const T1& a, const T2& b) const { - return a / b; - } -}; - -struct Negate { - template - constexpr auto operator()(const T& a) const { - return -a; - } -}; - -struct Abs { - template - constexpr auto operator()(const T& a) const { - using std::abs; - return abs(a); - } -}; - -struct Sqrt { - template - constexpr auto operator()(const T& a) const { - using std::sqrt; - return sqrt(a); - } -}; - -} // namespace ops -} // namespace detail - -/** - * @brief Base class for all vector expressions using CRTP - * @tparam Derived The derived expression type - * - * This uses the Curiously Recurring Template Pattern (CRTP) to provide - * static polymorphism for expression templates. 
- */ -template -class VectorExpr { -public: - /** - * @brief Get the derived expression - * @return Reference to the derived type - */ - const Derived& derived() const { - return static_cast(*this); - } - - /** - * @brief Get the derived expression (non-const) - * @return Reference to the derived type - */ - Derived& derived() { - return static_cast(*this); - } - - /** - * @brief Access element by index - * @param i Element index - * @return Value at index i - */ - auto operator[](std::size_t i) const { - return derived()[i]; - } - - /** - * @brief Get the size of the vector expression - * @return Number of elements - */ - std::size_t size() const { - return derived().size(); - } -}; - -/** - * @brief Binary expression for element-wise operations between two vector expressions - * @tparam LHS Left-hand side expression type - * @tparam RHS Right-hand side expression type - * @tparam Op Binary operation functor - */ -template -class VectorBinaryExpr : public VectorExpr> { -private: - LHS lhs_; - RHS rhs_; - Op op_; - -public: - /** - * @brief Construct binary expression - * @param lhs Left operand - * @param rhs Right operand - * @param op Operation to apply - */ - constexpr VectorBinaryExpr(const LHS& lhs, const RHS& rhs, Op op = Op{}) - : lhs_(lhs), rhs_(rhs), op_(op) {} - - /** - * @brief Access element at index - * @param i Element index - * @return Result of operation on elements at index i - */ - constexpr auto operator[](std::size_t i) const { - return op_(lhs_[i], rhs_[i]); - } - - /** - * @brief Get size of expression (from left operand) - * @return Number of elements - */ - constexpr std::size_t size() const { - return lhs_.size(); - } -}; - -/** - * @brief Unary expression for element-wise operations on a single vector expression - * @tparam Expr Expression type - * @tparam Op Unary operation functor - */ -template -class VectorUnaryExpr : public VectorExpr> { -private: - Expr expr_; - Op op_; - -public: - /** - * @brief Construct unary expression - * 
@param expr Operand expression - * @param op Operation to apply - */ - constexpr VectorUnaryExpr(const Expr& expr, Op op = Op{}) - : expr_(expr), op_(op) {} - - /** - * @brief Access element at index - * @param i Element index - * @return Result of operation on element at index i - */ - constexpr auto operator[](std::size_t i) const { - return op_(expr_[i]); - } - - /** - * @brief Get size of expression - * @return Number of elements - */ - constexpr std::size_t size() const { - return expr_.size(); - } -}; - -/** - * @brief Scalar multiplication expression - * @tparam Expr Vector expression type - * @tparam Scalar Scalar type - */ -template -class VectorScalarExpr : public VectorExpr> { -private: - Expr expr_; - Scalar scalar_; - -public: - /** - * @brief Construct scalar multiplication expression - * @param expr Vector expression - * @param scalar Scalar value - */ - constexpr VectorScalarExpr(const Expr& expr, Scalar scalar) - : expr_(expr), scalar_(scalar) {} - - /** - * @brief Access element at index - * @param i Element index - * @return Element multiplied by scalar - */ - constexpr auto operator[](std::size_t i) const { - return expr_[i] * scalar_; - } - - /** - * @brief Get size of expression - * @return Number of elements - */ - constexpr std::size_t size() const { - return expr_.size(); - } -}; - -/** - * @brief Scalar division expression - * @tparam Expr Vector expression type - * @tparam Scalar Scalar type - */ -template -class VectorScalarDivExpr : public VectorExpr> { -private: - Expr expr_; - Scalar scalar_; - -public: - /** - * @brief Construct scalar division expression - * @param expr Vector expression - * @param scalar Scalar divisor - */ - constexpr VectorScalarDivExpr(const Expr& expr, Scalar scalar) - : expr_(expr), scalar_(scalar) {} - - /** - * @brief Access element at index - * @param i Element index - * @return Element divided by scalar - */ - constexpr auto operator[](std::size_t i) const { - return expr_[i] / scalar_; - } - - /** - * 
@brief Get size of expression - * @return Number of elements - */ - constexpr std::size_t size() const { - return expr_.size(); - } -}; - -/** - * @brief Addition operator for vector expressions - */ -template, LHS> && - std::is_base_of_v, RHS> - >> -constexpr auto operator+(const VectorExpr& lhs, const VectorExpr& rhs) { - return VectorBinaryExpr( - lhs.derived(), rhs.derived(), detail::ops::Add{} - ); -} - -/** - * @brief Subtraction operator for vector expressions - */ -template, LHS> && - std::is_base_of_v, RHS> - >> -constexpr auto operator-(const VectorExpr& lhs, const VectorExpr& rhs) { - return VectorBinaryExpr( - lhs.derived(), rhs.derived(), detail::ops::Sub{} - ); -} - -/** - * @brief Element-wise multiplication operator for vector expressions - */ -template, LHS> && - std::is_base_of_v, RHS> - >> -constexpr auto hadamard(const VectorExpr& lhs, const VectorExpr& rhs) { - return VectorBinaryExpr( - lhs.derived(), rhs.derived(), detail::ops::Mul{} - ); -} - -/** - * @brief Element-wise division operator for vector expressions - */ -template, LHS> && - std::is_base_of_v, RHS> - >> -constexpr auto hadamard_div(const VectorExpr& lhs, const VectorExpr& rhs) { - return VectorBinaryExpr( - lhs.derived(), rhs.derived(), detail::ops::Div{} - ); -} - -/** - * @brief Negation operator for vector expressions - */ -template, Expr> - >> -constexpr auto operator-(const VectorExpr& expr) { - return VectorUnaryExpr( - expr.derived(), detail::ops::Negate{} - ); -} - -/** - * @brief Scalar multiplication operator (vector * scalar) - */ -template, Expr> && - std::is_arithmetic_v - >> -constexpr auto operator*(const VectorExpr& expr, Scalar scalar) { - return VectorScalarExpr(expr.derived(), scalar); -} - -/** - * @brief Scalar multiplication operator (scalar * vector) - */ -template && - std::is_base_of_v, Expr> - >> -constexpr auto operator*(Scalar scalar, const VectorExpr& expr) { - return VectorScalarExpr(expr.derived(), scalar); -} - -/** - * @brief Scalar division 
operator (vector / scalar) - */ -template, Expr> && - std::is_arithmetic_v - >> -constexpr auto operator/(const VectorExpr& expr, Scalar scalar) { - return VectorScalarDivExpr(expr.derived(), scalar); -} - -/** - * @brief Element-wise absolute value - */ -template, Expr> - >> -constexpr auto abs(const VectorExpr& expr) { - return VectorUnaryExpr(expr.derived(), detail::ops::Abs{}); -} - -/** - * @brief Element-wise square root - */ -template, Expr> - >> -constexpr auto sqrt(const VectorExpr& expr) { - return VectorUnaryExpr(expr.derived(), detail::ops::Sqrt{}); -} - -/** - * @brief Dot product for vector expressions - * @tparam LHS Left vector expression type - * @tparam RHS Right vector expression type - * @param lhs Left operand - * @param rhs Right operand - * @return Dot product result - */ -template, LHS> && - std::is_base_of_v, RHS> - >> -constexpr auto dot(const VectorExpr& lhs, const VectorExpr& rhs) { - using result_type = decltype(lhs.derived()[0] * rhs.derived()[0]); - result_type sum = result_type{0}; - const auto n = lhs.size(); - for (std::size_t i = 0; i < n; ++i) { - sum += lhs.derived()[i] * rhs.derived()[i]; - } - return sum; -} - -/** - * @brief Compute norm squared of vector expression - * @tparam Expr Vector expression type - * @param expr Vector expression - * @return Square of the Euclidean norm - */ -template, Expr> - >> -constexpr auto norm_squared(const VectorExpr& expr) { - return dot(expr, expr); -} - -/** - * @brief Compute norm of vector expression - * @tparam Expr Vector expression type - * @param expr Vector expression - * @return Euclidean norm - */ -template, Expr> - >> -constexpr auto norm(const VectorExpr& expr) { - using std::sqrt; - return sqrt(norm_squared(expr)); -} - -/** - * @brief Normalize vector expression - * @tparam Expr Vector expression type - * @param expr Vector expression - * @return Normalized vector expression - */ -template, Expr> - >> -constexpr auto normalize(const VectorExpr& expr) { - return expr / 
norm(expr); -} - -} // namespace math -} // namespace FE -} // namespace svmp - -#endif // SVMP_FE_MATH_VECTOR_EXPR_H diff --git a/Code/Source/solver/nn.cpp b/Code/Source/solver/nn.cpp index eb6d35106..60fcddf81 100644 --- a/Code/Source/solver/nn.cpp +++ b/Code/Source/solver/nn.cpp @@ -28,8 +28,10 @@ #include #include -#include +#include +#include #include +#include #include #include #include @@ -55,12 +57,6 @@ namespace { namespace fe = svmp::FE; namespace febasis = svmp::FE::basis; -struct BasisSelection { - fe::ElementType element; - fe::BasisType basis; - int order; -}; - std::string solver_element_name(consts::ElementType eType) { auto it = consts::element_type_to_string.find(eType); @@ -70,34 +66,42 @@ std::string solver_element_name(consts::ElementType eType) return "unknown (" + std::to_string(static_cast(eType)) + ")"; } -std::optional to_basis_selection(consts::ElementType eType) +/// Translate a solver element type into its FE library counterpart. This is a +/// pure renaming between the two enum vocabularies: the FE library owns the +/// choice of basis family and polynomial order for each element type +/// (basis_factory::default_basis_request). The switch deliberately has no +/// default case so that compilers building with -Wswitch flag any newly added +/// solver element type that is missing a mapping here. 
+std::optional to_fe_element_type(consts::ElementType eType) { - static constexpr std::array supported{ - BasisSelection{fe::ElementType::Line2, fe::BasisType::Lagrange, 1}, - BasisSelection{fe::ElementType::Line3, fe::BasisType::Lagrange, 2}, - BasisSelection{fe::ElementType::Triangle3, fe::BasisType::Lagrange, 1}, - BasisSelection{fe::ElementType::Triangle6, fe::BasisType::Lagrange, 2}, - BasisSelection{fe::ElementType::Quad4, fe::BasisType::Lagrange, 1}, - BasisSelection{fe::ElementType::Quad8, fe::BasisType::Serendipity, 2}, - BasisSelection{fe::ElementType::Quad9, fe::BasisType::Lagrange, 2}, - BasisSelection{fe::ElementType::Tetra4, fe::BasisType::Lagrange, 1}, - BasisSelection{fe::ElementType::Tetra10, fe::BasisType::Lagrange, 2}, - BasisSelection{fe::ElementType::Hex8, fe::BasisType::Lagrange, 1}, - BasisSelection{fe::ElementType::Hex20, fe::BasisType::Serendipity, 2}, - BasisSelection{fe::ElementType::Hex27, fe::BasisType::Lagrange, 2}, - BasisSelection{fe::ElementType::Wedge6, fe::BasisType::Lagrange, 1}, - }; - - const int index = static_cast(eType) - static_cast(consts::ElementType::LIN1); - if (index >= 0 && static_cast(index) < supported.size()) { - return supported[static_cast(index)]; + switch (eType) { + case consts::ElementType::LIN1: return fe::ElementType::Line2; + case consts::ElementType::LIN2: return fe::ElementType::Line3; + case consts::ElementType::TRI3: return fe::ElementType::Triangle3; + case consts::ElementType::TRI6: return fe::ElementType::Triangle6; + case consts::ElementType::QUD4: return fe::ElementType::Quad4; + case consts::ElementType::QUD8: return fe::ElementType::Quad8; + case consts::ElementType::QUD9: return fe::ElementType::Quad9; + case consts::ElementType::TET4: return fe::ElementType::Tetra4; + case consts::ElementType::TET10: return fe::ElementType::Tetra10; + case consts::ElementType::HEX8: return fe::ElementType::Hex8; + case consts::ElementType::HEX20: return fe::ElementType::Hex20; + case 
consts::ElementType::HEX27: return fe::ElementType::Hex27; + case consts::ElementType::WDG: return fe::ElementType::Wedge6; + + // No FE basis mapping: points use dedicated shape data in get_gnn and + // NURBS are outside the current FE Basis scope. + case consts::ElementType::NA: + case consts::ElementType::PNT: + case consts::ElementType::NRB: + return std::nullopt; } return std::nullopt; } bool use_basis_adapter_for(consts::ElementType eType) { - return to_basis_selection(eType).has_value(); + return to_fe_element_type(eType).has_value(); } bool supports_face_basis_adapter_for(consts::ElementType eType) @@ -110,23 +114,36 @@ bool supports_face_basis_adapter_for(consts::ElementType eType) case consts::ElementType::QUD4: case consts::ElementType::QUD8: case consts::ElementType::QUD9: - return to_basis_selection(eType).has_value(); + return use_basis_adapter_for(eType); default: return false; } } -std::shared_ptr make_basis_for_solver_element(consts::ElementType eType) +/// Return the shared FE basis for a solver element type, constructing it on +/// first use. Basis construction is not free (node-lattice generation, and a +/// Vandermonde inversion for quadrilateral serendipity), while callers invoke +/// this per Gauss point or per probe point, so instances are cached per +/// element type. Sharing is safe: bases are immutable after construction, +/// evaluation is const, and BasisFunction scratch state is thread_local. 
+const febasis::BasisFunction& basis_for_solver_element(consts::ElementType eType) { - auto selection = to_basis_selection(eType); - if (!selection) { + static std::mutex cache_mutex; + static std::map> cache; + + const auto fe_type = to_fe_element_type(eType); + if (!fe_type) { throw febasis::BasisElementCompatibilityException( "No FE Basis selection for solver element " + solver_element_name(eType), __FILE__, __LINE__, __func__); } - return febasis::basis_factory::create( - {selection->element, selection->basis, selection->order}); + const std::lock_guard lock(cache_mutex); + auto it = cache.find(eType); + if (it == cache.end()) { + it = cache.emplace(eType, febasis::basis_factory::create_default_for(*fe_type)).first; + } + return *it->second; } std::span solver_to_basis_node_map(consts::ElementType eType) @@ -192,7 +209,9 @@ fe::math::Vector make_basis_point(const febasis::BasisFunction& bas __FILE__, __LINE__, __func__); } - fe::math::Vector point{}; + // Inactive trailing components must be zero for lower-dimensional elements; + // Eigen-backed vectors are not zero-initialized by default construction. 
+ fe::math::Vector point = fe::math::Vector::Zero(); for (int d = 0; d < basis.dimension(); ++d) { point[static_cast(d)] = xi(d, g); } @@ -250,19 +269,19 @@ void evaluate_basis_values_and_gradients(const int insd, Array& N, Array3& Nx) { - auto basis = make_basis_for_solver_element(eType); - if (insd < basis->dimension()) { + const auto& basis = basis_for_solver_element(eType); + if (insd < basis.dimension()) { throw febasis::BasisConfigurationException( "solver insd " + std::to_string(insd) + - " is smaller than FE Basis reference dimension " + std::to_string(basis->dimension()), + " is smaller than FE Basis reference dimension " + std::to_string(basis.dimension()), __FILE__, __LINE__, __func__); } - const auto point = make_basis_point(*basis, g, xi); + const auto point = make_basis_point(basis, g, xi); std::vector values; std::vector gradients; - basis->evaluate_values(point, values); - basis->evaluate_gradients(point, gradients); + basis.evaluate_values(point, values); + basis.evaluate_gradients(point, gradients); // FE Basis owns the formulas; fsType and mshType remain the solver-facing storage contract. 
copy_basis_values_to_solver_arrays(eType, eNoN, g, values, gradients, N, Nx); @@ -355,15 +374,15 @@ void evaluate_basis_hessians(const int insd, const Array& xi, Array3& Nxx) { - auto basis = make_basis_for_solver_element(eType); - if (insd < basis->dimension()) { + const auto& basis = basis_for_solver_element(eType); + if (insd < basis.dimension()) { throw febasis::BasisConfigurationException( "solver insd " + std::to_string(insd) + - " is smaller than FE Basis reference dimension " + std::to_string(basis->dimension()), + " is smaller than FE Basis reference dimension " + std::to_string(basis.dimension()), __FILE__, __LINE__, __func__); } - const int required_components = required_nxx_components_for_dimension(basis->dimension()); + const int required_components = required_nxx_components_for_dimension(basis.dimension()); if (ind2 < required_components) { throw febasis::BasisConfigurationException( "solver ind2 " + std::to_string(ind2) + @@ -371,12 +390,12 @@ void evaluate_basis_hessians(const int insd, __FILE__, __LINE__, __func__); } - const auto point = make_basis_point(*basis, gaus_pt, xi); + const auto point = make_basis_point(basis, gaus_pt, xi); std::vector hessians; - basis->evaluate_hessians(point, hessians); + basis.evaluate_hessians(point, hessians); // Solver Nxx packing is dxx, dyy, dxy in 2D and dxx, dyy, dzz, dxy, dyz, dxz in 3D. 
- copy_basis_hessians_to_solver_nxx(eType, eNoN, gaus_pt, basis->dimension(), hessians, Nxx); + copy_basis_hessians_to_solver_nxx(eType, eNoN, gaus_pt, basis.dimension(), hessians, Nxx); } void set_point_face_shape_data(const int gaus_pt, faceType& face) diff --git a/tests/unitTests/FE/Basis/test_BasisErrorPaths.cpp b/tests/unitTests/FE/Basis/test_BasisErrorPaths.cpp index d4bf1d6e5..60ca72114 100644 --- a/tests/unitTests/FE/Basis/test_BasisErrorPaths.cpp +++ b/tests/unitTests/FE/Basis/test_BasisErrorPaths.cpp @@ -34,6 +34,61 @@ class MinimalScalarBasis : public BasisFunction { } }; +// Quadratic scalar basis with exact analytic derivatives, used to verify the +// protected numerical_gradient/numerical_hessian development helpers. Centered +// differences are exact (up to roundoff) on quadratics, so any mismatch is a +// bug in the helpers themselves. +class ExactQuadraticBasis : public BasisFunction { +public: + using BasisFunction::numerical_gradient; + using BasisFunction::numerical_hessian; + + BasisType basis_type() const noexcept override { return BasisType::Custom; } + ElementType element_type() const noexcept override { return ElementType::Hex8; } + int dimension() const noexcept override { return 3; } + int order() const noexcept override { return 2; } + std::size_t size() const noexcept override { return 2u; } + + void evaluate_values(const math::Vector& xi, + std::vector& values) const override + { + const Real x = xi[0]; + const Real y = xi[1]; + const Real z = xi[2]; + values.resize(size()); + values[0] = Real(1) + Real(2) * x - y + Real(0.5) * z + + x * x + Real(0.75) * y * y - Real(0.25) * z * z + + Real(0.2) * x * y - Real(0.3) * x * z + Real(0.4) * y * z; + values[1] = Real(3) - x + Real(2) * y + z + + Real(0.5) * x * x - y * y + z * z + + x * y + x * z - y * z; + } + + void evaluate_gradients(const math::Vector& xi, + std::vector& gradients) const override + { + const Real x = xi[0]; + const Real y = xi[1]; + const Real z = xi[2]; + 
gradients.assign(size(), Gradient::Zero()); + gradients[0][0] = Real(2) + Real(2) * x + Real(0.2) * y - Real(0.3) * z; + gradients[0][1] = Real(-1) + Real(1.5) * y + Real(0.2) * x + Real(0.4) * z; + gradients[0][2] = Real(0.5) - Real(0.5) * z - Real(0.3) * x + Real(0.4) * y; + gradients[1][0] = Real(-1) + x + y + z; + gradients[1][1] = Real(2) - Real(2) * y + x - z; + gradients[1][2] = Real(1) + Real(2) * z + x - y; + } + + void exact_hessians(std::vector& hessians) const + { + hessians.assign(size(), Hessian::Zero()); + hessians[0] = make_symmetric_hessian(Real(2), Real(1.5), Real(-0.5), + Real(0.2), Real(-0.3), Real(0.4)); + hessians[1] = make_symmetric_hessian(Real(1), Real(-2), Real(2), + Real(1), Real(1), Real(-1)); + } +}; + class CompleteFallbackBasis : public BasisFunction { public: BasisType basis_type() const noexcept override { return BasisType::Lagrange; } @@ -53,7 +108,7 @@ class CompleteFallbackBasis : public BasisFunction { void evaluate_gradients(const math::Vector&, std::vector& gradients) const override { - gradients.assign(size(), Gradient{}); + gradients.assign(size(), Gradient::Zero()); gradients[0][0] = Real(1); gradients[1][1] = Real(1); } @@ -61,7 +116,7 @@ class CompleteFallbackBasis : public BasisFunction { void evaluate_hessians(const math::Vector& xi, std::vector& hessians) const override { - hessians.assign(size(), Hessian{}); + hessians.assign(size(), Hessian::Zero()); for (std::size_t d = 0; d < hessians.size(); ++d) { for (std::size_t r = 0; r < 3u; ++r) { for (std::size_t c = 0; c < 3u; ++c) { @@ -96,6 +151,16 @@ TEST(BasisErrorPaths, SerendipityInvalidRequestsThrowBasisExceptions) { BasisElementCompatibilityException); } +TEST(BasisErrorPaths, BasisFactoryRejectsNonC0Continuity) { + BasisRequest c1_request{ElementType::Line2, BasisType::Lagrange, 1}; + c1_request.continuity = Continuity::C1; + EXPECT_THROW((void)basis_factory::create(c1_request), BasisConfigurationException); + + BasisRequest l2_request{ElementType::Quad8, 
BasisType::Serendipity, 2}; + l2_request.continuity = Continuity::L2; + EXPECT_THROW((void)basis_factory::create(l2_request), BasisConfigurationException); +} + TEST(BasisErrorPaths, BasisFactoryInvalidRequestsThrowBasisExceptions) { EXPECT_THROW((void)basis_factory::create( BasisRequest{ElementType::Line2, BasisType::Lagrange}), @@ -153,6 +218,43 @@ TEST(BasisErrorPaths, BasisFunctionDefaultsThrowForMissingDerivatives) { EXPECT_THROW(basis.evaluate_hessians(xi, hessians), BasisEvaluationException); } +TEST(BasisErrorPaths, NumericalDerivativeHelpersMatchAnalyticDerivatives) { + ExactQuadraticBasis basis; + const math::Vector xi{Real(0.2), Real(-0.35), Real(0.4)}; + + std::vector exact_gradients; + basis.evaluate_gradients(xi, exact_gradients); + + std::vector approx_gradients; + basis.numerical_gradient(xi, approx_gradients); + ASSERT_EQ(approx_gradients.size(), basis.size()); + for (std::size_t n = 0; n < basis.size(); ++n) { + for (int d = 0; d < basis.dimension(); ++d) { + const std::size_t sd = static_cast(d); + EXPECT_NEAR(approx_gradients[n][sd], exact_gradients[n][sd], Real(1e-8)) + << "basis=" << n << " component=" << d; + } + } + + std::vector exact_hessians; + basis.exact_hessians(exact_hessians); + + std::vector approx_hessians; + basis.numerical_hessian(xi, approx_hessians); + ASSERT_EQ(approx_hessians.size(), basis.size()); + for (std::size_t n = 0; n < basis.size(); ++n) { + for (int r = 0; r < basis.dimension(); ++r) { + for (int c = 0; c < basis.dimension(); ++c) { + const std::size_t sr = static_cast(r); + const std::size_t sc = static_cast(c); + EXPECT_NEAR(approx_hessians[n](sr, sc), exact_hessians[n](sr, sc), + Real(1e-8)) + << "basis=" << n << " component=(" << r << "," << c << ")"; + } + } + } +} + TEST(BasisErrorPaths, BasisFunctionFallbackWritesRawLayouts) { CompleteFallbackBasis basis; const math::Vector point{Real(0.25), Real(0.5), Real(-0.25)}; diff --git a/tests/unitTests/FE/Basis/test_BasisHessians.cpp 
b/tests/unitTests/FE/Basis/test_BasisHessians.cpp index f786b07cd..9ad458c0b 100644 --- a/tests/unitTests/FE/Basis/test_BasisHessians.cpp +++ b/tests/unitTests/FE/Basis/test_BasisHessians.cpp @@ -18,12 +18,39 @@ using namespace svmp::FE::basis; namespace { +void numerical_gradient_helper(const BasisFunction& basis, + const math::Vector& xi, + std::vector& gradients, + Real eps = Real(1e-6)) +{ + std::vector base; + basis.evaluate_values(xi, base); + gradients.assign(base.size(), Gradient::Zero()); + + for (int d = 0; d < basis.dimension(); ++d) { + const std::size_t sd = static_cast(d); + math::Vector xi_p = xi; + math::Vector xi_m = xi; + xi_p[sd] += eps; + xi_m[sd] -= eps; + + std::vector v_p; + std::vector v_m; + basis.evaluate_values(xi_p, v_p); + basis.evaluate_values(xi_m, v_m); + + for (std::size_t n = 0; n < base.size(); ++n) { + gradients[n][sd] = (v_p[n] - v_m[n]) / (Real(2) * eps); + } + } +} + void numerical_hessian_helper(const BasisFunction& basis, const math::Vector& xi, std::vector& hessians, Real eps = Real(1e-5)) { - hessians.assign(basis.size(), Hessian{}); + hessians.assign(basis.size(), Hessian::Zero()); const int dim = basis.dimension(); for (int i = 0; i < dim; ++i) { @@ -66,7 +93,31 @@ std::vector> sample_points_for(ElementType type) { } } -void expect_hessians_match_numerical(const LagrangeBasis& basis, +void expect_gradients_match_numerical(const BasisFunction& basis, + const std::vector>& points, + Real tol, + Real eps = Real(1e-6)) +{ + for (const auto& xi : points) { + std::vector analytical; + std::vector numerical; + basis.evaluate_gradients(xi, analytical); + numerical_gradient_helper(basis, xi, numerical, eps); + + ASSERT_EQ(analytical.size(), numerical.size()); + for (std::size_t n = 0; n < analytical.size(); ++n) { + for (int d = 0; d < basis.dimension(); ++d) { + const std::size_t sd = static_cast(d); + EXPECT_NEAR(analytical[n][sd], numerical[n][sd], tol) + << "basis " << n << ", component " << d + << ", element " << 
static_cast(basis.element_type()) + << ", order " << basis.order(); + } + } + } +} + +void expect_hessians_match_numerical(const BasisFunction& basis, const std::vector>& points, Real tol, Real eps = Real(1e-5)) @@ -100,7 +151,7 @@ void expect_partition_hessian_sum_zero(const LagrangeBasis& basis, std::vector hessians; basis.evaluate_hessians(xi, hessians); - Hessian sum{}; + Hessian sum = Hessian::Zero(); for (const auto& hessian : hessians) { for (std::size_t r = 0; r < 3u; ++r) { for (std::size_t c = 0; c < 3u; ++c) { @@ -145,7 +196,7 @@ void expect_partition_hessian_sum_zero(const BasisFunction& basis, std::vector hessians; basis.evaluate_hessians(xi, hessians); - Hessian sum{}; + Hessian sum = Hessian::Zero(); for (const auto& hessian : hessians) { for (std::size_t r = 0; r < 3u; ++r) { for (std::size_t c = 0; c < 3u; ++c) { @@ -183,6 +234,16 @@ void expect_hessians_symmetric(const BasisFunction& basis, } } +std::vector> serendipity_sample_points(ElementType type) { + if (type == ElementType::Quad4 || type == ElementType::Quad8) { + return {{Real(0.17), Real(-0.31), Real(0)}, {Real(-0.45), Real(0.25), Real(0)}}; + } + if (type == ElementType::Hex8 || type == ElementType::Hex20) { + return {{Real(0.2), Real(-0.1), Real(0.3)}, {Real(-0.35), Real(0.25), Real(-0.15)}}; + } + return {{Real(0.2), Real(0.3), Real(0.1)}, {Real(0.12), Real(0.16), Real(-0.2)}}; +} + } // namespace TEST(BasisHessians, LagrangeCanonicalTopologiesMatchNumericalHessians) { @@ -280,3 +341,75 @@ TEST(BasisHessians, SolverMappedVolumeSelectionsSatisfyInvariants) { EXPECT_EQ(covered, 13); } + +// Gradients must match centered finite differences of values. This is the only +// check that ties the gradient code path back to the value code path; partition +// sums and Hessian-vs-FD(gradient) comparisons cannot catch a systematic error +// shared by the first- and second-derivative recurrences. 
+TEST(BasisGradients, LagrangeCanonicalTopologiesMatchNumericalGradients) { + const struct Case { + ElementType type; + int order; + Real tol; + } cases[] = { + {ElementType::Line2, 3, Real(1e-8)}, + {ElementType::Triangle3, 3, Real(1e-7)}, + {ElementType::Quad4, 3, Real(1e-7)}, + {ElementType::Tetra4, 2, Real(1e-7)}, + {ElementType::Hex8, 2, Real(1e-7)}, + {ElementType::Wedge6, 2, Real(1e-7)}, + }; + + for (const auto& c : cases) { + LagrangeBasis basis(c.type, c.order); + expect_gradients_match_numerical(basis, sample_points_for(c.type), c.tol); + } +} + +// The serendipity coefficient tables (Hex20 20x20, Wedge15 15x15) and the quad +// inverse-Vandermonde path each differentiate values through hand-written code +// that is independent of the value evaluation. Partition sums only verify that +// the constant function differentiates to zero, and symmetry is assigned +// structurally, so neither can detect a wrong derivative formula. Finite +// differences of values are the authoritative check. 
+TEST(BasisGradients, SerendipityFamiliesMatchNumericalGradients) { + const struct Case { + ElementType type; + int order; + Real tol; + } cases[] = { + {ElementType::Quad4, 1, Real(1e-8)}, + {ElementType::Quad8, 2, Real(1e-7)}, + {ElementType::Quad4, 3, Real(1e-7)}, + {ElementType::Quad4, 4, Real(5e-7)}, + {ElementType::Hex8, 1, Real(1e-8)}, + {ElementType::Hex20, 2, Real(1e-7)}, + {ElementType::Wedge15, 2, Real(1e-7)}, + }; + + for (const auto& c : cases) { + SerendipityBasis basis(c.type, c.order); + expect_gradients_match_numerical(basis, serendipity_sample_points(c.type), c.tol); + } +} + +TEST(BasisHessians, SerendipityFamiliesMatchNumericalHessians) { + const struct Case { + ElementType type; + int order; + Real tol; + } cases[] = { + {ElementType::Quad4, 1, Real(1e-6)}, + {ElementType::Quad8, 2, Real(1e-6)}, + {ElementType::Quad4, 3, Real(1e-6)}, + {ElementType::Quad4, 4, Real(5e-6)}, + {ElementType::Hex8, 1, Real(1e-6)}, + {ElementType::Hex20, 2, Real(1e-6)}, + {ElementType::Wedge15, 2, Real(1e-6)}, + }; + + for (const auto& c : cases) { + SerendipityBasis basis(c.type, c.order); + expect_hessians_match_numerical(basis, serendipity_sample_points(c.type), c.tol); + } +} diff --git a/tests/unitTests/FE/Basis/test_HigherOrderWedge.cpp b/tests/unitTests/FE/Basis/test_HigherOrderWedge.cpp index 3faffd9e0..8827eebb0 100644 --- a/tests/unitTests/FE/Basis/test_HigherOrderWedge.cpp +++ b/tests/unitTests/FE/Basis/test_HigherOrderWedge.cpp @@ -57,8 +57,8 @@ void expect_partition_gradient_hessian_sums(const LagrangeBasis& basis, basis.evaluate_all(xi, values, gradients, hessians); Real value_sum = Real(0); - Gradient gradient_sum{}; - Hessian hessian_sum{}; + Gradient gradient_sum = Gradient::Zero(); + Hessian hessian_sum = Hessian::Zero(); for (std::size_t i = 0; i < values.size(); ++i) { value_sum += values[i]; for (std::size_t d = 0; d < 3u; ++d) { @@ -137,3 +137,21 @@ TEST(HigherOrderWedge, OrderFourEvaluationsRemainFinite) { expect_all_entries_finite(wedge, 
{Real(0.2), Real(0.1), Real(-0.6)}); expect_all_entries_finite(wedge, {Real(0.05), Real(0.8), Real(0.3)}); } + +// Finiteness alone cannot detect a wrong triangle-index or axis-index lookup; +// the Kronecker property validates the order-four node lattice and its inverse +// index mapping end to end. +TEST(HigherOrderWedge, OrderFourIsNodalAndPartitionsUnity) { + LagrangeBasis wedge(ElementType::Wedge6, 4); + + EXPECT_EQ(wedge.size(), 75u); + expect_kronecker_at_nodes(wedge, Real(1e-9)); + expect_partition_gradient_hessian_sums( + wedge, + { + {Real(0.18), Real(0.22), Real(-0.2)}, + {Real(0.25), Real(0.15), Real(0.45)}, + }, + Real(1e-12), + Real(1e-7)); +} diff --git a/tests/unitTests/FE/Basis/test_LagrangeBasis.cpp b/tests/unitTests/FE/Basis/test_LagrangeBasis.cpp index 9d93f8931..8a1f43c58 100644 --- a/tests/unitTests/FE/Basis/test_LagrangeBasis.cpp +++ b/tests/unitTests/FE/Basis/test_LagrangeBasis.cpp @@ -10,6 +10,7 @@ #include "FE/Basis/LagrangeBasis.h" #include "FE/Basis/NodeOrderingConventions.h" +#include #include #include #include @@ -90,8 +91,8 @@ void expect_partition_gradient_hessian_sums(const LagrangeBasis& basis, basis.evaluate_all(xi, values, gradients, hessians); Real value_sum = Real(0); - Gradient gradient_sum{}; - Hessian hessian_sum{}; + Gradient gradient_sum = Gradient::Zero(); + Hessian hessian_sum = Hessian::Zero(); for (std::size_t i = 0; i < values.size(); ++i) { value_sum += values[i]; for (std::size_t d = 0; d < 3u; ++d) { @@ -190,7 +191,7 @@ Real linear_function(const Point& p) { } Gradient linear_gradient() { - Gradient g{}; + Gradient g = Gradient::Zero(); g[0] = Real(3); g[1] = Real(-4); g[2] = Real(5); @@ -204,6 +205,18 @@ Real quadratic_function(const Point& p) { Real(0.4) * p[1] * p[2]; } +// Total degree three, so it lies in both the P3 simplex space and the Q3 +// tensor-product space. 
+Real cubic_function(const Point& p) { + return quadratic_function(p) + + Real(0.1) * p[0] * p[0] * p[0] - + Real(0.2) * p[1] * p[1] * p[1] + + Real(0.3) * p[2] * p[2] * p[2] + + Real(0.15) * p[0] * p[0] * p[1] - + Real(0.12) * p[0] * p[2] * p[2] + + Real(0.08) * p[0] * p[1] * p[2]; +} + template Real interpolate_value(const LagrangeBasis& basis, const std::vector& values, @@ -338,7 +351,7 @@ TEST(LagrangeBasis, LinearPolynomialReproductionAcrossLinearTopologies) { interpolate_value(basis, values, linear_function); EXPECT_NEAR(interpolated, linear_function(point), Real(1e-12)); - Gradient interpolated_gradient{}; + Gradient interpolated_gradient = Gradient::Zero(); for (std::size_t i = 0; i < gradients.size(); ++i) { const Real nodal_value = linear_function(basis.nodes()[i]); for (int d = 0; d < basis.dimension(); ++d) { @@ -376,6 +389,192 @@ TEST(LagrangeBasis, QuadraticPolynomialReproductionAcrossQuadraticAliases) { } } +// Tetra order >= 3 activates the face-interior node loops, tetra order >= 4 +// activates the volume-interior lattice, and hex order >= 3 activates the six +// orientation-specific face traversals in NodeOrderingConventions. None of +// those generation paths run at the orders covered elsewhere; the Kronecker +// test is what validates the node lattice together with its llround-based +// inverse index mapping (a duplicated or missing node makes the basis +// non-nodal here). 
+TEST(LagrangeBasis, HigherOrderLatticesAreNodalAndPartitionUnity) { + const struct Case { + ElementType type; + int order; + std::size_t size; + Real kronecker_tol; + Real derivative_tol; + std::vector points; + } cases[] = { + {ElementType::Tetra4, 3, 20u, Real(5e-10), Real(1e-8), + {{Real(0.12), Real(0.18), Real(0.16)}, {Real(0.3), Real(0.2), Real(0.25)}}}, + {ElementType::Tetra4, 4, 35u, Real(1e-9), Real(1e-7), + {{Real(0.12), Real(0.18), Real(0.16)}, {Real(0.2), Real(0.1), Real(0.18)}}}, + {ElementType::Hex8, 3, 64u, Real(5e-10), Real(1e-8), + {{Real(0.1), Real(-0.2), Real(0.3)}, {Real(-0.35), Real(0.25), Real(-0.15)}}}, + }; + + for (const auto& c : cases) { + LagrangeBasis basis(c.type, c.order); + EXPECT_EQ(basis.size(), c.size); + expect_kronecker_at_nodes(basis, c.kronecker_tol); + expect_partition_gradient_hessian_sums(basis, c.points, c.derivative_tol); + } +} + +TEST(LagrangeBasis, CubicPolynomialReproductionAtOrderThree) { + const std::vector> cases = { + {ElementType::Tetra4, {Real(0.15), Real(0.2), Real(0.25)}}, + {ElementType::Hex8, {Real(0.15), Real(-0.2), Real(0.25)}}, + }; + + for (const auto& [type, point] : cases) { + LagrangeBasis basis(type, 3); + std::vector values; + basis.evaluate_values(point, values); + + const Real interpolated = interpolate_value(basis, values, cubic_function); + EXPECT_NEAR(interpolated, cubic_function(point), Real(1e-10)) + << "element=" << static_cast(type); + } +} + +TEST(LagrangeBasis, PointTopologyEvaluatesConstantUnity) { + LagrangeBasis basis(ElementType::Point1, 0); + + EXPECT_EQ(basis.element_type(), ElementType::Point1); + EXPECT_EQ(basis.size(), 1u); + EXPECT_EQ(basis.dimension(), 0); + ASSERT_EQ(basis.nodes().size(), 1u); + + const Point xi{Real(0.3), Real(-0.4), Real(0.1)}; + std::vector values; + std::vector gradients; + std::vector hessians; + basis.evaluate_all(xi, values, gradients, hessians); + + ASSERT_EQ(values.size(), 1u); + EXPECT_EQ(values[0], Real(1)); + for (std::size_t d = 0; d < 3u; ++d) { 
+ EXPECT_EQ(gradients[0][d], Real(0)); + for (std::size_t e = 0; e < 3u; ++e) { + EXPECT_EQ(hessians[0](d, e), Real(0)); + } + } + + Real flat_value = Real(-1); + Real flat_gradient[3] = {Real(-1), Real(-1), Real(-1)}; + Real flat_hessian[9]; + std::fill_n(flat_hessian, 9u, Real(-1)); + basis.evaluate_values_to(xi, &flat_value); + basis.evaluate_gradients_to(xi, flat_gradient); + basis.evaluate_hessians_to(xi, flat_hessian); + EXPECT_EQ(flat_value, Real(1)); + for (std::size_t d = 0; d < 3u; ++d) { + EXPECT_EQ(flat_gradient[d], Real(0)); + } + for (std::size_t e = 0; e < 9u; ++e) { + EXPECT_EQ(flat_hessian[e], Real(0)); + } +} + +// P0 bases back piecewise-constant fields (e.g. pressure in mixed elements); +// the order-zero branches in node generation and the simplex/tensor/wedge +// evaluators have no other coverage. +TEST(LagrangeBasis, OrderZeroBasesAreConstantUnity) { + const std::array types = { + ElementType::Line2, + ElementType::Triangle3, + ElementType::Quad4, + ElementType::Tetra4, + ElementType::Hex8, + ElementType::Wedge6, + }; + + for (const auto type : types) { + LagrangeBasis basis(type, 0); + EXPECT_EQ(basis.order(), 0) << "element=" << static_cast(type); + EXPECT_EQ(basis.size(), 1u) << "element=" << static_cast(type); + + for (const auto& xi : sample_points_for(type)) { + std::vector values; + std::vector gradients; + std::vector hessians; + basis.evaluate_all(xi, values, gradients, hessians); + + ASSERT_EQ(values.size(), 1u); + EXPECT_NEAR(values[0], Real(1), Real(1e-14)) + << "element=" << static_cast(type); + for (std::size_t d = 0; d < 3u; ++d) { + EXPECT_NEAR(gradients[0][d], Real(0), Real(1e-14)); + for (std::size_t e = 0; e < 3u; ++e) { + EXPECT_NEAR(hessians[0](d, e), Real(0), Real(1e-14)); + } + } + } + } +} + +// Pins the default basis selection for every supported element type. 
The +// solver adapter (nn.cpp) translates solver element names to ElementType and +// delegates the family/order choice to default_basis_request; a silent change +// here would change the discretization of every simulation using that element. +TEST(BasisFactoryDefaults, SelectionsArePinnedForAllSupportedElements) { + struct Expected { + ElementType type; + BasisType family; + int order; + std::size_t size; + }; + const std::vector cases = { + {ElementType::Point1, BasisType::Lagrange, 0, 1u}, + {ElementType::Line2, BasisType::Lagrange, 1, 2u}, + {ElementType::Line3, BasisType::Lagrange, 2, 3u}, + {ElementType::Triangle3, BasisType::Lagrange, 1, 3u}, + {ElementType::Triangle6, BasisType::Lagrange, 2, 6u}, + {ElementType::Quad4, BasisType::Lagrange, 1, 4u}, + {ElementType::Quad8, BasisType::Serendipity, 2, 8u}, + {ElementType::Quad9, BasisType::Lagrange, 2, 9u}, + {ElementType::Tetra4, BasisType::Lagrange, 1, 4u}, + {ElementType::Tetra10, BasisType::Lagrange, 2, 10u}, + {ElementType::Hex8, BasisType::Lagrange, 1, 8u}, + {ElementType::Hex20, BasisType::Serendipity, 2, 20u}, + {ElementType::Hex27, BasisType::Lagrange, 2, 27u}, + {ElementType::Wedge6, BasisType::Lagrange, 1, 6u}, + {ElementType::Wedge15, BasisType::Serendipity, 2, 15u}, + {ElementType::Wedge18, BasisType::Lagrange, 2, 18u}, + }; + + for (const auto& expected : cases) { + const auto request = basis_factory::default_basis_request(expected.type); + EXPECT_EQ(request.element_type, expected.type) + << "element=" << static_cast(expected.type); + EXPECT_EQ(request.basis_type, expected.family) + << "element=" << static_cast(expected.type); + ASSERT_TRUE(request.order.has_value()) + << "element=" << static_cast(expected.type); + EXPECT_EQ(*request.order, expected.order) + << "element=" << static_cast(expected.type); + + auto basis = basis_factory::create_default_for(expected.type); + ASSERT_NE(basis, nullptr); + EXPECT_EQ(basis->basis_type(), expected.family) + << "element=" << static_cast(expected.type); + 
EXPECT_EQ(basis->order(), expected.order) + << "element=" << static_cast(expected.type); + EXPECT_EQ(basis->size(), expected.size) + << "element=" << static_cast(expected.type); + } +} + +TEST(BasisFactoryDefaults, RejectsElementsWithoutDefaultBasis) { + EXPECT_THROW((void)basis_factory::default_basis_request(ElementType::Pyramid5), + BasisElementCompatibilityException); + EXPECT_THROW((void)basis_factory::default_basis_request(ElementType::Pyramid13), + BasisElementCompatibilityException); + EXPECT_THROW((void)basis_factory::create_default_for(ElementType::Unknown), + BasisElementCompatibilityException); +} + TEST(LagrangeBasis, FactoryCreatesReducedScalarBasisFamilies) { auto lagrange = basis_factory::create(BasisRequest{ElementType::Hex27, BasisType::Lagrange, 1}); diff --git a/tests/unitTests/FE/Basis/test_SerendipityTensorModal.cpp b/tests/unitTests/FE/Basis/test_SerendipityTensorModal.cpp index 30f876420..235dc8c40 100644 --- a/tests/unitTests/FE/Basis/test_SerendipityTensorModal.cpp +++ b/tests/unitTests/FE/Basis/test_SerendipityTensorModal.cpp @@ -5,9 +5,11 @@ #include +#include "FE/Basis/LagrangeBasis.h" #include "FE/Basis/NodeOrderingConventions.h" #include "FE/Basis/SerendipityBasis.h" +#include #include using namespace svmp::FE; @@ -25,7 +27,7 @@ void expect_partition_of_unity(const SerendipityBasis& basis, basis.evaluate_gradients(xi, gradients); Real value_sum = Real(0); - Gradient gradient_sum{}; + Gradient gradient_sum = Gradient::Zero(); for (std::size_t i = 0; i < values.size(); ++i) { value_sum += values[i]; for (std::size_t component = 0; component < 3u; ++component) { @@ -68,6 +70,36 @@ std::vector> reference_nodes(ElementType type, return nodes; } +template +Real interpolate_nodal_function(const SerendipityBasis& basis, + const math::Vector& xi, + Function&& nodal_function) +{ + std::vector values; + basis.evaluate_values(xi, values); + + Real result = Real(0); + const auto& nodes = basis.nodes(); + for (std::size_t i = 0; i < values.size(); 
++i) { + result += values[i] * nodal_function(nodes[i]); + } + return result; +} + +// Every monomial here has superlinear degree at most three, so it lies in the +// order-three quadrilateral serendipity space. +Real cubic_serendipity_function(const math::Vector& p) { + const Real x = p[0]; + const Real y = p[1]; + return Real(1) + Real(2) * x - y + Real(3) * x * y + + x * x * x - Real(2) * y * y * y + + Real(0.5) * x * x * x * y - Real(0.25) * x * y * y * y; +} + +Real bilinear_function(const math::Vector& p) { + return Real(2) - Real(3) * p[0] + Real(4) * p[1] + Real(0.5) * p[0] * p[1]; +} + } // namespace TEST(SerendipityBasis, Quad8IsNodalAndPartitionsUnity) { @@ -104,3 +136,154 @@ TEST(SerendipityBasis, RejectsUnsupportedSerendipityAliases) { EXPECT_THROW(SerendipityBasis(ElementType::Pyramid14, 2), FEException); EXPECT_THROW(SerendipityBasis(ElementType::Quad8, 3), FEException); } + +// Orders other than two run the generic quadrilateral path: serendipity +// monomial selection, boundary plus interior node placement, and a runtime +// Vandermonde inversion whose unisolvence is assumed rather than tabulated. +// Order four is the first order that selects an interior node. 
+TEST(SerendipityBasis, QuadrilateralOrdersOneThreeFourAreNodalAndPartitionUnity) { + const struct Case { + int order; + std::size_t size; + } cases[] = { + {1, 4u}, + {3, 12u}, + {4, 17u}, + }; + + for (const auto& c : cases) { + SerendipityBasis basis(ElementType::Quad4, c.order); + EXPECT_EQ(basis.size(), c.size) << "order=" << c.order; + EXPECT_EQ(basis.order(), c.order); + EXPECT_EQ(basis.dimension(), 2); + ASSERT_EQ(basis.nodes().size(), c.size); + + for (const auto& node : basis.nodes()) { + EXPECT_LE(std::abs(node[0]), Real(1)); + EXPECT_LE(std::abs(node[1]), Real(1)); + } + + expect_nodal_delta(basis, basis.nodes(), Real(1e-9)); + expect_partition_of_unity(basis, {Real(0.17), Real(-0.31), Real(0)}, Real(1e-9)); + expect_partition_of_unity(basis, {Real(-0.45), Real(0.25), Real(0)}, Real(1e-9)); + } +} + +TEST(SerendipityBasis, QuadrilateralOrderOneReproducesBilinearFunctions) { + SerendipityBasis basis(ElementType::Quad4, 1); + + const std::vector> points = { + {Real(0.25), Real(-0.4), Real(0)}, + {Real(-0.7), Real(0.6), Real(0)}, + }; + for (const auto& xi : points) { + EXPECT_NEAR(interpolate_nodal_function(basis, xi, bilinear_function), + bilinear_function(xi), + Real(1e-12)); + } +} + +TEST(SerendipityBasis, QuadrilateralOrderThreeReproducesSerendipityCubics) { + SerendipityBasis basis(ElementType::Quad4, 3); + + const std::vector> points = { + {Real(0.25), Real(-0.4), Real(0)}, + {Real(-0.7), Real(0.6), Real(0)}, + }; + for (const auto& xi : points) { + EXPECT_NEAR(interpolate_nodal_function(basis, xi, cubic_serendipity_function), + cubic_serendipity_function(xi), + Real(1e-11)); + } +} + +// SerendipityBasis(Hex8, 1) is the only route to the hand-written trilinear +// corner evaluator (values, gradients, and Hessians); it must agree with the +// trilinear Lagrange basis on the same element. 
+TEST(SerendipityBasis, TrilinearHexMatchesLagrangeHex8) { + SerendipityBasis serendipity(ElementType::Hex8, 1); + LagrangeBasis lagrange(ElementType::Hex8, 1); + + EXPECT_EQ(serendipity.size(), 8u); + EXPECT_EQ(serendipity.dimension(), 3); + expect_nodal_delta(serendipity, + reference_nodes(ElementType::Hex8, serendipity.size()), + Real(1e-12)); + + const std::vector> points = { + {Real(0.2), Real(-0.1), Real(0.3)}, + {Real(-0.35), Real(0.25), Real(-0.15)}, + }; + for (const auto& xi : points) { + std::vector s_values; + std::vector l_values; + std::vector s_gradients; + std::vector l_gradients; + std::vector s_hessians; + std::vector l_hessians; + serendipity.evaluate_all(xi, s_values, s_gradients, s_hessians); + lagrange.evaluate_all(xi, l_values, l_gradients, l_hessians); + + ASSERT_EQ(s_values.size(), l_values.size()); + for (std::size_t i = 0; i < s_values.size(); ++i) { + EXPECT_NEAR(s_values[i], l_values[i], Real(1e-13)); + for (std::size_t d = 0; d < 3u; ++d) { + EXPECT_NEAR(s_gradients[i][d], l_gradients[i][d], Real(1e-13)); + for (std::size_t e = 0; e < 3u; ++e) { + EXPECT_NEAR(s_hessians[i](d, e), l_hessians[i](d, e), Real(1e-13)); + } + } + } + } +} + +// Geometry mode keeps the public Hex20 node count while mapping geometry with +// the trilinear corner functions: corners must match the Hex8 basis exactly +// and the quadratic edge nodes must contribute nothing. 
+TEST(SerendipityBasis, Hex20GeometryModeUsesTrilinearCornersOnly) { + SerendipityBasis geometry(ElementType::Hex20, 2, true); + SerendipityBasis trilinear(ElementType::Hex8, 1); + + EXPECT_EQ(geometry.size(), 20u); + EXPECT_EQ(geometry.order(), 2); + + const std::vector> points = { + {Real(0.2), Real(-0.1), Real(0.3)}, + {Real(-0.35), Real(0.25), Real(-0.15)}, + }; + for (const auto& xi : points) { + std::vector g_values; + std::vector g_gradients; + std::vector g_hessians; + geometry.evaluate_all(xi, g_values, g_gradients, g_hessians); + ASSERT_EQ(g_values.size(), 20u); + + std::vector t_values; + std::vector t_gradients; + std::vector t_hessians; + trilinear.evaluate_all(xi, t_values, t_gradients, t_hessians); + + Real value_sum = Real(0); + for (std::size_t i = 0; i < 20u; ++i) { + value_sum += g_values[i]; + if (i < 8u) { + EXPECT_NEAR(g_values[i], t_values[i], Real(1e-13)) << "corner=" << i; + for (std::size_t d = 0; d < 3u; ++d) { + EXPECT_NEAR(g_gradients[i][d], t_gradients[i][d], Real(1e-13)); + for (std::size_t e = 0; e < 3u; ++e) { + EXPECT_NEAR(g_hessians[i](d, e), t_hessians[i](d, e), Real(1e-13)); + } + } + } else { + EXPECT_EQ(g_values[i], Real(0)) << "edge node=" << i; + for (std::size_t d = 0; d < 3u; ++d) { + EXPECT_EQ(g_gradients[i][d], Real(0)); + for (std::size_t e = 0; e < 3u; ++e) { + EXPECT_EQ(g_hessians[i](d, e), Real(0)); + } + } + } + } + EXPECT_NEAR(value_sum, Real(1), Real(1e-13)); + } +} diff --git a/tests/unitTests/FE/Math/test_DenseLinearAlgebra.cpp b/tests/unitTests/FE/Math/test_DenseLinearAlgebra.cpp index 2b44ad2bf..9e9e08e95 100644 --- a/tests/unitTests/FE/Math/test_DenseLinearAlgebra.cpp +++ b/tests/unitTests/FE/Math/test_DenseLinearAlgebra.cpp @@ -108,6 +108,132 @@ TEST(DenseLinearAlgebra, FactorizationSolvesDenseRightHandSideBlock) { } } +// Every other matrix in this file already has its largest pivot on the +// diagonal, so without these cases the row-exchange branch in +// factor_dense_matrix and the permutation replay in 
solve_in_place never +// execute. SerendipityBasis inverts its Vandermonde matrices through this +// code in production. +TEST(DenseLinearAlgebra, FactorizationPivotsThroughZeroLeadingDiagonal) { + const std::vector swap_2x2{ + Real(0), Real(1), + Real(1), Real(0) + }; + + const auto solver = factor_dense_matrix(swap_2x2, 2u, "swap 2x2"); + const std::vector rhs{Real(3), Real(7)}; + const auto x = solver.solve(std::span(rhs.data(), rhs.size())); + ASSERT_EQ(x.size(), 2u); + EXPECT_NEAR(x[0], Real(7), Real(1.0e-14)); + EXPECT_NEAR(x[1], Real(3), Real(1.0e-14)); + + const auto inv = invert_dense_matrix(swap_2x2, 2u, "swap 2x2"); + for (std::size_t row = 0; row < 2u; ++row) { + for (std::size_t col = 0; col < 2u; ++col) { + EXPECT_NEAR(inv[row * 2u + col], swap_2x2[row * 2u + col], Real(1.0e-14)); + } + } + + // Every column requires a row exchange during elimination. + const std::vector permuted_scaled{ + Real(0), Real(0), Real(1), Real(0), + Real(1), Real(0), Real(0), Real(0), + Real(0), Real(0), Real(0), Real(2), + Real(0), Real(3), Real(0), Real(0) + }; + + const auto inv4 = invert_dense_matrix(permuted_scaled, 4u, "permuted scaled 4x4"); + for (std::size_t row = 0; row < 4u; ++row) { + for (std::size_t col = 0; col < 4u; ++col) { + const Real expected = (row == col) ? Real(1) : Real(0); + EXPECT_NEAR(multiply_entry(permuted_scaled, inv4, 4u, row, col), + expected, + Real(1.0e-14)); + } + } +} + +TEST(DenseLinearAlgebra, WideMultiRhsSolveWithPivoting) { + // Requires a row swap in column 0 and uses a wide right-hand-side block to + // exercise the row-interleaved multi-RHS layout end to end. 
+ const std::vector A{ + Real(0), Real(2), Real(1), + Real(4), Real(1), Real(0), + Real(1), Real(0), Real(3) + }; + constexpr std::size_t kRhsCount = 33u; + + const auto solver = factor_dense_matrix(A, 3u, "pivoting 3x3"); + + std::vector rhs(3u * kRhsCount, Real(0)); + for (std::size_t row = 0; row < 3u; ++row) { + for (std::size_t r = 0; r < kRhsCount; ++r) { + rhs[row * kRhsCount + r] = + Real(1) + static_cast(row) - Real(0.25) * static_cast(r % 7u); + } + } + const auto original_rhs = rhs; + + solver.solve_in_place(std::span(rhs.data(), rhs.size()), kRhsCount); + + for (std::size_t r = 0; r < kRhsCount; ++r) { + for (std::size_t row = 0; row < 3u; ++row) { + Real ax = Real(0); + for (std::size_t col = 0; col < 3u; ++col) { + ax += A[row * 3u + col] * rhs[col * kRhsCount + r]; + } + EXPECT_NEAR(ax, original_rhs[row * kRhsCount + r], Real(1.0e-12)) + << "rhs column " << r << ", row " << row; + } + } +} + +TEST(DenseLinearAlgebra, SolveInPlaceValidatesInputs) { + const std::vector identity{ + Real(1), Real(0), + Real(0), Real(1) + }; + const auto solver = factor_dense_matrix(identity, 2u, "identity 2x2"); + + std::vector rhs{Real(1), Real(2)}; + EXPECT_THROW(solver.solve_in_place(std::span(rhs.data(), rhs.size()), 0u), + FEException); + + std::vector wrong_size{Real(1), Real(2), Real(3)}; + EXPECT_THROW( + solver.solve_in_place(std::span(wrong_size.data(), wrong_size.size()), 1u), + FEException); + + DenseLUSolver unfactored; + unfactored.n = 2u; + unfactored.label = "unfactored"; + EXPECT_FALSE(unfactored.empty()); + EXPECT_THROW(unfactored.solve_in_place(std::span(rhs.data(), rhs.size()), 1u), + FEException); +} + +TEST(DenseLinearAlgebra, DiagnosticValidationRejectsRankMismatch) { + DenseInverseResult result; + result.diagnostics.rank = 1u; + + EXPECT_THROW(validate_dense_inverse_diagnostics(result, 2u, "rank mismatch"), + FEException); +} + +TEST(DenseLinearAlgebra, RankHandlesNonSquareMatrices) { + const std::vector wide_full{ + Real(1), Real(0), Real(2), + 
Real(0), Real(1), Real(-1) + }; + EXPECT_EQ(dense_matrix_rank(wide_full, 2u, 3u), 2u); + + const std::vector tall_rank_one{ + Real(1), Real(2), + Real(2), Real(4), + Real(3), Real(6) + }; + EXPECT_EQ(dense_matrix_rank(tall_rank_one, 3u, 2u), 1u); +} + TEST(DenseLinearAlgebra, HighConditionInverseUsesSvdFallback) { const std::vector high_condition{ Real(1), Real(0), @@ -117,13 +243,9 @@ TEST(DenseLinearAlgebra, HighConditionInverseUsesSvdFallback) { const auto result = invert_dense_matrix_with_diagnostics(high_condition, 2u, "high-condition diagonal"); EXPECT_EQ(result.diagnostics.rank, 2u); -#if defined(FE_HAS_EIGEN) && FE_HAS_EIGEN EXPECT_GT(result.diagnostics.condition_estimate, dense_matrix_condition_fallback_threshold()); EXPECT_TRUE(result.used_svd_fallback); -#else - EXPECT_FALSE(result.used_svd_fallback); -#endif for (std::size_t row = 0; row < 2u; ++row) { for (std::size_t col = 0; col < 2u; ++col) { @@ -136,9 +258,6 @@ TEST(DenseLinearAlgebra, HighConditionInverseUsesSvdFallback) { } TEST(DenseLinearAlgebra, DiagnosticValidationRejectsUnsupportedCondition) { -#if !(defined(FE_HAS_EIGEN) && FE_HAS_EIGEN) - GTEST_SKIP() << "condition rejection requires FE_ENABLE_EIGEN diagnostics"; -#endif DenseInverseResult result; result.diagnostics.rank = 2u; result.diagnostics.condition_estimate = @@ -193,13 +312,9 @@ TEST(DenseLinearAlgebra, DiagnosticsReportRankAndConditionEstimate) { const auto full = dense_matrix_diagnostics(diagonal, 2u, 2u, "diagonal 2x2"); EXPECT_EQ(full.rank, 2u); -#if defined(FE_HAS_EIGEN) && FE_HAS_EIGEN EXPECT_NEAR(full.largest_singular_value, Real(4), Real(1.0e-14)); EXPECT_NEAR(full.smallest_retained_singular_value, Real(0.5), Real(1.0e-14)); EXPECT_NEAR(full.condition_estimate, Real(8), Real(1.0e-14)); -#else - EXPECT_TRUE(std::isinf(full.condition_estimate)); -#endif const std::vector rank_one{ Real(1), Real(2), @@ -212,9 +327,6 @@ TEST(DenseLinearAlgebra, DiagnosticsReportRankAndConditionEstimate) { } TEST(DenseLinearAlgebra, 
PseudoInverseHandlesSingularMatrixWithoutNormalEquations) { -#if !(defined(FE_HAS_EIGEN) && FE_HAS_EIGEN) - GTEST_SKIP() << "rank-revealing pseudo-inverse requires FE_ENABLE_EIGEN"; -#endif const std::vector rank_one{ Real(1), Real(2), Real(2), Real(4) @@ -246,9 +358,6 @@ TEST(DenseLinearAlgebra, PseudoInverseHandlesSingularMatrixWithoutNormalEquation } TEST(DenseLinearAlgebra, PseudoInverseDropsNearZeroSingularValues) { -#if !(defined(FE_HAS_EIGEN) && FE_HAS_EIGEN) - GTEST_SKIP() << "rank-revealing pseudo-inverse requires FE_ENABLE_EIGEN"; -#endif const std::vector near_singular{ Real(1), Real(0), Real(0), Real(1.0e-18) diff --git a/tests/unitTests/FE/Math/test_Matrix.cpp b/tests/unitTests/FE/Math/test_Matrix.cpp deleted file mode 100644 index 3b2fe664a..000000000 --- a/tests/unitTests/FE/Math/test_Matrix.cpp +++ /dev/null @@ -1,593 +0,0 @@ -/** - * @file test_Matrix.cpp - * @brief Unit tests for Matrix.h - fixed-size matrices with expression templates - */ - -#include -#include "FE/Math/Matrix.h" -#include "FE/Math/Vector.h" -#include "FE/Math/MatrixExpr.h" -#include -#include -#include -#include - -using namespace svmp::FE::math; - -// Test fixture for Matrix tests -class MatrixTest : public ::testing::Test { -protected: - static constexpr double tolerance = 1e-14; - - void SetUp() override {} - void TearDown() override {} - - // Helper function to check if two values are approximately equal - template - bool approx_equal(T a, T b, T tol = tolerance) { - return std::abs(a - b) <= tol; - } -}; - -// ============================================================================= -// Construction and Initialization Tests -// ============================================================================= - -TEST_F(MatrixTest, DefaultConstruction) { - Matrix m; - for (size_t i = 0; i < 3; ++i) { - for (size_t j = 0; j < 3; ++j) { - EXPECT_EQ(m(i, j), 0.0); - } - } -} - -TEST_F(MatrixTest, FillConstruction) { - Matrix m(5.0); - for (size_t i = 0; i < 2; ++i) { - for 
(size_t j = 0; j < 3; ++j) { - EXPECT_EQ(m(i, j), 5.0); - } - } -} - -TEST_F(MatrixTest, InitializerListConstruction) { - Matrix m{{1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0}}; - - EXPECT_EQ(m(0, 0), 1.0); - EXPECT_EQ(m(0, 1), 2.0); - EXPECT_EQ(m(0, 2), 3.0); - EXPECT_EQ(m(1, 0), 4.0); - EXPECT_EQ(m(1, 1), 5.0); - EXPECT_EQ(m(1, 2), 6.0); -} - -TEST_F(MatrixTest, CopyConstruction) { - Matrix m1{{1.0, 2.0}, - {3.0, 4.0}}; - Matrix m2(m1); - - EXPECT_EQ(m2(0, 0), 1.0); - EXPECT_EQ(m2(0, 1), 2.0); - EXPECT_EQ(m2(1, 0), 3.0); - EXPECT_EQ(m2(1, 1), 4.0); - - // Ensure deep copy - m2(0, 0) = 10.0; - EXPECT_EQ(m1(0, 0), 1.0); - EXPECT_EQ(m2(0, 0), 10.0); -} - -TEST_F(MatrixTest, MoveConstruction) { - Matrix m1{{1.0, 2.0}, - {3.0, 4.0}}; - Matrix m2(std::move(m1)); - - EXPECT_EQ(m2(0, 0), 1.0); - EXPECT_EQ(m2(0, 1), 2.0); - EXPECT_EQ(m2(1, 0), 3.0); - EXPECT_EQ(m2(1, 1), 4.0); -} - -// ============================================================================= -// Element Access Tests -// ============================================================================= - -TEST_F(MatrixTest, ElementAccess) { - Matrix m{{1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0}}; - - // Non-const access using operator() - EXPECT_EQ(m(0, 0), 1.0); - EXPECT_EQ(m(0, 2), 3.0); - EXPECT_EQ(m(1, 1), 5.0); - - // Modification - m(1, 2) = 7.0; - EXPECT_EQ(m(1, 2), 7.0); - - // Const access - const Matrix cm{{1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0}}; - EXPECT_EQ(cm(0, 1), 2.0); - EXPECT_EQ(cm(1, 0), 4.0); -} - -TEST_F(MatrixTest, ElementAccessBounds) { - Matrix m{{1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0}}; - - // at() with bounds checking - EXPECT_EQ(m.at(0, 0), 1.0); - EXPECT_EQ(m.at(1, 2), 6.0); - - // Test out of bounds throws - EXPECT_THROW(m.at(2, 0), std::out_of_range); - EXPECT_THROW(m.at(0, 3), std::out_of_range); - EXPECT_THROW(m.at(10, 10), std::out_of_range); -} - -TEST_F(MatrixTest, RowColumnAccess) { - Matrix m{{1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0}, - {7.0, 8.0, 9.0}}; - - // Get row - auto row1 = m.row(1); - 
EXPECT_EQ(row1[0], 4.0); - EXPECT_EQ(row1[1], 5.0); - EXPECT_EQ(row1[2], 6.0); - - // Get column - auto col2 = m.col(2); - EXPECT_EQ(col2[0], 3.0); - EXPECT_EQ(col2[1], 6.0); - EXPECT_EQ(col2[2], 9.0); - - // Set row - Vector new_row{10.0, 11.0, 12.0}; - m.set_row(0, new_row); - EXPECT_EQ(m(0, 0), 10.0); - EXPECT_EQ(m(0, 1), 11.0); - EXPECT_EQ(m(0, 2), 12.0); - - // Set column - Vector new_col{20.0, 21.0, 22.0}; - m.set_col(1, new_col); - EXPECT_EQ(m(0, 1), 20.0); - EXPECT_EQ(m(1, 1), 21.0); - EXPECT_EQ(m(2, 1), 22.0); -} - -// ============================================================================= -// Arithmetic Operations Tests -// ============================================================================= - -TEST_F(MatrixTest, Addition) { - Matrix a{{1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0}}; - Matrix b{{7.0, 8.0, 9.0}, - {10.0, 11.0, 12.0}}; - - Matrix c = a + b; - EXPECT_EQ(c(0, 0), 8.0); - EXPECT_EQ(c(0, 1), 10.0); - EXPECT_EQ(c(0, 2), 12.0); - EXPECT_EQ(c(1, 0), 14.0); - EXPECT_EQ(c(1, 1), 16.0); - EXPECT_EQ(c(1, 2), 18.0); -} - -TEST_F(MatrixTest, Subtraction) { - Matrix a{{8.0, 10.0, 12.0}, - {14.0, 16.0, 18.0}}; - Matrix b{{7.0, 8.0, 9.0}, - {10.0, 11.0, 12.0}}; - - Matrix c = a - b; - EXPECT_EQ(c(0, 0), 1.0); - EXPECT_EQ(c(0, 1), 2.0); - EXPECT_EQ(c(0, 2), 3.0); - EXPECT_EQ(c(1, 0), 4.0); - EXPECT_EQ(c(1, 1), 5.0); - EXPECT_EQ(c(1, 2), 6.0); -} - -TEST_F(MatrixTest, ScalarMultiplication) { - Matrix a{{1.0, 2.0}, - {3.0, 4.0}}; - - Matrix b = 2.0 * a; - EXPECT_EQ(b(0, 0), 2.0); - EXPECT_EQ(b(0, 1), 4.0); - EXPECT_EQ(b(1, 0), 6.0); - EXPECT_EQ(b(1, 1), 8.0); - - Matrix c = a * 3.0; - EXPECT_EQ(c(0, 0), 3.0); - EXPECT_EQ(c(0, 1), 6.0); - EXPECT_EQ(c(1, 0), 9.0); - EXPECT_EQ(c(1, 1), 12.0); -} - -TEST_F(MatrixTest, ScalarDivision) { - Matrix a{{2.0, 4.0}, - {6.0, 8.0}}; - - Matrix b = a / 2.0; - EXPECT_EQ(b(0, 0), 1.0); - EXPECT_EQ(b(0, 1), 2.0); - EXPECT_EQ(b(1, 0), 3.0); - EXPECT_EQ(b(1, 1), 4.0); -} - -TEST_F(MatrixTest, MatrixMultiplication) { - Matrix 
a{{1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0}}; - Matrix b{{7.0, 8.0}, - {9.0, 10.0}, - {11.0, 12.0}}; - - Matrix c = a * b; - EXPECT_EQ(c(0, 0), 58.0); // 1*7 + 2*9 + 3*11 - EXPECT_EQ(c(0, 1), 64.0); // 1*8 + 2*10 + 3*12 - EXPECT_EQ(c(1, 0), 139.0); // 4*7 + 5*9 + 6*11 - EXPECT_EQ(c(1, 1), 154.0); // 4*8 + 5*10 + 6*12 -} - -TEST_F(MatrixTest, MatrixVectorMultiplication) { - Matrix m{{1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0}, - {7.0, 8.0, 9.0}}; - Vector v{1.0, 2.0, 3.0}; - - Vector result = m * v; - EXPECT_EQ(result[0], 14.0); // 1*1 + 2*2 + 3*3 - EXPECT_EQ(result[1], 32.0); // 4*1 + 5*2 + 6*3 - EXPECT_EQ(result[2], 50.0); // 7*1 + 8*2 + 9*3 -} - -// ============================================================================= -// Special Matrix Operations Tests -// ============================================================================= - -TEST_F(MatrixTest, Transpose) { - Matrix m{{1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0}}; - - Matrix mt = m.transpose(); - EXPECT_EQ(mt(0, 0), 1.0); - EXPECT_EQ(mt(0, 1), 4.0); - EXPECT_EQ(mt(1, 0), 2.0); - EXPECT_EQ(mt(1, 1), 5.0); - EXPECT_EQ(mt(2, 0), 3.0); - EXPECT_EQ(mt(2, 1), 6.0); -} - -TEST_F(MatrixTest, Determinant2x2) { - Matrix m{{1.0, 2.0}, - {3.0, 4.0}}; - - double det = m.determinant(); - EXPECT_EQ(det, -2.0); // 1*4 - 2*3 = 4 - 6 = -2 -} - -TEST_F(MatrixTest, Determinant3x3) { - Matrix m{{1.0, 2.0, 3.0}, - {0.0, 1.0, 4.0}, - {5.0, 6.0, 0.0}}; - - double det = m.determinant(); - EXPECT_EQ(det, 1.0); // Using Sarrus rule -} - -TEST_F(MatrixTest, Determinant4x4) { - Matrix m{{1, 0, 0, 0}, - {0, 2, 0, 0}, - {0, 0, 3, 0}, - {0, 0, 0, 4}}; - - double det = m.determinant(); - EXPECT_EQ(det, 24.0); // 1*2*3*4 = 24 (diagonal matrix) -} - -TEST_F(MatrixTest, Inverse2x2) { - Matrix m{{1.0, 2.0}, - {3.0, 4.0}}; - - Matrix inv = m.inverse(); - - // Check inverse properties - EXPECT_NEAR(inv(0, 0), -2.0, tolerance); - EXPECT_NEAR(inv(0, 1), 1.0, tolerance); - EXPECT_NEAR(inv(1, 0), 1.5, tolerance); - EXPECT_NEAR(inv(1, 1), -0.5, tolerance); - - // 
Verify M * M^-1 = I - Matrix identity = m * inv; - EXPECT_NEAR(identity(0, 0), 1.0, tolerance); - EXPECT_NEAR(identity(0, 1), 0.0, tolerance); - EXPECT_NEAR(identity(1, 0), 0.0, tolerance); - EXPECT_NEAR(identity(1, 1), 1.0, tolerance); -} - -TEST_F(MatrixTest, Inverse3x3) { - Matrix m{{1.0, 2.0, 3.0}, - {0.0, 1.0, 4.0}, - {5.0, 6.0, 0.0}}; - - Matrix inv = m.inverse(); - - // Verify M * M^-1 = I - Matrix identity = m * inv; - for (size_t i = 0; i < 3; ++i) { - for (size_t j = 0; j < 3; ++j) { - double expected = (i == j) ? 1.0 : 0.0; - EXPECT_NEAR(identity(i, j), expected, tolerance); - } - } -} - -TEST_F(MatrixTest, Trace) { - Matrix m{{1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0}, - {7.0, 8.0, 9.0}}; - - double trace = m.trace(); - EXPECT_EQ(trace, 15.0); // 1 + 5 + 9 = 15 -} - -// ============================================================================= -// Special Matrix Types Tests -// ============================================================================= - -TEST_F(MatrixTest, IdentityMatrix) { - Matrix I = Matrix::identity(); - - for (size_t i = 0; i < 3; ++i) { - for (size_t j = 0; j < 3; ++j) { - double expected = (i == j) ? 
1.0 : 0.0; - EXPECT_EQ(I(i, j), expected); - } - } - - // Test identity property - Matrix m{{1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0}, - {7.0, 8.0, 9.0}}; - Matrix result = m * I; - - for (size_t i = 0; i < 3; ++i) { - for (size_t j = 0; j < 3; ++j) { - EXPECT_EQ(result(i, j), m(i, j)); - } - } -} - -TEST_F(MatrixTest, ZeroMatrix) { - Matrix Z = Matrix::zero(); - - for (size_t i = 0; i < 2; ++i) { - for (size_t j = 0; j < 3; ++j) { - EXPECT_EQ(Z(i, j), 0.0); - } - } -} - -TEST_F(MatrixTest, DiagonalMatrix) { - Vector diag{1.0, 2.0, 3.0}; - Matrix D = Matrix::diagonal(diag); - - EXPECT_EQ(D(0, 0), 1.0); - EXPECT_EQ(D(1, 1), 2.0); - EXPECT_EQ(D(2, 2), 3.0); - - // Off-diagonal elements should be zero - EXPECT_EQ(D(0, 1), 0.0); - EXPECT_EQ(D(0, 2), 0.0); - EXPECT_EQ(D(1, 0), 0.0); - EXPECT_EQ(D(1, 2), 0.0); - EXPECT_EQ(D(2, 0), 0.0); - EXPECT_EQ(D(2, 1), 0.0); -} - -// ============================================================================= -// Expression Template Tests -// ============================================================================= - -TEST_F(MatrixTest, ExpressionTemplatesNoTemporaries) { - Matrix a{{1, 2}, {3, 4}}; - Matrix b{{5, 6}, {7, 8}}; - Matrix c{{9, 10}, {11, 12}}; - - // Complex expression should create no temporaries - Matrix result = a + b - c; - - EXPECT_EQ(result(0, 0), -3.0); // 1 + 5 - 9 - EXPECT_EQ(result(0, 1), -2.0); // 2 + 6 - 10 - EXPECT_EQ(result(1, 0), -1.0); // 3 + 7 - 11 - EXPECT_EQ(result(1, 1), 0.0); // 4 + 8 - 12 -} - -TEST_F(MatrixTest, LazyEvaluation) { - Matrix a{{1, 2}, {3, 4}}; - Matrix b{{5, 6}, {7, 8}}; - - // Expression should not be evaluated until assignment - auto expr = a + b; // No computation yet - - Matrix result = expr; // Evaluation happens here - EXPECT_EQ(result(0, 0), 6.0); - EXPECT_EQ(result(0, 1), 8.0); -} - -// ============================================================================= -// Edge Cases and Error Handling Tests -// 
============================================================================= - -TEST_F(MatrixTest, SingularMatrixInverse) { - Matrix singular{{1.0, 2.0}, - {2.0, 4.0}}; // det = 0 - - EXPECT_THROW(singular.inverse(), std::runtime_error); -} - -TEST_F(MatrixTest, DivisionByZero) { - Matrix m{{1.0, 2.0}, - {3.0, 4.0}}; - - Matrix result = m / 0.0; - EXPECT_TRUE(std::isinf(result(0, 0))); - EXPECT_TRUE(std::isinf(result(0, 1))); -} - -TEST_F(MatrixTest, ExtremeLargeValues) { - double large = 1e308; - Matrix m{{large, 0}, {0, large}}; - - Matrix half = m / 2.0; - EXPECT_FALSE(std::isinf(half(0, 0))); - EXPECT_EQ(half(0, 0), large / 2.0); -} - -// ============================================================================= -// Numerical Precision Tests -// ============================================================================= - -TEST_F(MatrixTest, NumericalStability) { - // Test near-singular matrix - double eps = 1e-15; - Matrix m{{1.0, 1.0}, - {1.0, 1.0 + eps}}; - - double det = m.determinant(); - // Relax tolerance due to floating-point arithmetic in determinant calculation - EXPECT_NEAR(det, eps, 1e-14); -} - -TEST_F(MatrixTest, OrthogonalMatrixProperties) { - // Create rotation matrix (orthogonal) - double angle = M_PI / 4; - Matrix R{{cos(angle), -sin(angle)}, - {sin(angle), cos(angle)}}; - - // Check orthogonality: R * R^T = I - Matrix RRt = R * R.transpose(); - EXPECT_NEAR(RRt(0, 0), 1.0, tolerance); - EXPECT_NEAR(RRt(0, 1), 0.0, tolerance); - EXPECT_NEAR(RRt(1, 0), 0.0, tolerance); - EXPECT_NEAR(RRt(1, 1), 1.0, tolerance); - - // Check determinant = ±1 - EXPECT_NEAR(std::abs(R.determinant()), 1.0, tolerance); -} - -// ============================================================================= -// Matrix Properties Tests -// ============================================================================= - -TEST_F(MatrixTest, IsSymmetric) { - Matrix sym{{1, 2, 3}, - {2, 4, 5}, - {3, 5, 6}}; - EXPECT_TRUE(sym.is_symmetric(tolerance)); - - Matrix 
nonsym{{1, 2, 3}, - {4, 5, 6}, - {7, 8, 9}}; - EXPECT_FALSE(nonsym.is_symmetric(tolerance)); -} - -TEST_F(MatrixTest, IsSkewSymmetric) { - Matrix skew{{0, -1, 2}, - {1, 0, -3}, - {-2, 3, 0}}; - EXPECT_TRUE(skew.is_skew_symmetric(tolerance)); - - Matrix nonskew{{1, 2, 3}, - {4, 5, 6}, - {7, 8, 9}}; - EXPECT_FALSE(nonskew.is_skew_symmetric(tolerance)); -} - -TEST_F(MatrixTest, IsDiagonal) { - Matrix diag{{1, 0, 0}, - {0, 2, 0}, - {0, 0, 3}}; - EXPECT_TRUE(diag.is_diagonal(tolerance)); - - Matrix nondiag{{1, 0.1, 0}, - {0, 2, 0}, - {0, 0, 3}}; - EXPECT_FALSE(nondiag.is_diagonal(tolerance)); -} - -// ============================================================================= -// Thread Safety Tests -// ============================================================================= - -TEST_F(MatrixTest, ThreadSafetyReadOnly) { - Matrix m{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}; - - std::vector threads; - std::vector results(10); - - for (int i = 0; i < 10; ++i) { - threads.emplace_back([&m, &results, i]() { - results[static_cast(i)] = m.trace(); - }); - } - - for (auto& t : threads) { - t.join(); - } - - for (double r : results) { - EXPECT_EQ(r, 15.0); - } -} - -// ============================================================================= -// Memory Alignment Tests -// ============================================================================= - -TEST_F(MatrixTest, MemoryAlignment) { - Matrix m; - - std::uintptr_t addr = reinterpret_cast(m.data()); - EXPECT_EQ(addr % 32, 0) << "Matrix data should be 32-byte aligned for AVX"; -} - -// ============================================================================= -// Utility Function Tests -// ============================================================================= - -TEST_F(MatrixTest, Norms) { - Matrix m{{1, 2}, {3, 4}}; - - // Frobenius norm: sqrt(1^2 + 2^2 + 3^2 + 4^2) = sqrt(30) - EXPECT_NEAR(m.frobenius_norm(), std::sqrt(30.0), tolerance); - - // Infinity norm (max row sum) - EXPECT_EQ(m.infinity_norm(), 7.0); // 
max(|1|+|2|, |3|+|4|) = max(3, 7) - - // One norm (max column sum) - EXPECT_EQ(m.one_norm(), 6.0); // max(|1|+|3|, |2|+|4|) = max(4, 6) -} - -TEST_F(MatrixTest, MinMaxElements) { - Matrix m{{3, -1, 4}, {1, -2, 5}}; - - EXPECT_EQ(m.min(), -2.0); - EXPECT_EQ(m.max(), 5.0); -} - -TEST_F(MatrixTest, ToString) { - Matrix m{{1, 2}, {3, 4}}; - std::stringstream ss; - ss << m; - - std::string expected = "[[1, 2]\n [3, 4]]"; - EXPECT_EQ(ss.str(), expected); -} diff --git a/tests/unitTests/FE/Math/test_MatrixExpr.cpp b/tests/unitTests/FE/Math/test_MatrixExpr.cpp deleted file mode 100644 index b17bce928..000000000 --- a/tests/unitTests/FE/Math/test_MatrixExpr.cpp +++ /dev/null @@ -1,527 +0,0 @@ -/** - * @file test_MatrixExpr.cpp - * @brief Unit tests for MatrixExpr.h - matrix expression templates - */ - -#include -#include "FE/Math/Matrix.h" -#include "FE/Math/MatrixExpr.h" -#include "FE/Math/Vector.h" -#include -#include -#include -#include -#include - -using namespace svmp::FE::math; - -// Test fixture for MatrixExpr tests -class MatrixExprTest : public ::testing::Test { -protected: - static constexpr double tolerance = 1e-14; - - // Custom allocator to track memory allocations - template - class TrackingAllocator { - public: - using value_type = T; - - static std::atomic allocations; - static std::atomic deallocations; - static std::atomic bytes_allocated; - - TrackingAllocator() = default; - - template - TrackingAllocator(const TrackingAllocator&) {} - - T* allocate(size_t n) { - allocations.fetch_add(1); - bytes_allocated.fetch_add(n * sizeof(T)); - return static_cast(::operator new(n * sizeof(T))); - } - - void deallocate(T* p, size_t n) { - deallocations.fetch_add(1); - ::operator delete(p); - } - - static void reset() { - allocations = 0; - deallocations = 0; - bytes_allocated = 0; - } - }; - - void SetUp() override { - TrackingAllocator::reset(); - } - - void TearDown() override {} - - template - bool approx_equal(T a, T b, T tol = tolerance) { - return std::abs(a - 
b) <= tol; - } -}; - -template -std::atomic MatrixExprTest::TrackingAllocator::allocations{0}; -template -std::atomic MatrixExprTest::TrackingAllocator::deallocations{0}; -template -std::atomic MatrixExprTest::TrackingAllocator::bytes_allocated{0}; - -// ============================================================================= -// Lazy Evaluation Tests -// ============================================================================= - -TEST_F(MatrixExprTest, LazyEvaluationNoTemporaries) { - // Expression templates should not create temporary matrices - Matrix A{{1.0, 2.0}, {3.0, 4.0}}; - Matrix B{{5.0, 6.0}, {7.0, 8.0}}; - Matrix C{{9.0, 10.0}, {11.0, 12.0}}; - - // Build expression without evaluation - auto expr = A + B - C; - - // Expression type should not be Matrix, but an expression type - using ExprType = decltype(expr); - EXPECT_FALSE((std::is_same_v>)); - - // Now evaluate - Matrix result = expr; - EXPECT_DOUBLE_EQ(result(0, 0), -3.0); - EXPECT_DOUBLE_EQ(result(0, 1), -2.0); - EXPECT_DOUBLE_EQ(result(1, 0), -1.0); - EXPECT_DOUBLE_EQ(result(1, 1), 0.0); -} - -TEST_F(MatrixExprTest, LazyEvaluationAccessPattern) { - Matrix A; - Matrix B; - for (int i = 0; i < 3; ++i) { - for (int j = 0; j < 3; ++j) { - A(i, j) = i * 3 + j + 1; - B(i, j) = (i * 3 + j + 1) * 2; - } - } - - auto expr = A + B; - - // Access individual elements without full evaluation - EXPECT_DOUBLE_EQ(expr(0, 0), 3.0); - EXPECT_DOUBLE_EQ(expr(1, 1), 15.0); - EXPECT_DOUBLE_EQ(expr(2, 2), 27.0); - - // Size should be accessible - EXPECT_EQ(expr.rows(), 3u); - EXPECT_EQ(expr.cols(), 3u); -} - -// ============================================================================= -// Matrix Multiplication Tests -// ============================================================================= - -TEST_F(MatrixExprTest, MatrixMultiplicationExpression) { - Matrix A{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}; - Matrix B{{7.0, 8.0}, {9.0, 10.0}, {11.0, 12.0}}; - - // Matrix multiplication should produce 2x2 result - 
Matrix C = A * B; - - // Verify results - EXPECT_DOUBLE_EQ(C(0, 0), 58.0); // 1*7 + 2*9 + 3*11 - EXPECT_DOUBLE_EQ(C(0, 1), 64.0); // 1*8 + 2*10 + 3*12 - EXPECT_DOUBLE_EQ(C(1, 0), 139.0); // 4*7 + 5*9 + 6*11 - EXPECT_DOUBLE_EQ(C(1, 1), 154.0); // 4*8 + 5*10 + 6*12 -} - -TEST_F(MatrixExprTest, ChainedMatrixMultiplication) { - Matrix A{{1.0, 2.0}, {3.0, 4.0}}; - Matrix B{{5.0, 6.0}, {7.0, 8.0}}; - Matrix C{{9.0, 10.0}, {11.0, 12.0}}; - - // Chain matrix multiplications: (A * B) * C - Matrix result = A * B * C; - - // First compute A * B - Matrix AB = A * B; - EXPECT_DOUBLE_EQ(AB(0, 0), 19.0); // 1*5 + 2*7 - EXPECT_DOUBLE_EQ(AB(0, 1), 22.0); // 1*6 + 2*8 - EXPECT_DOUBLE_EQ(AB(1, 0), 43.0); // 3*5 + 4*7 - EXPECT_DOUBLE_EQ(AB(1, 1), 50.0); // 3*6 + 4*8 - - // Then (A * B) * C - EXPECT_DOUBLE_EQ(result(0, 0), 413.0); // 19*9 + 22*11 - EXPECT_DOUBLE_EQ(result(0, 1), 454.0); // 19*10 + 22*12 - EXPECT_DOUBLE_EQ(result(1, 0), 937.0); // 43*9 + 50*11 - EXPECT_DOUBLE_EQ(result(1, 1), 1030.0); // 43*10 + 50*12 -} - -// ============================================================================= -// Mixed Operations Tests -// ============================================================================= - -TEST_F(MatrixExprTest, MixedMatrixOperations) { - Matrix A, B, C, D; - - // Initialize matrices - for (int i = 0; i < 3; ++i) { - for (int j = 0; j < 3; ++j) { - A(i, j) = i + j + 1; - B(i, j) = (i + 1) * (j + 1); - C(i, j) = i * j + 1; - D(i, j) = 1.0; - } - } - - // Complex expression: A * B + C * D - Matrix result = A * B + C * D; - - // Verify a few key elements - Matrix AB = A * B; - Matrix CD = C * D; - - for (int i = 0; i < 3; ++i) { - for (int j = 0; j < 3; ++j) { - EXPECT_DOUBLE_EQ(result(i, j), AB(i, j) + CD(i, j)); - } - } -} - -TEST_F(MatrixExprTest, ScalarMultiplicationInExpression) { - Matrix A{{1.0, 2.0}, {3.0, 4.0}}; - Matrix B{{5.0, 6.0}, {7.0, 8.0}}; - - Matrix result = 2.0 * (A + B) / 3.0; - - EXPECT_TRUE(approx_equal(result(0, 0), 4.0)); - 
EXPECT_TRUE(approx_equal(result(0, 1), 16.0/3.0)); - EXPECT_TRUE(approx_equal(result(1, 0), 20.0/3.0)); - EXPECT_TRUE(approx_equal(result(1, 1), 8.0)); -} - -// ============================================================================= -// Transpose Tests -// ============================================================================= - -TEST_F(MatrixExprTest, TransposeExpression) { - Matrix A{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}; - - auto AT = transpose(A); - - // Check dimensions - EXPECT_EQ(AT.rows(), 3u); - EXPECT_EQ(AT.cols(), 2u); - - // Check values - EXPECT_DOUBLE_EQ(AT(0, 0), 1.0); - EXPECT_DOUBLE_EQ(AT(0, 1), 4.0); - EXPECT_DOUBLE_EQ(AT(1, 0), 2.0); - EXPECT_DOUBLE_EQ(AT(1, 1), 5.0); - EXPECT_DOUBLE_EQ(AT(2, 0), 3.0); - EXPECT_DOUBLE_EQ(AT(2, 1), 6.0); -} - -TEST_F(MatrixExprTest, TransposeInExpression) { - Matrix A{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}; - Matrix B{{7.0, 8.0}, {9.0, 10.0}, {11.0, 12.0}}; - - // Compute A^T * B (should be 2x2) - Matrix result = transpose(A) * B; - - EXPECT_DOUBLE_EQ(result(0, 0), 89.0); // 1*7 + 3*9 + 5*11 - EXPECT_DOUBLE_EQ(result(0, 1), 98.0); // 1*8 + 3*10 + 5*12 - EXPECT_DOUBLE_EQ(result(1, 0), 116.0); // 2*7 + 4*9 + 6*11 - EXPECT_DOUBLE_EQ(result(1, 1), 128.0); // 2*8 + 4*10 + 6*12 -} - -// ============================================================================= -// Unary Operations Tests -// ============================================================================= - -TEST_F(MatrixExprTest, NegationInExpression) { - Matrix A{{1.0, -2.0}, {3.0, -4.0}}; - Matrix B{{5.0, 6.0}, {-7.0, 8.0}}; - - Matrix result = -A + (-B); - - EXPECT_DOUBLE_EQ(result(0, 0), -6.0); - EXPECT_DOUBLE_EQ(result(0, 1), -4.0); - EXPECT_DOUBLE_EQ(result(1, 0), 4.0); - EXPECT_DOUBLE_EQ(result(1, 1), -4.0); -} - -TEST_F(MatrixExprTest, AbsoluteValueExpression) { - Matrix M{{-1.5, 2.3, -4.7}, {0.0, -3.2, 5.1}}; - - Matrix result = abs(M); - - EXPECT_DOUBLE_EQ(result(0, 0), 1.5); - EXPECT_DOUBLE_EQ(result(0, 1), 2.3); - 
EXPECT_DOUBLE_EQ(result(0, 2), 4.7); - EXPECT_DOUBLE_EQ(result(1, 0), 0.0); - EXPECT_DOUBLE_EQ(result(1, 1), 3.2); - EXPECT_DOUBLE_EQ(result(1, 2), 5.1); -} - -TEST_F(MatrixExprTest, SqrtExpression) { - Matrix M{{4.0, 9.0}, {16.0, 25.0}}; - - Matrix result = sqrt(M); - - EXPECT_DOUBLE_EQ(result(0, 0), 2.0); - EXPECT_DOUBLE_EQ(result(0, 1), 3.0); - EXPECT_DOUBLE_EQ(result(1, 0), 4.0); - EXPECT_DOUBLE_EQ(result(1, 1), 5.0); -} - -// ============================================================================= -// Element-wise Operations Tests -// ============================================================================= - -TEST_F(MatrixExprTest, HadamardProductExpression) { - Matrix A{{2.0, 3.0, 4.0}, {5.0, 6.0, 7.0}}; - Matrix B{{8.0, 9.0, 10.0}, {11.0, 12.0, 13.0}}; - - Matrix result = hadamard(A, B); - - EXPECT_DOUBLE_EQ(result(0, 0), 16.0); - EXPECT_DOUBLE_EQ(result(0, 1), 27.0); - EXPECT_DOUBLE_EQ(result(0, 2), 40.0); - EXPECT_DOUBLE_EQ(result(1, 0), 55.0); - EXPECT_DOUBLE_EQ(result(1, 1), 72.0); - EXPECT_DOUBLE_EQ(result(1, 2), 91.0); -} - -TEST_F(MatrixExprTest, HadamardDivisionExpression) { - Matrix A{{10.0, 18.0}, {28.0, 36.0}}; - Matrix B{{2.0, 3.0}, {4.0, 6.0}}; - - Matrix result = hadamard_div(A, B); - - EXPECT_DOUBLE_EQ(result(0, 0), 5.0); - EXPECT_DOUBLE_EQ(result(0, 1), 6.0); - EXPECT_DOUBLE_EQ(result(1, 0), 7.0); - EXPECT_DOUBLE_EQ(result(1, 1), 6.0); -} - -// ============================================================================= -// Norm and Trace Tests -// ============================================================================= - -TEST_F(MatrixExprTest, FrobeniusNormOfExpression) { - Matrix A{{1.0, 2.0}, {3.0, 4.0}}; - Matrix B{{2.0, 2.0}, {2.0, 2.0}}; - - double norm_sq = frobenius_norm_squared(A - B); - double norm = frobenius_norm(A - B); - - // (A - B) = [[-1, 0], [1, 2]] - // norm_squared = 1 + 0 + 1 + 4 = 6 - EXPECT_DOUBLE_EQ(norm_sq, 6.0); - EXPECT_DOUBLE_EQ(norm, std::sqrt(6.0)); -} - -TEST_F(MatrixExprTest, TraceOfExpression) 
{ - Matrix A; - Matrix B; - - // Initialize as diagonal matrices - for (int i = 0; i < 3; ++i) { - for (int j = 0; j < 3; ++j) { - A(i, j) = (i == j) ? (i + 1) : 0.0; // diag(1, 2, 3) - B(i, j) = (i == j) ? (i + 4) : 0.0; // diag(4, 5, 6) - } - } - - double tr = trace(A + B); - - // trace(A + B) = trace(diag(5, 7, 9)) = 21 - EXPECT_DOUBLE_EQ(tr, 21.0); -} - -// ============================================================================= -// Type Deduction Tests -// ============================================================================= - -TEST_F(MatrixExprTest, TypeDeductionCorrectness) { - Matrix Mf{{1.0f, 2.0f}, {3.0f, 4.0f}}; - Matrix Md{{5.0, 6.0}, {7.0, 8.0}}; - - // Float expression - auto expr = Mf + Mf; - using ExprType = decltype(expr(0, 0)); - EXPECT_TRUE((std::is_same_v)); - - // Test that expression evaluates correctly - Matrix result = expr; - EXPECT_FLOAT_EQ(result(0, 0), 2.0f); - EXPECT_FLOAT_EQ(result(1, 1), 8.0f); -} - -// ============================================================================= -// SFINAE and Compile-time Tests -// ============================================================================= - -TEST_F(MatrixExprTest, SFINAEConstraints) { - // Test that MatrixExpr operators only work with MatrixExpr types - Matrix M1{{1.0, 2.0}, {3.0, 4.0}}; - Matrix M2{{5.0, 6.0}, {7.0, 8.0}}; - - // This should compile - auto expr = M1 + M2; - Matrix result = expr; - - // Verify the constraint checking - EXPECT_TRUE((std::is_base_of_v>, Matrix>)); -} - -// ============================================================================= -// Aliasing and Self-Assignment Tests -// ============================================================================= - -TEST_F(MatrixExprTest, SelfAssignmentWithExpression) { - Matrix A{{1.0, 2.0}, {3.0, 4.0}}; - Matrix B{{5.0, 6.0}, {7.0, 8.0}}; - - // Self-assignment through expression - A = A + B; - - EXPECT_DOUBLE_EQ(A(0, 0), 6.0); - EXPECT_DOUBLE_EQ(A(0, 1), 8.0); - EXPECT_DOUBLE_EQ(A(1, 0), 10.0); 
- EXPECT_DOUBLE_EQ(A(1, 1), 12.0); -} - -TEST_F(MatrixExprTest, AliasingInExpression) { - Matrix A{{2.0, 3.0}, {4.0, 5.0}}; - Matrix B{{1.0, 1.0}, {1.0, 1.0}}; - - // A appears on both sides - A = B + A; - - EXPECT_DOUBLE_EQ(A(0, 0), 3.0); - EXPECT_DOUBLE_EQ(A(0, 1), 4.0); - EXPECT_DOUBLE_EQ(A(1, 0), 5.0); - EXPECT_DOUBLE_EQ(A(1, 1), 6.0); -} - -// ============================================================================= -// Edge Cases Tests -// ============================================================================= - -TEST_F(MatrixExprTest, SingleElementMatrix) { - Matrix A{{5.0}}; - Matrix B{{3.0}}; - - Matrix result = A + B - A * 0.5; - - EXPECT_DOUBLE_EQ(result(0, 0), 5.5); -} - -TEST_F(MatrixExprTest, NonSquareMatrixOperations) { - Matrix A; - Matrix B; - - for (int i = 0; i < 2; ++i) { - for (int j = 0; j < 4; ++j) { - A(i, j) = i * 4 + j + 1; - B(i, j) = (i * 4 + j + 1) * 2; - } - } - - Matrix result = A + B - A * 0.5; - - for (int i = 0; i < 2; ++i) { - for (int j = 0; j < 4; ++j) { - double expected = A(i, j) + B(i, j) - A(i, j) * 0.5; - EXPECT_DOUBLE_EQ(result(i, j), expected); - } - } -} - -// ============================================================================= -// Diagonal Matrix Tests -// ============================================================================= - -TEST_F(MatrixExprTest, DiagonalMatrixExpression) { - Vector v{1.0, 2.0, 3.0}; - - auto diag = DiagonalExpr>(v); - - // Check dimensions - EXPECT_EQ(diag.rows(), 3u); - EXPECT_EQ(diag.cols(), 3u); - - // Check values - EXPECT_DOUBLE_EQ(diag(0, 0), 1.0); - EXPECT_DOUBLE_EQ(diag(1, 1), 2.0); - EXPECT_DOUBLE_EQ(diag(2, 2), 3.0); - - // Off-diagonal should be zero - EXPECT_DOUBLE_EQ(diag(0, 1), 0.0); - EXPECT_DOUBLE_EQ(diag(1, 0), 0.0); -} - -TEST_F(MatrixExprTest, DiagonalMatrixInExpression) { - Vector v{2.0, 3.0}; - Matrix A{{1.0, 1.0}, {1.0, 1.0}}; - - auto diag = DiagonalExpr>(v); - Matrix result = A + diag; - - EXPECT_DOUBLE_EQ(result(0, 0), 3.0); - 
EXPECT_DOUBLE_EQ(result(0, 1), 1.0); - EXPECT_DOUBLE_EQ(result(1, 0), 1.0); - EXPECT_DOUBLE_EQ(result(1, 1), 4.0); -} - -// ============================================================================= -// Complex Expression Pattern Tests -// ============================================================================= - -TEST_F(MatrixExprTest, ComplexNestedExpression) { - Matrix A{{1.0, 2.0}, {3.0, 4.0}}; - Matrix B{{5.0, 6.0}, {7.0, 8.0}}; - Matrix C{{9.0, 10.0}, {11.0, 12.0}}; - - // Complex expression with multiple operation types - Matrix result = 2.0 * abs(A - B) + sqrt(hadamard(C, C)) / 3.0; - - // |A - B| = |[-4, -4], [-4, -4]| = [4, 4], [4, 4] - // 2 * [4, 4], [4, 4] = [8, 8], [8, 8] - // C * C (element-wise) = [81, 100], [121, 144] - // sqrt(C * C) = [9, 10], [11, 12] - // sqrt(C * C) / 3 = [3, 10/3], [11/3, 4] - // result = [11, 34/3], [35/3, 12] - - EXPECT_DOUBLE_EQ(result(0, 0), 11.0); - EXPECT_TRUE(approx_equal(result(0, 1), 34.0/3.0)); - EXPECT_TRUE(approx_equal(result(1, 0), 35.0/3.0)); - EXPECT_DOUBLE_EQ(result(1, 1), 12.0); -} - -TEST_F(MatrixExprTest, MatrixVectorMixedExpression) { - Matrix A; - Vector v{1.0, 2.0, 3.0}; - - // Create identity matrix - for (int i = 0; i < 3; ++i) { - for (int j = 0; j < 3; ++j) { - A(i, j) = (i == j) ? 
1.0 : 0.0; - } - } - - // Create diagonal from vector and add to identity - auto diag = DiagonalExpr>(v); - Matrix result = A + diag; - - // Result should be diag(2, 3, 4) - EXPECT_DOUBLE_EQ(result(0, 0), 2.0); - EXPECT_DOUBLE_EQ(result(1, 1), 3.0); - EXPECT_DOUBLE_EQ(result(2, 2), 4.0); - EXPECT_DOUBLE_EQ(result(0, 1), 0.0); - EXPECT_DOUBLE_EQ(result(1, 0), 0.0); -} diff --git a/tests/unitTests/FE/Math/test_Vector.cpp b/tests/unitTests/FE/Math/test_Vector.cpp deleted file mode 100644 index 754ad819d..000000000 --- a/tests/unitTests/FE/Math/test_Vector.cpp +++ /dev/null @@ -1,588 +0,0 @@ -/** - * @file test_Vector.cpp - * @brief Unit tests for Vector.h - fixed-size vectors with expression templates - */ - -#include -#include "FE/Math/Vector.h" -#include "FE/Math/VectorExpr.h" -#include -#include -#include -#include -#include - -using namespace svmp::FE::math; - -// Test fixture for Vector tests -class VectorTest : public ::testing::Test { -protected: - static constexpr double tolerance = 1e-14; - - void SetUp() override {} - void TearDown() override {} - - // Helper function to check if two values are approximately equal - template - bool approx_equal(T a, T b, T tol = tolerance) { - return std::abs(a - b) <= tol; - } -}; - -// ============================================================================= -// Construction and Initialization Tests -// ============================================================================= - -TEST_F(VectorTest, DefaultConstruction) { - Vector v; - EXPECT_EQ(v[0], 0.0); - EXPECT_EQ(v[1], 0.0); - EXPECT_EQ(v[2], 0.0); - - Vector vf; - for (size_t i = 0; i < 4; ++i) { - EXPECT_EQ(vf[i], 0.0f); - } -} - -TEST_F(VectorTest, FillConstruction) { - Vector v(5.0); - EXPECT_EQ(v[0], 5.0); - EXPECT_EQ(v[1], 5.0); - EXPECT_EQ(v[2], 5.0); - - Vector vi(-3); - for (size_t i = 0; i < 10; ++i) { - EXPECT_EQ(vi[i], -3); - } -} - -TEST_F(VectorTest, InitializerListConstruction) { - Vector v{1.0, 2.0, 3.0}; - EXPECT_EQ(v[0], 1.0); - 
EXPECT_EQ(v[1], 2.0); - EXPECT_EQ(v[2], 3.0); - - // Partial initialization - Vector v2{1.0, 2.0}; - EXPECT_EQ(v2[0], 1.0); - EXPECT_EQ(v2[1], 2.0); - EXPECT_EQ(v2[2], 0.0); - EXPECT_EQ(v2[3], 0.0); - EXPECT_EQ(v2[4], 0.0); -} - -TEST_F(VectorTest, CopyConstruction) { - Vector v1{1.0, 2.0, 3.0}; - Vector v2(v1); - - EXPECT_EQ(v2[0], 1.0); - EXPECT_EQ(v2[1], 2.0); - EXPECT_EQ(v2[2], 3.0); - - // Ensure deep copy - v2[0] = 10.0; - EXPECT_EQ(v1[0], 1.0); - EXPECT_EQ(v2[0], 10.0); -} - -TEST_F(VectorTest, MoveConstruction) { - Vector v1{1.0, 2.0, 3.0}; - Vector v2(std::move(v1)); - - EXPECT_EQ(v2[0], 1.0); - EXPECT_EQ(v2[1], 2.0); - EXPECT_EQ(v2[2], 3.0); -} - -// ============================================================================= -// Element Access Tests -// ============================================================================= - -TEST_F(VectorTest, ElementAccess) { - Vector v{1.0, 2.0, 3.0}; - - // Non-const access - EXPECT_EQ(v[0], 1.0); - EXPECT_EQ(v[1], 2.0); - EXPECT_EQ(v[2], 3.0); - - // Modification - v[1] = 5.0; - EXPECT_EQ(v[1], 5.0); - - // Const access - const Vector cv{4.0, 5.0, 6.0}; - EXPECT_EQ(cv[0], 4.0); - EXPECT_EQ(cv[1], 5.0); - EXPECT_EQ(cv[2], 6.0); -} - -TEST_F(VectorTest, ElementAccessBounds) { - Vector v{1.0, 2.0, 3.0}; - - // at() with bounds checking - EXPECT_EQ(v.at(0), 1.0); - EXPECT_EQ(v.at(1), 2.0); - EXPECT_EQ(v.at(2), 3.0); - - // Test out of bounds throws - EXPECT_THROW(v.at(3), std::out_of_range); - EXPECT_THROW(v.at(100), std::out_of_range); -} - -TEST_F(VectorTest, DataPointerAccess) { - Vector v{1.0, 2.0, 3.0}; - - double* data = v.data(); - EXPECT_EQ(data[0], 1.0); - EXPECT_EQ(data[1], 2.0); - EXPECT_EQ(data[2], 3.0); - - // Const data access - const Vector cv{4.0, 5.0, 6.0}; - const double* cdata = cv.data(); - EXPECT_EQ(cdata[0], 4.0); - EXPECT_EQ(cdata[1], 5.0); - EXPECT_EQ(cdata[2], 6.0); -} - -// ============================================================================= -// Arithmetic Operations Tests -// 
============================================================================= - -TEST_F(VectorTest, Addition) { - Vector a{1.0, 2.0, 3.0}; - Vector b{4.0, 5.0, 6.0}; - - Vector c = a + b; - EXPECT_EQ(c[0], 5.0); - EXPECT_EQ(c[1], 7.0); - EXPECT_EQ(c[2], 9.0); -} - -TEST_F(VectorTest, Subtraction) { - Vector a{5.0, 7.0, 9.0}; - Vector b{4.0, 5.0, 6.0}; - - Vector c = a - b; - EXPECT_EQ(c[0], 1.0); - EXPECT_EQ(c[1], 2.0); - EXPECT_EQ(c[2], 3.0); -} - -TEST_F(VectorTest, ScalarMultiplication) { - Vector a{1.0, 2.0, 3.0}; - - // Scalar * Vector - Vector b = 2.0 * a; - EXPECT_EQ(b[0], 2.0); - EXPECT_EQ(b[1], 4.0); - EXPECT_EQ(b[2], 6.0); - - // Vector * Scalar - Vector c = a * 3.0; - EXPECT_EQ(c[0], 3.0); - EXPECT_EQ(c[1], 6.0); - EXPECT_EQ(c[2], 9.0); -} - -TEST_F(VectorTest, ScalarDivision) { - Vector a{2.0, 4.0, 6.0}; - - Vector b = a / 2.0; - EXPECT_EQ(b[0], 1.0); - EXPECT_EQ(b[1], 2.0); - EXPECT_EQ(b[2], 3.0); -} - -TEST_F(VectorTest, UnaryNegation) { - Vector a{1.0, -2.0, 3.0}; - - Vector b = -a; - EXPECT_EQ(b[0], -1.0); - EXPECT_EQ(b[1], 2.0); - EXPECT_EQ(b[2], -3.0); -} - -TEST_F(VectorTest, CompoundAssignment) { - Vector a{1.0, 2.0, 3.0}; - Vector b{4.0, 5.0, 6.0}; - - // += - a += b; - EXPECT_EQ(a[0], 5.0); - EXPECT_EQ(a[1], 7.0); - EXPECT_EQ(a[2], 9.0); - - // -= - a -= b; - EXPECT_EQ(a[0], 1.0); - EXPECT_EQ(a[1], 2.0); - EXPECT_EQ(a[2], 3.0); - - // *= - a *= 2.0; - EXPECT_EQ(a[0], 2.0); - EXPECT_EQ(a[1], 4.0); - EXPECT_EQ(a[2], 6.0); - - // /= - a /= 2.0; - EXPECT_EQ(a[0], 1.0); - EXPECT_EQ(a[1], 2.0); - EXPECT_EQ(a[2], 3.0); -} - -// ============================================================================= -// Vector Operations Tests -// ============================================================================= - -TEST_F(VectorTest, DotProduct) { - Vector a{1.0, 2.0, 3.0}; - Vector b{4.0, 5.0, 6.0}; - - double dot = a.dot(b); - EXPECT_EQ(dot, 32.0); // 1*4 + 2*5 + 3*6 = 4 + 10 + 18 = 32 - - // Test commutativity - EXPECT_EQ(b.dot(a), dot); - - // 
Test orthogonal vectors - Vector x{1.0, 0.0, 0.0}; - Vector y{0.0, 1.0, 0.0}; - EXPECT_EQ(x.dot(y), 0.0); -} - -TEST_F(VectorTest, CrossProduct3D) { - Vector x{1.0, 0.0, 0.0}; - Vector y{0.0, 1.0, 0.0}; - Vector z{0.0, 0.0, 1.0}; - - // Test basis vector cross products - Vector xy = x.cross(y); - EXPECT_EQ(xy[0], 0.0); - EXPECT_EQ(xy[1], 0.0); - EXPECT_EQ(xy[2], 1.0); - - Vector yx = y.cross(x); - EXPECT_EQ(yx[0], 0.0); - EXPECT_EQ(yx[1], 0.0); - EXPECT_EQ(yx[2], -1.0); - - // General cross product - Vector a{1.0, 2.0, 3.0}; - Vector b{4.0, 5.0, 6.0}; - Vector c = a.cross(b); - - EXPECT_EQ(c[0], -3.0); // 2*6 - 3*5 = 12 - 15 = -3 - EXPECT_EQ(c[1], 6.0); // 3*4 - 1*6 = 12 - 6 = 6 - EXPECT_EQ(c[2], -3.0); // 1*5 - 2*4 = 5 - 8 = -3 -} - -TEST_F(VectorTest, Norm) { - Vector v{3.0, 4.0, 0.0}; - EXPECT_EQ(v.norm(), 5.0); - - Vector unit{1.0, 0.0, 0.0}; - EXPECT_EQ(unit.norm(), 1.0); - - Vector zero{0.0, 0.0, 0.0}; - EXPECT_EQ(zero.norm(), 0.0); -} - -TEST_F(VectorTest, NormSquared) { - Vector v{3.0, 4.0, 0.0}; - EXPECT_EQ(v.norm_squared(), 25.0); - - Vector a{1.0, 2.0, 3.0}; - EXPECT_EQ(a.norm_squared(), 14.0); // 1 + 4 + 9 = 14 -} - -TEST_F(VectorTest, Normalize) { - Vector v{3.0, 4.0, 0.0}; - Vector n = v.normalized(); - - EXPECT_NEAR(n[0], 0.6, tolerance); - EXPECT_NEAR(n[1], 0.8, tolerance); - EXPECT_NEAR(n[2], 0.0, tolerance); - EXPECT_NEAR(n.norm(), 1.0, tolerance); - - // In-place normalization - v.normalize(); - EXPECT_NEAR(v[0], 0.6, tolerance); - EXPECT_NEAR(v[1], 0.8, tolerance); - EXPECT_NEAR(v.norm(), 1.0, tolerance); -} - -// ============================================================================= -// Expression Template Tests -// ============================================================================= - -TEST_F(VectorTest, ExpressionTemplatesNoTemporaries) { - Vector a{1.0, 2.0, 3.0}; - Vector b{4.0, 5.0, 6.0}; - Vector c{7.0, 8.0, 9.0}; - Vector d{10.0, 11.0, 12.0}; - - // Complex expression should create no temporaries - Vector result = a + b - 
c + d; - - EXPECT_EQ(result[0], 8.0); // 1 + 4 - 7 + 10 - EXPECT_EQ(result[1], 10.0); // 2 + 5 - 8 + 11 - EXPECT_EQ(result[2], 12.0); // 3 + 6 - 9 + 12 -} - -TEST_F(VectorTest, LazyEvaluation) { - Vector a{1.0, 2.0, 3.0}; - Vector b{4.0, 5.0, 6.0}; - - // Expression should not be evaluated until assignment - auto expr = a + b; // No computation yet - - Vector result = expr; // Evaluation happens here - EXPECT_EQ(result[0], 5.0); - EXPECT_EQ(result[1], 7.0); - EXPECT_EQ(result[2], 9.0); -} - -TEST_F(VectorTest, MixedExpressions) { - Vector a{1.0, 2.0, 3.0}; - Vector b{4.0, 5.0, 6.0}; - double scalar = 2.0; - - // Complex mixed expression - Vector result = scalar * (a + b) - a / scalar; - - EXPECT_NEAR(result[0], 9.5, tolerance); // 2*(1+4) - 1/2 - EXPECT_NEAR(result[1], 13.0, tolerance); // 2*(2+5) - 2/2 - EXPECT_NEAR(result[2], 16.5, tolerance); // 2*(3+6) - 3/2 -} - -// ============================================================================= -// Special Values Tests -// ============================================================================= - -TEST_F(VectorTest, ZeroVector) { - Vector zero = Vector::zero(); - EXPECT_EQ(zero[0], 0.0); - EXPECT_EQ(zero[1], 0.0); - EXPECT_EQ(zero[2], 0.0); - EXPECT_EQ(zero.norm(), 0.0); -} - -TEST_F(VectorTest, OnesVector) { - Vector ones = Vector::ones(); - EXPECT_EQ(ones[0], 1.0); - EXPECT_EQ(ones[1], 1.0); - EXPECT_EQ(ones[2], 1.0); -} - -TEST_F(VectorTest, BasisVectors) { - auto e0 = Vector::basis(0); - EXPECT_EQ(e0[0], 1.0); - EXPECT_EQ(e0[1], 0.0); - EXPECT_EQ(e0[2], 0.0); - - auto e1 = Vector::basis(1); - EXPECT_EQ(e1[0], 0.0); - EXPECT_EQ(e1[1], 1.0); - EXPECT_EQ(e1[2], 0.0); - - auto e2 = Vector::basis(2); - EXPECT_EQ(e2[0], 0.0); - EXPECT_EQ(e2[1], 0.0); - EXPECT_EQ(e2[2], 1.0); -} - -// ============================================================================= -// Edge Cases and Error Handling Tests -// ============================================================================= - -TEST_F(VectorTest, 
DivisionByZero) { - Vector v{1.0, 2.0, 3.0}; - - // Division by zero should produce inf - Vector result = v / 0.0; - EXPECT_TRUE(std::isinf(result[0])); - EXPECT_TRUE(std::isinf(result[1])); - EXPECT_TRUE(std::isinf(result[2])); -} - -TEST_F(VectorTest, NormalizeZeroVector) { - Vector zero{0.0, 0.0, 0.0}; - - // Normalizing zero vector should handle gracefully - Vector n = zero.normalized(); - EXPECT_TRUE(std::isnan(n[0]) || n[0] == 0.0); -} - -TEST_F(VectorTest, ExtremeLargeValues) { - double large = 1e308; // Near double max - Vector v{large, large, large}; - - // Operations should not overflow - Vector half = v / 2.0; - EXPECT_FALSE(std::isinf(half[0])); - EXPECT_EQ(half[0], large / 2.0); -} - -TEST_F(VectorTest, ExtremeSmallValues) { - double tiny = 1e-308; // Near double min - Vector v{tiny, tiny, tiny}; - - // Operations should maintain precision - Vector doubled = v * 2.0; - EXPECT_EQ(doubled[0], tiny * 2.0); -} - -// ============================================================================= -// Numerical Precision Tests -// ============================================================================= - -TEST_F(VectorTest, NumericalStability) { - // Test Kahan summation for better precision - Vector v{1e16, 1.0, -1e16, 1.0}; - // Computed for future validation - demonstrates numerical precision issues - [[maybe_unused]] double sum = v[0] + v[1] + v[2] + v[3]; - - // Direct summation might lose precision - // But vector operations should maintain it - Vector a{1e16, 0.0, -1e16, 0.0}; - Vector b{0.0, 1.0, 0.0, 1.0}; - Vector c = a + b; - - EXPECT_EQ(c[0], 1e16); - EXPECT_EQ(c[1], 1.0); - EXPECT_EQ(c[2], -1e16); - EXPECT_EQ(c[3], 1.0); -} - -TEST_F(VectorTest, OrthogonalityPreservation) { - // Create nearly orthogonal vectors - Vector a{1.0, 1e-15, 0.0}; - Vector b{0.0, 1.0, 0.0}; - - double dot = a.dot(b); - EXPECT_NEAR(dot, 1e-15, 1e-16); -} - -// ============================================================================= -// Comparison Operations Tests 
-// ============================================================================= - -TEST_F(VectorTest, Equality) { - Vector a{1.0, 2.0, 3.0}; - Vector b{1.0, 2.0, 3.0}; - Vector c{1.0, 2.0, 3.1}; - - EXPECT_TRUE(a == b); - EXPECT_FALSE(a == c); - EXPECT_FALSE(a != b); - EXPECT_TRUE(a != c); -} - -TEST_F(VectorTest, ApproximateEquality) { - Vector a{1.0, 2.0, 3.0}; - Vector b{1.0 + 1e-15, 2.0 - 1e-15, 3.0 + 1e-15}; - - EXPECT_TRUE(a.approx_equal(b, 1e-14)); - EXPECT_FALSE(a.approx_equal(b, 1e-16)); -} - -// ============================================================================= -// Thread Safety Tests -// ============================================================================= - -TEST_F(VectorTest, ThreadSafetyReadOnly) { - Vector v{1.0, 2.0, 3.0}; - - // Multiple threads reading should be safe - std::vector threads; - std::vector results(10); - - for (int i = 0; i < 10; ++i) { - threads.emplace_back([&v, &results, i]() { - results[static_cast(i)] = v.norm(); - }); - } - - for (auto& t : threads) { - t.join(); - } - - // All threads should get same result - double expected = v.norm(); - for (double r : results) { - EXPECT_EQ(r, expected); - } -} - -TEST_F(VectorTest, ThreadSafetyIsolated) { - // Each thread works on its own vector - std::vector threads; - std::vector> results(10); - - for (int i = 0; i < 10; ++i) { - threads.emplace_back([&results, i]() { - Vector local{static_cast(i), 0.0, 0.0}; - results[static_cast(i)] = local * 2.0; - }); - } - - for (auto& t : threads) { - t.join(); - } - - // Check each thread computed correctly - for (int i = 0; i < 10; ++i) { - EXPECT_EQ(results[static_cast(i)][0], 2.0 * i); - } -} - -// ============================================================================= -// Memory Alignment Tests -// ============================================================================= - -TEST_F(VectorTest, MemoryAlignment) { - Vector v; - - // Check that data is properly aligned for SIMD - std::uintptr_t addr = 
reinterpret_cast(v.data()); - EXPECT_EQ(addr % 32, 0) << "Vector data should be 32-byte aligned for AVX"; -} - -// ============================================================================= -// Utility Function Tests -// ============================================================================= - -TEST_F(VectorTest, MinMaxElements) { - Vector v{3.0, -1.0, 4.0, 1.0, -2.0}; - - EXPECT_EQ(v.min(), -2.0); - EXPECT_EQ(v.max(), 4.0); - EXPECT_EQ(v.min_index(), 4); - EXPECT_EQ(v.max_index(), 2); -} - -TEST_F(VectorTest, Sum) { - Vector v{1.0, 2.0, 3.0, 4.0}; - EXPECT_EQ(v.sum(), 10.0); - - Vector zero{0.0, 0.0, 0.0}; - EXPECT_EQ(zero.sum(), 0.0); -} - -TEST_F(VectorTest, Mean) { - Vector v{1.0, 2.0, 3.0, 4.0}; - EXPECT_EQ(v.mean(), 2.5); -} - -TEST_F(VectorTest, ToString) { - Vector v{1.0, 2.0, 3.0}; - std::stringstream ss; - ss << v; - - std::string expected = "[1, 2, 3]"; - EXPECT_EQ(ss.str(), expected); -} diff --git a/tests/unitTests/FE/Math/test_VectorExpr.cpp b/tests/unitTests/FE/Math/test_VectorExpr.cpp deleted file mode 100644 index 0e7363c64..000000000 --- a/tests/unitTests/FE/Math/test_VectorExpr.cpp +++ /dev/null @@ -1,408 +0,0 @@ -/** - * @file test_VectorExpr.cpp - * @brief Unit tests for VectorExpr.h - vector expression templates - */ - -#include -#include "FE/Math/Vector.h" -#include "FE/Math/VectorExpr.h" -#include -#include -#include -#include -#include - -using namespace svmp::FE::math; - -// Test fixture for VectorExpr tests -class VectorExprTest : public ::testing::Test { -protected: - static constexpr double tolerance = 1e-14; - - // Custom allocator to track memory allocations - template - class TrackingAllocator { - public: - using value_type = T; - - static std::atomic allocations; - static std::atomic deallocations; - static std::atomic bytes_allocated; - - TrackingAllocator() = default; - - template - TrackingAllocator(const TrackingAllocator&) {} - - T* allocate(size_t n) { - allocations.fetch_add(1); - bytes_allocated.fetch_add(n * 
sizeof(T)); - return static_cast(::operator new(n * sizeof(T))); - } - - void deallocate(T* p, size_t n) { - deallocations.fetch_add(1); - ::operator delete(p); - } - - static void reset() { - allocations = 0; - deallocations = 0; - bytes_allocated = 0; - } - }; - - void SetUp() override { - TrackingAllocator::reset(); - } - - void TearDown() override {} - - template - bool approx_equal(T a, T b, T tol = tolerance) { - return std::abs(a - b) <= tol; - } -}; - -template -std::atomic VectorExprTest::TrackingAllocator::allocations{0}; -template -std::atomic VectorExprTest::TrackingAllocator::deallocations{0}; -template -std::atomic VectorExprTest::TrackingAllocator::bytes_allocated{0}; - -// ============================================================================= -// Lazy Evaluation Tests -// ============================================================================= - -TEST_F(VectorExprTest, LazyEvaluationNoTemporaries) { - // Expression templates should not create temporary vectors - Vector a{1.0, 2.0, 3.0}; - Vector b{4.0, 5.0, 6.0}; - Vector c{7.0, 8.0, 9.0}; - - // Build expression without evaluation - auto expr = a + b - c; - - // Expression type should not be Vector, but an expression type - using ExprType = decltype(expr); - EXPECT_FALSE((std::is_same_v>)); - - // Now evaluate - Vector result = expr; - EXPECT_DOUBLE_EQ(result[0], -2.0); - EXPECT_DOUBLE_EQ(result[1], -1.0); - EXPECT_DOUBLE_EQ(result[2], 0.0); -} - -TEST_F(VectorExprTest, LazyEvaluationAccessPattern) { - Vector a{1.0, 2.0, 3.0, 4.0}; - Vector b{5.0, 6.0, 7.0, 8.0}; - - auto expr = a + b; - - // Access individual elements without full evaluation - EXPECT_DOUBLE_EQ(expr[0], 6.0); - EXPECT_DOUBLE_EQ(expr[2], 10.0); - - // Size should be accessible - EXPECT_EQ(expr.size(), 4u); -} - -// ============================================================================= -// Expression Chaining Tests -// ============================================================================= - 
-TEST_F(VectorExprTest, ChainedAdditionSubtraction) { - Vector a{1.0, 2.0, 3.0}; - Vector b{4.0, 5.0, 6.0}; - Vector c{2.0, 3.0, 4.0}; - Vector d{1.0, 1.0, 1.0}; - - // Chain multiple operations - Vector result = a + b - c + d; - - EXPECT_DOUBLE_EQ(result[0], 4.0); - EXPECT_DOUBLE_EQ(result[1], 5.0); - EXPECT_DOUBLE_EQ(result[2], 6.0); -} - -TEST_F(VectorExprTest, DeepExpressionNesting) { - Vector v1{1.0, 2.0}; - Vector v2{3.0, 4.0}; - Vector v3{5.0, 6.0}; - Vector v4{7.0, 8.0}; - Vector v5{9.0, 10.0}; - - // Deep nesting - Vector result = ((v1 + v2) - (v3 - v4)) + v5; - - EXPECT_DOUBLE_EQ(result[0], 15.0); - EXPECT_DOUBLE_EQ(result[1], 18.0); -} - -// ============================================================================= -// Mixed Operations Tests -// ============================================================================= - -TEST_F(VectorExprTest, ScalarMultiplicationInExpression) { - Vector a{1.0, 2.0, 3.0}; - Vector b{4.0, 5.0, 6.0}; - - Vector result = 2.0 * (a + b) / 3.0; - - EXPECT_TRUE(approx_equal(result[0], 10.0/3.0)); - EXPECT_TRUE(approx_equal(result[1], 14.0/3.0)); - EXPECT_TRUE(approx_equal(result[2], 6.0)); -} - -TEST_F(VectorExprTest, MixedScalarVectorOperations) { - Vector v{2.0, 4.0, 6.0, 8.0}; - - // Complex mixed expression - Vector result = 3.0 * v / 2.0 + v * 0.5 - 1.0 * v; - - EXPECT_DOUBLE_EQ(result[0], 2.0); - EXPECT_DOUBLE_EQ(result[1], 4.0); - EXPECT_DOUBLE_EQ(result[2], 6.0); - EXPECT_DOUBLE_EQ(result[3], 8.0); -} - -// ============================================================================= -// Unary Operations Tests -// ============================================================================= - -TEST_F(VectorExprTest, NegationInExpression) { - Vector a{1.0, -2.0, 3.0}; - Vector b{4.0, 5.0, -6.0}; - - Vector result = -a + (-b); - - EXPECT_DOUBLE_EQ(result[0], -5.0); - EXPECT_DOUBLE_EQ(result[1], -3.0); - EXPECT_DOUBLE_EQ(result[2], 3.0); -} - -TEST_F(VectorExprTest, AbsoluteValueExpression) { - Vector v{-1.5, 2.3, 
-4.7, 0.0}; - - Vector result = abs(v); - - EXPECT_DOUBLE_EQ(result[0], 1.5); - EXPECT_DOUBLE_EQ(result[1], 2.3); - EXPECT_DOUBLE_EQ(result[2], 4.7); - EXPECT_DOUBLE_EQ(result[3], 0.0); -} - -TEST_F(VectorExprTest, SqrtExpression) { - Vector v{4.0, 9.0, 16.0}; - - Vector result = sqrt(v); - - EXPECT_DOUBLE_EQ(result[0], 2.0); - EXPECT_DOUBLE_EQ(result[1], 3.0); - EXPECT_DOUBLE_EQ(result[2], 4.0); -} - -// ============================================================================= -// Element-wise Operations Tests -// ============================================================================= - -TEST_F(VectorExprTest, HadamardProductExpression) { - Vector a{2.0, 3.0, 4.0}; - Vector b{5.0, 6.0, 7.0}; - - Vector result = hadamard(a, b); - - EXPECT_DOUBLE_EQ(result[0], 10.0); - EXPECT_DOUBLE_EQ(result[1], 18.0); - EXPECT_DOUBLE_EQ(result[2], 28.0); -} - -TEST_F(VectorExprTest, HadamardDivisionExpression) { - Vector a{10.0, 18.0, 28.0}; - Vector b{2.0, 3.0, 4.0}; - - Vector result = hadamard_div(a, b); - - EXPECT_DOUBLE_EQ(result[0], 5.0); - EXPECT_DOUBLE_EQ(result[1], 6.0); - EXPECT_DOUBLE_EQ(result[2], 7.0); -} - -// ============================================================================= -// Dot Product and Norm Tests -// ============================================================================= - -TEST_F(VectorExprTest, DotProductOfExpressions) { - Vector a{1.0, 2.0, 3.0}; - Vector b{4.0, 5.0, 6.0}; - Vector c{2.0, 2.0, 2.0}; - - // Dot product of expressions - double result = dot(a + b, c); - - EXPECT_DOUBLE_EQ(result, 42.0); -} - -TEST_F(VectorExprTest, NormOfExpression) { - Vector a{3.0, 0.0}; - Vector b{0.0, 4.0}; - - double result = norm(a + b); - - EXPECT_DOUBLE_EQ(result, 5.0); // norm of (3,4) = 5 -} - -TEST_F(VectorExprTest, NormalizeExpression) { - Vector v{3.0, 0.0, 4.0}; - - Vector result = normalize(v); - - EXPECT_DOUBLE_EQ(result[0], 0.6); - EXPECT_DOUBLE_EQ(result[1], 0.0); - EXPECT_DOUBLE_EQ(result[2], 0.8); -} - -// 
============================================================================= -// Type Deduction Tests -// ============================================================================= - -TEST_F(VectorExprTest, TypeDeductionCorrectness) { - Vector vf{1.0f, 2.0f, 3.0f}; - Vector vd{4.0, 5.0, 6.0}; - - // Mixed type operations should promote to higher precision - auto expr = vf + vf; // float expression - using ExprType = decltype(expr[0]); - EXPECT_TRUE((std::is_same_v)); - - // Test that expression evaluates correctly - Vector result = expr; - EXPECT_FLOAT_EQ(result[0], 2.0f); - EXPECT_FLOAT_EQ(result[1], 4.0f); - EXPECT_FLOAT_EQ(result[2], 6.0f); -} - -// ============================================================================= -// SFINAE and Compile-time Tests -// ============================================================================= - -TEST_F(VectorExprTest, SFINAEConstraints) { - // Test that VectorExpr operators only work with VectorExpr types - Vector v1{1.0, 2.0, 3.0}; - Vector v2{4.0, 5.0, 6.0}; - - // This should compile - auto expr = v1 + v2; - Vector result = expr; - - // Verify the constraint checking - EXPECT_TRUE((std::is_base_of_v>, Vector>)); -} - -// ============================================================================= -// Aliasing and Self-Assignment Tests -// ============================================================================= - -TEST_F(VectorExprTest, SelfAssignmentWithExpression) { - Vector a{1.0, 2.0, 3.0}; - Vector b{4.0, 5.0, 6.0}; - - // Self-assignment through expression - a = a + b; - - EXPECT_DOUBLE_EQ(a[0], 5.0); - EXPECT_DOUBLE_EQ(a[1], 7.0); - EXPECT_DOUBLE_EQ(a[2], 9.0); -} - -TEST_F(VectorExprTest, AliasingInExpression) { - Vector a{2.0, 3.0, 4.0}; - Vector b{1.0, 1.0, 1.0}; - - // a appears on both sides - a = b + a; - - EXPECT_DOUBLE_EQ(a[0], 3.0); - EXPECT_DOUBLE_EQ(a[1], 4.0); - EXPECT_DOUBLE_EQ(a[2], 5.0); -} - -// ============================================================================= -// Edge 
Cases Tests -// ============================================================================= - -TEST_F(VectorExprTest, SingleElementVector) { - Vector a{5.0}; - Vector b{3.0}; - - Vector result = a + b - a * 0.5; - - EXPECT_DOUBLE_EQ(result[0], 5.5); -} - -TEST_F(VectorExprTest, EmptyExpression) { - Vector v{1.0, 2.0, 3.0}; - - // Expression that evaluates to identity - Vector result = v + v * 0.0; - - EXPECT_DOUBLE_EQ(result[0], 1.0); - EXPECT_DOUBLE_EQ(result[1], 2.0); - EXPECT_DOUBLE_EQ(result[2], 3.0); -} - -TEST_F(VectorExprTest, LargeVectorExpression) { - const size_t N = 100; - Vector a, b, c; - - for (size_t i = 0; i < N; ++i) { - a[i] = static_cast(i); - b[i] = static_cast(i * 2); - c[i] = static_cast(i * 3); - } - - Vector result = a + b - c / 2.0; - - for (size_t i = 0; i < N; ++i) { - EXPECT_DOUBLE_EQ(result[i], i + 2.0 * i - 1.5 * i); - } -} - -// ============================================================================= -// Complex Expression Pattern Tests -// ============================================================================= - -TEST_F(VectorExprTest, ComplexNestedExpression) { - Vector a{1.0, 2.0, 3.0}; - Vector b{4.0, 5.0, 6.0}; - Vector c{7.0, 8.0, 9.0}; - - // Complex expression with multiple operation types - Vector result = 2.0 * abs(a - b) + sqrt(hadamard(c, c)) / 3.0; - - // Verify each component - // |a - b| = |(-3, -3, -3)| = (3, 3, 3) - // 2 * (3, 3, 3) = (6, 6, 6) - // c * c = (49, 64, 81) - // sqrt(c * c) = (7, 8, 9) - // sqrt(c * c) / 3 = (7/3, 8/3, 3) - // result = (6 + 7/3, 6 + 8/3, 6 + 3) = (25/3, 26/3, 9) - - EXPECT_TRUE(approx_equal(result[0], 25.0/3.0)); - EXPECT_TRUE(approx_equal(result[1], 26.0/3.0)); - EXPECT_DOUBLE_EQ(result[2], 9.0); -} - -TEST_F(VectorExprTest, ChainedUnaryOperations) { - Vector v{-4.0, -9.0, -16.0, -25.0}; - - // Chain of unary operations - Vector result = sqrt(abs(-v)); - - EXPECT_DOUBLE_EQ(result[0], 2.0); - EXPECT_DOUBLE_EQ(result[1], 3.0); - EXPECT_DOUBLE_EQ(result[2], 4.0); - 
EXPECT_DOUBLE_EQ(result[3], 5.0); -} From 82a1158eceeb4ad5c09591f9139bc29cea2e5e55 Mon Sep 17 00:00:00 2001 From: Zachary Sexton Date: Tue, 9 Jun 2026 18:21:37 -0700 Subject: [PATCH 14/22] adding doxygen to Common submodule --- Code/Source/solver/FE/Common/FEException.h | 189 +++++++++++++++ Code/Source/solver/FE/Common/Types.h | 253 ++++++++++++++------- 2 files changed, 354 insertions(+), 88 deletions(-) diff --git a/Code/Source/solver/FE/Common/FEException.h b/Code/Source/solver/FE/Common/FEException.h index 67b7da234..033b85eb1 100644 --- a/Code/Source/solver/FE/Common/FEException.h +++ b/Code/Source/solver/FE/Common/FEException.h @@ -22,8 +22,34 @@ namespace svmp { namespace FE { +/// \defgroup FE_CommonExceptions Exceptions +/// \ingroup FE_Common +/// \brief FE exception hierarchy and throw/check helper functions. +/// +/// \details All FE-specific exceptions derive from FEException, which itself +/// derives from the shared solver ExceptionBase. Specialized subclasses carry +/// structured context (element type, DOF index, backend name and error code, +/// iteration counts, Jacobian determinants) so call sites can report +/// actionable diagnostics. The free helper templates raise(), throw_if(), +/// check_arg(), check_not_null(), and check_index() wrap common validation +/// patterns with source-location capture. +/// @{ + +/** + * @brief Base exception type for errors originating in the FE library + * + * Carries a status code and source location alongside the message. Derived + * classes select an appropriate StatusCode and may attach additional + * structured context. + */ class FEException : public ExceptionBase { public: + /// @brief Construct with a message and optional status code and source location. + /// @param message Human-readable error description. + /// @param status Status code classifying the failure. + /// @param file Source file where the error was raised. + /// @param line Source line where the error was raised. 
+ /// @param function Function where the error was raised. FEException(const std::string& message, StatusCode status = StatusCode::Unknown, const char* file = "", @@ -38,6 +64,11 @@ class FEException : public ExceptionBase { { } + /// @brief Construct with a message and source location, using an Unknown status. + /// @param message Human-readable error description. + /// @param file Source file where the error was raised. + /// @param line Source line where the error was raised. + /// @param function Function where the error was raised. FEException(const std::string& message, const char* file, int line, @@ -46,11 +77,21 @@ class FEException : public ExceptionBase { { } + /// @brief Status code classifying the failure. + /// @return The status code recorded at construction. StatusCode status() const noexcept { return status_code(); } }; +/** + * @brief An argument failed validation + */ class InvalidArgumentException : public FEException { public: + /// @brief Construct with a message and optional source location. + /// @param message Human-readable error description. + /// @param file Source file where the error was raised. + /// @param line Source line where the error was raised. + /// @param function Function where the error was raised. InvalidArgumentException(const std::string& message, const char* file = "", int line = 0, @@ -61,8 +102,19 @@ class InvalidArgumentException : public FEException { } }; +/** + * @brief Unsupported or malformed element request + * + * Records the offending element type so error reports can name it. + */ class InvalidElementException : public FEException { public: + /// @brief Construct with a message and optional element-type context. + /// @param message Human-readable error description. + /// @param element_type Name of the offending element type; appended to the message when non-empty. + /// @param file Source file where the error was raised. + /// @param line Source line where the error was raised. 
+ /// @param function Function where the error was raised. InvalidElementException(const std::string& message, std::string element_type = "", const char* file = "", @@ -77,6 +129,8 @@ class InvalidElementException : public FEException { { } + /// @brief Name of the offending element type. + /// @return Element-type name; empty when not provided. const std::string& element_type() const noexcept { return element_type_; } private: @@ -93,8 +147,19 @@ class InvalidElementException : public FEException { std::string element_type_; }; +/** + * @brief Degree-of-freedom numbering or lookup failure + * + * Records the offending DOF index so error reports can name it. + */ class DofException : public FEException { public: + /// @brief Construct with a message and optional DOF-index context. + /// @param message Human-readable error description. + /// @param dof_index Offending DOF index; appended to the message unless it equals invalid_dof_index(). + /// @param file Source file where the error was raised. + /// @param line Source line where the error was raised. + /// @param function Function where the error was raised. DofException(const std::string& message, long long dof_index = invalid_dof_index(), const char* file = "", @@ -109,7 +174,11 @@ class DofException : public FEException { { } + /// @brief Offending DOF index. + /// @return DOF index; invalid_dof_index() when not provided. long long dof_index() const noexcept { return dof_index_; } + /// @brief Sentinel meaning "no DOF index attached". + /// @return The sentinel value -1. static constexpr long long invalid_dof_index() noexcept { return -1; } private: @@ -126,8 +195,16 @@ class DofException : public FEException { long long dof_index_ = invalid_dof_index(); }; +/** + * @brief Global assembly failure + */ class AssemblyException : public FEException { public: + /// @brief Construct with a message and optional source location. + /// @param message Human-readable error description. 
+ /// @param file Source file where the error was raised. + /// @param line Source line where the error was raised. + /// @param function Function where the error was raised. AssemblyException(const std::string& message, const char* file = "", int line = 0, @@ -137,8 +214,21 @@ class AssemblyException : public FEException { } }; +/** + * @brief Failure reported by a linear-algebra or solver backend + * + * Records the backend name and its native error code so error reports can + * identify the failing dependency. + */ class BackendException : public FEException { public: + /// @brief Construct with a message and optional backend context. + /// @param message Human-readable error description. + /// @param backend_name Name of the failing backend; appended to the message when non-empty. + /// @param error_code Backend-native error code; appended to the message when nonzero. + /// @param file Source file where the error was raised. + /// @param line Source line where the error was raised. + /// @param function Function where the error was raised. BackendException(const std::string& message, std::string backend_name = "", int error_code = 0, @@ -155,7 +245,11 @@ class BackendException : public FEException { { } + /// @brief Name of the failing backend. + /// @return Backend name; empty when not provided. const std::string& backend_name() const noexcept { return backend_name_; } + /// @brief Backend-native error code. + /// @return Error code; zero when not provided. int error_code() const noexcept { return error_code_; } private: @@ -185,8 +279,16 @@ class BackendException : public FEException { int error_code_ = 0; }; +/** + * @brief Requested feature is not implemented + */ class NotImplementedException : public FEException { public: + /// @brief Construct from the name of the missing feature. + /// @param feature Description of the unimplemented feature. + /// @param file Source file where the error was raised. 
+ /// @param line Source line where the error was raised. + /// @param function Function where the error was raised. NotImplementedException(const std::string& feature, const char* file = "", int line = 0, @@ -200,8 +302,16 @@ class NotImplementedException : public FEException { } }; +/** + * @brief Required initialization step has not been performed + */ class NotInitializedException : public FEException { public: + /// @brief Construct from the name of the uninitialized feature. + /// @param feature Description of the missing initialization. + /// @param file Source file where the error was raised. + /// @param line Source line where the error was raised. + /// @param function Function where the error was raised. NotInitializedException(const std::string &feature, const char *file, int line = 0, @@ -215,8 +325,21 @@ class NotInitializedException : public FEException { } }; +/** + * @brief Iterative process failed to converge + * + * Records the iteration count and final residual so error reports can show + * how far the iteration progressed. + */ class ConvergenceException : public FEException { public: + /// @brief Construct with a message and optional iteration context. + /// @param message Human-readable error description. + /// @param iteration Iteration at which the failure was detected; appended to the message when non-negative. + /// @param residual Final residual; appended to the message when positive. + /// @param file Source file where the error was raised. + /// @param line Source line where the error was raised. + /// @param function Function where the error was raised. ConvergenceException(const std::string& message, int iteration = -1, double residual = 0.0, @@ -233,7 +356,11 @@ class ConvergenceException : public FEException { { } + /// @brief Iteration at which the failure was detected. + /// @return Iteration count; -1 when not provided. int iteration() const noexcept { return iteration_; } + /// @brief Final residual value. 
+ /// @return Residual; 0.0 when not provided. double residual() const noexcept { return residual_; } private: @@ -257,8 +384,20 @@ class ConvergenceException : public FEException { double residual_ = 0.0; }; +/** + * @brief Element geometric mapping is singular or inverted + * + * Records the offending Jacobian determinant so error reports can show the + * degeneracy. + */ class SingularMappingException : public FEException { public: + /// @brief Construct with a message and the offending Jacobian determinant. + /// @param message Human-readable error description. + /// @param jacobian_det Jacobian determinant at the failure point; appended to the message. + /// @param file Source file where the error was raised. + /// @param line Source line where the error was raised. + /// @param function Function where the error was raised. SingularMappingException(const std::string& message, double jacobian_det = 0.0, const char* file = "", @@ -273,6 +412,8 @@ class SingularMappingException : public FEException { { } + /// @brief Jacobian determinant at the failure point. + /// @return The determinant recorded at construction. double jacobian_det() const noexcept { return jacobian_det_; } private: @@ -285,12 +426,27 @@ class SingularMappingException : public FEException { double jacobian_det_ = 0.0; }; +/** + * @brief Throw an FE exception with source-location capture + * @tparam ExceptionT Exception type to throw. + * @tparam Args Constructor argument types forwarded to the exception. + * @param location Source location to record in the exception. + * @param args Arguments forwarded to the exception constructor. + */ template [[noreturn]] inline void raise(SourceLocation location, Args&&... args) { ::svmp::raise(location, std::forward(args)...); } +/** + * @brief Throw an FE exception when a condition holds + * @tparam ExceptionT Exception type to throw; defaults to FEException. + * @tparam Args Constructor argument types forwarded to the exception. 
+ * @param condition Condition that triggers the throw when true. + * @param location Source location to record in the exception. + * @param args Arguments forwarded to the exception constructor. + */ template inline void throw_if(bool condition, SourceLocation location, Args&&... args) { @@ -299,6 +455,14 @@ inline void throw_if(bool condition, SourceLocation location, Args&&... args) } } +/** + * @brief Validate an argument condition, throwing when it fails + * @tparam ExceptionT Exception type to throw; defaults to InvalidArgumentException. + * @tparam Args Constructor argument types forwarded to the exception. + * @param condition Condition that must hold for the argument to be valid. + * @param location Source location to record in the exception. + * @param args Arguments forwarded to the exception constructor. + */ template inline void check_arg(bool condition, SourceLocation location, Args&&... args) { @@ -306,6 +470,15 @@ inline void check_arg(bool condition, SourceLocation location, Args&&... args) std::forward(args)...); } +/** + * @brief Validate that a pointer is non-null, throwing when it is null + * @tparam ExceptionT Exception type to throw; defaults to InvalidArgumentException. + * @tparam PointerT Pointer-like type being checked. + * @tparam Args Constructor argument types forwarded to the exception. + * @param ptr Pointer to validate. + * @param location Source location to record in the exception. + * @param args Arguments forwarded to the exception constructor. + */ template inline void check_not_null(PointerT ptr, SourceLocation location, @@ -314,6 +487,15 @@ inline void check_not_null(PointerT ptr, SourceLocation location, ::svmp::check_not_null(ptr, location, std::forward(args)...); } +/** + * @brief Validate that an index lies in [0, size), throwing when out of bounds + * @tparam ExceptionT Exception type to throw; defaults to InvalidArgumentException. + * @tparam IndexT Integral index type. + * @tparam SizeT Integral size type. 
+ * @param index Index to validate. + * @param size Exclusive upper bound for the index. + * @param location Source location to record in the exception. + */ template inline void check_index(IndexT index, SizeT size, SourceLocation location) @@ -329,12 +511,19 @@ inline void check_index(IndexT index, SizeT size, SourceLocation location) " out of bounds [0, " + std::to_string(fe_check_size_value) + ")"); } +/** + * @brief Throw NotImplementedException for a missing feature + * @param feature Description of the unimplemented feature. + * @param location Source location to record in the exception. + */ [[noreturn]] inline void not_implemented(const std::string& feature, SourceLocation location) { ::svmp::FE::raise(location, feature); } +/// @} + } // namespace FE } // namespace svmp diff --git a/Code/Source/solver/FE/Common/Types.h b/Code/Source/solver/FE/Common/Types.h index e3d5a46e9..1f57ffcc5 100644 --- a/Code/Source/solver/FE/Common/Types.h +++ b/Code/Source/solver/FE/Common/Types.h @@ -16,18 +16,26 @@ #if defined(SVMP_FE_WITH_MESH) && SVMP_FE_WITH_MESH # include "Mesh/Core/MeshTypes.h" +/// Nonzero when FE shares scalar/index types with the Mesh library. # define SVMP_FE_HAS_MESH_TYPES 1 #else // Build FE without Mesh types unless explicitly enabled. +/// Nonzero when FE shares scalar/index types with the Mesh library. # define SVMP_FE_HAS_MESH_TYPES 0 #endif #if !SVMP_FE_HAS_MESH_TYPES namespace svmp { -// Minimal fallback when the Mesh library is not available. -// Keeps FE compilation self-contained while preserving the same namespace. #ifndef SVMP_CELL_FAMILY_DEFINED +/// Guard marking that svmp::CellFamily has been defined. #define SVMP_CELL_FAMILY_DEFINED 1 +/** + * @brief Minimal fallback for svmp::CellFamily when the Mesh library is unavailable + * @ingroup FE_CommonTypes + * + * Keeps FE compilation self-contained while preserving the same namespace + * and enumerator set as the Mesh library's cell-family classification. 
+ */ enum class CellFamily { Point, Line, @@ -51,16 +59,40 @@ enum class CellFamily { #include #if defined(_MSC_VER) +/// Portable restrict qualifier for aliasing-free pointer parameters. # define SVMP_RESTRICT __restrict #elif defined(__clang__) || defined(__GNUC__) +/// Portable restrict qualifier for aliasing-free pointer parameters. # define SVMP_RESTRICT __restrict__ #else +/// Portable restrict qualifier for aliasing-free pointer parameters. # define SVMP_RESTRICT #endif +/// \defgroup FE_Common Common +/// \ingroup FE +/// \brief Shared vocabulary types, constants, and exception infrastructure used by every FE module. +/// +/// \details The Common module collects the foundational definitions that the +/// rest of the FE library builds on: index and scalar type aliases; element, +/// basis, quadrature, and field enumerations; sentinel constants and strong +/// type wrappers; and the FE exception hierarchy together with its +/// argument-checking helpers. + namespace svmp { namespace FE { +/// \defgroup FE_CommonTypes Types +/// \ingroup FE_Common +/// \brief Core type aliases, enumerations, constants, geometric types, and compile-time traits. +/// +/// \details This group documents the index and identifier types used for +/// element-local and global numbering, the element/basis/quadrature/field +/// enumerations shared across modules, sentinel constants, reference- and +/// physical-space geometric aliases, and the strong-type utilities that +/// prevent accidental mixing of conceptually distinct values. +/// @{ + // ============================================================================ // Index Types // ============================================================================ @@ -88,10 +120,16 @@ using GlobalIndex = std::int64_t; * Provides type safety at compile time. */ struct DofIndex { - GlobalIndex value; + GlobalIndex value; ///< Underlying global DOF index; negative values are invalid. 
+ /// @brief Construct a DOF index, defaulting to the invalid sentinel. + /// @param v Global DOF index value. constexpr explicit DofIndex(GlobalIndex v = -1) noexcept : value(v) {} + /// @brief Convert to the underlying global index value. + /// @return The stored global index. constexpr operator GlobalIndex() const noexcept { return value; } + /// @brief Check whether this index refers to a valid DOF. + /// @return True when the stored value is non-negative. constexpr bool is_valid() const noexcept { return value >= 0; } }; @@ -109,28 +147,32 @@ using BlockId = std::uint16_t; // Import mesh library scalar/index types when available (optional dependency). #if SVMP_FE_HAS_MESH_TYPES -using MeshIndex = svmp::index_t; -using MeshOffset = svmp::offset_t; -using MeshGlobalId = svmp::gid_t; -using Real = svmp::real_t; // Use same precision as Mesh library +using MeshIndex = svmp::index_t; ///< Local mesh entity index, shared with the Mesh library. +using MeshOffset = svmp::offset_t; ///< Offset type for mesh connectivity arrays. +using MeshGlobalId = svmp::gid_t; ///< Global mesh entity identifier. +using Real = svmp::real_t; ///< Floating-point scalar type; same precision as the Mesh library. #else -using MeshIndex = std::int32_t; -using MeshOffset = std::int64_t; -using MeshGlobalId = std::int64_t; -using Real = double; +using MeshIndex = std::int32_t; ///< Local mesh entity index, shared with the Mesh library. +using MeshOffset = std::int64_t; ///< Offset type for mesh connectivity arrays. +using MeshGlobalId = std::int64_t; ///< Global mesh entity identifier. +using Real = double; ///< Floating-point scalar type; same precision as the Mesh library. #endif // ============================================================================ // Constants // ============================================================================ +/// Sentinel for an unset or out-of-range local index. 
constexpr LocalIndex INVALID_LOCAL_INDEX = std::numeric_limits::max(); +/// Sentinel for an unset or out-of-range global index. constexpr GlobalIndex INVALID_GLOBAL_INDEX = -1; +/// Sentinel FieldId meaning "uninitialized / no field". constexpr FieldId INVALID_FIELD_ID = std::numeric_limits::max(); /// Sentinel FieldId for geometry-only quantities (no DOF dependence). /// Uses first registered field's space for quadrature, but logically decoupled /// from any specific field's DOFs. constexpr FieldId GEOMETRY_FIELD_ID = std::numeric_limits::max() - 1; +/// Sentinel for an unset or out-of-range block identifier. constexpr BlockId INVALID_BLOCK_ID = std::numeric_limits::max(); /** @@ -169,9 +211,9 @@ constexpr int MAX_FIELD_VALUE_COMPONENTS = 9; * Node-scoped auxiliary models with Lagrange Kronecker delta). */ struct FieldValueEntry { - FieldId field{INVALID_FIELD_ID}; - int n_components{0}; - Real components[MAX_FIELD_VALUE_COMPONENTS]{}; + FieldId field{INVALID_FIELD_ID}; ///< Field this value belongs to. + int n_components{0}; ///< Number of valid entries in components. + Real components[MAX_FIELD_VALUE_COMPONENTS]{}; ///< Component values, row-major for tensors. 
}; // ============================================================================ @@ -186,115 +228,115 @@ struct FieldValueEntry { */ enum class ElementType : std::uint8_t { // Linear elements - Line2 = 0, // 2-node line - Triangle3 = 1, // 3-node triangle - Quad4 = 2, // 4-node quadrilateral - Tetra4 = 3, // 4-node tetrahedron - Hex8 = 4, // 8-node hexahedron - Wedge6 = 5, // 6-node wedge/prism - Pyramid5 = 6, // 5-node pyramid + Line2 = 0, ///< 2-node line + Triangle3 = 1, ///< 3-node triangle + Quad4 = 2, ///< 4-node quadrilateral + Tetra4 = 3, ///< 4-node tetrahedron + Hex8 = 4, ///< 8-node hexahedron + Wedge6 = 5, ///< 6-node wedge/prism + Pyramid5 = 6, ///< 5-node pyramid // Quadratic elements - Line3 = 10, // 3-node line - Triangle6 = 11, // 6-node triangle - Quad9 = 12, // 9-node quadrilateral (bi-quadratic) - Quad8 = 13, // 8-node quadrilateral (serendipity) - Tetra10 = 14, // 10-node tetrahedron - Hex27 = 15, // 27-node hexahedron (tri-quadratic) - Hex20 = 16, // 20-node hexahedron (serendipity) - Wedge15 = 17, // 15-node wedge - Wedge18 = 18, // 18-node wedge (complete quadratic) - Pyramid13 = 19, // 13-node pyramid - Pyramid14 = 20, // 14-node pyramid + Line3 = 10, ///< 3-node line + Triangle6 = 11, ///< 6-node triangle + Quad9 = 12, ///< 9-node quadrilateral (bi-quadratic) + Quad8 = 13, ///< 8-node quadrilateral (serendipity) + Tetra10 = 14, ///< 10-node tetrahedron + Hex27 = 15, ///< 27-node hexahedron (tri-quadratic) + Hex20 = 16, ///< 20-node hexahedron (serendipity) + Wedge15 = 17, ///< 15-node wedge + Wedge18 = 18, ///< 18-node wedge (complete quadratic) + Pyramid13 = 19, ///< 13-node pyramid + Pyramid14 = 20, ///< 14-node pyramid // Special elements - Point1 = 30, // 1-node point element + Point1 = 30, ///< 1-node point element - Unknown = 255 + Unknown = 255 ///< Unrecognized or uninitialized element type }; /** * @brief Quadrature rule types */ enum class QuadratureType : std::uint8_t { - GaussLegendre, // Standard Gaussian quadrature - 
GaussLobatto, // Includes endpoints (for spectral elements) - Newton, // Newton-Cotes rules - Reduced, // Order-based reduced integration for locking - PositionBased, // Position-based reduced integration (legacy compatible) - Composite, // Composite rules for adaptivity - Custom // User-defined quadrature points + GaussLegendre, ///< Standard Gaussian quadrature + GaussLobatto, ///< Includes endpoints (for spectral elements) + Newton, ///< Newton-Cotes rules + Reduced, ///< Order-based reduced integration for locking + PositionBased, ///< Position-based reduced integration (legacy compatible) + Composite, ///< Composite rules for adaptivity + Custom ///< User-defined quadrature points }; /** * @brief Basis function families */ enum class BasisType : std::uint8_t { - Lagrange, // Standard nodal Lagrange basis - Hierarchical, // Hierarchical/modal basis - Bernstein, // Bernstein polynomials - NURBS, // Non-uniform rational B-splines - BSpline, // Non-rational B-spline basis - Spectral, // Spectral element basis - Serendipity, // Serendipity elements - Hermite, // Hermite C1 continuity basis - RaviartThomas, // H(div) Raviart-Thomas family - Nedelec, // H(curl) Nedelec edge elements - BDM, // H(div) Brezzi-Douglas-Marini family - Bubble, // Interior bubble functions for enrichment - Custom // User-defined basis + Lagrange, ///< Standard nodal Lagrange basis + Hierarchical, ///< Hierarchical/modal basis + Bernstein, ///< Bernstein polynomials + NURBS, ///< Non-uniform rational B-splines + BSpline, ///< Non-rational B-spline basis + Spectral, ///< Spectral element basis + Serendipity, ///< Serendipity elements + Hermite, ///< Hermite C1 continuity basis + RaviartThomas, ///< H(div) Raviart-Thomas family + Nedelec, ///< H(curl) Nedelec edge elements + BDM, ///< H(div) Brezzi-Douglas-Marini family + Bubble, ///< Interior bubble functions for enrichment + Custom ///< User-defined basis }; /** * @brief Field types for function spaces */ enum class FieldType : std::uint8_t 
{ - Scalar, // Scalar field (temperature, pressure) - Vector, // Vector field (velocity, displacement) - Tensor, // Tensor field (stress, strain) - SymmetricTensor, // Symmetric tensor field - Mixed // Mixed/composite field + Scalar, ///< Scalar field (temperature, pressure) + Vector, ///< Vector field (velocity, displacement) + Tensor, ///< Tensor field (stress, strain) + SymmetricTensor, ///< Symmetric tensor field + Mixed ///< Mixed/composite field }; /** * @brief Continuity requirements for function spaces */ enum class Continuity : std::uint8_t { - C0, // Continuous (standard FEM) - C1, // C1 continuous (for plates/shells) - L2, // L2 (discontinuous) - H_div, // H(div) conforming - H_curl, // H(curl) conforming - Custom + C0, ///< Continuous (standard FEM) + C1, ///< C1 continuous (for plates/shells) + L2, ///< L2 (discontinuous) + H_div, ///< H(div) conforming + H_curl, ///< H(curl) conforming + Custom ///< User-defined continuity requirement }; /** * @brief Assembly strategies */ enum class AssemblyStrategy : std::uint8_t { - ElementByElement, // Traditional element loop - Vectorized, // SIMD vectorized assembly - MatrixFree, // Matrix-free operators - Hybrid // Mixed strategy + ElementByElement, ///< Traditional element loop + Vectorized, ///< SIMD vectorized assembly + MatrixFree, ///< Matrix-free operators + Hybrid ///< Mixed strategy }; /** * @brief Status codes for FE operations */ enum class FEStatus : std::uint8_t { - Success = 0, - InvalidArgument = 1, - InvalidElement = 2, - SingularMapping = 3, - QuadratureError = 4, - AssemblyError = 5, - BackendError = 6, - NotImplemented = 7, - ConvergenceError = 8, - AllocationError = 9, - MPIError = 10, - IOError = 11, - Unknown = 255 + Success = 0, ///< Operation completed successfully + InvalidArgument = 1, ///< An argument failed validation + InvalidElement = 2, ///< Unsupported or malformed element + SingularMapping = 3, ///< Element mapping Jacobian is singular + QuadratureError = 4, ///< Quadrature rule 
construction or evaluation failed + AssemblyError = 5, ///< Global assembly failure + BackendError = 6, ///< Linear-algebra backend failure + NotImplemented = 7, ///< Requested feature is not implemented + ConvergenceError = 8, ///< Iterative process failed to converge + AllocationError = 9, ///< Memory allocation failure + MPIError = 10, ///< MPI communication failure + IOError = 11, ///< File or stream I/O failure + Unknown = 255 ///< Unclassified error }; // ============================================================================ @@ -303,6 +345,7 @@ enum class FEStatus : std::uint8_t { /** * @brief Point in reference element coordinates + * @tparam Dim Reference-space dimension */ template using ReferencePoint = std::array(Dim)>; @@ -314,6 +357,8 @@ using PhysicalPoint = std::array; /** * @brief Jacobian matrix type + * @tparam SpatialDim Physical-space dimension (rows) + * @tparam ReferenceDim Reference-space dimension (columns) */ template using Jacobian = std::array(ReferenceDim)>, static_cast(SpatialDim)>; @@ -327,31 +372,51 @@ using Jacobian = std::array(ReferenceD * * Prevents accidental mixing of conceptually different types that have * the same underlying representation. + * + * @tparam T Underlying value type + * @tparam Tag Empty tag type that distinguishes otherwise identical wrappers */ template class StrongType { public: + /// @brief Underlying value type. using ValueType = T; + /// @brief Value-initialize the wrapped value. constexpr StrongType() noexcept(std::is_nothrow_default_constructible_v) : value_{} {} + /// @brief Wrap an explicit value. + /// @param value Value to store. constexpr explicit StrongType(T value) noexcept(std::is_nothrow_move_constructible_v) : value_(std::move(value)) {} + /// @brief Access the wrapped value. + /// @return Reference to the wrapped value. constexpr T& get() noexcept { return value_; } + /// @brief Access the wrapped value. + /// @return Reference to the wrapped value. 
constexpr const T& get() const noexcept { return value_; } - // Explicit conversion + /// @brief Explicitly convert back to the underlying type. + /// @return Copy of the wrapped value. constexpr explicit operator T() const noexcept { return value_; } - // Comparison operators + /// @brief Compare wrapped values for equality. + /// @param other Wrapper to compare against. + /// @return True when the wrapped values are equal. constexpr bool operator==(const StrongType& other) const noexcept { return value_ == other.value_; } + /// @brief Compare wrapped values for inequality. + /// @param other Wrapper to compare against. + /// @return True when the wrapped values differ. constexpr bool operator!=(const StrongType& other) const noexcept { return value_ != other.value_; } + /// @brief Order by wrapped value. + /// @param other Wrapper to compare against. + /// @return True when this wrapped value orders before the other. constexpr bool operator<(const StrongType& other) const noexcept { return value_ < other.value_; } @@ -361,12 +426,14 @@ class StrongType { }; // Specific strong types for common use cases -struct QuadraturePointTag {}; -struct QuadratureWeightTag {}; -struct BasisValueTag {}; -struct BasisGradientTag {}; +struct QuadraturePointTag {}; ///< Tag type for quadrature-point indices. +struct QuadratureWeightTag {}; ///< Tag type for quadrature weights. +struct BasisValueTag {}; ///< Tag type for basis-function values. +struct BasisGradientTag {}; ///< Tag type for basis-function gradients. +/// Type-safe index of a quadrature point within a rule. using QuadraturePointIndex = StrongType; +/// Type-safe quadrature weight value. using QuadratureWeight = StrongType; // ============================================================================ @@ -388,6 +455,7 @@ struct is_index_type : std::true_type {}; template<> struct is_index_type : std::true_type {}; +/// Convenience variable template for is_index_type. 
template inline constexpr bool is_index_type_v = is_index_type::value; @@ -400,6 +468,7 @@ struct is_field_type : std::false_type {}; template<> struct is_field_type : std::true_type {}; +/// Convenience variable template for is_field_type. template inline constexpr bool is_field_type_v = is_field_type::value; @@ -409,6 +478,8 @@ inline constexpr bool is_field_type_v = is_field_type::value; /** * @brief Convert FE ElementType to Mesh CellFamily + * @param elem Element type to classify. + * @return Cell family of the element's linear topology; Point for unknown types. */ constexpr svmp::CellFamily to_mesh_family(ElementType elem) noexcept { switch(elem) { @@ -454,6 +525,8 @@ constexpr svmp::CellFamily to_mesh_family(ElementType elem) noexcept { /** * @brief Get spatial dimension of element type + * @param elem Element type to query. + * @return Reference dimension from 0 (point) to 3 (volume); -1 for unknown types. */ constexpr int element_dimension(ElementType elem) noexcept { switch(elem) { @@ -487,6 +560,8 @@ constexpr int element_dimension(ElementType elem) noexcept { /** * @brief Convert status code to string for error reporting + * @param status Status code to describe. + * @return Static human-readable description of the status. 
*/ inline const char* status_to_string(FEStatus status) noexcept { switch(status) { @@ -506,6 +581,8 @@ inline const char* status_to_string(FEStatus status) noexcept { } } +/// @} + } // namespace FE } // namespace svmp From 917c638668e816199f23018cb6920d0670fafb0a Mon Sep 17 00:00:00 2001 From: Zachary Sexton Date: Tue, 9 Jun 2026 19:06:10 -0700 Subject: [PATCH 15/22] aligning exception throws and raises with the function-template calls and using `SVMP_HERE` for file, line, and function source location information --- Code/Source/solver/FE/Basis/BasisExceptions.h | 40 ---------- Code/Source/solver/FE/Basis/BasisFactory.cpp | 38 ++++------ Code/Source/solver/FE/Basis/BasisFunction.cpp | 8 +- Code/Source/solver/FE/Basis/LagrangeBasis.cpp | 46 +++++------- .../FE/Basis/NodeOrderingConventions.cpp | 28 +++---- .../solver/FE/Basis/SerendipityBasis.cpp | 73 ++++++++----------- 6 files changed, 78 insertions(+), 155 deletions(-) diff --git a/Code/Source/solver/FE/Basis/BasisExceptions.h b/Code/Source/solver/FE/Basis/BasisExceptions.h index c1af17049..8f8fd3c3c 100644 --- a/Code/Source/solver/FE/Basis/BasisExceptions.h +++ b/Code/Source/solver/FE/Basis/BasisExceptions.h @@ -83,46 +83,6 @@ class BasisConstructionException : public BasisException { : BasisException(message, file, line, function, StatusCode::InternalError) {} }; -#define BASIS_CHECK_CONFIG(condition, message) \ - do { \ - if (!(condition)) { \ - throw ::svmp::FE::basis::BasisConfigurationException((message), \ - __FILE__, __LINE__, __func__); \ - } \ - } while (false) - -#define BASIS_CHECK_COMPAT(condition, message) \ - do { \ - if (!(condition)) { \ - throw ::svmp::FE::basis::BasisElementCompatibilityException((message), \ - __FILE__, __LINE__, __func__); \ - } \ - } while (false) - -#define BASIS_CHECK_EVAL(condition, message) \ - do { \ - if (!(condition)) { \ - throw ::svmp::FE::basis::BasisEvaluationException((message), \ - __FILE__, __LINE__, __func__); \ - } \ - } while (false) - -#define 
BASIS_CHECK_NODE_ORDER(condition, message) \ - do { \ - if (!(condition)) { \ - throw ::svmp::FE::basis::BasisNodeOrderingException((message), \ - __FILE__, __LINE__, __func__); \ - } \ - } while (false) - -#define BASIS_CHECK_CONSTRUCTION(condition, message) \ - do { \ - if (!(condition)) { \ - throw ::svmp::FE::basis::BasisConstructionException((message), \ - __FILE__, __LINE__, __func__); \ - } \ - } while (false) - } // namespace basis } // namespace FE } // namespace svmp diff --git a/Code/Source/solver/FE/Basis/BasisFactory.cpp b/Code/Source/solver/FE/Basis/BasisFactory.cpp index b48e25536..c3130d16f 100644 --- a/Code/Source/solver/FE/Basis/BasisFactory.cpp +++ b/Code/Source/solver/FE/Basis/BasisFactory.cpp @@ -16,28 +16,20 @@ namespace { int require_basis_order(const BasisRequest& req, const char* missing_message, const char* negative_message) { - if (!req.order.has_value()) { - throw BasisConfigurationException(missing_message, - __FILE__, __LINE__, __func__); - } - if (*req.order < 0) { - throw BasisConfigurationException(negative_message, - __FILE__, __LINE__, __func__); - } + FE::throw_if(!req.order.has_value(), SVMP_HERE, + missing_message); + FE::throw_if(*req.order < 0, SVMP_HERE, + negative_message); return *req.order; } void require_scalar_c0_request(const BasisRequest& req) { - if (req.field_type != FieldType::Scalar) { - throw BasisConfigurationException( - "BasisFactory: Lagrange/Serendipity bases support scalar fields only", - __FILE__, __LINE__, __func__); - } - if (req.continuity != Continuity::C0) { - throw BasisConfigurationException( - "BasisFactory: Lagrange/Serendipity bases support C0 continuity only", - __FILE__, __LINE__, __func__); - } + FE::throw_if( + req.field_type != FieldType::Scalar, SVMP_HERE, + "BasisFactory: Lagrange/Serendipity bases support scalar fields only"); + FE::throw_if( + req.continuity != Continuity::C0, SVMP_HERE, + "BasisFactory: Lagrange/Serendipity bases support C0 continuity only"); } std::shared_ptr 
create_lagrange(const BasisRequest& req) { @@ -69,9 +61,8 @@ std::shared_ptr create(const BasisRequest& req) { case BasisType::Serendipity: return create_serendipity(req); default: - throw BasisConfigurationException( - "BasisFactory: requested basis family is outside the scalar Lagrange/Serendipity scope", - __FILE__, __LINE__, __func__); + FE::raise(SVMP_HERE, + "BasisFactory: requested basis family is outside the scalar Lagrange/Serendipity scope"); } } @@ -90,9 +81,8 @@ BasisRequest default_basis_request(ElementType element_type) { if (order >= 0) { return BasisRequest{element_type, BasisType::Lagrange, order}; } - throw BasisElementCompatibilityException( - "BasisFactory: no default basis is defined for the requested element type", - __FILE__, __LINE__, __func__); + FE::raise(SVMP_HERE, + "BasisFactory: no default basis is defined for the requested element type"); } } } diff --git a/Code/Source/solver/FE/Basis/BasisFunction.cpp b/Code/Source/solver/FE/Basis/BasisFunction.cpp index b98a36292..591f6751a 100644 --- a/Code/Source/solver/FE/Basis/BasisFunction.cpp +++ b/Code/Source/solver/FE/Basis/BasisFunction.cpp @@ -28,16 +28,16 @@ void BasisFunction::evaluate_gradients(const math::Vector& xi, std::vector& gradients) const { (void)xi; (void)gradients; - throw BasisEvaluationException("Analytic gradient evaluation is not implemented for this basis", - __FILE__, __LINE__, __func__); + FE::raise(SVMP_HERE, + "Analytic gradient evaluation is not implemented for this basis"); } void BasisFunction::evaluate_hessians(const math::Vector& xi, std::vector& hessians) const { (void)xi; (void)hessians; - throw BasisEvaluationException("Analytic Hessian evaluation is not implemented for this basis", - __FILE__, __LINE__, __func__); + FE::raise(SVMP_HERE, + "Analytic Hessian evaluation is not implemented for this basis"); } void BasisFunction::evaluate_all(const math::Vector& xi, diff --git a/Code/Source/solver/FE/Basis/LagrangeBasis.cpp 
b/Code/Source/solver/FE/Basis/LagrangeBasis.cpp index 4f8c15bb1..b32199d03 100644 --- a/Code/Source/solver/FE/Basis/LagrangeBasis.cpp +++ b/Code/Source/solver/FE/Basis/LagrangeBasis.cpp @@ -44,10 +44,8 @@ struct NormalizedLagrangeRequest { // Validate and return the supported basis topology for a Lagrange element type. BasisTopology supported_lagrange_topology(ElementType type) { const BasisTopology top = topology(type); - if (top == BasisTopology::Unknown) { - throw BasisElementCompatibilityException("LagrangeBasis: unsupported element type", - __FILE__, __LINE__, __func__); - } + FE::throw_if(top == BasisTopology::Unknown, SVMP_HERE, + "LagrangeBasis: unsupported element type"); return top; } @@ -67,23 +65,19 @@ NormalizedLagrangeRequest normalize_lagrange_request(ElementType element_type, i case ElementType::Wedge18: return {ElementType::Wedge6, std::max(order, 2)}; case ElementType::Quad8: - throw BasisElementCompatibilityException( - "LagrangeBasis: Quad8 is serendipity; use SerendipityBasis", - __FILE__, __LINE__, __func__); + FE::raise(SVMP_HERE, + "LagrangeBasis: Quad8 is serendipity; use SerendipityBasis"); case ElementType::Hex20: - throw BasisElementCompatibilityException( - "LagrangeBasis: Hex20 is serendipity; use SerendipityBasis", - __FILE__, __LINE__, __func__); + FE::raise(SVMP_HERE, + "LagrangeBasis: Hex20 is serendipity; use SerendipityBasis"); case ElementType::Wedge15: - throw BasisElementCompatibilityException( - "LagrangeBasis: Wedge15 is serendipity; use SerendipityBasis", - __FILE__, __LINE__, __func__); + FE::raise(SVMP_HERE, + "LagrangeBasis: Wedge15 is serendipity; use SerendipityBasis"); case ElementType::Pyramid5: case ElementType::Pyramid13: case ElementType::Pyramid14: - throw BasisElementCompatibilityException( - "LagrangeBasis: pyramid support is not within the current solver basis scope", - __FILE__, __LINE__, __func__); + FE::raise(SVMP_HERE, + "LagrangeBasis: pyramid support is not within the current solver basis scope"); 
default: return {element_type, order}; } @@ -315,10 +309,8 @@ LagrangeBasis::LagrangeBasis(ElementType type, int order) const auto normalized = normalize_lagrange_request(element_type_, order_); element_type_ = normalized.element_type; order_ = normalized.order; - if (order_ < 0) { - throw BasisConfigurationException("LagrangeBasis requires non-negative polynomial order", - __FILE__, __LINE__, __func__); - } + FE::throw_if(order_ < 0, SVMP_HERE, + "LagrangeBasis requires non-negative polynomial order"); topology_ = supported_lagrange_topology(element_type_); dimension_ = reference_dimension(element_type_); @@ -366,8 +358,8 @@ void LagrangeBasis::init_nodes() { break; } - throw BasisElementCompatibilityException("Unsupported element type in LagrangeBasis::init_nodes", - __FILE__, __LINE__, __func__); + FE::raise(SVMP_HERE, + "Unsupported element type in LagrangeBasis::init_nodes"); } // Build the single reference node for a point basis. @@ -419,10 +411,8 @@ void LagrangeBasis::build_wedge_nodes() { const auto tri_exp = simplex_exponent_from_point(node, BasisTopology::Triangle, order_); auto it = std::find(simplex_exponents_.begin(), simplex_exponents_.end(), tri_exp); - if (it == simplex_exponents_.end()) { - throw BasisConstructionException("LagrangeBasis: wedge node triangle index lookup failed", - __FILE__, __LINE__, __func__); - } + FE::throw_if(it == simplex_exponents_.end(), SVMP_HERE, + "LagrangeBasis: wedge node triangle index lookup failed"); const std::size_t tri_index = static_cast(std::distance(simplex_exponents_.begin(), it)); wedge_indices_.push_back({tri_index, axis_index_pm_one(node[2], order_)}); @@ -555,8 +545,8 @@ void LagrangeBasis::evaluate_all_to(const Vec3& xi, return; } - throw BasisEvaluationException("Unsupported element in LagrangeBasis evaluation", - __FILE__, __LINE__, __func__); + FE::raise(SVMP_HERE, + "Unsupported element in LagrangeBasis evaluation"); } void LagrangeBasis::evaluate_values(const Vec3& xi, diff --git 
a/Code/Source/solver/FE/Basis/NodeOrderingConventions.cpp b/Code/Source/solver/FE/Basis/NodeOrderingConventions.cpp index 76662abe1..850f8cd0a 100644 --- a/Code/Source/solver/FE/Basis/NodeOrderingConventions.cpp +++ b/Code/Source/solver/FE/Basis/NodeOrderingConventions.cpp @@ -318,10 +318,8 @@ std::vector generate_wedge_nodes(int order) { } std::vector complete_lagrange_nodes(ElementType canonical_type, int order) { - if (order < 0) { - throw BasisNodeOrderingException("ReferenceNodeLayout requires non-negative Lagrange order", - __FILE__, __LINE__, __func__); - } + FE::throw_if(order < 0, SVMP_HERE, + "ReferenceNodeLayout requires non-negative Lagrange order"); const ElementType type = canonical_lagrange_type(canonical_type); switch (type) { case ElementType::Point1: @@ -339,11 +337,11 @@ std::vector complete_lagrange_nodes(ElementType canonical_type, int order case ElementType::Wedge6: return generate_wedge_nodes(order); case ElementType::Pyramid5: - throw BasisNodeOrderingException("ReferenceNodeLayout: pyramid node ordering is disabled", - __FILE__, __LINE__, __func__); + FE::raise(SVMP_HERE, + "ReferenceNodeLayout: pyramid node ordering is disabled"); default: - throw BasisNodeOrderingException("ReferenceNodeLayout: unsupported Lagrange topology", - __FILE__, __LINE__, __func__); + FE::raise(SVMP_HERE, + "ReferenceNodeLayout: unsupported Lagrange topology"); } } @@ -370,11 +368,11 @@ std::vector element_nodes(ElementType elem_type) { return nodes; } case ElementType::Pyramid13: - throw BasisNodeOrderingException("ReferenceNodeLayout: pyramid node ordering is disabled", - __FILE__, __LINE__, __func__); + FE::raise(SVMP_HERE, + "ReferenceNodeLayout: pyramid node ordering is disabled"); default: - throw BasisNodeOrderingException("ReferenceNodeLayout: unknown element type", - __FILE__, __LINE__, __func__); + FE::raise(SVMP_HERE, + "ReferenceNodeLayout: unknown element type"); } } @@ -383,10 +381,8 @@ std::vector element_nodes(ElementType elem_type) { math::Vector 
ReferenceNodeLayout::get_node_coords(ElementType elem_type, std::size_t local_node) { const auto nodes = element_nodes(elem_type); - if (local_node >= nodes.size()) { - throw BasisNodeOrderingException("ReferenceNodeLayout::get_node_coords: node index out of range", - __FILE__, __LINE__, __func__); - } + FE::throw_if(local_node >= nodes.size(), SVMP_HERE, + "ReferenceNodeLayout::get_node_coords: node index out of range"); return nodes[local_node]; } diff --git a/Code/Source/solver/FE/Basis/SerendipityBasis.cpp b/Code/Source/solver/FE/Basis/SerendipityBasis.cpp index 30eac9c38..006d43fdc 100644 --- a/Code/Source/solver/FE/Basis/SerendipityBasis.cpp +++ b/Code/Source/solver/FE/Basis/SerendipityBasis.cpp @@ -115,11 +115,9 @@ std::vector quad_serendipity_nodes(int order, std::size_t total_size) { nodes.push_back(Vec3{Real(-1), Real(1) - Real(2 * i) * inv_order, Real(0)}); } - if (nodes.size() > total_size) { - throw BasisConstructionException( - "SerendipityBasis: quadrilateral serendipity boundary nodes exceed requested size", - __FILE__, __LINE__, __func__); - } + FE::throw_if( + nodes.size() > total_size, SVMP_HERE, + "SerendipityBasis: quadrilateral serendipity boundary nodes exceed requested size"); const std::size_t interior_count = total_size - nodes.size(); if (interior_count == 0u) { @@ -157,11 +155,9 @@ std::vector quad_serendipity_nodes(int order, std::size_t total_size) { return a[0] < b[0]; }); - if (interior_count > interior_candidates.size()) { - throw BasisConstructionException( - "SerendipityBasis: insufficient quadrilateral interior nodes for requested serendipity order", - __FILE__, __LINE__, __func__); - } + FE::throw_if( + interior_count > interior_candidates.size(), SVMP_HERE, + "SerendipityBasis: insufficient quadrilateral interior nodes for requested serendipity order"); nodes.insert(nodes.end(), interior_candidates.begin(), @@ -181,11 +177,9 @@ std::vector quad_serendipity_inverse_vandermonde( std::span> exponents, int order) { const int n = 
static_cast(nodes.size()); - if (n == 0 || exponents.size() != nodes.size()) { - throw BasisConstructionException( - "SerendipityBasis: invalid quadrilateral serendipity interpolation setup", - __FILE__, __LINE__, __func__); - } + FE::throw_if( + n == 0 || exponents.size() != nodes.size(), SVMP_HERE, + "SerendipityBasis: invalid quadrilateral serendipity interpolation setup"); std::vector vandermonde(static_cast(n * n), Real(0)); auto idx = [n](int row, int col) -> std::size_t { @@ -499,19 +493,15 @@ SerendipityBasis::SerendipityBasis(ElementType type, int order, bool geometry_mo if (order_ < 1) { order_ = 1; } - if (type == ElementType::Quad8 && order_ != 2) { - throw BasisConfigurationException( - "SerendipityBasis: Quad8 is only valid for quadratic order 2; use Quad4 for higher-order quadrilateral serendipity", - __FILE__, __LINE__, __func__); - } + FE::throw_if( + type == ElementType::Quad8 && order_ != 2, SVMP_HERE, + "SerendipityBasis: Quad8 is only valid for quadratic order 2; use Quad4 for higher-order quadrilateral serendipity"); quad_monomial_exponents_ = quad_serendipity_exponents(order_); size_ = quad_monomial_exponents_.size(); nodes_ = quad_serendipity_nodes(order_, size_); - if (nodes_.size() != size_) { - throw BasisConstructionException( - "SerendipityBasis: quadrilateral serendipity setup produced inconsistent sizes", - __FILE__, __LINE__, __func__); - } + FE::throw_if( + nodes_.size() != size_, SVMP_HERE, + "SerendipityBasis: quadrilateral serendipity setup produced inconsistent sizes"); quad_inv_vandermonde_ = quad_serendipity_inverse_vandermonde(nodes_, quad_monomial_exponents_, order_); } else if (type == ElementType::Hex8 || type == ElementType::Hex20) { dimension_ = 3; @@ -521,9 +511,8 @@ SerendipityBasis::SerendipityBasis(ElementType type, int order, bool geometry_mo } else if (order_ == 2) { size_ = 20; } else { - throw BasisConfigurationException( - "SerendipityBasis supports up to quadratic on hexahedra", - __FILE__, __LINE__, __func__); 
+ FE::raise(SVMP_HERE, + "SerendipityBasis supports up to quadratic on hexahedra"); } } else if (type == ElementType::Wedge15) { dimension_ = 3; @@ -533,13 +522,12 @@ SerendipityBasis::SerendipityBasis(ElementType type, int order, bool geometry_mo if (order_ == 2) { size_ = 15; } else { - throw BasisConfigurationException( - "SerendipityBasis supports up to quadratic on wedge15", - __FILE__, __LINE__, __func__); + FE::raise(SVMP_HERE, + "SerendipityBasis supports up to quadratic on wedge15"); } } else { - throw BasisElementCompatibilityException("SerendipityBasis supports Quad4/Quad8, Hex8/Hex20, and Wedge15 elements", - __FILE__, __LINE__, __func__); + FE::raise(SVMP_HERE, + "SerendipityBasis supports Quad4/Quad8, Hex8/Hex20, and Wedge15 elements"); } if (nodes_.empty()) { @@ -573,12 +561,11 @@ void SerendipityBasis::evaluate_all_to(const math::Vector& xi, const Real z = xi[2]; if (dimension_ == 2) { - if (quad_monomial_exponents_.size() != size_ || - quad_inv_vandermonde_.size() != size_ * size_) { - throw BasisEvaluationException( - "SerendipityBasis: quadrilateral interpolation tables are not initialized for value evaluation", - __FILE__, __LINE__, __func__); - } + FE::throw_if( + quad_monomial_exponents_.size() != size_ || + quad_inv_vandermonde_.size() != size_ * size_, + SVMP_HERE, + "SerendipityBasis: quadrilateral interpolation tables are not initialized for value evaluation"); for (std::size_t j = 0; j < size_; ++j) { const auto [ax, ay] = quad_monomial_exponents_[j]; @@ -632,8 +619,8 @@ void SerendipityBasis::evaluate_all_to(const math::Vector& xi, if (element_type_ == ElementType::Hex20) { const auto mesh_to_basis = ReferenceNodeLayout::mesh_to_basis_ordering(element_type_); - BASIS_CHECK_EVAL(mesh_to_basis.size() == size_, - "Hex20 mesh-to-basis ordering is not registered"); + FE::throw_if(mesh_to_basis.size() != size_, SVMP_HERE, + "Hex20 mesh-to-basis ordering is not registered"); if (values_out) { Real internal_vals[20]; @@ -681,8 +668,8 @@ void 
SerendipityBasis::evaluate_all_to(const math::Vector& xi, return; } - throw BasisEvaluationException("SerendipityBasis::evaluate_all_to: unsupported serendipity configuration", - __FILE__, __LINE__, __func__); + FE::raise(SVMP_HERE, + "SerendipityBasis::evaluate_all_to: unsupported serendipity configuration"); } void SerendipityBasis::evaluate_values(const math::Vector& xi, From 4819f595e920e95a88be0c5895be3e1d5dc055d0 Mon Sep 17 00:00:00 2001 From: Zachary Sexton Date: Tue, 9 Jun 2026 19:35:11 -0700 Subject: [PATCH 16/22] fixing doxygen layout to allow for visible topic sections since modules in doxygen are now reserved for c++ modules --- Documentation/DoxygenLayout.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/DoxygenLayout.xml b/Documentation/DoxygenLayout.xml index df0146828..f056df891 100644 --- a/Documentation/DoxygenLayout.xml +++ b/Documentation/DoxygenLayout.xml @@ -3,7 +3,11 @@ + + From dfd5aff359ee357771550c228e303f58d5f862b2 Mon Sep 17 00:00:00 2001 From: Zachary Sexton Date: Tue, 9 Jun 2026 20:46:58 -0700 Subject: [PATCH 17/22] added topology evaluation helpers and cleaned up static cast helpers --- Code/Source/solver/FE/Basis/BasisFunction.cpp | 4 +- Code/Source/solver/FE/Basis/BasisFunction.h | 14 + Code/Source/solver/FE/Basis/LagrangeBasis.cpp | 361 +++++++++--------- Code/Source/solver/FE/Basis/LagrangeBasis.h | 17 +- .../solver/FE/Basis/SerendipityBasis.cpp | 14 +- 5 files changed, 215 insertions(+), 195 deletions(-) diff --git a/Code/Source/solver/FE/Basis/BasisFunction.cpp b/Code/Source/solver/FE/Basis/BasisFunction.cpp index 591f6751a..d847a9cca 100644 --- a/Code/Source/solver/FE/Basis/BasisFunction.cpp +++ b/Code/Source/solver/FE/Basis/BasisFunction.cpp @@ -63,9 +63,7 @@ void BasisFunction::evaluate_gradients_to(const math::Vector& xi, tmp.resize(size()); evaluate_gradients(xi, tmp); for (std::size_t i = 0; i < tmp.size(); ++i) { - gradients_out[i * 3u + 0u] = tmp[i][0]; - gradients_out[i * 3u + 1u] = tmp[i][1]; - 
gradients_out[i * 3u + 2u] = tmp[i][2]; + store_gradient(tmp[i], gradients_out + i * 3u); } } diff --git a/Code/Source/solver/FE/Basis/BasisFunction.h b/Code/Source/solver/FE/Basis/BasisFunction.h index e7de2bf01..832926199 100644 --- a/Code/Source/solver/FE/Basis/BasisFunction.h +++ b/Code/Source/solver/FE/Basis/BasisFunction.h @@ -49,6 +49,20 @@ using Hessian = math::Matrix; return hessian; } +inline void store_gradient(const Gradient& gradient, Real* dst) noexcept { + dst[0] = gradient[0]; + dst[1] = gradient[1]; + dst[2] = gradient[2]; +} + +[[nodiscard]] inline Gradient load_gradient(const Real* src) noexcept { + Gradient gradient; + gradient[0] = src[0]; + gradient[1] = src[1]; + gradient[2] = src[2]; + return gradient; +} + inline void store_hessian(const Hessian& hessian, Real* dst) noexcept { dst[0] = hessian(0, 0); dst[1] = hessian(0, 1); diff --git a/Code/Source/solver/FE/Basis/LagrangeBasis.cpp b/Code/Source/solver/FE/Basis/LagrangeBasis.cpp index b32199d03..4ec970b86 100644 --- a/Code/Source/solver/FE/Basis/LagrangeBasis.cpp +++ b/Code/Source/solver/FE/Basis/LagrangeBasis.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace svmp { namespace FE { @@ -121,6 +122,9 @@ LagrangeBasis::SimplexExponent simplex_exponent_from_point(const Vec3& p, return e; } +// Sentinel node index meaning "skip nothing" in product_excluding below. +constexpr std::size_t kNoSkip = std::numeric_limits::max(); + // Evaluate 1D Lagrange polynomials and derivatives at a point. void evaluate_1d_lagrange(Real x, const std::vector& nodes, AxisEval& out) { const std::size_t n = nodes.size(); @@ -134,6 +138,19 @@ void evaluate_1d_lagrange(Real x, const std::vector& nodes, AxisEval& out) } for (std::size_t i = 0; i < n; ++i) { + // Product of (x - nodes[j]) over all j except i and the listed skips. + // Each derivative order drops one additional factor from the product. 
+ const auto product_excluding = [&](std::size_t skip1 = kNoSkip, + std::size_t skip2 = kNoSkip) { + Real product = Real(1); + for (std::size_t j = 0; j < n; ++j) { + if (j != i && j != skip1 && j != skip2) { + product *= x - nodes[j]; + } + } + return product; + }; + Real denom = Real(1); for (std::size_t j = 0; j < n; ++j) { if (j != i) { @@ -141,26 +158,13 @@ void evaluate_1d_lagrange(Real x, const std::vector& nodes, AxisEval& out) } } - Real value = Real(1); - for (std::size_t j = 0; j < n; ++j) { - if (j != i) { - value *= x - nodes[j]; - } - } - out.value[i] = value / denom; + out.value[i] = product_excluding() / denom; Real first = Real(0); for (std::size_t m = 0; m < n; ++m) { - if (m == i) { - continue; + if (m != i) { + first += product_excluding(m); } - Real product = Real(1); - for (std::size_t j = 0; j < n; ++j) { - if (j != i && j != m) { - product *= x - nodes[j]; - } - } - first += product; } out.first[i] = first / denom; @@ -170,16 +174,9 @@ void evaluate_1d_lagrange(Real x, const std::vector& nodes, AxisEval& out) continue; } for (std::size_t l = 0; l < n; ++l) { - if (l == i || l == m) { - continue; - } - Real product = Real(1); - for (std::size_t j = 0; j < n; ++j) { - if (j != i && j != m && j != l) { - product *= x - nodes[j]; - } + if (l != i && l != m) { + second += product_excluding(m, l); } - second += product; } } out.second[i] = second / denom; @@ -222,7 +219,7 @@ void evaluate_simplex(const Vec3& xi, return; } - const int bary_count = top == BasisTopology::Triangle ? 3 : 4; + const std::size_t bary_count = top == BasisTopology::Triangle ? 
3u : 4u; std::array lambda{Real(0), Real(0), Real(0), Real(0)}; std::array lambda_grad; lambda_grad.fill(Gradient::Zero()); @@ -246,48 +243,40 @@ void evaluate_simplex(const Vec3& xi, for (std::size_t i = 0; i < n; ++i) { std::array, 4> f{}; - for (int a = 0; a < bary_count; ++a) { - f[static_cast(a)] = - simplex_factor(exponents[i][static_cast(a)], - lambda[static_cast(a)], - order); + for (std::size_t a = 0; a < bary_count; ++a) { + f[a] = simplex_factor(exponents[i][a], lambda[a], order); } Real value = Real(1); - for (int a = 0; a < bary_count; ++a) { - value *= f[static_cast(a)][0]; + for (std::size_t a = 0; a < bary_count; ++a) { + value *= f[a][0]; } out.value[i] = value; - for (int a = 0; a < bary_count; ++a) { - Real product = f[static_cast(a)][1]; - for (int b = 0; b < bary_count; ++b) { + for (std::size_t a = 0; a < bary_count; ++a) { + Real product = f[a][1]; + for (std::size_t b = 0; b < bary_count; ++b) { if (b != a) { - product *= f[static_cast(b)][0]; + product *= f[b][0]; } } for (std::size_t c = 0; c < 3u; ++c) { - out.gradient[i][c] += product * lambda_grad[static_cast(a)][c]; + out.gradient[i][c] += product * lambda_grad[a][c]; } } - for (int a = 0; a < bary_count; ++a) { - for (int b = 0; b < bary_count; ++b) { - Real product = (a == b) - ? f[static_cast(a)][2] - : f[static_cast(a)][1] * - f[static_cast(b)][1]; - for (int c = 0; c < bary_count; ++c) { - if (c != a && c != b) { - product *= f[static_cast(c)][0]; + for (std::size_t a = 0; a < bary_count; ++a) { + for (std::size_t b = 0; b < bary_count; ++b) { + Real product = (a == b) ? 
f[a][2] : f[a][1] * f[b][1]; + for (std::size_t k = 0; k < bary_count; ++k) { + if (k != a && k != b) { + product *= f[k][0]; } } for (std::size_t r = 0; r < 3u; ++r) { for (std::size_t c = 0; c < 3u; ++c) { out.hessian[i](r, c) += - product * - lambda_grad[static_cast(a)][r] * - lambda_grad[static_cast(b)][c]; + product * lambda_grad[a][r] * lambda_grad[b][c]; } } } @@ -295,13 +284,6 @@ void evaluate_simplex(const Vec3& xi, } } -// Store a gradient in the flat buffer layout used by fast evaluators. -void store_gradient(const Gradient& gradient, Real* dst) { - dst[0] = gradient[0]; - dst[1] = gradient[1]; - dst[2] = gradient[2]; -} - } // namespace LagrangeBasis::LagrangeBasis(ElementType type, int order) @@ -339,13 +321,9 @@ void LagrangeBasis::init_nodes() { build_point_nodes(); return; case BasisTopology::Line: - build_tensor_product_nodes(1); - return; case BasisTopology::Quadrilateral: - build_tensor_product_nodes(2); - return; case BasisTopology::Hexahedron: - build_tensor_product_nodes(3); + build_tensor_product_nodes(); return; case BasisTopology::Triangle: case BasisTopology::Tetrahedron: @@ -368,17 +346,17 @@ void LagrangeBasis::build_point_nodes() { } // Build nodes and axis indices for tensor-product elements. -void LagrangeBasis::build_tensor_product_nodes(int dimensions) { +void LagrangeBasis::build_tensor_product_nodes() { init_equispaced_1d_nodes(); nodes_ = ReferenceNodeLayout::get_lagrange_node_coords(element_type_, order_); tensor_indices_.reserve(nodes_.size()); for (const auto& node : nodes_) { TensorNodeIndex idx{0u, 0u, 0u}; idx[0] = axis_index_pm_one(node[0], order_); - if (dimensions >= 2) { + if (dimension_ >= 2) { idx[1] = axis_index_pm_one(node[1], order_); } - if (dimensions >= 3) { + if (dimension_ >= 3) { idx[2] = axis_index_pm_one(node[2], order_); } tensor_indices_.push_back(idx); @@ -419,130 +397,159 @@ void LagrangeBasis::build_wedge_nodes() { } } -// Evaluate requested basis quantities into caller-provided flat buffers. 
-void LagrangeBasis::evaluate_all_to(const Vec3& xi, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) const { - if (topology_ == BasisTopology::Point) { +// Evaluate the constant point basis. +void LagrangeBasis::evaluate_point_to(Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) const { + if (values_out) { + values_out[0] = Real(1); + } + if (gradients_out) { + gradients_out[0] = gradients_out[1] = gradients_out[2] = Real(0); + } + if (hessians_out) { + std::fill_n(hessians_out, 9u, Real(0)); + } +} + +// Evaluate line, quadrilateral, and hexahedron bases as axis-polynomial products. +void LagrangeBasis::evaluate_tensor_product_to(const Vec3& xi, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) const { + AxisEval ax; + AxisEval ay; + AxisEval az; + evaluate_1d_lagrange(xi[0], nodes_1d_, ax); + if (dimension_ >= 2) { + evaluate_1d_lagrange(xi[1], nodes_1d_, ay); + } + if (dimension_ >= 3) { + evaluate_1d_lagrange(xi[2], nodes_1d_, az); + } + + for (std::size_t node = 0; node < tensor_indices_.size(); ++node) { + const auto& idx = tensor_indices_[node]; + const Real vx = ax.value[idx[0]]; + const Real dx = ax.first[idx[0]]; + const Real d2x = ax.second[idx[0]]; + const Real vy = dimension_ >= 2 ? ay.value[idx[1]] : Real(1); + const Real dy = dimension_ >= 2 ? ay.first[idx[1]] : Real(0); + const Real d2y = dimension_ >= 2 ? ay.second[idx[1]] : Real(0); + const Real vz = dimension_ >= 3 ? az.value[idx[2]] : Real(1); + const Real dz = dimension_ >= 3 ? az.first[idx[2]] : Real(0); + const Real d2z = dimension_ >= 3 ? 
az.second[idx[2]] : Real(0); + if (values_out) { - values_out[0] = Real(1); + values_out[node] = vx * vy * vz; } if (gradients_out) { - gradients_out[0] = gradients_out[1] = gradients_out[2] = Real(0); + Real* g = gradients_out + node * 3u; + g[0] = dx * vy * vz; + g[1] = vx * dy * vz; + g[2] = vx * vy * dz; } if (hessians_out) { - std::fill_n(hessians_out, 9u, Real(0)); + Real* h = hessians_out + node * 9u; + h[0] = d2x * vy * vz; + h[1] = dx * dy * vz; + h[2] = dx * vy * dz; + h[3] = h[1]; + h[4] = vx * d2y * vz; + h[5] = vx * dy * dz; + h[6] = h[2]; + h[7] = h[5]; + h[8] = vx * vy * d2z; } - return; } +} - if (topology_ == BasisTopology::Line || - topology_ == BasisTopology::Quadrilateral || - topology_ == BasisTopology::Hexahedron) { - AxisEval ax; - AxisEval ay; - AxisEval az; - evaluate_1d_lagrange(xi[0], nodes_1d_, ax); - if (dimension_ >= 2) { - evaluate_1d_lagrange(xi[1], nodes_1d_, ay); +// Evaluate triangle and tetrahedron bases from barycentric factors. +void LagrangeBasis::evaluate_simplex_to(const Vec3& xi, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) const { + SimplexEval simplex; + evaluate_simplex(xi, topology_, order_, simplex_exponents_, simplex); + for (std::size_t i = 0; i < simplex.value.size(); ++i) { + if (values_out) { + values_out[i] = simplex.value[i]; } - if (dimension_ >= 3) { - evaluate_1d_lagrange(xi[2], nodes_1d_, az); + if (gradients_out) { + store_gradient(simplex.gradient[i], gradients_out + i * 3u); } - - for (std::size_t node = 0; node < tensor_indices_.size(); ++node) { - const auto& idx = tensor_indices_[node]; - const Real vx = ax.value[idx[0]]; - const Real dx = ax.first[idx[0]]; - const Real d2x = ax.second[idx[0]]; - const Real vy = dimension_ >= 2 ? ay.value[idx[1]] : Real(1); - const Real dy = dimension_ >= 2 ? ay.first[idx[1]] : Real(0); - const Real d2y = dimension_ >= 2 ? ay.second[idx[1]] : Real(0); - const Real vz = dimension_ >= 3 ? 
az.value[idx[2]] : Real(1); - const Real dz = dimension_ >= 3 ? az.first[idx[2]] : Real(0); - const Real d2z = dimension_ >= 3 ? az.second[idx[2]] : Real(0); - - if (values_out) { - values_out[node] = vx * vy * vz; - } - if (gradients_out) { - Real* g = gradients_out + node * 3u; - g[0] = dx * vy * vz; - g[1] = vx * dy * vz; - g[2] = vx * vy * dz; - } - if (hessians_out) { - Real* h = hessians_out + node * 9u; - h[0] = d2x * vy * vz; - h[1] = dx * dy * vz; - h[2] = dx * vy * dz; - h[3] = h[1]; - h[4] = vx * d2y * vz; - h[5] = vx * dy * dz; - h[6] = h[2]; - h[7] = h[5]; - h[8] = vx * vy * d2z; - } + if (hessians_out) { + store_hessian(simplex.hessian[i], hessians_out + i * 9u); } - return; } +} - if (topology_ == BasisTopology::Triangle || topology_ == BasisTopology::Tetrahedron) { - SimplexEval simplex; - evaluate_simplex(xi, topology_, order_, simplex_exponents_, simplex); - for (std::size_t i = 0; i < simplex.value.size(); ++i) { - if (values_out) { - values_out[i] = simplex.value[i]; - } - if (gradients_out) { - store_gradient(simplex.gradient[i], gradients_out + i * 3u); - } - if (hessians_out) { - store_hessian(simplex.hessian[i], hessians_out + i * 9u); - } +// Evaluate wedge bases as triangle/through-axis products. 
+void LagrangeBasis::evaluate_wedge_to(const Vec3& xi, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) const { + SimplexEval tri; + AxisEval z_axis; + evaluate_simplex(xi, BasisTopology::Triangle, order_, simplex_exponents_, tri); + evaluate_1d_lagrange(xi[2], nodes_1d_, z_axis); + + for (std::size_t node = 0; node < wedge_indices_.size(); ++node) { + const auto [tri_idx, z_idx] = wedge_indices_[node]; + const Real tv = tri.value[tri_idx]; + const Real zv = z_axis.value[z_idx]; + const Real dz = z_axis.first[z_idx]; + const Real d2z = z_axis.second[z_idx]; + + if (values_out) { + values_out[node] = tv * zv; + } + if (gradients_out) { + Real* g = gradients_out + node * 3u; + g[0] = tri.gradient[tri_idx][0] * zv; + g[1] = tri.gradient[tri_idx][1] * zv; + g[2] = tv * dz; + } + if (hessians_out) { + Real* h = hessians_out + node * 9u; + const Hessian& th = tri.hessian[tri_idx]; + const Gradient& tg = tri.gradient[tri_idx]; + h[0] = th(0, 0) * zv; + h[1] = th(0, 1) * zv; + h[2] = tg[0] * dz; + h[3] = h[1]; + h[4] = th(1, 1) * zv; + h[5] = tg[1] * dz; + h[6] = h[2]; + h[7] = h[5]; + h[8] = tv * d2z; } - return; } +} - if (topology_ == BasisTopology::Wedge) { - SimplexEval tri; - AxisEval z_axis; - evaluate_simplex(xi, BasisTopology::Triangle, order_, simplex_exponents_, tri); - evaluate_1d_lagrange(xi[2], nodes_1d_, z_axis); - - for (std::size_t node = 0; node < wedge_indices_.size(); ++node) { - const auto [tri_idx, z_idx] = wedge_indices_[node]; - const Real tv = tri.value[tri_idx]; - const Real zv = z_axis.value[z_idx]; - const Real dz = z_axis.first[z_idx]; - const Real d2z = z_axis.second[z_idx]; - - if (values_out) { - values_out[node] = tv * zv; - } - if (gradients_out) { - Real* g = gradients_out + node * 3u; - g[0] = tri.gradient[tri_idx][0] * zv; - g[1] = tri.gradient[tri_idx][1] * zv; - g[2] = tv * dz; - } - if (hessians_out) { - Real* h = hessians_out + node * 9u; - const Hessian& th = 
tri.hessian[tri_idx]; - const Gradient& tg = tri.gradient[tri_idx]; - h[0] = th(0, 0) * zv; - h[1] = th(0, 1) * zv; - h[2] = tg[0] * dz; - h[3] = h[1]; - h[4] = th(1, 1) * zv; - h[5] = tg[1] * dz; - h[6] = h[2]; - h[7] = h[5]; - h[8] = tv * d2z; - } - } - return; +// Evaluate requested basis quantities into caller-provided flat buffers. +void LagrangeBasis::evaluate_all_to(const Vec3& xi, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) const { + switch (topology_) { + case BasisTopology::Point: + evaluate_point_to(values_out, gradients_out, hessians_out); + return; + case BasisTopology::Line: + case BasisTopology::Quadrilateral: + case BasisTopology::Hexahedron: + evaluate_tensor_product_to(xi, values_out, gradients_out, hessians_out); + return; + case BasisTopology::Triangle: + case BasisTopology::Tetrahedron: + evaluate_simplex_to(xi, values_out, gradients_out, hessians_out); + return; + case BasisTopology::Wedge: + evaluate_wedge_to(xi, values_out, gradients_out, hessians_out); + return; + default: + break; } FE::raise(SVMP_HERE, @@ -561,9 +568,7 @@ void LagrangeBasis::evaluate_gradients(const Vec3& xi, std::vector flat(size() * 3u, Real(0)); evaluate_gradients_to(xi, flat.data()); for (std::size_t i = 0; i < size(); ++i) { - gradients[i][0] = flat[i * 3u + 0u]; - gradients[i][1] = flat[i * 3u + 1u]; - gradients[i][2] = flat[i * 3u + 2u]; + gradients[i] = load_gradient(flat.data() + i * 3u); } } @@ -588,9 +593,7 @@ void LagrangeBasis::evaluate_all(const Vec3& xi, std::vector flat_h(size() * 9u, Real(0)); evaluate_all_to(xi, values.data(), flat_g.data(), flat_h.data()); for (std::size_t i = 0; i < size(); ++i) { - gradients[i][0] = flat_g[i * 3u + 0u]; - gradients[i][1] = flat_g[i * 3u + 1u]; - gradients[i][2] = flat_g[i * 3u + 2u]; + gradients[i] = load_gradient(flat_g.data() + i * 3u); hessians[i] = load_hessian(flat_h.data() + i * 9u); } } diff --git a/Code/Source/solver/FE/Basis/LagrangeBasis.h 
b/Code/Source/solver/FE/Basis/LagrangeBasis.h index 3bb1a5e74..cd0ca6058 100644 --- a/Code/Source/solver/FE/Basis/LagrangeBasis.h +++ b/Code/Source/solver/FE/Basis/LagrangeBasis.h @@ -218,7 +218,7 @@ class LagrangeBasis : public BasisFunction { void init_nodes(); void build_point_nodes(); - void build_tensor_product_nodes(int dimensions); + void build_tensor_product_nodes(); void build_simplex_nodes(); void build_wedge_nodes(); void init_equispaced_1d_nodes(); @@ -227,6 +227,21 @@ class LagrangeBasis : public BasisFunction { Real* SVMP_RESTRICT values_out, Real* SVMP_RESTRICT gradients_out, Real* SVMP_RESTRICT hessians_out) const; + void evaluate_point_to(Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) const; + void evaluate_tensor_product_to(const math::Vector& xi, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) const; + void evaluate_simplex_to(const math::Vector& xi, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) const; + void evaluate_wedge_to(const math::Vector& xi, + Real* SVMP_RESTRICT values_out, + Real* SVMP_RESTRICT gradients_out, + Real* SVMP_RESTRICT hessians_out) const; }; /// @} diff --git a/Code/Source/solver/FE/Basis/SerendipityBasis.cpp b/Code/Source/solver/FE/Basis/SerendipityBasis.cpp index 006d43fdc..fd5f99cbc 100644 --- a/Code/Source/solver/FE/Basis/SerendipityBasis.cpp +++ b/Code/Source/solver/FE/Basis/SerendipityBasis.cpp @@ -18,12 +18,6 @@ namespace basis { namespace { using Vec3 = math::Vector; -void store_gradient(const Gradient& gradient, Real* dst) { - dst[0] = gradient[0]; - dst[1] = gradient[1]; - dst[2] = gradient[2]; -} - void evaluate_hex8_reference(Real r, Real s, Real t, @@ -684,9 +678,7 @@ void SerendipityBasis::evaluate_gradients(const math::Vector& xi, std::vector flat(size_ * 3u, Real(0)); evaluate_gradients_to(xi, flat.data()); for (std::size_t i = 0; i 
< size_; ++i) { - gradients[i][0] = flat[i * 3u + 0u]; - gradients[i][1] = flat[i * 3u + 1u]; - gradients[i][2] = flat[i * 3u + 2u]; + gradients[i] = load_gradient(flat.data() + i * 3u); } } @@ -711,9 +703,7 @@ void SerendipityBasis::evaluate_all(const math::Vector& xi, std::vector flat_hessians(size_ * 9u, Real(0)); evaluate_all_to(xi, values.data(), flat_gradients.data(), flat_hessians.data()); for (std::size_t i = 0; i < size_; ++i) { - gradients[i][0] = flat_gradients[i * 3u + 0u]; - gradients[i][1] = flat_gradients[i * 3u + 1u]; - gradients[i][2] = flat_gradients[i * 3u + 2u]; + gradients[i] = load_gradient(flat_gradients.data() + i * 3u); hessians[i] = load_hessian(flat_hessians.data() + i * 9u); } } From ddb509ac88fc6b28147e464f2333e99b0c305b61 Mon Sep 17 00:00:00 2001 From: Zachary Sexton Date: Wed, 10 Jun 2026 11:50:24 -0700 Subject: [PATCH 18/22] aligning throw and raise to use function-template helpers for svmp --- Code/Source/solver/nn.cpp | 141 +++++++++++++++----------------------- 1 file changed, 56 insertions(+), 85 deletions(-) diff --git a/Code/Source/solver/nn.cpp b/Code/Source/solver/nn.cpp index 60fcddf81..547310703 100644 --- a/Code/Source/solver/nn.cpp +++ b/Code/Source/solver/nn.cpp @@ -133,9 +133,8 @@ const febasis::BasisFunction& basis_for_solver_element(consts::ElementType eType const auto fe_type = to_fe_element_type(eType); if (!fe_type) { - throw febasis::BasisElementCompatibilityException( - "No FE Basis selection for solver element " + solver_element_name(eType), - __FILE__, __LINE__, __func__); + fe::raise(SVMP_HERE, + "No FE Basis selection for solver element " + solver_element_name(eType)); } const std::lock_guard lock(cache_mutex); @@ -177,10 +176,9 @@ std::span solver_to_basis_node_map(consts::ElementType eType) std::size_t basis_index_for_solver_node(consts::ElementType eType, const int solver_node) { if (solver_node < 0) { - throw febasis::BasisNodeOrderingException( + fe::raise(SVMP_HERE, "Solver node " + 
std::to_string(solver_node) + - " is outside node map for " + solver_element_name(eType), - __FILE__, __LINE__, __func__); + " is outside node map for " + solver_element_name(eType)); } const auto node = static_cast(solver_node); @@ -191,10 +189,9 @@ std::size_t basis_index_for_solver_node(consts::ElementType eType, const int sol if (node < map.size()) { return map[node]; } - throw febasis::BasisNodeOrderingException( + fe::raise(SVMP_HERE, "Solver node " + std::to_string(solver_node) + - " is outside node map for " + solver_element_name(eType), - __FILE__, __LINE__, __func__); + " is outside node map for " + solver_element_name(eType)); } fe::math::Vector make_basis_point(const febasis::BasisFunction& basis, @@ -202,11 +199,10 @@ fe::math::Vector make_basis_point(const febasis::BasisFunction& bas const Array& xi) { if (xi.nrows() < basis.dimension()) { - throw febasis::BasisConfigurationException( + fe::raise(SVMP_HERE, "xi has " + std::to_string(xi.nrows()) + " rows but FE Basis element requires " + std::to_string(basis.dimension()) + - " reference coordinates", - __FILE__, __LINE__, __func__); + " reference coordinates"); } // Inactive trailing components must be zero for lower-dimensional elements; @@ -227,26 +223,23 @@ void copy_basis_values_to_solver_arrays(consts::ElementType eType, Array3& Nx) { if (values.size() != static_cast(eNoN)) { - throw febasis::BasisEvaluationException( + fe::raise(SVMP_HERE, "FE Basis value count " + std::to_string(values.size()) + - " does not match solver eNoN " + std::to_string(eNoN), - __FILE__, __LINE__, __func__); + " does not match solver eNoN " + std::to_string(eNoN)); } if (gradients.size() != static_cast(eNoN)) { - throw febasis::BasisEvaluationException( + fe::raise(SVMP_HERE, "FE Basis gradient count " + std::to_string(gradients.size()) + - " does not match solver eNoN " + std::to_string(eNoN), - __FILE__, __LINE__, __func__); + " does not match solver eNoN " + std::to_string(eNoN)); } for (int a = 0; a < eNoN; ++a) { 
const auto basis_index = basis_index_for_solver_node(eType, a); if (basis_index >= values.size() || basis_index >= gradients.size()) { - throw febasis::BasisNodeOrderingException( + fe::raise(SVMP_HERE, "Solver node " + std::to_string(a) + " maps to FE Basis node " + std::to_string(basis_index) + " outside basis output for " + - solver_element_name(eType), - __FILE__, __LINE__, __func__); + solver_element_name(eType)); } N(a, g) = values[basis_index]; @@ -271,10 +264,9 @@ void evaluate_basis_values_and_gradients(const int insd, { const auto& basis = basis_for_solver_element(eType); if (insd < basis.dimension()) { - throw febasis::BasisConfigurationException( + fe::raise(SVMP_HERE, "solver insd " + std::to_string(insd) + - " is smaller than FE Basis reference dimension " + std::to_string(basis.dimension()), - __FILE__, __LINE__, __func__); + " is smaller than FE Basis reference dimension " + std::to_string(basis.dimension())); } const auto point = make_basis_point(basis, g, xi); @@ -309,9 +301,8 @@ int required_nxx_components_for_dimension(const int dimension) case 3: return 6; default: - throw febasis::BasisConfigurationException( - "Unsupported FE Basis reference dimension " + std::to_string(dimension), - __FILE__, __LINE__, __func__); + fe::raise(SVMP_HERE, + "Unsupported FE Basis reference dimension " + std::to_string(dimension)); } } @@ -323,18 +314,16 @@ void copy_basis_hessians_to_solver_nxx(consts::ElementType eType, Array3& Nxx) { if (hessians.size() != static_cast(eNoN)) { - throw febasis::BasisEvaluationException( + fe::raise(SVMP_HERE, "FE Basis Hessian count " + std::to_string(hessians.size()) + - " does not match solver eNoN " + std::to_string(eNoN), - __FILE__, __LINE__, __func__); + " does not match solver eNoN " + std::to_string(eNoN)); } const int required_components = required_nxx_components_for_dimension(dimension); if (Nxx.nrows() < required_components) { - throw febasis::BasisConfigurationException( + fe::raise(SVMP_HERE, "solver Nxx has " + 
std::to_string(Nxx.nrows()) + - " rows but FE Basis Hessian packing requires " + std::to_string(required_components), - __FILE__, __LINE__, __func__); + " rows but FE Basis Hessian packing requires " + std::to_string(required_components)); } for (int a = 0; a < eNoN; ++a) { @@ -344,11 +333,10 @@ void copy_basis_hessians_to_solver_nxx(consts::ElementType eType, const auto basis_index = basis_index_for_solver_node(eType, a); if (basis_index >= hessians.size()) { - throw febasis::BasisNodeOrderingException( + fe::raise(SVMP_HERE, "Solver node " + std::to_string(a) + " maps to FE Basis Hessian node " + std::to_string(basis_index) + " outside basis output for " + - solver_element_name(eType), - __FILE__, __LINE__, __func__); + solver_element_name(eType)); } const auto& hessian = hessians[basis_index]; @@ -376,18 +364,16 @@ void evaluate_basis_hessians(const int insd, { const auto& basis = basis_for_solver_element(eType); if (insd < basis.dimension()) { - throw febasis::BasisConfigurationException( + fe::raise(SVMP_HERE, "solver insd " + std::to_string(insd) + - " is smaller than FE Basis reference dimension " + std::to_string(basis.dimension()), - __FILE__, __LINE__, __func__); + " is smaller than FE Basis reference dimension " + std::to_string(basis.dimension())); } const int required_components = required_nxx_components_for_dimension(basis.dimension()); if (ind2 < required_components) { - throw febasis::BasisConfigurationException( + fe::raise(SVMP_HERE, "solver ind2 " + std::to_string(ind2) + - " is smaller than packed Hessian component count " + std::to_string(required_components), - __FILE__, __LINE__, __func__); + " is smaller than packed Hessian component count " + std::to_string(required_components)); } const auto point = make_basis_point(basis, gaus_pt, xi); @@ -415,9 +401,9 @@ void get_gip(const int insd, consts::ElementType eType, const int nG, Vector(SVMP_HERE, "No support in 'get_element_gauss_int_data'", - solver_element_name(eType), __FILE__, __LINE__, 
__func__); + solver_element_name(eType)); } } @@ -430,9 +416,9 @@ void get_gip(mshType& mesh) try { set_element_gauss_int_data[mesh.eType](mesh); } catch (const std::bad_function_call& exception) { - throw fe::InvalidElementException( + fe::raise(SVMP_HERE, "No support in 'set_element_gauss_int_data'", - solver_element_name(mesh.eType), __FILE__, __LINE__, __func__); + solver_element_name(mesh.eType)); } } @@ -441,9 +427,9 @@ void get_gip(Simulation* simulation, faceType& face) try { set_face_gauss_int_data[face.eType](face); } catch (const std::bad_function_call& exception) { - throw fe::InvalidElementException( + fe::raise(SVMP_HERE, "No support in 'set_face_gauss_int_data'", - solver_element_name(face.eType), __FILE__, __LINE__, __func__); + solver_element_name(face.eType)); } } @@ -453,9 +439,8 @@ void get_gnn(const int insd, consts::ElementType eType, const int eNoN, const in Array& N, Array3& Nx) { if (!use_basis_adapter_for(eType)) { - throw febasis::BasisElementCompatibilityException( - "[get_gnn] FE Basis does not support solver element " + solver_element_name(eType), - __FILE__, __LINE__, __func__); + fe::raise(SVMP_HERE, + "[get_gnn] FE Basis does not support solver element " + solver_element_name(eType)); } evaluate_basis_values_and_gradients(insd, eType, eNoN, g, xi, N, Nx); @@ -488,11 +473,8 @@ void get_gnn(Simulation* simulation, int gaus_pt, faceType& face) { using consts::ElementType; - if (face.eType == ElementType::NRB) { - throw fe::NotImplementedException( - "[get_gnn(face)] NRB face shape functions are unsupported by FE Basis", - __FILE__, __LINE__, __func__); - } + fe::throw_if(face.eType == ElementType::NRB, SVMP_HERE, + "[get_gnn(face)] NRB face shape functions are unsupported by FE Basis"); if (face.eType == ElementType::PNT) { set_point_face_shape_data(gaus_pt, face); @@ -505,9 +487,8 @@ void get_gnn(Simulation* simulation, int gaus_pt, faceType& face) return; } - throw febasis::BasisElementCompatibilityException( - "[get_gnn(face)] FE 
Basis does not support face element " + solver_element_name(face.eType), - __FILE__, __LINE__, __func__); + fe::raise(SVMP_HERE, + "[get_gnn(face)] FE Basis does not support face element " + solver_element_name(face.eType)); } /// @brief Returns second order derivatives at given natural coords. @@ -523,10 +504,9 @@ void get_gn_nxx(const int insd, const int ind2, consts::ElementType eType, const } if (!use_basis_adapter_for(eType)) { - throw febasis::BasisElementCompatibilityException( + fe::raise(SVMP_HERE, "[get_gn_nxx] FE Basis Hessian evaluation does not support solver element " + - solver_element_name(eType), - __FILE__, __LINE__, __func__); + solver_element_name(eType)); } evaluate_basis_hessians(insd, ind2, eType, eNoN, gaus_pt, xi, Nxx); @@ -713,11 +693,8 @@ void get_nnx(const int nsd, const consts::ElementType eType, const int eNoN, con l1 = (l1 && l2 && l3 && l4); - if (!l1) { - throw fe::InvalidArgumentException( - "Error in computing shape functions", - __FILE__, __LINE__, __func__); - } + fe::throw_if(!l1, SVMP_HERE, + "Error in computing shape functions"); } /// @brief Inverse maps {xp} to {$\xi$} in an element with coordinates {xl} using Newton's method @@ -965,11 +942,10 @@ void gnnb(const ComMod& com_mod, const faceType& lFa, const int e, const int g, } if (!found_node) { - throw fe::InvalidArgumentException( + fe::raise(SVMP_HERE, "[svMultiPhysics::gnnb] ERROR: The '" + lFa.name + "' face node " + std::to_string(Ac) + " could not be matched to a node in the '" + - msh.name + "' volume mesh.", - __FILE__, __LINE__, __func__); + msh.name + "' volume mesh."); } ptr(a) = b; @@ -1018,9 +994,8 @@ void gnnb(const ComMod& com_mod, const faceType& lFa, const int e, const int g, } break; default: - throw fe::InvalidArgumentException( - "gnnb: invalid MechanicalConfigurationType provided", - __FILE__, __LINE__, __func__); + fe::raise(SVMP_HERE, + "gnnb: invalid MechanicalConfigurationType provided"); } } } @@ -1208,10 +1183,8 @@ void gn_nxx(const int l, const 
int eNoN, const int nsd, const int insd, Array(INFO != 0, SVMP_HERE, + "[gn_nxx] Error in Lapack", "LAPACK dgesv", INFO); Nxx = B; @@ -1280,10 +1253,8 @@ void gn_nxx(const int l, const int eNoN, const int nsd, const int insd, Array(INFO != 0, SVMP_HERE, + "[gn_nxx] Error in Lapack", "LAPACK dgesv", INFO); Nxx = B; } @@ -1330,10 +1301,10 @@ void select_ele(const ComMod& com_mod, mshType& mesh) set_1d_element_props[mesh.eNoN](insd, mesh); } } catch (const std::bad_function_call& exception) { - throw fe::InvalidElementException( + fe::raise(SVMP_HERE, "[select_ele] No support for " + std::to_string(mesh.eNoN) + " noded " + std::to_string(insd) + "D elements.", - solver_element_name(mesh.eType), __FILE__, __LINE__, __func__); + solver_element_name(mesh.eType)); } // Set mesh 'w' and 'xi' arrays used for Gauss integration. @@ -1389,10 +1360,10 @@ void select_eleb(Simulation* simulation, mshType& mesh, faceType& face) try { set_face_element_props[face.eNoN](insd, face); } catch (const std::bad_function_call& exception) { - throw fe::InvalidElementException( + fe::raise(SVMP_HERE, "No support for " + std::to_string(face.eNoN) + " noded " + std::to_string(insd) + "D elements in 'set_face_element_props'.", - solver_element_name(face.eType), __FILE__, __LINE__, __func__); + solver_element_name(face.eType)); } // Set face 'w' and 'xi' arrays used for Gauss integration. 
From 9d6266b0a268569fca104b82d38dcf1b0230e4f2 Mon Sep 17 00:00:00 2001 From: Zachary Sexton Date: Thu, 11 Jun 2026 09:41:23 -0700 Subject: [PATCH 19/22] improving doxygen documentation for the basis topic --- Code/Source/solver/FE/Basis/BasisFunction.h | 118 ++++++++++++++++++-- Code/Source/solver/FE/FE.h | 22 ++++ 2 files changed, 132 insertions(+), 8 deletions(-) create mode 100644 Code/Source/solver/FE/FE.h diff --git a/Code/Source/solver/FE/Basis/BasisFunction.h b/Code/Source/solver/FE/Basis/BasisFunction.h index 832926199..9b8e29aaa 100644 --- a/Code/Source/solver/FE/Basis/BasisFunction.h +++ b/Code/Source/solver/FE/Basis/BasisFunction.h @@ -12,16 +12,115 @@ #include #include -/// \defgroup FE FE Library -/// \brief Finite-element interfaces and utilities used by the solver. -/// -/// The FE library groups basis functions, math utilities, assembly interfaces, -/// and related support code that can be built and consumed as a coherent -/// finite-element component. - /// \defgroup FE_Basis Basis /// \ingroup FE /// \brief Basis-function interfaces, concrete basis families, and reference-node conventions. +/// +/// \details +/// ## Scope +/// +/// The Basis module owns reference-element shape functions. It provides the +/// number of basis functions and the values and derivatives, +/// \f$N_i\f$, \f$\partial N_i / \partial \xi_j\f$, and +/// \f$\partial^2 N_i / \partial \xi_j \partial \xi_k\f$ at reference +/// points. It does not own mesh storage, quadrature selection, field +/// formulation policy, or transformation of derivatives to physical +/// coordinates. Those decisions stay with the solver layer that has the mesh, +/// material model, and equation context. +/// +/// The main pieces are: +/// - BasisFunction (BasisFunction.h): the abstract query and evaluation +/// contract for code that does not need to know the concrete family. 
+/// - \ref FE_LagrangeBasis "LagrangeBasis" and +/// \ref FE_SerendipityBasis "SerendipityBasis": the implemented nodal +/// families, including analytical first and second derivatives in reference +/// coordinates. +/// - basis_factory (BasisFactory.h): runtime construction from a BasisRequest. +/// basis_factory::default_basis_request() centralizes the family/order that +/// matches each supported element's public node layout. +/// - ReferenceNodeLayout (NodeOrderingConventions.h): canonical reference-node +/// coordinates and the output ordering used by every basis evaluator. +/// - BasisTraits.h and BasisExceptions.h: topology classification, +/// compile-time helpers, and module-specific exception types. +/// +/// ## Object and evaluation contract +/// +/// A basis object is immutable after construction. It represents one reference +/// topology, basis family, and effective polynomial order, and can be shared +/// safely across evaluations. Construction may build node lattices or invert +/// interpolation matrices, so callers should construct through basis_factory +/// and cache one instance for each distinct basis request instead of rebuilding +/// inside element loops. +/// +/// Every evaluator takes a three-component reference coordinate. For +/// lower-dimensional elements, only the first dimension() components are +/// active. Returned gradients always have three components and Hessians are +/// always 3-by-3 matrices; inactive reference directions are expected to be +/// zero for conforming lower-dimensional bases. The std::vector overloads are +/// convenient for setup, tests, and adapter code. The *_to overloads write to +/// caller-owned flat buffers and are the allocation-free path for assembly. +/// +/// Outputs are in ReferenceNodeLayout basis order, not necessarily the mesh or +/// solver's native node order. 
A caller that stores elements in another local +/// ordering must apply the appropriate permutation at the boundary between the +/// basis module and that storage format. +/// +/// ## Inputs and ownership +/// +/// Constructing and evaluating a basis combines several independent choices: +/// +/// - **Element topology comes from the mesh.** The mesh cell type is translated +/// to ElementType, which defines the reference topology and public node +/// layout. This is structural information, not a complete discretization +/// policy. +/// - **Geometry interpolation follows the mesh nodes.** The basis used for the +/// reference-to-physical map must be compatible with the element's node +/// count and ordering. For that case, callers normally use +/// basis_factory::create_default_for(element_type), which selects the +/// Lagrange or serendipity space associated with that element layout. A +/// Tetra10 mesh therefore implies a quadratic geometry map; a Hex20 mesh +/// implies the supported Hex20 serendipity geometry basis. +/// - **Field approximation is chosen by the formulation.** Field bases do not +/// have to match the geometry map. Mixed formulations, stabilized methods, +/// enrichment, and convergence studies may use different families or orders +/// for different fields on the same mesh topology. Those bases should be +/// requested explicitly with basis_factory::create() and a BasisRequest +/// naming the desired family and order. +/// - **Evaluation points come from the caller.** Quadrature rules, probe +/// points, interpolation targets, and error-sampling locations are outside +/// this module. The basis only evaluates at the reference coordinates it is +/// given. 
+/// +/// \dot "Basis inputs and responsibilities" +/// digraph fe_basis_information_flow { +/// rankdir=LR; +/// node [shape=box, fontname=Helvetica, fontsize=10]; +/// mesh [label="Mesh element type"]; +/// request [label="BasisRequest\nfamily + order"]; +/// topology [label="Reference topology\nand node layout"]; +/// basis [label="Basis object", style=filled, fillcolor=lightgray]; +/// points [label="Reference points"]; +/// outputs [label="Reference values\nand derivatives"]; +/// mesh -> topology; +/// request -> basis; +/// topology -> basis; +/// basis -> outputs; +/// points -> outputs; +/// } +/// \enddot +/// +/// ## Reference scope and the solver adapter +/// +/// The solver-facing adapter in nn.cpp is the boundary between this reference +/// basis contract and legacy solver storage. It translates solver element +/// enums to ElementType, obtains cached default bases for mesh/face shape +/// tables, permutes from ReferenceNodeLayout order into solver node order, and +/// stores N, Nx, and, where needed, packed Nxx at Gauss points. At that stage +/// Nx and Nxx are still derivatives with respect to reference coordinates. +/// Physical-coordinate derivatives are formed later, for a particular +/// configuration and element geometry, by composing the cached reference data +/// with the mapping Jacobian (nn::gnn for first derivatives and nn::gn_nxx for +/// second derivatives). namespace svmp { namespace FE { @@ -105,7 +204,10 @@ inline void add_scaled_hessian(Hessian& target, /// BasisFunction defines the common query and evaluation API used by solver /// code that does not need to know the concrete basis implementation. Derived /// classes provide values at minimum and can override analytical gradients, -/// Hessians, combined evaluation, and flat-buffer output paths. +/// Hessians, combined evaluation, and flat-buffer output paths. 
The interface +/// is deliberately limited to reference-space quantities; callers own node +/// ordering translation, physical mapping, and any field-level discretization +/// policy. class BasisFunction { public: /// \brief Destroy a basis function through the abstract interface. diff --git a/Code/Source/solver/FE/FE.h b/Code/Source/solver/FE/FE.h new file mode 100644 index 000000000..1d3bba72b --- /dev/null +++ b/Code/Source/solver/FE/FE.h @@ -0,0 +1,22 @@ +// SPDX-FileCopyrightText: Copyright (c) Stanford University, The Regents of the University of California, and others. +// SPDX-License-Identifier: BSD-3-Clause + +#ifndef SVMP_FE_FE_H +#define SVMP_FE_FE_H + +/// \file FE.h +/// \brief Library-level Doxygen group for the finite-element support code. +/// +/// This header intentionally contains no declarations. It gives Doxygen a +/// header-based home for the top-level FE group; submodule groups attach to it +/// from their own headers, including FE_Basis (Basis/BasisFunction.h), +/// FE_Common (Common/Types.h), and FE_Math (Math/Vector.h). + +/// \defgroup FE FE Library +/// \brief Finite-element interfaces and utilities used by the solver. +/// +/// The FE library groups basis functions, math utilities, assembly interfaces, +/// and related support code that can be built and consumed as a coherent +/// finite-element component. 
+ +#endif // SVMP_FE_FE_H From bd7c2ad86d687606319d768d9abd4ab85c997d63 Mon Sep 17 00:00:00 2001 From: Zachary Sexton Date: Mon, 15 Jun 2026 10:10:14 -0700 Subject: [PATCH 20/22] removing chrono guard from Eigen --- .../eigen/include/eigen3/unsupported/Eigen/CXX11/Tensor | 2 -- 1 file changed, 2 deletions(-) diff --git a/Code/ThirdParty/eigen/include/eigen3/unsupported/Eigen/CXX11/Tensor b/Code/ThirdParty/eigen/include/eigen3/unsupported/Eigen/CXX11/Tensor index 45b176fe7..0938bb554 100644 --- a/Code/ThirdParty/eigen/include/eigen3/unsupported/Eigen/CXX11/Tensor +++ b/Code/ThirdParty/eigen/include/eigen3/unsupported/Eigen/CXX11/Tensor @@ -34,9 +34,7 @@ */ #include -#ifdef EIGEN_USE_GPU #include -#endif #include #include #include From 282626996ec2332a78c3789207e8ba7fccb67d6d Mon Sep 17 00:00:00 2001 From: Zachary Sexton Date: Mon, 15 Jun 2026 11:06:34 -0700 Subject: [PATCH 21/22] reverting chrono replacement code changes --- Code/Source/solver/Timer.h | 8 +++----- Code/Source/solver/load_msh.cpp | 1 + Code/Source/solver/utils.cpp | 8 +++----- tests/unitTests/test_common.h | 1 + 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/Code/Source/solver/Timer.h b/Code/Source/solver/Timer.h index b8ffa29df..1a55d7516 100644 --- a/Code/Source/solver/Timer.h +++ b/Code/Source/solver/Timer.h @@ -4,7 +4,7 @@ #ifndef TIMER_H #define TIMER_H -#include +#include /// @brief Keep track of time class Timer @@ -18,10 +18,8 @@ class Timer double get_time() const { - timeval now{}; - gettimeofday(&now, nullptr); - return static_cast(now.tv_sec) + - static_cast(now.tv_usec) * 1.0e-6; + const auto now = std::chrono::steady_clock::now(); + return std::chrono::duration(now.time_since_epoch()).count(); } void set_time() diff --git a/Code/Source/solver/load_msh.cpp b/Code/Source/solver/load_msh.cpp index 50d0ca858..05648b52d 100644 --- a/Code/Source/solver/load_msh.cpp +++ b/Code/Source/solver/load_msh.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include 
#include diff --git a/Code/Source/solver/utils.cpp b/Code/Source/solver/utils.cpp index 0fb062e8f..fb7874f95 100644 --- a/Code/Source/solver/utils.cpp +++ b/Code/Source/solver/utils.cpp @@ -4,6 +4,7 @@ #include "utils.h" #include +#include #include #include @@ -12,7 +13,6 @@ #include #include #include -#include #include "FE/Common/FEException.h" @@ -37,10 +37,8 @@ int CountBits(int n) double cput() { - timeval now{}; - gettimeofday(&now, nullptr); - return static_cast(now.tv_sec) + - static_cast(now.tv_usec) * 1.0e-6; + const auto now = std::chrono::system_clock::now(); + return std::chrono::duration(now.time_since_epoch()).count(); } Vector diff --git a/tests/unitTests/test_common.h b/tests/unitTests/test_common.h index 7227b2beb..ce6ffed4b 100644 --- a/tests/unitTests/test_common.h +++ b/tests/unitTests/test_common.h @@ -33,6 +33,7 @@ #include #include #include +#include #include "CepMod.h" #include "ComMod.h" #include "gtest/gtest.h" From f734094a5d5a9f175904866f2c49e05fcde01f48 Mon Sep 17 00:00:00 2001 From: Zachary Sexton Date: Mon, 15 Jun 2026 15:18:46 -0700 Subject: [PATCH 22/22] swapping out raw pointers for span support in the non-owning buffer access --- Code/Source/solver/FE/Basis/BasisFunction.cpp | 27 ++- Code/Source/solver/FE/Basis/BasisFunction.h | 63 ++---- Code/Source/solver/FE/Basis/LagrangeBasis.cpp | 174 +++++++++------- Code/Source/solver/FE/Basis/LagrangeBasis.h | 67 +++--- .../solver/FE/Basis/SerendipityBasis.cpp | 194 +++++++++--------- .../Source/solver/FE/Basis/SerendipityBasis.h | 27 +-- Code/Source/solver/FE/Common/Types.h | 11 - .../solver/FE/Math/DenseTransformKernels.h | 27 ++- .../FE/Basis/test_BasisErrorPaths.cpp | 20 +- .../unitTests/FE/Basis/test_LagrangeBasis.cpp | 56 ++--- 10 files changed, 332 insertions(+), 334 deletions(-) diff --git a/Code/Source/solver/FE/Basis/BasisFunction.cpp b/Code/Source/solver/FE/Basis/BasisFunction.cpp index d847a9cca..1c8c31e5d 100644 --- a/Code/Source/solver/FE/Basis/BasisFunction.cpp +++ 
b/Code/Source/solver/FE/Basis/BasisFunction.cpp @@ -4,6 +4,7 @@ #include "BasisFunction.h" #include +#include namespace svmp { namespace FE { @@ -22,6 +23,13 @@ BasisFunctionScratch& scratch() { return data; } +void require_span_size(std::size_t actual, + std::size_t expected, + const char* label) { + FE::throw_if(actual < expected, SVMP_HERE, + std::string("BasisFunction::") + label + ": output span is smaller than basis size"); +} + } // namespace void BasisFunction::evaluate_gradients(const math::Vector& xi, @@ -50,31 +58,30 @@ void BasisFunction::evaluate_all(const math::Vector& xi, } void BasisFunction::evaluate_values_to(const math::Vector& xi, - Real* SVMP_RESTRICT values_out) const { + std::span values_out) const { + require_span_size(values_out.size(), size(), "evaluate_values_to"); auto& tmp = scratch().values; tmp.resize(size()); evaluate_values(xi, tmp); - std::copy_n(tmp.data(), tmp.size(), values_out); + std::copy_n(tmp.begin(), tmp.size(), values_out.begin()); } void BasisFunction::evaluate_gradients_to(const math::Vector& xi, - Real* SVMP_RESTRICT gradients_out) const { + std::span gradients_out) const { + require_span_size(gradients_out.size(), size(), "evaluate_gradients_to"); auto& tmp = scratch().gradients; tmp.resize(size()); evaluate_gradients(xi, tmp); - for (std::size_t i = 0; i < tmp.size(); ++i) { - store_gradient(tmp[i], gradients_out + i * 3u); - } + std::copy_n(tmp.begin(), tmp.size(), gradients_out.begin()); } void BasisFunction::evaluate_hessians_to(const math::Vector& xi, - Real* SVMP_RESTRICT hessians_out) const { + std::span hessians_out) const { + require_span_size(hessians_out.size(), size(), "evaluate_hessians_to"); auto& tmp = scratch().hessians; tmp.resize(size()); evaluate_hessians(xi, tmp); - for (std::size_t i = 0; i < tmp.size(); ++i) { - store_hessian(tmp[i], hessians_out + i * 9u); - } + std::copy_n(tmp.begin(), tmp.size(), hessians_out.begin()); } void BasisFunction::numerical_gradient(const math::Vector& xi, diff --git 
a/Code/Source/solver/FE/Basis/BasisFunction.h b/Code/Source/solver/FE/Basis/BasisFunction.h index 9b8e29aaa..8327ffda9 100644 --- a/Code/Source/solver/FE/Basis/BasisFunction.h +++ b/Code/Source/solver/FE/Basis/BasisFunction.h @@ -10,6 +10,7 @@ #include "Types.h" #include +#include #include /// \defgroup FE_Basis Basis @@ -58,7 +59,7 @@ /// always 3-by-3 matrices; inactive reference directions are expected to be /// zero for conforming lower-dimensional bases. The std::vector overloads are /// convenient for setup, tests, and adapter code. The *_to overloads write to -/// caller-owned flat buffers and are the allocation-free path for assembly. +/// caller-owned spans and are the allocation-free path for assembly. /// /// Outputs are in ReferenceNodeLayout basis order, not necessarily the mesh or /// solver's native node order. A caller that stores elements in another local @@ -148,46 +149,6 @@ using Hessian = math::Matrix; return hessian; } -inline void store_gradient(const Gradient& gradient, Real* dst) noexcept { - dst[0] = gradient[0]; - dst[1] = gradient[1]; - dst[2] = gradient[2]; -} - -[[nodiscard]] inline Gradient load_gradient(const Real* src) noexcept { - Gradient gradient; - gradient[0] = src[0]; - gradient[1] = src[1]; - gradient[2] = src[2]; - return gradient; -} - -inline void store_hessian(const Hessian& hessian, Real* dst) noexcept { - dst[0] = hessian(0, 0); - dst[1] = hessian(0, 1); - dst[2] = hessian(0, 2); - dst[3] = hessian(1, 0); - dst[4] = hessian(1, 1); - dst[5] = hessian(1, 2); - dst[6] = hessian(2, 0); - dst[7] = hessian(2, 1); - dst[8] = hessian(2, 2); -} - -[[nodiscard]] inline Hessian load_hessian(const Real* src) noexcept { - Hessian hessian = Hessian::Zero(); - hessian(0, 0) = src[0]; - hessian(0, 1) = src[1]; - hessian(0, 2) = src[2]; - hessian(1, 0) = src[3]; - hessian(1, 1) = src[4]; - hessian(1, 2) = src[5]; - hessian(2, 0) = src[6]; - hessian(2, 1) = src[7]; - hessian(2, 2) = src[8]; - return hessian; -} - inline void 
add_scaled_hessian(Hessian& target, const Hessian& source, Real scale) noexcept { @@ -204,7 +165,7 @@ inline void add_scaled_hessian(Hessian& target, /// BasisFunction defines the common query and evaluation API used by solver /// code that does not need to know the concrete basis implementation. Derived /// classes provide values at minimum and can override analytical gradients, -/// Hessians, combined evaluation, and flat-buffer output paths. The interface +/// Hessians, combined evaluation, and span output paths. The interface /// is deliberately limited to reference-space quantities; callers own node /// ordering translation, physical mapping, and any field-level discretization /// policy. @@ -263,23 +224,23 @@ class BasisFunction { std::vector& gradients, std::vector& hessians) const; - /// \brief Evaluate basis values into a flat caller-provided buffer. + /// \brief Evaluate basis values into caller-provided storage. /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. - /// \param values_out Output buffer with at least size() entries. + /// \param values_out Output span with at least size() entries. virtual void evaluate_values_to(const math::Vector& xi, - Real* SVMP_RESTRICT values_out) const; + std::span values_out) const; - /// \brief Evaluate basis gradients into a flat caller-provided buffer. + /// \brief Evaluate basis gradients into caller-provided storage. /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. - /// \param gradients_out Output buffer with node-major layout: node * 3 + component. + /// \param gradients_out Output span with at least size() entries. virtual void evaluate_gradients_to(const math::Vector& xi, - Real* SVMP_RESTRICT gradients_out) const; + std::span gradients_out) const; - /// \brief Evaluate basis Hessians into a flat caller-provided buffer. + /// \brief Evaluate basis Hessians into caller-provided storage. 
/// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. - /// \param hessians_out Output buffer with node-major row-major layout: node * 9 + row * 3 + col. + /// \param hessians_out Output span with at least size() entries. virtual void evaluate_hessians_to(const math::Vector& xi, - Real* SVMP_RESTRICT hessians_out) const; + std::span hessians_out) const; protected: /// \brief Approximate gradients by centered finite differences of values. diff --git a/Code/Source/solver/FE/Basis/LagrangeBasis.cpp b/Code/Source/solver/FE/Basis/LagrangeBasis.cpp index 4ec970b86..ab5e73ac7 100644 --- a/Code/Source/solver/FE/Basis/LagrangeBasis.cpp +++ b/Code/Source/solver/FE/Basis/LagrangeBasis.cpp @@ -8,6 +8,8 @@ #include #include #include +#include +#include namespace svmp { namespace FE { @@ -284,6 +286,22 @@ void evaluate_simplex(const Vec3& xi, } } +void require_output_span_size(std::size_t actual, + std::size_t expected, + const char* label) { + FE::throw_if(actual < expected, SVMP_HERE, + std::string(label) + ": output span is smaller than basis size"); +} + +template +void require_requested_span_size(std::span output, + std::size_t expected, + const char* label) { + if (!output.empty()) { + require_output_span_size(output.size(), expected, label); + } +} + } // namespace LagrangeBasis::LagrangeBasis(ElementType type, int order) @@ -398,25 +416,25 @@ void LagrangeBasis::build_wedge_nodes() { } // Evaluate the constant point basis. 
-void LagrangeBasis::evaluate_point_to(Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) const { - if (values_out) { +void LagrangeBasis::evaluate_point_to(std::span values_out, + std::span gradients_out, + std::span hessians_out) const { + if (!values_out.empty()) { values_out[0] = Real(1); } - if (gradients_out) { - gradients_out[0] = gradients_out[1] = gradients_out[2] = Real(0); + if (!gradients_out.empty()) { + gradients_out[0] = Gradient::Zero(); } - if (hessians_out) { - std::fill_n(hessians_out, 9u, Real(0)); + if (!hessians_out.empty()) { + hessians_out[0] = Hessian::Zero(); } } // Evaluate line, quadrilateral, and hexahedron bases as axis-polynomial products. void LagrangeBasis::evaluate_tensor_product_to(const Vec3& xi, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) const { + std::span values_out, + std::span gradients_out, + std::span hessians_out) const { AxisEval ax; AxisEval ay; AxisEval az; @@ -440,55 +458,55 @@ void LagrangeBasis::evaluate_tensor_product_to(const Vec3& xi, const Real dz = dimension_ >= 3 ? az.first[idx[2]] : Real(0); const Real d2z = dimension_ >= 3 ? 
az.second[idx[2]] : Real(0); - if (values_out) { + if (!values_out.empty()) { values_out[node] = vx * vy * vz; } - if (gradients_out) { - Real* g = gradients_out + node * 3u; + if (!gradients_out.empty()) { + Gradient& g = gradients_out[node]; g[0] = dx * vy * vz; g[1] = vx * dy * vz; g[2] = vx * vy * dz; } - if (hessians_out) { - Real* h = hessians_out + node * 9u; - h[0] = d2x * vy * vz; - h[1] = dx * dy * vz; - h[2] = dx * vy * dz; - h[3] = h[1]; - h[4] = vx * d2y * vz; - h[5] = vx * dy * dz; - h[6] = h[2]; - h[7] = h[5]; - h[8] = vx * vy * d2z; + if (!hessians_out.empty()) { + Hessian& h = hessians_out[node]; + h(0, 0) = d2x * vy * vz; + h(0, 1) = dx * dy * vz; + h(0, 2) = dx * vy * dz; + h(1, 0) = h(0, 1); + h(1, 1) = vx * d2y * vz; + h(1, 2) = vx * dy * dz; + h(2, 0) = h(0, 2); + h(2, 1) = h(1, 2); + h(2, 2) = vx * vy * d2z; } } } // Evaluate triangle and tetrahedron bases from barycentric factors. void LagrangeBasis::evaluate_simplex_to(const Vec3& xi, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) const { + std::span values_out, + std::span gradients_out, + std::span hessians_out) const { SimplexEval simplex; evaluate_simplex(xi, topology_, order_, simplex_exponents_, simplex); for (std::size_t i = 0; i < simplex.value.size(); ++i) { - if (values_out) { + if (!values_out.empty()) { values_out[i] = simplex.value[i]; } - if (gradients_out) { - store_gradient(simplex.gradient[i], gradients_out + i * 3u); + if (!gradients_out.empty()) { + gradients_out[i] = simplex.gradient[i]; } - if (hessians_out) { - store_hessian(simplex.hessian[i], hessians_out + i * 9u); + if (!hessians_out.empty()) { + hessians_out[i] = simplex.hessian[i]; } } } // Evaluate wedge bases as triangle/through-axis products. 
void LagrangeBasis::evaluate_wedge_to(const Vec3& xi, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) const { + std::span values_out, + std::span gradients_out, + std::span hessians_out) const { SimplexEval tri; AxisEval z_axis; evaluate_simplex(xi, BasisTopology::Triangle, order_, simplex_exponents_, tri); @@ -501,37 +519,45 @@ void LagrangeBasis::evaluate_wedge_to(const Vec3& xi, const Real dz = z_axis.first[z_idx]; const Real d2z = z_axis.second[z_idx]; - if (values_out) { + if (!values_out.empty()) { values_out[node] = tv * zv; } - if (gradients_out) { - Real* g = gradients_out + node * 3u; + if (!gradients_out.empty()) { + Gradient& g = gradients_out[node]; g[0] = tri.gradient[tri_idx][0] * zv; g[1] = tri.gradient[tri_idx][1] * zv; g[2] = tv * dz; } - if (hessians_out) { - Real* h = hessians_out + node * 9u; + if (!hessians_out.empty()) { + Hessian& h = hessians_out[node]; const Hessian& th = tri.hessian[tri_idx]; const Gradient& tg = tri.gradient[tri_idx]; - h[0] = th(0, 0) * zv; - h[1] = th(0, 1) * zv; - h[2] = tg[0] * dz; - h[3] = h[1]; - h[4] = th(1, 1) * zv; - h[5] = tg[1] * dz; - h[6] = h[2]; - h[7] = h[5]; - h[8] = tv * d2z; + h(0, 0) = th(0, 0) * zv; + h(0, 1) = th(0, 1) * zv; + h(0, 2) = tg[0] * dz; + h(1, 0) = h(0, 1); + h(1, 1) = th(1, 1) * zv; + h(1, 2) = tg[1] * dz; + h(2, 0) = h(0, 2); + h(2, 1) = h(1, 2); + h(2, 2) = tv * d2z; } } } -// Evaluate requested basis quantities into caller-provided flat buffers. +// Evaluate requested basis quantities into caller-provided spans. 
void LagrangeBasis::evaluate_all_to(const Vec3& xi, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) const { + std::span values_out, + std::span gradients_out, + std::span hessians_out) const { + require_requested_span_size(values_out, size(), "LagrangeBasis::evaluate_all_to values"); + require_requested_span_size(gradients_out, size(), "LagrangeBasis::evaluate_all_to gradients"); + require_requested_span_size(hessians_out, size(), "LagrangeBasis::evaluate_all_to hessians"); + + if (values_out.empty() && gradients_out.empty() && hessians_out.empty()) { + return; + } + switch (topology_) { case BasisTopology::Point: evaluate_point_to(values_out, gradients_out, hessians_out); @@ -559,27 +585,19 @@ void LagrangeBasis::evaluate_all_to(const Vec3& xi, void LagrangeBasis::evaluate_values(const Vec3& xi, std::vector& values) const { values.resize(size()); - evaluate_values_to(xi, values.data()); + evaluate_values_to(xi, std::span(values.data(), values.size())); } void LagrangeBasis::evaluate_gradients(const Vec3& xi, std::vector& gradients) const { gradients.resize(size()); - std::vector flat(size() * 3u, Real(0)); - evaluate_gradients_to(xi, flat.data()); - for (std::size_t i = 0; i < size(); ++i) { - gradients[i] = load_gradient(flat.data() + i * 3u); - } + evaluate_gradients_to(xi, std::span(gradients.data(), gradients.size())); } void LagrangeBasis::evaluate_hessians(const Vec3& xi, std::vector& hessians) const { hessians.resize(size()); - std::vector flat(size() * 9u, Real(0)); - evaluate_hessians_to(xi, flat.data()); - for (std::size_t i = 0; i < size(); ++i) { - hessians[i] = load_hessian(flat.data() + i * 9u); - } + evaluate_hessians_to(xi, std::span(hessians.data(), hessians.size())); } void LagrangeBasis::evaluate_all(const Vec3& xi, @@ -589,28 +607,28 @@ void LagrangeBasis::evaluate_all(const Vec3& xi, values.resize(size()); gradients.resize(size()); hessians.resize(size()); - std::vector flat_g(size() * 
3u, Real(0)); - std::vector flat_h(size() * 9u, Real(0)); - evaluate_all_to(xi, values.data(), flat_g.data(), flat_h.data()); - for (std::size_t i = 0; i < size(); ++i) { - gradients[i] = load_gradient(flat_g.data() + i * 3u); - hessians[i] = load_hessian(flat_h.data() + i * 9u); - } + evaluate_all_to(xi, + std::span(values.data(), values.size()), + std::span(gradients.data(), gradients.size()), + std::span(hessians.data(), hessians.size())); } void LagrangeBasis::evaluate_values_to(const Vec3& xi, - Real* SVMP_RESTRICT values_out) const { - evaluate_all_to(xi, values_out, nullptr, nullptr); + std::span values_out) const { + require_output_span_size(values_out.size(), size(), "LagrangeBasis::evaluate_values_to"); + evaluate_all_to(xi, values_out, std::span{}, std::span{}); } void LagrangeBasis::evaluate_gradients_to(const Vec3& xi, - Real* SVMP_RESTRICT gradients_out) const { - evaluate_all_to(xi, nullptr, gradients_out, nullptr); + std::span gradients_out) const { + require_output_span_size(gradients_out.size(), size(), "LagrangeBasis::evaluate_gradients_to"); + evaluate_all_to(xi, std::span{}, gradients_out, std::span{}); } void LagrangeBasis::evaluate_hessians_to(const Vec3& xi, - Real* SVMP_RESTRICT hessians_out) const { - evaluate_all_to(xi, nullptr, nullptr, hessians_out); + std::span hessians_out) const { + require_output_span_size(hessians_out.size(), size(), "LagrangeBasis::evaluate_hessians_to"); + evaluate_all_to(xi, std::span{}, std::span{}, hessians_out); } } // namespace basis diff --git a/Code/Source/solver/FE/Basis/LagrangeBasis.h b/Code/Source/solver/FE/Basis/LagrangeBasis.h index cd0ca6058..6137a557a 100644 --- a/Code/Source/solver/FE/Basis/LagrangeBasis.h +++ b/Code/Source/solver/FE/Basis/LagrangeBasis.h @@ -9,6 +9,7 @@ #include #include +#include namespace svmp { namespace FE { @@ -58,8 +59,8 @@ namespace basis { /// \f$N_{a k}(r,s,t) = T_a(r,s)l_k(t)\f$. /// /// The vector-returning evaluators are convenient API wrappers. 
The `*_to` -/// methods write to caller-provided flat buffers and are intended for assembly -/// paths that avoid temporary allocations. +/// methods write to caller-provided spans and are intended for assembly paths +/// that avoid temporary allocations. class LagrangeBasis : public BasisFunction { public: /// \brief Axis-index tuple for tensor-product reference nodes. @@ -171,38 +172,36 @@ class LagrangeBasis : public BasisFunction { std::vector& gradients, std::vector& hessians) const final; - /// \brief Evaluate Lagrange basis values into a flat caller-provided buffer. + /// \brief Evaluate Lagrange basis values into caller-provided storage. /// /// \details This is the low-allocation API intended for element assembly - /// loops. The buffer is filled in basis-node order and no vector resizing - /// is performed. + /// loops. The span is filled in basis-node order and no vector resizing is + /// performed. /// /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. - /// \param values_out Output buffer with at least size() entries. + /// \param values_out Output span with at least size() entries. void evaluate_values_to(const math::Vector& xi, - Real* SVMP_RESTRICT values_out) const final; + std::span values_out) const final; - /// \brief Evaluate Lagrange basis gradients into a flat caller-provided buffer. + /// \brief Evaluate Lagrange basis gradients into caller-provided storage. /// - /// \details Gradients are written in node-major order with three - /// reference-coordinate components per node. For node \f$i\f$ and component - /// \f$c\f$, the entry is `gradients_out[i * 3 + c]`. + /// \details Gradients are written in basis-node order with one + /// three-component gradient per node. /// /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. - /// \param gradients_out Output buffer with node-major layout: node * 3 + component. 
+ /// \param gradients_out Output span with at least size() entries. void evaluate_gradients_to(const math::Vector& xi, - Real* SVMP_RESTRICT gradients_out) const final; + std::span gradients_out) const final; - /// \brief Evaluate Lagrange basis Hessians into a flat caller-provided buffer. + /// \brief Evaluate Lagrange basis Hessians into caller-provided storage. /// - /// \details Hessians are written in node-major row-major order. For node - /// \f$i\f$ and Hessian component \f$(r,c)\f$, the entry is - /// `hessians_out[i * 9 + r * 3 + c]`. + /// \details Hessians are written in basis-node order with one 3-by-3 + /// Hessian per node. /// /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. - /// \param hessians_out Output buffer with node-major row-major layout: node * 9 + row * 3 + col. + /// \param hessians_out Output span with at least size() entries. void evaluate_hessians_to(const math::Vector& xi, - Real* SVMP_RESTRICT hessians_out) const final; + std::span hessians_out) const final; private: ElementType element_type_; @@ -224,24 +223,24 @@ class LagrangeBasis : public BasisFunction { void init_equispaced_1d_nodes(); void evaluate_all_to(const math::Vector& xi, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) const; - void evaluate_point_to(Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) const; + std::span values_out, + std::span gradients_out, + std::span hessians_out) const; + void evaluate_point_to(std::span values_out, + std::span gradients_out, + std::span hessians_out) const; void evaluate_tensor_product_to(const math::Vector& xi, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) const; + std::span values_out, + std::span gradients_out, + std::span hessians_out) const; void evaluate_simplex_to(const math::Vector& xi, - Real* 
SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) const; + std::span values_out, + std::span gradients_out, + std::span hessians_out) const; void evaluate_wedge_to(const math::Vector& xi, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) const; + std::span values_out, + std::span gradients_out, + std::span hessians_out) const; }; /// @} diff --git a/Code/Source/solver/FE/Basis/SerendipityBasis.cpp b/Code/Source/solver/FE/Basis/SerendipityBasis.cpp index fd5f99cbc..ae505c2cf 100644 --- a/Code/Source/solver/FE/Basis/SerendipityBasis.cpp +++ b/Code/Source/solver/FE/Basis/SerendipityBasis.cpp @@ -21,9 +21,9 @@ using Vec3 = math::Vector; void evaluate_hex8_reference(Real r, Real s, Real t, - Real* values, - Real* gradients, - Real* hessians) { + std::span values, + std::span gradients, + std::span hessians) { static constexpr int signs[8][3] = { {-1, -1, -1}, { 1, -1, -1}, @@ -43,26 +43,26 @@ void evaluate_hex8_reference(Real r, const Real bs = Real(1) + b * s; const Real ct = Real(1) + c * t; - if (values) { + if (!values.empty()) { values[i] = Real(0.125) * ar * bs * ct; } - if (gradients) { - Real* g = gradients + i * 3u; + if (!gradients.empty()) { + Gradient& g = gradients[i]; g[0] = Real(0.125) * a * bs * ct; g[1] = Real(0.125) * b * ar * ct; g[2] = Real(0.125) * c * ar * bs; } - if (hessians) { - Real* h = hessians + i * 9u; - h[0] = Real(0); - h[1] = Real(0.125) * a * b * ct; - h[2] = Real(0.125) * a * c * bs; - h[3] = h[1]; - h[4] = Real(0); - h[5] = Real(0.125) * b * c * ar; - h[6] = h[2]; - h[7] = h[5]; - h[8] = Real(0); + if (!hessians.empty()) { + Hessian& h = hessians[i]; + h(0, 0) = Real(0); + h(0, 1) = Real(0.125) * a * b * ct; + h(0, 2) = Real(0.125) * a * c * bs; + h(1, 0) = h(0, 1); + h(1, 1) = Real(0); + h(1, 2) = Real(0.125) * b * c * ar; + h(2, 0) = h(0, 2); + h(2, 1) = h(1, 2); + h(2, 2) = Real(0); } } } @@ -262,7 +262,7 @@ inline std::array 
quadratic_powers(Real x) { return {Real(1), x, x * x}; } -void eval_hex20_internal(Real r, Real s, Real t, Real* internal_vals) { +void eval_hex20_internal(Real r, Real s, Real t, std::span internal_vals) { const auto rp = quadratic_powers(r); const auto sp = quadratic_powers(s); const auto tp = quadratic_powers(t); @@ -284,7 +284,7 @@ void eval_hex20_internal(Real r, Real s, Real t, Real* internal_vals) { } } -void eval_hex20_grad_internal(Real r, Real s, Real t, Gradient* internal_grads) { +void eval_hex20_grad_internal(Real r, Real s, Real t, std::span internal_grads) { const auto rp = quadratic_powers(r); const auto sp = quadratic_powers(s); const auto tp = quadratic_powers(t); @@ -321,7 +321,7 @@ void eval_hex20_grad_internal(Real r, Real s, Real t, Gradient* internal_grads) } } -void eval_hex20_hess_internal(Real r, Real s, Real t, Hessian* internal_hessians) { +void eval_hex20_hess_internal(Real r, Real s, Real t, std::span internal_hessians) { const auto rp = quadratic_powers(r); const auto sp = quadratic_powers(s); const auto tp = quadratic_powers(t); @@ -384,9 +384,9 @@ void eval_hex20_hess_internal(Real r, Real s, Real t, Hessian* internal_hessians void eval_wedge15_polynomial(Real r, Real s, Real t, - Real* values, - Gradient* gradients, - Hessian* hessians) { + std::span values, + std::span gradients, + std::span hessians) { Real phi[15]{}; Real dr[15]{}; Real ds[15]{}; @@ -415,15 +415,15 @@ void eval_wedge15_polynomial(Real r, const Real sb = sp[bs]; const Real tc = tp[ct]; - if (values) { + if (!values.empty()) { phi[j] = ra * sb * tc; } - if (gradients) { + if (!gradients.empty()) { dr[j] = (a > 0) ? Real(a) * rp[ar - 1u] * sb * tc : Real(0); ds[j] = (b > 0) ? ra * Real(b) * sp[bs - 1u] * tc : Real(0); dt[j] = (c > 0) ? ra * sb * Real(c) * tp[ct - 1u] : Real(0); } - if (hessians) { + if (!hessians.empty()) { drr[j] = (a > 1) ? Real(a * (a - 1)) * rp[ar - 2u] * sb * tc : Real(0); dss[j] = (b > 1) ? 
ra * Real(b * (b - 1)) * sp[bs - 2u] * tc : Real(0); dtt[j] = (c > 1) ? ra * sb * Real(c * (c - 1)) * tp[ct - 2u] : Real(0); @@ -442,15 +442,15 @@ void eval_wedge15_polynomial(Real r, for (int j = 0; j < 15; ++j) { const Real coefficient = kWedge15Coefficients[static_cast(j)][static_cast(i)]; - if (values) { + if (!values.empty()) { value += coefficient * phi[j]; } - if (gradients) { + if (!gradients.empty()) { gr += coefficient * dr[j]; gs += coefficient * ds[j]; gt += coefficient * dt[j]; } - if (hessians) { + if (!hessians.empty()) { H(0, 0) += coefficient * drr[j]; H(1, 1) += coefficient * dss[j]; H(2, 2) += coefficient * dtt[j]; @@ -461,15 +461,15 @@ void eval_wedge15_polynomial(Real r, } const std::size_t index = static_cast(i); - if (values) { + if (!values.empty()) { values[index] = value; } - if (gradients) { + if (!gradients.empty()) { gradients[index][0] = gr; gradients[index][1] = gs; gradients[index][2] = gt; } - if (hessians) { + if (!hessians.empty()) { H(1, 0) = H(0, 1); H(2, 0) = H(0, 2); H(2, 1) = H(1, 2); @@ -478,6 +478,22 @@ void eval_wedge15_polynomial(Real r, } } +void require_output_span_size(std::size_t actual, + std::size_t expected, + const char* label) { + FE::throw_if(actual < expected, SVMP_HERE, + std::string(label) + ": output span is smaller than basis size"); +} + +template +void require_requested_span_size(std::span output, + std::size_t expected, + const char* label) { + if (!output.empty()) { + require_output_span_size(output.size(), expected, label); + } +} + } // namespace SerendipityBasis::SerendipityBasis(ElementType type, int order, bool geometry_mode) @@ -533,21 +549,25 @@ SerendipityBasis::SerendipityBasis(ElementType type, int order, bool geometry_mo } void SerendipityBasis::evaluate_all_to(const math::Vector& xi, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) const { - if (!values_out && !gradients_out && !hessians_out) { + std::span values_out, + std::span 
gradients_out, + std::span hessians_out) const { + require_requested_span_size(values_out, size_, "SerendipityBasis::evaluate_all_to values"); + require_requested_span_size(gradients_out, size_, "SerendipityBasis::evaluate_all_to gradients"); + require_requested_span_size(hessians_out, size_, "SerendipityBasis::evaluate_all_to hessians"); + + if (values_out.empty() && gradients_out.empty() && hessians_out.empty()) { return; } - if (values_out) { - std::fill_n(values_out, size_, Real(0)); + if (!values_out.empty()) { + std::fill(values_out.begin(), values_out.end(), Real(0)); } - if (gradients_out) { - std::fill_n(gradients_out, size_ * 3u, Real(0)); + if (!gradients_out.empty()) { + std::fill(gradients_out.begin(), gradients_out.end(), Gradient::Zero()); } - if (hessians_out) { - std::fill_n(hessians_out, size_ * 9u, Real(0)); + if (!hessians_out.empty()) { + std::fill(hessians_out.begin(), hessians_out.end(), Hessian::Zero()); } const Real x = xi[0]; @@ -581,20 +601,20 @@ void SerendipityBasis::evaluate_all_to(const math::Vector& xi, for (std::size_t i = 0; i < size_; ++i) { const Real coeff = quad_inv_vandermonde_[j * size_ + i]; - if (values_out) { + if (!values_out.empty()) { values_out[i] += value * coeff; } - if (gradients_out) { - Real* g = gradients_out + i * 3u; + if (!gradients_out.empty()) { + Gradient& g = gradients_out[i]; g[0] += dx * coeff; g[1] += dy * coeff; } - if (hessians_out) { - Real* h = hessians_out + i * 9u; - h[0] += dxx * coeff; - h[1] += dxy * coeff; - h[3] += dxy * coeff; - h[4] += dyy * coeff; + if (!hessians_out.empty()) { + Hessian& h = hessians_out[i]; + h(0, 0) += dxx * coeff; + h(0, 1) += dxy * coeff; + h(1, 0) += dxy * coeff; + h(1, 1) += dyy * coeff; } } } @@ -616,49 +636,37 @@ void SerendipityBasis::evaluate_all_to(const math::Vector& xi, FE::throw_if(mesh_to_basis.size() != size_, SVMP_HERE, "Hex20 mesh-to-basis ordering is not registered"); - if (values_out) { - Real internal_vals[20]; + if (!values_out.empty()) { + 
std::array internal_vals{}; eval_hex20_internal(x, y, z, internal_vals); for (std::size_t i = 0; i < 20u; ++i) { values_out[i] = internal_vals[mesh_to_basis[i]]; } } - if (gradients_out) { - Gradient internal_grads[20]; + if (!gradients_out.empty()) { + std::array internal_grads{}; eval_hex20_grad_internal(x, y, z, internal_grads); for (std::size_t i = 0; i < 20u; ++i) { - store_gradient(internal_grads[mesh_to_basis[i]], gradients_out + i * 3u); + gradients_out[i] = internal_grads[mesh_to_basis[i]]; } } - if (hessians_out) { - Hessian internal_hessians[20]; + if (!hessians_out.empty()) { + std::array internal_hessians{}; eval_hex20_hess_internal(x, y, z, internal_hessians); for (std::size_t i = 0; i < 20u; ++i) { - store_hessian(internal_hessians[mesh_to_basis[i]], hessians_out + i * 9u); + hessians_out[i] = internal_hessians[mesh_to_basis[i]]; } } return; } if (element_type_ == ElementType::Wedge15) { - std::array wedge_gradients{}; - std::array wedge_hessians{}; eval_wedge15_polynomial(x, y, z, values_out, - gradients_out ? wedge_gradients.data() : nullptr, - hessians_out ? 
wedge_hessians.data() : nullptr); - if (gradients_out) { - for (std::size_t i = 0; i < 15u; ++i) { - store_gradient(wedge_gradients[i], gradients_out + i * 3u); - } - } - if (hessians_out) { - for (std::size_t i = 0; i < 15u; ++i) { - store_hessian(wedge_hessians[i], hessians_out + i * 9u); - } - } + gradients_out, + hessians_out); return; } @@ -669,27 +677,19 @@ void SerendipityBasis::evaluate_all_to(const math::Vector& xi, void SerendipityBasis::evaluate_values(const math::Vector& xi, std::vector& values) const { values.resize(size_); - evaluate_values_to(xi, values.data()); + evaluate_values_to(xi, std::span(values.data(), values.size())); } void SerendipityBasis::evaluate_gradients(const math::Vector& xi, std::vector& gradients) const { gradients.resize(size_); - std::vector flat(size_ * 3u, Real(0)); - evaluate_gradients_to(xi, flat.data()); - for (std::size_t i = 0; i < size_; ++i) { - gradients[i] = load_gradient(flat.data() + i * 3u); - } + evaluate_gradients_to(xi, std::span(gradients.data(), gradients.size())); } void SerendipityBasis::evaluate_hessians(const math::Vector& xi, std::vector& hessians) const { hessians.resize(size_); - std::vector flat(size_ * 9u, Real(0)); - evaluate_hessians_to(xi, flat.data()); - for (std::size_t i = 0; i < size_; ++i) { - hessians[i] = load_hessian(flat.data() + i * 9u); - } + evaluate_hessians_to(xi, std::span(hessians.data(), hessians.size())); } void SerendipityBasis::evaluate_all(const math::Vector& xi, @@ -699,28 +699,28 @@ void SerendipityBasis::evaluate_all(const math::Vector& xi, values.resize(size_); gradients.resize(size_); hessians.resize(size_); - std::vector flat_gradients(size_ * 3u, Real(0)); - std::vector flat_hessians(size_ * 9u, Real(0)); - evaluate_all_to(xi, values.data(), flat_gradients.data(), flat_hessians.data()); - for (std::size_t i = 0; i < size_; ++i) { - gradients[i] = load_gradient(flat_gradients.data() + i * 3u); - hessians[i] = load_hessian(flat_hessians.data() + i * 9u); - } + 
evaluate_all_to(xi, + std::span(values.data(), values.size()), + std::span(gradients.data(), gradients.size()), + std::span(hessians.data(), hessians.size())); } void SerendipityBasis::evaluate_values_to(const math::Vector& xi, - Real* SVMP_RESTRICT values_out) const { - evaluate_all_to(xi, values_out, nullptr, nullptr); + std::span values_out) const { + require_output_span_size(values_out.size(), size_, "SerendipityBasis::evaluate_values_to"); + evaluate_all_to(xi, values_out, std::span{}, std::span{}); } void SerendipityBasis::evaluate_gradients_to(const math::Vector& xi, - Real* SVMP_RESTRICT gradients_out) const { - evaluate_all_to(xi, nullptr, gradients_out, nullptr); + std::span gradients_out) const { + require_output_span_size(gradients_out.size(), size_, "SerendipityBasis::evaluate_gradients_to"); + evaluate_all_to(xi, std::span{}, gradients_out, std::span{}); } void SerendipityBasis::evaluate_hessians_to(const math::Vector& xi, - Real* SVMP_RESTRICT hessians_out) const { - evaluate_all_to(xi, nullptr, nullptr, hessians_out); + std::span hessians_out) const { + require_output_span_size(hessians_out.size(), size_, "SerendipityBasis::evaluate_hessians_to"); + evaluate_all_to(xi, std::span{}, std::span{}, hessians_out); } } // namespace basis diff --git a/Code/Source/solver/FE/Basis/SerendipityBasis.h b/Code/Source/solver/FE/Basis/SerendipityBasis.h index 9c55c8eec..e231ed833 100644 --- a/Code/Source/solver/FE/Basis/SerendipityBasis.h +++ b/Code/Source/solver/FE/Basis/SerendipityBasis.h @@ -12,6 +12,7 @@ #include "BasisFunction.h" #include +#include namespace svmp { namespace FE { @@ -153,7 +154,7 @@ class SerendipityBasis final : public BasisFunction { /// \brief Evaluate serendipity values, gradients, and Hessians together. 
/// - /// \details This vector API is backed by the same flat-buffer evaluator as + /// \details This vector API is backed by the same span-based evaluator as /// the assembly-oriented `*_to` methods, so topology-specific polynomial /// setup can be shared for a quadrature point. /// @@ -166,23 +167,23 @@ class SerendipityBasis final : public BasisFunction { std::vector& gradients, std::vector& hessians) const final; - /// \brief Evaluate serendipity basis values into a flat caller-provided buffer. + /// \brief Evaluate serendipity basis values into caller-provided storage. /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. - /// \param values_out Output buffer with at least size() entries. + /// \param values_out Output span with at least size() entries. void evaluate_values_to(const math::Vector& xi, - Real* SVMP_RESTRICT values_out) const final; + std::span values_out) const final; - /// \brief Evaluate serendipity basis gradients into a flat caller-provided buffer. + /// \brief Evaluate serendipity basis gradients into caller-provided storage. /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. - /// \param gradients_out Output buffer with node-major layout: node * 3 + component. + /// \param gradients_out Output span with at least size() entries. void evaluate_gradients_to(const math::Vector& xi, - Real* SVMP_RESTRICT gradients_out) const final; + std::span gradients_out) const final; - /// \brief Evaluate serendipity basis Hessians into a flat caller-provided buffer. + /// \brief Evaluate serendipity basis Hessians into caller-provided storage. /// \param xi Reference coordinate. Lower-dimensional elements use the active prefix components. - /// \param hessians_out Output buffer with node-major row-major layout: node * 9 + row * 3 + col. + /// \param hessians_out Output span with at least size() entries. 
void evaluate_hessians_to(const math::Vector& xi, - Real* SVMP_RESTRICT hessians_out) const final; + std::span hessians_out) const final; private: ElementType element_type_; @@ -199,9 +200,9 @@ class SerendipityBasis final : public BasisFunction { bool geometry_mode_; void evaluate_all_to(const math::Vector& xi, - Real* SVMP_RESTRICT values_out, - Real* SVMP_RESTRICT gradients_out, - Real* SVMP_RESTRICT hessians_out) const; + std::span values_out, + std::span gradients_out, + std::span hessians_out) const; }; /// @} diff --git a/Code/Source/solver/FE/Common/Types.h b/Code/Source/solver/FE/Common/Types.h index 1f57ffcc5..462b7ca76 100644 --- a/Code/Source/solver/FE/Common/Types.h +++ b/Code/Source/solver/FE/Common/Types.h @@ -58,17 +58,6 @@ enum class CellFamily { #include #include -#if defined(_MSC_VER) -/// Portable restrict qualifier for aliasing-free pointer parameters. -# define SVMP_RESTRICT __restrict -#elif defined(__clang__) || defined(__GNUC__) -/// Portable restrict qualifier for aliasing-free pointer parameters. -# define SVMP_RESTRICT __restrict__ -#else -/// Portable restrict qualifier for aliasing-free pointer parameters. -# define SVMP_RESTRICT -#endif - /// \defgroup FE_Common Common /// \ingroup FE /// \brief Shared vocabulary types, constants, and exception infrastructure used by every FE module. diff --git a/Code/Source/solver/FE/Math/DenseTransformKernels.h b/Code/Source/solver/FE/Math/DenseTransformKernels.h index 2ddb9cefa..f6639dcd3 100644 --- a/Code/Source/solver/FE/Math/DenseTransformKernels.h +++ b/Code/Source/solver/FE/Math/DenseTransformKernels.h @@ -4,11 +4,13 @@ #ifndef SVMP_FE_MATH_DENSETRANSFORMKERNELS_H #define SVMP_FE_MATH_DENSETRANSFORMKERNELS_H +#include "FEException.h" #include "Types.h" #include #include +#include namespace svmp { namespace FE { @@ -22,18 +24,31 @@ namespace math { /// (row stride output_row_stride). Strides may exceed rhs_count for padded /// layouts; padding entries are left untouched. 
inline void dense_transform_batched_row_major( - const Real* SVMP_RESTRICT matrix, + std::span matrix, std::size_t rows, std::size_t cols, - const Real* SVMP_RESTRICT input, + std::span input, std::size_t input_row_stride, - Real* SVMP_RESTRICT output, + std::span output, std::size_t output_row_stride, std::size_t rhs_count) { if (rows == 0u || cols == 0u || rhs_count == 0u) { return; } + FE::throw_if(matrix.size() < rows * cols, SVMP_HERE, + "dense_transform_batched_row_major: matrix span is too small"); + FE::throw_if(input_row_stride < rhs_count, SVMP_HERE, + "dense_transform_batched_row_major: input stride is smaller than RHS count"); + FE::throw_if(output_row_stride < rhs_count, SVMP_HERE, + "dense_transform_batched_row_major: output stride is smaller than RHS count"); + FE::throw_if( + input.size() < (cols - 1u) * input_row_stride + rhs_count, SVMP_HERE, + "dense_transform_batched_row_major: input span is too small"); + FE::throw_if( + output.size() < (rows - 1u) * output_row_stride + rhs_count, SVMP_HERE, + "dense_transform_batched_row_major: output span is too small"); + using RowMajorMatrix = Eigen::Matrix; using ConstMap = Eigen::Map; @@ -42,16 +57,16 @@ inline void dense_transform_batched_row_major( using StridedMap = Eigen::Map>; - const ConstMap matrix_map(matrix, + const ConstMap matrix_map(matrix.data(), static_cast(rows), static_cast(cols)); const ConstStridedMap input_map( - input, + input.data(), static_cast(cols), static_cast(rhs_count), Eigen::OuterStride<>(static_cast(input_row_stride))); StridedMap output_map( - output, + output.data(), static_cast(rows), static_cast(rhs_count), Eigen::OuterStride<>(static_cast(output_row_stride))); diff --git a/tests/unitTests/FE/Basis/test_BasisErrorPaths.cpp b/tests/unitTests/FE/Basis/test_BasisErrorPaths.cpp index 60ca72114..edeca5ac5 100644 --- a/tests/unitTests/FE/Basis/test_BasisErrorPaths.cpp +++ b/tests/unitTests/FE/Basis/test_BasisErrorPaths.cpp @@ -255,29 +255,29 @@ TEST(BasisErrorPaths, 
NumericalDerivativeHelpersMatchAnalyticDerivatives) { } } -TEST(BasisErrorPaths, BasisFunctionFallbackWritesRawLayouts) { +TEST(BasisErrorPaths, BasisFunctionFallbackWritesSpanOutputs) { CompleteFallbackBasis basis; const math::Vector point{Real(0.25), Real(0.5), Real(-0.25)}; - std::vector flat_values(basis.size()); - std::vector flat_gradients(basis.size() * 3u); - std::vector flat_hessians(basis.size() * 9u); - basis.evaluate_values_to(point, flat_values.data()); - basis.evaluate_gradients_to(point, flat_gradients.data()); - basis.evaluate_hessians_to(point, flat_hessians.data()); + std::vector span_values(basis.size()); + std::vector span_gradients(basis.size()); + std::vector span_hessians(basis.size()); + basis.evaluate_values_to(point, span_values); + basis.evaluate_gradients_to(point, span_gradients); + basis.evaluate_hessians_to(point, span_hessians); std::vector expected_values; std::vector expected_gradients; std::vector expected_hessians; basis.evaluate_all(point, expected_values, expected_gradients, expected_hessians); for (std::size_t d = 0; d < basis.size(); ++d) { - EXPECT_EQ(flat_values[d], expected_values[d]); + EXPECT_EQ(span_values[d], expected_values[d]); for (std::size_t c = 0; c < 3u; ++c) { - EXPECT_EQ(flat_gradients[d * 3u + c], expected_gradients[d][c]); + EXPECT_EQ(span_gradients[d][c], expected_gradients[d][c]); } for (std::size_t r = 0; r < 3u; ++r) { for (std::size_t c = 0; c < 3u; ++c) { - EXPECT_EQ(flat_hessians[d * 9u + r * 3u + c], expected_hessians[d](r, c)); + EXPECT_EQ(span_hessians[d](r, c), expected_hessians[d](r, c)); } } } diff --git a/tests/unitTests/FE/Basis/test_LagrangeBasis.cpp b/tests/unitTests/FE/Basis/test_LagrangeBasis.cpp index 8a1f43c58..68232d216 100644 --- a/tests/unitTests/FE/Basis/test_LagrangeBasis.cpp +++ b/tests/unitTests/FE/Basis/test_LagrangeBasis.cpp @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -116,27 +117,27 @@ void expect_partition_gradient_hessian_sums(const LagrangeBasis& 
basis, } } -void expect_raw_sinks_match_vector_evaluation(const LagrangeBasis& basis, - const Point& xi) +void expect_span_sinks_match_vector_evaluation(const LagrangeBasis& basis, + const Point& xi) { std::vector values; std::vector gradients; std::vector hessians; basis.evaluate_all(xi, values, gradients, hessians); - std::vector flat_values(basis.size()); - std::vector flat_gradients(basis.size() * 3u); - std::vector flat_hessians(basis.size() * 9u); - basis.evaluate_values_to(xi, flat_values.data()); - basis.evaluate_gradients_to(xi, flat_gradients.data()); - basis.evaluate_hessians_to(xi, flat_hessians.data()); + std::vector span_values(basis.size()); + std::vector span_gradients(basis.size()); + std::vector span_hessians(basis.size()); + basis.evaluate_values_to(xi, span_values); + basis.evaluate_gradients_to(xi, span_gradients); + basis.evaluate_hessians_to(xi, span_hessians); for (std::size_t i = 0; i < basis.size(); ++i) { - EXPECT_NEAR(flat_values[i], values[i], Real(1e-14)); + EXPECT_NEAR(span_values[i], values[i], Real(1e-14)); for (std::size_t d = 0; d < 3u; ++d) { - EXPECT_NEAR(flat_gradients[i * 3u + d], gradients[i][d], Real(1e-14)); + EXPECT_NEAR(span_gradients[i][d], gradients[i][d], Real(1e-14)); for (std::size_t e = 0; e < 3u; ++e) { - EXPECT_NEAR(flat_hessians[i * 9u + d * 3u + e], + EXPECT_NEAR(span_hessians[i](d, e), hessians[i](d, e), Real(1e-14)); } @@ -251,10 +252,10 @@ TEST(LagrangeBasis, CanonicalTopologiesAreNodalAndPartitionUnity) { } } -TEST(LagrangeBasis, RawOutputSinksMatchVectorEvaluationAcrossTopologies) { +TEST(LagrangeBasis, SpanOutputSinksMatchVectorEvaluationAcrossTopologies) { for (const auto& c : canonical_cases()) { LagrangeBasis basis(c.type, c.order); - expect_raw_sinks_match_vector_evaluation(basis, c.points.front()); + expect_span_sinks_match_vector_evaluation(basis, c.points.front()); } } @@ -461,19 +462,26 @@ TEST(LagrangeBasis, PointTopologyEvaluatesConstantUnity) { } } - Real flat_value = Real(-1); - Real 
flat_gradient[3] = {Real(-1), Real(-1), Real(-1)}; - Real flat_hessian[9]; - std::fill_n(flat_hessian, 9u, Real(-1)); - basis.evaluate_values_to(xi, &flat_value); - basis.evaluate_gradients_to(xi, flat_gradient); - basis.evaluate_hessians_to(xi, flat_hessian); - EXPECT_EQ(flat_value, Real(1)); + Real span_value = Real(-1); + Gradient span_gradient; + span_gradient[0] = span_gradient[1] = span_gradient[2] = Real(-1); + Hessian span_hessian; for (std::size_t d = 0; d < 3u; ++d) { - EXPECT_EQ(flat_gradient[d], Real(0)); + for (std::size_t e = 0; e < 3u; ++e) { + span_hessian(d, e) = Real(-1); + } + } + basis.evaluate_values_to(xi, std::span(&span_value, 1u)); + basis.evaluate_gradients_to(xi, std::span(&span_gradient, 1u)); + basis.evaluate_hessians_to(xi, std::span(&span_hessian, 1u)); + EXPECT_EQ(span_value, Real(1)); + for (std::size_t d = 0; d < 3u; ++d) { + EXPECT_EQ(span_gradient[d], Real(0)); } - for (std::size_t e = 0; e < 9u; ++e) { - EXPECT_EQ(flat_hessian[e], Real(0)); + for (std::size_t d = 0; d < 3u; ++d) { + for (std::size_t e = 0; e < 3u; ++e) { + EXPECT_EQ(span_hessian(d, e), Real(0)); + } } }