From 4cfbfd4811de56ad9afddf4706848ae5872d3a74 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Wed, 3 Jun 2026 10:29:18 -0600 Subject: [PATCH 1/3] internal - move JiT helpers only needed by SYCL to ceed/jit-tools-deprecated.h; this change should have no public facing effects --- Makefile | 3 +- .../ceed-sycl-gen-operator-build.sycl.cpp | 1 + .../sycl-gen/ceed-sycl-gen-qfunction.sycl.cpp | 1 + .../ceed-sycl-ref-qfunction-load.sycl.cpp | 1 + .../ceed-sycl-shared-basis.sycl.cpp | 1 + backends/sycl/ceed-sycl-compile.sycl.cpp | 1 + doc/sphinx/source/CHANGELOG.md | 2 + include/ceed/backend.h | 1 - include/ceed/jit-tools-deprecated.h | 15 + include/ceed/jit-tools.h | 6 - interface/ceed-jit-tools-deprecated.c | 338 ++++++++++++++++++ interface/ceed-jit-tools.c | 291 +-------------- interface/ceed-qfunction.c | 35 -- .../src/generated/libceed_bindings.jl | 4 - 14 files changed, 363 insertions(+), 337 deletions(-) create mode 100644 include/ceed/jit-tools-deprecated.h create mode 100644 interface/ceed-jit-tools-deprecated.c diff --git a/Makefile b/Makefile index 35c67c5aab..fe93e6749e 100644 --- a/Makefile +++ b/Makefile @@ -297,7 +297,7 @@ $(libceed.so) : CEED_LDFLAGS += $(if $(DARWIN), -install_name @rpath/$(notdir $( # ------------------------------------------------------------ # Interface and gallery -libceed.c := $(filter-out interface/ceed-cuda.c interface/ceed-hip.c interface/ceed-jit-source-root-$(if $(for_install),default,install).c, $(wildcard interface/ceed*.c backends/weak/*.c gallery/*.c)) +libceed.c := $(filter-out interface/ceed-cuda.c interface/ceed-hip.c interface/ceed-jit-tools-deprecated.c interface/ceed-jit-source-root-$(if $(for_install),default,install).c, $(wildcard interface/ceed*.c backends/weak/*.c gallery/*.c)) gallery.c := $(wildcard gallery/*/ceed*.c) libceed.c += $(gallery.c) libceed.h := $(shell git ls-files 'include/**/*.h') @@ -630,6 +630,7 @@ endif ifneq ($(SYCL_LIB_DIR),) PKG_LIBS += $(filter -fsycl 
-fno-sycl-id-queries-fit-in-int,$(SYCLFLAGS)) -lze_loader LIBCEED_CONTAINS_CXX = 1 + libceed.c += interface/ceed-jit-tools-deprecated.c libceed.sycl += $(sycl-core.cpp) $(sycl-ref.cpp) $(sycl-shared.cpp) $(sycl-gen.cpp) BACKENDS_MAKE += $(SYCL_BACKENDS) endif diff --git a/backends/sycl-gen/ceed-sycl-gen-operator-build.sycl.cpp b/backends/sycl-gen/ceed-sycl-gen-operator-build.sycl.cpp index ec783e5cc2..ef2b3f1f33 100644 --- a/backends/sycl-gen/ceed-sycl-gen-operator-build.sycl.cpp +++ b/backends/sycl-gen/ceed-sycl-gen-operator-build.sycl.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/backends/sycl-gen/ceed-sycl-gen-qfunction.sycl.cpp b/backends/sycl-gen/ceed-sycl-gen-qfunction.sycl.cpp index 99d1438269..5225b5f18c 100644 --- a/backends/sycl-gen/ceed-sycl-gen-qfunction.sycl.cpp +++ b/backends/sycl-gen/ceed-sycl-gen-qfunction.sycl.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include #include diff --git a/backends/sycl-ref/ceed-sycl-ref-qfunction-load.sycl.cpp b/backends/sycl-ref/ceed-sycl-ref-qfunction-load.sycl.cpp index 82cac87b6d..39d027254a 100644 --- a/backends/sycl-ref/ceed-sycl-ref-qfunction-load.sycl.cpp +++ b/backends/sycl-ref/ceed-sycl-ref-qfunction-load.sycl.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include diff --git a/backends/sycl-shared/ceed-sycl-shared-basis.sycl.cpp b/backends/sycl-shared/ceed-sycl-shared-basis.sycl.cpp index 0fc0ab8137..5f2c92940f 100644 --- a/backends/sycl-shared/ceed-sycl-shared-basis.sycl.cpp +++ b/backends/sycl-shared/ceed-sycl-shared-basis.sycl.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include diff --git a/backends/sycl/ceed-sycl-compile.sycl.cpp b/backends/sycl/ceed-sycl-compile.sycl.cpp index 78c82678c1..2a5ad65285 100644 --- a/backends/sycl/ceed-sycl-compile.sycl.cpp +++ b/backends/sycl/ceed-sycl-compile.sycl.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include diff --git a/doc/sphinx/source/CHANGELOG.md 
b/doc/sphinx/source/CHANGELOG.md index d3b5cf2491..eada478037 100644 --- a/doc/sphinx/source/CHANGELOG.md +++ b/doc/sphinx/source/CHANGELOG.md @@ -19,6 +19,8 @@ On this page we provide a summary of the main API changes, new features and exam - Rename `CeedCompositeOperator*()` to `CeedOperatorComposite*()` for uniformity. - Rust `libceed` crate: add features `shared` and `system`. - Rust `libceed-sys` crate: feature `static` (default) with `shared` (not default). +- Move JiT helper functions only required by SYCL backends to `ceed/jit-tools-deprecated.h`. +These functions will be removed when the SYCL backends are updated to reflect the improvements in the CUDA and HIP backends. ### New features diff --git a/include/ceed/backend.h b/include/ceed/backend.h index c37b6794c1..a993e0dffe 100644 --- a/include/ceed/backend.h +++ b/include/ceed/backend.h @@ -379,7 +379,6 @@ CEED_EXTERN int CeedQFunctionGetNumArgs(CeedQFunction qf, CeedInt *num_input_fie CEED_EXTERN int CeedQFunctionGetKernelName(CeedQFunction qf, const char **kernel_name); CEED_EXTERN int CeedQFunctionGetName(CeedQFunction qf, const char **name); CEED_EXTERN int CeedQFunctionGetSourcePath(CeedQFunction qf, const char **source_path); -CEED_EXTERN int CeedQFunctionLoadSourceToBuffer(CeedQFunction qf, const char **source_buffer); CEED_EXTERN int CeedQFunctionGetUserFunction(CeedQFunction qf, CeedQFunctionUser *f); CEED_EXTERN int CeedQFunctionGetContext(CeedQFunction qf, CeedQFunctionContext *ctx); CEED_EXTERN int CeedQFunctionGetContextData(CeedQFunction qf, CeedMemType mem_type, void *data); diff --git a/include/ceed/jit-tools-deprecated.h b/include/ceed/jit-tools-deprecated.h new file mode 100644 index 0000000000..4967c065c4 --- /dev/null +++ b/include/ceed/jit-tools-deprecated.h @@ -0,0 +1,15 @@ +/// Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors. +/// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 
+/// +/// SPDX-License-Identifier: BSD-2-Clause +/// +/// This file is part of CEED: http://github.com/ceed + +/// @file +/// Public header for JiT utility components of libCEED +#pragma once + +#include + +CEED_EXTERN int CeedLoadSourceToBuffer(Ceed ceed, const char *source_file_path, char **buffer); +CEED_EXTERN int CeedQFunctionLoadSourceToBuffer(CeedQFunction qf, const char **source_buffer); diff --git a/include/ceed/jit-tools.h b/include/ceed/jit-tools.h index c82a9ad075..0a0d4c5b5b 100644 --- a/include/ceed/jit-tools.h +++ b/include/ceed/jit-tools.h @@ -12,11 +12,5 @@ #include CEED_EXTERN int CeedCheckFilePath(Ceed ceed, const char *source_file_path, bool *is_valid); -CEED_EXTERN int CeedLoadSourceToBuffer(Ceed ceed, const char *source_file_path, char **buffer); -CEED_EXTERN int CeedLoadSourceAndInitializeBuffer(Ceed ceed, const char *source_file_path, CeedInt *num_file_paths, char ***file_paths, - char **buffer); -CEED_EXTERN int CeedLoadSourceToInitializedBuffer(Ceed ceed, const char *source_file_path, CeedInt *num_file_paths, char ***file_paths, - char **buffer); -CEED_EXTERN int CeedPathConcatenate(Ceed ceed, const char *base_file_path, const char *relative_file_path, char **new_file_path); CEED_EXTERN int CeedGetJitRelativePath(const char *absolute_file_path, const char **relative_file_path); CEED_EXTERN int CeedGetJitAbsolutePath(Ceed ceed, const char *relative_file_path, const char **absolute_file_path); diff --git a/interface/ceed-jit-tools-deprecated.c b/interface/ceed-jit-tools-deprecated.c new file mode 100644 index 0000000000..a4fffc0dfc --- /dev/null +++ b/interface/ceed-jit-tools-deprecated.c @@ -0,0 +1,338 @@ +// Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 
+// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +#include +#include +#include +#include +#include +#include +#include +#include + +/** + @brief Normalize a filepath + + @param[in] ceed `Ceed` object for error handling + @param[in] source_file_path Absolute path to source file + @param[out] normalized_source_file_path Normalized filepath + + @return An error code: 0 - success, otherwise - failure + + @ref Backend +**/ +static int CeedNormalizePath(Ceed ceed, const char *source_file_path, char **normalized_source_file_path) { + CeedCall(CeedStringAllocCopy(source_file_path, normalized_source_file_path)); + + char *first_dot = strchr(*normalized_source_file_path, '.'); + + while (first_dot) { + char *search_from = first_dot + 1; + char keyword[5] = ""; + + // -- Check for /./ and convert to / + if (first_dot != *normalized_source_file_path && strlen(first_dot) > 2) memcpy(keyword, &first_dot[-1], 3); + bool is_here = !strcmp(keyword, "/./"); + + if (is_here) { + for (CeedInt i = 0; first_dot[i - 1]; i++) first_dot[i] = first_dot[i + 2]; + search_from = first_dot; + } else { + // -- Check for /foo/../ and convert to / + if (first_dot != *normalized_source_file_path && strlen(first_dot) > 3) memcpy(keyword, &first_dot[-1], 4); + bool is_up_one = !strcmp(keyword, "/../"); + + if (is_up_one) { + char *last_slash = &first_dot[-2]; + + while (last_slash[0] != '/' && last_slash != *normalized_source_file_path) last_slash--; + CeedCheck(last_slash != *normalized_source_file_path, ceed, CEED_ERROR_MAJOR, "Malformed source path %s", source_file_path); + for (CeedInt i = 0; first_dot[i + 1]; i++) last_slash[i] = first_dot[i + 2]; + search_from = last_slash; + } + } + first_dot = strchr(search_from, '.'); + } + return CEED_ERROR_SUCCESS; +} + +/** + @brief Load source file into initialized string buffer, including full text of local files in place of `#include "local.h"`.
+ This also updates the `num_file_paths` and `file_paths`. + Callers are responsible for freeing all filepath strings and the string buffer with @ref CeedFree(). + + @param[in] ceed `Ceed` object for error handling + @param[in] source_file_path Absolute path to source file + @param[in,out] num_file_paths Number of files already included + @param[in,out] file_paths Paths of files already included + @param[out] buffer String buffer for source file contents + + @return An error code: 0 - success, otherwise - failure + + @ref Backend +**/ +static int CeedLoadSourceToInitializedBuffer(Ceed ceed, const char *source_file_path, CeedInt *num_file_paths, char ***file_paths, char **buffer) { + FILE *source_file; + long file_size, file_offset = 0; + char *temp_buffer; + + // Debug + CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- Ceed JiT ----------\n"); + CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "Current source file: "); + CeedDebug(ceed, "%s\n", source_file_path); + + // Read file to temporary buffer + source_file = fopen(source_file_path, "rb"); + CeedCheck(source_file, ceed, CEED_ERROR_MAJOR, "Couldn't open source file: %s", source_file_path); + // -- Compute size of source + fseek(source_file, 0L, SEEK_END); + file_size = ftell(source_file); + fseek(source_file, 0L, SEEK_SET); + // -- Allocate memory for entire source file + { + const int ierr = CeedCalloc(file_size + 1, &temp_buffer); + + // Close stream before error handling, if necessary + if (ierr != CEED_ERROR_SUCCESS) fclose(source_file); + CeedCall(ierr); + } + // -- Copy the file into the buffer + if (1 != fread(temp_buffer, file_size, 1, source_file)) { + // LCOV_EXCL_START + fclose(source_file); + CeedCall(CeedFree(&temp_buffer)); + return CeedError(ceed, CEED_ERROR_MAJOR, "Couldn't read source file: %s", source_file_path); + // LCOV_EXCL_STOP + } + fclose(source_file); + + // Search for headers to include + const char *first_hash = strchr(temp_buffer, '#'); + + while (first_hash) { + // -- Check
for 'pragma' keyword + const char *next_m = strchr(first_hash, 'm'); + char keyword[8] = ""; + + if (next_m && next_m - first_hash >= 5) memcpy(keyword, &next_m[-4], 6); + bool is_hash_pragma = !strcmp(keyword, "pragma"); + + // ---- Spaces allowed in '# pragma' + if (next_m) { + for (CeedInt i = 1; first_hash - next_m + i < -5; i++) { + is_hash_pragma &= first_hash[i] == ' '; + } + } + if (is_hash_pragma) { + // -- Check if '#pragma once' + char *next_o = strchr(first_hash, 'o'); + char *next_new_line = strchr(first_hash, '\n'); + bool is_pragma_once = next_o && (next_new_line - next_o > 0) && !strncmp(next_o, "once", 4); + + // -- Copy into buffer, omitting last line if #pragma once + const long current_size = strlen(*buffer); + const long copy_size = first_hash - &temp_buffer[file_offset] + (is_pragma_once ? 0 : (next_new_line - first_hash + 1)); + + CeedCall(CeedRealloc(current_size + copy_size + 2, buffer)); + memcpy(&(*buffer)[current_size], "\n", 2); + memcpy(&(*buffer)[current_size + 1], &temp_buffer[file_offset], copy_size); + memcpy(&(*buffer)[current_size + copy_size + 1], "\0", 1); // NOLINT + + file_offset = strchr(first_hash, '\n') - temp_buffer + 1; + } + + // -- Check for 'include' keyword + const char *next_e = strchr(first_hash, 'e'); + + if (next_e && next_e - first_hash >= 7) memcpy(keyword, &next_e[-6], 7); + bool is_hash_include = !strcmp(keyword, "include"); + + // ---- Spaces allowed in '# include ' + if (next_e) { + for (CeedInt i = 1; first_hash - next_e + i < -6; i++) { + is_hash_include &= first_hash[i] == ' '; + } + } + if (is_hash_include) { + // -- Copy into buffer all preceding # + const long current_size = strlen(*buffer); + const long copy_size = first_hash - &temp_buffer[file_offset]; + + CeedCall(CeedRealloc(current_size + copy_size + 2, buffer)); + memcpy(&(*buffer)[current_size], "\n", 2); + memcpy(&(*buffer)[current_size + 1], &temp_buffer[file_offset], copy_size); + memcpy(&(*buffer)[current_size + copy_size + 1], "\0", 1); 
// NOLINT + // -- Load local "header.h" + char *next_quote = strchr(first_hash, '"'); + char *next_new_line = strchr(first_hash, '\n'); + bool is_local_header = is_hash_include && next_quote && (next_new_line - next_quote > 0); + char *next_left_chevron = strchr(first_hash, '<'); + bool is_ceed_header = next_left_chevron && (next_new_line - next_left_chevron > 0) && + (!strncmp(next_left_chevron, "", 14) || + !strncmp(next_left_chevron, "", 17) || !strncmp(next_left_chevron, "", 17)); + bool is_std_header = + next_left_chevron && (next_new_line - next_left_chevron > 0) && + (!strncmp(next_left_chevron, "", 8) || !strncmp(next_left_chevron, "'); + char *ceed_relative_path; + long ceed_relative_path_length = next_right_chevron - next_left_chevron - 1; + + CeedCall(CeedCalloc(ceed_relative_path_length + 1, &ceed_relative_path)); + memcpy(ceed_relative_path, &next_left_chevron[1], ceed_relative_path_length); + CeedCall(CeedGetJitAbsolutePath(ceed, ceed_relative_path, (const char **)&include_source_path)); + CeedCall(CeedFree(&ceed_relative_path)); + } + // ---- Recursive call to load source to buffer + char *normalized_include_source_path; + + CeedCall(CeedNormalizePath(ceed, include_source_path, &normalized_include_source_path)); + for (CeedInt i = 0; i < *num_file_paths; i++) is_included |= !strcmp(normalized_include_source_path, (*file_paths)[i]); + if (!is_included) { + CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "JiT Including: %s\n", normalized_include_source_path); + CeedCall(CeedLoadSourceToInitializedBuffer(ceed, normalized_include_source_path, num_file_paths, file_paths, buffer)); + CeedCall(CeedRealloc(*num_file_paths + 1, file_paths)); + CeedCall(CeedStringAllocCopy(normalized_include_source_path, &(*file_paths)[*num_file_paths])); + (*num_file_paths)++; + } + CeedCall(CeedFree(&include_source_path)); + CeedCall(CeedFree(&normalized_include_source_path)); + } else if (!is_std_header) { + const long header_copy_size = next_new_line - first_hash + 1; + + 
CeedCall(CeedRealloc(current_size + copy_size + header_copy_size + 2, buffer)); + memcpy(&(*buffer)[current_size + copy_size], "\n", 2); + memcpy(&(*buffer)[current_size + copy_size + 1], first_hash, header_copy_size); + memcpy(&(*buffer)[current_size + copy_size + header_copy_size], "\0", 1); + } + file_offset = strchr(first_hash, '\n') - temp_buffer + 1; + } + // -- Next hash + first_hash = strchr(&first_hash[1], '#'); + } + // Copy rest of source file into buffer + const long current_size = strlen(*buffer); + const long copy_size = strlen(&temp_buffer[file_offset]); + + CeedCall(CeedRealloc(current_size + copy_size + 2, buffer)); + memcpy(&(*buffer)[current_size], "\n", 2); + memcpy(&(*buffer)[current_size + 1], &temp_buffer[file_offset], copy_size); + memcpy(&(*buffer)[current_size + copy_size + 1], "\0", 1); + + // Cleanup + CeedCall(CeedFree(&temp_buffer)); + + // Debug + CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- Ceed JiT ----------\n"); + CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "Current source file: "); + CeedDebug(ceed, "%s\n", source_file_path); + CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "Final buffer:\n"); + CeedDebug(ceed, "%s\n", *buffer); + return CEED_ERROR_SUCCESS; +} + +/** + @brief Load source file into initialized string buffer, including full text of local files in place of `#include "local.h"`. + This also initializes and populates the `num_file_paths` and `source_file_paths`. + Callers are responsible freeing all filepath strings and the string buffer with @ref CeedFree(). 
+ + @param[in] ceed `Ceed` object for error handling + @param[in] source_file_path Absolute path to source file + @param[in,out] num_file_paths Number of files already included + @param[in,out] file_paths Paths of files already included + @param[out] buffer String buffer for source file contents + + @return An error code: 0 - success, otherwise - failure + + @ref Backend +**/ +static int CeedLoadSourceAndInitializeBuffer(Ceed ceed, const char *source_file_path, CeedInt *num_file_paths, char ***file_paths, char **buffer) { + // Ensure defaults were set + *num_file_paths = 0; + *file_paths = NULL; + + // Initialize + CeedCall(CeedCalloc(1, buffer)); + + // And load source + CeedCall(CeedLoadSourceToInitializedBuffer(ceed, source_file_path, num_file_paths, file_paths, buffer)); + return CEED_ERROR_SUCCESS; +} + +/** + @brief Initialize and load source file into string buffer, including full text of local files in place of `#include "local.h"`. + Caller is responsible for freeing the string buffer with @ref CeedFree(). + + @param[in] ceed `Ceed` object for error handling + @param[in] source_file_path Absolute path to source file + @param[out] buffer String buffer for source file contents + + @return An error code: 0 - success, otherwise - failure + + @ref Backend +**/ +int CeedLoadSourceToBuffer(Ceed ceed, const char *source_file_path, char **buffer) { + char **file_paths = NULL; + CeedInt num_file_paths = 0; + + // Load + CeedCall(CeedLoadSourceAndInitializeBuffer(ceed, source_file_path, &num_file_paths, &file_paths, buffer)); + + // Cleanup + for (CeedInt i = 0; i < num_file_paths; i++) CeedCall(CeedFree(&file_paths[i])); + CeedCall(CeedFree(&file_paths)); + return CEED_ERROR_SUCCESS; +} + +/** + @brief Initialize and load `CeedQFunction` source file into string buffer, including full text of local files in place of `#include "local.h"`. + + The `buffer` is set to `NULL` if there is no `CeedQFunction` source file. 
+ + Note: This function may as well return a mutable buffer, but all current uses + do not modify it. (This is just a downside of `const` semantics with output + arguments instead of returns.) + + Note: Caller is responsible for freeing the string buffer with @ref CeedFree(). + + @param[in] qf `CeedQFunction` + @param[out] source_buffer String buffer for source file contents + + @return An error code: 0 - success, otherwise - failure + + @ref Backend +**/ +int CeedQFunctionLoadSourceToBuffer(CeedQFunction qf, const char **source_buffer) { + const char *source_path; + + CeedCall(CeedQFunctionGetSourcePath(qf, &source_path)); + *source_buffer = NULL; + if (source_path) { + Ceed ceed; + char *buffer = NULL; + + CeedCall(CeedQFunctionGetCeed(qf, &ceed)); + CeedCall(CeedLoadSourceToBuffer(ceed, source_path, &buffer)); + CeedCall(CeedDestroy(&ceed)); + *source_buffer = buffer; + } + return CEED_ERROR_SUCCESS; +} diff --git a/interface/ceed-jit-tools.c b/interface/ceed-jit-tools.c index bb058062ac..7c7a759387 100644 --- a/interface/ceed-jit-tools.c +++ b/interface/ceed-jit-tools.c @@ -60,295 +60,6 @@ int CeedCheckFilePath(Ceed ceed, const char *source_file_path, bool *is_valid) { return CEED_ERROR_SUCCESS; } -/** - @brief Normalize a filepath - - @param[in] ceed `Ceed` object for error handling - @param[in] source_file_path Absolute path to source file - @param[out] normalized_source_file_path Normalized filepath - - @return An error code: 0 - success, otherwise - failure - - @ref Backend -**/ -static int CeedNormalizePath(Ceed ceed, const char *source_file_path, char **normalized_source_file_path) { - CeedCall(CeedStringAllocCopy(source_file_path, normalized_source_file_path)); - - char *first_dot = strchr(*normalized_source_file_path, '.'); - - while (first_dot) { - char *search_from = first_dot + 1; - char keyword[5] = ""; - - // -- Check for /./ and covert to / - if (first_dot != *normalized_source_file_path && strlen(first_dot) > 2) memcpy(keyword, &first_dot[-1], 
3); - bool is_here = !strcmp(keyword, "/./"); - - if (is_here) { - for (CeedInt i = 0; first_dot[i - 1]; i++) first_dot[i] = first_dot[i + 2]; - search_from = first_dot; - } else { - // -- Check for /foo/../ and convert to / - if (first_dot != *normalized_source_file_path && strlen(first_dot) > 3) memcpy(keyword, &first_dot[-1], 4); - bool is_up_one = !strcmp(keyword, "/../"); - - if (is_up_one) { - char *last_slash = &first_dot[-2]; - - while (last_slash[0] != '/' && last_slash != *normalized_source_file_path) last_slash--; - CeedCheck(last_slash != *normalized_source_file_path, ceed, CEED_ERROR_MAJOR, "Malformed source path %s", source_file_path); - for (CeedInt i = 0; first_dot[i + 1]; i++) last_slash[i] = first_dot[i + 2]; - search_from = last_slash; - } - } - first_dot = strchr(search_from, '.'); - } - return CEED_ERROR_SUCCESS; -} - -/** - @brief Load source file into initialized string buffer, including full text of local files in place of `#include "local.h"`. - This also updates the `num_file_paths` and `source_file_paths`. - Callers are responsible freeing all filepath strings and the string buffer with @ref CeedFree(). 
- - @param[in] ceed `Ceed` object for error handling - @param[in] source_file_path Absolute path to source file - @param[in,out] num_file_paths Number of files already included - @param[in,out] file_paths Paths of files already included - @param[out] buffer String buffer for source file contents - - @return An error code: 0 - success, otherwise - failure - - @ref Backend -**/ -int CeedLoadSourceToInitializedBuffer(Ceed ceed, const char *source_file_path, CeedInt *num_file_paths, char ***file_paths, char **buffer) { - FILE *source_file; - long file_size, file_offset = 0; - char *temp_buffer; - - // Debug - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- Ceed JiT ----------\n"); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "Current source file: "); - CeedDebug(ceed, "%s\n", source_file_path); - - // Read file to temporary buffer - source_file = fopen(source_file_path, "rb"); - CeedCheck(source_file, ceed, CEED_ERROR_MAJOR, "Couldn't open source file: %s", source_file_path); - // -- Compute size of source - fseek(source_file, 0L, SEEK_END); - file_size = ftell(source_file); - fseek(source_file, 0L, SEEK_SET); - // -- Allocate memory for entire source file - { - const int ierr = CeedCalloc(file_size + 1, &temp_buffer); - - // Close stream before error handling, if necessary - if (ierr != CEED_ERROR_SUCCESS) fclose(source_file); - CeedCall(ierr); - } - // -- Copy the file into the buffer - if (1 != fread(temp_buffer, file_size, 1, source_file)) { - // LCOV_EXCL_START - fclose(source_file); - CeedCall(CeedFree(&temp_buffer)); - return CeedError(ceed, CEED_ERROR_MAJOR, "Couldn't read source file: %s", source_file_path); - // LCOV_EXCL_STOP - } - fclose(source_file); - - // Search for headers to include - const char *first_hash = strchr(temp_buffer, '#'); - - while (first_hash) { - // -- Check for 'pragma' keyword - const char *next_m = strchr(first_hash, 'm'); - char keyword[8] = ""; - - if (next_m && next_m - first_hash >= 5) memcpy(keyword, &next_m[-4], 6); - 
bool is_hash_pragma = !strcmp(keyword, "pragma"); - - // ---- Spaces allowed in '# pragma' - if (next_m) { - for (CeedInt i = 1; first_hash - next_m + i < -5; i++) { - is_hash_pragma &= first_hash[i] == ' '; - } - } - if (is_hash_pragma) { - // -- Check if '#pragma once' - char *next_o = strchr(first_hash, 'o'); - char *next_new_line = strchr(first_hash, '\n'); - bool is_pragma_once = next_o && (next_new_line - next_o > 0) && !strncmp(next_o, "once", 4); - - // -- Copy into buffer, omitting last line if #pragma once - const long current_size = strlen(*buffer); - const long copy_size = first_hash - &temp_buffer[file_offset] + (is_pragma_once ? 0 : (next_new_line - first_hash + 1)); - - CeedCall(CeedRealloc(current_size + copy_size + 2, buffer)); - memcpy(&(*buffer)[current_size], "\n", 2); - memcpy(&(*buffer)[current_size + 1], &temp_buffer[file_offset], copy_size); - memcpy(&(*buffer)[current_size + copy_size + 1], "\0", 1); // NOLINT - - file_offset = strchr(first_hash, '\n') - temp_buffer + 1; - } - - // -- Check for 'include' keyword - const char *next_e = strchr(first_hash, 'e'); - - if (next_e && next_e - first_hash >= 7) memcpy(keyword, &next_e[-6], 7); - bool is_hash_include = !strcmp(keyword, "include"); - - // ---- Spaces allowed in '# include ' - if (next_e) { - for (CeedInt i = 1; first_hash - next_e + i < -6; i++) { - is_hash_include &= first_hash[i] == ' '; - } - } - if (is_hash_include) { - // -- Copy into buffer all preceding # - const long current_size = strlen(*buffer); - const long copy_size = first_hash - &temp_buffer[file_offset]; - - CeedCall(CeedRealloc(current_size + copy_size + 2, buffer)); - memcpy(&(*buffer)[current_size], "\n", 2); - memcpy(&(*buffer)[current_size + 1], &temp_buffer[file_offset], copy_size); - memcpy(&(*buffer)[current_size + copy_size + 1], "\0", 1); // NOLINT - // -- Load local "header.h" - char *next_quote = strchr(first_hash, '"'); - char *next_new_line = strchr(first_hash, '\n'); - bool is_local_header = 
is_hash_include && next_quote && (next_new_line - next_quote > 0); - char *next_left_chevron = strchr(first_hash, '<'); - bool is_ceed_header = next_left_chevron && (next_new_line - next_left_chevron > 0) && - (!strncmp(next_left_chevron, "", 14) || - !strncmp(next_left_chevron, "", 17) || !strncmp(next_left_chevron, "", 17)); - bool is_std_header = - next_left_chevron && (next_new_line - next_left_chevron > 0) && - (!strncmp(next_left_chevron, "", 8) || !strncmp(next_left_chevron, "'); - char *ceed_relative_path; - long ceed_relative_path_length = next_right_chevron - next_left_chevron - 1; - - CeedCall(CeedCalloc(ceed_relative_path_length + 1, &ceed_relative_path)); - memcpy(ceed_relative_path, &next_left_chevron[1], ceed_relative_path_length); - CeedCall(CeedGetJitAbsolutePath(ceed, ceed_relative_path, (const char **)&include_source_path)); - CeedCall(CeedFree(&ceed_relative_path)); - } - // ---- Recursive call to load source to buffer - char *normalized_include_source_path; - - CeedCall(CeedNormalizePath(ceed, include_source_path, &normalized_include_source_path)); - for (CeedInt i = 0; i < *num_file_paths; i++) is_included |= !strcmp(normalized_include_source_path, (*file_paths)[i]); - if (!is_included) { - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "JiT Including: %s\n", normalized_include_source_path); - CeedCall(CeedLoadSourceToInitializedBuffer(ceed, normalized_include_source_path, num_file_paths, file_paths, buffer)); - CeedCall(CeedRealloc(*num_file_paths + 1, file_paths)); - CeedCall(CeedStringAllocCopy(normalized_include_source_path, &(*file_paths)[*num_file_paths])); - (*num_file_paths)++; - } - CeedCall(CeedFree(&include_source_path)); - CeedCall(CeedFree(&normalized_include_source_path)); - } else if (!is_std_header) { - const long header_copy_size = next_new_line - first_hash + 1; - - CeedCall(CeedRealloc(current_size + copy_size + header_copy_size + 2, buffer)); - memcpy(&(*buffer)[current_size + copy_size], "\n", 2); - 
memcpy(&(*buffer)[current_size + copy_size + 1], first_hash, header_copy_size); - memcpy(&(*buffer)[current_size + copy_size + header_copy_size], "\0", 1); - } - file_offset = strchr(first_hash, '\n') - temp_buffer + 1; - } - // -- Next hash - first_hash = strchr(&first_hash[1], '#'); - } - // Copy rest of source file into buffer - const long current_size = strlen(*buffer); - const long copy_size = strlen(&temp_buffer[file_offset]); - - CeedCall(CeedRealloc(current_size + copy_size + 2, buffer)); - memcpy(&(*buffer)[current_size], "\n", 2); - memcpy(&(*buffer)[current_size + 1], &temp_buffer[file_offset], copy_size); - memcpy(&(*buffer)[current_size + copy_size + 1], "\0", 1); - - // Cleanup - CeedCall(CeedFree(&temp_buffer)); - - // Debug - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- Ceed JiT ----------\n"); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "Current source file: "); - CeedDebug(ceed, "%s\n", source_file_path); - CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "Final buffer:\n"); - CeedDebug(ceed, "%s\n", *buffer); - return CEED_ERROR_SUCCESS; -} - -/** - @brief Load source file into initialized string buffer, including full text of local files in place of `#include "local.h"`. - This also initializes and populates the `num_file_paths` and `source_file_paths`. - Callers are responsible freeing all filepath strings and the string buffer with @ref CeedFree(). 
- - @param[in] ceed `Ceed` object for error handling - @param[in] source_file_path Absolute path to source file - @param[in,out] num_file_paths Number of files already included - @param[in,out] file_paths Paths of files already included - @param[out] buffer String buffer for source file contents - - @return An error code: 0 - success, otherwise - failure - - @ref Backend -**/ -int CeedLoadSourceAndInitializeBuffer(Ceed ceed, const char *source_file_path, CeedInt *num_file_paths, char ***file_paths, char **buffer) { - // Ensure defaults were set - *num_file_paths = 0; - *file_paths = NULL; - - // Initialize - CeedCall(CeedCalloc(1, buffer)); - - // And load source - CeedCall(CeedLoadSourceToInitializedBuffer(ceed, source_file_path, num_file_paths, file_paths, buffer)); - return CEED_ERROR_SUCCESS; -} - -/** - @brief Initialize and load source file into string buffer, including full text of local files in place of `#include "local.h"`. - User @ref CeedLoadSourceAndInitializeBuffer() and @ref CeedLoadSourceToInitializedBuffer() if loading multiple source files into the same buffer. - Caller is responsible for freeing the string buffer with @ref CeedFree(). - - @param[in] ceed `Ceed` object for error handling - @param[in] source_file_path Absolute path to source file - @param[out] buffer String buffer for source file contents - - @return An error code: 0 - success, otherwise - failure - - @ref Backend -**/ -int CeedLoadSourceToBuffer(Ceed ceed, const char *source_file_path, char **buffer) { - char **file_paths = NULL; - CeedInt num_file_paths = 0; - - // Load - CeedCall(CeedLoadSourceAndInitializeBuffer(ceed, source_file_path, &num_file_paths, &file_paths, buffer)); - - // Cleanup - for (CeedInt i = 0; i < num_file_paths; i++) CeedCall(CeedFree(&file_paths[i])); - CeedCall(CeedFree(&file_paths)); - return CEED_ERROR_SUCCESS; -} - /** @brief Build an absolute filepath from a base filepath and an absolute filepath. 
@@ -365,7 +76,7 @@ int CeedLoadSourceToBuffer(Ceed ceed, const char *source_file_path, char **buffe @ref Backend **/ -int CeedPathConcatenate(Ceed ceed, const char *base_file_path, const char *relative_file_path, char **new_file_path) { +static int CeedPathConcatenate(Ceed ceed, const char *base_file_path, const char *relative_file_path, char **new_file_path) { char *last_slash = strrchr(base_file_path, '/'); size_t base_length = (last_slash - base_file_path + 1), relative_length = strlen(relative_file_path), new_file_path_length = base_length + relative_length + 1; diff --git a/interface/ceed-qfunction.c b/interface/ceed-qfunction.c index 1d65b27a06..63b906dcb8 100644 --- a/interface/ceed-qfunction.c +++ b/interface/ceed-qfunction.c @@ -317,41 +317,6 @@ int CeedQFunctionGetSourcePath(CeedQFunction qf, const char **source_path) { return CEED_ERROR_SUCCESS; } -/** - @brief Initialize and load `CeedQFunction` source file into string buffer, including full text of local files in place of `#include "local.h"`. - - The `buffer` is set to `NULL` if there is no `CeedQFunction` source file. - - Note: This function may as well return a mutable buffer, but all current uses - do not modify it. (This is just a downside of `const` semantics with output - arguments instead of returns.) - - Note: Caller is responsible for freeing the string buffer with @ref CeedFree(). 
- - @param[in] qf `CeedQFunction` - @param[out] source_buffer String buffer for source file contents - - @return An error code: 0 - success, otherwise - failure - - @ref Backend -**/ -int CeedQFunctionLoadSourceToBuffer(CeedQFunction qf, const char **source_buffer) { - const char *source_path; - - CeedCall(CeedQFunctionGetSourcePath(qf, &source_path)); - *source_buffer = NULL; - if (source_path) { - Ceed ceed; - char *buffer = NULL; - - CeedCall(CeedQFunctionGetCeed(qf, &ceed)); - CeedCall(CeedLoadSourceToBuffer(ceed, source_path, &buffer)); - CeedCall(CeedDestroy(&ceed)); - *source_buffer = buffer; - } - return CEED_ERROR_SUCCESS; -} - /** @brief Get the User Function for a `CeedQFunction` diff --git a/julia/LibCEED.jl/src/generated/libceed_bindings.jl b/julia/LibCEED.jl/src/generated/libceed_bindings.jl index d4bba38974..be616f9c76 100644 --- a/julia/LibCEED.jl/src/generated/libceed_bindings.jl +++ b/julia/LibCEED.jl/src/generated/libceed_bindings.jl @@ -1195,10 +1195,6 @@ function CeedQFunctionGetSourcePath(qf, source_path) ccall((:CeedQFunctionGetSourcePath, libceed), Cint, (CeedQFunction, Ptr{Ptr{Cchar}}), qf, source_path) end -function CeedQFunctionLoadSourceToBuffer(qf, source_buffer) - ccall((:CeedQFunctionLoadSourceToBuffer, libceed), Cint, (CeedQFunction, Ptr{Ptr{Cchar}}), qf, source_buffer) -end - function CeedQFunctionGetUserFunction(qf, f) ccall((:CeedQFunctionGetUserFunction, libceed), Cint, (CeedQFunction, Ptr{CeedQFunctionUser}), qf, f) end From 97d87dda094c74039246fcbce4ac4010f4b7e5d8 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Wed, 3 Jun 2026 10:36:00 -0600 Subject: [PATCH 2/3] doc - fix bad symlink --- CHANGELOG.md | 2 + Makefile | 2 +- doc/sphinx/source/CHANGELOG.md | 712 +-------------------------------- 3 files changed, 4 insertions(+), 712 deletions(-) mode change 100644 => 120000 doc/sphinx/source/CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 5df5a5feb4..996c8f04e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ 
-18,6 +18,8 @@ On this page we provide a summary of the main API changes, new features and exam - Rename `CeedCompositeOperatorCreate()` to `CeedOperatorCreateComposite()` for uniformity. - Rename `CeedCompositeOperator*()` to `CeedOperatorComposite*()` for uniformity. - Add `build_objects` parameter to `CeedOperatorLinearAssembleQFunctionBuildOrUpdateFallback` to allow for passing uninitialized vectors and restrictions +- Move JiT helper functions only required by SYCL backends to `ceed/jit-tools-deprecated.h`. +These functions will be removed when the SYCL backends are updated to reflect the improvements in the CUDA and HIP backends. ### New features diff --git a/Makefile b/Makefile index fe93e6749e..846bbb9c80 100644 --- a/Makefile +++ b/Makefile @@ -1026,7 +1026,7 @@ AUTOPEP8_OPTS += --in-place --aggressive --max-line-length 120 format.ch := $(filter-out include/ceedf.h $(wildcard tests/t*-f.h) $(wildcard examples/ceed/ex*-f.h), $(shell git ls-files '*.[ch]pp' '*.[ch]' '*.cu')) format.py := $(filter-out tests/junit-xml/junit_xml/__init__.py, $(shell git ls-files '*.py')) -format.ot := $(filter-out doc/sphinx/source/CODE_OF_CONDUCT.md doc/sphinx/source/CONTRIBUTING.md, $(shell git ls-files '*.md' '*.f90')) +format.ot := $(filter-out doc/sphinx/source/CHANGELOG.md doc/sphinx/source/CODE_OF_CONDUCT.md doc/sphinx/source/CONTRIBUTING.md, $(shell git ls-files '*.md' '*.f90')) format-c : $(call quiet,CLANG_FORMAT) $(CLANG_FORMAT_OPTS) $(format.ch) diff --git a/doc/sphinx/source/CHANGELOG.md b/doc/sphinx/source/CHANGELOG.md deleted file mode 100644 index eada478037..0000000000 --- a/doc/sphinx/source/CHANGELOG.md +++ /dev/null @@ -1,711 +0,0 @@ -# Changes/Release Notes - -On this page we provide a summary of the main API changes, new features and examples for each release of libCEED. - -(main)= - -## Current `main` branch - -### Interface changes - -- Add `bool` field type for `CeedQFunctionContext` and related interfaces to use `bool` fields. 
-- `CEED_BASIS_COLLOCATED` removed; users should only use `CEED_BASIS_NONE`. -- Remove unneeded pointer for `CeedElemRestrictionGetELayout`. -- Change QFunction source include file handling in JiT compilers - - Add `CEED_RUNNING_JIT_PASS` compiler definition for wrapping header files that device JiT compilers cannot read - - Users should now prefer `#include ` rather than `#include ` in QFunction source files -- Require use of `Ceed*Destroy()` on Ceed objects returned from `Ceed*Get*()`. -- Rename `CeedCompositeOperatorCreate()` to `CeedOperatorCreateComposite()` for uniformity. -- Rename `CeedCompositeOperator*()` to `CeedOperatorComposite*()` for uniformity. -- Rust `libceed` crate: add features `shared` and `system`. -- Rust `libceed-sys` crate: feature `static` (default) with `shared` (not default). -- Move JiT helper functions only required by SYCL backends to `ceed/jit-tools-deprecated.h`. -These functions will be removed when the SYCL backends are updated to reflect the improvements in the CUDA and HIP backends. - -### New features - -- Add `CeedOperatorCreateAtPoints` which evaluates the `CeedQFunction` at arbitrary locations in each element, for use in Particle in Cell, Material Point Method, and similar methods. -- Add `CeedElemRestrictionGetLLayout` to provide L-vector layout for strided `CeedElemRestriction` created with `CEED_BACKEND_STRIDES`. -- Add `CeedVectorReturnCeed` and similar when parent `Ceed` context for a libCEED object is only needed once in a calling scope. -- Enable `#pragma once` for all JiT source; remove duplicate includes in JiT source string before compilation. -- Allow user to set additional compiler options for CUDA and HIP JiT. -Specifically, directories set with `CeedAddJitSourceRoot(ceed, "foo/bar")` will be used to set `-Ifoo/bar` and defines set with `CeedAddJitDefine(ceed, "foo=bar")` will be used to set `-Dfoo=bar`. -- Added non-tensor basis support to code generation backends `/gpu/cuda/gen` and `/gpu/hip/gen`. 
-- Added support to code generation backends `/gpu/cuda/gen` and `/gpu/hip/gen` for operators with both tensor and non-tensor bases. -- Add `CeedGetGitVersion()` to access the Git commit and dirty state of the repository at build time. -- Add `CeedGetBuildConfiguration()` to access compilers, flags, and related information about the build environment. -- Add support for full `CeedOperator` assembly for operators with multiple active fields with different bases for CPU backends and `/gpu/cuda/ref` and `/gpu/hip/gen` backends. - -### Examples - -- Add deal.II example with CEED BP suite. - -### Maintainability - -- OCCA backends were retired. - -(v0-12)= - -## v0.12 (Oct 31, 2023) - -### Interface changes - -- Update `CeedOperatorContext*` functions to `CeedOperator*Context*` functions for consistency. -For example, `CeedOperatorContextGetFieldLabel` was renamed to `CeedOperatorGetContextFieldLabel`. -- Removed `CeedBasisSetNumQuadraturePoints` as redundant and bug-prone interface. - -### New features - -- Added {c:func}`CeedOperatorGetFieldByName` to access a specific `CeedOperatorField` by its name. -- Update `/cpu/self/memcheck/*` backends to help verify `CeedVector` array access assumptions and `CeedQFunction` user output assumptions. -- Update {c:func}`CeedOperatorLinearAssembleDiagonal` to provide default implementation that supports `CeedOperator` with multiple active bases. -- Added Sycl backends `/gpu/sycl/ref`, `/gpu/sycl/shared`, and `/gpu/sycl/gen`. -- Added {c:func}`CeedBasisApplyAtPoints` for evaluation of values and derivatives at arbitrary points inside elements. -- Added support for non-tensor $H(\text{curl})$ finite element spaces with {c:func}`CeedBasisCreateHcurl`. 
-- Added {c:func}`CeedElemRestrictionCreateCurlOriented`, similar to {c:func}`CeedElemRestrictionCreateOriented`, for element restrictions requiring more general element transformations such as those for high-order $H(\text{curl})$ spaces on tetrahedra (see [https://dl.acm.org/doi/pdf/10.1145/3524456](https://dl.acm.org/doi/pdf/10.1145/3524456)). -- Added {c:func}`CeedOperatorLinearAssemblePointBlockDiagonalSymbolic` to create COO mapping for mapping out of {c:func}`CeedOperatorLinearAssemblePointBlockDiagonal`. -- Added support for application codes which manage multiple {ref}`Ceed` objects, parallelized across OpenMP threads. - -### Examples - -- Add `DMSwarm` example demonstrating interpolation from background mesh to swarm points and projection from swarm points to background mesh. - -#### {ref}`example-petsc-bps` - -- Requires PETSc version 3.19 or later. - -#### {ref}`example-petsc-navier-stokes` - -- Updated restart and checkpointing interface. -- Add data-driven subgrid-stress model. -- Add differential filtering of solution. -- Add turbulence statistics collection over spanwise-symmetric geometries. -- Add Taylor-Green vortex initial condition. -- Add Riemann-based outflow boundary conditions. -- Added vortex shedding and flow past cylinder example, including calculations for lift, drag, and heat transfer. -- Add Internal Damping Layer (IDL) for helping turbulent simulation stability. -- Derive `CeedBasis` from `PetscFE`, and various other internal maintainability updates. - -(v0-11)= - -## v0.11 (Dec 24, 2022) - -### Interface changes - -- Added {c:func}`CeedOperatorSetName` for more readable {c:func}`CeedOperatorView` output. -- Added {c:func}`CeedBasisCreateProjection` to facilitate interpolation between nodes for separate `CeedBases`. -- Rename and move {c:func}`CeedCompositeOperatorGetNumSub` and {c:func}`CeedCompositeOperatorGetSubList` to public interface. -- Renamed `CEED_BASIS_COLLOCATED` to `CEED_BASIS_NONE` for clarity. 
-Some users previously misinterpreted a `CeedOperator` field using `CEED_BASIS_COLLOCATED` as meaning that the entire `CeedOperator` used a quadrature space that is collocated with the nodal space of the active bases. - -### New features - -- Update `/cpu/self/memcheck/*` backends to help verify `CeedQFunctionContext` data sizes provided by user. -- Improved support for $H(\text{div})$ bases. -- Added `CeedInt_FMT` to support potential future use of larger integer sizes. -- Added `CEED_QFUNCTION_ATTR` for setting compiler attributes/pragmas to `CEED_QFUNCTION_HELPER` and `CEED_QFUNCTION`. -- OCCA backend updated to latest OCCA release; DPC++ and OMP OCCA modes enabled. -Due to a limitation of the OCCA parser, typedefs are required to use pointers to arrays in QFunctions with the OCCA backend. -This issue will be fixed in a future OCCA release. - -### Bugfix - -- Fix bug in setting device id for GPU backends. -- Fix storing of indices for `CeedElemRestriction` on the host with GPU backends. -- Fix `CeedElemRestriction` sizing for {c:func}`CeedOperatorAssemblePointBlockDiagonal`. -- Fix bugs in CPU implementation of {c:func}`CeedOperatorLinearAssemble` when there are different number of active input modes and active output modes. - -### Examples - -#### {ref}`example-petsc-navier-stokes` - -- Various performance enhancements, analytic matrix-free and assembled Jacobian, and PETSc solver configurations for GPUs. -- Refactored to improve code reuse and modularity. -- Support for primitive variables for more accurate boundary layers and all-speed flow. -- Added $YZ\beta$ shock capturing scheme and Shock Tube example. -- Added Channel example, with comparison to analytic solutions. -- Added Flat Plate with boundary layer mesh and compressible Blasius inflow condition based on Chebyshev collocation solution of the Blasius equations. -- Added strong and weak synthetic turbulence generation (STG) inflow boundary conditions. 
-- Added "freestream" boundary conditions based on HLLC Riemann solver. -- Automated stabilization coefficients for different basis degree. - -#### {ref}`example-petsc-bps` - -- Support for convergence studies. - -### Maintainability - -- Refactored `/gpu/cuda/shared` and `/gpu/cuda/gen` as well as `/gpu/hip/shared` and `/gpu/hip/gen` backend to improve maintainablity and reduce duplicated code. -- Enabled support for `p > 8` for `/gpu/*/shared` backends. -- Switch to `clang-format` over `astyle` for automatic formatting; Makefile command changed to `make format` from `make style`. -- Improved test harness. - -(v0-10-1)= - -## v0.10.1 (Apr 11, 2022) - -### Interface changes - -- Added {c:func}`CeedQFunctionSetUserFlopsEstimate` and {c:func}`CeedOperatorGetFlopsEstimate` to facilitate estimating FLOPs in operator application. - -### New features - -- Switched MAGMA backends to use runtime compilation for tensor basis kernels (and element restriction kernels, in non-deterministic `/gpu/*/magma` backends). -This reduces time to compile the library and increases the range of parameters for which the MAGMA tensor basis kernels will work. - -### Bugfix - -- Install JiT source files in install directory to fix GPU functionality for installed libCEED. - -(v0-10)= - -## v0.10 (Mar 21, 2022) - -### Interface changes - -- Update {c:func}`CeedQFunctionGetFields` and {c:func}`CeedOperatorGetFields` to include number of fields. -- Promote to the public API: QFunction and Operator field objects, `CeedQFunctionField` and `CeedOperatorField`, and associated getters, {c:func}`CeedQFunctionGetFields`; {c:func}`CeedQFunctionFieldGetName`; {c:func}`CeedQFunctionFieldGetSize`; {c:func}`CeedQFunctionFieldGetEvalMode`; {c:func}`CeedOperatorGetFields`; {c:func}`CeedOperatorFieldGetElemRestriction`; {c:func}`CeedOperatorFieldGetBasis`; and {c:func}`CeedOperatorFieldGetVector`. 
-- Clarify and document conditions where `CeedQFunction` and `CeedOperator` become immutable and no further fields or suboperators can be added. -- Add {c:func}`CeedOperatorLinearAssembleQFunctionBuildOrUpdate` to reduce object creation overhead in assembly of CeedOperator preconditioning ingredients. -- Promote {c:func}`CeedOperatorCheckReady`to the public API to facilitate interactive interfaces. -- Warning added when compiling OCCA backend to alert users that this backend is experimental. -- `ceed-backend.h`, `ceed-hash.h`, and `ceed-khash.h` removed. Users should use `ceed/backend.h`, `ceed/hash.h`, and `ceed/khash.h`. -- Added {c:func}`CeedQFunctionGetKernelName`; refactored {c:func}`CeedQFunctionGetSourcePath` to exclude function kernel name. -- Clarify documentation for {c:func}`CeedVectorTakeArray`; this function will error if {c:func}`CeedVectorSetArray` with `copy_mode == CEED_USE_POINTER` was not previously called for the corresponding `CeedMemType`. -- Added {c:func}`CeedVectorGetArrayWrite` that allows access to uninitialized arrays; require initialized data for {c:func}`CeedVectorGetArray`. -- Added {c:func}`CeedQFunctionContextRegisterDouble` and {c:func}`CeedQFunctionContextRegisterInt32` with {c:func}`CeedQFunctionContextSetDouble` and {c:func}`CeedQFunctionContextSetInt32` to facilitate easy updating of {c:struct}`CeedQFunctionContext` data by user defined field names. -- Added {c:func}`CeedQFunctionContextGetFieldDescriptions` to retrieve user defined descriptions of fields that are registered with `CeedQFunctionContextRegister*`. -- Renamed `CeedElemTopology` entries for clearer namespacing between libCEED enums. -- Added type `CeedSize` equivalent to `ptrdiff_t` for array sizes in {c:func}`CeedVectorCreate`, {c:func}`CeedVectorGetLength`, `CeedElemRestrictionCreate*`, {c:func}`CeedElemRestrictionGetLVectorSize`, and {c:func}`CeedOperatorLinearAssembleSymbolic`. This is a breaking change. 
-- Added {c:func}`CeedOperatorSetQFunctionUpdated` to facilitate QFunction data re-use between operators sharing the same quadrature space, such as in a multigrid hierarchy. -- Added {c:func}`CeedOperatorGetActiveVectorLengths` to get shape of CeedOperator. - -### New features - -- `CeedScalar` can now be set as `float` or `double` at compile time. -- Added JiT utilities in `ceed/jit-tools.h` to reduce duplicated code in GPU backends. -- Added support for JiT of QFunctions with `#include "relative/path/local-file.h"` statements for additional local files. Note that files included with `""` are searched relative to the current file first, then by compiler paths (as with `<>` includes). To use this feature, one should adhere to relative paths only, not compiler flags like `-I`, which the JiT will not be aware of. -- Remove need to guard library headers in QFunction source for code generation backends. -- `CeedDebugEnv()` macro created to provide debugging outputs when Ceed context is not present. -- Added {c:func}`CeedStringAllocCopy` to reduce repeated code for copying strings internally. -- Added {c:func}`CeedPathConcatenate` to facilitate loading kernel source files with a path relative to the current file. -- Added support for non-tensor $H(\text{div})$ elements, to include CPU backend implementations and {c:func}`CeedBasisCreateHdiv` convenience constructor. -- Added {c:func}`CeedQFunctionSetContextWritable` and read-only access to `CeedQFunctionContext` data as an optional feature to improve GPU performance. By default, calling the `CeedQFunctionUser` during {c:func}`CeedQFunctionApply` is assumed to write into the `CeedQFunctionContext` data, consistent with the previous behavior. Note that if a user asserts that their `CeedQFunctionUser` does not write into the `CeedQFunctionContext` data, they are responsible for the validity of this assertion. -- Added support for element matrix assembly in GPU backends. 
- -### Maintainability - -- Refactored preconditioner support internally to facilitate future development and improve GPU completeness/test coverage. -- `Include-what-you-use` makefile target added as `make iwyu`. -- Create backend constant `CEED_FIELD_MAX` to reduce magic numbers in codebase. -- Put GPU JiTed kernel source code into separate files. -- Dropped legacy version support in PETSc based examples to better utilize PETSc DMPlex and Mat updates to support libCEED; current minimum PETSc version for the examples is v3.17. - -(v0-9)= - -## v0.9 (Jul 6, 2021) - -### Interface changes - -- Minor modification in error handling macro to silence pedantic warnings when compiling with Clang, but no functional impact. - -### New features - -- Add {c:func}`CeedVectorAXPY` and {c:func}`CeedVectorPointwiseMult` as a convenience for stand-alone testing and internal use. -- Add `CEED_QFUNCTION_HELPER` macro to properly annotate QFunction helper functions for code generation backends. -- Add `CeedPragmaOptimizeOff` macro for code that is sensitive to floating point errors from fast math optimizations. -- Rust support: split `libceed-sys` crate out of `libceed` and [publish both on crates.io](https://crates.io/crates/libceed). - -### Performance improvements - -### Examples - -- Solid mechanics mini-app updated to explore the performance impacts of various formulations in the initial and current configurations. -- Fluid mechanics example adds GPU support and improves modularity. - -### Deprecated backends - -- The `/cpu/self/tmpl` and `/cpu/self/tmpl/sub` backends have been removed. These backends were intially added to test the backend inheritance mechanism, but this mechanism is now widely used and tested in multiple backends. - -(v0-8)= - -## v0.8 (Mar 31, 2021) - -### Interface changes - -- Error handling improved to include enumerated error codes for C interface return values. 
-- Installed headers that will follow semantic versioning were moved to {code}`include/ceed` directory. These headers have been renamed from {code}`ceed-*.h` to {code}`ceed/*.h`. Placeholder headers with the old naming schema are currently provided, but these headers will be removed in the libCEED v0.9 release. - -### New features - -- Julia and Rust interfaces added, providing a nearly 1-1 correspondence with the C interface, plus some convenience features. -- Static libraries can be built with `make STATIC=1` and the pkg-config file is installed accordingly. -- Add {c:func}`CeedOperatorLinearAssembleSymbolic` and {c:func}`CeedOperatorLinearAssemble` to support full assembly of libCEED operators. - -### Performance improvements - -- New HIP MAGMA backends for hipMAGMA library users: `/gpu/hip/magma` and `/gpu/hip/magma/det`. -- New HIP backends for improved tensor basis performance: `/gpu/hip/shared` and `/gpu/hip/gen`. - -### Examples - -- {ref}`example-petsc-elasticity` example updated with traction boundary conditions and improved Dirichlet boundary conditions. -- {ref}`example-petsc-elasticity` example updated with Neo-Hookean hyperelasticity in current configuration as well as improved Neo-Hookean hyperelasticity exploring storage vs computation tradeoffs. -- {ref}`example-petsc-navier-stokes` example updated with isentropic traveling vortex test case, an analytical solution to the Euler equations that is useful for testing boundary conditions, discretization stability, and order of accuracy. -- {ref}`example-petsc-navier-stokes` example updated with support for performing convergence study and plotting order of convergence by polynomial degree. - -(v0-7)= - -## v0.7 (Sep 29, 2020) - -### Interface changes - -- Replace limited {code}`CeedInterlaceMode` with more flexible component stride {code}`compstride` in {code}`CeedElemRestriction` constructors. 
- As a result, the {code}`indices` parameter has been replaced with {code}`offsets` and the {code}`nnodes` parameter has been replaced with {code}`lsize`. - These changes improve support for mixed finite element methods. -- Replace various uses of {code}`Ceed*Get*Status` with {code}`Ceed*Is*` in the backend API to match common nomenclature. -- Replace {code}`CeedOperatorAssembleLinearDiagonal` with {c:func}`CeedOperatorLinearAssembleDiagonal` for clarity. -- Linear Operators can be assembled as point-block diagonal matrices with {c:func}`CeedOperatorLinearAssemblePointBlockDiagonal`, provided in row-major form in a {code}`ncomp` by {code}`ncomp` block per node. -- Diagonal assemble interface changed to accept a {ref}`CeedVector` instead of a pointer to a {ref}`CeedVector` to reduce memory movement when interfacing with calling code. -- Added {c:func}`CeedOperatorLinearAssembleAddDiagonal` and {c:func}`CeedOperatorLinearAssembleAddPointBlockDiagonal` for improved future integration with codes such as MFEM that compose the action of {ref}`CeedOperator`s external to libCEED. -- Added {c:func}`CeedVectorTakeAray` to sync and remove libCEED read/write access to an allocated array and pass ownership of the array to the caller. - This function is recommended over {c:func}`CeedVectorSyncArray` when the {code}`CeedVector` has an array owned by the caller that was set by {c:func}`CeedVectorSetArray`. -- Added {code}`CeedQFunctionContext` object to manage user QFunction context data and reduce copies between device and host memory. -- Added {c:func}`CeedOperatorMultigridLevelCreate`, {c:func}`CeedOperatorMultigridLevelCreateTensorH1`, and {c:func}`CeedOperatorMultigridLevelCreateH1` to facilitate creation of multigrid prolongation, restriction, and coarse grid operators using a common quadrature space. - -### New features - -- New HIP backend: `/gpu/hip/ref`. 
-- CeedQFunction support for user `CUfunction`s in some backends - -### Performance improvements - -- OCCA backend rebuilt to facilitate future performance enhancements. -- PETSc BPs suite improved to reduce noise due to multiple calls to {code}`mpiexec`. - -### Examples - -- {ref}`example-petsc-elasticity` example updated with strain energy computation and more flexible boundary conditions. - -### Deprecated backends - -- The `/gpu/cuda/reg` backend has been removed, with its core features moved into `/gpu/cuda/ref` and `/gpu/cuda/shared`. - -(v0-6)= - -## v0.6 (Mar 29, 2020) - -libCEED v0.6 contains numerous new features and examples, as well as expanded -documentation in [this new website](https://libceed.org). - -### New features - -- New Python interface using [CFFI](https://cffi.readthedocs.io/) provides a nearly - 1-1 correspondence with the C interface, plus some convenience features. For instance, - data stored in the {cpp:type}`CeedVector` structure are available without copy as - {py:class}`numpy.ndarray`. Short tutorials are provided in - [Binder](https://mybinder.org/v2/gh/CEED/libCEED/main?urlpath=lab/tree/examples/tutorials/). -- Linear QFunctions can be assembled as block-diagonal matrices (per quadrature point, - {c:func}`CeedOperatorAssembleLinearQFunction`) or to evaluate the diagonal - ({c:func}`CeedOperatorAssembleLinearDiagonal`). These operations are useful for - preconditioning ingredients and are used in the libCEED's multigrid examples. -- The inverse of separable operators can be obtained using - {c:func}`CeedOperatorCreateFDMElementInverse` and applied with - {c:func}`CeedOperatorApply`. This is a useful preconditioning ingredient, - especially for Laplacians and related operators. -- New functions: {c:func}`CeedVectorNorm`, {c:func}`CeedOperatorApplyAdd`, - {c:func}`CeedQFunctionView`, {c:func}`CeedOperatorView`. -- Make public accessors for various attributes to facilitate writing composable code. 
-- New backend: `/cpu/self/memcheck/serial`. -- QFunctions using variable-length array (VLA) pointer constructs can be used with CUDA - backends. (Single source is coming soon for OCCA backends.) -- Fix some missing edge cases in CUDA backend. - -### Performance Improvements - -- MAGMA backend performance optimization and non-tensor bases. -- No-copy optimization in {c:func}`CeedOperatorApply`. - -### Interface changes - -- Replace {code}`CeedElemRestrictionCreateIdentity` and - {code}`CeedElemRestrictionCreateBlocked` with more flexible - {c:func}`CeedElemRestrictionCreateStrided` and - {c:func}`CeedElemRestrictionCreateBlockedStrided`. -- Add arguments to {c:func}`CeedQFunctionCreateIdentity`. -- Replace ambiguous uses of {cpp:enum}`CeedTransposeMode` for L-vector identification - with {cpp:enum}`CeedInterlaceMode`. This is now an attribute of the - {cpp:type}`CeedElemRestriction` (see {c:func}`CeedElemRestrictionCreate`) and no - longer passed as `lmode` arguments to {c:func}`CeedOperatorSetField` and - {c:func}`CeedElemRestrictionApply`. - -### Examples - -libCEED-0.6 contains greatly expanded examples with {ref}`new documentation `. -Notable additions include: - -- Standalone {ref}`ex2-surface` ({file}`examples/ceed/ex2-surface`): compute the area of - a domain in 1, 2, and 3 dimensions by applying a Laplacian. - -- PETSc {ref}`example-petsc-area` ({file}`examples/petsc/area.c`): computes surface area - of domains (like the cube and sphere) by direct integration on a surface mesh; - demonstrates geometric dimension different from topological dimension. - -- PETSc {ref}`example-petsc-bps`: - - - {file}`examples/petsc/bpsraw.c` (formerly `bps.c`): transparent CUDA support. - - {file}`examples/petsc/bps.c` (formerly `bpsdmplex.c`): performance improvements - and transparent CUDA support. 
- - {ref}`example-petsc-bps-sphere` ({file}`examples/petsc/bpssphere.c`): - generalizations of all CEED BPs to the surface of the sphere; demonstrates geometric - dimension different from topological dimension. - -- {ref}`example-petsc-multigrid` ({file}`examples/petsc/multigrid.c`): new p-multigrid - solver with algebraic multigrid coarse solve. - -- {ref}`example-petsc-navier-stokes` ({file}`examples/fluids/navierstokes.c`; formerly - `examples/navier-stokes`): unstructured grid support (using PETSc's `DMPlex`), - implicit time integration, SU/SUPG stabilization, free-slip boundary conditions, and - quasi-2D computational domain support. - -- {ref}`example-petsc-elasticity` ({file}`examples/solids/elasticity.c`): new solver for - linear elasticity, small-strain hyperelasticity, and globalized finite-strain - hyperelasticity using p-multigrid with algebraic multigrid coarse solve. - -(v0-5)= - -## v0.5 (Sep 18, 2019) - -For this release, several improvements were made. Two new CUDA backends were added to -the family of backends, of which, the new `cuda-gen` backend achieves state-of-the-art -performance using single-source {ref}`CeedQFunction`. From this release, users -can define Q-Functions in a single source code independently of the targeted backend -with the aid of a new macro `CEED QFUNCTION` to support JIT (Just-In-Time) and CPU -compilation of the user provided {ref}`CeedQFunction` code. To allow a unified -declaration, the {ref}`CeedQFunction` API has undergone a slight change: -the `QFunctionField` parameter `ncomp` has been changed to `size`. This change -requires setting the previous value of `ncomp` to `ncomp*dim` when adding a -`QFunctionField` with eval mode `CEED EVAL GRAD`. 
- -Additionally, new CPU backends -were included in this release, such as the `/cpu/self/opt/*` backends (which are -written in pure C and use partial **E-vectors** to improve performance) and the -`/cpu/self/ref/memcheck` backend (which relies upon the -[Valgrind](http://valgrind.org/) Memcheck tool to help verify that user -{ref}`CeedQFunction` have no undefined values). -This release also included various performance improvements, bug fixes, new examples, -and improved tests. Among these improvements, vectorized instructions for -{ref}`CeedQFunction` code compiled for CPU were enhanced by using `CeedPragmaSIMD` -instead of `CeedPragmaOMP`, implementation of a {ref}`CeedQFunction` gallery and -identity Q-Functions were introduced, and the PETSc benchmark problems were expanded -to include unstructured meshes handling were. For this expansion, the prior version of -the PETSc BPs, which only included data associated with structured geometries, were -renamed `bpsraw`, and the new version of the BPs, which can handle data associated -with any unstructured geometry, were called `bps`. Additionally, other benchmark -problems, namely BP2 and BP4 (the vector-valued versions of BP1 and BP3, respectively), -and BP5 and BP6 (the collocated versions---for which the quadrature points are the same -as the Gauss Lobatto nodes---of BP3 and BP4 respectively) were added to the PETSc -examples. Furthermoew, another standalone libCEED example, called `ex2`, which -computes the surface area of a given mesh was added to this release. 
- -Backends available in this release: - -| CEED resource (`-ceed`) | Backend | -|--------------------------|-----------------------------------------------------| -| `/cpu/self/ref/serial` | Serial reference implementation | -| `/cpu/self/ref/blocked` | Blocked reference implementation | -| `/cpu/self/ref/memcheck` | Memcheck backend, undefined value checks | -| `/cpu/self/opt/serial` | Serial optimized C implementation | -| `/cpu/self/opt/blocked` | Blocked optimized C implementation | -| `/cpu/self/avx/serial` | Serial AVX implementation | -| `/cpu/self/avx/blocked` | Blocked AVX implementation | -| `/cpu/self/xsmm/serial` | Serial LIBXSMM implementation | -| `/cpu/self/xsmm/blocked` | Blocked LIBXSMM implementation | -| `/cpu/occa` | Serial OCCA kernels | -| `/gpu/occa` | CUDA OCCA kernels | -| `/omp/occa` | OpenMP OCCA kernels | -| `/ocl/occa` | OpenCL OCCA kernels | -| `/gpu/cuda/ref` | Reference pure CUDA kernels | -| `/gpu/cuda/reg` | Pure CUDA kernels using one thread per element | -| `/gpu/cuda/shared` | Optimized pure CUDA kernels using shared memory | -| `/gpu/cuda/gen` | Optimized pure CUDA kernels using code generation | -| `/gpu/magma` | CUDA MAGMA kernels | - -Examples available in this release: - -:::{list-table} -:header-rows: 1 -:widths: auto -* - User code - - Example -* - `ceed` - - * ex1 (volume) - * ex2 (surface) -* - `mfem` - - * BP1 (scalar mass operator) - * BP3 (scalar Laplace operator) -* - `petsc` - - * BP1 (scalar mass operator) - * BP2 (vector mass operator) - * BP3 (scalar Laplace operator) - * BP4 (vector Laplace operator) - * BP5 (collocated scalar Laplace operator) - * BP6 (collocated vector Laplace operator) - * Navier-Stokes -* - `nek5000` - - * BP1 (scalar mass operator) - * BP3 (scalar Laplace operator) -::: - -(v0-4)= - -## v0.4 (Apr 1, 2019) - -libCEED v0.4 was made again publicly available in the second full CEED software -distribution, release CEED 2.0. 
This release contained notable features, such as -four new CPU backends, two new GPU backends, CPU backend optimizations, initial -support for operator composition, performance benchmarking, and a Navier-Stokes demo. -The new CPU backends in this release came in two families. The `/cpu/self/*/serial` -backends process one element at a time and are intended for meshes with a smaller number -of high order elements. The `/cpu/self/*/blocked` backends process blocked batches of -eight interlaced elements and are intended for meshes with higher numbers of elements. -The `/cpu/self/avx/*` backends rely upon AVX instructions to provide vectorized CPU -performance. The `/cpu/self/xsmm/*` backends rely upon the -[LIBXSMM](http://github.com/hfp/libxsmm) package to provide vectorized CPU -performance. The `/gpu/cuda/*` backends provide GPU performance strictly using CUDA. -The `/gpu/cuda/ref` backend is a reference CUDA backend, providing reasonable -performance for most problem configurations. The `/gpu/cuda/reg` backend uses a simple -parallelization approach, where each thread treats a finite element. Using just in time -compilation, provided by nvrtc (NVidia Runtime Compiler), and runtime parameters, this -backend unrolls loops and maps memory addresses to registers. The `/gpu/cuda/reg` backend -achieves good peak performance for 1D, 2D, and low order 3D problems, but performance -deteriorates very quickly when threads run out of registers. - -A new explicit time-stepping Navier-Stokes solver was added to the family of libCEED -examples in the `examples/petsc` directory (see {ref}`example-petsc-navier-stokes`). -This example solves the time-dependent Navier-Stokes equations of compressible gas -dynamics in a static Eulerian three-dimensional frame, using structured high-order -finite/spectral element spatial discretizations and explicit high-order time-stepping -(available in PETSc).
Moreover, the Navier-Stokes example was developed using PETSc, -so that the pointwise physics (defined at quadrature points) is separated from the -parallelization and meshing concerns. - -Backends available in this release: - -| CEED resource (`-ceed`) | Backend | -|--------------------------|-----------------------------------------------------| -| `/cpu/self/ref/serial` | Serial reference implementation | -| `/cpu/self/ref/blocked` | Blocked reference implementation | -| `/cpu/self/tmpl` | Backend template, defaults to `/cpu/self/blocked` | -| `/cpu/self/avx/serial` | Serial AVX implementation | -| `/cpu/self/avx/blocked` | Blocked AVX implementation | -| `/cpu/self/xsmm/serial` | Serial LIBXSMM implementation | -| `/cpu/self/xsmm/blocked` | Blocked LIBXSMM implementation | -| `/cpu/occa` | Serial OCCA kernels | -| `/gpu/occa` | CUDA OCCA kernels | -| `/omp/occa` | OpenMP OCCA kernels | -| `/ocl/occa` | OpenCL OCCA kernels | -| `/gpu/cuda/ref` | Reference pure CUDA kernels | -| `/gpu/cuda/reg` | Pure CUDA kernels using one thread per element | -| `/gpu/magma` | CUDA MAGMA kernels | - -Examples available in this release: - -:::{list-table} -:header-rows: 1 -:widths: auto -* - User code - - Example -* - `ceed` - - * ex1 (volume) -* - `mfem` - - * BP1 (scalar mass operator) - * BP3 (scalar Laplace operator) -* - `petsc` - - * BP1 (scalar mass operator) - * BP3 (scalar Laplace operator) - * Navier-Stokes -* - `nek5000` - - * BP1 (scalar mass operator) - * BP3 (scalar Laplace operator) -::: - -(v0-3)= - -## v0.3 (Sep 30, 2018) - -Notable features in this release include active/passive field interface, support for -non-tensor bases, backend optimization, and improved Fortran interface. This release -also focused on providing improved continuous integration, and many new tests with code -coverage reports of about 90%. 
This release also provided a significant change to the -public interface: a {ref}`CeedQFunction` can take any number of named input and output -arguments while {ref}`CeedOperator` connects them to the actual data, which may be -supplied explicitly to `CeedOperatorApply()` (active) or separately via -`CeedOperatorSetField()` (passive). This interface change enables reusable libraries -of CeedQFunctions and composition of block solvers constructed using -{ref}`CeedOperator`. A concept of blocked restriction was added to this release and -used in an optimized CPU backend. Although this is typically not visible to the user, -it enables effective use of arbitrary-length SIMD while maintaining cache locality. -This CPU backend also implements an algebraic factorization of tensor product gradients -to perform fewer operations than standard application of interpolation and -differentiation from nodes to quadrature points. This algebraic formulation -automatically supports non-polynomial and non-interpolatory bases, thus is more general -than the more common derivation in terms of Lagrange polynomials on the quadrature points. 
- -Backends available in this release: - -| CEED resource (`-ceed`) | Backend | -|-------------------------|-----------------------------------------------------| -| `/cpu/self/blocked` | Blocked reference implementation | -| `/cpu/self/ref` | Serial reference implementation | -| `/cpu/self/tmpl` | Backend template, defaults to `/cpu/self/blocked` | -| `/cpu/occa` | Serial OCCA kernels | -| `/gpu/occa` | CUDA OCCA kernels | -| `/omp/occa` | OpenMP OCCA kernels | -| `/ocl/occa` | OpenCL OCCA kernels | -| `/gpu/magma` | CUDA MAGMA kernels | - -Examples available in this release: - -:::{list-table} -:header-rows: 1 -:widths: auto -* - User code - - Example -* - `ceed` - - * ex1 (volume) -* - `mfem` - - * BP1 (scalar mass operator) - * BP3 (scalar Laplace operator) -* - `petsc` - - * BP1 (scalar mass operator) - * BP3 (scalar Laplace operator) -* - `nek5000` - - * BP1 (scalar mass operator) - * BP3 (scalar Laplace operator) -::: - -(v0-21)= - -## v0.21 (Sep 30, 2018) - -A MAGMA backend (which relies upon the -[MAGMA](https://bitbucket.org/icl/magma) package) was integrated in libCEED for this -release. This initial integration set up the framework of using MAGMA and provided the -libCEED functionality through MAGMA kernels as one of libCEED’s computational backends. -As any other backend, the MAGMA backend provides extended basic data structures for -{ref}`CeedVector`, {ref}`CeedElemRestriction`, and {ref}`CeedOperator`, and implements -the fundamental CEED building blocks to work with the new data structures. -In general, the MAGMA-specific data structures keep the libCEED pointers to CPU data -but also add corresponding device (e.g., GPU) pointers to the data. Coherency is handled -internally, and thus seamlessly to the user, through the functions/methods that are -provided to support them. 
- -Backends available in this release: - -| CEED resource (`-ceed`) | Backend | -|-------------------------|---------------------------------| -| `/cpu/self` | Serial reference implementation | -| `/cpu/occa` | Serial OCCA kernels | -| `/gpu/occa` | CUDA OCCA kernels | -| `/omp/occa` | OpenMP OCCA kernels | -| `/ocl/occa` | OpenCL OCCA kernels | -| `/gpu/magma` | CUDA MAGMA kernels | - -Examples available in this release: - -:::{list-table} -:header-rows: 1 -:widths: auto -* - User code - - Example -* - `ceed` - - * ex1 (volume) -* - `mfem` - - * BP1 (scalar mass operator) - * BP3 (scalar Laplace operator) -* - `petsc` - - * BP1 (scalar mass operator) -* - `nek5000` - - * BP1 (scalar mass operator) -::: - -(v0-2)= - -## v0.2 (Mar 30, 2018) - -libCEED was made publicly available in the first full CEED software distribution, release -CEED 1.0. The distribution was made available using the Spack package manager to provide -a common, easy-to-use build environment, where the user can build the CEED distribution -with all dependencies. This release included a new Fortran interface for the library. -This release also contained major improvements in the OCCA backend (including a new -`/ocl/occa` backend) and new examples. The standalone libCEED example was modified to -compute the volume of a given mesh (in 1D, 2D, or 3D) and placed in an -`examples/ceed` subfolder. A new `mfem` example to perform BP3 (with the application -of the Laplace operator) was also added to this release.
- -Backends available in this release: - -| CEED resource (`-ceed`) | Backend | -|-------------------------|---------------------------------| -| `/cpu/self` | Serial reference implementation | -| `/cpu/occa` | Serial OCCA kernels | -| `/gpu/occa` | CUDA OCCA kernels | -| `/omp/occa` | OpenMP OCCA kernels | -| `/ocl/occa` | OpenCL OCCA kernels | - -Examples available in this release: - -:::{list-table} -:header-rows: 1 -:widths: auto -* - User code - - Example -* - `ceed` - - * ex1 (volume) -* - `mfem` - - * BP1 (scalar mass operator) - * BP3 (scalar Laplace operator) -* - `petsc` - - * BP1 (scalar mass operator) -* - `nek5000` - - * BP1 (scalar mass operator) -::: - -(v0-1)= - -## v0.1 (Jan 3, 2018) - -Initial low-level API of the CEED project. The low-level API provides a set of Finite -Elements kernels and components for writing new low-level kernels. Examples include: -vector and sparse linear algebra, element matrix assembly over a batch of elements, -partial assembly and action for efficient high-order operators like mass, diffusion, -advection, etc. The main goal of the low-level API is to establish the basis for the -high-level API. Also, identifying such low-level kernels and providing a reference -implementation for them serves as the basis for specialized backend implementations. -This release contained several backends: `/cpu/self`, and backends which rely upon the -[OCCA](http://github.com/libocca/occa) package, such as `/cpu/occa`, -`/gpu/occa`, and `/omp/occa`. -It also included several examples, in the `examples` folder: -A standalone code that shows the usage of libCEED (with no external -dependencies) to apply the Laplace operator, `ex1`; an `mfem` example to perform BP1 -(with the application of the mass operator); and a `petsc` example to perform BP1 -(with the application of the mass operator). 
- -Backends available in this release: - -| CEED resource (`-ceed`) | Backend | -|-------------------------|---------------------------------| -| `/cpu/self` | Serial reference implementation | -| `/cpu/occa` | Serial OCCA kernels | -| `/gpu/occa` | CUDA OCCA kernels | -| `/omp/occa` | OpenMP OCCA kernels | - -Examples available in this release: - -| User code | Example | -|-----------------------|-----------------------------------| -| `ceed` | ex1 (scalar Laplace operator) | -| `mfem` | BP1 (scalar mass operator) | -| `petsc` | BP1 (scalar mass operator) | -``` diff --git a/doc/sphinx/source/CHANGELOG.md b/doc/sphinx/source/CHANGELOG.md new file mode 120000 index 0000000000..79b747aee1 --- /dev/null +++ b/doc/sphinx/source/CHANGELOG.md @@ -0,0 +1 @@ +../../../CHANGELOG.md \ No newline at end of file From 16c385245f2ea1021fe618f734db81d5d42180c4 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Wed, 3 Jun 2026 10:42:06 -0600 Subject: [PATCH 3/3] doc - fix missing docstring args --- interface/ceed-preconditioning.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c index 552d1bc760..d3376aec0f 100644 --- a/interface/ceed-preconditioning.c +++ b/interface/ceed-preconditioning.c @@ -562,10 +562,12 @@ int CeedOperatorLinearAssembleGetNumEntries(CeedOperator op, CeedSize *num_entri Users should generally use @ref CeedOperatorLinearAssembleSymbolic(). 
- @param[in] op `CeedOperator` to assemble nonzero pattern - @param[in] offset Offset for number of entries - @param[out] rows Row number for each entry - @param[out] cols Column number for each entry + @param[in] op `CeedOperator` to assemble nonzero pattern + @param[in] offset Offset for number of entries + @param[in] elem_rstr_in `CeedElemRestriction` for current active input field + @param[in] elem_rstr_out `CeedElemRestriction` for current active output field + @param[out] rows Row number for each entry + @param[out] cols Column number for each entry @return An error code: 0 - success, otherwise - failure @@ -670,7 +672,7 @@ static int CeedOperatorAssembleSymbolicSingleBlock(CeedOperator op, CeedSize off } /** - @brief Build nonzero pattern for non-composite CeedOperator`. + @brief Build nonzero pattern for non-composite `CeedOperator`. Users should generally use @ref CeedOperatorLinearAssembleSymbolic(). @@ -829,9 +831,11 @@ int CeedOperatorLinearAssembleQFunctionBuildOrUpdateFallback(CeedOperator op, bo Users should generally use @ref CeedOperatorLinearAssemble(). - @param[in] op `CeedOperator` to assemble - @param[in] offset Offset for number of entries - @param[out] values Values to assemble into matrix + @param[in] op `CeedOperator` to assemble + @param[in] offset Offset for number of entries + @param[in] active_input Index of active input to assemble for + @param[in] active_output Index of active output to assemble for + @param[out] values Values to assemble into matrix @return An error code: 0 - success, otherwise - failure