diff --git a/.github/workflows/testing-linux.yml b/.github/workflows/testing-linux.yml index 5837342ecc1..8d85e22449d 100644 --- a/.github/workflows/testing-linux.yml +++ b/.github/workflows/testing-linux.yml @@ -6,6 +6,14 @@ on: - 'develop' pull_request: +env: + LIBXS_GIT_REPOSITORY: https://github.com/hfp/libxs.git + LIBXS_GIT_TAG: 166f48e0172475f4db59950d1447525994bbf293 + LIBXSTREAM_GIT_REPOSITORY: https://github.com/hfp/libxstream.git + LIBXSTREAM_GIT_TAG: f307d372dfe1eaba672dab5b0d9692d8a0f5c57f + LIBXSMM_GIT_REPOSITORY: https://github.com/libxsmm/libxsmm.git + LIBXSMM_GIT_TAG: cdeedf76ddd30e0dfb2527f6e1537d2c35a07596 + jobs: ################################################################################## # Run pre-commit @@ -33,7 +41,7 @@ jobs: matrix: use_mpi: [MPI=ON, MPI=OFF] use_openmp: [OPENMP=ON, OPENMP=OFF] - use_smm: [LIBXSMM=ON, LIBXSMM=OFF] + smm_backend: [BLAS, LIBXS, LIBXSMM] mpi_suffix: [openmpi, mpich] exclude: - use_mpi: MPI=OFF @@ -45,19 +53,69 @@ jobs: fetch-depth: 0 submodules: true + - name: Install libxs packages + if: matrix.smm_backend != 'BLAS' + run: | + set -eu + prefix="${GITHUB_WORKSPACE}/deps/install" + mkdir -p "${prefix}" deps + + fetch_and_install() { + name="$1" + repo="$2" + tag="$3" + shift 3 + git clone "${repo}" "deps/${name}-src" + git -C "deps/${name}-src" checkout "${tag}" + cmake -G Ninja \ + -S "deps/${name}-src" \ + -B "deps/${name}-build" \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX="${prefix}" \ + -DCMAKE_PREFIX_PATH="${prefix}" \ + "$@" + cmake --build "deps/${name}-build" --target install + } + + fetch_and_install libxs \ + "${LIBXS_GIT_REPOSITORY}" \ + "${LIBXS_GIT_TAG}" \ + -DLIBXS_FORTRAN=ON + + if [ "${{ matrix.smm_backend }}" = "LIBXSMM" ]; then + fetch_and_install libxsmm \ + "${LIBXSMM_GIT_REPOSITORY}" \ + "${LIBXSMM_GIT_TAG}" \ + -DXSMM_STATIC=ON + fi + + echo "CMAKE_PREFIX_PATH=${prefix}${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH}}" >> "${GITHUB_ENV}" + echo "PKG_CONFIG_PATH=${prefix}/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}" >> "${GITHUB_ENV}" + - name: Configure run: | mkdir -p build cd build + case "${{ matrix.smm_backend }}" in + BLAS) + smm_flags="-DUSE_LIBXS=OFF -DUSE_LIBXSMM=OFF" + ;; + LIBXS) + smm_flags="-DUSE_LIBXS=ON -DUSE_LIBXSMM=OFF" + ;; + LIBXSMM) + smm_flags="-DUSE_LIBXS=ON -DUSE_LIBXSMM=ON" + ;; + esac cmake -G Ninja \ -DCMAKE_BUILD_TYPE=Coverage \ -DBUILD_TESTING=ON \ -DUSE_${{ matrix.use_mpi }} \ -DUSE_${{ matrix.use_openmp }} \ - -DUSE_${{ matrix.use_smm }} \ + ${smm_flags} \ -DMPI_EXECUTABLE_SUFFIX=.${{ matrix.mpi_suffix }} \ -DMPIEXEC_PREFLAGS="$([ "${{ matrix.mpi_suffix }}" = "openmpi" ] && echo "-mca btl ^openib --allow-run-as-root --oversubscribe")" \ - -DLCOV_ARGS="--test-name;${{ matrix.use_mpi }}-${{ matrix.use_openmp }}-${{ matrix.use_smm }}-cpu" \ + -DLCOV_ARGS="--test-name;${{ matrix.use_mpi }}-${{ matrix.use_openmp }}-${{ matrix.smm_backend }}-cpu" \ -DTEST_MPI_RANKS=auto \ .. @@ -72,19 +130,19 @@ jobs: - name: Generate coverage info run: | cmake --build build -- cov-info - mv build/coverage.info build/coverage-Linux-${{ matrix.use_mpi }}-${{ matrix.use_openmp }}-${{ matrix.use_smm }}-cpu.info + mv build/coverage.info build/coverage-Linux-${{ matrix.use_mpi }}-${{ matrix.use_openmp }}-${{ matrix.smm_backend }}-cpu.info - name: Upload coverage data uses: actions/upload-artifact@v4 with: - name: coverage-data-${{ matrix.use_mpi }}-${{ matrix.use_openmp }}-${{ matrix.use_smm }}-${{ matrix.mpi_suffix }} + name: coverage-data-${{ matrix.use_mpi }}-${{ matrix.use_openmp }}-${{ matrix.smm_backend }}-${{ matrix.mpi_suffix }} path: build/coverage-*.info - name: Upload coverage data (generated files) uses: actions/upload-artifact@v4 - if: matrix.use_mpi == 'MPI=ON' && matrix.use_openmp == 'OPENMP=ON' && matrix.use_smm == 'LIBXSMM=OFF' && matrix.mpi_suffix == 'openmpi' + if: matrix.use_mpi == 'MPI=ON' && matrix.use_openmp == 'OPENMP=ON' && matrix.smm_backend == 'BLAS' && matrix.mpi_suffix == 'openmpi' with: - name: coverage-data-${{ matrix.use_mpi }}-${{ matrix.use_openmp }}-${{ matrix.use_smm }}-${{ matrix.mpi_suffix }}-generated-files + name: coverage-data-${{ matrix.use_mpi }}-${{ matrix.use_openmp }}-${{ matrix.smm_backend }}-${{ matrix.mpi_suffix }}-generated-files path: | build/src/dbcsr.h build/src/tensors/dbcsr_tensor.h @@ -137,7 +195,7 @@ jobs: strategy: matrix: use_openmp: [OPENMP=ON] - use_smm: [LIBXSMM=ON] + use_libxsmm: [LIBXSMM=ON] steps: - uses: actions/checkout@v4 @@ -145,6 +203,45 @@ jobs: fetch-depth: 0 submodules: true + - name: Install libxs packages + run: | + set -eu + prefix="${GITHUB_WORKSPACE}/deps/install" + mkdir -p "${prefix}" deps + + fetch_and_install() { + name="$1" + repo="$2" + tag="$3" + shift 3 + git clone "${repo}" "deps/${name}-src" + git -C "deps/${name}-src" checkout "${tag}" + cmake -G Ninja \ + -S "deps/${name}-src" \ + -B "deps/${name}-build" \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX="${prefix}" \ + -DCMAKE_PREFIX_PATH="${prefix}" \ + "$@" + cmake --build "deps/${name}-build" --target install + } + + fetch_and_install libxs \ + "${LIBXS_GIT_REPOSITORY}" \ + "${LIBXS_GIT_TAG}" \ + -DLIBXS_FORTRAN=ON + fetch_and_install libxsmm \ + "${LIBXSMM_GIT_REPOSITORY}" \ + "${LIBXSMM_GIT_TAG}" \ + -DXSMM_STATIC=ON + fetch_and_install libxstream \ + "${LIBXSTREAM_GIT_REPOSITORY}" \ + "${LIBXSTREAM_GIT_TAG}" \ + -DLIBXSTREAM_SHARED=OFF + + echo "CMAKE_PREFIX_PATH=${prefix}${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH}}" >> "${GITHUB_ENV}" + echo "PKG_CONFIG_PATH=${prefix}/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}" >> "${GITHUB_ENV}" + - name: Configure run: | mkdir -p build @@ -153,7 +250,8 @@ jobs: -DCMAKE_BUILD_TYPE=Debug \ -DBUILD_TESTING=ON \ -DUSE_${{ matrix.use_openmp }} \ - -DUSE_${{ matrix.use_smm }} \ + -DUSE_LIBXS=ON \ + -DUSE_${{ matrix.use_libxsmm }} \ -DUSE_ACCEL=opencl \ -DWITH_EXAMPLES=ON \ .. @@ -217,7 +315,15 @@ jobs: run: | mkdir -p build/src mv dbcsr.h tensors build/src/ - echo *.info | xargs printf -- '-a %s\n' | xargs lcov -o merged.info + echo *.info | xargs printf -- '-a %s\n' | xargs lcov -o merged.raw.info + + lcov --remove merged.raw.info \ + '*/libxs_procs.fi' \ + '*/libxs_jit.F' \ + '*/deps/install/*' \ + '*/_deps/*' \ + -o merged.info + genhtml merged.info -o htmlcov lcov --summary merged.info diff --git a/.github/workflows/testing-macos.yml b/.github/workflows/testing-macos.yml index 712540b099f..27dd9ca19f7 100644 --- a/.github/workflows/testing-macos.yml +++ b/.github/workflows/testing-macos.yml @@ -14,7 +14,6 @@ jobs: matrix: use_mpi: [MPI=ON] use_openmp: [OPENMP=ON] - use_smm: [SMM=blas] blas_impl: [accelerate,openblas] mpi_suffix: [mpich] # Brew openmpi doesn't provide mpi.mod @@ -44,7 +43,6 @@ jobs: -DBUILD_TESTING=ON \ -DUSE_${{ matrix.use_mpi }} \ -DUSE_${{ matrix.use_openmp }} \ - -DUSE_${{ matrix.use_smm }} \ $([ "${{ matrix.blas_impl }}" = "openblas" ] && echo '-DCMAKE_PREFIX_PATH=/usr/local/opt/openblas') \ -DMPIEXEC_PREFLAGS="$([ "${{ matrix.mpi_suffix }}" = "openmpi" ] && echo "-mca btl ^openib --allow-run-as-root")" \ -DTEST_MPI_RANKS=auto \ diff --git a/CMakeLists.txt b/CMakeLists.txt index d2744f6c431..ecef513a581 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,8 @@ cmake_minimum_required(VERSION 3.22) set(CMAKE_INTERPROCEDURAL_OPTIMIZATION FALSE FORCE) # include our cmake snippets -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake) +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake + ${CMAKE_CURRENT_SOURCE_DIR}/src/cmake) # DBCSR's source directory set(DBCSR_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src) @@ -20,6 +21,7 @@ endif () # ================================================================================================= # PROJECT AND VERSION include(GetGitRevisionDescription) +include(FetchContent) git_describe(GIT_DESC) @@ -64,7 +66,7 @@ project( dbcsr DESCRIPTION "DBCSR: Distributed Block Compressed Sparse Row matrix library (https://dbcsr.cp2k.org)" -) + LANGUAGES Fortran C CXX) set(dbcsr_VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}${VERSION_GIT}) set(dbcsr_APIVERSION ${VERSION_MAJOR}.${VERSION_MINOR}) @@ -73,11 +75,22 @@ set(dbcsr_APIVERSION ${VERSION_MAJOR}.${VERSION_MINOR}) # OPTIONS include(CMakeDependentOption) -option(BUILD_SHARED_LIBS "Build shared libraries" ON) +option(BUILD_SHARED_LIBS "Build shared libraries" OFF) option(USE_OPENMP "Build with OpenMP support" ON) -option(USE_MPI "Build with MPI support" ON) -option(USE_MPI_F08 "Build with the mpi_f08 module support" OFF) +option(USE_MPI "Build with MPI support" OFF) option(BUILD_TESTING "Build dbcsr unit tests" OFF) +option( + USE_LIBXS + "Use LIBXS for host-side Small Matrix Multiplication; required for OpenCL backend" + OFF) + +option(WITH_CUDA_PROFILING "Enable profiling within CUDA" OFF) +option(WITH_HIP_PROFILING "Enable profiling within HIP" OFF) + +cmake_dependent_option(USE_LIBXSMM "Use LIBXSMM for JIT-compiled GEMM kernels" + OFF "USE_LIBXS" OFF) +cmake_dependent_option(USE_MPI_F08 "Build with the mpi_f08 module support" OFF + "USE_MPI" OFF) # MPI_F08 is a module included in MPI cmake_dependent_option( WITH_C_API "Build the C API (ISO_C_BINDINGS)" ON "USE_MPI" OFF )# the ISO_C_BINDINGS require MPI unconditionally @@ -92,11 +105,27 @@ set(TEST_OMP_THREADS CACHE STRING "Number of OpenMP threads for testing") set(USE_ACCEL - "" - CACHE STRING "Build with acceleration support (default: none)") -set_property(CACHE USE_ACCEL PROPERTY STRINGS "" opencl cuda hip) + "none" + CACHE + STRING + "Build with acceleration support (none (default value), opencl, cuda, hip)" +) +set(_DBCSR_SUPPORTED_ACCEL_VALUES none opencl cuda hip) +set_property(CACHE USE_ACCEL PROPERTY STRINGS ${_DBCSR_SUPPORTED_ACCEL_VALUES}) string(TOLOWER "${USE_ACCEL}" USE_ACCEL) +if (NOT USE_ACCEL IN_LIST _DBCSR_SUPPORTED_ACCEL_VALUES) + message( + FATAL_ERROR + "Unsupported USE_ACCEL='${USE_ACCEL}'. Supported values are: ${_DBCSR_SUPPORTED_ACCEL_VALUES}" + ) +endif () + +set(DBCSR_WITH_ACCEL OFF) +if (NOT USE_ACCEL STREQUAL "none") + set(DBCSR_WITH_ACCEL ON) +endif () + set(SUPPORTED_CUDA_ARCHITECTURES K20X K40 @@ -106,24 +135,34 @@ set(SUPPORTED_CUDA_ARCHITECTURES A100 H100) set(SUPPORTED_HIP_ARCHITECTURES Mi50 Mi100 Mi250 Mi300 Mi350) + set(WITH_GPU - $,"","P100"> + "P100" CACHE STRING "Select GPU arch. and embed parameters (default: CUDA/HIP=P100, OPENCL=all)" ) -set(WITH_GPU_PARAMS "${WITH_GPU}") set_property(CACHE WITH_GPU PROPERTY STRINGS ${SUPPORTED_CUDA_ARCHITECTURES} ${SUPPORTED_HIP_ARCHITECTURES}) -option(WITH_CUDA_PROFILING "Enable profiling within CUDA" OFF) -option(WITH_HIP_PROFILING "Enable profiling within HIP" OFF) +# Override default for OpenCL: empty string means "all" +if (NOT DEFINED WITH_GPU AND USE_ACCEL STREQUAL "opencl") + set(WITH_GPU + "" + CACHE + STRING + "Select GPU arch. and embed parameters (default: CUDA/HIP=P100, OPENCL=all)" + FORCE) +endif () + +set(WITH_GPU_PARAMS "${WITH_GPU}") +set_property(CACHE WITH_GPU PROPERTY STRINGS ${SUPPORTED_CUDA_ARCHITECTURES} + ${SUPPORTED_HIP_ARCHITECTURES}) # ================================================================================================= # LANGUAGES AND TESTING -enable_language(Fortran) -if ((WITH_C_API AND WITH_EXAMPLES) OR (NOT USE_ACCEL MATCHES "none")) +if ((WITH_C_API AND WITH_EXAMPLES) OR DBCSR_WITH_ACCEL) enable_language(CXX) enable_language(C) @@ -136,7 +175,6 @@ if ((WITH_C_API AND WITH_EXAMPLES) OR (NOT USE_ACCEL MATCHES "none")) set(CMAKE_C_STANDARD 11) set(CMAKE_C_STANDARD_REQUIRED ON) endif () - endif () # =================================== OpenMP @@ -149,155 +187,24 @@ endif () # they can be linked into shared libraries. set(CMAKE_POSITION_INDEPENDENT_CODE ON) -# LIBXS provides host-side batched GEMM for all backends. LIBXSMM provides JIT -# kernels within LIBXS (optional, auto-detected). Set LIBXSROOT to use a -# prebuilt installation, or let FetchContent download it. Set USE_LIBXS=OFF to -# disable entirely. -include(FetchContent) -include(cmake/dependencies.cmake) -option(USE_LIBXS "Use LIBXS for host-side Small Matrix Multiplication" ON) -find_package(PkgConfig QUIET) -set(LIBXSROOT - "" - CACHE PATH "Root directory of a prebuilt LIBXS (skips FetchContent)") if (USE_LIBXS) - # Try prebuilt: pkg-config > explicit root > path probing - if (NOT LIBXSROOT AND PkgConfig_FOUND) - pkg_check_modules(_LIBXS QUIET libxs) - if (_LIBXS_FOUND) - pkg_get_variable(LIBXSROOT libxs prefix) - endif () - endif () - if (NOT LIBXSROOT) - foreach (_dir "${CMAKE_SOURCE_DIR}/../libxs" "$ENV{HOME}/libxs" - "/opt/libxs") - if (EXISTS "${_dir}/include/libxs.f") - set(LIBXSROOT "${_dir}") - break() + find_package(libxs CONFIG REQUIRED) +endif () + +if (USE_LIBXSMM) + find_package(libxsmm CONFIG QUIET) + if (NOT libxsmm_FOUND) + find_package(PkgConfig QUIET) + if (PkgConfig_FOUND) + pkg_check_modules(DBCSR_LIBXSMM_PC QUIET IMPORTED_TARGET GLOBAL libxsmm) + if (DBCSR_LIBXSMM_PC_FOUND AND NOT TARGET libxsmm::libxsmm) + add_library(libxsmm::libxsmm ALIAS PkgConfig::DBCSR_LIBXSMM_PC) + set(libxsmm_FOUND TRUE) endif () - endforeach () - endif () - if (LIBXSROOT AND EXISTS "${LIBXSROOT}/include/libxs.f") - set(_dbcsr_suffixes_save ${CMAKE_FIND_LIBRARY_SUFFIXES}) - if (BUILD_SHARED_LIBS) - set(CMAKE_FIND_LIBRARY_SUFFIXES .so .dylib) - else () - set(CMAKE_FIND_LIBRARY_SUFFIXES .a) - endif () - find_library( - LIBXS_LIBRARY - NAMES xs - PATHS "${LIBXSROOT}/lib" - NO_DEFAULT_PATH) - if (NOT LIBXS_LIBRARY) - set(CMAKE_FIND_LIBRARY_SUFFIXES ${_dbcsr_suffixes_save}) - find_library( - LIBXS_LIBRARY - NAMES xs - PATHS "${LIBXSROOT}/lib" - NO_DEFAULT_PATH) - endif () - set(CMAKE_FIND_LIBRARY_SUFFIXES ${_dbcsr_suffixes_save}) - if (LIBXS_LIBRARY) - message(STATUS "Using prebuilt LIBXS from ${LIBXSROOT}") - else () - message( - STATUS "LIBXS source found at ${LIBXSROOT} -- building as subdirectory") - set(LIBXS_FORTRAN - ON - CACHE BOOL "" FORCE) - set(LIBXS_SHARED - ${BUILD_SHARED_LIBS} - CACHE BOOL "" FORCE) - add_subdirectory("${LIBXSROOT}" "${CMAKE_BINARY_DIR}/_deps/libxs-build") - set(LIBXS_FETCHED TRUE) endif () - else () - message(STATUS "LIBXS not found locally -- downloading via FetchContent") - FetchContent_Declare( - libxs - GIT_REPOSITORY ${LIBXS_GIT_REPOSITORY} - GIT_TAG ${LIBXS_GIT_TAG}) - set(LIBXS_FORTRAN - ON - CACHE BOOL "" FORCE) - set(LIBXS_SHARED - ${BUILD_SHARED_LIBS} - CACHE BOOL "" FORCE) - FetchContent_MakeAvailable(libxs) - set(LIBXS_FETCHED TRUE) - set(LIBXSROOT "${libxs_SOURCE_DIR}") endif () - # Optional LIBXSMM for JIT-compiled GEMM kernels - option(USE_LIBXSMM "Use LIBXSMM for JIT-compiled GEMM kernels" ON) - if (USE_LIBXSMM) - set(LIBXSMMROOT - "" - CACHE PATH "Root directory of LIBXSMM (optional)") - if (NOT LIBXSMMROOT) - foreach (_dir "${LIBXSROOT}/../libxsmm" "$ENV{HOME}/libxsmm") - if (EXISTS "${_dir}/include/libxsmm.h") - set(LIBXSMMROOT "${_dir}") - break() - endif () - endforeach () - endif () - if (LIBXSMMROOT AND EXISTS "${LIBXSMMROOT}/include/libxsmm.h") - set(_dbcsr_suffixes_save ${CMAKE_FIND_LIBRARY_SUFFIXES}) - if (BUILD_SHARED_LIBS) - set(CMAKE_FIND_LIBRARY_SUFFIXES .so .dylib) - else () - set(CMAKE_FIND_LIBRARY_SUFFIXES .a) - endif () - find_library( - LIBXSMM_LIBRARY - NAMES xsmm - PATHS "${LIBXSMMROOT}/lib" - NO_DEFAULT_PATH) - if (NOT LIBXSMM_LIBRARY) - set(CMAKE_FIND_LIBRARY_SUFFIXES ${_dbcsr_suffixes_save}) - find_library( - LIBXSMM_LIBRARY - NAMES xsmm - PATHS "${LIBXSMMROOT}/lib" - NO_DEFAULT_PATH) - endif () - set(CMAKE_FIND_LIBRARY_SUFFIXES ${_dbcsr_suffixes_save}) - if (LIBXSMM_LIBRARY) - message(STATUS "Using prebuilt LIBXSMM from ${LIBXSMMROOT}") - else () - message( - STATUS - "LIBXSMM source found at ${LIBXSMMROOT} -- building as subdirectory" - ) - set(XSMM_STATIC - ON - CACHE BOOL "" FORCE) - add_subdirectory("${LIBXSMMROOT}" - "${CMAKE_BINARY_DIR}/_deps/libxsmm-build") - set(LIBXSMM_FETCHED TRUE) - get_target_property(_xsmm_srcs xsmm SOURCES) - list(FILTER _xsmm_srcs EXCLUDE REGEX "binaryexport_generator") - set_target_properties(xsmm PROPERTIES SOURCES "${_xsmm_srcs}") - endif () - else () - message( - STATUS "LIBXSMM not found locally -- downloading via FetchContent") - FetchContent_Declare( - libxsmm - GIT_REPOSITORY "https://github.com/libxsmm/libxsmm.git" - GIT_TAG ${LIBXSMM_GIT_TAG}) - set(XSMM_STATIC - ON - CACHE BOOL "" FORCE) - FetchContent_MakeAvailable(libxsmm) - set(LIBXSMMROOT "${libxsmm_SOURCE_DIR}") - set(LIBXSMM_FETCHED TRUE) - get_target_property(_xsmm_srcs xsmm SOURCES) - list(FILTER _xsmm_srcs EXCLUDE REGEX - "(binaryexport_generator|gemm_driver)") - set_target_properties(xsmm PROPERTIES SOURCES "${_xsmm_srcs}") - endif () + if (NOT libxsmm_FOUND) + message(FATAL_ERROR "LIBXSMM not found (tried CMake config and pkg-config)") endif () endif () @@ -320,14 +227,15 @@ endif () # =================================== MPI if (USE_MPI) - get_property(REQUIRED_MPI_COMPONENTS GLOBAL PROPERTY ENABLED_LANGUAGES) + get_property(DBCSR_REQUIRED_MPI_COMPONENTS GLOBAL PROPERTY ENABLED_LANGUAGES) if (NOT CMAKE_CROSSCOMPILING) # when cross compiling, assume the users know # what they are doing set(MPI_DETERMINE_LIBRARY_VERSION TRUE) endif () + find_package( MPI - COMPONENTS ${REQUIRED_MPI_COMPONENTS} + COMPONENTS ${DBCSR_REQUIRED_MPI_COMPONENTS} REQUIRED) if (NOT MPI_Fortran_HAVE_F90_MODULE) @@ -358,103 +266,37 @@ endif () # =================================== GPU backends -if (NOT USE_ACCEL MATCHES "none") +if (DBCSR_WITH_ACCEL) set(DBCSR_ACC_HEADER acc/acc.h acc/acc_bench.h acc/acc_libsmm.h) endif () -if (USE_ACCEL MATCHES "opencl") - # Hint additional OpenCL locations (Intel oneAPI, CUDA toolkit) +if (USE_ACCEL STREQUAL "opencl") if (DEFINED ENV{CMPLR_ROOT}) list(APPEND CMAKE_PREFIX_PATH "$ENV{CMPLR_ROOT}") endif () if (DEFINED ENV{CUDA_PATH}) list(APPEND CMAKE_PREFIX_PATH "$ENV{CUDA_PATH}") endif () - find_package(OpenCL REQUIRED) - # LIBXS is required for the OpenCL backend - if (NOT LIBXS_LIBRARY AND NOT LIBXS_FETCHED) - message( - FATAL_ERROR - "LIBXS not found. OpenCL backend requires LIBXS (USE_LIBXS=ON).") - endif () - # Locate LIBXSTREAM: prebuilt > pkg-config > path probing > FetchContent - set(LIBXSTREAMROOT - "" - CACHE PATH "Root directory of a prebuilt LIBXSTREAM (skips FetchContent)") - if (NOT LIBXSTREAMROOT AND PkgConfig_FOUND) - pkg_check_modules(_LIBXSTREAM QUIET libxstream) - if (_LIBXSTREAM_FOUND) - pkg_get_variable(LIBXSTREAMROOT libxstream prefix) - endif () - endif () - if (NOT LIBXSTREAMROOT) - foreach (_dir "${CMAKE_SOURCE_DIR}/../libxstream" "$ENV{HOME}/libxstream" - "/opt/libxstream") - if (EXISTS "${_dir}/include/libxstream.h") - set(LIBXSTREAMROOT "${_dir}") - break() - endif () - endforeach () - endif () - if (LIBXSTREAMROOT AND EXISTS "${LIBXSTREAMROOT}/include/libxstream.h") - set(_dbcsr_suffixes_save ${CMAKE_FIND_LIBRARY_SUFFIXES}) - if (BUILD_SHARED_LIBS) - set(CMAKE_FIND_LIBRARY_SUFFIXES .so .dylib) - else () - set(CMAKE_FIND_LIBRARY_SUFFIXES .a) - endif () - find_library( - LIBXSTREAM_LIBRARY xstream - PATHS "${LIBXSTREAMROOT}/lib" - NO_DEFAULT_PATH) - if (NOT LIBXSTREAM_LIBRARY) - set(CMAKE_FIND_LIBRARY_SUFFIXES ${_dbcsr_suffixes_save}) - find_library( - LIBXSTREAM_LIBRARY xstream - PATHS "${LIBXSTREAMROOT}/lib" - NO_DEFAULT_PATH) - endif () - set(CMAKE_FIND_LIBRARY_SUFFIXES ${_dbcsr_suffixes_save}) - if (LIBXSTREAM_LIBRARY) - message(STATUS "Using prebuilt LIBXSTREAM from ${LIBXSTREAMROOT}") - else () - message( - STATUS - "LIBXSTREAM source found at ${LIBXSTREAMROOT} -- building as subdirectory" - ) - set(LIBXSROOT - "${LIBXSROOT}" - CACHE PATH "" FORCE) - set(LIBXSTREAM_SHARED - ${BUILD_SHARED_LIBS} - CACHE BOOL "" FORCE) - add_subdirectory("${LIBXSTREAMROOT}" - "${CMAKE_BINARY_DIR}/_deps/libxstream-build") - set(LIBXSTREAM_FETCHED TRUE) - endif () - else () + + # libxs is a hard dependency when opencl is on. + if (NOT USE_LIBXS) message( - STATUS "LIBXSTREAM not found locally -- downloading via FetchContent") - FetchContent_Declare( - libxstream - GIT_REPOSITORY ${LIBXSTREAM_GIT_REPOSITORY} - GIT_TAG ${LIBXSTREAM_GIT_TAG}) - set(LIBXSROOT - "${LIBXSROOT}" - CACHE PATH "" FORCE) - set(LIBXSTREAM_SHARED - ${BUILD_SHARED_LIBS} - CACHE BOOL "" FORCE) - FetchContent_MakeAvailable(libxstream) - set(LIBXSTREAMROOT "${libxstream_SOURCE_DIR}") - set(LIBXSTREAM_FETCHED TRUE) + WARNING "USE_LIBXS is not set but LIBXS is required for OpenCL backend; " + "overriding USE_LIBXS=OFF and enabling LIBXS") + find_package(libxs REQUIRED) endif () - message(STATUS "Using LIBXSTREAM from ${LIBXSTREAMROOT}") - # Kernel build script and common OpenCL headers - set(DBCSR_OPENCL_SCRIPT "${LIBXSTREAMROOT}/scripts/tool_opencl.sh") + + find_package(OpenCL REQUIRED) + find_package(libxs CONFIG REQUIRED) + find_package(libxstream CONFIG REQUIRED) + + message(STATUS "Using LIBXSTREAM") + set(DBCSR_OPENCL_SCRIPT "${LIBXSTREAM_OPENCL_SCRIPT}") + set(DBCSR_LIBXSTREAM_SMM_DIR "${LIBXSTREAM_SMM_DIR}") + list(GET LIBXSTREAM_INCLUDE_DIRS 0 DBCSR_LIBXSTREAM_INCLUDE_DIR) endif () -if (USE_ACCEL MATCHES "cuda|hip") +if (USE_ACCEL STREQUAL "cuda" OR USE_ACCEL STREQUAL "hip") set(GPU_ARCH_NUMBER_K20X 35) set(GPU_ARCH_NUMBER_K40 35) set(GPU_ARCH_NUMBER_K80 37) @@ -469,7 +311,7 @@ if (USE_ACCEL MATCHES "cuda|hip") set(GPU_ARCH_NUMBER_Mi350 gfx950) endif () -if (USE_ACCEL MATCHES "cuda") +if (USE_ACCEL STREQUAL "cuda") enable_language(CUDA) find_package(CUDAToolkit REQUIRED) @@ -499,7 +341,7 @@ if (USE_ACCEL MATCHES "cuda") message(STATUS "GPU profiling enabled: " ${WITH_CUDA_PROFILING}) endif () -if (USE_ACCEL MATCHES "hip") +if (USE_ACCEL STREQUAL "hip") if (NOT CMAKE_HIP_ARCHITECTURES) set(CMAKE_HIP_ARCHITECTURES OFF) endif () diff --git a/cmake/CompilerConfiguration.cmake b/cmake/CompilerConfiguration.cmake index a8a94c16f67..a7696c48b7a 100644 --- a/cmake/CompilerConfiguration.cmake +++ b/cmake/CompilerConfiguration.cmake @@ -168,6 +168,6 @@ set(CMAKE_C_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG}) # Suppress GFortran runtime warnings when LIBXS provides the wrapper if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU" - AND NOT USE_ASAN AND (LIBXS_FETCHED OR LIBXS_LIBRARY)) + AND NOT USE_ASAN AND USE_LIBXS AND TARGET libxs::libxs) add_link_options("-Wl,--wrap=_gfortran_runtime_warning_at") endif () diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake deleted file mode 100644 index 6fb3b421d60..00000000000 --- a/cmake/dependencies.cmake +++ /dev/null @@ -1,7 +0,0 @@ -set(LIBXS_GIT_REPOSITORY "https://github.com/hfp/libxs.git") -set(LIBXS_GIT_TAG "1a9fad72958a07f28e7514c19328162485c3c358") - -set(LIBXSTREAM_GIT_REPOSITORY "https://github.com/hfp/libxstream.git") -set(LIBXSTREAM_GIT_TAG "eb9acbb55f83e65572add7569c2aa0a6f38a5a41") - -set(LIBXSMM_GIT_TAG "0cea22fdc34ec54bc59ffb47a43cb3e28b26d3e0") diff --git a/examples/dbcsr_tensor_example_2.cpp b/examples/dbcsr_tensor_example_2.cpp index 5346c5ae4d2..f20a7e71295 100644 --- a/examples/dbcsr_tensor_example_2.cpp +++ b/examples/dbcsr_tensor_example_2.cpp @@ -148,28 +148,29 @@ int main(int argc, char* argv[]) { } // block sizes - std::vector blk1, blk2, blk3, blk4, blk5; - // blk indices of non-zero blocks - std::vector nz11, nz12, nz13, nz21, nz22, nz24, nz25, nz33, nz34, nz35; - - blk1 = {3, 9, 12, 1}; - blk2 = {4, 2, 3, 1, 9, 2, 32, 10, 5, 8, 7}; - blk3 = {7, 3, 8, 7, 9, 5, 10, 23, 2}; - blk4 = {8, 1, 4, 13, 6}; - blk5 = {4, 2, 22}; - - nz11 = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3}; - nz12 = {2, 4, 4, 4, 5, 5, 6, 7, 9, 10, 10, 0, 0, 3, 6, 6, 8, 9, 1, 1, 4, 5, 7, 7, 8, 10, 10, 1, 3, 4, 4, 7}; - nz13 = {6, 2, 4, 8, 5, 7, 1, 7, 2, 1, 2, 0, 3, 5, 1, 6, 4, 7, 2, 6, 0, 3, 2, 6, 7, 4, 7, 8, 5, 0, 1, 6}; + std::vector blk1 = {3, 9, 12, 1}; + std::vector blk2 = {4, 2, 3, 1, 9, 2, 32, 10, 5, 8, 7}; + std::vector blk3 = {7, 3, 8, 7, 9, 5, 10, 23, 2}; + std::vector blk4 = {8, 1, 4, 13, 6}; + std::vector blk5 = {4, 2, 22}; - nz21 = {0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3}; - nz22 = {0, 2, 3, 5, 9, 1, 1, 3, 4, 4, 5, 5, 5, 6, 6, 8, 8, 8, 9, 10, 0, 2, 2, 3, 4, 5, 7, 8, 10, 10, 0, 2, 3, 5, 9, 10}; - nz24 = {2, 4, 1, 2, 1, 2, 4, 0, 0, 3, 1, 2, 3, 0, 3, 2, 3, 3, 1, 0, 2, 0, 0, 2, 3, 2, 3, 1, 1, 2, 0, 0, 2, 1, 4, 4}; - nz25 = {0, 2, 1, 0, 0, 1, 2, 0, 2, 0, 1, 2, 1, 0, 2, 1, 2, 1, 0, 1, 2, 0, 1, 2, 1, 1, 1, 2, 0, 1, 0, 2, 1, 0, 2, 1}; - - nz33 = {1, 3, 4, 4, 4, 5, 5, 7}; - nz34 = {2, 1, 0, 0, 2, 1, 3, 4}; - nz35 = {2, 1, 0, 1, 2, 1, 0, 0}; + // blk indices of non-zero blocks + std::vector nz11 = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3}; + std::vector nz12 = {2, 4, 4, 4, 5, 5, 6, 7, 9, 10, 10, 0, 0, 3, 6, 6, 8, 9, 1, 1, 4, 5, 7, 7, 8, 10, 10, 1, 3, 4, 4, 7}; + std::vector nz13 = {6, 2, 4, 8, 5, 7, 1, 7, 2, 1, 2, 0, 3, 5, 1, 6, 4, 7, 2, 6, 0, 3, 2, 6, 7, 4, 7, 8, 5, 0, 1, 6}; + + std::vector nz21 = { + 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3}; + std::vector nz22 = { + 0, 2, 3, 5, 9, 1, 1, 3, 4, 4, 5, 5, 5, 6, 6, 8, 8, 8, 9, 10, 0, 2, 2, 3, 4, 5, 7, 8, 10, 10, 0, 2, 3, 5, 9, 10}; + std::vector nz24 = { + 2, 4, 1, 2, 1, 2, 4, 0, 0, 3, 1, 2, 3, 0, 3, 2, 3, 3, 1, 0, 2, 0, 0, 2, 3, 2, 3, 1, 1, 2, 0, 0, 2, 1, 4, 4}; + std::vector nz25 = { + 0, 2, 1, 0, 0, 1, 2, 0, 2, 0, 1, 2, 1, 0, 2, 1, 2, 1, 0, 1, 2, 0, 1, 2, 1, 1, 1, 2, 0, 1, 0, 2, 1, 0, 2, 1}; + + std::vector nz33 = {1, 3, 4, 4, 4, 5, 5, 7}; + std::vector nz34 = {2, 1, 0, 0, 2, 1, 3, 4}; + std::vector nz35 = {2, 1, 0, 1, 2, 1, 0, 0}; // (13|2)x(54|21)=(3|45) // distribute blocks @@ -223,14 +224,12 @@ int main(int argc, char* argv[]) { dbcsr_t_distribution dist3 = nullptr; // (13|2)x(54|21)=(3|45) - std::vector map11, map12, map21, map22, map31, map32; - - map11 = {0, 2}; - map12 = {1}; - map21 = {3, 2}; - map22 = {1, 0}; - map31 = {0}; - map32 = {1, 2}; + std::vector map11 = {0, 2}; + std::vector map12 = {1}; + std::vector map21 = {3, 2}; + std::vector map22 = {1, 0}; + std::vector map31 = {0}; + std::vector map32 = {1, 2}; if (mpi_rank == 0) std::cout << "Creating dist objects..." << '\n' << std::endl; @@ -286,13 +285,12 @@ int main(int argc, char* argv[]) { // cn : indices to be contracted // noncn : indices not to be contracted // mapn : how nonc indices map to tensor 3 - std::vector c1, nonc1, c2, nonc2, map1, map2; - c1 = {0, 1}; - nonc1 = {2}; - c2 = {0, 1}; - nonc2 = {2, 3}; - map1 = {0}; - map2 = {1, 2}; + std::vector c1 = {0, 1}; + std::vector nonc1 = {2}; + std::vector c2 = {0, 1}; + std::vector nonc2 = {2, 3}; + std::vector map1 = {0}; + std::vector map2 = {1, 2}; int unit_nr = -1; if (mpi_rank == 0) unit_nr = 6; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 714adadbf94..06319cbe7c6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -124,12 +124,12 @@ set(DBCSR_CUDA_SRCS ${DBCSR_HIP_AND_CUDA_SRCS} acc/cuda/acc_cuda.cpp set(DBCSR_HIP_SRCS ${DBCSR_HIP_AND_CUDA_SRCS} acc/hip/acc_hip.cpp) -if (USE_ACCEL MATCHES "hip") +if (USE_ACCEL STREQUAL "hip") set_source_files_properties(acc/cuda_hip/calculate_norms.cpp PROPERTIES LANGUAGE HIP) set_source_files_properties(acc/cuda_hip/calculate_norms.cpp PROPERTIES COMPILE_FLAGS "-fPIE") -elseif (USE_ACCEL MATCHES "cuda") +elseif (USE_ACCEL STREQUAL "cuda") set_source_files_properties(acc/cuda_hip/calculate_norms.cpp PROPERTIES LANGUAGE CUDA) set_source_files_properties(acc/cuda_hip/calculate_norms.cpp @@ -137,7 +137,7 @@ elseif (USE_ACCEL MATCHES "cuda") endif () # OpenCL backend: SMM from LIBXSTREAM samples, ACC from LIBXSTREAM library -set(DBCSR_SMM_DIR "${LIBXSTREAMROOT}/samples/smm") +set(DBCSR_SMM_DIR "${DBCSR_LIBXSTREAM_SMM_DIR}") set(DBCSR_OPENCL_SRCS "${DBCSR_SMM_DIR}/smm_acc.c" "${DBCSR_SMM_DIR}/smm_trans.c" "${DBCSR_SMM_DIR}/smm_params.c" "${DBCSR_SMM_DIR}/smm_kernel.c") @@ -159,11 +159,11 @@ endif () set(DBCSR_SRCS ${DBCSR_FORTRAN_SRCS}) -if (USE_ACCEL MATCHES "cuda") +if (USE_ACCEL STREQUAL "cuda") set(DBCSR_SRCS ${DBCSR_SRCS} ${DBCSR_CUDA_SRCS}) -elseif (USE_ACCEL MATCHES "hip") +elseif (USE_ACCEL STREQUAL "hip") set(DBCSR_SRCS ${DBCSR_SRCS} ${DBCSR_HIP_SRCS}) -elseif (USE_ACCEL MATCHES "opencl") +elseif (USE_ACCEL STREQUAL "opencl") set(DBCSR_SRCS ${DBCSR_SRCS} ${DBCSR_OPENCL_SRCS}) endif () @@ -179,111 +179,106 @@ set_target_properties( SOVERSION ${dbcsr_APIVERSION} POSITION_INDEPENDENT_CODE ON) -if (USE_ACCEL MATCHES "hip") +if (USE_ACCEL STREQUAL "hip") set_target_properties(dbcsr PROPERTIES HIP_ARCHITECTURES "${ACC_ARCH_NUMBER}") -elseif (USE_ACCEL MATCHES "cuda") +elseif (USE_ACCEL STREQUAL "cuda") set_target_properties(dbcsr PROPERTIES CUDA_ARCHITECTURES "${ACC_ARCH_NUMBER}") endif () -if (LIBXS_FETCHED) - target_compile_definitions(dbcsr PRIVATE __LIBXS __BLAS) - target_link_libraries(dbcsr PRIVATE libxs::libxs ${BLAS_LIBRARIES}) -elseif (LIBXS_LIBRARY) - target_compile_definitions(dbcsr PRIVATE __LIBXS __BLAS) - target_include_directories(dbcsr PRIVATE "${LIBXSROOT}/include") - target_link_libraries(dbcsr PRIVATE ${LIBXS_LIBRARY} ${BLAS_LIBRARIES}) +# Compile libxs_jit.F: provides LIBXS_JIT module with backend-aware dispatch. +# Must be compiled by the consumer so preprocessor defines (__MKL, __LIBXSMM) +# match the consumer's configuration. +if (USE_LIBXS) + message(STATUS "Adding libxs_jit.F from dependency libxs for compilation") + get_target_property(_libxs_incdirs libxs::libxs INTERFACE_INCLUDE_DIRECTORIES) + find_path(DBCSR_LIBXS_JIT_DIR "libxs_jit.F" HINTS ${_libxs_incdirs}) + if (DBCSR_LIBXS_JIT_DIR) + target_sources(dbcsr PRIVATE "${DBCSR_LIBXS_JIT_DIR}/libxs_jit.F") + endif () + unset(_libxs_incdirs) endif () +target_compile_definitions( + dbcsr + PRIVATE __STATM_TOTAL + $<$:__LIBXS> + $<$:__BLAS> + $<$:__parallel> + $<$:__USE_MPI_F08> + $<$:__NO_STATM_ACCESS> + $<$:NDEBUG> + $<$:__LIBXSMM>) + +# Instead of resetting the compiler for MPI, we are adding the compiler flags +# otherwise added by the mpifort-wrapper directly; based on hints from: +# https://cmake.org/pipermail/cmake/2012-June/050991.html Here we assume that +# the MPI implementation found uses the same compiler as the Fortran compiler we +# found prior. Otherwise we might be adding incompatible compiler flags at this +# point. when built against MPI, a dbcsr consumer has to specify the MPI flags +# as well, therefore: PUBLIC +target_link_libraries( + dbcsr + PUBLIC $<$:MPI::MPI_Fortran> + ${LAPACK_LIBRARIES} + $<$:libxs::libxs> + ${BLAS_LIBRARIES} + $<$:libxsmm::libxsmm> + $<$:OpenMP::OpenMP_C> + $<$:OpenMP::OpenMP_CXX> + $<$:OpenMP::OpenMP_Fortran>) + if (BLAS_LIBRARIES MATCHES "mkl_") target_compile_definitions(dbcsr PRIVATE __MKL) endif () -if (LIBXSMM_FETCHED) - target_compile_definitions(dbcsr PRIVATE __LIBXSMM) - target_include_directories(dbcsr PRIVATE "${LIBXSMMROOT}/include") - set_target_properties( - xsmm PROPERTIES INTERFACE_INCLUDE_DIRECTORIES - "$") - install( - TARGETS xsmm - EXPORT DBCSRTargets - ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") - target_link_libraries(dbcsr PRIVATE xsmm) -elseif (LIBXSMM_LIBRARY) - target_compile_definitions(dbcsr PRIVATE __LIBXSMM) - target_include_directories(dbcsr PRIVATE "${LIBXSMMROOT}/include") - target_link_libraries(dbcsr PRIVATE ${LIBXSMM_LIBRARY}) -endif () - if (APPLE) - # fix /proc/self/statm can not be opened on macOS - target_compile_definitions(dbcsr PRIVATE __NO_STATM_ACCESS) - if (BLAS_LIBRARIES MATCHES "Accelerate") target_compile_definitions(dbcsr PRIVATE __ACCELERATE) endif () endif () -# set -DNDEBUG for Release builds -target_compile_definitions(dbcsr PRIVATE $<$:NDEBUG>) - -target_link_libraries(dbcsr PRIVATE ${BLAS_LIBRARIES} ${LAPACK_LIBRARIES}) -target_include_directories( - dbcsr PRIVATE base) # do not export those includes, but some srcs do an - # unprefixed include -# make sure dependencies of dbcsr find the dbcsr_api.mod file plus some files -# they usually include: target_include_directories( dbcsr + PRIVATE base PUBLIC $ $ + # make sure dependencies of dbcsr find the dbcsr_api.mod file plus some + # files they usually include: $) -target_compile_definitions(dbcsr PRIVATE __STATM_TOTAL) -set_target_properties(dbcsr PROPERTIES LINKER_LANGUAGE Fortran) - -if (MPI_FOUND) - # once built, a user of the dbcsr library can not influence anything anymore - # by setting those flags: - target_compile_definitions(dbcsr PRIVATE __parallel) +# do not export those includes, but some srcs do an unprefixed include - # If requested, use the MPI_F08 module - if (USE_MPI_F08) - target_compile_definitions(dbcsr PRIVATE __USE_MPI_F08) - endif () +set_target_properties(dbcsr PROPERTIES LINKER_LANGUAGE Fortran) +if (USE_MPI) # Instead of resetting the compiler for MPI, we are adding the compiler flags # otherwise added by the mpifort-wrapper directly; based on hints from: # https://cmake.org/pipermail/cmake/2012-June/050991.html Here we assume that # the MPI implementation found uses the same compiler as the Fortran compiler # we found prior. Otherwise we might be adding incompatible compiler flags at # this point. when built against MPI, a dbcsr consumer has to specify the MPI - # flags as well, therefore: PUBLIC - target_link_libraries(dbcsr PUBLIC MPI::MPI_Fortran) - - # Workaround https://gitlab.kitware.com/cmake/cmake/-/issues/27231 - get_target_property(opts MPI::MPI_Fortran INTERFACE_COMPILE_OPTIONS) - set_target_properties( - MPI::MPI_Fortran PROPERTIES INTERFACE_COMPILE_OPTIONS - "$<$:${opts}>") - unset(opts) - + # flags as well, therefore: PUBLIC Workaround + # https://gitlab.kitware.com/cmake/cmake/-/issues/27231 + get_target_property(dbcsr_mpi_fortran_opts MPI::MPI_Fortran + INTERFACE_COMPILE_OPTIONS) + # very brittle + if (dbcsr_mpi_fortran_opts) + set_target_properties( + MPI::MPI_Fortran + PROPERTIES INTERFACE_COMPILE_OPTIONS + "$<$:${dbcsr_mpi_fortran_opts}>") + unset(dbcsr_mpi_fortran_opts) + endif () endif () -target_link_libraries( - dbcsr - PRIVATE $<$:OpenMP::OpenMP_C> - $<$:OpenMP::OpenMP_CXX> - $<$:OpenMP::OpenMP_Fortran>) - # todo, make this a bit better with opencl. -if (USE_ACCEL MATCHES "cuda|hip") +if (USE_ACCEL STREQUAL "cuda" OR USE_ACCEL STREQUAL "hip") add_subdirectory(acc/libsmm_acc) endif () # OpenCL SMM is provided by LIBXSTREAM (no local subdirectory) -if (USE_ACCEL) +if (DBCSR_WITH_ACCEL) target_compile_definitions( dbcsr PRIVATE __DBCSR_ACC @@ -295,48 +290,16 @@ if (USE_ACCEL) $<$:__CUDA_PROFILING> $<$:__HIP_PROFILING>) - if (USE_ACCEL MATCHES "opencl") + if (USE_ACCEL STREQUAL "opencl") target_compile_definitions(dbcsr PRIVATE __LIBXSTREAM) - target_include_directories(dbcsr PRIVATE "${LIBXSTREAMROOT}/include" - "${DBCSR_SMM_DIR}") - if (NOT LIBXS_FETCHED) - target_include_directories(dbcsr PRIVATE "${LIBXSROOT}/include") - endif () - if (LIBXSTREAM_FETCHED) - set(LIBXSTREAM_LINK libxstream::libxstream) - else () - set(_dbcsr_suffixes_save ${CMAKE_FIND_LIBRARY_SUFFIXES}) - if (BUILD_SHARED_LIBS) - set(CMAKE_FIND_LIBRARY_SUFFIXES .so .dylib) - else () - set(CMAKE_FIND_LIBRARY_SUFFIXES .a) - endif () - find_library( - LIBXSTREAM_LIBRARY xstream - PATHS "${LIBXSTREAMROOT}/lib" - NO_DEFAULT_PATH) - if (NOT LIBXSTREAM_LIBRARY) - set(CMAKE_FIND_LIBRARY_SUFFIXES ${_dbcsr_suffixes_save}) - find_library( - LIBXSTREAM_LIBRARY xstream - PATHS "${LIBXSTREAMROOT}/lib" - NO_DEFAULT_PATH) - endif () - set(CMAKE_FIND_LIBRARY_SUFFIXES ${_dbcsr_suffixes_save}) - if (NOT LIBXSTREAM_LIBRARY) - message( - FATAL_ERROR "libxstream library not found in ${LIBXSTREAMROOT}/lib") - endif () - set(LIBXSTREAM_LINK ${LIBXSTREAM_LIBRARY}) - endif () - # Generate smm_kernels.h from .cl kernel sources + target_include_directories(dbcsr PRIVATE "${DBCSR_SMM_DIR}") set(DBCSR_SMM_KERNELS "${DBCSR_SMM_DIR}/kernels/multiply.cl" "${DBCSR_SMM_DIR}/kernels/transpose.cl") file(GLOB DBCSR_SMM_PARAMS "${DBCSR_SMM_DIR}/params/tune_multiply_*.csv") set(DBCSR_SMM_GENHDR "${DBCSR_SMM_DIR}/smm_kernels.h") add_custom_command( OUTPUT "${DBCSR_SMM_GENHDR}" - COMMAND "${DBCSR_OPENCL_SCRIPT}" -I "${LIBXSTREAMROOT}/include" + COMMAND "${DBCSR_OPENCL_SCRIPT}" -I "${DBCSR_LIBXSTREAM_INCLUDE_DIR}" ${DBCSR_SMM_KERNELS} ${DBCSR_SMM_PARAMS} "${DBCSR_SMM_GENHDR}" DEPENDS "${DBCSR_OPENCL_SCRIPT}" ${DBCSR_SMM_KERNELS} ${DBCSR_SMM_PARAMS} COMMENT "Generating OpenCL SMM kernel header") @@ -346,21 +309,18 @@ if (USE_ACCEL) target_link_libraries( dbcsr - PRIVATE - $<$:CUDA::cudart> - $<$:CUDA::cuda_driver> - $<$:CUDA::cublas> - $<$:CUDA::nvrtc> - $<$:CUDA::nvToolsExt> - $<$:roc::hipblas> - $<$:hiprtc::hiprtc> - $<$:hip::host> - $<$:roctx64> - $<$:roctracer64> - $<$:OpenCL::OpenCL> - $<$:${LIBXSTREAM_LINK}> - $<$:$,libxs::libxs,${LIBXS_LIBRARY}>> - ) + PRIVATE $<$:CUDA::cudart> + $<$:CUDA::cuda_driver> + $<$:CUDA::cublas> + $<$:CUDA::nvrtc> + $<$:CUDA::nvToolsExt> + $<$:roc::hipblas> + $<$:hiprtc::hiprtc> + $<$:hip::host> + $<$:roctx64> + $<$:roctracer64> + $<$:OpenCL::OpenCL> + $<$:libxstream::libxstream>) endif () # ================================================================================================= @@ -379,16 +339,15 @@ if (WITH_C_API) SOVERSION ${dbcsr_APIVERSION} POSITION_INDEPENDENT_CODE ON) - target_link_libraries(dbcsr_c PRIVATE dbcsr) - target_link_libraries(dbcsr_c PUBLIC MPI::MPI_C) # the C API always needs MPI + target_link_libraries(dbcsr_c PUBLIC dbcsr MPI::MPI_C) # the C API always + # needs MPI target_include_directories( dbcsr_c - PUBLIC - $ # change order so compiler - # first checks binary - # directory - $ - $) + PUBLIC $ # change order so + # compiler + # first checks binary directory + $ + $) endif () # ================================================================================================= @@ -427,24 +386,46 @@ if (WITH_C_API) DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") endif () -if (USE_ACCEL MATCHES "opencl") +if (USE_ACCEL STREQUAL "opencl") foreach (FILE ${DBCSR_ACC_HEADER}) cmake_path(GET FILE PARENT_PATH SUBDIR) install(FILES "${FILE}" DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/${SUBDIR}") endforeach () endif () +# Record dependency prefixes for the installed config. Derive from _DIR +# (points to lib/cmake//) -> three levels up. +if (libxs_DIR) + cmake_path(GET libxs_DIR PARENT_PATH _tmp) + cmake_path(GET _tmp PARENT_PATH _tmp) + cmake_path(GET _tmp PARENT_PATH LIBXSROOT) +endif () +if (libxstream_DIR) + cmake_path(GET libxstream_DIR PARENT_PATH _tmp) + cmake_path(GET _tmp PARENT_PATH _tmp) + cmake_path(GET _tmp PARENT_PATH LIBXSTREAMROOT) +endif () +if (libxsmm_DIR) + cmake_path(GET libxsmm_DIR PARENT_PATH _tmp) + cmake_path(GET _tmp PARENT_PATH _tmp) + cmake_path(GET _tmp PARENT_PATH LIBXSMMROOT) +endif () +unset(_tmp) + configure_package_config_file( cmake/DBCSRConfig.cmake.in "${CMAKE_CURRENT_BINARY_DIR}/DBCSRConfig.cmake" INSTALL_DESTINATION "${config_install_dir}") + write_basic_package_version_file( "${CMAKE_CURRENT_BINARY_DIR}/DBCSRConfigVersion.cmake" VERSION "${dbcsr_VERSION}" COMPATIBILITY SameMajorVersion) + install( EXPORT DBCSRTargets NAMESPACE "${config_namespace}" DESTINATION "${config_install_dir}") + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/DBCSRConfig.cmake ${CMAKE_CURRENT_BINARY_DIR}/DBCSRConfigVersion.cmake DESTINATION ${config_install_dir}) diff --git a/src/acc/acc_bench.c b/src/acc/acc_bench.c index 2822ef70f21..7fc54c431cc 100644 --- a/src/acc/acc_bench.c +++ b/src/acc/acc_bench.c @@ -10,12 +10,12 @@ #include "acc_bench.h" #if defined(__LIBXS) -# include -# include -# include -# include +# include +# include +# include +# include #else /* code depends on LIBXS */ -# include +# include # define __LIBXS #endif diff --git a/src/cmake/DBCSRConfig.cmake.in b/src/cmake/DBCSRConfig.cmake.in index 3d6c8115e29..2b4bf802d8d 100644 --- a/src/cmake/DBCSRConfig.cmake.in +++ b/src/cmake/DBCSRConfig.cmake.in @@ -2,26 +2,57 @@ include(CMakeFindDependencyMacro) -# the following should only be needed when building statically +# Hints for locating DBCSR's dependencies (override with your own -D flags) +set(DBCSR_LIBXSROOT "@LIBXSROOT@") +set(DBCSR_LIBXSTREAMROOT "@LIBXSTREAMROOT@") +set(DBCSR_LIBXSMMROOT "@LIBXSMMROOT@") +# Make DBCSR's dependency locations discoverable +set(_dbcsr_prefix_path_save "${CMAKE_PREFIX_PATH}") +if (DBCSR_LIBXSROOT) + list(PREPEND CMAKE_PREFIX_PATH "${DBCSR_LIBXSROOT}") +endif () +if (DBCSR_LIBXSTREAMROOT) + list(PREPEND CMAKE_PREFIX_PATH "${DBCSR_LIBXSTREAMROOT}") +endif () +if (DBCSR_LIBXSMMROOT) + list(PREPEND CMAKE_PREFIX_PATH "${DBCSR_LIBXSMMROOT}") +endif () + +set(DBCSR_USE_MPI @USE_MPI@) if (@USE_MPI@) - set(DBCSR_USE_MPI @USE_MPI@) find_dependency(MPI) endif () +set(DBCSR_USE_OPENMP @USE_OPENMP@) if (@USE_OPENMP@) - set(DBCSR_USE_OPENMP @USE_OPENMP@) find_dependency(OpenMP) endif () +set(DBCSR_USE_LIBXS @USE_LIBXS@) +if (@USE_LIBXS@) + find_dependency(libxs CONFIG REQUIRED) +endif () + +set(DBCSR_USE_LIBXSMM @USE_LIBXSMM@) +if (@USE_LIBXSMM@) + find_dependency(libxsmm CONFIG QUIET) + if (NOT libxsmm_FOUND) + find_dependency(PkgConfig) + pkg_check_modules(DBCSR_LIBXSMM_PC QUIET IMPORTED_TARGET GLOBAL libxsmm) + if (DBCSR_LIBXSMM_PC_FOUND AND NOT TARGET libxsmm::libxsmm) + add_library(libxsmm::libxsmm ALIAS PkgConfig::DBCSR_LIBXSMM_PC) + endif () + endif () +endif () + +set(DBCSR_USE_ACCEL @USE_ACCEL@) if ("@USE_ACCEL@" MATCHES "cuda") - set(DBCSR_USE_ACCEL @USE_ACCEL@) enable_language(CUDA) find_dependency(CUDAToolkit) endif () if ("@USE_ACCEL@" MATCHES "hip") - set(DBCSR_USE_ACCEL @USE_ACCEL@) enable_language(HIP) find_dependency(hip) find_dependency(hipblas) @@ -30,21 +61,17 @@ endif () if ("@USE_ACCEL@" MATCHES "opencl") find_dependency(OpenCL) -endif () - -if (@USE_LIBXSMM@) - set(DBCSR_USE_LIBXSMM @USE_LIBXSMM@) - if (NOT TARGET xsmm) - find_library(DBCSR_LIBXSMM_LIBRARY xsmm - PATHS "@LIBXSMMROOT@/lib" NO_DEFAULT_PATH) - if (NOT DBCSR_LIBXSMM_LIBRARY) - find_library(DBCSR_LIBXSMM_LIBRARY xsmm) - endif () - if (DBCSR_LIBXSMM_LIBRARY) - add_library(xsmm UNKNOWN IMPORTED) - set_target_properties(xsmm PROPERTIES IMPORTED_LOCATION "${DBCSR_LIBXSMM_LIBRARY}") + find_dependency(libxstream CONFIG QUIET) + if (NOT libxstream_FOUND) + find_dependency(PkgConfig) + pkg_check_modules(DBCSR_LIBXSTREAM_PC QUIET IMPORTED_TARGET GLOBAL libxstream) + if (DBCSR_LIBXSTREAM_PC_FOUND AND NOT TARGET libxstream::libxstream) + add_library(libxstream::libxstream ALIAS PkgConfig::DBCSR_LIBXSTREAM_PC) endif () endif () endif () +set(CMAKE_PREFIX_PATH "${_dbcsr_prefix_path_save}") +unset(_dbcsr_prefix_path_save) + include("${CMAKE_CURRENT_LIST_DIR}/DBCSRTargets.cmake") diff --git a/src/mm/dbcsr_mm_hostdrv.F b/src/mm/dbcsr_mm_hostdrv.F index 2f3bd200c79..e8f65c03ef6 100644 --- a/src/mm/dbcsr_mm_hostdrv.F +++ b/src/mm/dbcsr_mm_hostdrv.F @@ -373,57 +373,10 @@ SUBROUTINE libxs_process_mm_stack_${nametype1}$ (stack_descr, params, & !! Processes MM stack using LIBXS indexed GEMM batch. !! Real types use LIBXS dispatch; complex types fall through to BLAS. #:if base1 == 'r' - USE LIBXS, ONLY: libxs_gemm_config_t, & - libxs_gemm_dispatch, libxs_gemm_index, & - C_LOC, C_SIZEOF, & - LIBXS_DATATYPE_F${bits1[n]}$ - USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_FUNLOC - #:set ckind1 = 'C_DOUBLE' if nametype1 == 'd' else 'C_FLOAT' -#if defined(__MKL) - INTERFACE - FUNCTION mkl_cblas_jit_create_${nametype1}$gemm(jitter, & - layout, transa, transb, m, n, k, & - alpha, lda, ldb, beta, ldc) & - RESULT(status) BIND(C) - USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_PTR, C_INT, ${ckind1}$ - INTEGER(C_INT) :: status - TYPE(C_PTR) :: jitter - INTEGER(C_INT), VALUE :: layout, transa, transb - INTEGER(C_INT), VALUE :: m, n, k, lda, ldb, ldc - REAL(${ckind1}$), VALUE :: alpha, beta - END FUNCTION - FUNCTION mkl_jit_get_${nametype1}$gemm_ptr(jitter) & - RESULT(ptr) BIND(C) - USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_FUNPTR, C_PTR - TYPE(C_FUNPTR) :: ptr - TYPE(C_PTR), INTENT(IN), VALUE :: jitter - END FUNCTION - END INTERFACE -#endif -#if defined(__LIBXSMM) - INTERFACE - FUNCTION libxsmm_dispatch_gemm(shape, flags, prefetch) & - RESULT(fn) BIND(C) - USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_FUNPTR, C_INT - INTEGER(C_INT), INTENT(IN) :: shape(10) - INTEGER(C_INT), INTENT(IN), VALUE :: flags, prefetch - TYPE(C_FUNPTR) :: fn - END FUNCTION - END INTERFACE -#endif -#if defined(__BLAS) || defined(__MKL) - INTERFACE - SUBROUTINE ${nametype1}$gemm_blas(transa, transb, & - m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) & - BIND(C, NAME="${nametype1}$gemm_") - USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT, C_CHAR, ${ckind1}$ - CHARACTER(1, C_CHAR), INTENT(IN) :: transa, transb - INTEGER(C_INT), INTENT(IN) :: m, n, k, lda, ldb, ldc - REAL(${ckind1}$), INTENT(IN) :: alpha, beta, a(*), b(*) - REAL(${ckind1}$), INTENT(INOUT) :: c(*) - END SUBROUTINE - END INTERFACE -#endif + USE LIBXS_JIT, ONLY: libxs_gemm_config_t, & + libxs_gemm_dispatch, libxs_gemm_index, & + C_LOC, C_SIZEOF, & + LIBXS_DATATYPE_F${bits1[n]}$ #:endif INTEGER, INTENT(IN) :: stack_size TYPE(stack_descriptor_type), INTENT(IN) :: stack_descr @@ -450,22 +403,7 @@ FUNCTION libxsmm_dispatch_gemm(shape, flags, prefetch) & datatype=LIBXS_DATATYPE_F${bits1[n]}$, & transa='N', transb='N', & m=m, n=n, k=k, lda=m, ldb=k, ldc=m, & - alpha=1D0, beta=1D0 & -#if defined(__MKL) - , jit_create_${nametype1}$gemm= & - C_FUNLOC(mkl_cblas_jit_create_${nametype1}$gemm) & - , jit_get_${nametype1}$gemm= & - C_FUNLOC(mkl_jit_get_${nametype1}$gemm_ptr) & -#endif -#if defined(__LIBXSMM) - , xgemm_dispatch= & - C_FUNLOC(libxsmm_dispatch_gemm) & -#endif -#if defined(__BLAS) || defined(__MKL) - , ${nametype1}$gemm_blas= & - C_FUNLOC(${nametype1}$gemm_blas) & -#endif - ) + alpha=1D0, beta=1D0) IF (0 /= rc) THEN CALL libxs_gemm_index( & C_LOC(a_data), C_LOC(params(p_a_first, 1)), & diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 85c3dbae1f6..172dcd7146a 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -73,9 +73,9 @@ set(DBCSR_TESTS_FTN dbcsr_tas_unittest dbcsr_test_csr_conversions) -if (USE_ACCEL) +if (DBCSR_WITH_ACCEL) set(DBCSR_TESTS_BACKEND dbcsr_acc_test) - if (LIBXS_LIBRARY OR LIBXS_FETCHED) + if (USE_LIBXS) list(APPEND DBCSR_TESTS_BACKEND acc_bench) endif () endif () @@ -191,16 +191,14 @@ if (NOT USE_MPI) if (BLAS_LIBRARIES MATCHES "mkl_") target_compile_definitions(${dbcsr_test_backend} PRIVATE __MKL) endif () - if (LIBXS_FETCHED) + if (USE_LIBXS) target_link_libraries(${dbcsr_test_backend} PRIVATE libxs::libxs) - elseif (LIBXS_LIBRARY) - target_include_directories(${dbcsr_test_backend} - PRIVATE "${LIBXSROOT}/include") + target_compile_definitions(${dbcsr_test_backend} PRIVATE __LIBXS) endif () - if (LIBXSMM_LIBRARY) + if (USE_LIBXSMM) target_compile_definitions(${dbcsr_test_backend} PRIVATE __LIBXSMM) - target_include_directories(${dbcsr_test_backend} - PRIVATE "${LIBXSMMROOT}/include") + target_link_libraries(${dbcsr_test_backend} + PRIVATE ${DBCSR_LIBXSMM_TARGET}) endif () # register unittest executable with CMake add_test(NAME ${dbcsr_test_backend} COMMAND ./${dbcsr_test_backend}) @@ -244,7 +242,7 @@ add_custom_target( BYPRODUCTS libsmm_acc_timer_multiply.cpp COMMENT "Generate tests/generate_libsmm_acc_timer_multiply.cpp") -if (USE_ACCEL MATCHES "cuda|hip") +if (USE_ACCEL STREQUAL "cuda" OR USE_ACCEL STREQUAL "hip") # All libsmm_acc tests set(LIBSMM_ACC_TESTS_SRCS diff --git a/tests/dbcsr_tensor_test.cpp b/tests/dbcsr_tensor_test.cpp index 47f1897b795..522d061de32 100644 --- a/tests/dbcsr_tensor_test.cpp +++ b/tests/dbcsr_tensor_test.cpp @@ -155,28 +155,29 @@ int main(int argc, char* argv[]) { // block sizes - std::vector blk1, blk2, blk3, blk4, blk5; - // blk indices of non-zero blocks - std::vector nz11, nz12, nz13, nz21, nz22, nz24, nz25, nz33, nz34, nz35; - - blk1 = {3, 9, 12, 1}; - blk2 = {4, 2, 3, 1, 9, 2, 32, 10, 5, 8, 7}; - blk3 = {7, 3, 8, 7, 9, 5, 10, 23, 2}; - blk4 = {8, 1, 4, 13, 6}; - blk5 = {4, 2, 22}; - - nz11 = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3}; - nz12 = {2, 4, 4, 4, 5, 5, 6, 7, 9, 10, 10, 0, 0, 3, 6, 6, 8, 9, 1, 1, 4, 5, 7, 7, 8, 10, 10, 1, 3, 4, 4, 7}; - nz13 = {6, 2, 4, 8, 5, 7, 1, 7, 2, 1, 2, 0, 3, 5, 1, 6, 4, 7, 2, 6, 0, 3, 2, 6, 7, 4, 7, 8, 5, 0, 1, 6}; + std::vector blk1 = {3, 9, 12, 1}; + std::vector blk2 = {4, 2, 3, 1, 9, 2, 32, 10, 5, 8, 7}; + std::vector blk3 = {7, 3, 8, 7, 9, 5, 10, 23, 2}; + std::vector blk4 = {8, 1, 4, 13, 6}; + std::vector blk5 = {4, 2, 22}; - nz21 = {0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3}; - nz22 = {0, 2, 3, 5, 9, 1, 1, 3, 4, 4, 5, 5, 5, 6, 6, 8, 8, 8, 9, 10, 0, 2, 2, 3, 4, 5, 7, 8, 10, 10, 0, 2, 3, 5, 9, 10}; - nz24 = {2, 4, 1, 2, 1, 2, 4, 0, 0, 3, 1, 2, 3, 0, 3, 2, 3, 3, 1, 0, 2, 0, 0, 2, 3, 2, 3, 1, 1, 2, 0, 0, 2, 1, 4, 4}; - nz25 = {0, 2, 1, 0, 0, 1, 2, 0, 2, 0, 1, 2, 1, 0, 2, 1, 2, 1, 0, 1, 2, 0, 1, 2, 1, 1, 1, 2, 0, 1, 0, 2, 1, 0, 2, 1}; - - nz33 = {1, 3, 4, 4, 4, 5, 5, 7}; - nz34 = {2, 1, 0, 0, 2, 1, 3, 4}; - nz35 = {2, 1, 0, 1, 2, 1, 0, 0}; + // blk indices of non-zero blocks + std::vector nz11 = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3}; + std::vector nz12 = {2, 4, 4, 4, 5, 5, 6, 7, 9, 10, 10, 0, 0, 3, 6, 6, 8, 9, 1, 1, 4, 5, 7, 7, 8, 10, 10, 1, 3, 4, 4, 7}; + std::vector nz13 = {6, 2, 4, 8, 5, 7, 1, 7, 2, 1, 2, 0, 3, 5, 1, 6, 4, 7, 2, 6, 0, 3, 2, 6, 7, 4, 7, 8, 5, 0, 1, 6}; + + std::vector nz21 = { + 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3}; + std::vector nz22 = { + 0, 2, 3, 5, 9, 1, 1, 3, 4, 4, 5, 5, 5, 6, 6, 8, 8, 8, 9, 10, 0, 2, 2, 3, 4, 5, 7, 8, 10, 10, 0, 2, 3, 5, 9, 10}; + std::vector nz24 = { + 2, 4, 1, 2, 1, 2, 4, 0, 0, 3, 1, 2, 3, 0, 3, 2, 3, 3, 1, 0, 2, 0, 0, 2, 3, 2, 3, 1, 1, 2, 0, 0, 2, 1, 4, 4}; + std::vector nz25 = { + 0, 2, 1, 0, 0, 1, 2, 0, 2, 0, 1, 2, 1, 0, 2, 1, 2, 1, 0, 1, 2, 0, 1, 2, 1, 1, 1, 2, 0, 1, 0, 2, 1, 0, 2, 1}; + + std::vector nz33 = {1, 3, 4, 4, 4, 5, 5, 7}; + std::vector nz34 = {2, 1, 0, 0, 2, 1, 3, 4}; + std::vector nz35 = {2, 1, 0, 1, 2, 1, 0, 0}; // (13|2)x(54|21)=(3|45) // distribute blocks @@ -231,14 +232,12 @@ int main(int argc, char* argv[]) { void* dist3 = nullptr; // (13|2)x(54|21)=(3|45) - std::vector map11, map12, map21, map22, map31, map32; - - map11 = {0, 2}; - map12 = {1}; - map21 = {3, 2}; - map22 = {1, 0}; - map31 = {0}; - map32 = {1, 2}; + std::vector map11 = {0, 2}; + std::vector map12 = {1}; + std::vector map21 = {3, 2}; + std::vector map22 = {1, 0}; + std::vector map31 = {0}; + std::vector map32 = {1, 2}; if (mpi_rank == 0) std::cout << "Creating dist objects..." << '\n' << std::endl; @@ -290,13 +289,12 @@ int main(int argc, char* argv[]) { // cn : indices to be contracted // noncn : indices not to be contracted // mapn : how nonc indices map to tensor 3 - std::vector c1, nonc1, c2, nonc2, map1, map2; - c1 = {0, 1}; - nonc1 = {2}; - c2 = {0, 1}; - nonc2 = {2, 3}; - map1 = {0}; - map2 = {1, 2}; + std::vector c1 = {0, 1}; + std::vector nonc1 = {2}; + std::vector c2 = {0, 1}; + std::vector nonc2 = {2, 3}; + std::vector map1 = {0}; + std::vector map2 = {1, 2}; int unit_nr = -1; diff --git a/tools/docker/Dockerfile.build-env-ubuntu b/tools/docker/Dockerfile.build-env-ubuntu index 3ffe0e6760a..0fb3ef191d2 100644 --- a/tools/docker/Dockerfile.build-env-ubuntu +++ b/tools/docker/Dockerfile.build-env-ubuntu @@ -60,8 +60,8 @@ RUN set -ex ; \ apt-get install -y --no-install-recommends opencl-c-headers ocl-icd-libopencl1 ; \ rm -rf /var/lib/apt/lists/* -ARG libxs_version=1a9fad72958a07f28e7514c19328162485c3c358 -ARG libxstream_version=eb9acbb55f83e65572add7569c2aa0a6f38a5a41 +ARG libxs_version=11348a69c6b8622f9192ec725b291f42384a5ce7 +ARG libxstream_version=8375c018d0619bfd9261f67b9ed8b609d8e4ff66 RUN set -ex ; \ curl -LsS https://github.com/hfp/libxs/archive/${libxs_version}.tar.gz | tar -xz -C /opt ; \ @@ -75,7 +75,7 @@ RUN set -ex ; \ ENV PKG_CONFIG_PATH="/opt/libxstream/lib/pkgconfig:/opt/libxs/lib/pkgconfig:${PKG_CONFIG_PATH}" -ARG libxs_version=1a9fad72958a07f28e7514c19328162485c3c358 +ARG libxs_version=11348a69c6b8622f9192ec725b291f42384a5ce7 RUN set -ex ; \ curl -LsS https://github.com/hfp/libxs/archive/${libxs_version}.tar.gz | tar -xz -C /opt ; \ diff --git a/tools/docker/Dockerfile.build-env-ubuntu-cuda b/tools/docker/Dockerfile.build-env-ubuntu-cuda index 056d6e79c3b..778965f68ba 100644 --- a/tools/docker/Dockerfile.build-env-ubuntu-cuda +++ b/tools/docker/Dockerfile.build-env-ubuntu-cuda @@ -51,8 +51,8 @@ RUN set -ex ; \ apt-get install -y --no-install-recommends opencl-c-headers ocl-icd-libopencl1 ; \ rm -rf /var/lib/apt/lists/* -ARG libxs_version=1a9fad72958a07f28e7514c19328162485c3c358 -ARG libxstream_version=eb9acbb55f83e65572add7569c2aa0a6f38a5a41 +ARG libxs_version=11348a69c6b8622f9192ec725b291f42384a5ce7 +ARG libxstream_version=8375c018d0619bfd9261f67b9ed8b609d8e4ff66 RUN set -ex ; \ curl -LsS https://github.com/hfp/libxs/archive/${libxs_version}.tar.gz | tar -xz -C /opt ; \ diff --git a/tools/fedora/dbcsr.spec b/tools/fedora/dbcsr.spec index 50311314341..b8704749e52 100644 --- a/tools/fedora/dbcsr.spec +++ b/tools/fedora/dbcsr.spec @@ -1,4 +1,5 @@ -# Currently does not build with opencl/libxsmm +# OpenCL support requires packaged libxs/libxstream/libxsmm dependencies. +# Keep it disabled until those packages are available in Fedora. %bcond_with opencl # No openmpi on i668 with openmpi 5 in Fedora 40+ @@ -26,6 +27,8 @@ BuildRequires: gcc-gfortran BuildRequires: make BuildRequires: flexiblas-devel %if %{with opencl} +BuildRequires: libxs-devel +BuildRequires: libxstream-devel BuildRequires: libxsmm-devel %endif BuildRequires: python3-fypp @@ -109,15 +112,24 @@ export CXXFLAGS="%{optflags} -fPIC" export FFLAGS="%{optflags} -fPIC" %cmake \ -DCMAKE_INSTALL_Fortran_MODULES=%{_fmoddir} \ + -DBUILD_SHARED_LIBS=ON \ + -DBUILD_TESTING=ON \ -DUSE_MPI=OFF \ - %{?with_opencl:-DUSE_ACCEL=opencl -DUSE_SMM=libxsmm} + -DUSE_LIBXS=OFF \ + -DUSE_LIBXSMM=OFF \ + %{?with_opencl:-DUSE_ACCEL=opencl -DUSE_LIBXS=ON -DUSE_LIBXSMM=ON} %cmake_build for mpi in %{mpi_list} do module load mpi/$mpi-%{_arch} %cmake \ -DCMAKE_INSTALL_Fortran_MODULES=$MPI_FORTRAN_MOD_DIR \ - %{?with_opencl:-DUSE_ACCEL=opencl -DUSE_SMM=libxsmm} \ + -DBUILD_SHARED_LIBS=ON \ + -DBUILD_TESTING=ON \ + -DUSE_MPI=ON \ + -DUSE_LIBXS=OFF \ + -DUSE_LIBXSMM=OFF \ + %{?with_opencl:-DUSE_ACCEL=opencl -DUSE_LIBXS=ON -DUSE_LIBXSMM=ON} \ -DCMAKE_INSTALL_PREFIX:PATH=$MPI_HOME \ -DCMAKE_INSTALL_LIBDIR:PATH=$MPI_LIB \ -DUSE_MPI_F08=ON \