From c648e8280772859b9f1cc3fbf6aa26b084f5b94c Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Wed, 19 Nov 2025 13:51:08 -0800 Subject: [PATCH 1/9] Updating submodules --- src/bamxx | 2 +- src/smithlab_cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bamxx b/src/bamxx index 2117a60..3fb0456 160000 --- a/src/bamxx +++ b/src/bamxx @@ -1 +1 @@ -Subproject commit 2117a60e9813ca696ea604197857e8a7ae3850c7 +Subproject commit 3fb045628a7c4d23dd7fec99a11a5b9f41ce51c6 diff --git a/src/smithlab_cpp b/src/smithlab_cpp index 28c5d48..b64428f 160000 --- a/src/smithlab_cpp +++ b/src/smithlab_cpp @@ -1 +1 @@ -Subproject commit 28c5d4895278de11319a4dc5fada912d21ab9f78 +Subproject commit b64428f65937b15ade6e9188a7ff1304de91bec3 From 1f76e49abb911c6d6746b380c284a92e11073153 Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sat, 22 Nov 2025 16:51:16 -0800 Subject: [PATCH 2/9] cmake/FindHTSLIB.cmake: adding cmake function to find htslib --- cmake/FindHTSLIB.cmake | 171 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 cmake/FindHTSLIB.cmake diff --git a/cmake/FindHTSLIB.cmake b/cmake/FindHTSLIB.cmake new file mode 100644 index 0000000..dd0bec7 --- /dev/null +++ b/cmake/FindHTSLIB.cmake @@ -0,0 +1,171 @@ +# SPDX-License-Identifier: GPL-3.0-or-later; (c) 2025 Andrew D Smith (author) +#[=======================================================================[.rst: +FindHTSLIB +---------- + +Find the native HTSLib includes and library. Based on the ZLIB module. + +#]=======================================================================] + +cmake_policy(PUSH) +cmake_policy(SET CMP0159 NEW) # file(STRINGS) with REGEX updates CMAKE_MATCH_<n> + +if(HTSLIB_FIND_COMPONENTS AND NOT HTSLIB_FIND_QUIETLY) + message(AUTHOR_WARNING + "HTSLib does not provide any COMPONENTS. Calling\n" + " find_package(HTSLIB COMPONENTS ...)\n" + "will always fail." + ) +endif() + +set(_HTSLIB_SEARCHES) + +# Search HTSLIB_ROOT first if it is set. 
+if(HTSLIB_ROOT) + set(_HTSLIB_SEARCH_ROOT PATHS ${HTSLIB_ROOT} NO_DEFAULT_PATH) + list(APPEND _HTSLIB_SEARCHES _HTSLIB_SEARCH_ROOT) +endif() + +# Normal search. +# Windows stuff +set(_HTSLIB_x86 "(x86)") +set(_HTSLIB_SEARCH_NORMAL + PATHS "$ENV{ProgramFiles}/htslib" + "$ENV{ProgramFiles${_HTSLIB_x86}}/htslib") +unset(_HTSLIB_x86) +list(APPEND _HTSLIB_SEARCHES _HTSLIB_SEARCH_NORMAL) + +if(HTSLIB_USE_STATIC_LIBS) + set(HTSLIB_NAMES hts) + set(HTSLIB_NAMES_DEBUG hts) +else() + set(HTSLIB_NAMES hts) + set(HTSLIB_NAMES_DEBUG hts) +endif() + +# Try each search configuration. +foreach(search ${_HTSLIB_SEARCHES}) + find_path(HTSLIB_INCLUDE_DIR NAMES htslib ${${search}} PATH_SUFFIXES include) +endforeach() + +# Allow HTSLIB_LIBRARY to be set manually, as the location of the htslib library +if(NOT HTSLIB_LIBRARY) + if(DEFINED CMAKE_FIND_LIBRARY_PREFIXES) + set(_htslib_ORIG_CMAKE_FIND_LIBRARY_PREFIXES "${CMAKE_FIND_LIBRARY_PREFIXES}") + else() + set(_htslib_ORIG_CMAKE_FIND_LIBRARY_PREFIXES) + endif() + if(DEFINED CMAKE_FIND_LIBRARY_SUFFIXES) + set(_htslib_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES "${CMAKE_FIND_LIBRARY_SUFFIXES}") + else() + set(_htslib_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES) + endif() + # Prefix/suffix of the win32/Makefile.gcc build + if(WIN32) + list(APPEND CMAKE_FIND_LIBRARY_PREFIXES "" "lib") + list(APPEND CMAKE_FIND_LIBRARY_SUFFIXES ".dll.a") + endif() + # Support preference of static libs by adjusting CMAKE_FIND_LIBRARY_SUFFIXES + if(HTSLIB_USE_STATIC_LIBS) + if(WIN32) + set(CMAKE_FIND_LIBRARY_SUFFIXES .lib .a ${CMAKE_FIND_LIBRARY_SUFFIXES}) + else() + set(CMAKE_FIND_LIBRARY_SUFFIXES .a) + endif() + endif() + + foreach(search ${_HTSLIB_SEARCHES}) + find_library(HTSLIB_LIBRARY_RELEASE NAMES ${HTSLIB_NAMES} NAMES_PER_DIR ${${search}} PATH_SUFFIXES lib) + find_library(HTSLIB_LIBRARY_DEBUG NAMES ${HTSLIB_NAMES_DEBUG} NAMES_PER_DIR ${${search}} PATH_SUFFIXES lib) + endforeach() + + # Restore the original find library ordering + if(DEFINED 
_htslib_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES) + set(CMAKE_FIND_LIBRARY_SUFFIXES "${_htslib_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES}") + else() + set(CMAKE_FIND_LIBRARY_SUFFIXES) + endif() + if(DEFINED _htslib_ORIG_CMAKE_FIND_LIBRARY_PREFIXES) + set(CMAKE_FIND_LIBRARY_PREFIXES "${_htslib_ORIG_CMAKE_FIND_LIBRARY_PREFIXES}") + else() + set(CMAKE_FIND_LIBRARY_PREFIXES) + endif() + + include(SelectLibraryConfigurations) + select_library_configurations(HTSLIB) +endif() + +unset(HTSLIB_NAMES) +unset(HTSLIB_NAMES_DEBUG) + +mark_as_advanced(HTSLIB_INCLUDE_DIR) + +if(HTSLIB_INCLUDE_DIR AND EXISTS "${HTSLIB_INCLUDE_DIR}/htslib/hts.h") + # Example: #define HTS_VERSION 101300 + file(STRINGS "${HTSLIB_INCLUDE_DIR}/htslib/hts.h" HTSLIB_H_LIST REGEX "^#define HTS_VERSION") + list(GET HTSLIB_H_LIST 0 HTSLIB_H) # Take the first matching line + if (HTSLIB_H MATCHES "#define[ \t]+HTS_VERSION[ \t]+\([0-9]+\)") + set(NUMERIC_VERSION "${CMAKE_MATCH_1}") + # Extract digits by position in string + # XYYYZZ => X = major, YYY = minor, ZZ = patch + string(SUBSTRING "${NUMERIC_VERSION}" 0 1 HTSLIB_VERSION_MAJOR) + string(SUBSTRING "${NUMERIC_VERSION}" 1 3 HTSLIB_VERSION_MINOR) + string(SUBSTRING "${NUMERIC_VERSION}" 4 2 HTSLIB_VERSION_PATCH) + else() + set(HTSLIB_VERSION_STRING "") + set(HTSLIB_VERSION_MAJOR "") + set(HTSLIB_VERSION_MINOR "") + set(HTSLIB_VERSION_PATCH "") + endif() + # Set canonical variables + set(HTSLIB_MAJOR_VERSION "${HTSLIB_VERSION_MAJOR}") + set(HTSLIB_MINOR_VERSION "${HTSLIB_VERSION_MINOR}") + set(HTSLIB_PATCH_VERSION "${HTSLIB_VERSION_PATCH}") + # Build the standard version string + set(HTSLIB_VERSION "${HTSLIB_VERSION_MAJOR}.${HTSLIB_VERSION_MINOR}") + # Only append patch if it's not "00" + if(NOT HTSLIB_VERSION_PATCH STREQUAL "00") + set(HTSLIB_VERSION "${HTSLIB_VERSION}.${HTSLIB_VERSION_PATCH}") + endif() +endif() + +include(FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS( + HTSLIB + REQUIRED_VARS + HTSLIB_LIBRARY + HTSLIB_INCLUDE_DIR + VERSION_VAR + 
HTSLIB_VERSION + HANDLE_COMPONENTS +) + +if(HTSLIB_FOUND) + set(HTSLIB_INCLUDE_DIRS ${HTSLIB_INCLUDE_DIR}) + if(NOT HTSLIB_LIBRARIES) + set(HTSLIB_LIBRARIES ${HTSLIB_LIBRARY}) + endif() + if(NOT TARGET HTSLIB::HTSLIB) + add_library(HTSLIB::HTSLIB UNKNOWN IMPORTED) + set_target_properties(HTSLIB::HTSLIB PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${HTSLIB_INCLUDE_DIRS}") + if(HTSLIB_LIBRARY_RELEASE) + set_property(TARGET HTSLIB::HTSLIB APPEND PROPERTY + IMPORTED_CONFIGURATIONS RELEASE) + set_target_properties(HTSLIB::HTSLIB PROPERTIES + IMPORTED_LOCATION_RELEASE "${HTSLIB_LIBRARY_RELEASE}") + endif() + if(HTSLIB_LIBRARY_DEBUG) + set_property(TARGET HTSLIB::HTSLIB APPEND PROPERTY + IMPORTED_CONFIGURATIONS DEBUG) + set_target_properties(HTSLIB::HTSLIB PROPERTIES + IMPORTED_LOCATION_DEBUG "${HTSLIB_LIBRARY_DEBUG}") + endif() + if(NOT HTSLIB_LIBRARY_RELEASE AND NOT HTSLIB_LIBRARY_DEBUG) + set_property(TARGET HTSLIB::HTSLIB APPEND PROPERTY + IMPORTED_LOCATION "${HTSLIB_LIBRARY}") + endif() + endif() +endif() + +cmake_policy(POP) From 238d43e3f7d6167037aaf2eacef5cf0152efb53c Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sat, 22 Nov 2025 16:51:28 -0800 Subject: [PATCH 3/9] .clang-format: update for this repo --- .clang-format | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/.clang-format b/.clang-format index d961b5c..742796c 100644 --- a/.clang-format +++ b/.clang-format @@ -1,24 +1,17 @@ -# MIT License +# Copyright (C) 2025 Andrew D Smith # -# Copyright (c) 2024 Andrew Smith +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. 
# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. # -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. +# You should have received a copy of the GNU General Public License along with +# this program. If not, see <https://www.gnu.org/licenses/>. 
BasedOnStyle: LLVM ColumnLimit: 80 From 611eb4288ff11c14bba76a44c9e233ccae7ec67c Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sat, 22 Nov 2025 16:52:11 -0800 Subject: [PATCH 4/9] submodule updates --- src/bamxx | 2 +- src/smithlab_cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bamxx b/src/bamxx index 3fb0456..d258e4a 160000 --- a/src/bamxx +++ b/src/bamxx @@ -1 +1 @@ -Subproject commit 3fb045628a7c4d23dd7fec99a11a5b9f41ce51c6 +Subproject commit d258e4a0fb46dbe5740409c873e870aa1946ec57 diff --git a/src/smithlab_cpp b/src/smithlab_cpp index b64428f..753301f 160000 --- a/src/smithlab_cpp +++ b/src/smithlab_cpp @@ -1 +1 @@ -Subproject commit b64428f65937b15ade6e9188a7ff1304de91bec3 +Subproject commit 753301f0e8b40f9cdcfff14c0170218e6f786025 From 9ad333837499455d594ff656526ac39f46f6e301 Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sat, 22 Nov 2025 16:53:20 -0800 Subject: [PATCH 5/9] iwyu --- src/AbismalIndex.cpp | 9 +++++---- src/AbismalIndex.hpp | 14 ++++++-------- src/abismal.cpp | 24 +++++++++++++++++++++--- src/abismal_main.cpp | 27 +++++++++++---------------- src/abismalidx.cpp | 8 +++----- src/simreads.cpp | 15 +++++++++++---- 6 files changed, 57 insertions(+), 40 deletions(-) diff --git a/src/AbismalIndex.cpp b/src/AbismalIndex.cpp index 8f4d96a..f3f3088 100644 --- a/src/AbismalIndex.cpp +++ b/src/AbismalIndex.cpp @@ -16,19 +16,20 @@ */ #include "AbismalIndex.hpp" +#include "dna_four_bit.hpp" #include "bamxx.hpp" -#include "dna_four_bit.hpp" -#include "smithlab_os.hpp" -#include "smithlab_utils.hpp" #include #include +#include #include -#include +#include #include #include #include +#include +#include #include #include #include diff --git a/src/AbismalIndex.hpp b/src/AbismalIndex.hpp index f100284..3241bed 100644 --- a/src/AbismalIndex.hpp +++ b/src/AbismalIndex.hpp @@ -18,22 +18,20 @@ #ifndef ABISMAL_INDEX_HPP #define ABISMAL_INDEX_HPP +#include "dna_four_bit.hpp" + #include -#include -#include #include #include 
-#include -#include +#include #include +#include // IWYU pragma: keep +#include #include #include -#include +#include #include -#include "dna_four_bit.hpp" -#include "smithlab_utils.hpp" - using element_t = std::size_t; using Genome = std::vector; using two_letter_t = bool; diff --git a/src/abismal.cpp b/src/abismal.cpp index 51275b7..cd1d2d1 100644 --- a/src/abismal.cpp +++ b/src/abismal.cpp @@ -16,27 +16,41 @@ */ #include "abismal.hpp" - #include "AbismalAlign.hpp" #include "AbismalIndex.hpp" +#include "popcnt.hpp" + #include "OptionParser.hpp" -#include "bamxx.hpp" #include "bisulfite_utils.hpp" +#include "dna_four_bit.hpp" #include "dna_four_bit_bisulfite.hpp" -#include "popcnt.hpp" #include "sam_record.hpp" #include "smithlab_os.hpp" #include "smithlab_utils.hpp" +#include "bamxx.hpp" + #include +#include +#include #include +#include #include #include +#include +#include +#include +#include #include +#include #include +#include +#include #include #include +#include +#include #include #include #include @@ -44,6 +58,10 @@ #include +#ifdef __SSE__ +#include +#endif + using abismal_clock = std::chrono::steady_clock; using abismal_timepoint = std::chrono::time_point; diff --git a/src/abismal_main.cpp b/src/abismal_main.cpp index 3b3de29..01ffa8f 100644 --- a/src/abismal_main.cpp +++ b/src/abismal_main.cpp @@ -13,13 +13,19 @@ * more details. 
*/ +#include "abismal.hpp" +#include "abismalidx.hpp" +#include "simreads.hpp" + #include #include +#include +#include #include #include #include -#include +#include #include #include @@ -45,25 +51,14 @@ operator<<(std::ostream &out, const abismal_command &cmd) -> std::ostream & { << cmd.description; } -// ADS: not sure of best way to acquire these below beyond simply -// declaring them here -int -abismal(int argc, char *argv[]); - -int -abismalidx(int argc, char *argv[]); - -int -simreads(int argc, char *argv[]); - void print_help(const std::vector &commands) { std::cout << "Program: " << PROGRAM_NAME << "\n" << "Version: " << VERSION << "\n" << "Usage: " << PROGRAM_NAME << " [options]\n" - << "Commands:" << std::endl; + << "Commands:\n"; for (const auto &c : commands) - std::cout << c << std::endl; + std::cout << c << '\n'; } int @@ -87,10 +82,10 @@ main(int argc, char *argv[]) { std::find_if(std::cbegin(commands), std::cend(commands), has_tag); if (the_cmd != std::cend(commands)) return (*the_cmd)(argc, argv); - std::cerr << "ERROR: invalid command " << argv[1] << std::endl; + std::cerr << "ERROR: invalid command " << argv[1] << '\n'; } catch (const std::exception &e) { - std::cerr << e.what() << std::endl; + std::cerr << e.what() << '\n'; return EXIT_FAILURE; } return EXIT_SUCCESS; diff --git a/src/abismalidx.cpp b/src/abismalidx.cpp index e572669..2ec216d 100644 --- a/src/abismalidx.cpp +++ b/src/abismalidx.cpp @@ -18,21 +18,19 @@ #include "abismalidx.hpp" #include "AbismalIndex.hpp" + #include "OptionParser.hpp" -#include "dna_four_bit.hpp" #include "smithlab_os.hpp" -#include "smithlab_utils.hpp" #include -#include #include +#include +#include #include -#include #include #include #include -#include #include int diff --git a/src/simreads.cpp b/src/simreads.cpp index e6ee394..b2fe4c8 100644 --- a/src/simreads.cpp +++ b/src/simreads.cpp @@ -16,20 +16,27 @@ */ #include "simreads.hpp" + #include "AbismalIndex.hpp" +#include "OptionParser.hpp" #include 
"cigar_utils.hpp" #include "sam_record.hpp" - -#include "OptionParser.hpp" #include "smithlab_os.hpp" #include "smithlab_utils.hpp" #include -#include // for the int8_t and friends +#include +#include +#include +#include +#include +#include #include -#include #include +#include +#include #include +#include #include #include #include From 60055ce9212c1c28bc5f562b6bcbd5d935411d42 Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sat, 22 Nov 2025 20:11:25 -0800 Subject: [PATCH 6/9] src/AbismalAlign.hpp: just adding linter suppressions --- src/AbismalAlign.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/AbismalAlign.hpp b/src/AbismalAlign.hpp index 2984473..213c290 100644 --- a/src/AbismalAlign.hpp +++ b/src/AbismalAlign.hpp @@ -39,18 +39,22 @@ typedef std::vector bam_cigar_t; static inline score_t count_deletions(const bam_cigar_t &cigar) { score_t ans = 0; + // cppcheck-suppress-begin useStlAlgorithm for (const auto &x : cigar) if (abismal_bam_cigar_op(x) == ABISMAL_BAM_CDEL) ans += abismal_bam_cigar_oplen(x); + // cppcheck-suppress-end useStlAlgorithm return ans; } static inline score_t count_insertions(const bam_cigar_t &cigar) { score_t ans = 0; + // cppcheck-suppress-begin useStlAlgorithm for (const auto &x : cigar) if (abismal_bam_cigar_op(x) == ABISMAL_BAM_CINS) ans += abismal_bam_cigar_oplen(x); + // cppcheck-suppress-end useStlAlgorithm return ans; } From 32f837d7099ca5fe28106c0dede70e8efd7a69f2 Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sat, 22 Nov 2025 20:11:46 -0800 Subject: [PATCH 7/9] src/dna_four_bit_bisulfite.hpp: changing c arrays to std::array --- src/dna_four_bit_bisulfite.hpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/dna_four_bit_bisulfite.hpp b/src/dna_four_bit_bisulfite.hpp index 4e8d043..da0c099 100644 --- a/src/dna_four_bit_bisulfite.hpp +++ b/src/dna_four_bit_bisulfite.hpp @@ -17,7 +17,9 @@ #ifndef DNA_FOUR_BIT_BISULFITE_HPP #define DNA_FOUR_BIT_BISULFITE_HPP -#include // for 
the int8_t and friends + +#include +#include // clang-format off /* encoding of ASCII characters into T-rich bases, used @@ -26,15 +28,15 @@ * C: 0010 = 2 * G: 0100 = 4 * T: 1010 = 10 */ -static const uint8_t encode_base_t_rich[256] = { +constexpr std::array encode_base_t_rich = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //0 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //17 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //33 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //49 0, 1, 0, 2, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, //@,A-O - 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //P-Z + 0, 0, 0, 0, 10,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //P-Z 0, 1, 0, 2, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, //`,a-o - 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //p-z + 0, 0, 0, 0, 10,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //p-z 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -50,7 +52,7 @@ static const uint8_t encode_base_t_rich[256] = { * C: 0010 = 2 * G: 0100 = 4 * T: 1000 = 8 */ -static const uint8_t encode_base_a_rich[256] = { +constexpr std::array encode_base_a_rich = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //0 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //17 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //33 From e1bb30247b0e89993583b90a0ba277813f6eabdc Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sat, 22 Nov 2025 20:12:17 -0800 Subject: [PATCH 8/9] src/AbismalIndex.cpp: just adding NOLINT blocks. 
too much to change for now --- src/AbismalIndex.cpp | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/src/AbismalIndex.cpp b/src/AbismalIndex.cpp index f3f3088..9538501 100644 --- a/src/AbismalIndex.cpp +++ b/src/AbismalIndex.cpp @@ -34,6 +34,8 @@ #include #include +// NOLINTBEGIN + using abismal_clock = std::chrono::steady_clock; using std::chrono::time_point; @@ -110,7 +112,7 @@ mask_non_target( std::vector &genome) { const auto target_end = std::cend(targets); auto target_itr = std::cbegin(targets); - for (std::size_t i = 0; i < genome.size(); ++i) { + for (std::size_t i = 0; i < std::size(genome); ++i) { if (target_itr == target_end || i < target_itr->first) genome[i] = 'N'; else @@ -159,7 +161,7 @@ static inline void replace_included_n( const std::vector> &exclude, G &genome) { std::size_t j = 0; - for (std::size_t i = 0; i < genome.size(); ++i) { + for (std::size_t i = 0; i < std::size(genome); ++i) { if (genome[i] == 'N' && i < exclude[j].first) genome[i] = random_base(); if (exclude[j].second <= i) @@ -208,7 +210,7 @@ AbismalIndex::create_index(const std::string &targets_file, std::vector orig_genome; load_genome(genome_file, orig_genome, cl); if (VERBOSE) - std::clog << delta_seconds(s_time) << std::endl; + std::clog << delta_seconds(s_time) << '\n'; s_time = abismal_clock::now(); if (VERBOSE) @@ -216,7 +218,7 @@ AbismalIndex::create_index(const std::string &targets_file, std::vector orig_targets = load_target_regions(targets_file); orig_targets = sort_by_chrom(cl.names, orig_targets); if (VERBOSE) - std::clog << delta_seconds(s_time) << std::endl; + std::clog << delta_seconds(s_time) << '\n'; s_time = abismal_clock::now(); if (VERBOSE) @@ -240,7 +242,7 @@ AbismalIndex::create_index(const std::string &targets_file, replace_included_n(exclude, orig_genome); if (VERBOSE) - std::clog << delta_seconds(s_time) << std::endl; + std::clog << delta_seconds(s_time) << '\n'; s_time = abismal_clock::now(); 
@@ -251,7 +253,7 @@ AbismalIndex::create_index(const std::string &targets_file, std::begin(genome)); std::vector().swap(orig_genome); if (VERBOSE) - std::clog << delta_seconds(s_time) << std::endl; + std::clog << delta_seconds(s_time) << '\n'; exclude_itr = get_exclude_itrs(genome, exclude); @@ -277,7 +279,7 @@ AbismalIndex::create_index(const std::string &genome_file) { load_genome(genome_file, orig_genome, cl); if (VERBOSE) std::clog << "[" << cl.names.size() << " targets]" << delta_seconds(s_time) - << std::endl; + << '\n'; s_time = abismal_clock::now(); if (VERBOSE) @@ -293,7 +295,7 @@ AbismalIndex::create_index(const std::string &genome_file) { replace_included_n(exclude, orig_genome); if (VERBOSE) - std::clog << delta_seconds(s_time) << std::endl; + std::clog << delta_seconds(s_time) << '\n'; s_time = abismal_clock::now(); @@ -304,7 +306,7 @@ AbismalIndex::create_index(const std::string &genome_file) { std::begin(genome)); std::vector().swap(orig_genome); if (VERBOSE) - std::clog << delta_seconds(s_time) << std::endl; + std::clog << delta_seconds(s_time) << '\n'; exclude_itr = get_exclude_itrs(genome, exclude); @@ -424,7 +426,7 @@ AbismalIndex::initialize_bucket_sizes() { bucket_ct.join(); bucket_ga.join(); if (VERBOSE) - std::clog << delta_seconds(s_time) << std::endl; + std::clog << delta_seconds(s_time) << '\n'; } static inline auto @@ -531,7 +533,7 @@ AbismalIndex::select_two_letter_positions() { for (auto &thread : threads) thread.join(); if (VERBOSE) - std::clog << delta_seconds(s_time) << std::endl; + std::clog << delta_seconds(s_time) << '\n'; } void @@ -557,9 +559,9 @@ AbismalIndex::hash_genome() { index_t.resize(index_size_three, 0); index_a.resize(index_size_three, 0); if (VERBOSE) - std::clog << delta_seconds(s_time) << std::endl + std::clog << delta_seconds(s_time) << '\n' << "[index sizes: two-letter=" << index_size << " " - << "three-letter=" << index_size_three << "]" << std::endl; + << "three-letter=" << index_size_three << "]\n"; s_time = 
abismal_clock::now(); if (VERBOSE) @@ -628,7 +630,7 @@ AbismalIndex::hash_genome() { ltr_counter_a.join(); if (VERBOSE) - std::clog << delta_seconds(s_time) << std::endl; + std::clog << delta_seconds(s_time) << '\n'; } struct dp_sol { @@ -843,7 +845,7 @@ AbismalIndex::compress_dp() { max_candidates = 100u; // GS: this is a heuristic if (VERBOSE) - std::clog << delta_seconds(s_time) << std::endl; + std::clog << delta_seconds(s_time) << '\n'; } struct BucketLess { @@ -917,7 +919,7 @@ AbismalIndex::sort_buckets() { for (auto &thread : threads) thread.join(); if (VERBOSE) - std::clog << delta_seconds(s_time) << std::endl; + std::clog << delta_seconds(s_time) << '\n'; } { const auto s_time = abismal_clock::now(); @@ -940,7 +942,7 @@ AbismalIndex::sort_buckets() { for (auto &thread : threads) thread.join(); if (VERBOSE) - std::clog << delta_seconds(s_time) << std::endl; + std::clog << delta_seconds(s_time) << '\n'; } { const auto s_time = abismal_clock::now(); @@ -963,7 +965,7 @@ AbismalIndex::sort_buckets() { for (auto &thread : threads) thread.join(); if (VERBOSE) - std::clog << delta_seconds(s_time) << std::endl; + std::clog << delta_seconds(s_time) << '\n'; } } @@ -1073,7 +1075,7 @@ void AbismalIndex::read(const std::string &index_file) { static const std::string error_msg("failed loading index file"); - FILE *in = fopen(index_file.c_str(), "rb"); + FILE *in = fopen(index_file.data(), "rb"); if (!in) throw std::runtime_error("cannot open input file " + index_file); @@ -1341,3 +1343,5 @@ load_genome(const std::string &genome_file, std::vector &genome, ChromLookup &cl) { load_genome_impl(genome_file, genome, cl); } + +// NOLINTEND From 2ae02a209703b8f8ab4e59d9cf0225075cf0b887 Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sat, 22 Nov 2025 20:12:43 -0800 Subject: [PATCH 9/9] linting --- src/abismal.cpp | 255 ++++++++++++++++++++++++------------------- src/abismal_main.cpp | 16 +-- src/abismalidx.cpp | 22 ++-- src/simreads.cpp | 117 ++++++++++---------- 4 files 
changed, 221 insertions(+), 189 deletions(-) diff --git a/src/abismal.cpp b/src/abismal.cpp index cd1d2d1..3a4b851 100644 --- a/src/abismal.cpp +++ b/src/abismal.cpp @@ -34,6 +34,7 @@ #include #include +#include #include #include #include @@ -62,6 +63,8 @@ #include #endif +// NOLINTBEGIN(*-avoid-magic-numbers,*-narrowing-conversions) + using abismal_clock = std::chrono::steady_clock; using abismal_timepoint = std::chrono::time_point; @@ -77,17 +80,22 @@ typedef std::vector PackedRead; // 4-bit encoding of reads namespace abismal_concurrency { static constexpr std::uint32_t max_n_threads = 1024; +// NOLINTBEGIN(*-avoid-non-const-global-variables) static std::uint32_t n_threads = 1; static std::mutex read_mutex; static std::mutex write_mutex; static std::mutex report_mutex; +// NOLINTEND(*-avoid-non-const-global-variables) static bool invalid_n_threads() { return n_threads == 0 || n_threads > max_n_threads; } } // namespace abismal_concurrency -enum conversion_type { t_rich = false, a_rich = true }; +enum conversion_type : std::uint8_t { + t_rich = false, + a_rich = true, +}; static void log_msg(const std::string &s) { @@ -110,7 +118,7 @@ get_strand_code(const char strand, const conversion_type conv) { } struct ReadLoader { - ReadLoader(const std::string &fn) : cur_line{0}, filename{fn}, in{fn, "r"} {} + explicit ReadLoader(const std::string &fn) : filename{fn}, in{fn, "r"} {} bool good() const { @@ -189,8 +197,12 @@ const std::uint32_t ReadLoader::min_read_length = static inline void update_max_read_length(std::size_t &max_length, const std::vector &reads) { - for (const auto &i : reads) - max_length = std::max(max_length, std::size(i)); + max_length = std::accumulate(std::cbegin(reads), std::cend(reads), 0ul, + [](const std::size_t x, const auto &r) { + return std::max(x, std::size(r)); + }); + // for (const auto &i : reads) + // max_length = std::max(max_length, std::size(i)); } struct se_element { // size = 8 @@ -266,12 +278,14 @@ struct se_element { // size = 8 
static const score_t MAX_DIFFS; }; -double se_element::valid_frac = 0.1; +static constexpr auto valid_frac_default = 0.1; +double se_element::valid_frac = valid_frac_default; const score_t se_element::MAX_DIFFS = std::numeric_limits::max() - 1; // a liberal number of mismatches accepted to // align a read downstream -const double se_element::invalid_hit_frac = 0.4; +static constexpr auto invalid_hit_frac_default = 0.4; +const double se_element::invalid_hit_frac = invalid_hit_frac_default; static inline score_t valid_diffs_cutoff(const std::uint32_t readlen, const double cutoff) { @@ -288,8 +302,8 @@ valid_len(const std::uint32_t aln_len, const std::uint32_t readlen) { } static inline bool -valid(const se_element &s, const std::uint32_t aln_len, - const std::uint32_t readlen, const double cutoff) { +check_valid(const se_element &s, const std::uint32_t aln_len, + const std::uint32_t readlen, const double cutoff) { return valid_len(aln_len, readlen) && s.diffs <= valid_diffs_cutoff(readlen, cutoff); } @@ -390,10 +404,11 @@ struct se_candidates { void reset(const std::uint32_t readlen) { + constexpr auto good_frac_denom = 10u; best.reset(readlen); v.front().reset(readlen); cutoff = v.front().diffs; - good_cutoff = readlen / 10u; + good_cutoff = static_cast(readlen / good_frac_denom); sure_ambig = false; sz = 1; @@ -410,11 +425,11 @@ struct se_candidates { sz = std::unique(std::begin(v), std::begin(v) + sz) - std::cbegin(v); } - bool sure_ambig; - score_t good_cutoff; - score_t cutoff; - std::uint32_t sz; - se_element best; + bool sure_ambig{}; + score_t good_cutoff{}; + score_t cutoff{}; + std::uint32_t sz{}; + se_element best{}; std::vector v; static const std::uint32_t max_size; @@ -447,13 +462,20 @@ chrom_and_posn(const ChromLookup &cl, const bam_cigar_t &cig, return true; } -enum map_type { map_unmapped, map_unique, map_ambig }; +enum map_type : std::uint8_t { + map_unmapped, + map_unique, + map_ambig, +}; static map_type format_se(const bool allow_ambig, const 
se_element &res, const ChromLookup &cl, // ADS: 'read' should not be used after a call to 'format_se' std::string &read, const std::string &read_name, const bam_cigar_t &cigar, bam_rec &sr) { + static constexpr auto mapq_max_val = 255u; + static constexpr auto aux_len = 16u; + const bool ambig = res.ambig(); const bool valid = !res.empty(); if (!allow_ambig && ambig) @@ -476,22 +498,24 @@ format_se(const bool allow_ambig, const se_element &res, const ChromLookup &cl, // flag |= BAM_FREAD1; // ADS: this might be wrong... sr.b = bam_init1(); + // clang-format off int ret = bam_set1(sr.b, - std::size(read_name), // size_t l_qname, - read_name.data(), // const char *qname, - flag, // uint16_t flag, - chrom_idx - 1, // int32_t tid (-1 for padding) - ref_s, // hts_pos_t pos, - 255, // uint8_t mapq, - std::size(cigar), // size_t n_cigar, - cigar.data(), // const uint32_t *cigar, - -1, // int32_t mtid, - -1, // hts_pos_t mpos, - 0, // hts_pos_t isize, - std::size(read), // size_t l_seq, - read.data(), // const char *seq, - nullptr, // const char *qual, - 16); // size_t l_aux); + std::size(read_name), // size_t l_qname, + read_name.data(), // const char *qname, + flag, // uint16_t flag, + static_cast(chrom_idx) - 1, // int32_t tid (-1 for padding) + ref_s, // hts_pos_t pos, + mapq_max_val, // uint8_t mapq, + std::size(cigar), // size_t n_cigar, + cigar.data(), // const uint32_t *cigar, + -1, // int32_t mtid, + -1, // hts_pos_t mpos, + 0, // hts_pos_t isize, + std::size(read), // size_t l_seq, + read.data(), // const char *seq, + nullptr, // const char *qual, + aux_len); // size_t l_aux); + // clang-format on if (ret < 0) throw std::runtime_error("failed to format bam"); @@ -508,11 +532,11 @@ format_se(const bool allow_ambig, const se_element &res, const ChromLookup &cl, } struct pe_element { - pe_element() : aln_score(0), r1(se_element()), r2(se_element()) {} + pe_element() : r1{se_element()}, r2{se_element()} {} score_t diffs() const { - return r1.diffs + r2.diffs; + 
return static_cast(r1.diffs + r2.diffs); } void @@ -570,8 +594,8 @@ struct pe_element { return ambig() && (aln_score == max_aln_score); } - score_t aln_score; - score_t max_aln_score; + score_t aln_score{}; + score_t max_aln_score{}; se_element r1; se_element r2; @@ -613,7 +637,12 @@ format_pe( std::string &read1, std::string &read2, const std::string &name1, const std::string &name2, const bam_cigar_t &cig1, const bam_cigar_t &cig2, bam_rec &sr1, bam_rec &sr2) { - static const uint8_t cv[2] = {'T', 'A'}; + static constexpr auto mapq_max_val = 255u; + static constexpr auto aux_len = 16u; + static const std::array cv = { + 'T', + 'A', + }; if (p.empty()) return map_unmapped; @@ -659,22 +688,24 @@ format_pe( flag2 |= BAM_FREAD2; sr1.b = bam_init1(); + // clang-format off int ret = bam_set1(sr1.b, - name1.size(), // size_t l_qname, - name1.data(), // const char *qname, - flag1, // uint16_t flag, - chr1 - 1, // (-1 for padding) int32_t tid - r_s1, // hts_pos_t pos, - 255, // uint8_t mapq, - cig1.size(), // size_t n_cigar, - cig1.data(), // const uint32_t *cigar, - chr2 - 1, // (-1 for padding) int32_t mtid, - r_s2, // hts_pos_t mpos, - isize, // hts_pos_t isize, - read1.size(), // size_t l_seq, - read1.data(), // const char *seq, - nullptr, // const char *qual, - 16); // size_t l_aux); + name1.size(), // size_t l_qname, + name1.data(), // const char *qname, + flag1, // uint16_t flag, + chr1 - 1, // (-1 for padding) int32_t tid + r_s1, // hts_pos_t pos, + mapq_max_val, // uint8_t mapq, + cig1.size(), // size_t n_cigar, + cig1.data(), // const uint32_t *cigar, + chr2 - 1, // (-1 for padding) int32_t mtid, + r_s2, // hts_pos_t mpos, + isize, // hts_pos_t isize, + read1.size(), // size_t l_seq, + read1.data(), // const char *seq, + nullptr, // const char *qual, + aux_len); // size_t l_aux); + // clang-format on if (ret < 0) throw std::runtime_error("error formatting bam"); @@ -682,27 +713,29 @@ format_pe( if (ret < 0) throw std::runtime_error("error adding aux field"); - 
ret = bam_aux_append(sr1.b, "CV", 'A', 1, cv + p.r1.elem_is_a_rich()); + ret = bam_aux_append(sr1.b, "CV", 'A', 1, cv.data() + p.r1.elem_is_a_rich()); if (ret < 0) throw std::runtime_error("error adding aux field"); sr2.b = bam_init1(); + // clang-format off ret = bam_set1(sr2.b, - name2.size(), // size_t l_qname, - name2.data(), // const char *qname, - flag2, // uint16_t flag, - chr2 - 1, // (-1 for padding) int32_t tid - r_s2, // hts_pos_t pos, - 255, // uint8_t mapq, - cig2.size(), // size_t n_cigar, - cig2.data(), // const uint32_t *cigar, - chr1 - 1, // (-1 for padding) int32_t mtid, - r_s1, // hts_pos_t mpos, - -isize, // hts_pos_t isize, - read2.size(), // size_t l_seq, - read2.data(), // const char *seq, - nullptr, // const char *qual, - 16); // size_t l_aux); + name2.size(), // size_t l_qname, + name2.data(), // const char *qname, + flag2, // uint16_t flag, + chr2 - 1, // (-1 for padding) int32_t tid + r_s2, // hts_pos_t pos, + mapq_max_val, // uint8_t mapq, + cig2.size(), // size_t n_cigar, + cig2.data(), // const uint32_t *cigar, + chr1 - 1, // (-1 for padding) int32_t mtid, + r_s1, // hts_pos_t mpos, + -isize, // hts_pos_t isize, + read2.size(), // size_t l_seq, + read2.data(), // const char *seq, + nullptr, // const char *qual, + aux_len); // size_t l_aux); + // clang-format on if (ret < 0) throw std::runtime_error("failed to format bam"); @@ -710,7 +743,7 @@ format_pe( if (ret < 0) throw std::runtime_error("error adding aux field"); - ret = bam_aux_append(sr2.b, "CV", 'A', 1, cv + p.r2.elem_is_a_rich()); + ret = bam_aux_append(sr2.b, "CV", 'A', 1, cv.data() + p.r2.elem_is_a_rich()); if (ret < 0) throw std::runtime_error("error adding aux field"); @@ -794,11 +827,11 @@ struct pe_candidates { sz = std::unique(std::begin(v), std::begin(v) + sz) - std::cbegin(v); } - bool sure_ambig; - score_t cutoff; - score_t good_cutoff; - std::uint32_t sz; - std::uint32_t capacity; + bool sure_ambig{}; + score_t cutoff{}; + score_t good_cutoff{}; + std::uint32_t sz{}; 
+ std::uint32_t capacity{}; std::vector v; static const std::uint32_t max_size_small = 32u; @@ -1096,14 +1129,14 @@ check_hits(const std::uint32_t offset, const PackedRead::const_iterator read_st, } struct compare_bases { - compare_bases(const genome_iterator g_) : g(g_) {} - + explicit compare_bases(const genome_iterator g) : g{g} {} bool operator()(const std::uint32_t mid, const two_letter_t chr) const { + // cppcheck-suppress-begin comparisonOfFuncReturningBoolError return get_bit(*(g + mid)) < chr; + // cppcheck-suppress-end comparisonOfFuncReturningBoolError } - - const genome_iterator g; + const genome_iterator g; // NOLINT(*-avoid-const-or-ref-data-members) }; template @@ -1147,14 +1180,12 @@ get_three_letter_num_fast(const uint8_t nt) { } template struct compare_bases_three { - compare_bases_three(const genome_iterator g_) : g(g_) {} - + explicit compare_bases_three(const genome_iterator g) : g{g} {} bool operator()(const std::uint32_t mid, const three_letter_t chr) const { return get_three_letter_num_fast(*(g + mid)) < chr; } - - const genome_iterator g; + const genome_iterator g; // NOLINT(*-avoid-const-or-ref-data-members) }; template @@ -1237,10 +1268,10 @@ process_seeds(const std::uint32_t max_candidates, std::vector::const_iterator s_idx_three; std::vector::const_iterator e_idx_three; - std::uint32_t d_two = 0; - std::uint32_t d_three = 0; - std::uint32_t l_two = 0; - std::uint32_t l_three = 0; + std::uint32_t d_two{}; + std::uint32_t d_three{}; + std::uint32_t l_two{}; + std::uint32_t l_three{}; get_1bit_hash(read_idx, k); get_base_3_hash(read_idx, k_three); @@ -1324,10 +1355,11 @@ template static void prep_read(const std::string &r, Read &pread) { pread.resize(r.size()); + // NOLINTBEGIN(*-pro-bounds-constant-array-index) for (std::size_t i = 0; i != r.size(); ++i) - pread[i] = - (convert_a_to_g ? (encode_base_a_rich[static_cast(r[i])]) - : (encode_base_t_rich[static_cast(r[i])])); + pread[i] = (convert_a_to_g ? 
(encode_base_a_rich[r[i]]) + : (encode_base_t_rich[r[i]])); + // NOLINTEND(*-pro-bounds-constant-array-index) } /* GS: this function simply converts the vector pread to a @@ -1392,7 +1424,6 @@ align_se_candidates(const Read &pread_t, const Read &pread_t_rc, } score_t best_scr = 0; - std::uint32_t cand_pos = 0; std::uint32_t best_pos = 0; res.prepare_for_alignments(); @@ -1403,7 +1434,7 @@ align_se_candidates(const Read &pread_t, const Read &pread_t_rc, ; for (; it != lim; ++it) { if (valid_hit(*it, readlen)) { - cand_pos = it->pos; + std::uint32_t cand_pos = it->pos; const score_t cand_scr = aln.align( it->diffs, max_diffs, ((it->rc()) ? ((it->elem_is_a_rich()) ? (pread_t_rc) : (pread_a_rc)) @@ -1435,7 +1466,7 @@ align_se_candidates(const Read &pread_t, const Read &pread_t_rc, best.diffs = simple_aln::edit_distance(best_scr, len, cigar); // do not report and count it as unmapped if not valid - if (!valid(best, len, readlen, cutoff)) + if (!check_valid(best, len, readlen, cutoff)) best.reset(); } else @@ -1458,7 +1489,7 @@ template static void map_single_ended(const bool show_progress, const bool allow_ambig, const AbismalIndex &abismal_index, ReadLoader &rl, - se_map_stats &se_stats, bamxx::bam_header &hdr, + se_map_stats &se_stats, const bamxx::bam_header &hdr, bamxx::bam_out &out, ProgressBar &progress) { const auto counter_st(std::cbegin(abismal_index.counter)); const auto counter_t_st(std::cbegin(abismal_index.counter_t)); @@ -1491,9 +1522,8 @@ map_single_ended(const bool show_progress, const bool allow_ambig, se_candidates res; AbismalAlignSimple aln(genome_st); - std::size_t the_byte = 0; - while (rl) { + std::size_t the_byte{}; { const std::lock_guard lock(abismal_concurrency::read_mutex); rl.load_reads(names, reads); @@ -1562,7 +1592,7 @@ map_single_ended(const bool show_progress, const bool allow_ambig, static void map_single_ended_rand(const bool show_progress, const bool allow_ambig, const AbismalIndex &abismal_index, ReadLoader &rl, - se_map_stats 
&se_stats, bamxx::bam_header &hdr, + se_map_stats &se_stats, const bamxx::bam_header &hdr, bamxx::bam_out &out, ProgressBar &progress) { const auto counter_st(std::cbegin(abismal_index.counter)); const auto counter_t_st(std::cbegin(abismal_index.counter_t)); @@ -1594,9 +1624,8 @@ map_single_ended_rand(const bool show_progress, const bool allow_ambig, se_candidates res; AbismalAlignSimple aln(genome_st); - std::size_t the_byte = 0; - while (rl) { + std::size_t the_byte{}; { const std::lock_guard lock(abismal_concurrency::read_mutex); rl.load_reads(names, reads); @@ -1684,7 +1713,7 @@ static void run_single_ended(const bool show_progress, const bool allow_ambig, const std::string &reads_file, const AbismalIndex &abismal_index, se_map_stats &se_stats, - bamxx::bam_header &hdr, bamxx::bam_out &out) { + const bamxx::bam_header &hdr, bamxx::bam_out &out) { ReadLoader rl(reads_file); ProgressBar progress(get_filesize(reads_file), "mapping reads"); @@ -1692,7 +1721,7 @@ run_single_ended(const bool show_progress, const bool allow_ambig, std::vector threads; for (auto i = 0u; i < abismal_concurrency::n_threads; ++i) - threads.emplace_back([&] { + threads.emplace_back([&] { // NOLINT(*-inefficient-vector-operation) if (random_pbat) map_single_ended_rand(show_progress, allow_ambig, abismal_index, rl, se_stats, hdr, out, progress); @@ -1745,7 +1774,6 @@ best_pair(const pe_candidates &res1, const pe_candidates &res2, valid_diffs_cutoff(readlen2, se_element::valid_frac); score_t scr1 = 0; - score_t scr2 = 0; score_t best_scr1 = 0; score_t best_scr2 = 0; std::uint32_t best_pos1 = 0; @@ -1763,7 +1791,7 @@ best_pair(const pe_candidates &res1, const pe_candidates &res2, for (; j2 != j2_end && !best.sure_ambig(); ++j2) { s2 = *j2; - scr2 = 0; + score_t scr2 = 0; // rewind to first concordant position. 
Needed in case of // many-to-many concordance between candidates @@ -1897,7 +1925,7 @@ static void map_paired_ended(const bool show_progress, const bool allow_ambig, const AbismalIndex &abismal_index, ReadLoader &rl1, ReadLoader &rl2, pe_map_stats &pe_stats, - bamxx::bam_header &hdr, bamxx::bam_out &out, + const bamxx::bam_header &hdr, bamxx::bam_out &out, ProgressBar &progress) { const auto counter_st(std::begin(abismal_index.counter)); const auto counter_t_st(std::begin(abismal_index.counter_t)); @@ -1953,9 +1981,8 @@ map_paired_ended(const bool show_progress, const bool allow_ambig, se_candidates res_se1; se_candidates res_se2; - std::size_t the_byte = 0; - while (rl1 && rl2) { + std::size_t the_byte{}; { const std::lock_guard lock(abismal_concurrency::read_mutex); rl1.load_reads(names1, reads1); @@ -2064,7 +2091,7 @@ static void map_paired_ended_rand(const bool show_progress, const bool allow_ambig, const AbismalIndex &abismal_index, ReadLoader &rl1, ReadLoader &rl2, pe_map_stats &pe_stats, - bamxx::bam_header &hdr, bamxx::bam_out &out, + const bamxx::bam_header &hdr, bamxx::bam_out &out, ProgressBar &progress) { const auto counter_st(std::begin(abismal_index.counter)); const auto counter_t_st(std::begin(abismal_index.counter_t)); @@ -2116,10 +2143,8 @@ map_paired_ended_rand(const bool show_progress, const bool allow_ambig, se_candidates res_se1; se_candidates res_se2; - std::size_t the_byte = 0; - while (rl1 && rl2) { - + std::size_t the_byte{}; { const std::lock_guard lock(abismal_concurrency::read_mutex); rl1.load_reads(names1, reads1); @@ -2242,7 +2267,7 @@ static void run_paired_ended(const bool show_progress, const bool allow_ambig, const std::string &reads_file1, const std::string &reads_file2, const AbismalIndex &abismal_index, pe_map_stats &pe_stats, - bamxx::bam_header &hdr, bamxx::bam_out &out) { + const bamxx::bam_header &hdr, bamxx::bam_out &out) { ReadLoader rl1(reads_file1); ReadLoader rl2(reads_file2); ProgressBar 
progress(get_filesize(reads_file1), "mapping reads"); @@ -2251,7 +2276,7 @@ run_paired_ended(const bool show_progress, const bool allow_ambig, std::vector threads; for (auto i = 0u; i < abismal_concurrency::n_threads; ++i) - threads.emplace_back([&] { + threads.emplace_back([&] { // NOLINT(*-inefficient-vector-operation) if (random_pbat) map_paired_ended_rand(show_progress, allow_ambig, abismal_index, rl1, rl2, pe_stats, hdr, out, progress); @@ -2278,7 +2303,8 @@ file_exists(const std::string &filename) { } static int -abismal_make_sam_header(const ChromLookup &cl, const int argc, char *argv[], +abismal_make_sam_header(const ChromLookup &cl, const int argc, + char *argv[], // NOLINT(*-c-arrays) bamxx::bam_header &hdr) { assert(std::size(cl.names) > 2); // two entries exist for the padding assert(std::size(cl.starts) == std::size(cl.names) + 1); @@ -2316,9 +2342,8 @@ abismal_make_sam_header(const ChromLookup &cl, const int argc, char *argv[], } int -abismal(int argc, char *argv[]) { +abismal(int argc, char *argv[]) { // NOLINT(*-c-arrays) try { - const std::string version_str = std::string("(v") + VERSION + std::string(")"); const std::string description = @@ -2338,8 +2363,8 @@ abismal(int argc, char *argv[]) { std::string stats_outfile = ""; /****************** COMMAND LINE OPTIONS ********************/ - OptionParser opt_parse(strip_path(argv[0]), description, - " []"); + OptionParser opt_parse(argv[0], // NOLINT(*-pointer-arithmetic) + description, " []"); opt_parse.set_show_defaults(); opt_parse.add_opt("index", 'i', "index file", false, index_file); opt_parse.add_opt("genome", 'g', "genome file (FASTA)", false, genome_file); @@ -2535,3 +2560,5 @@ abismal(int argc, char *argv[]) { } return EXIT_SUCCESS; } + +// NOLINTEND(*-avoid-magic-numbers,*-narrowing-conversions) diff --git a/src/abismal_main.cpp b/src/abismal_main.cpp index 01ffa8f..868cda2 100644 --- a/src/abismal_main.cpp +++ b/src/abismal_main.cpp @@ -29,7 +29,7 @@ #include #include -static const 
std::string PROGRAM_NAME = "abismal"; +static constexpr auto program_name = "abismal"; struct abismal_command { std::string tag; @@ -37,7 +37,7 @@ struct abismal_command { std::function fun; auto - operator()(const int argc, char *argv[]) const -> int { + operator()(const int argc, char *argv[]) const -> int { // NOLINT(*-c-arrays) return fun(argc - 1, argv + 1); } }; @@ -53,16 +53,16 @@ operator<<(std::ostream &out, const abismal_command &cmd) -> std::ostream & { void print_help(const std::vector &commands) { - std::cout << "Program: " << PROGRAM_NAME << "\n" + std::cout << "Program: " << program_name << "\n" << "Version: " << VERSION << "\n" - << "Usage: " << PROGRAM_NAME << " [options]\n" + << "Usage: " << program_name << " [options]\n" << "Commands:\n"; for (const auto &c : commands) std::cout << c << '\n'; } int -main(int argc, char *argv[]) { +main(int argc, char *argv[]) { // NOLINT(*-c-arrays) try { // clang-format off std::vector commands = { @@ -76,13 +76,15 @@ main(int argc, char *argv[]) { return EXIT_SUCCESS; } const auto has_tag = [&](const abismal_command &a) { - return a.tag == argv[1]; + return a.tag == argv[1]; // NOLINT(*-pointer-arithmetic,*-c-arrays) }; const auto the_cmd = std::find_if(std::cbegin(commands), std::cend(commands), has_tag); if (the_cmd != std::cend(commands)) return (*the_cmd)(argc, argv); - std::cerr << "ERROR: invalid command " << argv[1] << '\n'; + std::cerr << "ERROR: invalid command " + << argv[1] // NOLINT(*-pointer-arithmetic,*-c-arrays) + << '\n'; } catch (const std::exception &e) { std::cerr << e.what() << '\n'; diff --git a/src/abismalidx.cpp b/src/abismalidx.cpp index 2ec216d..b1fbd85 100644 --- a/src/abismalidx.cpp +++ b/src/abismalidx.cpp @@ -20,7 +20,6 @@ #include "AbismalIndex.hpp" #include "OptionParser.hpp" -#include "smithlab_os.hpp" #include @@ -34,10 +33,8 @@ #include int -abismalidx(int argc, char *argv[]) { - +abismalidx(int argc, char *argv[]) { // NOLINT(*-c-arrays) try { - const std::string version_str = 
std::string{"(v"} + VERSION + std::string{")"}; const std::string description = "build abismal index " + version_str; @@ -47,8 +44,9 @@ abismalidx(int argc, char *argv[]) { std::size_t n_threads = 1; /****************** COMMAND LINE OPTIONS ********************/ - OptionParser opt_parse(strip_path(argv[0]), description, - " ", 2); + OptionParser opt_parse(argv[0], // NOLINT(*-pointer-arithmetic) + description, " ", + 2); opt_parse.set_show_defaults(); opt_parse.add_opt("targets", 'A', "target regions", false, target_regions_file); @@ -58,20 +56,20 @@ abismalidx(int argc, char *argv[]) { std::vector leftover_args; opt_parse.parse(argc, argv, leftover_args); if (argc == 1 || opt_parse.help_requested()) { - std::cerr << opt_parse.help_message() << std::endl; - std::cerr << opt_parse.about_message() << std::endl; + std::cerr << opt_parse.help_message() << '\n'; + std::cerr << opt_parse.about_message() << '\n'; return EXIT_SUCCESS; } if (opt_parse.about_requested()) { - std::cerr << opt_parse.about_message() << std::endl; + std::cerr << opt_parse.about_message() << '\n'; return EXIT_SUCCESS; } if (opt_parse.option_missing()) { - std::cerr << opt_parse.option_missing_message() << std::endl; + std::cerr << opt_parse.option_missing_message() << '\n'; return EXIT_SUCCESS; } if (leftover_args.size() != 2) { - std::cerr << opt_parse.help_message() << std::endl; + std::cerr << opt_parse.help_message() << '\n'; return EXIT_SUCCESS; } const std::string genome_file = leftover_args.front(); @@ -110,7 +108,7 @@ abismalidx(int argc, char *argv[]) { /****************** END BUILDING INDEX *************/ } catch (const std::exception &e) { - std::cerr << e.what() << std::endl; + std::cerr << e.what() << '\n'; return EXIT_FAILURE; } return EXIT_SUCCESS; diff --git a/src/simreads.cpp b/src/simreads.cpp index b2fe4c8..477cc3c 100644 --- a/src/simreads.cpp +++ b/src/simreads.cpp @@ -51,14 +51,13 @@ namespace simreads_random { // the same seed, the results could be different. 
This meant testing // didn't work. -std::random_device rd; -std::mt19937 e; -bool initialized = false; -std::uniform_real_distribution dr; -std::uniform_int_distribution di; +std::mt19937 e; // NOLINT +bool initialized = false; // NOLINT +std::uniform_real_distribution dr; // NOLINT +std::uniform_int_distribution di; // NOLINT void -initialize([[maybe_unused]] const std::size_t the_seed) { +initialize(const std::size_t the_seed) { e = std::mt19937(the_seed); initialized = true; } @@ -122,7 +121,7 @@ struct FragInfo { assert(!name.empty()); std::string read(seq); revcomp_inplace(read); - read = read.substr(0, read_length); + read = read.substr(0, read_length); // cppcheck-suppress uselessCallsSubstr for (std::size_t i = 0; i < read_length - std::size(read); ++i) read += random_base(); return fasta_format ? format_fasta_record(name + ".2", read) @@ -140,9 +139,10 @@ struct FragInfo { const std::size_t rseq_ops = cigar_rseq_ops(cigar) + cigar_rseq_ops(cigar2); cigar = cigar + std::to_string(orig_ref_len - rseq_ops) + "N" + cigar2; - seq = seq.substr(0, read_length) + - std::string(orig_ref_len - rseq_ops, 'N') + - seq.substr(std::size(seq) - read_length, read_length); + seq = + seq.substr(0, read_length) + // cppcheck-suppress uselessCallsSubstr + std::string(orig_ref_len - rseq_ops, 'N') + + seq.substr(std::size(seq) - read_length, read_length); } } void @@ -152,7 +152,8 @@ struct FragInfo { } void bisulfite_conversion(const bool random_pbat, const double bs_conv) { - if (pbat || (random_pbat && simreads_random::rand_double() < 0.5)) { + constexpr auto coin_flip = 0.5; + if (pbat || (random_pbat && simreads_random::rand_double() < coin_flip)) { for (auto it(begin(seq)); it != end(seq); ++it) { if (*it == 'G' && (simreads_random::rand_double() < bs_conv)) *it = 'A'; @@ -186,13 +187,14 @@ struct FragInfo { static std::size_t read_length; }; +static constexpr auto read_length_default = 100; bool FragInfo::pbat = false; bool FragInfo::fasta_format = false; std::size_t 
FragInfo::frag_count = 0; -std::size_t FragInfo::read_length = 100; +std::size_t FragInfo::read_length = read_length_default; static std::ostream & -operator<<(std::ostream &out, FragInfo &the_info) { +operator<<(std::ostream &out, const FragInfo &the_info) { const bool rc = the_info.rc(); std::uint16_t flags_read = 0; std::uint16_t flags_mate = 0; @@ -211,7 +213,10 @@ operator<<(std::ostream &out, FragInfo &the_info) { const std::size_t read_pos = the_info.start_pos + 1; const std::size_t mate_pos = the_info.end_pos - FragInfo::read_length + 1; - const int tlen = rc ? -std::size(the_info.seq) : std::size(the_info.seq); + + const int tlen = rc ? -static_cast(std::size(the_info.seq)) + : static_cast(std::size(the_info.seq)); + std::string cigar1 = the_info.cigar; std::string cigar2 = the_info.cigar; @@ -260,17 +265,22 @@ operator<<(std::ostream &out, FragInfo &the_info) { // extract the position of the fragment checking all bases are valid static void sim_frag_position(const std::string &genome, const std::size_t frag_len, - std::string &the_frag, std::size_t &the_position, + std::string &the_frag, std::size_t &the_posn, const bool require_valid) { - static auto is_invalid = [](const char c) { return !valid_base(c); }; + static auto is_valid = [](const char c) { return valid_base(c); }; + + const auto g_beg = std::cbegin(genome); const std::size_t lim = std::size(genome) - frag_len + 1; - do { - the_position = simreads_random::rand() % lim; - the_frag = std::string(std::cbegin(genome) + the_position, - std::cbegin(genome) + the_position + frag_len); - } while (require_valid && find_if(std::cbegin(the_frag), std::cend(the_frag), - is_invalid) != std::cend(the_frag)); + the_posn = simreads_random::rand() % lim; + // NOLINTBEGIN(*-narrowing-conversions) + the_frag = std::string(g_beg + the_posn, g_beg + the_posn + frag_len); + while (require_valid && + std::all_of(std::cbegin(the_frag), std::cend(the_frag), is_valid)) { + the_posn = simreads_random::rand() % lim; + 
the_frag = std::string(g_beg + the_posn, g_beg + the_posn + frag_len); + } + // NOLINTEND(*-narrowing-conversions) } // simulate from a uniform distribution in a range @@ -284,7 +294,7 @@ sim_frag_length(const std::size_t min_length, const std::size_t max_length) { } struct FragSampler { - FragSampler(const std::string &g, const ChromLookup c, const char sc, + FragSampler(const std::string &g, const ChromLookup &c, const char sc, const std::size_t milen, const std::size_t malen, const bool require_valid) : genome(g), cl(c), strand_code(sc), min_length(milen), max_length(malen), @@ -321,7 +331,7 @@ struct FragSampler { std::abort(); } } - const std::string &genome; + const std::string &genome; // NOLINT(*-avoid-const-*-data-members) ChromLookup cl; char strand_code{}; std::size_t min_length{}; @@ -394,9 +404,9 @@ struct FragMutator { std::string tostring() const { std::ostringstream oss; - oss << "mutation_rate=" << mutation_rate << std::endl - << "substitution_rate=" << substitution_rate << std::endl - << "insertion_rate=" << insertion_rate << std::endl + oss << "mutation_rate=" << mutation_rate << '\n' + << "substitution_rate=" << substitution_rate << '\n' + << "insertion_rate=" << insertion_rate << '\n' << "deletion_rate=" << deletion_rate; return oss.str(); } @@ -412,7 +422,7 @@ extract_change_type_vals(const std::string &change_type_vals, double &deletion_rate) { if (!change_type_vals.empty()) { std::istringstream iss(change_type_vals); - char x; + char x{}; iss >> substitution_rate; iss >> x; iss >> insertion_rate; @@ -422,10 +432,11 @@ extract_change_type_vals(const std::string &change_type_vals, } int -simreads(int argc, char *argv[]) { - +simreads(int argc, char *argv[]) { // NOLINT(*-c-arrays) + static constexpr auto n_reads_default = 100; + static constexpr auto min_frag_len_default = 100; + static constexpr auto max_frag_len_defeault = 250; try { - std::string chrom_file; std::string output_prefix; std::string locations_file; @@ -435,9 +446,9 @@ 
simreads(int argc, char *argv[]) { bool random_pbat = false; bool require_valid = false; - std::size_t n_reads = 100; - std::size_t min_frag_len = 100; - std::size_t max_frag_len = 250; + std::size_t n_reads{n_reads_default}; + std::size_t min_frag_len{min_frag_len_default}; + std::size_t max_frag_len{max_frag_len_defeault}; char strand_arg = 'b'; @@ -454,7 +465,7 @@ simreads(int argc, char *argv[]) { std::size_t max_mutations = num_lim::max(); /****************** COMMAND LINE OPTIONS ********************/ - OptionParser opt_parse(strip_path(argv[0]), + OptionParser opt_parse(argv[0], // NOLINT(*-pointer-arithmetic) "simulate reads for " "testing walt2", "", 1); @@ -490,19 +501,19 @@ simreads(int argc, char *argv[]) { std::vector leftover_args; opt_parse.parse(argc, argv, leftover_args); if (argc == 1 || opt_parse.help_requested()) { - std::cerr << opt_parse.help_message() << std::endl; + std::cerr << opt_parse.help_message() << '\n'; return EXIT_SUCCESS; } if (opt_parse.about_requested()) { - std::cerr << opt_parse.about_message() << std::endl; + std::cerr << opt_parse.about_message() << '\n'; return EXIT_SUCCESS; } if (opt_parse.option_missing()) { - std::cerr << opt_parse.option_missing_message() << std::endl; + std::cerr << opt_parse.option_missing_message() << '\n'; return EXIT_SUCCESS; } if (std::size(leftover_args) != 1) { - std::cerr << opt_parse.help_message() << std::endl; + std::cerr << opt_parse.help_message() << '\n'; return EXIT_SUCCESS; } const std::string genome_file(leftover_args.front()); @@ -515,11 +526,11 @@ simreads(int argc, char *argv[]) { rng_seed = time(0) + getpid(); if (VERBOSE) - std::cerr << "rng seed: " << rng_seed << std::endl; + std::cerr << "rng seed: " << rng_seed << '\n'; simreads_random::initialize(rng_seed); if (VERBOSE) - std::cerr << "[loading genome]" << std::endl; + std::cerr << "[loading genome]\n"; std::ifstream in(genome_file); if (!in) throw std::runtime_error("bad genome file: " + genome_file); @@ -533,7 +544,7 @@ 
simreads(int argc, char *argv[]) { if (!locations_file.empty()) { if (VERBOSE) std::cerr << "[opening frag locations file: " << locations_file << "]" - << std::endl; + << '\n'; loc_out.open(locations_file); if (!loc_out) throw std::runtime_error("bad locations output file: " + @@ -544,11 +555,9 @@ simreads(int argc, char *argv[]) { output_prefix + (FragInfo::fasta_format ? "_1.fa" : "_1.fq"); if (VERBOSE) { if (FragInfo::fasta_format) - std::cerr << "[opening read1 fastq: " << read1_outfile << "]" - << std::endl; + std::cerr << "[opening read1 fastq: " << read1_outfile << "]\n"; else - std::cerr << "[opening read1 fasta: " << read1_outfile << "]" - << std::endl; + std::cerr << "[opening read1 fasta: " << read1_outfile << "]\n"; } std::ofstream read1_out(read1_outfile); if (!read1_out) @@ -560,11 +569,9 @@ simreads(int argc, char *argv[]) { output_prefix + (FragInfo::fasta_format ? "_2.fa" : "_2.fq"); if (VERBOSE) { if (FragInfo::fasta_format) - std::cerr << "[opening read2 fastq: " << read2_outfile << "]" - << std::endl; + std::cerr << "[opening read2 fastq: " << read2_outfile << "]\n"; else - std::cerr << "[opening read2 fasta: " << read2_outfile << "]" - << std::endl; + std::cerr << "[opening read2 fasta: " << read2_outfile << "]\n"; } read2_out.open(read2_outfile); if (!read2_out) @@ -574,15 +581,13 @@ simreads(int argc, char *argv[]) { FragSampler frag_samp(genome, cl, strand_arg, min_frag_len, max_frag_len, require_valid); if (VERBOSE) - std::cerr << "[constructed fragment sampler]" << std::endl; + std::cerr << "[constructed fragment sampler]\n"; FragMutator frag_mut(mutation_rate, substitution_rate, insertion_rate, deletion_rate); if (VERBOSE) - std::cerr << "[constructed mutator]" << std::endl; - - if (VERBOSE) - std::cerr << "[simulating frags]" << std::endl; + std::cerr << "[constructed mutator]\n" + << "[simulating frags]\n"; for (std::size_t i = 0; i < n_reads; ++i) { FragInfo info; @@ -599,7 +604,7 @@ simreads(int argc, char *argv[]) { } } catch (const 
std::exception &e) { - std::cerr << e.what() << std::endl; + std::cerr << e.what() << '\n'; return EXIT_FAILURE; } return EXIT_SUCCESS;