From 8fa1fa0f7d25398b7adfddd0de16c3d597bbd595 Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Mon, 6 Oct 2025 12:19:54 -0700 Subject: [PATCH 1/6] src/abismal.cpp: using the strict-sam mode as default --- src/abismal.cpp | 133 +++++++++++++++++++++--------------------------- 1 file changed, 58 insertions(+), 75 deletions(-) diff --git a/src/abismal.cpp b/src/abismal.cpp index 639109b..da0968d 100644 --- a/src/abismal.cpp +++ b/src/abismal.cpp @@ -431,8 +431,7 @@ chrom_and_posn(const ChromLookup &cl, const bam_cigar_t &cig, enum map_type { map_unmapped, map_unique, map_ambig }; static map_type -format_se(const bool allow_ambig, const bool strict_sam, const se_element &res, - const ChromLookup &cl, +format_se(const bool allow_ambig, const se_element &res, const ChromLookup &cl, // ADS: 'read' should not be used after a call to 'format_se' std::string &read, const std::string &read_name, const bam_cigar_t &cigar, bam_rec &sr) { @@ -449,8 +448,7 @@ format_se(const bool allow_ambig, const bool strict_sam, const se_element &res, std::uint16_t flag = 0; if (res.rc()) { flag |= BAM_FREVERSE; - if (strict_sam) - revcomp_inplace(read); + revcomp_inplace(read); } if (allow_ambig && ambig) @@ -591,8 +589,7 @@ valid_pair(const pe_element &best, const std::uint32_t readlen1, */ static map_type format_pe( - const bool allow_ambig, const bool strict_sam, const pe_element &p, - const ChromLookup &cl, + const bool allow_ambig, const pe_element &p, const ChromLookup &cl, // ADS: 'read1' and 'read2' should not be used after a call to format_pe std::string &read1, std::string &read2, const std::string &name1, const std::string &name2, const bam_cigar_t &cig1, const bam_cigar_t &cig2, @@ -627,14 +624,12 @@ format_pe( if (p.r1.rc()) { // ADS: is p.r1.rc() always !p.r2.rc()? flag1 |= BAM_FREVERSE; flag2 |= BAM_FMREVERSE; - if (strict_sam) - revcomp_inplace(read1); + revcomp_inplace(read1); } if (p.r2.rc()) { flag2 |= BAM_FREVERSE; flag1 |= BAM_FMREVERSE; - if (strict_sam) - revcomp_inplace(read2); + revcomp_inplace(read2); } if (allow_ambig && ambig) { // ADS: mark ambig for both the same way? @@ -996,24 +991,23 @@ struct pe_map_stats { static void select_output( - const bool allow_ambig, const bool strict_sam, const ChromLookup &cl, + const bool allow_ambig, const ChromLookup &cl, // ADS: 'read1' and 'read2' should not be used after a call to 'select_output' std::string &read1, const std::string &name1, std::string &read2, const std::string &name2, const bam_cigar_t &cig1, const bam_cigar_t &cig2, pe_element &best, se_element &se1, se_element &se2, bam_rec &sr1, bam_rec &sr2) { - const map_type pe_map_type = - format_pe(allow_ambig, strict_sam, best, cl, read1, read2, name1, name2, - cig1, cig2, sr1, sr2); + const map_type pe_map_type = format_pe(allow_ambig, best, cl, read1, read2, + name1, name2, cig1, cig2, sr1, sr2); if (!best.should_report(allow_ambig) || pe_map_type == map_unmapped) { if (pe_map_type == map_unmapped) best.reset(); - if (format_se(allow_ambig, strict_sam, se1, cl, read1, name1, cig1, sr1) == + if (format_se(allow_ambig, se1, cl, read1, name1, cig1, sr1) == map_unmapped) se1.reset(); - if (format_se(allow_ambig, strict_sam, se2, cl, read2, name2, cig2, sr2) == + if (format_se(allow_ambig, se2, cl, read2, name2, cig2, sr2) == map_unmapped) se2.reset(); } @@ -1443,8 +1437,8 @@ reset_bam_rec(bam_rec &b) { template static void map_single_ended(const bool show_progress, const bool allow_ambig, - const bool strict_sam, const AbismalIndex &abismal_index, - ReadLoader &rl, se_map_stats &se_stats, bamxx::bam_header &hdr, + const AbismalIndex &abismal_index, ReadLoader &rl, + se_map_stats &se_stats, bamxx::bam_header &hdr, bamxx::bam_out &out, ProgressBar &progress) { const auto counter_st(std::cbegin(abismal_index.counter)); const auto counter_t_st(std::cbegin(abismal_index.counter_t)); @@ -1519,8 +1513,8 @@ map_single_ended(const bool show_progress, const bool allow_ambig, align_se_candidates(pread, pread_rc, pread, pread_rc, se_element::valid_frac, res, bests[i], cigar[i], aln); - if (format_se(allow_ambig, strict_sam, bests[i], abismal_index.cl, - reads[i], names[i], cigar[i], mr[i]) == map_unmapped) + if (format_se(allow_ambig, bests[i], abismal_index.cl, reads[i], + names[i], cigar[i], mr[i]) == map_unmapped) bests[i].reset(); } } @@ -1547,10 +1541,9 @@ map_single_ended(const bool show_progress, const bool allow_ambig, static void map_single_ended_rand(const bool show_progress, const bool allow_ambig, - const bool strict_sam, const AbismalIndex &abismal_index, - ReadLoader &rl, se_map_stats &se_stats, - bamxx::bam_header &hdr, bamxx::bam_out &out, - ProgressBar &progress) { + const AbismalIndex &abismal_index, ReadLoader &rl, + se_map_stats &se_stats, bamxx::bam_header &hdr, + bamxx::bam_out &out, ProgressBar &progress) { const auto counter_st(std::cbegin(abismal_index.counter)); const auto counter_t_st(std::cbegin(abismal_index.counter_t)); const auto counter_a_st(std::cbegin(abismal_index.counter_a)); @@ -1633,8 +1626,8 @@ map_single_ended_rand(const bool show_progress, const bool allow_ambig, align_se_candidates(pread_t, pread_t_rc, pread_a, pread_a_rc, se_element::valid_frac, res, bests[i], cigar[i], aln); - if (format_se(allow_ambig, strict_sam, bests[i], abismal_index.cl, - reads[i], names[i], cigar[i], mr[i]) == map_unmapped) + if (format_se(allow_ambig, bests[i], abismal_index.cl, reads[i], + names[i], cigar[i], mr[i]) == map_unmapped) bests[i].reset(); } } @@ -1670,7 +1663,7 @@ format_duration(const abismal_timepoint start_time, template static void run_single_ended(const bool show_progress, const bool allow_ambig, - const bool strict_sam, const std::string &reads_file, + const std::string &reads_file, const AbismalIndex &abismal_index, se_map_stats &se_stats, bamxx::bam_header &hdr, bamxx::bam_out &out) { ReadLoader rl(reads_file); @@ -1682,11 +1675,11 @@ run_single_ended(const bool show_progress, const bool allow_ambig, for (auto i = 0u; i < abismal_concurrency::n_threads; ++i) threads.emplace_back([&] { if (random_pbat) - map_single_ended_rand(show_progress, allow_ambig, strict_sam, - abismal_index, rl, se_stats, hdr, out, progress); + map_single_ended_rand(show_progress, allow_ambig, abismal_index, rl, + se_stats, hdr, out, progress); else - map_single_ended(show_progress, allow_ambig, strict_sam, - abismal_index, rl, se_stats, hdr, out, progress); + map_single_ended(show_progress, allow_ambig, abismal_index, rl, + se_stats, hdr, out, progress); }); for (auto &thread : threads) thread.join(); @@ -1883,8 +1876,8 @@ map_fragments(const std::uint32_t max_candidates, const std::string &read1, template static void map_paired_ended(const bool show_progress, const bool allow_ambig, - const bool strict_sam, const AbismalIndex &abismal_index, - ReadLoader &rl1, ReadLoader &rl2, pe_map_stats &pe_stats, + const AbismalIndex &abismal_index, ReadLoader &rl1, + ReadLoader &rl2, pe_map_stats &pe_stats, bamxx::bam_header &hdr, bamxx::bam_out &out, ProgressBar &progress) { const auto counter_st(std::begin(abismal_index.counter)); @@ -2016,9 +2009,9 @@ map_paired_ended(const bool show_progress, const bool allow_ambig, se_element::valid_frac / 2.0, res_se2, bests_se2[i], cigar2[i], aln); } - select_output(allow_ambig, strict_sam, abismal_index.cl, reads1[i], - names1[i], reads2[i], names2[i], cigar1[i], cigar2[i], - bests[i], bests_se1[i], bests_se2[i], mr1[i], mr2[i]); + select_output(allow_ambig, abismal_index.cl, reads1[i], names1[i], + reads2[i], names2[i], cigar1[i], cigar2[i], bests[i], + bests_se1[i], bests_se2[i], mr1[i], mr2[i]); } { @@ -2050,8 +2043,8 @@ map_paired_ended(const bool show_progress, const bool allow_ambig, static void map_paired_ended_rand(const bool show_progress, const bool allow_ambig, - const bool strict_sam, const AbismalIndex &abismal_index, - ReadLoader &rl1, ReadLoader &rl2, pe_map_stats &pe_stats, + const AbismalIndex &abismal_index, ReadLoader &rl1, + ReadLoader &rl2, pe_map_stats &pe_stats, bamxx::bam_header &hdr, bamxx::bam_out &out, ProgressBar &progress) { const auto counter_st(std::begin(abismal_index.counter)); @@ -2193,9 +2186,9 @@ map_paired_ended_rand(const bool show_progress, const bool allow_ambig, se_element::valid_frac / 2.0, res_se2, bests_se2[i], cigar2[i], aln); } - select_output(allow_ambig, strict_sam, abismal_index.cl, reads1[i], - names1[i], reads2[i], names2[i], cigar1[i], cigar2[i], - bests[i], bests_se1[i], bests_se2[i], mr1[i], mr2[i]); + select_output(allow_ambig, abismal_index.cl, reads1[i], names1[i], + reads2[i], names2[i], cigar1[i], cigar2[i], bests[i], + bests_se1[i], bests_se2[i], mr1[i], mr2[i]); } { @@ -2228,8 +2221,7 @@ map_paired_ended_rand(const bool show_progress, const bool allow_ambig, template static void run_paired_ended(const bool show_progress, const bool allow_ambig, - const bool strict_sam, const std::string &reads_file1, - const std::string &reads_file2, + const std::string &reads_file1, const std::string &reads_file2, const AbismalIndex &abismal_index, pe_map_stats &pe_stats, bamxx::bam_header &hdr, bamxx::bam_out &out) { ReadLoader rl1(reads_file1); @@ -2242,13 +2234,11 @@ run_paired_ended(const bool show_progress, const bool allow_ambig, for (auto i = 0u; i < abismal_concurrency::n_threads; ++i) threads.emplace_back([&] { if (random_pbat) - map_paired_ended_rand(show_progress, allow_ambig, strict_sam, - abismal_index, rl1, rl2, pe_stats, hdr, out, - progress); + map_paired_ended_rand(show_progress, allow_ambig, abismal_index, rl1, + rl2, pe_stats, hdr, out, progress); else - map_paired_ended(show_progress, allow_ambig, strict_sam, - abismal_index, rl1, rl2, pe_stats, hdr, out, - progress); + map_paired_ended(show_progress, allow_ambig, abismal_index, rl1, + rl2, pe_stats, hdr, out, progress); }); for (auto &thread : threads) thread.join(); @@ -2271,8 +2261,8 @@ file_exists(const std::string &filename) { static int abismal_make_sam_header(const ChromLookup &cl, const int argc, char *argv[], bamxx::bam_header &hdr) { - assert(cl.names.size() > 2); // two entries exist for the padding - assert(cl.starts.size() == cl.names.size() + 1); + assert(std::size(cl.names) > 2); // two entries exist for the padding + assert(std::size(cl.starts) == std::size(cl.names) + 1); const std::vector names(std::begin(cl.names) + 1, std::end(cl.names) - 1); std::vector sizes(names.size()); @@ -2298,8 +2288,8 @@ abismal_make_sam_header(const ChromLookup &cl, const int argc, char *argv[], // how the program was run std::ostringstream the_command; - copy(argv, argv + argc, - std::ostream_iterator(the_command, " ")); + std::copy(argv, argv + argc, + std::ostream_iterator(the_command, " ")); out << "CL:\"" << the_command.str() << "\"" << '\n'; hdr.h = sam_hdr_init(); @@ -2321,7 +2311,6 @@ abismal(int argc, char *argv[]) { bool pbat_mode = false; bool random_pbat = false; bool write_bam_fmt = false; - bool strict_sam = false; std::uint32_t max_candidates = 0; std::string index_file = ""; @@ -2358,9 +2347,6 @@ abismal(int argc, char *argv[]) { false, GA_conversion); opt_parse.add_opt("threads", 't', "number of threads", false, abismal_concurrency::n_threads); - opt_parse.add_opt("sam-orientation", 'S', - "reverse complement negative strand mappers", false, - strict_sam); opt_parse.add_opt("verbose", 'v', "print more run info", false, verbose); std::vector leftover_args; opt_parse.parse(argc, argv, leftover_args); @@ -2491,31 +2477,28 @@ abismal(int argc, char *argv[]) { if (reads_file2.empty()) { if (GA_conversion || pbat_mode) - run_single_ended(show_progress, allow_ambig, strict_sam, - reads_file, abismal_index, se_stats, - hdr, out); + run_single_ended(show_progress, allow_ambig, reads_file, + abismal_index, se_stats, hdr, out); else if (random_pbat) - run_single_ended(show_progress, allow_ambig, strict_sam, - reads_file, abismal_index, se_stats, hdr, - out); + run_single_ended(show_progress, allow_ambig, reads_file, + abismal_index, se_stats, hdr, out); else - run_single_ended(show_progress, allow_ambig, strict_sam, - reads_file, abismal_index, se_stats, - hdr, out); + run_single_ended(show_progress, allow_ambig, reads_file, + abismal_index, se_stats, hdr, out); } else { if (pbat_mode) - run_paired_ended(show_progress, allow_ambig, strict_sam, - reads_file, reads_file2, abismal_index, - pe_stats, hdr, out); + run_paired_ended(show_progress, allow_ambig, reads_file, + reads_file2, abismal_index, pe_stats, + hdr, out); else if (random_pbat) - run_paired_ended(show_progress, allow_ambig, strict_sam, - reads_file, reads_file2, abismal_index, - pe_stats, hdr, out); + run_paired_ended(show_progress, allow_ambig, reads_file, + reads_file2, abismal_index, pe_stats, + hdr, out); else - run_paired_ended(show_progress, allow_ambig, strict_sam, - reads_file, reads_file2, abismal_index, - pe_stats, hdr, out); + run_paired_ended(show_progress, allow_ambig, reads_file, + reads_file2, abismal_index, pe_stats, + hdr, out); } if (!stats_outfile.empty()) { From 6d8478f7d5212b6557058a67374004708b8a1eeb Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Mon, 6 Oct 2025 12:36:44 -0700 Subject: [PATCH 2/6] src/abismal.cpp: attempting to add a label for the stats output even if it's just single-end, which will allow for easier parsing in the general case --- src/abismal.cpp | 72 +++++++++++++++++++++++++------------------------ 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/src/abismal.cpp b/src/abismal.cpp index da0968d..79bec45 100644 --- a/src/abismal.cpp +++ b/src/abismal.cpp @@ -17,8 +17,20 @@ #include "abismal.hpp" +// ADS: specific to general headers, so those outside project control don't +// bring in names. +#include "AbismalAlign.hpp" +#include "AbismalIndex.hpp" +#include "OptionParser.hpp" +#include "bamxx.hpp" +#include "bisulfite_utils.hpp" +#include "dna_four_bit_bisulfite.hpp" +#include "popcnt.hpp" +#include "sam_record.hpp" +#include "smithlab_os.hpp" +#include "smithlab_utils.hpp" + #include -#include #include #include @@ -32,16 +44,7 @@ #include #include -#include "AbismalAlign.hpp" -#include "AbismalIndex.hpp" -#include "OptionParser.hpp" -#include "bamxx.hpp" -#include "bisulfite_utils.hpp" -#include "dna_four_bit_bisulfite.hpp" -#include "popcnt.hpp" -#include "sam_record.hpp" -#include "smithlab_os.hpp" -#include "smithlab_utils.hpp" +#include using abismal_clock = std::chrono::steady_clock; using abismal_timepoint = std::chrono::time_point; @@ -922,13 +925,15 @@ struct se_map_stats { } [[nodiscard]] std::string - tostring(const std::size_t n_tabs = 0) const { + tostring(const std::string &label, const std::size_t n_tabs = 1) const { static constexpr auto tab = " "; constexpr auto pct = [](const double x) { return x * 100.0; }; std::string t; for (std::size_t i = 0; i < n_tabs; ++i) t += tab; std::ostringstream oss; + oss << t << label << '\n'; + t += tab; // clang-format off oss << t << "total_reads: " << total_reads << '\n' << t << "mapped:\n" @@ -980,10 +985,10 @@ struct pe_map_stats { [[nodiscard]] std::string tostring(const bool allow_ambig) const { std::ostringstream oss; - oss << "pairs:\n" << both_stats.tostring(1); + oss << both_stats.tostring("pairs"); if (!allow_ambig) { - oss << "mate1:\n" << end1_stats.tostring(1); - oss << "mate2:\n" << end2_stats.tostring(1); + oss << end1_stats.tostring("mate1"); + oss << end2_stats.tostring("mate2"); } return oss.str(); } @@ -1108,8 +1113,8 @@ find_candidates(const std::uint32_t max_candidates, low = the_bit ? first_1 : low; } - // some bit narrows it down to 0 candidates, roll back to when we - // had some candidates to work with. + // some bit narrows it down to 0 candidates, roll back to when we had some + // candidates to work with. if (low == high) { --p; low = prev_low; @@ -1173,8 +1178,8 @@ find_candidates_three(const std::uint32_t max_candidates, } } - // some bit narrows it down to 0 candidates, roll back to when we - // had some candidates to work with. + // some bit narrows it down to 0 candidates, roll back to when we had some + // candidates to work with. if (low == high) { --p; low = prev_low; @@ -1270,9 +1275,9 @@ process_seeds(const std::uint32_t max_candidates, const std::uint32_t lim_two = readlen - seed::key_weight + 1; - // GS: this is to avoid chasing down uninformative two-letter - // seeds when there is a sufficiently high number of three - // letter seeds that is lower than the number of two-letter hits + // GS: this is to avoid chasing down uninformative two-letter seeds when + // there is a sufficiently high number of three letter seeds that is lower + // than the number of two-letter hits static const std::uint32_t MIN_FOLD_SIZE = 10; for (i = 0; i < lim_two && !res.sure_ambig; ++i, ++read_idx) { s_idx = index_st + *(counter_st + k); @@ -1309,12 +1314,11 @@ prep_read(const std::string &r, Read &pread) { : (encode_base_t_rich[static_cast(r[i])])); } -/* GS: this function simply converts the vector pread - * to a vector by putting 16 bases in each element of - * the packed read. If the read length does not divide 16, we add - * 1111s to the remaining positions so it divides 16. The remaining - * bases match all bases in the reference genome - * */ +/* GS: this function simply converts the vector pread to a + * vector by putting 16 bases in each element of the packed read. If + * the read length does not divide 16, we add 1111s to the remaining positions + * so it divides 16. The remaining bases match all bases in the reference + * genome */ static void pack_read(const Read &pread, PackedRead &packed_pread) { static const element_t base_match_any = static_cast(0xF); @@ -1339,8 +1343,8 @@ pack_read(const Read &pread, PackedRead &packed_pread) { if (pread_ind == sz) return; - // now put only the remaining bases in the last pos. The rest - // should match any base in the reference + // now put only the remaining bases in the last pos. The rest should match + // any base in the reference *it = 0; std::size_t j = 0; while (pread_ind < sz) @@ -1568,8 +1572,7 @@ map_single_ended_rand(const bool show_progress, const bool allow_ambig, bests.resize(ReadLoader::batch_size); mr.resize(ReadLoader::batch_size); - // GS: pre-allocated variables used once per read - // and not used for reporting + // GS: pre-allocated variables used once per read and not used for reporting Read pread_t, pread_t_rc, pread_a, pread_a_rc; PackedRead packed_pread; se_candidates res; @@ -2504,11 +2507,10 @@ abismal(int argc, char *argv[]) { if (!stats_outfile.empty()) { std::ofstream stats_of(stats_outfile); if (stats_of) - stats_of << (reads_file2.empty() ? se_stats.tostring() + stats_of << (reads_file2.empty() ? se_stats.tostring("mate1") : pe_stats.tostring(allow_ambig)); else - std::cerr << "failed to open stats output file: " << stats_outfile - << '\n'; + std::cerr << "failed to open stats out file: " << stats_outfile << '\n'; } } catch (const std::exception &e) { From c9a783fe1d3a74b7f4f10604a38e539ddb94873c Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Mon, 6 Oct 2025 12:58:18 -0700 Subject: [PATCH 3/6] src/abismal.cpp: fixing some formatting in stats output --- src/abismal.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/abismal.cpp b/src/abismal.cpp index 79bec45..59c3918 100644 --- a/src/abismal.cpp +++ b/src/abismal.cpp @@ -17,8 +17,6 @@ #include "abismal.hpp" -// ADS: specific to general headers, so those outside project control don't -// bring in names. #include "AbismalAlign.hpp" #include "AbismalIndex.hpp" #include "OptionParser.hpp" @@ -925,14 +923,14 @@ struct se_map_stats { } [[nodiscard]] std::string - tostring(const std::string &label, const std::size_t n_tabs = 1) const { + tostring(const std::string &label, const std::size_t n_tabs = 0) const { static constexpr auto tab = " "; constexpr auto pct = [](const double x) { return x * 100.0; }; std::string t; for (std::size_t i = 0; i < n_tabs; ++i) t += tab; std::ostringstream oss; - oss << t << label << '\n'; + oss << t << label << ":\n"; t += tab; // clang-format off oss << t << "total_reads: " << total_reads << '\n' From 6c7de34fe754e15370df6911d5b84fbdee13a82b Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Mon, 6 Oct 2025 12:59:05 -0700 Subject: [PATCH 4/6] data/md5sum.txt: updating hashes for SAM strands and for mstats output formats --- data/md5sum.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/data/md5sum.txt b/data/md5sum.txt index e7f6123..2fe6e8d 100644 --- a/data/md5sum.txt +++ b/data/md5sum.txt @@ -6,11 +6,11 @@ aff19aeae4184c38cf364b3a94527098 tests/reads_pbat_pe_2.fq e94e71292fccd255bad3f3694efe7a4b tests/reads_rpbat_pe_1.fq 8bfa00acd0639dc66acd5aa8ac0369d5 tests/reads_rpbat_pe_2.fq bcbf01be810cbf4051292813eb6b9225 tests/tRex1.idx -ffff35a5110b3312555fc3ccaa78c99a tests/reads_pbat_pe.sam -af094fdc32b41a1605bb6d5b71e1b767 tests/reads_pe.sam -8370999d6ad6d5afc99ec483f16cc214 tests/reads_rpbat_pe.sam -a6d0c51aa69795b9bf3f9e424af35978 tests/reads.sam -6793c72a4957578afd2092f45f6dab5f tests/reads.mstats +e8a5fe0aec564a972ca7a3a36c0022f7 tests/reads_pbat_pe.sam +14e935a5b8a20c8a7422834c3327d1c7 tests/reads_pe.sam +98d6ad9d563e3318756bd5fc3fcbc263 tests/reads_rpbat_pe.sam +8126d46074213ad3674181f4ea4f8bd1 tests/reads.sam +d4034607fe75c7a207666fd3698a8fdb tests/reads.mstats 8361153e3610769ed1697fd0551c4adf tests/reads_pbat_pe.mstats 00d3c8c0de72cca992b3561c7734b381 tests/reads_pe.mstats dfe8102b04166f5127bddfc9c09893f0 tests/reads_rpbat_pe.mstats From 5936e0cd0816042e9802304199176dffcf94cf0f Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Mon, 6 Oct 2025 13:08:20 -0700 Subject: [PATCH 5/6] ../src/abismal.cpp: fixing 'mate1' to 'read1' so I eventually won't have to keep dealing with the terminology... --- src/abismal.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/abismal.cpp b/src/abismal.cpp index 59c3918..51275b7 100644 --- a/src/abismal.cpp +++ b/src/abismal.cpp @@ -445,7 +445,7 @@ format_se(const bool allow_ambig, const se_element &res, const ChromLookup &cl, if (!valid || !chrom_and_posn(cl, cigar, res.pos, ref_s, ref_e, chrom_idx)) return map_unmapped; - // ADS: we might be doing format_se for a mate in paried reads + // ADS: we might be doing format_se for an end in paried reads std::uint16_t flag = 0; if (res.rc()) { flag |= BAM_FREVERSE; @@ -985,8 +985,8 @@ struct pe_map_stats { std::ostringstream oss; oss << both_stats.tostring("pairs"); if (!allow_ambig) { - oss << end1_stats.tostring("mate1"); - oss << end2_stats.tostring("mate2"); + oss << end1_stats.tostring("read1"); + oss << end2_stats.tostring("read2"); } return oss.str(); } @@ -1803,7 +1803,7 @@ best_pair(const pe_candidates &res1, const pe_candidates &res2, s2.pos = best_pos2; s2.diffs = simple_aln::edit_distance(best_scr2, len2, cigar2); - // last check if, after alignment, mates are still concordant + // last check if, after alignment, ends are still concordant const std::uint32_t frag_end = best_pos2 + len2; if (frag_end >= best_pos1 + pe_element::min_dist && frag_end <= best_pos1 + pe_element::max_dist) { @@ -2505,7 +2505,7 @@ abismal(int argc, char *argv[]) { if (!stats_outfile.empty()) { std::ofstream stats_of(stats_outfile); if (stats_of) - stats_of << (reads_file2.empty() ? se_stats.tostring("mate1") + stats_of << (reads_file2.empty() ? se_stats.tostring("read1") : pe_stats.tostring(allow_ambig)); else std::cerr << "failed to open stats out file: " << stats_outfile << '\n'; From 78bf5feaac4820e31aa26d9649041fdc67dd9b2c Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Mon, 6 Oct 2025 13:09:46 -0700 Subject: [PATCH 6/6] data/md5sum.txt: updating hashes for 'mate' to 'read' --- data/md5sum.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/data/md5sum.txt b/data/md5sum.txt index 2fe6e8d..d4ca674 100644 --- a/data/md5sum.txt +++ b/data/md5sum.txt @@ -10,7 +10,7 @@ e8a5fe0aec564a972ca7a3a36c0022f7 tests/reads_pbat_pe.sam 14e935a5b8a20c8a7422834c3327d1c7 tests/reads_pe.sam 98d6ad9d563e3318756bd5fc3fcbc263 tests/reads_rpbat_pe.sam 8126d46074213ad3674181f4ea4f8bd1 tests/reads.sam -d4034607fe75c7a207666fd3698a8fdb tests/reads.mstats -8361153e3610769ed1697fd0551c4adf tests/reads_pbat_pe.mstats -00d3c8c0de72cca992b3561c7734b381 tests/reads_pe.mstats -dfe8102b04166f5127bddfc9c09893f0 tests/reads_rpbat_pe.mstats +981dd110d1675c77533a485204fb13cc tests/reads.mstats +ba2a0cdbc3e431adf9f85e36d00d783a tests/reads_pbat_pe.mstats +c8211fb637c03050b348e51f2ff91f3a tests/reads_pe.mstats +bd54ba1e720039cb4b662faa4f0e04b3 tests/reads_rpbat_pe.mstats