From 5324c1995f958b98daf71fdf8fc9dda5f3fe9ca8 Mon Sep 17 00:00:00 2001 From: tjstruck Date: Mon, 28 Jul 2025 16:56:32 -0700 Subject: [PATCH 1/7] Update GenerateFs --calc-converage to use and store nseq with the coverage distribution dictionary --- dadi_cli/GenerateFs.py | 15 +++++++++------ dadi_cli/parsers/common_arguments.py | 9 +++------ dadi_cli/parsers/generate_fs_parsers.py | 5 +++-- tests/test_GenerateFs.py | 25 +++++++++++++++++++++++-- 4 files changed, 38 insertions(+), 16 deletions(-) diff --git a/dadi_cli/GenerateFs.py b/dadi_cli/GenerateFs.py index 36a5e4a..adb0e64 100644 --- a/dadi_cli/GenerateFs.py +++ b/dadi_cli/GenerateFs.py @@ -13,7 +13,7 @@ def generate_fs( bootstrap: int, chunk_size: int, masking: str, - calc_coverage: bool, + calc_coverage: int, seed: int, ) -> None: """ @@ -47,8 +47,8 @@ def generate_fs( 'singleton' - Masks singletons in each population, 'shared' - Masks singletons in each population and those shared across populations, '' - No masking is applied. - calc_coverage : bool - If True, a data dictionary with coverage information is generated as .coverage.pickle. + calc_coverage : int + Pass in the number of sequenced haploids (nseq) and create a data dictionary with coverage information is generated as .coverage.pickle than nseq. seed : int Seed for generating random numbers. If None, a random seed is used. @@ -70,9 +70,12 @@ def generate_fs( ) except ModuleNotFoundError: print("Unable to load cyvcf2 and check if ancestral alleles are in provided VCF.\n"+ - "Generated FS may be empty if ancestral allele not found.") + "Generated SFS may be empty if ancestral allele not found.") except ImportError: - print("Error importing cyvcf2") + print("Unable to load cyvcf2 and check if ancestral alleles are in provided VCF.\n"+ + "Generated SFS may be empty if ancestral allele not found.") + # if calc_coverage != False: + # calc_coverage, nseq = True, calc_coverage if subsample != []: subsample_dict = {} @@ -97,7 +100,7 @@ def generate_fs( import dadi.LowPass.LowPass as lp cov_dist = lp.compute_cov_dist(dd, pop_ids) print(f"\nSaving coverage distribution data in pickle named:\n{output}.coverage.pickle\n") - pickle.dump(cov_dist, open(f"{output}.coverage.pickle","wb")) + pickle.dump([cov_dist, calc_coverage], open(f"{output}.coverage.pickle","wb")) if bootstrap is None: fs = dadi.Spectrum.from_data_dict( diff --git a/dadi_cli/parsers/common_arguments.py b/dadi_cli/parsers/common_arguments.py index a667599..b93e745 100644 --- a/dadi_cli/parsers/common_arguments.py +++ b/dadi_cli/parsers/common_arguments.py @@ -514,14 +514,11 @@ def add_coverage_model_argument(parser: argparse.ArgumentParser) -> None: """ parser.add_argument( "--coverage-model", - nargs="+", - default=[], - action="store", + default=False, + type=str, required=False, dest="cov_args", - help="Enable coverage model.\nArguments are:\ - \n1. The name of the <>.coverage.pickle file produced by GenerateFs --calc-coverage.\ - \n2. The total number of samples sequenced for each population in the VCF.", + help="Enable coverage model. Pass in the name of the <>.coverage.pickle file produced by GenerateFs --calc-coverage.", ) # Need to make the convergence dictionary because dadi-cli does not save a data dictionary parser.add_argument( "--coverage-inbreeding", diff --git a/dadi_cli/parsers/generate_fs_parsers.py b/dadi_cli/parsers/generate_fs_parsers.py index 4442b60..92f58c5 100644 --- a/dadi_cli/parsers/generate_fs_parsers.py +++ b/dadi_cli/parsers/generate_fs_parsers.py @@ -173,10 +173,11 @@ def add_generate_fs_parsers(subparsers: argparse.ArgumentParser) -> None: parser.add_argument( "--calc-coverage", + nargs="+", default=False, - action="store_true", + type=positive_int, dest="calc_coverage", - help="Store coverage information of sites in .coverage.pickle object. Default: None.", + help="Pass in the total number of sequenced haploids and store coverage information of sites in .coverage.pickle object. Default: None.", ) add_output_argument(parser) diff --git a/tests/test_GenerateFs.py b/tests/test_GenerateFs.py index 02ab209..f74774e 100644 --- a/tests/test_GenerateFs.py +++ b/tests/test_GenerateFs.py @@ -411,7 +411,7 @@ def test_generate_fs_marginalize(data): assert "All populations to marginalize must be in the list of population ids." in str(excinfo.value) -def test_generate_fs_lowpass(data): +def test_generate_fs_lowpass_dd(data): generate_fs( vcf="tests/example_data/LowPass-files/cov_gatk_Replicate_8_filtered_2D-GT-update.vcf", output="tests/test_results/cov-test.fs", @@ -424,7 +424,28 @@ def test_generate_fs_lowpass(data): bootstrap=None, chunk_size=None, masking="", - calc_coverage=True, + calc_coverage=[20, 20], + seed=None, + ) + import pickle + cov_dist, nseq = pickle.load(open("tests/test_results/cov-test.fs.coverage.pickle", "rb")) + + assert nseq == [20, 20] + +def test_generate_fs_lowpass_fs(data): + generate_fs( + vcf="tests/example_data/LowPass-files/cov_gatk_Replicate_8_filtered_2D-GT-update.vcf", + output="tests/test_results/cov-test.fs", + pop_ids=["pop1", "pop2"], + pop_info="tests/example_data/LowPass-files/cov_popfile_2D.txt", + projections=[20, 20], + marginalize_pops=None, + subsample=[], + polarized=False, + bootstrap=None, + chunk_size=None, + masking="", + calc_coverage=[20, 20], seed=None, ) dadi_cli_fs = dadi.Spectrum.from_file("tests/test_results/cov-test.fs") From 043d605b938e68cb9e602bf5ec42b6c9dec43420 Mon Sep 17 00:00:00 2001 From: tjstruck Date: Mon, 28 Jul 2025 17:25:32 -0700 Subject: [PATCH 2/7] Update inferences subcommands with new method of implimenting LowPass, just taking in a pickle with the coverage dictionary and nseq --- dadi_cli/InferDFE.py | 6 +++--- dadi_cli/InferDM.py | 13 +++++++------ dadi_cli/parsers/infer_dfe_parsers.py | 8 ++++---- dadi_cli/parsers/infer_dm_parsers.py | 6 +++--- .../LowPass-files/cov.fs.coverage.pickle | Bin 535 -> 547 bytes tests/test_InferDFE.py | 4 ++-- tests/test_InferDM.py | 8 ++++---- 7 files changed, 23 insertions(+), 22 deletions(-) diff --git a/dadi_cli/InferDFE.py b/dadi_cli/InferDFE.py index 4f512b3..b12bd58 100644 --- a/dadi_cli/InferDFE.py +++ b/dadi_cli/InferDFE.py @@ -112,18 +112,18 @@ def infer_dfe( if misid: func = dadi.Numerics.make_anc_state_misid_func(func) - if cov_args != []: + if cov_args != None: try: from dadi.LowPass.LowPass import make_low_pass_func_GATK_multisample as func_cov except ModuleNotFoundError: raise ImportError("ERROR:\nCurrent dadi version does not support coverage model\n") - nseq = [int(ele) for ele in cov_args[1:]] + # nseq = [int(ele) for ele in cov_args[1:]] if cov_inbreeding == []: Fx = None else: Fx = cov_inbreeding - func = func_cov(func, cov_args[0], fs.pop_ids, nseq, fs.sample_sizes, Fx=Fx) + func = func_cov(func, cov_args[0], fs.pop_ids, cov_args[1], fs.sample_sizes, Fx=Fx) p0_len = len(p0) # lower_bounds = convert_to_None(lower_bounds, p0_len) diff --git a/dadi_cli/InferDM.py b/dadi_cli/InferDM.py index 00104ef..60e9ce2 100644 --- a/dadi_cli/InferDM.py +++ b/dadi_cli/InferDM.py @@ -85,18 +85,19 @@ def infer_demography( func_ex = dadi.Numerics.make_extrap_func(func) - if cov_args != []: + if cov_args != None: try: from dadi.LowPass.LowPass import make_low_pass_func_GATK_multisample as func_cov except ModuleNotFoundError: raise ImportError("ERROR:\nCurrent dadi version does not support coverage model\n") - nseq = [int(ele) for ele in cov_args[1:]] + # nseq = [int(ele) for ele in cov_args[1]] + # cov_args[1] should be a list with ints, so no need to convert to int if cov_inbreeding == []: Fx = None else: Fx = cov_inbreeding - func_ex = func_cov(func_ex, cov_args[0], fs.pop_ids, nseq, fs.sample_sizes, Fx=Fx) + func_ex = func_cov(func_ex, cov_args[0], fs.pop_ids, cov_args[1], fs.sample_sizes, Fx=Fx) p0_len = len(p0) if fixed_params == -1: @@ -208,19 +209,19 @@ def infer_global_opt( func_ex = dadi.Numerics.make_extrap_func(func) - if cov_args != []: + if cov_args != None: try: from dadi.LowPass.LowPass import make_low_pass_func_GATK_multisample as func_cov import pickle except ModuleNotFoundError: raise ImportError("ERROR:\nCurrent dadi version does not support coverage model\n") - nseq = [int(ele) for ele in cov_args[1:]] + # nseq = [int(ele) for ele in cov_args[1:]] if cov_inbreeding == []: Fx = None else: Fx = cov_inbreeding - func_ex = func_cov(func_ex, cov_args[0], fs.pop_ids, nseq, fs.sample_sizes, Fx=Fx) + func_ex = func_cov(func_ex, cov_args[0], fs.pop_ids, cov_args[1], fs.sample_sizes, Fx=Fx) p0_len = len(p0) if fixed_params == -1: diff --git a/dadi_cli/parsers/infer_dfe_parsers.py b/dadi_cli/parsers/infer_dfe_parsers.py index 5fc5322..0c9a54c 100644 --- a/dadi_cli/parsers/infer_dfe_parsers.py +++ b/dadi_cli/parsers/infer_dfe_parsers.py @@ -71,7 +71,7 @@ def _run_infer_dfe(args: argparse.Namespace) -> None: Path or URL to the file with best fit parameters. - nomisid : bool Flag to indicate that misidentification should not be considered. - - cov_args : list + - cov_args : str Dictionary that contains the data dictionary with coverage information and total number of sample sequenced in each population for coverage correction. - cov_inbreeding : list @@ -104,8 +104,8 @@ def _run_infer_dfe(args: argparse.Namespace) -> None: # Due to development history, much of the code expects a args.misid variable, so create it. args.misid = not (fs.folded or args.nomisid) - if args.cov_args != []: - args.cov_args[0] = pickle.load(open(args.cov_args[0], 'rb')) + if args.cov_args != None: + args.cov_args = pickle.load(open(args.cov_args, 'rb')) make_dir(args.output_prefix) @@ -166,7 +166,7 @@ def _run_infer_dfe(args: argparse.Namespace) -> None: else: cache2d = args.cache2d - if not np.all(cache_ns == fs.sample_sizes) and args.cov_args == []: + if not np.all(cache_ns == fs.sample_sizes) and args.cov_args == None: raise ValueError('Cache and frequencey spectrum do not have the same sample sizes') if args.maxeval == False: diff --git a/dadi_cli/parsers/infer_dm_parsers.py b/dadi_cli/parsers/infer_dm_parsers.py index 4982e4b..a24b32b 100644 --- a/dadi_cli/parsers/infer_dm_parsers.py +++ b/dadi_cli/parsers/infer_dm_parsers.py @@ -35,7 +35,7 @@ def _run_infer_dm(args: argparse.Namespace) -> None: Grid sizes to use in demographic calculations. - misid : bool Flag to indicate if misidentification handling should be enabled. - - cov_args : list + - cov_args : str Dictionary that contains the data dictionary with coverage information and total number of sample sequenced in each population for coverage correction. - cov_inbreeding : list @@ -94,9 +94,9 @@ def _run_infer_dm(args: argparse.Namespace) -> None: # Converts str to float and None string to None value args.lbounds, args.ubounds, args.constants = convert_bounds_and_constants(args.lbounds, args.ubounds, args.constants) - if args.cov_args != []: + if args.cov_args != None: import pickle - args.cov_args[0] = pickle.load(open(args.cov_args[0], 'rb')) + args.cov_args = pickle.load(open(args.cov_args, 'rb')) # Because basic standard neutral models do not need to optimized # we can calculate the log-likelihood and theta diff --git a/tests/example_data/LowPass-files/cov.fs.coverage.pickle b/tests/example_data/LowPass-files/cov.fs.coverage.pickle index 19561ec24bc2f495f8d049bbc014c9ebb37114a2..d04dc50bccd9e952136bd1389c3e88a7182fca1a 100644 GIT binary patch delta 67 zcmbQvvY3Uhfn}-$69X8;PSKdi9m~izG4rhiYX*lmL+g|buJ$QGQ#8C8y_vkZ+NNX( TP0nLn!39+4E#fVbnyLo?K3@_Q delta 55 zcmZ3?GM$CHfn_QW69X7b Date: Mon, 28 Jul 2025 18:29:07 -0700 Subject: [PATCH 3/7] Update Plot, unit tests, and misc bug fixes --- dadi_cli/GenerateFs.py | 6 ++++-- dadi_cli/Plot.py | 12 ++++++------ dadi_cli/parsers/common_arguments.py | 2 +- dadi_cli/parsers/generate_fs_parsers.py | 7 ++++--- dadi_cli/parsers/plot_parsers.py | 6 +++--- tests/test_Plot.py | 16 ++++++++-------- tests/test_main.py | 11 ++++++----- 7 files changed, 32 insertions(+), 28 deletions(-) diff --git a/dadi_cli/GenerateFs.py b/dadi_cli/GenerateFs.py index adb0e64..c184a39 100644 --- a/dadi_cli/GenerateFs.py +++ b/dadi_cli/GenerateFs.py @@ -74,8 +74,10 @@ def generate_fs( except ImportError: print("Unable to load cyvcf2 and check if ancestral alleles are in provided VCF.\n"+ "Generated SFS may be empty if ancestral allele not found.") - # if calc_coverage != False: - # calc_coverage, nseq = True, calc_coverage + if calc_coverage == None: + raise ValueError( + "Please provide the number of sequenced haploids with --calc-coverage." + ) if subsample != []: subsample_dict = {} diff --git a/dadi_cli/Plot.py b/dadi_cli/Plot.py index a4f215e..796e71e 100644 --- a/dadi_cli/Plot.py +++ b/dadi_cli/Plot.py @@ -178,18 +178,18 @@ def plot_fitted_demography( ns = fs.sample_sizes pts_l = pts_l_func(ns) - if cov_args != []: + if cov_args != None: try: from dadi.LowPass.LowPass import make_low_pass_func_GATK_multisample as func_cov except ModuleNotFoundError: raise ImportError("ERROR:\nCurrent dadi version does not support coverage model\n") - nseq = [int(ele) for ele in cov_args[1:]] + # nseq = [int(ele) for ele in cov_args[1:]] if cov_inbreeding == []: Fx = None else: Fx = cov_inbreeding - func_ex = func_cov(func_ex, cov_args[0], fs.pop_ids, nseq, fs.sample_sizes, Fx=Fx) + func_ex = func_cov(func_ex, cov_args[0], fs.pop_ids, cov_args[1], fs.sample_sizes, Fx=Fx) model = func_ex(popt, ns, pts_l) @@ -299,18 +299,18 @@ def plot_fitted_dfe( if param_names[-2] == 'misid': func = dadi.Numerics.make_anc_state_misid_func(func) - if cov_args != []: + if cov_args != None: try: from dadi.LowPass.LowPass import make_low_pass_func_GATK_multisample as func_cov except ModuleNotFoundError: raise ImportError("ERROR:\nCurrent dadi version does not support coverage model\n") - nseq = [int(ele) for ele in cov_args[1:]] + # nseq = [int(ele) for ele in cov_args[1:]] if cov_inbreeding == []: Fx = None else: Fx = cov_inbreeding - func = func_cov(func, cov_args[0], fs.pop_ids, nseq, fs.sample_sizes, Fx=Fx) + func = func_cov(func, cov_args[0], fs.pop_ids, cov_args[1], fs.sample_sizes, Fx=Fx) # Get expected SFS for MLE if (cache1d != None) and (cache2d != None): model = func(sele_popt, None, spectra1d, spectra2d, pdf, pdf2, theta, None) diff --git a/dadi_cli/parsers/common_arguments.py b/dadi_cli/parsers/common_arguments.py index b93e745..c60ecac 100644 --- a/dadi_cli/parsers/common_arguments.py +++ b/dadi_cli/parsers/common_arguments.py @@ -514,7 +514,7 @@ def add_coverage_model_argument(parser: argparse.ArgumentParser) -> None: """ parser.add_argument( "--coverage-model", - default=False, + default=None, type=str, required=False, dest="cov_args", diff --git a/dadi_cli/parsers/generate_fs_parsers.py b/dadi_cli/parsers/generate_fs_parsers.py index 92f58c5..19c4e04 100644 --- a/dadi_cli/parsers/generate_fs_parsers.py +++ b/dadi_cli/parsers/generate_fs_parsers.py @@ -175,11 +175,12 @@ def add_generate_fs_parsers(subparsers: argparse.ArgumentParser) -> None: "--calc-coverage", nargs="+", default=False, - type=positive_int, + type=list, dest="calc_coverage", - help="Pass in the total number of sequenced haploids and store coverage information of sites in .coverage.pickle object. Default: None.", + help="Pass in the total number of sequenced haploids (nseq) for each population:\n"+ + "ex. --calc-coverage 22 12, if the total number a sequenced haploids was 22 for population 1 and 12 for population 2.\n" + + "And store coverage information of sites and nseq in .coverage.pickle object. Default: False.", ) - add_output_argument(parser) add_seed_argument(parser) parser.set_defaults(runner=_run_generate_fs) diff --git a/dadi_cli/parsers/plot_parsers.py b/dadi_cli/parsers/plot_parsers.py index cfbdd16..ac4502c 100644 --- a/dadi_cli/parsers/plot_parsers.py +++ b/dadi_cli/parsers/plot_parsers.py @@ -45,7 +45,7 @@ def _run_plot(args: argparse.Namespace) -> None: Range of residuals for plotting. - projections : tuple, optional Projections for frequency spectrum summarization. - - cov_args : list + - cov_args : str Dictionary that contains the data dictionary with coverage information and total number of sample sequenced in each population for coverage correction. - cov_inbreeding : list @@ -87,9 +87,9 @@ def _run_plot(args: argparse.Namespace) -> None: make_dir(args.output) - if args.cov_args != []: + if args.cov_args != None: import pickle - args.cov_args[0] = pickle.load(open(args.cov_args[0], 'rb')) + args.cov_args = pickle.load(open(args.cov_args, 'rb')) if args.fs is None: plot_mut_prop( diff --git a/tests/test_Plot.py b/tests/test_Plot.py index 1adeaf8..85960b9 100644 --- a/tests/test_Plot.py +++ b/tests/test_Plot.py @@ -126,7 +126,7 @@ def test_plot_fitted_demography(files): func=dadi.Demographics1D.two_epoch, popt=pytest.fs1d_demo_popt, projections=[8], - cov_args=[], + cov_args=None, cov_inbreeding=[], output="tests/test_results/plot_plot_fitted_demography_1d_w_proj.png", vmin=0, @@ -138,7 +138,7 @@ def test_plot_fitted_demography(files): func=dadi.Demographics2D.split_mig, popt=pytest.fs2d_demo_popt, projections=[8, 8], - cov_args=[], + cov_args = None, cov_inbreeding=[], output="tests/test_results/plot_plot_fitted_demography_2d_w_proj.png", vmin=1e-3, @@ -150,7 +150,7 @@ def test_plot_fitted_demography(files): func=dadi.Demographics1D.two_epoch, popt=pytest.fs1d_demo_popt, projections=None, - cov_args=[], + cov_args = None, cov_inbreeding=[], output="tests/test_results/plot_plot_fitted_demography_1d_no_proj.png", vmin=0, @@ -162,7 +162,7 @@ def test_plot_fitted_demography(files): func=dadi.Demographics2D.split_mig, popt=pytest.fs2d_demo_popt, projections=None, - cov_args=[], + cov_args = None, cov_inbreeding=[], output="tests/test_results/plot_plot_fitted_demography_2d_no_proj.png", vmin=1e-3, @@ -174,7 +174,7 @@ def test_plot_fitted_demography(files): func=dadi.Demographics3D.out_of_africa, popt=pytest.fs3d_popt, projections=None, - cov_args=[], + cov_args = None, cov_inbreeding=[], output="tests/test_results/plot_plot_fitted_demography_3d_no_proj.png", vmin=1e-3, @@ -187,7 +187,7 @@ def test_plot_fitted_demography(files): func=dadi.Demographics2D.split_mig, popt=pytest.fs2d_demo_popt, projections=None, - cov_args=[pickle.load(open("./tests/example_data/LowPass-files/cov.fs.coverage.pickle", 'rb')), 20, 20], + cov_args=pickle.load(open("./tests/example_data/LowPass-files/cov.fs.coverage.pickle", 'rb')), cov_inbreeding=[], output="tests/test_results/plot_plot_low-pass_demography.png", vmin=1e-3, @@ -207,7 +207,7 @@ def test_plot_fitted_dfe(files): pdf="lognormal", pdf2="biv_lognormal", pdf_file=None, - cov_args=[], + cov_args = None, cov_inbreeding=[], output="tests/test_results/plot_fitted_dfe_mixture_w_proj.png", vmin=1e-3, @@ -224,7 +224,7 @@ def test_plot_fitted_dfe(files): pdf="lognormal", pdf2="biv_lognormal", pdf_file=None, - cov_args=[pickle.load(open("./tests/example_data/LowPass-files/cov.fs.coverage.pickle", 'rb')), 20, 20], + cov_args=pickle.load(open("./tests/example_data/LowPass-files/cov.fs.coverage.pickle", 'rb')), cov_inbreeding=[], output="tests/test_results/plot_fitted_dfe_mixture_w_proj.png", vmin=1e-3, diff --git a/tests/test_main.py b/tests/test_main.py index 13353be..4bbb29d 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -60,7 +60,7 @@ def gen_fs_args(): gen_fs_args.pop_info = "tests/example_data/LowPass-files/cov_popfile_2D.txt" gen_fs_args.pop_ids = ["pop1", "pop2"] gen_fs_args.polarized = False - gen_fs_args.calc_coverage = True + gen_fs_args.calc_coverage = [20, 20] _run_generate_fs(gen_fs_args) os.remove(gen_fs_args.output) os.remove(gen_fs_args.output+".coverage.pickle") @@ -218,7 +218,7 @@ def infer_dm_args(): pytest.lbounds = ["1e-3", "1e-3"] pytest.constants = -1 pytest.nomisid = True - pytest.cov_args = [] + pytest.cov_args = None pytest.cov_inbreeding = [] pytest.cuda = False pytest.maxeval = 200 @@ -300,7 +300,7 @@ def test_run_infer_dm_misid(infer_dm_args): # Test LowPass def test_run_infer_dm_lowpass(infer_dm_args): - pytest.cov_args = ["tests/example_data/LowPass-files/cov.fs.coverage.pickle", 20, 20] + pytest.cov_args = "tests/example_data/LowPass-files/cov.fs.coverage.pickle" pytest.fs = "tests/example_data/LowPass-files/cov.fs" pytest.model = "split_mig" pytest.model_file = None @@ -374,7 +374,7 @@ def infer_dfe_args(): pytest.lbounds = ["1e-3", "1e-3"] pytest.constants = -1 pytest.nomisid = True - pytest.cov_args = [] + pytest.cov_args = None pytest.cov_inbreeding = [] pytest.cuda = False pytest.maxeval = 100 @@ -513,7 +513,7 @@ def test_run_infer_dfe_mix_html(infer_dfe_args): # Test LowPass @pytest.mark.skip(reason="Finish later") def test_run_infer_dfe_lowpass(infer_dm_args): - pytest.cov_args = ["tests/example_data/LowPass-files/cov.fs.coverage.pickle", 20, 20] + pytest.cov_args = "tests/example_data/LowPass-files/cov.fs.coverage.pickle" pytest.fs = "tests/example_data/LowPass-files/cov.fs" # pytest.model = "split_mig" # pytest.model_file = None @@ -635,6 +635,7 @@ def plot_args(): pytest.dfe_popt = "tests/example_data/example.split_mig.dfe.lognormal_mixture.params.InferDFE.bestfits" pytest.cache1d = "tests/example_data/cache_split_mig_1d.bpkl" pytest.cache2d = "tests/example_data/cache_split_mig_2d.bpkl" + pytest.cov_args = None pytest.vmin = 1e-3 pytest.resid_range = 10 pytest.projections = None From 88be648dadc3d3617b298e4d983e4f02c049cd44 Mon Sep 17 00:00:00 2001 From: tjstruck Date: Mon, 28 Jul 2025 18:41:48 -0700 Subject: [PATCH 4/7] Update testing importing work_queue to avoid future issue --- tests/test_main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_main.py b/tests/test_main.py index 4bbb29d..a84fe80 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -546,7 +546,7 @@ def test_top_opts_func(): try: - import work_queue as wq + import ndcctools.work_queue as wq wqskip = False except: wqskip = True From 1b064b3ef6f4732315e95955c90b4da4bcf74ddc Mon Sep 17 00:00:00 2001 From: tjstruck Date: Sun, 3 Aug 2025 09:45:06 -0700 Subject: [PATCH 5/7] Update dadi-cli LowPass integration to automatically generate a dictionary with population IDs and the max poptential sequenced smaples using dadi.make_data_dict_vcf's extract_ploidy argument --- dadi_cli/GenerateFs.py | 10 ++++++++-- dadi_cli/InferDFE.py | 6 ++++-- dadi_cli/InferDM.py | 13 ++++++++----- dadi_cli/Plot.py | 12 ++++++++---- dadi_cli/parsers/generate_fs_parsers.py | 7 ++----- pyproject.toml | 2 +- .../LowPass-files/cov.fs.coverage.pickle | Bin 547 -> 561 bytes tests/test_GenerateFs.py | 6 +++--- tests/test_main.py | 2 +- 9 files changed, 35 insertions(+), 23 deletions(-) diff --git a/dadi_cli/GenerateFs.py b/dadi_cli/GenerateFs.py index c184a39..e80bd03 100644 --- a/dadi_cli/GenerateFs.py +++ b/dadi_cli/GenerateFs.py @@ -90,6 +90,12 @@ def generate_fs( # multiply number of individuals subsamples by the ploidy to get sample size projections = [individuals*ploidy for individuals in subsample] print(projections, ploidy, subsample) + elif calc_coverage: + dd, ploidy = dadi.Misc.make_data_dict_vcf(vcf_filename=vcf, popinfo_filename=pop_info, calc_coverage=calc_coverage, extract_ploidy=True) + import collections + nseq = collections.defaultdict(int) + for line in open(pop_info).readlines(): + nseq[line.strip().split()[-1]] += 1 * ploidy else: dd = dadi.Misc.make_data_dict_vcf(vcf_filename=vcf, popinfo_filename=pop_info, calc_coverage=calc_coverage) @@ -97,12 +103,12 @@ def generate_fs( if len(pop_ids) != len(projections): raise ValueError("The lengths of `pop_ids` and `projections` must match.") - if calc_coverage: + if calc_coverage: import pickle import dadi.LowPass.LowPass as lp cov_dist = lp.compute_cov_dist(dd, pop_ids) print(f"\nSaving coverage distribution data in pickle named:\n{output}.coverage.pickle\n") - pickle.dump([cov_dist, calc_coverage], open(f"{output}.coverage.pickle","wb")) + pickle.dump([cov_dist, nseq], open(f"{output}.coverage.pickle","wb")) if bootstrap is None: fs = dadi.Spectrum.from_data_dict( diff --git a/dadi_cli/InferDFE.py b/dadi_cli/InferDFE.py index b12bd58..6682433 100644 --- a/dadi_cli/InferDFE.py +++ b/dadi_cli/InferDFE.py @@ -117,13 +117,15 @@ def infer_dfe( from dadi.LowPass.LowPass import make_low_pass_func_GATK_multisample as func_cov except ModuleNotFoundError: raise ImportError("ERROR:\nCurrent dadi version does not support coverage model\n") - # nseq = [int(ele) for ele in cov_args[1:]] + nseq = [] + for pop in fs.pop_ids: + nseq.append(cov_args[1][pop]) if cov_inbreeding == []: Fx = None else: Fx = cov_inbreeding - func = func_cov(func, cov_args[0], fs.pop_ids, cov_args[1], fs.sample_sizes, Fx=Fx) + func = func_cov(func, cov_args[0], fs.pop_ids, nseq, fs.sample_sizes, Fx=Fx) p0_len = len(p0) # lower_bounds = convert_to_None(lower_bounds, p0_len) diff --git a/dadi_cli/InferDM.py b/dadi_cli/InferDM.py index 60e9ce2..8d86073 100644 --- a/dadi_cli/InferDM.py +++ b/dadi_cli/InferDM.py @@ -90,14 +90,15 @@ def infer_demography( from dadi.LowPass.LowPass import make_low_pass_func_GATK_multisample as func_cov except ModuleNotFoundError: raise ImportError("ERROR:\nCurrent dadi version does not support coverage model\n") - # nseq = [int(ele) for ele in cov_args[1]] - # cov_args[1] should be a list with ints, so no need to convert to int + nseq = [] + for pop in fs.pop_ids: + nseq.append(cov_args[1][pop]) if cov_inbreeding == []: Fx = None else: Fx = cov_inbreeding - func_ex = func_cov(func_ex, cov_args[0], fs.pop_ids, cov_args[1], fs.sample_sizes, Fx=Fx) + func_ex = func_cov(func_ex, cov_args[0], fs.pop_ids, nseq, fs.sample_sizes, Fx=Fx) p0_len = len(p0) if fixed_params == -1: @@ -215,13 +216,15 @@ def infer_global_opt( import pickle except ModuleNotFoundError: raise ImportError("ERROR:\nCurrent dadi version does not support coverage model\n") - # nseq = [int(ele) for ele in cov_args[1:]] + nseq = [] + for pop in fs.pop_ids: + nseq.append(cov_args[1][pop]) if cov_inbreeding == []: Fx = None else: Fx = cov_inbreeding - func_ex = func_cov(func_ex, cov_args[0], fs.pop_ids, cov_args[1], fs.sample_sizes, Fx=Fx) + func_ex = func_cov(func_ex, cov_args[0], fs.pop_ids, nseq, fs.sample_sizes, Fx=Fx) p0_len = len(p0) if fixed_params == -1: diff --git a/dadi_cli/Plot.py b/dadi_cli/Plot.py index 796e71e..3cf975b 100644 --- a/dadi_cli/Plot.py +++ b/dadi_cli/Plot.py @@ -183,13 +183,15 @@ def plot_fitted_demography( from dadi.LowPass.LowPass import make_low_pass_func_GATK_multisample as func_cov except ModuleNotFoundError: raise ImportError("ERROR:\nCurrent dadi version does not support coverage model\n") - # nseq = [int(ele) for ele in cov_args[1:]] + nseq = [] + for pop in fs.pop_ids: + nseq.append(cov_args[1][pop]) if cov_inbreeding == []: Fx = None else: Fx = cov_inbreeding - func_ex = func_cov(func_ex, cov_args[0], fs.pop_ids, cov_args[1], fs.sample_sizes, Fx=Fx) + func_ex = func_cov(func_ex, cov_args[0], fs.pop_ids, nseq, fs.sample_sizes, Fx=Fx) model = func_ex(popt, ns, pts_l) @@ -304,13 +306,15 @@ def plot_fitted_dfe( from dadi.LowPass.LowPass import make_low_pass_func_GATK_multisample as func_cov except ModuleNotFoundError: raise ImportError("ERROR:\nCurrent dadi version does not support coverage model\n") - # nseq = [int(ele) for ele in cov_args[1:]] + nseq = [] + for pop in fs.pop_ids: + nseq.append(cov_args[1][pop]) if cov_inbreeding == []: Fx = None else: Fx = cov_inbreeding - func = func_cov(func, cov_args[0], fs.pop_ids, cov_args[1], fs.sample_sizes, Fx=Fx) + func = func_cov(func, cov_args[0], fs.pop_ids, nseq, fs.sample_sizes, Fx=Fx) # Get expected SFS for MLE if (cache1d != None) and (cache2d != None): model = func(sele_popt, None, spectra1d, spectra2d, pdf, pdf2, theta, None) diff --git a/dadi_cli/parsers/generate_fs_parsers.py b/dadi_cli/parsers/generate_fs_parsers.py index 19c4e04..c580bad 100644 --- a/dadi_cli/parsers/generate_fs_parsers.py +++ b/dadi_cli/parsers/generate_fs_parsers.py @@ -173,13 +173,10 @@ def add_generate_fs_parsers(subparsers: argparse.ArgumentParser) -> None: parser.add_argument( "--calc-coverage", - nargs="+", default=False, - type=list, + action="store_true", dest="calc_coverage", - help="Pass in the total number of sequenced haploids (nseq) for each population:\n"+ - "ex. --calc-coverage 22 12, if the total number a sequenced haploids was 22 for population 1 and 12 for population 2.\n" + - "And store coverage information of sites and nseq in .coverage.pickle object. Default: False.", + help="Store coverage information of sites and total haploids sequenced per population in .coverage.pickle object. Default: False.", ) add_output_argument(parser) add_seed_argument(parser) diff --git a/pyproject.toml b/pyproject.toml index c99a0c9..cae8b7f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "dadi-cli" -version = "0.9.10" +version = "0.9.11" requires-python = ">=3.9" description = "A command line interface for dadi" readme = "README.md" diff --git a/tests/example_data/LowPass-files/cov.fs.coverage.pickle b/tests/example_data/LowPass-files/cov.fs.coverage.pickle index d04dc50bccd9e952136bd1389c3e88a7182fca1a..adaace91e66ab3ced5da4fc5a9a01d211656e909 100644 GIT binary patch delta 38 pcmZ3?vXO Date: Tue, 5 Aug 2025 13:40:09 -0700 Subject: [PATCH 6/7] Update dadi_cli for new method dadi handels extract_ploidy --- dadi_cli/GenerateFs.py | 5 +++-- pyproject.toml | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/dadi_cli/GenerateFs.py b/dadi_cli/GenerateFs.py index e80bd03..a0346ff 100644 --- a/dadi_cli/GenerateFs.py +++ b/dadi_cli/GenerateFs.py @@ -88,14 +88,15 @@ def generate_fs( vcf_filename=vcf, popinfo_filename=pop_info, subsample=subsample_dict, calc_coverage=calc_coverage, extract_ploidy=True ) # multiply number of individuals subsamples by the ploidy to get sample size - projections = [individuals*ploidy for individuals in subsample] + for pop in pop_ids: + projections = [individuals*ploidy[pop] for individuals in subsample] print(projections, ploidy, subsample) elif calc_coverage: dd, ploidy = dadi.Misc.make_data_dict_vcf(vcf_filename=vcf, popinfo_filename=pop_info, calc_coverage=calc_coverage, extract_ploidy=True) import collections nseq = collections.defaultdict(int) for line in open(pop_info).readlines(): - nseq[line.strip().split()[-1]] += 1 * ploidy + nseq[line.strip().split()[-1]] += 1 * ploidy[line.strip().split()[-1]] else: dd = dadi.Misc.make_data_dict_vcf(vcf_filename=vcf, popinfo_filename=pop_info, calc_coverage=calc_coverage) diff --git a/pyproject.toml b/pyproject.toml index cae8b7f..4909d4d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "dadi-cli" -version = "0.9.11" +version = "0.9.12" requires-python = ">=3.9" description = "A command line interface for dadi" readme = "README.md" From 12ae0fede64840d5cbff830e3c5e2bcd65aa46ea Mon Sep 17 00:00:00 2001 From: tjstruck Date: Tue, 5 Aug 2025 13:48:52 -0700 Subject: [PATCH 7/7] Update required version of dadi --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 4909d4d..5ecef98 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ classifiers = [ "Programming Language :: Python :: 3.9", ] dependencies = [ - "dadi", + "dadi>=2.4.3", ] [project.scripts]