From df5972970987a4422499692cf779fde716caf31a Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 4 Aug 2025 13:24:43 +0200 Subject: [PATCH 01/22] Next development iteration `v0.9.12.dev0`. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 20decb54..6481cda3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "gpsea" -version = "0.9.11" +version = "v0.9.12.dev0" authors = [ {name = "Lauren Rekerle", email="lauren.rekerle@jax.org"}, {name = "Daniel Danis", email="daniel.danis@bih-charite.de"}, From 840f13e3491be5730f628dbce068ba5b78dcc086 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 4 Aug 2025 15:23:05 +0200 Subject: [PATCH 02/22] Implement caching `GeneCoordinateService`, expose default imprecise SV annotator. Cache gene responses. --- .../gene_to_tx_coordinates/HGNC:24650.json | 3081 +++++++++++++++++ .../gene_to_tx_coordinates/HGNC:2600.json | 1737 ++++++++++ .../gene_to_tx_coordinates/HGNC:9965.json | 1863 ++++++++++ docs/user-guide/input-data.rst | 2 +- src/gpsea/preprocessing/__init__.py | 2 + src/gpsea/preprocessing/_caching.py | 30 + src/gpsea/preprocessing/_config.py | 167 +- tests/conftest.py | 16 + .../test_patient_and_cohort_creator.py | 31 +- tests/preprocessing/test_phenopacket.py | 20 +- tests/test_config.py | 3 + 11 files changed, 6885 insertions(+), 67 deletions(-) create mode 100644 .gpsea_ci_cachedir/gene_to_tx_coordinates/HGNC:24650.json create mode 100644 .gpsea_ci_cachedir/gene_to_tx_coordinates/HGNC:2600.json create mode 100644 .gpsea_ci_cachedir/gene_to_tx_coordinates/HGNC:9965.json diff --git a/.gpsea_ci_cachedir/gene_to_tx_coordinates/HGNC:24650.json b/.gpsea_ci_cachedir/gene_to_tx_coordinates/HGNC:24650.json new file mode 100644 index 00000000..b2d93e45 --- /dev/null +++ b/.gpsea_ci_cachedir/gene_to_tx_coordinates/HGNC:24650.json @@ -0,0 +1,3081 @@ +[ + { + "identifier": 
"NM_001354259.1", + "region": { + "start": 137618991, + "end": 137794169, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + "exons": [ + { + "start": 137618991, + "end": 137619049, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137716625, + "end": 137717182, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137728348, + "end": 137728529, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137743370, + "end": 137743528, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137743901, + "end": 137744090, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137752330, + "end": 137752408, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137754170, + "end": 137754291, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137757879, + "end": 137758011, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + 
"start": 137762674, + "end": 137762820, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137775108, + "end": 137775252, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137776617, + "end": 137776844, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137777881, + "end": 137778055, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137779634, + "end": 137779717, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137782290, + "end": 137782397, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137790847, + "end": 137790970, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137792054, + "end": 137794169, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + } + ], + "cds_start": 137716633, + "cds_end": 137792120 + }, + { + "identifier": "NM_001354263.1", + "region": { + "start": 137618991, + "end": 137836127, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + 
"length": 138394717 + }, + "strand": "POSITIVE" + }, + "exons": [ + { + "start": 137618991, + "end": 137619049, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137710966, + "end": 137711030, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137716625, + "end": 137717182, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137728348, + "end": 137728529, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137743391, + "end": 137743528, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137743901, + "end": 137744090, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137752330, + "end": 137752408, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137754170, + "end": 137754291, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137757879, + "end": 137758011, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + 
"strand": "POSITIVE" + }, + { + "start": 137762674, + "end": 137762820, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137775108, + "end": 137775252, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137776617, + "end": 137776844, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137777881, + "end": 137778055, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137779634, + "end": 137779717, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137782290, + "end": 137782397, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137790847, + "end": 137790970, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137798812, + "end": 137798914, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137800879, + "end": 137800984, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 
137811460, + "end": 137811615, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137813005, + "end": 137813173, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137813385, + "end": 137813530, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137814430, + "end": 137814508, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137815946, + "end": 137816062, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137817438, + "end": 137817525, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137818059, + "end": 137818138, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137834348, + "end": 137834524, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137834772, + "end": 137836127, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + } + ], + "cds_start": 137619028, + "cds_end": 137834953 + }, + { 
+ "identifier": "NM_001354612.1", + "region": { + "start": 137618991, + "end": 137764776, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + "exons": [ + { + "start": 137618991, + "end": 137619049, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137716625, + "end": 137717182, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137728348, + "end": 137728529, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137743370, + "end": 137743528, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137743901, + "end": 137744090, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137752330, + "end": 137752408, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137754170, + "end": 137754291, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137757879, + "end": 137758011, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": 
"POSITIVE" + }, + { + "start": 137762674, + "end": 137764776, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + } + ], + "cds_start": 137716633, + "cds_end": 137762844 + }, + { + "identifier": "NM_024757.5", + "region": { + "start": 137619004, + "end": 137836127, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + "exons": [ + { + "start": 137619004, + "end": 137619049, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137710966, + "end": 137711030, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137716625, + "end": 137717182, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137728348, + "end": 137728529, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137743370, + "end": 137743528, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137743901, + "end": 137744090, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137752330, + "end": 137752408, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": 
"NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137754170, + "end": 137754291, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137757879, + "end": 137758011, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137762674, + "end": 137762820, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137775108, + "end": 137775252, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137776617, + "end": 137776844, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137777881, + "end": 137778055, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137779634, + "end": 137779717, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137782290, + "end": 137782397, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137790847, + "end": 137790970, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + 
"length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137798812, + "end": 137798914, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137800879, + "end": 137800984, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137811460, + "end": 137811615, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137813005, + "end": 137813173, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137813385, + "end": 137813530, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137814430, + "end": 137814508, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137815946, + "end": 137816062, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137817438, + "end": 137817525, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137818059, + "end": 137818138, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": 
"POSITIVE" + }, + { + "start": 137834348, + "end": 137834524, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137834772, + "end": 137836127, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + } + ], + "cds_start": 137619028, + "cds_end": 137834953 + }, + { + "identifier": "NM_001145527.1", + "region": { + "start": 137618991, + "end": 137784393, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + "exons": [ + { + "start": 137618991, + "end": 137619049, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137710966, + "end": 137711030, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137716625, + "end": 137717182, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137728348, + "end": 137728529, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137743370, + "end": 137743528, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137743901, + "end": 137744090, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": 
"NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137752330, + "end": 137752408, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137754170, + "end": 137754291, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137757879, + "end": 137758011, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137762674, + "end": 137762820, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137775108, + "end": 137775252, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137776617, + "end": 137776844, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137777881, + "end": 137778055, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137779634, + "end": 137779717, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137782290, + "end": 137782397, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + 
"length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137784105, + "end": 137784393, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + } + ], + "cds_start": 137619028, + "cds_end": 137784150 + }, + { + "identifier": "NM_001354611.1", + "region": { + "start": 137618991, + "end": 137764776, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + "exons": [ + { + "start": 137618991, + "end": 137619049, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137710966, + "end": 137711030, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137716625, + "end": 137717182, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137728348, + "end": 137728529, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137743370, + "end": 137743528, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137743901, + "end": 137744090, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137752330, + "end": 137752408, + "contig": { + "name": "9", + "genbank_acc": 
"CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137754170, + "end": 137754291, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137757879, + "end": 137758011, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137762674, + "end": 137764776, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + } + ], + "cds_start": 137619028, + "cds_end": 137762844 + }, + { + "identifier": "NM_001354611.2", + "region": { + "start": 137619004, + "end": 137764776, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + "exons": [ + { + "start": 137619004, + "end": 137619049, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137710966, + "end": 137711030, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137716625, + "end": 137717182, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137728348, + "end": 137728529, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 
137743370, + "end": 137743528, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137743901, + "end": 137744090, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137752330, + "end": 137752408, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137754170, + "end": 137754291, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137757879, + "end": 137758011, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137762674, + "end": 137764776, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + } + ], + "cds_start": 137619028, + "cds_end": 137762844 + }, + { + "identifier": "NM_024757.4", + "region": { + "start": 137618991, + "end": 137836127, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + "exons": [ + { + "start": 137618991, + "end": 137619049, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137710966, + "end": 137711030, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", 
+ "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137716625, + "end": 137717182, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137728348, + "end": 137728529, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137743370, + "end": 137743528, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137743901, + "end": 137744090, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137752330, + "end": 137752408, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137754170, + "end": 137754291, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137757879, + "end": 137758011, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137762674, + "end": 137762820, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137775108, + "end": 137775252, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": 
"POSITIVE" + }, + { + "start": 137776617, + "end": 137776844, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137777881, + "end": 137778055, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137779634, + "end": 137779717, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137782290, + "end": 137782397, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137790847, + "end": 137790970, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137798812, + "end": 137798914, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137800879, + "end": 137800984, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137811460, + "end": 137811615, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137813005, + "end": 137813173, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137813385, + 
"end": 137813530, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137814430, + "end": 137814508, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137815946, + "end": 137816062, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137817438, + "end": 137817525, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137818059, + "end": 137818138, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137834348, + "end": 137834524, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137834772, + "end": 137836127, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + } + ], + "cds_start": 137619028, + "cds_end": 137834953 + }, + { + "identifier": "NM_001354263.2", + "region": { + "start": 137619004, + "end": 137836127, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + "exons": [ + { + "start": 137619004, + "end": 137619049, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + 
"length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137710966, + "end": 137711030, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137716625, + "end": 137717182, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137728348, + "end": 137728529, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137743391, + "end": 137743528, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137743901, + "end": 137744090, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137752330, + "end": 137752408, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137754170, + "end": 137754291, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137757879, + "end": 137758011, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137762674, + "end": 137762820, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": 
"POSITIVE" + }, + { + "start": 137775108, + "end": 137775252, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137776617, + "end": 137776844, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137777881, + "end": 137778055, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137779634, + "end": 137779717, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137782290, + "end": 137782397, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137790847, + "end": 137790970, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137798812, + "end": 137798914, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137800879, + "end": 137800984, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137811460, + "end": 137811615, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137813005, + 
"end": 137813173, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137813385, + "end": 137813530, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137814430, + "end": 137814508, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137815946, + "end": 137816062, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137817438, + "end": 137817525, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137818059, + "end": 137818138, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137834348, + "end": 137834524, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137834772, + "end": 137836127, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + } + ], + "cds_start": 137619028, + "cds_end": 137834953 + }, + { + "identifier": "NM_001354259.2", + "region": { + "start": 137619004, + "end": 137794174, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + 
}, + "strand": "POSITIVE" + }, + "exons": [ + { + "start": 137619004, + "end": 137619049, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137716625, + "end": 137717182, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137728348, + "end": 137728529, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137743370, + "end": 137743528, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137743901, + "end": 137744090, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137752330, + "end": 137752408, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137754170, + "end": 137754291, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137757879, + "end": 137758011, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137762674, + "end": 137762820, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + 
{ + "start": 137775108, + "end": 137775252, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137776617, + "end": 137776844, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137777881, + "end": 137778055, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137779634, + "end": 137779717, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137782290, + "end": 137782397, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137790847, + "end": 137790970, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137792054, + "end": 137794174, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + } + ], + "cds_start": 137716633, + "cds_end": 137792120 + }, + { + "identifier": "NM_001354612.2", + "region": { + "start": 137619004, + "end": 137764776, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + "exons": [ + { + "start": 137619004, + "end": 137619049, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + 
"ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137716625, + "end": 137717182, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137728348, + "end": 137728529, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137743370, + "end": 137743528, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137743901, + "end": 137744090, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137752330, + "end": 137752408, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137754170, + "end": 137754291, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137757879, + "end": 137758011, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137762674, + "end": 137764776, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + } + ], + "cds_start": 137716633, + "cds_end": 137762844 + }, + { + "identifier": "NM_024757.3", + "region": { + "start": 137710964, + "end": 137836127, + "contig": { + "name": "9", + 
"genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + "exons": [ + { + "start": 137710964, + "end": 137711030, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137716625, + "end": 137717182, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137728348, + "end": 137728529, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137743370, + "end": 137743528, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137743901, + "end": 137744090, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137752330, + "end": 137752408, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137754170, + "end": 137754291, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137757879, + "end": 137758011, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137762674, + "end": 137762820, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + 
"refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137775108, + "end": 137775252, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137776617, + "end": 137776844, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137777881, + "end": 137778055, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137779634, + "end": 137779717, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137782290, + "end": 137782397, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137790847, + "end": 137790970, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137798812, + "end": 137798914, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137800879, + "end": 137800984, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137811460, + "end": 137811615, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": 
"chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137813005, + "end": 137813173, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137813385, + "end": 137813530, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137814430, + "end": 137814508, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137815946, + "end": 137816062, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137817438, + "end": 137817525, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137818059, + "end": 137818138, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137834348, + "end": 137834524, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137834772, + "end": 137836127, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + } + ], + "cds_start": 137716633, + "cds_end": 137834953 + }, + { + "identifier": "NM_001145527.2", + "region": { + "start": 137619004, + "end": 137784393, + "contig": { + "name": "9", + "genbank_acc": 
"CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + "exons": [ + { + "start": 137619004, + "end": 137619049, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137710966, + "end": 137711030, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137716625, + "end": 137717182, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137728348, + "end": 137728529, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137743370, + "end": 137743528, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137743901, + "end": 137744090, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137752330, + "end": 137752408, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137754170, + "end": 137754291, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137757879, + "end": 137758011, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": 
"NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137762674, + "end": 137762820, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137775108, + "end": 137775252, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137776617, + "end": 137776844, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137777881, + "end": 137778055, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137779634, + "end": 137779717, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137782290, + "end": 137782397, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + }, + { + "start": 137784105, + "end": 137784393, + "contig": { + "name": "9", + "genbank_acc": "CM000671.2", + "refseq_name": "NC_000009.12", + "ucsc_name": "chr9", + "length": 138394717 + }, + "strand": "POSITIVE" + } + ], + "cds_start": 137619028, + "cds_end": 137784150 + } +] \ No newline at end of file diff --git a/.gpsea_ci_cachedir/gene_to_tx_coordinates/HGNC:2600.json b/.gpsea_ci_cachedir/gene_to_tx_coordinates/HGNC:2600.json new file mode 100644 index 00000000..eb159f06 --- /dev/null +++ b/.gpsea_ci_cachedir/gene_to_tx_coordinates/HGNC:2600.json @@ -0,0 +1,1737 @@ +[ + { + "identifier": 
"NM_001128590.4", + "region": { + "start": 32038414, + "end": 32041642, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + "exons": [ + { + "start": 32038414, + "end": 32038624, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039093, + "end": 32039248, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039355, + "end": 32039457, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039545, + "end": 32039647, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039748, + "end": 32039835, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040004, + "end": 32040205, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040405, + "end": 32040584, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040667, + "end": 32040771, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 
32040868, + "end": 32041642, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + } + ], + "cds_start": 32038422, + "cds_end": 32041134 + }, + { + "identifier": "NM_000500.6", + "region": { + "start": 32038264, + "end": 32041670, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + "exons": [ + { + "start": 32038264, + "end": 32038624, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32038721, + "end": 32038811, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039093, + "end": 32039248, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039355, + "end": 32039457, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039545, + "end": 32039647, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039748, + "end": 32039835, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040004, + "end": 32040205, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 
170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040405, + "end": 32040584, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040667, + "end": 32040771, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040868, + "end": 32041670, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + } + ], + "cds_start": 32038422, + "cds_end": 32041131 + }, + { + "identifier": "NM_001368143.2", + "region": { + "start": 32038414, + "end": 32041642, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + "exons": [ + { + "start": 32038414, + "end": 32038624, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32038721, + "end": 32038811, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039074, + "end": 32039248, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039355, + "end": 32039457, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039545, + "end": 32039647, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + 
"refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039748, + "end": 32039835, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040004, + "end": 32040205, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040405, + "end": 32040584, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040667, + "end": 32040771, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040868, + "end": 32041642, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + } + ], + "cds_start": 32039206, + "cds_end": 32041134 + }, + { + "identifier": "NM_001128590.2", + "region": { + "start": 32038414, + "end": 32041670, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + "exons": [ + { + "start": 32038414, + "end": 32038624, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039093, + "end": 32039248, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039355, + "end": 32039457, + 
"contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039545, + "end": 32039647, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039748, + "end": 32039835, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040004, + "end": 32040205, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040405, + "end": 32040584, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040667, + "end": 32040771, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040868, + "end": 32041670, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + } + ], + "cds_start": 32038422, + "cds_end": 32041131 + }, + { + "identifier": "NM_001368144.2", + "region": { + "start": 32038414, + "end": 32041642, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + "exons": [ + { + "start": 32038414, + "end": 32038624, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": 
"POSITIVE" + }, + { + "start": 32039074, + "end": 32039248, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039355, + "end": 32039457, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039545, + "end": 32039647, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039748, + "end": 32039835, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040004, + "end": 32040205, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040405, + "end": 32040584, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040667, + "end": 32040771, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040868, + "end": 32041642, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + } + ], + "cds_start": 32039206, + "cds_end": 32041134 + }, + { + "identifier": "NM_001128590.1", + "region": { + "start": 32038304, + "end": 32041642, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", 
+ "length": 170805979 + }, + "strand": "POSITIVE" + }, + "exons": [ + { + "start": 32038304, + "end": 32038624, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039093, + "end": 32039248, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039355, + "end": 32039457, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039545, + "end": 32039647, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039748, + "end": 32039835, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040004, + "end": 32040205, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040405, + "end": 32040584, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040667, + "end": 32040771, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040868, + "end": 32041642, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + 
} + ], + "cds_start": 32038422, + "cds_end": 32041134 + }, + { + "identifier": "NM_000500.9", + "region": { + "start": 32038414, + "end": 32041644, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + "exons": [ + { + "start": 32038414, + "end": 32038624, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32038721, + "end": 32038811, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039093, + "end": 32039248, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039355, + "end": 32039457, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039545, + "end": 32039647, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039748, + "end": 32039835, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040004, + "end": 32040205, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040405, + "end": 32040584, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + 
"length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040667, + "end": 32040771, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040868, + "end": 32041644, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + } + ], + "cds_start": 32038422, + "cds_end": 32041134 + }, + { + "identifier": "NM_000500.8", + "region": { + "start": 32038414, + "end": 32041642, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + "exons": [ + { + "start": 32038414, + "end": 32038624, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32038721, + "end": 32038811, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039093, + "end": 32039248, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039355, + "end": 32039457, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039545, + "end": 32039647, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039748, + "end": 32039835, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + 
"refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040004, + "end": 32040205, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040405, + "end": 32040584, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040667, + "end": 32040771, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040868, + "end": 32041642, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + } + ], + "cds_start": 32038422, + "cds_end": 32041134 + }, + { + "identifier": "NM_001368143.1", + "region": { + "start": 32038305, + "end": 32041642, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + "exons": [ + { + "start": 32038305, + "end": 32038624, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32038721, + "end": 32038811, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039074, + "end": 32039248, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039355, + "end": 32039457, + 
"contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039545, + "end": 32039647, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039748, + "end": 32039835, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040004, + "end": 32040205, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040405, + "end": 32040584, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040667, + "end": 32040771, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040868, + "end": 32041642, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + } + ], + "cds_start": 32039206, + "cds_end": 32041134 + }, + { + "identifier": "NM_001368144.1", + "region": { + "start": 32038305, + "end": 32041642, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + "exons": [ + { + "start": 32038305, + "end": 32038624, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": 
"POSITIVE" + }, + { + "start": 32039074, + "end": 32039248, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039355, + "end": 32039457, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039545, + "end": 32039647, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039748, + "end": 32039835, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040004, + "end": 32040205, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040405, + "end": 32040584, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040667, + "end": 32040771, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040868, + "end": 32041642, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + } + ], + "cds_start": 32039206, + "cds_end": 32041134 + }, + { + "identifier": "NM_001128590.3", + "region": { + "start": 32038315, + "end": 32041670, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", 
+ "length": 170805979 + }, + "strand": "POSITIVE" + }, + "exons": [ + { + "start": 32038315, + "end": 32038624, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039093, + "end": 32039248, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039355, + "end": 32039457, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039545, + "end": 32039647, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039748, + "end": 32039835, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040004, + "end": 32040205, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040405, + "end": 32040584, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040667, + "end": 32040771, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040868, + "end": 32041670, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + 
} + ], + "cds_start": 32038422, + "cds_end": 32041134 + }, + { + "identifier": "NM_000500.7", + "region": { + "start": 32038315, + "end": 32041670, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + "exons": [ + { + "start": 32038315, + "end": 32038624, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32038721, + "end": 32038811, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039093, + "end": 32039248, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039355, + "end": 32039457, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039545, + "end": 32039647, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039748, + "end": 32039835, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040004, + "end": 32040205, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040405, + "end": 32040584, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + 
"length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040667, + "end": 32040771, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040868, + "end": 32041670, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + } + ], + "cds_start": 32038422, + "cds_end": 32041134 + }, + { + "identifier": "NM_000500.5", + "region": { + "start": 32038304, + "end": 32041642, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + "exons": [ + { + "start": 32038304, + "end": 32038624, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32038721, + "end": 32038811, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039093, + "end": 32039248, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039355, + "end": 32039457, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039545, + "end": 32039647, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32039748, + "end": 32039835, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + 
"refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040004, + "end": 32040205, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040405, + "end": 32040584, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040667, + "end": 32040771, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + }, + { + "start": 32040868, + "end": 32041642, + "contig": { + "name": "6", + "genbank_acc": "CM000668.2", + "refseq_name": "NC_000006.12", + "ucsc_name": "chr6", + "length": 170805979 + }, + "strand": "POSITIVE" + } + ], + "cds_start": 32038422, + "cds_end": 32041134 + } +] \ No newline at end of file diff --git a/.gpsea_ci_cachedir/gene_to_tx_coordinates/HGNC:9965.json b/.gpsea_ci_cachedir/gene_to_tx_coordinates/HGNC:9965.json new file mode 100644 index 00000000..44381e1d --- /dev/null +++ b/.gpsea_ci_cachedir/gene_to_tx_coordinates/HGNC:9965.json @@ -0,0 +1,1863 @@ +[ + { + "identifier": "NM_012102.2", + "region": { + "start": 240138957, + "end": 240604026, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + "exons": [ + { + "start": 240138957, + "end": 240139263, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240163834, + "end": 240164019, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 
248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240299981, + "end": 240300450, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240332042, + "end": 240332113, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240341736, + "end": 240341862, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240398899, + "end": 240399005, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240399851, + "end": 240399948, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240415104, + "end": 240415209, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240447747, + "end": 240447796, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240458893, + "end": 240459018, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240461260, + "end": 240461360, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { 
+ "start": 240490399, + "end": 240490498, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240533615, + "end": 240533696, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240590448, + "end": 240590611, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240591584, + "end": 240591677, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240592167, + "end": 240592367, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240593578, + "end": 240593740, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240594546, + "end": 240594660, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240594932, + "end": 240596311, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240596436, + "end": 240596659, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240597506, + "end": 240598227, + 
"contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240600176, + "end": 240600323, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240600823, + "end": 240601004, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240601302, + "end": 240604026, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + } + ], + "cds_start": 240300126, + "cds_end": 240601337 + }, + { + "identifier": "NM_012102.4", + "region": { + "start": 240138782, + "end": 240604019, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + "exons": [ + { + "start": 240138782, + "end": 240139263, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240163834, + "end": 240164019, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240299981, + "end": 240300450, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240332042, + "end": 240332113, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + 
"strand": "NEGATIVE" + }, + { + "start": 240341736, + "end": 240341862, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240398899, + "end": 240399005, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240399851, + "end": 240399948, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240415104, + "end": 240415209, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240447747, + "end": 240447796, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240458893, + "end": 240459018, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240461260, + "end": 240461360, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240490399, + "end": 240490498, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240533615, + "end": 240533696, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 
240590448, + "end": 240590611, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240591584, + "end": 240591677, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240592167, + "end": 240592367, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240593578, + "end": 240593740, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240594546, + "end": 240594660, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240594932, + "end": 240596311, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240596436, + "end": 240596659, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240597506, + "end": 240598227, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240600176, + "end": 240600323, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240600823, + "end": 240601004, + "contig": { + 
"name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240601302, + "end": 240604019, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + } + ], + "cds_start": 240300125, + "cds_end": 240601336 + }, + { + "identifier": "NM_012102.3", + "region": { + "start": 240138782, + "end": 240604019, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + "exons": [ + { + "start": 240138782, + "end": 240139263, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240163834, + "end": 240164019, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240299981, + "end": 240300450, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240332042, + "end": 240332113, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240341736, + "end": 240341862, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240398899, + "end": 240399005, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": 
"NEGATIVE" + }, + { + "start": 240399851, + "end": 240399948, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240415104, + "end": 240415209, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240447747, + "end": 240447796, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240458893, + "end": 240459018, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240461260, + "end": 240461360, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240490399, + "end": 240490498, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240533615, + "end": 240533696, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240590448, + "end": 240590611, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240591584, + "end": 240591677, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240592167, + 
"end": 240592367, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240593578, + "end": 240593740, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240594546, + "end": 240594660, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240594932, + "end": 240596311, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240596436, + "end": 240596659, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240597506, + "end": 240598227, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240600176, + "end": 240600323, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240600823, + "end": 240601004, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240601302, + "end": 240604019, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + } + ], + "cds_start": 240300125, + "cds_end": 240601336 + }, + { + 
"identifier": "NM_001042681.1", + "region": { + "start": 240138782, + "end": 240604019, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + "exons": [ + { + "start": 240138782, + "end": 240139263, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240299981, + "end": 240300450, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240332042, + "end": 240332113, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240341736, + "end": 240341862, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240398899, + "end": 240399005, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240399851, + "end": 240399948, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240415104, + "end": 240415209, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240447747, + "end": 240447796, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": 
"NEGATIVE" + }, + { + "start": 240458893, + "end": 240459018, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240461260, + "end": 240461360, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240490399, + "end": 240490498, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240533615, + "end": 240533696, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240590448, + "end": 240590611, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240591584, + "end": 240591677, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240592167, + "end": 240592367, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240593578, + "end": 240593740, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240594546, + "end": 240594660, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240594932, + 
"end": 240596311, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240596436, + "end": 240596659, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240597506, + "end": 240598227, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240600176, + "end": 240600323, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240600823, + "end": 240601004, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240601302, + "end": 240604019, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + } + ], + "cds_start": 240300125, + "cds_end": 240601336 + }, + { + "identifier": "NM_001042682.1", + "region": { + "start": 240532735, + "end": 240604019, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + "exons": [ + { + "start": 240532735, + "end": 240532862, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240533615, + "end": 240533696, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + 
"length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240590448, + "end": 240590611, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240591584, + "end": 240591677, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240592167, + "end": 240592367, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240593578, + "end": 240593740, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240594546, + "end": 240594660, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240594932, + "end": 240596311, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240596436, + "end": 240596659, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240597506, + "end": 240598227, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240600176, + "end": 240600323, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": 
"NEGATIVE" + }, + { + "start": 240600823, + "end": 240601004, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240601302, + "end": 240604019, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + } + ], + "cds_start": 240592289, + "cds_end": 240601336 + }, + { + "identifier": "NM_001042681.2", + "region": { + "start": 240138782, + "end": 240604019, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + "exons": [ + { + "start": 240138782, + "end": 240139263, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240299981, + "end": 240300450, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240332042, + "end": 240332113, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240341736, + "end": 240341862, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240398899, + "end": 240399005, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240399851, + "end": 240399948, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": 
"NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240415104, + "end": 240415209, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240447747, + "end": 240447796, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240458893, + "end": 240459018, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240461260, + "end": 240461360, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240490399, + "end": 240490498, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240533615, + "end": 240533696, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240590448, + "end": 240590611, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240591584, + "end": 240591677, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240592167, + "end": 240592367, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + 
"length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240593578, + "end": 240593740, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240594546, + "end": 240594660, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240594932, + "end": 240596311, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240596436, + "end": 240596659, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240597506, + "end": 240598227, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240600176, + "end": 240600323, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240600823, + "end": 240601004, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240601302, + "end": 240604019, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + } + ], + "cds_start": 240300125, + "cds_end": 240601336 + }, + { + "identifier": "NM_001042682.2", + "region": { + "start": 240532590, + "end": 240604019, + "contig": { + "name": "1", + "genbank_acc": 
"CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + "exons": [ + { + "start": 240532590, + "end": 240532862, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240533615, + "end": 240533696, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240590448, + "end": 240590611, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240591584, + "end": 240591677, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240592167, + "end": 240592367, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240593578, + "end": 240593740, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240594546, + "end": 240594660, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240594932, + "end": 240596311, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240596436, + "end": 240596659, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": 
"NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240597506, + "end": 240598227, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240600176, + "end": 240600323, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240600823, + "end": 240601004, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + }, + { + "start": 240601302, + "end": 240604019, + "contig": { + "name": "1", + "genbank_acc": "CM000663.2", + "refseq_name": "NC_000001.11", + "ucsc_name": "chr1", + "length": 248956422 + }, + "strand": "NEGATIVE" + } + ], + "cds_start": 240592289, + "cds_end": 240601336 + } +] \ No newline at end of file diff --git a/docs/user-guide/input-data.rst b/docs/user-guide/input-data.rst index 164531bc..84e50139 100644 --- a/docs/user-guide/input-data.rst +++ b/docs/user-guide/input-data.rst @@ -88,7 +88,7 @@ Load phenopackets We can create a cohort starting from a collection of `Phenopacket` objects provided by Python `Phenopackets `_ library. For the purpose of this example, we will load a cohort of patients with pathogenic mutations in *RERE* gene -which are included in the release `0.1.18` of `Phenopacket Store `_. +which are included in the release `0.1.19` of `Phenopacket Store `_. 
We use `Phenopacket Store Toolkit `_ (``ppktstore`` in the code) to reduce the boilerplate code associated with extracting phenopacket data from Phenopacket Store release: diff --git a/src/gpsea/preprocessing/__init__.py b/src/gpsea/preprocessing/__init__.py index 9b1ec1da..41a06ee2 100644 --- a/src/gpsea/preprocessing/__init__.py +++ b/src/gpsea/preprocessing/__init__.py @@ -4,6 +4,7 @@ from ._config import load_phenopacket_folder, load_phenopacket_files, load_phenopackets from ._config import configure_caching_cohort_creator, configure_cohort_creator from ._config import configure_default_tx_coordinate_service, configure_default_functional_annotator +from ._config import configure_default_imprecise_sv_functional_annotator from ._config import configure_default_protein_metadata_service, configure_protein_metadata_service from ._generic import DefaultImpreciseSvFunctionalAnnotator from ._patient import PatientCreator, CohortCreator, CohortCreatorOptions @@ -21,6 +22,7 @@ "configure_cohort_creator", "configure_default_tx_coordinate_service", "configure_default_functional_annotator", + "configure_default_imprecise_sv_functional_annotator", "configure_default_protein_metadata_service", "configure_protein_metadata_service", "VariantCoordinateFinder", diff --git a/src/gpsea/preprocessing/_caching.py b/src/gpsea/preprocessing/_caching.py index 0873c084..e5c8edd0 100644 --- a/src/gpsea/preprocessing/_caching.py +++ b/src/gpsea/preprocessing/_caching.py @@ -17,6 +17,7 @@ from ._api import ( FunctionalAnnotator, + GeneCoordinateService, ProteinMetadataService, TranscriptCoordinateService, ) @@ -218,3 +219,32 @@ def annotate( self._cache.store_item(cache_key, annotations) return annotations + + +class CachingGeneCoordinateService(GeneCoordinateService): + # NOT PART OF THE PUBLIC API + + def __init__( + self, + cache: Cache[typing.Sequence[TranscriptCoordinates]], + fallback: GeneCoordinateService, + ) -> None: + assert isinstance(cache, Cache) + self._cache = cache + + 
assert isinstance(fallback, GeneCoordinateService) + self._fallback = fallback + + def fetch_for_gene( + self, + gene: str, + ) -> typing.Sequence[TranscriptCoordinates]: + assert isinstance(gene, str) + + # `gene` may be treated as a file name by the cache. + item = self._cache.load_item(gene) + if item is None: # cache miss + item = self._fallback.fetch_for_gene(gene) + self._cache.store_item(gene, item) + + return item diff --git a/src/gpsea/preprocessing/_config.py b/src/gpsea/preprocessing/_config.py index 3fe4209c..90aeaeb5 100644 --- a/src/gpsea/preprocessing/_config.py +++ b/src/gpsea/preprocessing/_config.py @@ -23,6 +23,8 @@ from gpsea.model.genome import GRCh37, GRCh38, GenomeBuild from ._api import ( FunctionalAnnotator, + GeneCoordinateService, + ImpreciseSvFunctionalAnnotator, ProteinMetadataService, PreprocessingValidationResult, TranscriptCoordinateService, @@ -34,6 +36,7 @@ from ._caching import ( JsonCache, CachingFunctionalAnnotator, + CachingGeneCoordinateService, CachingProteinMetadataService, CachingTranscriptCoordinateService, ) @@ -50,7 +53,8 @@ def configure_caching_cohort_creator( validation_runner: typing.Optional[ValidationRunner] = None, cache_dir: typing.Optional[str] = None, include_ontology_class_onsets: bool = True, - variant_fallback: str = "VEP", + variant_fallback: typing.Literal["VEP"] = "VEP", + tx_coord_source_fallback: typing.Literal["VV"] = "VV", timeout: typing.Union[float, int] = 30.0, ) -> CohortCreator[Phenopacket]: """ @@ -67,7 +71,9 @@ def configure_caching_cohort_creator( :param include_ontology_class_onsets: `True` if onsets in the ontology class format (e.g. `HP:0003621` for Juvenile onset) should be included (default `True`). :param variant_fallback: the fallback variant annotator to use if we cannot find the annotation locally. - Choose from ``{'VEP'}`` (just one fallback implementation is available at the moment). + Choose from ``{'VEP'}`` (just one fallback implementation is available at the moment). 
+ :param tx_coord_source_fallback: the fallback source of transcript coordinates. + Choose from ``{'VV'}`` (only one fallback implementation is available at the moment). :param timeout: timeout in seconds for the REST APIs """ cache_dir = _configure_cache_dir(cache_dir) @@ -75,8 +81,17 @@ def configure_caching_cohort_creator( build = _configure_build(genome_build) validator = _setup_hpo_validator(hpo, validation_runner) - functional_annotator = _configure_functional_annotator(cache_dir, variant_fallback, timeout) - imprecise_sv_functional_annotator = _configure_imprecise_sv_annotator(build, cache_dir, timeout) + functional_annotator = _configure_functional_annotator( + cache_dir, + variant_fallback, + timeout, + ) + imprecise_sv_functional_annotator = _configure_imprecise_sv_annotator( + build, + tx_coord_source_fallback, + cache_dir, + timeout, + ) hgvs_annotator = VVHgvsVariantCoordinateFinder(build) term_onset_parser = PhenopacketOntologyTermOnsetParser.default_parser() if include_ontology_class_onsets else None pc = PhenopacketPatientCreator( @@ -97,7 +112,8 @@ def configure_cohort_creator( genome_build: typing.Literal["GRCh37.p13", "GRCh38.p13"] = "GRCh38.p13", validation_runner: typing.Optional[ValidationRunner] = None, include_ontology_class_onsets: bool = True, - variant_fallback: str = "VEP", + variant_fallback: typing.Literal["VEP"] = "VEP", + tx_coord_source_fallback: typing.Literal["VV"] = "VV", timeout: typing.Union[float, int] = 30.0, ) -> CohortCreator[Phenopacket]: """ @@ -114,15 +130,21 @@ def configure_cohort_creator( (e.g. `HP:0003621` for Juvenile onset) should be included (default `True`). :param variant_fallback: the fallback variant annotator to use if we cannot find the annotation locally. Choose from ``{'VEP'}`` (just one fallback implementation is available at the moment). + :param tx_coord_source_fallback: the fallback source of transcript coordinates. + Choose from ``{'VV'}`` (only one fallback implementation is available at the moment). 
:param timeout: timeout in seconds for the VEP API """ build = _configure_build(genome_build) timeout = _normalize_timeout(timeout) validator = _setup_hpo_validator(hpo, validation_runner) - functional_annotator = _configure_fallback_functional(variant_fallback, timeout) + functional_annotator = _configure_fallback_functional( + variant_fallback, + timeout, + ) imprecise_sv_functional_annotator = _configure_imprecise_sv_annotator( build, + tx_coord_source_fallback=tx_coord_source_fallback, cache_dir=None, timeout=timeout, ) @@ -197,6 +219,14 @@ def configure_default_tx_coordinate_service( cache_dir: typing.Optional[str] = None, timeout: typing.Union[float, int] = 30.0, ) -> TranscriptCoordinateService: + """ + Get the default implementation of the :class:`~gpsea.preprocessing.TranscriptCoordinateService`. + + :param tx_source: the source of transcript coordinates (VV - variant validator REST API by default). + :param genome_build: the genome build to use. + :param cache_dir: path to top-level cache directory or `None` if no caching should be done. + :param timeout: the timeout in seconds to use for interacting with REST APIs. + """ cache_dir = _configure_cache_dir(cache_dir) timeout = _normalize_timeout(timeout) build = _configure_build(genome_build) @@ -214,16 +244,48 @@ def configure_default_functional_annotator( cache_dir: typing.Optional[str] = None, timeout: typing.Union[float, int] = 30.0, ) -> FunctionalAnnotator: + """ + Get the default implementation of the :class:`~gpsea.preprocessing.FunctionalAnnotator`. + + :param ann_source: the source of transcript coordinates (VV - variant validator REST API by default). + :param cache_dir: path to top-level cache directory or `None` if no caching should be done. + :param timeout: the timeout in seconds to use for interacting with REST APIs. 
+ """ cache_dir = _configure_cache_dir(cache_dir) timeout = _normalize_timeout(timeout) - return _configure_func_annotator( - ann_source=ann_source, + return _configure_functional_annotator( cache_dir=cache_dir, + variant_fallback=ann_source, timeout=timeout, ) +def configure_default_imprecise_sv_functional_annotator( + genome_build: typing.Union[GenomeBuild, typing.Literal["hg19", "hg38"]] = "hg38", + tx_coord_source: typing.Literal["VV"] = "VV", + cache_dir: typing.Optional[str] = None, + timeout: float = 30.0, +) -> ImpreciseSvFunctionalAnnotator: + """ + Get the default implementation of the :class:`~gpsea.preprocessing.ImpreciseSvFunctionalAnnotator`. + + :param genome_build: the genome build to use. + :param ann_source: the source of transcript coordinates (VV - variant validator REST API by default). + :param cache_dir: path to top-level cache directory or `None` if no caching should be done. + :param timeout: the timeout in seconds to use for interacting with REST APIs. + """ + cache_dir = _configure_cache_dir(cache_dir) + timeout = _normalize_timeout(timeout) + build = _configure_build(genome_build) + + return _configure_imprecise_sv_annotator( + genome_build=build, + tx_coord_source_fallback=tx_coord_source, + cache_dir=cache_dir, + timeout=timeout, + ) + def _configure_protein_service( protein_fallback: str, cache_dir: str, @@ -280,25 +342,6 @@ def _configure_tx_service( return CachingTranscriptCoordinateService(cache=tx_cache, fallback=fallback) -def _configure_func_annotator( - ann_source: str, - cache_dir: str, - timeout: float, -) -> FunctionalAnnotator: - if ann_source == "VEP": - fallback = VepFunctionalAnnotator(timeout=timeout) - else: - raise ValueError(f"Unknown functional annotation source {ann_source}") - # Setup cache - tx_cache_dir = os.path.join(cache_dir, "tx_cache") - os.makedirs(tx_cache_dir, exist_ok=True) - tx_cache = JsonCache( - data_dir=tx_cache_dir, - indent=2, - ) - return CachingFunctionalAnnotator(cache=tx_cache, 
fallback=fallback) - - def _configure_cache_dir( cache_dir: typing.Optional[str] = None, ) -> str: @@ -351,7 +394,7 @@ def _setup_hpo_validator( def _configure_functional_annotator( cache_dir: str, - variant_fallback: str, + variant_fallback: typing.Literal["VEP"], timeout: float, ) -> FunctionalAnnotator: # (2) FunctionalAnnotator @@ -366,11 +409,14 @@ def _configure_functional_annotator( indent=2, ) - return CachingFunctionalAnnotator(cache=cache, fallback=fallback) + return CachingFunctionalAnnotator( + cache=cache, + fallback=fallback, + ) def _configure_fallback_functional( - variant_fallback: str, + variant_fallback: typing.Literal["VEP"], timeout: float, ) -> FunctionalAnnotator: if variant_fallback == "VEP": @@ -380,23 +426,62 @@ def _configure_fallback_functional( return fallback -def _configure_imprecise_sv_annotator( +def _configure_gene_coordinate_service( genome_build: GenomeBuild, - cache_dir: typing.Optional[str] = None, + tx_coord_source_fallback: typing.Literal["VV"], + cache_dir: typing.Optional[str], timeout: float = 30.0, -): - # Setup cache for SVs - if cache_dir is not None: - _sv_cache_dir = os.path.join(cache_dir, "sv_cache") - # TODO: implement the cache. 
- # os.makedirs(sv_cache_dir, exist_ok=True) - # var_cache = VariantAnnotationCache(sv_cache_dir) +) -> GeneCoordinateService: + fallback = _configure_fallback_gene_coordinate_service( + genome_build=genome_build, + tx_coord_source=tx_coord_source_fallback, + timeout=timeout, + ) - return DefaultImpreciseSvFunctionalAnnotator( - gene_coordinate_service=VVMultiCoordinateService( + if cache_dir is None: + return fallback + else: + gene_cache_dir = os.path.join(cache_dir, "gene_to_tx_coordinates") + os.makedirs(gene_cache_dir, exist_ok=True) + cache = JsonCache( + gene_cache_dir, + indent=2, + ) + return CachingGeneCoordinateService( + cache=cache, + fallback=fallback, + ) + + +def _configure_fallback_gene_coordinate_service( + genome_build: GenomeBuild, + tx_coord_source: typing.Literal["VV"], + timeout: float = 30.0, +) -> GeneCoordinateService: + if tx_coord_source == "VV": + return VVMultiCoordinateService( genome_build=genome_build, timeout=timeout, ) + else: + raise ValueError(f"Unknown transcript coordinate source {tx_coord_source}") + +def _configure_imprecise_sv_annotator( + genome_build: GenomeBuild, + tx_coord_source_fallback: typing.Literal["VV"], + cache_dir: typing.Optional[str], + timeout: float = 30.0, +) -> ImpreciseSvFunctionalAnnotator: + # TODO: setup cache for SVs + gene_coordinate_service = _configure_gene_coordinate_service( + genome_build=genome_build, + cache_dir=cache_dir, + tx_coord_source_fallback=tx_coord_source_fallback, + timeout=timeout, + ) + + return DefaultImpreciseSvFunctionalAnnotator( + gene_coordinate_service=gene_coordinate_service, ) diff --git a/tests/conftest.py b/tests/conftest.py index e564d5bf..acccb789 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -15,6 +15,7 @@ from gpsea.analysis.clf import GenotypeClassifier, biallelic_classifier, allele_count from gpsea.analysis.clf import PhenotypeClassifier, HpoClassifier from gpsea.analysis.predicate import variant_effect +from gpsea.config import CACHE_ENV, 
DEFAULT_CACHE_PATH from gpsea.io import GpseaJSONDecoder from gpsea.model import ( Cohort, @@ -63,6 +64,21 @@ def fpath_project_dir(fpath_test_dir: str) -> str: return os.path.dirname(fpath_test_dir) +@pytest.fixture(scope="session") +def fpath_cache_dir( + fpath_project_dir: str, +) -> str: + if CACHE_ENV in os.environ: + # The variable may be set e.g. on GitHub action runner + cache_dir = os.environ[CACHE_ENV] + else: + cache_dir = os.path.join(fpath_project_dir, DEFAULT_CACHE_PATH) + + assert os.path.isdir(cache_dir), "Cache dir with test responses should already exist!" + + return str(cache_dir) + + @pytest.fixture(scope="session") def fpath_test_dir() -> str: """ diff --git a/tests/preprocessing/test_patient_and_cohort_creator.py b/tests/preprocessing/test_patient_and_cohort_creator.py index 1f4adfaf..dd2410bb 100644 --- a/tests/preprocessing/test_patient_and_cohort_creator.py +++ b/tests/preprocessing/test_patient_and_cohort_creator.py @@ -2,39 +2,44 @@ import io import hpotk -from hpotk.validate import ValidationRunner import pytest +from hpotk.validate import ValidationRunner + from gpsea.model.genome import GenomeBuild from gpsea.preprocessing import FunctionalAnnotator, ImpreciseSvFunctionalAnnotator, VariantCoordinateFinder -from gpsea.preprocessing import VVHgvsVariantCoordinateFinder, DefaultImpreciseSvFunctionalAnnotator +from gpsea.preprocessing import VVHgvsVariantCoordinateFinder from gpsea.preprocessing import PhenopacketPatientCreator -from gpsea.preprocessing import VVMultiCoordinateService from gpsea.preprocessing import CohortCreator, load_phenopacket_folder -from gpsea.preprocessing import configure_default_functional_annotator +from gpsea.preprocessing import ( + configure_default_functional_annotator, + configure_default_imprecise_sv_functional_annotator, +) class TestPhenopacketCohortCreator: - @pytest.fixture + @pytest.fixture(scope="class") def functional_annotator( self, + fpath_cache_dir: str, ) -> FunctionalAnnotator: return 
configure_default_functional_annotator( ann_source="VEP", + cache_dir=fpath_cache_dir, ) - @pytest.fixture + @pytest.fixture(scope="class") def imprecise_sv_functional_annotator( self, genome_build: GenomeBuild, + fpath_cache_dir: str, ) -> ImpreciseSvFunctionalAnnotator: - return DefaultImpreciseSvFunctionalAnnotator( - gene_coordinate_service=VVMultiCoordinateService( - genome_build=genome_build, - ), + return configure_default_imprecise_sv_functional_annotator( + genome_build=genome_build, + cache_dir=fpath_cache_dir, ) - @pytest.fixture + @pytest.fixture(scope="class") def variant_coordinate_finder( self, genome_build: GenomeBuild, @@ -43,7 +48,7 @@ def variant_coordinate_finder( genome_build=genome_build, ) - @pytest.fixture + @pytest.fixture(scope="class") def patient_creator( self, hpo: hpotk.MinimalOntology, @@ -62,7 +67,7 @@ def patient_creator( hgvs_coordinate_finder=variant_coordinate_finder, ) - @pytest.fixture + @pytest.fixture(scope="class") def phenopacket_cohort_creator( self, patient_creator: PhenopacketPatientCreator, diff --git a/tests/preprocessing/test_phenopacket.py b/tests/preprocessing/test_phenopacket.py index 3e4123de..f68b11b0 100644 --- a/tests/preprocessing/test_phenopacket.py +++ b/tests/preprocessing/test_phenopacket.py @@ -22,11 +22,10 @@ from gpsea.preprocessing import ( FunctionalAnnotator, ImpreciseSvFunctionalAnnotator, - DefaultImpreciseSvFunctionalAnnotator, configure_default_functional_annotator, + configure_default_imprecise_sv_functional_annotator ) from gpsea.preprocessing import PhenopacketPatientCreator, PhenopacketOntologyTermOnsetParser -from gpsea.preprocessing import VVMultiCoordinateService class TestPhenopacketVariantCoordinateFinder: @@ -175,29 +174,26 @@ def read_genomic_interpretation_json(fpath: str) -> GenomicInterpretation: class TestPhenopacketPatientCreator: + @pytest.fixture(scope="class") def functional_annotator( self, - fpath_project_dir: str, + fpath_cache_dir: str, ) -> FunctionalAnnotator: - 
fpath_cache_dir = os.path.join(fpath_project_dir, ".gpsea_cache") - fpath_variant_cache_dir = os.path.join(fpath_cache_dir, "variant_cache") - os.makedirs(fpath_variant_cache_dir, exist_ok=True) - return configure_default_functional_annotator( ann_source="VEP", - cache_dir=fpath_variant_cache_dir, + cache_dir=fpath_cache_dir, ) @pytest.fixture(scope="class") def imprecise_sv_functional_annotator( self, genome_build: GenomeBuild, + fpath_cache_dir: str, ) -> ImpreciseSvFunctionalAnnotator: - return DefaultImpreciseSvFunctionalAnnotator( - gene_coordinate_service=VVMultiCoordinateService( - genome_build=genome_build, - ), + return configure_default_imprecise_sv_functional_annotator( + genome_build=genome_build, + cache_dir=fpath_cache_dir, ) @pytest.fixture(scope="class") diff --git a/tests/test_config.py b/tests/test_config.py index 62aa847a..ef30c325 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -28,3 +28,6 @@ def test_create_using_environment_variable( cd = get_cache_dir_path() assert cd == target + + # Clean up the side effect + del os.environ[CACHE_ENV] From d7f52fc9019edfb7146cdde72282205bef1ddcbb Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 4 Aug 2025 15:25:39 +0200 Subject: [PATCH 03/22] Apply formatting. 
--- src/gpsea/preprocessing/_config.py | 2 ++ tests/conftest.py | 2 +- tests/preprocessing/test_phenopacket.py | 3 +-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/gpsea/preprocessing/_config.py b/src/gpsea/preprocessing/_config.py index 90aeaeb5..3820b777 100644 --- a/src/gpsea/preprocessing/_config.py +++ b/src/gpsea/preprocessing/_config.py @@ -286,6 +286,7 @@ def configure_default_imprecise_sv_functional_annotator( timeout=timeout, ) + def _configure_protein_service( protein_fallback: str, cache_dir: str, @@ -466,6 +467,7 @@ def _configure_fallback_gene_coordinate_service( else: raise ValueError(f"Unknown transcript coordinate source {tx_coord_source}") + def _configure_imprecise_sv_annotator( genome_build: GenomeBuild, tx_coord_source_fallback: typing.Literal["VV"], diff --git a/tests/conftest.py b/tests/conftest.py index acccb789..a4800419 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -73,7 +73,7 @@ def fpath_cache_dir( cache_dir = os.environ[CACHE_ENV] else: cache_dir = os.path.join(fpath_project_dir, DEFAULT_CACHE_PATH) - + assert os.path.isdir(cache_dir), "Cache dir with test responses should already exist!" 
return str(cache_dir) diff --git a/tests/preprocessing/test_phenopacket.py b/tests/preprocessing/test_phenopacket.py index f68b11b0..52552be8 100644 --- a/tests/preprocessing/test_phenopacket.py +++ b/tests/preprocessing/test_phenopacket.py @@ -23,7 +23,7 @@ FunctionalAnnotator, ImpreciseSvFunctionalAnnotator, configure_default_functional_annotator, - configure_default_imprecise_sv_functional_annotator + configure_default_imprecise_sv_functional_annotator, ) from gpsea.preprocessing import PhenopacketPatientCreator, PhenopacketOntologyTermOnsetParser @@ -174,7 +174,6 @@ def read_genomic_interpretation_json(fpath: str) -> GenomicInterpretation: class TestPhenopacketPatientCreator: - @pytest.fixture(scope="class") def functional_annotator( self, From 21357c37197c626eaae8427cb5b3e9757f449fbf Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 4 Aug 2025 15:32:32 +0200 Subject: [PATCH 04/22] Use absolute path in both branches. --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index a4800419..4a3fb10d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -70,7 +70,7 @@ def fpath_cache_dir( ) -> str: if CACHE_ENV in os.environ: # The variable may be set e.g. on GitHub action runner - cache_dir = os.environ[CACHE_ENV] + cache_dir = os.path.join(fpath_project_dir, os.environ[CACHE_ENV]) else: cache_dir = os.path.join(fpath_project_dir, DEFAULT_CACHE_PATH) From 9d6151abbf9e59fff590bc3c0d4e91074bd54eb9 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 4 Aug 2025 15:40:54 +0200 Subject: [PATCH 05/22] Add debugging. --- tests/conftest.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 4a3fb10d..1c9c7685 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -70,10 +70,14 @@ def fpath_cache_dir( ) -> str: if CACHE_ENV in os.environ: # The variable may be set e.g. 
on GitHub action runner - cache_dir = os.path.join(fpath_project_dir, os.environ[CACHE_ENV]) + cache_dir = os.environ[CACHE_ENV] + print(f"Using environ: {cache_dir}") else: - cache_dir = os.path.join(fpath_project_dir, DEFAULT_CACHE_PATH) - + cache_dir = DEFAULT_CACHE_PATH + print(f"Using default: {cache_dir}") + cache_dir = os.path.join(fpath_project_dir, cache_dir) + print(os.listdir(fpath_project_dir)) + assert os.path.isdir(cache_dir), "Cache dir with test responses should already exist!" return str(cache_dir) From 55b6dea4018943c7193736709daf3bb30aac9a41 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 4 Aug 2025 15:50:35 +0200 Subject: [PATCH 06/22] Further debug. --- .github/workflows/python_ci.yml | 7 +++++-- tests/conftest.py | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python_ci.yml b/.github/workflows/python_ci.yml index d5d69f68..0ea88cdc 100644 --- a/.github/workflows/python_ci.yml +++ b/.github/workflows/python_ci.yml @@ -21,8 +21,11 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install package run: python3 -m pip install --editable .[test] - - name: Run pytest tests - run: MPLBACKEND=Agg GPSEA_CACHEDIR=.gpsea_ci_cachedir pytest + - name: Run tests + env: + GPSEA_CACHEDIR: ".gpsea_ci_cachedir" + MPLBACKEND: "Agg" + run: pytest formatting: name: Check code formatting diff --git a/tests/conftest.py b/tests/conftest.py index 1c9c7685..9f8d9544 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -68,6 +68,8 @@ def fpath_project_dir(fpath_test_dir: str) -> str: def fpath_cache_dir( fpath_project_dir: str, ) -> str: + e = list(os.environ.keys()) + print(f"Env: {e}") if CACHE_ENV in os.environ: # The variable may be set e.g. on GitHub action runner cache_dir = os.environ[CACHE_ENV] From 0e1e671f0151fea2b35aaf0921a8a48ecb68c8de Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 4 Aug 2025 15:57:08 +0200 Subject: [PATCH 07/22] More debugging. 
--- .github/workflows/python_ci.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python_ci.yml b/.github/workflows/python_ci.yml index 0ea88cdc..7586429a 100644 --- a/.github/workflows/python_ci.yml +++ b/.github/workflows/python_ci.yml @@ -23,9 +23,11 @@ jobs: run: python3 -m pip install --editable .[test] - name: Run tests env: - GPSEA_CACHEDIR: ".gpsea_ci_cachedir" MPLBACKEND: "Agg" - run: pytest + # GPSEA_CACHEDIR: ".gpsea_ci_cachedir" + run: | + export GPSEA_CACHEDIR=my_funky_thing + pytest formatting: name: Check code formatting From 7f266fd0158653271e130906626fc99845bb32a5 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 4 Aug 2025 16:12:37 +0200 Subject: [PATCH 08/22] More debugging. --- .github/workflows/python_ci.yml | 6 ++---- tests/conftest.py | 8 +++----- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/.github/workflows/python_ci.yml b/.github/workflows/python_ci.yml index 7586429a..a765dfb8 100644 --- a/.github/workflows/python_ci.yml +++ b/.github/workflows/python_ci.yml @@ -24,10 +24,8 @@ jobs: - name: Run tests env: MPLBACKEND: "Agg" - # GPSEA_CACHEDIR: ".gpsea_ci_cachedir" - run: | - export GPSEA_CACHEDIR=my_funky_thing - pytest + GPSEA_CACHEDIR: ".gpsea_ci_cachedir" + run: pytest tests/preprocessing formatting: name: Check code formatting diff --git a/tests/conftest.py b/tests/conftest.py index 9f8d9544..dbbfc0c9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -68,17 +68,15 @@ def fpath_project_dir(fpath_test_dir: str) -> str: def fpath_cache_dir( fpath_project_dir: str, ) -> str: - e = list(os.environ.keys()) - print(f"Env: {e}") + env_stuff = sorted(os.environ.keys()) + print(f"Env: {env_stuff}") if CACHE_ENV in os.environ: # The variable may be set e.g. 
on GitHub action runner cache_dir = os.environ[CACHE_ENV] print(f"Using environ: {cache_dir}") else: - cache_dir = DEFAULT_CACHE_PATH + cache_dir = os.path.join(fpath_project_dir, DEFAULT_CACHE_PATH) print(f"Using default: {cache_dir}") - cache_dir = os.path.join(fpath_project_dir, cache_dir) - print(os.listdir(fpath_project_dir)) assert os.path.isdir(cache_dir), "Cache dir with test responses should already exist!" From afb0734df1c99c1105217a88a5f990973d22fb6a Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 4 Aug 2025 16:14:47 +0200 Subject: [PATCH 09/22] Check one more time. --- .github/workflows/python_ci.yml | 2 +- tests/conftest.py | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/python_ci.yml b/.github/workflows/python_ci.yml index a765dfb8..f126f3df 100644 --- a/.github/workflows/python_ci.yml +++ b/.github/workflows/python_ci.yml @@ -25,7 +25,7 @@ jobs: env: MPLBACKEND: "Agg" GPSEA_CACHEDIR: ".gpsea_ci_cachedir" - run: pytest tests/preprocessing + run: pytest formatting: name: Check code formatting diff --git a/tests/conftest.py b/tests/conftest.py index dbbfc0c9..acccb789 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -68,15 +68,11 @@ def fpath_project_dir(fpath_test_dir: str) -> str: def fpath_cache_dir( fpath_project_dir: str, ) -> str: - env_stuff = sorted(os.environ.keys()) - print(f"Env: {env_stuff}") if CACHE_ENV in os.environ: # The variable may be set e.g. on GitHub action runner cache_dir = os.environ[CACHE_ENV] - print(f"Using environ: {cache_dir}") else: cache_dir = os.path.join(fpath_project_dir, DEFAULT_CACHE_PATH) - print(f"Using default: {cache_dir}") assert os.path.isdir(cache_dir), "Cache dir with test responses should already exist!" From df9973855f74cf9bf11d9c02167db6c854a1659c Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 4 Aug 2025 16:14:59 +0200 Subject: [PATCH 10/22] Apply formatting. 
--- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index acccb789..a4800419 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -73,7 +73,7 @@ def fpath_cache_dir( cache_dir = os.environ[CACHE_ENV] else: cache_dir = os.path.join(fpath_project_dir, DEFAULT_CACHE_PATH) - + assert os.path.isdir(cache_dir), "Cache dir with test responses should already exist!" return str(cache_dir) From 361e4412ee8998c6b52ded950de594de74ed91ed Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 4 Aug 2025 16:19:15 +0200 Subject: [PATCH 11/22] More debugging. --- .github/workflows/python_ci.yml | 2 +- tests/conftest.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python_ci.yml b/.github/workflows/python_ci.yml index f126f3df..29e2a0fa 100644 --- a/.github/workflows/python_ci.yml +++ b/.github/workflows/python_ci.yml @@ -25,7 +25,7 @@ jobs: env: MPLBACKEND: "Agg" GPSEA_CACHEDIR: ".gpsea_ci_cachedir" - run: pytest + run: pytest tests/preprocessing/ formatting: name: Check code formatting diff --git a/tests/conftest.py b/tests/conftest.py index a4800419..3dfaec5b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -70,10 +70,13 @@ def fpath_cache_dir( ) -> str: if CACHE_ENV in os.environ: # The variable may be set e.g. on GitHub action runner + print("In env") cache_dir = os.environ[CACHE_ENV] else: + print("In default") cache_dir = os.path.join(fpath_project_dir, DEFAULT_CACHE_PATH) + print(cache_dir) assert os.path.isdir(cache_dir), "Cache dir with test responses should already exist!" return str(cache_dir) From b9fb137807a96217e4adb2318bbd3f586e4ebdda Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 4 Aug 2025 16:23:27 +0200 Subject: [PATCH 12/22] Update config test. 
--- .github/workflows/python_ci.yml | 2 +- tests/conftest.py | 3 --- tests/test_config.py | 5 +++-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/workflows/python_ci.yml b/.github/workflows/python_ci.yml index 29e2a0fa..f126f3df 100644 --- a/.github/workflows/python_ci.yml +++ b/.github/workflows/python_ci.yml @@ -25,7 +25,7 @@ jobs: env: MPLBACKEND: "Agg" GPSEA_CACHEDIR: ".gpsea_ci_cachedir" - run: pytest tests/preprocessing/ + run: pytest formatting: name: Check code formatting diff --git a/tests/conftest.py b/tests/conftest.py index 3dfaec5b..a4800419 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -70,13 +70,10 @@ def fpath_cache_dir( ) -> str: if CACHE_ENV in os.environ: # The variable may be set e.g. on GitHub action runner - print("In env") cache_dir = os.environ[CACHE_ENV] else: - print("In default") cache_dir = os.path.join(fpath_project_dir, DEFAULT_CACHE_PATH) - print(cache_dir) assert os.path.isdir(cache_dir), "Cache dir with test responses should already exist!" return str(cache_dir) diff --git a/tests/test_config.py b/tests/test_config.py index ef30c325..e9a341c3 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -23,11 +23,12 @@ def test_create_using_environment_variable( target = tmp_path / ".ou_yeah" assert not target.exists() + previous = os.environ.get(CACHE_ENV) os.environ[CACHE_ENV] = str(target) cd = get_cache_dir_path() assert cd == target - # Clean up the side effect - del os.environ[CACHE_ENV] + if previous is not None: + os.environ[CACHE_ENV] = previous From a0d152bd04c7a3506764b166563f895475da4086 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 3 Nov 2025 15:08:00 +0100 Subject: [PATCH 13/22] Phenopacket store toolkit is only needed for testing. 
--- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6481cda3..6e7f58c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,6 @@ dependencies = [ "Jinja2>=3.1.4,<4.0.0", "phenopackets>=2.0.2.post4", "pandas>=2.0.0,<3.0.0", - "phenopacket-store-toolkit>=0.1.2", "requests>=2.25.0,<3.0", "scipy>=1.10,<2.0", "statsmodels>=0.13.0", @@ -50,6 +49,7 @@ dependencies = [ [project.optional-dependencies] test = [ + "phenopacket-store-toolkit>=0.1.2", "pytest>=7.0.0,<8.0.0", "ruff==0.12.1", ] From 3ecd46e4a87bec0fd516b7b93dedc58af1f0e2bf Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 5 Nov 2025 15:07:15 +0100 Subject: [PATCH 14/22] Update Custom components section. --- docs/user-guide/custom-components.rst | 82 +++++++++++++++++---------- 1 file changed, 53 insertions(+), 29 deletions(-) diff --git a/docs/user-guide/custom-components.rst b/docs/user-guide/custom-components.rst index af851a2c..6cd64803 100644 --- a/docs/user-guide/custom-components.rst +++ b/docs/user-guide/custom-components.rst @@ -36,9 +36,9 @@ for using body mass index (BMI) as a phenotype score. >>> from gpsea.analysis.pscore import PhenotypeScorer >>> class BmiScorer(PhenotypeScorer): # ❶ ... -... def __init__( # ❷ +... def __init__( ... self, -... id2bmi: typing.Mapping[str, float], +... id2bmi: typing.Mapping[str, float], # ❷ ... ): ... self._id2bmi = id2bmi ... @@ -60,19 +60,22 @@ for using body mass index (BMI) as a phenotype score. ... except KeyError: ... return float('nan') -❶ The ``BmiScorer`` must extend :class:`~gpsea.analysis.pscore.PhenotypeScorer` -to be used as a phenotype scorer. -❷ The scorer needs a ``dict`` with `label` → `BMI` for the analyzed individuals. -We assume the user will pre-compute the corresponding ``dict``. +The ``BmiScorer`` must extend :class:`~gpsea.analysis.pscore.PhenotypeScorer` +to be used as a phenotype scorer (❶). +The scorer needs a mapping (e.g. 
a Python ``dict``) with `label` → `BMI` for the analyzed individuals (❷). +We assume the user will pre-compute the BMI values. + +Then, the scorer must expose several properties, including ``name``, ``description``, +and the ``variable_name`` it operates on (❸❹❺). +GPSEA uses the properties to describe the scorer in reports or visualizations. +We should always aim for short and concise descriptions. -Then, the scorer must expose several properties, including ❸ ``name``, ❹ ``description``, -and the ❺ ``variable_name`` it operates on. -The properties provide bookkeeping metadata to use in e.g. visualizations. -Try to choose short and concise names. +The most important part of the scorer is the `score` method (❻). +As stated above, the scorer is expected to compute a numerical value or `NaN` +if the individual should be excluded from the analysis. +In the case of BMI scorer, the BMI is retrieved from the ``id2bmi`` dictionary. +If the BMI is missing, `NaN` is returned and the individual is omitted from the analysis. -The most important part of the scorer is the ❻ `score` method -which retrieves the BMI for an individual or returns `NaN` if the value is not available -and the individual should be omitted from the analysis. .. _custom-variant-predicate: @@ -80,16 +83,17 @@ and the individual should be omitted from the analysis. Variant predicate ***************** -The purpose of a :class:`~gpsea.analysis.predicate.VariantPredicate` is to test -if a variant meets a certain criterion and GPSEA ships with an array -of builtin predicates (see :mod:`gpsea.analysis.predicate` module). -However, chances are a custom predicate will be needed in future, -so we show how to how to extend -the :class:`~gpsea.analysis.predicate.VariantPredicate` class -to create one's own predicate. +A :class:`~gpsea.analysis.predicate.VariantPredicate` tests +if a variant meets a certain criterion (e.g. variant is a deletion, variant is annotated wrt. 
a transcript of interest) +in order to assign the individual harboring the variant into a genotype class. +GPSEA ships with an array of builtin predicates (see :mod:`gpsea.analysis.predicate` module) +that should cover the most commonly needed cases. + +However, since it is unlikely that the builtin predicates cover *all* cases, +GPSEA allows users to define custom variant predicates. Here we show how to create one. -Specifically, we show how to create a predicate to test if the variant affects a glycine residue -of the transcript of interest. +As an example, we show how to create a predicate for checking if the variant affects a glycine residue +in a transcript of interest. >>> from gpsea.model import Variant, VariantEffect >>> from gpsea.analysis.predicate import VariantPredicate @@ -133,10 +137,30 @@ of the transcript of interest. ... def __str__(self) -> str: # ➓ ... return f"AffectsGlycinePredicate(tx_id={self._tx_id})" -❶ The ``AffectsGlycinePredicate`` must extend :class:`~gpsea.analysis.predicate.VariantPredicate`. -❷ We ask the user to provide the transcript accession `str` and we set the target aminoacid code to glycine ``Gly``. -Like in the :ref:`custom-phenotype-scorer` above, ❸❹❺ provide metadata required for the bookkeeping. -The ❻ ``test`` method includes the most interesting part - we retrieve the :class:`~gpsea.model.TranscriptAnnotation` -with the functional annotation data for the transcript of interest, and we test if the HGVS protein indicates -that the reference aminoacid is glycine. -Last, we override ➐ ``__eq__()`` and ❽ ``__hash__()`` (required) as well as ❾ ``__repr__()`` and ➓ ``__str__()`` (recommended). + +The ``AffectsGlycinePredicate`` must extend :class:`~gpsea.analysis.predicate.VariantPredicate` to work with GPSEA (❶). +We ask the user to provide the transcript accession `str` and we set the target aminoacid code to glycine ``Gly`` (❷). + +.. 
note:: + + Clearly, the predicate can be adapted to test for change of *any* aminoacid + with only a slight rewrite of the predicate's constructor. + We will leave this as an exercise for the interested readers. + +Like in the :ref:`custom-phenotype-scorer` above, we provide metadata required for reports and visualizations (❸❹❺). + +The ``test`` method includes the most important logic of the predicate (❻). +In this specific case, we retrieve the :class:`~gpsea.model.TranscriptAnnotation` +with the functional annotation data for the transcript of interest, +and we test if the HGVS protein indicates that the reference aminoacid is glycine. + +.. note:: + + We recommend using an Integrated Development Environment (IDE) such as PyCharm or VS Code to design the predicate. + On top of autocompletion and syntax checking features, an IDE simplifies accessing the properties and methods of objects. + In case of :class:`~gpsea.model.Variant`, an IDE will help us discover its ``get_tx_anno_by_tx_id`` method, + realize that it returns either :class:`~gpsea.model.TranscriptAnnotation` or ``None``, + and retrieve the functional annotation of the variant with respect to the transcript's protein sequence + from the ``hgvsp`` field. + +Last, we override ``__eq__()`` and ``__hash__()`` (required, ➐❽) as well as ``__repr__()`` and ``__str__()`` (recommended, ❾➓). From 2cc3c77149a158d095199b3a70e5be78314244c4 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 6 Nov 2025 09:22:26 +0100 Subject: [PATCH 15/22] Add numpy compatibility notes. --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 6e7f58c3..9ed153b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,8 @@ dependencies = [ "requests>=2.25.0,<3.0", "scipy>=1.10,<2.0", "statsmodels>=0.13.0", + # numpy==1.23 was released on June 23rd, 2022. + # numpy==2.0.0 does not break any APIs used by GPSEA. 
"numpy>=1.23", "matplotlib>=3.2.0,<4.0", "ratelimit>=2.2.1,<3", From 2244dfaa8f57ace1ad6ff9996df69e5e610bf9bf Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Fri, 7 Nov 2025 14:28:47 +0100 Subject: [PATCH 16/22] Remove typo. --- src/gpsea/analysis/predicate/_predicates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gpsea/analysis/predicate/_predicates.py b/src/gpsea/analysis/predicate/_predicates.py index 9537c213..98ab8150 100644 --- a/src/gpsea/analysis/predicate/_predicates.py +++ b/src/gpsea/analysis/predicate/_predicates.py @@ -194,7 +194,7 @@ class VariantTranscriptPredicate(VariantPredicate): is annotated to affect a transcript with `tx_id` accession. Args: - tx_id (str): the accessiono of the transcript of interest, e.g. `NM_123456.7` + tx_id (str): the accession of the transcript of interest, e.g. `NM_123456.7` """ def __init__(self, tx_id: str) -> None: From 15ac120553a6b4a5dfd882442b61d7c6632835c8 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Fri, 12 Dec 2025 13:46:00 +0100 Subject: [PATCH 17/22] Process VEP response for variants on the mitochondrial chromosome. --- src/gpsea/preprocessing/_vep.py | 34 +- .../data/vep_response/MT_11778_11778_G_A.json | 506 ++++++++++++++++++ tests/preprocessing/test_vep.py | 21 + 3 files changed, 551 insertions(+), 10 deletions(-) create mode 100644 tests/preprocessing/data/vep_response/MT_11778_11778_G_A.json diff --git a/src/gpsea/preprocessing/_vep.py b/src/gpsea/preprocessing/_vep.py index 40a736e1..75ef6de3 100644 --- a/src/gpsea/preprocessing/_vep.py +++ b/src/gpsea/preprocessing/_vep.py @@ -85,9 +85,11 @@ def _process_item(self, item: typing.Dict) -> typing.Optional[TranscriptAnnotati Parse one transcript annotation from the JSON response. 
""" trans_id = item.get("transcript_id") - if not self._include_computational_txs and not trans_id.startswith("NM_"): - # Skipping a computational transcript - return None + assert isinstance(trans_id, str) + if VepFunctionalAnnotator._seems_like_refseq_tx(trans_id): + if not self._include_computational_txs and not trans_id.startswith("NM_"): + # Skipping a computational transcript + return None is_preferred = True if ("canonical" in item and item["canonical"] == 1) else False hgvs_cdna = item.get("hgvsc") var_effects = [] @@ -97,12 +99,12 @@ def _process_item(self, item: typing.Dict) -> typing.Optional[TranscriptAnnotati if var_effect is not None: var_effects.append(var_effect) gene_name = item.get("gene_symbol") - exons_effected = item.get("exon") - if exons_effected is not None: - exons_effected = exons_effected.split("/")[0].split("-") - if len(exons_effected) == 2: - exons_effected = range(int(exons_effected[0]), int(exons_effected[1]) + 1) - exons_effected = (int(x) for x in exons_effected) + exons_affected = item.get("exon") + if exons_affected is not None: + exons_affected = exons_affected.split("/")[0].split("-") + if len(exons_affected) == 2: + exons_affected = range(int(exons_affected[0]), int(exons_affected[1]) + 1) + exons_affected = (int(x) for x in exons_affected) protein_id = item.get("protein_id") hgvsp = item.get("hgvsp") @@ -123,7 +125,15 @@ def _process_item(self, item: typing.Dict) -> typing.Optional[TranscriptAnnotati protein_effect = Region(protein_effect_start, protein_effect_end) return TranscriptAnnotation( - gene_name, trans_id, hgvs_cdna, is_preferred, var_effects, exons_effected, protein_id, hgvsp, protein_effect + gene_name, + trans_id, + hgvs_cdna, + is_preferred, + var_effects, + exons_affected, + protein_id, + hgvsp, + protein_effect, ) def fetch_response( @@ -190,3 +200,7 @@ def format_coordinates_for_vep_query(vc: VariantCoordinates) -> str: # MNV return f"{chrom}:{start}-{end}/{alt}" + + @staticmethod + def 
_seems_like_refseq_tx(tx_id: str) -> bool: + return tx_id.startswith('NM_') or tx_id.startswith('XM_') diff --git a/tests/preprocessing/data/vep_response/MT_11778_11778_G_A.json b/tests/preprocessing/data/vep_response/MT_11778_11778_G_A.json new file mode 100644 index 00000000..09b3580e --- /dev/null +++ b/tests/preprocessing/data/vep_response/MT_11778_11778_G_A.json @@ -0,0 +1,506 @@ +{ + "id": "MT_11778_G/A", + "variant_class": "SNV", + "input": "MT 11778 11778 G/A 1", + "most_severe_consequence": "missense_variant", + "transcript_consequences": [ + { + "gene_id": "4508", + "consequence_terms": [ + "downstream_gene_variant" + ], + "transcript_id": "ATP6.1", + "strand": 1, + "used_ref": "G", + "protein_id": "YP_003024031.1", + "variant_allele": "A", + "canonical": 1, + "distance": 2571, + "given_ref": "G", + "gene_symbol": "ATP6", + "gene_symbol_source": "EntrezGene", + "impact": "MODIFIER", + "biotype": "protein_coding" + }, + { + "given_ref": "G", + "gene_symbol": "ATP8", + "gene_symbol_source": "EntrezGene", + "impact": "MODIFIER", + "biotype": "protein_coding", + "used_ref": "G", + "protein_id": "YP_003024030.1", + "canonical": 1, + "variant_allele": "A", + "distance": 3206, + "strand": 1, + "gene_id": "4509", + "transcript_id": "ATP8.1", + "consequence_terms": [ + "downstream_gene_variant" + ] + }, + { + "transcript_id": "COX1.1", + "consequence_terms": [ + "downstream_gene_variant" + ], + "gene_id": "4512", + "strand": 1, + "variant_allele": "A", + "canonical": 1, + "distance": 4333, + "used_ref": "G", + "protein_id": "YP_003024028.1", + "impact": "MODIFIER", + "biotype": "protein_coding", + "given_ref": "G", + "gene_symbol": "COX1", + "gene_symbol_source": "EntrezGene" + }, + { + "strand": 1, + "gene_id": "4513", + "consequence_terms": [ + "downstream_gene_variant" + ], + "transcript_id": "COX2.1", + "gene_symbol": "COX2", + "gene_symbol_source": "EntrezGene", + "given_ref": "G", + "biotype": "protein_coding", + "impact": "MODIFIER", + "used_ref": "G", + 
"protein_id": "YP_003024029.1", + "distance": 3509, + "variant_allele": "A", + "canonical": 1 + }, + { + "strand": 1, + "consequence_terms": [ + "downstream_gene_variant" + ], + "transcript_id": "COX3.1", + "gene_id": "4514", + "impact": "MODIFIER", + "biotype": "protein_coding", + "given_ref": "G", + "gene_symbol_source": "EntrezGene", + "gene_symbol": "COX3", + "variant_allele": "A", + "canonical": 1, + "distance": 1788, + "protein_id": "YP_003024032.1", + "used_ref": "G" + }, + { + "gene_symbol_source": "EntrezGene", + "gene_symbol": "CYTB", + "given_ref": "G", + "biotype": "protein_coding", + "impact": "MODIFIER", + "protein_id": "YP_003024038.1", + "used_ref": "G", + "canonical": 1, + "variant_allele": "A", + "distance": 2969, + "strand": 1, + "gene_id": "4519", + "consequence_terms": [ + "upstream_gene_variant" + ], + "transcript_id": "CYTB.1" + }, + { + "impact": "MODIFIER", + "biotype": "protein_coding", + "given_ref": "G", + "gene_symbol": "ND3", + "gene_symbol_source": "EntrezGene", + "distance": 1374, + "variant_allele": "A", + "canonical": 1, + "protein_id": "YP_003024033.1", + "used_ref": "G", + "strand": 1, + "transcript_id": "ND3.1", + "consequence_terms": [ + "downstream_gene_variant" + ], + "gene_id": "4537" + }, + { + "variant_allele": "A", + "distance": 1012, + "canonical": 1, + "protein_id": "YP_003024034.1", + "used_ref": "G", + "biotype": "protein_coding", + "impact": "MODIFIER", + "gene_symbol": "ND4L", + "gene_symbol_source": "EntrezGene", + "given_ref": "G", + "consequence_terms": [ + "downstream_gene_variant" + ], + "transcript_id": "ND4L.1", + "gene_id": "4539", + "strand": 1 + }, + { + "variant_allele": "A", + "cdna_end": 1019, + "protein_id": "YP_003024035.1", + "used_ref": "G", + "impact": "MODERATE", + "cds_start": 1019, + "transcript_id": "ND4.1", + "sift_prediction": "deleterious_low_confidence", + "gene_id": "4538", + "codons": "cGc/cAc", + "polyphen_score": 0.996, + "canonical": 1, + "hgvsp": "YP_003024035.1:p.Arg340His", + 
"amino_acids": "R/H", + "mutfunc": { + "mod": { + "dG_mt": 346.5258, + "ddG_sd": 0.009126, + "dG_mt_sd": 0.095423, + "dG_wt_sd": 0.097037, + "ddG": 0.3596, + "dG_wt": 346.1662 + } + }, + "protein_end": 340, + "cds_end": 1019, + "biotype": "protein_coding", + "exon": "1/1", + "gene_symbol": "ND4", + "gene_symbol_source": "EntrezGene", + "given_ref": "G", + "sift_score": 0, + "consequence_terms": [ + "missense_variant" + ], + "hgvsc": "ND4.1:c.1019G>A", + "cdna_start": 1019, + "polyphen_prediction": "probably_damaging", + "protein_start": 340, + "strand": 1 + }, + { + "impact": "MODIFIER", + "biotype": "protein_coding", + "given_ref": "G", + "gene_symbol_source": "EntrezGene", + "gene_symbol": "ND5", + "variant_allele": "A", + "distance": 559, + "canonical": 1, + "used_ref": "G", + "protein_id": "YP_003024036.1", + "strand": 1, + "consequence_terms": [ + "upstream_gene_variant" + ], + "transcript_id": "ND5.1", + "gene_id": "4540" + }, + { + "transcript_id": "ND6.1", + "consequence_terms": [ + "downstream_gene_variant" + ], + "gene_id": "4541", + "strand": -1, + "canonical": 1, + "variant_allele": "A", + "distance": 2371, + "used_ref": "G", + "protein_id": "YP_003024037.1", + "impact": "MODIFIER", + "biotype": "protein_coding", + "given_ref": "G", + "gene_symbol": "ND6", + "gene_symbol_source": "EntrezGene" + }, + { + "variant_allele": "A", + "distance": 4193, + "canonical": 1, + "used_ref": "G", + "biotype": "tRNA", + "impact": "MODIFIER", + "gene_symbol_source": "EntrezGene", + "gene_symbol": "TRND", + "given_ref": "G", + "consequence_terms": [ + "downstream_gene_variant" + ], + "transcript_id": "TRND.1", + "gene_id": "4555", + "strand": 1 + }, + { + "impact": "MODIFIER", + "biotype": "tRNA", + "given_ref": "G", + "gene_symbol_source": "EntrezGene", + "gene_symbol": "TRNE", + "variant_allele": "A", + "distance": 2896, + "canonical": 1, + "used_ref": "G", + "strand": -1, + "transcript_id": "TRNE.1", + "consequence_terms": [ + "downstream_gene_variant" + ], + 
"gene_id": "4556" + }, + { + "transcript_id": "TRNG.1", + "consequence_terms": [ + "downstream_gene_variant" + ], + "gene_id": "4563", + "strand": 1, + "distance": 1720, + "variant_allele": "A", + "canonical": 1, + "used_ref": "G", + "impact": "MODIFIER", + "biotype": "tRNA", + "given_ref": "G", + "gene_symbol": "TRNG", + "gene_symbol_source": "EntrezGene" + }, + { + "distance": 360, + "variant_allele": "A", + "canonical": 1, + "used_ref": "G", + "impact": "MODIFIER", + "biotype": "tRNA", + "given_ref": "G", + "gene_symbol": "TRNH", + "gene_symbol_source": "EntrezGene", + "transcript_id": "TRNH.1", + "consequence_terms": [ + "upstream_gene_variant" + ], + "gene_id": "4564", + "strand": 1 + }, + { + "consequence_terms": [ + "downstream_gene_variant" + ], + "transcript_id": "TRNK.1", + "gene_id": "4566", + "strand": 1, + "canonical": 1, + "variant_allele": "A", + "distance": 3414, + "used_ref": "G", + "impact": "MODIFIER", + "biotype": "tRNA", + "given_ref": "G", + "gene_symbol": "TRNK", + "gene_symbol_source": "EntrezGene" + }, + { + "strand": 1, + "consequence_terms": [ + "upstream_gene_variant" + ], + "transcript_id": "TRNL2.1", + "gene_id": "4568", + "biotype": "tRNA", + "impact": "MODIFIER", + "gene_symbol": "TRNL2", + "gene_symbol_source": "EntrezGene", + "given_ref": "G", + "variant_allele": "A", + "canonical": 1, + "distance": 488, + "used_ref": "G" + }, + { + "biotype": "tRNA", + "impact": "MODIFIER", + "gene_symbol": "TRNP", + "gene_symbol_source": "EntrezGene", + "given_ref": "G", + "distance": 4178, + "variant_allele": "A", + "canonical": 1, + "used_ref": "G", + "strand": -1, + "transcript_id": "TRNP.1", + "consequence_terms": [ + "downstream_gene_variant" + ], + "gene_id": "4571" + }, + { + "biotype": "tRNA", + "impact": "MODIFIER", + "gene_symbol_source": "EntrezGene", + "gene_symbol": "TRNR", + "given_ref": "G", + "variant_allele": "A", + "canonical": 1, + "distance": 1309, + "used_ref": "G", + "strand": 1, + "transcript_id": "TRNR.1", + 
"consequence_terms": [ + "downstream_gene_variant" + ], + "gene_id": "4573" + }, + { + "gene_symbol": "TRNS1", + "gene_symbol_source": "EntrezGene", + "given_ref": "G", + "biotype": "tRNA", + "impact": "MODIFIER", + "used_ref": "G", + "canonical": 1, + "variant_allele": "A", + "distance": 4264, + "strand": -1, + "gene_id": "4574", + "consequence_terms": [ + "upstream_gene_variant" + ], + "transcript_id": "TRNS1.1" + }, + { + "used_ref": "G", + "distance": 429, + "variant_allele": "A", + "canonical": 1, + "given_ref": "G", + "gene_symbol_source": "EntrezGene", + "gene_symbol": "TRNS2", + "impact": "MODIFIER", + "biotype": "tRNA", + "gene_id": "4575", + "transcript_id": "TRNS2.1", + "consequence_terms": [ + "upstream_gene_variant" + ], + "strand": 1 + }, + { + "given_ref": "G", + "gene_symbol": "TRNT", + "gene_symbol_source": "EntrezGene", + "impact": "MODIFIER", + "biotype": "tRNA", + "used_ref": "G", + "variant_allele": "A", + "canonical": 1, + "distance": 4110, + "strand": 1, + "gene_id": "4576", + "consequence_terms": [ + "upstream_gene_variant" + ], + "transcript_id": "TRNT.1" + } + ], + "allele_string": "G/A", + "start": 11778, + "seq_region_name": "MT", + "assembly_name": "GRCh38", + "strand": 1, + "colocated_variants": [ + { + "start": 11778, + "id": "rs199476112", + "strand": 1, + "clin_sig_allele": "A:pathogenic", + "end": 11778, + "var_synonyms": { + "OMIM": [ + 516003.0001 + ], + "UniProt": [ + "VAR_004760" + ], + "ClinVar": [ + "RCV002260593", + "RCV002288481", + "RCV002285007", + "RCV004814876", + "RCV004814875", + "RCV000010354", + "VCV000009708", + "RCV000224219" + ] + }, + "seq_region_name": "MT", + "pubmed": [ + 25741868, + 1900003, + 20301353, + 34002094, + 12560876, + 1417830, + 8755941, + 19026397, + 1346348, + 1352537, + 1734726, + 1763894, + 1770533, + 1770665, + 1866007, + 1937476, + 1959619, + 1959931, + 2039048, + 2222273, + 2286378, + 2346190, + 2346203, + 2390098, + 2566021, + 2566116, + 2575667, + 2817063, + 3201231, + 8101084, + 8240101, 
+ 8240102, + 8448903, + 8449667, + 8457609, + 8474822, + 8489402, + 8489411, + 9150158, + 11169561, + 11854175, + 12402249, + 16431939, + 16477364, + 16532388, + 18771762, + 29774306, + 39578757 + ], + "clin_sig": [ + "pathogenic" + ], + "allele_string": "G/A", + "phenotype_or_disease": 1 + } + ], + "end": 11778 +} \ No newline at end of file diff --git a/tests/preprocessing/test_vep.py b/tests/preprocessing/test_vep.py index f0466099..0dfa29d0 100644 --- a/tests/preprocessing/test_vep.py +++ b/tests/preprocessing/test_vep.py @@ -12,6 +12,7 @@ LMNA_MANE_TX_ID = "NM_170707.4" ANKRD11_MANE_TX_ID = "NM_013275.6" +ND4_TX_ID = "ND4.1" @pytest.mark.parametrize( @@ -188,6 +189,18 @@ def test_process_response_deletion( 803, 804, ), + ( # `MT_11778_11778_G_A` + "MT", + 11_777, + 11_778, + "G", + "A", + 0, + ND4_TX_ID, + "YP_003024035.1", + 339, + 340, + ), ], ) def test_parse_response( @@ -274,6 +287,14 @@ def test_parse_response( "T", 0, ), # `X_31180437_31180437_C_T` + ( + "MT", + 11_777, + 11_778, + "G", + "A", + 0, + ), # `MT_11778_11778_G_A` ], ) def test_fetch_response( From 3283f25ea9951c6e8a3affbff0e00a49874bed8c Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Fri, 12 Dec 2025 13:57:24 +0100 Subject: [PATCH 18/22] Handle refseq transcripts. --- src/gpsea/preprocessing/_vep.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/gpsea/preprocessing/_vep.py b/src/gpsea/preprocessing/_vep.py index 75ef6de3..4a22f0fc 100644 --- a/src/gpsea/preprocessing/_vep.py +++ b/src/gpsea/preprocessing/_vep.py @@ -33,6 +33,11 @@ class VepFunctionalAnnotator(FunctionalAnnotator): Non-coding variant effects where we do not complain if the functional annotation lacks the protein effects. 
""" + _REFSEQ_TX_ID_PREFIXES = ( + 'NM_', 'NR_', 'NC_', + 'XM_', 'XR_', 'XC_', + ) + def __init__(self, include_computational_txs: bool = False, timeout: float = 10.0): self._logger = logging.getLogger(__name__) self._url = ( @@ -203,4 +208,4 @@ def format_coordinates_for_vep_query(vc: VariantCoordinates) -> str: @staticmethod def _seems_like_refseq_tx(tx_id: str) -> bool: - return tx_id.startswith('NM_') or tx_id.startswith('XM_') + return len(tx_id) >= 3 and tx_id[:3] in VepFunctionalAnnotator._REFSEQ_TX_ID_PREFIXES From 2572b58c37f02995dceab7e9fa78adbee9b47445 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Fri, 12 Dec 2025 14:06:03 +0100 Subject: [PATCH 19/22] Regenerate caches, format code. --- .../6_32040421_32040421_C_T.json | 126 ++++++++ .../variant_cache/MT_11778_11778_G_A.json | 293 ++++++++++++++++++ src/gpsea/preprocessing/_vep.py | 10 +- tests/test_random.py | 33 ++ 4 files changed, 459 insertions(+), 3 deletions(-) create mode 100644 .gpsea_ci_cachedir/variant_cache/6_32040421_32040421_C_T.json create mode 100644 .gpsea_ci_cachedir/variant_cache/MT_11778_11778_G_A.json create mode 100644 tests/test_random.py diff --git a/.gpsea_ci_cachedir/variant_cache/6_32040421_32040421_C_T.json b/.gpsea_ci_cachedir/variant_cache/6_32040421_32040421_C_T.json new file mode 100644 index 00000000..c3883bb6 --- /dev/null +++ b/.gpsea_ci_cachedir/variant_cache/6_32040421_32040421_C_T.json @@ -0,0 +1,126 @@ +[ + { + "gene_symbol": "CYP21A2", + "transcript_id": "NM_000500.9", + "hgvs_cdna": "NM_000500.9:c.955C>T", + "is_preferred": true, + "variant_effects": [ + "STOP_GAINED" + ], + "overlapping_exons": [ + 8 + ], + "protein_id": "NP_000491.4", + "hgvsp": "NP_000491.4:p.Gln319Ter", + "protein_effect_location": { + "start": 318, + "end": 319 + } + }, + { + "gene_symbol": "CYP21A2", + "transcript_id": "NM_001128590.4", + "hgvs_cdna": "NM_001128590.4:c.865C>T", + "is_preferred": false, + "variant_effects": [ + "STOP_GAINED" + ], + "overlapping_exons": [ + 7 + ], + 
"protein_id": "NP_001122062.3", + "hgvsp": "NP_001122062.3:p.Gln289Ter", + "protein_effect_location": { + "start": 288, + "end": 289 + } + }, + { + "gene_symbol": "TNXB", + "transcript_id": "NM_001365276.2", + "hgvs_cdna": null, + "is_preferred": true, + "variant_effects": [ + "DOWNSTREAM_GENE_VARIANT" + ], + "overlapping_exons": null, + "protein_id": "NP_001352205.1", + "hgvsp": null, + "protein_effect_location": null + }, + { + "gene_symbol": "CYP21A2", + "transcript_id": "NM_001368143.2", + "hgvs_cdna": "NM_001368143.2:c.550C>T", + "is_preferred": false, + "variant_effects": [ + "STOP_GAINED" + ], + "overlapping_exons": [ + 8 + ], + "protein_id": "NP_001355072.1", + "hgvsp": "NP_001355072.1:p.Gln184Ter", + "protein_effect_location": { + "start": 183, + "end": 184 + } + }, + { + "gene_symbol": "CYP21A2", + "transcript_id": "NM_001368144.2", + "hgvs_cdna": "NM_001368144.2:c.550C>T", + "is_preferred": false, + "variant_effects": [ + "STOP_GAINED" + ], + "overlapping_exons": [ + 7 + ], + "protein_id": "NP_001355073.1", + "hgvsp": "NP_001355073.1:p.Gln184Ter", + "protein_effect_location": { + "start": 183, + "end": 184 + } + }, + { + "gene_symbol": "TNXB", + "transcript_id": "NM_001428335.1", + "hgvs_cdna": null, + "is_preferred": false, + "variant_effects": [ + "DOWNSTREAM_GENE_VARIANT" + ], + "overlapping_exons": null, + "protein_id": "NP_001415264.1", + "hgvsp": null, + "protein_effect_location": null + }, + { + "gene_symbol": "TNXB", + "transcript_id": "NM_019105.8", + "hgvs_cdna": null, + "is_preferred": false, + "variant_effects": [ + "DOWNSTREAM_GENE_VARIANT" + ], + "overlapping_exons": null, + "protein_id": "NP_061978.6", + "hgvsp": null, + "protein_effect_location": null + }, + { + "gene_symbol": "TNXB", + "transcript_id": "NM_032470.4", + "hgvs_cdna": null, + "is_preferred": false, + "variant_effects": [ + "DOWNSTREAM_GENE_VARIANT" + ], + "overlapping_exons": null, + "protein_id": "NP_115859.2", + "hgvsp": null, + "protein_effect_location": null + } +] \ No 
newline at end of file diff --git a/.gpsea_ci_cachedir/variant_cache/MT_11778_11778_G_A.json b/.gpsea_ci_cachedir/variant_cache/MT_11778_11778_G_A.json new file mode 100644 index 00000000..5acdea93 --- /dev/null +++ b/.gpsea_ci_cachedir/variant_cache/MT_11778_11778_G_A.json @@ -0,0 +1,293 @@ +[ + { + "gene_symbol": "ATP6", + "transcript_id": "ATP6.1", + "hgvs_cdna": null, + "is_preferred": true, + "variant_effects": [ + "DOWNSTREAM_GENE_VARIANT" + ], + "overlapping_exons": null, + "protein_id": "YP_003024031.1", + "hgvsp": null, + "protein_effect_location": null + }, + { + "gene_symbol": "ATP8", + "transcript_id": "ATP8.1", + "hgvs_cdna": null, + "is_preferred": true, + "variant_effects": [ + "DOWNSTREAM_GENE_VARIANT" + ], + "overlapping_exons": null, + "protein_id": "YP_003024030.1", + "hgvsp": null, + "protein_effect_location": null + }, + { + "gene_symbol": "COX1", + "transcript_id": "COX1.1", + "hgvs_cdna": null, + "is_preferred": true, + "variant_effects": [ + "DOWNSTREAM_GENE_VARIANT" + ], + "overlapping_exons": null, + "protein_id": "YP_003024028.1", + "hgvsp": null, + "protein_effect_location": null + }, + { + "gene_symbol": "COX2", + "transcript_id": "COX2.1", + "hgvs_cdna": null, + "is_preferred": true, + "variant_effects": [ + "DOWNSTREAM_GENE_VARIANT" + ], + "overlapping_exons": null, + "protein_id": "YP_003024029.1", + "hgvsp": null, + "protein_effect_location": null + }, + { + "gene_symbol": "COX3", + "transcript_id": "COX3.1", + "hgvs_cdna": null, + "is_preferred": true, + "variant_effects": [ + "DOWNSTREAM_GENE_VARIANT" + ], + "overlapping_exons": null, + "protein_id": "YP_003024032.1", + "hgvsp": null, + "protein_effect_location": null + }, + { + "gene_symbol": "CYTB", + "transcript_id": "CYTB.1", + "hgvs_cdna": null, + "is_preferred": true, + "variant_effects": [ + "UPSTREAM_GENE_VARIANT" + ], + "overlapping_exons": null, + "protein_id": "YP_003024038.1", + "hgvsp": null, + "protein_effect_location": null + }, + { + "gene_symbol": "ND3", + 
"transcript_id": "ND3.1", + "hgvs_cdna": null, + "is_preferred": true, + "variant_effects": [ + "DOWNSTREAM_GENE_VARIANT" + ], + "overlapping_exons": null, + "protein_id": "YP_003024033.1", + "hgvsp": null, + "protein_effect_location": null + }, + { + "gene_symbol": "ND4L", + "transcript_id": "ND4L.1", + "hgvs_cdna": null, + "is_preferred": true, + "variant_effects": [ + "DOWNSTREAM_GENE_VARIANT" + ], + "overlapping_exons": null, + "protein_id": "YP_003024034.1", + "hgvsp": null, + "protein_effect_location": null + }, + { + "gene_symbol": "ND4", + "transcript_id": "ND4.1", + "hgvs_cdna": "ND4.1:c.1019G>A", + "is_preferred": true, + "variant_effects": [ + "MISSENSE_VARIANT" + ], + "overlapping_exons": [ + 1 + ], + "protein_id": "YP_003024035.1", + "hgvsp": "YP_003024035.1:p.Arg340His", + "protein_effect_location": { + "start": 339, + "end": 340 + } + }, + { + "gene_symbol": "ND5", + "transcript_id": "ND5.1", + "hgvs_cdna": null, + "is_preferred": true, + "variant_effects": [ + "UPSTREAM_GENE_VARIANT" + ], + "overlapping_exons": null, + "protein_id": "YP_003024036.1", + "hgvsp": null, + "protein_effect_location": null + }, + { + "gene_symbol": "ND6", + "transcript_id": "ND6.1", + "hgvs_cdna": null, + "is_preferred": true, + "variant_effects": [ + "DOWNSTREAM_GENE_VARIANT" + ], + "overlapping_exons": null, + "protein_id": "YP_003024037.1", + "hgvsp": null, + "protein_effect_location": null + }, + { + "gene_symbol": "TRND", + "transcript_id": "TRND.1", + "hgvs_cdna": null, + "is_preferred": true, + "variant_effects": [ + "DOWNSTREAM_GENE_VARIANT" + ], + "overlapping_exons": null, + "protein_id": null, + "hgvsp": null, + "protein_effect_location": null + }, + { + "gene_symbol": "TRNE", + "transcript_id": "TRNE.1", + "hgvs_cdna": null, + "is_preferred": true, + "variant_effects": [ + "DOWNSTREAM_GENE_VARIANT" + ], + "overlapping_exons": null, + "protein_id": null, + "hgvsp": null, + "protein_effect_location": null + }, + { + "gene_symbol": "TRNG", + "transcript_id": 
"TRNG.1", + "hgvs_cdna": null, + "is_preferred": true, + "variant_effects": [ + "DOWNSTREAM_GENE_VARIANT" + ], + "overlapping_exons": null, + "protein_id": null, + "hgvsp": null, + "protein_effect_location": null + }, + { + "gene_symbol": "TRNH", + "transcript_id": "TRNH.1", + "hgvs_cdna": null, + "is_preferred": true, + "variant_effects": [ + "UPSTREAM_GENE_VARIANT" + ], + "overlapping_exons": null, + "protein_id": null, + "hgvsp": null, + "protein_effect_location": null + }, + { + "gene_symbol": "TRNK", + "transcript_id": "TRNK.1", + "hgvs_cdna": null, + "is_preferred": true, + "variant_effects": [ + "DOWNSTREAM_GENE_VARIANT" + ], + "overlapping_exons": null, + "protein_id": null, + "hgvsp": null, + "protein_effect_location": null + }, + { + "gene_symbol": "TRNL2", + "transcript_id": "TRNL2.1", + "hgvs_cdna": null, + "is_preferred": true, + "variant_effects": [ + "UPSTREAM_GENE_VARIANT" + ], + "overlapping_exons": null, + "protein_id": null, + "hgvsp": null, + "protein_effect_location": null + }, + { + "gene_symbol": "TRNP", + "transcript_id": "TRNP.1", + "hgvs_cdna": null, + "is_preferred": true, + "variant_effects": [ + "DOWNSTREAM_GENE_VARIANT" + ], + "overlapping_exons": null, + "protein_id": null, + "hgvsp": null, + "protein_effect_location": null + }, + { + "gene_symbol": "TRNR", + "transcript_id": "TRNR.1", + "hgvs_cdna": null, + "is_preferred": true, + "variant_effects": [ + "DOWNSTREAM_GENE_VARIANT" + ], + "overlapping_exons": null, + "protein_id": null, + "hgvsp": null, + "protein_effect_location": null + }, + { + "gene_symbol": "TRNS1", + "transcript_id": "TRNS1.1", + "hgvs_cdna": null, + "is_preferred": true, + "variant_effects": [ + "UPSTREAM_GENE_VARIANT" + ], + "overlapping_exons": null, + "protein_id": null, + "hgvsp": null, + "protein_effect_location": null + }, + { + "gene_symbol": "TRNS2", + "transcript_id": "TRNS2.1", + "hgvs_cdna": null, + "is_preferred": true, + "variant_effects": [ + "UPSTREAM_GENE_VARIANT" + ], + "overlapping_exons": null, 
+ "protein_id": null, + "hgvsp": null, + "protein_effect_location": null + }, + { + "gene_symbol": "TRNT", + "transcript_id": "TRNT.1", + "hgvs_cdna": null, + "is_preferred": true, + "variant_effects": [ + "UPSTREAM_GENE_VARIANT" + ], + "overlapping_exons": null, + "protein_id": null, + "hgvsp": null, + "protein_effect_location": null + } +] \ No newline at end of file diff --git a/src/gpsea/preprocessing/_vep.py b/src/gpsea/preprocessing/_vep.py index 4a22f0fc..a605a13c 100644 --- a/src/gpsea/preprocessing/_vep.py +++ b/src/gpsea/preprocessing/_vep.py @@ -34,8 +34,12 @@ class VepFunctionalAnnotator(FunctionalAnnotator): """ _REFSEQ_TX_ID_PREFIXES = ( - 'NM_', 'NR_', 'NC_', - 'XM_', 'XR_', 'XC_', + "NM_", + "NR_", + "NC_", + "XM_", + "XR_", + "XC_", ) def __init__(self, include_computational_txs: bool = False, timeout: float = 10.0): @@ -205,7 +209,7 @@ def format_coordinates_for_vep_query(vc: VariantCoordinates) -> str: # MNV return f"{chrom}:{start}-{end}/{alt}" - + @staticmethod def _seems_like_refseq_tx(tx_id: str) -> bool: return len(tx_id) >= 3 and tx_id[:3] in VepFunctionalAnnotator._REFSEQ_TX_ID_PREFIXES diff --git a/tests/test_random.py b/tests/test_random.py new file mode 100644 index 00000000..10a37fe9 --- /dev/null +++ b/tests/test_random.py @@ -0,0 +1,33 @@ +# Tests of random stuff asked for by the users. 
+ +import hpotk +import pytest + +from gpsea.preprocessing import CohortCreator, configure_caching_cohort_creator, load_phenopacket_files + + +@pytest.fixture(scope="module") +def hpo() -> hpotk.MinimalOntology: + store = hpotk.configure_ontology_store() + return store.load_minimal_hpo(release="v2025-10-22") + + +@pytest.fixture(scope="module") +def cohort_creator( + hpo: hpotk.MinimalOntology, +) -> CohortCreator: + return configure_caching_cohort_creator( + hpo=hpo, + ) + + +@pytest.mark.skip(reason="Just for interactive debugging") +def test_load_phenopacket( + cohort_creator: CohortCreator, +): + pps = ("dev/Mito/1-10011778Ff.json",) + _cohort, qc = load_phenopacket_files( + pp_files=pps, + cohort_creator=cohort_creator, + ) + qc.summarize() From 739d4f5bc80c81d4f3eea57d5c54e026ab141cc7 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Fri, 12 Dec 2025 14:18:21 +0100 Subject: [PATCH 20/22] Clear previously set env variable. --- tests/test_config.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_config.py b/tests/test_config.py index e9a341c3..6d613e16 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -30,5 +30,7 @@ def test_create_using_environment_variable( assert cd == target - if previous is not None: + if previous is None: + del os.environ[CACHE_ENV] + else: os.environ[CACHE_ENV] = previous From 59ef4c39c8e6452176a05e350e57d00b10413a0a Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Fri, 12 Dec 2025 14:34:49 +0100 Subject: [PATCH 21/22] Add a TODO. --- src/gpsea/analysis/pscore/_api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gpsea/analysis/pscore/_api.py b/src/gpsea/analysis/pscore/_api.py index 5db1e993..3564f321 100644 --- a/src/gpsea/analysis/pscore/_api.py +++ b/src/gpsea/analysis/pscore/_api.py @@ -356,6 +356,7 @@ def compare_genotype_vs_phenotype_score( # Sort by PatientCategory.cat_id and unpack. # For now, we only allow to have up to 2 groups. 
+ # TODO: fails if one of the categories includes no individuals. x_key, y_key = sorted(data[MonoPhenotypeAnalysisResult.GT_COL].dropna().unique()) x = data.loc[ data[MonoPhenotypeAnalysisResult.GT_COL] == x_key, From 37e4a6cafe512e2e4fdd48027cdc8c9114486b35 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Fri, 12 Dec 2025 14:41:37 +0100 Subject: [PATCH 22/22] Make release `0.9.12`. --- HOW_TO_RELEASE.md | 1 - pyproject.toml | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/HOW_TO_RELEASE.md b/HOW_TO_RELEASE.md index ade1a6ca..0a051930 100644 --- a/HOW_TO_RELEASE.md +++ b/HOW_TO_RELEASE.md @@ -10,7 +10,6 @@ The document describes how to release `gpsea` to *PyPi*. the target version (e.g. `TODO[v0.3.0]`) - bump versions to a release: - `src/gpsea/__init__.py` - - `docs/conf.py` - ensure the CI passes - deploy to PyPi (described below) - merge to `main` diff --git a/pyproject.toml b/pyproject.toml index 9ed153b1..a7f510ac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "gpsea" -version = "v0.9.12.dev0" +version = "0.9.12" authors = [ {name = "Lauren Rekerle", email="lauren.rekerle@jax.org"}, {name = "Daniel Danis", email="daniel.danis@bih-charite.de"},