diff --git a/bcbio/bam/__init__.py b/bcbio/bam/__init__.py index 4b8bcd6bd..2d205421c 100644 --- a/bcbio/bam/__init__.py +++ b/bcbio/bam/__init__.py @@ -1,6 +1,5 @@ """Functionality to query and extract information from aligned BAM files. """ -from __future__ import print_function import collections import os import signal @@ -10,7 +9,7 @@ import pybedtools import pysam import toolz as tz -from six.moves import zip_longest +from itertools import zip_longest from bcbio import broad, utils from bcbio.bam import ref diff --git a/bcbio/bam/counts.py b/bcbio/bam/counts.py index de9ebcbaf..823b1a30d 100644 --- a/bcbio/bam/counts.py +++ b/bcbio/bam/counts.py @@ -3,7 +3,6 @@ These are useful for plotting comparisons between BAM files to look at differences in defined or random regions. """ -from __future__ import print_function import random import collections diff --git a/bcbio/bam/coverage.py b/bcbio/bam/coverage.py index d33de1eb7..700a1b853 100644 --- a/bcbio/bam/coverage.py +++ b/bcbio/bam/coverage.py @@ -3,7 +3,6 @@ """ import os -import six import pandas as pd import pybedtools @@ -121,9 +120,9 @@ def plot_multiple_regions_coverage(samples, out_file, data, region_bed=None, ste return out_file in_bams = [dd.get_align_bam(x) for x in samples] samplenames = [dd.get_sample_name(x) for x in samples] - if isinstance(region_bed, six.string_types): + if isinstance(region_bed, str): region_bed = pybedtools.BedTool(region_bed) - if isinstance(stem_bed, six.string_types): + if isinstance(stem_bed, str): stem_bed = pybedtools.BedTool(stem_bed) if stem_bed is not None: # tabix indexed bedtools eval to false stem_bed = stem_bed.tabix() diff --git a/bcbio/bam/fastq.py b/bcbio/bam/fastq.py index 7205d7a7f..7637cfe8a 100644 --- a/bcbio/bam/fastq.py +++ b/bcbio/bam/fastq.py @@ -1,8 +1,6 @@ """Utilities for working with fastq files. """ -import six -from six.moves import zip from itertools import product import os import random @@ -250,7 +248,7 @@ def downsample(f1, f2, N, quick=False): out_files = (outf1, outf2) if outf2 else (outf1) with file_transaction(out_files) as tx_out_files: - if isinstance(tx_out_files, six.string_types): + if isinstance(tx_out_files, str): tx_out_f1 = tx_out_files else: tx_out_f1, tx_out_f2 = tx_out_files diff --git a/bcbio/chipseq/macs2.py b/bcbio/chipseq/macs2.py index 167c1a2ad..c7ed94b93 100644 --- a/bcbio/chipseq/macs2.py +++ b/bcbio/chipseq/macs2.py @@ -1,7 +1,6 @@ import os import glob import subprocess -import sys from bcbio import utils from bcbio.provenance import do diff --git a/bcbio/cwl/create.py b/bcbio/cwl/create.py index 45e8ad033..5ead4372b 100644 --- a/bcbio/cwl/create.py +++ b/bcbio/cwl/create.py @@ -1,6 +1,5 @@ """Create Common Workflow Language (CWL) runnable files and tools from a world object. """ -from __future__ import print_function import collections import copy import dateutil @@ -12,7 +11,6 @@ import tarfile import requests -import six import toolz as tz import yaml @@ -402,7 +400,7 @@ def _get_cur_remotes(path): elif isinstance(path, dict): for v in path.values(): cur_remotes |= _get_cur_remotes(v) - elif path and isinstance(path, six.string_types): + elif path and isinstance(path, str): if path.startswith(tuple(INTEGRATION_MAP.keys())): cur_remotes.add(INTEGRATION_MAP.get(path.split(":")[0] + ":")) return cur_remotes @@ -624,7 +622,7 @@ def _get_avro_type(val): elif val is None: return ["null"] # encode booleans as string True/False and unencode on other side - elif isinstance(val, bool) or isinstance(val, six.string_types) and val.lower() in ["true", "false", "none"]: + elif isinstance(val, bool) or isinstance(val, str) and val.lower() in ["true", "false", "none"]: return ["string", "null", "boolean"] elif isinstance(val, int): return "long" @@ -747,7 +745,7 @@ def _item_to_cwldata(x, get_retriever, indexes=None): """ if isinstance(x, (list, tuple)): return [_item_to_cwldata(subx, get_retriever) for subx in x] - elif (x and isinstance(x, six.string_types) and + elif (x and isinstance(x, str) and (((os.path.isfile(x) or os.path.isdir(x)) and os.path.exists(x)) or objectstore.is_remote(x))): if _file_local_or_remote(x, get_retriever): diff --git a/bcbio/cwl/cwlutils.py b/bcbio/cwl/cwlutils.py index a1319d375..a210be512 100644 --- a/bcbio/cwl/cwlutils.py +++ b/bcbio/cwl/cwlutils.py @@ -10,7 +10,6 @@ import pprint import tarfile -import six import toolz as tz from bcbio import bam, utils @@ -58,7 +57,7 @@ def normalize_missing(xs): xs[k] = normalize_missing(v) elif isinstance(xs, (list, tuple)): xs = [normalize_missing(x) for x in xs] - elif isinstance(xs, six.string_types): + elif isinstance(xs, str): if xs.lower() in ["none", "null"]: xs = None elif xs.lower() == "true": @@ -81,7 +80,7 @@ def unpack_tarballs(xs, data, use_subdir=True): xs[k] = unpack_tarballs(v, data, use_subdir) elif isinstance(xs, (list, tuple)): xs = [unpack_tarballs(x, data, use_subdir) for x in xs] - elif isinstance(xs, six.string_types): + elif isinstance(xs, str): if os.path.isfile(xs.encode("utf-8", "ignore")) and xs.endswith("-wf.tar.gz"): if use_subdir: tarball_dir = utils.safe_makedir(os.path.join(dd.get_work_dir(data), "wf-inputs")) diff --git a/bcbio/cwl/tool.py b/bcbio/cwl/tool.py index 7c06f4c46..6e06f8f5e 100644 --- a/bcbio/cwl/tool.py +++ b/bcbio/cwl/tool.py @@ -3,7 +3,6 @@ Handles wrapping and integrating with multiple tools making it easier to run bcbio in a standard way in many environments. """ -from __future__ import print_function import glob import json import os diff --git a/bcbio/cwl/workflow.py b/bcbio/cwl/workflow.py index e0cc08db3..0641244f6 100644 --- a/bcbio/cwl/workflow.py +++ b/bcbio/cwl/workflow.py @@ -3,7 +3,6 @@ import copy import pprint -import six import toolz as tz from bcbio.pipeline import alignment @@ -234,7 +233,7 @@ def _flatten_nested_input(v): for x in v["type"]: if isinstance(x, dict) and x["type"] == "array": new_type = x["items"] - elif isinstance(x, six.string_types) and x == "null": + elif isinstance(x, str) and x == "null": want_null = True else: new_type = x @@ -272,7 +271,7 @@ def _clean_output(v): return out def _get_string_vid(vid): - if isinstance(vid, six.string_types): + if isinstance(vid, str): return vid assert isinstance(vid, (list, tuple)), vid return "__".join(vid) @@ -280,7 +279,7 @@ def _get_string_vid(vid): def _get_variable(vid, variables): """Retrieve an input variable from our existing pool of options. """ - if isinstance(vid, six.string_types): + if isinstance(vid, str): vid = get_base_id(vid) else: vid = _get_string_vid(vid) @@ -426,7 +425,7 @@ def _create_variable(orig_v, step, variables): v = _get_variable(orig_v["id"], variables) except ValueError: v = copy.deepcopy(orig_v) - if not isinstance(v["id"], six.string_types): + if not isinstance(v["id"], str): v["id"] = _get_string_vid(v["id"]) for key, val in orig_v.items(): if key not in ["id", "type"]: diff --git a/bcbio/distributed/objectstore.py b/bcbio/distributed/objectstore.py index c7ad7385f..681b0dac5 100644 --- a/bcbio/distributed/objectstore.py +++ b/bcbio/distributed/objectstore.py @@ -13,7 +13,6 @@ import time import zlib -import six from bcbio.distributed.transaction import file_transaction from bcbio.provenance import do @@ -23,8 +22,7 @@ BIODATA_INFO = {"s3": "s3://biodata/prepped/{build}/{build}-{target}.tar.gz"} REGIONS_NEWPERMS = {"s3": ["eu-central-1"]} -@six.add_metaclass(abc.ABCMeta) -class FileHandle(object): +class FileHandle(metaclass=abc.ABCMeta): """Contract class for the file handle.""" @@ -221,8 +219,7 @@ def close(self): pass -@six.add_metaclass(abc.ABCMeta) -class StorageManager(object): +class StorageManager(metaclass=abc.ABCMeta): """The contract class for all the storage managers.""" diff --git a/bcbio/distributed/runfn.py b/bcbio/distributed/runfn.py index 0345fad04..3becde412 100644 --- a/bcbio/distributed/runfn.py +++ b/bcbio/distributed/runfn.py @@ -10,7 +10,6 @@ import pprint import shutil -import six import toolz as tz import yaml @@ -129,7 +128,7 @@ def _add_resources(data, runtime): data["config"] = {} # Convert input resources, which may be a JSON string resources = data.get("resources", {}) or {} - if isinstance(resources, six.string_types) and resources.startswith(("{", "[")): + if isinstance(resources, str) and resources.startswith(("{", "[")): resources = json.loads(resources) data["resources"] = resources assert isinstance(resources, dict), (resources, data) @@ -561,7 +560,7 @@ def _file_and_exists(val, input_files): def _to_cwl(val, input_files): """Convert a value into CWL formatted JSON, handling files and complex things. """ - if isinstance(val, six.string_types): + if isinstance(val, str): if _file_and_exists(val, input_files): val = {"class": "File", "path": val} secondary = [] diff --git a/bcbio/distributed/split.py b/bcbio/distributed/split.py index 4ff878429..f985b5fd0 100644 --- a/bcbio/distributed/split.py +++ b/bcbio/distributed/split.py @@ -10,7 +10,6 @@ """ import collections -import six from bcbio import utils @@ -60,7 +59,7 @@ def parallel_split_combine(args, split_fn, parallel_fn, split_args, combine_map, finished_out, extras = _get_split_tasks(args, split_fn, file_key, split_outfile_i) split_output = parallel_fn(parallel_name, split_args) - if isinstance(combiner, six.string_types): + if isinstance(combiner, str): combine_args, final_args = _organize_output(split_output, combine_map, file_key, combine_arg_keys) parallel_fn(combiner, combine_args) diff --git a/bcbio/galaxy/api.py b/bcbio/galaxy/api.py index 2a6b4dfcc..51a8e7daa 100644 --- a/bcbio/galaxy/api.py +++ b/bcbio/galaxy/api.py @@ -1,6 +1,7 @@ """Access Galaxy NGLIMS functionality via the standard API. """ -from six.moves import urllib +import urllib.parse +import urllib.request import json import time diff --git a/bcbio/galaxy/nglims.py b/bcbio/galaxy/nglims.py index 50054d6aa..dd191f778 100644 --- a/bcbio/galaxy/nglims.py +++ b/bcbio/galaxy/nglims.py @@ -1,6 +1,5 @@ """Integration with Galaxy nglims. """ -from __future__ import print_function import collections import copy import glob @@ -9,7 +8,6 @@ import subprocess import joblib -import six import yaml from bcbio import utils @@ -106,7 +104,7 @@ def _select_default_algorithm(analysis): def _relative_paths(xs, base_path): """Adjust paths to be relative to the provided base path. """ - if isinstance(xs, six.string_types): + if isinstance(xs, str): if xs.startswith(base_path): return xs.replace(base_path + "/", "", 1) else: diff --git a/bcbio/graph/graph.py b/bcbio/graph/graph.py index 487b4e4b2..8ad0eaab5 100644 --- a/bcbio/graph/graph.py +++ b/bcbio/graph/graph.py @@ -1,4 +1,3 @@ -from __future__ import print_function from datetime import datetime import collections diff --git a/bcbio/heterogeneity/__init__.py b/bcbio/heterogeneity/__init__.py index 27763252d..6f6cec7e4 100644 --- a/bcbio/heterogeneity/__init__.py +++ b/bcbio/heterogeneity/__init__.py @@ -5,7 +5,6 @@ to infer these sub-clones to help improve variant calls and interpretation especially in complex cancer samples. """ -from __future__ import print_function import collections import os diff --git a/bcbio/heterogeneity/bubbletree.py b/bcbio/heterogeneity/bubbletree.py index 516074957..426db3edf 100644 --- a/bcbio/heterogeneity/bubbletree.py +++ b/bcbio/heterogeneity/bubbletree.py @@ -3,7 +3,6 @@ http://www.bioconductor.org/packages/release/bioc/html/BubbleTree.html http://www.bioconductor.org/packages/release/bioc/vignettes/BubbleTree/inst/doc/BubbleTree-vignette.html """ -from __future__ import print_function import collections import csv import os diff --git a/bcbio/heterogeneity/loh.py b/bcbio/heterogeneity/loh.py index 99c2f92cc..17ce7ee31 100644 --- a/bcbio/heterogeneity/loh.py +++ b/bcbio/heterogeneity/loh.py @@ -9,8 +9,7 @@ import uuid import pandas as pd -import six -from six import StringIO +from io import StringIO import toolz as tz import yaml @@ -59,7 +58,7 @@ def _matches(tocheck, target): def _civic_regions(civic_file, variant_types=None, diseases=None, drugs=None): """Retrieve gene regions and names filtered by variant_types and diseases. """ - if isinstance(diseases, six.string_types): + if isinstance(diseases, str): diseases = [diseases] with utils.open_gzipsafe(civic_file) as in_handle: reader = csv.reader(in_handle, delimiter="\t") @@ -369,7 +368,7 @@ def __read_token(self): cp = c c = self.__read_fd(1) e = c - v = unicode(''.join(buf).decode('unicode-escape')) + v = str(''.join(buf).decode('unicode-escape')) elif t == "datetime": ## skip "inst" @@ -377,7 +376,7 @@ def __read_token(self): ## read next value as string s = self.__read_token() - if not isinstance(s, six.string_types): + if not isinstance(s, str): raise ValueError('Str expected, but got %s' % str(s)) ## remove read string from the value_stack @@ -392,7 +391,7 @@ def __read_token(self): ## read next value as string s = self.__read_token() - if not isinstance(s, six.string_types): + if not isinstance(s, str): raise ValueError('Str expected, but got %s' % str(s)) ## remove read string from the value_stack diff --git a/bcbio/heterogeneity/phylowgs.py b/bcbio/heterogeneity/phylowgs.py index f1d9b7621..c8b41893b 100644 --- a/bcbio/heterogeneity/phylowgs.py +++ b/bcbio/heterogeneity/phylowgs.py @@ -6,7 +6,6 @@ https://github.com/morrislab/phylowgs http://genomebiology.com/2015/16/1/35 """ -from __future__ import print_function import collections import os import sys diff --git a/bcbio/hla/optitype.py b/bcbio/hla/optitype.py index ae8a9168a..7aeb42c62 100644 --- a/bcbio/hla/optitype.py +++ b/bcbio/hla/optitype.py @@ -6,7 +6,6 @@ import glob import os import re -import sys import shutil import toolz as tz diff --git a/bcbio/hmmer/search.py b/bcbio/hmmer/search.py index 74934be0c..7ddd1cada 100644 --- a/bcbio/hmmer/search.py +++ b/bcbio/hmmer/search.py @@ -6,8 +6,8 @@ https://github.com/nickloman/entrezajax """ -from __future__ import print_function -from six.moves import urllib +import urllib.parse +import urllib.request import logging class SmartRedirectHandler(urllib.request.HTTPRedirectHandler): diff --git a/bcbio/illumina/flowcell.py b/bcbio/illumina/flowcell.py index 3ae07712d..a090cc93a 100644 --- a/bcbio/illumina/flowcell.py +++ b/bcbio/illumina/flowcell.py @@ -2,7 +2,9 @@ """ import os import glob -from six.moves import urllib, http_cookiejar +import urllib.parse +import urllib.request +import http.cookiejar import json def parse_dirname(fc_dir): @@ -59,7 +61,7 @@ class GalaxySqnLimsApi: def __init__(self, base_url, user, passwd): self._base_url = base_url # build cookies so we keep track of being logged in - cj = http_cookiejar.LWPCookieJar() + cj = http.cookiejar.LWPCookieJar() opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj)) urllib.request.install_opener(opener) login = dict(email=user, password=passwd, login_button='Login') diff --git a/bcbio/illumina/machine.py b/bcbio/illumina/machine.py index e373ba815..7368cca82 100644 --- a/bcbio/illumina/machine.py +++ b/bcbio/illumina/machine.py @@ -1,6 +1,5 @@ """Support integration with Illumina sequencer machines. """ -from __future__ import print_function import glob import json import os diff --git a/bcbio/install.py b/bcbio/install.py index d969c5a17..cf7b1fcbf 100644 --- a/bcbio/install.py +++ b/bcbio/install.py @@ -3,7 +3,6 @@ Enables automated installation tool and in-place updates to install additional data and software. """ -from __future__ import print_function import argparse import collections import contextlib @@ -17,9 +16,8 @@ import subprocess import sys import glob +import urllib.request -import six -from six.moves import urllib import toolz as tz import yaml diff --git a/bcbio/ngsalign/alignprep.py b/bcbio/ngsalign/alignprep.py index 38a7a530e..2349055e9 100644 --- a/bcbio/ngsalign/alignprep.py +++ b/bcbio/ngsalign/alignprep.py @@ -7,7 +7,6 @@ import shutil import subprocess -import six import toolz as tz from bcbio import bam, utils @@ -664,7 +663,7 @@ def _bgzip_file(finput, config, work_dir, needs_bgzip, needs_gunzip, needs_conve Handles cases where finput might be multiple files and need to be concatenated. """ - if isinstance(finput, six.string_types): + if isinstance(finput, str): in_file = finput else: assert not needs_convert, "Do not yet handle quality conversion with multiple inputs" diff --git a/bcbio/ngsalign/novoalign.py b/bcbio/ngsalign/novoalign.py index de6062aca..00a62f8fc 100644 --- a/bcbio/ngsalign/novoalign.py +++ b/bcbio/ngsalign/novoalign.py @@ -15,7 +15,6 @@ from bcbio.distributed.transaction import tx_tmpdir from bcbio.utils import (memoize_outfile, file_exists) -import six # ## BAM realignment @@ -100,7 +99,7 @@ def _novoalign_args_from_config(config, need_quality=True): multi_mappers = config["algorithm"].get("multiple_mappers") if multi_mappers is True: multi_flag = "Random" - elif isinstance(multi_mappers, six.string_types): + elif isinstance(multi_mappers, str): multi_flag = multi_mappers else: multi_flag = "None" diff --git a/bcbio/ngsalign/postalign.py b/bcbio/ngsalign/postalign.py index 92ff828db..60b8e74ff 100644 --- a/bcbio/ngsalign/postalign.py +++ b/bcbio/ngsalign/postalign.py @@ -21,7 +21,6 @@ from bcbio.provenance import do from bcbio.variation import coverage -import six pysam = utils.LazyImport("pysam") @@ -310,7 +309,7 @@ def _check_dedup(data): dup_param = utils.get_in(data, ("config", "algorithm", "mark_duplicates"), False) else: dup_param = utils.get_in(data, ("config", "algorithm", "mark_duplicates"), True) - if dup_param and isinstance(dup_param, six.string_types): + if dup_param and isinstance(dup_param, str): logger.info("Warning: bcbio no longer support explicit setting of mark_duplicate algorithm. " "Using best-practice choice based on input data.") dup_param = True diff --git a/bcbio/pipeline/config_utils.py b/bcbio/pipeline/config_utils.py index be648f7d9..ecc49f39d 100644 --- a/bcbio/pipeline/config_utils.py +++ b/bcbio/pipeline/config_utils.py @@ -11,7 +11,6 @@ import toolz as tz -import six class CmdNotFound(Exception): @@ -218,7 +217,7 @@ def wrap(name, pconfig, config, default): return apath if is_ok(os.path.join(adir, program)): return os.path.join(adir, program) - raise CmdNotFound(" ".join(map(repr, (fn.__name__ if six.PY3 else fn.func_name, name, pconfig, default)))) + raise CmdNotFound(" ".join(map(repr, (fn.__name__, name, pconfig, default)))) return wrap @_get_check_program_cmd @@ -227,7 +226,7 @@ def _get_program_cmd(name, pconfig, config, default): """ if pconfig is None: return name - elif isinstance(pconfig, six.string_types): + elif isinstance(pconfig, str): return pconfig elif "cmd" in pconfig: return pconfig["cmd"] @@ -241,7 +240,7 @@ def _get_program_dir(name, config): """ if config is None: raise ValueError("Could not find directory in config for %s" % name) - elif isinstance(config, six.string_types): + elif isinstance(config, str): return config elif "dir" in config: return expand_path(config["dir"]) @@ -430,7 +429,7 @@ def use_vqsr(algs, call_file=None): coverage_intervals = set([]) for alg in algs: callers = alg.get("variantcaller") - if isinstance(callers, six.string_types): + if isinstance(callers, str): callers = [callers] if not callers: # no variant calling, no VQSR continue diff --git a/bcbio/pipeline/disambiguate/__init__.py b/bcbio/pipeline/disambiguate/__init__.py index 0b835e9e5..506205ba0 100644 --- a/bcbio/pipeline/disambiguate/__init__.py +++ b/bcbio/pipeline/disambiguate/__init__.py @@ -8,7 +8,6 @@ bcbio-nextgen: https://github.com/mjafin/disambiguate """ -from __future__ import print_function import collections import copy import os @@ -22,7 +21,6 @@ from bcbio import bam -import six def split(*items): @@ -37,7 +35,7 @@ def split(*items): "base": True} out.append([data]) # handle the instance where a single organism is disambiguated - if isinstance(dis_orgs, six.string_types): + if isinstance(dis_orgs, str): dis_orgs = [dis_orgs] for dis_org in dis_orgs: dis_data = copy.deepcopy(data) diff --git a/bcbio/pipeline/disambiguate/run.py b/bcbio/pipeline/disambiguate/run.py index f7a802e57..c5222cf46 100644 --- a/bcbio/pipeline/disambiguate/run.py +++ b/bcbio/pipeline/disambiguate/run.py @@ -25,7 +25,6 @@ """ -from __future__ import print_function import sys, re, pysam from array import array from os import path, makedirs diff --git a/bcbio/pipeline/genome.py b/bcbio/pipeline/genome.py index c8d5584dc..19b0af25f 100644 --- a/bcbio/pipeline/genome.py +++ b/bcbio/pipeline/genome.py @@ -1,12 +1,10 @@ """Read genome build configurations from Galaxy *.loc and bcbio-nextgen resource files. """ -from six.moves import configparser +import configparser import glob import os -import sys from xml.etree import ElementTree -import six import toolz as tz import yaml @@ -35,7 +33,7 @@ def get_resources(genome, ref_file, data): resources = yaml.safe_load(in_handle) def resource_file_path(x): - if isinstance(x, six.string_types) and os.path.exists(os.path.join(base_dir, x)): + if isinstance(x, str) and os.path.exists(os.path.join(base_dir, x)): return os.path.normpath(os.path.join(base_dir, x)) return x cleaned = utils.dictapply(resources, resource_file_path) @@ -88,7 +86,7 @@ def abs_file_paths(xs, base_dir=None, ignore_keys=None, fileonly_keys=None, cur_ if isinstance(xs, dict): out = {} for k, v in xs.items(): - if k not in ignore_keys and v and isinstance(v, six.string_types): + if k not in ignore_keys and v and isinstance(v, str): if v.lower() == "none": out[k] = None else: @@ -98,7 +96,7 @@ def abs_file_paths(xs, base_dir=None, ignore_keys=None, fileonly_keys=None, cur_ for x in v] else: out[k] = v - elif isinstance(xs, six.string_types): + elif isinstance(xs, str): if os.path.exists(xs) or (do_download and objectstore.is_remote(xs)): dl = objectstore.download(xs, input_dir) if dl and cur_key not in ignore_keys and not (cur_key in fileonly_keys and not os.path.isfile(dl)): diff --git a/bcbio/pipeline/main.py b/bcbio/pipeline/main.py index d821333fb..047694294 100644 --- a/bcbio/pipeline/main.py +++ b/bcbio/pipeline/main.py @@ -2,7 +2,6 @@ Handles running the full pipeline based on instructions """ -from __future__ import print_function from collections import defaultdict import copy import os diff --git a/bcbio/pipeline/qcsummary.py b/bcbio/pipeline/qcsummary.py index 19fb28eea..ed55c7a2d 100644 --- a/bcbio/pipeline/qcsummary.py +++ b/bcbio/pipeline/qcsummary.py @@ -3,6 +3,7 @@ import collections import copy import csv +import json import os import yaml @@ -22,7 +23,6 @@ from bcbio.rnaseq import gtf from bcbio.variation import damage, peddy, vcfutils, vcfanno -import six # ## High level functions to generate summary @@ -187,7 +187,7 @@ def _run_qc_tools(bam_file, data): # Check for files only output if "base" in out: qc_files = out - elif out and isinstance(out, six.string_types) and os.path.exists(out): + elif out and isinstance(out, str) and os.path.exists(out): qc_files = {"base": out, "secondary": []} if not qc_files: qc_files = _organize_qc_files(program_name, cur_qc_dir) @@ -315,7 +315,7 @@ def _merge_metadata(samples): sample_metrics = collections.defaultdict(dict) for s in samples: m = tz.get_in(['metadata'], s) - if isinstance(m, six.string_types): + if isinstance(m, str): m = json.loads(m) if m: for me in list(m.keys()): diff --git a/bcbio/pipeline/region.py b/bcbio/pipeline/region.py index 6bb27a568..a99035335 100644 --- a/bcbio/pipeline/region.py +++ b/bcbio/pipeline/region.py @@ -6,7 +6,6 @@ import collections import os -import six import toolz as tz from bcbio import utils @@ -20,7 +19,7 @@ def get_max_counts(samples): for data in (x[0] for x in samples): count = tz.get_in(["config", "algorithm", "callable_count"], data, 1) vcs = tz.get_in(["config", "algorithm", "variantcaller"], data, []) - if isinstance(vcs, six.string_types): + if isinstance(vcs, str): vcs = [vcs] if vcs: count *= len(vcs) diff --git a/bcbio/pipeline/run_info.py b/bcbio/pipeline/run_info.py index d29b9185b..4fde2f68a 100644 --- a/bcbio/pipeline/run_info.py +++ b/bcbio/pipeline/run_info.py @@ -13,7 +13,6 @@ import string import sys -import six import toolz as tz import yaml from bcbio import install, utils, structural @@ -70,7 +69,7 @@ def organize(dirs, config, run_info_yaml, sample_names=None, is_cwl=False, item["dirs"] = dirs if "name" not in item: item["name"] = ["", item["description"]] - elif isinstance(item["name"], six.string_types): + elif isinstance(item["name"], str): description = "%s-%s" % (item["name"], clean_name(item["description"])) item["name"] = [item["name"], description] item["description"] = description @@ -346,11 +345,11 @@ def _clean_algorithm(data): for key in ["variantcaller", "jointcaller", "svcaller"]: val = tz.get_in(["algorithm", key], data) if val: - if not isinstance(val, (list, tuple)) and isinstance(val, six.string_types): + if not isinstance(val, (list, tuple)) and isinstance(val, str): val = [val] # check for cases like [false] or [None] if isinstance(val, (list, tuple)): - if len(val) == 1 and not val[0] or (isinstance(val[0], six.string_types) + if len(val) == 1 and not val[0] or (isinstance(val[0], str) and val[0].lower() in ["none", "false"]): val = False data["algorithm"][key] = val @@ -381,12 +380,12 @@ def _clean_background(data): if val: out = {} # old style specification, single string for variant - if isinstance(val, six.string_types): + if isinstance(val, str): out["variant"] = _file_to_abs(val, [os.getcwd()]) elif isinstance(val, dict): for k, v in val.items(): if k in allowed_keys: - if isinstance(v, six.string_types): + if isinstance(v, str): out[k] = _file_to_abs(v, [os.getcwd()]) else: assert isinstance(v, dict) @@ -408,7 +407,7 @@ def _clean_background(data): def _clean_characters(x): """Clean problem characters in sample lane or descriptions. """ - if not isinstance(x, six.string_types): + if not isinstance(x, str): x = str(x) else: if not all(ord(char) < 128 for char in x): @@ -822,9 +821,9 @@ def _file_to_abs(x, dnames, makedir=False): """ if x is None or os.path.isabs(x): return x - elif isinstance(x, six.string_types) and objectstore.is_remote(x): + elif isinstance(x, str) and objectstore.is_remote(x): return x - elif isinstance(x, six.string_types) and x.lower() == "none": + elif isinstance(x, str) and x.lower() == "none": return None else: for dname in dnames: @@ -843,7 +842,7 @@ def _normalize_files(item, fc_dir=None): """ files = item.get("files") if files: - if isinstance(files, six.string_types): + if isinstance(files, str): files = [files] fastq_dir = flowcell.get_fastq_dir(fc_dir) if fc_dir else os.getcwd() files = [_file_to_abs(x, [os.getcwd(), fc_dir, fastq_dir]) for x in files] @@ -961,7 +960,7 @@ def _run_info_from_yaml(dirs, run_info_yaml, config, sample_names=None, if "upload" not in item and not is_cwl: upload = global_config.get("upload", {}) # Handle specifying a local directory directly in upload - if isinstance(upload, six.string_types): + if isinstance(upload, str): upload = {"dir": upload} if not upload: upload["dir"] = "../final" @@ -987,7 +986,7 @@ def _run_info_from_yaml(dirs, run_info_yaml, config, sample_names=None, for f in item["files"]] elif "files" in item: del item["files"] - if item.get("vrn_file") and isinstance(item["vrn_file"], six.string_types): + if item.get("vrn_file") and isinstance(item["vrn_file"], str): item["vrn_file"] = genome.abs_file_paths(item["vrn_file"], do_download=all(not x for x in integrations.values())) if os.path.isfile(item["vrn_file"]): @@ -1113,7 +1112,7 @@ def _add_algorithm_defaults(algorithm, analysis, is_cwl): elif v is None: algorithm[k] = [] elif k in convert_to_single: - if v and not isinstance(v, six.string_types): + if v and not isinstance(v, str): if isinstance(v, (list, tuple)) and len(v) == 1: algorithm[k] = v[0] else: @@ -1132,7 +1131,7 @@ def _replace_global_vars(xs, global_vars): elif isinstance(xs, dict): final = {} for k, v in xs.items(): - if isinstance(v, six.string_types) and v in global_vars: + if isinstance(v, str) and v in global_vars: v = global_vars[v] final[k] = v return final diff --git a/bcbio/pipeline/shared.py b/bcbio/pipeline/shared.py index b8497475f..05b321e07 100644 --- a/bcbio/pipeline/shared.py +++ b/bcbio/pipeline/shared.py @@ -8,7 +8,6 @@ import pybedtools import pysam -import six import toolz as tz from bcbio import bam, broad, utils @@ -251,7 +250,7 @@ def remove_exclude_regions(f): @functools.wraps(f) def wrapper(variant_regions, region, out_file, items=None, do_merge=True, data=None): region_bed = f(variant_regions, region, out_file, items, do_merge, data) - if region_bed and isinstance(region_bed, six.string_types) and os.path.exists(region_bed) and items: + if region_bed and isinstance(region_bed, str) and os.path.exists(region_bed) and items: for e in get_exclude_regions(items): if e in exclude_fns: region_bed = exclude_fns[e](region_bed, items) diff --git a/bcbio/provenance/do.py b/bcbio/provenance/do.py index 8f90877e1..5fa15f72b 100644 --- a/bcbio/provenance/do.py +++ b/bcbio/provenance/do.py @@ -10,7 +10,6 @@ from bcbio.provenance import diagnostics -import six def run(cmd, descr=None, data=None, checks=None, region=None, log_error=True, @@ -22,7 +21,7 @@ def run(cmd, descr=None, data=None, checks=None, region=None, log_error=True, logger.debug(descr) cmd_id = diagnostics.start_cmd(cmd, descr or "", data) try: - logger_cl.debug(" ".join(str(x) for x in cmd) if not isinstance(cmd, six.string_types) else cmd) + logger_cl.debug(" ".join(str(x) for x in cmd) if not isinstance(cmd, str) else cmd) _do_run(cmd, checks, log_stdout, env=env) except: diagnostics.end_cmd(cmd_id, False) @@ -62,7 +61,7 @@ def _normalize_cmd_args(cmd): Piped commands set pipefail and require use of bash to help with debugging intermediate errors. """ - if isinstance(cmd, six.string_types): + if isinstance(cmd, str): # check for standard or anonymous named pipes if cmd.find(" | ") > 0 or cmd.find(">(") or cmd.find("<("): return "set -o pipefail; " + cmd, True, find_bash() @@ -98,7 +97,7 @@ def _do_run(cmd, checks, log_stdout=False, env=None): for line in s.stdout: debug_stdout.append(line.decode("utf-8", errors="replace")) if exitcode is not None and exitcode != 0: - error_msg = " ".join(cmd) if not isinstance(cmd, six.string_types) else cmd + error_msg = " ".join(cmd) if not isinstance(cmd, str) else cmd error_msg += "\n" error_msg += "".join(debug_stdout) s.communicate() diff --git a/bcbio/provenance/programs.py b/bcbio/provenance/programs.py index e9166a890..87cb9e9c0 100644 --- a/bcbio/provenance/programs.py +++ b/bcbio/provenance/programs.py @@ -3,7 +3,6 @@ Catalogs the full list of programs used in analysis, enabling reproduction of results and tracking of provenance in output files. """ -from __future__ import print_function import os import contextlib import subprocess diff --git a/bcbio/qc/contamination.py b/bcbio/qc/contamination.py index 6604cdbe8..025c0f65c 100644 --- a/bcbio/qc/contamination.py +++ b/bcbio/qc/contamination.py @@ -3,7 +3,6 @@ import os import shutil import subprocess -import sys from bcbio import utils from bcbio.log import logger diff --git a/bcbio/qc/multiqc.py b/bcbio/qc/multiqc.py index 3a8e847eb..d10db8400 100644 --- a/bcbio/qc/multiqc.py +++ b/bcbio/qc/multiqc.py @@ -14,7 +14,6 @@ from collections import OrderedDict import pybedtools -import six import toolz as tz import yaml @@ -242,7 +241,7 @@ def _get_input_files(samples, base_dir, tx_out_dir): sum_qc = tz.get_in(["summary", "qc"], data, {}) if sum_qc in [None, "None"]: sum_qc = {} - elif isinstance(sum_qc, six.string_types): + elif isinstance(sum_qc, str): sum_qc = {dd.get_algorithm_qc(data)[0]: sum_qc} elif not isinstance(sum_qc, dict): raise ValueError("Unexpected summary qc: %s" % sum_qc) @@ -250,7 +249,7 @@ def _get_input_files(samples, base_dir, tx_out_dir): if isinstance(pfiles, dict): pfiles = [pfiles["base"]] + pfiles.get("secondary", []) # CWL: presents output files as single file plus associated secondary files - elif isinstance(pfiles, six.string_types): + elif isinstance(pfiles, str): if os.path.exists(pfiles): pfiles = [os.path.join(basedir, f) for basedir, subdir, filenames in os.walk(os.path.dirname(pfiles)) for f in filenames] else: @@ -477,7 +476,7 @@ def _merge_metrics(samples, out_dir): s = _add_disambiguate(s) s = _add_atac(s) m = tz.get_in(['summary', 'metrics'], s) - if isinstance(m, six.string_types): + if isinstance(m, str): m = json.loads(m) if m: for me in list(m.keys()): diff --git a/bcbio/qc/srna.py b/bcbio/qc/srna.py index 2798aa935..a15b9973b 100644 --- a/bcbio/qc/srna.py +++ b/bcbio/qc/srna.py @@ -1,7 +1,6 @@ """ Create log files to be parsed by multiqc """ -from __future__ import print_function import os import pandas as pd diff --git a/bcbio/qc/variant.py b/bcbio/qc/variant.py index 76f699173..aab7883ac 100644 --- a/bcbio/qc/variant.py +++ b/bcbio/qc/variant.py @@ -6,7 +6,6 @@ import os import shutil -import six import toolz as tz from bcbio import utils @@ -101,10 +100,10 @@ def _get_variants(data): variants = variants["samples"] for v in variants: # CWL -- a single variant file - if isinstance(v, six.string_types) and os.path.exists(v): + if isinstance(v, str) and os.path.exists(v): active_vs.append(_add_filename_details(v)) elif (isinstance(v, (list, tuple)) and len(v) > 0 and - isinstance(v[0], six.string_types) and os.path.exists(v[0])): + isinstance(v[0], str) and os.path.exists(v[0])): for subv in v: active_vs.append(_add_filename_details(subv)) elif isinstance(v, dict): diff --git a/bcbio/rnaseq/bcbiornaseq.py b/bcbio/rnaseq/bcbiornaseq.py index 1c1711c85..fc37f7343 100644 --- a/bcbio/rnaseq/bcbiornaseq.py +++ b/bcbio/rnaseq/bcbiornaseq.py @@ -10,7 +10,6 @@ from pathlib import Path from bcbio.log import logger -import six def make_bcbiornaseq_object(data): @@ -185,7 +184,7 @@ def _quotestring(string, double=True): def _list2Rlist(xs): """ convert a python list to an R list """ - if isinstance(xs, six.string_types): + if isinstance(xs, str): xs = [xs] rlist = ",".join([_quotestring(x) for x in xs]) return "c(" + rlist + ")" diff --git a/bcbio/rnaseq/dexseq.py b/bcbio/rnaseq/dexseq.py index c3a2a4ef6..f65ac80d9 100644 --- a/bcbio/rnaseq/dexseq.py +++ b/bcbio/rnaseq/dexseq.py @@ -1,7 +1,6 @@ """ perform exon-level counting using DEXSeq """ -import sys import os from bcbio.utils import R_sitelib, file_exists, safe_makedir, which from bcbio.distributed.transaction import file_transaction diff --git a/bcbio/rnaseq/oncofuse.py b/bcbio/rnaseq/oncofuse.py index 888524c02..a9193ab03 100644 --- a/bcbio/rnaseq/oncofuse.py +++ b/bcbio/rnaseq/oncofuse.py @@ -4,7 +4,6 @@ oncofuse: http://www.unav.es/genetica/oncofuse.html github: https://github.com/mikessh/oncofuse """ -from __future__ import print_function import os import pysam import toolz as tz diff --git a/bcbio/rnaseq/pizzly.py b/bcbio/rnaseq/pizzly.py index a4ce32a0e..ee5e83b79 100644 --- a/bcbio/rnaseq/pizzly.py +++ b/bcbio/rnaseq/pizzly.py @@ -3,7 +3,6 @@ https://github.com/pmelsted/pizzly http://www.biorxiv.org/content/early/2017/07/20/166322 """ -from __future__ import print_function import os diff --git a/bcbio/rnaseq/singlecellexperiment.py b/bcbio/rnaseq/singlecellexperiment.py index 10ee19760..743b4d654 100644 --- a/bcbio/rnaseq/singlecellexperiment.py +++ b/bcbio/rnaseq/singlecellexperiment.py @@ -1,4 +1,3 @@ -from __future__ import print_function import os import subprocess diff --git a/bcbio/rnaseq/umi.py b/bcbio/rnaseq/umi.py index d7a53ea93..d1b1322b8 100644 --- a/bcbio/rnaseq/umi.py +++ b/bcbio/rnaseq/umi.py @@ -24,7 +24,6 @@ from bcbio.log import logger from bcbio.rnaseq import gtf -import six class SparseMatrix(object): @@ -207,7 +206,7 @@ def filter_barcodes(data): bc2 = None bc3 = None umi_dir = os.path.join(dd.get_work_dir(data), "umis") - if isinstance(bc, six.string_types): + if isinstance(bc, str): bc1 = bc if len(bc) == 1: bc1 = bc[0] diff --git a/bcbio/server/background.py b/bcbio/server/background.py index ce96e7650..a0069f305 100644 --- a/bcbio/server/background.py +++ b/bcbio/server/background.py @@ -6,7 +6,6 @@ Copyright (c) 2012, Vukasin Toroman """ -from __future__ import print_function import subprocess import tornado.ioloop import time diff --git a/bcbio/server/run.py b/bcbio/server/run.py index 3424f2d63..69f65121c 100644 --- a/bcbio/server/run.py +++ b/bcbio/server/run.py @@ -4,8 +4,7 @@ import os import sys import uuid - -from six import StringIO +from io import StringIO import tornado.gen import tornado.web diff --git a/bcbio/setpath.py b/bcbio/setpath.py index fa72d2ae8..c1a1dd7b0 100644 --- a/bcbio/setpath.py +++ b/bcbio/setpath.py @@ -1,7 +1,6 @@ """Update the PATH environment variable to reflect the value of BCBIOPATH. """ -from __future__ import print_function import contextlib import os diff --git a/bcbio/srna/mirge.py b/bcbio/srna/mirge.py index db96a7690..d36f81c9d 100644 --- a/bcbio/srna/mirge.py +++ b/bcbio/srna/mirge.py @@ -1,6 +1,5 @@ """Run mirge tool""" import os -import sys import shutil import glob from bcbio.distributed.transaction import tx_tmpdir diff --git a/bcbio/structural/__init__.py b/bcbio/structural/__init__.py index 579d80def..92a8eb57e 100644 --- a/bcbio/structural/__init__.py +++ b/bcbio/structural/__init__.py @@ -16,7 +16,6 @@ from bcbio.variation import validate as vcvalidate from bcbio.variation import vcfutils -import six from functools import reduce @@ -58,7 +57,7 @@ def get_svcallers(data): svs = data["config"]["algorithm"].get("svcaller") if svs is None: svs = [] - elif isinstance(svs, six.string_types): + elif isinstance(svs, str): svs = [svs] return svs @@ -193,7 +192,7 @@ def _batch_split_by_sv(samples, stage): svcaller = tz.get_in(["config", "algorithm", "svcaller"], x) batch = dd.get_batch(x) or dd.get_sample_name(x) if stage in ["ensemble"]: # no batching for ensemble methods - if isinstance(batch, six.string_types) and batch != dd.get_sample_name(x): + if isinstance(batch, str) and batch != dd.get_sample_name(x): batch += "_%s" % dd.get_sample_name(x) else: batch = dd.get_sample_name(x) @@ -375,7 +374,7 @@ def standardize_cnv_reference(data): """ out = tz.get_in(["config", "algorithm", "background", "cnv_reference"], data, {}) cur_callers = set(data["config"]["algorithm"].get("svcaller")) & _CNV_REFERENCE - if isinstance(out, six.string_types): + if isinstance(out, str): if not len(cur_callers) == 1: raise ValueError("Multiple CNV callers and single background reference for %s: %s" % data["description"], list(cur_callers)) diff --git a/bcbio/structural/lumpy.py b/bcbio/structural/lumpy.py index d2f7c9d16..4146c3ae1 100644 --- a/bcbio/structural/lumpy.py +++ b/bcbio/structural/lumpy.py @@ -9,7 +9,6 @@ import re import subprocess -import six import vcf from bcbio import utils @@ -95,10 +94,7 @@ def _prepare_smoove_bams(full_bams, sr_bams, disc_bams, items, tx_work_dir): return out def _allowed_errors(msg): - if six.PY3: - msg = str(msg) - else: - msg = unicode(msg).encode("ascii", "replace") + msg = str(msg) allowed = ["covmed: not enough reads to sample for bam stats", "missing pair end parameters:", "mean stdev read_length min_non_overlap"] diff --git a/bcbio/structural/metasv.py b/bcbio/structural/metasv.py index 94bd080fc..0f5f7d300 100644 --- a/bcbio/structural/metasv.py +++ b/bcbio/structural/metasv.py @@ -4,7 +4,6 @@ http://dx.doi.org/10.1093/bioinformatics/btv204 """ import os -import sys from bcbio import utils from bcbio.provenance import do diff --git a/bcbio/structural/pindel.py b/bcbio/structural/pindel.py index c587762a5..be18b8256 100644 --- a/bcbio/structural/pindel.py +++ b/bcbio/structural/pindel.py @@ -3,9 +3,7 @@ http://gmt.genome.wustl.edu/packages/pindel/ """ -from __future__ import print_function import os -import six import time import shutil from bcbio import bam, utils, broad @@ -15,7 +13,6 @@ from bcbio.variation.vcfutils import bgzip_and_index, get_paired_bams from bcbio.variation import annotation from bcbio.provenance import do -from six.moves import zip def _pindel_options(items, config, out_file, region, tmp_path): """parse pindel options. Add region to cmd. @@ -29,7 +26,7 @@ def _pindel_options(items, config, out_file, region, tmp_path): target = subset_variant_regions(variant_regions, region, out_file, items) opts = "" if target: - if isinstance(target, six.string_types) and os.path.isfile(target): + if isinstance(target, str) and os.path.isfile(target): target_bed = target else: target_bed = os.path.join(tmp_path, "tmp.bed") diff --git a/bcbio/structural/validate.py b/bcbio/structural/validate.py index 585640601..72bb96027 100644 --- a/bcbio/structural/validate.py +++ b/bcbio/structural/validate.py @@ -3,7 +3,6 @@ import csv import os -import six import toolz as tz import numpy as np import pandas as pd @@ -384,7 +383,7 @@ def evaluate(data): summary_plots = _plot_evaluation(df_csv) data["sv-validate"] = {"csv": val_summary, "plot": summary_plots, "df": df_csv} else: - assert isinstance(truth_sets, six.string_types) and utils.file_exists(truth_sets), truth_sets + assert isinstance(truth_sets, str) and utils.file_exists(truth_sets), truth_sets val_summary = _evaluate_vcf(data["sv"], truth_sets, work_dir, data) title = "%s structural variants" % dd.get_sample_name(data) summary_plots = validateplot.classifyplot_from_valfile(val_summary, outtype="png", title=title) diff --git a/bcbio/upload/__init__.py b/bcbio/upload/__init__.py index 05d4002c5..3151813fc 100644 --- a/bcbio/upload/__init__.py +++ b/bcbio/upload/__init__.py @@ -3,7 +3,6 @@ import datetime import os -import six import toolz as tz from bcbio import log, utils @@ -157,7 +156,7 @@ def _add_meta(xs, sample=None, config=None): """ out = [] for x in xs: - if not isinstance(x["path"], six.string_types) or not os.path.exists(x["path"]): + if not isinstance(x["path"], str) or not os.path.exists(x["path"]): raise ValueError("Unexpected path for upload: %s" % x) x["mtime"] = shared.get_file_timestamp(x["path"]) if sample: diff --git a/bcbio/upload/galaxy.py b/bcbio/upload/galaxy.py index 88947125a..e3e1e57eb 100644 --- a/bcbio/upload/galaxy.py +++ b/bcbio/upload/galaxy.py @@ -3,7 +3,6 @@ Required configurable variables in upload: dir """ -from __future__ import print_function import collections import os import shutil diff --git a/bcbio/utils.py b/bcbio/utils.py index 3927019ec..2b7a61970 100644 --- a/bcbio/utils.py +++ b/bcbio/utils.py @@ -1,5 +1,7 @@ """Helpful utilities for building analysis pipelines. """ + +import configparser import glob import gzip import os @@ -15,7 +17,6 @@ import sys import types -import six import toolz as tz import yaml @@ -284,7 +285,7 @@ def read_galaxy_amqp_config(galaxy_config, base_dir): """Read connection information on the RabbitMQ server from Galaxy config. """ galaxy_config = add_full_path(galaxy_config, base_dir) - config = six.moves.configparser.ConfigParser() + config = configparser.ConfigParser() config.read(galaxy_config) amqp_config = {} for option in config.options("galaxy_amqp"): @@ -379,15 +380,9 @@ def symlink_plus(orig, new): def open_gzipsafe(f, is_gz=False): if f.endswith(".gz") or is_gz: - if six.PY3: - return gzip.open(f, "rt", encoding="utf-8", errors="ignore") - else: - return gzip.open(f) + return gzip.open(f, "rt", encoding="utf-8", errors="ignore") else: - if six.PY3: - return open(f, encoding="utf-8", errors="ignore") - else: - return open(f) + return open(f, encoding="utf-8", errors="ignore") def is_empty_gzipsafe(f): h = open_gzipsafe(f) @@ -467,8 +462,8 @@ def partition(pred, iterable, tolist=False): 'Use a predicate to partition entries into false entries and true entries' # partition(is_odd, range(10)) --> 0 2 4 6 8 and 1 3 5 7 9 t1, t2 = itertools.tee(iterable) - ifalse = six.moves.filterfalse(pred, t1) - itrue = six.moves.filter(pred, t2) + ifalse = itertools.filterfalse(pred, t1) + itrue = filter(pred, t2) if tolist: return list(ifalse), list(itrue) else: @@ -572,7 +567,7 @@ def is_pair(arg): return is_sequence(arg) and len(arg) == 2 def is_string(arg): - return isinstance(arg, six.string_types) + return isinstance(arg, str) def locate(pattern, root=os.curdir): diff --git a/bcbio/variation/cortex.py b/bcbio/variation/cortex.py index 4ec546b7e..1dfb68227 100644 --- a/bcbio/variation/cortex.py +++ b/bcbio/variation/cortex.py @@ -7,7 +7,6 @@ http://cortexassembler.sourceforge.net/index_cortex_var.html """ -from __future__ import print_function import os import glob import subprocess diff --git a/bcbio/variation/effects.py b/bcbio/variation/effects.py index f20ed2831..446bde14e 100644 --- a/bcbio/variation/effects.py +++ b/bcbio/variation/effects.py @@ -10,7 +10,6 @@ import subprocess import string -import six import toolz as tz import yaml @@ -330,7 +329,7 @@ def get_db(data): snpeff_base_dir = None if snpeff_db: snpeff_base_dir = utils.get_in(data, ("reference", "snpeff")) - if not (isinstance(snpeff_base_dir, six.string_types) and os.path.isdir(snpeff_base_dir)): + if not (isinstance(snpeff_base_dir, str) and os.path.isdir(snpeff_base_dir)): snpeff_base_dir = utils.get_in(data, ("reference", "snpeff", snpeff_db)) if not snpeff_base_dir: # We need to mask '.' characters for CWL/WDL processing, check for them here @@ -339,9 +338,9 @@ def get_db(data): snpeff_db = snpeff_db.replace("_", ".") if isinstance(snpeff_base_dir, dict) and snpeff_base_dir.get("base"): snpeff_base_dir = snpeff_base_dir["base"] - if (snpeff_base_dir and isinstance(snpeff_base_dir, six.string_types) and os.path.isfile(snpeff_base_dir)): + if (snpeff_base_dir and isinstance(snpeff_base_dir, str) and os.path.isfile(snpeff_base_dir)): snpeff_base_dir = os.path.dirname(snpeff_base_dir) - if (snpeff_base_dir and isinstance(snpeff_base_dir, six.string_types) + if (snpeff_base_dir and isinstance(snpeff_base_dir, str) and snpeff_base_dir.endswith("%s%s" % (os.path.sep, snpeff_db))): snpeff_base_dir = os.path.dirname(snpeff_base_dir) if not snpeff_base_dir: diff --git a/bcbio/variation/freebayes.py b/bcbio/variation/freebayes.py index 0b8d3c525..5b44497f8 100644 --- a/bcbio/variation/freebayes.py +++ b/bcbio/variation/freebayes.py @@ -6,7 +6,6 @@ import os import sys -import six import toolz as tz from bcbio import utils @@ -51,7 +50,7 @@ def _freebayes_options_from_config(items, config, out_file, region=None): no_target_regions = False target = shared.subset_variant_regions(variant_regions, region, out_file, items) if target: - if isinstance(target, six.string_types) and os.path.isfile(target): + if isinstance(target, str) and os.path.isfile(target): if os.path.getsize(target) == 0: no_target_regions = True else: diff --git a/bcbio/variation/gatkfilter.py b/bcbio/variation/gatkfilter.py index d9e1dcf6d..456c8a61e 100644 --- a/bcbio/variation/gatkfilter.py +++ b/bcbio/variation/gatkfilter.py @@ -2,6 +2,7 @@ """ import os import gzip +import sys from distutils.version import LooseVersion from bcbio import broad, utils diff --git a/bcbio/variation/genotype.py b/bcbio/variation/genotype.py index 877372be5..f84e5e7cf 100644 --- a/bcbio/variation/genotype.py +++ b/bcbio/variation/genotype.py @@ -5,7 +5,6 @@ import copy import pprint -import six import toolz as tz from bcbio import bam, utils @@ -294,7 +293,7 @@ def handle_multiple_callers(data, key, default=None, require_bam=True): """Split samples that potentially require multiple variant calling approaches. """ callers = get_variantcaller(data, key, default, require_bam=require_bam) - if isinstance(callers, six.string_types): + if isinstance(callers, str): return [data] elif not callers: return [] @@ -309,7 +308,7 @@ def handle_multiple_callers(data, key, default=None, require_bam=True): # if splitting by variant caller, also split by jointcaller if key == "variantcaller": jcallers = get_variantcaller(data, "jointcaller", []) - if isinstance(jcallers, six.string_types): + if isinstance(jcallers, str): jcallers = [jcallers] if jcallers: base["config"]["algorithm"]["orig_jointcaller"] = jcallers diff --git a/bcbio/variation/octopus.py b/bcbio/variation/octopus.py index 1c602297a..a82aaa40e 100644 --- a/bcbio/variation/octopus.py +++ b/bcbio/variation/octopus.py @@ -6,7 +6,6 @@ import subprocess import pysam -import six from bcbio import utils from bcbio.distributed.transaction import file_transaction @@ -33,7 +32,7 @@ def _get_regions(region, out_file, items): vrs = bedutils.population_variant_regions(items) target = shared.subset_variant_regions(vrs, region, out_file, items=items, do_merge=True) if target: - if isinstance(target, six.string_types) and os.path.isfile(target): + if isinstance(target, str) and os.path.isfile(target): return "--regions-file %s" % target else: return "--regions %s" % bamprep.region_to_gatk(target) diff --git a/bcbio/variation/platypus.py b/bcbio/variation/platypus.py index ec157c53d..0d474a1a1 100644 --- a/bcbio/variation/platypus.py +++ b/bcbio/variation/platypus.py @@ -13,7 +13,6 @@ from bcbio.provenance import do from bcbio.variation import bamprep, bedutils, vcfutils -import six def run(align_bams, items, ref_file, assoc_files, region, out_file): @@ -66,7 +65,7 @@ def _subset_regions(region, base_file, items): """ variant_regions = bedutils.population_variant_regions(items, merged=True) target = pshared.subset_variant_regions(variant_regions, region, base_file, items) - if isinstance(target, six.string_types) and os.path.isfile(target): + if isinstance(target, str) and os.path.isfile(target): return target else: return bamprep.region_to_gatk(target) diff --git a/bcbio/variation/qsnp.py b/bcbio/variation/qsnp.py index 84cb45cf5..b58b576cb 100644 --- a/bcbio/variation/qsnp.py +++ b/bcbio/variation/qsnp.py @@ -3,7 +3,6 @@ https://qcmg.org/bioinformatics/tiki-index.php?page=qSNP#EXAMPLES """ -from __future__ import print_function import os import shutil from re import sub @@ -16,7 +15,6 @@ from bcbio.variation import annotation, bedutils from bcbio.variation.vcfutils import get_paired_bams, bgzip_and_index, combine_variant_files, PairedData -import six def is_installed(config): @@ -87,7 +85,7 @@ def _clean_regions(items, region): with utils.tmpfile() as tx_out_file: target = subset_variant_regions(variant_regions, region, tx_out_file, items) if target: - if isinstance(target, six.string_types) and os.path.isfile(target): + if isinstance(target, str) and os.path.isfile(target): target = _load_regions(target) else: target = [target] diff --git a/bcbio/variation/realign.py b/bcbio/variation/realign.py index 391d0baf3..300d0d01b 100644 --- a/bcbio/variation/realign.py +++ b/bcbio/variation/realign.py @@ -7,7 +7,6 @@ from bcbio.distributed.transaction import file_transaction from bcbio.pipeline.shared import subset_variant_regions -import six # ## GATK realignment @@ -79,14 +78,14 @@ def has_aligned_reads(align_bam, region=None): """ import pybedtools if region is not None: - if isinstance(region, six.string_types) and os.path.isfile(region): + if isinstance(region, str) and os.path.isfile(region): regions = [tuple(r) for r in pybedtools.BedTool(region)] else: regions = [region] with pysam.Samfile(align_bam, "rb") as cur_bam: if region is not None: for region in regions: - if isinstance(region, six.string_types): + if isinstance(region, str): for item in cur_bam.fetch(str(region)): return True else: diff --git a/bcbio/variation/samtools.py b/bcbio/variation/samtools.py index 2a72d5b0f..2677593fa 100644 --- a/bcbio/variation/samtools.py +++ b/bcbio/variation/samtools.py @@ -13,7 +13,6 @@ from bcbio.provenance import do, programs from bcbio.variation import annotation, bamprep, bedutils, vcfutils -import six def shared_variantcall(call_fn, name, align_bams, ref_file, items, @@ -31,7 +30,7 @@ def shared_variantcall(call_fn, name, align_bams, ref_file, items, name=name, region=region, fname=os.path.basename(align_bams[0]))) variant_regions = bedutils.population_variant_regions(items, merged=True) target_regions = subset_variant_regions(variant_regions, region, out_file, items=items) - if (variant_regions is not None and isinstance(target_regions, six.string_types) + if (variant_regions is not None and isinstance(target_regions, str) and not os.path.isfile(target_regions)): vcfutils.write_empty_vcf(out_file, config) else: diff --git a/bcbio/variation/scalpel.py b/bcbio/variation/scalpel.py index 6a9221678..3dd9dc6ab 100644 --- a/bcbio/variation/scalpel.py +++ b/bcbio/variation/scalpel.py @@ -3,7 +3,6 @@ https://sourceforge.net/p/scalpel/code/ci/master/tree/ """ -from __future__ import print_function import os import shutil @@ -20,14 +19,13 @@ from bcbio.variation import annotation, bedutils, vcfutils from bcbio.variation.vcfutils import get_paired_bams, is_paired_analysis, bgzip_and_index -import six def _scalpel_bed_file_opts(items, config, out_file, region, tmp_path): variant_regions = bedutils.population_variant_regions(items) target = shared.subset_variant_regions(variant_regions, region, out_file, items) if target: - if isinstance(target, six.string_types) and os.path.isfile(target): + if isinstance(target, str) and os.path.isfile(target): target_bed = target else: target_bed = os.path.join(tmp_path, "tmp.bed") diff --git a/bcbio/variation/sentieon.py b/bcbio/variation/sentieon.py index 373cf2615..e0985688c 100644 --- a/bcbio/variation/sentieon.py +++ b/bcbio/variation/sentieon.py @@ -19,7 +19,6 @@ from bcbio.provenance import do from bcbio.variation import bamprep, bedutils, joint, vcfutils -import six def license_export(data): @@ -34,7 +33,7 @@ def license_export(data): "environmental variables to export \n" "http://bcbio-nextgen.readthedocs.io/en/latest/contents/configuration.html#resources\n" "Configuration: %s" % pprint.pformat(data)) - if isinstance(server, six.string_types): + if isinstance(server, str): return "export SENTIEON_LICENSE=%s && " % server else: assert isinstance(server, dict), server @@ -50,7 +49,7 @@ def _get_interval(variant_regions, region, out_file, items): """ target = shared.subset_variant_regions(variant_regions, region, out_file, items) if target: - if isinstance(target, six.string_types) and os.path.isfile(target): + if isinstance(target, str) and os.path.isfile(target): return "--interval %s" % target else: return "--interval %s" % bamprep.region_to_gatk(target) diff --git a/bcbio/variation/strelka2.py b/bcbio/variation/strelka2.py index 801809077..1af40f139 100644 --- a/bcbio/variation/strelka2.py +++ b/bcbio/variation/strelka2.py @@ -2,7 +2,6 @@ """ import collections import os -import six import sys import numpy as np @@ -40,7 +39,7 @@ def get_region_bed(region, items, out_file, want_gzip=True): target = shared.subset_variant_regions(variant_regions, region, out_file, items) if not target: raise ValueError("Need BED input for strelka2 regions: %s %s" % (region, target)) - if not isinstance(target, six.string_types) or not os.path.isfile(target): + if not isinstance(target, str) or not os.path.isfile(target): chrom, start, end = target target = "%s-regions.bed" % utils.splitext_plus(out_file)[0] with file_transaction(items[0], target) as tx_out_file: diff --git a/bcbio/variation/validate.py b/bcbio/variation/validate.py index 682f95af4..1f4db2d9f 100644 --- a/bcbio/variation/validate.py +++ b/bcbio/variation/validate.py @@ -14,7 +14,6 @@ import time from pysam import VariantFile -import six import toolz as tz import yaml @@ -548,7 +547,7 @@ def _group_validate_samples(samples, vkey, batch_keys): if is_v: for batch_key in batch_keys: vname = tz.get_in(batch_key, data) - if vname and not (isinstance(vname, six.string_types) and vname.lower() in ["none", "false"]): + if vname and not (isinstance(vname, str) and vname.lower() in ["none", "false"]): break if isinstance(vname, (list, tuple)): vname = vname[0] diff --git a/bcbio/variation/vardict.py b/bcbio/variation/vardict.py index a5509be9d..41bdcb3b6 100644 --- a/bcbio/variation/vardict.py +++ b/bcbio/variation/vardict.py @@ -11,14 +11,12 @@ specify 'vardict-perl'. """ -from decimal import * +from decimal import Decimal from distutils.version import LooseVersion import os import sys -from six.moves import zip -import six import toolz as tz import pybedtools @@ -31,7 +29,7 @@ from bcbio.variation import bamprep, bedutils, vcfutils def _is_bed_file(target): - return target and isinstance(target, six.string_types) and os.path.isfile(target) + return target and isinstance(target, str) and os.path.isfile(target) def _vardict_options_from_config(items, config, out_file, target=None, is_rnaseq=False): var2vcf_opts = [] diff --git a/bcbio/variation/vcfanno.py b/bcbio/variation/vcfanno.py index 4d792bc2e..e2280c953 100644 --- a/bcbio/variation/vcfanno.py +++ b/bcbio/variation/vcfanno.py @@ -2,7 +2,6 @@ """ import os -import six import toolz as tz from bcbio import utils @@ -79,7 +78,7 @@ def _find_file(xs, target): f = _find_file(x, target) if f: return f - elif isinstance(xs, six.string_types) and os.path.exists(xs) and xs.endswith("/%s" % target): + elif isinstance(xs, str) and os.path.exists(xs) and xs.endswith("/%s" % target): return xs orig_file = line.split("=")[-1].replace('"', '').strip() full_file = _find_file(data, os.path.basename(orig_file)) diff --git a/bcbio/variation/vcfutils.py b/bcbio/variation/vcfutils.py index ac9a36c63..85fa8e415 100644 --- a/bcbio/variation/vcfutils.py +++ b/bcbio/variation/vcfutils.py @@ -10,9 +10,6 @@ import toolz as tz -import six -from six.moves import zip - from bcbio import broad, utils from bcbio.bam import ref from bcbio.distributed.multi import run_multicore, zeromq_aware_logging @@ -376,7 +373,7 @@ def _sort_by_region(fnames, regions, ref_file, config): if fname not in added_fnames: if isinstance(region, (list, tuple)): c, s, e = region - elif isinstance(region, six.string_types) and region.find(":") >= 0: + elif isinstance(region, str) and region.find(":") >= 0: c, coords = region.split(":") s, e = [int(x) for x in coords.split("-")] else: diff --git a/bcbio/workflow/template.py b/bcbio/workflow/template.py index ca4812b53..122c2c4b4 100644 --- a/bcbio/workflow/template.py +++ b/bcbio/workflow/template.py @@ -4,7 +4,6 @@ YAML template. Default templates are provided for common approaches which can be tweaked as needed. """ -from __future__ import print_function import collections import contextlib import copy @@ -15,9 +14,9 @@ import itertools import os import shutil -from six.moves import urllib +import urllib.error +import urllib.request -import six import toolz as tz import yaml import sys @@ -241,7 +240,7 @@ def _set_global_vars(metadata): fnames = collections.defaultdict(list) for sample in metadata.keys(): for k, v in metadata[sample].items(): - if isinstance(v, six.string_types) and os.path.isfile(v): + if isinstance(v, str) and os.path.isfile(v): v = _expand_file(v) metadata[sample][k] = v fnames[v].append(k) @@ -257,7 +256,7 @@ def _set_global_vars(metadata): # global_vars[name] = fname # for sample in metadata.keys(): # for k, v in metadata[sample].items(): - # if isinstance(v, six.string_types) and v in global_var_sub: + # if isinstance(v, str) and v in global_var_sub: # metadata[sample][k] = global_var_sub[v] return metadata, global_vars @@ -267,7 +266,7 @@ def _clean_string(v, sinfo): if isinstance(v, (list, tuple)): return [_clean_string(x, sinfo) for x in v] else: - assert isinstance(v, six.string_types), v + assert isinstance(v, str), v try: if hasattr(v, "decode"): return str(v.decode("ascii")) @@ -499,7 +498,7 @@ def _convert_to_relpaths(data, work_dir): data["files"] = [os.path.relpath(f, work_dir) for f in data["files"]] for topk in ["metadata", "algorithm"]: for k, v in data[topk].items(): - if isinstance(v, six.string_types) and os.path.isfile(v) and os.path.isabs(v): + if isinstance(v, str) and os.path.isfile(v) and os.path.isabs(v): data[topk][k] = os.path.relpath(v, work_dir) return data diff --git a/scripts/bcbio_fastq_umi_prep.py b/scripts/bcbio_fastq_umi_prep.py index 4e59799d2..ffcad1276 100755 --- a/scripts/bcbio_fastq_umi_prep.py +++ b/scripts/bcbio_fastq_umi_prep.py @@ -31,7 +31,6 @@ If you're using a different approach to generate the UMIs, please maintain the same R1/R2/R3 naming scheme. """ -from __future__ import print_function import argparse import math import os diff --git a/scripts/bcbio_nextgen.py b/scripts/bcbio_nextgen.py index 75bd49118..abe303c3a 100755 --- a/scripts/bcbio_nextgen.py +++ b/scripts/bcbio_nextgen.py @@ -26,7 +26,6 @@ -s scheduler for ipython parallelization (lsf, sge, slurm, torque, pbspro) -q queue to submit jobs for ipython parallelization """ -from __future__ import print_function import os import argparse import sys diff --git a/scripts/bcbio_nextgen_install.py b/scripts/bcbio_nextgen_install.py index 845f7bbfd..8647e8717 100755 --- a/scripts/bcbio_nextgen_install.py +++ b/scripts/bcbio_nextgen_install.py @@ -6,7 +6,6 @@ Requires: git, wget, bgzip2, Python 3 or 2.7 """ -from __future__ import print_function import argparse import collections import contextlib diff --git a/scripts/bcbio_setup_genome.py b/scripts/bcbio_setup_genome.py index 17a4b6365..f2f65ae34 100755 --- a/scripts/bcbio_setup_genome.py +++ b/scripts/bcbio_setup_genome.py @@ -2,7 +2,6 @@ """ Script to set up a custom genome for bcbio-nextgen """ -from __future__ import print_function from argparse import ArgumentParser import collections diff --git a/scripts/cwl/arvados_bcbio_runtimes.py b/scripts/cwl/arvados_bcbio_runtimes.py index d118eef67..1cf040596 100644 --- a/scripts/cwl/arvados_bcbio_runtimes.py +++ b/scripts/cwl/arvados_bcbio_runtimes.py @@ -11,6 +11,7 @@ import os import pprint import sys +from functools import reduce import arrow diff --git a/scripts/cwltool2wdl.py b/scripts/cwltool2wdl.py index b811951e6..771e7c73c 100755 --- a/scripts/cwltool2wdl.py +++ b/scripts/cwltool2wdl.py @@ -22,7 +22,6 @@ - Associate secondary files (like `bai`, `tbi`) with primary file ('bam`, 'vcf.gz`) https://github.com/broadinstitute/cromwell/issues/2269 """ -from __future__ import print_function import collections import os import subprocess @@ -233,7 +232,7 @@ def _variable_type_to_read_fn(vartype, records): return fn_map[vartype] def _arg_to_dict(x, requirements): - if isinstance(x, basestring): + if isinstance(x, str): return {"prefix": "", "position": None, "value": x} elif isinstance(x, dict) and "valueFrom" in x and x["valueFrom"].startswith("sentinel_runtime"): for r in requirements: