| Previous changeset 1:3ee6f4d0ac80 (2017-05-16) |
|
Commit message:
"Update all the pico_galaxy tools on main Tool Shed" |
|
modified:
tools/count_roi_variants/README.rst tools/count_roi_variants/count_roi_variants.py tools/count_roi_variants/count_roi_variants.xml tools/count_roi_variants/tool_dependencies.xml |
| b |
| diff -r 3ee6f4d0ac80 -r 167765a633c0 tools/count_roi_variants/README.rst --- a/tools/count_roi_variants/README.rst Tue May 16 09:14:18 2017 -0400 +++ b/tools/count_roi_variants/README.rst Fri Apr 16 22:34:00 2021 +0000 |
| b |
| @@ -83,6 +83,7 @@ v0.0.5 - Fix samtools dependency version inconsistency, using v1.2 now. - Use ``<command detect_errors="aggressive">`` (internal change only). - Single quote command line arguments (internal change only). +v0.0.6 - Python 3 compatibility fix. ======= ====================================================================== |
| b |
| diff -r 3ee6f4d0ac80 -r 167765a633c0 tools/count_roi_variants/count_roi_variants.py --- a/tools/count_roi_variants/count_roi_variants.py Tue May 16 09:14:18 2017 -0400 +++ b/tools/count_roi_variants/count_roi_variants.py Fri Apr 16 22:34:00 2021 +0000 |
| [ |
| b'@@ -20,7 +20,7 @@\n \n if "-v" in sys.argv or "--version" in sys.argv:\n # Galaxy seems to invert the order of the two lines\n- print("BAM coverage statistics v0.0.4 (using samtools)")\n+ print("BAM coverage statistics v0.0.6 (using samtools)")\n cmd = "samtools 2>&1 | grep -i ^Version"\n sys.exit(os.system(cmd))\n \n@@ -37,7 +37,8 @@\n AGCCCATGAGATGGGAAGCAATGGGCTACA\t14\t87.50\n AGCCCATGAGATGGGAAGCAATGGGCTACG\t1\t6.25\n AGCGCATGAGATGGGAAGCAATGGGCTACG\t1\t6.25\n-"""\n+""" # noqa: E501\n+\n if len(sys.argv) == 5:\n bam_filename, bai_filename, tabular_filename, region = sys.argv[1:]\n else:\n@@ -84,7 +85,7 @@\n \n \n def decode_cigar(cigar):\n- """Returns a list of 2-tuples, integer count and operator char."""\n+ """Return a list of 2-tuples, integer count and operator char."""\n count = ""\n answer = []\n for letter in cigar:\n@@ -98,30 +99,38 @@\n return answer\n \n \n-assert decode_cigar("14S15M1P1D3P54M1D34M5S") == [(14, \'S\'), (15, \'M\'), (1, \'P\'),\n- (1, \'D\'), (3, \'P\'), (54, \'M\'),\n- (1, \'D\'), (34, \'M\'), (5, \'S\')]\n+assert decode_cigar("14S15M1P1D3P54M1D34M5S") == [\n+ (14, "S"),\n+ (15, "M"),\n+ (1, "P"),\n+ (1, "D"),\n+ (3, "P"),\n+ (54, "M"),\n+ (1, "D"),\n+ (34, "M"),\n+ (5, "S"),\n+]\n \n \n def align_len(cigar_ops):\n- """Sums the CIGAR M/=/X/D/N operators."""\n+ """Sum the CIGAR M/=/X/D/N operators."""\n return sum(count for count, op in cigar_ops if op in "M=XDN")\n \n \n def expand_cigar(seq, cigar_ops):\n- """Yields (ref_offset, seq_base) pairs."""\n+ """Yield (ref_offset, seq_base) pairs."""\n ref_offset = 0\n seq_offset = 0\n for count, op in cigar_ops:\n if op in "MX=":\n- for (i, base) in enumerate(seq[seq_offset:seq_offset + count]):\n+ for (i, base) in enumerate(seq[seq_offset : seq_offset + count]):\n yield ref_offset + i, base\n ref_offset += count\n seq_offset += count\n elif op == "I":\n # Give them all an in-between reference position\n # (Python lets us mix integers and floats, wouldn\'t work in C)\n- for (i, base) in enumerate(seq[seq_offset:seq_offset + count]):\n+ for (i, base) in enumerate(seq[seq_offset : seq_offset + count]):\n yield ref_offset - 0.5, base\n # Does not change ref_offset\n seq_offset += count\n@@ -142,31 +151,105 @@\n raise NotImplementedError("Unexpected CIGAR operator %s" % op)\n \n \n-assert list(expand_cigar("ACGT", decode_cigar("4M"))) == [(0, "A"), (1, "C"), (2, "G"), (3, "T")]\n-assert list(expand_cigar("ACGT", decode_cigar("2=1X1="))) == [(0, "A"), (1, "C"), (2, "G"), (3, "T")]\n-assert list(expand_cigar("ACGT", decode_cigar("2M1D2M"))) == [(0, "A"), (1, "C"), (3, "G"), (4, "T")]\n-assert list(expand_cigar("ACtGT", decode_cigar("2M1I2M"))) == [(0, "A"), (1, "C"), (1.5, "t"), (2, "G"), (3, "T")]\n-assert list(expand_cigar("tACGT", decode_cigar("1I4M"))) == [(-0.5, \'t\'), (0, \'A\'), (1, \'C\'), (2, \'G\'), (3, \'T\')]\n-assert list(expand_cigar("ACGTt", decode_cigar("4M1I"))) == [(0, \'A\'), (1, \'C\'), (2, \'G\'), (3, \'T\'), (3.5, \'t\')]\n-assert list(expand_cigar("AAAAGGGGTTTT", decode_cigar("12M"))) == [(0, \'A\'), (1, \'A\'), (2, \'A\'),\n- (3, \'A\'), (4, \'G\'), (5, \'G\'),\n- (6, \'G\'), (7, \'G\'), (8, \'T\'),\n- (9, \'T\'), (10, \'T\'), (11, \'T\')]\n-assert list(expand_cigar("AAAAcGGGGTTTT", decode_cigar("4M1I8M"))) == [(0, \'A\'), (1, \'A\'), (2, \'A\'),\n- (3, \'A\'), (3.5, \'c\'), (4, \'G\'),\n- (5, \'G\'), (6, \'G\'), (7, \'G\'),\n- (8, \'T\'), (9, '..b'0, "A"),\n+ (1, "C"),\n+ (2, "G"),\n+ (3, "T"),\n+]\n+assert list(expand_cigar("ACGT", decode_cigar("2M1D2M"))) == [\n+ (0, "A"),\n+ (1, "C"),\n+ (3, "G"),\n+ (4, "T"),\n+]\n+assert list(expand_cigar("ACtGT", decode_cigar("2M1I2M"))) == [\n+ (0, "A"),\n+ (1, "C"),\n+ (1.5, "t"),\n+ (2, "G"),\n+ (3, "T"),\n+]\n+assert list(expand_cigar("tACGT", decode_cigar("1I4M"))) == [\n+ (-0.5, "t"),\n+ (0, "A"),\n+ (1, "C"),\n+ (2, "G"),\n+ (3, "T"),\n+]\n+assert list(expand_cigar("ACGTt", decode_cigar("4M1I"))) == [\n+ (0, "A"),\n+ (1, "C"),\n+ (2, "G"),\n+ (3, "T"),\n+ (3.5, "t"),\n+]\n+assert list(expand_cigar("AAAAGGGGTTTT", decode_cigar("12M"))) == [\n+ (0, "A"),\n+ (1, "A"),\n+ (2, "A"),\n+ (3, "A"),\n+ (4, "G"),\n+ (5, "G"),\n+ (6, "G"),\n+ (7, "G"),\n+ (8, "T"),\n+ (9, "T"),\n+ (10, "T"),\n+ (11, "T"),\n+]\n+assert list(expand_cigar("AAAAcGGGGTTTT", decode_cigar("4M1I8M"))) == [\n+ (0, "A"),\n+ (1, "A"),\n+ (2, "A"),\n+ (3, "A"),\n+ (3.5, "c"),\n+ (4, "G"),\n+ (5, "G"),\n+ (6, "G"),\n+ (7, "G"),\n+ (8, "T"),\n+ (9, "T"),\n+ (10, "T"),\n+ (11, "T"),\n+]\n+assert list(expand_cigar("AAAAGGGGcTTTT", decode_cigar("8M1I4M"))) == [\n+ (0, "A"),\n+ (1, "A"),\n+ (2, "A"),\n+ (3, "A"),\n+ (4, "G"),\n+ (5, "G"),\n+ (6, "G"),\n+ (7, "G"),\n+ (7.5, "c"),\n+ (8, "T"),\n+ (9, "T"),\n+ (10, "T"),\n+ (11, "T"),\n+]\n+assert list(expand_cigar("AAAAcGGGGcTTTT", decode_cigar("4M1I4M1I4M"))) == [\n+ (0, "A"),\n+ (1, "A"),\n+ (2, "A"),\n+ (3, "A"),\n+ (3.5, "c"),\n+ (4, "G"),\n+ (5, "G"),\n+ (6, "G"),\n+ (7, "G"),\n+ (7.5, "c"),\n+ (8, "T"),\n+ (9, "T"),\n+ (10, "T"),\n+ (11, "T"),\n+]\n \n \n def get_roi(seq, cigar_ops, start, end):\n@@ -184,7 +267,9 @@\n return seq[start:end]\n # Would use "start <= i < end" if they were all integers, but\n # want to exclude e.g. 3.5 and 7.5 when given start 4 and end 8.\n- return "".join(base for i, base in expand_cigar(seq, cigar_ops) if start <= i <= end - 1)\n+ return "".join(\n+ base for i, base in expand_cigar(seq, cigar_ops) if start <= i <= end - 1\n+ )\n \n \n assert "GGGG" == get_roi("AAAAGGGGTTTT", decode_cigar("12M"), 4, 8)\n@@ -203,15 +288,31 @@\n # Could recreate the region string (with no commas in start/end)?\n # region = "%s:%i-%i" % (ref, start, end)\n \n- tally = dict()\n+ tally = {}\n \n # Call samtools view, don\'t need header so no -h added.\n # Only want mapped reads, thus flag filter -F 4.\n- child = subprocess.Popen(["samtools", "view", "-F", "4", bam_file, region],\n- stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n+ child = subprocess.Popen(\n+ ["samtools", "view", "-F", "4", bam_file, region],\n+ universal_newlines=True,\n+ stdout=subprocess.PIPE,\n+ stderr=subprocess.PIPE,\n+ )\n for line in child.stdout:\n assert line[0] != "@", "Got unexpected SAM header line: %s" % line\n- qname, flag, rname, pos, mapq, cigar, rnext, pnext, tlen, seq, rest = line.split("\\t", 10)\n+ (\n+ qname,\n+ flag,\n+ rname,\n+ pos,\n+ mapq,\n+ cigar,\n+ rnext,\n+ pnext,\n+ tlen,\n+ seq,\n+ rest,\n+ ) = line.split("\\t", 10)\n pos = int(pos) # one-based\n if start < pos:\n # Does not span the ROI\n@@ -235,9 +336,11 @@\n if return_code:\n sys.exit("Got return code %i from samtools view" % return_code)\n elif "specifies an unknown reference name. Continue anyway." in stderr:\n- sys.exit(stderr.strip() +\n- "\\n\\nERROR: samtools did not recognise the region requested, "\n- "can\'t count any variants.")\n+ sys.exit(\n+ stderr.strip()\n+ + "\\n\\nERROR: samtools did not recognise the region requested, "\n+ "can\'t count any variants."\n+ )\n \n return tally\n \n' |
| b |
| diff -r 3ee6f4d0ac80 -r 167765a633c0 tools/count_roi_variants/count_roi_variants.xml --- a/tools/count_roi_variants/count_roi_variants.xml Tue May 16 09:14:18 2017 -0400 +++ b/tools/count_roi_variants/count_roi_variants.xml Fri Apr 16 22:34:00 2021 +0000 |
| b |
| @@ -1,4 +1,4 @@ -<tool id="count_roi_variants" name="Count sequence variants in region of interest" version="0.0.5"> +<tool id="count_roi_variants" name="Count sequence variants in region of interest" version="0.0.6"> <description>using samtools view</description> <requirements> <requirement type="package" version="1.2">samtools</requirement> @@ -96,7 +96,7 @@ Heng Li et al (2009). The Sequence Alignment/Map format and SAMtools. Bioinformatics 25(16), 2078-9. -http://dx.doi.org/10.1093/bioinformatics/btp352 +https://doi.org/10.1093/bioinformatics/btp352 Peter J.A. Cock (2016), Count sequence variants in region of interest in BAM file. http://toolshed.g2.bx.psu.edu/view/peterjc/count_roi_variants |
| b |
| diff -r 3ee6f4d0ac80 -r 167765a633c0 tools/count_roi_variants/tool_dependencies.xml --- a/tools/count_roi_variants/tool_dependencies.xml Tue May 16 09:14:18 2017 -0400 +++ b/tools/count_roi_variants/tool_dependencies.xml Fri Apr 16 22:34:00 2021 +0000 |
| b |
| @@ -1,6 +1,6 @@ -<?xml version="1.0"?> +<?xml version="1.0" ?> <tool_dependency> <package name="samtools" version="1.2"> - <repository changeset_revision="f6ae3ba3f3c1" name="package_samtools_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + <repository name="package_samtools_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" changeset_revision="f6ae3ba3f3c1"/> </package> -</tool_dependency> +</tool_dependency> \ No newline at end of file |