Repository 'varscan_mpileup'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/varscan_mpileup

Changeset 9:e3f170cc4f95 (2019-08-16)
Previous changeset 8:45288fb1f3f5 (2019-03-28) Next changeset 10:db94eadb92c1 (2021-12-04)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/iuc/tree/master/tools/varscan commit fcf5ac14629c694f0f64773fab0428b1e78fe156"
modified:
macros.xml
varscan.py
b
diff -r 45288fb1f3f5 -r e3f170cc4f95 macros.xml
--- a/macros.xml Thu Mar 28 18:19:19 2019 -0400
+++ b/macros.xml Fri Aug 16 15:50:25 2019 -0400
b
@@ -56,9 +56,9 @@
         <param argument="--min-coverage" name="min_coverage" type="integer" value="8" min="1" max="200"
             label="Minimum coverage" help="@HELP@"/>
     </xml>
-    <xml name="min_reads2">
+    <xml name="min_reads2" token_help="Minimum number (default: 2) of variant-supporting reads at a position required to make a call">
         <param argument="--min-reads2" name="min_reads2" type="integer" value="2" min="1" max="200"
-            label="Minimum supporting reads" help="Minimum number of variant-supporting reads at a position required to make a call"/>
+            label="Minimum supporting reads" help="@HELP@"/>
     </xml>
     <xml name="min_avg_qual">
         <param argument="--min-avg-qual" name="min_avg_qual" type="integer" value="15" min="1" max="50"
@@ -68,12 +68,12 @@
     <xml name="min_var_freq" token_value="0.01">
         <param argument="--min-var-freq" name="min_var_freq" type="float" value="@VALUE@" min="0" max="1"
             label="Minimum variant allele frequency"
-            help="Minimum variant allele frequency required for calling a variant"/>
+            help="Minimum variant allele frequency (default: @VALUE@) required for calling a variant"/>
     </xml>
     <xml name="min_freq_for_hom">
         <param argument="--min-freq-for-hom" name="min_freq_for_hom" type="float" value="0.75" min="0" max="1"
             label="Minimum homozygous variant allele frequency"
-            help="Minimum variant allele frequency required for calling a homozygous genotype" />
+            help="Minimum variant allele frequency (default: 0.75) required for calling a homozygous genotype" />
     </xml>
     <xml name="p_value" token_value="0.01"
     token_label="p-value threshold for calling variants"
b
diff -r 45288fb1f3f5 -r e3f170cc4f95 varscan.py
--- a/varscan.py Thu Mar 28 18:19:19 2019 -0400
+++ b/varscan.py Fri Aug 16 15:50:25 2019 -0400
[
b'@@ -8,6 +8,7 @@\n import sys\n import tempfile\n import time\n+from collections import namedtuple\n from contextlib import ExitStack\n from functools import partial\n from threading import Thread\n@@ -15,6 +16,332 @@\n import pysam\n \n \n+AlleleStats = namedtuple(\n+    "AlleleStats",\n+    [\n+        \'reads_total\',\n+        \'reads_fw\',\n+        \'reads_rv\',\n+        \'avg_mapqual\',\n+        \'avg_basequal\',\n+        \'avg_dist_from_center\',\n+        \'avg_mismatch_fraction\',\n+        \'avg_mismatch_qualsum\',\n+        \'avg_clipped_len\',\n+        \'avg_dist_from_3prime\'\n+    ]\n+)\n+\n+\n+def _get_allele_specific_pileup_column_stats(pileups, ref_fetch,\n+                                             ignore_md, ignore_nm,\n+                                             mm_runs, detect_q2_runs):\n+    var_reads_plus = var_reads_minus = 0\n+    sum_mapping_qualities = 0\n+    sum_base_qualities = 0\n+    sum_dist_from_center = 0\n+    sum_dist_from_3prime = 0\n+    sum_clipped_length = 0\n+    sum_unclipped_length = 0\n+    sum_mismatch_fractions = 0\n+    sum_mismatch_qualities = 0\n+\n+    for p in pileups:\n+        if p.alignment.is_reverse:\n+            var_reads_minus += 1\n+        else:\n+            var_reads_plus += 1\n+        sum_mapping_qualities += p.alignment.mapping_quality\n+        sum_base_qualities += p.alignment.query_qualities[p.query_position]\n+        sum_clipped_length += p.alignment.query_alignment_length\n+        unclipped_length = p.alignment.query_length\n+        sum_unclipped_length += unclipped_length\n+        # The following calculations are all in 1-based coordinates\n+        # with respect to the physical 5\'-end of the read sequence.\n+        if p.alignment.is_reverse:\n+            read_base_pos = unclipped_length - p.query_position\n+            read_first_aln_pos = (\n+                unclipped_length - p.alignment.query_alignment_end + 1\n+            )\n+            read_last_aln_pos = (\n+                unclipped_length - p.alignment.query_alignment_start\n+            )\n+            qualities_3to5prime = p.alignment.query_alignment_qualities\n+        else:\n+            read_base_pos = p.query_position + 1\n+            read_first_aln_pos = p.alignment.query_alignment_start + 1\n+            read_last_aln_pos = p.alignment.query_alignment_end\n+            qualities_3to5prime = reversed(\n+                p.alignment.query_alignment_qualities\n+            )\n+\n+        read_last_effective_aln_pos = read_last_aln_pos\n+        if detect_q2_runs:\n+            # Note: the original bam-readcount algorithm always takes\n+            # terminal q2 runs into account when determining the\n+            # effective 3\'-ends of reads.\n+            # However, q2 runs have no special meaning since Illumina\n+            # pipeline version 1.8 so detecting them is optional\n+            # in this code.\n+            for qual in qualities_3to5prime:\n+                if qual != 2:\n+                    break\n+                read_last_effective_aln_pos -= 1\n+\n+        # Note: the original bam-readcount algorithm defines the\n+        # read center as:\n+        # read_center = p.alignment.query_alignment_length / 2\n+        # This is less accurate than the implementation here.\n+        read_center = (read_first_aln_pos + read_last_aln_pos) / 2\n+        sum_dist_from_center += 1 - abs(\n+            read_base_pos - read_center\n+        ) / (read_center - 1)\n+        # To calculate the distance of base positions from the 3\'-ends\n+        # of reads bam-readcount uses the formula:\n+        # sum_dist_from_3prime += abs(\n+        #     read_last_effective_aln_pos - read_base_pos\n+        # ) / (unclipped_length - 1)\n+        # , which treats base positions on both sides of the effective\n+        # 3\'-end equally. Since this seems hard to justify, we cap\n+        # the distance calculation at 0 for base positions past the\n+        # effective 3\'-end (which, in turn, should only be possible\n+        # with detection of q2 runs).\n+        if read_last_effective_aln_pos > read_b'..b'            / ref_stats.avg_clipped_len\n                         ) > max_relative_len_diff:\n                             record.filter.add(\'ReadLenDiff\')\n                 else:\n@@ -994,6 +1233,8 @@\n     instance_args = {\n         k: args.pop(k) for k in [\n             \'max_depth\',\n+            \'count_orphans\',\n+            \'detect_overlaps\',\n             \'min_mapqual\',\n             \'min_basequal\',\n             \'threads\',\n@@ -1046,7 +1287,7 @@\n     )\n     p.add_argument(\n         \'-s\', \'--split-output\',\n-        dest=\'split_output\', action=\'store_true\', default=False,\n+        dest=\'split_output\', action=\'store_true\',\n         help=\'indicate that separate output files for SNPs and indels \'\n              \'should be generated (original VarScan behavior). If specified, \'\n              \'%%T in the --ofile file name will be replaced with "snp" and \'\n@@ -1090,6 +1331,20 @@\n              \'default: 8000)\'\n     )\n     call_group.add_argument(\n+        \'--count-orphans\',\n+        dest=\'count_orphans\', action=\'store_true\',\n+        help=\'Use anomalous read pairs in variant calling \'\n+             \'(samtools mpileup -A option; default: ignore anomalous pairs)\'\n+    )\n+    call_group.add_argument(\n+        \'--no-detect-overlaps\',\n+        dest=\'detect_overlaps\', action=\'store_false\',\n+        help=\'Disable automatic read-pair overlap detection by samtools \'\n+             \'mpileup. Overlap detection tries to avoid counting duplicated \'\n+             \'bases twice during variant calling. \'\n+             \'(samtools mpileup -x option; default: use overlap detection)\'\n+    )\n+    call_group.add_argument(\n         \'--min-basequal\',\n         dest=\'min_basequal\', type=int,\n         default=13,\n@@ -1160,7 +1415,15 @@\n         dest=\'min_var_count2_lc\', type=int,\n         default=2,\n         help=\'Minimum number of variant-supporting reads when depth below \'\n-             \'--somatic-p-depth (default: 2)\'\n+             \'--min-var-count2-depth (default: 2)\'\n+    )\n+    filter_group.add_argument(\n+        \'--min-var-count2-depth\',\n+        dest=\'min_var_count2_depth\', type=int,\n+        default=10,\n+        help=\'Combined depth of ref- and variant-supporting reads required to \'\n+             \'apply the --min-var-count filter instead of --min-var-count-lc \'\n+             \'(default: 10)\'\n     )\n     filter_group.add_argument(\n         \'--min-var-freq2\',\n@@ -1169,18 +1432,6 @@\n         help=\'Minimum variant allele frequency (default: 0.05)\'\n     )\n     filter_group.add_argument(\n-        \'--max-somatic-p\',\n-        dest=\'max_somatic_p\', type=float,\n-        default=0.05,\n-        help=\'Maximum somatic p-value (default: 0.05)\'\n-    )\n-    filter_group.add_argument(\n-        \'--max-somatic-p-depth\',\n-        dest=\'max_somatic_p_depth\', type=int,\n-        default=10,\n-        help=\'Depth required to run --max-somatic-p filter (default: 10)\'\n-    )\n-    filter_group.add_argument(\n         \'--min-ref-readpos\',\n         dest=\'min_ref_readpos\', type=float,\n         default=0.1,\n@@ -1209,6 +1460,13 @@\n              \'3\\\'end of variant-supporting reads (default: 0.1)\'\n     )\n     filter_group.add_argument(\n+        \'--detect-q2-runs\',\n+        dest=\'detect_q2_runs\', action=\'store_true\',\n+        help=\'Look for 3\\\'-terminal q2 runs and take their lengths into \'\n+             \'account for determining the effective 3\\\'end of reads \'\n+             \'(default: off)\'\n+    )\n+    filter_group.add_argument(\n         \'--min-ref-len\',\n         dest=\'min_ref_len\', type=int,\n         default=90,\n@@ -1309,5 +1567,13 @@\n         default=50,\n         help=\'Maximum mismatch quality sum difference (var - ref; default: 50)\'\n     )\n+    filter_group.add_argument(\n+        \'--ignore-md\',\n+        dest=\'ignore_md\', action=\'store_true\',\n+        help=\'Do not rely on read MD tag, even if it is present on the mapped \'\n+             \'reads, and recalculate mismatch quality stats from ref \'\n+             \'alignments instead.\'\n+    )\n+\n     args = vars(p.parse_args())\n     varscan_call(**args)\n'