Repository 'varscan_somatic'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/varscan_somatic

Changeset 9:4e97191a1ff7 (2019-08-16)
Previous changeset 8:b79bb8b09822 (2019-03-28) Next changeset 10:a57606054bd7 (2020-01-18)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/iuc/tree/master/tools/varscan commit fcf5ac14629c694f0f64773fab0428b1e78fe156"
modified:
macros.xml
varscan.py
varscan_somatic.xml
b
diff -r b79bb8b09822 -r 4e97191a1ff7 macros.xml
--- a/macros.xml Thu Mar 28 18:19:00 2019 -0400
+++ b/macros.xml Fri Aug 16 15:49:54 2019 -0400
b
@@ -56,9 +56,9 @@
         <param argument="--min-coverage" name="min_coverage" type="integer" value="8" min="1" max="200"
             label="Minimum coverage" help="@HELP@"/>
     </xml>
-    <xml name="min_reads2">
+    <xml name="min_reads2" token_help="Minimum number (default: 2) of variant-supporting reads at a position required to make a call">
         <param argument="--min-reads2" name="min_reads2" type="integer" value="2" min="1" max="200"
-            label="Minimum supporting reads" help="Minimum number of variant-supporting reads at a position required to make a call"/>
+            label="Minimum supporting reads" help="@HELP@"/>
     </xml>
     <xml name="min_avg_qual">
         <param argument="--min-avg-qual" name="min_avg_qual" type="integer" value="15" min="1" max="50"
@@ -68,12 +68,12 @@
     <xml name="min_var_freq" token_value="0.01">
         <param argument="--min-var-freq" name="min_var_freq" type="float" value="@VALUE@" min="0" max="1"
             label="Minimum variant allele frequency"
-            help="Minimum variant allele frequency required for calling a variant"/>
+            help="Minimum variant allele frequency (default: @VALUE@) required for calling a variant"/>
     </xml>
     <xml name="min_freq_for_hom">
         <param argument="--min-freq-for-hom" name="min_freq_for_hom" type="float" value="0.75" min="0" max="1"
             label="Minimum homozygous variant allele frequency"
-            help="Minimum variant allele frequency required for calling a homozygous genotype" />
+            help="Minimum variant allele frequency (default: 0.75) required for calling a homozygous genotype" />
     </xml>
     <xml name="p_value" token_value="0.01"
     token_label="p-value threshold for calling variants"
b
diff -r b79bb8b09822 -r 4e97191a1ff7 varscan.py
--- a/varscan.py Thu Mar 28 18:19:00 2019 -0400
+++ b/varscan.py Fri Aug 16 15:49:54 2019 -0400
[
b'@@ -8,6 +8,7 @@\n import sys\n import tempfile\n import time\n+from collections import namedtuple\n from contextlib import ExitStack\n from functools import partial\n from threading import Thread\n@@ -15,6 +16,332 @@\n import pysam\n \n \n+AlleleStats = namedtuple(\n+    "AlleleStats",\n+    [\n+        \'reads_total\',\n+        \'reads_fw\',\n+        \'reads_rv\',\n+        \'avg_mapqual\',\n+        \'avg_basequal\',\n+        \'avg_dist_from_center\',\n+        \'avg_mismatch_fraction\',\n+        \'avg_mismatch_qualsum\',\n+        \'avg_clipped_len\',\n+        \'avg_dist_from_3prime\'\n+    ]\n+)\n+\n+\n+def _get_allele_specific_pileup_column_stats(pileups, ref_fetch,\n+                                             ignore_md, ignore_nm,\n+                                             mm_runs, detect_q2_runs):\n+    var_reads_plus = var_reads_minus = 0\n+    sum_mapping_qualities = 0\n+    sum_base_qualities = 0\n+    sum_dist_from_center = 0\n+    sum_dist_from_3prime = 0\n+    sum_clipped_length = 0\n+    sum_unclipped_length = 0\n+    sum_mismatch_fractions = 0\n+    sum_mismatch_qualities = 0\n+\n+    for p in pileups:\n+        if p.alignment.is_reverse:\n+            var_reads_minus += 1\n+        else:\n+            var_reads_plus += 1\n+        sum_mapping_qualities += p.alignment.mapping_quality\n+        sum_base_qualities += p.alignment.query_qualities[p.query_position]\n+        sum_clipped_length += p.alignment.query_alignment_length\n+        unclipped_length = p.alignment.query_length\n+        sum_unclipped_length += unclipped_length\n+        # The following calculations are all in 1-based coordinates\n+        # with respect to the physical 5\'-end of the read sequence.\n+        if p.alignment.is_reverse:\n+            read_base_pos = unclipped_length - p.query_position\n+            read_first_aln_pos = (\n+                unclipped_length - p.alignment.query_alignment_end + 1\n+            )\n+            read_last_aln_pos = (\n+                unclipped_length - p.alignment.query_alignment_start\n+            )\n+            qualities_3to5prime = p.alignment.query_alignment_qualities\n+        else:\n+            read_base_pos = p.query_position + 1\n+            read_first_aln_pos = p.alignment.query_alignment_start + 1\n+            read_last_aln_pos = p.alignment.query_alignment_end\n+            qualities_3to5prime = reversed(\n+                p.alignment.query_alignment_qualities\n+            )\n+\n+        read_last_effective_aln_pos = read_last_aln_pos\n+        if detect_q2_runs:\n+            # Note: the original bam-readcount algorithm always takes\n+            # terminal q2 runs into account when determining the\n+            # effective 3\'-ends of reads.\n+            # However, q2 runs have no special meaning since Illumina\n+            # pipeline version 1.8 so detecting them is optional\n+            # in this code.\n+            for qual in qualities_3to5prime:\n+                if qual != 2:\n+                    break\n+                read_last_effective_aln_pos -= 1\n+\n+        # Note: the original bam-readcount algorithm defines the\n+        # read center as:\n+        # read_center = p.alignment.query_alignment_length / 2\n+        # This is less accurate than the implementation here.\n+        read_center = (read_first_aln_pos + read_last_aln_pos) / 2\n+        sum_dist_from_center += 1 - abs(\n+            read_base_pos - read_center\n+        ) / (read_center - 1)\n+        # To calculate the distance of base positions from the 3\'-ends\n+        # of reads bam-readcount uses the formula:\n+        # sum_dist_from_3prime += abs(\n+        #     read_last_effective_aln_pos - read_base_pos\n+        # ) / (unclipped_length - 1)\n+        # , which treats base positions on both sides of the effective\n+        # 3\'-end equally. Since this seems hard to justify, we cap\n+        # the distance calculation at 0 for base positions past the\n+        # effective 3\'-end (which, in turn, should only be possible\n+        # with detection of q2 runs).\n+        if read_last_effective_aln_pos > read_b'..b'            / ref_stats.avg_clipped_len\n                         ) > max_relative_len_diff:\n                             record.filter.add(\'ReadLenDiff\')\n                 else:\n@@ -994,6 +1233,8 @@\n     instance_args = {\n         k: args.pop(k) for k in [\n             \'max_depth\',\n+            \'count_orphans\',\n+            \'detect_overlaps\',\n             \'min_mapqual\',\n             \'min_basequal\',\n             \'threads\',\n@@ -1046,7 +1287,7 @@\n     )\n     p.add_argument(\n         \'-s\', \'--split-output\',\n-        dest=\'split_output\', action=\'store_true\', default=False,\n+        dest=\'split_output\', action=\'store_true\',\n         help=\'indicate that separate output files for SNPs and indels \'\n              \'should be generated (original VarScan behavior). If specified, \'\n              \'%%T in the --ofile file name will be replaced with "snp" and \'\n@@ -1090,6 +1331,20 @@\n              \'default: 8000)\'\n     )\n     call_group.add_argument(\n+        \'--count-orphans\',\n+        dest=\'count_orphans\', action=\'store_true\',\n+        help=\'Use anomalous read pairs in variant calling \'\n+             \'(samtools mpileup -A option; default: ignore anomalous pairs)\'\n+    )\n+    call_group.add_argument(\n+        \'--no-detect-overlaps\',\n+        dest=\'detect_overlaps\', action=\'store_false\',\n+        help=\'Disable automatic read-pair overlap detection by samtools \'\n+             \'mpileup. Overlap detection tries to avoid counting duplicated \'\n+             \'bases twice during variant calling. \'\n+             \'(samtools mpileup -x option; default: use overlap detection)\'\n+    )\n+    call_group.add_argument(\n         \'--min-basequal\',\n         dest=\'min_basequal\', type=int,\n         default=13,\n@@ -1160,7 +1415,15 @@\n         dest=\'min_var_count2_lc\', type=int,\n         default=2,\n         help=\'Minimum number of variant-supporting reads when depth below \'\n-             \'--somatic-p-depth (default: 2)\'\n+             \'--min-var-count2-depth (default: 2)\'\n+    )\n+    filter_group.add_argument(\n+        \'--min-var-count2-depth\',\n+        dest=\'min_var_count2_depth\', type=int,\n+        default=10,\n+        help=\'Combined depth of ref- and variant-supporting reads required to \'\n+             \'apply the --min-var-count filter instead of --min-var-count-lc \'\n+             \'(default: 10)\'\n     )\n     filter_group.add_argument(\n         \'--min-var-freq2\',\n@@ -1169,18 +1432,6 @@\n         help=\'Minimum variant allele frequency (default: 0.05)\'\n     )\n     filter_group.add_argument(\n-        \'--max-somatic-p\',\n-        dest=\'max_somatic_p\', type=float,\n-        default=0.05,\n-        help=\'Maximum somatic p-value (default: 0.05)\'\n-    )\n-    filter_group.add_argument(\n-        \'--max-somatic-p-depth\',\n-        dest=\'max_somatic_p_depth\', type=int,\n-        default=10,\n-        help=\'Depth required to run --max-somatic-p filter (default: 10)\'\n-    )\n-    filter_group.add_argument(\n         \'--min-ref-readpos\',\n         dest=\'min_ref_readpos\', type=float,\n         default=0.1,\n@@ -1209,6 +1460,13 @@\n              \'3\\\'end of variant-supporting reads (default: 0.1)\'\n     )\n     filter_group.add_argument(\n+        \'--detect-q2-runs\',\n+        dest=\'detect_q2_runs\', action=\'store_true\',\n+        help=\'Look for 3\\\'-terminal q2 runs and take their lengths into \'\n+             \'account for determining the effective 3\\\'end of reads \'\n+             \'(default: off)\'\n+    )\n+    filter_group.add_argument(\n         \'--min-ref-len\',\n         dest=\'min_ref_len\', type=int,\n         default=90,\n@@ -1309,5 +1567,13 @@\n         default=50,\n         help=\'Maximum mismatch quality sum difference (var - ref; default: 50)\'\n     )\n+    filter_group.add_argument(\n+        \'--ignore-md\',\n+        dest=\'ignore_md\', action=\'store_true\',\n+        help=\'Do not rely on read MD tag, even if it is present on the mapped \'\n+             \'reads, and recalculate mismatch quality stats from ref \'\n+             \'alignments instead.\'\n+    )\n+\n     args = vars(p.parse_args())\n     varscan_call(**args)\n'
b
diff -r b79bb8b09822 -r 4e97191a1ff7 varscan_somatic.xml
--- a/varscan_somatic.xml Thu Mar 28 18:19:00 2019 -0400
+++ b/varscan_somatic.xml Fri Aug 16 15:49:54 2019 -0400
b
b'@@ -1,4 +1,4 @@\n-<tool id="varscan_somatic" name="VarScan somatic" version="@VERSION@.4">\n+<tool id="varscan_somatic" name="VarScan somatic" version="@VERSION@.5">\n     <description>Call germline/somatic and LOH variants from tumor-normal sample pairs</description>\n     <macros>\n         <import>macros.xml</import>\n@@ -28,6 +28,16 @@\n                 text="##FILTER=&lt;ID=RefDist3,Description=" />\n             </assert_contents>\n         </macro>\n+        <macro name="filter_compat_options">\n+            <section name="experts_only" title="Compatibility options for experts" expanded="false">\n+                <param name="compat_opts" type="select" display="checkboxes" multiple="true" optional="true"\n+                label="Compatibility Options for Posterior Variant Filtering"\n+                help="">\n+                    <option value="--ignore-md" selected="true">Always determine mismatch quality statistics from recalculated read to reference alignments. Ignore read MD tags if present.</option>\n+                    <option value="--detect-q2-runs" selected="false">Treat runs of base qualities of value 2 at the 3\' end of reads as quality control indicator (Illumina 1.5 compatibility setting)</option>\n+                </param>\n+            </section>\n+        </macro>\n     </macros>\n     <expand macro="requirements">\n         <requirement type="package" version="3.6.7">python</requirement>\n@@ -63,8 +73,11 @@\n             --threads \\${GALAXY_SLOTS:-2}\n             #if str($call_params.settings) == "custom":\n                 ## samtools mpileup parameters\n-                --min-basequal ${call_params.min_avg_qual}\n-                --min-mapqual ${call_params.min_mapqual}\n+                --min-basequal ${call_params.read_selection.min_basequal}\n+                --min-mapqual ${call_params.read_selection.min_mapqual}\n+                ${call_params.read_selection.count_orphans}\n+                ${call_params.read_selection.detect_overlaps}\n+                --max-pileup-depth ${call_params.read_selection.max_pileup_depth}\n                 ## VarScan parameters\n                 --min-coverage ${call_params.min_coverage}\n                 --min-var-count ${call_params.min_reads2}\n@@ -75,56 +88,61 @@\n             #end if\n             #if str($filter_params.settings) == "no_filter":\n                 --no-filters\n-            #elif str($filter_params.settings) == "dream3_settings":\n-                --min-var-count2 3\n-                --min-var-count2-lc 1\n-                --min-var-freq2 0.05\n-                --max-somatic-p 0.05\n-                --max-somatic-p-depth 10\n-                --min-ref-readpos 0.2\n-                --min-var-readpos 0.15\n-                --min-ref-dist3 0.2\n-                --min-var-dist3 0.15\n-                --min-ref-len 90\n-                --min-var-len 90\n-                --max-len-diff 0.05\n-                --min-strandedness 0\n-                --min-strand-reads 5\n-                --min-ref-basequal 15\n-                --min-var-basequal 30\n-                --max-basequal-diff 50\n-                --min-ref-mapqual 20\n-                --min-var-mapqual 30\n-                --max-mapqual-diff 10\n-                --max-ref-mmqs 50\n-                --max-var-mmqs 100\n-                --min-mmqs-diff 0\n-                --max-mmqs-diff 50\n-            #elif str($filter_params.settings) == "custom":\n-                --min-var-count2 ${filter_params.min_var_count}\n-                --min-var-count2-lc ${filter_params.min_var_count_lc}\n-                --min-var-freq2 ${filter_params.min_var_freq2}\n-                --max-somatic-p ${filter_params.max_somatic_p}\n-                --max-somatic-p-depth ${filter_params.max_somatic_p_depth}\n-                --min-ref-readpos ${filter_params.min_ref_readpos}\n-                --min-var-readpos ${filter_params.min_var_readpos}\n-                --min-ref-dist3 ${filter_params.min_ref_dist3}\n-                --min-var-dist3 ${filter_params.min_var_dist3}\n-               '..b'ng the variant and the reference allele" />\n+                help="The maximum difference (default: 50) in the mismatch base quality sums (var - ref) allowed between reads supporting the variant and the reference allele" />\n+                <expand macro="filter_compat_options" />\n             </when>\n         </conditional>\n     </inputs>\n@@ -286,6 +319,7 @@\n     </outputs>\n     <tests>\n         <test expect_num_outputs="1">\n+            <!-- run with default settings and genome from history -->\n             <conditional name="reference">\n                 <param name="source" value="history" />\n                 <param name="genome" value="hg19_chrM.fa" />\n@@ -305,6 +339,7 @@\n             </output>\n         </test>\n         <test expect_num_outputs="1">\n+            <!-- run with default settings and cached genome -->\n             <conditional name="reference">\n                 <param name="source" value="cached" />\n                 <param name="genome" value="hg19mito" />\n@@ -324,6 +359,7 @@\n             </output>\n         </test>\n         <test expect_num_outputs="2">\n+            <!-- run with default settings and split output -->\n             <conditional name="reference">\n                 <param name="source" value="history" />\n                 <param name="genome" value="hg19_chrM.fa" />\n@@ -347,6 +383,7 @@\n             </output>\n         </test>\n         <test expect_num_outputs="1">\n+            <!-- run with custom params for variant calling -->\n             <conditional name="reference">\n                 <param name="source" value="history" />\n                 <param name="genome" value="hg19_chrM.fa" />\n@@ -355,10 +392,12 @@\n             <param name="tumor_bam" value="tumor_chrM.bam" />\n             <param name="split_output" value="false" />\n             <conditional name="call_params">\n+                <section name="read_selection">\n+                    <param name="min_basequal" value="5" />\n+                </section>\n                 <param name="settings" value="custom" />\n                 <param name="min_coverage" value="2" />\n                 <param name="min_reads2" value="1" />\n-                <param name="min_avg_qual" value="5" />\n                 <param name="min_var_freq" value="0.01" />\n                 <param name="min_freq_for_hom" value="0.66" />\n                 <param name="p_value" value="0.97" />\n@@ -395,6 +434,7 @@\n             </output>\n         </test>\n         <test expect_num_outputs="1">\n+            <!-- run with preconfigured dream3 post-filter settings -->\n             <conditional name="reference">\n                 <param name="source" value="history" />\n                 <param name="genome" value="hg19_chrM.fa" />\n@@ -414,6 +454,7 @@\n             </output>\n         </test>\n         <test expect_num_outputs="1">\n+            <!-- run without post-filters -->\n             <conditional name="reference">\n                 <param name="source" value="history" />\n                 <param name="genome" value="hg19_chrM.fa" />\n@@ -433,6 +474,7 @@\n             </output>\n         </test>\n         <test expect_num_outputs="1">\n+            <!-- run with custom post-filters -->\n             <conditional name="reference">\n                 <param name="source" value="history" />\n                 <param name="genome" value="hg19_chrM.fa" />\n@@ -490,13 +532,6 @@\n This tool wraps the functionality of the ``varscan somatic`` and the\n ``varscan fpfilter`` command line tools.\n \n-.. class:: infomark\n-\n-   The wrapper aims at providing the same functionality as the\n-   ``varscan fpfilter`` tool, but implements it using ``pysam`` internally.\n-   Note that, as one limitation compared to the original ``varscan`` tool,\n-   the current version does not apply filters to indels!\n-\n The tool is designed to detect genetic variants in a **pair of samples**\n representing normal and tumor tissue from the same individual. It classifies\n the variants, according to their most likely origin, as **somatic** (variant is\n'