diff varscan_somatic.xml @ 9:4e97191a1ff7 draft

"planemo upload for repository https://github.com/galaxyproject/iuc/tree/master/tools/varscan commit fcf5ac14629c694f0f64773fab0428b1e78fe156"
author iuc
date Fri, 16 Aug 2019 15:49:54 -0400
parents b79bb8b09822
children a57606054bd7
line wrap: on
line diff
--- a/varscan_somatic.xml	Thu Mar 28 18:19:00 2019 -0400
+++ b/varscan_somatic.xml	Fri Aug 16 15:49:54 2019 -0400
@@ -1,4 +1,4 @@
-<tool id="varscan_somatic" name="VarScan somatic" version="@VERSION@.4">
+<tool id="varscan_somatic" name="VarScan somatic" version="@VERSION@.5">
     <description>Call germline/somatic and LOH variants from tumor-normal sample pairs</description>
     <macros>
         <import>macros.xml</import>
@@ -28,6 +28,16 @@
                 text="##FILTER=&lt;ID=RefDist3,Description=" />
             </assert_contents>
         </macro>
+        <macro name="filter_compat_options">
+            <section name="experts_only" title="Compatibility options for experts" expanded="false">
+                <param name="compat_opts" type="select" display="checkboxes" multiple="true" optional="true"
+                label="Compatibility Options for Posterior Variant Filtering"
+                help="Each checked option is passed on as a command line flag to the posterior variant filtering step. These options are only offered when posterior filtering is enabled.">
+                    <option value="--ignore-md" selected="true">Always determine mismatch quality statistics from recalculated read to reference alignments. Ignore read MD tags if present.</option>
+                    <option value="--detect-q2-runs" selected="false">Treat runs of base qualities of value 2 at the 3' end of reads as quality control indicator (Illumina 1.5 compatibility setting)</option>
+                </param>
+            </section>
+        </macro>
     </macros>
     <expand macro="requirements">
         <requirement type="package" version="3.6.7">python</requirement>
@@ -63,8 +73,11 @@
             --threads \${GALAXY_SLOTS:-2}
             #if str($call_params.settings) == "custom":
                 ## samtools mpileup parameters
-                --min-basequal ${call_params.min_avg_qual}
-                --min-mapqual ${call_params.min_mapqual}
+                --min-basequal ${call_params.read_selection.min_basequal}
+                --min-mapqual ${call_params.read_selection.min_mapqual}
+                ${call_params.read_selection.count_orphans}
+                ${call_params.read_selection.detect_overlaps}
+                --max-pileup-depth ${call_params.read_selection.max_pileup_depth}
                 ## VarScan parameters
                 --min-coverage ${call_params.min_coverage}
                 --min-var-count ${call_params.min_reads2}
@@ -75,56 +88,61 @@
             #end if
             #if str($filter_params.settings) == "no_filter":
                 --no-filters
-            #elif str($filter_params.settings) == "dream3_settings":
-                --min-var-count2 3
-                --min-var-count2-lc 1
-                --min-var-freq2 0.05
-                --max-somatic-p 0.05
-                --max-somatic-p-depth 10
-                --min-ref-readpos 0.2
-                --min-var-readpos 0.15
-                --min-ref-dist3 0.2
-                --min-var-dist3 0.15
-                --min-ref-len 90
-                --min-var-len 90
-                --max-len-diff 0.05
-                --min-strandedness 0
-                --min-strand-reads 5
-                --min-ref-basequal 15
-                --min-var-basequal 30
-                --max-basequal-diff 50
-                --min-ref-mapqual 20
-                --min-var-mapqual 30
-                --max-mapqual-diff 10
-                --max-ref-mmqs 50
-                --max-var-mmqs 100
-                --min-mmqs-diff 0
-                --max-mmqs-diff 50
-            #elif str($filter_params.settings) == "custom":
-                --min-var-count2 ${filter_params.min_var_count}
-                --min-var-count2-lc ${filter_params.min_var_count_lc}
-                --min-var-freq2 ${filter_params.min_var_freq2}
-                --max-somatic-p ${filter_params.max_somatic_p}
-                --max-somatic-p-depth ${filter_params.max_somatic_p_depth}
-                --min-ref-readpos ${filter_params.min_ref_readpos}
-                --min-var-readpos ${filter_params.min_var_readpos}
-                --min-ref-dist3 ${filter_params.min_ref_dist3}
-                --min-var-dist3 ${filter_params.min_var_dist3}
-                --min-ref-len ${filter_params.min_ref_len}
-                --min-var-len ${filter_params.min_var_len}
-                --max-len-diff ${filter_params.max_len_diff}
-                --min-strandedness ${filter_params.min_strandedness}
-                --min-strand-reads ${filter_params.min_strand_reads}
-                --min-ref-basequal ${filter_params.min_ref_basequal}
-                --min-var-basequal ${filter_params.min_var_basequal}
-                --max-basequal-diff ${filter_params.max_basequal_diff}
-                --min-ref-mapqual ${filter_params.min_ref_mapqual}
-                --min-var-mapqual ${filter_params.min_var_mapqual}
-                --max-mapqual-diff ${filter_params.max_mapqual_diff}
-                --max-ref-mmqs ${filter_params.max_ref_mmqs}
-                --max-var-mmqs ${filter_params.max_var_mmqs}
-                --min-mmqs-diff ${filter_params.min_mmqs_diff}
-                --max-mmqs-diff ${filter_params.max_mmqs_diff}
+            #else:
+                #if str($filter_params.settings) == "dream3_settings":
+                    --min-var-count2 3
+                    --min-var-count2-lc 1
+                    --min-var-count2-depth 10
+                    --min-var-freq2 0.05
+                    --min-ref-readpos 0.2
+                    --min-var-readpos 0.15
+                    --min-ref-dist3 0.2
+                    --min-var-dist3 0.15
+                    --min-ref-len 90
+                    --min-var-len 90
+                    --max-len-diff 0.05
+                    --min-strandedness 0
+                    --min-strand-reads 5
+                    --min-ref-basequal 15
+                    --min-var-basequal 30
+                    --max-basequal-diff 50
+                    --min-ref-mapqual 20
+                    --min-var-mapqual 30
+                    --max-mapqual-diff 10
+                    --max-ref-mmqs 50
+                    --max-var-mmqs 100
+                    --min-mmqs-diff 0
+                    --max-mmqs-diff 50
+                #elif str($filter_params.settings) == "custom":
+                    --min-var-count2 ${filter_params.min_var_count}
+                    --min-var-count2-lc ${filter_params.min_var_count_lc}
+                    --min-var-count2-depth ${filter_params.min_var_count_depth}
+                    --min-var-freq2 ${filter_params.min_var_freq2}
+                    --min-ref-readpos ${filter_params.min_ref_readpos}
+                    --min-var-readpos ${filter_params.min_var_readpos}
+                    --min-ref-dist3 ${filter_params.min_ref_dist3}
+                    --min-var-dist3 ${filter_params.min_var_dist3}
+                    --min-ref-len ${filter_params.min_ref_len}
+                    --min-var-len ${filter_params.min_var_len}
+                    --max-len-diff ${filter_params.max_len_diff}
+                    --min-strandedness ${filter_params.min_strandedness}
+                    --min-strand-reads ${filter_params.min_strand_reads}
+                    --min-ref-basequal ${filter_params.min_ref_basequal}
+                    --min-var-basequal ${filter_params.min_var_basequal}
+                    --max-basequal-diff ${filter_params.max_basequal_diff}
+                    --min-ref-mapqual ${filter_params.min_ref_mapqual}
+                    --min-var-mapqual ${filter_params.min_var_mapqual}
+                    --max-mapqual-diff ${filter_params.max_mapqual_diff}
+                    --max-ref-mmqs ${filter_params.max_ref_mmqs}
+                    --max-var-mmqs ${filter_params.max_var_mmqs}
+                    --min-mmqs-diff ${filter_params.min_mmqs_diff}
+                    --max-mmqs-diff ${filter_params.max_mmqs_diff}
+                #end if
+                #if $filter_params.experts_only.compat_opts:
+                    #for $opt in str($filter_params.experts_only.compat_opts).split(','):
+                        $opt
+                    #end for
+                #end if
             #end if
             --verbose
             '$ref_genome'
@@ -166,22 +184,35 @@
                 <option value="custom">Customize settings</option>
             </param>
             <when value="custom">
-                <param argument="samtools mpileup -Q" name="min_avg_qual" type="integer" value="13" min="0" max="50"
-                label="Minimum base quality"
-                help="The minimum base quality at the variant position required to use a read for calling" />
-                <param argument="samtools mpileup -q" name="min_mapqual" type="integer" value="0" min="0" max="60"
-                label="Minimum mapping quality"
-                help="The minimum mapping quality required for a read to be considered in variant calling" />
+                <section name="read_selection" title="Read selection" expanded="true"
+                help="The settings in this section control which mapped reads will be used for variant calling.">
+                    <param argument="samtools mpileup -Q" name="min_basequal" type="integer" value="13" min="1" max="50"
+                    label="Minimum base quality"
+                    help="The minimum base quality (default: 13) at a given position required to use a read for calling variants at that site" />
+                    <param argument="samtools mpileup -q" name="min_mapqual" type="integer" value="0" min="0" max="60"
+                    label="Minimum mapping quality"
+                    help="The minimum mapping quality (default: 0) required for a read to be considered in variant calling" />
+                    <param argument="samtools mpileup -A" name="count_orphans" type="boolean" truevalue="--count-orphans" falsevalue="" checked="false"
+                    label="Use reads from anomalously mapped pairs"
+                    help="Applies to paired-end reads only. If set to true, reads from pairs that are not flagged as proper pairs (SAM/BAM FLAG bit 0x2 not set) will also be used in variant calling. The default is to ignore such reads." />
+                    <param argument="samtools mpileup -x" name="detect_overlaps" type="boolean" truevalue="" falsevalue="--no-detect-overlaps" checked="true"
+                    label="Try to correct for read-pair overlaps"
+                    help="Applies to paired-end reads only. If the reads of a pair overlap on the reference, then with this option (on by default), the bases for which both reads provide evidence will be counted only once in variant calling (of the two sequenced bases in the reads, the base with the better base quality will be used)." />
+                    <param argument="samtools mpileup -d" name="max_pileup_depth" type="integer" value="8000" min="4000"
+                    label="Maximum number of reads per site"
+                    help="Restrict the number of reads used for variant calling at a site to this maximum (default: 8000) for each sample. Helps protect against excessive memory usage (and slow tool runs) at sites of extraordinarily high coverage." />
+                </section>
                 <expand macro="min_coverage"
-                help="Minimum site coverage required in the normal and in the tumor sample to call a variant. This threshold gets applied after eliminating reads with low base and mapping qualitiy as defined above." />
-                <expand macro="min_reads2" />
+                help="Minimum site coverage (default: 8) required in the normal and in the tumor sample to call a variant. This threshold gets applied after eliminating reads based on the read selection criteria above." />
+                <expand macro="min_reads2"
+                help="Minimum number (default: 2) of variant-supporting reads (after read selection) at a position required to make a call"/>
                 <expand macro="min_var_freq" value="0.1" />
                 <expand macro="min_freq_for_hom" />
                 <expand macro="p_value" value="0.99"
-                help="The p-value threshold used to determine if a variant should be called for either sample" />
+                help="The p-value threshold (default: 0.99) used to determine if a variant should be called for either sample" />
                 <param argument="--somatic-p-value" name="somatic_p_value" type="float" value="0.05" min="0" max="1"
                 label="P-value threshold for calling somatic variants and LOH events"
-                help="The p-value threshold used to determine if read count differences between the normal and the tumor sample justify classification of a variant as somatic or as an LOH event" />
+                help="The p-value threshold (default: 0.05) used to determine if read count differences between the normal and the tumor sample justify classification of a variant as somatic or as an LOH event" />
             </when>
             <when value="varscan_defaults" />
         </conditional>
@@ -192,82 +223,84 @@
                 <option value="no_filter">Do not perform posterior filtering</option>
                 <option value="custom">Customize settings</option>
             </param>
-            <when value="varscan_defaults" />
-            <when value="dream3_settings" />
             <when value="no_filter" />
+            <when value="varscan_defaults">
+                <expand macro="filter_compat_options" />
+            </when>
+            <when value="dream3_settings">
+                <expand macro="filter_compat_options" />
+            </when>
             <when value="custom">
                 <param argument="--min-var-count" name="min_var_count" type="integer" value="4" min="1" max="200"
                 label="Minimum number of variant-supporting reads"
-                help="" />
+                help="(default: 4)" />
                 <param argument="--min-var-count-lc" name="min_var_count_lc" type="integer" value="2" min="1" max="200"
                 label="Low coverage minimum number of variant-supporting reads"
-                help="Will be applied instead of the --min-var-count limit for sites with poor overall (less than --max-somatic-p-depth) coverage" />
+                help="This setting (default: 2) will be applied instead of the --min-var-count limit for sites with poor overall coverage (see the depth threshold below)." />
+                <param argument="--max-somatic-p-depth" name="min_var_count_depth" type="integer" value="10" min="2" max="200"
+                label="Minimum variant allele count threshold"
+                help="Combined depth (default: 10) of ref- and variant-supporting reads required at variant site to apply the (stricter) --min-var-count filter instead of --min-var-count-lc" />
                 <param argument="--min-var-freq" name="min_var_freq2" type="float" value="0.05" min="0" max="1"
                 label="Minimum variant allele frequency"
-                help="" />
-                <param argument="--max-somatic-p" name="max_somatic_p" type="float" value="0.05" min="0" max="1"
-                label="Maximum somatic p-value allowed for a somatic call"
-                help="" />
-                <param argument="--max-somatic-p-depth" name="max_somatic_p_depth" type="integer" value="10" min="2" max="200"
-                label="Depth required at variant site to run --max-somatic-p filter"
-                help="" />
+                help="(default: 0.05)" />
                 <param argument="--min-ref-readpos" name="min_ref_readpos" type="float" value="0.1" min="0" max="1"
                 label="Minimum relative variant position in ref-supporting reads"
-                help="The minimum average relative distance from the ends of ref-supporting reads required for variant sites" />
+                help="The minimum average relative distance from either end of ref-supporting reads (default: 0.1) required for variant sites" />
                 <param argument="--min-var-readpos" name="min_var_readpos" type="float" value="0.1" min="0" max="1"
                 label="Minimum relative variant position in variant-supporting reads"
-                help="The minimum average relative distance from the ends of variant-supporting reads required for variant sites" />
+                help="The minimum average relative distance from either end of variant-supporting reads (default: 0.1) required for variant sites" />
                 <param argument="--min-ref-dist3" name="min_ref_dist3" type="float" value="0.1" min="0" max="1"
                 label="Minimum distance of variant site from 3'-end of ref-supporting reads"
-                help="The minimum average relative distance from the effective 3'end of ref-supporting reads required for variant sites" />
+                help="The minimum average relative distance from the effective 3'end of ref-supporting reads (default: 0.1) required for variant sites. The effective 3'end is defined by the end of the alignment of the read to the reference (or, if the Illumina 1.5 compatibility setting is used, by the first base in 3'->5' direction with a base quality > 2)." />
                 <param argument="--min-var-dist3" name="min_var_dist3" type="float" value="0.1" min="0" max="1"
                 label="Minimum distance of variant site from 3'-end of variant-supporting reads"
-                help="The minimum average relative distance from the effective 3'end of variant-supporting reads required for variant sites" />
+                help="The minimum average relative distance from the effective 3'end of variant-supporting reads (default: 0.1) required for variant sites. The effective 3'end is defined as above." />
                 <param argument="--min-ref-avgrl" name="min_ref_len" type="integer" value="90" min="0" max="200"
                 label="Minimum length of ref-supporting reads"
-                help="The minimum average trimmed length required for reads supporting the reference allele" />
+                help="The minimum average trimmed length (default: 90) required for reads supporting the reference allele" />
                 <param argument="--min-var-avgrl" name="min_var_len" type="integer" value="90" min="0" max="200"
                 label="Minimum length of variant-supporting reads"
-                help="The minimum average trimmed length required for reads supporting the variant allele" />
+                help="The minimum average trimmed length (default: 90) required for reads supporting the variant allele" />
                 <param argument="--max-rl-diff" name="max_len_diff" type="float" value="0.25" min="0" max="1"
                 label="Maximum relative read length difference"
-                help="The maximum allowed average relative read length difference (ref - var) between reads supporting the reference and the variant allele" />
+                help="The maximum allowed difference (default: 0.25) in the average relative read length (ref - var) between reads supporting the reference and the variant allele" />
                 <param argument="--min-strandedness" name="min_strandedness" type="float" value="0.01" min="0" max="0.5"
                 label="Minimum fraction of variant reads from each strand"
-                help="The minimum fraction of variant reads that are required to come from the forward and from the reverse strand" />
+                help="The minimum fraction (default: 0.01) of variant reads that are required to come from the forward and from the reverse strand" />
                 <param argument="--min-strand-reads" name="min_strand_reads" type="integer" value="5" min="2" max="200"
                 label="Minimum variant allele depth required to apply the --min-strandedness filter"
-                help="" />
+                help="(default: 5)" />
                 <param argument="--min-ref-basequal" name="min_ref_basequal" type="integer" value="15" min="1" max="50"
                 label="Minimum average base quality for the ref allele"
-                help="The minimum average base quality required at the variant site for reads supporting the reference allele" />
+                help="The minimum average base quality (default: 15) required at the variant site for reads supporting the reference allele" />
                 <param argument="--min-var-basequal" name="min_var_basequal" type="integer" value="15" min="1" max="50"
                 label="Minimum average base quality for the variant allele"
-                help="The minimum average base quality required at the variant site for reads supporting the variant allele" />
+                help="The minimum average base quality (default: 15) required at the variant site for reads supporting the variant allele" />
                 <param argument="max-basequal-diff" name="max_basequal_diff" type="integer" value="50" min="0" max="50"
                 label="Maximum base quality difference between ref- and variant-supporting reads"
-                help="The maximum average base quality difference (ref - var) allowed between the variant site positions of reads supporting the reference and the variant allele" />
+                help="The maximum difference (default: 50) in the average base quality (ref - var) allowed between the variant site positions of reads supporting the reference and the variant allele" />
                 <param argument="--min-ref-mapqual" name="min_ref_mapqual" type="integer" value="15" min="1" max="60"
                 label="Minimum average mapping quality of ref-supporting reads"
-                help="The minimum average mapping quality required for reads supporting the reference allele" />
+                help="The minimum average mapping quality (default: 15) required for reads supporting the reference allele" />
                 <param argument="--min-var-mapqual" name="min_var_mapqual" type="integer" value="15" min="1" max="60"
                 label="Minimum average mapping quality of variant-supporting reads"
-                help="The minimum average mapping quality required for reads supporting the variant allele" />
+                help="The minimum average mapping quality (default: 15) required for reads supporting the variant allele" />
                 <param argument="--max-mapqual-diff" name="max_mapqual_diff" type="integer" value="50" min="0" max="60"
                 label="Maximum mapping quality difference between ref- and variant-supporting reads"
-                help="The maximum average mapping quality difference (ref - var) allowed between reads supporting the reference and the variant allele" />
+                help="The maximum difference (default: 50) in the average mapping quality (ref - var) allowed between reads supporting the reference and the variant allele" />
                 <param argument="--max-ref-mmqs" name="max_ref_mmqs" type="integer" value="100" min="0"
                 label="Maximum mismatch base quality sum of ref-supporting reads"
-                help="The maximum mismatch base quality sum allowed for reads supporting the reference allele" />
+                help="The maximum mismatch base quality sum (default: 100) allowed for reads supporting the reference allele" />
                 <param argument="--max-var-mmqs" name="max_var_mmqs" type="integer" value="100" min="0"
                 label="Maximum mismatch base quality sum of var-supporting reads"
-                help="The maximum mismatch base quality sum allowed for reads supporting the variant allele" />
+                help="The maximum mismatch base quality sum (default: 100) allowed for reads supporting the variant allele" />
                 <param argument="--min-mmqs-diff" name="min_mmqs_diff" type="integer" value="0" min="0"
                 label="Minimum difference between mismatch base quality sums of variant- and ref-supporting reads"
-                help="The minimum difference in the mismatch base quality sums (var - ref) required between reads supporting the variant and the reference allele" />
+                help="The minimum difference (default: 0) in the mismatch base quality sums (var - ref) required between reads supporting the variant and the reference allele" />
                 <param argument="--max-mmqs-diff" name="max_mmqs_diff" type="integer" value="50" min="1"
                 label="Maximum difference between mismatch base quality sums of variant- and ref-supporting reads"
-                help="The maximum difference in the mismatch base quality sums (var - ref) allowed between reads supporting the variant and the reference allele" />
+                help="The maximum difference (default: 50) in the mismatch base quality sums (var - ref) allowed between reads supporting the variant and the reference allele" />
+                <expand macro="filter_compat_options" />
             </when>
         </conditional>
     </inputs>
@@ -286,6 +319,7 @@
     </outputs>
     <tests>
         <test expect_num_outputs="1">
+            <!-- run with default settings and genome from history -->
             <conditional name="reference">
                 <param name="source" value="history" />
                 <param name="genome" value="hg19_chrM.fa" />
@@ -305,6 +339,7 @@
             </output>
         </test>
         <test expect_num_outputs="1">
+            <!-- run with default settings and cached genome -->
             <conditional name="reference">
                 <param name="source" value="cached" />
                 <param name="genome" value="hg19mito" />
@@ -324,6 +359,7 @@
             </output>
         </test>
         <test expect_num_outputs="2">
+            <!-- run with default settings and split output -->
             <conditional name="reference">
                 <param name="source" value="history" />
                 <param name="genome" value="hg19_chrM.fa" />
@@ -347,6 +383,7 @@
             </output>
         </test>
         <test expect_num_outputs="1">
+            <!-- run with custom params for variant calling -->
             <conditional name="reference">
                 <param name="source" value="history" />
                 <param name="genome" value="hg19_chrM.fa" />
@@ -355,10 +392,12 @@
             <param name="tumor_bam" value="tumor_chrM.bam" />
             <param name="split_output" value="false" />
             <conditional name="call_params">
+                <section name="read_selection">
+                    <param name="min_basequal" value="5" />
+                </section>
                 <param name="settings" value="custom" />
                 <param name="min_coverage" value="2" />
                 <param name="min_reads2" value="1" />
-                <param name="min_avg_qual" value="5" />
                 <param name="min_var_freq" value="0.01" />
                 <param name="min_freq_for_hom" value="0.66" />
                 <param name="p_value" value="0.97" />
@@ -395,6 +434,7 @@
             </output>
         </test>
         <test expect_num_outputs="1">
+            <!-- run with preconfigured dream3 post-filter settings -->
             <conditional name="reference">
                 <param name="source" value="history" />
                 <param name="genome" value="hg19_chrM.fa" />
@@ -414,6 +454,7 @@
             </output>
         </test>
         <test expect_num_outputs="1">
+            <!-- run without post-filters -->
             <conditional name="reference">
                 <param name="source" value="history" />
                 <param name="genome" value="hg19_chrM.fa" />
@@ -433,6 +474,7 @@
             </output>
         </test>
         <test expect_num_outputs="1">
+            <!-- run with custom post-filters -->
             <conditional name="reference">
                 <param name="source" value="history" />
                 <param name="genome" value="hg19_chrM.fa" />
@@ -490,13 +532,6 @@
 This tool wraps the functionality of the ``varscan somatic`` and the
 ``varscan fpfilter`` command line tools.
 
-.. class:: infomark
-
-   The wrapper aims at providing the same functionality as the
-   ``varscan fpfilter`` tool, but implements it using ``pysam`` internally.
-   Note that, as one limitation compared to the original ``varscan`` tool,
-   the current version does not apply filters to indels!
-
 The tool is designed to detect genetic variants in a **pair of samples**
 representing normal and tumor tissue from the same individual. It classifies
 the variants, according to their most likely origin, as **somatic** (variant is