Repository 'varscan_mpileup'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/varscan_mpileup

Changeset 2:d062703d6f13 (2018-12-04)
Previous changeset 1:0bc800d67a0e (2018-07-15) Next changeset 3:ee62c46d9cbe (2018-12-04)
Commit message:
planemo upload for repository https://github.com/galaxyproject/iuc/tree/master/tools/varscan commit 30867f1f022bed18ba1c3b8dc9c54226890b3a9c
modified:
macros.xml
varscan_mpileup.xml
added:
test-data/control_chrM.bam
test-data/hg19_chrM.fa
test-data/tumor_chrM.bam
varscan.py
removed:
test-data/varscan_somatic_indel_result1.vcf
test-data/varscan_somatic_indel_result2.vcf
test-data/varscan_somatic_snp_result1.vcf
test-data/varscan_somatic_snp_result2.vcf
b
diff -r 0bc800d67a0e -r d062703d6f13 macros.xml
--- a/macros.xml Sun Jul 15 09:19:37 2018 -0400
+++ b/macros.xml Tue Dec 04 05:16:18 2018 -0500
[
@@ -2,7 +2,6 @@
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@VERSION@">varscan</requirement>
-            <requirement type="package" version="4.2.1">gawk</requirement>
             <yield/>
         </requirements>
     </xml>
@@ -20,6 +19,7 @@
     <xml name="citations">
         <citations>
             <citation type="doi">10.1101/gr.129684.111</citation>
+            <citation type="doi">10.1002/0471250953.bi1504s44</citation>
         </citations>
     </xml>
 
@@ -52,29 +52,34 @@
         
     </token>
 
-    <xml name="min_coverage">
+    <xml name="min_coverage" token_help="Minimum depth at a position to make a call">
         <param argument="--min-coverage" name="min_coverage" type="integer" value="8" min="1" max="200"
-            label="Minimum read depth" help="Minimum depth at a position to make a call"/>
+            label="Minimum coverage" help="@HELP@"/>
     </xml>
     <xml name="min_reads2">
         <param argument="--min-reads2" name="min_reads2" type="integer" value="2" min="1" max="200"
-            label="Minimum supporting reads" help="Minimum supporting reads at a position to make a call"/>
+            label="Minimum supporting reads" help="Minimum number of variant-supporting reads at a position required to make a call"/>
     </xml>
     <xml name="min_avg_qual">
         <param argument="--min-avg-qual" name="min_avg_qual" type="integer" value="15" min="1" max="50"
-            label="Minimum base quality at a position to count a read"/>
+            label="Minimum base quality"
+            help="The minimum base quality at the variant position required to use a read for calling" />
     </xml>
     <xml name="min_var_freq" token_value="0.01">
         <param argument="--min-var-freq" name="min_var_freq" type="float" value="@VALUE@" min="0" max="1"
-            label="Minimum variant allele frequency threshold"/>
+            label="Minimum variant allele frequency"
+            help="Minimum variant allele frequency required for calling a variant"/>
     </xml>
     <xml name="min_freq_for_hom">
         <param argument="--min-freq-for-hom" name="min_freq_for_hom" type="float" value="0.75" min="0" max="1"
-            label="Minimum frequency to call homozygote"/>
+            label="Minimum homozygous variant allele frequency"
+            help="Minimum variant allele frequency required for calling a homozygous genotype" />
     </xml>
-    <xml name="p_value" token_label="p-value threshold for calling variants" token_value="0.01">
-        <param argument="--p-value" name="p_value" type="float" value="@VALUE@" min="0.0" max="1.0"
-            label="@LABEL@"/>
+    <xml name="p_value" token_value="0.01"
+    token_label="p-value threshold for calling variants"
+    token_help="">
+        <param argument="--p-value" name="p_value" type="float" value="@VALUE@" min="0" max="1"
+            label="@LABEL@" help="@HELP@"/>
     </xml>
     <xml name="strand_filter">
         <param name="strand_filter" type="select" label="Ignore variants with >90% support on one strand">
@@ -83,4 +88,12 @@
         </param>
     </xml>
 
+    <token name="@HELP_HEADER@"><![CDATA[
+**VarScan Overview**
+
+VarScan_ performs variant detection for massively parallel sequencing data, such as exome, WGS, and transcriptome data. Full documentation of the command line package is available here_.
+
+.. _VarScan: http://dkoboldt.github.io/varscan/
+.. _here: http://dkoboldt.github.io/varscan/using-varscan.html
+   ]]></token>
 </macros>
b
diff -r 0bc800d67a0e -r d062703d6f13 test-data/control_chrM.bam
b
Binary file test-data/control_chrM.bam has changed
b
diff -r 0bc800d67a0e -r d062703d6f13 test-data/hg19_chrM.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg19_chrM.fa Tue Dec 04 05:16:18 2018 -0500
b
b'@@ -0,0 +1,333 @@\n+>chrM\n+GATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCAT\n+TTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTG\n+GAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATT\n+CTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACCTACTA\n+AAGTGTGTTAATTAATTAATGCTTGTAGGACATAATAATAACAATTGAAT\n+GTCTGCACAGCCGCTTTCCACACAGACATCATAACAAAAAATTTCCACCA\n+AACCCCCCCCTCCCCCCGCTTCTGGCCACAGCACTTAAACACATCTCTGC\n+CAAACCCCAAAAACAAAGAACCCTAACACCAGCCTAACCAGATTTCAAAT\n+TTTATCTTTAGGCGGTATGCACTTTTAACAGTCACCCCCCAACTAACACA\n+TTATTTTCCCCTCCCACTCCCATACTACTAATCTCATCAATACAACCCCC\n+GCCCATCCTACCCAGCACACACACACCGCTGCTAACCCCATACCCCGAAC\n+CAACCAAACCCCAAAGACACCCCCCACAGTTTATGTAGCTTACCTCCTCA\n+AAGCAATACACTGAAAATGTTTAGACGGGCTCACATCACCCCATAAACAA\n+ATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGC\n+AAGCATCCCCGTTCCAGTGAGTTCACCCTCTAAATCACCACGATCAAAAG\n+GGACAAGCATCAAGCACGCAGCAATGCAGCTCAAAACGCTTAGCCTAGCC\n+ACACCCCCACGGGAAACAGCAGTGATTAACCTTTAGCAATAAACGAAAGT\n+TTAACTAAGCTATACTAACCCCAGGGTTGGTCAATTTCGTGCCAGCCACC\n+GCGGTCACACGATTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTT\n+TAGATCACCCCCTCCCCAATAAAGCTAAAACTCACCTGAGTTGTAAAAAA\n+CTCCAGTTGACACAAAATAGACTACGAAAGTGGCTTTAACATATCTGAAC\n+ACACAATAGCTAAGACCCAAACTGGGATTAGATACCCCACTATGCTTAGC\n+CCTAAACCTCAACAGTTAAATCAACAAAACTGCTCGCCAGAACACTACGA\n+GCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATCCCTCTAGA\n+GGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACCTCTTGC\n+TCAGCCTATATACCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGT\n+AAGCGCAAGTACCCACGTAAAGACGTTAGGTCAAGGTGTAGCCCATGAGG\n+TGGCAAGAAATGGGCTACATTTTCTACCCCAGAAAACTACGATAGCCCTT\n+ATGAAACTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTGAGAGTAGAGT\n+GCTTAGTTGAACAGGGCCCTGAAGCGCGTACACACCGCCCGTCACCCTCC\n+TCAAGTATACTTCAAAGGACATTTAACTAAAACCCCTACGCATTTATATA\n+GAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACG\n+AACCAGAGTGTAGCTTAACACAAAGCACCCAACTTACACTTAGGAGATTT\n+CAACTTAACTTGACCGCTCTGAGCTAAACCTAGCCCCAAACCCACTCCAC\n+CTTACTACCAGACAACCTTAGCCAAACCATTTACCCAAATAAAGTATAGG\n+CGATAGAAATTGAAACCTGGCGCAATAGATATAGTACCGCAAGGGAAAGA\n+TGAAAAATTATAACCAAGCATAATATAGCAAGGACTAACCCCTATACCTT\n+CTGCATAATGAATTAACTAGAAATAACTTTGCAAGGAGAGCCAAAGCTAA\n+GACCCCCGAAACCAGACGAGCTACCTAAGAACAGCTAAAAGAGCACACCC\n+GTCTATGTAGCAAAATAGTGGGAAGATTTATAGGTAGAGGCGACAAACCT\n+ACCGAGCCTGGTGATAGCTGGTTGTCCAAGATAGAATCTTAGTTCAACTT\n+TAAATTTGCCCACAGAACCCTCTAAATCCCCTTGTAAATTTAACTGTTAG\n+TCCAAAGAGGAACAGCTCTTTGGACACTAGGAAAAAACCTTGTAGAGAGA\n+GTAAAAAATTTAACACCCATAGTAGGCCTAAAAGCAGCCACCAATTAAGA\n+AAGCGTTCAAGCTCAACACCCACTACCTAAAAAATCCCAAACATATAACT\n+GAACTCCTCACACCCAATTGGACCAATCTATCACCCTATAGAAGAACTAA\n+TGTTAGTATAAGTAACATGAAAACATTCTCCTCCGCATAAGCCTGCGTCA\n+GATCAAAACACTGAACTGACAATTAACAGCCCAATATCTACAATCAACCA\n+ACAAGTCATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAG\n+GAAAGGTTAAAAAAAGTAAAAGGAACTCGGCAAACCTTACCCCGCCTGTT\n+TACCAAAAACATCACCTCTAGCATCACCAGTATTAGAGGCACCGCCTGCC\n+CAGTGACACATGTTTAACGGCCGCGGTACCCTAACCGTGCAaaggtagca\n+taatcacttgttccttaaatagggacctgtatgaatggctccacgagggt\n+tcagctgtctcttacttttaaccagtgaaattgacctgcccgtgaagagg\n+cgggcatgacacagcaagacgagaagaccctatggagctttaatttaTTA\n+ATGCAAACAGTACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCA\n+TTAAAAATTTCGGTTGGGGCGACCTCGGAGCAGAACCCAACCTCCGAGCA\n+GTACATGCTAAGACTTCACCAGTCAAAGCGAACTACTATACTCAATTGAT\n+CCAATAACTTGACCAACGGAACAAGTTACCCTAGGGATAACAGCGCAATC\n+CTATTCTAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGAT\n+CAGGACATCCCGATGGTGCAGCCGCTATTAAAGGTTCGTTTGTTCAACGA\n+TTAAAGTCCTACGTGATCTGAGTTCAGACCGGAGTAATCCAGGTCGGTTT\n+CTATCTACTTCAAATTCCTCCCTGTACGAAAGGACAAGAGAAATAAGGCC\n+TACTTCACAAAGCGCCTTCCCCCGTAAATGATATCATCTCAACTTAGTAT\n+TATACCCACACCCACCCAAGAACAGGGTTTgttaagatggcagagcccgg\n+taatcgcataaaacttaaaactttacagtcagaggttcaattcctcttct\n+taacaacaTACCCATGGCCAACCTCCTACTCCTCATTGTACCCATTCTAA\n+TCGCAATGGCATTCCTAATGCTTACCGAACGAAAAATTCTAGGCTATATA\n+CAACTACGCAAAGGCCCCAACGTTGTAGGCCCCTACGGGCTACTACAACC\n+CTTCGCTGACGCCATAAAACTCTTCACCAAAGAGCCCCTAAAACCCGCCA\n+CATCTACCATCACCCTCTACATCACCGCCCCGACCTTAGCTCTCACCATC\n+GCTCTTCTACTATGAACCCCCCTCCCCATACCCAACCCCCTGGTCAACCT\n+CAACCTAGGCCTCCTATTTATTCTAGCCACCTCTAGCCTAGCCGTTTACT\n+CAATCCTCTGATCAGGGTGAGCATCAAACTCAAACTACGCCCTGATCGGC\n+GCACTGCGAGCAGTAGCCCAAACAATCTCATATGAAGTCACCCTAGCCAT\n+CATTCTACTATCAACATTACTAATAAGTGGCTCCTTTAACCTCTCCACCC\n+TTATCACAACACAAGAACACC'..b'TTAGTTACCGCTAACAACCTATT\n+CCAACTGTTCATCGGCTGAGAGGGCGTAGGAATTATATCCTTCTTGCTCA\n+TCAGTTGATGATACGCCCGAGCAGATGCCAACACAGCAGCCATTCAAGCA\n+GTCCTATACAACCGTATCGGCGATATCGGTTTCATCCTCGCCTTAGCATG\n+ATTTATCCTACACTCCAACTCATGAGACCCACAACAAATAGCCCTTCTAA\n+ACGCTAATCCAAGCCTCACCCCACTACTAGGCCTCCTCCTAGCAGCAGCA\n+GGCAAATCAGCCCAATTAGGTCTCCACCCCTGACTCCCCTCAGCCATAGA\n+AGGCCCCACCCCAGTCTCAGCCCTACTCCACTCAAGCACTATAGTTGTAG\n+CAGGAATCTTCTTACTCATCCGCTTCCACCCCCTAGCAGAAAATAGCCCA\n+CTAATCCAAACTCTAACACTATGCTTAGGCGCTATCACCACTCTGTTCGC\n+AGCAGTCTGCGCCCTTACACAAAATGACATCAAAAAAATCGTAGCCTTCT\n+CCACTTCAAGTCAACTAGGACTCATAATAGTTACAATCGGCATCAACCAA\n+CCACACCTAGCATTCCTGCACATCTGTACCCACGCCTTCTTCAAAGCCAT\n+ACTATTTATGTGCTCCGGGTCCATCATCCACAACCTTAACAATGAACAAG\n+ATATTCGAAAAATAGGAGGACTACTCAAAACCATACCTCTCACTTCAACC\n+TCCCTCACCATTGGCAGCCTAGCATTAGCAGGAATACCTTTCCTCACAGG\n+TTTCTACTCCAAAGACCACATCATCGAAACCGCAAACATATCATACACAA\n+ACGCCTGAGCCCTATCTATTACTCTCATCGCTACCTCCCTGACAAGCGCC\n+TATAGCACTCGAATAATTCTTCTCACCCTAACAGGTCAACCTCGCTTCCC\n+CACCCTTACTAACATTAACGAAAATAACCCCACCCTACTAAACCCCATTA\n+AACGCCTGGCAGCCGGAAGCCTATTCGCAGGATTTCTCATTACTAACAAC\n+ATTTCCCCCGCATCCCCCTTCCAAACAACAATCCCCCTCTACCTAAAACT\n+CACAGCCCTCGCTGTCACTTTCCTAGGACTTCTAACAGCCCTAGACCTCA\n+ACTACCTAACCAACAAACTTAAAATAAAATCCCCACTATGCACATTTTAT\n+TTCTCCAACATACTCGGATTCTACCCTAGCATCACACACCGCACAATCCC\n+CTATCTAGGCCTTCTTACGAGCCAAAACCTGCCCCTACTCCTCCTAGACC\n+TAACCTGACTAGAAAAGCTATTACCTAAAACAATTTCACAGCACCAAATC\n+TCCACCTCCATCATCACCTCAACCCAAAAAGGCATAATTAAACTTTACTT\n+CCTCTCTTTCTTCTTCCCACTCATCCTAACCCTACTCCTAATCACATAAC\n+CTATTCCCCCGAGCAATCTCAATTACAATATATACACCAACAAACAATGT\n+TCAACCAGTAACCACTACTAATCAACGCCCATAATCATACAAAGCCCCCG\n+CACCAATAGGATCCTCCCGAATCAACCCTGACCCCTCTCCTTCATAAATT\n+ATTCAGCTTCCTACACTATTAAAGTTTACCACAACCACCACCCCATCATA\n+CTCTTTCACCCACAGCACCAATCCTACCTCCATCGCTAACCCCACTAAAA\n+CACTCACCAAGACCTCAACCCCTGACCCCCATGCCTCAGGATACTCCTCA\n+ATAGCCATCGCTGTAGTATATCCAAAGACAACCATCATTCCCCCTAAATA\n+AATTAAAAAAACTATTAAACCCATATAACCTCCCCCAAAATTCAGAATAA\n+TAACACACCCGACCACACCGCTAACAATCAGTACTAAACCCCCATAAATA\n+GGAGAAGGCTTAGAAGAAAACCCCACAAACCCCATTACTAAACCCACACT\n+CAACAGAAACAAAGCATACATCATTATTCTCGCACGGACTACAACCACGA\n+CCAATGATATGAAAAACCATCGTTGTATTTCAACTACAAGAACACCAATG\n+ACCCCAATACGCAAAATTAACCCCCTAATAAAATTAATTAACCACTCATT\n+CATCGACCTCCCCACCCCATCCAACATCTCCGCATGATGAAACTTCGGCT\n+CACTCCTTGGCGCCTGCCTGATCCTCCAAATCACCACAGGACTATTCCTA\n+GCCATACACTACTCACCAGACGCCTCAACCGCCTTTTCATCAATCGCCCA\n+CATCACTCGAGACGTAAATTATGGCTGAATCATCCGCTACCTTCACGCCA\n+ATGGCGCCTCAATATTCTTTATCTGCCTCTTCCTACACATCGGGCGAGGC\n+CTATATTACGGATCATTTCTCTACTCAGAAACCTGAAACATCGGCATTAT\n+CCTCCTGCTTGCAACTATAGCAACAGCCTTCATAGGCTATGTCCTCCCGT\n+GAGGCCAAATATCATTCTGAGGGGCCACAGTAATTACAAACTTACTATCC\n+GCCATCCCATACATTGGGACAGACCTAGTTCAATGAATCTGAGGAGGCTA\n+CTCAGTAGACAGTCCCACCCTCACACGATTCTTTACCTTTCACTTCATCT\n+TACCCTTCATTATTGCAGCCCTAGCAGCACTCCACCTCCTATTCTTGCAC\n+GAAACGGGATCAAACAACCCCCTAGGAATCACCTCCCATTCCGATAAAAT\n+CACCTTCCACCCTTACTACACAATCAAAGACGCCCTCGGCTTACTTCTCT\n+TCCTTCTCTCCTTAATGACATTAACACTATTCTCACCAGACCTCCTAGGC\n+GACCCAGACAATTATACCCTAGCCAACCCCTTAAACACCCCTCCCCACAT\n+CAAGCCCGAATGATATTTCCTATTCGCCTACACAATTCTCCGATCCGTCC\n+CTAACAAACTAGGAGGCGTCCTTGCCCTATTACTATCCATCCTCATCCTA\n+GCAATAATCCCCATCCTCCATATATCCAAACAACAAAGCATAATATTTCG\n+CCCACTAAGCCAATCACTTTATTGACTCCTAGCCGCAGACCTCCTCATTC\n+TAACCTGAATCGGAGGACAACCAGTAAGCTACCCTTTTACCATCATTGGA\n+CAAGTAGCATCCGTACTATACTTCACAACAATCCTAATCCTAATACCAAC\n+TATCTCCCTAATTGAAAACAAAATACTCAAATGGGCCTGTCCTTGTAGTA\n+TAAACTAATACACCAGTCTTGTAAACCGGAGACGAAAACCTTTTTCCAAG\n+GACAAATCAGAGAAAAAGTCTTTAACTCCACCATTAGCACCCAAAGCTAA\n+GATTCTAATTTAAACTATTCTCTGTTCTTTCATGGGGAAGCAGATTTGGG\n+TACCACCCAAGTATTGACTCACCCATCAACAACCGCTATGTATTTCGTAC\n+ATTACTGCCAGCCACCATGAATATTGTACGGTACCATAAATACTTGACCA\n+CCTGTAGTACATAAAAACCCAACCCACATCAAACCCCCCCCCCCCATGCT\n+TACAAGCAAGTACAGCAATCAACCTTCAACTATCACACATCAACTGCAAC\n+TCCAAAGCCACCCCTCACCCACTAGGATACCAACAAACCTACCCACCCTT\n+AACAGTACATAGTACATAAAGTCATTTACCGTACATAGCACATTACAGTC\n+AAATCCCTTCTCGTCCCCATGGATGACCCCCCTCAGATAGGGGTCCCTTG\n+ACCACCATCCTCCGTGAAATCAATATCCCGCACAAGAGTGCTACTCTCCT\n+CGCTCCGGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGAC\n+ATCTGGTTCCTACTTCAGGGCCATAAAGCCTAAATAGCCCACACGTTCCC\n+CTTAAATAAGACATCACGATG\n'
b
diff -r 0bc800d67a0e -r d062703d6f13 test-data/tumor_chrM.bam
b
Binary file test-data/tumor_chrM.bam has changed
b
diff -r 0bc800d67a0e -r d062703d6f13 test-data/varscan_somatic_indel_result1.vcf
--- a/test-data/varscan_somatic_indel_result1.vcf Sun Jul 15 09:19:37 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,18 +0,0 @@
-##fileformat=VCFv4.1
-##source=VarScan2
-##INFO=<ID=DP,Number=1,Type=Integer,Description="Total depth of quality bases">
-##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description="Indicates if record is a somatic mutation">
-##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status of variant (0=Reference,1=Germline,2=Somatic,3=LOH, or 5=Unknown)">
-##INFO=<ID=SSC,Number=1,Type=String,Description="Somatic score in Phred scale (0-255) derived from somatic p-value">
-##INFO=<ID=GPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor+normal versus no variant for Germline calls">
-##INFO=<ID=SPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor versus normal for Somatic/LOH calls">
-##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand">
-##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position">
-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
-##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)">
-##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)">
-##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency">
-##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref/fwd, ref/rev, var/fwd, var/rev">
-#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOR
b
diff -r 0bc800d67a0e -r d062703d6f13 test-data/varscan_somatic_indel_result2.vcf
--- a/test-data/varscan_somatic_indel_result2.vcf Sun Jul 15 09:19:37 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,18 +0,0 @@
-##fileformat=VCFv4.1
-##source=VarScan2
-##INFO=<ID=DP,Number=1,Type=Integer,Description="Total depth of quality bases">
-##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description="Indicates if record is a somatic mutation">
-##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status of variant (0=Reference,1=Germline,2=Somatic,3=LOH, or 5=Unknown)">
-##INFO=<ID=SSC,Number=1,Type=String,Description="Somatic score in Phred scale (0-255) derived from somatic p-value">
-##INFO=<ID=GPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor+normal versus no variant for Germline calls">
-##INFO=<ID=SPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor versus normal for Somatic/LOH calls">
-##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand">
-##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position">
-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
-##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)">
-##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)">
-##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency">
-##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref/fwd, ref/rev, var/fwd, var/rev">
-#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOR
b
diff -r 0bc800d67a0e -r d062703d6f13 test-data/varscan_somatic_snp_result1.vcf
--- a/test-data/varscan_somatic_snp_result1.vcf Sun Jul 15 09:19:37 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,41 +0,0 @@
-##fileformat=VCFv4.1
-##source=VarScan2
-##INFO=<ID=DP,Number=1,Type=Integer,Description="Total depth of quality bases">
-##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description="Indicates if record is a somatic mutation">
-##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status of variant (0=Reference,1=Germline,2=Somatic,3=LOH, or 5=Unknown)">
-##INFO=<ID=SSC,Number=1,Type=String,Description="Somatic score in Phred scale (0-255) derived from somatic p-value">
-##INFO=<ID=GPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor+normal versus no variant for Germline calls">
-##INFO=<ID=SPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor versus normal for Somatic/LOH calls">
-##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand">
-##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position">
-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
-##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)">
-##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)">
-##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency">
-##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref/fwd, ref/rev, var/fwd, var/rev">
-#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOR
-chr1 51436072 . C A . PASS DP=47;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.4681E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:26:26:0:0%:23,3,0,0 0/1:.:21:20:1:4.76%:19,1,1,0
-chr1 51436311 . T C . PASS DP=16;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.375E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:9:9:0:0%:1,8,0,0 0/1:.:7:6:1:14.29%:0,6,0,1
-chr1 51436320 . G A . PASS DP=19;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.2632E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:9:9:0:0%:1,8,0,0 0/1:.:10:9:1:10%:0,9,0,1
-chr1 51439628 . T C . str10 DP=237;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.8101E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:123:123:0:0%:77,46,0,0 0/1:.:114:113:1:0.88%:74,39,0,1
-chr1 51439638 . G A . str10 DP=234;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.9145E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:119:119:0:0%:72,47,0,0 0/1:.:115:114:1:0.87%:75,39,0,1
-chr1 51439665 . C T . PASS DP=226;SOMATIC;SS=2;SSC=9;GPV=1E0;SPV=1.2006E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:114:114:0:0%:56,58,0,0 0/1:.:112:109:3:2.68%:63,46,2,1
-chr1 51439671 . G A . str10 DP=222;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.045E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:110:110:0:0%:53,57,0,0 0/1:.:112:111:1:0.89%:59,52,1,0
-chr1 51439684 . G T . str10 DP=210;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.9524E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:106:106:0:0%:51,55,0,0 0/1:.:104:103:1:0.96%:53,50,1,0
-chr1 51439703 . C T . str10 DP=202;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.099E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:99:99:0:0%:46,53,0,0 0/1:.:103:102:1:0.97%:48,54,0,1
-chr1 51439705 . G T . str10 DP=204;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.1961E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:98:98:0:0%:42,56,0,0 0/1:.:106:105:1:0.94%:48,57,0,1
-chr1 51439706 . G T . str10 DP=201;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.1741E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:97:97:0:0%:41,56,0,0 0/1:.:104:103:1:0.96%:46,57,1,0
-chr1 51439726 . C G . str10 DP=187;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.1872E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:90:90:0:0%:37,53,0,0 0/1:.:97:96:1:1.03%:44,52,1,0
-chr1 51439751 . C G . str10 DP=168;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.3293E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:79:78:0:0%:28,50,0,0 0/1:.:89:88:1:1.12%:35,53,0,1
-chr1 51439763 . G A . PASS DP=159;SOMATIC;SS=2;SSC=5;GPV=1E0;SPV=2.7092E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:76:76:0:0%:34,42,0,0 0/1:.:83:81:2:2.41%:32,49,1,1
-chr1 51439766 . G T . str10 DP=154;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.1299E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:75:75:0:0%:34,41,0,0 0/1:.:79:78:1:1.27%:30,48,0,1
-chr1 51439788 . T C . str10 DP=136;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.1471E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:66:66:0:0%:21,45,0,0 0/1:.:70:69:1:1.43%:24,45,1,0
-chr1 51439828 . G A . str10 DP=122;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.7377E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:52:52:0:0%:14,38,0,0 0/1:.:70:69:1:1.43%:27,42,0,1
-chr1 51439832 . C G . str10 DP=125;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.52E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:56:56:0:0%:14,42,0,0 0/1:.:69:68:1:1.45%:25,43,0,1
-chr1 51439876 . T G . str10 DP=105;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.619E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:46:46:0:0%:10,36,0,0 0/1:.:59:58:1:1.69%:23,35,0,1
-chr1 51439882 . G T . str10 DP=105;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.2381E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:50:50:0:0%:13,37,0,0 0/1:.:55:54:1:1.82%:22,32,0,1
-chr1 51439889 . G T . str10 DP=97;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.1546E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:47:47:0:0%:14,33,0,0 0/1:.:50:49:1:2%:21,28,0,1
-chr1 51439953 . G T . str10 DP=59;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.9322E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:24:24:0:0%:5,19,0,0 0/1:.:35:34:1:2.86%:7,27,0,1
-chr1 51440035 . G T . PASS DP=21;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=6.1905E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:8:8:0:0%:1,7,0,0 0/1:.:13:12:1:7.69%:1,11,0,1
b
diff -r 0bc800d67a0e -r d062703d6f13 test-data/varscan_somatic_snp_result2.vcf
--- a/test-data/varscan_somatic_snp_result2.vcf Sun Jul 15 09:19:37 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,26 +0,0 @@
-##fileformat=VCFv4.1
-##source=VarScan2
-##INFO=<ID=DP,Number=1,Type=Integer,Description="Total depth of quality bases">
-##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description="Indicates if record is a somatic mutation">
-##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status of variant (0=Reference,1=Germline,2=Somatic,3=LOH, or 5=Unknown)">
-##INFO=<ID=SSC,Number=1,Type=String,Description="Somatic score in Phred scale (0-255) derived from somatic p-value">
-##INFO=<ID=GPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor+normal versus no variant for Germline calls">
-##INFO=<ID=SPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor versus normal for Somatic/LOH calls">
-##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand">
-##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position">
-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
-##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)">
-##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)">
-##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency">
-##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref/fwd, ref/rev, var/fwd, var/rev">
-#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOR
-chr1 51436072 . C A . PASS DP=47;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.4681E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:26:26:0:0%:23,3,0,0 0/1:.:21:20:1:4.76%:19,1,1,0
-chr1 51436311 . T C . PASS DP=16;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.375E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:9:9:0:0%:1,8,0,0 0/1:.:7:6:1:14.29%:0,6,0,1
-chr1 51436320 . G A . PASS DP=19;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.2632E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:9:9:0:0%:1,8,0,0 0/1:.:10:9:1:10%:0,9,0,1
-chr1 51439665 . C T . PASS DP=226;SOMATIC;SS=2;SSC=9;GPV=1E0;SPV=1.2006E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:114:114:0:0%:56,58,0,0 0/1:.:112:109:3:2.68%:63,46,2,1
-chr1 51439763 . G A . PASS DP=159;SOMATIC;SS=2;SSC=5;GPV=1E0;SPV=2.7092E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:76:76:0:0%:34,42,0,0 0/1:.:83:81:2:2.41%:32,49,1,1
-chr1 51440025 . A C . PASS DP=27;SOMATIC;SS=2;SSC=1;GPV=1E0;SPV=6.6667E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:9:9:0:0%:1,8,0,0 0/1:.:18:17:1:5.56%:1,16,0,1
-chr1 51440035 . G T . PASS DP=21;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=6.1905E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:8:8:0:0%:1,7,0,0 0/1:.:13:12:1:7.69%:1,11,0,1
-chr1 51440056 . T G . PASS DP=22;SOMATIC;SS=2;SSC=1;GPV=1E0;SPV=6.3636E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:8:8:0:0%:1,7,0,0 0/1:.:14:13:1:7.14%:1,12,0,1
b
diff -r 0bc800d67a0e -r d062703d6f13 varscan.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/varscan.py Tue Dec 04 05:16:18 2018 -0500
[
b'@@ -0,0 +1,1178 @@\n+#!/usr/bin/env python3\n+from __future__ import print_function\n+\n+import argparse\n+import io\n+import os\n+import subprocess\n+import sys\n+import tempfile\n+import time\n+from contextlib import ExitStack\n+from functools import partial\n+from threading import Thread\n+\n+import pysam\n+\n+\n+class VariantCallingError (RuntimeError):\n+    """Exception class for issues with samtools and varscan subprocesses."""\n+\n+    def __init__(self, message=None, call=\'\', error=\'\'):\n+        self.message = message\n+        self.call = call.strip()\n+        self.error = error.strip()\n+\n+    def __str__(self):\n+        if self.message is None:\n+            return \'\'\n+        if self.error:\n+            msg_header = \'"{0}" failed with:\\n{1}\\n\\n\'.format(\n+                self.call, self.error\n+            )\n+        else:\n+            msg_header = \'{0} failed.\\n\'\n+            \'No further information about this error is available.\\n\\n\'.format(\n+                self.call\n+            )\n+        return msg_header + self.message\n+\n+\n+class VarScanCaller (object):\n+    def __init__(self, ref_genome, bam_input_files,\n+                 max_depth=None,\n+                 min_mapqual=None, min_basequal=None,\n+                 threads=1, verbose=False, quiet=True\n+                 ):\n+        self.ref_genome = ref_genome\n+        self.bam_input_files = bam_input_files\n+        self.max_depth = max_depth\n+        self.min_mapqual = min_mapqual\n+        self.min_basequal = min_basequal\n+        self.threads = threads\n+        self.verbose = verbose\n+        self.quiet = quiet\n+\n+        with pysam.FastaFile(ref_genome) as ref_fa:\n+            self.ref_contigs = ref_fa.references\n+            self.ref_lengths = ref_fa.lengths\n+\n+        self.pileup_engine = [\'samtools\', \'mpileup\']\n+        self.varcall_engine = [\'varscan\', \'somatic\']\n+        self.requires_stdout_redirect = False\n+        self.TemporaryContigVCF = partial(\n+            tempfile.NamedTemporaryFile,\n+            mode=\'wb\', suffix=\'\', delete=False, dir=os.getcwd()\n+        )\n+        self.tmpfiles = []\n+\n+    def _get_pysam_pileup_args(self):\n+        param_dict = {}\n+        if self.max_depth is not None:\n+            param_dict[\'max_depth\'] = self.max_depth\n+        if self.min_mapqual is not None:\n+            param_dict[\'min_mapping_quality\'] = self.min_mapqual\n+        if self.min_basequal is not None:\n+            param_dict[\'min_base_quality\'] = self.min_basequal\n+        param_dict[\'compute_baq\'] = False\n+        param_dict[\'stepper\'] = \'samtools\'\n+        return param_dict\n+\n+    def varcall_parallel(self, normal_purity=None, tumor_purity=None,\n+                         min_coverage=None,\n+                         min_var_count=None,\n+                         min_var_freq=None, min_hom_freq=None,\n+                         p_value=None, somatic_p_value=None,\n+                         threads=None, verbose=None, quiet=None\n+                         ):\n+        if not threads:\n+            threads = self.threads\n+        if verbose is None:\n+            verbose = self.verbose\n+        if quiet is None:\n+            quiet = self.quiet\n+        # mapping of method parameters to varcall engine command line options\n+        varcall_engine_option_mapping = [\n+            (\'--normal-purity\', normal_purity),\n+            (\'--tumor-purity\', tumor_purity),\n+            (\'--min-coverage\', min_coverage),\n+            (\'--min-reads2\', min_var_count),\n+            (\'--min-var-freq\', min_var_freq),\n+            (\'--min-freq-for-hom\', min_hom_freq),\n+            (\'--p-value\', p_value),\n+            (\'--somatic-p-value\', somatic_p_value),\n+            (\'--min-avg-qual\', self.min_basequal)\n+        ]\n+        varcall_engine_options = []\n+        for option, value in varcall_engine_option_mapping:\n+            if value is not None:\n+                varcall_engine_options += [option, str(value)]\n+        pileup_engine_options = [\'-B\']\n+        if self.max_depth is not None:\n+            pileup_engine'..b"efault=0.1,\n+        help='Minimum average relative distance of site from the effective '\n+             '3\\'end of ref-supporting reads (default: 0.1)'\n+    )\n+    filter_group.add_argument(\n+        '--min-var-dist3',\n+        dest='min_var_dist3', type=float,\n+        default=0.1,\n+        help='Minimum average relative distance of site from the effective '\n+             '3\\'end of variant-supporting reads (default: 0.1)'\n+    )\n+    filter_group.add_argument(\n+        '--min-ref-len',\n+        dest='min_ref_len', type=int,\n+        default=90,\n+        help='Minimum average trimmed length of reads supporting the ref '\n+             'allele (default: 90)'\n+    )\n+    filter_group.add_argument(\n+        '--min-var-len',\n+        dest='min_var_len', type=int,\n+        default=90,\n+        help='Minimum average trimmed length of reads supporting the variant '\n+             'allele (default: 90)'\n+    )\n+    filter_group.add_argument(\n+        '--max-len-diff',\n+        dest='max_relative_len_diff', type=float,\n+        default=0.25,\n+        help='Maximum average relative read length difference (ref - var; '\n+             'default: 0.25)'\n+    )\n+    filter_group.add_argument(\n+        '--min-strandedness',\n+        dest='min_strandedness', type=float,\n+        default=0.01,\n+        help='Minimum fraction of variant reads from each strand '\n+             '(default: 0.01)'\n+    )\n+    filter_group.add_argument(\n+        '--min-strand-reads',\n+        dest='min_strand_reads', type=int,\n+        default=5,\n+        help='Minimum allele depth required to run --min-strandedness filter '\n+             '(default: 5)'\n+    )\n+    filter_group.add_argument(\n+        '--min-ref-basequal',\n+        dest='min_ref_basequal', type=int,\n+        default=15,\n+        help='Minimum average base quality for the ref allele (default: 15)'\n+    )\n+    filter_group.add_argument(\n+        '--min-var-basequal',\n+        dest='min_var_basequal', type=int,\n+        default=15,\n+        help='Minimum average base quality for the variant allele '\n+             '(default: 15)'\n+    )\n+    filter_group.add_argument(\n+        '--max-basequal-diff',\n+        dest='max_basequal_diff', type=int,\n+        default=50,\n+        help='Maximum average base quality diff (ref - var; default: 50)'\n+    )\n+    filter_group.add_argument(\n+        '--min-ref-mapqual',\n+        dest='min_ref_mapqual', type=int,\n+        default=15,\n+        help='Minimum average mapping quality of reads supporting the ref '\n+             'allele (default: 15)'\n+    )\n+    filter_group.add_argument(\n+        '--min-var-mapqual',\n+        dest='min_var_mapqual', type=int,\n+        default=15,\n+        help='Minimum average mapping quality of reads supporting the variant '\n+             'allele (default: 15)'\n+    )\n+    filter_group.add_argument(\n+        '--max-mapqual-diff',\n+        dest='max_mapqual_diff', type=int,\n+        default=50,\n+        help='Maximum average mapping quality difference (ref - var; '\n+             'default: 50)'\n+    )\n+    filter_group.add_argument(\n+        '--max-ref-mmqs',\n+        dest='max_ref_mmqs', type=int,\n+        default=100,\n+        help='Maximum mismatch quality sum of reads supporting the ref '\n+             'allele (default: 100)'\n+    )\n+    filter_group.add_argument(\n+        '--max-var-mmqs',\n+        dest='max_var_mmqs', type=int,\n+        default=100,\n+        help='Maximum mismatch quality sum of reads supporting the variant '\n+             'allele (default: 100)'\n+    )\n+    filter_group.add_argument(\n+        '--min-mmqs-diff',\n+        dest='min_mmqs_diff', type=int,\n+        default=0,\n+        help='Minimum mismatch quality sum difference (var - ref; default: 0)'\n+    )\n+    filter_group.add_argument(\n+        '--max-mmqs-diff',\n+        dest='max_mmqs_diff', type=int,\n+        default=50,\n+        help='Maximum mismatch quality sum difference (var - ref; default: 50)'\n+    )\n+    args = vars(p.parse_args())\n+    varscan_call(**args)\n"
b
diff -r 0bc800d67a0e -r d062703d6f13 varscan_mpileup.xml
--- a/varscan_mpileup.xml Sun Jul 15 09:19:37 2018 -0400
+++ b/varscan_mpileup.xml Tue Dec 04 05:16:18 2018 -0500
[
@@ -1,9 +1,11 @@
-<tool id="varscan_mpileup" name="VarScan mpileup" version="@VERSION@.0">
+<tool id="varscan_mpileup" name="VarScan mpileup" version="@VERSION@.1">
     <description>for variant detection</description>
     <macros>
         <import>macros.xml</import>
     </macros>
-    <expand macro="requirements" />
+    <expand macro="requirements">
+        <requirement type="package" version="4.2.1">gawk</requirement>
+    </expand>
     <expand macro="stdio" />
     <command><![CDATA[
         ## Set up samples list file.
@@ -74,16 +76,8 @@
         </test>
     </tests>
 
-    <help>
-**VarScan Overview**
-
-VarScan_ performs variant detection for massively parallel sequencing data, such as exome, WGS, and transcriptome data.
-It calls variants from a mpileup dataset and produces a VCF 4.1. Full documentation is available online_.
-
-This tool detects variants from pileups.
-
-.. _VarScan: http://dkoboldt.github.io/varscan/
-.. _online: http://dkoboldt.github.io/varscan/using-varscan.html
+    <help><![CDATA[
+@HELP_HEADER@
 
 **Input**
 
@@ -96,6 +90,6 @@
 
 VarScan produces a VCF 4.1 dataset as output.
 
-    </help>
+    ]]></help>
     <expand macro="citations" />
 </tool>