Repository 'varscan_somatic'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/varscan_somatic

Changeset 2:2fe9ebb98aad (2018-12-04)
Previous changeset 1:31a38ce7e8ae (2018-07-15) Next changeset 3:7903598ccbaf (2018-12-04)
Commit message:
planemo upload for repository https://github.com/galaxyproject/iuc/tree/master/tools/varscan commit 30867f1f022bed18ba1c3b8dc9c54226890b3a9c
modified:
macros.xml
varscan_somatic.xml
added:
test-data/control_chrM.bam
test-data/hg19_chrM.fa
test-data/tumor_chrM.bam
varscan.py
removed:
test-data/varscan_somatic_indel_result1.vcf
test-data/varscan_somatic_indel_result2.vcf
test-data/varscan_somatic_snp_result1.vcf
test-data/varscan_somatic_snp_result2.vcf
b
diff -r 31a38ce7e8ae -r 2fe9ebb98aad macros.xml
--- a/macros.xml Sun Jul 15 09:19:25 2018 -0400
+++ b/macros.xml Tue Dec 04 05:15:50 2018 -0500
[
@@ -2,7 +2,6 @@
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@VERSION@">varscan</requirement>
-            <requirement type="package" version="4.2.1">gawk</requirement>
             <yield/>
         </requirements>
     </xml>
@@ -20,6 +19,7 @@
     <xml name="citations">
         <citations>
             <citation type="doi">10.1101/gr.129684.111</citation>
+            <citation type="doi">10.1002/0471250953.bi1504s44</citation>
         </citations>
     </xml>
 
@@ -52,29 +52,34 @@
         
     </token>
 
-    <xml name="min_coverage">
+    <xml name="min_coverage" token_help="Minimum depth at a position to make a call">
         <param argument="--min-coverage" name="min_coverage" type="integer" value="8" min="1" max="200"
-            label="Minimum read depth" help="Minimum depth at a position to make a call"/>
+            label="Minimum coverage" help="@HELP@"/>
     </xml>
     <xml name="min_reads2">
         <param argument="--min-reads2" name="min_reads2" type="integer" value="2" min="1" max="200"
-            label="Minimum supporting reads" help="Minimum supporting reads at a position to make a call"/>
+            label="Minimum supporting reads" help="Minimum number of variant-supporting reads at a position required to make a call"/>
     </xml>
     <xml name="min_avg_qual">
         <param argument="--min-avg-qual" name="min_avg_qual" type="integer" value="15" min="1" max="50"
-            label="Minimum base quality at a position to count a read"/>
+            label="Minimum base quality"
+            help="The minimum base quality at the variant position required to use a read for calling" />
     </xml>
     <xml name="min_var_freq" token_value="0.01">
         <param argument="--min-var-freq" name="min_var_freq" type="float" value="@VALUE@" min="0" max="1"
-            label="Minimum variant allele frequency threshold"/>
+            label="Minimum variant allele frequency"
+            help="Minimum variant allele frequency required for calling a variant"/>
     </xml>
     <xml name="min_freq_for_hom">
         <param argument="--min-freq-for-hom" name="min_freq_for_hom" type="float" value="0.75" min="0" max="1"
-            label="Minimum frequency to call homozygote"/>
+            label="Minimum homozygous variant allele frequency"
+            help="Minimum variant allele frequency required for calling a homozygous genotype" />
     </xml>
-    <xml name="p_value" token_label="p-value threshold for calling variants" token_value="0.01">
-        <param argument="--p-value" name="p_value" type="float" value="@VALUE@" min="0.0" max="1.0"
-            label="@LABEL@"/>
+    <xml name="p_value" token_value="0.01"
+    token_label="p-value threshold for calling variants"
+    token_help="">
+        <param argument="--p-value" name="p_value" type="float" value="@VALUE@" min="0" max="1"
+            label="@LABEL@" help="@HELP@"/>
     </xml>
     <xml name="strand_filter">
         <param name="strand_filter" type="select" label="Ignore variants with >90% support on one strand">
@@ -83,4 +88,12 @@
         </param>
     </xml>
 
+    <token name="@HELP_HEADER@"><![CDATA[
+**VarScan Overview**
+
+VarScan_ performs variant detection for massively parallel sequencing data, such as exome, WGS, and transcriptome data. Full documentation of the command line package is available here_.
+
+.. _VarScan: http://dkoboldt.github.io/varscan/
+.. _here: http://dkoboldt.github.io/varscan/using-varscan.html
+   ]]></token>
 </macros>
b
diff -r 31a38ce7e8ae -r 2fe9ebb98aad test-data/control_chrM.bam
b
Binary file test-data/control_chrM.bam has changed
b
diff -r 31a38ce7e8ae -r 2fe9ebb98aad test-data/hg19_chrM.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg19_chrM.fa Tue Dec 04 05:15:50 2018 -0500
b
b'@@ -0,0 +1,333 @@\n+>chrM\n+GATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCAT\n+TTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTG\n+GAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATT\n+CTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACCTACTA\n+AAGTGTGTTAATTAATTAATGCTTGTAGGACATAATAATAACAATTGAAT\n+GTCTGCACAGCCGCTTTCCACACAGACATCATAACAAAAAATTTCCACCA\n+AACCCCCCCCTCCCCCCGCTTCTGGCCACAGCACTTAAACACATCTCTGC\n+CAAACCCCAAAAACAAAGAACCCTAACACCAGCCTAACCAGATTTCAAAT\n+TTTATCTTTAGGCGGTATGCACTTTTAACAGTCACCCCCCAACTAACACA\n+TTATTTTCCCCTCCCACTCCCATACTACTAATCTCATCAATACAACCCCC\n+GCCCATCCTACCCAGCACACACACACCGCTGCTAACCCCATACCCCGAAC\n+CAACCAAACCCCAAAGACACCCCCCACAGTTTATGTAGCTTACCTCCTCA\n+AAGCAATACACTGAAAATGTTTAGACGGGCTCACATCACCCCATAAACAA\n+ATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGC\n+AAGCATCCCCGTTCCAGTGAGTTCACCCTCTAAATCACCACGATCAAAAG\n+GGACAAGCATCAAGCACGCAGCAATGCAGCTCAAAACGCTTAGCCTAGCC\n+ACACCCCCACGGGAAACAGCAGTGATTAACCTTTAGCAATAAACGAAAGT\n+TTAACTAAGCTATACTAACCCCAGGGTTGGTCAATTTCGTGCCAGCCACC\n+GCGGTCACACGATTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTT\n+TAGATCACCCCCTCCCCAATAAAGCTAAAACTCACCTGAGTTGTAAAAAA\n+CTCCAGTTGACACAAAATAGACTACGAAAGTGGCTTTAACATATCTGAAC\n+ACACAATAGCTAAGACCCAAACTGGGATTAGATACCCCACTATGCTTAGC\n+CCTAAACCTCAACAGTTAAATCAACAAAACTGCTCGCCAGAACACTACGA\n+GCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATCCCTCTAGA\n+GGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACCTCTTGC\n+TCAGCCTATATACCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGT\n+AAGCGCAAGTACCCACGTAAAGACGTTAGGTCAAGGTGTAGCCCATGAGG\n+TGGCAAGAAATGGGCTACATTTTCTACCCCAGAAAACTACGATAGCCCTT\n+ATGAAACTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTGAGAGTAGAGT\n+GCTTAGTTGAACAGGGCCCTGAAGCGCGTACACACCGCCCGTCACCCTCC\n+TCAAGTATACTTCAAAGGACATTTAACTAAAACCCCTACGCATTTATATA\n+GAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACG\n+AACCAGAGTGTAGCTTAACACAAAGCACCCAACTTACACTTAGGAGATTT\n+CAACTTAACTTGACCGCTCTGAGCTAAACCTAGCCCCAAACCCACTCCAC\n+CTTACTACCAGACAACCTTAGCCAAACCATTTACCCAAATAAAGTATAGG\n+CGATAGAAATTGAAACCTGGCGCAATAGATATAGTACCGCAAGGGAAAGA\n+TGAAAAATTATAACCAAGCATAATATAGCAAGGACTAACCCCTATACCTT\n+CTGCATAAT
GAATTAACTAGAAATAACTTTGCAAGGAGAGCCAAAGCTAA\n+GACCCCCGAAACCAGACGAGCTACCTAAGAACAGCTAAAAGAGCACACCC\n+GTCTATGTAGCAAAATAGTGGGAAGATTTATAGGTAGAGGCGACAAACCT\n+ACCGAGCCTGGTGATAGCTGGTTGTCCAAGATAGAATCTTAGTTCAACTT\n+TAAATTTGCCCACAGAACCCTCTAAATCCCCTTGTAAATTTAACTGTTAG\n+TCCAAAGAGGAACAGCTCTTTGGACACTAGGAAAAAACCTTGTAGAGAGA\n+GTAAAAAATTTAACACCCATAGTAGGCCTAAAAGCAGCCACCAATTAAGA\n+AAGCGTTCAAGCTCAACACCCACTACCTAAAAAATCCCAAACATATAACT\n+GAACTCCTCACACCCAATTGGACCAATCTATCACCCTATAGAAGAACTAA\n+TGTTAGTATAAGTAACATGAAAACATTCTCCTCCGCATAAGCCTGCGTCA\n+GATCAAAACACTGAACTGACAATTAACAGCCCAATATCTACAATCAACCA\n+ACAAGTCATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAG\n+GAAAGGTTAAAAAAAGTAAAAGGAACTCGGCAAACCTTACCCCGCCTGTT\n+TACCAAAAACATCACCTCTAGCATCACCAGTATTAGAGGCACCGCCTGCC\n+CAGTGACACATGTTTAACGGCCGCGGTACCCTAACCGTGCAaaggtagca\n+taatcacttgttccttaaatagggacctgtatgaatggctccacgagggt\n+tcagctgtctcttacttttaaccagtgaaattgacctgcccgtgaagagg\n+cgggcatgacacagcaagacgagaagaccctatggagctttaatttaTTA\n+ATGCAAACAGTACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCA\n+TTAAAAATTTCGGTTGGGGCGACCTCGGAGCAGAACCCAACCTCCGAGCA\n+GTACATGCTAAGACTTCACCAGTCAAAGCGAACTACTATACTCAATTGAT\n+CCAATAACTTGACCAACGGAACAAGTTACCCTAGGGATAACAGCGCAATC\n+CTATTCTAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGAT\n+CAGGACATCCCGATGGTGCAGCCGCTATTAAAGGTTCGTTTGTTCAACGA\n+TTAAAGTCCTACGTGATCTGAGTTCAGACCGGAGTAATCCAGGTCGGTTT\n+CTATCTACTTCAAATTCCTCCCTGTACGAAAGGACAAGAGAAATAAGGCC\n+TACTTCACAAAGCGCCTTCCCCCGTAAATGATATCATCTCAACTTAGTAT\n+TATACCCACACCCACCCAAGAACAGGGTTTgttaagatggcagagcccgg\n+taatcgcataaaacttaaaactttacagtcagaggttcaattcctcttct\n+taacaacaTACCCATGGCCAACCTCCTACTCCTCATTGTACCCATTCTAA\n+TCGCAATGGCATTCCTAATGCTTACCGAACGAAAAATTCTAGGCTATATA\n+CAACTACGCAAAGGCCCCAACGTTGTAGGCCCCTACGGGCTACTACAACC\n+CTTCGCTGACGCCATAAAACTCTTCACCAAAGAGCCCCTAAAACCCGCCA\n+CATCTACCATCACCCTCTACATCACCGCCCCGACCTTAGCTCTCACCATC\n+GCTCTTCTACTATGAACCCCCCTCCCCATACCCAACCCCCTGGTCAACCT\n+CAACCTAGGCCTCCTATTTATTCTAGCCACCTCTAGCCTAGCCGTTTACT\n+CAATCCTCTGATCAGGGTGAGCATCAAACTCAAACTACGCCCTGATCGGC\n+GCACTGCGAGCAGTAGCCCAAACAATCTCATATGAAGTCACCCTAGCC
AT\n+CATTCTACTATCAACATTACTAATAAGTGGCTCCTTTAACCTCTCCACCC\n+TTATCACAACACAAGAACACC'..b'TTAGTTACCGCTAACAACCTATT\n+CCAACTGTTCATCGGCTGAGAGGGCGTAGGAATTATATCCTTCTTGCTCA\n+TCAGTTGATGATACGCCCGAGCAGATGCCAACACAGCAGCCATTCAAGCA\n+GTCCTATACAACCGTATCGGCGATATCGGTTTCATCCTCGCCTTAGCATG\n+ATTTATCCTACACTCCAACTCATGAGACCCACAACAAATAGCCCTTCTAA\n+ACGCTAATCCAAGCCTCACCCCACTACTAGGCCTCCTCCTAGCAGCAGCA\n+GGCAAATCAGCCCAATTAGGTCTCCACCCCTGACTCCCCTCAGCCATAGA\n+AGGCCCCACCCCAGTCTCAGCCCTACTCCACTCAAGCACTATAGTTGTAG\n+CAGGAATCTTCTTACTCATCCGCTTCCACCCCCTAGCAGAAAATAGCCCA\n+CTAATCCAAACTCTAACACTATGCTTAGGCGCTATCACCACTCTGTTCGC\n+AGCAGTCTGCGCCCTTACACAAAATGACATCAAAAAAATCGTAGCCTTCT\n+CCACTTCAAGTCAACTAGGACTCATAATAGTTACAATCGGCATCAACCAA\n+CCACACCTAGCATTCCTGCACATCTGTACCCACGCCTTCTTCAAAGCCAT\n+ACTATTTATGTGCTCCGGGTCCATCATCCACAACCTTAACAATGAACAAG\n+ATATTCGAAAAATAGGAGGACTACTCAAAACCATACCTCTCACTTCAACC\n+TCCCTCACCATTGGCAGCCTAGCATTAGCAGGAATACCTTTCCTCACAGG\n+TTTCTACTCCAAAGACCACATCATCGAAACCGCAAACATATCATACACAA\n+ACGCCTGAGCCCTATCTATTACTCTCATCGCTACCTCCCTGACAAGCGCC\n+TATAGCACTCGAATAATTCTTCTCACCCTAACAGGTCAACCTCGCTTCCC\n+CACCCTTACTAACATTAACGAAAATAACCCCACCCTACTAAACCCCATTA\n+AACGCCTGGCAGCCGGAAGCCTATTCGCAGGATTTCTCATTACTAACAAC\n+ATTTCCCCCGCATCCCCCTTCCAAACAACAATCCCCCTCTACCTAAAACT\n+CACAGCCCTCGCTGTCACTTTCCTAGGACTTCTAACAGCCCTAGACCTCA\n+ACTACCTAACCAACAAACTTAAAATAAAATCCCCACTATGCACATTTTAT\n+TTCTCCAACATACTCGGATTCTACCCTAGCATCACACACCGCACAATCCC\n+CTATCTAGGCCTTCTTACGAGCCAAAACCTGCCCCTACTCCTCCTAGACC\n+TAACCTGACTAGAAAAGCTATTACCTAAAACAATTTCACAGCACCAAATC\n+TCCACCTCCATCATCACCTCAACCCAAAAAGGCATAATTAAACTTTACTT\n+CCTCTCTTTCTTCTTCCCACTCATCCTAACCCTACTCCTAATCACATAAC\n+CTATTCCCCCGAGCAATCTCAATTACAATATATACACCAACAAACAATGT\n+TCAACCAGTAACCACTACTAATCAACGCCCATAATCATACAAAGCCCCCG\n+CACCAATAGGATCCTCCCGAATCAACCCTGACCCCTCTCCTTCATAAATT\n+ATTCAGCTTCCTACACTATTAAAGTTTACCACAACCACCACCCCATCATA\n+CTCTTTCACCCACAGCACCAATCCTACCTCCATCGCTAACCCCACTAAAA\n+CACTCACCAAGACCTCAACCCCTGACCCCCATGCCTCAGGATACTCCTCA\n+ATAGCCATCGCTGTAGTATATCCAAAGACAACCATCATTCCCCCTAAATA\n+AATTAAAAAAACTATTAAACCCATATAACCTCCCC
CAAAATTCAGAATAA\n+TAACACACCCGACCACACCGCTAACAATCAGTACTAAACCCCCATAAATA\n+GGAGAAGGCTTAGAAGAAAACCCCACAAACCCCATTACTAAACCCACACT\n+CAACAGAAACAAAGCATACATCATTATTCTCGCACGGACTACAACCACGA\n+CCAATGATATGAAAAACCATCGTTGTATTTCAACTACAAGAACACCAATG\n+ACCCCAATACGCAAAATTAACCCCCTAATAAAATTAATTAACCACTCATT\n+CATCGACCTCCCCACCCCATCCAACATCTCCGCATGATGAAACTTCGGCT\n+CACTCCTTGGCGCCTGCCTGATCCTCCAAATCACCACAGGACTATTCCTA\n+GCCATACACTACTCACCAGACGCCTCAACCGCCTTTTCATCAATCGCCCA\n+CATCACTCGAGACGTAAATTATGGCTGAATCATCCGCTACCTTCACGCCA\n+ATGGCGCCTCAATATTCTTTATCTGCCTCTTCCTACACATCGGGCGAGGC\n+CTATATTACGGATCATTTCTCTACTCAGAAACCTGAAACATCGGCATTAT\n+CCTCCTGCTTGCAACTATAGCAACAGCCTTCATAGGCTATGTCCTCCCGT\n+GAGGCCAAATATCATTCTGAGGGGCCACAGTAATTACAAACTTACTATCC\n+GCCATCCCATACATTGGGACAGACCTAGTTCAATGAATCTGAGGAGGCTA\n+CTCAGTAGACAGTCCCACCCTCACACGATTCTTTACCTTTCACTTCATCT\n+TACCCTTCATTATTGCAGCCCTAGCAGCACTCCACCTCCTATTCTTGCAC\n+GAAACGGGATCAAACAACCCCCTAGGAATCACCTCCCATTCCGATAAAAT\n+CACCTTCCACCCTTACTACACAATCAAAGACGCCCTCGGCTTACTTCTCT\n+TCCTTCTCTCCTTAATGACATTAACACTATTCTCACCAGACCTCCTAGGC\n+GACCCAGACAATTATACCCTAGCCAACCCCTTAAACACCCCTCCCCACAT\n+CAAGCCCGAATGATATTTCCTATTCGCCTACACAATTCTCCGATCCGTCC\n+CTAACAAACTAGGAGGCGTCCTTGCCCTATTACTATCCATCCTCATCCTA\n+GCAATAATCCCCATCCTCCATATATCCAAACAACAAAGCATAATATTTCG\n+CCCACTAAGCCAATCACTTTATTGACTCCTAGCCGCAGACCTCCTCATTC\n+TAACCTGAATCGGAGGACAACCAGTAAGCTACCCTTTTACCATCATTGGA\n+CAAGTAGCATCCGTACTATACTTCACAACAATCCTAATCCTAATACCAAC\n+TATCTCCCTAATTGAAAACAAAATACTCAAATGGGCCTGTCCTTGTAGTA\n+TAAACTAATACACCAGTCTTGTAAACCGGAGACGAAAACCTTTTTCCAAG\n+GACAAATCAGAGAAAAAGTCTTTAACTCCACCATTAGCACCCAAAGCTAA\n+GATTCTAATTTAAACTATTCTCTGTTCTTTCATGGGGAAGCAGATTTGGG\n+TACCACCCAAGTATTGACTCACCCATCAACAACCGCTATGTATTTCGTAC\n+ATTACTGCCAGCCACCATGAATATTGTACGGTACCATAAATACTTGACCA\n+CCTGTAGTACATAAAAACCCAACCCACATCAAACCCCCCCCCCCCATGCT\n+TACAAGCAAGTACAGCAATCAACCTTCAACTATCACACATCAACTGCAAC\n+TCCAAAGCCACCCCTCACCCACTAGGATACCAACAAACCTACCCACCCTT\n+AACAGTACATAGTACATAAAGTCATTTACCGTACATAGCACATTACAGTC\n+AAATCCCTTCTCGTCCCCATGGATGACCCCCCTCAGATAGGGGTCCCTTG\n+ACCACCATCCTCCGTGAAATC
AATATCCCGCACAAGAGTGCTACTCTCCT\n+CGCTCCGGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGAC\n+ATCTGGTTCCTACTTCAGGGCCATAAAGCCTAAATAGCCCACACGTTCCC\n+CTTAAATAAGACATCACGATG\n'
b
diff -r 31a38ce7e8ae -r 2fe9ebb98aad test-data/tumor_chrM.bam
b
Binary file test-data/tumor_chrM.bam has changed
b
diff -r 31a38ce7e8ae -r 2fe9ebb98aad test-data/varscan_somatic_indel_result1.vcf
--- a/test-data/varscan_somatic_indel_result1.vcf Sun Jul 15 09:19:25 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,18 +0,0 @@
-##fileformat=VCFv4.1
-##source=VarScan2
-##INFO=<ID=DP,Number=1,Type=Integer,Description="Total depth of quality bases">
-##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description="Indicates if record is a somatic mutation">
-##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status of variant (0=Reference,1=Germline,2=Somatic,3=LOH, or 5=Unknown)">
-##INFO=<ID=SSC,Number=1,Type=String,Description="Somatic score in Phred scale (0-255) derived from somatic p-value">
-##INFO=<ID=GPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor+normal versus no variant for Germline calls">
-##INFO=<ID=SPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor versus normal for Somatic/LOH calls">
-##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand">
-##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position">
-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
-##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)">
-##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)">
-##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency">
-##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref/fwd, ref/rev, var/fwd, var/rev">
-#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOR
b
diff -r 31a38ce7e8ae -r 2fe9ebb98aad test-data/varscan_somatic_indel_result2.vcf
--- a/test-data/varscan_somatic_indel_result2.vcf Sun Jul 15 09:19:25 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,18 +0,0 @@
-##fileformat=VCFv4.1
-##source=VarScan2
-##INFO=<ID=DP,Number=1,Type=Integer,Description="Total depth of quality bases">
-##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description="Indicates if record is a somatic mutation">
-##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status of variant (0=Reference,1=Germline,2=Somatic,3=LOH, or 5=Unknown)">
-##INFO=<ID=SSC,Number=1,Type=String,Description="Somatic score in Phred scale (0-255) derived from somatic p-value">
-##INFO=<ID=GPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor+normal versus no variant for Germline calls">
-##INFO=<ID=SPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor versus normal for Somatic/LOH calls">
-##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand">
-##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position">
-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
-##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)">
-##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)">
-##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency">
-##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref/fwd, ref/rev, var/fwd, var/rev">
-#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOR
b
diff -r 31a38ce7e8ae -r 2fe9ebb98aad test-data/varscan_somatic_snp_result1.vcf
--- a/test-data/varscan_somatic_snp_result1.vcf Sun Jul 15 09:19:25 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,41 +0,0 @@
-##fileformat=VCFv4.1
-##source=VarScan2
-##INFO=<ID=DP,Number=1,Type=Integer,Description="Total depth of quality bases">
-##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description="Indicates if record is a somatic mutation">
-##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status of variant (0=Reference,1=Germline,2=Somatic,3=LOH, or 5=Unknown)">
-##INFO=<ID=SSC,Number=1,Type=String,Description="Somatic score in Phred scale (0-255) derived from somatic p-value">
-##INFO=<ID=GPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor+normal versus no variant for Germline calls">
-##INFO=<ID=SPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor versus normal for Somatic/LOH calls">
-##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand">
-##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position">
-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
-##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)">
-##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)">
-##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency">
-##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref/fwd, ref/rev, var/fwd, var/rev">
-#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOR
-chr1 51436072 . C A . PASS DP=47;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.4681E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:26:26:0:0%:23,3,0,0 0/1:.:21:20:1:4.76%:19,1,1,0
-chr1 51436311 . T C . PASS DP=16;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.375E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:9:9:0:0%:1,8,0,0 0/1:.:7:6:1:14.29%:0,6,0,1
-chr1 51436320 . G A . PASS DP=19;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.2632E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:9:9:0:0%:1,8,0,0 0/1:.:10:9:1:10%:0,9,0,1
-chr1 51439628 . T C . str10 DP=237;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.8101E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:123:123:0:0%:77,46,0,0 0/1:.:114:113:1:0.88%:74,39,0,1
-chr1 51439638 . G A . str10 DP=234;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.9145E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:119:119:0:0%:72,47,0,0 0/1:.:115:114:1:0.87%:75,39,0,1
-chr1 51439665 . C T . PASS DP=226;SOMATIC;SS=2;SSC=9;GPV=1E0;SPV=1.2006E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:114:114:0:0%:56,58,0,0 0/1:.:112:109:3:2.68%:63,46,2,1
-chr1 51439671 . G A . str10 DP=222;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.045E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:110:110:0:0%:53,57,0,0 0/1:.:112:111:1:0.89%:59,52,1,0
-chr1 51439684 . G T . str10 DP=210;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.9524E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:106:106:0:0%:51,55,0,0 0/1:.:104:103:1:0.96%:53,50,1,0
-chr1 51439703 . C T . str10 DP=202;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.099E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:99:99:0:0%:46,53,0,0 0/1:.:103:102:1:0.97%:48,54,0,1
-chr1 51439705 . G T . str10 DP=204;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.1961E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:98:98:0:0%:42,56,0,0 0/1:.:106:105:1:0.94%:48,57,0,1
-chr1 51439706 . G T . str10 DP=201;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.1741E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:97:97:0:0%:41,56,0,0 0/1:.:104:103:1:0.96%:46,57,1,0
-chr1 51439726 . C G . str10 DP=187;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.1872E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:90:90:0:0%:37,53,0,0 0/1:.:97:96:1:1.03%:44,52,1,0
-chr1 51439751 . C G . str10 DP=168;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.3293E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:79:78:0:0%:28,50,0,0 0/1:.:89:88:1:1.12%:35,53,0,1
-chr1 51439763 . G A . PASS DP=159;SOMATIC;SS=2;SSC=5;GPV=1E0;SPV=2.7092E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:76:76:0:0%:34,42,0,0 0/1:.:83:81:2:2.41%:32,49,1,1
-chr1 51439766 . G T . str10 DP=154;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.1299E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:75:75:0:0%:34,41,0,0 0/1:.:79:78:1:1.27%:30,48,0,1
-chr1 51439788 . T C . str10 DP=136;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.1471E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:66:66:0:0%:21,45,0,0 0/1:.:70:69:1:1.43%:24,45,1,0
-chr1 51439828 . G A . str10 DP=122;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.7377E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:52:52:0:0%:14,38,0,0 0/1:.:70:69:1:1.43%:27,42,0,1
-chr1 51439832 . C G . str10 DP=125;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.52E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:56:56:0:0%:14,42,0,0 0/1:.:69:68:1:1.45%:25,43,0,1
-chr1 51439876 . T G . str10 DP=105;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.619E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:46:46:0:0%:10,36,0,0 0/1:.:59:58:1:1.69%:23,35,0,1
-chr1 51439882 . G T . str10 DP=105;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.2381E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:50:50:0:0%:13,37,0,0 0/1:.:55:54:1:1.82%:22,32,0,1
-chr1 51439889 . G T . str10 DP=97;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.1546E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:47:47:0:0%:14,33,0,0 0/1:.:50:49:1:2%:21,28,0,1
-chr1 51439953 . G T . str10 DP=59;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.9322E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:24:24:0:0%:5,19,0,0 0/1:.:35:34:1:2.86%:7,27,0,1
-chr1 51440035 . G T . PASS DP=21;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=6.1905E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:8:8:0:0%:1,7,0,0 0/1:.:13:12:1:7.69%:1,11,0,1
b
diff -r 31a38ce7e8ae -r 2fe9ebb98aad test-data/varscan_somatic_snp_result2.vcf
--- a/test-data/varscan_somatic_snp_result2.vcf Sun Jul 15 09:19:25 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,26 +0,0 @@
-##fileformat=VCFv4.1
-##source=VarScan2
-##INFO=<ID=DP,Number=1,Type=Integer,Description="Total depth of quality bases">
-##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description="Indicates if record is a somatic mutation">
-##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status of variant (0=Reference,1=Germline,2=Somatic,3=LOH, or 5=Unknown)">
-##INFO=<ID=SSC,Number=1,Type=String,Description="Somatic score in Phred scale (0-255) derived from somatic p-value">
-##INFO=<ID=GPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor+normal versus no variant for Germline calls">
-##INFO=<ID=SPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor versus normal for Somatic/LOH calls">
-##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand">
-##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position">
-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
-##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)">
-##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)">
-##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency">
-##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref/fwd, ref/rev, var/fwd, var/rev">
-#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOR
-chr1 51436072 . C A . PASS DP=47;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.4681E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:26:26:0:0%:23,3,0,0 0/1:.:21:20:1:4.76%:19,1,1,0
-chr1 51436311 . T C . PASS DP=16;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.375E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:9:9:0:0%:1,8,0,0 0/1:.:7:6:1:14.29%:0,6,0,1
-chr1 51436320 . G A . PASS DP=19;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.2632E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:9:9:0:0%:1,8,0,0 0/1:.:10:9:1:10%:0,9,0,1
-chr1 51439665 . C T . PASS DP=226;SOMATIC;SS=2;SSC=9;GPV=1E0;SPV=1.2006E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:114:114:0:0%:56,58,0,0 0/1:.:112:109:3:2.68%:63,46,2,1
-chr1 51439763 . G A . PASS DP=159;SOMATIC;SS=2;SSC=5;GPV=1E0;SPV=2.7092E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:76:76:0:0%:34,42,0,0 0/1:.:83:81:2:2.41%:32,49,1,1
-chr1 51440025 . A C . PASS DP=27;SOMATIC;SS=2;SSC=1;GPV=1E0;SPV=6.6667E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:9:9:0:0%:1,8,0,0 0/1:.:18:17:1:5.56%:1,16,0,1
-chr1 51440035 . G T . PASS DP=21;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=6.1905E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:8:8:0:0%:1,7,0,0 0/1:.:13:12:1:7.69%:1,11,0,1
-chr1 51440056 . T G . PASS DP=22;SOMATIC;SS=2;SSC=1;GPV=1E0;SPV=6.3636E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:8:8:0:0%:1,7,0,0 0/1:.:14:13:1:7.14%:1,12,0,1
b
diff -r 31a38ce7e8ae -r 2fe9ebb98aad varscan.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/varscan.py Tue Dec 04 05:15:50 2018 -0500
[
b'@@ -0,0 +1,1178 @@\n+#!/usr/bin/env python3\n+from __future__ import print_function\n+\n+import argparse\n+import io\n+import os\n+import subprocess\n+import sys\n+import tempfile\n+import time\n+from contextlib import ExitStack\n+from functools import partial\n+from threading import Thread\n+\n+import pysam\n+\n+\n+class VariantCallingError (RuntimeError):\n+    """Exception class for issues with samtools and varscan subprocesses."""\n+\n+    def __init__(self, message=None, call=\'\', error=\'\'):\n+        self.message = message\n+        self.call = call.strip()\n+        self.error = error.strip()\n+\n+    def __str__(self):\n+        if self.message is None:\n+            return \'\'\n+        if self.error:\n+            msg_header = \'"{0}" failed with:\\n{1}\\n\\n\'.format(\n+                self.call, self.error\n+            )\n+        else:\n+            msg_header = \'{0} failed.\\n\'\n+            \'No further information about this error is available.\\n\\n\'.format(\n+                self.call\n+            )\n+        return msg_header + self.message\n+\n+\n+class VarScanCaller (object):\n+    def __init__(self, ref_genome, bam_input_files,\n+                 max_depth=None,\n+                 min_mapqual=None, min_basequal=None,\n+                 threads=1, verbose=False, quiet=True\n+                 ):\n+        self.ref_genome = ref_genome\n+        self.bam_input_files = bam_input_files\n+        self.max_depth = max_depth\n+        self.min_mapqual = min_mapqual\n+        self.min_basequal = min_basequal\n+        self.threads = threads\n+        self.verbose = verbose\n+        self.quiet = quiet\n+\n+        with pysam.FastaFile(ref_genome) as ref_fa:\n+            self.ref_contigs = ref_fa.references\n+            self.ref_lengths = ref_fa.lengths\n+\n+        self.pileup_engine = [\'samtools\', \'mpileup\']\n+        self.varcall_engine = [\'varscan\', \'somatic\']\n+        self.requires_stdout_redirect = False\n+        
self.TemporaryContigVCF = partial(\n+            tempfile.NamedTemporaryFile,\n+            mode=\'wb\', suffix=\'\', delete=False, dir=os.getcwd()\n+        )\n+        self.tmpfiles = []\n+\n+    def _get_pysam_pileup_args(self):\n+        param_dict = {}\n+        if self.max_depth is not None:\n+            param_dict[\'max_depth\'] = self.max_depth\n+        if self.min_mapqual is not None:\n+            param_dict[\'min_mapping_quality\'] = self.min_mapqual\n+        if self.min_basequal is not None:\n+            param_dict[\'min_base_quality\'] = self.min_basequal\n+        param_dict[\'compute_baq\'] = False\n+        param_dict[\'stepper\'] = \'samtools\'\n+        return param_dict\n+\n+    def varcall_parallel(self, normal_purity=None, tumor_purity=None,\n+                         min_coverage=None,\n+                         min_var_count=None,\n+                         min_var_freq=None, min_hom_freq=None,\n+                         p_value=None, somatic_p_value=None,\n+                         threads=None, verbose=None, quiet=None\n+                         ):\n+        if not threads:\n+            threads = self.threads\n+        if verbose is None:\n+            verbose = self.verbose\n+        if quiet is None:\n+            quiet = self.quiet\n+        # mapping of method parameters to varcall engine command line options\n+        varcall_engine_option_mapping = [\n+            (\'--normal-purity\', normal_purity),\n+            (\'--tumor-purity\', tumor_purity),\n+            (\'--min-coverage\', min_coverage),\n+            (\'--min-reads2\', min_var_count),\n+            (\'--min-var-freq\', min_var_freq),\n+            (\'--min-freq-for-hom\', min_hom_freq),\n+            (\'--p-value\', p_value),\n+            (\'--somatic-p-value\', somatic_p_value),\n+            (\'--min-avg-qual\', self.min_basequal)\n+        ]\n+        varcall_engine_options = []\n+        for option, value in varcall_engine_option_mapping:\n+            if value 
is not None:\n+                varcall_engine_options += [option, str(value)]\n+        pileup_engine_options = [\'-B\']\n+        if self.max_depth is not None:\n+            pileup_engine'..b"efault=0.1,\n+        help='Minimum average relative distance of site from the effective '\n+             '3\\'end of ref-supporting reads (default: 0.1)'\n+    )\n+    filter_group.add_argument(\n+        '--min-var-dist3',\n+        dest='min_var_dist3', type=float,\n+        default=0.1,\n+        help='Minimum average relative distance of site from the effective '\n+             '3\\'end of variant-supporting reads (default: 0.1)'\n+    )\n+    filter_group.add_argument(\n+        '--min-ref-len',\n+        dest='min_ref_len', type=int,\n+        default=90,\n+        help='Minimum average trimmed length of reads supporting the ref '\n+             'allele (default: 90)'\n+    )\n+    filter_group.add_argument(\n+        '--min-var-len',\n+        dest='min_var_len', type=int,\n+        default=90,\n+        help='Minimum average trimmed length of reads supporting the variant '\n+             'allele (default: 90)'\n+    )\n+    filter_group.add_argument(\n+        '--max-len-diff',\n+        dest='max_relative_len_diff', type=float,\n+        default=0.25,\n+        help='Maximum average relative read length difference (ref - var; '\n+             'default: 0.25)'\n+    )\n+    filter_group.add_argument(\n+        '--min-strandedness',\n+        dest='min_strandedness', type=float,\n+        default=0.01,\n+        help='Minimum fraction of variant reads from each strand '\n+             '(default: 0.01)'\n+    )\n+    filter_group.add_argument(\n+        '--min-strand-reads',\n+        dest='min_strand_reads', type=int,\n+        default=5,\n+        help='Minimum allele depth required to run --min-strandedness filter '\n+             '(default: 5)'\n+    )\n+    filter_group.add_argument(\n+        '--min-ref-basequal',\n+        dest='min_ref_basequal', type=int,\n+  
      default=15,\n+        help='Minimum average base quality for the ref allele (default: 15)'\n+    )\n+    filter_group.add_argument(\n+        '--min-var-basequal',\n+        dest='min_var_basequal', type=int,\n+        default=15,\n+        help='Minimum average base quality for the variant allele '\n+             '(default: 15)'\n+    )\n+    filter_group.add_argument(\n+        '--max-basequal-diff',\n+        dest='max_basequal_diff', type=int,\n+        default=50,\n+        help='Maximum average base quality diff (ref - var; default: 50)'\n+    )\n+    filter_group.add_argument(\n+        '--min-ref-mapqual',\n+        dest='min_ref_mapqual', type=int,\n+        default=15,\n+        help='Minimum average mapping quality of reads supporting the ref '\n+             'allele (default: 15)'\n+    )\n+    filter_group.add_argument(\n+        '--min-var-mapqual',\n+        dest='min_var_mapqual', type=int,\n+        default=15,\n+        help='Minimum average mapping quality of reads supporting the variant '\n+             'allele (default: 15)'\n+    )\n+    filter_group.add_argument(\n+        '--max-mapqual-diff',\n+        dest='max_mapqual_diff', type=int,\n+        default=50,\n+        help='Maximum average mapping quality difference (ref - var; '\n+             'default: 50)'\n+    )\n+    filter_group.add_argument(\n+        '--max-ref-mmqs',\n+        dest='max_ref_mmqs', type=int,\n+        default=100,\n+        help='Maximum mismatch quality sum of reads supporting the ref '\n+             'allele (default: 100)'\n+    )\n+    filter_group.add_argument(\n+        '--max-var-mmqs',\n+        dest='max_var_mmqs', type=int,\n+        default=100,\n+        help='Maximum mismatch quality sum of reads supporting the variant '\n+             'allele (default: 100)'\n+    )\n+    filter_group.add_argument(\n+        '--min-mmqs-diff',\n+        dest='min_mmqs_diff', type=int,\n+        default=0,\n+        help='Minimum mismatch quality sum difference 
(var - ref; default: 0)'\n+    )\n+    filter_group.add_argument(\n+        '--max-mmqs-diff',\n+        dest='max_mmqs_diff', type=int,\n+        default=50,\n+        help='Maximum mismatch quality sum difference (var - ref; default: 50)'\n+    )\n+    args = vars(p.parse_args())\n+    varscan_call(**args)\n"
b
diff -r 31a38ce7e8ae -r 2fe9ebb98aad varscan_somatic.xml
--- a/varscan_somatic.xml Sun Jul 15 09:19:25 2018 -0400
+++ b/varscan_somatic.xml Tue Dec 04 05:15:50 2018 -0500
[
b'@@ -1,120 +1,534 @@\n <tool id="varscan_somatic" name="VarScan somatic" version="@VERSION@.1">\n-    <description>Call germline/somatic variants from tumor-normal pileups</description>\n+    <description>Call germline/somatic and LOH variants from tumor-normal sample pairs</description>\n     <macros>\n         <import>macros.xml</import>\n+        <macro name="test_mentions_contig">\n+            <assert_contents>\n+                <has_line_matching\n+                expression="##contig=.ID=chrM,length=16571." />\n+            </assert_contents>\n+        </macro>\n+        <macro name="test_mentions_filters">\n+            <assert_contents>\n+                <has_line_matching\n+                expression="##FILTER=.ID=VarCount,Description=.+" />\n+                <has_line_matching\n+                expression="##FILTER=.ID=ReadLenDiff,Description=.+" />\n+                <has_line_matching\n+                expression="##FILTER=.ID=RefDist3,Description=.+" />\n+            </assert_contents>\n+        </macro>\n+        <macro name="test_not_mentions_filters">\n+            <assert_contents>\n+                <not_has_text\n+                text="##FILTER=&lt;ID=VarCount,Description=" />\n+                <not_has_text\n+                text="##FILTER=&lt;ID=ReadLenDiff,Description=" />\n+                <not_has_text\n+                text="##FILTER=&lt;ID=RefDist3,Description=" />\n+            </assert_contents>\n+        </macro>\n     </macros>\n-    <expand macro="requirements" />\n-    <expand macro="stdio" />\n+    <expand macro="requirements">\n+        <requirement type="package" version="3.6.7">python</requirement>\n+        <requirement type="package" version="0.15.1">pysam</requirement>\n+    </expand>\n+    <stdio>\n+        <exit_code range="1:" />\n+    </stdio>\n     <command><![CDATA[\n-        varscan somatic\n-            @INPUT_PILEUPS@\n-            --min-coverage ${min_coverage}\n-            --min-reads2 ${min_reads2}\n-            
--min-avg-qual ${min_avg_qual}\n-            --min-var-freq ${min_var_freq}\n-            --min-freq-for-hom ${min_freq_for_hom}\n+        #if str($reference.source) == "history":\n+            #set ref_genome = \'ref.fa\'\n+            ln -s -f \'$reference.genome\' $ref_genome &&\n+        #else:\n+            #set ref_genome = \'$reference.genome.fields.path\'\n+        #end if\n+        #set normal_data = \'normal.bam\'\n+        #set tumor_data = \'tumor.bam\'\n+        ln -s -f \'$normal_bam\' $normal_data &&\n+        ln -s -f \'$tumor_bam\' $tumor_data &&\n+        ln -s -f \'${normal_bam.metadata.bam_index}\' ${normal_data}.bai &&\n+        ln -s -f \'${tumor_bam.metadata.bam_index}\' ${tumor_data}.bai &&\n+        python3 $__tool_directory__/varscan.py\n+            --normal \'$normal_data\'\n+            --tumor \'$tumor_data\'\n             --normal-purity ${normal_purity}\n             --tumor-purity ${tumor_purity}\n-            --tumor-purity ${tumor_purity}\n-            --min-coverage-normal ${min_coverage_normal}\n-            --somatic-p-value ${somatic_p_value}\n-            --p-value ${p_value}\n-            #if str($strand_filter) == \'yes\':\n-              --strand-filter 1\n+            #if str($split_output):\n+                --ofile variants_out\n+                $split_output\n+            #else:\n+                --ofile \'$output\'\n+            #end if\n+            --threads \\${GALAXY_SLOTS:-2}\n+            #if str($call_params.settings) == "custom":\n+                ## samtools mpileup parameters\n+                --min-basequal ${call_params.min_avg_qual}\n+                --min-mapqual ${call_params.min_mapqual}\n+                ## VarScan parameters\n+                --min-coverage ${call_params.min_coverage}\n+                --min-var-count ${call_params.min_reads2}\n+                --min-var-freq ${call_params.min_var_freq}\n+                --min-hom-freq ${call_params.min_freq_for_hom}\n+                --p-value 
${call_params.p_value}\n+                --somatic-p-value ${call_params.somatic_p_value}\n             #end if\n-\n-            '..b'ant is found in both samples =>\n+germline mutation event) and **LOH** (variant is found in both samples, but\n+only the tumor sample appears to be homozygous for it => loss of heterozygosity\n+event).\n+This classification is encoded in the variant ``INFO`` fields of the VCF output\n+produced by the tool in the form of a status code ``SS`` (somatic status),\n+where:\n+\n+- ``SS=1`` signifies a likely germline variant,\n+- ``SS=2`` a somatic variant\n+- ``SS=3`` a LOH variant\n \n-.. _VarScan: http://dkoboldt.github.io/varscan/\n-.. _online: http://dkoboldt.github.io/varscan/using-varscan.html\n+In addition, ``SS=0`` indicates a possible variant, but with insufficient\n+evidence for an, at least, heterozygous state in either individual sample, and\n+``SS=5`` is used for variants of unexplained origin (*e.g.*, variants found in\n+the normal, but not in the tumor tissue sample).\n+\n+In a second step, following variant calling, the tool can try to detect likely\n+false-positive calls by re-inspecting the data at the variant sites more\n+carefully and looking for signs that may indicate problems with the\n+sequencing data or its mapping. If a called variant is deemed a possible\n+false-positive at this step, this gets indicated in the ``FILTER`` field of the\n+variant record in the VCF output. 
For high confidence variants passing all\n+posterior (applied after variant calling) filters the value of the field will\n+be ``PASS``, for variants failing any of the posterior filters the value will\n+be a ``;``-separated list of the problematic filters.\n+\n \n **Input**\n \n-::\n-\n-  mpileup file - The SAMtools mpileup files for the normal and tumor tissue\n- \n+The tool takes as input a reference genome (in fasta format) and a pair of\n+aligned reads datasets (bam format).\n \n **Output**\n \n-VarScan produces a VCF 4.1 dataset as output.\n+A VCF dataset of called variants. When asked to *Generate separate output\n+datasets for SNP and indel calls*, the tool will behave like the\n+``varscan somatic`` command line tool and produce two VCF datasets - one with\n+just the single nucleotide variants, while the other one will store\n+insertion/deletion variants.\n+\n+**Options**\n+\n+*Estimated purity of normal sample / of tumor sample*\n+\n+Since, in practice, it is often impossible to isolate tissue samples without\n+contamination from surrounding tissue or from invading cells, these two fields\n+let you indicate your estimate of the purity of the two samples (as fractions\n+between 0 and 1, where 1 would indicate a contamination-free sample and 0.5 a\n+sample to which the desired tissue contributes only 50%, while the other 50%\n+consist of cells from the other tissue type).\n+\n+*Settings for Variant Calling*\n \n+Settings in this section will affect the steps of variant calling and\n+classification. You can accept VarScan\'s default values for the corresponding\n+parameters or customize them according to your needs.\n \n+*Settings for Posterior Variant Filtering*\n+\n+Use the parameters in this section to configure the false-positive filtering\n+step that follows variant calling and classification. 
These settings will not\n+influence the number of variants detected nor their classification, but may\n+change the ``FILTER`` field of variant records to indicate which variants\n+failed to pass certain filters. You can use this information with downstream\n+tools to exclude certain variants from further analysis steps or include only\n+high confidence variants that passed all filters (those with ``PASS`` as their\n+``FILTER`` field value). You can accept the original filter defaults of the\n+``varscan fpfilter`` command line tool, use the settings established for the\n+tool in the `DREAM3 challenge`_, or choose to customize the settings.\n+Alternatively, you can also choose to skip posterior filtering entirely, in\n+which case all variants will have their ``FILTER`` field set to ``PASS``.\n+\n+.. _DREAM3 challenge: https://www.synapse.org/#!Synapse:syn312572/wiki/58893\n     </help>\n     <expand macro="citations" />\n </tool>\n'