Previous changeset 1:31a38ce7e8ae (2018-07-15) Next changeset 3:7903598ccbaf (2018-12-04) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/iuc/tree/master/tools/varscan commit 30867f1f022bed18ba1c3b8dc9c54226890b3a9c |
modified:
macros.xml varscan_somatic.xml |
added:
test-data/control_chrM.bam test-data/hg19_chrM.fa test-data/tumor_chrM.bam varscan.py |
removed:
test-data/varscan_somatic_indel_result1.vcf test-data/varscan_somatic_indel_result2.vcf test-data/varscan_somatic_snp_result1.vcf test-data/varscan_somatic_snp_result2.vcf |
b |
diff -r 31a38ce7e8ae -r 2fe9ebb98aad macros.xml --- a/macros.xml Sun Jul 15 09:19:25 2018 -0400 +++ b/macros.xml Tue Dec 04 05:15:50 2018 -0500 |
[ |
@@ -2,7 +2,6 @@ <xml name="requirements"> <requirements> <requirement type="package" version="@VERSION@">varscan</requirement> - <requirement type="package" version="4.2.1">gawk</requirement> <yield/> </requirements> </xml> @@ -20,6 +19,7 @@ <xml name="citations"> <citations> <citation type="doi">10.1101/gr.129684.111</citation> + <citation type="doi">10.1002/0471250953.bi1504s44</citation> </citations> </xml> @@ -52,29 +52,34 @@ </token> - <xml name="min_coverage"> + <xml name="min_coverage" token_help="Minimum depth at a position to make a call"> <param argument="--min-coverage" name="min_coverage" type="integer" value="8" min="1" max="200" - label="Minimum read depth" help="Minimum depth at a position to make a call"/> + label="Minimum coverage" help="@HELP@"/> </xml> <xml name="min_reads2"> <param argument="--min-reads2" name="min_reads2" type="integer" value="2" min="1" max="200" - label="Minimum supporting reads" help="Minimum supporting reads at a position to make a call"/> + label="Minimum supporting reads" help="Minimum number of variant-supporting reads at a position required to make a call"/> </xml> <xml name="min_avg_qual"> <param argument="--min-avg-qual" name="min_avg_qual" type="integer" value="15" min="1" max="50" - label="Minimum base quality at a position to count a read"/> + label="Minimum base quality" + help="The minimum base quality at the variant position required to use a read for calling" /> </xml> <xml name="min_var_freq" token_value="0.01"> <param argument="--min-var-freq" name="min_var_freq" type="float" value="@VALUE@" min="0" max="1" - label="Minimum variant allele frequency threshold"/> + label="Minimum variant allele frequency" + help="Minimum variant allele frequency required for calling a variant"/> </xml> <xml name="min_freq_for_hom"> <param argument="--min-freq-for-hom" name="min_freq_for_hom" type="float" value="0.75" min="0" max="1" - label="Minimum frequency to call homozygote"/> + label="Minimum homozygous variant allele 
frequency" + help="Minimum variant allele frequency required for calling a homozygous genotype" /> </xml> - <xml name="p_value" token_label="p-value threshold for calling variants" token_value="0.01"> - <param argument="--p-value" name="p_value" type="float" value="@VALUE@" min="0.0" max="1.0" - label="@LABEL@"/> + <xml name="p_value" token_value="0.01" + token_label="p-value threshold for calling variants" + token_help=""> + <param argument="--p-value" name="p_value" type="float" value="@VALUE@" min="0" max="1" + label="@LABEL@" help="@HELP@"/> </xml> <xml name="strand_filter"> <param name="strand_filter" type="select" label="Ignore variants with >90% support on one strand"> @@ -83,4 +88,12 @@ </param> </xml> + <token name="@HELP_HEADER@"><![CDATA[ +**VarScan Overview** + +VarScan_ performs variant detection for massively parallel sequencing data, such as exome, WGS, and transcriptome data. Full documentation of the command line package is available here_. + +.. _VarScan: http://dkoboldt.github.io/varscan/ +.. _here: http://dkoboldt.github.io/varscan/using-varscan.html + ]]></token> </macros> |
b |
diff -r 31a38ce7e8ae -r 2fe9ebb98aad test-data/control_chrM.bam |
b |
Binary file test-data/control_chrM.bam has changed |
b |
diff -r 31a38ce7e8ae -r 2fe9ebb98aad test-data/hg19_chrM.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/hg19_chrM.fa Tue Dec 04 05:15:50 2018 -0500 |
b |
b'@@ -0,0 +1,333 @@\n+>chrM\n+GATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCAT\n+TTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTG\n+GAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATT\n+CTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACCTACTA\n+AAGTGTGTTAATTAATTAATGCTTGTAGGACATAATAATAACAATTGAAT\n+GTCTGCACAGCCGCTTTCCACACAGACATCATAACAAAAAATTTCCACCA\n+AACCCCCCCCTCCCCCCGCTTCTGGCCACAGCACTTAAACACATCTCTGC\n+CAAACCCCAAAAACAAAGAACCCTAACACCAGCCTAACCAGATTTCAAAT\n+TTTATCTTTAGGCGGTATGCACTTTTAACAGTCACCCCCCAACTAACACA\n+TTATTTTCCCCTCCCACTCCCATACTACTAATCTCATCAATACAACCCCC\n+GCCCATCCTACCCAGCACACACACACCGCTGCTAACCCCATACCCCGAAC\n+CAACCAAACCCCAAAGACACCCCCCACAGTTTATGTAGCTTACCTCCTCA\n+AAGCAATACACTGAAAATGTTTAGACGGGCTCACATCACCCCATAAACAA\n+ATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGC\n+AAGCATCCCCGTTCCAGTGAGTTCACCCTCTAAATCACCACGATCAAAAG\n+GGACAAGCATCAAGCACGCAGCAATGCAGCTCAAAACGCTTAGCCTAGCC\n+ACACCCCCACGGGAAACAGCAGTGATTAACCTTTAGCAATAAACGAAAGT\n+TTAACTAAGCTATACTAACCCCAGGGTTGGTCAATTTCGTGCCAGCCACC\n+GCGGTCACACGATTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTT\n+TAGATCACCCCCTCCCCAATAAAGCTAAAACTCACCTGAGTTGTAAAAAA\n+CTCCAGTTGACACAAAATAGACTACGAAAGTGGCTTTAACATATCTGAAC\n+ACACAATAGCTAAGACCCAAACTGGGATTAGATACCCCACTATGCTTAGC\n+CCTAAACCTCAACAGTTAAATCAACAAAACTGCTCGCCAGAACACTACGA\n+GCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATCCCTCTAGA\n+GGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACCTCTTGC\n+TCAGCCTATATACCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGT\n+AAGCGCAAGTACCCACGTAAAGACGTTAGGTCAAGGTGTAGCCCATGAGG\n+TGGCAAGAAATGGGCTACATTTTCTACCCCAGAAAACTACGATAGCCCTT\n+ATGAAACTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTGAGAGTAGAGT\n+GCTTAGTTGAACAGGGCCCTGAAGCGCGTACACACCGCCCGTCACCCTCC\n+TCAAGTATACTTCAAAGGACATTTAACTAAAACCCCTACGCATTTATATA\n+GAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACG\n+AACCAGAGTGTAGCTTAACACAAAGCACCCAACTTACACTTAGGAGATTT\n+CAACTTAACTTGACCGCTCTGAGCTAAACCTAGCCCCAAACCCACTCCAC\n+CTTACTACCAGACAACCTTAGCCAAACCATTTACCCAAATAAAGTATAGG\n+CGATAGAAATTGAAACCTGGCGCAATAGATATAGTACCGCAAGGGAAAGA\n+TGAAAAATTATAACCAAGCATAATATAGCAAGGACTAACCCCTATACCTT\n+CTGCATAAT
GAATTAACTAGAAATAACTTTGCAAGGAGAGCCAAAGCTAA\n+GACCCCCGAAACCAGACGAGCTACCTAAGAACAGCTAAAAGAGCACACCC\n+GTCTATGTAGCAAAATAGTGGGAAGATTTATAGGTAGAGGCGACAAACCT\n+ACCGAGCCTGGTGATAGCTGGTTGTCCAAGATAGAATCTTAGTTCAACTT\n+TAAATTTGCCCACAGAACCCTCTAAATCCCCTTGTAAATTTAACTGTTAG\n+TCCAAAGAGGAACAGCTCTTTGGACACTAGGAAAAAACCTTGTAGAGAGA\n+GTAAAAAATTTAACACCCATAGTAGGCCTAAAAGCAGCCACCAATTAAGA\n+AAGCGTTCAAGCTCAACACCCACTACCTAAAAAATCCCAAACATATAACT\n+GAACTCCTCACACCCAATTGGACCAATCTATCACCCTATAGAAGAACTAA\n+TGTTAGTATAAGTAACATGAAAACATTCTCCTCCGCATAAGCCTGCGTCA\n+GATCAAAACACTGAACTGACAATTAACAGCCCAATATCTACAATCAACCA\n+ACAAGTCATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAG\n+GAAAGGTTAAAAAAAGTAAAAGGAACTCGGCAAACCTTACCCCGCCTGTT\n+TACCAAAAACATCACCTCTAGCATCACCAGTATTAGAGGCACCGCCTGCC\n+CAGTGACACATGTTTAACGGCCGCGGTACCCTAACCGTGCAaaggtagca\n+taatcacttgttccttaaatagggacctgtatgaatggctccacgagggt\n+tcagctgtctcttacttttaaccagtgaaattgacctgcccgtgaagagg\n+cgggcatgacacagcaagacgagaagaccctatggagctttaatttaTTA\n+ATGCAAACAGTACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCA\n+TTAAAAATTTCGGTTGGGGCGACCTCGGAGCAGAACCCAACCTCCGAGCA\n+GTACATGCTAAGACTTCACCAGTCAAAGCGAACTACTATACTCAATTGAT\n+CCAATAACTTGACCAACGGAACAAGTTACCCTAGGGATAACAGCGCAATC\n+CTATTCTAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGAT\n+CAGGACATCCCGATGGTGCAGCCGCTATTAAAGGTTCGTTTGTTCAACGA\n+TTAAAGTCCTACGTGATCTGAGTTCAGACCGGAGTAATCCAGGTCGGTTT\n+CTATCTACTTCAAATTCCTCCCTGTACGAAAGGACAAGAGAAATAAGGCC\n+TACTTCACAAAGCGCCTTCCCCCGTAAATGATATCATCTCAACTTAGTAT\n+TATACCCACACCCACCCAAGAACAGGGTTTgttaagatggcagagcccgg\n+taatcgcataaaacttaaaactttacagtcagaggttcaattcctcttct\n+taacaacaTACCCATGGCCAACCTCCTACTCCTCATTGTACCCATTCTAA\n+TCGCAATGGCATTCCTAATGCTTACCGAACGAAAAATTCTAGGCTATATA\n+CAACTACGCAAAGGCCCCAACGTTGTAGGCCCCTACGGGCTACTACAACC\n+CTTCGCTGACGCCATAAAACTCTTCACCAAAGAGCCCCTAAAACCCGCCA\n+CATCTACCATCACCCTCTACATCACCGCCCCGACCTTAGCTCTCACCATC\n+GCTCTTCTACTATGAACCCCCCTCCCCATACCCAACCCCCTGGTCAACCT\n+CAACCTAGGCCTCCTATTTATTCTAGCCACCTCTAGCCTAGCCGTTTACT\n+CAATCCTCTGATCAGGGTGAGCATCAAACTCAAACTACGCCCTGATCGGC\n+GCACTGCGAGCAGTAGCCCAAACAATCTCATATGAAGTCACCCTAGCC
AT\n+CATTCTACTATCAACATTACTAATAAGTGGCTCCTTTAACCTCTCCACCC\n+TTATCACAACACAAGAACACC'..b'TTAGTTACCGCTAACAACCTATT\n+CCAACTGTTCATCGGCTGAGAGGGCGTAGGAATTATATCCTTCTTGCTCA\n+TCAGTTGATGATACGCCCGAGCAGATGCCAACACAGCAGCCATTCAAGCA\n+GTCCTATACAACCGTATCGGCGATATCGGTTTCATCCTCGCCTTAGCATG\n+ATTTATCCTACACTCCAACTCATGAGACCCACAACAAATAGCCCTTCTAA\n+ACGCTAATCCAAGCCTCACCCCACTACTAGGCCTCCTCCTAGCAGCAGCA\n+GGCAAATCAGCCCAATTAGGTCTCCACCCCTGACTCCCCTCAGCCATAGA\n+AGGCCCCACCCCAGTCTCAGCCCTACTCCACTCAAGCACTATAGTTGTAG\n+CAGGAATCTTCTTACTCATCCGCTTCCACCCCCTAGCAGAAAATAGCCCA\n+CTAATCCAAACTCTAACACTATGCTTAGGCGCTATCACCACTCTGTTCGC\n+AGCAGTCTGCGCCCTTACACAAAATGACATCAAAAAAATCGTAGCCTTCT\n+CCACTTCAAGTCAACTAGGACTCATAATAGTTACAATCGGCATCAACCAA\n+CCACACCTAGCATTCCTGCACATCTGTACCCACGCCTTCTTCAAAGCCAT\n+ACTATTTATGTGCTCCGGGTCCATCATCCACAACCTTAACAATGAACAAG\n+ATATTCGAAAAATAGGAGGACTACTCAAAACCATACCTCTCACTTCAACC\n+TCCCTCACCATTGGCAGCCTAGCATTAGCAGGAATACCTTTCCTCACAGG\n+TTTCTACTCCAAAGACCACATCATCGAAACCGCAAACATATCATACACAA\n+ACGCCTGAGCCCTATCTATTACTCTCATCGCTACCTCCCTGACAAGCGCC\n+TATAGCACTCGAATAATTCTTCTCACCCTAACAGGTCAACCTCGCTTCCC\n+CACCCTTACTAACATTAACGAAAATAACCCCACCCTACTAAACCCCATTA\n+AACGCCTGGCAGCCGGAAGCCTATTCGCAGGATTTCTCATTACTAACAAC\n+ATTTCCCCCGCATCCCCCTTCCAAACAACAATCCCCCTCTACCTAAAACT\n+CACAGCCCTCGCTGTCACTTTCCTAGGACTTCTAACAGCCCTAGACCTCA\n+ACTACCTAACCAACAAACTTAAAATAAAATCCCCACTATGCACATTTTAT\n+TTCTCCAACATACTCGGATTCTACCCTAGCATCACACACCGCACAATCCC\n+CTATCTAGGCCTTCTTACGAGCCAAAACCTGCCCCTACTCCTCCTAGACC\n+TAACCTGACTAGAAAAGCTATTACCTAAAACAATTTCACAGCACCAAATC\n+TCCACCTCCATCATCACCTCAACCCAAAAAGGCATAATTAAACTTTACTT\n+CCTCTCTTTCTTCTTCCCACTCATCCTAACCCTACTCCTAATCACATAAC\n+CTATTCCCCCGAGCAATCTCAATTACAATATATACACCAACAAACAATGT\n+TCAACCAGTAACCACTACTAATCAACGCCCATAATCATACAAAGCCCCCG\n+CACCAATAGGATCCTCCCGAATCAACCCTGACCCCTCTCCTTCATAAATT\n+ATTCAGCTTCCTACACTATTAAAGTTTACCACAACCACCACCCCATCATA\n+CTCTTTCACCCACAGCACCAATCCTACCTCCATCGCTAACCCCACTAAAA\n+CACTCACCAAGACCTCAACCCCTGACCCCCATGCCTCAGGATACTCCTCA\n+ATAGCCATCGCTGTAGTATATCCAAAGACAACCATCATTCCCCCTAAATA\n+AATTAAAAAAACTATTAAACCCATATAACCTCCCC
CAAAATTCAGAATAA\n+TAACACACCCGACCACACCGCTAACAATCAGTACTAAACCCCCATAAATA\n+GGAGAAGGCTTAGAAGAAAACCCCACAAACCCCATTACTAAACCCACACT\n+CAACAGAAACAAAGCATACATCATTATTCTCGCACGGACTACAACCACGA\n+CCAATGATATGAAAAACCATCGTTGTATTTCAACTACAAGAACACCAATG\n+ACCCCAATACGCAAAATTAACCCCCTAATAAAATTAATTAACCACTCATT\n+CATCGACCTCCCCACCCCATCCAACATCTCCGCATGATGAAACTTCGGCT\n+CACTCCTTGGCGCCTGCCTGATCCTCCAAATCACCACAGGACTATTCCTA\n+GCCATACACTACTCACCAGACGCCTCAACCGCCTTTTCATCAATCGCCCA\n+CATCACTCGAGACGTAAATTATGGCTGAATCATCCGCTACCTTCACGCCA\n+ATGGCGCCTCAATATTCTTTATCTGCCTCTTCCTACACATCGGGCGAGGC\n+CTATATTACGGATCATTTCTCTACTCAGAAACCTGAAACATCGGCATTAT\n+CCTCCTGCTTGCAACTATAGCAACAGCCTTCATAGGCTATGTCCTCCCGT\n+GAGGCCAAATATCATTCTGAGGGGCCACAGTAATTACAAACTTACTATCC\n+GCCATCCCATACATTGGGACAGACCTAGTTCAATGAATCTGAGGAGGCTA\n+CTCAGTAGACAGTCCCACCCTCACACGATTCTTTACCTTTCACTTCATCT\n+TACCCTTCATTATTGCAGCCCTAGCAGCACTCCACCTCCTATTCTTGCAC\n+GAAACGGGATCAAACAACCCCCTAGGAATCACCTCCCATTCCGATAAAAT\n+CACCTTCCACCCTTACTACACAATCAAAGACGCCCTCGGCTTACTTCTCT\n+TCCTTCTCTCCTTAATGACATTAACACTATTCTCACCAGACCTCCTAGGC\n+GACCCAGACAATTATACCCTAGCCAACCCCTTAAACACCCCTCCCCACAT\n+CAAGCCCGAATGATATTTCCTATTCGCCTACACAATTCTCCGATCCGTCC\n+CTAACAAACTAGGAGGCGTCCTTGCCCTATTACTATCCATCCTCATCCTA\n+GCAATAATCCCCATCCTCCATATATCCAAACAACAAAGCATAATATTTCG\n+CCCACTAAGCCAATCACTTTATTGACTCCTAGCCGCAGACCTCCTCATTC\n+TAACCTGAATCGGAGGACAACCAGTAAGCTACCCTTTTACCATCATTGGA\n+CAAGTAGCATCCGTACTATACTTCACAACAATCCTAATCCTAATACCAAC\n+TATCTCCCTAATTGAAAACAAAATACTCAAATGGGCCTGTCCTTGTAGTA\n+TAAACTAATACACCAGTCTTGTAAACCGGAGACGAAAACCTTTTTCCAAG\n+GACAAATCAGAGAAAAAGTCTTTAACTCCACCATTAGCACCCAAAGCTAA\n+GATTCTAATTTAAACTATTCTCTGTTCTTTCATGGGGAAGCAGATTTGGG\n+TACCACCCAAGTATTGACTCACCCATCAACAACCGCTATGTATTTCGTAC\n+ATTACTGCCAGCCACCATGAATATTGTACGGTACCATAAATACTTGACCA\n+CCTGTAGTACATAAAAACCCAACCCACATCAAACCCCCCCCCCCCATGCT\n+TACAAGCAAGTACAGCAATCAACCTTCAACTATCACACATCAACTGCAAC\n+TCCAAAGCCACCCCTCACCCACTAGGATACCAACAAACCTACCCACCCTT\n+AACAGTACATAGTACATAAAGTCATTTACCGTACATAGCACATTACAGTC\n+AAATCCCTTCTCGTCCCCATGGATGACCCCCCTCAGATAGGGGTCCCTTG\n+ACCACCATCCTCCGTGAAATC
AATATCCCGCACAAGAGTGCTACTCTCCT\n+CGCTCCGGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGAC\n+ATCTGGTTCCTACTTCAGGGCCATAAAGCCTAAATAGCCCACACGTTCCC\n+CTTAAATAAGACATCACGATG\n' |
b |
diff -r 31a38ce7e8ae -r 2fe9ebb98aad test-data/tumor_chrM.bam |
b |
Binary file test-data/tumor_chrM.bam has changed |
b |
diff -r 31a38ce7e8ae -r 2fe9ebb98aad test-data/varscan_somatic_indel_result1.vcf --- a/test-data/varscan_somatic_indel_result1.vcf Sun Jul 15 09:19:25 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,18 +0,0 @@ -##fileformat=VCFv4.1 -##source=VarScan2 -##INFO=<ID=DP,Number=1,Type=Integer,Description="Total depth of quality bases"> -##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description="Indicates if record is a somatic mutation"> -##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status of variant (0=Reference,1=Germline,2=Somatic,3=LOH, or 5=Unknown)"> -##INFO=<ID=SSC,Number=1,Type=String,Description="Somatic score in Phred scale (0-255) derived from somatic p-value"> -##INFO=<ID=GPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor+normal versus no variant for Germline calls"> -##INFO=<ID=SPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor versus normal for Somatic/LOH calls"> -##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand"> -##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position"> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> -##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)"> -##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)"> -##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency"> -##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref/fwd, ref/rev, var/fwd, var/rev"> -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOR |
b |
diff -r 31a38ce7e8ae -r 2fe9ebb98aad test-data/varscan_somatic_indel_result2.vcf --- a/test-data/varscan_somatic_indel_result2.vcf Sun Jul 15 09:19:25 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,18 +0,0 @@ -##fileformat=VCFv4.1 -##source=VarScan2 -##INFO=<ID=DP,Number=1,Type=Integer,Description="Total depth of quality bases"> -##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description="Indicates if record is a somatic mutation"> -##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status of variant (0=Reference,1=Germline,2=Somatic,3=LOH, or 5=Unknown)"> -##INFO=<ID=SSC,Number=1,Type=String,Description="Somatic score in Phred scale (0-255) derived from somatic p-value"> -##INFO=<ID=GPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor+normal versus no variant for Germline calls"> -##INFO=<ID=SPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor versus normal for Somatic/LOH calls"> -##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand"> -##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position"> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> -##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)"> -##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)"> -##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency"> -##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref/fwd, ref/rev, var/fwd, var/rev"> -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOR |
b |
diff -r 31a38ce7e8ae -r 2fe9ebb98aad test-data/varscan_somatic_snp_result1.vcf --- a/test-data/varscan_somatic_snp_result1.vcf Sun Jul 15 09:19:25 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,41 +0,0 @@ -##fileformat=VCFv4.1 -##source=VarScan2 -##INFO=<ID=DP,Number=1,Type=Integer,Description="Total depth of quality bases"> -##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description="Indicates if record is a somatic mutation"> -##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status of variant (0=Reference,1=Germline,2=Somatic,3=LOH, or 5=Unknown)"> -##INFO=<ID=SSC,Number=1,Type=String,Description="Somatic score in Phred scale (0-255) derived from somatic p-value"> -##INFO=<ID=GPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor+normal versus no variant for Germline calls"> -##INFO=<ID=SPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor versus normal for Somatic/LOH calls"> -##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand"> -##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position"> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> -##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)"> -##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)"> -##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency"> -##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref/fwd, ref/rev, var/fwd, var/rev"> -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOR -chr1 51436072 . C A . PASS DP=47;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.4681E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:26:26:0:0%:23,3,0,0 0/1:.:21:20:1:4.76%:19,1,1,0 -chr1 51436311 . T C . PASS DP=16;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.375E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:9:9:0:0%:1,8,0,0 0/1:.:7:6:1:14.29%:0,6,0,1 -chr1 51436320 . G A . 
PASS DP=19;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.2632E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:9:9:0:0%:1,8,0,0 0/1:.:10:9:1:10%:0,9,0,1 -chr1 51439628 . T C . str10 DP=237;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.8101E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:123:123:0:0%:77,46,0,0 0/1:.:114:113:1:0.88%:74,39,0,1 -chr1 51439638 . G A . str10 DP=234;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.9145E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:119:119:0:0%:72,47,0,0 0/1:.:115:114:1:0.87%:75,39,0,1 -chr1 51439665 . C T . PASS DP=226;SOMATIC;SS=2;SSC=9;GPV=1E0;SPV=1.2006E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:114:114:0:0%:56,58,0,0 0/1:.:112:109:3:2.68%:63,46,2,1 -chr1 51439671 . G A . str10 DP=222;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.045E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:110:110:0:0%:53,57,0,0 0/1:.:112:111:1:0.89%:59,52,1,0 -chr1 51439684 . G T . str10 DP=210;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.9524E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:106:106:0:0%:51,55,0,0 0/1:.:104:103:1:0.96%:53,50,1,0 -chr1 51439703 . C T . str10 DP=202;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.099E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:99:99:0:0%:46,53,0,0 0/1:.:103:102:1:0.97%:48,54,0,1 -chr1 51439705 . G T . str10 DP=204;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.1961E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:98:98:0:0%:42,56,0,0 0/1:.:106:105:1:0.94%:48,57,0,1 -chr1 51439706 . G T . str10 DP=201;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.1741E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:97:97:0:0%:41,56,0,0 0/1:.:104:103:1:0.96%:46,57,1,0 -chr1 51439726 . C G . str10 DP=187;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.1872E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:90:90:0:0%:37,53,0,0 0/1:.:97:96:1:1.03%:44,52,1,0 -chr1 51439751 . C G . str10 DP=168;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.3293E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:79:78:0:0%:28,50,0,0 0/1:.:89:88:1:1.12%:35,53,0,1 -chr1 51439763 . G A . PASS DP=159;SOMATIC;SS=2;SSC=5;GPV=1E0;SPV=2.7092E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:76:76:0:0%:34,42,0,0 0/1:.:83:81:2:2.41%:32,49,1,1 -chr1 51439766 . G T . 
str10 DP=154;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.1299E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:75:75:0:0%:34,41,0,0 0/1:.:79:78:1:1.27%:30,48,0,1 -chr1 51439788 . T C . str10 DP=136;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.1471E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:66:66:0:0%:21,45,0,0 0/1:.:70:69:1:1.43%:24,45,1,0 -chr1 51439828 . G A . str10 DP=122;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.7377E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:52:52:0:0%:14,38,0,0 0/1:.:70:69:1:1.43%:27,42,0,1 -chr1 51439832 . C G . str10 DP=125;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.52E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:56:56:0:0%:14,42,0,0 0/1:.:69:68:1:1.45%:25,43,0,1 -chr1 51439876 . T G . str10 DP=105;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.619E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:46:46:0:0%:10,36,0,0 0/1:.:59:58:1:1.69%:23,35,0,1 -chr1 51439882 . G T . str10 DP=105;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.2381E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:50:50:0:0%:13,37,0,0 0/1:.:55:54:1:1.82%:22,32,0,1 -chr1 51439889 . G T . str10 DP=97;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.1546E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:47:47:0:0%:14,33,0,0 0/1:.:50:49:1:2%:21,28,0,1 -chr1 51439953 . G T . str10 DP=59;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.9322E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:24:24:0:0%:5,19,0,0 0/1:.:35:34:1:2.86%:7,27,0,1 -chr1 51440035 . G T . PASS DP=21;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=6.1905E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:8:8:0:0%:1,7,0,0 0/1:.:13:12:1:7.69%:1,11,0,1 |
b |
diff -r 31a38ce7e8ae -r 2fe9ebb98aad test-data/varscan_somatic_snp_result2.vcf --- a/test-data/varscan_somatic_snp_result2.vcf Sun Jul 15 09:19:25 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,26 +0,0 @@ -##fileformat=VCFv4.1 -##source=VarScan2 -##INFO=<ID=DP,Number=1,Type=Integer,Description="Total depth of quality bases"> -##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description="Indicates if record is a somatic mutation"> -##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status of variant (0=Reference,1=Germline,2=Somatic,3=LOH, or 5=Unknown)"> -##INFO=<ID=SSC,Number=1,Type=String,Description="Somatic score in Phred scale (0-255) derived from somatic p-value"> -##INFO=<ID=GPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor+normal versus no variant for Germline calls"> -##INFO=<ID=SPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor versus normal for Somatic/LOH calls"> -##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand"> -##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position"> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> -##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)"> -##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)"> -##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency"> -##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref/fwd, ref/rev, var/fwd, var/rev"> -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOR -chr1 51436072 . C A . PASS DP=47;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.4681E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:26:26:0:0%:23,3,0,0 0/1:.:21:20:1:4.76%:19,1,1,0 -chr1 51436311 . T C . PASS DP=16;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.375E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:9:9:0:0%:1,8,0,0 0/1:.:7:6:1:14.29%:0,6,0,1 -chr1 51436320 . G A . 
PASS DP=19;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.2632E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:9:9:0:0%:1,8,0,0 0/1:.:10:9:1:10%:0,9,0,1 -chr1 51439665 . C T . PASS DP=226;SOMATIC;SS=2;SSC=9;GPV=1E0;SPV=1.2006E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:114:114:0:0%:56,58,0,0 0/1:.:112:109:3:2.68%:63,46,2,1 -chr1 51439763 . G A . PASS DP=159;SOMATIC;SS=2;SSC=5;GPV=1E0;SPV=2.7092E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:76:76:0:0%:34,42,0,0 0/1:.:83:81:2:2.41%:32,49,1,1 -chr1 51440025 . A C . PASS DP=27;SOMATIC;SS=2;SSC=1;GPV=1E0;SPV=6.6667E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:9:9:0:0%:1,8,0,0 0/1:.:18:17:1:5.56%:1,16,0,1 -chr1 51440035 . G T . PASS DP=21;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=6.1905E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:8:8:0:0%:1,7,0,0 0/1:.:13:12:1:7.69%:1,11,0,1 -chr1 51440056 . T G . PASS DP=22;SOMATIC;SS=2;SSC=1;GPV=1E0;SPV=6.3636E-1 GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:8:8:0:0%:1,7,0,0 0/1:.:14:13:1:7.14%:1,12,0,1 |
b |
diff -r 31a38ce7e8ae -r 2fe9ebb98aad varscan.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/varscan.py Tue Dec 04 05:15:50 2018 -0500 |
[ |
b'@@ -0,0 +1,1178 @@\n+#!/usr/bin/env python3\n+from __future__ import print_function\n+\n+import argparse\n+import io\n+import os\n+import subprocess\n+import sys\n+import tempfile\n+import time\n+from contextlib import ExitStack\n+from functools import partial\n+from threading import Thread\n+\n+import pysam\n+\n+\n+class VariantCallingError (RuntimeError):\n+ """Exception class for issues with samtools and varscan subprocesses."""\n+\n+ def __init__(self, message=None, call=\'\', error=\'\'):\n+ self.message = message\n+ self.call = call.strip()\n+ self.error = error.strip()\n+\n+ def __str__(self):\n+ if self.message is None:\n+ return \'\'\n+ if self.error:\n+ msg_header = \'"{0}" failed with:\\n{1}\\n\\n\'.format(\n+ self.call, self.error\n+ )\n+ else:\n+ msg_header = \'{0} failed.\\n\'\n+ \'No further information about this error is available.\\n\\n\'.format(\n+ self.call\n+ )\n+ return msg_header + self.message\n+\n+\n+class VarScanCaller (object):\n+ def __init__(self, ref_genome, bam_input_files,\n+ max_depth=None,\n+ min_mapqual=None, min_basequal=None,\n+ threads=1, verbose=False, quiet=True\n+ ):\n+ self.ref_genome = ref_genome\n+ self.bam_input_files = bam_input_files\n+ self.max_depth = max_depth\n+ self.min_mapqual = min_mapqual\n+ self.min_basequal = min_basequal\n+ self.threads = threads\n+ self.verbose = verbose\n+ self.quiet = quiet\n+\n+ with pysam.FastaFile(ref_genome) as ref_fa:\n+ self.ref_contigs = ref_fa.references\n+ self.ref_lengths = ref_fa.lengths\n+\n+ self.pileup_engine = [\'samtools\', \'mpileup\']\n+ self.varcall_engine = [\'varscan\', \'somatic\']\n+ self.requires_stdout_redirect = False\n+ self.TemporaryContigVCF = partial(\n+ tempfile.NamedTemporaryFile,\n+ mode=\'wb\', suffix=\'\', delete=False, dir=os.getcwd()\n+ )\n+ self.tmpfiles = []\n+\n+ def _get_pysam_pileup_args(self):\n+ param_dict = {}\n+ if self.max_depth is not None:\n+ param_dict[\'max_depth\'] = self.max_depth\n+ if self.min_mapqual is not None:\n+ 
param_dict[\'min_mapping_quality\'] = self.min_mapqual\n+ if self.min_basequal is not None:\n+ param_dict[\'min_base_quality\'] = self.min_basequal\n+ param_dict[\'compute_baq\'] = False\n+ param_dict[\'stepper\'] = \'samtools\'\n+ return param_dict\n+\n+ def varcall_parallel(self, normal_purity=None, tumor_purity=None,\n+ min_coverage=None,\n+ min_var_count=None,\n+ min_var_freq=None, min_hom_freq=None,\n+ p_value=None, somatic_p_value=None,\n+ threads=None, verbose=None, quiet=None\n+ ):\n+ if not threads:\n+ threads = self.threads\n+ if verbose is None:\n+ verbose = self.verbose\n+ if quiet is None:\n+ quiet = self.quiet\n+ # mapping of method parameters to varcall engine command line options\n+ varcall_engine_option_mapping = [\n+ (\'--normal-purity\', normal_purity),\n+ (\'--tumor-purity\', tumor_purity),\n+ (\'--min-coverage\', min_coverage),\n+ (\'--min-reads2\', min_var_count),\n+ (\'--min-var-freq\', min_var_freq),\n+ (\'--min-freq-for-hom\', min_hom_freq),\n+ (\'--p-value\', p_value),\n+ (\'--somatic-p-value\', somatic_p_value),\n+ (\'--min-avg-qual\', self.min_basequal)\n+ ]\n+ varcall_engine_options = []\n+ for option, value in varcall_engine_option_mapping:\n+ if value is not None:\n+ varcall_engine_options += [option, str(value)]\n+ pileup_engine_options = [\'-B\']\n+ if self.max_depth is not None:\n+ pileup_engine'..b"efault=0.1,\n+ help='Minimum average relative distance of site from the effective '\n+ '3\\'end of ref-supporting reads (default: 0.1)'\n+ )\n+ filter_group.add_argument(\n+ '--min-var-dist3',\n+ dest='min_var_dist3', type=float,\n+ default=0.1,\n+ help='Minimum average relative distance of site from the effective '\n+ '3\\'end of variant-supporting reads (default: 0.1)'\n+ )\n+ filter_group.add_argument(\n+ '--min-ref-len',\n+ dest='min_ref_len', type=int,\n+ default=90,\n+ help='Minimum average trimmed length of reads supporting the ref '\n+ 'allele (default: 90)'\n+ )\n+ filter_group.add_argument(\n+ '--min-var-len',\n+ 
dest='min_var_len', type=int,\n+ default=90,\n+ help='Minimum average trimmed length of reads supporting the variant '\n+ 'allele (default: 90)'\n+ )\n+ filter_group.add_argument(\n+ '--max-len-diff',\n+ dest='max_relative_len_diff', type=float,\n+ default=0.25,\n+ help='Maximum average relative read length difference (ref - var; '\n+ 'default: 0.25)'\n+ )\n+ filter_group.add_argument(\n+ '--min-strandedness',\n+ dest='min_strandedness', type=float,\n+ default=0.01,\n+ help='Minimum fraction of variant reads from each strand '\n+ '(default: 0.01)'\n+ )\n+ filter_group.add_argument(\n+ '--min-strand-reads',\n+ dest='min_strand_reads', type=int,\n+ default=5,\n+ help='Minimum allele depth required to run --min-strandedness filter '\n+ '(default: 5)'\n+ )\n+ filter_group.add_argument(\n+ '--min-ref-basequal',\n+ dest='min_ref_basequal', type=int,\n+ default=15,\n+ help='Minimum average base quality for the ref allele (default: 15)'\n+ )\n+ filter_group.add_argument(\n+ '--min-var-basequal',\n+ dest='min_var_basequal', type=int,\n+ default=15,\n+ help='Minimum average base quality for the variant allele '\n+ '(default: 15)'\n+ )\n+ filter_group.add_argument(\n+ '--max-basequal-diff',\n+ dest='max_basequal_diff', type=int,\n+ default=50,\n+ help='Maximum average base quality diff (ref - var; default: 50)'\n+ )\n+ filter_group.add_argument(\n+ '--min-ref-mapqual',\n+ dest='min_ref_mapqual', type=int,\n+ default=15,\n+ help='Minimum average mapping quality of reads supporting the ref '\n+ 'allele (default: 15)'\n+ )\n+ filter_group.add_argument(\n+ '--min-var-mapqual',\n+ dest='min_var_mapqual', type=int,\n+ default=15,\n+ help='Minimum average mapping quality of reads supporting the variant '\n+ 'allele (default: 15)'\n+ )\n+ filter_group.add_argument(\n+ '--max-mapqual-diff',\n+ dest='max_mapqual_diff', type=int,\n+ default=50,\n+ help='Maximum average mapping quality difference (ref - var; '\n+ 'default: 50)'\n+ )\n+ filter_group.add_argument(\n+ '--max-ref-mmqs',\n+ 
dest='max_ref_mmqs', type=int,\n+ default=100,\n+ help='Maximum mismatch quality sum of reads supporting the ref '\n+ 'allele (default: 100)'\n+ )\n+ filter_group.add_argument(\n+ '--max-var-mmqs',\n+ dest='max_var_mmqs', type=int,\n+ default=100,\n+ help='Maximum mismatch quality sum of reads supporting the variant '\n+ 'allele (default: 100)'\n+ )\n+ filter_group.add_argument(\n+ '--min-mmqs-diff',\n+ dest='min_mmqs_diff', type=int,\n+ default=0,\n+ help='Minimum mismatch quality sum difference (var - ref; default: 0)'\n+ )\n+ filter_group.add_argument(\n+ '--max-mmqs-diff',\n+ dest='max_mmqs_diff', type=int,\n+ default=50,\n+ help='Maximum mismatch quality sum difference (var - ref; default: 50)'\n+ )\n+ args = vars(p.parse_args())\n+ varscan_call(**args)\n" |
b |
diff -r 31a38ce7e8ae -r 2fe9ebb98aad varscan_somatic.xml --- a/varscan_somatic.xml Sun Jul 15 09:19:25 2018 -0400 +++ b/varscan_somatic.xml Tue Dec 04 05:15:50 2018 -0500 |
[ |
b'@@ -1,120 +1,534 @@\n <tool id="varscan_somatic" name="VarScan somatic" version="@VERSION@.1">\n- <description>Call germline/somatic variants from tumor-normal pileups</description>\n+ <description>Call germline/somatic and LOH variants from tumor-normal sample pairs</description>\n <macros>\n <import>macros.xml</import>\n+ <macro name="test_mentions_contig">\n+ <assert_contents>\n+ <has_line_matching\n+ expression="##contig=.ID=chrM,length=16571." />\n+ </assert_contents>\n+ </macro>\n+ <macro name="test_mentions_filters">\n+ <assert_contents>\n+ <has_line_matching\n+ expression="##FILTER=.ID=VarCount,Description=.+" />\n+ <has_line_matching\n+ expression="##FILTER=.ID=ReadLenDiff,Description=.+" />\n+ <has_line_matching\n+ expression="##FILTER=.ID=RefDist3,Description=.+" />\n+ </assert_contents>\n+ </macro>\n+ <macro name="test_not_mentions_filters">\n+ <assert_contents>\n+ <not_has_text\n+ text="##FILTER=<ID=VarCount,Description=" />\n+ <not_has_text\n+ text="##FILTER=<ID=ReadLenDiff,Description=" />\n+ <not_has_text\n+ text="##FILTER=<ID=RefDist3,Description=" />\n+ </assert_contents>\n+ </macro>\n </macros>\n- <expand macro="requirements" />\n- <expand macro="stdio" />\n+ <expand macro="requirements">\n+ <requirement type="package" version="3.6.7">python</requirement>\n+ <requirement type="package" version="0.15.1">pysam</requirement>\n+ </expand>\n+ <stdio>\n+ <exit_code range="1:" />\n+ </stdio>\n <command><![CDATA[\n- varscan somatic\n- @INPUT_PILEUPS@\n- --min-coverage ${min_coverage}\n- --min-reads2 ${min_reads2}\n- --min-avg-qual ${min_avg_qual}\n- --min-var-freq ${min_var_freq}\n- --min-freq-for-hom ${min_freq_for_hom}\n+ #if str($reference.source) == "history":\n+ #set ref_genome = \'ref.fa\'\n+ ln -s -f \'$reference.genome\' $ref_genome &&\n+ #else:\n+ #set ref_genome = \'$reference.genome.fields.path\'\n+ #end if\n+ #set normal_data = \'normal.bam\'\n+ #set tumor_data = \'tumor.bam\'\n+ ln -s -f \'$normal_bam\' $normal_data &&\n+ ln -s -f 
\'$tumor_bam\' $tumor_data &&\n+ ln -s -f \'${normal_bam.metadata.bam_index}\' ${normal_data}.bai &&\n+ ln -s -f \'${tumor_bam.metadata.bam_index}\' ${tumor_data}.bai &&\n+ python3 $__tool_directory__/varscan.py\n+ --normal \'$normal_data\'\n+ --tumor \'$tumor_data\'\n --normal-purity ${normal_purity}\n --tumor-purity ${tumor_purity}\n- --tumor-purity ${tumor_purity}\n- --min-coverage-normal ${min_coverage_normal}\n- --somatic-p-value ${somatic_p_value}\n- --p-value ${p_value}\n- #if str($strand_filter) == \'yes\':\n- --strand-filter 1\n+ #if str($split_output):\n+ --ofile variants_out\n+ $split_output\n+ #else:\n+ --ofile \'$output\'\n+ #end if\n+ --threads \\${GALAXY_SLOTS:-2}\n+ #if str($call_params.settings) == "custom":\n+ ## samtools mpileup parameters\n+ --min-basequal ${call_params.min_avg_qual}\n+ --min-mapqual ${call_params.min_mapqual}\n+ ## VarScan parameters\n+ --min-coverage ${call_params.min_coverage}\n+ --min-var-count ${call_params.min_reads2}\n+ --min-var-freq ${call_params.min_var_freq}\n+ --min-hom-freq ${call_params.min_freq_for_hom}\n+ --p-value ${call_params.p_value}\n+ --somatic-p-value ${call_params.somatic_p_value}\n #end if\n-\n- '..b'ant is found in both samples =>\n+germline mutation event) and **LOH** (variant is found in both samples, but\n+only the tumor sample appears to be homozygous for it => loss of heterozygosity\n+event).\n+This classification is encoded in the variant ``INFO`` fields of the VCF output\n+produced by the tool in the form of a status code ``SS`` (somatic status),\n+where:\n+\n+- ``SS=1`` signifies a likely germline variant,\n+- ``SS=2`` a somatic variant\n+- ``SS=3`` a LOH variant\n \n-.. _VarScan: http://dkoboldt.github.io/varscan/\n-.. 
_online: http://dkoboldt.github.io/varscan/using-varscan.html\n+In addition, ``SS=0`` indicates a possible variant, but with insufficient\n+evidence for an, at least, heterozygous state in either individual sample, and\n+``SS=5`` is used for variants of unexplained origin (*e.g.*, variants found in\n+the normal, but not in the tumor tissue sample).\n+\n+In a second step, following variant calling, the tool can try to detect likely\n+false-positive calls by re-inspecting the data at the variant sites more\n+carefully and looking for signs that may indicate problems with the\n+sequencing data or its mapping. If a called variant is deemed a possible\n+false-positive at this step, this gets indicated in the ``FILTER`` field of the\n+variant record in the VCF output. For high confidence variants passing all\n+posterior (applied after variant calling) filters the value of the field will\n+be ``PASS``, for variants failing any of the posterior filters the value will\n+be a ``;``-separated list of the problematic filters.\n+\n \n **Input**\n \n-::\n-\n- mpileup file - The SAMtools mpileup files for the normal and tumor tissue\n- \n+The tool takes as input a reference genome (in fasta format) and a pair of\n+aligned reads datasets (bam format).\n \n **Output**\n \n-VarScan produces a VCF 4.1 dataset as output.\n+A VCF dataset of called variants. 
When asked to *Generate separate output\n+datasets for SNP and indel calls*, the tool will behave like the\n+``varscan somatic`` command line tool and produce two VCF datasets - one with\n+just the single nucleotide variants, while the other one will store\n+insertion/deletion variants.\n+\n+**Options**\n+\n+*Estimated purity of normal sample / of tumor sample*\n+\n+Since, in practice, it is often impossible to isolate tissue samples without\n+contamination from surrounding tissue or from invading cells, these two fields\n+let you indicate your estimate of the purity of the two samples (as fractions\n+between 0 and 1, where 1 would indicate a contamination-free sample and 0.5 a\n+sample to which the desired tissue contributes only 50%, while the other 50%\n+consist of cells from the other tissue type).\n+\n+*Settings for Variant Calling*\n \n+Settings in this section will affect the steps of variant calling and\n+classification. You can accept VarScan\'s default values for the corresponding\n+parameters or customize them according to your needs.\n \n+*Settings for Posterior Variant Filtering*\n+\n+Use the parameters in this section to configure the false-positive filtering\n+step that follows variant calling and classification. These settings will not\n+influence the number of variants detected nor their classification, but may\n+change the ``FILTER`` field of variant records to indicate which variants\n+failed to pass certain filters. You can use this information with downstream\n+tools to exclude certain variants from further analysis steps or include only\n+high confidence variants that passed all filters (those with ``PASS`` as their\n+``FILTER`` field value). 
You can accept the original filter defaults of the\n+``varscan fpfilter`` command line tool, use the settings established for the\n+tool in the `DREAM3 challenge`_, or choose to customize the settings.\n+Alternatively, you can also choose to skip posterior filtering entirely, in\n+which case all variants will have their ``FILTER`` field set to ``PASS``.\n+\n+.. _DREAM3 challenge: https://www.synapse.org/#!Synapse:syn312572/wiki/58893\n </help>\n <expand macro="citations" />\n </tool>\n' |