Mercurial > repos > iuc > varscan_somatic
diff varscan_somatic.xml @ 2:2fe9ebb98aad draft
planemo upload for repository https://github.com/galaxyproject/iuc/tree/master/tools/varscan commit 30867f1f022bed18ba1c3b8dc9c54226890b3a9c
author | iuc |
---|---|
date | Tue, 04 Dec 2018 05:15:50 -0500 |
parents | 31a38ce7e8ae |
children | d37adcc2ec03 |
line wrap: on
line diff
--- a/varscan_somatic.xml Sun Jul 15 09:19:25 2018 -0400 +++ b/varscan_somatic.xml Tue Dec 04 05:15:50 2018 -0500 @@ -1,120 +1,534 @@ <tool id="varscan_somatic" name="VarScan somatic" version="@VERSION@.1"> - <description>Call germline/somatic variants from tumor-normal pileups</description> + <description>Call germline/somatic and LOH variants from tumor-normal sample pairs</description> <macros> <import>macros.xml</import> + <macro name="test_mentions_contig"> + <assert_contents> + <has_line_matching + expression="##contig=.ID=chrM,length=16571." /> + </assert_contents> + </macro> + <macro name="test_mentions_filters"> + <assert_contents> + <has_line_matching + expression="##FILTER=.ID=VarCount,Description=.+" /> + <has_line_matching + expression="##FILTER=.ID=ReadLenDiff,Description=.+" /> + <has_line_matching + expression="##FILTER=.ID=RefDist3,Description=.+" /> + </assert_contents> + </macro> + <macro name="test_not_mentions_filters"> + <assert_contents> + <not_has_text + text="##FILTER=<ID=VarCount,Description=" /> + <not_has_text + text="##FILTER=<ID=ReadLenDiff,Description=" /> + <not_has_text + text="##FILTER=<ID=RefDist3,Description=" /> + </assert_contents> + </macro> </macros> - <expand macro="requirements" /> - <expand macro="stdio" /> + <expand macro="requirements"> + <requirement type="package" version="3.6.7">python</requirement> + <requirement type="package" version="0.15.1">pysam</requirement> + </expand> + <stdio> + <exit_code range="1:" /> + </stdio> <command><![CDATA[ - varscan somatic - @INPUT_PILEUPS@ - --min-coverage ${min_coverage} - --min-reads2 ${min_reads2} - --min-avg-qual ${min_avg_qual} - --min-var-freq ${min_var_freq} - --min-freq-for-hom ${min_freq_for_hom} + #if str($reference.source) == "history": + #set ref_genome = 'ref.fa' + ln -s -f '$reference.genome' $ref_genome && + #else: + #set ref_genome = '$reference.genome.fields.path' + #end if + #set normal_data = 'normal.bam' + #set tumor_data = 'tumor.bam' + ln -s -f '$normal_bam' 
$normal_data && + ln -s -f '$tumor_bam' $tumor_data && + ln -s -f '${normal_bam.metadata.bam_index}' ${normal_data}.bai && + ln -s -f '${tumor_bam.metadata.bam_index}' ${tumor_data}.bai && + python3 $__tool_directory__/varscan.py + --normal '$normal_data' + --tumor '$tumor_data' --normal-purity ${normal_purity} --tumor-purity ${tumor_purity} - --tumor-purity ${tumor_purity} - --min-coverage-normal ${min_coverage_normal} - --somatic-p-value ${somatic_p_value} - --p-value ${p_value} - #if str($strand_filter) == 'yes': - --strand-filter 1 + #if str($split_output): + --ofile variants_out + $split_output + #else: + --ofile '$output' + #end if + --threads \${GALAXY_SLOTS:-2} + #if str($call_params.settings) == "custom": + ## samtools mpileup parameters + --min-basequal ${call_params.min_avg_qual} + --min-mapqual ${call_params.min_mapqual} + ## VarScan parameters + --min-coverage ${call_params.min_coverage} + --min-var-count ${call_params.min_reads2} + --min-var-freq ${call_params.min_var_freq} + --min-hom-freq ${call_params.min_freq_for_hom} + --p-value ${call_params.p_value} + --somatic-p-value ${call_params.somatic_p_value} #end if - - --output-vcf 1 + #if str($filter_params.settings) == "no_filter": + --no-filters + #elif str($filter_params.settings) == "dream3_settings": + --min-var-count2 3 + --min-var-count2-lc 1 + --min-var-freq2 0.05 + --max-somatic-p 0.05 + --max-somatic-p-depth 10 + --min-ref-readpos 0.2 + --min-var-readpos 0.15 + --min-ref-dist3 0.2 + --min-var-dist3 0.15 + --min-ref-len 90 + --min-var-len 90 + --max-len-diff 0.05 + --min-strandedness 0 + --min-strand-reads 5 + --min-ref-basequal 15 + --min-var-basequal 30 + --max-basequal-diff 50 + --min-ref-mapqual 20 + --min-var-mapqual 30 + --max-mapqual-diff 10 + --max-ref-mmqs 50 + --max-var-mmqs 100 + --min-mmqs-diff 0 + --max-mmqs-diff 50 + #elif str($filter_params.settings) == "custom": + --min-var-count2 ${filter_params.min_var_count} + --min-var-count2-lc ${filter_params.min_var_count_lc} + 
--min-var-freq2 ${filter_params.min_var_freq2} + --max-somatic-p ${filter_params.max_somatic_p} + --max-somatic-p-depth ${filter_params.max_somatic_p_depth} + --min-ref-readpos ${filter_params.min_ref_readpos} + --min-var-readpos ${filter_params.min_var_readpos} + --min-ref-dist3 ${filter_params.min_ref_dist3} + --min-var-dist3 ${filter_params.min_var_dist3} + --min-ref-len ${filter_params.min_ref_len} + --min-var-len ${filter_params.min_var_len} + --max-len-diff ${filter_params.max_len_diff} + --min-strandedness ${filter_params.min_strandedness} + --min-strand-reads ${filter_params.min_strand_reads} + --min-ref-basequal ${filter_params.min_ref_basequal} + --min-var-basequal ${filter_params.min_var_basequal} + --max-basequal-diff ${filter_params.max_basequal_diff} + --min-ref-mapqual ${filter_params.min_ref_mapqual} + --min-var-mapqual ${filter_params.min_var_mapqual} + --max-mapqual-diff ${filter_params.max_mapqual_diff} + --max-ref-mmqs ${filter_params.max_ref_mmqs} + --max-var-mmqs ${filter_params.max_var_mmqs} + --min-mmqs-diff ${filter_params.min_mmqs_diff} + --max-mmqs-diff ${filter_params.max_mmqs_diff} + #end if + --verbose + $ref_genome ]]></command> <inputs> - - <expand macro="input_pileups"/> - - <expand macro="min_coverage" /> - <param argument="--min-coverage-normal" name="min_coverage_normal" type="integer" value="8" min="1" max="200" - label="Minimum read depth from the normal sample" help="Minimum depth at a position to make a call" /> - <param argument="--min-coverage-tumor" name="min_coverage_tumor" type="integer" value="6" min="1" max="200" - label="Minimum read depth from the tumor sample" help="Minimum depth at a position to make a call" /> - <expand macro="min_reads2" /> - <expand macro="min_avg_qual" /> - <expand macro="min_var_freq" value="0.10" /> - <expand macro="min_freq_for_hom" /> - <param argument="--normal-purity" name="normal_purity" type="float" value="1.00" min="0" max="1.00" - label="Estimated purity (non-tumor content) of normal 
sample"/> - <param argument="--tumor-purity" name="tumor_purity" type="float" value="1.00" min="0" max="1.00" - label="Estimated purity (tumor content) of tumor sample"/> - <expand macro="p_value" label="P-value threshold to call a heterozygote" value="0.99"/> - <param argument="--somatic-p-value" name="somatic_p_value" type="float" value="0.05" min="0" max="1" - label="p-value threshold for calling somatic sites"/> - <expand macro="strand_filter" /> + <conditional name="reference"> + <param name="source" type="select" + label="Will you select a reference genome from your history or use a built-in genome?"> + <option value="cached">Use a built-in genome</option> + <option value="history">Use a genome from my history</option> + </param> + <when value="cached"> + <param name="genome" type="select" + label="reference genome" + help="The fasta reference genome that variants should be called against."> + <options from_data_table="fasta_indexes" /> + </param> + </when> + <when value="history"> + <param name="genome" type="data" format="fasta" + label="reference genome" + help="The fasta reference genome that variants should be called against."/> + </when> + </conditional> + <param name="normal_bam" type="data" format="bam" + label="aligned reads from normal sample" /> + <param name="tumor_bam" type="data" format="bam" + label="aligned reads from tumor sample" /> + <param argument="--normal-purity" name="normal_purity" type="float" value="1.0" min="0" max="1.0" + label="Estimated purity (non-tumor content) of normal sample"/> + <param argument="--tumor-purity" name="tumor_purity" type="float" value="1.0" min="0" max="1.0" + label="Estimated purity (tumor content) of tumor sample"/> + <param name="split_output" type="boolean" truevalue="--split-output" falsevalue="" checked="false" + label="Generate separate output datasets for SNP and indel calls?" 
/> + <conditional name="call_params"> + <param name="settings" label="Settings for Variant Calling" type="select"> + <option value="varscan_defaults" selected="true">Use default values</option> + <option value="custom">Customize settings</option> + </param> + <when value="custom"> + <param argument="samtools mpileup -Q" name="min_avg_qual" type="integer" value="13" min="0" max="50" + label="Minimum base quality" + help="The minimum base quality at the variant position required to use a read for calling" /> + <param argument="samtools mpileup -q" name="min_mapqual" type="integer" value="0" min="0" max="60" + label="Minimum mapping quality" + help="The minimum mapping quality required for a read to be considered in variant calling" /> + <expand macro="min_coverage" + help="Minimum site coverage required in the normal and in the tumor sample to call a variant. This threshold gets applied after eliminating reads with low base and mapping quality as defined above." /> + <expand macro="min_reads2" /> + <expand macro="min_var_freq" value="0.1" /> + <expand macro="min_freq_for_hom" /> + <expand macro="p_value" value="0.99" + help="The p-value threshold used to determine if a variant should be called for either sample" /> + <param argument="--somatic-p-value" name="somatic_p_value" type="float" value="0.05" min="0" max="1" + label="P-value threshold for calling somatic variants and LOH events" + help="The p-value threshold used to determine if read count differences between the normal and the tumor sample justify classification of a variant as somatic or as an LOH event" /> + </when> + <when value="varscan_defaults" /> + </conditional> + <conditional name="filter_params"> + <param name="settings" label="Settings for Posterior Variant Filtering" type="select"> + <option value="varscan_defaults" selected="true">Use default values</option> + <option value="dream3_settings">Use settings optimized for DREAM-3</option> + <option value="no_filter">Do not perform posterior 
filtering</option> + <option value="custom">Customize settings</option> + </param> + <when value="varscan_defaults" /> + <when value="dream3_settings" /> + <when value="no_filter" /> + <when value="custom"> + <param argument="--min-var-count" name="min_var_count" type="integer" value="4" min="1" max="200" + label="Minimum number of variant-supporting reads" + help="" /> + <param argument="--min-var-count-lc" name="min_var_count_lc" type="integer" value="2" min="1" max="200" + label="Low coverage minimum number of variant-supporting reads" + help="Will be applied instead of the --min-var-count limit for sites with poor overall (less than --max-somatic-p-depth) coverage" /> + <param argument="--min-var-freq" name="min_var_freq2" type="float" value="0.05" min="0" max="1" + label="Minimum variant allele frequency" + help="" /> + <param argument="--max-somatic-p" name="max_somatic_p" type="float" value="0.05" min="0" max="1" + label="Maximum somatic p-value allowed for a somatic call" + help="" /> + <param argument="--max-somatic-p-depth" name="max_somatic_p_depth" type="integer" value="10" min="2" max="200" + label="Depth required at variant site to run --max-somatic-p filter" + help="" /> + <param argument="--min-ref-readpos" name="min_ref_readpos" type="float" value="0.1" min="0" max="1" + label="Minimum relative variant position in ref-supporting reads" + help="The minimum average relative distance from the ends of ref-supporting reads required for variant sites" /> + <param argument="--min-var-readpos" name="min_var_readpos" type="float" value="0.1" min="0" max="1" + label="Minimum relative variant position in variant-supporting reads" + help="The minimum average relative distance from the ends of variant-supporting reads required for variant sites" /> + <param argument="--min-ref-dist3" name="min_ref_dist3" type="float" value="0.1" min="0" max="1" + label="Minimum distance of variant site from 3'-end of ref-supporting reads" + help="The minimum average relative 
distance from the effective 3'end of ref-supporting reads required for variant sites" /> + <param argument="--min-var-dist3" name="min_var_dist3" type="float" value="0.1" min="0" max="1" + label="Minimum distance of variant site from 3'-end of variant-supporting reads" + help="The minimum average relative distance from the effective 3'end of variant-supporting reads required for variant sites" /> + <param argument="--min-ref-avgrl" name="min_ref_len" type="integer" value="90" min="0" max="200" + label="Minimum length of ref-supporting reads" + help="The minimum average trimmed length required for reads supporting the reference allele" /> + <param argument="--min-var-avgrl" name="min_var_len" type="integer" value="90" min="0" max="200" + label="Minimum length of variant-supporting reads" + help="The minimum average trimmed length required for reads supporting the variant allele" /> + <param argument="--max-rl-diff" name="max_len_diff" type="float" value="0.25" min="0" max="1" + label="Maximum relative read length difference" + help="The maximum allowed average relative read length difference (ref - var) between reads supporting the reference and the variant allele" /> + <param argument="--min-strandedness" name="min_strandedness" type="float" value="0.01" min="0" max="0.5" + label="Minimum fraction of variant reads from each strand" + help="The minimum fraction of variant reads that are required to come from the forward and from the reverse strand" /> + <param argument="--min-strand-reads" name="min_strand_reads" type="integer" value="5" min="2" max="200" + label="Minimum variant allele depth required to apply the --min-strandedness filter" + help="" /> + <param argument="--min-ref-basequal" name="min_ref_basequal" type="integer" value="15" min="1" max="50" + label="Minimum average base quality for the ref allele" + help="The minimum average base quality required at the variant site for reads supporting the reference allele" /> + <param argument="--min-var-basequal" 
name="min_var_basequal" type="integer" value="15" min="1" max="50" + label="Minimum average base quality for the variant allele" + help="The minimum average base quality required at the variant site for reads supporting the variant allele" /> + <param argument="max-basequal-diff" name="max_basequal_diff" type="integer" value="50" min="0" max="50" + label="Maximum base quality difference between ref- and variant-supporting reads" + help="The maximum average base quality difference (ref - var) allowed between the variant site positions of reads supporting the reference and the variant allele" /> + <param argument="--min-ref-mapqual" name="min_ref_mapqual" type="integer" value="15" min="1" max="60" + label="Minimum average mapping quality of ref-supporting reads" + help="The minimum average mapping quality required for reads supporting the reference allele" /> + <param argument="--min-var-mapqual" name="min_var_mapqual" type="integer" value="15" min="1" max="60" + label="Minimum average mapping quality of variant-supporting reads" + help="The minimum average mapping quality required for reads supporting the variant allele" /> + <param argument="--max-mapqual-diff" name="max_mapqual_diff" type="integer" value="50" min="0" max="60" + label="Maximum mapping quality difference between ref- and variant-supporting reads" + help="The maximum average mapping quality difference (ref - var) allowed between reads supporting the reference and the variant allele" /> + <param argument="--max-ref-mmqs" name="max_ref_mmqs" type="integer" value="100" min="0" + label="Maximum mismatch base quality sum of ref-supporting reads" + help="The maximum mismatch base quality sum allowed for reads supporting the reference allele" /> + <param argument="--max-var-mmqs" name="max_var_mmqs" type="integer" value="100" min="0" + label="Maximum mismatch base quality sum of var-supporting reads" + help="The maximum mismatch base quality sum allowed for reads supporting the variant allele" /> + <param 
argument="--min-mmqs-diff" name="min_mmqs_diff" type="integer" value="0" min="0" + label="Minimum difference between mismatch base quality sums of variant- and ref-supporting reads" + help="The minimum difference in the mismatch base quality sums (var - ref) required between reads supporting the variant and the reference allele" /> + <param argument="--max-mmqs-diff" name="max_mmqs_diff" type="integer" value="50" min="1" + label="Maximum difference between mismatch base quality sums of variant- and ref-supporting reads" + help="The maximum difference in the mismatch base quality sums (var - ref) allowed between reads supporting the variant and the reference allele" /> + </when> + </conditional> </inputs> <outputs> - <data name="output_indel" from_work_dir="galaxy_out.indel.vcf" format="vcf"/> - <data name="output_snp" from_work_dir="galaxy_out.snp.vcf" format="vcf"/> + <data name="output" format="vcf"> + <filter>not split_output</filter> + </data> + <data name="output_snp" from_work_dir="variants_out.snp" format="vcf" + label="Varscan somatic SNP calls on ${on_string}"> + <filter>split_output</filter> + </data> + <data name="output_indel" from_work_dir="variants_out.indel" format="vcf" + label="Varscan somatic indel calls on ${on_string}"> + <filter>split_output</filter> + </data> </outputs> <tests> - <test> - <conditional name="pileup"> - <param name="pileup_select" value="separated" /> - <param name="normal_pileup" value="N_Region_Chr1_CDKN2C.pileup.gz" /> - <param name="tumor_pileup" value="T_Region_Chr1_CDKN2C.pileup.gz" /> + <test expect_num_outputs="1"> + <conditional name="reference"> + <param name="source" value="history" /> + <param name="genome" value="hg19_chrM.fa" /> + </conditional> + <param name="normal_bam" value="control_chrM.bam" /> + <param name="tumor_bam" value="tumor_chrM.bam" /> + <param name="split_output" value="false" /> + <conditional name="call_params"> + <param name="settings" value="varscan_defaults" /> + </conditional> + <conditional 
name="filter_params"> + <param name="settings" value="varscan_defaults" /> + </conditional> + <output name="output"> + <expand macro="test_mentions_contig" /> + <expand macro="test_mentions_filters" /> + </output> + </test> + <test expect_num_outputs="2"> + <conditional name="reference"> + <param name="source" value="history" /> + <param name="genome" value="hg19_chrM.fa" /> + </conditional> + <param name="normal_bam" value="control_chrM.bam" /> + <param name="tumor_bam" value="tumor_chrM.bam" /> + <param name="split_output" value="true" /> + <conditional name="call_params"> + <param name="settings" value="varscan_defaults" /> </conditional> - <param name="min_coverage" value="2" /> - <param name="min_coverage_normal" value="2" /> - <param name="min_coverage_tumor" value="2" /> - <param name="min_reads2" value="1" /> - <param name="min_avg_qual" value="5" /> - <param name="min_var_freq" value="0.01" /> - <param name="min_freq_for_hom" value="0.75" /> + <conditional name="filter_params"> + <param name="settings" value="varscan_defaults" /> + </conditional> + <output name="output_indel"> + <expand macro="test_mentions_contig" /> + <expand macro="test_mentions_filters" /> + </output> + <output name="output_snp"> + <expand macro="test_mentions_contig" /> + <expand macro="test_mentions_filters" /> + </output> + </test> + <test expect_num_outputs="1"> + <conditional name="reference"> + <param name="source" value="history" /> + <param name="genome" value="hg19_chrM.fa" /> + </conditional> + <param name="normal_bam" value="control_chrM.bam" /> + <param name="tumor_bam" value="tumor_chrM.bam" /> + <param name="split_output" value="false" /> + <conditional name="call_params"> + <param name="settings" value="custom" /> + <param name="min_coverage" value="2" /> + <param name="min_reads2" value="1" /> + <param name="min_avg_qual" value="5" /> + <param name="min_var_freq" value="0.01" /> + <param name="min_freq_for_hom" value="0.66" /> + <param name="p_value" value="0.97" /> + 
<param name="somatic_p_value" value="0.09" /> + </conditional> <param name="normal_purity" value="0.6" /> <param name="tumor_purity" value="0.6" /> - <param name="p_value" value="0.99" /> - <output name="output_indel" file="varscan_somatic_indel_result1.vcf" lines_diff="0" /> - <output name="output_snp" file="varscan_somatic_snp_result1.vcf" lines_diff="0" /> + <conditional name="filter_params"> + <param name="settings" value="varscan_defaults" /> + </conditional> + <assert_stderr> + <has_line_matching + expression=" Min coverage:	2x for Normal, 2x for Tumor" /> + <has_line_matching + expression="Min reads2:	1" /> + <has_line_matching + expression="Min var freq:	0.01" /> + <has_line_matching + expression="Min freq for hom:	0.66" /> + <has_line_matching + expression="Normal purity:	0.6" /> + <has_line_matching + expression="Tumor purity:	0.6" /> + <has_line_matching + expression="Min avg qual:	5" /> + <has_line_matching + expression="P-value thresh:	0.97" /> + <has_line_matching + expression="Somatic p-value:	0.09" /> + </assert_stderr> + <output name="output"> + <expand macro="test_mentions_contig" /> + <expand macro="test_mentions_filters" /> + </output> </test> - <test> - <conditional name="pileup"> - <param name="pileup_select" value="combined" /> - <param name="combined_pileup" value="NT.pileup.gz" /> + <test expect_num_outputs="1"> + <conditional name="reference"> + <param name="source" value="history" /> + <param name="genome" value="hg19_chrM.fa" /> + </conditional> + <param name="normal_bam" value="control_chrM.bam" /> + <param name="tumor_bam" value="tumor_chrM.bam" /> + <param name="split_output" value="false" /> + <conditional name="call_params"> + <param name="settings" value="varscan_defaults" /> + </conditional> + <conditional name="filter_params"> + <param name="settings" value="dream3_settings" /> </conditional> - <param name="min_coverage" value="2" /> - <param name="min_coverage_normal" value="2" /> - <param name="min_coverage_tumor" value="2" /> 
- <param name="min_reads2" value="1" /> - <param name="min_avg_qual" value="5" /> - <param name="min_var_freq" value="0.01" /> - <param name="min_freq_for_hom" value="0.75" /> - <param name="normal_purity" value="0.6" /> - <param name="tumor_purity" value="0.6" /> - <param name="p_value" value="0.99" /> - <output name="output_indel" file="varscan_somatic_indel_result2.vcf" lines_diff="0" /> - <output name="output_snp" file="varscan_somatic_snp_result2.vcf" lines_diff="0" /> + <output name="output"> + <expand macro="test_mentions_contig" /> + <expand macro="test_mentions_filters" /> + </output> + </test> + <test expect_num_outputs="1"> + <conditional name="reference"> + <param name="source" value="history" /> + <param name="genome" value="hg19_chrM.fa" /> + </conditional> + <param name="normal_bam" value="control_chrM.bam" /> + <param name="tumor_bam" value="tumor_chrM.bam" /> + <param name="split_output" value="false" /> + <conditional name="call_params"> + <param name="settings" value="varscan_defaults" /> + </conditional> + <conditional name="filter_params"> + <param name="settings" value="no_filter" /> + </conditional> + <output name="output"> + <expand macro="test_mentions_contig" /> + <expand macro="test_not_mentions_filters" /> + </output> + </test> + <test expect_num_outputs="1"> + <conditional name="reference"> + <param name="source" value="history" /> + <param name="genome" value="hg19_chrM.fa" /> + </conditional> + <param name="normal_bam" value="control_chrM.bam" /> + <param name="tumor_bam" value="tumor_chrM.bam" /> + <param name="split_output" value="false" /> + <conditional name="call_params"> + <param name="settings" value="varscan_defaults" /> + </conditional> + <conditional name="filter_params"> + <param name="settings" value="custom" /> + <param name="min_ref_basequal" value="28" /> + <param name="min_var_basequal" value="28" /> + </conditional> + <output name="output"> + <expand macro="test_mentions_contig" /> + <expand 
macro="test_mentions_filters" /> + </output> </test> </tests> <help> -**VarScan Overview** +@HELP_HEADER@ + +**The Varscan Somatic tool for Galaxy** + +This tool wraps the functionality of the ``varscan somatic`` and the +``varscan fpfilter`` command line tools. -VarScan_ performs variant detection for massively parallel sequencing data, such as exome, WGS, and transcriptome data. -It calls variants from a mpileup dataset and produces a VCF 4.1. Full documentation is available online_. +.. class:: infomark + + The wrapper aims at providing the same functionality as the + ``varscan fpfilter`` tool, but implements it using ``pysam`` internally. + Note that, as one limitation compared to the original ``varscan`` tool, + the current version does not apply filters to indels! -This tool calls germline/somatic variants from tumor-normal pileups. +The tool is designed to detect genetic variants in a **pair of samples** +representing normal and tumor tissue from the same individual. It classifies +the variants, according to their most likely origin, as **somatic** (variant is +found in the tumor, but not in the normal sample, *i.e.*, is the consequence of +a somatic mutation event), **germline** (variant is found in both samples => +germline mutation event) and **LOH** (variant is found in both samples, but +only the tumor sample appears to be homozygous for it => loss of heterozygosity +event). +This classification is encoded in the variant ``INFO`` fields of the VCF output +produced by the tool in the form of a status code ``SS`` (somatic status), +where: + +- ``SS=1`` signifies a likely germline variant, +- ``SS=2`` a somatic variant +- ``SS=3`` a LOH variant -.. _VarScan: http://dkoboldt.github.io/varscan/ -.. 
_online: http://dkoboldt.github.io/varscan/using-varscan.html +In addition, ``SS=0`` indicates a possible variant, but with insufficient +evidence for an, at least, heterozygous state in either individual sample, and +``SS=5`` is used for variants of unexplained origin (*e.g.*, variants found in +the normal, but not in the tumor tissue sample). + +In a second step, following variant calling, the tool can try to detect likely +false-positive calls by re-inspecting the data at the variant sites more +carefully and looking for signs that may indicate problems with the +sequencing data or its mapping. If a called variant is deemed a possible +false-positive at this step, this gets indicated in the ``FILTER`` field of the +variant record in the VCF output. For high confidence variants passing all +posterior (applied after variant calling) filters the value of the field will +be ``PASS``, for variants failing any of the posterior filters the value will +be a ``;``-separated list of the problematic filters. + **Input** -:: - - mpileup file - The SAMtools mpileup files for the normal and tumor tissue - +The tool takes as input a reference genome (in fasta format) and a pair of +aligned reads datasets (bam format). **Output** -VarScan produces a VCF 4.1 dataset as output. +A VCF dataset of called variants. When asked to *Generate separate output +datasets for SNP and indel calls*, the tool will behave like the +``varscan somatic`` command line tool and produce two VCF datasets - one with +just the single nucleotide variants, while the other one will store +insertion/deletion variants. 
+ +**Options** + +*Estimated purity of normal sample / of tumor sample* + +Since, in practice, it is often impossible to isolate tissue samples without +contamination from surrounding tissue or from invading cells, these two fields +let you indicate your estimate of the purity of the two samples (as fractions +between 0 and 1, where 1 would indicate a contamination-free sample and 0.5 a +sample to which the desired tissue contributes only 50%, while the other 50% +consist of cells from the other tissue type). + +*Settings for Variant Calling* +Settings in this section will affect the steps of variant calling and +classification. You can accept VarScan's default values for the corresponding +parameters or customize them according to your needs. +*Settings for Posterior Variant Filtering* + +Use the parameters in this section to configure the false-positive filtering +step that follows variant calling and classification. These settings will not +influence the number of variants detected nor their classification, but may +change the ``FILTER`` field of variant records to indicate which variants +failed to pass certain filters. You can use this information with downstream +tools to exclude certain variants from further analysis steps or include only +high confidence variants that passed all filters (those with ``PASS`` as their +``FILTER`` field value). You can accept the original filter defaults of the +``varscan fpfilter`` command line tool, use the settings established for the +tool in the `DREAM3 challenge`_, or choose to customize the settings. +Alternatively, you can also choose to skip posterior filtering entirely, in +which case all variants will have their ``FILTER`` field set to ``PASS``. + +.. _DREAM3 challenge: https://www.synapse.org/#!Synapse:syn312572/wiki/58893 </help> <expand macro="citations" /> </tool>