comparison varscan_somatic.xml @ 2:2fe9ebb98aad draft

planemo upload for repository https://github.com/galaxyproject/iuc/tree/master/tools/varscan commit 30867f1f022bed18ba1c3b8dc9c54226890b3a9c
author iuc
date Tue, 04 Dec 2018 05:15:50 -0500
parents 31a38ce7e8ae
children d37adcc2ec03
comparison
equal deleted inserted replaced
1:31a38ce7e8ae 2:2fe9ebb98aad
1 <tool id="varscan_somatic" name="VarScan somatic" version="@VERSION@.1"> 1 <tool id="varscan_somatic" name="VarScan somatic" version="@VERSION@.1">
2 <description>Call germline/somatic variants from tumor-normal pileups</description> 2 <description>Call germline/somatic and LOH variants from tumor-normal sample pairs</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 <macro name="test_mentions_contig">
6 <assert_contents>
7 <has_line_matching
8 expression="##contig=.ID=chrM,length=16571." />
9 </assert_contents>
10 </macro>
11 <macro name="test_mentions_filters">
12 <assert_contents>
13 <has_line_matching
14 expression="##FILTER=.ID=VarCount,Description=.+" />
15 <has_line_matching
16 expression="##FILTER=.ID=ReadLenDiff,Description=.+" />
17 <has_line_matching
18 expression="##FILTER=.ID=RefDist3,Description=.+" />
19 </assert_contents>
20 </macro>
21 <macro name="test_not_mentions_filters">
22 <assert_contents>
23 <not_has_text
24 text="##FILTER=&lt;ID=VarCount,Description=" />
25 <not_has_text
26 text="##FILTER=&lt;ID=ReadLenDiff,Description=" />
27 <not_has_text
28 text="##FILTER=&lt;ID=RefDist3,Description=" />
29 </assert_contents>
30 </macro>
5 </macros> 31 </macros>
6 <expand macro="requirements" /> 32 <expand macro="requirements">
7 <expand macro="stdio" /> 33 <requirement type="package" version="3.6.7">python</requirement>
34 <requirement type="package" version="0.15.1">pysam</requirement>
35 </expand>
36 <stdio>
37 <exit_code range="1:" />
38 </stdio>
8 <command><![CDATA[ 39 <command><![CDATA[
9 varscan somatic 40 #if str($reference.source) == "history":
10 @INPUT_PILEUPS@ 41 #set ref_genome = 'ref.fa'
11 --min-coverage ${min_coverage} 42 ln -s -f '$reference.genome' $ref_genome &&
12 --min-reads2 ${min_reads2} 43 #else:
13 --min-avg-qual ${min_avg_qual} 44 #set ref_genome = '$reference.genome.fields.path'
14 --min-var-freq ${min_var_freq} 45 #end if
15 --min-freq-for-hom ${min_freq_for_hom} 46 #set normal_data = 'normal.bam'
47 #set tumor_data = 'tumor.bam'
48 ln -s -f '$normal_bam' $normal_data &&
49 ln -s -f '$tumor_bam' $tumor_data &&
50 ln -s -f '${normal_bam.metadata.bam_index}' ${normal_data}.bai &&
51 ln -s -f '${tumor_bam.metadata.bam_index}' ${tumor_data}.bai &&
52 python3 $__tool_directory__/varscan.py
53 --normal '$normal_data'
54 --tumor '$tumor_data'
16 --normal-purity ${normal_purity} 55 --normal-purity ${normal_purity}
17 --tumor-purity ${tumor_purity} 56 --tumor-purity ${tumor_purity}
18 --tumor-purity ${tumor_purity} 57 #if str($split_output):
19 --min-coverage-normal ${min_coverage_normal} 58 --ofile variants_out
20 --somatic-p-value ${somatic_p_value} 59 $split_output
21 --p-value ${p_value} 60 #else:
22 #if str($strand_filter) == 'yes': 61 --ofile '$output'
23 --strand-filter 1
24 #end if 62 #end if
25 63 --threads \${GALAXY_SLOTS:-2}
26 --output-vcf 1 64 #if str($call_params.settings) == "custom":
65 ## samtools mpileup parameters
66 --min-basequal ${call_params.min_avg_qual}
67 --min-mapqual ${call_params.min_mapqual}
68 ## VarScan parameters
69 --min-coverage ${call_params.min_coverage}
70 --min-var-count ${call_params.min_reads2}
71 --min-var-freq ${call_params.min_var_freq}
72 --min-hom-freq ${call_params.min_freq_for_hom}
73 --p-value ${call_params.p_value}
74 --somatic-p-value ${call_params.somatic_p_value}
75 #end if
76 #if str($filter_params.settings) == "no_filter":
77 --no-filters
78 #elif str($filter_params.settings) == "dream3_settings":
79 --min-var-count2 3
80 --min-var-count2-lc 1
81 --min-var-freq2 0.05
82 --max-somatic-p 0.05
83 --max-somatic-p-depth 10
84 --min-ref-readpos 0.2
85 --min-var-readpos 0.15
86 --min-ref-dist3 0.2
87 --min-var-dist3 0.15
88 --min-ref-len 90
89 --min-var-len 90
90 --max-len-diff 0.05
91 --min-strandedness 0
92 --min-strand-reads 5
93 --min-ref-basequal 15
94 --min-var-basequal 30
95 --max-basequal-diff 50
96 --min-ref-mapqual 20
97 --min-var-mapqual 30
98 --max-mapqual-diff 10
99 --max-ref-mmqs 50
100 --max-var-mmqs 100
101 --min-mmqs-diff 0
102 --max-mmqs-diff 50
103 #elif str($filter_params.settings) == "custom":
104 --min-var-count2 ${filter_params.min_var_count}
105 --min-var-count2-lc ${filter_params.min_var_count_lc}
106 --min-var-freq2 ${filter_params.min_var_freq2}
107 --max-somatic-p ${filter_params.max_somatic_p}
108 --max-somatic-p-depth ${filter_params.max_somatic_p_depth}
109 --min-ref-readpos ${filter_params.min_ref_readpos}
110 --min-var-readpos ${filter_params.min_var_readpos}
111 --min-ref-dist3 ${filter_params.min_ref_dist3}
112 --min-var-dist3 ${filter_params.min_var_dist3}
113 --min-ref-len ${filter_params.min_ref_len}
114 --min-var-len ${filter_params.min_var_len}
115 --max-len-diff ${filter_params.max_len_diff}
116 --min-strandedness ${filter_params.min_strandedness}
117 --min-strand-reads ${filter_params.min_strand_reads}
118 --min-ref-basequal ${filter_params.min_ref_basequal}
119 --min-var-basequal ${filter_params.min_var_basequal}
120 --max-basequal-diff ${filter_params.max_basequal_diff}
121 --min-ref-mapqual ${filter_params.min_ref_mapqual}
122 --min-var-mapqual ${filter_params.min_var_mapqual}
123 --max-mapqual-diff ${filter_params.max_mapqual_diff}
124 --max-ref-mmqs ${filter_params.max_ref_mmqs}
125 --max-var-mmqs ${filter_params.max_var_mmqs}
126 --min-mmqs-diff ${filter_params.min_mmqs_diff}
127 --max-mmqs-diff ${filter_params.max_mmqs_diff}
128 #end if
129 --verbose
130 $ref_genome
27 ]]></command> 131 ]]></command>
28 132
29 <inputs> 133 <inputs>
30 134 <conditional name="reference">
31 <expand macro="input_pileups"/> 135 <param name="source" type="select"
32 136 label="Will you select a reference genome from your history or use a built-in genome?">
33 <expand macro="min_coverage" /> 137 <option value="cached">Use a built-in genome</option>
34 <param argument="--min-coverage-normal" name="min_coverage_normal" type="integer" value="8" min="1" max="200" 138 <option value="history">Use a genome from my history</option>
35 label="Minimum read depth from the normal sample" help="Minimum depth at a position to make a call" /> 139 </param>
36 <param argument="--min-coverage-tumor" name="min_coverage_tumor" type="integer" value="6" min="1" max="200" 140 <when value="cached">
37 label="Minimum read depth from the tumor sample" help="Minimum depth at a position to make a call" /> 141 <param name="genome" type="select"
38 <expand macro="min_reads2" /> 142 label="reference genome"
39 <expand macro="min_avg_qual" /> 143 help="The fasta reference genome that variants should be called against.">
40 <expand macro="min_var_freq" value="0.10" /> 144 <options from_data_table="fasta_indexes" />
41 <expand macro="min_freq_for_hom" /> 145 </param>
42 <param argument="--normal-purity" name="normal_purity" type="float" value="1.00" min="0" max="1.00" 146 </when>
43 label="Estimated purity (non-tumor content) of normal sample"/> 147 <when value="history">
44 <param argument="--tumor-purity" name="tumor_purity" type="float" value="1.00" min="0" max="1.00" 148 <param name="genome" type="data" format="fasta"
45 label="Estimated purity (tumor content) of tumor sample"/> 149 label="reference genome"
46 <expand macro="p_value" label="P-value threshold to call a heterozygote" value="0.99"/> 150 help="The fasta reference genome that variants should be called against."/>
47 <param argument="--somatic-p-value" name="somatic_p_value" type="float" value="0.05" min="0" max="1" 151 </when>
48 label="p-value threshold for calling somatic sites"/> 152 </conditional>
49 <expand macro="strand_filter" /> 153 <param name="normal_bam" type="data" format="bam"
154 label="aligned reads from normal sample" />
155 <param name="tumor_bam" type="data" format="bam"
156 label="aligned reads from tumor sample" />
157 <param argument="--normal-purity" name="normal_purity" type="float" value="1.0" min="0" max="1.0"
158 label="Estimated purity (non-tumor content) of normal sample"/>
159 <param argument="--tumor-purity" name="tumor_purity" type="float" value="1.0" min="0" max="1.0"
160 label="Estimated purity (tumor content) of tumor sample"/>
161 <param name="split_output" type="boolean" truevalue="--split-output" falsevalue="" checked="false"
162 label="Generate separate output datasets for SNP and indel calls?" />
163 <conditional name="call_params">
164 <param name="settings" label="Settings for Variant Calling" type="select">
165 <option value="varscan_defaults" selected="true">Use default values</option>
166 <option value="custom">Customize settings</option>
167 </param>
168 <when value="custom">
169 <param argument="samtools mpileup -Q" name="min_avg_qual" type="integer" value="13" min="0" max="50"
170 label="Minimum base quality"
171 help="The minimum base quality at the variant position required to use a read for calling" />
172 <param argument="samtools mpileup -q" name="min_mapqual" type="integer" value="0" min="0" max="60"
173 label="Minimum mapping quality"
174 help="The minimum mapping quality required for a read to be considered in variant calling" />
175 <expand macro="min_coverage"
176 help="Minimum site coverage required in the normal and in the tumor sample to call a variant. This threshold gets applied after eliminating reads with low base and mapping quality as defined above." />
177 <expand macro="min_reads2" />
178 <expand macro="min_var_freq" value="0.1" />
179 <expand macro="min_freq_for_hom" />
180 <expand macro="p_value" value="0.99"
181 help="The p-value threshold used to determine if a variant should be called for either sample" />
182 <param argument="--somatic-p-value" name="somatic_p_value" type="float" value="0.05" min="0" max="1"
183 label="P-value threshold for calling somatic variants and LOH events"
184 help="The p-value threshold used to determine if read count differences between the normal and the tumor sample justify classification of a variant as somatic or as an LOH event" />
185 </when>
186 <when value="varscan_defaults" />
187 </conditional>
188 <conditional name="filter_params">
189 <param name="settings" label="Settings for Posterior Variant Filtering" type="select">
190 <option value="varscan_defaults" selected="true">Use default values</option>
191 <option value="dream3_settings">Use settings optimized for DREAM-3</option>
192 <option value="no_filter">Do not perform posterior filtering</option>
193 <option value="custom">Customize settings</option>
194 </param>
195 <when value="varscan_defaults" />
196 <when value="dream3_settings" />
197 <when value="no_filter" />
198 <when value="custom">
199 <param argument="--min-var-count" name="min_var_count" type="integer" value="4" min="1" max="200"
200 label="Minimum number of variant-supporting reads"
201 help="" />
202 <param argument="--min-var-count-lc" name="min_var_count_lc" type="integer" value="2" min="1" max="200"
203 label="Low coverage minimum number of variant-supporting reads"
204 help="Will be applied instead of the --min-var-count limit for sites with poor overall (less than --max-somatic-p-depth) coverage" />
205 <param argument="--min-var-freq" name="min_var_freq2" type="float" value="0.05" min="0" max="1"
206 label="Minimum variant allele frequency"
207 help="" />
208 <param argument="--max-somatic-p" name="max_somatic_p" type="float" value="0.05" min="0" max="1"
209 label="Maximum somatic p-value allowed for a somatic call"
210 help="" />
211 <param argument="--max-somatic-p-depth" name="max_somatic_p_depth" type="integer" value="10" min="2" max="200"
212 label="Depth required at variant site to run --max-somatic-p filter"
213 help="" />
214 <param argument="--min-ref-readpos" name="min_ref_readpos" type="float" value="0.1" min="0" max="1"
215 label="Minimum relative variant position in ref-supporting reads"
216 help="The minimum average relative distance from the ends of ref-supporting reads required for variant sites" />
217 <param argument="--min-var-readpos" name="min_var_readpos" type="float" value="0.1" min="0" max="1"
218 label="Minimum relative variant position in variant-supporting reads"
219 help="The minimum average relative distance from the ends of variant-supporting reads required for variant sites" />
220 <param argument="--min-ref-dist3" name="min_ref_dist3" type="float" value="0.1" min="0" max="1"
221 label="Minimum distance of variant site from 3'-end of ref-supporting reads"
222 help="The minimum average relative distance from the effective 3'end of ref-supporting reads required for variant sites" />
223 <param argument="--min-var-dist3" name="min_var_dist3" type="float" value="0.1" min="0" max="1"
224 label="Minimum distance of variant site from 3'-end of variant-supporting reads"
225 help="The minimum average relative distance from the effective 3'end of variant-supporting reads required for variant sites" />
226 <param argument="--min-ref-avgrl" name="min_ref_len" type="integer" value="90" min="0" max="200"
227 label="Minimum length of ref-supporting reads"
228 help="The minimum average trimmed length required for reads supporting the reference allele" />
229 <param argument="--min-var-avgrl" name="min_var_len" type="integer" value="90" min="0" max="200"
230 label="Minimum length of variant-supporting reads"
231 help="The minimum average trimmed length required for reads supporting the variant allele" />
232 <param argument="--max-rl-diff" name="max_len_diff" type="float" value="0.25" min="0" max="1"
233 label="Maximum relative read length difference"
234 help="The maximum allowed average relative read length difference (ref - var) between reads supporting the reference and the variant allele" />
235 <param argument="--min-strandedness" name="min_strandedness" type="float" value="0.01" min="0" max="0.5"
236 label="Minimum fraction of variant reads from each strand"
237 help="The minimum fraction of variant reads that are required to come from the forward and from the reverse strand" />
238 <param argument="--min-strand-reads" name="min_strand_reads" type="integer" value="5" min="2" max="200"
239 label="Minimum variant allele depth required to apply the --min-strandedness filter"
240 help="" />
241 <param argument="--min-ref-basequal" name="min_ref_basequal" type="integer" value="15" min="1" max="50"
242 label="Minimum average base quality for the ref allele"
243 help="The minimum average base quality required at the variant site for reads supporting the reference allele" />
244 <param argument="--min-var-basequal" name="min_var_basequal" type="integer" value="15" min="1" max="50"
245 label="Minimum average base quality for the variant allele"
246 help="The minimum average base quality required at the variant site for reads supporting the variant allele" />
247 <param argument="max-basequal-diff" name="max_basequal_diff" type="integer" value="50" min="0" max="50"
248 label="Maximum base quality difference between ref- and variant-supporting reads"
249 help="The maximum average base quality difference (ref - var) allowed between the variant site positions of reads supporting the reference and the variant allele" />
250 <param argument="--min-ref-mapqual" name="min_ref_mapqual" type="integer" value="15" min="1" max="60"
251 label="Minimum average mapping quality of ref-supporting reads"
252 help="The minimum average mapping quality required for reads supporting the reference allele" />
253 <param argument="--min-var-mapqual" name="min_var_mapqual" type="integer" value="15" min="1" max="60"
254 label="Minimum average mapping quality of variant-supporting reads"
255 help="The minimum average mapping quality required for reads supporting the variant allele" />
256 <param argument="--max-mapqual-diff" name="max_mapqual_diff" type="integer" value="50" min="0" max="60"
257 label="Maximum mapping quality difference between ref- and variant-supporting reads"
258 help="The maximum average mapping quality difference (ref - var) allowed between reads supporting the reference and the variant allele" />
259 <param argument="--max-ref-mmqs" name="max_ref_mmqs" type="integer" value="100" min="0"
260 label="Maximum mismatch base quality sum of ref-supporting reads"
261 help="The maximum mismatch base quality sum allowed for reads supporting the reference allele" />
262 <param argument="--max-var-mmqs" name="max_var_mmqs" type="integer" value="100" min="0"
263 label="Maximum mismatch base quality sum of var-supporting reads"
264 help="The maximum mismatch base quality sum allowed for reads supporting the variant allele" />
265 <param argument="--min-mmqs-diff" name="min_mmqs_diff" type="integer" value="0" min="0"
266 label="Minimum difference between mismatch base quality sums of variant- and ref-supporting reads"
267 help="The minimum difference in the mismatch base quality sums (var - ref) required between reads supporting the variant and the reference allele" />
268 <param argument="--max-mmqs-diff" name="max_mmqs_diff" type="integer" value="50" min="1"
269 label="Maximum difference between mismatch base quality sums of variant- and ref-supporting reads"
270 help="The maximum difference in the mismatch base quality sums (var - ref) allowed between reads supporting the variant and the reference allele" />
271 </when>
272 </conditional>
50 </inputs> 273 </inputs>
51 <outputs> 274 <outputs>
52 <data name="output_indel" from_work_dir="galaxy_out.indel.vcf" format="vcf"/> 275 <data name="output" format="vcf">
53 <data name="output_snp" from_work_dir="galaxy_out.snp.vcf" format="vcf"/> 276 <filter>not split_output</filter>
277 </data>
278 <data name="output_snp" from_work_dir="variants_out.snp" format="vcf"
279 label="Varscan somatic SNP calls on ${on_string}">
280 <filter>split_output</filter>
281 </data>
282 <data name="output_indel" from_work_dir="variants_out.indel" format="vcf"
283 label="Varscan somatic indel calls on ${on_string}">
284 <filter>split_output</filter>
285 </data>
54 </outputs> 286 </outputs>
55 <tests> 287 <tests>
56 <test> 288 <test expect_num_outputs="1">
57 <conditional name="pileup"> 289 <conditional name="reference">
58 <param name="pileup_select" value="separated" /> 290 <param name="source" value="history" />
59 <param name="normal_pileup" value="N_Region_Chr1_CDKN2C.pileup.gz" /> 291 <param name="genome" value="hg19_chrM.fa" />
60 <param name="tumor_pileup" value="T_Region_Chr1_CDKN2C.pileup.gz" /> 292 </conditional>
61 </conditional> 293 <param name="normal_bam" value="control_chrM.bam" />
62 <param name="min_coverage" value="2" /> 294 <param name="tumor_bam" value="tumor_chrM.bam" />
63 <param name="min_coverage_normal" value="2" /> 295 <param name="split_output" value="false" />
64 <param name="min_coverage_tumor" value="2" /> 296 <conditional name="call_params">
65 <param name="min_reads2" value="1" /> 297 <param name="settings" value="varscan_defaults" />
66 <param name="min_avg_qual" value="5" /> 298 </conditional>
67 <param name="min_var_freq" value="0.01" /> 299 <conditional name="filter_params">
68 <param name="min_freq_for_hom" value="0.75" /> 300 <param name="settings" value="varscan_defaults" />
301 </conditional>
302 <output name="output">
303 <expand macro="test_mentions_contig" />
304 <expand macro="test_mentions_filters" />
305 </output>
306 </test>
307 <test expect_num_outputs="2">
308 <conditional name="reference">
309 <param name="source" value="history" />
310 <param name="genome" value="hg19_chrM.fa" />
311 </conditional>
312 <param name="normal_bam" value="control_chrM.bam" />
313 <param name="tumor_bam" value="tumor_chrM.bam" />
314 <param name="split_output" value="true" />
315 <conditional name="call_params">
316 <param name="settings" value="varscan_defaults" />
317 </conditional>
318 <conditional name="filter_params">
319 <param name="settings" value="varscan_defaults" />
320 </conditional>
321 <output name="output_indel">
322 <expand macro="test_mentions_contig" />
323 <expand macro="test_mentions_filters" />
324 </output>
325 <output name="output_snp">
326 <expand macro="test_mentions_contig" />
327 <expand macro="test_mentions_filters" />
328 </output>
329 </test>
330 <test expect_num_outputs="1">
331 <conditional name="reference">
332 <param name="source" value="history" />
333 <param name="genome" value="hg19_chrM.fa" />
334 </conditional>
335 <param name="normal_bam" value="control_chrM.bam" />
336 <param name="tumor_bam" value="tumor_chrM.bam" />
337 <param name="split_output" value="false" />
338 <conditional name="call_params">
339 <param name="settings" value="custom" />
340 <param name="min_coverage" value="2" />
341 <param name="min_reads2" value="1" />
342 <param name="min_avg_qual" value="5" />
343 <param name="min_var_freq" value="0.01" />
344 <param name="min_freq_for_hom" value="0.66" />
345 <param name="p_value" value="0.97" />
346 <param name="somatic_p_value" value="0.09" />
347 </conditional>
69 <param name="normal_purity" value="0.6" /> 348 <param name="normal_purity" value="0.6" />
70 <param name="tumor_purity" value="0.6" /> 349 <param name="tumor_purity" value="0.6" />
71 <param name="p_value" value="0.99" /> 350 <conditional name="filter_params">
72 <output name="output_indel" file="varscan_somatic_indel_result1.vcf" lines_diff="0" /> 351 <param name="settings" value="varscan_defaults" />
73 <output name="output_snp" file="varscan_somatic_snp_result1.vcf" lines_diff="0" /> 352 </conditional>
353 <assert_stderr>
354 <has_line_matching
355 expression=" Min coverage:&#09;2x for Normal, 2x for Tumor" />
356 <has_line_matching
357 expression="Min reads2:&#09;1" />
358 <has_line_matching
359 expression="Min var freq:&#09;0.01" />
360 <has_line_matching
361 expression="Min freq for hom:&#09;0.66" />
362 <has_line_matching
363 expression="Normal purity:&#09;0.6" />
364 <has_line_matching
365 expression="Tumor purity:&#09;0.6" />
366 <has_line_matching
367 expression="Min avg qual:&#09;5" />
368 <has_line_matching
369 expression="P-value thresh:&#09;0.97" />
370 <has_line_matching
371 expression="Somatic p-value:&#09;0.09" />
372 </assert_stderr>
373 <output name="output">
374 <expand macro="test_mentions_contig" />
375 <expand macro="test_mentions_filters" />
376 </output>
74 </test> 377 </test>
75 <test> 378 <test expect_num_outputs="1">
76 <conditional name="pileup"> 379 <conditional name="reference">
77 <param name="pileup_select" value="combined" /> 380 <param name="source" value="history" />
78 <param name="combined_pileup" value="NT.pileup.gz" /> 381 <param name="genome" value="hg19_chrM.fa" />
79 </conditional> 382 </conditional>
80 <param name="min_coverage" value="2" /> 383 <param name="normal_bam" value="control_chrM.bam" />
81 <param name="min_coverage_normal" value="2" /> 384 <param name="tumor_bam" value="tumor_chrM.bam" />
82 <param name="min_coverage_tumor" value="2" /> 385 <param name="split_output" value="false" />
83 <param name="min_reads2" value="1" /> 386 <conditional name="call_params">
84 <param name="min_avg_qual" value="5" /> 387 <param name="settings" value="varscan_defaults" />
85 <param name="min_var_freq" value="0.01" /> 388 </conditional>
86 <param name="min_freq_for_hom" value="0.75" /> 389 <conditional name="filter_params">
87 <param name="normal_purity" value="0.6" /> 390 <param name="settings" value="dream3_settings" />
88 <param name="tumor_purity" value="0.6" /> 391 </conditional>
89 <param name="p_value" value="0.99" /> 392 <output name="output">
90 <output name="output_indel" file="varscan_somatic_indel_result2.vcf" lines_diff="0" /> 393 <expand macro="test_mentions_contig" />
91 <output name="output_snp" file="varscan_somatic_snp_result2.vcf" lines_diff="0" /> 394 <expand macro="test_mentions_filters" />
395 </output>
396 </test>
397 <test expect_num_outputs="1">
398 <conditional name="reference">
399 <param name="source" value="history" />
400 <param name="genome" value="hg19_chrM.fa" />
401 </conditional>
402 <param name="normal_bam" value="control_chrM.bam" />
403 <param name="tumor_bam" value="tumor_chrM.bam" />
404 <param name="split_output" value="false" />
405 <conditional name="call_params">
406 <param name="settings" value="varscan_defaults" />
407 </conditional>
408 <conditional name="filter_params">
409 <param name="settings" value="no_filter" />
410 </conditional>
411 <output name="output">
412 <expand macro="test_mentions_contig" />
413 <expand macro="test_not_mentions_filters" />
414 </output>
415 </test>
416 <test expect_num_outputs="1">
417 <conditional name="reference">
418 <param name="source" value="history" />
419 <param name="genome" value="hg19_chrM.fa" />
420 </conditional>
421 <param name="normal_bam" value="control_chrM.bam" />
422 <param name="tumor_bam" value="tumor_chrM.bam" />
423 <param name="split_output" value="false" />
424 <conditional name="call_params">
425 <param name="settings" value="varscan_defaults" />
426 </conditional>
427 <conditional name="filter_params">
428 <param name="settings" value="custom" />
429 <param name="min_ref_basequal" value="28" />
430 <param name="min_var_basequal" value="28" />
431 </conditional>
432 <output name="output">
433 <expand macro="test_mentions_contig" />
434 <expand macro="test_mentions_filters" />
435 </output>
92 </test> 436 </test>
93 </tests> 437 </tests>
94 438
95 <help> 439 <help>
96 **VarScan Overview** 440 @HELP_HEADER@
97 441
98 VarScan_ performs variant detection for massively parallel sequencing data, such as exome, WGS, and transcriptome data. 442 **The Varscan Somatic tool for Galaxy**
99 It calls variants from a mpileup dataset and produces a VCF 4.1. Full documentation is available online_. 443
100 444 This tool wraps the functionality of the ``varscan somatic`` and the
101 This tool calls germline/somatic variants from tumor-normal pileups. 445 ``varscan fpfilter`` command line tools.
102 446
103 .. _VarScan: http://dkoboldt.github.io/varscan/ 447 .. class:: infomark
104 .. _online: http://dkoboldt.github.io/varscan/using-varscan.html 448
449 The wrapper aims at providing the same functionality as the
450 ``varscan fpfilter`` tool, but implements it using ``pysam`` internally.
451 Note that, as one limitation compared to the original ``varscan`` tool,
452 the current version does not apply filters to indels!
453
454 The tool is designed to detect genetic variants in a **pair of samples**
455 representing normal and tumor tissue from the same individual. It classifies
456 the variants, according to their most likely origin, as **somatic** (variant is
457 found in the tumor, but not in the normal sample, *i.e.*, is the consequence of
458 a somatic mutation event), **germline** (variant is found in both samples =>
459 germline mutation event) and **LOH** (variant is found in both samples, but
460 only the tumor sample appears to be homozygous for it => loss of heterozygosity
461 event).
462 This classification is encoded in the variant ``INFO`` fields of the VCF output
463 produced by the tool in the form of a status code ``SS`` (somatic status),
464 where:
465
466 - ``SS=1`` signifies a likely germline variant,
467 - ``SS=2`` a somatic variant, and
468 - ``SS=3`` an LOH variant.
469
470 In addition, ``SS=0`` indicates a possible variant for which there is
471 insufficient evidence of at least a heterozygous state in either sample, and
472 ``SS=5`` is used for variants of unexplained origin (*e.g.*, variants found in
473 the normal, but not in the tumor tissue sample).
474
475 In a second step, following variant calling, the tool can try to detect likely
476 false-positive calls by re-inspecting the data at the variant sites more
477 carefully and looking for signs that may indicate problems with the
478 sequencing data or its mapping. If a called variant is deemed a possible
479 false-positive at this step, this gets indicated in the ``FILTER`` field of the
480 variant record in the VCF output. For high-confidence variants passing all
481 posterior (applied after variant calling) filters, the value of the field will
482 be ``PASS``; for variants failing any of the posterior filters, the value will
483 be a ``;``-separated list of the problematic filters.
484
105 485
106 **Input** 486 **Input**
107 487
108 :: 488 The tool takes as input a reference genome (in fasta format) and a pair of
109 489 aligned reads datasets (bam format).
110 mpileup file - The SAMtools mpileup files for the normal and tumor tissue
111
112 490
113 **Output** 491 **Output**
114 492
115 VarScan produces a VCF 4.1 dataset as output. 493 A VCF dataset of called variants. When asked to *Generate separate output
116 494 datasets for SNP and indel calls*, the tool will behave like the
117 495 ``varscan somatic`` command line tool and produce two VCF datasets - one with
496 just the single nucleotide variants, while the other one will store
497 insertion/deletion variants.
498
499 **Options**
500
501 *Estimated purity of normal sample / of tumor sample*
502
503 Since, in practice, it is often impossible to isolate tissue samples without
504 contamination from surrounding tissue or from invading cells, these two fields
505 let you indicate your estimate of the purity of the two samples (as fractions
506 between 0 and 1, where 1 would indicate a contamination-free sample and 0.5 a
507 sample to which the desired tissue contributes only 50%, while the other 50%
508 consist of cells from the other tissue type).
509
510 *Settings for Variant Calling*
511
512 Settings in this section will affect the steps of variant calling and
513 classification. You can accept VarScan's default values for the corresponding
514 parameters or customize them according to your needs.
515
516 *Settings for Posterior Variant Filtering*
517
518 Use the parameters in this section to configure the false-positive filtering
519 step that follows variant calling and classification. These settings will not
520 influence the number of variants detected nor their classification, but may
521 change the ``FILTER`` field of variant records to indicate which variants
522 failed to pass certain filters. You can use this information with downstream
523 tools to exclude certain variants from further analysis steps or include only
524 high confidence variants that passed all filters (those with ``PASS`` as their
525 ``FILTER`` field value). You can accept the original filter defaults of the
526 ``varscan fpfilter`` command line tool, use the settings established for the
527 tool in the `DREAM3 challenge`_, or choose to customize the settings.
528 Alternatively, you can also choose to skip posterior filtering entirely, in
529 which case all variants will have their ``FILTER`` field set to ``PASS``.
530
531 .. _DREAM3 challenge: https://www.synapse.org/#!Synapse:syn312572/wiki/58893
118 </help> 532 </help>
119 <expand macro="citations" /> 533 <expand macro="citations" />
120 </tool> 534 </tool>