comparison varscan_mpileup2indel_from_bam.xml @ 1:2c56a59a112f draft default tip

planemo upload for repository https://github.com/ErasmusMC-Bioinformatics/galaxytools-emc/tree/master/tools/galaxy-tool-shed-tools commit bd543e68c1af82bcd6a04f0ae3d1180e8887e122
author erasmus-medical-center
date Wed, 15 Feb 2017 16:15:21 -0500
parents 10e2ea79ec55
children
comparison
equal deleted inserted replaced
0:10e2ea79ec55 1:2c56a59a112f
1 <?xml version="1.0" encoding="UTF-8"?> 1 <?xml version="1.0" encoding="UTF-8"?>
2 <tool id="varscan_mpileup2indel_from_bam" name="VarScan2 Call INDELs from BAM" version="2.3.6.a"> 2 <tool id="varscan_mpileup2indel_from_bam" name="VarScan2 Call INDELs from BAM" version="2.4.2.a">
3 <description>VarScan2 INDEL detection; directly reading *.bam file(s) &amp; using parallel mpileup generation, to avoid unnecessary I/O overhead and increase performance.</description> 3 <description>VarScan2 INDEL detection; directly reading *.bam file(s) &amp; using parallel mpileup generation, to avoid unnecessary I/O overhead and increase performance.</description>
4 4
5 <requirements> 5 <requirements>
6 <requirement type="package" version="0.1.19a">samtools_parallel_mpileup_0_1_19a</requirement> 6 <requirement type="package" version="2.4.2">varscan</requirement>
7 <requirement type="package" version="0.1.19">samtools</requirement> 7 <requirement type="package" version="0.6.5">sambamba</requirement>
8 <requirement type="package" version="2.3.6">varscan</requirement>
9 </requirements> 8 </requirements>
10 9
11 <version_command>java -jar $JAVA_JAR_PATH/VarScan.v2.3.6.jar 2>&amp;1 | head -n 1</version_command> 10 <version_command>varscan 2&gt;&amp;1 | head -n 1</version_command>
12 11
13 <command> 12 <command detect_errors="exit_code"><![CDATA[
14 #if $reference_genome_source.source_select == "attribute" and len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) != 1 13 #for $alignment in $alignments
15 echo "Invalid number of dbkeys are found: ${ len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) }, while only one should be used. Make sure that the alignments are done on the same reference genome and that 'tool-data/all_fasta.loc' is configured properly!" >&amp;2 14 ln -f -s '${alignment.metadata.bam_index}' '${alignment}.bai' &&
16 #else 15 #end for
17 #import os.path 16
17 sambamba mpileup
18 -t \${GALAXY_SLOTS:-4}
19
18 #for $alignment in $alignments 20 #for $alignment in $alignments
19 <!-- @todo use the existence of $alignment.metadata.bam_index or $alignment.metadata['bam_index'] --> 21 '${alignment}'
20 #if not os.path.isfile(str($alignment)+".bai") 22 #end for
21 echo "- Indexing alignment file: $alignment.name " ; 23
22 samtools index $alignment 2>&amp;1 ; 24 --samtools
25 -f
26 #if $reference_genome_source.source_select == "indexed_filtered"
27 '$reference_genome_source.reference_genome'
28 #else if $reference_genome_source.source_select == "indexed_all"
29 '$reference_genome_source.reference_genome'
30 #else if $reference_genome_source.source_select == "history"
31 '$reference_genome_source.reference_genome'
23 #else 32 #else
24 echo "- Skiping indexing: $alignment.name " ; 33 <!--
34 This is a workaround to obtain the "genome.fa" file that
35 corresponds to the dbkey of the alignments.
36 Because this file is "calculated" during run-time, it can
37 be used in a workflow.
38 -->
39 "${ filter( lambda x: str( x[0] ) == str( { alignment.metadata.dbkey:True for alignment in $alignments }.keys()[0] ), $__app__.tool_data_tables[ 'all_fasta' ].get_fields() )[0][-1] }"
25 #end if 40 #end if
26 #end for 41
27 42 #if $extended_parameters_regions.samtools_regions == "region"
28 #if $mpileup_parallelization.mpileup_parallelization_select == "true" 43 -r '${extended_parameters_regions.samtools_r}'
29 samtools-parallel-mpileup mpileup 44 #elif $extended_parameters_regions.samtools_regions == "regions_file_pos" or $extended_parameters_regions.samtools_regions == "regions_file_bed"
30 -t $mpileup_parallelization.samtools_threads 45 -l '${extended_parameters_regions.sambamba_l}'
31 #else 46 #end if
32 samtools mpileup 47
33 #end if 48 #if $extended_parameters.parameters == "extended"
34 -f 49 $extended_parameters.samtools_6
35 #if $reference_genome_source.source_select == "indexed_filtered" 50 $extended_parameters.samtools_A
36 "$reference_genome_source.reference_genome" 51 $extended_parameters.samtools_B
37 #else if $reference_genome_source.source_select == "indexed_all" 52 -C $extended_parameters.samtools_C
38 "$reference_genome_source.reference_genome" 53 -d $extended_parameters.samtools_d
39 #else if $reference_genome_source.source_select == "history" 54 $extended_parameters.samtools_E
40 "$reference_genome_source.reference_genome" 55 -M $extended_parameters.samtools_M
41 #else 56 $extended_parameters.samtools_R
42 <!-- 57 -q $extended_parameters.samtools_q
43 This is a workaround to obtain the "genome.fa" file that 58 -Q $extended_parameters.samtools_Q
44 corresponds to the dbkey of the alignments. 59
45 Because this file is "calculated" during run-time, it can 60 -e $extended_parameters.samtools_e
46 be used in a workflow. 61 -F $extended_parameters.samtools_F
47 --> 62 -h $extended_parameters.samtools_h
48 "${ filter( lambda x: str( x[0] ) == str( { alignment.metadata.dbkey:True for alignment in $alignments }.keys()[0] ), $__app__.tool_data_tables[ 'all_fasta' ].get_fields() )[0][-1] }" 63 $extended_parameters.samtools_I
49 #end if 64 -L $extended_parameters.samtools_L
50 65 -m $extended_parameters.samtools_m
51 #if $extended_parameters_regions.samtools_regions == "region" 66 -o $extended_parameters.samtools_o
52 -r $extended_parameters_regions.samtools_r 67 $extended_parameters.samtools_p
53 #elif $extended_parameters_regions.samtools_regions == "regions_file_pos" or $extended_parameters_regions.samtools_regions == "regions_file_bed" 68 -P $extended_parameters.samtools_P
54 -l $extended_parameters_regions.samtools_l 69 #end if
55 #end if 70
56 71 #for $alignment in $alignments
57 #if $extended_parameters.parameters == "extended" 72 '${alignment}'
58 $extended_parameters.samtools_6 73 #end for
59 $extended_parameters.samtools_A 74
60 $extended_parameters.samtools_B 75 | varscan mpileup2indel
61 -C $extended_parameters.samtools_C 76
62 -d $extended_parameters.samtools_d 77 #if $extended_parameters.parameters == "extended"
63 $extended_parameters.samtools_E 78 --min-coverage $extended_parameters.varscan_min_coverage
64 -M $extended_parameters.samtools_M 79 --min-reads2 $extended_parameters.varscan_min_reads2
65 $extended_parameters.samtools_R 80 --min-avg-qual $extended_parameters.varscan_min_avg_qual
66 -q $extended_parameters.samtools_q 81 --min-var-freq $extended_parameters.varscan_min_var_freq
67 -Q $extended_parameters.samtools_Q 82 --min-freq-for-hom $extended_parameters.varscan_min_freq_for_hom
68 83 --p-value $extended_parameters.varscan_p_value
69 -e $extended_parameters.samtools_e 84 $extended_parameters.varscan_strand_filter
70 -F $extended_parameters.samtools_F 85 $extended_parameters.varscan_variants
71 -h $extended_parameters.samtools_h 86 #end if
72 $extended_parameters.samtools_I 87
73 -L $extended_parameters.samtools_L 88 #if $varscan_output == "vcf" or $varscan_output.value == "vcf"
74 -m $extended_parameters.samtools_m 89 --output-vcf 1
75 -o $extended_parameters.samtools_o 90 #end if
76 $extended_parameters.samtools_p 91
77 -P $extended_parameters.samtools_P 92 > '${snv_output}'
78 #end if 93
79 94 ]]></command>
80 #for $alignment in $alignments
81 ${alignment}
82 #end for
83 2>stderr_1.txt
84
85 #if $mpileup_parallelization.mpileup_parallelization_select == "true"
86 #if $mpileup_parallelization.sort_mpileup
87 | sort -k1,1V -k2,2g
88 #end if
89 #end if
90
91 | java
92 -Xmx64G
93 -jar \$JAVA_JAR_PATH/VarScan.v2.3.6.jar
94 mpileup2indel
95
96 #if $extended_parameters.parameters == "extended"
97 --min-coverage $extended_parameters.varscan_min_coverage
98 --min-reads2 $extended_parameters.varscan_min_reads2
99 --min-avg-qual $extended_parameters.varscan_min_avg_qual
100 --min-var-freq $extended_parameters.varscan_min_var_freq
101 --min-freq-for-hom $extended_parameters.varscan_min_freq_for_hom
102 --p-value $extended_parameters.varscan_p_value
103 $extended_parameters.varscan_strand_filter
104 $extended_parameters.varscan_variants
105 #end if
106
107 #if $varscan_output == "vcf" or $varscan_output.value == "vcf"
108 --output-vcf 1
109 #end if
110
111 2>stderr_2.txt
112 > $snv_output ;
113
114
115 echo "---------------[ mpileup generation ]---------------" ;
116 cat stderr_1.txt ;
117 echo "" ;
118 echo "---------------[ VarScan INDEL detect ]-------------" ;
119 cat stderr_2.txt ;
120 echo "" ;
121 echo "----------------------------------------------------" ;
122 #end if
123 </command>
124 95
125 <inputs> 96 <inputs>
126 <param format="bam,sam" multiple="true" name="alignments" type="data" label="Alignment file(s)" help="Mapped reads in BAM or SAM format."/> 97 <param format="bam,sam" multiple="true" name="alignments" type="data" label="Alignment file(s)" help="Mapped reads in BAM or SAM format."/>
127 98
128 <!-- Find out how to access the reference genome from the BAM file(s) --> 99 <!-- Find out how to access the reference genome from the BAM file(s) -->
174 <when value="regions_file_pos"> 145 <when value="regions_file_pos">
175 <param type="data" name="samtools_l" format="tabular" label="Samtools: list of positions (chr pos)" /> 146 <param type="data" name="samtools_l" format="tabular" label="Samtools: list of positions (chr pos)" />
176 </when> 147 </when>
177 <when value="regions_file_bed"> 148 <when value="regions_file_bed">
178 <param type="data" name="samtools_l" format="bed" label="Samtools: specific regions (BED)" /> 149 <param type="data" name="samtools_l" format="bed" label="Samtools: specific regions (BED)" />
179 </when>
180 </conditional>
181
182 <conditional name="mpileup_parallelization">
183 <param name="mpileup_parallelization_select" type="select" label="Use parallelization for the mpileup generation (experimental)" help="Especially if larger numbers of bam/sam files are processed, or the file infrastructure is optimized for IO-parallelization, this feature might improve performance.">
184 <option value="false" >False - uses classical samtools</option>
185 <option value="true">True - uses (experimental) samtools mpileup-parallel</option>
186 </param>
187 <when value="false" />
188 <when value="true">
189 <param type="integer" name="samtools_threads" value="2" min="1" label="Samtools: mpileup threads" />
190 <param type="boolean" name="sort_mpileup" truevalue="true" falsevalue="false" label="Sort mpileup file (SLOW)" help="Because parallelization may disrupt the output order, sorting can be convenient for e.g. testing. Notice that this function is only useful in a limited number of situations but consumes (much) resources. Only use it if it's really necessary." />
191 </when> 150 </when>
192 </conditional> 151 </conditional>
193 152
194 <conditional name="extended_parameters"> 153 <conditional name="extended_parameters">
195 <param name="parameters" type="select" label="Advanced parameters" help="For more advanced VarScan and samtools settings."> 154 <param name="parameters" type="select" label="Advanced parameters" help="For more advanced VarScan and samtools settings.">
258 217
259 <param name="parameters" value="default" /> 218 <param name="parameters" value="default" />
260 <param name="varscan_output_vcf" value="1" /> 219 <param name="varscan_output_vcf" value="1" />
261 220
262 221
263 <output name="snv_output" file="example.vcf" /> 222 <output name="snv_output" file="example.2.vcf" />
264 </test>
265 <test><!-- Use parallelized samtools -->
266 <param name="alignments" value="example.bam" ftype="bam" />
267
268 <param name="source_select" value="history" />
269 <param name="reference_genome" value="example.fa" ftype="fasta" />
270
271 <param name="samtools_regions" value="entire_genome" />
272
273 <param name="mpileup_parallelization_select" value="true" />
274 <param name="samtools_threads" value="2" />
275 <param name="sort_mpileup" value="true" />
276
277 <param name="parameters" value="default" />
278 <param name="varscan_output_vcf" value="1" />
279
280
281 <output name="snv_output" file="example.vcf" />
282 </test> 223 </test>
283 </tests> 224 </tests>
284 225
285 <help> 226 <help>
286 **VarScan 2.3.6** 227 **VarScan 2.4.2**
287 228
288 VarScan is a platform-independent mutation caller for targeted, exome, and whole-genome resequencing data generated on Illumina, SOLiD, Life/PGM, Roche/454, and similar instruments. The newest version, VarScan 2, is written in Java, so it runs on most operating systems. 229 VarScan is a platform-independent mutation caller for targeted, exome, and whole-genome resequencing data generated on Illumina, SOLiD, Life/PGM, Roche/454, and similar instruments. The newest version, VarScan 2, is written in Java, so it runs on most operating systems.
289 http://dx.doi.org/10.1101/gr.129684.111 230 http://dx.doi.org/10.1101/gr.129684.111
290 http://www.ncbi.nlm.nih.gov/pubmed/19542151 231 http://www.ncbi.nlm.nih.gov/pubmed/19542151
291 232
300 241
301 **Input formats** 242 **Input formats**
302 243
303 VarScan2 accepts sequencing alignments in the same, either SAM or BAM format (http://samtools.sourceforge.net/). The alignment files have to be linked to a reference genome by galaxy. This is indicated under every history item with e.g.: *"database: hg19"* for a link to hg19, or *"database: ?"* if the link is missing. 244 VarScan2 accepts sequencing alignments in the same, either SAM or BAM format (http://samtools.sourceforge.net/). The alignment files have to be linked to a reference genome by galaxy. This is indicated under every history item with e.g.: *"database: hg19"* for a link to hg19, or *"database: ?"* if the link is missing.
304 245
305 **Installation**
306
307 Make sure your reference genomes are properly annotated in "tool-data/all_fasta.loc", and linked to the names of the reference used for alignment.
308
309 **License** 246 **License**
310 247
311 * VarScan2.3.6: Non-Profit Open Software License 3.0 (Non-Profit OSL 3.0) 248 * VarScan 2.4.2: Non-Profit Open Software License 3.0 (Non-Profit OSL 3.0)
312 * parallel-mpileup: MIT License (https://github.com/mydatascience/parallel-mpileup/blob/master/samtools-0.1.19/COPYING)
313 249
314 250
315 Contact 251 Contact
316 ------- 252 -------
317 253
318 The tool wrapper has been written by Youri Hoogstrate from the Erasmus 254 The tool wrapper has been written by Youri Hoogstrate from the Erasmus
319 Medical Center (Rotterdam, Netherlands) on behalf of the Translational 255 Medical Center (Rotterdam, Netherlands).
320 Research IT (TraIT) project:
321
322 http://www.ctmm.nl/en/programmas/infrastructuren/traitprojecttranslationeleresearch
323
324 More tools by the Translational Research IT (TraIT) project can be found
325 in the following toolsheds:
326
327 http://toolshed.g2.bx.psu.edu/
328
329 http://testtoolshed.g2.bx.psu.edu/
330 </help> 256 </help>
331 <citations> 257 <citations>
332 <citation type="doi">10.1101/gr.129684.111</citation> 258 <citation type="doi">10.1101/gr.129684.111</citation>
333 </citations> 259 </citations>
334 </tool> 260 </tool>