Mercurial > repos > yhoogstrate > varscan_mpileup2snp_from_bam
comparison varscan_mpileup2snp_from_bam.xml @ 1:9a39c4105901 draft default tip
planemo upload for repository https://github.com/ErasmusMC-Bioinformatics/galaxytools-emc/tree/master/tools/galaxy-tool-shed-tools commit bd543e68c1af82bcd6a04f0ae3d1180e8887e122
author | erasmus-medical-center |
---|---|
date | Wed, 15 Feb 2017 16:16:01 -0500 |
parents | 0c5cc5763091 |
children |
comparison
equal
deleted
inserted
replaced
0:0c5cc5763091 | 1:9a39c4105901 |
---|---|
1 <?xml version="1.0" encoding="UTF-8"?> | 1 <?xml version="1.0" encoding="UTF-8"?> |
2 <tool id="varscan_mpileup2snp_from_bam" name="VarScan2 Call SNPs from BAM" version="2.3.6.a"> | 2 <tool id="varscan_mpileup2snp_from_bam" name="VarScan2 Call SNPs from BAM" version="2.4.2.a"> |
3 <description>VarScan2 SNP/SNV detection; directly reading *.bam file(s) & using parallel mpileup generation, to avoid unnecessary I/O overhead and increase performance.</description> | 3 <description>VarScan2 SNP/SNV detection; directly reading *.bam file(s) & using parallel mpileup generation, to avoid unnecessary I/O overhead and increase performance.</description> |
4 | 4 |
5 <requirements> | 5 <requirements> |
6 <requirement type="package" version="0.1.19-a">samtools_parallel_mpileup</requirement> | 6 <requirement type="package" version="2.4.2">varscan</requirement> |
7 <requirement type="package" version="0.1.19">samtools</requirement> | 7 <requirement type="package" version="0.6.5">sambamba</requirement> |
8 <requirement type="package" version="2.3.6">varscan</requirement> | |
9 </requirements> | 8 </requirements> |
10 | 9 |
11 <version_command>java -jar $JAVA_JAR_PATH/VarScan.v2.3.6.jar 2>&1 | head -n 1</version_command> | 10 <version_command>varscan 2>&1 | head -n 1</version_command> |
12 | 11 |
13 <command> | 12 <command detect_errors="exit_code"><![CDATA[ |
14 #if $reference_genome_source.source_select == "attribute" and len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) != 1 | 13 #for $alignment in $alignments |
15 echo "Invalid number of dbkeys are found: ${ len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) }, while only one should be used. Make sure that the alignments are done on the same reference genome and that 'tool-data/all_fasta.loc' is configured properly!" >&2 | 14 ln -f -s '${alignment.metadata.bam_index}' '${alignment}.bai' && |
16 #else | 15 #end for |
17 #import os.path | 16 |
17 sambamba mpileup | |
18 -t \${GALAXY_SLOTS:-4} | |
19 | |
18 #for $alignment in $alignments | 20 #for $alignment in $alignments |
19 <!-- @todo use the existence of $alignment.metadata.bam_index or $alignment.metadata['bam_index'] --> | 21 '${alignment}' |
20 #if not os.path.isfile(str($alignment)+".bai") | 22 #end for |
21 echo "- Indexing alignment file: $alignment.name " ; | 23 |
22 samtools index $alignment 2>&1 ; | 24 --samtools |
25 -f | |
26 #if $reference_genome_source.source_select == "indexed_filtered" | |
27 '$reference_genome_source.reference_genome' | |
28 #else if $reference_genome_source.source_select == "indexed_all" | |
29 '$reference_genome_source.reference_genome' | |
30 #else if $reference_genome_source.source_select == "history" | |
31 '$reference_genome_source.reference_genome' | |
23 #else | 32 #else |
24 echo "- Skiping indexing: $alignment.name " ; | 33 <!-- |
25 #end if | 34 This is a workaround to obtain the "genome.fa" file that |
26 #end for | 35 corresponds to the dbkey of the alignments. |
27 | 36 Because this file is "calculated" during run-time, it can |
28 #if $mpileup_parallelization.mpileup_parallelization_select == "true" | 37 be used in a workflow. |
29 samtools-parallel-mpileup mpileup | 38 --> |
30 -t $mpileup_parallelization.samtools_threads | 39 "${ filter( lambda x: str( x[0] ) == str( { alignment.metadata.dbkey:True for alignment in $alignments }.keys()[0] ), $__app__.tool_data_tables[ 'all_fasta' ].get_fields() )[0][-1] }" |
31 #else | 40 #end if |
32 samtools mpileup | 41 |
33 #end if | 42 #if $extended_parameters_regions.sambamba_regions == "region" |
34 -f | 43 -r '${extended_parameters_regions.sambamba_r}' |
35 #if $reference_genome_source.source_select == "indexed_filtered" | 44 #elif $extended_parameters_regions.sambamba_regions == "regions_file_pos" or $extended_parameters_regions.sambamba_regions == "regions_file_bed" |
36 "$reference_genome_source.reference_genome" | 45 -l '${extended_parameters_regions.sambamba_l}' |
37 #else if $reference_genome_source.source_select == "indexed_all" | 46 #end if |
38 "$reference_genome_source.reference_genome" | 47 |
39 #else if $reference_genome_source.source_select == "history" | 48 #if $extended_parameters.parameters == "extended" |
40 "$reference_genome_source.reference_genome" | 49 $extended_parameters.sambamba_6 |
41 #else | 50 $extended_parameters.sambamba_A |
42 <!-- | 51 $extended_parameters.sambamba_B |
43 This is a workaround to obtain the "genome.fa" file that | 52 -C $extended_parameters.sambamba_C |
44 corresponds to the dbkey of the alignments. | 53 -d $extended_parameters.sambamba_d |
45 Because this file is "calculated" during run-time, it can | 54 $extended_parameters.sambamba_E |
46 be used in a workflow. | 55 -M $extended_parameters.sambamba_M |
47 --> | 56 $extended_parameters.sambamba_R |
48 "${ filter( lambda x: str( x[0] ) == str( { alignment.metadata.dbkey:True for alignment in $alignments }.keys()[0] ), $__app__.tool_data_tables[ 'all_fasta' ].get_fields() )[0][-1] }" | 57 -q $extended_parameters.sambamba_q |
49 #end if | 58 -Q $extended_parameters.sambamba_Q |
50 | 59 |
51 #if $extended_parameters_regions.samtools_regions == "region" | 60 -e $extended_parameters.sambamba_e |
52 -r $extended_parameters_regions.samtools_r | 61 -F $extended_parameters.sambamba_F |
53 #elif $extended_parameters_regions.samtools_regions == "regions_file_pos" or $extended_parameters_regions.samtools_regions == "regions_file_bed" | 62 -h $extended_parameters.sambamba_h |
54 -l $extended_parameters_regions.samtools_l | 63 $extended_parameters.sambamba_I |
55 #end if | 64 -L $extended_parameters.sambamba_L |
56 | 65 -m $extended_parameters.sambamba_m |
57 #if $extended_parameters.parameters == "extended" | 66 -o $extended_parameters.sambamba_o |
58 $extended_parameters.samtools_6 | 67 $extended_parameters.sambamba_p |
59 $extended_parameters.samtools_A | 68 -P $extended_parameters.sambamba_P |
60 $extended_parameters.samtools_B | 69 #end if |
61 -C $extended_parameters.samtools_C | 70 | varscan mpileup2snp |
62 -d $extended_parameters.samtools_d | |
63 $extended_parameters.samtools_E | |
64 -M $extended_parameters.samtools_M | |
65 $extended_parameters.samtools_R | |
66 -q $extended_parameters.samtools_q | |
67 -Q $extended_parameters.samtools_Q | |
68 | |
69 -e $extended_parameters.samtools_e | |
70 -F $extended_parameters.samtools_F | |
71 -h $extended_parameters.samtools_h | |
72 $extended_parameters.samtools_I | |
73 -L $extended_parameters.samtools_L | |
74 -m $extended_parameters.samtools_m | |
75 -o $extended_parameters.samtools_o | |
76 $extended_parameters.samtools_p | |
77 -P $extended_parameters.samtools_P | |
78 #end if | |
79 | |
80 #for $alignment in $alignments | |
81 ${alignment} | |
82 #end for | |
83 2>stderr_1.txt | |
84 | |
85 #if $mpileup_parallelization.mpileup_parallelization_select == "true" | |
86 #if $mpileup_parallelization.sort_mpileup | |
87 | sort -k1,1V -k2,2g | |
88 #end if | |
89 #end if | |
90 | |
91 ## Make for every MPILEUP file an | |
92 ## http://en.wikipedia.org/wiki/Named_pipe | |
93 | |
94 | java | |
95 -Xmx64G | |
96 -jar \$JAVA_JAR_PATH/VarScan.v2.3.6.jar | |
97 mpileup2snp | |
98 | 71 |
99 #if $extended_parameters.parameters == "extended" | 72 #if $extended_parameters.parameters == "extended" |
100 --min-coverage $extended_parameters.varscan_min_coverage | 73 --min-coverage $extended_parameters.varscan_min_coverage |
101 --min-reads2 $extended_parameters.varscan_min_reads2 | 74 --min-reads2 $extended_parameters.varscan_min_reads2 |
102 --min-avg-qual $extended_parameters.varscan_min_avg_qual | 75 --min-avg-qual $extended_parameters.varscan_min_avg_qual |
103 --min-var-freq $extended_parameters.varscan_min_var_freq | 76 --min-var-freq $extended_parameters.varscan_min_var_freq |
104 --min-freq-for-hom $extended_parameters.varscan_min_freq_for_hom | 77 --min-freq-for-hom $extended_parameters.varscan_min_freq_for_hom |
105 --p-value $extended_parameters.varscan_p_value | 78 --p-value $extended_parameters.varscan_p_value |
106 $extended_parameters.varscan_strand_filter | 79 $extended_parameters.varscan_strand_filter |
107 $extended_parameters.varscan_variants | 80 $extended_parameters.varscan_variants |
108 #end if | 81 #end if |
109 | 82 |
110 #if $varscan_output == "vcf" or $varscan_output.value == "vcf" | 83 #if $varscan_output == "vcf" or $varscan_output.value == "vcf" |
111 --output-vcf 1 | 84 --output-vcf 1 |
112 #end if | 85 #end if |
113 | 86 |
114 2>stderr_2.txt | 87 > '$snv_output' |
115 > $snv_output ; | 88 ]]></command> |
116 | 89 |
117 | |
118 echo "---------------[ mpileup generation ]---------------" ; | |
119 cat stderr_1.txt ; | |
120 echo "" ; | |
121 echo "---------------[ VarScan SNP detect ]---------------" ; | |
122 cat stderr_2.txt ; | |
123 echo "" ; | |
124 echo "----------------------------------------------------" ; | |
125 #end if | |
126 </command> | |
127 | |
128 <inputs> | 90 <inputs> |
129 <param format="bam,sam" multiple="true" name="alignments" type="data" label="Alignment file(s)" help="Mapped reads in BAM or SAM format."/> | 91 <param format="bam,sam" multiple="true" name="alignments" type="data" label="Alignment file(s)" help="Mapped reads in BAM or SAM format."/> |
130 | 92 |
131 <!-- Find out how to access the reference genome from the BAM file(s) --> | 93 <!-- Find out how to access the reference genome from the BAM file(s) --> |
132 <conditional name="reference_genome_source"> | 94 <conditional name="reference_genome_source"> |
162 </when> | 124 </when> |
163 <when value="attribute" /> | 125 <when value="attribute" /> |
164 </conditional> | 126 </conditional> |
165 | 127 |
166 <conditional name="extended_parameters_regions"> | 128 <conditional name="extended_parameters_regions"> |
167 <param name="samtools_regions" type="select" label="Region specific parameters" help="Let samtools target specific genomic locations."> | 129 <param name="sambamba_regions" type="select" label="Region specific parameters" help="Let sambamba target specific genomic locations."> |
168 <option value="entire_genome">Entire genome</option> | 130 <option value="entire_genome">Entire genome</option> |
169 <option value="region">Specific region</option> | 131 <option value="region">Specific region</option> |
170 <option value="regions_file_pos">Specific positions (file); list of positions</option> | 132 <option value="regions_file_pos">Specific positions (file); list of positions</option> |
171 <option value="regions_file_bed">Specific regions (file); list of regions in BED</option> | 133 <option value="regions_file_bed">Specific regions (file); list of regions in BED</option> |
172 </param> | 134 </param> |
173 <when value="entire_genome" /> | 135 <when value="entire_genome" /> |
174 <when value="region"> | 136 <when value="region"> |
175 <param type="text" name="samtools_r" label="Samtools: region in which pileup is generated" help="e.g. chrX or chr:pos or chr:start-end" /> | 137 <param type="text" name="sambamba_r" label="Samtools: region in which pileup is generated" help="e.g. chrX or chr:pos or chr:start-end" /> |
176 </when> | 138 </when> |
177 <when value="regions_file_pos"> | 139 <when value="regions_file_pos"> |
178 <param type="data" name="samtools_l" format="tabular" label="Samtools: list of positions (chr pos)" /> | 140 <param type="data" name="sambamba_l" format="tabular" label="Samtools: list of positions (chr pos)" /> |
179 </when> | 141 </when> |
180 <when value="regions_file_bed"> | 142 <when value="regions_file_bed"> |
181 <param type="data" name="samtools_l" format="bed" label="Samtools: specific regions (BED)" /> | 143 <param type="data" name="sambamba_l" format="bed" label="Samtools: specific regions (BED)" /> |
182 </when> | 144 </when> |
183 </conditional> | 145 </conditional> |
184 | 146 |
185 <conditional name="mpileup_parallelization"> | |
186 <param name="mpileup_parallelization_select" type="select" label="Use parallelization for the mpileup generation (experimental)" help="Especially if larger numbers of bam/sam files are processed, or the file infrastructure is optimized for IO-paralellization, this feature might improve performance."> | |
187 <option value="false" >False - uses classical samtools</option> | |
188 <option value="true">True - uses (experimental) samtools mpileup-parallel</option> | |
189 </param> | |
190 <when value="false" /> | |
191 <when value="true"> | |
192 <param type="integer" name="samtools_threads" value="2" min="1" label="Samtools: mpileup threads" /> | |
193 <param type="boolean" name="sort_mpileup" truevalue="true" falsevalue="false" label="Sort mpileup file (SLOW)" help="Because parallelization may disrupt the output order, sorting can be convenient for e.g. testing. Note that this function is only useful in a limited number of situations but consumes (many) resources. Only use it if it's really necessary." /> | |
194 </when> | |
195 </conditional> | |
196 | |
197 <conditional name="extended_parameters"> | 147 <conditional name="extended_parameters"> |
198 <param name="parameters" type="select" label="Advanced parameters" help="For more advanced VarScan and samtools settings."> | 148 <param name="parameters" type="select" label="Advanced parameters" help="For more advanced VarScan and sambamba settings."> |
199 <option value="default">Default settings</option> | 149 <option value="default">Default settings</option> |
200 <option value="extended">Extended settings</option> | 150 <option value="extended">Extended settings</option> |
201 </param> | 151 </param> |
202 <when value="default" /> | 152 <when value="default" /> |
203 <when value="extended"> | 153 <when value="extended"> |
204 <param type="boolean" name="samtools_6" falsevalue="" truevalue=" -6" label="Samtools: assume the quality is in the Illumina-1.3+ encoding" /> | 154 <param type="boolean" name="sambamba_6" falsevalue="" truevalue=" -6" label="Samtools: assume the quality is in the Illumina-1.3+ encoding" /> |
205 <param type="boolean" name="samtools_A" falsevalue="" truevalue=" -A" label="Samtools: count anomalous read pairs" /> | 155 <param type="boolean" name="sambamba_A" falsevalue="" truevalue=" -A" label="Samtools: count anomalous read pairs" /> |
206 <param type="boolean" name="samtools_B" falsevalue="" truevalue=" -B" label="Samtools: disable BAQ computation" /> | 156 <param type="boolean" name="sambamba_B" falsevalue="" truevalue=" -B" label="Samtools: disable BAQ computation" /> |
207 <param type="integer" name="samtools_C" value="0" label="Samtools: parameter for adjusting mapQ; 0 to disable [0]" /> | 157 <param type="integer" name="sambamba_C" value="0" label="Samtools: parameter for adjusting mapQ; 0 to disable [0]" /> |
208 <param type="integer" name="samtools_d" value="250" label="Samtools: max per-BAM depth to avoid excessive memory usage [250]" /> | 158 <param type="integer" name="sambamba_d" value="250" label="Samtools: max per-BAM depth to avoid excessive memory usage [250]" /> |
209 <param type="boolean" name="samtools_E" falsevalue="" truevalue=" -E" label="Samtools: recalculate extended BAQ on the fly thus ignoring existing BQs" /> | 159 <param type="boolean" name="sambamba_E" falsevalue="" truevalue=" -E" label="Samtools: recalculate extended BAQ on the fly thus ignoring existing BQs" /> |
210 <param type="integer" name="samtools_M" value="60" label="cap mapping quality at INT [60]" /> | 160 <param type="integer" name="sambamba_M" value="60" label="cap mapping quality at INT [60]" /> |
211 <param type="boolean" name="samtools_R" falsevalue="" truevalue=" -R" label="Samtools: ignore RG tags" /> | 161 <param type="boolean" name="sambamba_R" falsevalue="" truevalue=" -R" label="Samtools: ignore RG tags" /> |
212 <param type="integer" name="samtools_q" value="0" label="Samtools: skip alignments with mapQ smaller than INT [0]" /> | 162 <param type="integer" name="sambamba_q" value="0" label="Samtools: skip alignments with mapQ smaller than INT [0]" /> |
213 <param type="integer" name="samtools_Q" value="13" label="Samtools: skip bases with baseQ/BAQ smaller than INT [13]" /> | 163 <param type="integer" name="sambamba_Q" value="13" label="Samtools: skip bases with baseQ/BAQ smaller than INT [13]" /> |
214 | 164 |
215 <param type="integer" name="samtools_e" value="20" label="Samtools: Phred-scaled gap extension seq error probability [20]" /> | 165 <param type="integer" name="sambamba_e" value="20" label="Samtools: Phred-scaled gap extension seq error probability [20]" /> |
216 <param type="float" name="samtools_F" value="0.002" label="Samtools: minimum fraction of gapped reads for candidates [0.002]" help="Alias: -F" /> | 166 <param type="float" name="sambamba_F" value="0.002" label="Samtools: minimum fraction of gapped reads for candidates [0.002]" help="Alias: -F" /> |
217 <param type="integer" name="samtools_h" value="100" label="Samtools: coefficient for homopolymer errors [100]" /> | 167 <param type="integer" name="sambamba_h" value="100" label="Samtools: coefficient for homopolymer errors [100]" /> |
218 <param type="boolean" name="samtools_I" falsevalue="" truevalue=" -I" label="Samtools: do not perform indel calling" /> | 168 <param type="boolean" name="sambamba_I" falsevalue="" truevalue=" -I" label="Samtools: do not perform indel calling" /> |
219 <param type="integer" name="samtools_L" value="250" label="Samtools: max per-sample depth for INDEL calling [250]" /> | 169 <param type="integer" name="sambamba_L" value="250" label="Samtools: max per-sample depth for INDEL calling [250]" /> |
220 <param type="integer" name="samtools_m" value="1" label="Samtools: minimum gapped reads for indel candidates [1]" help="Alias: -m" /> | 170 <param type="integer" name="sambamba_m" value="1" label="Samtools: minimum gapped reads for indel candidates [1]" help="Alias: -m" /> |
221 <param type="integer" name="samtools_o" value="40" label="Samtools: Phred-scaled gap open sequencing error probability [40]" /> | 171 <param type="integer" name="sambamba_o" value="40" label="Samtools: Phred-scaled gap open sequencing error probability [40]" /> |
222 <param type="boolean" name="samtools_p" falsevalue="" truevalue=" -p" label="Samtools: apply -m and -F per-sample to increase sensitivity" /> | 172 <param type="boolean" name="sambamba_p" falsevalue="" truevalue=" -p" label="Samtools: apply -m and -F per-sample to increase sensitivity" /> |
223 <param type="text" name="samtools_P" value="all" label="Samtools: comma separated list of platforms for indels [all]" /> | 173 <param type="text" name="sambamba_P" value="all" label="Samtools: comma separated list of platforms for indels [all]" /> |
224 | 174 |
225 <param type="integer" name="varscan_min_coverage" value="8" label="VarScan: Minimum read depth at a position to make a call [8]" /> | 175 <param type="integer" name="varscan_min_coverage" value="8" label="VarScan: Minimum read depth at a position to make a call [8]" /> |
226 <param type="integer" name="varscan_min_reads2" value="2" label="VarScan: Minimum supporting reads at a position to call variants [2]" /> | 176 <param type="integer" name="varscan_min_reads2" value="2" label="VarScan: Minimum supporting reads at a position to call variants [2]" /> |
227 <param type="integer" name="varscan_min_avg_qual" value="15" label="VarScan: Minimum base quality at a position to count a read [15]" /> | 177 <param type="integer" name="varscan_min_avg_qual" value="15" label="VarScan: Minimum base quality at a position to count a read [15]" /> |
228 <param type="float" name="varscan_min_var_freq" value="0.01" label="VarScan: minimum fraction of gapped reads for candidates [0.002]" help="Alias: -F" /> | 178 <param type="float" name="varscan_min_var_freq" value="0.01" label="VarScan: minimum fraction of gapped reads for candidates [0.002]" help="Alias: -F" /> |
246 </change_format> | 196 </change_format> |
247 </data> | 197 </data> |
248 </outputs> | 198 </outputs> |
249 | 199 |
250 <tests> | 200 <tests> |
251 <test><!-- Use classical samtools --> | 201 <test><!-- Use classical sambamba --> |
252 <param name="alignments" value="example.bam" ftype="bam" /> | 202 <param name="alignments" value="example.bam" ftype="bam" /> |
253 | 203 |
254 <param name="source_select" value="history" /> | 204 <param name="source_select" value="history" /> |
255 <param name="reference_genome" value="example.fa" ftypet="fasta" /> | 205 <param name="reference_genome" value="example.fa" /> |
256 | 206 |
257 <param name="samtools_regions" value="entire_genome" /> | 207 <param name="sambamba_regions" value="entire_genome" /> |
258 | |
259 <param name="mpileup_parallelization_select" value="false" /> | |
260 <param name="sort_mpileup" value="true" /> | |
261 | |
262 <param name="parameters" value="default" /> | |
263 <param name="varscan_output_vcf" value="1" /> | |
264 | |
265 | |
266 <output name="snv_output" file="example.vcf" /> | |
267 </test> | |
268 <test><!-- Use parallelized samtools - @todo replace with sambamba! --> | |
269 <param name="alignments" value="example.bam" ftype="bam" /> | |
270 | |
271 <param name="source_select" value="history" /> | |
272 <param name="reference_genome" value="example.fa" ftypet="fasta" /> | |
273 | |
274 <param name="samtools_regions" value="entire_genome" /> | |
275 | |
276 <param name="mpileup_parallelization_select" value="true" /> | |
277 <param name="samtools_threads" value="2" /> | |
278 <param name="sort_mpileup" value="true" /> | |
279 | 208 |
280 <param name="parameters" value="default" /> | 209 <param name="parameters" value="default" /> |
281 <param name="varscan_output_vcf" value="1" /> | 210 <param name="varscan_output_vcf" value="1" /> |
282 | 211 |
283 | 212 |
284 <output name="snv_output" file="example.vcf" /> | 213 <output name="snv_output" file="example.vcf" /> |
285 </test> | 214 </test> |
286 </tests> | 215 </tests> |
287 | 216 |
288 <help> | 217 <help> |
289 **VarScan 2.3.6** | 218 **VarScan 2.4.2** |
290 | 219 |
291 VarScan is a platform-independent mutation caller for targeted, exome, and whole-genome resequencing data generated on Illumina, SOLiD, Life/PGM, Roche/454, and similar instruments. The newest version, VarScan 2, is written in Java, so it runs on most operating systems. | 220 VarScan is a platform-independent mutation caller for targeted, exome, and whole-genome resequencing data generated on Illumina, SOLiD, Life/PGM, Roche/454, and similar instruments. |
292 http://dx.doi.org/10.1101/gr.129684.111 | 221 http://dx.doi.org/10.1101/gr.129684.111 |
293 http://www.ncbi.nlm.nih.gov/pubmed/19542151 | 222 http://www.ncbi.nlm.nih.gov/pubmed/19542151 |
294 | 223 |
295 *VarScan* requires mpileup formatted input files, which are generally derived from BAM files. Since mpileup files can become humongous, the interim step of storing them is bypassed. Thus, in this wrapper one or multiple BAM/SAM files go in, get processed into an mpileup file and get directly linked to VarScan. | 224 *VarScan* requires mpileup input files, generally derived from BAM files. Since mpileup files can become humongous, the interim step of storing them can be bypassed using this tool. |
296 The samtools package is not able to parallelize the mpileup generation, which makes it a very slow process. | 225 Thus, in this wrapper one or multiple BAM/SAM files go in, get processed into an mpileup file and get directly linked to VarScan. |
297 Other people were aware of this and have written a version that can do parallelization: | |
298 https://github.com/mydatascience/parallel-mpileup | |
299 | |
300 Consequently, when a BAM file gets processed by this wrapper, it is processed by *parallel-mpileup* before it is sent to VarScan. | |
301 | 226 |
302 .. _VarScan: http://varscan.sourceforge.net/ | 227 .. _VarScan: http://varscan.sourceforge.net/ |
303 | 228 |
304 **Input formats** | 229 **Input formats** |
305 | 230 |
306 VarScan2 accepts sequencing alignments in the same format, either SAM or BAM (http://samtools.sourceforge.net/). The alignment files have to be linked to a reference genome by galaxy. This is indicated under every history item with e.g.: *"database: hg19"* for a link to hg19, or *"database: ?"* if the link is missing. | 231 VarScan2 accepts sequencing alignments in the same format, either SAM or BAM (http://samtools.sourceforge.net/). |
307 | 232 The alignment files must have a reference genome (dbkey) in Galaxy. |
308 **Installation** | |
309 | |
310 Make sure your reference genomes are properly annotated in "tool-data/all_fasta.loc", and linked to the names of the reference used for alignment. | |
311 | |
312 **License** | |
313 | |
314 * VarScan2.3.6: Non-Profit Open Software License 3.0 (Non-Profit OSL 3.0) | |
315 * parallel-mpileup: MIT License (https://github.com/mydatascience/parallel-mpileup/blob/master/samtools-0.1.19/COPYING) | |
316 | 233 |
317 Contact | 234 Contact |
318 ------- | 235 ------- |
319 | 236 |
320 The tool wrapper has been written by Youri Hoogstrate from the Erasmus | 237 The tool wrapper has been written by Youri Hoogstrate from the Erasmus |
321 Medical Center (Rotterdam, Netherlands) on behalf of the Translational | 238 Medical Center (Rotterdam, Netherlands) |
322 Research IT (TraIT) project: | |
323 | |
324 http://www.ctmm.nl/en/programmas/infrastructuren/traitprojecttranslationeleresearch | |
325 | |
326 More tools by the Translational Research IT (TraIT) project can be found | |
327 in the following toolsheds: | |
328 | |
329 http://toolshed.dtls.nl/ | |
330 | |
331 http://toolshed.g2.bx.psu.edu/ | |
332 | |
333 http://testtoolshed.g2.bx.psu.edu/ | |
334 </help> | 239 </help> |
335 <citations> | 240 <citations> |
336 <citation type="doi">10.1101/gr.129684.111</citation> | 241 <citation type="doi">10.1101/gr.129684.111</citation> |
337 </citations> | 242 </citations> |
338 </tool> | 243 </tool> |