comparison snpEff.xml @ 2:e09ce114d240 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff
author iuc
date Fri, 19 Feb 2016 08:26:25 -0500
parents 500832f27cbc
children b24873564cf6
comparison
equal deleted inserted replaced
1:500832f27cbc 2:e09ce114d240
2 <description>Variant effect and annotation</description> 2 <description>Variant effect and annotation</description>
3 <expand macro="requirements" /> 3 <expand macro="requirements" />
4 <macros> 4 <macros>
5 <import>snpEff_macros.xml</import> 5 <import>snpEff_macros.xml</import>
6 </macros> 6 </macros>
7 <expand macro="requirements" />
8 <expand macro="stdio" />
7 <command> 9 <command>
8 <![CDATA[ 10 <![CDATA[
9 java -Xmx6G -jar \$SNPEFF_JAR_PATH/snpEff.jar eff 11 java -Xmx6G -jar "\$SNPEFF_JAR_PATH/snpEff.jar" eff
10 -c \$SNPEFF_JAR_PATH/snpEff.config 12 -c "\$SNPEFF_JAR_PATH/snpEff.config"
11 -i $inputFormat -o ${outputConditional.outputFormat} -upDownStreamLen $udLength 13 -i $inputFormat -o ${outputConditional.outputFormat} -upDownStreamLen $udLength
12 #if $spliceSiteSize and $spliceSiteSize.__str__ != '': 14 #if $spliceSiteSize and str($spliceSiteSize) != '':
13 -spliceSiteSize $spliceSiteSize 15 -spliceSiteSize "$spliceSiteSize"
14 #end if 16 #end if
15 #if $filterIn and $filterIn.__str__ != 'no_filter': 17 #if $annotations and str($annotations) != '':
16 $filterIn
17 #end if
18 #if $filterHomHet and $filterHomHet.__str__ != 'no_filter':
19 $filterHomHet
20 #end if
21 #if $annotations and $annotations.__str__ != '':
22 #echo " " 18 #echo " "
23 #echo ' '.join($annotations.__str__.split(',')) 19 #echo ' '.join(str($annotations).split(','))
24 #end if 20 #end if
25 #if $filterOut and $filterOut.__str__ != '': 21 #if $filterOut and str($filterOut) != '':
26 #echo " " 22 #echo " "
27 #echo ' '.join($filterOut.__str__.split(',')) 23 #echo ' '.join(str($filterOut).split(','))
24 #end if
25 #if $filter.specificEffects == 'yes' and $filter.effects:
26 #for $eff in str($filter.effects).split(','):
27 -no $eff
28 #end for
28 #end if 29 #end if
29 #if str( $transcripts ) != 'None': 30 #if str( $transcripts ) != 'None':
30 -onlyTr $transcripts 31 -onlyTr $transcripts
31 #end if 32 #end if
32 #if str( $intervals ) != 'None': ### fix this for multiple dataset input 33 #if str( $intervals ) != 'None': ### fix this for multiple dataset input
33 -interval $intervals 34 -interval $intervals
34 #end if 35 #end if
35 #if $statsFile: 36 #if $statsFile:
36 -stats $statsFile 37 -stats $statsFile
37 #end if 38 #end if
38 #if $offset.__str__ != 'default': 39 #if str($offset) != 'default':
39 ${offset} 40 ${offset}
40 #end if 41 #end if
41 #if $chr.__str__.strip() != '': 42 #if str($chr).strip() != '':
42 -chr "$chr" 43 -chr "$chr"
43 #end if 44 #end if
44 $noLog 45 $noLog
45 #if $snpDb.genomeSrc == 'cached': 46 #if $snpDb.genomeSrc == 'cached':
46 -dataDir ${snpDb.genomeVersion.fields.path} 47 -dataDir ${snpDb.genomeVersion.fields.path}
47 #if $snpDb.extra_annotations and $snpDb.extra_annotations.__str__ != '': 48 #if $snpDb.extra_annotations and str($snpDb.extra_annotations) != '':
48 #echo " " 49 #echo " "
49 #echo ' '.join($snpDb.extra_annotations.__str__.split(',')) 50 #echo ' '.join(str($snpDb.extra_annotations).split(','))
50 #end if 51 #end if
51 #if $snpDb.regulation and $snpDb.regulation.__str__ != '': 52 #if $snpDb.regulation and str($snpDb.regulation) != '':
52 -reg #echo ' -reg '.join($snpDb.regulation.__str__.split(','))# 53 -reg #echo ' -reg '.join(str($snpDb.regulation).split(','))#
53 #end if 54 #end if
54 $snpDb.genomeVersion 55 $snpDb.genomeVersion
55 #elif $snpDb.genomeSrc == 'history': 56 #elif $snpDb.genomeSrc == 'history':
56 -dataDir ${snpDb.snpeff_db.extra_files_path} 57 -dataDir ${snpDb.snpeff_db.extra_files_path}
57 #if $snpDb.extra_annotations and $snpDb.extra_annotations.__str__ != '': 58 #if $snpDb.extra_annotations and str($snpDb.extra_annotations) != '':
58 #set xannotations = [' '] + $snpDb.extra_annotations.__str__.split(',') 59 #set xannotations = [' '] + str($snpDb.extra_annotations).split(',')
59 #echo " " 60 #echo " "
60 #echo ' -'.join($xannotations) 61 #echo ' -'.join($xannotations)
61 #end if 62 #end if
62 #if $snpDb.regulation and $snpDb.regulation.__str__ != '': 63 #if $snpDb.regulation and str($snpDb.regulation) != '':
63 -reg #echo ' -reg '.join($snpDb.regulation.__str__.split(','))# 64 -reg #echo ' -reg '.join(str($snpDb.regulation).split(','))#
64 #end if 65 #end if
65 ${snpDb.snpeff_db.metadata.genome_version} 66 ${snpDb.snpeff_db.metadata.genome_version}
66 #else 67 #else
67 -download 68 -download
68 $snpDb.genome_version 69 $snpDb.genome_version
69 #end if 70 #end if
70 $input > $snpeff_output ; 71 "$input" > "$snpeff_output";
71 #if $statsFile: 72 #if $statsFile:
72 #import os 73 #import os
73 #set $genes_file = str($statsFile) + '.genes.txt' 74 #set $genes_file = str($statsFile) + '.genes.txt'
74 #set $genes_file_name = os.path.split($genes_file)[-1] 75 #set $genes_file_name = os.path.split($genes_file)[-1]
75 mkdir $statsFile.files_path; 76 mkdir $statsFile.files_path;
76 mv $genes_file #echo os.path.join($statsFile.files_path, $genes_file_name)#; 77 mv "$genes_file" #echo os.path.join($statsFile.files_path, $genes_file_name)#;
77 #end if 78 #end if
78 #if $outputConditional.outputFormat == 'gatk' and $outputConditional.gatk_v1 79 #if $outputConditional.outputFormat == 'gatk' and $outputConditional.gatk_v1
79 ## Replace real SnpEff version with 2.0.5 to prevent this GATK 1.x error: "The version of SnpEff used to generate the SnpEff input file (x.x) is not currently supported by the GATK. Supported versions are: [2.0.5]" 80 ## Replace real SnpEff version with 2.0.5 to prevent this GATK 1.x error: "The version of SnpEff used to generate the SnpEff input file (x.x) is not currently supported by the GATK. Supported versions are: [2.0.5]"
80 sed -i 's/^\#\#SnpEffVersion="\(\S*\s\)/\#\#SnpEffVersion="2.0.5 - real is \1/' $snpeff_output 81 sed -i -e 's/^\#\#SnpEffVersion="\(\S*\s\)/\#\#SnpEffVersion="2.0.5 - real is \1/' "$snpeff_output"
81 #end if 82 #end if
82 ]]> 83 ]]>
83 </command> 84 </command>
84 <inputs> 85 <inputs>
85 <param format="vcf,tabular,pileup,bed" name="input" type="data" label="Sequence changes (SNPs, MNPs, InDels)"/> 86 <param format="vcf,tabular,pileup,bed" name="input" type="data" label="Sequence changes (SNPs, MNPs, InDels)"/>
157 <filter type="data_meta" ref="snpeff_db" key="regulation" /> 158 <filter type="data_meta" ref="snpeff_db" key="regulation" />
158 </options> 159 </options>
159 </param> 160 </param>
160 </when> 161 </when>
161 <when value="named"> 162 <when value="named">
162 <param name="genome_version" type="text" size="40" value="" label="SnpEff Genome Version Name (e.g. GRCh38.76)"> 163 <param name="genome_version" type="text" value="" label="SnpEff Genome Version Name (e.g. GRCh38.76)">
163 <help>@SNPEFF_DATABASE_URL@</help> 164 <help>@SNPEFF_DATABASE_URL@</help>
164 <validator type="regex" message="A genome version name is required">\S+</validator> 165 <validator type="regex" message="A genome version name is required">\S+</validator>
165 </param> 166 </param>
166 </when> 167 </when>
167 </conditional> 168 </conditional>
187 <option value="7">7 bases</option> 188 <option value="7">7 bases</option>
188 <option value="8">8 bases</option> 189 <option value="8">8 bases</option>
189 <option value="9">9 bases</option> 190 <option value="9">9 bases</option>
190 </param> 191 </param>
191 192
192 <param name="filterHomHet" type="select" display="radio" label="Filter homozygous / heterozygous changes">
193 <option value="no_filter" selected="true">No filter (analyze everything)</option>
194 <option value="-hom">Analyze homozygous sequence changes only</option>
195 <option value="-het">Analyze heterozygous sequence changes only</option>
196 </param>
197
198 <!-- The tool testing code can not handle select,radio,check boxes values that start with '-', so the '-' is added in the command generation -->
199 <param name="filterIn" type="select" display="radio" label="Filter sequence changes">
200 <option value="no_filter" selected="true">No filter (analyze everything)</option>
201 <option value="-del">Analyze deletions only</option>
202 <option value="-ins">Analyze insertions only</option>
203 <option value="-mnp">Only MNPs (multiple nucleotide polymorphisms)</option>
204 <option value="-snp">Only SNPs (single nucleotide polymorphisms)</option>
205 </param>
206
207 <param name="annotations" type="select" display="checkboxes" multiple="true" label="Annotation options"> 193 <param name="annotations" type="select" display="checkboxes" multiple="true" label="Annotation options">
208 <option value="-cancer">Perform 'cancer' comparisons (somatic vs. germline)</option> 194 <option value="-cancer">Perform 'cancer' comparisons (somatic vs. germline)</option>
209 <option value="-canon">Only use canonical transcripts</option> 195 <option value="-canon">Only use canonical transcripts</option>
210 <option value="-geneId">Use gene ID instead of gene name (VCF output)</option> 196 <option value="-geneId">Use gene ID instead of gene name (VCF output)</option>
211 <option value="-lof">Add loss of function (LOF) and nonsense mediated decay (NMD) tags</option> 197 <option value="-lof">Add loss of function (LOF) and nonsense mediated decay (NMD) tags</option>
222 <option value="-no-intergenic">Do not show INTERGENIC changes</option> 208 <option value="-no-intergenic">Do not show INTERGENIC changes</option>
223 <option value="-no-intron">Do not show INTRON changes</option> 209 <option value="-no-intron">Do not show INTRON changes</option>
224 <option value="-no-upstream">Do not show UPSTREAM changes</option> 210 <option value="-no-upstream">Do not show UPSTREAM changes</option>
225 <option value="-no-utr">Do not show 5_PRIME_UTR or 3_PRIME_UTR changes</option> 211 <option value="-no-utr">Do not show 5_PRIME_UTR or 3_PRIME_UTR changes</option>
226 </param> 212 </param>
213 <conditional name="filter">
214 <param name="specificEffects" type="select" label="Filter out specific Effects">
215 <option value="no">No</option>
216 <option value="yes">Yes</option>
217 </param>
218 <when value="no"/>
219 <when value="yes">
220 <param name="effects" type="select" display="checkboxes" multiple="true" label="Filter output: do not report these Effects">
221 <option value="CDS">CDS (coding_sequence_variant) The variant hits a CDS. MODIFIER</option>
222 <option value="CHROMOSOME_LARGE_DELETION">CHROMOSOME_LARGE_DELETION (chromosome) A large part (over 1%) of the chromosome was deleted. HIGH</option>
223 <option value="CODON_CHANGE">CODON_CHANGE (coding_sequence_variant) One or many codons are changed e.g.: An MNP of size multiple of 3 MODERATE</option>
224 <option value="CODON_INSERTION">CODON_INSERTION (inframe_insertion) One or many codons are inserted e.g.: An insert multiple of three in a codon boundary MODERATE</option>
225 <option value="CODON_CHANGE_PLUS_CODON_INSERTION">CODON_CHANGE_PLUS_CODON_INSERTION (disruptive_inframe_insertion) One codon is changed and one or many codons are inserted e.g.: An insert of size multiple of three, not at codon boundary MODERATE</option>
226 <option value="CODON_DELETION">CODON_DELETION (inframe_deletion) One or many codons are deleted e.g.: A deletion multiple of three at codon boundary MODERATE</option>
227 <option value="CODON_CHANGE_PLUS_CODON_DELETION">CODON_CHANGE_PLUS_CODON_DELETION (disruptive_inframe_deletion) One codon is changed and one or more codons are deleted e.g.: A deletion of size multiple of three, not at codon boundary MODERATE</option>
228 <option value="DOWNSTREAM">DOWNSTREAM (downstream_gene_variant) Downstream of a gene (default length: 5K bases) MODIFIER</option>
229 <option value="EXON">EXON (exon_variant) The variant hits an exon (from a non-coding transcript) or a retained intron. MODIFIER</option>
230 <option value="EXON_DELETED">EXON_DELETED (exon_loss_variant) A deletion removes the whole exon. HIGH</option>
231 <option value="FRAME_SHIFT">FRAME_SHIFT (frameshift_variant) Insertion or deletion causes a frame shift e.g.: An indel size is not a multiple of 3 HIGH</option>
232 <option value="GENE">GENE (gene_variant) The variant hits a gene. MODIFIER</option>
233 <option value="INTERGENIC">INTERGENIC (intergenic_region) The variant is in an intergenic region MODIFIER</option>
234 <option value="INTERGENIC_CONSERVED">INTERGENIC_CONSERVED (conserved_intergenic_variant) The variant is in a highly conserved intergenic region MODIFIER</option>
235 <option value="INTRAGENIC">INTRAGENIC (intragenic_variant) The variant hits a gene, but no transcripts within the gene MODIFIER</option>
236 <option value="INTRON">INTRON (intron_variant) Variant hits an intron. Technically, hits no exon in the transcript. MODIFIER</option>
237 <option value="INTRON_CONSERVED">INTRON_CONSERVED (conserved_intron_variant) The variant is in a highly conserved intronic region MODIFIER</option>
238 <option value="MICRO_RNA">MICRO_RNA (miRNA) Variant affects an miRNA MODIFIER</option>
239 <option value="NON_SYNONYMOUS_CODING">NON_SYNONYMOUS_CODING (missense_variant) Variant causes a codon that produces a different amino acid e.g.: Tgg/Cgg, W/R MODERATE</option>
240 <option value="NON_SYNONYMOUS_START">NON_SYNONYMOUS_START (initiator_codon_variant) Variant causes start codon to be mutated into another start codon (the new codon produces a different AA). e.g.: Atg/Ctg, M/L (ATG and CTG can be START codons) LOW</option>
241 <option value="NON_SYNONYMOUS_STOP">NON_SYNONYMOUS_STOP (stop_retained_variant) Variant causes stop codon to be mutated into another stop codon (the new codon produces a different AA). e.g.: Atg/Ctg, M/L (ATG and CTG can be START codons) LOW</option>
242 <option value="RARE_AMINO_ACID">RARE_AMINO_ACID (rare_amino_acid_variant) The variant hits a rare amino acid thus is likely to produce protein loss of function HIGH</option>
243 <option value="SPLICE_SITE_ACCEPTOR">SPLICE_SITE_ACCEPTOR (splice_acceptor_variant) The variant hits a splice acceptor site (defined as two bases before exon start, except for the first exon). HIGH</option>
244 <option value="SPLICE_SITE_DONOR">SPLICE_SITE_DONOR (splice_donor_variant) The variant hits a Splice donor site (defined as two bases after coding exon end, except for the last exon). HIGH</option>
245 <option value="SPLICE_SITE_REGION">SPLICE_SITE_REGION (splice_region_variant) A sequence variant in which a change has occurred within the region of the splice site, either within 1-3 bases of the exon or 3-8 bases of the intron. LOW</option>
246 <option value="SPLICE_SITE_BRANCH">SPLICE_SITE_BRANCH (splice_region_variant) A variant affecting a putative (Lariat) branch point, located in the intron. LOW</option>
247 <option value="SPLICE_SITE_BRANCH_U12">SPLICE_SITE_BRANCH_U12 (splice_region_variant) A variant affecting a putative (Lariat) branch point from U12 splicing machinery, located in the intron. MODERATE</option>
248 <option value="STOP_LOST">STOP_LOST (stop_lost) Variant causes stop codon to be mutated into a non-stop codon e.g.: Tga/Cga, */R HIGH</option>
249 <option value="START_GAINED">START_GAINED (5_prime_UTR_premature start_codon_gain_variant) A variant in 5'UTR region produces a three base sequence that can be a START codon. LOW</option>
250 <option value="START_LOST">START_LOST (start_lost) Variant causes start codon to be mutated into a non-start codon. e.g.: aTg/aGg, M/R HIGH</option>
251 <option value="STOP_GAINED">STOP_GAINED (stop_gained) Variant causes a STOP codon e.g.: Cag/Tag, Q/* HIGH</option>
252 <option value="SYNONYMOUS_CODING">SYNONYMOUS_CODING (synonymous_variant) Variant causes a codon that produces the same amino acid e.g.: Ttg/Ctg, L/L LOW</option>
253 <option value="SYNONYMOUS_START">SYNONYMOUS_START (start_retained) Variant causes start codon to be mutated into another start codon. e.g.: Ttg/Ctg, L/L (TTG and CTG can be START codons) LOW</option>
254 <option value="SYNONYMOUS_STOP">SYNONYMOUS_STOP (stop_retained_variant) Variant causes stop codon to be mutated into another stop codon. e.g.: taA/taG, */* LOW</option>
255 <option value="TRANSCRIPT">TRANSCRIPT (transcript_variant) The variant hits a transcript. MODIFIER</option>
256 <option value="REGULATION">REGULATION (regulatory_region_variant) The variant hits a known regulatory feature (non-coding). MODIFIER</option>
257 <option value="UPSTREAM">UPSTREAM (upstream_gene_variant) Upstream of a gene (default length: 5K bases) MODIFIER</option>
258 <option value="UTR_3_PRIME">UTR_3_PRIME (3_prime_UTR_variant) Variant hits 3'UTR region MODIFIER</option>
259 <option value="UTR_3_DELETED">UTR_3_DELETED (3_prime_UTR_truncation + exon_loss) The variant deletes an exon which is in the 3'UTR of the transcript MODERATE</option>
260 <option value="UTR_5_PRIME">UTR_5_PRIME (5_prime_UTR_variant) Variant hits 5'UTR region MODIFIER</option>
261 <option value="UTR_5_DELETED">UTR_5_DELETED (5_prime_UTR_truncation + exon_loss_variant) The variant deletes an exon which is in the 5'UTR of the transcript MODERATE</option>
262 <option value="NEXT_PROT">NEXT_PROT (sequence_feature + exon_loss_variant) A 'NextProt' based annotation. Details are provided in the 'feature type' sub-field (ANN), or in the effect details (EFF). MODERATE </option>
263
264 </param>
265 </when>
266 </conditional>
227 267
228 <param name="offset" type="select" display="radio" optional="true" label="Chromosomal position"> 268 <param name="offset" type="select" display="radio" optional="true" label="Chromosomal position">
229 <option value="default" selected="true">Use default (based on input type)</option> 269 <option value="default" selected="true">Use default (based on input type)</option>
230 <option value="-0">Force zero-based positions (both input and output)</option> 270 <option value="-0">Force zero-based positions (both input and output)</option>
231 <option value="-1">Force one-based positions (both input and output)</option> 271 <option value="-1">Force one-based positions (both input and output)</option>
250 </data> 290 </data>
251 <data format="html" name="statsFile" label="${tool.name} on ${on_string} - stats"> 291 <data format="html" name="statsFile" label="${tool.name} on ${on_string} - stats">
252 <filter>generate_stats == True</filter> 292 <filter>generate_stats == True</filter>
253 </data> 293 </data>
254 </outputs> 294 </outputs>
255 <expand macro="stdio" />
256 <tests> 295 <tests>
257 <!-- Check that an effect was added in out VCF --> 296 <!-- Check that an effect was added in out VCF -->
258 <!-- Check for a HTML header indicating that this was successful --> 297 <!-- Check for a HTML header indicating that this was successful -->
259 <!-- 298 <!--
260 <output name="statsFile"> 299 <output name="statsFile">
269 <param name="inputFormat" value="vcf"/> 308 <param name="inputFormat" value="vcf"/>
270 <param name="outputFormat" value="vcf"/> 309 <param name="outputFormat" value="vcf"/>
271 <param name="genomeSrc" value="named"/> 310 <param name="genomeSrc" value="named"/>
272 <param name="genome_version" value="testCase"/> 311 <param name="genome_version" value="testCase"/>
273 <param name="udLength" value="0"/> 312 <param name="udLength" value="0"/>
274 <param name="filterHomHet" value="no_filter"/>
275 <param name="filterIn" value="no_filter"/>
276 <param name="generate_stats" value="False"/> 313 <param name="generate_stats" value="False"/>
277 <param name="filterOut" value="+-no-upstream"/> 314 <param name="filterOut" value="+-no-upstream"/>
278 <output name="snpeff_output"> 315 <output name="snpeff_output">
279 <assert_contents> 316 <assert_contents>
280 <has_text text="EFF=" /> 317 <has_text text="EFF=" />
288 <param name="inputFormat" value="vcf"/> 325 <param name="inputFormat" value="vcf"/>
289 <param name="outputFormat" value="vcf"/> 326 <param name="outputFormat" value="vcf"/>
290 <param name="genomeSrc" value="named"/> 327 <param name="genomeSrc" value="named"/>
291 <param name="genome_version" value="testCase"/> 328 <param name="genome_version" value="testCase"/>
292 <param name="udLength" value="0"/> 329 <param name="udLength" value="0"/>
293 <param name="filterHomHet" value="+-het"/>
294 <param name="filterIn" value="no_filter"/>
295 <!--
296 <param name="filterOut" value=""/>
297 -->
298 <param name="generate_stats" value="False"/>
299 <output name="snpeff_output">
300 <assert_contents>
301 <!-- Check that NO effects were added since -het is set -->
302 <not_has_text text="EFF=NON_SYNONYMOUS_CODING" />
303 </assert_contents>
304 </output>
305 </test>
306
307 <test>
308 <param name="input" ftype="vcf" value="vcf_homhet.vcf"/>
309 <param name="inputFormat" value="vcf"/>
310 <param name="outputFormat" value="vcf"/>
311 <param name="genomeSrc" value="named"/>
312 <param name="genome_version" value="testCase"/>
313 <param name="udLength" value="0"/>
314 <param name="filterHomHet" value="no_filter"/>
315 <param name="filterIn" value="+-del"/>
316 <!-- 330 <!--
317 <param name="filterOut" value=""/> 331 <param name="filterOut" value=""/>
318 --> 332 -->
319 <param name="generate_stats" value="False"/> 333 <param name="generate_stats" value="False"/>
320 <output name="snpeff_output"> 334 <output name="snpeff_output">
334 <param name="inputFormat" value="vcf"/> 348 <param name="inputFormat" value="vcf"/>
335 <param name="outputFormat" value="vcf"/> 349 <param name="outputFormat" value="vcf"/>
336 <param name="genomeSrc" value="named"/> 350 <param name="genomeSrc" value="named"/>
337 <param name="genome_version" value="testCase"/> 351 <param name="genome_version" value="testCase"/>
338 <param name="udLength" value="0"/> 352 <param name="udLength" value="0"/>
339 <param name="filterHomHet" value="no_filter"/>
340 <param name="filterIn" value="no_filter"/>
341 <param name="filterOut" value="+-no-upstream"/> 353 <param name="filterOut" value="+-no-upstream"/>
342 <param name="generate_stats" value="False"/> 354 <param name="generate_stats" value="False"/>
343 <output name="snpeff_output"> 355 <output name="snpeff_output">
344 <assert_contents> 356 <assert_contents>
345 <not_has_text text="UPSTREAM" /> 357 <not_has_text text="UPSTREAM" />