diff snpEff.xml @ 7:7adfd0589f49 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit 5c6d595ecbf2e4e39c25662a165c9e475e01ecd1
author iuc
date Tue, 18 Apr 2017 09:33:56 -0400
parents 2950d5afa3fe
children ce135864629c
line wrap: on
line diff
--- a/snpEff.xml	Mon Dec 19 11:56:53 2016 -0500
+++ b/snpEff.xml	Tue Apr 18 09:33:56 2017 -0400
@@ -6,23 +6,20 @@
     <expand macro="requirements" />
     <expand macro="stdio" />
     <expand macro="version_command" />
-    <command>
-<![CDATA[
-        @CONDA_SNPEFF_JAR_PATH@ &&
-        java -Xmx6G -jar "\$SNPEFF_JAR_PATH/snpEff.jar" eff
-        -c "\$SNPEFF_JAR_PATH/snpEff.config"
+    <command><![CDATA[
+        snpEff -Xmx8g eff
         -i $inputFormat -o ${outputConditional.outputFormat} -upDownStreamLen $udLength
         #if $spliceSiteSize and str($spliceSiteSize) != '':
           -spliceSiteSize "$spliceSiteSize"
         #end if
         #if $spliceRegion.setSpliceRegions == 'yes':
-          #if $spliceRegion.spliceRegionExonSize and str($spliceRegion.spliceRegionExonSize) != '':
+          #if str($spliceRegion.spliceRegionExonSize)
             -spliceRegionExonSize $spliceRegion.spliceRegionExonSize
           #end if
-          #if $spliceRegion.spliceRegionIntronMin and str($spliceRegion.spliceRegionIntronMin) != '':
+          #if str($spliceRegion.spliceRegionIntronMin)
             -spliceRegionIntronMin $spliceRegion.spliceRegionIntronMin
           #end if
-          #if $spliceRegion.spliceRegionIntronMax and str($spliceRegion.spliceRegionIntronMax) != '':
+          #if str($spliceRegion.spliceRegionIntronMax)
             -spliceRegionIntronMax $spliceRegion.spliceRegionIntronMax
           #end if
         #end if
@@ -39,63 +36,63 @@
             -no $eff
           #end for
         #end if
-        #if str( $transcripts ) != 'None':
-          -onlyTr $transcripts
+        #if $transcripts
+          -onlyTr '$transcripts'
         #end if
-        #if str( $intervals ) != 'None':     ### fix this for multiple dataset input
-          -interval $intervals
+        #if $intervals     ### fix this for multiple dataset input
+          -interval '$intervals'
         #end if
         #if $statsFile:
-          -stats $statsFile
+          -stats '$statsFile'
         #end if
         #if str($offset) != 'default':
           ${offset}
         #end if
         #if str($chr).strip() != '':
-          -chr "$chr"
+          -chr '$chr'
         #end if
           $noLog
+        ## Regulation names can include parentheses: H3K4me3-MSC_(VB)_enriched_sites
+        ## Enclose them in single and double quotes, as the conda snpEff bash script will remove outer quotes
         #if $snpDb.genomeSrc == 'cached':
           -dataDir ${snpDb.genomeVersion.fields.path}
-          #if $snpDb.extra_annotations and str($snpDb.extra_annotations) != '':
-            #echo " "
-            #echo ' '.join(str($snpDb.extra_annotations).split(','))
-          #end if
-          #if $snpDb.regulation and str($snpDb.regulation) != '':
-            -reg #echo ' -reg '.join(str($snpDb.regulation).split(','))#
+          #if $snpDb.reg_section.regulation and str($snpDb.reg_section.regulation) != '':
+            #set $regs = [x for x in str($snpDb.reg_section.regulation).split(',')]
+            #for reg in $regs:
+              -reg '"${reg}"'
+            #end for
           #end if
           $snpDb.genomeVersion
         #elif $snpDb.genomeSrc == 'history':
-          -dataDir ${snpDb.snpeff_db.extra_files_path}
-          #if $snpDb.extra_annotations and str($snpDb.extra_annotations) != '':
-            #set xannotations = [' '] + str($snpDb.extra_annotations).split(',')
-            #echo " "
-            #echo ' -'.join($xannotations)
+          -dataDir '${snpDb.snpeff_db.extra_files_path}'
+          #if $snpDb.reg_section.regulation and str($snpDb.reg_section.regulation) != '':
+            #set $regs = [x for x in str($snpDb.reg_section.regulation).split(',')]
+            #for reg in $regs:
+              -reg '"${reg}"'
+            #end for
           #end if
-          #if $snpDb.regulation and str($snpDb.regulation) != '':
-            -reg #echo ' -reg '.join(str($snpDb.regulation).split(','))#
-          #end if
-          ${snpDb.snpeff_db.metadata.genome_version}
+          '${snpDb.snpeff_db.metadata.genome_version}'
         #else
           -download
-          $snpDb.genome_version
+          '$snpDb.genome_version'
         #end if
-        "$input" > "$snpeff_output";
+        '$input' > '$snpeff_output'
         #if $statsFile:
+            &&
             #import os
             #set $genes_file = str($statsFile) + '.genes.txt'
             #set $genes_file_name = os.path.split($genes_file)[-1]
-            mkdir $statsFile.files_path;
-            mv "$genes_file" #echo os.path.join($statsFile.files_path, $genes_file_name)#;
+            mkdir -p '$statsFile.files_path' &&  ## -p: with '&&' chaining a pre-existing directory must not abort the mv
+            mv '$genes_file' '#echo os.path.join($statsFile.files_path, $genes_file_name)#'
         #end if
         #if $outputConditional.outputFormat == 'gatk' and $outputConditional.gatk_v1
+          &&
           ## Replace real SnpEff version with 2.0.5 to prevent this GATK 1.x error: "The version of SnpEff used to generate the SnpEff input file (x.x) is not currently supported by the GATK. Supported versions are: [2.0.5]"
-          sed -i.bak -e 's/^\#\#SnpEffVersion="\(\S*\s\)/\#\#SnpEffVersion="2.0.5 - real is \1/' "$snpeff_output"
+          sed -i.bak -e 's/^\#\#SnpEffVersion="\(\S*\s\)/\#\#SnpEffVersion="2.0.5 - real is \1/' '$snpeff_output'
         #end if
-]]>
-    </command>
+    ]]></command>
     <inputs>
-        <param format="vcf,tabular,pileup,bed" name="input" type="data" label="Sequence changes (SNPs, MNPs, InDels)"/>
+        <param name="input" type="data" format="vcf,tabular,pileup,bed" label="Sequence changes (SNPs, MNPs, InDels)"/>
 
         <param name="inputFormat" type="select" label="Input format">
             <option value="vcf" selected="true">VCF</option>
@@ -127,45 +124,34 @@
                 <param name="genomeVersion" type="select" label="Genome">
                     <!--GENOME    DESCRIPTION-->
                     <options from_data_table="snpeffv_genomedb">
-                           <filter type="static_value" name="snpeff_version" value="@SNPEFF_VERSION@" column="1"/>
-                           <filter type="unique_value" column="2" />
+                            <filter type="static_value" name="snpeff_version" value="@SNPEFF_VERSION@" column="1"/>
+                            <filter type="unique_value" column="2" />
                     </options>
                 </param>
-                <param name="extra_annotations" type="select" display="checkboxes" multiple="true" label="Additional annotations">
-                       <help>These are available for only a few genomes</help>
-                       <options from_data_table="snpeffv_annotations">
-                           <filter type="param_value" ref="genomeVersion" key="genome" column="2" />
-                           <filter type="unique_value" column="3" />
-                       </options>
-                </param>
-                <param name="regulation" type="select" display="checkboxes" multiple="true" label="Non-coding and regulatory annotation">
-                       <help>These are available for only a few genomes</help>
-                       <options from_data_table="snpeffv_regulationdb">
-                           <filter type="param_value" ref="genomeVersion" key="genome" column="2" />
-                           <filter type="unique_value" column="3" />
-                       </options>
-                </param>
+                <section name="reg_section" expanded="false" title="Regulation options">
+                    <param name="regulation" type="select" display="checkboxes" multiple="true" label="Non-coding and regulatory annotation" help="These are available for only a few genomes">
+                        <options from_data_table="snpeffv_regulationdb">
+                            <filter type="param_value" ref="genomeVersion" key="genome" column="2" />
+                            <filter type="unique_value" column="3" />
+                        </options>
+                    </param>
+                </section>
             </when>
             <when value="history">
-                <param format="snpeffdb" name="snpeff_db" type="data" label="@SNPEFF_VERSION@ Genome Data">
+                <param name="snpeff_db" type="data" format="snpeffdb" label="@SNPEFF_VERSION@ Genome Data">
                     <options options_filter_attribute="metadata.snpeff_version" >
                         <filter type="add_value" value="@SNPEFF_VERSION@" />
                     </options>
-                    <validator type="expression" message="This version of SnpEff will only work with @SNPEFF_VERSION@ Genome databases.">value is not None and value.metadata.snpeff_version == "@SNPEFF_VERSION@"</validator>
+                    <validator type="expression" message="This version of SnpEff will only work with @SNPEFF_VERSION@ genome databases">value is not None and value.metadata.snpeff_version == "@SNPEFF_VERSION@"</validator>
                 </param>
-                <!-- From metadata -->
-                <param name="extra_annotations" type="select" display="checkboxes" multiple="true" label="Additional annotations">
-                    <help>These are available for only a few genomes</help>
-                    <options>
-                        <filter type="data_meta" ref="snpeff_db" key="annotation" />
-                    </options>
-                </param>
-                <param name="regulation" type="select" display="checkboxes" multiple="true" label="Non-coding and regulatory annotation">
-                    <help>These are available for only a few genomes</help>
-                    <options>
-                        <filter type="data_meta" ref="snpeff_db" key="regulation" />
-                    </options>
-                </param>
+                <section name="reg_section" expanded="false" title="Regulation options">
+                    <!-- From metadata -->
+                    <param name="regulation" type="select" display="checkboxes" multiple="true" label="Non-coding and regulatory annotation" help="These are available for only a few genomes">
+                        <options>
+                            <filter type="data_meta" ref="snpeff_db" key="regulation" />
+                        </options>
+                    </param>
+                </section>
             </when>
             <when value="named">
                 <param name="genome_version" type="text" value="" label="Snpff Genome Version Name (e.g. GRCh38.76)">
@@ -186,7 +172,7 @@
             <option value="20000">20000 bases</option>
         </param>
 
-        <param name="spliceSiteSize" type="select" optional="true" label="Set size for splice sites (donor and acceptor) in bases" help="Default: 2">
+        <param name="spliceSiteSize" type="select" optional="true" label="Set size for splice sites (donor and acceptor) in bases">
             <option value="1">1 base</option>
             <option value="2" selected="true">2 bases</option>
             <option value="3">3 bases</option>
@@ -212,24 +198,33 @@
         </conditional>
 
         <param name="annotations" type="select" display="checkboxes" multiple="true" label="Annotation options">
-            <option value="-cancer">Perform 'cancer' comparisons (somatic vs. germline)</option>
-            <option value="-canon">Only use canonical transcripts</option>
-            <option value="-geneId">Use gene ID instead of gene name (VCF output)</option>
-            <option value="-lof">Add loss of function (LOF) and nonsense mediated decay (NMD) tags</option>
-            <option value="-oicr">Add OICR tag in VCF file</option>
-            <option value="-onlyReg">Only use regulation tracks</option>
+            <option value="-formatEff">Use 'EFF' field compatible with older versions (instead of 'ANN')</option>
             <option value="-classic">Use Classic Effect names and amino acid variant annotations (NON_SYNONYMOUS_CODING vs missense_variant and G180R vs p.Gly180Arg/c.538G>C)</option>
+            <option value="-sequenceOntology">Override classic and use Sequence Ontology terms for effects (missense_variant vs NON_SYNONYMOUS_CODING)</option>
             <option value="-hgvs">Override classic and use HGVS annotations for amino acid annotations (p.Gly180Arg/c.538G>C vs G180R)</option>
-            <option value="-sequenceOntology">Override classic and use Sequence Ontolgy terms for effects (missense_variant vs NON_SYNONYMOUS_CODING)</option>
-            <option value="-formatEff">Use 'EFF' field compatible with older versions (instead of 'ANN').</option>
-	    <option value="-noHgvs">Do not add HGVS annotations.</option>
-	    <option value="-noLof">Do not add LOF and NMD annotations.</option>
-	    <option value="-noShiftHgvs">Do not shift variants according to HGVS notation (most 3prime end).</option>
-	    <option value="-oicr">Add OICR tag in VCF file. Default: false</option>
+            <option value="-hgvsOld">Old notation style notation: E.g. 'c.G123T' instead of 'c.123G>T' and 'X' instead of '*'</option>
+            <option value="-hgvs1LetterAa">Use one letter Amino acid codes in HGVS notation. E.g. p.R47G instead of p.Arg47Gly</option>
+            <option value="-hgvsTrId">Use transcript ID in HGVS notation. E.g. ENST00000252100:c.914C>G instead of c.914C>G</option>
+            <option value="-noShiftHgvs">Do not shift variants according to HGVS notation (most 3prime end)</option>
+            <option value="-noHgvs">Do not add HGVS annotations</option>
+            <option value="-canon">Only use canonical transcripts</option>
+            <option value="-onlyProtein">Only use protein coding transcripts</option>
+            <option value="-geneId">Use gene ID instead of gene name (VCF output)</option>
+            <option value="-noExpandIUB">Disable IUB code expansion in input variants</option>
+            <option value="-oicr">Add OICR tag in VCF file</option>
+            <option value="-lof">Add loss of function (LOF) and nonsense mediated decay (NMD) tags</option>
+            <option value="-noLof">Do not add LOF and NMD annotations</option>
+            <option value="-noMotif">Disable motif annotations</option>
+            <option value="-noNextProt">Disable NextProt annotations</option>
+            <option value="-noInteraction">Disable interaction annotations</option>
+            <option value="-cancer">Perform 'cancer' comparisons (somatic vs. germline)</option>
+            <!--  onlyReg option results in infrequent exceptions with version 4.3k
+            <option value="-onlyReg">Only use regulation tracks</option>
+            -->
         </param>
         <!-- -cancerSamples <file>           : Two column TXT file defining 'oringinal \t derived' samples. -->
-        <param name="intervals" format="bed" type="data" optional="true" label="Use custom interval file for annotation"/>
-        <param name="transcripts" format="tabular" type="data" optional="true" label="Only use the transcripts in this file." help="Format is one transcript ID per line."/>
+        <param name="intervals" type="data" format="bed" optional="true" label="Use custom interval file for annotation"/>
+        <param name="transcripts" type="data" format="tabular" optional="true" label="Only use the transcripts in this file" help="Format is one transcript ID per line"/>
         <param name="filterOut" type="select" display="checkboxes" multiple="true" label="Filter output">
             <option value="-no-downstream">Do not show DOWNSTREAM changes</option>
             <option value="-no-intergenic">Do not show INTERGENIC changes</option>
@@ -287,12 +282,11 @@
                     <option value="UTR_5_PRIME">UTR_5_PRIME  (5_prime_UTR_variant) Variant hits 5'UTR region  MODIFIER</option>
                     <option value="UTR_5_DELETED">UTR_5_DELETED  (5_prime_UTR_truncation + exon_loss_variant) The variant deletes an exon which is in the 5'UTR of the transcript  MODERATE</option>
                     <option value="NEXT_PROT">NEXT_PROT  (sequence_feature + exon_loss_variant) A 'NextProt' based annotation. Details are provided in the 'feature type' sub-field (ANN), or in the effect details (EFF).  MODERATE </option>
-
                 </param>
             </when>
         </conditional>
 
-        <param name="offset" type="select" display="radio" optional="true" label="Chromosomal position">
+        <param name="offset" type="select" display="radio" label="Chromosomal position">
             <option value="default" selected="true">Use default (based on input type)</option>
             <option value="-0">Force zero-based positions (both input and output)</option>
             <option value="-1">Force one-based positions (both input and output)</option>
@@ -300,7 +294,7 @@
         <param name="chr" type="text" label="Text to prepend to chromosome name">
             <help>
                By default SnpEff simplifies all chromosome names. For instance 'chr1' is just '1'.
-               You can prepend any string you want to the chromosome name.
+               You can prepend any string you want to the chromosome name
             </help>
             <validator type="regex" message="No whitespace allowed">^\S*$</validator>
         </param>
@@ -308,93 +302,38 @@
         <param name="noLog" type="boolean" truevalue="-noLog" falsevalue="" checked="true" label="Suppress reporting usage statistics to server"/>
     </inputs>
     <outputs>
-        <data format="vcf" name="snpeff_output" >
+        <data name="snpeff_output" format="vcf">
             <change_format>
                 <when input="outputConditional.outputFormat" value="bed" format="bed" />
                 <when input="outputConditional.outputFormat" value="bedAnn" format="bed" />
             </change_format>
         </data>
-        <data format="html" name="statsFile" label="${tool.name} on ${on_string} - stats">
+        <data name="statsFile" format="html" label="${tool.name} on ${on_string} - stats">
             <filter>generate_stats == True</filter>
         </data>
     </outputs>
     <tests>
-        <!-- Check that an effect was added in out VCF -->
-        <!-- Check for a HTML header indicating that this was successful -->
-        <!--
-        <output name="statsFile">
-            <assert_contents>
-            <has_text text="SnpEff: Variant analysis" />
-            </assert_contents>
-        </output>
-        -->
-        <!-- Setting filterOut throws exception in twilltestcase.py
         <test>
-        <param name="input" ftype="vcf" value="vcf_homhet.vcf"/>
-        <param name="inputFormat" value="vcf"/>
-        <param name="outputFormat" value="vcf"/>
-        <param name="genomeSrc" value="named"/>
-        <param name="genome_version" value="testCase"/>
-        <param name="udLength" value="0"/>
-        <param name="generate_stats" value="False"/>
-        <param name="filterOut" value="+-no-upstream"/>
-        <output name="snpeff_output">
-            <assert_contents>
-            <has_text text="EFF=" />
-            </assert_contents>
-        </output>
+            <param name="input" ftype="vcf" value="input.vcf"/>
+            <param name="inputFormat" value="vcf"/>
+            <param name="outputFormat" value="vcf"/>
+            <param name="genomeSrc" value="named"/>
+            <param name="genome_version" value="ebola_zaire"/>
+            <param name="udLength" value="0"/>
+            <param name="generate_stats" value="False"/>
+            <output name="snpeff_output">
+                <assert_contents>
+                    <has_text_matching expression="KJ660346\t572\t.*missense_variant" />
+                    <has_text_matching expression="KJ660346\t1024\t.*synonymous_variant" />
+                </assert_contents>
+            </output>
         </test>
-        -->
-
-        <test>
-        <param name="input" ftype="vcf" value="vcf_homhet.vcf"/>
-        <param name="inputFormat" value="vcf"/>
-        <param name="outputFormat" value="vcf"/>
-        <param name="genomeSrc" value="named"/>
-        <param name="genome_version" value="testCase"/>
-        <param name="udLength" value="0"/>
-        <!--
-        <param name="filterOut" value=""/>
-        -->
-        <param name="generate_stats" value="False"/>
-        <output name="snpeff_output">
-            <assert_contents>
-            <!-- Check that deleletions were evaluated -->
-            <has_text_matching expression="Y\t59030478\t.*intergenic_region" />
-            <!-- Check that insertion on last line was NOT evaluated -->
-            <has_text_matching expression="Y\t59032947\t.*\tGT" />
-            </assert_contents>
-        </output>
-        </test>
-
-        <!-- Check that NO UPSTREAM  effect was added -->
-        <!-- Setting filterOut throws exception in twilltestcase.py
-        <test>
-        <param name="input" ftype="vcf" value="vcf_homhet.vcf"/>
-        <param name="inputFormat" value="vcf"/>
-        <param name="outputFormat" value="vcf"/>
-        <param name="genomeSrc" value="named"/>
-        <param name="genome_version" value="testCase"/>
-        <param name="udLength" value="0"/>
-        <param name="filterOut" value="+-no-upstream"/>
-        <param name="generate_stats" value="False"/>
-        <output name="snpeff_output">
-            <assert_contents>
-            <not_has_text text="UPSTREAM" />
-            </assert_contents>
-        </output>
-        </test>
-        -->
-
     </tests>
     <help><![CDATA[
-
 This tool calculate the effect of variants (SNPs/MNPs/Insertions) and deletions.
 
 @EXTERNAL_DOCUMENTATION@
-
 ]]>
     </help>
     <expand macro="citations" />
 </tool>
-