view snpEff.xml @ 7:7adfd0589f49 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit 5c6d595ecbf2e4e39c25662a165c9e475e01ecd1
author iuc
date Tue, 18 Apr 2017 09:33:56 -0400
parents 2950d5afa3fe
children ce135864629c
line wrap: on
line source

<tool id="snpEff" name="SnpEff" version="@WRAPPER_VERSION@.0">
    <description>Variant effect and annotation</description>
    <macros>
        <import>snpEff_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <expand macro="version_command" />
    <command><![CDATA[
        snpEff -Xmx8g eff
        -i $inputFormat -o ${outputConditional.outputFormat} -upDownStreamLen $udLength
        #if $spliceSiteSize and str($spliceSiteSize) != '':
          -spliceSiteSize "$spliceSiteSize"
        #end if
        #if $spliceRegion.setSpliceRegions == 'yes':
          #if str($spliceRegion.spliceRegionExonSize)
            -spliceRegionExonSize $spliceRegion.spliceRegionExonSize
          #end if
          #if str($spliceRegion.spliceRegionIntronMin)
            -spliceRegionIntronMin $spliceRegion.spliceRegionIntronMin
          #end if
          #if str($spliceRegion.spliceRegionIntronMax)
            -spliceRegionIntronMax $spliceRegion.spliceRegionIntronMax
          #end if
        #end if
        #if $annotations and str($annotations) != '':
          #echo " "
          #echo ' '.join(str($annotations).split(','))
        #end if
        #if $filterOut and str($filterOut) != '':
          #echo " "
          #echo ' '.join(str($filterOut).split(','))
        #end if
        #if $filter.specificEffects == 'yes' and $filter.effects:
          #for $eff in str($filter.effects).split(','):
            -no $eff
          #end for
        #end if
        #if $transcripts
          -onlyTr '$transcripts'
        #end if
        #if $intervals     ### fix this for multiple dataset input
          -interval '$intervals'
        #end if
        #if $statsFile:
          -stats '$statsFile'
        #end if
        #if str($offset) != 'default':
          ${offset}
        #end if
        #if str($chr).strip() != '':
          -chr '$chr'
        #end if
          $noLog
        ## Regulation names can include parentheses: H3K4me3-MSC_(VB)_enriched_sites
        ## Enclose them in in single and double quotes, as the conda snpEff bash script will remove outer quotes
        #if $snpDb.genomeSrc == 'cached':
          -dataDir ${snpDb.genomeVersion.fields.path}
          #if $snpDb.reg_section.regulation and str($snpDb.reg_section.regulation) != '':
            #set $regs = [x for x in str($snpDb.reg_section.regulation).split(',')]
            #for reg in $regs:
              -reg '"${reg}"'
            #end for
          #end if
          $snpDb.genomeVersion
        #elif $snpDb.genomeSrc == 'history':
          -dataDir '${snpDb.snpeff_db.extra_files_path}'
          #if $snpDb.reg_section.regulation and str($snpDb.reg_section.regulation) != '':
            #set $regs = [x for x in str($snpDb.reg_section.regulation).split(',')]
            #for reg in $regs:
              -reg '"${reg}"'
            #end for
          #end if
          '${snpDb.snpeff_db.metadata.genome_version}'
        #else
          -download
          '$snpDb.genome_version'
        #end if
        '$input' > '$snpeff_output'
        #if $statsFile:
            &&
            #import os
            #set $genes_file = str($statsFile) + '.genes.txt'
            #set $genes_file_name = os.path.split($genes_file)[-1]
            mkdir '$statsFile.files_path' &&
            mv '$genes_file' '#echo os.path.join($statsFile.files_path, $genes_file_name)#'
        #end if
        #if $outputConditional.outputFormat == 'gatk' and $outputConditional.gatk_v1
          &&
          ## Replace real SnpEff version with 2.0.5 to prevent this GATK 1.x error: "The version of SnpEff used to generate the SnpEff input file (x.x) is not currently supported by the GATK. Supported versions are: [2.0.5]"
          sed -i.bak -e 's/^\#\#SnpEffVersion="\(\S*\s\)/\#\#SnpEffVersion="2.0.5 - real is \1/' '$snpeff_output'
        #end if
    ]]></command>
    <inputs>
        <param name="input" type="data" format="vcf,tabular,pileup,bed" label="Sequence changes (SNPs, MNPs, InDels)"/>

        <param name="inputFormat" type="select" label="Input format">
            <option value="vcf" selected="true">VCF</option>
            <option value="bed">BED (Deprecated)</option>
        </param>

        <conditional name="outputConditional">
            <param name="outputFormat" type="select" label="Output format">
                <option value="vcf" selected="true">VCF (only if input is VCF)</option>
                <option value="gatk">GATK-compatible VCF (only if input is VCF)</option>
                <option value="bed">BED</option>
                <option value="bedAnn">BED annotations</option>
            </param>
            <when value="vcf" />
            <when value="gatk">
                <param name="gatk_v1" type="boolean" checked="true" label="Compatible with GATK 1.x" />
            </when>
            <when value="bed" />
            <when value="bedAnn" />
        </conditional>

        <conditional name="snpDb">
            <param name="genomeSrc" type="select" label="Genome source">
                <option value="cached">Locally installed reference genome</option>
                <option value="history">Reference genome from your history</option>
                <option value="named">Named on demand</option>
            </param>
            <when value="cached">
                <param name="genomeVersion" type="select" label="Genome">
                    <!--GENOME    DESCRIPTION-->
                    <options from_data_table="snpeffv_genomedb">
                            <filter type="static_value" name="snpeff_version" value="@SNPEFF_VERSION@" column="1"/>
                            <filter type="unique_value" column="2" />
                    </options>
                </param>
                <section name="reg_section" expanded="false" title="Regulation options">
                    <param name="regulation" type="select" display="checkboxes" multiple="true" label="Non-coding and regulatory annotation" help="These are available for only a few genomes">
                        <options from_data_table="snpeffv_regulationdb">
                            <filter type="param_value" ref="genomeVersion" key="genome" column="2" />
                            <filter type="unique_value" column="3" />
                        </options>
                    </param>
                </section>
            </when>
            <when value="history">
                <param name="snpeff_db" type="data" format="snpeffdb" label="@SNPEFF_VERSION@ Genome Data">
                    <options options_filter_attribute="metadata.snpeff_version" >
                        <filter type="add_value" value="@SNPEFF_VERSION@" />
                    </options>
                    <validator type="expression" message="This version of SnpEff will only work with @SNPEFF_VERSION@ genome databases">value is not None and value.metadata.snpeff_version == "@SNPEFF_VERSION@"</validator>
                </param>
                <section name="reg_section" expanded="false" title="Regulation options">
                    <!-- From metadata -->
                    <param name="regulation" type="select" display="checkboxes" multiple="true" label="Non-coding and regulatory annotation" help="These are available for only a few genomes">
                        <options>
                            <filter type="data_meta" ref="snpeff_db" key="regulation" />
                        </options>
                    </param>
                </section>
            </when>
            <when value="named">
                <param name="genome_version" type="text" value="" label="Snpff Genome Version Name (e.g. GRCh38.76)">
                    <help>@SNPEFF_DATABASE_URL@</help>
                    <validator type="regex" message="A genome version name is required">\S+</validator>
                </param>
            </when>
        </conditional>

        <param name="udLength" type="select" label="Upstream / Downstream length">
            <option value="0">No upstream / downstream intervals (0 bases)</option>
            <option value="200">200 bases</option>
            <option value="500">500 bases</option>
            <option value="1000">1000 bases</option>
            <option value="2000">2000 bases</option>
            <option value="5000" selected="true">5000 bases</option>
            <option value="10000">10000 bases</option>
            <option value="20000">20000 bases</option>
        </param>

        <param name="spliceSiteSize" type="select" optional="true" label="Set size for splice sites (donor and acceptor) in bases">
            <option value="1">1 base</option>
            <option value="2" selected="true">2 bases</option>
            <option value="3">3 bases</option>
            <option value="4">4 bases</option>
            <option value="5">5 bases</option>
            <option value="6">6 bases</option>
            <option value="7">7 bases</option>
            <option value="8">8 bases</option>
            <option value="9">9 bases</option>
        </param>

        <conditional name="spliceRegion">
            <param name="setSpliceRegions" type="select" label="spliceRegion Settings">
                <option value="no">Use Defaults</option>
                <option value="yes">Set Splice Region Parameters</option>
            </param>
            <when value="no"/>
            <when value="yes">
                <param name="spliceRegionExonSize" type="integer" value="" min="1" max="10" optional="true" label="Set size for splice site region within exons. Default: 3 bases"/>
                <param name="spliceRegionIntronMin" type="integer" value="" min="1" max="10" optional="true" label="Set minimum number of bases for splice site region within intron. Default: 3 bases"/>
                <param name="spliceRegionIntronMax" type="integer" value="" min="1" max="10" optional="true" label="Set maximum number of bases for splice site region within intron. Default: 8 bases"/>
            </when>
        </conditional>

        <param name="annotations" type="select" display="checkboxes" multiple="true" label="Annotation options">
            <option value="-formatEff">Use 'EFF' field compatible with older versions (instead of 'ANN')</option>
            <option value="-classic">Use Classic Effect names and amino acid variant annotations (NON_SYNONYMOUS_CODING vs missense_variant and G180R vs p.Gly180Arg/c.538G>C)</option>
            <option value="-sequenceOntology">Override classic and use Sequence Ontolgy terms for effects (missense_variant vs NON_SYNONYMOUS_CODING)</option>
            <option value="-hgvs">Override classic and use HGVS annotations for amino acid annotations (p.Gly180Arg/c.538G>C vs G180R)</option>
            <option value="-hgvsOld">Old notation style notation: E.g. 'c.G123T' instead of 'c.123G>T' and 'X' instead of '*'</option>
            <option value="-hgvs1LetterAa">Use one letter Amino acid codes in HGVS notation. E.g. p.R47G instead of p.Arg47Gly</option>
            <option value="-hgvsTrId">Use transcript ID in HGVS notation. E.g. ENST00000252100:c.914C>G instead of c.914C>G</option>
            <option value="-noShiftHgvs">Do not shift variants according to HGVS notation (most 3prime end)</option>
            <option value="-noHgvs">Do not add HGVS annotations</option>
            <option value="-canon">Only use canonical transcripts</option>
            <option value="-onlyProtein">Only use protein coding transcripts</option>
            <option value="-geneId">Use gene ID instead of gene name (VCF output)</option>
            <option value="-noExpandIUB">Disable IUB code expansion in input variants</option>
            <option value="-oicr">Add OICR tag in VCF file</option>
            <option value="-lof">Add loss of function (LOF) and nonsense mediated decay (NMD) tags</option>
            <option value="-noLof">Do not add LOF and NMD annotations</option>
            <option value="-noMotif">Disable motif annotations</option>
            <option value="-noNextProt">Disable NextProt annotations</option>
            <option value="-noInteraction">Disable interaction annotations</option>
            <option value="-cancer">Perform 'cancer' comparisons (somatic vs. germline)</option>
            <!--  onlyReg option results in ifrequent exceptions with version 4.3k
            <option value="-onlyReg">Only use regulation tracks</option>
            -->
        </param>
        <!-- -cancerSamples <file>           : Two column TXT file defining 'oringinal \t derived' samples. -->
        <param name="intervals" type="data" format="bed" optional="true" label="Use custom interval file for annotation"/>
        <param name="transcripts" type="data" format="tabular" optional="true" label="Only use the transcripts in this file" help="Format is one transcript ID per line"/>
        <param name="filterOut" type="select" display="checkboxes" multiple="true" label="Filter output">
            <option value="-no-downstream">Do not show DOWNSTREAM changes</option>
            <option value="-no-intergenic">Do not show INTERGENIC changes</option>
            <option value="-no-intron">Do not show INTRON changes</option>
            <option value="-no-upstream">Do not show UPSTREAM changes</option>
            <option value="-no-utr">Do not show 5_PRIME_UTR or 3_PRIME_UTR changes</option>
        </param>
        <conditional name="filter">
            <param name="specificEffects" type="select" label="Filter out specific Effects">
                <option value="no">No</option>
                <option value="yes">Yes</option>
            </param>
            <when value="no"/>
            <when value="yes">
                <param name="effects" type="select" display="checkboxes" multiple="true" label="Filter output: do not report these Effects">
                    <option value="CDS">CDS  (coding_sequence_variant) The variant hits a CDS.  MODIFIER</option>
                    <option value="CHROMOSOME_LARGE_DELETION">CHROMOSOME_LARGE_DELETION  (chromosome) A large parte (over 1%) of the chromosome was deleted.  HIGH</option>
                    <option value="CODON_CHANGE">CODON_CHANGE  (coding_sequence_variant) One or many codons are changed e.g.: An MNP of size multiple of 3  MODERATE</option>
                    <option value="CODON_INSERTION">CODON_INSERTION  (inframe_insertion) One or many codons are inserted e.g.: An insert multiple of three in a codon boundary  MODERATE</option>
                    <option value="CODON_CHANGE_PLUS_CODON_INSERTION">CODON_CHANGE_PLUS_CODON_INSERTION  (disruptive_inframe_insertion) One codon is changed and one or many codons are inserted e.g.: An insert of size multiple of three, not at codon boundary  MODERATE</option>
                    <option value="CODON_DELETION">CODON_DELETION  (inframe_deletion) One or many codons are deleted e.g.: A deletion multiple of three at codon boundary  MODERATE</option>
                    <option value="CODON_CHANGE_PLUS_CODON_DELETION">CODON_CHANGE_PLUS_CODON_DELETION  (disruptive_inframe_deletion) One codon is changed and one or more codons are deleted e.g.: A deletion of size multiple of three, not at codon boundary  MODERATE</option>
                    <option value="DOWNSTREAM">DOWNSTREAM  (downstream_gene_variant) Downstream of a gene (default length: 5K bases)  MODIFIER</option>
                    <option value="EXON">EXON  (exon_variant) The variant hits an exon (from a non-coding transcript) or a retained intron.  MODIFIER</option>
                    <option value="EXON_DELETED">EXON_DELETED  (exon_loss_variant) A deletion removes the whole exon.  HIGH</option>
                    <option value="FRAME_SHIFT">FRAME_SHIFT  (frameshift_variant) Insertion or deletion causes a frame shift e.g.: An indel size is not multple of 3  HIGH</option>
                    <option value="GENE">GENE  (gene_variant) The variant hits a gene.  MODIFIER</option>
                    <option value="INTERGENIC">INTERGENIC  (intergenic_region) The variant is in an intergenic region  MODIFIER</option>
                    <option value="INTERGENIC_CONSERVED">INTERGENIC_CONSERVED  (conserved_intergenic_variant) The variant is in a highly conserved intergenic region  MODIFIER</option>
                    <option value="INTRAGENIC">INTRAGENIC  (intragenic_variant) The variant hits a gene, but no transcripts within the gene  MODIFIER</option>
                    <option value="INTRON">INTRON  (intron_variant) Variant hits and intron. Technically, hits no exon in the transcript.  MODIFIER</option>
                    <option value="INTRON_CONSERVED">INTRON_CONSERVED  (conserved_intron_variant) The variant is in a highly conserved intronic region  MODIFIER</option>
                    <option value="MICRO_RNA">MICRO_RNA  (miRNA) Variant affects an miRNA  MODIFIER</option>
                    <option value="NON_SYNONYMOUS_CODING">NON_SYNONYMOUS_CODING  (missense_variant) Variant causes a codon that produces a different amino acid e.g.: Tgg/Cgg, W/R  MODERATE</option>
                    <option value="NON_SYNONYMOUS_START">NON_SYNONYMOUS_START  (initiator_codon_variant) Variant causes start codon to be mutated into another start codon (the new codon produces a different AA).  e.g.: Atg/Ctg, M/L (ATG and CTG can be START codons)  LOW</option>
                    <option value="NON_SYNONYMOUS_STOP">NON_SYNONYMOUS_STOP  (stop_retained_variant) Variant causes stop codon to be mutated into another stop codon (the new codon produces a different AA).  e.g.: Atg/Ctg, M/L (ATG and CTG can be START codons)  LOW</option>
                    <option value="RARE_AMINO_ACID">RARE_AMINO_ACID  (rare_amino_acid_variant) The variant hits a rare amino acid thus is likely to produce protein loss of function  HIGH</option>
                    <option value="SPLICE_SITE_ACCEPTOR">SPLICE_SITE_ACCEPTOR  (splice_acceptor_variant) The variant hits a splice acceptor site (defined as two bases before exon start, except for the first exon).  HIGH</option>
                    <option value="SPLICE_SITE_DONOR">SPLICE_SITE_DONOR  (splice_donor_variant) The variant hits a Splice donor site (defined as two bases after coding exon end, except for the last exon).  HIGH</option>
                    <option value="SPLICE_SITE_REGION">SPLICE_SITE_REGION  (splice_region_variant) A sequence variant in which a change has occurred within the region of the splice site, either within 1-3 bases of the exon or 3-8 bases of the intron.  LOW</option>
                    <option value="SPLICE_SITE_BRANCH">SPLICE_SITE_BRANCH  (splice_region_variant) A varaint affective putative (Lariat) branch point, located in the intron.  LOW</option>
                    <option value="SPLICE_SITE_BRANCH_U12">SPLICE_SITE_BRANCH_U12  (splice_region_variant) A varaint affective putative (Lariat) branch point from U12 splicing machinery, located in the intron.  MODERATE</option>
                    <option value="STOP_LOST">STOP_LOST  (stop_lost) Variant causes stop codon to be mutated into a non-stop codon e.g.: Tga/Cga, */R  HIGH</option>
                    <option value="START_GAINED">START_GAINED  (5_prime_UTR_premature start_codon_gain_variant) A variant in 5'UTR region produces a three base sequence that can be a START codon.  LOW</option>
                    <option value="START_LOST">START_LOST  (start_lost) Variant causes start codon to be mutated into a non-start codon.  e.g.: aTg/aGg, M/R  HIGH</option>
                    <option value="STOP_GAINED">STOP_GAINED  (stop_gained) Variant causes a STOP codon e.g.: Cag/Tag, Q/*  HIGH</option>
                    <option value="SYNONYMOUS_CODING">SYNONYMOUS_CODING  (synonymous_variant) Variant causes a codon that produces the same amino acid e.g.: Ttg/Ctg, L/L  LOW</option>
                    <option value="SYNONYMOUS_START">SYNONYMOUS_START  (start_retained) Variant causes start codon to be mutated into another start codon.  e.g.: Ttg/Ctg, L/L (TTG and CTG can be START codons)  LOW</option>
                    <option value="SYNONYMOUS_STOP">SYNONYMOUS_STOP  (stop_retained_variant) Variant causes stop codon to be mutated into another stop codon.  e.g.: taA/taG, */*  LOW</option>
                    <option value="TRANSCRIPT">TRANSCRIPT  (transcript_variant) The variant hits a transcript.  MODIFIER</option>
                    <option value="REGULATION">REGULATION  (regulatory_region_variant) The variant hits a known regulatory feature (non-coding).  MODIFIER</option>
                    <option value="UPSTREAM">UPSTREAM  (upstream_gene_variant) Upstream of a gene (default length: 5K bases)  MODIFIER</option>
                    <option value="UTR_3_PRIME">UTR_3_PRIME  (3_prime_UTR_variant) Variant hits 3'UTR region  MODIFIER</option>
                    <option value="UTR_3_DELETED">UTR_3_DELETED  (3_prime_UTR_truncation + exon_loss) The variant deletes an exon which is in the 3'UTR of the transcript  MODERATE</option>
                    <option value="UTR_5_PRIME">UTR_5_PRIME  (5_prime_UTR_variant) Variant hits 5'UTR region  MODIFIER</option>
                    <option value="UTR_5_DELETED">UTR_5_DELETED  (5_prime_UTR_truncation + exon_loss_variant) The variant deletes an exon which is in the 5'UTR of the transcript  MODERATE</option>
                    <option value="NEXT_PROT">NEXT_PROT  (sequence_feature + exon_loss_variant) A 'NextProt' based annotation. Details are provided in the 'feature type' sub-field (ANN), or in the effect details (EFF).  MODERATE </option>
                </param>
            </when>
        </conditional>

        <param name="offset" type="select" display="radio" label="Chromosomal position">
            <option value="default" selected="true">Use default (based on input type)</option>
            <option value="-0">Force zero-based positions (both input and output)</option>
            <option value="-1">Force one-based positions (both input and output)</option>
        </param>
        <param name="chr" type="text" label="Text to prepend to chromosome name">
            <help>
               By default SnpEff simplifies all chromosome names. For instance 'chr1' is just '1'.
               You can prepend any string you want to the chromosome name
            </help>
            <validator type="regex" message="No whitespace allowed">^\S*$</validator>
        </param>
        <param name="generate_stats" type="boolean" truevalue="" falsevalue="-noStats" checked="true" label="Produce Summary Stats"/>
        <param name="noLog" type="boolean" truevalue="-noLog" falsevalue="" checked="true" label="Suppress reporting usage statistics to server"/>
    </inputs>
    <outputs>
        <data name="snpeff_output" format="vcf">
            <change_format>
                <when input="outputConditional.outputFormat" value="bed" format="bed" />
                <when input="outputConditional.outputFormat" value="bedAnn" format="bed" />
            </change_format>
        </data>
        <data name="statsFile" format="html" label="${tool.name} on ${on_string} - stats">
            <filter>generate_stats == True</filter>
        </data>
    </outputs>
    <tests>
        <test>
            <param name="input" ftype="vcf" value="input.vcf"/>
            <param name="inputFormat" value="vcf"/>
            <param name="outputFormat" value="vcf"/>
            <param name="genomeSrc" value="named"/>
            <param name="genome_version" value="ebola_zaire"/>
            <param name="udLength" value="0"/>
            <param name="generate_stats" value="False"/>
            <output name="snpeff_output">
                <assert_contents>
                    <has_text_matching expression="KJ660346\t572\t.*missense_variant" />
                    <has_text_matching expression="KJ660346\t1024\t.*synonymous_variant" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
This tool calculate the effect of variants (SNPs/MNPs/Insertions) and deletions.

@EXTERNAL_DOCUMENTATION@
]]>
    </help>
    <expand macro="citations" />
</tool>