<!--
view annotate_variants.xml @ 23:5db0545b9004 draft

update to v0.1.7.3
author wolma
date Thu, 21 Jul 2016 03:55:49 -0400
parents c46406466625
children
line wrap: on
line source
-->

<tool id="annotate_variants" name="Variant Annotation" version="0.1.7.3">
  <!-- One-line summary of the tool's purpose. -->
  <description>Predict the effects of SNPs and indels on known genes in the reference genome using SnpEff</description>
  <!-- Pull in macro definitions from toolshed_macros.xml. -->
  <macros>
    <import>toolshed_macros.xml</import>
  </macros>
  <!-- Expand the macro named "requirements" (presumably defined in the imported macro file; confirm there). -->
  <expand macro="requirements" />
  <!-- Command used to report the version of the wrapped MiModD package. -->
  <version_command>python3 -m MiModD version -q</version_command>
  <command><![CDATA[
    ## Cheetah template that assembles the "MiModD annotate" command line
    ## from the tool form. The template is wrapped in CDATA so that shell
    ## or Cheetah syntax never needs XML escaping.
    python3 -m MiModD annotate

    "$inputfile"

    ## SnpEff-specific options are only emitted when SnpEff was selected
    ## as the annotation tool.
    #if $str($annotool.name) == 'snpeff':
      --genome "${annotool.genomeVersion}"
      #if $annotool.ori_output:
        --snpeff-out "$snpeff_file"
      #end if
      #if $annotool.stats:
        --stats "$summary_file"
      #end if
      ## Boolean switches render as their truevalue or as an empty string.
      ${annotool.snpeff_settings.chr} ${annotool.snpeff_settings.no_us} ${annotool.snpeff_settings.no_ds} ${annotool.snpeff_settings.no_intron} ${annotool.snpeff_settings.no_intergenic} ${annotool.snpeff_settings.no_utr}
      ## The optional integers are compared as strings instead of being
      ## tested for truthiness, so that an explicit value of 0 is passed
      ## on rather than being silently dropped. Unset values render as
      ## an empty string (or "None", depending on the Galaxy release).
      #if $str($annotool.snpeff_settings.min_cov) not in ('', 'None'):
        --minC "${annotool.snpeff_settings.min_cov}"
      #end if
      #if $str($annotool.snpeff_settings.min_qual) not in ('', 'None'):
        --minQ "${annotool.snpeff_settings.min_qual}"
      #end if
      #if $str($annotool.snpeff_settings.ud) not in ('', 'None'):
        --ud "${annotool.snpeff_settings.ud}"
      #end if
    #end if

    --ofile "$outputfile"
    #if $str($formatting.oformat) == "text":
      --oformat text
    #end if
    ## The hyperlink options only exist in the "html" branch of the
    ## formatting conditional, so they are accessed only in that case.
    #if $str($formatting.oformat) == "html":
      #if $formatting.formatter_file:
        --link "${formatting.formatter_file}"
      #end if
      #if $formatting.species:
        --species "${formatting.species}"
      #end if
    #end if

    ## An empty grouping value means "keep the input order"; no option is
    ## emitted in that case.
    #if $str($grouping):
      --grouping $grouping
    #end if
    --verbose
  ]]></command>

  <inputs>
    <!-- VCF dataset handed to the command as the positional input file. -->
    <param format="vcf" label="vcf inputfile to be annotated" name="inputfile" type="data" />
    <!-- Selected value is forwarded as the grouping option; the empty value omits the option. -->
    <param label="Group variants by" name="grouping" type="select">
      <option value="">order in the input file</option>
      <option value="by_sample">sample</option>
      <option value="by_genes">most affected genes</option>
    </param>
    <!-- Output format; the hyperlink-related parameters exist for HTML output only. -->
    <conditional name="formatting">
      <param label="Format of the annotation output file" name="oformat" type="select">
        <option value="html">HTML</option>
        <option value="text">Tab-separated plain text</option>
      </param>
      <when value="html">
        <param format="txt" label="Optional file with hyperlink formatting instructions" name="formatter_file" optional="true" type="data" />
        <param help="Overwrite the species guess from the SnpEff genome, often not necessary" label="Species" name="species" type="text" />
      </when>
      <!-- Plain-text output has no extra parameters; declared explicitly so every select option has a matching case. -->
      <when value="text" />
    </conditional>
    <!-- Choice of annotation backend; all SnpEff parameters live in the "snpeff" case. -->
    <conditional name="annotool">
      <param help="Select SnpEff here, if you want to have the vcf input annotated with genomic feature information. Select None if you do not want additional annotation, if you do not have SnpEff installed, or if you have no appropriate SnpEff annotation file for the input." label="Use this tool to annotate the input file" name="name" type="select">
        <option value="snpeff">SnpEff</option>
        <option value="None">None</option>
      </param>
      <when value="snpeff">
        <!-- The genome dropdown is populated from the selected tabular dataset: column 0 is the display name, column 1 the value. -->
        <param format="tabular" label="genome list" name="genome_list" type="data" />
        <param label="Genome" name="genomeVersion" type="select">
          <options from_dataset="genome_list">
            <column index="0" name="name" />
            <column index="1" name="value" />
          </options>
        </param>
        <param checked="true" label="Keep the original SnpEff output" name="ori_output" type="boolean" />
        <param checked="true" label="Produce a summary file of results" name="stats" type="boolean" />

        <conditional name="snpeff_settings">
          <param help="This section lets you specify the detailed parameter settings for the SnpEff tool." label="SnpEff-specific parameter settings" name="detail_level" type="select">
            <option value="default">default settings</option>
            <option value="change">change settings</option>
          </param>
          <when value="default">
            <!-- Default settings for SnpEff: hidden empty placeholders keep the names used by the command template defined in both cases. -->
            <param name="chr" type="hidden" value="" />
            <param name="min_cov" type="hidden" value="" />
            <param name="min_qual" type="hidden" value="" />
            <param name="no_ds" type="hidden" value="" />
            <param name="no_us" type="hidden" value="" />
            <param name="no_intron" type="hidden" value="" />
            <param name="no_intergenic" type="hidden" value="" />
            <param name="no_utr" type="hidden" value="" />
            <param name="ud" type="hidden" value="" />
          </when>
          <when value="change">
            <param checked="false" falsevalue="" label="prepend 'chr' to chromosome names, e.g., 'chr7' instead of '7'" name="chr" truevalue="-chr" type="boolean" />
            <param help="do not include variants with a coverage lower than this value" label="minimum coverage (default = not used)" name="min_cov" optional="true" type="integer" />
            <param help="do not include variants with a quality lower than this value" label="minimum quality (default = not used)" name="min_qual" optional="true" type="integer" />
            <param checked="false" falsevalue="" help="annotation of effects on the downstream region of genes can be suppressed" label="do not show downstream changes" name="no_ds" truevalue="--no-downstream" type="boolean" />
            <param checked="false" falsevalue="" help="annotation of effects on the upstream region of genes can be suppressed" label="do not show upstream changes" name="no_us" truevalue="--no-upstream" type="boolean" />
            <param checked="false" falsevalue="" help="annotation of effects on introns of genes can be suppressed" label="do not show intron changes" name="no_intron" truevalue="--no-intron" type="boolean" />
            <param checked="false" falsevalue="" help="annotation of effects on intergenic regions can be suppressed" label="do not show intergenic changes" name="no_intergenic" truevalue="--no-intergenic" type="boolean" />
            <param checked="false" falsevalue="" help="annotation of effects on the untranslated regions of genes can be suppressed" label="do not show UTR changes" name="no_utr" truevalue="--no-utr" type="boolean" />
            <param help="specify the upstream/downstream interval length, i.e., variants more than INTERVAL nts from the next annotated gene are considered to be intergenic" label="upstream downstream interval length (default = 5000 bases)" name="ud" optional="true" type="integer" />
          </when>
        </conditional>
      </when>
      <!-- No annotation tool selected: no further parameters; declared explicitly so every select option has a matching case. -->
      <when value="None" />
    </conditional>
  </inputs>

  <outputs>
    <!-- Main annotation report: HTML by default, switched to tabular when plain-text output is selected. -->
    <data name="outputfile" format="html">
      <change_format>
        <when input="formatting.oformat" value="text" format="tabular" />
      </change_format>
    </data>
    <!-- Original SnpEff output; only created when SnpEff is used and the user asked to keep it. -->
    <data name="snpeff_file" format="vcf">
      <filter>(annotool['name']=="snpeff" and annotool['ori_output'])</filter>
    </data>
    <!-- Results summary; only created when SnpEff is used and the summary was requested. -->
    <data name="summary_file" format="html">
      <filter>(annotool['name']=="snpeff" and annotool['stats'])</filter>
    </data>
  </outputs>

  <help>
.. class:: infomark

   **What it does**

The tool turns a variant list in VCF format into a more readable summary table listing variant sites and effects.

If installed, the variant annotation tool SnpEff can be used transparently to determine the genomic features, e.g., genes or transcripts, affected by the variants.

Use of this feature requires that you have an appropriate SnpEff genome file installed on the host machine. You can use the *List installed SnpEff genomes* tool to generate a list of all available SnpEff genomes.
This list can then be used (by selecting the dataset as the *genome list*) to populate the *genome* dropdown menu, from which you can select the SnpEff genome file to be used for the annotation.

HTML and plain text are supported as output file formats.
In HTML mode, variant positions and/or affected genomic features can be turned into hyperlinks to corresponding views in web-based genome browsers and databases.

The behavior of this feature depends on:

1) Recognition of the species that is analyzed

   You can declare the species you are working with using the *Species* text field.
   If you do not declare the species explicitly but choose SnpEff for effect annotation, the tool will usually be able to auto-detect the species from the SnpEff genome you are using.
   If no species gets assigned in either way, no hyperlinks will be generated and the html output will look essentially like plain text.
   
2) Available hyperlink formatting rules for this species
   
   When the species has been recognized, the tool checks if you have selected an *optional file with hyperlink formatting instructions*.
   If you did and that file contains an entry matching the recognized species, that entry will be used as a template to construct the hyperlinks.
   If no matching entry is found in the file, an error will be raised.
   
   If you did not supply a hyperlink formatting instruction file, the tool will consult an internal lookup table to see if it finds default rules for the construction of the hyperlinks for the species.
   If not, no hyperlinks will be generated and the html output will look essentially like plain text.
   
   **TIP:**
   MiModD's internal hyperlink formatting lookup tables are maintained and grow with every new version, but since web links change frequently as well, you may encounter broken hyperlinks for your species of interest. In such a case, you can do two things: `tell us about the problem`_ to make sure it gets fixed in the next release and, in the meantime, use a custom file with hyperlink formatting instructions to override the default entry for your species.
   
.. _tell us about the problem: mailto:mimodd@googlegroups.com
  </help>
</tool>