<?xml version="1.0" encoding="UTF-8"?>
<!--
  Repository view metadata:
  VCFToolsStats/vcfToolsStats.xml @ 4:b762ecbe2314 (draft)
  planemo upload
  author:   gandres
  date:     Fri, 11 Dec 2015 07:35:00 -0500
  parents:  612066e3f57d
  children: 2b6bb9d5e4e9
-->

<tool id="sniplay_vcftoolsstats" name="VCFtools Stats" version="1.1.0">
    
    <!-- [REQUIRED] Tool description displayed after the tool name.
         Kept short: it is a one-line summary, the details live in the help section. -->
    <description>Compute statistics on a VCF file</description>
    
    <!-- [OPTIONAL] External dependencies (3rd party tools, binaries, modules...) the tool needs to run -->
    <requirements>
        <requirement type="binary">perl</requirement>
        <requirement version="0.1.12b" type="package">vcftools</requirement>
    </requirements>
    
    <!-- [STRONGLY RECOMMENDED] Exit code rules -->
    <stdio>
        <!-- [HELP] Without any exit code rule, the tool stops as soon as anything is written to STDERR -->
        <!-- Exit codes from 1 upwards are reported as fatal -->
        <exit_code level="fatal" range="1:" />
    </stdio>
    
    <!-- [REQUIRED] The command to execute.
         The wrapper is named as a shell script (.sh), so it is launched with bash
         instead of perl (perl would only run it by re-executing a non-perl "#!" line).
         Positional arguments: input VCF, then the annotation, het, imiss,
         TsTv summary and log output paths. Each path is single-quoted so that
         unusual characters in a path cannot split or alter an argument. -->
    <command interpreter="bash">
        vcfToolsStats.sh '$filein' '$fileout_annot' '$fileout_het' '$fileout_imiss' '$fileout_sum' '$filelog'
    </command>
     
    <!-- [REQUIRED] Input files and tool parameters -->
    <inputs>
        <!-- Mandatory VCF dataset passed to the command as its first argument -->
        <param name="filein" type="data" format="vcf" label="VCF input" optional="false" />
        <!-- Basename referenced by the labels of the output datasets -->
        <param name="fileout_label" type="text" label="Output file basename" value="vcf_stats" optional="false" />
    </inputs>
    
    <!-- [REQUIRED] Output files -->
    <outputs>
        <!-- Each dataset is labelled with the chosen basename followed by a fixed suffix -->
        <data name="fileout_annot" label="${fileout_label}.annotation" format="txt" />
        <data name="fileout_het" label="${fileout_label}.het" format="txt" />
        <data name="fileout_imiss" label="${fileout_label}.imiss" format="txt" />
        <data name="fileout_sum" label="${fileout_label}.TsTv.summary" format="txt" />
        <data name="filelog" label="${fileout_label}.log" format="txt" />
    </outputs>
    
    <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin -->
    <tests>
        <!-- [HELP] Test files have to be in the ~/test-data directory -->
        <test>
            <!-- Only the input is set: fileout_label keeps its default value -->
            <param name="filein" value="sample.vcf" />
            <!-- Four statistics outputs are compared; no expectation is declared for the log -->
            <output name="fileout_annot" file="stats-result.annotation" />
            <output name="fileout_het" file="stats-result.het" />
            <output name="fileout_imiss" file="stats-result.imiss" />
            <output name="fileout_sum" file="stats-result.TsTv.summary" />
        </test>
    </tests>
    
    <!-- [OPTIONAL] Help displayed in Galaxy -->
    <help>

.. class:: infomark

**Authors**     Adam Auton, Petr Danecek and Anthony Marcketta (C++ Module) : VCFtools_

.. _VCFtools: http://vcftools.sourceforge.net

 | **Please cite** "The Variant Call Format and VCFtools", Petr Danecek, Adam Auton, Goncalo Abecasis, Cornelis A. Albers, Eric Banks, Mark A. DePristo, Robert Handsaker, Gerton Lunter, Gabor Marth, Stephen T. Sherry, Gilean McVean, Richard Durbin and 1000 Genomes Project Analysis Group, **Bioinformatics**, 2011 

.. class:: infomark

**Galaxy integration** Andres Gwendoline, Institut Français de Bioinformatique.

.. class:: infomark

**Support** For any questions about Galaxy integration, please send an e-mail to support.abims@sb-roscoff.fr

---------------------------------------------------


================
VCF tools stats
================

-----------
Description
-----------

  | Compute statistics on a VCF file
  | For further information on VCFtools, please visit the VCFtools website_.

.. _website: http://vcftools.sourceforge.net

-----------------
Workflow position
-----------------

**Upstream tools**

=========== ========================== =======
Name            output file(s)         format 
=========== ========================== =======
=========== ========================== =======


**Downstream tools**

=========== ========================== =======
Name            output file(s)         format
=========== ========================== =======
=========== ========================== =======


----------
Input file
----------

VCF file
	VCF file with all SNPs

----------
Parameters
----------

Output file basename
	Basename used to label the output files

------------
Output files
------------

.annotation file
	Statistics on annotation/location along genome

.het file 
	Statistics on heterozygosity of the individuals

.imiss file
	Statistics on missing data of the individuals

.TsTv.summary file
	Statistics on mutation types and transition/transversion number

.log file
	Log of the run

---------------------------------------------------

---------------
Working example
---------------

Input files
===========

VCF file
---------

::

	##fileformat=VCFv4.1
	##FILTER=&lt;ID=LowQual,Description="Low quality">
	##FORMAT=&lt;ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
	[...]
	#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	CATB1
	chr1	2209	.	G	T	213.84	.	AC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|)	GT:AD:DP:GQ:PL	1/1:0,7:7:18:242,18,0


Parameters
==========

Output file basename -> vcf_stat


Output files
============

.annotation file
----------------

::

	Genic	4489
	Intergenic	466
	========
	Intron	960
	Exon	3248
	UTR	281
	========
	Non-syn	226
	Synonym	3022
        
.het file
---------

::

	INDV	O(HOM)	E(HOM)	N_SITES	F
	CATB1	0	0.0	3616	0.00000

.imiss file
-----------

::

	INDV	N_DATA	N_GENOTYPES_FILTERED	N_MISS	F_MISS
	CATB1	4813	0	0	0

.TsTv.summary file
------------------

::

	MODEL	COUNT
	AC	371
	AG	1467
	AT	562
	CG	330
	CT	1659
	GT	397
	Ts	3126
	Tv	1660


    </help>
    <!-- Publication to cite when using the wrapped software (VCFtools) -->
    <citations>
        <!-- [HELP] As DOI or BibTex entry -->
        <citation type="bibtex">
        @article{Danecek01082011,
        author = {Danecek, Petr and Auton, Adam and Abecasis, Goncalo and Albers, Cornelis A. and Banks, Eric and DePristo, Mark A. and Handsaker, Robert E. and Lunter, Gerton and Marth, Gabor T. and Sherry, Stephen T. and McVean, Gilean and Durbin, Richard and 1000 Genomes Project Analysis Group},
        title = {The variant call format and VCFtools},
        volume = {27},
        number = {15},
        pages = {2156-2158},
        year = {2011},
        doi = {10.1093/bioinformatics/btr330},
        abstract ={Summary: The variant call format (VCF) is a generic format for storing DNA polymorphism data such as SNPs, insertions, deletions and structural variants, together with rich annotations. VCF is usually stored in a compressed manner and can be indexed for fast data retrieval of variants from a range of positions on the reference genome. The format was developed for the 1000 Genomes Project, and has also been adopted by other projects such as UK10K, dbSNP and the NHLBI Exome Project. VCFtools is a software suite that implements various utilities for processing VCF files, including validation, merging, comparing and also provides a general Perl API. Availability: http://vcftools.sourceforge.net Contact: rd@sanger.ac.uk},
        URL = {http://bioinformatics.oxfordjournals.org/content/27/15/2156.abstract},
        eprint = {http://bioinformatics.oxfordjournals.org/content/27/15/2156.full.pdf+html},
        journal = {Bioinformatics}
        }
        </citation>

    </citations>

</tool>