view vcftools_main/VCFToolFilter/vcfToolsFilter.xml @ 1:0f67ed444d47 draft

Uploaded
author gandres
date Thu, 02 Jul 2015 11:07:45 -0400
parents 3b1436a9a6e5
children
line wrap: on
line source

<tool id="sniplay_vcftoolsfilter" name="VCFtools Filter" version="1.1.1">
    
    <!-- [REQUIRED] Tool description displayed after the tool name -->
    <description> </description>
    
    <!-- [OPTIONAL] External dependencies needed at run time:
         the perl binary and the vcftools package (version 0.1.12b) -->
    <requirements>
        <requirement type="binary">perl</requirement>
        <requirement type="package" version="0.1.12b">vcftools</requirement>
    </requirements>
    
    <!-- [OPTIONAL] Command to be executed to get the tool's version string.
         The element below only contains a commented-out template line,
         so no version command is currently defined for this tool. -->
    <version_command>
<!--
        tool_binary -v
-->
    </version_command>
    
    <!-- [REQUIRED] The command to execute.
         The wrapper script has a .sh extension, so it is launched with bash rather than perl.
         Arguments are positional: 13 fixed parameters, then the sample list,
         the chromosome list and the PLINK map output path.
         The three trailing slots are always filled ('None' for an empty list,
         '' when the output format is not plink) so the argument count stays constant.
         Dataset paths and free-text values are single-quoted so that a value
         containing whitespace is still passed as one argument. -->
    <command interpreter="bash">
        vcfToolsFilter.sh '$filein' '$fileout_label' '$fileout' '$filelog' $export $frequency $max_freq $allow_missing $nb_alleles_min $nb_alleles_max $type_p $bound_start $bound_end
        #if str( $samples ) == "":
        'None'
        #else
        '$samples'
        #end if
        #if str( $chromosomes ) == "":
        'None'
        #else
        '$chromosomes'
        #end if
        #if str( $export ) == "plink":
        '$fileout_map'
        #else
        ''
        #end if
    </command>
     
    <!-- [REQUIRED] Input dataset and filtering parameters -->
    <inputs>
        <!-- Input dataset and basename used in the output labels -->
        <param name="filein" type="data" format="vcf" optional="false" label="VCF input" />
        <param name="fileout_label" type="text" value="filtered" optional="false" label="Output file basename"/>

        <!-- Optional restrictions; both must match a comma separated list of word characters -->
        <param name="samples" type="text" optional="true" label="Samples" help="Samples to be analyzed. Comma separated list">
            <validator type="regex" message="Please enter a comma separated list.">^\w+(,\w+)*$</validator>
        </param>
        <param name="chromosomes" type="text" optional="true" label="Chromosomes" help="Chromosomes to be analyzed. Comma separated list">
            <validator type="regex" message="Please enter a comma separated list.">^\w+(,\w+)*$</validator>
        </param>

        <!-- Output format; the value is also used by the outputs section to pick labels and datatypes -->
        <param name="export" type="select" label="Output format" >
            <option value="VCF" selected="true">VCF</option>
            <option value="freq">freq</option>
            <option value="plink">plink</option>
        </param>

        <!-- Frequency and missing-data thresholds -->
        <param name="frequency" type="float" value="0.001" label="Minimum MAF." help="Minimum frequency." />
        <param name="max_freq" type="float" value="0.5" label="Maximum MAF." help="Maximum frequency." />
        <param name="allow_missing" type="float" value="1" min="0" max="1" label="Missing data proportion" help="Allowed missing data proportion per site. Must be comprised between 0 and 1." />

        <!-- Allele count limits, each restricted to the range 2 to 4 -->
        <param name="nb_alleles_min" type="integer" value="2" label="Minimum number of alleles" help="Minimum accepted number of alleles." min="2" max="4" />
        <param name="nb_alleles_max" type="integer" value="2" label="Maximum number of alleles" help="Maximum accepted number of alleles." min="2" max="4" />

        <!-- Kind of polymorphism to keep -->
        <param name="type_p" type="select" label="Polymorphisms" help="Type of polymorphisms to keep." >
            <option value="ALL" selected="true">All</option>
            <option value="SNP">SNP</option>
            <option value="INDEL">Indel</option>
        </param>

        <!-- Range of sites to process -->
        <param name="bound_start" type="integer" value="1" label="Lower bound" help="Lower bound for a range of sites to be processed." />
        <param name="bound_end" type="integer" value="100000000" label="Upper bound" help="Upper bound for a range of sites to be processed." />
    </inputs>
    
    <!-- [REQUIRED] Output datasets -->
    <outputs>
        <!-- Main result. Declared as vcf, switched to tabular for "freq" and to txt for "plink".
             The label appends ped, frq or vcf to the basename depending on the chosen format.
             NOTE(review): the inline #if directives in the label are separated by literal spaces,
             which may end up in the displayed dataset name; confirm in the Galaxy history. -->
        <data name="fileout" format="vcf" label="${fileout_label}.#if str($export)=='plink' then 'ped' else '' # #if str($export)=='freq' then 'frq' else '' # #if str($export)=='VCF' then 'vcf' else '' #" >
            <change_format>
                <when input="export" value="freq" format="tabular" />
                <when input="export" value="plink" format="txt" />
            </change_format>
        </data>

        <!-- Map file, only produced when the output format is plink -->
        <data name="fileout_map" format="txt" label="${fileout_label}.map">
            <filter>(export == 'plink')</filter>
        </data>

        <!-- Log file, always produced -->
        <data name="filelog" format="txt" label="${fileout_label}.log" />
    </outputs>
    
    <!-- [STRONGLY RECOMMENDED] Exit code rules:
         any exit code of 1 or above is reported as a fatal error -->
    <stdio>
        <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR -->
        <exit_code range="1:" level="fatal" />
    </stdio>
    
    <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin -->
    <tests>
        <!-- [HELP] Test files have to be in the ~/test-data directory -->
        <!-- Filters sample.vcf on chr1 with VCF output and compares both the
             filtered file and the log against the stored reference files -->
        <test>
            <param name="filein" value="sample.vcf" />
            <param name="chromosomes" value="chr1" />
            <param name="export" value="VCF" />
            <param name="frequency" value="0.001" />
            <param name="max_freq" value="0.5" />
            <param name="allow_missing" value="1" />
            <param name="nb_alleles_min" value="2" />
            <param name="nb_alleles_max" value="4" />
            <param name="type_p" value="ALL" />
            <param name="bound_start" value="1" />
            <param name="bound_end" value="100000000" />
            <output name="fileout" file="result.vcf" />
            <output name="filelog" file="result.log" />
        </test>
    </tests>
    
    <!-- [OPTIONAL] Help displayed in Galaxy -->
    <help>

.. class:: infomark

**Authors** Adam Auton, Petr Danecek and Anthony Marcketta (C++ module). VCFtools website: http://vcftools.sourceforge.net

 | **Please cite** "The Variant Call Format and VCFtools", Petr Danecek, Adam Auton, Goncalo Abecasis, Cornelis A. Albers, Eric Banks, Mark A. DePristo, Robert Handsaker, Gerton Lunter, Gabor Marth, Stephen T. Sherry, Gilean McVean, Richard Durbin and 1000 Genomes Project Analysis Group, **Bioinformatics**, 2011

.. class:: infomark

**Galaxy integration** Andres Gwendoline, Institut Français de Bioinformatique.

.. class:: infomark

**Support** For any questions about Galaxy integration, please send an e-mail to support.abims@sb-roscoff.fr

---------------------------------------------------



================
VCF tools filter
================

-----------
Description
-----------

  | Filter a VCF file.
  | For further information on VCFtools, please visit the VCFtools website:
  | http://vcftools.sourceforge.net

-----------------
Workflow position
-----------------

**Upstream tools**

=========== ========================== =======
Name            output file(s)         format 
=========== ========================== =======
=========== ========================== =======


**Downstream tools**

=========== ========================== =======
Name            output file(s)         format
=========== ========================== =======
=========== ========================== =======


----------
Input file
----------

VCF file
	VCF file with all SNPs

----------
Parameters
----------

Output file basename
	Prefix for the output VCF file

Samples
        Samples to be analyzed. Comma separated list

Chromosomes
	Chromosomes to be analyzed. Comma separated list

Output format
	VCF/freq/plink

Minimum MAF
	Minimum frequency

Maximum MAF
	Maximum frequency

Missing data proportion
	Allowed missing data proportion per site. Must be between 0 and 1.

Number of alleles
	Accepted number of alleles min and max.

Polymorphisms
	Type of polymorphisms to keep (ALL/SNP/INDEL).
Bounds
	Lower bound and upper bound for a range of sites to be processed.

------------
Output files
------------

VCF file
	Filtered VCF file

Log file

---------------------------------------------------

---------------
Working example
---------------

Input files
===========

VCF file
---------

::

	#fileformat=VCFv4.1
	#FILTER=&lt;ID=LowQual,Description="Low quality">
	#FORMAT=&lt;ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
	[...]
	CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	CATB1
	chr1	2209	.	G	T	213.84	.	AC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|)	GT:AD:DP:GQ:PL	1/1:0,7:7:18:242,18,0


Parameters
==========

Output file basename -> filtered_chr1

Chromosomes -> chr1

Output format -> VCF

Minimum MAF -> 0.001

Maximum MAF -> 0.5

Missing data proportion -> 1

Number of alleles min -> 2

Number of alleles max -> 4

Polymorphisms -> All

Lower bound -> 1

Upper bound -> 100000000


Output files
============

filtered_chr1.vcf
-----------------

::

        #fileformat=VCFv4.1
        #FILTER=&lt;ID=LowQual,Description="Low quality"&gt;
        #FORMAT=&lt;ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
        [...]
        CHROM   POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  CATB1
	chr1	5059	.	C	G	146.84	.	AC=2;AF=1.00;AN=2;DP=8;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=24.14;MQ0=1;QD=18.35;EFF=INTRON(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|)	GT:AD:DP:GQ:PL	1/1:0,8:8:18:175,18,0


    </help>
    
</tool>