view VCFFiltering_wrapper.xml @ 7:a6b557df86db draft

Uploaded
author urgi-team
date Tue, 15 Dec 2015 05:35:36 -0500
parents
children
line wrap: on
line source

<tool id="VCFFiltering" name="VCFFiltering" version="0.01">
    <!-- Short summary of what the tool does. -->
    <description>Filters SNP on a VCF depending on depth, allele number and allele frequency</description>
    <!-- Dependency declaration: the VCF_Gandalf_Tools package, version 1.0. -->
    <requirements>
        <requirement type="package" version="1.0">VCF_Gandalf_Tools</requirement>
    </requirements>
    <!-- Command used to report the version of the filtering script. -->
    <version_command>VCFFiltering.py --version</version_command>
    <command interpreter="python"><![CDATA[
        ## Base invocation, shared by both depth modes. Dataset paths are quoted
        ## so that unusual characters in a path cannot split an argument.
        VCFFiltering_wrapper.py -f "$inputVCF" -o "$outputVCF" -F $AF -N $AN
        ## Explicit depth bounds are only passed when automatic detection is off;
        ## DPmin / DPmax exist only in that branch of the DP_auto conditional.
        #if not $DP_auto.is_DP_auto
            -m $DP_auto.DPmin -M $DP_auto.DPmax
        #end if
        ## Bed files: one -b option per entry of the BedFiles repeat.
        #if $BedFile_List.is_BedFile
            #for $bed in $BedFile_List.BedFiles
                -b "$bed.inputBed"
            #end for
        #end if
        ## HTML report and the directory that receives its graphs.
        --graphHTML "$output_html" --dirGraphs "$output_html.files_path"
    ]]></command>
    <inputs>
        <!-- VCF file to filter (passed to the wrapper with -f). -->
        <param name="inputVCF" type="data" format="vcf" label="Input VCF File name (from FreeBayes)"/>
        <!-- Depth range: detected automatically (default) or given explicitly. -->
        <conditional name="DP_auto">
            <param name="is_DP_auto" type="boolean" label="Calculate optimal depth range automatically" truevalue="yes" falsevalue="no" checked="true" />
            <when value="yes"/>
            <!-- Manual mode: both bounds are passed to the wrapper (-m / -M). -->
            <when value="no">
                <param name="DPmin" type="integer" label="minimum Depth" value="1" help="default = 1">
                    <validator type="in_range" min="0" message="DP can't be negative" />
                </param>
                <param name="DPmax" type="integer" label="maximum Depth" value="200" help="default = 200">
                    <validator type="in_range" min="0" message="DP can't be negative" />
                </param>
            </when>
        </conditional>
        <!-- Minimum allele frequency (-F); restricted to the [0, 1] interval. -->
        <param name="AF" type="float" value="0.9" label="minimum allele frequency" help="default = 0.9">
            <validator type="in_range" min="0.0" max="1.0" message="Allele frequency must be between 0 and 1" />
        </param>
        <!-- Maximum allele number (-N); the validator rejects anything below 1,
             so the message states that bound rather than "negative". -->
        <param name="AN" type="integer" value="2" label="maximum allele number" help="default = 2">
            <validator type="in_range" min="1" message="Allele number must be at least 1" />
        </param>
        <!-- Optional bed files; each selected file is passed with its own -b option. -->
        <conditional name="BedFile_List">
            <param name="is_BedFile" type="boolean" label="bed files : list of coordinates to filter, multiple beds allowed" truevalue="yes" falsevalue="no" checked="false" />
            <when value="no"/>
            <when value="yes">
                <!-- At least one bed file is required once the option is enabled. -->
                <repeat name="BedFiles" title="bed files : list of coordinates to filter, multiple beds allowed" min="1">
                    <param name="inputBed" type="data" format="bed" label="Select Bed file "/>
                </repeat>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- VCF written by the wrapper (its -o option). -->
        <data name="outputVCF" format="vcf" label="${tool.name} on ${on_string} (vcf)"/>
        <!-- HTML page passed as graphHTML; its files_path is passed as dirGraphs. -->
        <data name="output_html" format="html" label="${tool.name} graphs on ${on_string} (html)"/>
    </outputs>
    <tests>
        <!-- Automatic depth range, default allele frequency / allele number. -->
        <test>
            <param name="inputVCF" ftype="vcf" value="VCFFiltering_input.vcf" />
            <!-- Parameters of the DP_auto conditional are nested to mirror the inputs. -->
            <conditional name="DP_auto">
                <param name="is_DP_auto" value="yes" />
            </conditional>
            <param name="AF" value="0.9"/>
            <param name="AN" value="2"/>
            <output name="outputVCF" file="VCFFiltering_DPauto_output.vcf"/>
        </test>
        <!-- Explicit depth range 4-200, default allele frequency / allele number. -->
        <test>
            <param name="inputVCF" ftype="vcf" value="VCFFiltering_input.vcf" />
            <conditional name="DP_auto">
                <param name="is_DP_auto" value="no" />
                <param name="DPmin" value="4"/>
                <param name="DPmax" value="200"/>
            </conditional>
            <param name="AF" value="0.9"/>
            <param name="AN" value="2"/>
            <output name="outputVCF" file="VCFFiltering_DP_4_200_output.vcf"/>
        </test>
    </tests>
    <help><![CDATA[
**Filters SNP on a VCF depending on depth, allele number and allele frequency**

-----

**what it does :**

VCFFiltering is a python script that filters SNP results from freebayes on multiple criteria at once. The filters are:

 - Allele number : number of possible allele at the genomic position
 - Allele frequency : frequency of the most represented allele ; note that if the most represented allele is the reference (a "." in the ALT column, i.e. the 5th column of the VCF), the filter still works, but the reported allele frequency must then be below 1-x, where x is the minimum allele frequency
 - Depth : Higher and lower bound of the depth ; the depth is the number of reads mapped on the genomic positions.

The depth range can be detected automatically. In that case, the 90 % of the positions whose depth is closest to the most frequent depth pass the filter.
	
This script has been developed to be used with freebayes output, on haploid data.


.. class:: infomark

The VCF source is detected from the header, so please keep the header of your VCF file if you want to use this tool.

-----

**input and output formats :**

The input is a VCF file obtained with freebayes; the header lines are required.
You can also add bed files to filter specific regions.

output format is a filtered VCF file.

-----

**example :**


VCF input file: ::

    ##fileformat=VCFv4.1
    ##fileDate=20150126
    ##source=freeBayes v0.9.13-2-ga830efd
    ##reference=ref.fsa
    ##phasing=none
    ##commandline="freebayes --report-monomorphic --ploidy 2 -X -u -f ref.fsa strain_1.bam"
    #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	strain_1
    chrom1	1	.	T	.	.	.	DP=4;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;RO=4;RPPR=5.18177	GT:DP:RO:QR:AO:QA:GL	0/0:4:4:38:.:.:0
    chrom1	2	.	A	.	.	.	DP=12;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;RO=11;RPPR=5.18177	GT:DP:RO:QR:AO:QA:GL	0/0:12:11:38:.:.:0
    chrom1	3	.	T	A	.	.	DP=5;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=5;RPPR=5.18177	GT:DP:RO:QR:AO:QA:GL	0/0:5:0:38:5:.:0
    chrom1	4	.	G	T	.	.	DP=6;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=5;RPPR=5.18177	GT:DP:RO:QR:AO:QA:GL	0/0:6:1:38:5:.:0
    chrom1	5	.	C	C	.	.	DP=12;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=11;RPPR=5.18177	GT:DP:RO:QR:AO:QA:GL	0/0:12:1:38:11:.:0

proposed options: ::

 - Calculate optimal depth range automatically = no
 - minimum Depth = 5
 - maximum Depth = 14
 - minimum allele frequency = 0.9
 - maximum allele number = 2

example result : ::

    ##fileformat=VCFv4.1
    ##fileDate=20150126
    ##source=freeBayes v0.9.13-2-ga830efd
    ##reference=ref.fsa
    ##phasing=none
    ##commandline="freebayes --report-monomorphic --ploidy 2 -X -u -f ref.fsa strain_1.bam"
    #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	strain_1
    chrom1	1	.	T	.	.	G_DP	DP=4;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;RO=4;RPPR=5.18177;G_AN=0;G_AF=0.00;G_DP=4;G_Base=T	GT:DP:RO:QR:AO:QA:GL	0/0:4:4:38:.:.:0
    chrom1	2	.	A	.	.	.	DP=12;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;RO=11;RPPR=5.18177;G_AN=0;G_AF=0.08;G_DP=12;G_Base=A	GT:DP:RO:QR:AO:QA:GL	0/0:12:11:38:.:.:0
    chrom1	3	.	T	A	.	.	DP=5;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=5;RPPR=5.18177;G_AN=0;G_AF=1.00;G_DP=5;G_Base=A	GT:DP:RO:QR:AO:QA:GL	0/0:5:0:38:5:.:0
    chrom1	4	.	G	T	.	G_AF	DP=6;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=5;RPPR=5.18177;G_AN=0;G_AF=0.83;G_DP=6;G_Base=T	GT:DP:RO:QR:AO:QA:GL	0/0:6:1:38:5:.:0
    chrom1	5	.	C	C	.	.	DP=12;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=11;RPPR=5.18177;G_AN=0;G_AF=0.92;G_DP=12;G_Base=C	GT:DP:RO:QR:AO:QA:GL	0/0:12:1:38:11:.:0

-----

**reference :**

]]>
    </help>
</tool>