Mercurial > repos > urgi-team > vcfgandalftools
view VCFFiltering_wrapper.xml @ 1:cfd4eaadad42 draft
Uploaded
author | urgi-team |
---|---|
date | Tue, 15 Dec 2015 05:36:12 -0500 |
parents | 3552a8d9f51c |
children |
line wrap: on
line source
<tool id="VCFFiltering" name="VCFFiltering" version="0.01">
    <!--
      Galaxy wrapper around VCFFiltering_wrapper.py (VCF_Gandalf_Tools package).
      The command passes the input VCF, the allele frequency and allele number
      thresholds, optional explicit depth bounds, and optional BED files, and
      declares a filtered VCF plus an HTML output with its companion directory.
    -->
    <description>Filters SNP on a VCF depending on depth, allele number and allele frequency</description>
    <requirements>
        <requirement type="package" version="1.0">VCF_Gandalf_Tools</requirement>
    </requirements>
    <version_command>
        VCFFiltering.py --version
    </version_command>
    <command interpreter="python">
VCFFiltering_wrapper.py
    -f "$inputVCF"
    -o "$outputVCF"
    -F $AF
    -N $AN
## Explicit depth bounds are only passed when automatic detection is switched off
#if str($DP_auto.is_DP_auto) == "no"
    -m $DP_auto.DPmin
    -M $DP_auto.DPmax
#end if
## Bed files
#if str($BedFile_List.is_BedFile) == "yes"
    #for $bed in $BedFile_List.BedFiles
    -b "$bed.inputBed"
    #end for
#end if
    --graphHTML "$output_html"
    --dirGraphs "$output_html.files_path"
    </command>
    <inputs>
        <param name="inputVCF" type="data" format="vcf" label="Input VCF File name (from FreeBayes)"/>

        <!-- Depth range: the DPmin/DPmax inputs are only shown when the box is unchecked -->
        <conditional name="DP_auto">
            <param name="is_DP_auto" type="boolean" label="Calculate optimal depth range automatically" truevalue="yes" falsevalue="no" checked="true"/>
            <when value="yes"/>
            <when value="no">
                <param name="DPmin" type="integer" label="minimum Depth" value="1" help="default = 1">
                    <validator type="in_range" min="0" message="DP can't be negative"/>
                </param>
                <param name="DPmax" type="integer" label="maximum Depth" value="200" help="default = 200">
                    <validator type="in_range" min="0" message="DP can't be negative"/>
                </param>
            </when>
        </conditional>

        <param name="AF" type="float" value="0.9" label="minimum allele frequency" help="default = 0.9">
            <validator type="in_range" min="0.0" max="1.0" message="Allele frequency must be between 0.0 and 1.0"/>
        </param>
        <param name="AN" type="integer" value="2" label="maximum allele number" help="default = 2">
            <!-- min is 1, so the message states the actual bound (0 is rejected as well) -->
            <validator type="in_range" min="1" message="Allele number must be at least 1"/>
        </param>

        <!-- Optional BED files: at least one entry is required once the box is checked -->
        <conditional name="BedFile_List">
            <param name="is_BedFile" type="boolean" label="bed files : list of coordinates to filter, multiple beds allowed" truevalue="yes" falsevalue="no" checked="false"/>
            <when value="no"/>
            <when value="yes">
                <repeat name="BedFiles" title="bed files : list of coordinates to filter, multiple beds allowed" min="1">
                    <param name="inputBed" type="data" format="bed" label="Select Bed file "/>
                </repeat>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data format="vcf" name="outputVCF" label="${tool.name} on ${on_string} (vcf)"/>
        <data format="html" name="output_html" label="${tool.name} graphs on ${on_string} (html)"/>
    </outputs>
    <tests>
        <!-- Automatic depth range -->
        <test>
            <param name="is_DP_auto" value="yes"/>
            <param name="AF" value="0.9"/>
            <param name="AN" value="2"/>
            <param name="inputVCF" ftype="vcf" value="VCFFiltering_input.vcf"/>
            <output name="outputVCF" file="VCFFiltering_DPauto_output.vcf"/>
        </test>
        <!-- Explicit depth range 4..200 -->
        <test>
            <param name="is_DP_auto" value="no"/>
            <param name="DPmin" value="4"/>
            <param name="DPmax" value="200"/>
            <param name="AF" value="0.9"/>
            <param name="AN" value="2"/>
            <param name="inputVCF" ftype="vcf" value="VCFFiltering_input.vcf"/>
            <output name="outputVCF" file="VCFFiltering_DP_4_200_output.vcf"/>
        </test>
    </tests>
    <help><![CDATA[
**Filters SNP on a VCF depending on depth, allele number and allele frequency**

-----

**what it does :**

VCFFiltering is a python script that allows filtering SNP results from freebayes on multiple criteria at once.
The filters are :

- Allele number : number of possible alleles at the genomic position
- Allele frequency : frequency of the most represented allele ; note that if the most represented allele is the reference (a "." in the ALT column of the VCF), the allele frequency filter will still work but the allele frequency should be under 1-x
- Depth : Higher and lower bound of the depth ; the depth is the number of reads mapped on the genomic positions. Depth can be automatically detected. If you do so, the 90 % of the positions with a depth closest to the most frequent depth will pass the filter.

This script has been developed to be used with freebayes output, on haploid data.

.. class:: infomark

the VCF source is detected from the header. Please keep the header of your VCF file if you want to use this tool

-----

**input and output formats :**

input format is a VCF file obtained with freebayes ; headers are necessary.
You can also add some bed files to filter some specific regions.

output format is a filtered VCF file.

-----

**example :**

VCF input file:

::

    ##fileformat=VCFv4.1
    ##fileDate=20150126
    ##source=freeBayes v0.9.13-2-ga830efd
    ##reference=ref.fsa
    ##phasing=none
    ##commandline="freebayes --report-monomorphic --ploidy 2 -X -u -f ref.fsa strain_1.bam"
    #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT strain_1
    chrom1 1 . T . . . DP=4;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;RO=4;RPPR=5.18177 GT:DP:RO:QR:AO:QA:GL 0/0:4:4:38:.:.:0
    chrom1 2 . A . . . DP=12;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;RO=11;RPPR=5.18177 GT:DP:RO:QR:AO:QA:GL 0/0:12:11:38:.:.:0
    chrom1 3 . T A . . DP=5;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=5;RPPR=5.18177 GT:DP:RO:QR:AO:QA:GL 0/0:5:0:38:5:.:0
    chrom1 4 . G T . . DP=6;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=5;RPPR=5.18177 GT:DP:RO:QR:AO:QA:GL 0/0:6:1:38:5:.:0
    chrom1 5 . C C . . DP=12;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=11;RPPR=5.18177 GT:DP:RO:QR:AO:QA:GL 0/0:12:1:38:11:.:0

proposed options:

::

    - Calculate optimal depth range automatically = no
    - minimum Depth = 5
    - maximum Depth = 14
    - minimum allele frequency = 0.9
    - maximum allele number = 2

example result :

::

    ##fileformat=VCFv4.1
    ##fileDate=20150126
    ##source=freeBayes v0.9.13-2-ga830efd
    ##reference=ref.fsa
    ##phasing=none
    ##commandline="freebayes --report-monomorphic --ploidy 2 -X -u -f ref.fsa strain_1.bam"
    #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT strain_1
    chrom1 1 . T . . G_DP DP=4;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;RO=4;RPPR=5.18177;G_AN=0;G_AF=0.00;G_DP=4;G_Base=T GT:DP:RO:QR:AO:QA:GL 0/0:4:4:38:.:.:0
    chrom1 2 . A . . . DP=12;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;RO=11;RPPR=5.18177;G_AN=0;G_AF=0.08;G_DP=12;G_Base=A GT:DP:RO:QR:AO:QA:GL 0/0:12:11:38:.:.:0
    chrom1 3 . T A . . DP=5;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=5;RPPR=5.18177;G_AN=0;G_AF=1.00;G_DP=5;G_Base=A GT:DP:RO:QR:AO:QA:GL 0/0:5:0:38:5:.:0
    chrom1 4 . G T . G_AF DP=6;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=5;RPPR=5.18177;G_AN=0;G_AF=0.83;G_DP=6;G_Base=T GT:DP:RO:QR:AO:QA:GL 0/0:6:1:38:5:.:0
    chrom1 5 . C C . . DP=12;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=11;RPPR=5.18177;G_AN=0;G_AF=0.92;G_DP=12;G_Base=C GT:DP:RO:QR:AO:QA:GL 0/0:12:1:38:11:.:0

-----

**reference :**

    ]]></help>
</tool>