Mercurial > repos > wolma > mimodd_main
view vcf_filter.xml @ 1:bfcd121b99bf draft default tip
planemo upload for repository https://github.com/wm75/mimodd_galaxy_wrappers commit 44e872b808f88eacd05963fc2478da2c07b50228
author | wolma |
---|---|
date | Thu, 22 Mar 2018 10:32:39 -0400 |
parents | f0f2795de2c7 |
children |
line wrap: on
line source
<!--
  Galaxy wrapper for "mimodd vcf-filter".

  Builds a command line from three kinds of user input:
    * zero or more sample-specific filters (repeat "datasets"),
    * zero or more region filters (repeat "regions"),
    * a variant-type switch ("vartype") and an optional sample-column
      filter ("vfilter").
  The version token, requirements, stdio handling, version command,
  the lex_sam_header macro, the help footer and the citations are all
  imported from macros.xml and are not defined in this file.
-->
<tool id="mimodd_vcf_filter" name="MiModD VCF Filter" version="@MIMODD_WRAPPER_VERSION@">
    <description>
    extracts lines from a vcf variant file based on field-specific filters
    </description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <expand macro="version_command" />
    <command><![CDATA[
    mimodd vcf-filter
    '$inputfile'
    -o '$outputfile'
    ## Sample-specific filters: every option below receives exactly one
    ## value per "datasets" repeat instance, in the same order as -s.
    #if len($datasets):
        -s
        #for $i in $datasets
            '$i.sample'
        #end for
        --gt
        #for $i in $datasets
            ## remove whitespace from free-text input;
            ## an empty pattern is passed on as ANY
            '#echo ("".join($i.GT.split()) or "ANY")#'
            #echo " "
        #end for
        --dp
        #for $i in $datasets
            $i.DP
        #end for
        --gq
        #for $i in $datasets
            $i.GQ
        #end for
        --af
        #for $i in $datasets
            ## an empty allelic fraction filter is passed on as ::
            '#echo ($i.AF or "::")#'
        #end for
    #end if
    ## Region filters: chrom:start-stop, or chrom:start when no stop is given.
    #if len($regions):
        -r
        #for $i in $regions
            #if $i.stop:
                '$i.chrom:$i.start-$i.stop'
            #else:
                '$i.chrom:$i.start'
            #end if
        #end for
    #end if
    #if $vfilter:
        --vfilter
        ## Turn the comma-separated list into separate shell words.
        ## The join separator closes one single-quoted word and opens the
        ## next one, so that a,b is rendered as 'a' 'b'.
        '#echo ("' '".join($vfilter.split(',')))#'
    #end if
    $vartype
    ]]></command>

    <inputs>
        <param name="inputfile" type="data" format="vcf" label="VCF input file" />
        <!-- One repeat instance per sample-specific filter. -->
        <repeat name="datasets" title="Sample-specific Filter" default="0" min="0">
            <param name="sample" type="text" label="sample" help="name of a sample as it appears in the VCF input file and that indicates the sample that this filter should be applied to.">
                <expand macro="lex_sam_header" message="Non-ASCII characters are not valid in sample names." />
            </param>
            <!-- Accepts an empty value, or comma-separated tokens made up of digits and the characters . / | -->
            <param name="GT" type="text" label="genotype pattern(s) for the inclusion of variants" help="keep only variants for which the genotype of the sample matches the specified pattern; format: x/x where x = 0 is wildtype and x = 1 is mutant. Multiple genotypes can be specified as a comma-separated list.">
                <validator type="expression" message="Malformed genotype pattern">not value or all(c.isdigit() or c in './|' for token in value.split(',') for c in token.strip(' '))</validator>
            </param>
            <param name="DP" type="integer" value="0" label="depth of coverage for the sample at the variant site" help="keep only variants with at least this sample-specific coverage at the variant site" />
            <param name="GQ" type="integer" value="0" label="genotype quality for the variant in the sample" help="keep only variants for which the genotype prediction for the sample has at least this quality" />
            <!-- Accepts an empty value, or a string made up of digits, dots and colons only. -->
            <param name="AF" type="text" label="allelic fraction filter" help="expected format: [allele number]:[minimal fraction]:[maximal fraction]; keep only variants for which the fraction of sample-specific reads supporting a given allele number is between minimal and maximal fraction; if allele number is omitted, the filter operates on the most frequent non-reference allele instead">
                <validator type="expression" message="Malformed allelic fraction filter">not value or all(c.isdigit() or c in '.:' for c in value)</validator>
            </param>
        </repeat>
        <!-- One repeat instance per genomic region; start and stop are optional digit-only text fields. -->
        <repeat name="regions" title="Region Filter" default="0" min="0" help="Filter variant sites by their position in the genome. If multiple Region Filters are specified, all variants that fall in ONE of the regions are reported.">
            <param name="chrom" type="text" label="Chromosome">
                <expand macro="lex_sam_header" message="Non-ASCII characters are not valid in chromosome names." />
            </param>
            <param name="start" type="text" label="Region Start">
                <validator type="expression" message="an integer number is required">not value or value.isdigit()</validator>
            </param>
            <param name="stop" type="text" label="Region End">
                <validator type="expression" message="an integer number is required">not value or value.isdigit()</validator>
            </param>
        </repeat>
        <!-- The selected option value is inserted verbatim at the end of the command line. -->
        <param name="vartype" type="select" label="Select the types of variants to include in the output">
            <option value="">all types of variants</option>
            <option value="--no-indels">exclude indels</option>
            <option value="--indels-only">only indels</option>
        </param>
        <param name="vfilter" type="text" label="sample" help="Filter output by sample name; only the sample-specific columns with their sample name matching any of the comma separated filters will be retained in the output.">
            <expand macro="lex_sam_header" message="Non-ASCII characters are not valid in sample names." />
        </param>
    </inputs>

    <outputs>
        <data name="outputfile" format="vcf" />
    </outputs>

    <!--
      Tab characters inside the assertion attributes are written as the
      character reference &#9; because a literal tab in an attribute value
      is replaced by a space when the XML is parsed.
    -->
    <tests>
        <!-- Sample-specific genotype filter. -->
        <test>
            <param name="inputfile" value="a.vcf" />
            <repeat name="datasets">
                <param name="sample" value="N2" />
                <param name="GT" value="0/0" />
            </repeat>
            <output name="outputfile" ftype="vcf">
                <assert_contents>
                    <has_text text="GT:PL:DP:DPR:GQ&#9;0/0" />
                    <not_has_text text="GT:PL:DP:DPR:GQ&#9;1/1" />
                    <not_has_text text="GT:PL:DP:DPR:GQ&#9;0/1" />
                </assert_contents>
            </output>
        </test>
        <!-- Region filter restricted to a whole chromosome. -->
        <test>
            <param name="inputfile" value="a.vcf" />
            <repeat name="regions">
                <param name="chrom" value="chrX" />
            </repeat>
            <output name="outputfile" ftype="vcf">
                <assert_contents>
                    <has_text text="chrX&#9;" />
                    <not_has_text text="chrI&#9;" />
                    <not_has_text text="chrII&#9;" />
                    <not_has_text text="chrIII&#9;" />
                    <not_has_text text="chrIV&#9;" />
                    <not_has_text text="chrV&#9;" />
                </assert_contents>
            </output>
        </test>
        <!-- Variant type filter combined with the sample column filter. -->
        <test>
            <param name="inputfile" value="a.vcf" />
            <param name="vartype" value="--no-indels" />
            <param name="vfilter" value="ot266" />
            <output name="outputfile" ftype="vcf">
                <assert_contents>
                    <not_has_text text="INDEL;" />
                    <has_line line="#CHROM&#9;POS&#9;ID&#9;REF&#9;ALT&#9;QUAL&#9;FILTER&#9;INFO&#9;FORMAT&#9;ot266" />
                </assert_contents>
            </output>
        </test>
    </tests>

    <help><![CDATA[
.. class:: infomark

   **What it does**

The tool filters a variant file in VCF format to generate a new VCF file with only a subset of the original variants.

The following types of variant filters can be set up:

1) Sample-specific filters:

   Filter variants based on their characteristics in the sequenced reads of a specific sample. Multiple sample-specific filters are combined by logical AND, i.e., only variants that pass ALL sample-specific filters are kept.

2) Region filters:

   Filter variants based on the genomic region they affect. Multiple region filters are combined by logical OR, i.e., variants passing ANY region filter are kept.

3) Variant type filter:

   Filter variants by their type, i.e. whether they are single nucleotide variations (SNVs) or indels

In addition, the *sample* filter can be used to reduce the samples encoded in a multi-sample VCF file to just those specified by the filter.
The *sample* filter is included mainly for compatibility reasons: if an external tool cannot deal with the multisample file format, but instead looks only at the first sample-specific column of the file, you can use the filter to turn the multi-sample file into a single-sample file. Besides, the filter can also be used to change the order of the samples since it will sort the samples in the order specified in the filter field.

**Examples of sample-specific filters:**

*Simple genotype pattern*

genotype pattern: 1/1 ==> keep all variants in the vcf input file for which the specified sample's genotype is homozygous mutant

*Complex genotype pattern*

genotype pattern: 0/1, 0/0 ==> keep all variants for which the sample's genotype is either heterozygous or homozygous wildtype

*Multiple sample-specific filters*

Filter 1: genotype pattern: 0/0, Filter 2: genotype pattern: 1/1 ==> keep all variants for which the first sample's genotype is homozygous wildtype **and** the second sample's genotype is homozygous mutant

*Combining sample-specific filter criteria*

genotype pattern: 1/1, depth of coverage: 3, genotype quality: 9 ==> keep variants for which the sample's genotype is homozygous mutant **and** for which this genotype assignment is corroborated by a genotype quality score of at least 9 **and** at least three reads from the sample cover the variant site

**TIP:**

As in the example above, genotype quality is typically most useful in combination with a genotype pattern.
It acts then, effectively, to make the genotype filter more stringent.

@HELP_FOOTER@
    ]]></help>
    <expand macro="citations" />
</tool>