annotate vcf_filter.xml @ 21:c46406466625 draft

Uploaded
author wolma
date Sat, 11 Jun 2016 19:10:44 -0400
parents 93db2f9bca12
children 5db0545b9004
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9
93db2f9bca12 upgrade to v0.1.7.2
Wolfgang Maier wolfgang.maier@biologie.uni-freiburg.de
parents: 8
diff changeset
1 <tool id="vcf_filter" name="VCF Filter" version="0.1.7.2">
0
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
2 <description>Extracts lines from a vcf variant file based on field-specific filters</description>
2
72d20758ba2c final upload
wolma
parents: 1
diff changeset
3 <macros>
72d20758ba2c final upload
wolma
parents: 1
diff changeset
4 <import>toolshed_macros.xml</import>
72d20758ba2c final upload
wolma
parents: 1
diff changeset
5 </macros>
72d20758ba2c final upload
wolma
parents: 1
diff changeset
6 <expand macro="requirements"/>
21
c46406466625 Uploaded
wolma
parents: 9
diff changeset
7 <version_command>python3 -m MiModD version -q</version_command>
0
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
8 <command>
21
c46406466625 Uploaded
wolma
parents: 9
diff changeset
9 python3 -m MiModD vcf-filter
0
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
10 "$inputfile"
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
11 -o "$outputfile"
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
12 #if len($datasets):
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
13 -s
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
14 #for $i in $datasets
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
15 "$i.sample"
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
16 #end for
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
17 --gt
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
18 #for $i in $datasets
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
19 ## remove whitespace from free-text input
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
20 "#echo ("".join($i.GT.split()) or "ANY")#"
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
21 #echo " "
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
22 #end for
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
23 --dp
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
24 #for $i in $datasets
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
25 "$i.DP"
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
26 #end for
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
27 --gq
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
28 #for $i in $datasets
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
29 "$i.GQ"
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
30 #end for
5
bdd1995c9e66 upgrade to mimodd version 0.1.6.1
Wolfgang Maier
parents: 4
diff changeset
31 --af
bdd1995c9e66 upgrade to mimodd version 0.1.6.1
Wolfgang Maier
parents: 4
diff changeset
32 #for $i in $datasets
bdd1995c9e66 upgrade to mimodd version 0.1.6.1
Wolfgang Maier
parents: 4
diff changeset
33 "#echo ($i.AF or "::")#"
bdd1995c9e66 upgrade to mimodd version 0.1.6.1
Wolfgang Maier
parents: 4
diff changeset
34 #end for
0
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
35 #end if
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
36 #if len($regions):
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
37 -r
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
38 #for $i in $regions
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
39 #if $i.stop:
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
40 "$i.chrom:$i.start-$i.stop"
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
41 #else:
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
42 "$i.chrom:$i.start"
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
43 #end if
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
44 #end for
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
45 #end if
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
46 #if $vfilter:
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
47 --vfilter
4
ffee8534a5c4 upgrade to mimodd version 0.1.6
Wolfgang Maier
parents: 2
diff changeset
48 ## remove ',' and replace with ' '
0
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
49 "#echo ('" "'.join($vfilter.split(',')))#"
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
50 #end if
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
51 $vartype
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
52 </command>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
53
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
54 <inputs>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
55 <param name="inputfile" type="data" format="vcf" label="VCF input file" />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
56 <repeat name="datasets" title="Sample-specific Filter" default="0" min="0">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
57 <param name="sample" type="text" label="sample" help="name of a sample as it appears in the VCF input file and that indicates the sample that this filter should be applied to." />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
58 <param name="GT" type="text" label="genotype pattern(s) for the inclusion of variants" help="keep only variants for which the genotype of the sample matches the specified pattern; format: x/x where x = 0 is wildtype and x = 1 is mutant. Multiple genotypes can be specified as a comma-separated list." />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
59 <param name="DP" type="integer" label="depth of coverage for the sample at the variant site" value="0" min="0" help="keep only variants with at least this sample-specific coverage at the variant site" />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
60 <param name="GQ" type="integer" label="genotype quality for the variant in the sample" value="0" min="0" help="keep only variants for which the genotype prediction for the sample has at least this quality" />
5
bdd1995c9e66 upgrade to mimodd version 0.1.6.1
Wolfgang Maier
parents: 4
diff changeset
61 <param name="AF" type="text" label="allelic fraction filter" help="expected format: [allele number]:[minimal fraction]:[maximal fraction]; keep only variants for which the fraction of sample-specific reads supporting a given allele number is between minimal and maximal fraction; if allele number is omitted, the filter operates on the most frequent non-reference allele instead" />
0
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
62 </repeat>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
63 <repeat name="regions" title="Region Filter" default="0" min="0" help="Filter variant sites by their position in the genome. If multiple Region Filters are specified, all variants that fall in ONE of the regions are reported.">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
64 <param name="chrom" type="text" label="Chromosome" />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
65 <param name="start" type="text" label="Region Start" />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
66 <param name="stop" type="text" label="Region End" />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
67 </repeat>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
68 <param name="vartype" type="select" label="Select the types of variants to include in the output">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
69 <option value="">all types of variants</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
70 <option value="--no-indels">exclude indels</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
71 <option value="--indels-only">only indels</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
72 </param>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
73 <param name="vfilter" type="text" label="sample" help="Filter output by sample name; only the sample-specific columns with their sample name matching any of the comma separated filters will be retained in the output." />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
74 </inputs>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
75
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
76 <outputs>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
77 <data name="outputfile" format="vcf" />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
78 </outputs>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
79
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
80 <help>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
81 .. class:: infomark
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
82
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
83 **What it does**
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
84
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
85 The tool filters a variant file in VCF format to generate a new VCF file with only a subset of the original variants.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
86
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
87 The following types of variant filters can be set up:
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
88
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
89 1) Sample-specific filters:
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
90
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
91 Filter variants based on their characteristics in the sequenced reads of a specific sample. Multiple sample-specific filters are combined by logical AND, i.e., only variants that pass ALL sample-specific filters are kept.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
92
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
93 2) Region filters:
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
94
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
95 Filter variants based on the genomic region they affect. Multiple region filters are combined by logical OR, i.e., variants passing ANY region filter are kept.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
96
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
97 3) Variant type filter:
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
98
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
99 Filter variants by their type, i.e., whether they are single nucleotide variations (SNVs) or indels.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
100
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
101 In addition, the *sample* filter can be used to reduce the samples encoded in a multi-sample VCF file to just those specified by the filter.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
102 The *sample* filter is included mainly for compatibility reasons: if an external tool cannot deal with the multi-sample file format, but instead looks only at the first sample-specific column of the file, you can use the filter to turn the multi-sample file into a single-sample file. Besides, the filter can also be used to change the order of the samples since it will sort the samples in the order specified in the filter field.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
103
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
104 **Examples of sample-specific filters:**
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
105
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
106 *Simple genotype pattern*
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
107
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
108 genotype pattern: 1/1 ==> keep all variants in the vcf input file for which the specified sample's genotype is homozygous mutant
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
109
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
110 *Complex genotype pattern*
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
111
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
112 genotype pattern: 0/1, 0/0 ==> keep all variants for which the sample's genotype is either heterozygous or homozygous wildtype
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
113
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
114 *Multiple sample-specific filters*
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
115
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
116 Filter 1: genotype pattern: 0/0, Filter 2: genotype pattern: 1/1
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
117 ==> keep all variants for which the first sample's genotype is homozygous wildtype **and** the second sample's genotype is homozygous mutant
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
118
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
119 *Combining sample-specific filter criteria*
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
120
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
121 genotype pattern: 1/1, depth of coverage: 3, genotype quality: 9
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
122 ==> keep variants for which the sample's genotype is homozygous mutant **and** for which this genotype assignment is corroborated by a genotype quality score of at least 9
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
123 **and** at least three reads from the sample cover the variant site
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
124
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
125 **TIP:**
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
126
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
127 As in the example above, genotype quality is typically most useful in combination with a genotype pattern.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
128 In effect, it makes the genotype filter more stringent.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
129
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
130
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
131
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
132 </help>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
133 </tool>