Mercurial > repos > wolma > mimodd_main
comparison vcf_filter.xml @ 0:f0f2795de2c7 draft
planemo upload for repository https://github.com/wm75/mimodd_galaxy_wrappers commit 528bcf3b769c7c73f119b2a176d19071f9ef5312
author | wolma |
---|---|
date | Tue, 19 Dec 2017 04:54:04 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f0f2795de2c7 |
---|---|
1 <tool id="mimodd_vcf_filter" name="MiModD VCF Filter" version="@MIMODD_WRAPPER_VERSION@"> | |
2 <description> | |
3 extracts lines from a vcf variant file based on field-specific filters | |
4 </description> | |
5 <macros> | |
6 <import>macros.xml</import> | |
7 </macros> | |
8 <expand macro="requirements" /> | |
9 <expand macro="stdio" /> | |
10 <expand macro="version_command" /> | |
11 <command><![CDATA[ | |
12 mimodd vcf-filter | |
13 '$inputfile' -o '$outputfile' | |
14 #if len($datasets): | |
15 -s | |
16 #for $i in $datasets | |
17 '$i.sample' | |
18 #end for | |
19 --gt | |
20 #for $i in $datasets | |
21 ## remove whitespace from free-text input; an empty pattern is passed as ANY | |
22 '#echo ("".join($i.GT.split()) or "ANY")#' | |
23 #echo " " | |
24 #end for | |
25 --dp | |
26 #for $i in $datasets | |
27 $i.DP | |
28 #end for | |
29 --gq | |
30 #for $i in $datasets | |
31 $i.GQ | |
32 #end for | |
33 --af | |
34 #for $i in $datasets | |
35 '#echo ($i.AF or "::")#' | |
36 #end for | |
37 #end if | |
38 #if len($regions): | |
39 -r | |
40 #for $i in $regions | |
41 #if $i.stop: | |
42 '$i.chrom:$i.start-$i.stop' | |
43 #else: | |
44 '$i.chrom:$i.start' | |
45 #end if | |
46 #end for | |
47 #end if | |
48 #if $vfilter: | |
49 --vfilter | |
50 ## split on ',' and emit each sample name as its own single-quoted argument | |
51 '#echo ("' '".join($vfilter.split(',')))#' | |
52 #end if | |
53 $vartype | |
54 ]]></command> | |
55 | |
56 <inputs> <!-- parameters referenced by name in the command template --> | |
57 <param name="inputfile" type="data" format="vcf" label="VCF input file" /> | |
58 <repeat name="datasets" title="Sample-specific Filter" default="0" min="0"> <!-- optional, repeatable per-sample filter; the command template loops over these entries once per option --> | |
59 <param name="sample" type="text" label="sample" | |
60 help="name of a sample as it appears in the VCF input file and that indicates the sample that this filter should be applied to."> | |
61 <expand macro="lex_sam_header" message="Non-ASCII characters are not valid in sample names." /> | |
62 </param> | |
63 <param name="GT" type="text" | |
64 label="genotype pattern(s) for the inclusion of variants" | |
65 help="keep only variants for which the genotype of the sample matches the specified pattern; format: x/x where x = 0 is wildtype and x = 1 is mutant. Multiple genotypes can be specified as a comma-separated list."> | |
66 <validator type="expression" message="Malformed genotype pattern">not value or all(c.isdigit() or c in './|' for token in value.split(',') for c in token.strip(' '))</validator> | |
67 </param> <!-- GT validator: accepts an empty value, or comma-separated tokens made only of digits and the characters . / | (spaces around a token are ignored) --> | |
68 <param name="DP" type="integer" value="0" | |
69 label="depth of coverage for the sample at the variant site" | |
70 help="keep only variants with at least this sample-specific coverage at the variant site" /> | |
71 <param name="GQ" type="integer" value="0" | |
72 label="genotype quality for the variant in the sample" | |
73 help="keep only variants for which the genotype prediction for the sample has at least this quality" /> | |
74 <param name="AF" type="text" | |
75 label="allelic fraction filter" | |
76 help="expected format: [allele number]:[minimal fraction]:[maximal fraction]; keep only variants for which the fraction of sample-specific reads supporting a given allele number is between minimal and maximal fraction; if allele number is omitted, the filter operates on the most frequent non-reference allele instead"> | |
77 <validator type="expression" message="Malformed allelic fraction filter">not value or all(c.isdigit() or c in '.:' for c in value)</validator> | |
78 </param> <!-- AF validator: accepts an empty value, or a string consisting only of digits, dots and colons --> | |
79 </repeat> | |
80 <repeat name="regions" title="Region Filter" default="0" min="0" | |
81 help="Filter variant sites by their position in the genome. If multiple Region Filters are specified, all variants that fall in ONE of the regions are reported."> <!-- optional, repeatable region filter; rendered as chrom:start or chrom:start-stop by the command template --> | |
82 <param name="chrom" type="text" label="Chromosome"> | |
83 <expand macro="lex_sam_header" message="Non-ASCII characters are not valid in chromosome names." /> | |
84 </param> | |
85 <param name="start" type="text" label="Region Start"> | |
86 <validator type="expression" message="an integer number is required">not value or value.isdigit()</validator> | |
87 </param> <!-- start validator: accepts an empty value or digits only --> | |
88 <param name="stop" type="text" label="Region End"> | |
89 <validator type="expression" message="an integer number is required">not value or value.isdigit()</validator> | |
90 </param> <!-- stop validator: accepts an empty value or digits only --> | |
91 </repeat> | |
92 <param name="vartype" type="select" | |
93 label="Select the types of variants to include in the output"> <!-- the selected option value is placed on the command line as is --> | |
94 <option value="">all types of variants</option> | |
95 <option value="--no-indels">exclude indels</option> | |
96 <option value="--indels-only">only indels</option> | |
97 </param> | |
98 <param name="vfilter" type="text" label="sample" | |
99 help="Filter output by sample name; only the sample-specific columns with their sample name matching any of the comma separated filters will be retained in the output."> | |
100 <expand macro="lex_sam_header" message="Non-ASCII characters are not valid in sample names." /> | |
101 </param> <!-- comma-separated list; the command template splits it on ',' --> | |
102 </inputs> | |
103 | |
104 <outputs> <!-- single VCF dataset; its path is handed to the tool through the -o option of the command template --> | |
105 <data name="outputfile" format="vcf" /> | |
106 </outputs> | |
107 | |
108 <tests> | |
109 <test> <!-- sample-specific genotype filter: sample N2 restricted to genotype 0/0 --> | |
110 <param name="inputfile" value="a.vcf" /> | |
111 <repeat name="datasets"> | |
112 <param name="sample" value="N2" /> | |
113 <param name="GT" value="0/0" /> | |
114 </repeat> | |
115 <output name="outputfile" ftype="vcf" compare="diff"> | |
116 <assert_contents> | |
117 <has_text text="GT:PL:DP:DPR:GQ	0/0" /> | |
118 <not_has_text text="GT:PL:DP:DPR:GQ	1/1" /> | |
119 <not_has_text text="GT:PL:DP:DPR:GQ	0/1" /> | |
120 </assert_contents> | |
121 </output> | |
122 </test> | |
123 <test> <!-- region filter given a chromosome name only (no start or end); output must mention chrX and none of the other listed chromosomes --> | |
124 <param name="inputfile" value="a.vcf" /> | |
125 <repeat name="regions"> | |
126 <param name="chrom" value="chrX" /> | |
127 </repeat> | |
128 <output name="outputfile" ftype="vcf"> | |
129 <assert_contents> | |
130 <has_text text="chrX	" /> | |
131 <not_has_text text="chrI	" /> | |
132 <not_has_text text="chrII	" /> | |
133 <not_has_text text="chrIII	" /> | |
134 <not_has_text text="chrIV	" /> | |
135 <not_has_text text="chrV	" /> | |
136 </assert_contents> | |
137 </output> | |
138 </test> | |
139 <test> <!-- indel exclusion combined with the sample-column filter: the header line must end with the single sample column ot266 --> | |
140 <param name="inputfile" value="a.vcf" /> | |
141 <param name="vartype" value="--no-indels" /> | |
142 <param name="vfilter" value="ot266" /> | |
143 <output name="outputfile" ftype="vcf"> | |
144 <assert_contents> | |
145 <not_has_text text="INDEL;" /> | |
146 <has_line line="#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	ot266" /> | |
147 </assert_contents> | |
148 </output> | |
149 </test> | |
150 </tests> | |
151 | |
152 <help><![CDATA[ | |
153 .. class:: infomark | |
154 | |
155 **What it does** | |
156 | |
157 The tool filters a variant file in VCF format to generate a new VCF file with only a subset of the original variants. | |
158 | |
159 The following types of variant filters can be set up: | |
160 | |
161 1) Sample-specific filters: | |
162 | |
163 Filter variants based on their characteristics in the sequenced reads of a specific sample. Multiple sample-specific filters are combined by logical AND, i.e., only variants that pass ALL sample-specific filters are kept. | |
164 | |
165 2) Region filters: | |
166 | |
167 Filter variants based on the genomic region they affect. Multiple region filters are combined by logical OR, i.e., variants passing ANY region filter are kept. | |
168 | |
169 3) Variant type filter: | |
170 | |
171 Filter variants by their type, i.e. whether they are single nucleotide variations (SNVs) or indels. | |
172 | |
173 In addition, the *sample* filter can be used to reduce the samples encoded in a multi-sample VCF file to just those specified by the filter. | |
174 The *sample* filter is included mainly for compatibility reasons: if an external tool cannot deal with the multisample file format, but instead looks only at the first sample-specific column of the file, you can use the filter to turn the multi-sample file into a single-sample file. Besides, the filter can also be used to change the order of the samples since it will sort the samples in the order specified in the filter field. | |
175 | |
176 **Examples of sample-specific filters:** | |
177 | |
178 *Simple genotype pattern* | |
179 | |
180 genotype pattern: 1/1 ==> keep all variants in the vcf input file for which the specified sample's genotype is homozygous mutant | |
181 | |
182 *Complex genotype pattern* | |
183 | |
184 genotype pattern: 0/1, 0/0 ==> keep all variants for which the sample's genotype is either heterozygous or homozygous wildtype | |
185 | |
186 *Multiple sample-specific filters* | |
187 | |
188 Filter 1: genotype pattern: 0/0, Filter 2: genotype pattern: 1/1 | |
189 ==> keep all variants for which the first sample's genotype is homozygous wildtype **and** the second sample's genotype is homozygous mutant | |
190 | |
191 *Combining sample-specific filter criteria* | |
192 | |
193 genotype pattern: 1/1, depth of coverage: 3, genotype quality: 9 | |
194 ==> keep variants for which the sample's genotype is homozygous mutant **and** for which this genotype assignment is corroborated by a genotype quality score of at least 9 | |
195 **and** at least three reads from the sample cover the variant site | |
196 | |
197 **TIP:** | |
198 | |
199 As in the example above, genotype quality is typically most useful in combination with a genotype pattern. | |
200 It acts then, effectively, to make the genotype filter more stringent. | |
201 | |
202 @HELP_FOOTER@ | |
203 ]]></help> | |
204 <expand macro="citations" /> | |
205 </tool> |