comparison vcf_filter.xml @ 0:f0f2795de2c7 draft

planemo upload for repository https://github.com/wm75/mimodd_galaxy_wrappers commit 528bcf3b769c7c73f119b2a176d19071f9ef5312
author wolma
date Tue, 19 Dec 2017 04:54:04 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:f0f2795de2c7
1 <tool id="mimodd_vcf_filter" name="MiModD VCF Filter" version="@MIMODD_WRAPPER_VERSION@">
2 <description>
3 extracts lines from a vcf variant file based on field-specific filters
4 </description>
5 <macros>
6 <import>macros.xml</import>
7 </macros>
8 <expand macro="requirements" />
9 <expand macro="stdio" />
10 <expand macro="version_command" />
11 <command><![CDATA[
12 mimodd vcf-filter
13 '$inputfile' -o '$outputfile'
14 #if len($datasets):
15 -s
16 #for $i in $datasets
17 '$i.sample'
18 #end for
19 --gt
20 #for $i in $datasets
21 ## strip all whitespace from the free-text genotype pattern; an empty pattern falls back to ANY
22 '#echo ("".join($i.GT.split()) or "ANY")#'
23 #echo " "
24 #end for
25 --dp
26 #for $i in $datasets
27 $i.DP
28 #end for
29 --gq
30 #for $i in $datasets
31 $i.GQ
32 #end for
33 --af
34 #for $i in $datasets
35 '#echo ($i.AF or "::")#'
36 #end for
37 #end if
38 #if len($regions):
39 -r
40 #for $i in $regions
41 #if $i.stop:
42 '$i.chrom:$i.start-$i.stop'
43 #else:
44 '$i.chrom:$i.start'
45 #end if
46 #end for
47 #end if
48 #if $vfilter:
49 --vfilter
50 ## split the comma-separated sample list; each name becomes its own single-quoted shell argument
51 '#echo ("' '".join($vfilter.split(',')))#'
52 #end if
53 $vartype
54 ]]></command>
55
56 <inputs>
57 <param name="inputfile" type="data" format="vcf" label="VCF input file" />
58 <repeat name="datasets" title="Sample-specific Filter" default="0" min="0">
59 <param name="sample" type="text" label="sample"
60 help="name of a sample as it appears in the VCF input file and that indicates the sample that this filter should be applied to.">
61 <expand macro="lex_sam_header" message="Non-ASCII characters are not valid in sample names." />
62 </param>
63 <param name="GT" type="text"
64 label="genotype pattern(s) for the inclusion of variants"
65 help="keep only variants for which the genotype of the sample matches the specified pattern; format: x/x where x = 0 is wildtype and x = 1 is mutant. Multiple genotypes can be specified as a comma-separated list.">
66 <validator type="expression" message="Malformed genotype pattern">not value or all(c.isdigit() or c in './|' for token in value.split(',') for c in token.strip(' '))</validator>
67 </param>
68 <param name="DP" type="integer" value="0" min="0"
69 label="depth of coverage for the sample at the variant site"
70 help="keep only variants with at least this sample-specific coverage at the variant site" />
71 <param name="GQ" type="integer" value="0" min="0"
72 label="genotype quality for the variant in the sample"
73 help="keep only variants for which the genotype prediction for the sample has at least this quality" />
74 <param name="AF" type="text"
75 label="allelic fraction filter"
76 help="expected format: [allele number]:[minimal fraction]:[maximal fraction]; keep only variants for which the fraction of sample-specific reads supporting a given allele number is between minimal and maximal fraction; if allele number is omitted, the filter operates on the most frequent non-reference allele instead">
77 <validator type="expression" message="Malformed allelic fraction filter">not value or all(c.isdigit() or c in '.:' for c in value)</validator>
78 </param>
79 </repeat>
80 <repeat name="regions" title="Region Filter" default="0" min="0"
81 help="Filter variant sites by their position in the genome. If multiple Region Filters are specified, all variants that fall in ONE of the regions are reported.">
82 <param name="chrom" type="text" label="Chromosome">
83 <expand macro="lex_sam_header" message="Non-ASCII characters are not valid in chromosome names." />
84 </param>
85 <param name="start" type="text" label="Region Start">
86 <validator type="expression" message="an integer number is required">not value or value.isdigit()</validator>
87 </param>
88 <param name="stop" type="text" label="Region End">
89 <validator type="expression" message="an integer number is required">not value or value.isdigit()</validator>
90 </param>
91 </repeat>
92 <param name="vartype" type="select"
93 label="Select the types of variants to include in the output">
94 <option value="">all types of variants</option>
95 <option value="--no-indels">exclude indels</option>
96 <option value="--indels-only">only indels</option>
97 </param>
98 <param name="vfilter" type="text" label="sample"
99 help="Filter output by sample name; only the sample-specific columns with their sample name matching any of the comma separated filters will be retained in the output.">
100 <expand macro="lex_sam_header" message="Non-ASCII characters are not valid in sample names." />
101 </param>
102 </inputs>
103
104 <outputs>
105 <data name="outputfile" format="vcf" />
106 </outputs>
107
108 <tests>
109 <test>
110 <param name="inputfile" value="a.vcf" />
111 <repeat name="datasets">
112 <param name="sample" value="N2" />
113 <param name="GT" value="0/0" />
114 </repeat>
115 <output name="outputfile" ftype="vcf" compare="diff">
116 <assert_contents>
117 <has_text text="GT:PL:DP:DPR:GQ&#009;0/0" />
118 <not_has_text text="GT:PL:DP:DPR:GQ&#009;1/1" />
119 <not_has_text text="GT:PL:DP:DPR:GQ&#009;0/1" />
120 </assert_contents>
121 </output>
122 </test>
123 <test>
124 <param name="inputfile" value="a.vcf" />
125 <repeat name="regions">
126 <param name="chrom" value="chrX" />
127 </repeat>
128 <output name="outputfile" ftype="vcf">
129 <assert_contents>
130 <has_text text="chrX&#009;" />
131 <not_has_text text="chrI&#009;" />
132 <not_has_text text="chrII&#009;" />
133 <not_has_text text="chrIII&#009;" />
134 <not_has_text text="chrIV&#009;" />
135 <not_has_text text="chrV&#009;" />
136 </assert_contents>
137 </output>
138 </test>
139 <test>
140 <param name="inputfile" value="a.vcf" />
141 <param name="vartype" value="--no-indels" />
142 <param name="vfilter" value="ot266" />
143 <output name="outputfile" ftype="vcf">
144 <assert_contents>
145 <not_has_text text="INDEL;" />
146 <has_line line="#CHROM&#009;POS&#009;ID&#009;REF&#009;ALT&#009;QUAL&#009;FILTER&#009;INFO&#009;FORMAT&#009;ot266" />
147 </assert_contents>
148 </output>
149 </test>
150 </tests>
151
152 <help><![CDATA[
153 .. class:: infomark
154
155 **What it does**
156
157 The tool filters a variant file in VCF format to generate a new VCF file with only a subset of the original variants.
158
159 The following types of variant filters can be set up:
160
161 1) Sample-specific filters:
162
163 Filter variants based on their characteristics in the sequenced reads of a specific sample. Multiple sample-specific filters are combined by logical AND, i.e., only variants that pass ALL sample-specific filters are kept.
164
165 2) Region filters:
166
167 Filter variants based on the genomic region they affect. Multiple region filters are combined by logical OR, i.e., variants passing ANY region filter are kept.
168
169 3) Variant type filter:
170
171 Filter variants by their type, i.e., whether they are single nucleotide variations (SNVs) or indels.
172
173 In addition, the *sample* filter can be used to reduce the samples encoded in a multi-sample VCF file to just those specified by the filter.
174 The *sample* filter is included mainly for compatibility reasons: if an external tool cannot deal with the multi-sample file format, but instead looks only at the first sample-specific column of the file, you can use the filter to turn the multi-sample file into a single-sample file. Besides, the filter can also be used to change the order of the samples since it will sort the samples in the order specified in the filter field.
175
176 **Examples of sample-specific filters:**
177
178 *Simple genotype pattern*
179
180 genotype pattern: 1/1 ==> keep all variants in the vcf input file for which the specified sample's genotype is homozygous mutant
181
182 *Complex genotype pattern*
183
184 genotype pattern: 0/1, 0/0 ==> keep all variants for which the sample's genotype is either heterozygous or homozygous wildtype
185
186 *Multiple sample-specific filters*
187
188 Filter 1: genotype pattern: 0/0, Filter 2: genotype pattern: 1/1
189 ==> keep all variants for which the first sample's genotype is homozygous wildtype **and** the second sample's genotype is homozygous mutant
190
191 *Combining sample-specific filter criteria*
192
193 genotype pattern: 1/1, depth of coverage: 3, genotype quality: 9
194 ==> keep variants for which the sample's genotype is homozygous mutant **and** for which this genotype assignment is corroborated by a genotype quality score of at least 9
195 **and** at least three reads from the sample cover the variant site
196
197 **TIP:**
198
199 As in the example above, genotype quality is typically most useful in combination with a genotype pattern.
200 It acts then, effectively, to make the genotype filter more stringent.
201
202 @HELP_FOOTER@
203 ]]></help>
204 <expand macro="citations" />
205 </tool>