comparison variant_filtration.xml @ 0:da6e2503c62d draft default tip

Imported from capsule None
author devteam
date Tue, 01 Apr 2014 10:50:10 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:da6e2503c62d
1 <tool id="gatk_variant_filtration" name="Variant Filtration" version="0.0.5">
2 <description>on VCF files</description>
3 <requirements>
4 <requirement type="package" version="1.4">gatk</requirement>
5 </requirements>
6 <macros>
7 <import>gatk_macros.xml</import>
8 </macros>
9 <command interpreter="python">gatk_wrapper.py ##Cheetah template that assembles the gatk_wrapper.py call running GATK VariantFiltration
10 #from binascii import hexlify
11 --max_jvm_heap_fraction "1"
12 --stdout "${output_log}" ##wrapper stdout goes to the log output dataset
13 -d "--variant:variant,%(file_type)s" "${reference_source.input_variant}" "${reference_source.input_variant.ext}" "input_variant" ##input VCF, passed with its extension and the label input_variant
14 -p 'java
15 -jar "\$JAVA_JAR_PATH/GenomeAnalysisTK.jar"
16 -T "VariantFiltration"
17 ##--num_threads 4 ##hard coded, for now
18 -et "NO_ET" ##ET no phone home
19 -o "${output_vcf}"
20 ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout
21 #if str( $reference_source.reference_source_selector ) != "history":
22 -R "${reference_source.ref_file.fields.path}" ##path field of the selected built-in reference entry
23 #end if
24 '
25 #for $variant_filter in $variant_filters:
26 #set $variant_filter = "--%sExpression '%s' --%sName '%s'" % ( str( $variant_filter.is_genotype_filter ), str( $variant_filter.filter_expression ), str( $variant_filter.is_genotype_filter ), str( $variant_filter.filter_name ) )
27 -o '${ hexlify( $variant_filter ) }' ##hex-encoded so the quotes and spaces of the filter string are not split by the shell; presumably decoded by gatk_wrapper.py (confirm)
28 #end for
29 ##optional mask ROD: bind the mask file under the chosen name, then pass the mask extension and name
30 #if str( $mask_rod_bind_type.mask_rod_bind_type_selector ) == 'set_mask':
31 -d "--mask:${mask_rod_bind_type.mask_rod_name},%(file_type)s" "${mask_rod_bind_type.input_mask_rod}" "${mask_rod_bind_type.input_mask_rod.ext}" "input_mask_${mask_rod_bind_type.mask_rod_name}"
32 -p '
33 --maskExtension "${mask_rod_bind_type.mask_extension}"
34 --maskName "${mask_rod_bind_type.mask_rod_name}"
35 '
36 #end if
37 ##shared options come from the standard_gatk_options template (expected to be defined in gatk_macros.xml; confirm)
38 #include source=$standard_gatk_options#
39
40 ##start analysis specific options
41 #if str( $cluster_snp_type.cluster_snp_type_selector ) == "cluster_snp":
42 -p '
43 --clusterSize "${cluster_snp_type.cluster_size}"
44 --clusterWindowSize "${cluster_snp_type.cluster_window_size}"
45 '
46 #end if
47 -p '${missing_values_in_expressions_should_evaluate_as_failing}' ##expands to the flag when checked, to an empty string otherwise
48 </command>
49 <inputs>
50 <conditional name="reference_source"> <!-- Reference genome source; each branch carries the input VCF and its own ref_file parameter -->
51 <expand macro="reference_source_selector_param" />
52 <when value="cached">
53 <param name="input_variant" type="data" format="vcf" label="Variant file to filter" help="-V,--variant &amp;lt;variant&amp;gt;" />
54 <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;">
55 <options from_data_table="gatk_picard_indexes">
56 <filter type="data_meta" key="dbkey" ref="input_variant" column="dbkey"/> <!-- only offer references whose dbkey matches the input VCF -->
57 </options>
58 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
59 </param>
60 </when>
61 <when value="history"> <!-- FIX ME!!!! -->
62 <param name="input_variant" type="data" format="vcf" label="Variant file to filter" help="-V,--variant &amp;lt;variant&amp;gt;" />
63 <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
64 </when>
65 </conditional>
66
67
68 <repeat title="Variant Filters" name="variant_filters"> <!-- One entry per filter: an expression, a filter name and the level it applies to -->
69 <param type="text" name="filter_expression" value="AB &lt; 0.2 || MQ0 &gt; 50" label="Filter expression" help="JEXL formatted expressions (-filter,--filterExpression &amp;lt;filterExpression&amp;gt;)">
70 <sanitizer> <!-- printable characters are valid except the single quote; no character mapping is applied -->
71 <valid initial="string.printable">
72 <remove value="&apos;" />
73 </valid>
74 <mapping initial="none" />
75 </sanitizer>
76 </param>
77 <param type="text" name="filter_name" value="custom_filter" label="Filter name" help="-filterName,--filterName &amp;lt;filterName&amp;gt;" />
78 <param type="boolean" name="is_genotype_filter" falsevalue="filter" truevalue="genotypeFilter" label="Use filter at the individual sample level" help="Use -G_filter,--genotypeFilterExpression &amp;lt;genotypeFilterExpression&amp;gt; and -G_filterName,--genotypeFilterName &amp;lt;genotypeFilterName&amp;gt; for filter type"/>
79 </repeat>
80
81
82
83 <conditional name="mask_rod_bind_type"> <!-- Optional mask ROD; the selector decides whether the mask inputs are requested -->
84 <param name="mask_rod_bind_type_selector" type="select" label="Provide a Mask reference-ordered data file">
85 <option value="set_mask" selected="True">Set mask</option>
86 <option value="exclude_mask">Don't set mask</option>
87 </param>
88 <when value="exclude_mask">
89 <!-- Do nothing here -->
90 </when>
91 <when value="set_mask">
92 <param name="input_mask_rod" type="data" format="bed,gatk_dbsnp,vcf" label="Mask ROD file" help="--mask &amp;lt;mask&amp;gt;" />
93 <param name="mask_rod_name" type="text" value="Mask" label="Mask Name" help="-maskName,--maskName &amp;lt;maskName&amp;gt;"/> <!-- used in the command template both as the ROD binding name and as the maskName value -->
94 <param name="mask_extension" type="integer" value="0" label="Mask Extension" help="-maskExtend,--maskExtension &amp;lt;maskExtension&amp;gt;"/>
95 </when>
96 </conditional>
97
98
99 <expand macro="gatk_param_type_conditional" />
100
101 <conditional name="cluster_snp_type"> <!-- SNP clustering is off by default; enabling it exposes the cluster size and window parameters -->
102 <param type="select" name="cluster_snp_type_selector" label="Cluster SNPs">
103 <option value="cluster_snp">Cluster SNPs</option>
104 <option selected="True" value="do_not_cluster_snp">Do not cluster SNPs</option>
105 </param>
106 <when value="do_not_cluster_snp">
107 <!-- No extra parameters for this choice -->
108 </when>
109 <when value="cluster_snp">
110 <param type="integer" name="cluster_size" value="3" label="The number of SNPs which make up a cluster" help="-cluster,--clusterSize &amp;lt;clusterSize&amp;gt;" />
111 <param type="integer" name="cluster_window_size" value="0" label="The window size (in bases) in which to evaluate clustered SNPs" help="-window,--clusterWindowSize &amp;lt;clusterWindowSize&amp;gt;" />
112 </when>
113 </conditional>
114
115 <param type="boolean" name="missing_values_in_expressions_should_evaluate_as_failing" falsevalue="" truevalue="--missingValuesInExpressionsShouldEvaluateAsFailing" label="Should missing values be considered failing the expression" help="--missingValuesInExpressionsShouldEvaluateAsFailing"/> <!-- checked: the truevalue flag is inserted into the command; unchecked: an empty string -->
116
117 </inputs>
118 <outputs> <!-- The VCF written by the tool plus the captured wrapper log -->
119 <data name="output_vcf" format="vcf" label="${tool.name} on ${on_string} (Variant File)"/>
120 <data name="output_log" format="txt" label="${tool.name} on ${on_string} (log)"/>
121 </outputs>
122 <tests>
123 <test> <!-- History reference, one filter, mask enabled, SNP clustering off -->
124 <param name="reference_source_selector" value="history"/>
125 <param name="ref_file" ftype="fasta" value="phiX.fasta"/>
126 <param name="input_variant" ftype="vcf" value="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf"/>
127 <param name="filter_expression" value="MQ &lt; 37.74 || MQ0 &gt; 50"/>
128 <param name="filter_name" value="Galaxy_filter"/>
129 <param name="is_genotype_filter"/> <!-- no value given -->
130 <param name="mask_rod_bind_type_selector" value="set_mask"/>
131 <param name="input_mask_rod" ftype="bed" value="gatk/fake_phiX_variant_locations.bed"/>
132 <param name="mask_rod_name" value="."/>
133 <param name="mask_extension" value="0"/>
134 <param name="gatk_param_type_selector" value="basic"/>
135 <param name="cluster_snp_type_selector" value="do_not_cluster_snp"/>
136 <param name="missing_values_in_expressions_should_evaluate_as_failing"/> <!-- no value given -->
137 <output name="output_vcf" lines_diff="4" file="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf"/>
138 <output name="output_log" compare="contains" file="gatk/gatk_variant_filtration/gatk_variant_filtration_out_1.log.contains"/>
139 </test>
140 </tests>
141 <help>
142 **What it does**
143
144 Filters variant calls using a number of user-selectable, parameterizable criteria.
145
146 For more information on using the VariantFiltration module, see this `tool specific page &lt;http://www.broadinstitute.org/gsa/wiki/index.php/VariantFiltrationWalker&gt;`_.
147
148 To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3&gt;`_.
149
150 If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions&gt;`_.
151
152 ------
153
154 **Inputs**
155
156 GenomeAnalysisTK: VariantFiltration accepts a VCF input file.
157
158
159 **Outputs**
160
161 The output is in VCF format.
162
163
164 Go `here &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK&gt;`_ for details on GATK file formats.
165
166 -------
167
168 **Settings**::
169
170
171 filterExpression One or more expressions used with INFO fields to filter (see wiki docs for more info)
172 filterName Names to use for the list of filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered
173 genotypeFilterExpression One or more expressions used with FORMAT (sample/genotype-level) fields to filter (see wiki docs for more info)
174 genotypeFilterName Names to use for the list of sample/genotype filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered
175 clusterSize The number of SNPs which make up a cluster (see also --clusterWindowSize); [default:3]
176 clusterWindowSize The window size (in bases) in which to evaluate clustered SNPs (to disable the clustered SNP filter, set this value to less than 1); [default:0]
177 maskName The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call; [default:'Mask']
178 missingValuesInExpressionsShouldEvaluateAsFailing When evaluating the JEXL expressions, should missing values be considered failing the expression (by default they are considered passing)?
179
180 @CITATION_SECTION@
181 </help>
182 </tool>