comparison variant_eval.xml @ 15:01ff8dd37d4d draft default tip

Uploaded
author lz_hust
date Sat, 01 Jun 2019 07:20:41 -0400
parents
children
comparison
equal deleted inserted replaced
14:68426930d59c 15:01ff8dd37d4d
1 <tool id="gatk2_variant_eval" name="Eval Variants" version="@VERSION@.1">
2 <description></description>
3 <macros>
4 <import>gatk2_macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
7 <expand macro="version_command" />
8 <command interpreter="python">
9 #from binascii import hexlify
10 ## Builds the gatk2_wrapper.py call; the -d, -p and -o arguments assembled below are interpreted by that wrapper (not shown in this file).
11 gatk2_wrapper.py
12 --stdout "${output_log}"
13 #for $var_count, $variant in enumerate( $reference_source.input_variants ):
14 -d "--eval:input_${var_count},%(file_type)s" "${variant}" "${variant.ext}" "input_variants_${var_count}"
15 #end for
16 -p '
17 @JAR_PATH@
18 -T "VariantEval"
19 --out "${output_report}"
20 \$GATK2_SITE_OPTIONS
21
22 @THREADS@
23
24 #if $reference_source.reference_source_selector != "history":
25 -R "${reference_source.ref_file.fields.path}"
26 #end if
27 '
28 ## Comparison RODs: test the boolean itself, because its string form is non-empty for both states and a str() check would always pass.
29 #for $rod_binding in $comp_rod_bind:
30 -d "--comp:${rod_binding.comp_rod_name},%(file_type)s" "${rod_binding.comp_input_rod}" "${rod_binding.comp_input_rod.ext}" "input_comp_${rod_binding.comp_rod_name}"
31 #if $rod_binding.comp_known_names:
32 -p '--known_names "${rod_binding.comp_rod_name}"'
33 #end if
34 #end for
35 ## Optional dbSNP ROD, using the same known_names handling as the comparison RODs above.
36 #if $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector == 'set_dbsnp':
37 -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}"
38 #if $dbsnp_rod_bind_type.dbsnp_known_names:
39 -p '--known_names "${dbsnp_rod_bind_type.dbsnp_rod_name}"'
40 #end if
41 #end if
42
43 #include source=$standard_gatk_options#
44
45 ##start analysis specific options
46 #if $analysis_param_type.analysis_param_type_selector == "advanced":
47 #for $stratification in $analysis_param_type.stratifications:
48 #set $select_string = "--select_exps '%s' --select_names '%s'" % ( str( $stratification.select_exps ), str( $stratification.select_name ) )
49 -o '${ hexlify( $select_string ) }'
50 #end for
51 -p '
52
53 #for $sample in $analysis_param_type.samples:
54 --sample "${sample.sample}"
55 #end for
56
57 #if str( $analysis_param_type.stratification_modules ) != "None":
58 #for $stratification_module in str( $analysis_param_type.stratification_modules).split( ',' ):
59 --stratificationModule "${stratification_module}"
60 #end for
61 #end if
62
63 ${analysis_param_type.do_not_use_all_standard_stratifications}
64
65 #for $variant_type in $analysis_param_type.only_variants_of_type:
66 --onlyVariantsOfType "${variant_type.variant_type}"
67 #end for
68
69 #if str( $analysis_param_type.eval_modules ) != "None":
70 #for $eval_module in str( $analysis_param_type.eval_modules).split( ',' ):
71 --evalModule "${eval_module}"
72 #end for
73 #end if
74
75 ${analysis_param_type.do_not_use_all_standard_modules}
76
77 #if str( $analysis_param_type.num_samples ) != "0":
78 --numSamples "${analysis_param_type.num_samples}"
79 #end if
80
81 --minPhaseQuality "${analysis_param_type.min_phase_quality}"
82
83 --mendelianViolationQualThreshold "${analysis_param_type.mendelian_violation_qual_threshold}"
84
85 #if str( $analysis_param_type.ancestral_alignments ) != "None":
86 --ancestralAlignments "${analysis_param_type.ancestral_alignments}"
87 #end if
88 '
89 #if str( $analysis_param_type.known_cnvs ) != "None":
90 -d "--knownCNVs" "${analysis_param_type.known_cnvs}" "${analysis_param_type.known_cnvs.ext}" "input_known_cnvs"
91 #end if
92
93 #if str( $analysis_param_type.strat_intervals ) != "None":
94 -d "--stratIntervals" "${analysis_param_type.strat_intervals}" "${analysis_param_type.strat_intervals.ext}" "input_strat_intervals"
95 #end if
96 #end if
97 </command>
98 <inputs>
99 <!-- Reference genome source; both branches also expand the input_variants macro. -->
100 <conditional name="reference_source">
101 <expand macro="reference_source_selector_param" />
102 <when value="cached">
103 <expand macro="input_variants" help="-eval,--eval &amp;lt;eval&amp;gt;"/>
104 <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;">
105 <options from_data_table="gatk2_picard_indexes">
106 <!-- <filter type="data_meta" key="dbkey" ref="input_variant" column="dbkey"/> -->
107 </options>
108 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
109 </param>
110 </when>
111 <when value="history"> <!-- FIX ME!!!! -->
112 <expand macro="input_variants" help="-eval,--eval &amp;lt;eval&amp;gt;" />
113 <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
114 </when>
115 </conditional>
116 <!-- Zero or more comparison ROD files; each name is restricted to word characters by the regex validator. -->
117 <repeat name="comp_rod_bind" title="Comparison Reference-Ordered Data (ROD) file" help="-comp,--comp &amp;lt;comp&amp;gt;">
118 <param name="comp_input_rod" type="data" format="vcf" label="Comparison ROD file" />
119 <param name="comp_rod_name" type="text" value="" label="Comparison ROD name">
120 <validator type="regex" message="Value must be a not empty string composed by alphanumeric characters and underscores">^\w+$</validator>
121 </param>
122 <param name="comp_known_names" type="boolean" label="Use comparison ROD file as known_names" help="-knownName,--known_names &amp;lt;known_names&amp;gt;"/>
123 </repeat>
124 <conditional name="dbsnp_rod_bind_type">
125 <param name="dbsnp_rod_bind_type_selector" type="select" label="Provide a dbSNP Reference-Ordered Data (ROD) file" help="-D,--dbsnp &amp;lt;dbsnp&amp;gt;">
126 <option value="set_dbsnp" selected="True">Set dbSNP</option>
127 <option value="exclude_dbsnp">Don't set dbSNP</option>
128 </param>
129 <when value="exclude_dbsnp" />
130 <when value="set_dbsnp">
131 <param name="dbsnp_input_rod" type="data" format="vcf" label="dbSNP ROD file" />
132 <param name="dbsnp_rod_name" type="text" value="dbsnp" label="dbsnp ROD name">
133 <validator type="regex" message="Value must be a not empty string composed by alphanumeric characters and underscores">^\w+$</validator>
134 </param>
135 <param name="dbsnp_known_names" type="boolean" label="Use dbSNP ROD file as known_names" help="-knownName,--known_names &amp;lt;known_names&amp;gt;" />
136 </when>
137 </conditional>
138
139 <expand macro="gatk_param_type_conditional" />
140 <!-- The parameters below are read by the command template as analysis_param_type members when the "advanced" mode is selected. -->
141 <expand macro="analysis_type_conditional">
142 <repeat name="stratifications" title="Stratification">
143 <param name="select_exps" value="" type="text" label="Stratification Expression" help="-select,--select_exps &amp;lt;select_exps&amp;gt;">
144 <sanitizer>
145 <valid initial="string.printable">
146 <remove value="&apos;"/>
147 </valid>
148 <mapping initial="none"/>
149 </sanitizer>
150 </param>
151 <param name="select_name" value="" type="text" label="Name" help="-selectName,--select_names &amp;lt;select_names&amp;gt;"/>
152 </repeat>
153
154 <repeat name="samples" title="Sample" help="-sn,--sample &amp;lt;sample&amp;gt;">
155 <param name="sample" value="" type="text" label="Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context"/>
156 </repeat>
157
158 <param name="stratification_modules" type="select" multiple="True" display="checkboxes" label="Stratification modules to apply to the eval track(s)" help="-ST,--stratificationModule &amp;lt;stratificationModule&amp;gt;" >
159 <option value="AlleleCount" />
160 <option value="AlleleFrequency" />
161 <option value="CompRod" />
162 <option value="Contig" />
163 <option value="CpG" />
164 <option value="Degeneracy" />
165 <option value="EvalRod" />
166 <option value="Filter" />
167 <option value="FunctionalClass" />
168 <option value="IndelSize" />
169 <option value="IntervalStratification" />
170 <option value="JexlExpression" />
171 <option value="Novelty" />
172 <option value="OneBPIndel" />
173 <option value="Sample" />
174 <option value="SnpEffPositionModifier" />
175 <option value="TandemRepeat" />
176 <option value="VariantType" />
177 </param>
178 <param name="do_not_use_all_standard_stratifications" checked="false" type="boolean" truevalue="--doNotUseAllStandardStratifications" falsevalue="" label="Do not use the standard stratification modules by default" help="-noST,--doNotUseAllStandardStratifications" />
179
180 <repeat name="only_variants_of_type" title="only Variants Of Type" help="--onlyVariantsOfType">
181 <param name="variant_type" type="text" value="" label="only variants of these types will be considered during the evaluation"/>
182 </repeat>
183
184 <param name="eval_modules" type="select" multiple="True" display="checkboxes" label="Eval modules to apply to the eval track(s)" help="-EV,--evalModule &amp;lt;evalModule&amp;gt;" >
185 <option value="CompOverlap" />
186 <option value="CountVariants" />
187 <option value="IndelLengthHistogram" />
188 <option value="IndelSummary" />
189 <option value="MendelianViolationEvaluator" />
190 <option value="MultiallelicSummary" />
191 <option value="PrintMissingComp" />
192 <option value="ThetaVariantEvaluator" />
193 <option value="TiTvVariantEvaluator" />
194 <option value="ValidationReport" />
195 <option value="VariantSummary" />
196 </param>
197 <param name="do_not_use_all_standard_modules" checked="false" type="boolean" truevalue="--doNotUseAllStandardModules" falsevalue="" label="Do not use the standard eval modules by default" help="-noEV,--doNotUseAllStandardModules" />
198
199 <param name="num_samples" type="integer" label="Number of samples (used if no samples are available in the VCF file)" value="0" help="-ns,--numSamples &amp;lt;numSamples&amp;gt;"/>
200 <param name="min_phase_quality" type="float" label="Minimum phasing quality" value="10.0" help="-mpq,--minPhaseQuality &amp;lt;minPhaseQuality&amp;gt;"/>
201 <param name="mendelian_violation_qual_threshold" type="integer" label="Minimum genotype QUAL score for each trio member required to accept a site as a violation" value="50" help="-mvq,--mendelianViolationQualThreshold &amp;lt;mendelianViolationQualThreshold&amp;gt;"/>
202 <param name="ancestral_alignments" type="data" format="fasta" optional="True" label="Fasta file with ancestral alleles" help="-aa,--ancestralAlignments &amp;lt;ancestralAlignments&amp;gt;" />
203 <param name="known_cnvs" type="data" format="bed,gatk_interval,picard_interval_list" optional="True" label="File containing tribble-readable features describing a known list of copy number variants" help="-knownCNVs,--knownCNVs &amp;lt;knownCNVs&amp;gt;" />
204 <param name="strat_intervals" type="data" format="bed,gatk_interval,picard_interval_list" optional="True" label="File containing tribble-readable features for the IntervalStratification" help="-stratIntervals,--stratIntervals &amp;lt;stratIntervals&amp;gt;" />
205 </expand>
206
207 </inputs>
208 <outputs> <!-- Two datasets: the report named in the command's out option and the log named in its stdout option. -->
209 <data name="output_report" format="gatk_report" label="${tool.name} on ${on_string} (report)"/>
210 <data name="output_log" format="txt" label="${tool.name} on ${on_string} (log)"/>
211 </outputs>
212 <tests> <!-- The eval input is named input_variants to match what the command template reads from reference_source; TODO confirm against the input_variants macro. -->
213 <test>
214 <param name="reference_source_selector" value="history" />
215 <param name="ref_file" value="phiX.fasta" ftype="fasta" />
216 <param name="input_variants" value="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" ftype="vcf" />
217 <param name="dbsnp_rod_bind_type_selector" value="set_dbsnp" />
218 <param name="dbsnp_input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" />
219 <param name="dbsnp_rod_name" value="dbsnp" />
220 <param name="dbsnp_known_names" value="True"/>
221 <param name="comp_rod_bind" value="0" />
222 <param name="gatk_param_type_selector" value="basic" />
223 <param name="analysis_param_type_selector" value="basic" />
224 <output name="output_report" file="gatk/gatk_variant_eval/gatk_variant_eval_out_1.gatk_report" />
225 <output name="output_log" file="gatk/gatk_variant_eval/gatk_variant_eval_out_1.log.contains" compare="contains" />
226 </test>
227 </tests>
228 <help>
229 **What it does**
230
231 General-purpose tool for variant evaluation (% in dbSNP, genotype concordance, Ti/Tv ratios, and a lot more)
232
233 For more information on using the VariantEval module, see this `tool specific page &lt;http://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_walkers_varianteval_VariantEval.html&gt;`_.
234
235 To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gatk/guide/topic?name=best-practices&gt;`_.
236
237 If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gatk/guide/topic?name=faqs&gt;`_.
238
239 ------
240
241 **Inputs**
242
243 GenomeAnalysisTK: VariantEval accepts variant files as input.
244
245
246 **Outputs**
247
248 The output is a table of variant evaluation.
249
250
251 Go `here &lt;http://www.broadinstitute.org/gatk/guide/topic?name=intro&gt;`_ for details on GATK file formats.
252
253 -------
254
255 **Settings**::
256
257 out An output file presented to the walker. Will overwrite contents if file exists.
258 list List the available eval modules and exit
259 select_exps One or more stratifications to use when evaluating the data
260 select_names Names to use for the list of stratifications (must be a 1-to-1 mapping)
261 sample Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context
262 known_names Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets
263 stratificationModule One or more specific stratification modules to apply to the eval track(s) (in addition to the standard stratifications, unless -noST is specified)
264 doNotUseAllStandardStratifications Do not use the standard stratification modules by default (instead, only those that are specified with the -ST option)
265 onlyVariantsOfType If provided, only variants of these types will be considered during the evaluation
266 evalModule One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless -noEV is specified)
267 doNotUseAllStandardModules Do not use the standard modules by default (instead, only those that are specified with the -EV option)
268 numSamples Number of samples (used if no samples are available in the VCF file)
269 minPhaseQuality Minimum phasing quality
270 mendelianViolationQualThreshold Minimum genotype QUAL score for each trio member required to accept a site as a violation
271 ancestralAlignments Fasta file with ancestral alleles
272
273 @CITATION_SECTION@
274 </help>
275 <expand macro="citations" />
276 </tool>