comparison unified_genotyper.xml @ 15:01ff8dd37d4d draft default tip

Uploaded
author lz_hust
date Sat, 01 Jun 2019 07:20:41 -0400
parents
children
comparison
equal deleted inserted replaced
14:68426930d59c 15:01ff8dd37d4d
1 <tool id="gatk2_unified_genotyper" name="Unified Genotyper" version="@VERSION@.2">
2 <description>SNP and indel caller</description>
3 <macros>
4 <import>gatk2_macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
7 <expand macro="version_command" />
8 <command interpreter="python">
9 gatk2_wrapper.py
10 --stdout "${output_log}"
11 @BAM_INPUTS@
12 -p '
13 @JAR_PATH@
14 -T "UnifiedGenotyper"
15 @THREADS@
16 --out "${output_vcf}"
17 --metrics_file "${output_metrics}"
18 \$GATK2_SITE_OPTIONS
19
20 ## pin CPU threads per data thread to 1, according to http://www.broadinstitute.org/gatk/guide/article?id=1975
21 --num_cpu_threads_per_data_thread 1
22 ## -R is emitted here only when the reference is not taken from the history; the path is the "path" field of the selected ref_file option
23 #if $reference_source.reference_source_selector != "history":
24 -R "${reference_source.ref_file.fields.path}"
25 #end if
26 --genotype_likelihoods_model "${genotype_likelihoods_model}"
27 --standard_min_confidence_threshold_for_calling "${standard_min_confidence_threshold_for_calling}"
28 --standard_min_confidence_threshold_for_emitting "${standard_min_confidence_threshold_for_emitting}"
29 '
30 @DBSNP_OPTIONS@
31 $allow_n_cigar_reads
32 #include source=$standard_gatk_options#
33 ## start analysis specific options: the block below is emitted only when analysis_param_type_selector is "advanced"
34 #if $analysis_param_type.analysis_param_type_selector == "advanced":
35 -p '
36 --heterozygosity "${analysis_param_type.heterozygosity}"
37 --pcr_error_rate "${analysis_param_type.pcr_error_rate}"
38 --genotyping_mode "${analysis_param_type.genotyping_mode_type.genotyping_mode}"
39 #if str( $analysis_param_type.genotyping_mode_type.genotyping_mode ) == 'GENOTYPE_GIVEN_ALLELES':
40 --alleles "${analysis_param_type.genotyping_mode_type.input_alleles_rod}"
41 #end if
42 --output_mode "${analysis_param_type.output_mode}"
43 ${analysis_param_type.compute_SLOD}
44 --min_base_quality_score "${analysis_param_type.min_base_quality_score}"
45 --max_deletion_fraction "${analysis_param_type.max_deletion_fraction}"
46 --max_alternate_alleles "${analysis_param_type.max_alternate_alleles}"
47 --min_indel_count_for_genotyping "${analysis_param_type.min_indel_count_for_genotyping}"
48 --indel_heterozygosity "${analysis_param_type.indel_heterozygosity}"
49 --indelGapContinuationPenalty "${analysis_param_type.indelGapContinuationPenalty}"
50 --indelGapOpenPenalty "${analysis_param_type.indelGapOpenPenalty}"
51 --indelHaplotypeSize "${analysis_param_type.indelHaplotypeSize}"
52 ${analysis_param_type.doContextDependentGapPenalties}
53 #if str( $analysis_param_type.annotation ) != "None":
54 #for $annotation in str( $analysis_param_type.annotation.fields.gatk_value ).split( ','):
55 --annotation "${annotation}"
56 #end for
57 #end if
58 #for $additional_annotation in $analysis_param_type.additional_annotations:
59 --annotation "${additional_annotation.additional_annotation_name}"
60 #end for
61 #if str( $analysis_param_type.group ) != "None":
62 #for $group in str( $analysis_param_type.group ).split( ','):
63 --group "${group}"
64 #end for
65 #end if
66 #if str( $analysis_param_type.exclude_annotations ) != "None":
67 #for $annotation in str( $analysis_param_type.exclude_annotations.fields.gatk_value ).split( ','):
68 --excludeAnnotation "${annotation}"
69 #end for
70 #end if
71 #if str( $analysis_param_type.sample_ploidy ) != '':
72 --sample_ploidy "$analysis_param_type.sample_ploidy"
73 #end if
74 '
75 ## #if str( $analysis_param_type.snpEff_rod_bind_type.snpEff_rod_bind_type_selector ) == 'set_snpEff':
76 ## -p '--annotation "SnpEff"'
77 ## -d "--snpEffFile:${analysis_param_type.snpEff_rod_bind_type.snpEff_rod_name},%(file_type)s" "${analysis_param_type.snpEff_rod_bind_type.snpEff_input_rod}" "${analysis_param_type.snpEff_rod_bind_type.snpEff_input_rod.ext}" "input_snpEff_${analysis_param_type.snpEff_rod_bind_type.snpEff_rod_name}"
78 ## #else:
79 ## -p '--excludeAnnotation "SnpEff"'
80 ## #end if
81 #end if
82 </command>
83 <inputs>
84 <conditional name="reference_source">
85 <expand macro="reference_source_selector_param" />
86 <when value="cached"> <!-- reference chosen from the gatk2_picard_indexes data table -->
87 <expand macro="input_bams_cached" />
88 <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;">
89 <options from_data_table="gatk2_picard_indexes">
90 <!-- <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/> does not yet work in a repeat...-->
91 </options>
92 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
93 </param>
94 </when>
95 <when value="history"> <!-- FIX ME!!!! (reference supplied as a FASTA dataset from the history; what needs fixing is not recorded here) -->
96 <expand macro="input_bams_history" />
97 <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
98 </when>
99 </conditional>
100 <expand macro="dbsnp_param" />
101
102 <param name="genotype_likelihoods_model" type="select" label="Genotype likelihoods calculation model to employ" help="-glm,--genotype_likelihoods_model &amp;lt;genotype_likelihoods_model&amp;gt;">
103 <option value="BOTH" selected="True">BOTH</option>
104 <option value="SNP">SNP</option>
105 <option value="INDEL">INDEL</option>
106 </param>
107
108 <param name="standard_min_confidence_threshold_for_calling" type="float" value="30.0" label="The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be called" help="-stand_call_conf,--standard_min_confidence_threshold_for_calling &amp;lt;standard_min_confidence_threshold_for_calling&amp;gt;" />
109 <param name="standard_min_confidence_threshold_for_emitting" type="float" value="30.0" label="The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be emitted (and filtered if less than the calling threshold)" help="-stand_emit_conf,--standard_min_confidence_threshold_for_emitting &amp;lt;standard_min_confidence_threshold_for_emitting&amp;gt;" />
110
111 <expand macro="allow_n_cigar_reads" />
112 <expand macro="gatk_param_type_conditional" />
113
114 <expand macro="analysis_type_conditional"> <!-- the command template reads the params below as analysis_param_type.* when "advanced" is selected; presumably the macro (gatk2_macros.xml) places them in its advanced branch, TODO confirm -->
115 <param name="heterozygosity" type="float" value="1e-3" label="Heterozygosity value used to compute prior likelihoods for any locus"
116 help="-hets,--heterozygosity &amp;lt;heterozygosity&amp;gt;" />
117 <param name="pcr_error_rate" type="float" value="1e-4" label="The PCR error rate to be used for computing fragment-based likelihoods"
118 help="-pcr_error,--pcr_error_rate &amp;lt;pcr_error_rate&amp;gt;" />
119 <conditional name="genotyping_mode_type">
120 <param name="genotyping_mode" type="select" label="How to determine the alternate allele to use for genotyping" help="-gt_mode,--genotyping_mode &amp;lt;genotyping_mode&amp;gt;">
121 <option value="DISCOVERY" selected="True">DISCOVERY</option>
122 <option value="GENOTYPE_GIVEN_ALLELES">GENOTYPE_GIVEN_ALLELES</option>
123 </param>
124 <when value="DISCOVERY">
125 <!-- Do nothing here: DISCOVERY takes no additional inputs -->
126 </when>
127 <when value="GENOTYPE_GIVEN_ALLELES"> <!-- the VCF chosen here is passed through the alleles option by the command template -->
128 <param name="input_alleles_rod" type="data" format="vcf" label="Alleles ROD file" help="-alleles,--alleles &amp;lt;alleles&amp;gt;" />
129 </when>
130 </conditional>
131 <param name="output_mode" type="select" label="Should we output confident genotypes (i.e. including ref calls) or just the variants?" help="-out_mode,--output_mode &amp;lt;output_mode&amp;gt;">
132 <option value="EMIT_VARIANTS_ONLY" selected="True">EMIT_VARIANTS_ONLY</option>
133 <option value="EMIT_ALL_CONFIDENT_SITES">EMIT_ALL_CONFIDENT_SITES</option>
134 <option value="EMIT_ALL_SITES">EMIT_ALL_SITES</option>
135 </param>
136 <param name="compute_SLOD" type="boolean" truevalue="--computeSLOD" falsevalue="" label="Compute the SLOD" help="--computeSLOD" />
137 <param name="min_base_quality_score" type="integer" value="17" label="Minimum base quality required to consider a base for calling" help="-mbq,--min_base_quality_score &amp;lt;min_base_quality_score&amp;gt;" />
138 <param name="max_deletion_fraction" type="float" value="0.05" label="Maximum fraction of reads with deletions spanning this locus for it to be callable" help="to disable, set to &lt; 0 or &gt; 1 (-deletions,--max_deletion_fraction &amp;lt;max_deletion_fraction&amp;gt;)" />
139 <param name="max_alternate_alleles" type="integer" value="6" label="Maximum number of alternate alleles to genotype" help="-maxAlleles,--max_alternate_alleles &amp;lt;max_alternate_alleles&amp;gt;" />
140 <param name="min_indel_count_for_genotyping" type="integer" value="5" label="Minimum number of consensus indels required to trigger genotyping run" help="-minIndelCnt,--min_indel_count_for_genotyping &amp;lt;min_indel_count_for_genotyping&amp;gt;" />
141 <param name="indel_heterozygosity" type="float" value="0.000125" label="Heterozygosity for indel calling" help="1.0/8000==0.000125 (-indelHeterozygosity,--indel_heterozygosity &amp;lt;indel_heterozygosity&amp;gt;)"/>
142 <param name="indelGapContinuationPenalty" type="integer" value="10" label="Indel gap continuation penalty" help="As Phred-scaled probability, i.e. 30 => 10^-30/10 (--indelGapContinuationPenalty)">
143 <validator type="in_range" message="value between 0 and 255" min="0" max="255" />
144 </param>
145 <param name="indelGapOpenPenalty" type="integer" value="45" label="Indel gap open penalty" help="As Phred-scaled probability, i.e. 30 => 10^-30/10 (--indelGapOpenPenalty)">
146 <validator type="in_range" message="value between 0 and 255" min="0" max="255" />
147 </param>
148 <!-- indelHaplotypeSize - Gone in GATK 2.4? -->
149 <param name="indelHaplotypeSize" type="integer" value="80" label="Indel haplotype size" help="--indelHaplotypeSize" />
150 <param name="doContextDependentGapPenalties" type="boolean" truevalue="--doContextDependentGapPenalties" falsevalue="" label="Vary gap penalties by context" help="--doContextDependentGapPenalties" />
151 <param name="annotation" type="select" multiple="True" display="checkboxes" label="Annotation Types" help="-A,--annotation &amp;lt;annotation&amp;gt;">
152 <!-- load the available annotations from an external configuration file, since additional ones can be added to local installs -->
153 <options from_data_table="gatk2_annotations">
154 <filter type="multiple_splitter" column="tools_valid_for" separator=","/>
155 <filter type="static_value" value="UnifiedGenotyper" column="tools_valid_for"/>
156 </options>
157 </param>
158 <repeat name="additional_annotations" title="Additional annotation" help="-A,--annotation &amp;lt;annotation&amp;gt;">
159 <param name="additional_annotation_name" type="text" value="" label="Annotation name" />
160 </repeat>
161 <!--
162 <conditional name="snpEff_rod_bind_type">
163 <param name="snpEff_rod_bind_type_selector" type="select" label="Provide a snpEff reference-ordered data file">
164 <option value="set_snpEff">Set snpEff</option>
165 <option value="exclude_snpEff" selected="True">Don't set snpEff</option>
166 </param>
167 <when value="exclude_snpEff">
168 </when>
169 <when value="set_snpEff">
170 <param name="snpEff_input_rod" type="data" format="vcf" label="ROD file" />
171 <param name="snpEff_rod_name" type="hidden" value="snpEff" label="ROD Name"/>
172 </when>
173 </conditional>
174 -->
175 <param name="group" type="select" multiple="True" display="checkboxes" label="Annotation Interfaces/Groups" help="-G,--group &amp;lt;group&amp;gt;">
176 <option value="RodRequiringAnnotation">RodRequiringAnnotation</option>
177 <option value="Standard">Standard</option>
178 <option value="Experimental">Experimental</option>
179 <option value="WorkInProgress">WorkInProgress</option>
180 <option value="RankSumTest">RankSumTest</option>
181 <!-- <option value="none">none</option> -->
182 </param>
183 <!-- <param name="family_string" type="text" value="" label="Family String"/> -->
184 <param name="exclude_annotations" type="select" multiple="True" display="checkboxes" label="Annotations to exclude"
185 help="-XA,--excludeAnnotation &amp;lt;excludeAnnotation&amp;gt;" >
186 <!-- load the available annotations from an external configuration file, since additional ones can be added to local installs -->
187 <options from_data_table="gatk2_annotations">
188 <filter type="multiple_splitter" column="tools_valid_for" separator=","/>
189 <filter type="static_value" value="UnifiedGenotyper" column="tools_valid_for"/>
190 </options>
191 </param>
192 <param name="sample_ploidy" type="integer" value="2"
193 label="Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy)" help="-ploidy,--sample_ploidy" />
194 </expand>
195 </inputs>
196 <outputs> <!-- the command template writes these by name: output_vcf via the out option, output_metrics via metrics_file, output_log via the wrapper's stdout option -->
197 <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (VCF)" />
198 <data format="txt" name="output_metrics" label="${tool.name} on ${on_string} (metrics)" />
199 <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
200 </outputs>
201 <trackster_conf/>
202 <tests>
203 <test> <!-- single scenario: reference FASTA from the history, dbSNP ROD set, advanced analysis options selected -->
204 <param name="reference_source_selector" value="history" />
205 <param name="ref_file" value="phiX.fasta" ftype="fasta" />
206 <param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" />
207 <param name="dbsnp_rod_bind_type_selector" value="set_dbsnp" />
208 <param name="dbsnp_input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" />
209 <param name="dbsnp_rod_name" value="dbsnp" />
210 <param name="standard_min_confidence_threshold_for_calling" value="0" />
211 <param name="standard_min_confidence_threshold_for_emitting" value="4" />
212 <param name="gatk_param_type_selector" value="basic" />
213 <param name="analysis_param_type_selector" value="advanced" />
214 <param name="genotype_likelihoods_model" value="BOTH" />
215 <param name="heterozygosity" value="0.001" />
216 <param name="pcr_error_rate" value="0.0001" />
217 <param name="genotyping_mode" value="DISCOVERY" />
218 <param name="output_mode" value="EMIT_ALL_CONFIDENT_SITES" />
219 <param name="compute_SLOD" />
220 <param name="min_base_quality_score" value="17" />
221 <param name="max_deletion_fraction" value="-1" /> <!-- a value below 0 disables this filter, per the parameter's help text -->
222 <param name="min_indel_count_for_genotyping" value="2" />
223 <param name="indel_heterozygosity" value="0.000125" />
224 <param name="indelGapContinuationPenalty" value="10" />
225 <param name="indelGapOpenPenalty" value="3" />
226 <param name="indelHaplotypeSize" value="80" />
227 <param name="doContextDependentGapPenalties" />
228 <!-- <param name="annotation" value="" />
229 <param name="group" value="" /> -->
230 <output name="output_vcf" file="gatk/gatk_unified_genotyper/gatk_unified_genotyper_out_1.vcf" lines_diff="4" />
231 <output name="output_metrics" file="gatk/gatk_unified_genotyper/gatk_unified_genotyper_out_1.metrics" />
232 <output name="output_log" file="gatk/gatk_unified_genotyper/gatk_unified_genotyper_out_1.log.contains" compare="contains" />
233 </test>
234 </tests>
235 <help>
236 **What it does**
237
238 A variant caller which unifies the approaches of several disparate callers. Works for single-sample and multi-sample data. The user can choose from several different incorporated calculation models.
239
240 For more information on the GATK Unified Genotyper, see this `tool specific page &lt;http://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_walkers_genotyper_UnifiedGenotyper.html&gt;`_.
241
242 To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gatk/guide/topic?name=best-practices&gt;`_.
243
244 If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gatk/guide/topic?name=faqs&gt;`_.
245
246 ------
247
248 **Inputs**
249
250 GenomeAnalysisTK: UnifiedGenotyper accepts an aligned BAM input file.
251
252
253 **Outputs**
254
255 The output is in VCF format.
256
257
258 Go `here &lt;http://www.broadinstitute.org/gatk/guide/topic?name=intro&gt;`_ for details on GATK file formats.
259
260 -------
261
262 **Settings**::
263
264 genotype_likelihoods_model Genotype likelihoods calculation model to employ -- BOTH is the default option, while INDEL is also available for calling indels and SNP is available for calling SNPs only (SNP|INDEL|BOTH)
265 heterozygosity Heterozygosity value used to compute prior likelihoods for any locus
266 pcr_error_rate The PCR error rate to be used for computing fragment-based likelihoods
267 genotyping_mode How to determine the alternate allele to use for genotyping (DISCOVERY|GENOTYPE_GIVEN_ALLELES)
268 output_mode Should we output confident genotypes (i.e. including ref calls) or just the variants? (EMIT_VARIANTS_ONLY|EMIT_ALL_CONFIDENT_SITES|EMIT_ALL_SITES)
269 standard_min_confidence_threshold_for_calling The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be called
270 standard_min_confidence_threshold_for_emitting The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be emitted (and filtered if less than the calling threshold)
271 computeSLOD If provided, we will calculate the SLOD
272 min_base_quality_score Minimum base quality required to consider a base for calling
273 max_deletion_fraction Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to &lt; 0 or &gt; 1; default:0.05]
274 min_indel_count_for_genotyping Minimum number of consensus indels required to trigger genotyping run
275 indel_heterozygosity Heterozygosity for indel calling
276 indelGapContinuationPenalty Indel gap continuation penalty
277 indelGapOpenPenalty Indel gap open penalty
278 indelHaplotypeSize Indel haplotype size
279 doContextDependentGapPenalties Vary gap penalties by context
280 indel_recal_file Filename for the input covariates table recalibration .csv file - EXPERIMENTAL, DO NOT USE
281 indelDebug Output indel debug info
282 out File to which variants should be written
283 annotation One or more specific annotations to apply to variant calls
284 group One or more classes/groups of annotations to apply to variant calls
285
286 @CITATION_SECTION@
287 </help>
288 <expand macro="citations" />
289 </tool>