comparison haplotype_caller.xml @ 15:01ff8dd37d4d draft default tip

Uploaded
author lz_hust
date Sat, 01 Jun 2019 07:20:41 -0400
parents
children
comparison
equal deleted inserted replaced
14:68426930d59c 15:01ff8dd37d4d
1 <tool id="gatk2_haplotype_caller" name="Haplotype Caller" version="@VERSION@.2">
2 <description>Call SNPs and indels simultaneously via local de-novo assembly of haplotypes in an active region</description>
3 <macros>
4 <import>gatk2_macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
7 <expand macro="version_command" />
8 <command interpreter="python">
9 gatk2_wrapper.py
10 --stdout "${output_log}"
11 @BAM_INPUTS@
12 -p '
13 @JAR_PATH@
14 -T "HaplotypeCaller"
15 -o "${output_vcf}"
16
17 \$GATK2_SITE_OPTIONS
18
19 --num_cpu_threads_per_data_thread \${GALAXY_SLOTS:-4}
20
21 #if $reference_source.reference_source_selector != "history":
22 -R "${reference_source.ref_file.fields.path}"
23 #end if
24 #if str($input_recal) != 'None':
25 --BQSR "${input_recal}"
26 #end if
27 '
28 @DBSNP_OPTIONS@
29 $allow_n_cigar_reads
30 #include source=$standard_gatk_options#
31
32 ##start analysis specific options
33 #if $analysis_param_type.analysis_param_type_selector == "advanced":
34 -p '
35 #if $analysis_param_type.heterozygosity.__str__.strip() != '':
36 --heterozygosity $analysis_param_type.heterozygosity
37 #end if
38 --genotyping_mode "${analysis_param_type.genotyping_mode_type.genotyping_mode}"
39 #if str( $analysis_param_type.genotyping_mode_type.genotyping_mode ) == 'GENOTYPE_GIVEN_ALLELES':
40 --alleles "${analysis_param_type.genotyping_mode_type.input_alleles_rod}"
41 #end if
42 #if str( $analysis_param_type.emitRefConfidence ) != 'None':
43 --emitRefConfidence $analysis_param_type.emitRefConfidence ## only emitted when the optional select has a value; an unset select renders as the string None
44 #end if
45
46 ## files
47 #if str($analysis_param_type.activeRegionIn) != 'None':
48 --activeRegionIn "$analysis_param_type.activeRegionIn"
49 #end if
50 #if str($analysis_param_type.comp) != 'None':
51 --comp "$analysis_param_type.comp"
52 #end if
53 ##
54 #if str( $analysis_param_type.annotation ) != "None":
55 #for $annotation in str( $analysis_param_type.annotation.fields.gatk_value ).split( ','):
56 --annotation "${annotation}"
57 #end for
58 #end if
59 #for $additional_annotation in $analysis_param_type.additional_annotations:
60 --annotation "${additional_annotation.additional_annotation_name}"
61 #end for
62 #if str( $analysis_param_type.group ) != "None":
63 #for $group in str( $analysis_param_type.group ).split( ','):
64 --group "${group}"
65 #end for
66 #end if
67 #if str( $analysis_param_type.exclude_annotations ) != "None":
68 #for $annotation in str( $analysis_param_type.exclude_annotations.fields.gatk_value ).split( ','):
69 --excludeAnnotation "${annotation}"
70 #end for
71 #end if
72
73 ## value settings
74 #if $analysis_param_type.contamination_fraction_to_filter.__str__.strip() != '':
75 --contamination_fraction_to_filter $analysis_param_type.contamination_fraction_to_filter
76 #end if
77 #if $analysis_param_type.minPruning.__str__.strip() != '':
78 --minPruning $analysis_param_type.minPruning
79 #end if
80 #if $analysis_param_type.standard_min_confidence_threshold_for_calling.__str__.strip() != '':
81 --standard_min_confidence_threshold_for_calling $analysis_param_type.standard_min_confidence_threshold_for_calling
82 #end if
83 #if $analysis_param_type.standard_min_confidence_threshold_for_emitting.__str__.strip() != '':
84 --standard_min_confidence_threshold_for_emitting $analysis_param_type.standard_min_confidence_threshold_for_emitting
85 #end if
86 #if $analysis_param_type.gcpHMM.__str__.strip() != '':
87 --gcpHMM $analysis_param_type.gcpHMM
88 #end if
89 #if $analysis_param_type.max_alternate_alleles.__str__.strip() != '':
90 --max_alternate_alleles $analysis_param_type.max_alternate_alleles
91 #end if
92 ## mode selections
93
94 #if $analysis_param_type.pair_hmm_implementation.__str__ != "None" and len($analysis_param_type.pair_hmm_implementation.__str__) > 0:
95 --pair_hmm_implementation $analysis_param_type.pair_hmm_implementation
96 #end if
97 ## optional outputs
98 #if $analysis_param_type.activeRegionOut:
99 --activeRegionOut $active_region_out
100 #end if
101 #if $analysis_param_type.graphOutput:
102 --graphOutput $graph_out
103 #end if
104 ## flags
105 $analysis_param_type.useAllelesTrigger
106 $analysis_param_type.fullHaplotype
107 $analysis_param_type.genotypeFullActiveRegion
108 $analysis_param_type.debug
109 '
110 #end if
111 </command>
112 <inputs>
113 <param name="input_recal" type="data" format="gatk_report" optional="true" label="Covariates table recalibration file" help="The input covariates table file which enables on-the-fly base quality score recalibration. Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool. Please be aware that one should only run recalibration with the covariates file created on the same input bam(s) (-BQSR,--BQSR &amp;lt;recal_file&amp;gt;)" />
114 <conditional name="reference_source">
115 <expand macro="reference_source_selector_param" />
116 <when value="cached">
117 <expand macro="input_bams_cached" />
118 <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" >
119 <options from_data_table="gatk2_picard_indexes">
120 <!-- <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/> does not yet work in a repeat...-->
121 </options>
122 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
123 </param>
124 </when>
125 <when value="history">
126 <expand macro="input_bams_history" />
127 <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
128 </when>
129 </conditional>
130 <expand macro="dbsnp_param" />
131
132 <expand macro="allow_n_cigar_reads" />
133 <expand macro="gatk_param_type_conditional" />
134
135 <conditional name="analysis_param_type">
136 <param name="analysis_param_type_selector" type="select" label="Basic or Advanced Analysis options">
137 <option value="basic" selected="True">Basic</option>
138 <option value="advanced">Advanced</option>
139 </param>
140 <when value="basic">
141 <!-- Do nothing here -->
142 </when>
143 <when value="advanced">
144
145 <param name="activeRegionIn" type="data" format="bed,gatk_interval,picard_interval_list,vcf" optional="true" label="activeRegionIn" help="--activeRegionIn / -AR Use this interval list file as the active regions to process"/>
146 <param name="activeRegionOut" type="boolean" checked="False" truevalue="" falsevalue="" label="activeRegionOut" help="--activeRegionOut / -ARO Output the active region to an interval list file"/>
147
148 <param name="annotation" type="select" multiple="True" display="checkboxes" label="Annotation Types" help="-A,--annotation &amp;lt;annotation&amp;gt;">
149 <!-- load the available annotations from an external configuration file, since additional ones can be added to local installs -->
150 <options from_data_table="gatk2_annotations">
151 <filter type="multiple_splitter" column="tools_valid_for" separator=","/>
152 <filter type="static_value" value="HaplotypeCaller" column="tools_valid_for"/>
153 </options>
154 </param>
155 <repeat name="additional_annotations" title="Additional annotation" help="-A,--annotation &amp;lt;annotation&amp;gt;">
156 <param name="additional_annotation_name" type="text" value="" label="Annotation name" />
157 </repeat>
158 <!--
159 <conditional name="snpEff_rod_bind_type">
160 <param name="snpEff_rod_bind_type_selector" type="select" label="Provide a snpEff reference-ordered data file">
161 <option value="set_snpEff">Set snpEff</option>
162 <option value="exclude_snpEff" selected="True">Don't set snpEff</option>
163 </param>
164 <when value="exclude_snpEff">
165 </when>
166 <when value="set_snpEff">
167 <param name="snpEff_input_rod" type="data" format="vcf" label="ROD file" />
168 <param name="snpEff_rod_name" type="hidden" value="snpEff" label="ROD Name"/>
169 </when>
170 </conditional>
171 -->
172 <param name="group" type="select" multiple="True" display="checkboxes" label="Annotation Interfaces/Groups" help="-G,--group &amp;lt;group&amp;gt;">
173 <option value="RodRequiringAnnotation">RodRequiringAnnotation</option>
174 <option value="Standard">Standard</option>
175 <option value="Experimental">Experimental</option>
176 <option value="WorkInProgress">WorkInProgress</option>
177 <option value="RankSumTest">RankSumTest</option>
178 <!-- <option value="none">none</option> -->
179 </param>
180 <!-- <param name="family_string" type="text" value="" label="Family String"/> -->
181 <param name="exclude_annotations" type="select" multiple="True" display="checkboxes" label="Annotations to exclude" help="-XA,--excludeAnnotation &amp;lt;excludeAnnotation&amp;gt;" >
182 <!-- load the available annotations from an external configuration file, since additional ones can be added to local installs -->
183 <options from_data_table="gatk2_annotations">
184 <filter type="multiple_splitter" column="tools_valid_for" separator=","/>
185 <filter type="static_value" value="HaplotypeCaller" column="tools_valid_for"/>
186 </options>
187 </param>
188
189 <param name="comp" type="data" format="vcf" optional="true" label="comp" help="--comp / -comp comparison VCF file"/>
190 <param name="contamination_fraction_to_filter" type="float" value="0.05" optional="true" label="contamination_fraction_to_filter" help="--contamination_fraction_to_filter / -contamination Fraction of contamination in sequencing data (for all samples) to aggressively remove">
191 <validator type="in_range" message="value between 0.00 and 1.00" min="0" max="1"/>
192 </param>
193 <param name="debug" type="boolean" checked="False" truevalue="-debug" falsevalue="" label="debug" help="--debug / -debug If specified, print out very verbose debug information about each triggering active region"/>
194
195 <conditional name="genotyping_mode_type">
196 <param name="genotyping_mode" type="select" label="How to determine the alternate allele to use for genotyping" help="-gt_mode,--genotyping_mode &amp;lt;genotyping_mode&amp;gt;">
197 <option value="DISCOVERY" selected="True">DISCOVERY</option>
198 <option value="GENOTYPE_GIVEN_ALLELES">GENOTYPE_GIVEN_ALLELES</option>
199 </param>
200 <when value="DISCOVERY">
201 <!-- Do nothing here -->
202 </when>
203 <when value="GENOTYPE_GIVEN_ALLELES">
204 <param name="input_alleles_rod" type="data" format="vcf" label="Alleles ROD file" help="-alleles,--alleles &amp;lt;alleles&amp;gt;" />
205 </when>
206 </conditional>
207 <param name="graphOutput" type="boolean" checked="False" truevalue="" falsevalue="" label="graphOutput" help="--graphOutput / -graph File to which debug assembly graph information should be written"/>
208 <param name="heterozygosity" type="float" value="0.0010" optional="true" label="heterozygosity" help="--heterozygosity / -hets Heterozygosity value used to compute prior likelihoods for any locus"/>
209 <param name="minPruning" type="integer" value="1" optional="true" label="minPruning" help="--minPruning / -minPruning The minimum allowed pruning factor in assembly graph. Paths with &amp;lt;= X supporting kmers are pruned from the graph"> <!-- Pruning threshold for the assembly graph; the help states the rule as "at most X supporting kmers", in agreement with the Settings list in the help section -->
210 <validator type="in_range" message="value between 0 and 127" min="0" max="127"/>
211 </param>
212 <!-- http://www.broadinstitute.org/gatk/guide/article?id=2940 -->
213 <param name="emitRefConfidence" type="select" optional="true" label="Output confidence estimates" help="Emitting a per-bp or summarized confidence estimate for a site being strictly homozygous-reference (--emitRefConfidence)">
214 <option value="NONE" selected="True">don't emit anything</option>
215 <option value="BP_RESOLUTION">BP_RESOLUTION (emit detailed information for each BP)</option>
216 <option value="GVCF">GVCF (emit a block summarized version of the BP_RESOLUTION data)</option>
217 </param>
218 <param name="pair_hmm_implementation" type="select" optional="true" label="pair_hmm_implementation" help="--pair_hmm_implementation / -pairHMM The PairHMM implementation to use for genotype likelihood calculations">
219 <option value="EXACT">EXACT</option>
220 <option value="ORIGINAL">ORIGINAL</option>
221 <option value="CACHING">CACHING</option>
222 <option value="LOGLESS_CACHING" selected="True">LOGLESS_CACHING</option>
223 </param>
224 <param name="standard_min_confidence_threshold_for_calling" type="float" value="30.0" optional="true" label="standard_min_confidence_threshold_for_calling" help="--standard_min_confidence_threshold_for_calling / -stand_call_conf The minimum phred-scaled confidence threshold at which variants should be called"/>
225 <param name="standard_min_confidence_threshold_for_emitting" type="float" value="30.0" optional="true" label="standard_min_confidence_threshold_for_emitting" help="--standard_min_confidence_threshold_for_emitting / -stand_emit_conf The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)"/>
226 <param name="useAllelesTrigger" type="boolean" checked="False" truevalue="-allelesTrigger" falsevalue="" label="useAllelesTrigger" help="--useAllelesTrigger / -allelesTrigger If specified, use additional trigger on variants found in an external alleles file"/>
227 <param name="fullHaplotype" type="boolean" checked="False" truevalue="-fullHaplotype" falsevalue="" label="fullHaplotype" help="--fullHaplotype / -fullHaplotype If specified, output the full haplotype sequence instead of converting to individual variants w.r.t. the reference"/>
228 <param name="gcpHMM" type="integer" value="10" optional="true" label="gcpHMM" help="--gcpHMM / -gcpHMM Flat gap continuation penalty for use in the Pair HMM"/>
229 <param name="genotypeFullActiveRegion" type="boolean" checked="False" truevalue="-genotypeFullActiveRegion" falsevalue="" label="genotypeFullActiveRegion" help="--genotypeFullActiveRegion / -genotypeFullActiveRegion If specified, alternate alleles are considered to be the full active region for the purposes of genotyping"/>
230 <param name="max_alternate_alleles" type="integer" value="6" optional="true" label="max_alternate_alleles" help="--max_alternate_alleles / -maxAltAlleles Maximum number of alternate alleles to genotype"/>
231 </when>
232 </conditional>
233 </inputs>
234 <outputs> <!-- Output datasets: the VCF and the log are unconditional; graph_out and active_region_out each carry a filter on the advanced analysis selector and their matching checkbox -->
235 <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (VCF)" />
236 <data format="vcf" name="graph_out" label="${tool.name} on ${on_string} graph" > <!-- NOTE(review): declared as vcf, but the graphOutput parameter help describes debug assembly graph information; confirm the datatype -->
237 <filter>analysis_param_type['analysis_param_type_selector'] == "advanced" and analysis_param_type['graphOutput'] == True</filter>
238 </data>
239 <data format="vcf" name="active_region_out" label="${tool.name} on ${on_string} activeRegion" > <!-- NOTE(review): declared as vcf, but the activeRegionOut parameter help describes an interval list file; confirm the datatype -->
240 <filter>analysis_param_type['analysis_param_type_selector'] == "advanced" and analysis_param_type['activeRegionOut'] == True</filter>
241 </data>
242 <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
243 </outputs>
244 <tests>
245 <test> <!-- Single test: basic GATK options and basic analysis options, with a phiX reference taken from the history -->
246 <param name="input_recal" value="gatk/gatk_count_covariates/gatk_count_covariates_out_1.csv" ftype="csv" />
247 <param name="reference_source_selector" value="history" />
248 <param name="ref_file" value="phiX.fasta" ftype="fasta" />
249 <param name="input_bam" value="gatk/gatk_indel_realigner/gatk_indel_realigner_out_1.bam" ftype="bam" />
250 <param name="gatk_param_type_selector" value="basic" />
251 <param name="analysis_param_type_selector" value="basic" />
252 <output name="output_bam" file="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" lines_diff="4" /> <!-- NOTE(review): this tool declares no output named output_bam (its outputs are output_vcf, graph_out, active_region_out and output_log) and the expected file is a BAM under gatk_table_recalibration; this looks carried over from another wrapper. Confirm, then assert on output_vcf against a VCF fixture -->
253 <output name="output_log" file="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.log.contains" compare="contains" /> <!-- NOTE(review): expected log also lives under gatk_table_recalibration; verify it matches HaplotypeCaller log content -->
254 </test>
255 </tests>
256 <help>
257 **What it does**
258
259 **HaplotypeCaller**
260 calls SNPs and indels simultaneously via local de-novo assembly of haplotypes in an active region.
261 Haplotypes are evaluated using an affine gap penalty Pair HMM.
262
263 For more information on using the HaplotypeCaller in the GATK, see this `tool specific page &lt;http://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_walkers_haplotypecaller_HaplotypeCaller.html&gt;`_.
264
265 To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gatk/guide/topic?name=best-practices&gt;`_.
266
267 If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gatk/guide/topic?name=faqs&gt;`_.
268
269 ------
270
271 **Inputs**
272
273 GenomeAnalysisTK: HaplotypeCaller accepts aligned BAM files.
274
275
276 **Outputs**
277
278 The output is a VCF file with raw, unrecalibrated SNP and indel calls.
279
280
281 Go `here &lt;http://www.broadinstitute.org/gatk/guide/topic?name=intro&gt;`_ for details on GATK file formats.
282
283 -------
284
285 **Settings**::
286
287 activeRegionIn Use this interval list file as the active regions to process
288 activeRegionOut Output the active region to this interval list file
289 alleles The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES
290 annotation One or more specific annotations to apply to variant calls
291 comp comparison VCF file
292 contamination Fraction of contamination in sequencing data (for all samples) to aggressively remove
293 dbsnp dbSNP file
294 debug If specified, print out very verbose debug information about each triggering active region
295 excludeAnnotation One or more specific annotations to exclude
296 genotyping_mode Specifies how to determine the alternate alleles to use for genotyping
297 graphOutput File to which debug assembly graph information should be written
298 group One or more classes/groups of annotations to apply to variant calls
299 heterozygosity Heterozygosity value used to compute prior likelihoods for any locus
300 minPruning The minimum allowed pruning factor in assembly graph. Paths with less than or equal to this number of supporting kmers are pruned from the graph
301 pair_hmm_implementation The PairHMM implementation to use for genotype likelihood calculations
302 stand_call_conf The minimum phred-scaled confidence threshold at which variants should be called
303 stand_emit_conf The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)
304 useAllelesTrigger If specified, use additional trigger on variants found in an external alleles file
305 fullHaplotype If specified, output the full haplotype sequence instead of converting to individual variants w.r.t. the reference
306 gcpHMM Flat gap continuation penalty for use in the Pair HMM
307 genotypeFullActiveRegion If specified, alternate alleles are considered to be the full active region for the purposes of genotyping
308 max_alternate_alleles Maximum number of alternate alleles to genotype
309
310 @CITATION_SECTION@
311 </help>
312 <expand macro="citations" />
313 </tool>