comparison base_recalibrator.xml @ 6:35c00763cb5c draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gatk2 commit cf399638ebca4250bcc15f468238a9964de97b33
author iuc
date Mon, 04 Jun 2018 05:38:15 -0400
parents f244b8209eb8
children
comparison
equal deleted inserted replaced
5:84584664264c 6:35c00763cb5c
1 <tool id="gatk2_base_recalibrator" name="Base Recalibrator" version="@VERSION@.0"> 1 <tool id="gatk2_base_recalibrator" name="Base Recalibrator" version="@VERSION@.0">
2 <description>calculates covariates used to recalibrate base quality scores of reads</description> 2 <description>calculates covariates used to recalibrate base quality scores of reads</description>
3 <expand macro="requirements" />
4 <macros> 3 <macros>
5 <import>gatk2_macros.xml</import> 4 <import>gatk2_macros.xml</import>
6 </macros> 5 </macros>
6 <expand macro="requirements" />
7 <expand macro="version_command" />
7 <command interpreter="python"> 8 <command interpreter="python">
8 gatk2_wrapper.py 9 gatk2_wrapper.py
9 --stdout "${output_log}" 10 --stdout "${output_log}"
10 -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input" 11 -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input"
11 #if str( $reference_source.input_bam.metadata.bam_index ) != "None": 12 #if str( $reference_source.input_bam.metadata.bam_index ) != "None":
34 #for $cov in str( $covariates ).split( ',' ): 35 #for $cov in str( $covariates ).split( ',' ):
35 -cov "${cov}" 36 -cov "${cov}"
36 #end for 37 #end for
37 #end if 38 #end if
38 ' 39 '
39 40
40 #set $snp_dataset_provided = False 41 #set $snp_dataset_provided = False
41 #set $rod_binding_names = dict() 42 #set $rod_binding_names = dict()
42 #for $rod_binding in $rod_bind: 43 #for $rod_binding in $rod_bind:
43 #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom': 44 #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom':
44 #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name 45 #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name
49 #set $snp_dataset_provided = True 50 #set $snp_dataset_provided = True
50 #end if 51 #end if
51 #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1 52 #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1
52 -d "--knownSites:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" 53 -d "--knownSites:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}"
53 #end for 54 #end for
54 55
55 #include source=$standard_gatk_options# 56 #include source=$standard_gatk_options#
56 57
57 ##start analysis specific options 58 ##start analysis specific options
58 #if $analysis_param_type.analysis_param_type_selector == "advanced": 59 #if $analysis_param_type.analysis_param_type_selector == "advanced":
59 -p ' 60 -p '
60 #if $analysis_param_type.default_read_group_type.default_read_group_type_selector == "set": 61 #if $analysis_param_type.default_read_group_type.default_read_group_type_selector == "set":
61 --default_read_group "${analysis_param_type.default_read_group_type.default_read_group}" 62 --default_read_group "${analysis_param_type.default_read_group_type.default_read_group}"
70 --force_platform "${analysis_param_type.force_platform}" 71 --force_platform "${analysis_param_type.force_platform}"
71 #end if 72 #end if
72 ${analysis_param_type.exception_if_no_tile} 73 ${analysis_param_type.exception_if_no_tile}
73 #if str( $analysis_param_type.solid_options_type.solid_options_type_selector ) == "set": 74 #if str( $analysis_param_type.solid_options_type.solid_options_type_selector ) == "set":
74 #if str( $analysis_param_type.solid_options_type.solid_recal_mode ) != "default": 75 #if str( $analysis_param_type.solid_options_type.solid_recal_mode ) != "default":
75 --solid_recal_mode "${analysis_param_type.solid_options_type.solid_recal_mode}" 76 --solid_recal_mode "${analysis_param_type.solid_options_type.solid_recal_mode}"
76 #end if 77 #end if
77 #if str( $analysis_param_type.solid_options_type.solid_nocall_strategy ) != "default": 78 #if str( $analysis_param_type.solid_options_type.solid_nocall_strategy ) != "default":
78 --solid_nocall_strategy "${analysis_param_type.solid_options_type.solid_nocall_strategy}" 79 --solid_nocall_strategy "${analysis_param_type.solid_options_type.solid_nocall_strategy}"
79 #end if 80 #end if
80 #end if 81 #end if
81 --window_size_nqs "${analysis_param_type.window_size_nqs}" 82 --window_size_nqs "${analysis_param_type.window_size_nqs}"
82 --homopolymer_nback "${analysis_param_type.homopolymer_nback}" 83 --homopolymer_nback "${analysis_param_type.homopolymer_nback}"
83 ' 84 '
118 <option value="CycleCovariate" selected="true"/> 119 <option value="CycleCovariate" selected="true"/>
119 <option value="RepeatLengthCovariate" /> 120 <option value="RepeatLengthCovariate" />
120 <option value="RepeatUnitCovariate" /> 121 <option value="RepeatUnitCovariate" />
121 <option value="RepeatUnitAndLengthCovariate" /> 122 <option value="RepeatUnitAndLengthCovariate" />
122 <!-- 123 <!--
123 Note: ReadGroupCovariate and QualityScoreCovariate are required covariates and will 124 Note: ReadGroupCovariate and QualityScoreCovariate are required covariates and will
124 be added for the user regardless of whether or not they were specified. 125 be added for the user regardless of whether or not they were specified.
125 <option value="QualityScoreCovariate" /> 126 <option value="QualityScoreCovariate" />
126 <option value="ReadGroupCovariate" /> 127 <option value="ReadGroupCovariate" />
127 --> 128 -->
128 </param> 129 </param>
129 130
130 <repeat name="rod_bind" title="Known Variants" help="Using data sets of known variants (-knownSites,--knownSites &amp;lt;knownSites&amp;gt;)"> 131 <repeat name="rod_bind" title="Known Variants" help="Using data sets of known variants (-knownSites,--knownSites &amp;lt;knownSites&amp;gt;)">
131 <conditional name="rod_bind_type"> 132 <conditional name="rod_bind_type">
132 <param name="rod_bind_type_selector" type="select" label="Variant Type"> 133 <param name="rod_bind_type_selector" type="select" label="Variant Type">
133 <option value="dbsnp" selected="True">dbSNP</option> 134 <option value="dbsnp" selected="True">dbSNP</option>
134 <option value="snps">SNPs</option> 135 <option value="snps">SNPs</option>
154 </when> 155 </when>
155 </conditional> 156 </conditional>
156 </repeat> 157 </repeat>
157 158
158 <expand macro="gatk_param_type_conditional" /> 159 <expand macro="gatk_param_type_conditional" />
159 160
160 <conditional name="analysis_param_type"> 161 <conditional name="analysis_param_type">
161 <param name="analysis_param_type_selector" type="select" label="Basic or Advanced Analysis options"> 162 <param name="analysis_param_type_selector" type="select" label="Basic or Advanced Analysis options">
162 <option value="basic" selected="True">Basic</option> 163 <option value="basic" selected="True">Basic</option>
163 <option value="advanced">Advanced</option> 164 <option value="advanced">Advanced</option>
164 </param> 165 </param>
245 <param name="input_rod" value="gatk/fake_phiX_variant_locations.bed" ftype="bed" /> 246 <param name="input_rod" value="gatk/fake_phiX_variant_locations.bed" ftype="bed" />
246 <param name="standard_covs" value="True" /> 247 <param name="standard_covs" value="True" />
247 <param name="covariates" value="ReadGroupCovariate,HomopolymerCovariate,MinimumNQSCovariate,PositionCovariate" /> 248 <param name="covariates" value="ReadGroupCovariate,HomopolymerCovariate,MinimumNQSCovariate,PositionCovariate" />
248 <param name="gatk_param_type_selector" value="basic" /> 249 <param name="gatk_param_type_selector" value="basic" />
249 <param name="analysis_param_type_selector" value="basic" /> 250 <param name="analysis_param_type_selector" value="basic" />
250 <output name="output_recal" file="gatk/gatk_count_covariates/gatk_count_covariates_out_1.csv" /> 251 <output name="output_recal" file="gatk/gatk_count_covariates/gatk_count_covariates_out_1.csv" />
251 <output name="output_log" file="gatk/gatk_count_covariates/gatk_count_covariates_out_1.log.contains" compare="contains" /> 252 <output name="output_log" file="gatk/gatk_count_covariates/gatk_count_covariates_out_1.log.contains" compare="contains" />
252 </test> 253 </test>
253 </tests> 254 </tests>
254 <help> 255 <help>
255 .. class:: warningmark 256 .. class:: warningmark
256 257
257 "This calculation is critically dependent on being able to skip over known variant sites. Please provide a dbSNP ROD or a VCF file containing known sites of genetic variation." 258 "This calculation is critically dependent on being able to skip over known variant sites. Please provide a dbSNP ROD or a VCF file containing known sites of genetic variation."
258 However, if you do not provide this file, the '--run_without_dbsnp_potentially_ruining_quality' flag will be automatically used, and the command will be allowed to run. 259 However, if you do not provide this file, the '--run_without_dbsnp_potentially_ruining_quality' flag will be automatically used, and the command will be allowed to run.
259 260
260 **What it does** 261 **What it does**
261 262
262 This walker is designed to work as the first pass in a two-pass processing step. It does a by-locus traversal operating only at sites that are not in dbSNP. We assume that all reference mismatches we see are therefore errors and indicative of poor base quality. This walker generates tables based on various user-specified covariates (such as read group, reported quality score, cycle, and dinucleotide). Since there is a large amount of data, one can then calculate an empirical probability of error given the particular covariates seen at this site, where p(error) = num mismatches / num observations. The output file is a CSV list of (the several covariate values, num observations, num mismatches, empirical quality score). The first non-comment line of the output file gives the name of the covariates that were used for this calculation. Note: ReadGroupCovariate and QualityScoreCovariate are required covariates and will be added for the user regardless of whether or not they were specified. Note: This walker is designed to be used in conjunction with TableRecalibrationWalker. 263 This walker is designed to work as the first pass in a two-pass processing step. It does a by-locus traversal operating only at sites that are not in dbSNP. We assume that all reference mismatches we see are therefore errors and indicative of poor base quality. This walker generates tables based on various user-specified covariates (such as read group, reported quality score, cycle, and dinucleotide). Since there is a large amount of data, one can then calculate an empirical probability of error given the particular covariates seen at this site, where p(error) = num mismatches / num observations. The output file is a CSV list of (the several covariate values, num observations, num mismatches, empirical quality score). The first non-comment line of the output file gives the name of the covariates that were used for this calculation. 
Note: ReadGroupCovariate and QualityScoreCovariate are required covariates and will be added for the user regardless of whether or not they were specified. Note: This walker is designed to be used in conjunction with TableRecalibrationWalker.
263 264
264 For more information on base quality score recalibration using the GATK, see this `tool specific page &lt;http://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_walkers_bqsr_BaseRecalibrator.html&gt;`_. 265 For more information on base quality score recalibration using the GATK, see this `tool specific page &lt;http://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_walkers_bqsr_BaseRecalibrator.html&gt;`_.