Repository 'gatk2'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/gatk2

Changeset 2:8bcc13094767 (2014-01-18)
Previous changeset 1:f760c0de8e3a (2013-12-02) Next changeset 3:2553f84b8174 (2014-02-19)
Commit message:
Uploaded
modified:
base_recalibrator.xml
gatk2_macros.xml
gatk2_picard_index.loc.sample
haplotype_caller.xml
indel_realigner.xml
print_reads.xml
readme.rst
reduce_reads.xml
unified_genotyper.xml
variant_annotator.xml
variant_eval.xml
variant_filtration.xml
variant_validate.xml
b
diff -r f760c0de8e3a -r 8bcc13094767 base_recalibrator.xml
--- a/base_recalibrator.xml Mon Dec 02 10:36:02 2013 -0500
+++ b/base_recalibrator.xml Sat Jan 18 07:21:33 2014 -0500
b
@@ -17,9 +17,8 @@
     \$GATK2_SITE_OPTIONS
 
     ## according to http://www.broadinstitute.org/gatk/guide/article?id=1975
-    --num_cpu_threads_per_data_thread 8
+    --num_cpu_threads_per_data_thread \${GALAXY_SLOTS:-8}
 
-    @THREADS@
     ## we set non standards at every run and the user can choose which ones are preferred
     ## in our select box both standard options (ContextCovariate, CycleCovariate) are selected by default
     --no_standard_covs
@@ -111,12 +110,7 @@
         </param>
       </when>
     </conditional>
-    <param name="input_recal" type="data" format="gatk_report" optional="true" label="Covariates table recalibration file" help="-BQSR,--BQSR &amp;lt;recal_file&amp;gt;" >
-      <help>The input covariates table file which enables on-the-fly base quality score recalibration.
-            Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool.
-            Please be aware that one should only run recalibration with the covariates file created on the same input bam(s).
-      </help>
-    </param>
+    <param name="input_recal" type="data" format="gatk_report" optional="true" label="Covariates table recalibration file" help="The input covariates table file which enables on-the-fly base quality score recalibration. Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool. Please be aware that one should only run recalibration with the covariates file created on the same input bam(s) (-BQSR,--BQSR &amp;lt;recal_file&amp;gt;)" />
 
     <param name="covariates" type="select" multiple="True" display="checkboxes" label="Covariates to be used in the recalibration" help="-cov,--covariate &amp;lt;covariate&amp;gt;" >
       <!-- might we want to load the available covariates from an external configuration file, since additional ones can be added to local installs? -->
b
diff -r f760c0de8e3a -r 8bcc13094767 gatk2_macros.xml
--- a/gatk2_macros.xml Mon Dec 02 10:36:02 2013 -0500
+++ b/gatk2_macros.xml Sat Jan 18 07:21:33 2014 -0500
b
@@ -1,17 +1,23 @@
 <macros>
-    <xml name="requirements">
-        <requirements>
-            <requirement type="package" version="0.1.19">samtools</requirement>
-            <requirement type="set_environment">GATK2_PATH</requirement>
-            <requirement type="set_environment">GATK2_SITE_OPTIONS</requirement>
-        </requirements>
-    </xml>
-    <token name="@THREADS@">
-        --num_threads \${GALAXY_SLOTS:-4}
-    </token>
-    <token name="@JAR_PATH@">
-        java -jar "\$GATK2_PATH/GenomeAnalysisTK.jar"
-    </token>
+  <xml name="requirements">
+    <requirements>
+      <requirement type="package">gatk2</requirement>
+      <requirement type="package" version="0.1.19">samtools</requirement>
+      <requirement type="set_environment">GATK2_PATH</requirement>
+      <requirement type="set_environment">GATK2_SITE_OPTIONS</requirement>
+    </requirements>
+  </xml>
+  <token name="@THREADS@">
+    --num_threads \${GALAXY_SLOTS:-4}
+  </token>
+  <token name="@JAR_PATH@">
+    java -jar "\$GATK2_PATH/GenomeAnalysisTK.jar"
+  </token>
+  <token name="@DBSNP_OPTIONS@">
+    #if $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector == 'set_dbsnp'
+        -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}"
+    #end if
+  </token>
   <template name="standard_gatk_options">
     ##start standard gatk options
     #if $gatk_param_type.gatk_param_type_selector == "advanced":
@@ -311,6 +317,21 @@
       <option value="history">History</option>
     </param>
   </xml>
+  <xml name="dbsnp_param">
+    <conditional name="dbsnp_rod_bind_type">
+      <param name="dbsnp_rod_bind_type_selector" type="select" label="Provide a dbSNP Reference-Ordered Data (ROD) file" help="-D,--dbsnp &amp;lt;dbsnp&amp;gt;">
+        <option value="set_dbsnp" selected="True">Set dbSNP</option>
+        <option value="exclude_dbsnp">Don't set dbSNP</option>
+      </param>
+      <when value="exclude_dbsnp" />
+      <when value="set_dbsnp">
+        <param name="dbsnp_input_rod" type="data" format="vcf" label="dbSNP ROD file" />
+        <param name="dbsnp_rod_name" type="text" value="dbsnp" label="dbsnp ROD name">
+          <validator type="regex" message="Value must be a not empty string composed by alphanumeric characters and underscores">^\w+$</validator>
+        </param>
+      </when>
+    </conditional>
+  </xml>
   <token name="@CITATION_SECTION@">------
 
 **Citation**
b
diff -r f760c0de8e3a -r 8bcc13094767 gatk2_picard_index.loc.sample
--- a/gatk2_picard_index.loc.sample Mon Dec 02 10:36:02 2013 -0500
+++ b/gatk2_picard_index.loc.sample Sat Jan 18 07:21:33 2014 -0500
b
@@ -24,7 +24,3 @@
 #the dict file does not have the .fa extension although the
 #path list in the loc file does include it.
 #
-hg18 hg18 hg18 /data/galaxy/ext-tool-data/picard/hg18.fa
-hg19 hg19 hg19 /data/galaxy/ext-tool-data/picard/hg19.fa
-mm8 mm8 mm8 /data/galaxy/ext-tool-data/picard/mm8.fa
-mm9 mm9 mm9 /data/galaxy/ext-tool-data/picard/mm9.fa
b
diff -r f760c0de8e3a -r 8bcc13094767 haplotype_caller.xml
--- a/haplotype_caller.xml Mon Dec 02 10:36:02 2013 -0500
+++ b/haplotype_caller.xml Sat Jan 18 07:21:33 2014 -0500
b
b'@@ -7,10 +7,12 @@\n   <command interpreter="python">\n     gatk2_wrapper.py\n     --stdout "${output_log}"\n-    -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input"\n-    #if str( $reference_source.input_bam.metadata.bam_index ) != "None":\n-        -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index\n-    #end if\n+    #for $i, $input_bam in enumerate( $reference_source.input_bams ):\n+        -d "-I" "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "gatk_input_${i}"\n+        #if str( $input_bam.input_bam.metadata.bam_index ) != "None":\n+            -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "gatk_input_${i}" ##hardcode galaxy ext type as bam_index\n+        #end if\n+    #end for\n     -p \'\n     @JAR_PATH@\n     -T "HaplotypeCaller"\n@@ -18,7 +20,7 @@\n \n     \\$GATK2_SITE_OPTIONS\n \n-    @THREADS@\n+    --num_cpu_threads_per_data_thread \\${GALAXY_SLOTS:-4}\n \n     #if $reference_source.reference_source_selector != "history":\n         -R "${reference_source.ref_file.fields.path}"\n@@ -27,14 +29,12 @@\n         --BQSR "${input_recal}"\n     #end if\n    \'\n+    @DBSNP_OPTIONS@\n     #include source=$standard_gatk_options#\n     \n     ##start analysis specific options\n     #if $analysis_param_type.analysis_param_type_selector == "advanced":\n         -p \'\n-        #if $analysis_param_type.p_nonref_model.__str__ != "None" and len($analysis_param_type.p_nonref_model.__str__) > 0:\n-          --p_nonref_model $analysis_param_type.p_nonref_model\n-        #end if\n         #if $analysis_param_type.heterozygosity.__str__.strip() != \'\':\n             --heterozygosity $analysis_param_type.heterozygosity\n         #end if\n@@ -42,8 +42,8 @@\n         #if str( $analysis_param_type.genotyping_mode_type.genotyping_mode ) == \'GENOTYPE_GIVEN_ALLELES\':\n             --alleles 
"${analysis_param_type.genotyping_mode_type.input_alleles_rod}"\n         #end if\n-        #if $analysis_param_type.output_mode.__str__ != "None" and len($analysis_param_type.output_mode.__str__) > 0:\n-          --output_mode $analysis_param_type.output_mode\n+        #if not $analysis_param_type.emitRefConfidence is None:\n+          --emitRefConfidence $analysis_param_type.emitRefConfidence\n         #end if\n \n         ## files\n@@ -53,9 +53,6 @@\n         #if str($analysis_param_type.comp) != \'None\':\n             --comp "$analysis_param_type.comp"\n         #end if\n-        #if str($analysis_param_type.dbsnp) != \'None\':\n-            --dbsnp "$analysis_param_type.dbsnp"\n-        #end if\n         ##  \n         #if str( $analysis_param_type.annotation ) != "None":\n             #for $annotation in str( $analysis_param_type.annotation.fields.gatk_value ).split( \',\'):\n@@ -80,9 +77,6 @@\n         #if $analysis_param_type.contamination_fraction_to_filter.__str__.strip() != \'\':\n             --contamination_fraction_to_filter $analysis_param_type.contamination_fraction_to_filter\n         #end if\n-        #if $analysis_param_type.downsampleRegion.__str__.strip() != \'\':\n-            --downsampleRegion $analysis_param_type.downsampleRegion\n-        #end if\n         #if $analysis_param_type.minPruning.__str__.strip() != \'\':\n             --minPruning $analysis_param_type.minPruning\n         #end if\n@@ -99,9 +93,7 @@\n             --max_alternate_alleles $analysis_param_type.max_alternate_alleles\n         #end if\n         ## mode selections\n-        #if $analysis_param_type.genotyping_mode.__str__ != "None" and len($analysis_param_type.genotyping_mode.__str__) > 0:\n-          --genotyping_mode $analysis_param_type.genotyping_mode\n-        #end if\n+\n         #if $analysis_param_type.pair_hmm_implementation.__str__ != "None" and len($analysis_param_type.pair_hmm_implementation.__str__) > 0:\n           --pair_hmm_implementation 
$analysis_param_type.pair_hmm_implementation\n         #end if\n@@ -121,35 +113,31 @@\n     #end if\n   </command>\n   <inputs>\n-    <param name="input_recal" type="data" format='..b'ed="True">don\'t emit anything</option>\n+              <option value="BP_RESOLUTION">BP_RESOLUTION (emit detailed information for each BP)</option>\n+              <option value="GVCF">GVCF (emit a block summarized version of the BP_RESOLUTION data)</option>\n         </param>\n         <param name="pair_hmm_implementation" type="select" optional="true" label="pair_hmm_implementation" help="--pair_hmm_implementation / -pairHMM  The PairHMM implementation to use for genotype likelihood calculations">\n               <option value="EXACT">EXACT</option>\n@@ -252,13 +239,6 @@\n         <param name="gcpHMM" type="integer" value="10" optional="true" label="gcpHMM" help="--gcpHMM / -gcpHMM  Flat gap continuation penalty for use in the Pair HMM"/>\n         <param name="genotypeFullActiveRegion" type="boolean" checked="False" truevalue="-genotypeFullActiveRegion" falsevalue="" label="genotypeFullActiveRegion" help="--genotypeFullActiveRegion / -genotypeFullActiveRegion  If specified, alternate alleles are considered to be the full active region for the purposes of genotyping"/>\n         <param name="max_alternate_alleles" type="integer" value="6" optional="true" label="max_alternate_alleles" help="--max_alternate_alleles / -maxAltAlleles  Maximum number of alternate alleles to genotype"/>\n-        <param name="p_nonref_model" type="select" optional="true" label="p_nonref_model" help="--p_nonref_model / -pnrm  Non-reference probability calculation model to employ">\n-              <option value="EXACT_INDEPENDENT" selected="True">EXACT_INDEPENDENT experimental implementation - for testing only</option>\n-              <option value="EXACT_REFERENCE">EXACT_REFERENCE reference implementation of multi-allelic EXACT model. 
Extremely slow for many alternate alleles</option>\n-              <option value="EXACT_ORIGINAL">EXACT_ORIGINAL original biallelic exact model, for testing only</option>\n-              <option value="EXACT_GENERAL_PLOIDY">implementation that supports any sample ploidy</option>\n-        </param>\n-\n       </when>\n     </conditional>\n   </inputs>\n@@ -323,14 +303,12 @@\n  contamination               Fraction of contamination in sequencing data (for all samples) to aggressively remove\n  dbsnp                       dbSNP file\n  debug                       If specified, print out very verbose debug information about each triggering active region\n- downsampleRegion            coverage, per-sample, to downsample each active region to\n  excludeAnnotation           One or more specific annotations to exclude\n  genotyping_mode             Specifies how to determine the alternate alleles to use for genotyping\n  graphOutput                 File to which debug assembly graph information should be written\n  group                       One or more classes/groups of annotations to apply to variant calls\n  heterozygosity              Heterozygosity value used to compute prior likelihoods for any locus\n  minPruning                  The minimum allowed pruning factor in assembly graph. 
Paths with less than or equal supporting kmers are pruned from the graph\n- output_mode                 Specifies which type of calls we should output\n  pair_hmm_implementation     The PairHMM implementation to use for genotype likelihood calculations\n  stand_call_conf             The minimum phred-scaled confidence threshold at which variants should be called\n  stand_emit_conf             The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)\n@@ -339,9 +317,6 @@\n  gcpHMM                      Flat gap continuation penalty for use in the Pair HMM\n  genotypeFullActiveRegion    If specified, alternate alleles are considered to be the full active region for the purposes of genotyping\n  max_alternate_alleles       Maximum number of alternate alleles to genotype\n- p_nonref_model              Non-reference probability calculation model to employ\n-\n-------\n \n @CITATION_SECTION@\n   </help>\n'
b
diff -r f760c0de8e3a -r 8bcc13094767 indel_realigner.xml
--- a/indel_realigner.xml Mon Dec 02 10:36:02 2013 -0500
+++ b/indel_realigner.xml Sat Jan 18 07:21:33 2014 -0500
b
@@ -21,8 +21,6 @@
     ## according to http://www.broadinstitute.org/gatk/guide/article?id=1975
     --num_cpu_threads_per_data_thread 1
 
-    @THREADS@
-
     #if $reference_source.reference_source_selector != "history":
         -R "${reference_source.ref_file.fields.path}"
     #end if
b
diff -r f760c0de8e3a -r 8bcc13094767 print_reads.xml
--- a/print_reads.xml Mon Dec 02 10:36:02 2013 -0500
+++ b/print_reads.xml Sat Jan 18 07:21:33 2014 -0500
b
@@ -18,9 +18,8 @@
     \$GATK2_SITE_OPTIONS
 
     ## according to http://www.broadinstitute.org/gatk/guide/article?id=1975
-    --num_cpu_threads_per_data_thread 8
+    --num_cpu_threads_per_data_thread \${GALAXY_SLOTS:-6}
 
-    @THREADS@
 
     #if $reference_source.reference_source_selector != "history":
         -R "${reference_source.ref_file.fields.path}"
b
diff -r f760c0de8e3a -r 8bcc13094767 readme.rst
--- a/readme.rst Mon Dec 02 10:36:02 2013 -0500
+++ b/readme.rst Sat Jan 18 07:21:33 2014 -0500
b
@@ -13,7 +13,6 @@
 http://www.broadinstitute.org/gatk
 http://www.broadinstitute.org/gatk/about/citing-gatk
 
-
 GATK is Free for academics, and fee for commercial use. Please study the GATK licensing website:
 http://www.broadinstitute.org/gatk/about/#licensing
 
@@ -23,35 +22,42 @@
 
 The recommended installation is by means of the toolshed_.
 
-.. _toolshed: http://toolshed.g2.bx.psu.edu/view/bjoern-gruening/augustus
+.. _toolshed: http://toolshed.g2.bx.psu.edu/view/iuc/gatk2
 
-Galaxy should be able to automatically install samtools dependencies automatically
+Galaxy should be able to install samtools dependencies automatically
 for you. GATK2, and its new licence model, does not allow us to distribute the GATK binaries.
-As a consequence you need to install GATK2 by your own, please see the GATK website for more informations:
+As a consequence, you need to install GATK2 on your own; please see the GATK website for more information:
 
 http://www.broadinstitute.org/gatk/download
 
-Once you have installed GATK2 you need to edit the env.sh file that is installed with these wrappers.
-You will find this env.sh file under:
+Once you have installed GATK2, you need to edit the env.sh files that are installed together with the wrappers.
+You must edit the GATK2_PATH environment variable in the file:
 
-<tool_dependency_dir>/gatk2/<version>/iuc/<hash_string>/env.sh
+<tool_dependency_dir>/environment_settings/GATK2_PATH/iuc/gatk2/<hash_string>/env.sh
+
+to point to the folder where you have installed GATK2.
+
+Optionally, you may also want to edit the GATK2_SITE_OPTIONS environment variable in the file:
 
-You should edit the GATK2_PATH environment variable to point to the folder you have installed GATK2
-and if you want to deactivate the 'call home feature' from GATK you can set
+<tool_dependency_dir>/environment_settings/GATK2_SITE_OPTIONS/iuc/gatk2/<hash_string>/env.sh
 
-GATK2_SITE_OPTIONS='-et "NO_ET" -K "/data/gatk2_key_file"'
+to deactivate the "Phone Home" feature of GATK with something like:
 
-GATK2_SITE_OPTIONS can be used to insert specific options into every GATK2 wrapper 
-during runtime, without changing the actuall wrapper.
+GATK2_SITE_OPTIONS='-et NO_ET -K /data/gatk2_key_file'
 
-Read more about the "Phone Home" problem under:
+GATK2_SITE_OPTIONS can also be used to insert other specific options into every GATK2 wrapper
+at runtime, without changing the actual wrapper.
+
+Read more about the "Phone Home" problem at:
 http://www.broadinstitute.org/gatk/guide/article?id=1250
 
+Optionally, you may also want to add some commands to be executed before GATK (e.g. to load modules) to the file:
+
+<tool_dependency_dir>/gatk2/default/env.sh
 
 Finally, you should fill in additional information about your genomes and 
 annotations in the gatk2_picard_index.loc and gatk2_annotations.txt. 
-You can find them under ./tool-data/.
-
+You can find them in the tool-data/ directory of your Galaxy installation.
 
 
 History
@@ -80,5 +86,3 @@
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
-
-
b
diff -r f760c0de8e3a -r 8bcc13094767 reduce_reads.xml
--- a/reduce_reads.xml Mon Dec 02 10:36:02 2013 -0500
+++ b/reduce_reads.xml Sat Jan 18 07:21:33 2014 -0500
b
@@ -21,8 +21,6 @@
     ## according to http://www.broadinstitute.org/gatk/guide/article?id=1975
     --num_cpu_threads_per_data_thread 1
 
-    @THREADS@
-
     #if $reference_source.reference_source_selector != "history":
         -R "${reference_source.ref_file.fields.path}"
     #end if
@@ -68,12 +66,7 @@
     #end if
   </command>
   <inputs>
-    <param name="input_recal" type="data" format="csv" optional="true" label="Covariates table recalibration file" help="-BQSR,--BQSR &amp;lt;recal_file&amp;gt;" >
-      <help>The input covariates table file which enables on-the-fly base quality score recalibration. 
-            Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool. 
-            Please be aware that one should only run recalibration with the covariates file created on the same input bam(s).
-      </help>
-    </param>
+    <param name="input_recal" type="data" format="csv" optional="true" label="Covariates table recalibration file" help="The input covariates table file which enables on-the-fly base quality score recalibration. Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool. Please be aware that one should only run recalibration with the covariates file created on the same input bam(s) (-BQSR,--BQSR &amp;lt;recal_file&amp;gt;)" />
     <conditional name="reference_source">
       <expand macro="reference_source_selector_param" />
       <when value="cached">
@@ -228,8 +221,6 @@
  -noclip_ad / --dont_hardclip_adaptor_sequences ( boolean with default value false )
  Do not hard clip adaptor sequences. Note: You don't have to turn this on for reads that are not mate paired. The program will behave correctly in those cases.
 
-------
-
 @CITATION_SECTION@
   </help>
 </tool>
b
diff -r f760c0de8e3a -r 8bcc13094767 unified_genotyper.xml
--- a/unified_genotyper.xml Mon Dec 02 10:36:02 2013 -0500
+++ b/unified_genotyper.xml Sat Jan 18 07:21:33 2014 -0500
[
b'@@ -31,22 +31,12 @@\n     --standard_min_confidence_threshold_for_calling "${standard_min_confidence_threshold_for_calling}"\n     --standard_min_confidence_threshold_for_emitting "${standard_min_confidence_threshold_for_emitting}"\n    \'\n-    #set $rod_binding_names = dict()\n-    #for $rod_binding in $rod_bind:\n-        #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == \'custom\':\n-            #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name\n-        #else\n-            #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector\n-        #end if\n-        #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1\n-        -d "--dbsnp:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}"\n-    #end for\n-   \n+    @DBSNP_OPTIONS@\n+\n     #include source=$standard_gatk_options#\n     ##start analysis specific options\n     #if $analysis_param_type.analysis_param_type_selector == "advanced":\n         -p \'\n-        --p_nonref_model "${analysis_param_type.p_nonref_model}"\n         --heterozygosity "${analysis_param_type.heterozygosity}"\n         --pcr_error_rate "${analysis_param_type.pcr_error_rate}"\n         --genotyping_mode "${analysis_param_type.genotyping_mode_type.genotyping_mode}"\n@@ -120,30 +110,7 @@\n         <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />\n       </when>\n     </conditional>\n-    \n-    <repeat name="rod_bind" title="Binding for reference-ordered data" help="-D,--dbsnp &amp;lt;dbsnp&amp;gt;">\n-        <conditional name="rod_bind_type">\n-          <param name="rod_bind_type_selector" type="select" label="Binding Type">\n-            <option value="dbsnp" selected="True">dbSNP</option>\n-            <option 
value="snps">SNPs</option>\n-            <option value="indels">INDELs</option>\n-            <option value="custom">Custom</option>\n-          </param>\n-          <when value="dbsnp">\n-              <param name="input_rod" type="data" format="vcf" label="ROD file" />\n-          </when>\n-          <when value="snps">\n-              <param name="input_rod" type="data" format="vcf" label="ROD file" />\n-          </when>\n-          <when value="indels">\n-              <param name="input_rod" type="data" format="vcf" label="ROD file" />\n-          </when>\n-          <when value="custom">\n-              <param name="custom_rod_name" type="text" value="Unknown" label="ROD Name"/>\n-              <param name="input_rod" type="data" format="vcf" label="ROD file" />\n-          </when>\n-        </conditional>\n-    </repeat>\n+    <expand macro="dbsnp_param" />\n     \n     <param name="genotype_likelihoods_model" type="select" label="Genotype likelihoods calculation model to employ" help="-glm,--genotype_likelihoods_model &amp;lt;genotype_likelihoods_model&amp;gt;">\n       <option value="BOTH" selected="True">BOTH</option>\n@@ -158,10 +125,6 @@\n     <expand macro="gatk_param_type_conditional" />\n     \n     <expand macro="analysis_type_conditional">\n-        <param name="p_nonref_model" type="select" label="Non-reference probability calculation model to employ" help="-pnrm,--p_nonref_model &amp;lt;p_nonref_model&amp;gt;">\n-          <option value="EXACT_GENERAL_PLOIDY" selected="True">EXACT_GENERAL_PLOIDY (supports any sample ploidy)</option>\n-          <option value="EXACT_REFERENCE">EXACT_REFERENCE (multi-allelic EXACT model.  
Extremely slow for many alternate alleles)</option>\n-        </param>\n         <param name="heterozygosity" type="float" value="1e-3" label="Heterozygosity value used to compute prior likelihoods for any locus" help="-hets,--heterozygosity &amp;lt;heterozygosity&amp;gt;" />\n         <param name="pcr_error_rate" type="float" value="1e-4" label="The PCR error rate to be used for computing fragment-based likelihoods" help'..b';lt;min_indel_count_for_genotyping&amp;gt;" />\n         <param name="indel_heterozygosity" type="float" value="0.000125" label="Heterozygosity for indel calling" help="1.0/8000==0.000125 (-indelHeterozygosity,--indel_heterozygosity &amp;lt;indel_heterozygosity&amp;gt;)"/>\n-        <param name="indelGapContinuationPenalty" type="integer" value="10" label="Indel gap continuation penalty" help="--indelGapContinuationPenalty">\n-          <help>Indel gap continuation penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10.</help>\n-          <validator type="in_range" message="value between 0 and 255" min="0" max="255"/> \n+        <param name="indelGapContinuationPenalty" type="integer" value="10" label="Indel gap continuation penalty" help="As Phred-scaled probability, i.e. 30 => 10^-30/10 (--indelGapContinuationPenalty)">\n+          <validator type="in_range" message="value between 0 and 255" min="0" max="255" />\n         </param>\n-        <param name="indelGapOpenPenalty" type="integer" value="45" label="Indel gap open penalty" help="--indelGapOpenPenalty" >\n-          <help>Indel gap open penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10.</help>\n-          <validator type="in_range" message="value between 0 and 255" min="0" max="255"/> \n+        <param name="indelGapOpenPenalty" type="integer" value="45" label="Indel gap open penalty" help="As Phred-scaled probability, i.e. 
30 => 10^-30/10 (--indelGapOpenPenalty)">\n+          <validator type="in_range" message="value between 0 and 255" min="0" max="255" />\n         </param>\n         <!-- indelHaplotypeSize - Gone in GATK 2.4? -->\n         <param name="indelHaplotypeSize" type="integer" value="80" label="Indel haplotype size" help="--indelHaplotypeSize" />\n@@ -252,14 +213,14 @@\n           <param name="reference_source_selector" value="history" />\n           <param name="ref_file" value="phiX.fasta" ftype="fasta" />\n           <param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" />\n-          <param name="rod_bind_type_selector" value="dbsnp" />\n-          <param name="input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" />\n+          <param name="dbsnp_rod_bind_type_selector" value="set_dbsnp" />\n+          <param name="dbsnp_input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" />\n+          <param name="dbsnp_rod_name" value="dbsnp" />\n           <param name="standard_min_confidence_threshold_for_calling" value="0" />\n           <param name="standard_min_confidence_threshold_for_emitting" value="4" />\n           <param name="gatk_param_type_selector" value="basic" />\n           <param name="analysis_param_type_selector" value="advanced" />\n           <param name="genotype_likelihoods_model" value="BOTH" />\n-          <param name="p_nonref_model" value="EXACT" />\n           <param name="heterozygosity" value="0.001" />\n           <param name="pcr_error_rate" value="0.0001" />\n           <param name="genotyping_mode" value="DISCOVERY" />\n@@ -310,7 +271,6 @@\n **Settings**::\n \n  genotype_likelihoods_model                        Genotype likelihoods calculation model to employ -- BOTH is the default option, while INDEL is also available for calling indels and SNP is available for calling SNPs only (SNP|INDEL|BOTH)\n- p_nonref_model                                    Non-reference 
probability calculation model to employ -- EXACT_GENERAL_PLOIDY is the default option, while EXACT_REFERENCE is also available. (EXACT_INDEPENDENT,EXACT_REFERENCE,EXACT_ORIGINAL,EXACT_GENERAL_PLOIDY)\n  heterozygosity                                    Heterozygosity value used to compute prior likelihoods for any locus\n  pcr_error_rate                                    The PCR error rate to be used for computing fragment-based likelihoods\n  genotyping_mode                                   Should we output confident genotypes (i.e. including ref calls) or just the variants? (DISCOVERY|GENOTYPE_GIVEN_ALLELES)\n'
b
diff -r f760c0de8e3a -r 8bcc13094767 variant_annotator.xml
--- a/variant_annotator.xml Mon Dec 02 10:36:02 2013 -0500
+++ b/variant_annotator.xml Sat Jan 18 07:21:33 2014 -0500
b
@@ -51,10 +51,7 @@
         -d "--comp:${rod_binding.comp_rod_name},%(file_type)s" "${rod_binding.comp_input_rod}" "${rod_binding.comp_input_rod.ext}" "input_comp_${rod_binding.comp_rod_name}"
     #end for
     
-    #if str( $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector ) == 'set_dbsnp':
-        -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}"
-    #end if
-    
+    @DBSNP_OPTIONS@
     
     #for $rod_binding in $resource_rod_bind:
         -d "--resource:${rod_binding.resource_rod_name},%(file_type)s" "${rod_binding.resource_input_rod}" "${rod_binding.resource_input_rod.ext}" "input_resource_${rod_binding.resource_rod_name}"
@@ -136,20 +133,7 @@
       <param name="comp_input_rod" type="data" format="vcf" label="ROD file" />
       <param name="comp_rod_name" type="text" value="Unnamed" label="ROD Name"/>
     </repeat>
-    
-    <conditional name="dbsnp_rod_bind_type">
-      <param name="dbsnp_rod_bind_type_selector" type="select" label="Provide a dbSNP reference-ordered data file" help="-D,--dbsnp &amp;lt;dbsnp&amp;gt;">
-        <option value="set_dbsnp" selected="True">Set dbSNP</option>
-        <option value="exclude_dbsnp">Don't set dbSNP</option>
-      </param>
-      <when value="exclude_dbsnp">
-        <!-- Do nothing here -->
-      </when>
-      <when value="set_dbsnp">
-        <param name="dbsnp_input_rod" type="data" format="vcf" label="ROD file" />
-        <param name="dbsnp_rod_name" type="hidden" value="dbsnp" label="ROD Name"/>
-      </when>
-    </conditional>
+    <expand macro="dbsnp_param" />
     
     <repeat name="resource_rod_bind" title="Binding for reference-ordered resource data" help="-resource,--resource &amp;lt;resource&amp;gt;">
       <param name="resource_input_rod" type="data" format="vcf" label="ROD file" />
@@ -210,6 +194,7 @@
           <param name="additional_annotations" value="0" />
           <param name="dbsnp_rod_bind_type_selector" value="set_dbsnp" />
           <param name="dbsnp_input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" />
+          <param name="dbsnp_rod_name" value="dbsnp" />
           <param name="snpEff_rod_bind_type_selector" value="exclude_snpEff" />
           <param name="gatk_param_type_selector" value="basic" />
           <output name="output_vcf" file="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" lines_diff="4" /> 
b
diff -r f760c0de8e3a -r 8bcc13094767 variant_eval.xml
--- a/variant_eval.xml Mon Dec 02 10:36:02 2013 -0500
+++ b/variant_eval.xml Sat Jan 18 07:21:33 2014 -0500
b
@@ -32,9 +32,9 @@
         #end if
     #end for
     
-    #if str( $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector ) == 'set_dbsnp':
+    #if $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector == 'set_dbsnp'
         -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}"
-        #if str( $dbsnp_rod_bind_type.dbsnp_known_names ):
+        #if $dbsnp_rod_bind_type.dbsnp_known_names
             -p '--known_names "${dbsnp_rod_bind_type.dbsnp_rod_name}"'
         #end if
     #end if
@@ -122,24 +122,25 @@
       </when>
     </conditional>
     
-    <repeat name="comp_rod_bind" title="Binding for reference-ordered comparison data" help="-comp,--comp &amp;lt;comp&amp;gt;">
+    <repeat name="comp_rod_bind" title="Comparison Reference-Ordered Data (ROD) file" help="-comp,--comp &amp;lt;comp&amp;gt;">
       <param name="comp_input_rod" type="data" format="vcf" label="Comparison ROD file" />
-      <param name="comp_rod_name" type="text" value="Unnamed" label="Comparison ROD Name"/>
-      <param name="comp_known_names" type="boolean" truevalue="--known_names" falsevalue="" label="Use Comparison ROD as known_names" help="-knownName,--known_names &amp;lt;known_names&amp;gt;"/>
+      <param name="comp_rod_name" type="text" value="" label="Comparison ROD name">
+          <validator type="regex" message="Value must be a non-empty string composed of alphanumeric characters and underscores">^\w+$</validator>
+      </param>
+      <param name="comp_known_names" type="boolean" label="Use comparison ROD file as known_names" help="-knownName,--known_names &amp;lt;known_names&amp;gt;"/>
     </repeat>
-    
     <conditional name="dbsnp_rod_bind_type">
-      <param name="dbsnp_rod_bind_type_selector" type="select" label="Provide a dbSNP reference-ordered data file" help="-D,--dbsnp &amp;lt;dbsnp&amp;gt;">
+      <param name="dbsnp_rod_bind_type_selector" type="select" label="Provide a dbSNP Reference-Ordered Data (ROD) file" help="-D,--dbsnp &amp;lt;dbsnp&amp;gt;">
         <option value="set_dbsnp" selected="True">Set dbSNP</option>
         <option value="exclude_dbsnp">Don't set dbSNP</option>
       </param>
-      <when value="exclude_dbsnp">
-        <!-- Do nothing here -->
-      </when>
+      <when value="exclude_dbsnp" />
       <when value="set_dbsnp">
         <param name="dbsnp_input_rod" type="data" format="vcf" label="dbSNP ROD file" />
-        <param name="dbsnp_rod_name" type="hidden" value="dbsnp" label="dbSNP ROD Name"/>
-        <param name="dbsnp_known_names" type="boolean" truevalue="--known_names" falsevalue="" label="Use dbSNP ROD as known_names" help="-knownName,--known_names &amp;lt;known_names&amp;gt;" />
+        <param name="dbsnp_rod_name" type="text" value="dbsnp" label="dbSNP ROD name">
+          <validator type="regex" message="Value must be a non-empty string composed of alphanumeric characters and underscores">^\w+$</validator>
+        </param>
+        <param name="dbsnp_known_names" type="boolean" label="Use dbSNP ROD file as known_names" help="-knownName,--known_names &amp;lt;known_names&amp;gt;" />
       </when>
     </conditional>
     
@@ -228,6 +229,7 @@
           <param name="input_variant" value="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" ftype="vcf" />
           <param name="dbsnp_rod_bind_type_selector" value="set_dbsnp" />
           <param name="dbsnp_input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" />
+          <param name="dbsnp_rod_name" value="dbsnp" />
           <param name="dbsnp_known_names" value="True"/>
           <param name="comp_rod_bind" value="0" />
           <param name="gatk_param_type_selector" value="basic" />
b
diff -r f760c0de8e3a -r 8bcc13094767 variant_filtration.xml
--- a/variant_filtration.xml Mon Dec 02 10:36:02 2013 -0500
+++ b/variant_filtration.xml Sat Jan 18 07:21:33 2014 -0500
b
@@ -15,8 +15,6 @@
     -T "VariantFiltration"
     \$GATK2_SITE_OPTIONS
 
-    @THREADS@
-
     -o "${output_vcf}"
 
     #if $reference_source.reference_source_selector != "history":
@@ -83,7 +81,7 @@
     
     <conditional name="mask_rod_bind_type">
       <param name="mask_rod_bind_type_selector" type="select" label="Provide a Mask reference-ordered data file">
-        <option value="set_mask" selected="True">Set maskP</option>
+        <option value="set_mask" selected="True">Set mask</option>
         <option value="exclude_mask">Don't set mask</option>
       </param>
       <when value="exclude_mask">
b
diff -r f760c0de8e3a -r 8bcc13094767 variant_validate.xml
--- a/variant_validate.xml Mon Dec 02 10:36:02 2013 -0500
+++ b/variant_validate.xml Sat Jan 18 07:21:33 2014 -0500
b
@@ -14,18 +14,13 @@
     
     \$GATK2_SITE_OPTIONS
 
-    @THREADS@
-
     #if $reference_source.reference_source_selector != "history":
         -R "${reference_source.ref_file.fields.path}"
     #end if
     ${warn_on_errors}
     ${do_not_validate_filtered_records}
    '
-   
-    #if str( $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector ) == 'set_dbsnp':
-        -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}"
-    #end if
+    @DBSNP_OPTIONS@
    
     #include source=$standard_gatk_options#
   </command>
@@ -47,20 +42,7 @@
         <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
       </when>
     </conditional>
-    
-    <conditional name="dbsnp_rod_bind_type">
-      <param name="dbsnp_rod_bind_type_selector" type="select" label="Provide a dbSNP reference-ordered data file" help="-D,--dbsnp &amp;lt;dbsnp&amp;gt;">
-        <option value="set_dbsnp" selected="True">Set dbSNP</option>
-        <option value="exclude_dbsnp">Don't set dbSNP</option>
-      </param>
-      <when value="exclude_dbsnp">
-        <!-- Do nothing here -->
-      </when>
-      <when value="set_dbsnp">
-        <param name="dbsnp_input_rod" type="data" format="vcf" label="ROD file" />
-        <param name="dbsnp_rod_name" type="hidden" value="dbsnp" label="ROD Name"/>
-      </when>
-    </conditional>
+    <expand macro="dbsnp_param" />
     
     <param name="warn_on_errors" type="boolean" checked="False" truevalue="-warnOnErrors" falsevalue="" label="instead of terminating the run at the first error, print warning messages for each error seen." help="-warnOnErrors,--warnOnErrors"/>
     <param name="do_not_validate_filtered_records" type="boolean" checked="False" truevalue="-doNotValidateFilteredRecords" falsevalue="" label="do not try to validate records that are FILTERed." help="-doNotValidateFilteredRecords,--doNotValidateFilteredRecords"/>
@@ -78,6 +60,7 @@
           <param name="input_variant" value="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" ftype="vcf" />
           <param name="dbsnp_rod_bind_type_selector" value="set_dbsnp" />
           <param name="dbsnp_input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" />
+          <param name="dbsnp_rod_name" value="dbsnp" />
           <param name="warn_on_errors" value="True"/>
           <param name="do_not_validate_filtered_records" />
           <param name="gatk_param_type_selector" value="basic" />