changeset 2:8bcc13094767 draft

Uploaded
author iuc
date Sat, 18 Jan 2014 07:21:33 -0500
parents f760c0de8e3a
children 2553f84b8174
files base_recalibrator.xml gatk2_macros.xml gatk2_picard_index.loc.sample haplotype_caller.xml indel_realigner.xml print_reads.xml readme.rst reduce_reads.xml unified_genotyper.xml variant_annotator.xml variant_eval.xml variant_filtration.xml variant_validate.xml
diffstat 13 files changed, 119 insertions(+), 213 deletions(-) [+]
line wrap: on
line diff
--- a/base_recalibrator.xml	Mon Dec 02 10:36:02 2013 -0500
+++ b/base_recalibrator.xml	Sat Jan 18 07:21:33 2014 -0500
@@ -17,9 +17,8 @@
     \$GATK2_SITE_OPTIONS
 
     ## according to http://www.broadinstitute.org/gatk/guide/article?id=1975
-    --num_cpu_threads_per_data_thread 8
+    --num_cpu_threads_per_data_thread \${GALAXY_SLOTS:-8}
 
-    @THREADS@
     ## we set non standards at every run and the user can choose which ones are preferred
     ## in our select box both standard options (ContextCovariate, CycleCovariate) are selected by default
     --no_standard_covs
@@ -111,12 +110,7 @@
         </param>
       </when>
     </conditional>
-    <param name="input_recal" type="data" format="gatk_report" optional="true" label="Covariates table recalibration file" help="-BQSR,--BQSR &amp;lt;recal_file&amp;gt;" >
-      <help>The input covariates table file which enables on-the-fly base quality score recalibration.
-            Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool.
-            Please be aware that one should only run recalibration with the covariates file created on the same input bam(s).
-      </help>
-    </param>
+    <param name="input_recal" type="data" format="gatk_report" optional="true" label="Covariates table recalibration file" help="The input covariates table file which enables on-the-fly base quality score recalibration. Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool. Please be aware that one should only run recalibration with the covariates file created on the same input bam(s) (-BQSR,--BQSR &amp;lt;recal_file&amp;gt;)" />
 
     <param name="covariates" type="select" multiple="True" display="checkboxes" label="Covariates to be used in the recalibration" help="-cov,--covariate &amp;lt;covariate&amp;gt;" >
       <!-- might we want to load the available covariates from an external configuration file, since additional ones can be added to local installs? -->
--- a/gatk2_macros.xml	Mon Dec 02 10:36:02 2013 -0500
+++ b/gatk2_macros.xml	Sat Jan 18 07:21:33 2014 -0500
@@ -1,17 +1,23 @@
 <macros>
-    <xml name="requirements">
-        <requirements>
-            <requirement type="package" version="0.1.19">samtools</requirement>
-            <requirement type="set_environment">GATK2_PATH</requirement>
-            <requirement type="set_environment">GATK2_SITE_OPTIONS</requirement>
-        </requirements>
-    </xml>
-    <token name="@THREADS@">
-        --num_threads \${GALAXY_SLOTS:-4}
-    </token>
-    <token name="@JAR_PATH@">
-        java -jar "\$GATK2_PATH/GenomeAnalysisTK.jar"
-    </token>
+  <xml name="requirements">
+    <requirements>
+      <requirement type="package">gatk2</requirement>
+      <requirement type="package" version="0.1.19">samtools</requirement>
+      <requirement type="set_environment">GATK2_PATH</requirement>
+      <requirement type="set_environment">GATK2_SITE_OPTIONS</requirement>
+    </requirements>
+  </xml>
+  <token name="@THREADS@">
+    --num_threads \${GALAXY_SLOTS:-4}
+  </token>
+  <token name="@JAR_PATH@">
+    java -jar "\$GATK2_PATH/GenomeAnalysisTK.jar"
+  </token>
+  <token name="@DBSNP_OPTIONS@">
+    #if $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector == 'set_dbsnp'
+        -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}"
+    #end if
+  </token>
   <template name="standard_gatk_options">
     ##start standard gatk options
     #if $gatk_param_type.gatk_param_type_selector == "advanced":
@@ -311,6 +317,21 @@
       <option value="history">History</option>
     </param>
   </xml>
+  <xml name="dbsnp_param">
+    <conditional name="dbsnp_rod_bind_type">
+      <param name="dbsnp_rod_bind_type_selector" type="select" label="Provide a dbSNP Reference-Ordered Data (ROD) file" help="-D,--dbsnp &amp;lt;dbsnp&amp;gt;">
+        <option value="set_dbsnp" selected="True">Set dbSNP</option>
+        <option value="exclude_dbsnp">Don't set dbSNP</option>
+      </param>
+      <when value="exclude_dbsnp" />
+      <when value="set_dbsnp">
+        <param name="dbsnp_input_rod" type="data" format="vcf" label="dbSNP ROD file" />
+        <param name="dbsnp_rod_name" type="text" value="dbsnp" label="dbsnp ROD name">
+          <validator type="regex" message="Value must be a not empty string composed by alphanumeric characters and underscores">^\w+$</validator>
+        </param>
+      </when>
+    </conditional>
+  </xml>
   <token name="@CITATION_SECTION@">------
 
 **Citation**
--- a/gatk2_picard_index.loc.sample	Mon Dec 02 10:36:02 2013 -0500
+++ b/gatk2_picard_index.loc.sample	Sat Jan 18 07:21:33 2014 -0500
@@ -24,7 +24,3 @@
 #the dict file does not have the .fa extension although the
 #path list in the loc file does include it.
 #
-hg18	hg18	hg18	/data/galaxy/ext-tool-data/picard/hg18.fa
-hg19	hg19	hg19	/data/galaxy/ext-tool-data/picard/hg19.fa
-mm8	mm8	mm8	/data/galaxy/ext-tool-data/picard/mm8.fa
-mm9	mm9	mm9	/data/galaxy/ext-tool-data/picard/mm9.fa
--- a/haplotype_caller.xml	Mon Dec 02 10:36:02 2013 -0500
+++ b/haplotype_caller.xml	Sat Jan 18 07:21:33 2014 -0500
@@ -7,10 +7,12 @@
   <command interpreter="python">
     gatk2_wrapper.py
     --stdout "${output_log}"
-    -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input"
-    #if str( $reference_source.input_bam.metadata.bam_index ) != "None":
-        -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index
-    #end if
+    #for $i, $input_bam in enumerate( $reference_source.input_bams ):
+        -d "-I" "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "gatk_input_${i}"
+        #if str( $input_bam.input_bam.metadata.bam_index ) != "None":
+            -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "gatk_input_${i}" ##hardcode galaxy ext type as bam_index
+        #end if
+    #end for
     -p '
     @JAR_PATH@
     -T "HaplotypeCaller"
@@ -18,7 +20,7 @@
 
     \$GATK2_SITE_OPTIONS
 
-    @THREADS@
+    --num_cpu_threads_per_data_thread \${GALAXY_SLOTS:-4}
 
     #if $reference_source.reference_source_selector != "history":
         -R "${reference_source.ref_file.fields.path}"
@@ -27,14 +29,12 @@
         --BQSR "${input_recal}"
     #end if
    '
+    @DBSNP_OPTIONS@
     #include source=$standard_gatk_options#
     
     ##start analysis specific options
     #if $analysis_param_type.analysis_param_type_selector == "advanced":
         -p '
-        #if $analysis_param_type.p_nonref_model.__str__ != "None" and len($analysis_param_type.p_nonref_model.__str__) > 0:
-          --p_nonref_model $analysis_param_type.p_nonref_model
-        #end if
         #if $analysis_param_type.heterozygosity.__str__.strip() != '':
             --heterozygosity $analysis_param_type.heterozygosity
         #end if
@@ -42,8 +42,8 @@
         #if str( $analysis_param_type.genotyping_mode_type.genotyping_mode ) == 'GENOTYPE_GIVEN_ALLELES':
             --alleles "${analysis_param_type.genotyping_mode_type.input_alleles_rod}"
         #end if
-        #if $analysis_param_type.output_mode.__str__ != "None" and len($analysis_param_type.output_mode.__str__) > 0:
-          --output_mode $analysis_param_type.output_mode
+        #if $analysis_param_type.emitRefConfidence.__str__ != "None" and len($analysis_param_type.emitRefConfidence.__str__) > 0:
+          --emitRefConfidence $analysis_param_type.emitRefConfidence
         #end if
 
         ## files
@@ -53,9 +53,6 @@
         #if str($analysis_param_type.comp) != 'None':
             --comp "$analysis_param_type.comp"
         #end if
-        #if str($analysis_param_type.dbsnp) != 'None':
-            --dbsnp "$analysis_param_type.dbsnp"
-        #end if
         ##  
         #if str( $analysis_param_type.annotation ) != "None":
             #for $annotation in str( $analysis_param_type.annotation.fields.gatk_value ).split( ','):
@@ -80,9 +77,6 @@
         #if $analysis_param_type.contamination_fraction_to_filter.__str__.strip() != '':
             --contamination_fraction_to_filter $analysis_param_type.contamination_fraction_to_filter
         #end if
-        #if $analysis_param_type.downsampleRegion.__str__.strip() != '':
-            --downsampleRegion $analysis_param_type.downsampleRegion
-        #end if
         #if $analysis_param_type.minPruning.__str__.strip() != '':
             --minPruning $analysis_param_type.minPruning
         #end if
@@ -99,9 +93,7 @@
             --max_alternate_alleles $analysis_param_type.max_alternate_alleles
         #end if
         ## mode selections
-        #if $analysis_param_type.genotyping_mode.__str__ != "None" and len($analysis_param_type.genotyping_mode.__str__) > 0:
-          --genotyping_mode $analysis_param_type.genotyping_mode
-        #end if
+
         #if $analysis_param_type.pair_hmm_implementation.__str__ != "None" and len($analysis_param_type.pair_hmm_implementation.__str__) > 0:
           --pair_hmm_implementation $analysis_param_type.pair_hmm_implementation
         #end if
@@ -121,35 +113,31 @@
     #end if
   </command>
   <inputs>
-    <param name="input_recal" type="data" format="gatk_report" optional="true" label="Covariates table recalibration file" help="-BQSR,--BQSR &amp;lt;recal_file&amp;gt;" >
-      <help>The input covariates table file which enables on-the-fly base quality score recalibration. 
-            Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool. 
-            Please be aware that one should only run recalibration with the covariates file created on the same input bam(s).
-      </help>
-    </param>
+    <param name="input_recal" type="data" format="gatk_report" optional="true" label="Covariates table recalibration file" help="The input covariates table file which enables on-the-fly base quality score recalibration. Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool. Please be aware that one should only run recalibration with the covariates file created on the same input bam(s) (-BQSR,--BQSR &amp;lt;recal_file&amp;gt;)" />
     <conditional name="reference_source">
       <expand macro="reference_source_selector_param" />
       <when value="cached">
-        <param name="input_bam" type="data" format="bam" label="BAM file" help="-I,--input_file &amp;lt;input_file&amp;gt;">
-          <validator type="unspecified_build" />
-          <validator type="dataset_metadata_in_data_table" table_name="gatk2_picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select -->
-        </param>
+        <repeat name="input_bams" title="BAM file" min="1" help="-I,--input_file &amp;lt;input_file&amp;gt;">
+          <param name="input_bam" type="data" format="bam" label="BAM file">
+            <validator type="unspecified_build" />
+            <validator type="dataset_metadata_in_data_table" table_name="gatk2_picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select -->
+          </param>
+        </repeat>
         <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" >
           <options from_data_table="gatk2_picard_indexes">
-            <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/>
+            <!-- <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/> does not yet work in a repeat...-->
           </options>
           <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
         </param>
       </when>
       <when value="history">
-        <param name="input_bam" type="data" format="bam" label="BAM file" help="-I,--input_file &amp;lt;input_file&amp;gt;" />
-        <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;">
-          <options>
-            <filter type="data_meta" key="dbkey" ref="input_bam" />
-          </options>
-        </param>
+        <repeat name="input_bams" title="BAM file" min="1" help="-I,--input_file &amp;lt;input_file&amp;gt;">
+          <param name="input_bam" type="data" format="bam" label="BAM file" />
+        </repeat>
+        <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
       </when>
     </conditional>
+    <expand macro="dbsnp_param" />
     
     <expand macro="gatk_param_type_conditional" />
 
@@ -211,9 +199,7 @@
         <param name="contamination_fraction_to_filter" type="float" value="0.05" optional="true" label="contamination_fraction_to_filter" help="--contamination_fraction_to_filter / -contamination  Fraction of contamination in sequencing data (for all samples) to aggressively remove">
             <validator type="in_range" message="value between 0.00 and 1.00" min="0" max="1"/>
         </param>
-        <param name="dbsnp" type="data" format="vcf" optional="true" label="dbsnp" help="--dbsnp / -D  dbSNP file"/>
         <param name="debug" type="boolean" checked="False" truevalue="-debug" falsevalue="" label="debug" help="--debug / -debug  If specified, print out very verbose debug information about each triggering active region"/>
-        <param name="downsampleRegion" type="integer" value="1000" optional="true" label="downsampleRegion" help="--downsampleRegion / -dr  coverage, per-sample, to downsample each active region to"/>
 
         <conditional name="genotyping_mode_type">
           <param name="genotyping_mode" type="select" label="How to determine the alternate allele to use for genotyping" help="-gt_mode,--genotyping_mode &amp;lt;genotyping_mode&amp;gt;">
@@ -234,10 +220,11 @@
         <param name="minPruning" type="integer" value="1" optional="true" label="minPruning" help="--minPruning / -minPruning  The minimum allowed pruning factor in assembly graph. Paths with &gt;= X supporting kmers are pruned from the graph">
             <validator type="in_range" message="value between 0 and 127" min="0" max="127"/>
         </param>
-        <param name="output_mode" type="select" optional="true" label="output_mode" help="--output_mode / -out_mode  Specifies which type of calls we should output">
-              <option value="EMIT_VARIANTS_ONLY" selected="True">EMIT_VARIANTS_ONLY</option>
-              <option value="EMIT_ALL_CONFIDENT_SITES">EMIT_ALL_CONFIDENT_SITES</option>
-              <option value="EMIT_ALL_SITES">EMIT_ALL_SITES</option>
+        <!-- http://www.broadinstitute.org/gatk/guide/article?id=2940 -->
+        <param name="emitRefConfidence" type="select" optional="true" label="Output confidence estimates" help="Emitting a per-bp or summarized confidence estimate for a site being strictly homozygous-reference (--emitRefConfidence)">
+              <option value="NONE" selected="True">don't emit anything</option>
+              <option value="BP_RESOLUTION">BP_RESOLUTION (emit detailed information for each BP)</option>
+              <option value="GVCF">GVCF (emit a block summarized version of the BP_RESOLUTION data)</option>
         </param>
         <param name="pair_hmm_implementation" type="select" optional="true" label="pair_hmm_implementation" help="--pair_hmm_implementation / -pairHMM  The PairHMM implementation to use for genotype likelihood calculations">
               <option value="EXACT">EXACT</option>
@@ -252,13 +239,6 @@
         <param name="gcpHMM" type="integer" value="10" optional="true" label="gcpHMM" help="--gcpHMM / -gcpHMM  Flat gap continuation penalty for use in the Pair HMM"/>
         <param name="genotypeFullActiveRegion" type="boolean" checked="False" truevalue="-genotypeFullActiveRegion" falsevalue="" label="genotypeFullActiveRegion" help="--genotypeFullActiveRegion / -genotypeFullActiveRegion  If specified, alternate alleles are considered to be the full active region for the purposes of genotyping"/>
         <param name="max_alternate_alleles" type="integer" value="6" optional="true" label="max_alternate_alleles" help="--max_alternate_alleles / -maxAltAlleles  Maximum number of alternate alleles to genotype"/>
-        <param name="p_nonref_model" type="select" optional="true" label="p_nonref_model" help="--p_nonref_model / -pnrm  Non-reference probability calculation model to employ">
-              <option value="EXACT_INDEPENDENT" selected="True">EXACT_INDEPENDENT experimental implementation - for testing only</option>
-              <option value="EXACT_REFERENCE">EXACT_REFERENCE reference implementation of multi-allelic EXACT model. Extremely slow for many alternate alleles</option>
-              <option value="EXACT_ORIGINAL">EXACT_ORIGINAL original biallelic exact model, for testing only</option>
-              <option value="EXACT_GENERAL_PLOIDY">implementation that supports any sample ploidy</option>
-        </param>
-
       </when>
     </conditional>
   </inputs>
@@ -323,14 +303,12 @@
  contamination               Fraction of contamination in sequencing data (for all samples) to aggressively remove
  dbsnp                       dbSNP file
  debug                       If specified, print out very verbose debug information about each triggering active region
- downsampleRegion            coverage, per-sample, to downsample each active region to
  excludeAnnotation           One or more specific annotations to exclude
  genotyping_mode             Specifies how to determine the alternate alleles to use for genotyping
  graphOutput                 File to which debug assembly graph information should be written
  group                       One or more classes/groups of annotations to apply to variant calls
  heterozygosity              Heterozygosity value used to compute prior likelihoods for any locus
  minPruning                  The minimum allowed pruning factor in assembly graph. Paths with less than or equal supporting kmers are pruned from the graph
- output_mode                 Specifies which type of calls we should output
  pair_hmm_implementation     The PairHMM implementation to use for genotype likelihood calculations
  stand_call_conf             The minimum phred-scaled confidence threshold at which variants should be called
  stand_emit_conf             The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)
@@ -339,9 +317,6 @@
  gcpHMM                      Flat gap continuation penalty for use in the Pair HMM
  genotypeFullActiveRegion    If specified, alternate alleles are considered to be the full active region for the purposes of genotyping
  max_alternate_alleles       Maximum number of alternate alleles to genotype
- p_nonref_model              Non-reference probability calculation model to employ
-
-------
 
 @CITATION_SECTION@
   </help>
--- a/indel_realigner.xml	Mon Dec 02 10:36:02 2013 -0500
+++ b/indel_realigner.xml	Sat Jan 18 07:21:33 2014 -0500
@@ -21,8 +21,6 @@
     ## according to http://www.broadinstitute.org/gatk/guide/article?id=1975
     --num_cpu_threads_per_data_thread 1
 
-    @THREADS@
-
     #if $reference_source.reference_source_selector != "history":
         -R "${reference_source.ref_file.fields.path}"
     #end if
--- a/print_reads.xml	Mon Dec 02 10:36:02 2013 -0500
+++ b/print_reads.xml	Sat Jan 18 07:21:33 2014 -0500
@@ -18,9 +18,8 @@
     \$GATK2_SITE_OPTIONS
 
     ## according to http://www.broadinstitute.org/gatk/guide/article?id=1975
-    --num_cpu_threads_per_data_thread 8
+    --num_cpu_threads_per_data_thread \${GALAXY_SLOTS:-6}
 
-    @THREADS@
 
     #if $reference_source.reference_source_selector != "history":
         -R "${reference_source.ref_file.fields.path}"
--- a/readme.rst	Mon Dec 02 10:36:02 2013 -0500
+++ b/readme.rst	Sat Jan 18 07:21:33 2014 -0500
@@ -13,7 +13,6 @@
 http://www.broadinstitute.org/gatk
 http://www.broadinstitute.org/gatk/about/citing-gatk
 
-
 GATK is Free for academics, and fee for commercial use. Please study the GATK licensing website:
 http://www.broadinstitute.org/gatk/about/#licensing
 
@@ -23,35 +22,42 @@
 
 The recommended installation is by means of the toolshed_.
 
-.. _toolshed: http://toolshed.g2.bx.psu.edu/view/bjoern-gruening/augustus
+.. _toolshed: http://toolshed.g2.bx.psu.edu/view/iuc/gatk2
 
-Galaxy should be able to automatically install samtools dependencies automatically
+Galaxy should be able to install samtools dependencies automatically
 for you. GATK2, and its new licence model, does not allow us to distribute the GATK binaries.
-As a consequence you need to install GATK2 by your own, please see the GATK website for more informations:
+As a consequence, you need to install GATK2 on your own; please see the GATK website for more information:
 
 http://www.broadinstitute.org/gatk/download
 
-Once you have installed GATK2 you need to edit the env.sh file that is installed with these wrappers.
-You will find this env.sh file under:
+Once you have installed GATK2, you need to edit the env.sh files that are installed together with the wrappers.
+You must edit the GATK2_PATH environment variable in the file:
 
-<tool_dependency_dir>/gatk2/<version>/iuc/<hash_string>/env.sh
+<tool_dependency_dir>/environment_settings/GATK2_PATH/iuc/gatk2/<hash_string>/env.sh
+
+to point to the folder where you have installed GATK2.
+
+Optionally, you may also want to edit the GATK2_SITE_OPTIONS environment variable in the file:
 
-You should edit the GATK2_PATH environment variable to point to the folder you have installed GATK2
-and if you want to deactivate the 'call home feature' from GATK you can set
+<tool_dependency_dir>/environment_settings/GATK2_SITE_OPTIONS/iuc/gatk2/<hash_string>/env.sh
 
-GATK2_SITE_OPTIONS='-et "NO_ET" -K "/data/gatk2_key_file"'
+to deactivate the 'call home feature' of GATK with something like:
 
-GATK2_SITE_OPTIONS can be used to insert specific options into every GATK2 wrapper 
-during runtime, without changing the actuall wrapper.
+GATK2_SITE_OPTIONS='-et NO_ET -K /data/gatk2_key_file'
 
-Read more about the "Phone Home" problem under:
+GATK2_SITE_OPTIONS can also be used to insert other specific options into every GATK2 wrapper
+at runtime, without changing the actual wrapper.
+
+Read more about the "Phone Home" problem at:
 http://www.broadinstitute.org/gatk/guide/article?id=1250
 
+Optionally, you may also want to add some commands to be executed before GATK (e.g. to load modules) to the file:
+
+<tool_dependency_dir>/gatk2/default/env.sh
 
 Finally, you should fill in additional information about your genomes and 
 annotations in the gatk2_picard_index.loc and gatk2_annotations.txt. 
-You can find them under ./tool-data/.
-
+You can find them in the tool-data/ Galaxy directory.
 
 
 History
@@ -80,5 +86,3 @@
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
-
-
--- a/reduce_reads.xml	Mon Dec 02 10:36:02 2013 -0500
+++ b/reduce_reads.xml	Sat Jan 18 07:21:33 2014 -0500
@@ -21,8 +21,6 @@
     ## according to http://www.broadinstitute.org/gatk/guide/article?id=1975
     --num_cpu_threads_per_data_thread 1
 
-    @THREADS@
-
     #if $reference_source.reference_source_selector != "history":
         -R "${reference_source.ref_file.fields.path}"
     #end if
@@ -68,12 +66,7 @@
     #end if
   </command>
   <inputs>
-    <param name="input_recal" type="data" format="csv" optional="true" label="Covariates table recalibration file" help="-BQSR,--BQSR &amp;lt;recal_file&amp;gt;" >
-      <help>The input covariates table file which enables on-the-fly base quality score recalibration. 
-            Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool. 
-            Please be aware that one should only run recalibration with the covariates file created on the same input bam(s).
-      </help>
-    </param>
+    <param name="input_recal" type="data" format="gatk_report,csv" optional="true" label="Covariates table recalibration file" help="The input covariates table file which enables on-the-fly base quality score recalibration. Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool. Please be aware that one should only run recalibration with the covariates file created on the same input bam(s) (-BQSR,--BQSR &amp;lt;recal_file&amp;gt;)" />
     <conditional name="reference_source">
       <expand macro="reference_source_selector_param" />
       <when value="cached">
@@ -228,8 +221,6 @@
  -noclip_ad / --dont_hardclip_adaptor_sequences ( boolean with default value false )
  Do not hard clip adaptor sequences. Note: You don't have to turn this on for reads that are not mate paired. The program will behave correctly in those cases.
 
-------
-
 @CITATION_SECTION@
   </help>
 </tool>
--- a/unified_genotyper.xml	Mon Dec 02 10:36:02 2013 -0500
+++ b/unified_genotyper.xml	Sat Jan 18 07:21:33 2014 -0500
@@ -31,22 +31,12 @@
     --standard_min_confidence_threshold_for_calling "${standard_min_confidence_threshold_for_calling}"
     --standard_min_confidence_threshold_for_emitting "${standard_min_confidence_threshold_for_emitting}"
    '
-    #set $rod_binding_names = dict()
-    #for $rod_binding in $rod_bind:
-        #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom':
-            #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name
-        #else
-            #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector
-        #end if
-        #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1
-        -d "--dbsnp:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}"
-    #end for
-   
+    @DBSNP_OPTIONS@
+
     #include source=$standard_gatk_options#
     ##start analysis specific options
     #if $analysis_param_type.analysis_param_type_selector == "advanced":
         -p '
-        --p_nonref_model "${analysis_param_type.p_nonref_model}"
         --heterozygosity "${analysis_param_type.heterozygosity}"
         --pcr_error_rate "${analysis_param_type.pcr_error_rate}"
         --genotyping_mode "${analysis_param_type.genotyping_mode_type.genotyping_mode}"
@@ -120,30 +110,7 @@
         <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
       </when>
     </conditional>
-    
-    <repeat name="rod_bind" title="Binding for reference-ordered data" help="-D,--dbsnp &amp;lt;dbsnp&amp;gt;">
-        <conditional name="rod_bind_type">
-          <param name="rod_bind_type_selector" type="select" label="Binding Type">
-            <option value="dbsnp" selected="True">dbSNP</option>
-            <option value="snps">SNPs</option>
-            <option value="indels">INDELs</option>
-            <option value="custom">Custom</option>
-          </param>
-          <when value="dbsnp">
-              <param name="input_rod" type="data" format="vcf" label="ROD file" />
-          </when>
-          <when value="snps">
-              <param name="input_rod" type="data" format="vcf" label="ROD file" />
-          </when>
-          <when value="indels">
-              <param name="input_rod" type="data" format="vcf" label="ROD file" />
-          </when>
-          <when value="custom">
-              <param name="custom_rod_name" type="text" value="Unknown" label="ROD Name"/>
-              <param name="input_rod" type="data" format="vcf" label="ROD file" />
-          </when>
-        </conditional>
-    </repeat>
+    <expand macro="dbsnp_param" />
     
     <param name="genotype_likelihoods_model" type="select" label="Genotype likelihoods calculation model to employ" help="-glm,--genotype_likelihoods_model &amp;lt;genotype_likelihoods_model&amp;gt;">
       <option value="BOTH" selected="True">BOTH</option>
@@ -158,10 +125,6 @@
     <expand macro="gatk_param_type_conditional" />
     
     <expand macro="analysis_type_conditional">
-        <param name="p_nonref_model" type="select" label="Non-reference probability calculation model to employ" help="-pnrm,--p_nonref_model &amp;lt;p_nonref_model&amp;gt;">
-          <option value="EXACT_GENERAL_PLOIDY" selected="True">EXACT_GENERAL_PLOIDY (supports any sample ploidy)</option>
-          <option value="EXACT_REFERENCE">EXACT_REFERENCE (multi-allelic EXACT model.  Extremely slow for many alternate alleles)</option>
-        </param>
         <param name="heterozygosity" type="float" value="1e-3" label="Heterozygosity value used to compute prior likelihoods for any locus" help="-hets,--heterozygosity &amp;lt;heterozygosity&amp;gt;" />
         <param name="pcr_error_rate" type="float" value="1e-4" label="The PCR error rate to be used for computing fragment-based likelihoods" help="-pcr_error,--pcr_error_rate &amp;lt;pcr_error_rate&amp;gt;" />
         <conditional name="genotyping_mode_type">
@@ -187,13 +150,11 @@
         <param name="max_alternate_alleles" type="integer" value="6" label="Maximum number of alternate alleles to genotype" help="-maxAlleles,--max_alternate_alleles &amp;lt;max_alternate_alleles&amp;gt;" />
         <param name="min_indel_count_for_genotyping" type="integer" value="5" label="Minimum number of consensus indels required to trigger genotyping run" help="-minIndelCnt,--min_indel_count_for_genotyping &amp;lt;min_indel_count_for_genotyping&amp;gt;" />
         <param name="indel_heterozygosity" type="float" value="0.000125" label="Heterozygosity for indel calling" help="1.0/8000==0.000125 (-indelHeterozygosity,--indel_heterozygosity &amp;lt;indel_heterozygosity&amp;gt;)"/>
-        <param name="indelGapContinuationPenalty" type="integer" value="10" label="Indel gap continuation penalty" help="--indelGapContinuationPenalty">
-          <help>Indel gap continuation penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10.</help>
-          <validator type="in_range" message="value between 0 and 255" min="0" max="255"/> 
+        <param name="indelGapContinuationPenalty" type="integer" value="10" label="Indel gap continuation penalty" help="As Phred-scaled probability, i.e. 30 => 10^-30/10 (--indelGapContinuationPenalty)">
+          <validator type="in_range" message="value between 0 and 255" min="0" max="255" />
         </param>
-        <param name="indelGapOpenPenalty" type="integer" value="45" label="Indel gap open penalty" help="--indelGapOpenPenalty" >
-          <help>Indel gap open penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10.</help>
-          <validator type="in_range" message="value between 0 and 255" min="0" max="255"/> 
+        <param name="indelGapOpenPenalty" type="integer" value="45" label="Indel gap open penalty" help="As Phred-scaled probability, i.e. 30 => 10^-30/10 (--indelGapOpenPenalty)">
+          <validator type="in_range" message="value between 0 and 255" min="0" max="255" />
         </param>
         <!-- indelHaplotypeSize - Gone in GATK 2.4? -->
         <param name="indelHaplotypeSize" type="integer" value="80" label="Indel haplotype size" help="--indelHaplotypeSize" />
@@ -252,14 +213,14 @@
           <param name="reference_source_selector" value="history" />
           <param name="ref_file" value="phiX.fasta" ftype="fasta" />
           <param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" />
-          <param name="rod_bind_type_selector" value="dbsnp" />
-          <param name="input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" />
+          <param name="dbsnp_rod_bind_type_selector" value="set_dbsnp" />
+          <param name="dbsnp_input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" />
+          <param name="dbsnp_rod_name" value="dbsnp" />
           <param name="standard_min_confidence_threshold_for_calling" value="0" />
           <param name="standard_min_confidence_threshold_for_emitting" value="4" />
           <param name="gatk_param_type_selector" value="basic" />
           <param name="analysis_param_type_selector" value="advanced" />
           <param name="genotype_likelihoods_model" value="BOTH" />
-          <param name="p_nonref_model" value="EXACT" />
           <param name="heterozygosity" value="0.001" />
           <param name="pcr_error_rate" value="0.0001" />
           <param name="genotyping_mode" value="DISCOVERY" />
@@ -310,7 +271,6 @@
 **Settings**::
 
  genotype_likelihoods_model                        Genotype likelihoods calculation model to employ -- BOTH is the default option, while INDEL is also available for calling indels and SNP is available for calling SNPs only (SNP|INDEL|BOTH)
- p_nonref_model                                    Non-reference probability calculation model to employ -- EXACT_GENERAL_PLOIDY is the default option, while EXACT_REFERENCE is also available. (EXACT_INDEPENDENT,EXACT_REFERENCE,EXACT_ORIGINAL,EXACT_GENERAL_PLOIDY)
  heterozygosity                                    Heterozygosity value used to compute prior likelihoods for any locus
  pcr_error_rate                                    The PCR error rate to be used for computing fragment-based likelihoods
  genotyping_mode                                   Should we output confident genotypes (i.e. including ref calls) or just the variants? (DISCOVERY|GENOTYPE_GIVEN_ALLELES)
--- a/variant_annotator.xml	Mon Dec 02 10:36:02 2013 -0500
+++ b/variant_annotator.xml	Sat Jan 18 07:21:33 2014 -0500
@@ -51,10 +51,7 @@
         -d "--comp:${rod_binding.comp_rod_name},%(file_type)s" "${rod_binding.comp_input_rod}" "${rod_binding.comp_input_rod.ext}" "input_comp_${rod_binding.comp_rod_name}"
     #end for
     
-    #if str( $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector ) == 'set_dbsnp':
-        -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}"
-    #end if
-    
+    @DBSNP_OPTIONS@
     
     #for $rod_binding in $resource_rod_bind:
         -d "--resource:${rod_binding.resource_rod_name},%(file_type)s" "${rod_binding.resource_input_rod}" "${rod_binding.resource_input_rod.ext}" "input_resource_${rod_binding.resource_rod_name}"
@@ -136,20 +133,7 @@
       <param name="comp_input_rod" type="data" format="vcf" label="ROD file" />
       <param name="comp_rod_name" type="text" value="Unnamed" label="ROD Name"/>
     </repeat>
-    
-    <conditional name="dbsnp_rod_bind_type">
-      <param name="dbsnp_rod_bind_type_selector" type="select" label="Provide a dbSNP reference-ordered data file" help="-D,--dbsnp &amp;lt;dbsnp&amp;gt;">
-        <option value="set_dbsnp" selected="True">Set dbSNP</option>
-        <option value="exclude_dbsnp">Don't set dbSNP</option>
-      </param>
-      <when value="exclude_dbsnp">
-        <!-- Do nothing here -->
-      </when>
-      <when value="set_dbsnp">
-        <param name="dbsnp_input_rod" type="data" format="vcf" label="ROD file" />
-        <param name="dbsnp_rod_name" type="hidden" value="dbsnp" label="ROD Name"/>
-      </when>
-    </conditional>
+    <expand macro="dbsnp_param" />
     
     <repeat name="resource_rod_bind" title="Binding for reference-ordered resource data" help="-resource,--resource &amp;lt;resource&amp;gt;">
       <param name="resource_input_rod" type="data" format="vcf" label="ROD file" />
@@ -210,6 +194,7 @@
           <param name="additional_annotations" value="0" />
           <param name="dbsnp_rod_bind_type_selector" value="set_dbsnp" />
           <param name="dbsnp_input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" />
+          <param name="dbsnp_rod_name" value="dbsnp" />
           <param name="snpEff_rod_bind_type_selector" value="exclude_snpEff" />
           <param name="gatk_param_type_selector" value="basic" />
           <output name="output_vcf" file="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" lines_diff="4" /> 
--- a/variant_eval.xml	Mon Dec 02 10:36:02 2013 -0500
+++ b/variant_eval.xml	Sat Jan 18 07:21:33 2014 -0500
@@ -32,9 +32,9 @@
         #end if
     #end for
     
-    #if str( $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector ) == 'set_dbsnp':
+    #if $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector == 'set_dbsnp'
         -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}"
-        #if str( $dbsnp_rod_bind_type.dbsnp_known_names ):
+        #if $dbsnp_rod_bind_type.dbsnp_known_names
             -p '--known_names "${dbsnp_rod_bind_type.dbsnp_rod_name}"'
         #end if
     #end if
@@ -122,24 +122,25 @@
       </when>
     </conditional>
     
-    <repeat name="comp_rod_bind" title="Binding for reference-ordered comparison data" help="-comp,--comp &amp;lt;comp&amp;gt;">
+    <repeat name="comp_rod_bind" title="Comparison Reference-Ordered Data (ROD) file" help="-comp,--comp &amp;lt;comp&amp;gt;">
       <param name="comp_input_rod" type="data" format="vcf" label="Comparison ROD file" />
-      <param name="comp_rod_name" type="text" value="Unnamed" label="Comparison ROD Name"/>
-      <param name="comp_known_names" type="boolean" truevalue="--known_names" falsevalue="" label="Use Comparison ROD as known_names" help="-knownName,--known_names &amp;lt;known_names&amp;gt;"/>
+      <param name="comp_rod_name" type="text" value="" label="Comparison ROD name">
+          <validator type="regex" message="Value must be a non-empty string composed of alphanumeric characters and underscores">^\w+$</validator>
+      </param>
+      <param name="comp_known_names" type="boolean" label="Use comparison ROD file as known_names" help="-knownName,--known_names &amp;lt;known_names&amp;gt;"/>
     </repeat>
-    
     <conditional name="dbsnp_rod_bind_type">
-      <param name="dbsnp_rod_bind_type_selector" type="select" label="Provide a dbSNP reference-ordered data file" help="-D,--dbsnp &amp;lt;dbsnp&amp;gt;">
+      <param name="dbsnp_rod_bind_type_selector" type="select" label="Provide a dbSNP Reference-Ordered Data (ROD) file" help="-D,--dbsnp &amp;lt;dbsnp&amp;gt;">
         <option value="set_dbsnp" selected="True">Set dbSNP</option>
         <option value="exclude_dbsnp">Don't set dbSNP</option>
       </param>
-      <when value="exclude_dbsnp">
-        <!-- Do nothing here -->
-      </when>
+      <when value="exclude_dbsnp" />
       <when value="set_dbsnp">
         <param name="dbsnp_input_rod" type="data" format="vcf" label="dbSNP ROD file" />
-        <param name="dbsnp_rod_name" type="hidden" value="dbsnp" label="dbSNP ROD Name"/>
-        <param name="dbsnp_known_names" type="boolean" truevalue="--known_names" falsevalue="" label="Use dbSNP ROD as known_names" help="-knownName,--known_names &amp;lt;known_names&amp;gt;" />
+        <param name="dbsnp_rod_name" type="text" value="dbsnp" label="dbSNP ROD name">
+          <validator type="regex" message="Value must be a non-empty string composed of alphanumeric characters and underscores">^\w+$</validator>
+        </param>
+        <param name="dbsnp_known_names" type="boolean" label="Use dbSNP ROD file as known_names" help="-knownName,--known_names &amp;lt;known_names&amp;gt;" />
       </when>
     </conditional>
     
@@ -228,6 +229,7 @@
           <param name="input_variant" value="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" ftype="vcf" />
           <param name="dbsnp_rod_bind_type_selector" value="set_dbsnp" />
           <param name="dbsnp_input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" />
+          <param name="dbsnp_rod_name" value="dbsnp" />
           <param name="dbsnp_known_names" value="True"/>
           <param name="comp_rod_bind" value="0" />
           <param name="gatk_param_type_selector" value="basic" />
--- a/variant_filtration.xml	Mon Dec 02 10:36:02 2013 -0500
+++ b/variant_filtration.xml	Sat Jan 18 07:21:33 2014 -0500
@@ -15,8 +15,6 @@
     -T "VariantFiltration"
     \$GATK2_SITE_OPTIONS
 
-    @THREADS@
-
     -o "${output_vcf}"
 
     #if $reference_source.reference_source_selector != "history":
@@ -83,7 +81,7 @@
     
     <conditional name="mask_rod_bind_type">
       <param name="mask_rod_bind_type_selector" type="select" label="Provide a Mask reference-ordered data file">
-        <option value="set_mask" selected="True">Set maskP</option>
+        <option value="set_mask" selected="True">Set mask</option>
         <option value="exclude_mask">Don't set mask</option>
       </param>
       <when value="exclude_mask">
--- a/variant_validate.xml	Mon Dec 02 10:36:02 2013 -0500
+++ b/variant_validate.xml	Sat Jan 18 07:21:33 2014 -0500
@@ -14,18 +14,13 @@
     
     \$GATK2_SITE_OPTIONS
 
-    @THREADS@
-
     #if $reference_source.reference_source_selector != "history":
         -R "${reference_source.ref_file.fields.path}"
     #end if
     ${warn_on_errors}
     ${do_not_validate_filtered_records}
    '
-   
-    #if str( $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector ) == 'set_dbsnp':
-        -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}"
-    #end if
+    @DBSNP_OPTIONS@
    
     #include source=$standard_gatk_options#
   </command>
@@ -47,20 +42,7 @@
         <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
       </when>
     </conditional>
-    
-    <conditional name="dbsnp_rod_bind_type">
-      <param name="dbsnp_rod_bind_type_selector" type="select" label="Provide a dbSNP reference-ordered data file" help="-D,--dbsnp &amp;lt;dbsnp&amp;gt;">
-        <option value="set_dbsnp" selected="True">Set dbSNP</option>
-        <option value="exclude_dbsnp">Don't set dbSNP</option>
-      </param>
-      <when value="exclude_dbsnp">
-        <!-- Do nothing here -->
-      </when>
-      <when value="set_dbsnp">
-        <param name="dbsnp_input_rod" type="data" format="vcf" label="ROD file" />
-        <param name="dbsnp_rod_name" type="hidden" value="dbsnp" label="ROD Name"/>
-      </when>
-    </conditional>
+    <expand macro="dbsnp_param" />
     
     <param name="warn_on_errors" type="boolean" checked="False" truevalue="-warnOnErrors" falsevalue="" label="instead of terminating the run at the first error, print warning messages for each error seen." help="-warnOnErrors,--warnOnErrors"/>
     <param name="do_not_validate_filtered_records" type="boolean" checked="False" truevalue="-doNotValidateFilteredRecords" falsevalue="" label="do not try to validate records that are FILTERed." help="-doNotValidateFilteredRecords,--doNotValidateFilteredRecords"/>
@@ -78,6 +60,7 @@
           <param name="input_variant" value="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" ftype="vcf" />
           <param name="dbsnp_rod_bind_type_selector" value="set_dbsnp" />
           <param name="dbsnp_input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" />
+          <param name="dbsnp_rod_name" value="dbsnp" />
           <param name="warn_on_errors" value="True"/>
           <param name="do_not_validate_filtered_records" />
           <param name="gatk_param_type_selector" value="basic" />