sharplabtool: tools/gatk/unified_genotyper.xml comparison

comparison tools/gatk/unified_genotyper.xml @ 0:9071e359b9a3

Uploaded

author	xuebing
date	Fri, 09 Mar 2012 19:37:19 -0500
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:9071e359b9a3
+<tool id="gatk_unified_genotyper" name="Unified Genotyper" version="0.0.1">
+<description>SNP and indel caller</description>
+<command interpreter="python">gatk_wrapper.py
+--stdout "${output_log}"
+#for $i, $input_bam in enumerate( $reference_source.input_bams ):
+-d "-I" "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "gatk_input_${i}"
+-d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "gatk_input_${i}" ##hardcode galaxy ext type as bam_index
+#end for
+-p 'java
+-jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar"
+-T "UnifiedGenotyper"
+-o "${output_vcf}"
+-et "NO_ET" ##ET no phone home
+##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout
+#if $reference_source.reference_source_selector != "history":
+-R "${reference_source.ref_file.fields.path}"
+#end if
+--standard_min_confidence_threshold_for_calling "${standard_min_confidence_threshold_for_calling}"
+--standard_min_confidence_threshold_for_emitting "${standard_min_confidence_threshold_for_emitting}"
+'
+#set $rod_binding_names = dict()
+#if str( $input_dbsnp_rod ) != "None":
+-d "-D" "${input_dbsnp_rod}" "${input_dbsnp_rod.ext}" "dbsnp_rod"
+#end if
+#for $rod_binding in $rod_bind:
+#if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom':
+#set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name
+#else
+#set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector
+#end if
+#set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1
+-d "-B:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}"
+#if str( $rod_binding.rod_bind_type.rodToIntervalTrackName ):
+-p '--rodToIntervalTrackName "${rod_bind_name}"'
+#end if
+#end for
+##start standard gatk options
+#if $gatk_param_type.gatk_param_type_selector == "advanced":
+#for $sample_metadata in $gatk_param_type.sample_metadata:
+-p '--sample_metadata "${sample_metadata.sample_metadata_file}"'
+#end for
+#for $read_filter in $gatk_param_type.read_filter:
+-p '--read_filter "${read_filter.read_filter_type.read_filter_type_selector}"
+###raise Exception( str( dir( $read_filter ) ) )
+#for $name, $param in $read_filter.read_filter_type.iteritems():
+#if $name not in [ "__current_case__", "read_filter_type_selector" ]:
+--${name} "${param}"
+#end if
+#end for
+'
+#end for
+#if str( $gatk_param_type.input_intervals ) != "None":
+-d "-L" "${gatk_param_type.input_intervals}" "${gatk_param_type.input_intervals.ext}" "input_intervals"
+#end if
+## Exclusion intervals get their own staging name: the inclusion-interval (-L) line
+## above already uses "input_intervals", and both datasets may be supplied together.
+#if str( $gatk_param_type.input_exclude_intervals ) != "None":
+-d "-XL" "${gatk_param_type.input_exclude_intervals}" "${gatk_param_type.input_exclude_intervals.ext}" "input_exclude_intervals"
+#end if
+-p '--BTI_merge_rule "${gatk_param_type.BTI_merge_rule}"'
+-p '--downsampling_type "${gatk_param_type.downsampling_type.downsampling_type_selector}"'
+#if str( $gatk_param_type.downsampling_type.downsampling_type_selector ) != "NONE":
+-p '--${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_type_selector} "${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_value}"'
+#end if
+-p '
+--baq "${gatk_param_type.baq}"
+--baqGapOpenPenalty "${gatk_param_type.baq_gap_open_penalty}"
+${gatk_param_type.use_original_qualities}
+--defaultBaseQualities "${gatk_param_type.default_base_qualities}"
+--validation_strictness "${gatk_param_type.validation_strictness}"
+--interval_merging "${gatk_param_type.interval_merging}"
+'
+#if str( $gatk_param_type.read_group_black_list ) != "None":
+-d "-read_group_black_list" "${gatk_param_type.read_group_black_list}" "txt" "input_read_group_black_list"
+#end if
+#end if
+#if $reference_source.reference_source_selector == "history":
+-d "-R" "${reference_source.ref_file}" "${reference_source.ref_file.ext}" "gatk_input"
+#end if
+##end standard gatk options
+##start analysis specific options
+#if $analysis_param_type.analysis_param_type_selector == "advanced":
+-p '
+--genotype_likelihoods_model "${analysis_param_type.genotype_likelihoods_model}"
+--p_nonref_model "${analysis_param_type.p_nonref_model}"
+--heterozygosity "${analysis_param_type.heterozygosity}"
+--pcr_error_rate "${analysis_param_type.pcr_error_rate}"
+--genotyping_mode "${analysis_param_type.genotyping_mode}"
+--output_mode "${analysis_param_type.output_mode}"
+${analysis_param_type.noSLOD}
+--min_base_quality_score "${analysis_param_type.min_base_quality_score}"
+--min_mapping_quality_score "${analysis_param_type.min_mapping_quality_score}"
+--max_deletion_fraction "${analysis_param_type.max_deletion_fraction}"
+--min_indel_count_for_genotyping "${analysis_param_type.min_indel_count_for_genotyping}"
+--indel_heterozygosity "${analysis_param_type.indel_heterozygosity}"
+--indelGapContinuationPenalty "${analysis_param_type.indelGapContinuationPenalty}"
+--indelGapOpenPenalty "${analysis_param_type.indelGapOpenPenalty}"
+--indelHaplotypeSize "${analysis_param_type.indelHaplotypeSize}"
+${analysis_param_type.doContextDependentGapPenalties}
+#if $analysis_param_type.annotation.value:
+#for $annotation in $analysis_param_type.annotation.value:
+--annotation "${annotation}"
+#end for
+#end if
+#if $analysis_param_type.group.value:
+#for $group in $analysis_param_type.group.value:
+--group "${group}"
+#end for
+#end if
+'
+#end if
+</command>
+<inputs>
+<conditional name="reference_source">
+<param name="reference_source_selector" type="select" label="Choose the source for the reference list">
+<option value="cached">Locally cached</option>
+<option value="history">History</option>
+</param>
+<when value="cached">
+<repeat name="input_bams" title="Sample BAM file" min="1">
+<param name="input_bam" type="data" format="bam" label="BAM file">
+<validator type="unspecified_build" />
+<validator type="dataset_metadata_in_file" filename="picard_index.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select -->
+</param>
+</repeat>
+<param name="ref_file" type="select" label="Using reference genome">
+<options from_data_table="picard_indexes">
+<!-- <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/> does not yet work in a repeat...-->
+</options>
+</param>
+</when>
+<when value="history"> <!-- FIX ME!!!! -->
+<repeat name="input_bams" title="Sample BAM file" min="1">
+<param name="input_bam" type="data" format="bam" label="BAM file" />
+</repeat>
+<param name="ref_file" type="data" format="fasta" label="Using reference file" />
+</when>
+</conditional>
+<param name="input_dbsnp_rod" type="data" format="gatk_dbsnp" optional="True" label="dbSNP reference ordered data (ROD)" />
+<repeat name="rod_bind" title="Binding for reference-ordered data">
+<conditional name="rod_bind_type">
+	      <param name="rod_bind_type_selector" type="select" label="Binding Type">
+	        <option value="snps" selected="True">SNPs</option>
+	        <option value="indels">INDELs</option>
+	        <option value="custom">Custom</option>
+	      </param>
+<when value="snps">
+<param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" />
+<param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" />
+</when>
+<when value="indels">
+<param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" />
+<param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" />
+</when>
+<when value="custom">
+<param name="custom_rod_name" type="text" value="Unknown" label="ROD Name"/>
+<param name="input_rod" type="data" format="vcf,gatk_dbsnp,bed" label="ROD file" />
+<param name="rodToIntervalTrackName" type="boolean" truevalue="--rodToIntervalTrackName" falsevalue="" label="Use ROD as interval List (-BTI, --rodToIntervalTrackName)" help="Only one ROD may have this option specified" />
+</when>
+</conditional>
+</repeat>
+<param name="standard_min_confidence_threshold_for_calling" type="float" value="30.0" label="The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be called" />
+<param name="standard_min_confidence_threshold_for_emitting" type="float" value="30.0" label="The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be emitted (and filtered if less than the calling threshold)" />
+<conditional name="gatk_param_type">
+<param name="gatk_param_type_selector" type="select" label="Basic or Advanced GATK options">
+<option value="basic" selected="True">Basic</option>
+<option value="advanced">Advanced</option>
+</param>
+<when value="basic">
+<!-- Do nothing here -->
+</when>
+<when value="advanced">
+<repeat name="sample_metadata" title="Sample Metadata">
+<param name="sample_metadata_file" type="data" format="txt" label="Sample file(s) in JSON format" />
+</repeat>
+<repeat name="read_filter" title="Read Filter">
+<conditional name="read_filter_type">
+		      <param name="read_filter_type_selector" type="select" label="Read Filter Type">
+		        <option value="MaxReadLength" selected="True">MaxReadLength</option>
+		        <option value="ZeroMappingQualityRead">ZeroMappingQualityRead</option>
+		      </param>
+	          <when value="ZeroMappingQualityRead">
+	              <!-- no extra options -->
+	          </when>
+	          <when value="MaxReadLength">
+	              <param name="maxReadLength" type="integer" value="76" label="Max Read Length"/>
+	          </when>
+</conditional>
+</repeat>
+<param name="input_intervals" type="data" format="picard_interval_list" optional="True" label="A list of genomic intervals over which to operate" />
+<param name="input_exclude_intervals" type="data" format="picard_interval_list" optional="True" label="A list of genomic intervals to exclude from processing" />
+<param name="BTI_merge_rule" type="select" label="BTI merge rule">
+<option value="UNION" selected="True">UNION</option>
+<option value="INTERSECTION">INTERSECTION</option>
+</param>
+<conditional name="downsampling_type">
+<param name="downsampling_type_selector" type="select" label="Type of reads downsampling to employ at a given locus" help="Downsampling Type">
+<option value="NONE" selected="True">NONE</option>
+<option value="ALL_READS">ALL_READS</option>
+<option value="BY_SAMPLE">BY_SAMPLE</option>
+</param>
+<when value="NONE">
+	          <!-- no more options here -->
+	      </when>
+<when value="ALL_READS">
+	          <conditional name="downsample_to_type">
+	              <param name="downsample_to_type_selector" type="select" label="Type of reads downsampling to employ at a given locus" help="Downsampling Type">
+	                  <option value="downsample_to_fraction" selected="True">Downsample by Fraction</option>
+	                  <option value="downsample_to_coverage">Downsample by Coverage</option>
+	              </param>
+	              <when value="downsample_to_fraction">
+	                  <param name="downsample_to_value" type="float" label="Fraction [0.0-1.0] of reads to downsample to" value="0.1"/>
+	              </when>
+	              <when value="downsample_to_coverage">
+	                  <param name="downsample_to_value" type="integer" label="Coverage to downsample to at any given locus" value="0"/>
+	              </when>
+	          </conditional>
+	      </when>
+<when value="BY_SAMPLE">
+	          <conditional name="downsample_to_type">
+	              <param name="downsample_to_type_selector" type="select" label="Type of reads downsampling to employ at a given locus" help="Downsampling Type">
+	                  <option value="downsample_to_fraction" selected="True">Downsample by Fraction</option>
+	                  <option value="downsample_to_coverage">Downsample by Coverage</option>
+	              </param>
+	              <when value="downsample_to_fraction">
+	                  <param name="downsample_to_value" type="float" label="Fraction [0.0-1.0] of reads to downsample to" value="0.1"/>
+	              </when>
+	              <when value="downsample_to_coverage">
+	                  <param name="downsample_to_value" type="integer" label="Coverage to downsample to at any given locus" value="0"/>
+	              </when>
+	          </conditional>
+	      </when>
+</conditional>
+<param name="baq" type="select" label="Type of BAQ calculation to apply in the engine">
+<option value="OFF" selected="True">OFF</option>
+<option value="CALCULATE_AS_NECESSARY">CALCULATE_AS_NECESSARY</option>
+<option value="RECALCULATE">RECALCULATE</option>
+</param>
+<param name="baq_gap_open_penalty" type="integer" label="BAQ gap open penalty (Phred Scaled)" value="40" help="Default value is 40. 30 is perhaps better for whole genome call sets."/>
+<param name="use_original_qualities" type="boolean" truevalue="--useOriginalQualities" falsevalue="" label="Use the original base quality scores from the OQ tag" />
+<param name="default_base_qualities" type="integer" label="Value to be used for all base quality scores, when some are missing" value="-1"/>
+<param name="validation_strictness" type="select" label="How strict should we be with validation">
+<option value="STRICT" selected="True">STRICT</option>
+<option value="LENIENT">LENIENT</option>
+<option value="SILENT">SILENT</option>
+</param>
+<param name="interval_merging" type="select" label="Interval merging rule">
+<option value="ALL" selected="True">ALL</option>
+<option value="OVERLAPPING_ONLY">OVERLAPPING_ONLY</option>
+</param>
+<param name="read_group_black_list" type="data" format="txt" optional="True" label="Read group black list" />
+</when>
+</conditional>
+<conditional name="analysis_param_type">
+<param name="analysis_param_type_selector" type="select" label="Basic or Advanced Analysis options">
+<option value="basic" selected="True">Basic</option>
+<option value="advanced">Advanced</option>
+</param>
+<when value="basic">
+<!-- Do nothing here -->
+</when>
+<when value="advanced">
+<param name="genotype_likelihoods_model" type="select" label="Genotype likelihoods calculation model to employ">
+<option value="BOTH" selected="True">BOTH</option>
+<option value="SNP">SNP</option>
+<option value="INDEL">INDEL</option>
+</param>
+<param name="p_nonref_model" type="select" label="Non-reference probability calculation model to employ">
+<option value="EXACT" selected="True">EXACT</option>
+<option value="GRID_SEARCH">GRID_SEARCH</option>
+</param>
+<param name="heterozygosity" type="float" value="1e-3" label="Heterozygosity value used to compute prior likelihoods for any locus" />
+<param name="pcr_error_rate" type="float" value="1e-4" label="The PCR error rate to be used for computing fragment-based likelihoods" />
+<param name="genotyping_mode" type="select" label="How to determine the alternate allele to use for genotyping">
+<option value="DISCOVERY" selected="True">DISCOVERY</option>
+<option value="GENOTYPE_GIVEN_ALLELES">GENOTYPE_GIVEN_ALLELES</option>
+</param>
+<param name="output_mode" type="select" label="Should we output confident genotypes (i.e. including ref calls) or just the variants?">
+<option value="EMIT_VARIANTS_ONLY" selected="True">EMIT_VARIANTS_ONLY</option>
+<option value="EMIT_ALL_CONFIDENT_SITES">EMIT_ALL_CONFIDENT_SITES</option>
+<option value="EMIT_ALL_SITES">EMIT_ALL_SITES</option>
+</param>
+<param name="noSLOD" type="boolean" truevalue="--noSLOD" falsevalue="" label="Do not calculate the SLOD" />
+<param name="min_base_quality_score" type="integer" value="17" label="Minimum base quality required to consider a base for calling" />
+<param name="min_mapping_quality_score" type="integer" value="20" label="Minimum read mapping quality required to consider a read for calling" />
+<param name="max_deletion_fraction" type="float" value="0.05" label="Maximum fraction of reads with deletions spanning this locus for it to be callable" help="to disable, set to &lt; 0 or &gt; 1" />
+<param name="min_indel_count_for_genotyping" type="integer" value="5" label="Minimum number of consensus indels required to trigger genotyping run" />
+<param name="indel_heterozygosity" type="float" value="0.000125" label="Heterozygosity for indel calling" help="1.0/8000==0.000125"/>
+<param name="indelGapContinuationPenalty" type="float" value="10.0" label="Indel gap continuation penalty" />
+<param name="indelGapOpenPenalty" type="float" value="45.0" label="Indel gap open penalty" />
+<param name="indelHaplotypeSize" type="integer" value="80" label="Indel haplotype size" />
+<param name="doContextDependentGapPenalties" type="boolean" truevalue="--doContextDependentGapPenalties" falsevalue="" label="Vary gap penalties by context" />
+	    <param name="annotation" type="select" multiple="True" display="checkboxes" label="Annotation Types">
+	      <option value="AlleleBalance">AlleleBalance</option>
+	      <option value="BaseQualityRankSumTest">BaseQualityRankSumTest</option>
+	      <option value="DepthOfCoverage">DepthOfCoverage</option>
+	      <option value="HomopolymerRun">HomopolymerRun</option>
+	      <option value="MappingQualityRankSumTest">MappingQualityRankSumTest</option>
+	      <option value="MappingQualityZero">MappingQualityZero</option>
+	      <option value="QualByDepth">QualByDepth</option>
+	      <option value="RMSMappingQuality">RMSMappingQuality</option>
+	      <option value="SpanningDeletions">SpanningDeletions</option>
+	      <option value="HaplotypeScore">HaplotypeScore</option>
+	    </param>
+	    <param name="group" type="select" multiple="True" display="checkboxes" label="Annotation Interfaces/Groups">
+	      <option value="Standard">Standard</option>
+	      <option value="Experimental">Experimental</option>
+	      <option value="WorkInProgress">WorkInProgress</option>
+	      <!-- <option value="none">none</option> -->
+	    </param>
+</when>
+</conditional>
+</inputs>
+<outputs>
+<!-- output_vcf is the file handed to GATK via the -o argument in the command template;
+     output_log receives the stdout captured by the wrapper script. -->
+<data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (VCF)" />
+<data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
+</outputs>
+<tests>
+<test>
+<param name="reference_source_selector" value="history" />
+<param name="ref_file" value="phiX.fasta" ftype="fasta" />
+<param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" />
+<param name="input_dbsnp_rod"  />
+<param name="rod_bind_type_selector" value="snps" />
+<param name="input_rod" value="gatk/fake_phiX_variant_locations.bed" ftype="bed" />
+<param name="rodToIntervalTrackName" />
+<param name="standard_min_confidence_threshold_for_calling" value="4" />
+<param name="standard_min_confidence_threshold_for_emitting" value="4" />
+<param name="gatk_param_type_selector" value="basic" />
+<param name="analysis_param_type_selector" value="advanced" />
+<param name="genotype_likelihoods_model" value="BOTH" />
+<param name="p_nonref_model" value="EXACT" />
+<param name="heterozygosity" value="0.001" />
+<param name="pcr_error_rate" value="0.0001" />
+<param name="genotyping_mode" value="DISCOVERY" />
+<param name="output_mode" value="EMIT_ALL_CONFIDENT_SITES" />
+<param name="noSLOD" />
+<param name="min_base_quality_score" value="17" />
+<param name="min_mapping_quality_score" value="20" />
+<param name="max_deletion_fraction" value="-1" />
+<param name="min_indel_count_for_genotyping" value="2" />
+<param name="indel_heterozygosity" value="0.000125" />
+<param name="indelGapContinuationPenalty" value="10" />
+<param name="indelGapOpenPenalty" value="3" />
+<param name="indelHaplotypeSize" value="80" />
+<param name="doContextDependentGapPenalties" />
+<!-- <param name="annotation" value="" />
+<param name="group" value="" /> -->
+<!-- The name must match a data element declared in the outputs section (output_vcf), otherwise the VCF is never compared. -->
+<output name="output_vcf" file="gatk/gatk_unified_genotyper/gatk_unified_genotyper_out_1.vcf" lines_diff="2"/>
+<output name="output_log" file="gatk/gatk_unified_genotyper/gatk_unified_genotyper_out_1.log.contains" compare="contains"/>
+</test>
+</tests>
+<help>
+**What it does**
+A variant caller which unifies the approaches of several disparate callers.  Works for single-sample and
+multi-sample data.  The user can choose from several different incorporated calculation models.
+------
+Please cite the website "http://addlink.here" as well as:
+Add citation here 2011.
+------
+**Input formats**
+GenomeAnalysisTK: UnifiedGenotyper accepts an aligned BAM input file.
+------
+**Outputs**
+The output is in VCF format, see http://addlink.here for more details.
+-------
+**Settings**::
+genotype_likelihoods_model                           Genotype likelihoods calculation model to employ -- BOTH is the default option, while INDEL is also available for calling indels and SNP is available for calling SNPs only (SNP|INDEL|BOTH)
+p_nonref_model                                                  Non-reference probability calculation model to employ -- EXACT is the default option, while GRID_SEARCH is also available. (EXACT|GRID_SEARCH)
+heterozygosity                                                  Heterozygosity value used to compute prior likelihoods for any locus
+pcr_error_rate                                             The PCR error rate to be used for computing fragment-based likelihoods
+genotyping_mode                                             How to determine the alternate allele to use for genotyping (DISCOVERY|GENOTYPE_GIVEN_ALLELES)
+output_mode                                                    Should we output confident genotypes (i.e. including ref calls) or just the variants? (EMIT_VARIANTS_ONLY|EMIT_ALL_CONFIDENT_SITES|EMIT_ALL_SITES)
+standard_min_confidence_threshold_for_calling                         The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be called
+standard_min_confidence_threshold_for_emitting                        The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be emitted (and filtered if less than the calling threshold)
+noSLOD                                                                            If provided, we will not calculate the SLOD
+min_base_quality_score                                   Minimum base quality required to consider a base for calling
+min_mapping_quality_score                             Minimum read mapping quality required to consider a read for calling
+max_deletion_fraction                               Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to &lt; 0 or &gt; 1; default:0.05]
+min_indel_count_for_genotyping           Minimum number of consensus indels required to trigger genotyping run
+indel_heterozygosity                       Heterozygosity for indel calling
+indelGapContinuationPenalty                    Indel gap continuation penalty
+indelGapOpenPenalty                                    Indel gap open penalty
+indelHaplotypeSize                                    Indel haplotype size
+doContextDependentGapPenalties                                                  Vary gap penalties by context
+indel_recal_file                                         Filename for the input covariates table recalibration .csv file - EXPERIMENTAL, DO NOT USE
+indelDebug                                                                 Output indel debug info
+out                                                                           File to which variants should be written
+annotation                                                             One or more specific annotations to apply to variant calls
+group                                                                       One or more classes/groups of annotations to apply to variant calls
+</help>
+</tool>

Mercurial > repos > xuebing > sharplabtool

comparison tools/gatk/unified_genotyper.xml @ 0:9071e359b9a3