Mercurial > repos > iuc > gatk2
diff realigner_target_creator.xml @ 0:340633249b3d draft
Uploaded
author | bgruening |
---|---|
date | Mon, 02 Dec 2013 06:18:36 -0500 |
parents | |
children | f244b8209eb8 |
line wrap: on
line diff
<tool id="gatk2_realigner_target_creator" name="Realigner Target Creator" version="0.0.7">
  <!--
    Galaxy wrapper around the GATK2 RealignerTargetCreator walker.
    Inputs:  a BAM file, a reference (data-table entry or history FASTA) and
             any number of known-variant VCF files.
    Outputs: a GATK interval file and a log.
    The macros expanded below and the text tokens JAR_PATH, THREADS and
    CITATION_SECTION are expected to come from gatk2_macros.xml.
  -->
  <description>for use in local realignment</description>
  <expand macro="requirements" />
  <macros>
    <import>gatk2_macros.xml</import>
  </macros>
  <command interpreter="python">
    gatk2_wrapper.py
    ## The log dataset receives whatever the wrapper writes for its stdout option.
    --stdout "${output_log}"
    ## Each -d group passes four values: option prefix, dataset path, dataset extension and a label.
    ## NOTE(review): the exact meaning of these fields is defined by gatk2_wrapper.py, which is not shown here.
    -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input"
    ## The BAM index is only passed along when Galaxy has one recorded in the dataset metadata.
    #if str( $reference_source.input_bam.metadata.bam_index ) != "None":
        -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index
    #end if
    -p '
    @JAR_PATH@
    -T "RealignerTargetCreator"
    -o "${output_interval}"

    \$GATK2_SITE_OPTIONS

    ## according to http://www.broadinstitute.org/gatk/guide/article?id=1975
    --num_cpu_threads_per_data_thread 1

    @THREADS@

    ## Only a cached (data table) reference is passed here.
    ## NOTE(review): a history reference is presumably handled by the included standard options; confirm in gatk2_macros.xml.
    #if $reference_source.reference_source_selector != "history":
        -R "${reference_source.ref_file.fields.path}"
    #end if
    '
    ## Known-variant bindings: count how often each binding name occurs so that every
    ## dataset gets a distinct label of the form input_NAME_INDEX (INDEX starts at 0).
    #set $rod_binding_names = dict()
    #for $rod_binding in $rod_bind:
        #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom':
            #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name
        #else
            #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector
        #end if
        #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1
        ## The file_type placeholder is left unexpanded here; presumably the wrapper fills it in (TODO confirm).
        -d "-known:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}"
    #end for

    #include source=$standard_gatk_options#
    ##start analysis specific options
    #if $analysis_param_type.analysis_param_type_selector == "advanced":
        -p '
        --minReadsAtLocus "${analysis_param_type.minReadsAtLocus}"
        --windowSize "${analysis_param_type.windowSize}"
        --mismatchFraction "${analysis_param_type.mismatchFraction}"
        --maxIntervalSize "${analysis_param_type.maxIntervalSize}"
        '
    #end if
  </command>
  <inputs>
    <!-- Reference selection: "cached" uses the gatk2_picard_indexes data table, "history" uses a FASTA dataset. -->
    <conditional name="reference_source">
      <expand macro="reference_source_selector_param" />
      <when value="cached">
        <param name="input_bam" type="data" format="bam" label="BAM file" help="-I,--input_file &lt;input_file&gt;">
          <validator type="unspecified_build" />
          <validator type="dataset_metadata_in_data_table" table_name="gatk2_picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select -->
        </param>
        <!-- Offered references are restricted to those whose dbkey matches the selected BAM. -->
        <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &lt;reference_sequence&gt;">
          <options from_data_table="gatk2_picard_indexes">
            <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey" />
          </options>
          <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file" />
        </param>
      </when>
      <when value="history">
        <param name="input_bam" type="data" format="bam" label="BAM file" help="-I,--input_file &lt;input_file&gt;" />
        <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &lt;reference_sequence&gt;">
          <options>
            <filter type="data_meta" key="dbkey" ref="input_bam" />
          </options>
        </param>
      </when>
    </conditional>

    <!-- Zero or more known-variant VCF files; the selected type (or the custom name) becomes the binding name. -->
    <repeat name="rod_bind" title="Known Variants" help="Using data sets of known variants (-known,--known &lt;known&gt;)">
      <conditional name="rod_bind_type">
        <param name="rod_bind_type_selector" type="select" label="Variant Type">
          <option value="dbsnp" selected="True">dbSNP</option>
          <option value="snps">SNPs</option>
          <option value="indels">INDELs</option>
          <option value="custom">Custom</option>
        </param>
        <when value="dbsnp">
          <param name="input_rod" type="data" format="vcf" label="Variant file (VCF format)" />
        </when>
        <when value="snps">
          <param name="input_rod" type="data" format="vcf" label="Variant file (VCF format)" />
        </when>
        <when value="indels">
          <param name="input_rod" type="data" format="vcf" label="Variant file (VCF format)" />
        </when>
        <when value="custom">
          <!-- This text value is the binding name placed after "-known:" on the command line, not a file. -->
          <param name="custom_rod_name" type="text" value="Unknown" label="Custom variant binding name" />
          <param name="input_rod" type="data" format="vcf" label="Variant file (VCF format)" />
        </when>
      </conditional>
    </repeat>

    <expand macro="gatk_param_type_conditional" />

    <!-- Walker-specific settings; the command template only emits them when the "advanced" mode is selected. -->
    <expand macro="analysis_type_conditional">
      <param name="windowSize" type="integer" value="10" label="Window size for calculating entropy or SNP clusters (windowSize)" help="-window,--windowSize &lt;windowSize&gt;" />
      <param name="mismatchFraction" type="float" value="0.15" label="Fraction of base qualities needing to mismatch for a position to have high entropy (mismatchFraction)" help="to disable set to &lt;= 0 or &gt; 1 (-mismatch,--mismatchFraction &lt;mismatchFraction&gt;)" />
      <param name="minReadsAtLocus" type="integer" value="4" label="Minimum reads at a locus to enable using the entropy calculation (minReadsAtLocus)" help="-minReads,--minReadsAtLocus &lt;minReadsAtLocus&gt;" />
      <param name="maxIntervalSize" type="integer" value="500" label="Maximum interval size" help="-maxInterval,--maxIntervalSize &lt;maxIntervalSize&gt;" />
    </expand>
  </inputs>
  <outputs>
    <data format="gatk_interval" name="output_interval" label="${tool.name} on ${on_string} (GATK intervals)" />
    <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
  </outputs>
  <tests>
    <!-- History reference, one dbSNP binding, advanced settings left at their default values. -->
    <test>
      <param name="reference_source_selector" value="history" />
      <param name="ref_file" value="phiX.fasta" ftype="fasta" />
      <param name="input_bam" value="gatk/fake_phiX_reads_1.bam" ftype="bam" />
      <param name="rod_bind_type_selector" value="dbsnp" />
      <param name="input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" />
      <param name="gatk_param_type_selector" value="basic" />
      <param name="analysis_param_type_selector" value="advanced" />
      <param name="windowSize" value="10" />
      <param name="mismatchFraction" value="0.15" />
      <param name="minReadsAtLocus" value="4" />
      <param name="maxIntervalSize" value="500" />
      <output name="output_interval" file="gatk/gatk_realigner_target_creator/gatk_realigner_target_creator_out_1.gatk_interval" />
      <output name="output_log" file="gatk/gatk_realigner_target_creator/gatk_realigner_target_creator_out_1.log.contains" compare="contains" />
    </test>
  </tests>
  <help>
**What it does**

Emits intervals for the Local Indel Realigner to target for cleaning. Ignores 454 reads, MQ0 reads, and reads with consecutive indel operators in the CIGAR string.

For more information on local realignment around indels using the GATK, see this `tool specific page &lt;http://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_walkers_indels_RealignerTargetCreator.html&gt;`_.

To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gatk/guide/topic?name=best-practices&gt;`_.

If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gatk/guide/topic?name=faqs&gt;`_.

------

**Inputs**

GenomeAnalysisTK: RealignerTargetCreator accepts an aligned BAM input file.


**Outputs**

The output is in GATK Interval format.


Go `here &lt;http://www.broadinstitute.org/gatk/guide/topic?name=intro&gt;`_ for details on GATK file formats.

-------

**Settings**::

 windowSize          window size for calculating entropy or SNP clusters
 mismatchFraction    fraction of base qualities needing to mismatch for a position to have high entropy; to disable set to &lt;= 0 or &gt; 1
 minReadsAtLocus     minimum reads at a locus to enable using the entropy calculation
 maxIntervalSize     maximum interval size

@CITATION_SECTION@
  </help>
</tool>