comparison realigner_target_creator.xml @ 0:340633249b3d draft

Uploaded
author bgruening
date Mon, 02 Dec 2013 06:18:36 -0500
parents
children f244b8209eb8
comparison
equal deleted inserted replaced
-1:000000000000 0:340633249b3d
1 <tool id="gatk2_realigner_target_creator" name="Realigner Target Creator" version="0.0.7">
2 <description>for use in local realignment</description>
3 <expand macro="requirements" />
4 <macros>
5 <import>gatk2_macros.xml</import>
6 </macros>
7 <command interpreter="python">
8 gatk2_wrapper.py
9 --stdout "${output_log}" ## path of the output_log dataset; presumably where the wrapper captures GATK stdout (confirm in gatk2_wrapper.py)
10 -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input" ## BAM input, passed together with its Galaxy extension
11 #if str( $reference_source.input_bam.metadata.bam_index ) != "None":
12 -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ## only emitted when the BAM carries index metadata; the galaxy ext type is hardcoded as bam_index
13 #end if
14 -p '
15 @JAR_PATH@
16 -T "RealignerTargetCreator"
17 -o "${output_interval}"
18
19 \$GATK2_SITE_OPTIONS ## the dollar sign is escaped so the template emits the variable name literally instead of resolving it as a placeholder
20
21 ## data threads are fixed to 1 for this walker, according to http://www.broadinstitute.org/gatk/guide/article?id=1975
22 --num_cpu_threads_per_data_thread 1
23
24 @THREADS@
25
26 #if $reference_source.reference_source_selector != "history":
27 -R "${reference_source.ref_file.fields.path}" ## built-in reference: path field of the selected data table entry
28 #end if
29 '
30 #set $rod_binding_names = dict()
31 #for $rod_binding in $rod_bind:
32 #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom':
33 #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name
34 #else
35 #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector
36 #end if
37 #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1
38 -d "-known:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" ## one entry per known-variants dataset; the per-name counter (starting at 0) keeps the input names unique when a binding name repeats
39 #end for
40
41 #include source=$standard_gatk_options#
42 ## start analysis specific options: the block below is emitted only when the advanced analysis parameters are selected
43 #if $analysis_param_type.analysis_param_type_selector == "advanced":
44 -p '
45 --minReadsAtLocus "${analysis_param_type.minReadsAtLocus}"
46 --windowSize "${analysis_param_type.windowSize}"
47 --mismatchFraction "${analysis_param_type.mismatchFraction}"
48 --maxIntervalSize "${analysis_param_type.maxIntervalSize}"
49 '
50 #end if
51 </command>
52 <inputs>
53 <conditional name="reference_source"> <!-- selects where the reference genome comes from; both branches define input_bam and ref_file -->
54 <expand macro="reference_source_selector_param" />
55 <when value="cached"> <!-- built-in reference: ref_file is picked from the gatk2_picard_indexes data table, filtered by the dbkey of input_bam -->
56 <param name="input_bam" type="data" format="bam" label="BAM file" help="-I,--input_file &amp;lt;input_file&amp;gt;">
57 <validator type="unspecified_build" />
58 <validator type="dataset_metadata_in_data_table" table_name="gatk2_picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select -->
59 </param>
60 <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" >
61 <options from_data_table="gatk2_picard_indexes">
62 <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/>
63 </options>
64 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
65 </param>
66 </when>
67 <when value="history"> <!-- reference from the history: ref_file is a fasta dataset, filtered on the dbkey of input_bam; no build validators are applied to the BAM here -->
68 <param name="input_bam" type="data" format="bam" label="BAM file" help="-I,--input_file &amp;lt;input_file&amp;gt;" />
69 <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;">
70 <options>
71 <filter type="data_meta" key="dbkey" ref="input_bam" />
72 </options>
73 </param>
74 </when>
75 </conditional>
76
77 <repeat name="rod_bind" title="Known Variants" help="Using data sets of known variants (-known,--known &amp;lt;known&amp;gt;)"> <!-- each repeat entry becomes one -known binding in the command template -->
78 <conditional name="rod_bind_type">
79 <param name="rod_bind_type_selector" type="select" label="Variant Type"> <!-- the selected value is used as the binding name unless "custom" is chosen -->
80 <option value="dbsnp" selected="True">dbSNP</option>
81 <option value="snps">SNPs</option>
82 <option value="indels">INDELs</option>
83 <option value="custom">Custom</option>
84 </param>
85 <when value="dbsnp">
86 <param name="input_rod" type="data" format="vcf" label="Variant file (VCF format)" />
87 </when>
88 <when value="snps">
89 <param name="input_rod" type="data" format="vcf" label="Variant file (VCF format)" />
90 </when>
91 <when value="indels">
92 <param name="input_rod" type="data" format="vcf" label="Variant file (VCF format)" />
93 </when>
94 <when value="custom"> <!-- custom binding: the user supplies the binding name in addition to the VCF file -->
95 <param name="custom_rod_name" type="text" value="Unknown" label="Custom variant name" help="Name under which the variant file is bound on the command line (-known:&amp;lt;name&amp;gt;)" /> <!-- this is a name, not a file; the file itself is input_rod below -->
96 <param name="input_rod" type="data" format="vcf" label="Variant file (VCF format)" />
97 </when>
98 </conditional>
99 </repeat>
100
101 <expand macro="gatk_param_type_conditional" />
102
103 <expand macro="analysis_type_conditional"> <!-- tool-specific parameters handed to the macro; the command template reads them as analysis_param_type.* when "advanced" is selected -->
104 <param name="windowSize" type="integer" value="10" label="Window size for calculating entropy or SNP clusters (windowSize)" help="-window,--windowSize &amp;lt;windowSize&amp;gt;" />
105 <param name="mismatchFraction" type="float" value="0.15" label="Fraction of base qualities needing to mismatch for a position to have high entropy (mismatchFraction)" help="to disable set to &lt;= 0 or &gt; 1 (-mismatch,--mismatchFraction &amp;lt;mismatchFraction&amp;gt;)"/>
106 <param name="minReadsAtLocus" type="integer" value="4" label="Minimum reads at a locus to enable using the entropy calculation (minReadsAtLocus)" help="-minReads,--minReadsAtLocus &amp;lt;minReadsAtLocus&amp;gt;" />
107 <param name="maxIntervalSize" type="integer" value="500" label="Maximum interval size" help="-maxInterval,--maxIntervalSize &amp;lt;maxIntervalSize&amp;gt;" />
108 </expand>
109 </inputs>
110 <outputs>
111 <data format="gatk_interval" name="output_interval" label="${tool.name} on ${on_string} (GATK intervals)" /> <!-- target of the -o option in the command template -->
112 <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" /> <!-- passed to gatk2_wrapper.py through its stdout option -->
113 </outputs>
114 <tests>
115 <test> <!-- history reference, one dbsnp known-variants file, advanced parameters set to the same values as their defaults -->
116 <param name="reference_source_selector" value="history" />
117 <param name="ref_file" value="phiX.fasta" ftype="fasta" />
118 <param name="input_bam" value="gatk/fake_phiX_reads_1.bam" ftype="bam" />
119 <param name="rod_bind_type_selector" value="dbsnp" />
120 <param name="input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" />
121 <param name="gatk_param_type_selector" value="basic" />
122 <param name="analysis_param_type_selector" value="advanced" />
123 <param name="windowSize" value="10" />
124 <param name="mismatchFraction" value="0.15" />
125 <param name="minReadsAtLocus" value="4" />
126 <param name="maxIntervalSize" value="500" />
127 <output name="output_interval" file="gatk/gatk_realigner_target_creator/gatk_realigner_target_creator_out_1.gatk_interval" />
128 <output name="output_log" file="gatk/gatk_realigner_target_creator/gatk_realigner_target_creator_out_1.log.contains" compare="contains"/> <!-- the log is checked with compare="contains" rather than as an exact file match -->
129 </test>
130 </tests>
131 <help>
132 **What it does**
133
134 Emits intervals for the Local Indel Realigner to target for cleaning. Ignores 454 reads, MQ0 reads, and reads with consecutive indel operators in the CIGAR string.
135
136 For more information on local realignment around indels using the GATK, see this `tool specific page &lt;http://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_walkers_indels_RealignerTargetCreator.html&gt;`_.
137
138 To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gatk/guide/topic?name=best-practices&gt;`_.
139
140 If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gatk/guide/topic?name=faqs&gt;`_.
141
142 ------
143
144 **Inputs**
145
146 GenomeAnalysisTK: RealignerTargetCreator accepts an aligned BAM input file.
147
148
149 **Outputs**
150
151 The output is in GATK Interval format.
152
153
154 Go `here &lt;http://www.broadinstitute.org/gatk/guide/topic?name=intro&gt;`_ for details on GATK file formats.
155
156 -------
157
158 **Settings**::
159
160 windowSize window size for calculating entropy or SNP clusters
161 mismatchFraction fraction of base qualities needing to mismatch for a position to have high entropy; to disable set to &lt;= 0 or &gt; 1
162 minReadsAtLocus minimum reads at a locus to enable using the entropy calculation
163 maxIntervalSize maximum interval size
164
165 @CITATION_SECTION@
166 </help>
167 </tool>