annotate realigner_target_creator.xml @ 15:01ff8dd37d4d draft default tip

Uploaded
author lz_hust
date Sat, 01 Jun 2019 07:20:41 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
15
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
1 <tool id="gatk2_realigner_target_creator" name="Realigner Target Creator" version="@VERSION@.1">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
2 <description>for use in local realignment</description>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
3 <macros>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
4 <import>gatk2_macros.xml</import>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
5 </macros>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
6 <expand macro="requirements" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
7 <expand macro="version_command" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
8 <command interpreter="python">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
9 gatk2_wrapper.py
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
10 --stdout "${output_log}"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
11 -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
12 #if str( $reference_source.input_bam.metadata.bam_index ) != "None":
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
13 -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
14 #end if
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
15 -p '
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
16 @JAR_PATH@
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
17 -T "RealignerTargetCreator"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
18 -o "${output_interval}"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
19
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
20 \$GATK2_SITE_OPTIONS
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
21
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
22 ## according to http://www.broadinstitute.org/gatk/guide/article?id=1975
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
23 --num_cpu_threads_per_data_thread 1
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
24
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
25 @THREADS@
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
26
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
27 #if str( $reference_source.reference_source_selector ) != "history":
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
28 -R "${reference_source.ref_file.fields.path}"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
29 #end if
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
30 '
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
31 #set $rod_binding_names = dict()
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
32 #for $rod_binding in $rod_bind:
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
33 #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom':
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
34 #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
35 #else
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
36 #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
37 #end if
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
38 #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
39 -d "-known:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
40 #end for
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
41
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
42 $allow_n_cigar_reads
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
43 #include source=$standard_gatk_options#
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
44 ##start analysis specific options
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
45 #if str( $analysis_param_type.analysis_param_type_selector ) == "advanced":
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
46 -p '
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
47 --minReadsAtLocus "${analysis_param_type.minReadsAtLocus}"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
48 --windowSize "${analysis_param_type.windowSize}"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
49 --mismatchFraction "${analysis_param_type.mismatchFraction}"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
50 --maxIntervalSize "${analysis_param_type.maxIntervalSize}"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
51 '
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
52 #end if
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
53 </command>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
54 <inputs>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
55 <conditional name="reference_source">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
56 <expand macro="reference_source_selector_param" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
57 <when value="cached">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
58 <param name="input_bam" type="data" format="bam" label="BAM file" help="-I,--input_file &amp;lt;input_file&amp;gt;">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
59 <validator type="unspecified_build" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
60 <validator type="dataset_metadata_in_data_table" table_name="gatk2_picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select -->
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
61 </param>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
62 <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" >
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
63 <options from_data_table="gatk2_picard_indexes">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
64 <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
65 </options>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
66 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
67 </param>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
68 </when>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
69 <when value="history">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
70 <param name="input_bam" type="data" format="bam" label="BAM file" help="-I,--input_file &amp;lt;input_file&amp;gt;" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
71 <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
72 <options>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
73 <filter type="data_meta" key="dbkey" ref="input_bam" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
74 </options>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
75 </param>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
76 </when>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
77 </conditional>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
78
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
79 <repeat name="rod_bind" title="Known Variants" help="Using data sets of known variants (-known,--known &amp;lt;known&amp;gt;)">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
80 <conditional name="rod_bind_type">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
81 <param name="rod_bind_type_selector" type="select" label="Variant Type">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
82 <option value="dbsnp" selected="True">dbSNP</option>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
83 <option value="snps">SNPs</option>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
84 <option value="indels">INDELs</option>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
85 <option value="custom">Custom</option>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
86 </param>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
87 <when value="dbsnp">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
88 <param name="input_rod" type="data" format="vcf" label="Variant file (VCF format)" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
89 </when>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
90 <when value="snps">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
91 <param name="input_rod" type="data" format="vcf" label="Variant file (VCF format)" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
92 </when>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
93 <when value="indels">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
94 <param name="input_rod" type="data" format="vcf" label="Variant file (VCF format)" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
95 </when>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
96 <when value="custom">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
97 <param name="custom_rod_name" type="text" value="Unknown" label="Custom ROD name" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
98 <param name="input_rod" type="data" format="vcf" label="Variant file (VCF format)" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
99 </when>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
100 </conditional>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
101 </repeat>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
102
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
103 <expand macro="allow_n_cigar_reads" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
104 <expand macro="gatk_param_type_conditional" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
105
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
106 <expand macro="analysis_type_conditional">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
107 <param name="windowSize" type="integer" value="10" label="Window size for calculating entropy or SNP clusters (windowSize)"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
108 help="-window,--windowSize &amp;lt;windowSize&amp;gt;" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
109 <param name="mismatchFraction" type="float" value="0.15" label="Fraction of base qualities needing to mismatch for a position to have high entropy (mismatchFraction)"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
110 help="to disable set to &lt;= 0 or &gt; 1 (-mismatch,--mismatchFraction &amp;lt;mismatchFraction&amp;gt;)"/>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
111 <param name="minReadsAtLocus" type="integer" value="4" label="Minimum reads at a locus to enable using the entropy calculation (minReadsAtLocus)"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
112 help="-minReads,--minReadsAtLocus &amp;lt;minReadsAtLocus&amp;gt;" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
113 <param name="maxIntervalSize" type="integer" value="500" label="Maximum interval size" help="-maxInterval,--maxIntervalSize &amp;lt;maxIntervalSize&amp;gt;" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
114 </expand>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
115 </inputs>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
116 <outputs>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
117 <data format="gatk_interval" name="output_interval" label="${tool.name} on ${on_string} (GATK intervals)" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
118 <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
119 </outputs>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
120 <tests>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
121 <test>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
122 <param name="reference_source_selector" value="history" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
123 <param name="ref_file" value="phiX.fasta" ftype="fasta" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
124 <param name="input_bam" value="gatk/fake_phiX_reads_1.bam" ftype="bam" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
125 <param name="rod_bind_type_selector" value="dbsnp" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
126 <param name="input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
127 <param name="gatk_param_type_selector" value="basic" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
128 <param name="analysis_param_type_selector" value="advanced" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
129 <param name="windowSize" value="10" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
130 <param name="mismatchFraction" value="0.15" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
131 <param name="minReadsAtLocus" value="4" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
132 <param name="maxIntervalSize" value="500" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
133 <output name="output_interval" file="gatk/gatk_realigner_target_creator/gatk_realigner_target_creator_out_1.gatk_interval" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
134 <output name="output_log" file="gatk/gatk_realigner_target_creator/gatk_realigner_target_creator_out_1.log.contains" compare="contains"/>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
135 </test>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
136 </tests>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
137 <help>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
138 **What it does**
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
139
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
140 Emits intervals for the Local Indel Realigner to target for cleaning. Ignores 454 reads, MQ0 reads, and reads with consecutive indel operators in the CIGAR string.
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
141
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
142 For more information on local realignment around indels using the GATK, see this `tool specific page &lt;http://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_walkers_indels_RealignerTargetCreator.html&gt;`_.
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
143
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
144 To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gatk/guide/topic?name=best-practices&gt;`_.
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
145
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
146 If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gatk/guide/topic?name=faqs&gt;`_.
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
147
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
148 ------
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
149
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
150 **Inputs**
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
151
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
152 GenomeAnalysisTK: RealignerTargetCreator accepts an aligned BAM input file and, optionally, one or more VCF files of known variants.
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
153
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
154
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
155 **Outputs**
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
156
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
157 The output is a list of target intervals in GATK Interval format, together with a log of the run.
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
158
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
159
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
160 Go `here &lt;http://www.broadinstitute.org/gatk/guide/topic?name=intro&gt;`_ for details on GATK file formats.
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
161
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
162 -------
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
163
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
164 **Settings**::
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
165
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
166 windowSize window size for calculating entropy or SNP clusters
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
167 mismatchFraction fraction of base qualities needing to mismatch for a position to have high entropy; to disable set to &lt;= 0 or &gt; 1
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
168 minReadsAtLocus minimum reads at a locus to enable using the entropy calculation
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
169 maxIntervalSize maximum interval size
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
170
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
171 @CITATION_SECTION@
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
172 </help>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
173 <expand macro="citations" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
174 </tool>