comparison bismark_methylation_extractor.xml @ 0:62c6da72dd4a draft

Uploaded
author bgruening
date Sat, 06 Jul 2013 09:57:36 -0400
parents
children 91f07ff056ca
comparison
equal deleted inserted replaced
-1:000000000000 0:62c6da72dd4a
1 <tool id="bismark_methylation_extractor" name="Bismark" version="0.7.12">
2 <!-- Wrapper compatible with Bismark version 0.7.7 -->
3 <description>methylation extractor</description>
4 <!--<version_command>bismark_methylation_extractor version</version_command>-->
<!-- External dependencies declared for this wrapper. -->
<requirements>
    <!-- Environment variable that the command section forwards as bismark_path. -->
    <requirement type="set_environment">SCRIPT_PATH</requirement>
    <!-- Pinned aligner packages. -->
    <requirement type="package" version="0.12.8">bowtie</requirement>
    <requirement type="package" version="2.0.0-beta7">bowtie2</requirement>
</requirements>
<parallelism method="basic"/>
<command interpreter="python">
    ## Cheetah template that assembles the argument list for the Python wrapper script.
    bismark_methylation_extractor.py

        --infile $input

        --bismark_path \$SCRIPT_PATH

        #if $singlePaired.sPaired == "single":
            --single-end
        #else:
            --paired-end
            ## expands to "--no-overlap" when checked, to an empty string otherwise
            $singlePaired.no_overlap
        #end if

        ## the default value of 0 is not forwarded to the script
        #if str($ignore_bps) != "0":
            --ignore $ignore_bps
        #end if

        #if $report:
            --report-file $o_report
        #end if

        #if $comprehensive:
            --comprehensive
        #end if

        #if $merge_non_cpg:
            --merge-non-cpg
        #end if

        ## Output layout: exactly one of the following branches is emitted.
        ## Booleans are tested by truthiness throughout, matching the flag tests above.
        #if $compress:
            ## one archive holding all result files
            --compress $compressed_output
        #elif not $comprehensive and not $merge_non_cpg:
            ## twelve files: three contexts times four strands
            --cpg_ot $cpg_ot
            --chg_ot $chg_ot
            --chh_ot $chh_ot
            --cpg_ctot $cpg_ctot
            --chg_ctot $chg_ctot
            --chh_ctot $chh_ctot
            --cpg_ob $cpg_ob
            --chg_ob $chg_ob
            --chh_ob $chh_ob
            --cpg_ctob $cpg_ctob
            --chg_ctob $chg_ctob
            --chh_ctob $chh_ctob
        #elif $merge_non_cpg and $comprehensive:
            ## two files
            --non_cpg_context $non_cpg_context
            --cpg_context $cpg_context
        #elif $comprehensive:
            ## three files
            --cpg_context $cpg_context
            --chg_context $chg_context
            --chh_context $chh_context
        #elif $merge_non_cpg:
            ## eight files: CpG and non-CpG context for each of the four strands
            --non_cpg_context_ctot $non_cpg_context_ctot
            --non_cpg_context_ot $non_cpg_context_ot
            --non_cpg_context_ob $non_cpg_context_ob
            --non_cpg_context_ctob $non_cpg_context_ctob
            --cpg_ot $cpg_ot
            --cpg_ctot $cpg_ctot
            --cpg_ob $cpg_ob
            --cpg_ctob $cpg_ctob
        #end if

</command>
<inputs>
    <!-- Input Parameters -->
    <param name="input" type="data" format="sam" label="SAM file from Bismark bisulfite mapper" />

    <!-- Library layout; the overlap option is only offered for paired-end data. -->
    <conditional name="singlePaired">
        <param name="sPaired" type="select" label="Is this library mate-paired?">
            <option value="single">Single-end</option>
            <option value="paired">Paired-end</option>
        </param>
        <when value="single" />
        <when value="paired">
            <!-- The true value is inserted verbatim into the command line. -->
            <param name="no_overlap" type="boolean" truevalue="--no-overlap" falsevalue="" checked="False" label="This option avoids scoring overlapping methylation calls twice, in case of overlapping read one and read two" help="" />
        </when>
    </conditional>

    <!-- min="0" rejects negative values in the form instead of passing them on to the ignore option. -->
    <param name="ignore_bps" type="integer" value="0" min="0" label="Ignore the first N bp when processing the methylation call string" />

    <!-- The four switches below decide which output datasets are created (see the filters in the outputs section). -->
    <param name="comprehensive" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Merge all four possible strand-specific methylation info into context-dependent output files" help="" />
    <param name="merge_non_cpg" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Merge all non-CpG contexts into one file" help="This will produce eight strand-specific output files, or two output files in comprehensive mode." />
    <param name="report" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Short methylation summary output" />
    <param name="compress" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Compress all result files and output one single file" />

</inputs>
<outputs>
    <!--
        OT – original top strand
        CTOT – complementary to original top strand
        OB – original bottom strand
        CTOB – complementary to original bottom strand

        Every filter below must name the input parameters exactly as declared in
        the inputs section (compress, comprehensive, merge_non_cpg, report) and
        must select the same datasets that the matching branch of the command
        template writes to.
    -->
    <data format="tabular" name="o_report" label="${tool.name} on ${on_string}: Report file">
        <filter> ( report is True ) </filter>
    </data>

    <!-- Strand-specific CpG files: written in the default (12 files) and the merge_non_cpg (8 files) layout -->
    <data format="tabular" name="cpg_ot" label="${tool.name} on ${on_string}: CpG original top strand">
        <filter> ( compress == False and comprehensive == False ) </filter>
    </data>
    <data format="tabular" name="cpg_ctot" label="${tool.name} on ${on_string}: CpG complementary to top strand">
        <filter> ( compress == False and comprehensive == False ) </filter>
    </data>
    <data format="tabular" name="cpg_ob" label="${tool.name} on ${on_string}: CpG original bottom strand">
        <filter> ( compress == False and comprehensive == False ) </filter>
    </data>
    <data format="tabular" name="cpg_ctob" label="${tool.name} on ${on_string}: CpG complementary to bottom strand">
        <filter> ( compress == False and comprehensive == False ) </filter>
    </data>

    <!-- Strand-specific CHG/CHH files: only written in the default (12 files) layout -->
    <data format="tabular" name="chg_ot" label="${tool.name} on ${on_string}: CHG original top strand">
        <filter> ( compress == False and comprehensive == False and merge_non_cpg == False ) </filter>
    </data>
    <data format="tabular" name="chh_ot" label="${tool.name} on ${on_string}: CHH original top strand">
        <filter> ( compress == False and comprehensive == False and merge_non_cpg == False ) </filter>
    </data>
    <data format="tabular" name="chg_ctot" label="${tool.name} on ${on_string}: CHG complementary to top strand">
        <filter> ( compress == False and comprehensive == False and merge_non_cpg == False ) </filter>
    </data>
    <data format="tabular" name="chh_ctot" label="${tool.name} on ${on_string}: CHH complementary to top strand">
        <filter> ( compress == False and comprehensive == False and merge_non_cpg == False ) </filter>
    </data>
    <data format="tabular" name="chg_ob" label="${tool.name} on ${on_string}: CHG original bottom strand">
        <filter> ( compress == False and comprehensive == False and merge_non_cpg == False ) </filter>
    </data>
    <data format="tabular" name="chh_ob" label="${tool.name} on ${on_string}: CHH original bottom strand">
        <filter> ( compress == False and comprehensive == False and merge_non_cpg == False ) </filter>
    </data>
    <data format="tabular" name="chg_ctob" label="${tool.name} on ${on_string}: CHG complementary to bottom strand">
        <filter> ( compress == False and comprehensive == False and merge_non_cpg == False ) </filter>
    </data>
    <data format="tabular" name="chh_ctob" label="${tool.name} on ${on_string}: CHH complementary to bottom strand">
        <filter> ( compress == False and comprehensive == False and merge_non_cpg == False ) </filter>
    </data>

    <!-- Context-dependent methylation output files (comprehensive option) -->
    <data format="tabular" name="cpg_context" label="${tool.name} on ${on_string}: CpG context dependent">
        <filter> ( compress == False and comprehensive ) </filter>
    </data>
    <data format="tabular" name="chg_context" label="${tool.name} on ${on_string}: CHG context dependent">
        <filter> ( compress == False and comprehensive and merge_non_cpg == False ) </filter>
    </data>
    <data format="tabular" name="chh_context" label="${tool.name} on ${on_string}: CHH context dependent">
        <filter> ( compress == False and comprehensive and merge_non_cpg == False ) </filter>
    </data>

    <!-- Merged non-CpG context, comprehensive mode (2 files together with cpg_context) -->
    <data format="tabular" name="non_cpg_context" label="${tool.name} on ${on_string}: Non CpG context dependent">
        <filter> ( compress == False and comprehensive and merge_non_cpg ) </filter>
    </data>

    <!-- Merged non-CpG context per strand (8 files together with the strand-specific CpG files) -->
    <data format="tabular" name="non_cpg_context_ot" label="${tool.name} on ${on_string}: Non CpG context dependent on original top strand">
        <filter> ( compress == False and comprehensive == False and merge_non_cpg ) </filter>
    </data>
    <data format="tabular" name="non_cpg_context_ctot" label="${tool.name} on ${on_string}: Non CpG context dependent on complementary to top strand">
        <filter> ( compress == False and comprehensive == False and merge_non_cpg ) </filter>
    </data>
    <data format="tabular" name="non_cpg_context_ob" label="${tool.name} on ${on_string}: Non CpG context dependent on original bottom strand">
        <filter> ( compress == False and comprehensive == False and merge_non_cpg ) </filter>
    </data>
    <data format="tabular" name="non_cpg_context_ctob" label="${tool.name} on ${on_string}: Non CpG context dependent on complementary to bottom strand">
        <filter> ( compress == False and comprehensive == False and merge_non_cpg ) </filter>
    </data>

    <!-- Single archive replacing all of the tabular outputs above -->
    <data format="gzipped" name="compressed_output" label="${tool.name} on ${on_string}: Result archive.">
        <filter> ( compress ) </filter>
    </data>
</outputs>
186
187 <tests>
188 </tests>
189
190 <help>
191
192 **What it does**
193
194 The following is a brief description of all options to control the Bismark_
195 methylation extractor. The script reads in a bisulfite read alignment results file
196 produced by the Bismark bisulfite mapper and extracts the methylation information
197 for individual cytosines. This information is found in the methylation call field
198 which can contain the following characters:
199
200
201 - X = for methylated C in CHG context (was protected)
202 - x = for not methylated C in CHG context (was converted)
203 - H = for methylated C in CHH context (was protected)
204 - h = for not methylated C in CHH context (was converted)
205 - Z = for methylated C in CpG context (was protected)
206 - z = for not methylated C in CpG context (was converted)
207 - . = for any bases not involving cytosines
208
209
210 The methylation extractor outputs result files for cytosines in CpG, CHG and CHH
211 context (this distinction is actually already made in Bismark itself). As the methylation
212 information for every C analysed can produce files which easily have tens or even hundreds of
213 millions of lines, file sizes can become very large and more difficult to handle. The C
214 methylation info additionally splits cytosine methylation calls up into one of the four possible
215 strands a given bisulfite read aligned against:
216
217 - OT = original top strand
218 - CTOT = complementary to original top strand
219
220 - OB = original bottom strand
221 - CTOB = complementary to original bottom strand
222
223 Thus, by default twelve individual output files are being generated per input file (unless
224 --comprehensive is specified, see below). The output files can be imported into a genome
225 viewer, such as SeqMonk, and re-combined into a single data group if desired (in fact
226 unless the bisulfite reads were generated preserving directionality it doesn't make any
227 sense to look at the data in a strand-specific manner). Strand-specific output files can
228 optionally be skipped, in which case only three output files for CpG, CHG or CHH context
229 will be generated. For both the strand-specific and comprehensive outputs there is also
230 the option to merge both non-CpG contexts (CHG and CHH) into one single non-CpG context.
231
232
233 .. _Bismark: http://www.bioinformatics.babraham.ac.uk/projects/bismark/
234
235
236 It is developed by Krueger F and Andrews SR. at the Babraham Institute. Krueger F, Andrews SR. (2011) Bismark: a flexible aligner and methylation caller for Bisulfite-Seq applications. Bioinformatics, 27, 1571-2.
237
238 -------
239
240 **Bismark settings**
241
242 All of the options have a default value. You can change any of them. If any Bismark function is missing please contact the tool author or your Galaxy admin.
243
244 ------
245
246 **Outputs**
247
248 The output files are in the following format (tab delimited)::
249
250
251 Column Description
252 -------- --------------------------------------------------------
253 1 seq-ID
254 2 strand
255 3 chromosome
256 4 position
257 5 methylation call
258
259
260 * Methylated cytosines receive a '+' orientation,
261 * Unmethylated cytosines receive a '-' orientation.
262
263 ------
264
265 **OPTIONS**
266
267 Input::
268
269 -s/--single-end Input file(s) are Bismark result file(s) generated from single-end
270 read data. Specifying either --single-end or --paired-end is
271 mandatory.
272
273 -p/--paired-end Input file(s) are Bismark result file(s) generated from paired-end
274 read data. Specifying either --paired-end or --single-end is
275 mandatory.
276
277 --no_overlap For paired-end reads it is theoretically possible that read_1 and
278 read_2 overlap. This option avoids scoring overlapping methylation
279 calls twice. Whilst this removes a bias towards more methylation calls
280 towards the center of sequenced fragments it can de facto remove
281 a good proportion of the data.
282
283 --ignore INT Ignore the first INT bp at the 5' end of each read when processing the
284 methylation call string. This can remove e.g. a restriction enzyme site
285 at the start of each read.
286
287 Output::
288
289 --comprehensive Specifying this option will merge all four possible strand-specific
290 methylation info into context-dependent output files. The default
291 contexts are:
292 - CpG context
293 - CHG context
294 - CHH context
295
296 --merge_non_CpG This will produce two output files (in --comprehensive mode) or eight
297 strand-specific output files (default) for Cs in
298 - CpG context
299 - non-CpG context
300
301 --report Prints out a short methylation summary as well as the parameters used to run
302 this script.
303
304
305 </help>
306 </tool>