Mercurial > repos > bgruening > bismark
comparison bismark_methylation_extractor.xml @ 0:62c6da72dd4a draft
Uploaded
author | bgruening |
---|---|
date | Sat, 06 Jul 2013 09:57:36 -0400 |
parents | |
children | 91f07ff056ca |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:62c6da72dd4a |
---|---|
1 <tool id="bismark_methylation_extractor" name="Bismark" version="0.7.12"> | |
2 <!-- Wrapper compatible with Bismark version 0.7.7 --> | |
3 <description>methylation extractor</description> | |
4 <!--<version_command>bismark_methylation_extractor version</version_command>--> | |
5 <requirements> <!-- External dependencies declared for this tool. --> | |
6 <requirement type="set_environment">SCRIPT_PATH</requirement> <!-- Environment variable; the command template passes it to the wrapper as the bismark_path option. --> | |
7 <requirement type="package" version="0.12.8">bowtie</requirement> <!-- bowtie package, version 0.12.8 --> | |
8 <requirement type="package" version="2.0.0-beta7">bowtie2</requirement> <!-- bowtie2 package, version 2.0.0-beta7 --> | |
9 </requirements> | |
10 <parallelism method="basic"></parallelism> | |
11 <command interpreter="python"> | |
12 bismark_methylation_extractor.py | |
13 | |
14 --infile $input | |
15 | |
16 --bismark_path \$SCRIPT_PATH | |
17 | |
18 #if $singlePaired.sPaired == "single": | |
19 --single-end | |
20 #else: | |
21 --paired-end | |
22 $singlePaired.no_overlap | |
23 #end if | |
24 | |
25 #if str($ignore_bps) != "0": | |
26 --ignore $ignore_bps | |
27 #end if | |
28 | |
29 #if $report: | |
30 --report-file $o_report | |
31 #end if | |
32 | |
33 #if $comprehensive: | |
34 --comprehensive | |
35 #end if | |
36 | |
37 #if $merge_non_cpg: | |
38 --merge-non-cpg | |
39 #end if | |
40 | |
41 #if $compress: | |
42 --compress $compressed_output | |
43 #else: | |
44 #if $comprehensive == False and $merge_non_cpg == False: | |
45 ## twelve files: one per context (CpG, CHG, CHH) and strand (OT, CTOT, OB, CTOB) | |
46 --cpg_ot $cpg_ot | |
47 --chg_ot $chg_ot | |
48 --chh_ot $chh_ot | |
49 --cpg_ctot $cpg_ctot | |
50 --chg_ctot $chg_ctot | |
51 --chh_ctot $chh_ctot | |
52 --cpg_ob $cpg_ob | |
53 --chg_ob $chg_ob | |
54 --chh_ob $chh_ob | |
55 --cpg_ctob $cpg_ctob | |
56 --chg_ctob $chg_ctob | |
57 --chh_ctob $chh_ctob | |
58 #elif $merge_non_cpg and $comprehensive: | |
59 ## two files: non-CpG context and CpG context, strands merged | |
60 --non_cpg_context $non_cpg_context | |
61 --cpg_context $cpg_context | |
62 #elif $comprehensive: | |
63 ## three files: CpG, CHG and CHH context, strands merged | |
64 --cpg_context $cpg_context | |
65 --chg_context $chg_context | |
66 --chh_context $chh_context | |
67 #elif $merge_non_cpg: | |
68 ## eight files: non-CpG and CpG context for each of the four strands | |
69 --non_cpg_context_ctot $non_cpg_context_ctot | |
70 --non_cpg_context_ot $non_cpg_context_ot | |
71 --non_cpg_context_ob $non_cpg_context_ob | |
72 --non_cpg_context_ctob $non_cpg_context_ctob | |
73 --cpg_ot $cpg_ot | |
74 --cpg_ctot $cpg_ctot | |
75 --cpg_ob $cpg_ob | |
76 --cpg_ctob $cpg_ctob | |
77 #end if | |
78 ## end of the branch taken when compress is not set | |
79 #end if | |
80 | |
81 </command> | |
82 <inputs> | |
83 <!-- Input parameters: the alignment file, the library layout, and the extraction options. --> | |
84 <param name="input" type="data" format="sam" label="SAM file from Bismark bisulfite mapper" /> | |
85 <conditional name="singlePaired"> | |
86 <param name="sPaired" type="select" label="Is this library mate-paired?"> | |
87 <option value="single">Single-end</option> | |
88 <option value="paired">Paired-end</option> | |
89 </param> | |
90 <when value="single" /> | |
91 <when value="paired"> | |
92 <param name="no_overlap" type="boolean" truevalue="--no-overlap" falsevalue="" checked="False" label="This option avoids scoring overlapping methylation calls twice, in case of overlapping read one and read two" help="" /> | |
93 </when> | |
94 </conditional> | |
95 <!-- Extraction options. The boolean flags below are also referenced by the output filters to decide which datasets are created. --> | |
96 <param name="ignore_bps" type="integer" value="0" label="Ignore the first N bp when processing the methylation call string" /> | |
97 <param name="comprehensive" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Merge all four possible strand-specific methylation info | |
98 into context-dependent output files" help="" /> | |
99 <param name="merge_non_cpg" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Merge all non-CpG contexts into one file" help="This will produce eight strand-specific output files, or two output files in comprehensive mode." /> | |
100 <param name="report" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Short methylation summary output" /> | |
101 <param name="compress" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Compress all result files and output one single file" /> | |
102 | |
103 </inputs> | |
104 <outputs> | |
105 <!-- | |
106 OT – original top strand | |
107 CTOT – complementary to original top strand | |
108 OB – original bottom strand | |
109 CTOB – complementary to original bottom strand | |
110 --> | |
111 <data format="tabular" name="o_report" label="${tool.name} on ${on_string}: Report file"> | |
112 <filter> ( report is True ) </filter> | |
113 </data> | |
114 <!-- Filter expressions must use the parameter names exactly as declared in the inputs section (names are case-sensitive). --> | |
115 <!-- Default output: 12 strand-specific files, shown only when compress, comprehensive and merge_non_cpg are all unset --> | |
116 <data format="tabular" name="cpg_ot" label="${tool.name} on ${on_string}: CpG original top strand"> | |
117 <filter> ( compress == False and comprehensive == False and merge_non_cpg == False) </filter> | |
118 </data> | |
119 <data format="tabular" name="chg_ot" label="${tool.name} on ${on_string}: CHG original top strand"> | |
120 <filter> ( compress == False and comprehensive == False and merge_non_cpg == False) </filter> | |
121 </data> | |
122 <data format="tabular" name="chh_ot" label="${tool.name} on ${on_string}: CHH original top strand"> | |
123 <filter> ( compress == False and comprehensive == False and merge_non_cpg == False) </filter> | |
124 </data> | |
125 <data format="tabular" name="cpg_ctot" label="${tool.name} on ${on_string}: CpG complementary to top strand"> | |
126 <filter> ( compress == False and comprehensive == False and merge_non_cpg == False) </filter> | |
127 </data> | |
128 <data format="tabular" name="chg_ctot" label="${tool.name} on ${on_string}: CHG complementary to top strand"> | |
129 <filter> ( compress == False and comprehensive == False and merge_non_cpg == False) </filter> | |
130 </data> | |
131 <data format="tabular" name="chh_ctot" label="${tool.name} on ${on_string}: CHH complementary to top strand"> | |
132 <filter> ( compress == False and comprehensive == False and merge_non_cpg == False) </filter> | |
133 </data> | |
134 | |
135 <data format="tabular" name="cpg_ob" label="${tool.name} on ${on_string}: CpG original bottom strand"> | |
136 <filter> ( compress == False and comprehensive == False and merge_non_cpg == False) </filter> | |
137 </data> | |
138 <data format="tabular" name="chg_ob" label="${tool.name} on ${on_string}: CHG original bottom strand"> | |
139 <filter> ( compress == False and comprehensive == False and merge_non_cpg == False) </filter> | |
140 </data> | |
141 <data format="tabular" name="chh_ob" label="${tool.name} on ${on_string}: CHH original bottom strand"> | |
142 <filter> ( compress == False and comprehensive == False and merge_non_cpg == False) </filter> | |
143 </data> | |
144 <data format="tabular" name="cpg_ctob" label="${tool.name} on ${on_string}: CpG complementary to bottom strand"> | |
145 <filter> ( compress == False and comprehensive == False and merge_non_cpg == False) </filter> | |
146 </data> | |
147 <data format="tabular" name="chg_ctob" label="${tool.name} on ${on_string}: CHG complementary to bottom strand"> | |
148 <filter> ( compress == False and comprehensive == False and merge_non_cpg == False) </filter> | |
149 </data> | |
150 <data format="tabular" name="chh_ctob" label="${tool.name} on ${on_string}: CHH complementary to bottom strand"> | |
151 <filter> ( compress == False and comprehensive == False and merge_non_cpg == False) </filter> | |
152 </data> | |
153 | |
154 <!-- Context-dependent methylation output files (comprehensive option) --> | |
155 <data format="tabular" name="cpg_context" label="${tool.name} on ${on_string}: CpG context dependent"> | |
156 <filter> ( compress == False and comprehensive) </filter> | |
157 </data> | |
158 <data format="tabular" name="chg_context" label="${tool.name} on ${on_string}: CHG context dependent"> | |
159 <filter> ( compress == False and comprehensive and merge_non_cpg == False) </filter> | |
160 </data> | |
161 <data format="tabular" name="chh_context" label="${tool.name} on ${on_string}: CHH context dependent"> | |
162 <filter> ( compress == False and comprehensive and merge_non_cpg == False) </filter> | |
163 </data> | |
164 <!-- Merged non-CpG output (comprehensive together with merge_non_cpg) --> | |
165 <data format="tabular" name="non_cpg_context" label="${tool.name} on ${on_string}: Non CpG context dependent"> | |
166 <filter> ( compress == False and comprehensive and merge_non_cpg) </filter> | |
167 </data> | |
168 <!-- Strand-specific non-CpG outputs (merge_non_cpg without comprehensive) --> | |
169 <data format="tabular" name="non_cpg_context_ot" label="${tool.name} on ${on_string}: Non CpG context dependent on original top strand"> | |
170 <filter> ( compress == False and comprehensive == False and merge_non_cpg) </filter> | |
171 </data> | |
172 <data format="tabular" name="non_cpg_context_ctot" label="${tool.name} on ${on_string}: Non CpG context dependent on complementary to top strand"> | |
173 <filter> ( compress == False and comprehensive == False and merge_non_cpg) </filter> | |
174 </data> | |
175 <data format="tabular" name="non_cpg_context_ob" label="${tool.name} on ${on_string}: Non CpG context dependent on original bottom strand"> | |
176 <filter> ( compress == False and comprehensive == False and merge_non_cpg) </filter> | |
177 </data> | |
178 <data format="tabular" name="non_cpg_context_ctob" label="${tool.name} on ${on_string}: Non CpG context dependent on complementary to bottom strand"> | |
179 <filter> ( compress == False and comprehensive == False and merge_non_cpg) </filter> | |
180 </data> | |
181 <!-- Single archive that replaces all individual result files when compress is set --> | |
182 <data format="gzipped" name="compressed_output" label="${tool.name} on ${on_string}: Result archive."> | |
183 <filter> ( compress ) </filter> | |
184 </data> | |
185 </outputs> | |
186 | |
187 <tests> | |
188 </tests> | |
189 | |
190 <help> | |
191 | |
192 **What it does** | |
193 | |
194 The following is a brief description of all options to control the Bismark_ | |
195 methylation extractor. The script reads in a bisulfite read alignment results file | |
196 produced by the Bismark bisulfite mapper and extracts the methylation information | |
197 for individual cytosines. This information is found in the methylation call field | |
198 which can contain the following characters: | |
199 | |
200 | |
201 - X = for methylated C in CHG context (was protected) | |
202 - x = for not methylated C in CHG context (was converted) | |
203 - H = for methylated C in CHH context (was protected) | |
204 - h = for not methylated C in CHH context (was converted) | |
205 - Z = for methylated C in CpG context (was protected) | |
206 - z = for not methylated C in CpG context (was converted) | |
207 - . = for any bases not involving cytosines | |
208 | |
209 | |
210 The methylation extractor outputs result files for cytosines in CpG, CHG and CHH | |
211 context (this distinction is actually already made in Bismark itself). As the methylation | |
212 information for every C analysed can produce files which easily have tens or even hundreds of | |
213 millions of lines, file sizes can become very large and more difficult to handle. The C | |
214 methylation info additionally splits cytosine methylation calls up into one of the four possible | |
215 strands a given bisulfite read aligned against: | |
216 | |
217 - OT = original top strand | |
218 - CTOT = complementary to original top strand | |
219 | |
220 - OB = original bottom strand | |
221 - CTOB = complementary to original bottom strand | |
222 | |
223 Thus, by default twelve individual output files are being generated per input file (unless | |
224 --comprehensive is specified, see below). The output files can be imported into a genome | |
225 viewer, such as SeqMonk, and re-combined into a single data group if desired (in fact | |
226 unless the bisulfite reads were generated preserving directionality it doesn't make any | |
227 sense to look at the data in a strand-specific manner). Strand-specific output files can | |
228 optionally be skipped, in which case only three output files for CpG, CHG or CHH context | |
229 will be generated. For both the strand-specific and comprehensive outputs there is also | |
230 the option to merge both non-CpG contexts (CHG and CHH) into one single non-CpG context. | |
231 | |
232 | |
233 .. _Bismark: http://www.bioinformatics.babraham.ac.uk/projects/bismark/ | |
234 | |
235 | |
236 It is developed by Krueger F and Andrews SR. at the Babraham Institute. Krueger F, Andrews SR. (2011) Bismark: a flexible aligner and methylation caller for Bisulfite-Seq applications. Bioinformatics, 27, 1571-2. | |
237 | |
238 ------- | |
239 | |
240 **Bismark settings** | |
241 | |
242 All of the options have a default value. You can change any of them. If any Bismark function is missing please contact the tool author or your Galaxy admin. | |
243 | |
244 ------ | |
245 | |
246 **Outputs** | |
247 | |
248 The output files are in the following format (tab delimited):: | |
249 | |
250 | |
251 Column Description | |
252 -------- -------------------------------------------------------- | |
253 1 seq-ID | |
254 2 strand | |
255 3 chromosome | |
256 4 position | |
257 5 methylation call | |
258 | |
259 | |
260 * Methylated cytosines receive a '+' orientation, | |
261 * Unmethylated cytosines receive a '-' orientation. | |
262 | |
263 ------ | |
264 | |
265 **OPTIONS** | |
266 | |
267 Input:: | |
268 | |
269 -s/--single-end Input file(s) are Bismark result file(s) generated from single-end | |
270 read data. Specifying either --single-end or --paired-end is | |
271 mandatory. | |
272 | |
273 -p/--paired-end Input file(s) are Bismark result file(s) generated from paired-end | |
274 read data. Specifying either --paired-end or --single-end is | |
275 mandatory. | |
276 | |
277 --no_overlap For paired-end reads it is theoretically possible that read_1 and | |
278 read_2 overlap. This option avoids scoring overlapping methylation | |
279 calls twice. Whilst this removes a bias towards more methylation calls | |
280 towards the center of sequenced fragments it can de facto remove | |
281 a good proportion of the data. | |
282 | |
283 --ignore INT Ignore the first INT bp at the 5' end of each read when processing the | |
284 methylation call string. This can remove e.g. a restriction enzyme site | |
285 at the start of each read. | |
286 | |
287 Output:: | |
288 | |
289 --comprehensive Specifying this option will merge all four possible strand-specific | |
290 methylation info into context-dependent output files. The default | |
291 contexts are: | |
292 - CpG context | |
293 - CHG context | |
294 - CHH context | |
295 | |
296 --merge_non_CpG This will produce two output files (in --comprehensive mode) or eight | |
297 strand-specific output files (default) for Cs in | |
298 - CpG context | |
299 - non-CpG context | |
300 | |
301 --report Prints out a short methylation summary as well as the parameters used to run | |
302 this script. | |
303 | |
304 | |
305 </help> | |
306 </tool> |