comparison bismark_methyl_extractor/bismark_methylation_extractor.xml @ 7:fcadce4d9a06 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit b'e6ee273f75fff61d1e419283fa8088528cf59470\n'
author bgruening
date Sat, 06 May 2017 13:18:09 -0400
parents
children
comparison
equal deleted inserted replaced
6:0f8646f22b8d 7:fcadce4d9a06
1 <tool id="bismark_methylation_extractor" name="Bismark Meth. Extractor" version="0.16.3">
2
3 <description>Reports on methylation status of reads mapped by Bismark</description>
4 <!--<version_command>bismark_methylation_extractor version</version_command>-->
5
6 <requirements>
7 <requirement type="package" version="0.1.19">samtools</requirement>
8 <requirement type="package" version="2.1.0">bowtie2</requirement>
9 </requirements>
10
11 <parallelism method="basic"></parallelism>
12 <!-- Cheetah template for the wrapper command line: the input file, the tool directory and the compress target are always passed; trimming options are emitted only when non-zero, read-2 options only for paired-end data, and report options only when the matching input is enabled. -->
13 <command interpreter="python">
14 <![CDATA[
15 bismark_methylation_extractor.py
16
17 --infile "$input"
18
19 ##--bismark_path \$SCRIPT_PATH
20 --bismark_path "$__tool_directory__"
21
22 #if $singlePaired.sPaired == "single":
23 --single-end
24 #else:
25 --paired-end
26 $singlePaired.no_overlap
27 #end if
28
29 #if str( $singlePaired['ignore_r1'] ) != "0":
30 --ignore $singlePaired['ignore_r1']
31 #end if
32 #if str( $singlePaired['ignore_3prime_r1'] ) != "0":
33 --ignore_3prime $singlePaired['ignore_3prime_r1']
34 #end if
35
36 #if $singlePaired.sPaired == "paired":
37 #if str( $singlePaired['ignore_r2'] ) != "0":
38 --ignore_r2 $singlePaired['ignore_r2']
39 #end if
40 #if str( $singlePaired['ignore_3prime_r2'] ) != "0":
41 --ignore_3prime_r2 $singlePaired['ignore_3prime_r2']
42 #end if
43 #end if
44
45 #if $splitting_report:
46 --splitting_report "$output_splitting_report"
47 #end if
48
49 #if $mbias_report:
50 --mbias_report "$output_mbias_report"
51 #end if
52
53 #if $cytosine_report['cytosine_report_selector']:
54 --cytosine_report "$output_cytosine_report"
55 --genome_file "${cytosine_report.built_in_fasta.fields.path}"
56 #if not $cytosine_report['cpg_context']:
57 --cx_context
58 #end if
59 #end if
60
61 #if $output_settings['comprehensive']:
62 --comprehensive
63 #end if
64
65 #if $output_settings['merge_non_cpg']:
66 --merge-non-cpg
67 #end if
68
69 --compress "$compressed_output"
70 ]]>
71 </command>
72
73 <inputs>
74 <!-- Input parameters: the Bismark SAM/BAM alignment, a single/paired-end switch with per-read trimming values (the paired branch adds Read 2 trimming and the overlap flag), three report toggles (the cytosine report also asks for a reference genome and a CpG-only switch), and advanced output settings. -->
75 <param name="input" type="data" format="sam,bam" label="SAM/BAM file from Bismark bisulfite mapper" />
76 <conditional name="singlePaired">
77 <param name="sPaired" type="select" label="Is this library mate-paired?">
78 <option value="single">Single-end</option>
79 <option value="paired">Paired-end</option>
80 </param>
81 <when value="single">
82 <param name="ignore_r1" type="integer" value="0" label="Ignore the first N bp from the 5’ end of single-end read when processing the methylation call string." />
83 <param name="ignore_3prime_r1" type="integer" value="0" label="Ignore the last N bp from the 3' end of single-end read when processing the methylation call string."/>
84 </when>
85 <when value="paired">
86 <param name="ignore_r1" type="integer" value="0" label="Ignore the first N bp from the 5’ end of Read 1 when processing the methylation call string." />
87 <param name="ignore_3prime_r1" type="integer" value="0" label="Ignore the last N bp from the 3' end of Read 1 when processing the methylation call string."/>
88 <param name="ignore_r2" type="integer" value="0" label="Ignore the first N bp from the 5' end of Read 2 of paired-end sequencing results" />
89 <param name="ignore_3prime_r2" type="integer" value="0" label="Ignore the last N bp from the 3' end of Read 2 of paired-end sequencing results"/>
90 <param name="no_overlap" type="boolean" truevalue="--no-overlap" falsevalue="" checked="False" label="This option avoids scoring overlapping methylation calls twice, in case of overlapping read one and read two" help="" />
91 </when>
92 </conditional>
93 <param name="splitting_report" type="boolean" truevalue="true" falsevalue="false" checked="True" label="Short methylation summary output (Splitting Report)" />
94 <param name="mbias_report" type="boolean" truevalue="true" falsevalue="false" checked="True" label="Methylation proportion report for each possible position in the read (Mbias Report)" />
95 <conditional name="cytosine_report">
96 <param name="cytosine_report_selector" type="boolean" truevalue="true" falsevalue="false" checked="True" label="Genome-wide methylation report for all cytosines in the genome (Cytosine Report)" />
97 <when value="true">
98 <param name="built_in_fasta" type="select" label="Select the Reference Genome (fasta) used for the mapping">
99 <options from_data_table="all_fasta">
100 <filter type="sort_by" column="name"/>
101 <validator type="no_options" message="No genomes in fasta are available for the selected input dataset"/>
102 </options>
103 </param>
104 <param name="cpg_context" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Reports Cpg Context only" />
105 </when>
106 <when value="false"></when>
107 </conditional>
108 <section name="output_settings" title="Advanced output settings" expanded="False">
109 <param name="comprehensive" type="boolean" truevalue="true" falsevalue="false" checked="True" label="Merge all four possible strand-specific methylation info
110 into context-dependent output files" help="" />
111 <param name="merge_non_cpg" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Merge all non-CpG contexts into one file" help="This will produce eight strand-specific output files, or two output files in comprehensive mode." />
112 </section>
113 </inputs>
114 <outputs>
115 <!-- Strand abbreviations:
116 OT – original top strand
117 CTOT – complementary to original top strand
118 OB – original bottom strand
119 CTOB – complementary to original bottom strand
120 -->
121 <data format="tabular" name="output_splitting_report" label="${tool.name} on ${on_string}: Splitting Report">
122 <filter> ( splitting_report is True ) </filter>
123 </data>
124 <data format="txt" name="output_mbias_report" label="${tool.name} on ${on_string}: Mbias Report">
125 <filter> ( mbias_report is True ) </filter>
126 </data>
127 <data format="txt" name="output_cytosine_report" label="${tool.name} on ${on_string}: Genome-wide methylation report.">
128 <filter> ( cytosine_report['cytosine_report_selector'] ) </filter>
129 </data>
130 <data format="gzipped" name="compressed_output" label="${tool.name} on ${on_string}: Result archive.">
131 <!-- No filter here: the command always passes this dataset through its compress option, and the output_settings section defines no "compress" parameter that a filter could test. -->
132 </data>
133 </outputs>
134
135 <tests>
136 </tests>
137
138 <help>
139 <![CDATA[
140
141 **What it does**
142
143 | The following is a brief description of all options to control the Bismark_methylation extractor. The script reads in a bisulfite read alignment results file produced by the Bismark bisulfite mapper and extracts the methylation information for individual cytosines. This information is found in the methylation call field which can contain the following characters:
144 |
145
146
147 - X = for methylated C in CHG context (was protected)
148 - x = for not methylated C CHG (was converted)
149 - H = for methylated C in CHH context (was protected)
150 - h = for not methylated C in CHH context (was converted)
151 - Z = for methylated C in CpG context (was protected)
152 - z = for not methylated C in CpG context (was converted)
153 - . = for any bases not involving cytosines
154
155 | The methylation extractor outputs result files for cytosines in CpG, CHG and CHH context (this distinction is actually already made in Bismark itself). As the methylation information for every C analysed can produce files which easily have tens or even hundreds of millions of lines, file sizes can become very large and more difficult to handle. The C methylation info additionally splits cytosine methylation calls up into one of the four possible strands a given bisulfite read aligned against:
156 |
157
158 - OT = original top strand
159 - CTOT = complementary to original top strand
160
161 - OB = original bottom strand
162 - CTOB = complementary to original bottom strand
163
164 | Thus, by default twelve individual output files are being generated per input file (unless --comprehensive is specified, see below). The output files can be imported into a genome viewer, such as SeqMonk, and re-combined into a single data group if desired (in fact unless the bisulfite reads were generated preserving directionality it doesn't make any sense to look at the data in a strand-specific manner). Strand-specific output files can optionally be skipped, in which case only three output files for CpG, CHG or CHH context will be generated. For both the strand-specific and comprehensive outputs there is also the option to merge both non-CpG contexts (CHG and CHH) into one single non-CpG context.
165 |
166 | Bismark_ is developed by Krueger F and Andrews SR at the Babraham Institute. Krueger F, Andrews SR. (2011) Bismark: a flexible aligner and methylation caller for Bisulfite-Seq applications. Bioinformatics, 27, 1571-2.
167
168 .. _Bismark: http://www.bioinformatics.babraham.ac.uk/projects/bismark/
169
170 ----
171
172 **Outputs**
173
174 - The output files are in the following format (tab delimited)::
175
176
177 Column Description
178 -------- --------------------------------------------------------
179 1 seq-ID
180 2 strand
181 3 chromosome
182 4 position
183 5 methylation call
184
185
186 - Methylated cytosines receive a '+' orientation,
187 - Unmethylated cytosines receive a '-' orientation.
188
189 ----
190
191 **Note on Bismark settings**
192
193 | All of the options have a default value. You can change any of them. If any Bismark function is missing please contact the tool author or your Galaxy admin.
194
195 ----
196
197 **Settings**
198
199 * **If Single-End Reads**
200
201 * **Ignore the first N bp from the 5’ end of single-end read when processing the methylation call string.**
202
203 | This can remove e.g. a restriction enzyme site at the start of each read or any other source of bias (e.g. PBAT-Seq data).
204 |
205 | *Input option --ignore <INT>*
206
207 * **Ignore the last N bp from the 3' end of single-end read when processing the methylation call string.**
208
209 | This can remove unwanted biases from the end of reads.
210 |
211 | *Input option --ignore_3prime <INT>*
212
213 * **If Paired-End Reads**
214
215 * **Ignore the first N bp from the 5’ end of Read 1 when processing the methylation call string**
216
217 | This can remove e.g. a restriction enzyme site at the start of each read or any other source of bias (e.g. PBAT-Seq data).
218 |
219 | *Input option --ignore <INT>*
220
221 * **Ignore the last N bp from the 3’ end of Read 1 when processing the methylation call string**
222
223 | This can remove unwanted biases from the end of reads.
224 |
225 | *Input option --ignore_3prime <INT>*
226
227 * **Ignore the first N bp from the 5' end of Read 2 of paired-end sequencing results**
228
229 | Since the first couple of bases in Read 2 of BS-Seq experiments show a severe bias towards non-methylation as a result of end-repairing sonicated fragments with unmethylated cytosines (see M-bias plot), it is recommended that the first couple of bp of Read 2 are removed before starting downstream analysis. Please see the section on M-bias plots in the Bismark User Guide for more details. The options --ignore <int> and --ignore_r2 <int> can be combined in any desired way.
230 |
231 | *Input option --ignore_r2*
232
233 * **Ignore the last N bp from the 3' end of Read 2 of paired-end sequencing results**
234
235 | This can remove unwanted biases from the end of reads.
236 |
237 | *Input option --ignore_3prime_r2*
238
239 * **This option avoids scoring overlapping methylation calls twice, in case of overlapping read one and read two**
240
241 | For paired-end reads it is theoretically possible that read_1 and read_2 overlap. This option avoids scoring overlapping methylation calls twice (only methylation calls of read 1 are used in the process since read 1 has historically higher quality basecalls than read 2). Whilst this option removes a bias towards more methylation calls in the center of sequenced fragments it may de facto remove a sizable proportion of the data. This option is highly recommended for paired-end data.
242 |
243 | *Input option --no_overlap*
244
245 * **Short methylation summary output (Splitting Report)**
246
247 | Prints out a short methylation summary as well as the parameters used to run this script.
248 |
249 | *Output option --report*
250
251 * **Methylation proportion report for each possible position in the read (Mbias Report)**
252
253 | This report shows the methylation proportion across each possible position in the read (described in further detail in: Hansen et al., Genome Biology, 2012, 13:R83). The data for the M-bias plot is also written into a text file and is in the following format:
254 |
255 | <read position> <count methylated> <count unmethylated> <% methylation> <total coverage>
256 |
257 | This allows generating nice graphs by alternative means, e.g. using R or Excel
258
259 * **Genome-wide methylation report for all cytosines in the genome**
260
261 | the option --cytosine_report produces a genome-wide methylation report for all cytosines in the genome.
262
263 * **If CpG Context only**
264
265 | The output uses 1-based chromosome coordinates (zero-based coordinates are optional) and reports CpG context only (all cytosine context is optional). The output considers all Cs on both forward and reverse strands and reports their position, strand, trinucleotide content and methylation state (counts are 0 if not covered).
266 |
267 | *Genome-wide cytosine methylation report specific option --bedgraph --cytosine_report --genome_folder <path>*
268
269 * **If not CpG Context only**
270
271 | The output file contains information on every single cytosine in the genome irrespective of its context. This applies to both forward and reverse strands. Please be aware that this will generate output files with > 1.1 billion lines for a mammalian genome such as human or mouse. Default: OFF (i.e. Default = CpG context only).
272 |
273 | *Genome-wide cytosine methylation report specific option --bedgraph --CX_context --cytosine_report --genome_folder <path>*
274
275 * **Merge all four possible strand-specific methylation info into context-dependent output files**
276
277 | Specifying this option will merge all four possible strand-specific methylation info into context-dependent output files. The default contexts are:
278 | - CpG context
279 | - CHG context
280 | - CHH context
281 |
282 | *Output option --comprehensive*
283
284 * **Merge all non-CpG contexts into one file**
285
286 | This will produce two output files (in --comprehensive mode) or eight strand-specific output files (default) for Cs in
287 | - CpG context
288 | - non-CpG context
289 |
290 | *Output option --merge_non_CpG*
291
292 * **Compress all result files and output one single file**
293
294 | The methylation extractor files (CpG_OT..., CpG_OB... etc) will be written out in a GZIP compressed form to save disk space. This option does not work on bedGraph and genome-wide cytosine reports as they are 'tiny' anyway.
295 |
296 | *Output option --gzip*
297
298 ]]>
299 </help>
300
301 <citations>
302 <citation type="doi">10.1093/bioinformatics/btr167</citation>
303 </citations>
304 </tool>