comparison PileOMeth.xml @ 0:65575e70af7e draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/pileometh commit 033c712216994524fdd120b771052ac4ca9e51c0-dirty
author bgruening
date Fri, 18 Sep 2015 11:39:55 -0400
parents
children d1b66015effd
comparison
equal deleted inserted replaced
-1:000000000000 0:65575e70af7e
1 <tool id="pileometh" name="PileOMeth" version="0.1.5">
2 <description>A tool for processing bisulfite sequencing alignments</description>
3 <requirements> <!-- Declares the pileometh package (version 0.1.5) as a dependency of this tool -->
4 <requirement type="package" version="0.1.5">pileometh</requirement>
5 </requirements>
6 <stdio> <!-- Error detection: exit codes first, then patterns matched against the tool output -->
7 <!-- Any exit code other than zero is an error -->
8 <exit_code range="1:" />
9 <exit_code range=":-1" />
10 <!-- In case the return code has not been set properly, check stderr too -->
11 <regex match="Error:" />
12 <regex match="Exception:" />
13 </stdio>
14 <version_command><![CDATA[PileOMeth 2>&1 | head -n 2 | tail -n 1]]></version_command> <!-- Keeps only the second line printed by PileOMeth when run without arguments (stderr merged into stdout); presumably the version line - confirm -->
15 <!-- Builds the PileOMeth call. The reference is symlinked to reference.fasta in the job directory: a cached genome is located through the "path" field of its all_fasta data table entry, a history genome through the dataset file itself. Paths and free-text values are single-quoted for the shell. --> <command><![CDATA[
16 #set $ref_path = $reference_source.ref_file.fields.path if $reference_source.reference_source_selector == "cached" else $reference_source.ref_file
17 ln -s '$ref_path' reference.fasta &&
18 PileOMeth
19 $main_task.task
20 ## "extract" writes its bedGraph files with the "output" prefix, which the outputs section collects.
21 #if $main_task.task == "extract":
22 -o output
23 $main_task.mergeContext
24 #end if
25 ## Filters, thresholds and extra contexts are only passed when the advanced section is displayed.
26 #if $advanced_options.options=="yes":
27 #if $advanced_options.mbias_regionString:
28 -r '$advanced_options.mbias_regionString'
29 #end if
30 $advanced_options.keepDupes
31 $advanced_options.keepSingleton
32 -q $advanced_options.min_mapq
33 -p $advanced_options.min_phred
34 -D $advanced_options.max_pbdepth
35 $advanced_options.CHG
36 $advanced_options.CHH
37 #end if
38
39 reference.fasta
40
41 '$input_sortedAlignBAM'
42 ## "mbias" takes an output prefix as its last argument; the SVG files are named after it.
43 #if $main_task.task == "mbias":
44 out_mbias
45 #end if
46 ]]></command>
47 <inputs>
48 <conditional name="reference_source"> <!-- Selects where the reference genome comes from -->
49 <param name="reference_source_selector" type="select" label="Load reference genome from">
50 <option value="cached">Local cache</option>
51 <option value="history">History</option>
52 </param>
53 <when value="cached"> <!-- Genome chosen from the entries of the all_fasta data table -->
54 <param name="ref_file" type="select" label="Using reference genome" help="Reference sequence">
55 <options from_data_table="all_fasta"/>
56 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
57 </param>
58 </when>
59 <when value="history"> <!-- FASTA dataset picked from the history -->
60 <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="REFERENCE_SEQUENCE; You can upload a FASTA sequence to the history and use it as reference" />
61 </when>
62 </conditional>
63
64 <param name="input_sortedAlignBAM" multiple="False" type="data" format="bam" label="sorted_alignments.bam"/> <!-- Single BAM dataset handed to PileOMeth -->
65 <conditional name="main_task"> <!-- Selects the PileOMeth sub-command; the option values are passed to the command line verbatim -->
66 <param name="task" type="select" label="What do you want to do?" >
67 <option value="extract">Extract methylation metrics from an alignment file in BAM/CRAM format</option>
68 <option value="mbias">Determine the position-dependent methylation bias in a dataset, producing diagnostic SVG images</option>
69 </param>
70 <when value="extract"> <!-- Only "extract" exposes the context-merging switch -->
71 <param name="mergeContext" type="boolean" checked="false" truevalue="--mergeContext" falsevalue=""
72 label="Merge per-Cytosine metrics from CpG and CHG contexts into per-CpG or per-CHG metrics" help="(--mergeContext)" />
73 </when>
74 <when value="mbias"/>
75 </conditional>
76 <conditional name="advanced_options"> <!-- Optional filters and thresholds; nothing from this section reaches the command line unless "yes" is selected -->
77 <param name="options" type="select" label="Advanced options">
78 <option value="">Hide advanced options</option>
79 <option value="yes">Display advanced options</option>
80 </param>
81 <when value="yes">
82 <param name="mbias_regionString" type="text" value="" label="Region string in which to extract methylation"/>
83 <param name="keepDupes" type="boolean" checked="false" truevalue="--keepDupes" falsevalue=""
84 label="By default, any alignment marked as a duplicate is ignored. This option causes them to be incorporated" />
85 <param name="keepSingleton" type="boolean" checked="false" truevalue="--keepSingleton" falsevalue=""
86 label="By default, if only one read in a pair aligns (a singleton) then it's ignored." />
87 <param name="min_mapq" type="integer" value="10" label="Minimum MAPQ threshold to include an alignment (default 10)"/>
88 <param name="min_phred" type="integer" value="5" min="1" label="Minimum Phred threshold to include a base (default 5). This must be >0."/> <!-- min="1" enforces the ">0" rule stated in the label -->
89 <param name="max_pbdepth" type="integer" value="2000" label="Maximum per-base depth (default 2000)"/>
90
91 <param name="CHG" type="boolean" checked="false" truevalue="--CHG" falsevalue=""
92 label="Additional output file with CHG methylation metrics" />
93 <param name="CHH" type="boolean" checked="false" truevalue="--CHH" falsevalue=""
94 label="Additional output file with CHH methylation metrics" />
95 </when>
96 <when value=""/>
97 </conditional>
98 </inputs>
99 <outputs> <!-- Each dataset is picked up from the job directory only when every one of its filters holds -->
100 <data name="outFileExtractCpG" format="bedgraph" from_work_dir="output_CpG.bedGraph"
101 label="${tool.name} on ${on_string}">
102 <filter>main_task['task'] == "extract"</filter>
103 </data>
104 <data name="outFileExtractCHG" format="bedgraph" from_work_dir="output_CHG.bedGraph"
105 label="${tool.name} on ${on_string} (CHG)">
106 <filter>main_task['task'] == 'extract'</filter>
107 <filter>advanced_options['options'] == "yes"</filter>
108 <filter>advanced_options['CHG']</filter> <!-- Truth test: holds whether the boolean arrives as True/False or as its "--CHG" flag string -->
109 </data>
110 <data name="outFileExtractCHH" format="bedgraph" from_work_dir="output_CHH.bedGraph"
111 label="${tool.name} on ${on_string} (CHH)">
112 <filter>main_task['task'] == 'extract'</filter>
113 <filter>advanced_options['options'] == "yes"</filter>
114 <filter>advanced_options['CHH']</filter> <!-- Truth test: holds whether the boolean arrives as True/False or as its "--CHH" flag string -->
115 </data>
116 <data name="outFileMbiasCpG" format="svg" from_work_dir="out_mbias_OT.svg"
117 label="${tool.name} on ${on_string} (methylation bias)">
118 <filter>main_task['task'] == 'mbias'</filter>
119 </data>
120 </outputs>
121 <tests> <!-- Two functional tests, one per task, sharing the same reference and BAM fixtures -->
122 <test> <!-- extract: history reference, advanced options shown with MAPQ threshold 2; the CpG bedGraph is diffed against test_1.bedGraph -->
123 <param name="task" value="extract" />
124 <param name="min_mapq" value="2" />
125 <param name="reference_source_selector" value="history" />
126 <param name="ref_file" value="cg100.fa" ftype="fasta" />
127 <param name="input_sortedAlignBAM" value="cg_aln.bam" ftype="bam"/>
128 <param name="mergeContext" value="false"/>
129 <param name="options" value="yes"/>
130 <output name="outFileExtractCpG" file="test_1.bedGraph" ftype="bedgraph" compare="diff"/>
131 </test>
132 <test> <!-- mbias: same inputs and MAPQ threshold; the SVG output is diffed against test_2_output.svg -->
133 <param name="task" value="mbias" />
134 <param name="min_mapq" value="2" />
135 <param name="options" value="yes"/>
136 <param name="reference_source_selector" value="history" />
137 <param name="ref_file" value="cg100.fa" ftype="fasta" />
138 <param name="input_sortedAlignBAM" value="cg_aln.bam" ftype="bam"/>
139 <output name="outFileMbiasCpG" file="test_2_output.svg" ftype="svg" compare="diff"/>
140 </test>
141 </tests>
142 <help><![CDATA[
143 **What it does**
144
145 PileOMeth (using a PILEup to extract METHylation metrics) will process a coordinate-sorted and indexed BAM or CRAM file containing some form of BS-seq alignments and extract per-base methylation metrics from them. PileOMeth requires an indexed fasta file containing the reference genome as well.
146
147 By default, PileOMeth will only calculate metrics for Cytosines in a CpG context, but metrics for those in CHG and CHH contexts are supported as well.
148
149 **Methylation context**
150
151 PileOMeth groups all Cytosines into one of three sequence contexts: CpG, CHG, and CHH. Here, H is the IUPAC ambiguity code for any nucleotide other than G. If an N is encountered in the reference sequence, then the context will be assigned to CHG or CHH, as appropriate (e.g., CNG would be categorized as in a CHG context and CNC as in a CHH context). If a Cytosine is close enough to the end of a chromosome/contig such that its context can't be inferred, then it is categorized as CHH (e.g., a Cytosine as the last base of a chromosome is considered as being in a CHH context).
152
153
154 **Output information**
155
156 If no methylation can be found, the output will be empty.
157
158 Otherwise a variant of bedGraph that's similar to the "coverage" file is produced. In short, each line consists of 6 tab separated columns:
159
160 1. The chromosome/contig/scaffold name
161 2. The start coordinate
162 3. The end coordinate
163 4. The methylation percentage rounded to an integer
164 5. The number of alignments/pairs reporting methylated bases
165 6. The number of alignments/pairs reporting unmethylated bases
166
167 All coordinates are 0-based half open, which conforms to the bedGraph definition. When paired-end reads are aligned, it can often occur that their alignments overlap. In such cases, PileOMeth will not count both reads of the pair in its output, as doing so would lead to incorrect downstream statistical results.
168
169 An example of the output is below::
170
171 #track type="bedGraph" description="SRR1182519.sorted CpG methylation levels"
172 #1 25115 25116 100 3 0
173 #1 29336 29337 50 1 1
174
175 Note the header line, which starts with "track". The "description" field is used as a label in programs such as IGV. Each of the subsequent lines describe single Cytosines, the 25116th and 29337th base on chromosome 1, respectively. The first position has 3 alignments (or pairs of alignments) indicating methylation and 0 indicating unmethylation (100% methylation) and the second position has 1 alignment each supporting methylation and unmethylation (50% methylation).
176
177 **Per-CpG/CHG metrics**
178
179 In many circumstances, it's desirable for metrics from individual Cytosines in a CpG to be merged, producing per-CpG metrics rather than per-Cytosine metrics. This can be accomplished with the **Merge per-Cytosine** parameter. If this is used, then this output::
180
181 #track type="bedGraph" description="SRR1182519.sorted CpG methylation levels"
182 #1 25114 25115 100 2 1
183 #1 25115 25116 100 3 0
184
185 is changed to this::
186
187 #track type="bedGraph" description="SRR1182519.sorted merged CpG methylation levels"
188 #1 25114 25116 100 5 1
189
190 This also works for CHG-level metrics. If bedGraph files containing per-Cytosine metrics already exist, they can be converted to instead contain per-CpG/CHG metrics with PileOMeth mergeContext.
191
192 **Methylation bias plotting and correction**
193
194 In an ideal experiment, we expect that the probability of observing a methylated C is constant across the length of any given read. In practice, however, there are often increases/decreases in observed methylation rate at the ends of reads and/or more global changes. These are termed methylation bias and including such regions in the extracted methylation metrics will result in noisier and less accurate data. For this reason, users are strongly encouraged to make a methylation bias plot.
195
196 Selecting the methylation bias task will create a methylation bias (mbias for short) plot for each of the strands for which there are valid alignments.
197 The resulting mbias graphs are in SVG format and can be viewed in most modern web browsers:
198
199 .. image:: example.svg
200
201
202 If you have paired-end data, both reads in the pair will be shown separately, as is the case above. The program will suggest regions for inclusion ("--OT 2,0,0,98" above) and mark them on the plot, if applicable. The format of this output is described in PileOMeth extract -h. These suggestions should not be accepted blindly; users are strongly encouraged to have a look for themselves and tweak the actual bounds as appropriate. The lines indicate the average methylation percentage at a given position and the shaded regions the 99.9% confidence interval around it. This is useful in gauging how many methylation calls a given position has relative to its neighbors. Note the spike in methylation at the end of read #2 and the corresponding dip at the beginning of read #1. This is common and these regions can be ignored with the suggested trimming bounds. Note also that the numbers refer to the first and last base that should be included during methylation extraction, not the last and first base to ignore!
203
204 -----
205
206 **PileOMeth** is Free and Open Source Software; see more details on the PileOMeth_ website.
207
208 .. _PileOMeth: https://github.com/dpryan79/PileOMeth
209 ]]></help>
210 <citations>
211 </citations>
212 </tool>