comparison moabs.xml @ 1:8c8cc81b34cd draft

"planemo upload for repository https://github.com/sunnyisgalaxy/moabs commit 6a45aa4c34b4f3b73ab0c6c3d9e7a315b62bf761"
author iuc
date Sat, 11 Apr 2020 04:14:34 -0400
parents 26d7ec4af119
children 214874e24cd6
comparison
equal deleted inserted replaced
0:26d7ec4af119 1:8c8cc81b34cd
1 <tool id="moabs" name="MOABS" profile="16.04" version="@VERSION@"> 1 <tool id="moabs" name="MOABS" profile="16.04" version="@VERSION@+galaxy1">
2 <description>MOdel based Analysis of Bisulfite Sequencing data</description> 2 <description>MOdel based Analysis of Bisulfite Sequencing data</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements"/> 6 <expand macro="requirements"/>
9 #if str( $mcomp_advanced.doComp.compare_selector ) == "0": 9 #if str( $mcomp_advanced.doComp.compare_selector ) == "0":
10 cp -f '$mcomp_advanced.doComp.compFile' comp.g1.vs.g2.txt && 10 cp -f '$mcomp_advanced.doComp.compFile' comp.g1.vs.g2.txt &&
11 #end if 11 #end if
12 moabs -v 1 --def MMAP.p="\${GALAXY_SLOTS:-4}" --def MCALL.p="\${GALAXY_SLOTS:-4}" --def MCOMP.p="\${GALAXY_SLOTS:-4}" --cf '$cfg_file' && 12 moabs -v 1 --def MMAP.p="\${GALAXY_SLOTS:-4}" --def MCALL.p="\${GALAXY_SLOTS:-4}" --def MCOMP.p="\${GALAXY_SLOTS:-4}" --cf '$cfg_file' &&
13 #if "1" in $output_selector: 13 #if "1" in $output_selector:
14 cp -f dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr '$output1' && 14 cp -f dmc_M2_g1.G.bed_vs_g2.G.bed.txt '$output1' &&
15 #end if 15 #end if
16 #if "2" in $output_selector: 16 #if "2" in $output_selector:
17 cp -f comp.g1.vs.g2.txt '$output2' && 17 cp -f dmr_M2_g1.G.bed_vs_g2.G.bed.txt '$output2' &&
18 #end if
19 #if "3" in $output_selector:
20 cp -f comp.g1.vs.g2.txt '$output3' &&
18 #end if 21 #end if
19 echo Done 22 echo Done
20 ]]> 23 ]]>
21 </command> 24 </command>
22 <configfiles> 25 <configfiles>
60 Path=bsmap 63 Path=bsmap
61 d='${reference_fasta_filename}' 64 d='${reference_fasta_filename}'
62 #if str( $bsmap_advanced.bsmap_mismatch.bsmap_mismatch_selector ) != "0": 65 #if str( $bsmap_advanced.bsmap_mismatch.bsmap_mismatch_selector ) != "0":
63 v=$bsmap_advanced.bsmap_mismatch.v 66 v=$bsmap_advanced.bsmap_mismatch.v
64 #end if 67 #end if
68 s=$bsmap_advanced.s
69 w=$bsmap_advanced.w
70 #if $bsmap_advanced.D:
71 D=$bsmap_advanced.D
72 #end if
73 S=$bsmap_advanced.S
65 n=$bsmap_advanced.n 74 n=$bsmap_advanced.n
75 q=$bsmap_advanced.q
76 z=$bsmap_advanced.z
77 f=$bsmap_advanced.f
78 #if $bsmap_advanced.A:
79 A=$bsmap_advanced.A
80 #end if
66 r=$bsmap_advanced.r 81 r=$bsmap_advanced.r
67 R='' 82 #if str( $bsmap_advanced.R ) == "1":
83 R=''
84 #end if
85 #if str( $bsmap_advanced.u ) == "1":
86 u=''
87 #end if
88 m=$bsmap_advanced.m
89 x=$bsmap_advanced.x
68 90
69 [MCALL] 91 [MCALL]
70 Path=mcall 92 Path=mcall
71 r='${reference_fasta_filename}' 93 r='${reference_fasta_filename}'
94 cytosineMinScore=$mcall_advanced.cytosineMinScore
95 nextBaseMinScore=$mcall_advanced.nextBaseMinScore
96 qualityScoreBase=$mcall_advanced.qualityScoreBase
97 trimWGBSEndRepairPE2Seq=$mcall_advanced.trimWGBSEndRepairPE2Seq
98 trimWGBSEndRepairPE1Seq=$mcall_advanced.trimWGBSEndRepairPE1Seq
99 processPEOverlapSeq=$mcall_advanced.processPEOverlapSeq
100 trimRRBSEndRepairSeq=$mcall_advanced.trimRRBSEndRepairSeq
101 minFragSize=$mcall_advanced.minFragSize
102 minMMFragSize=$mcall_advanced.minMMFragSize
103 reportCpX=$mcall_advanced.reportCpX
72 104
73 [MCOMP] 105 [MCOMP]
74 Path=mcomp 106 Path=mcomp
75 reference='${reference_fasta_filename}'
76 doComp=$mcomp_advanced.doComp.compare_selector 107 doComp=$mcomp_advanced.doComp.compare_selector
108 d=$mcomp_advanced.d
109 filterCredibleDif=$mcomp_advanced.filterCredibleDif
110 pFetDmc=$mcomp_advanced.pFetDmc
111 pFetDmr=$mcomp_advanced.pFetDmr
112 minNominalDif=$mcomp_advanced.minNominalDif
113 minCredibleDif=$mcomp_advanced.minCredibleDif
114 minDmcsInDmr=$mcomp_advanced.minDmcsInDmr
115 maxDistConsDmcs=$mcomp_advanced.maxDistConsDmcs
77 </configfile> 116 </configfile>
78 </configfiles> 117 </configfiles>
79 118
80 <inputs> 119 <inputs>
81 <conditional name="reference_source"> 120 <conditional name="reference_source">
132 <param name="fastq_input1" format="fastqsanger,fastqsanger.gz" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/> 171 <param name="fastq_input1" format="fastqsanger,fastqsanger.gz" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
133 </when> 172 </when>
134 </conditional> 173 </conditional>
135 </repeat> 174 </repeat>
136 <section name="bsmap_advanced" title="Advanced options for BSMAP" expanded="False"> 175 <section name="bsmap_advanced" title="Advanced options for BSMAP" expanded="False">
176 <param argument="-s" type="integer" value="16" min="8" max="16" label="Seed size" help="The seed size for the HASH table. BSMAP implements a seeding algorithm by indexing reference for all possible k-mers, i.e. seeds. As for the seed size, i.e. the length of k-mers, 16 is suggested for the WGBS mode, and 12 is suggested for the RRBS mode. Min=8, max=16."/>
137 <conditional name="bsmap_mismatch"> 177 <conditional name="bsmap_mismatch">
138 <param name="bsmap_mismatch_selector" type="select" label="Set the mismatch rate or number?" help=""> 178 <param name="bsmap_mismatch_selector" type="select" label="Set the mismatch rate or number?" help="When `Do not set` selected, BSMAP will allow a default suggested 8% mismatch rate. Otherwise, a customized mismatch can be controlled by specifying a mismatch rate or a mismatch number.">
139 <option value="0">Do not set</option> 179 <option value="0" selected="true">Do not set</option>
140 <option value="1">Set the mismatch rate</option> 180 <option value="1">Set the mismatch rate</option>
141 <option value="2">Set the mismatch number</option> 181 <option value="2">Set the mismatch number</option>
142 </param> 182 </param>
143 <when value="1"> 183 <when value="1">
144 <param argument="-v" type="float" value="0.08" min="0" max="1" label="Mismatch rate" help="The mismatch rate w.r.t to the read length"/> 184 <param argument="-v" type="float" value="0.08" min="0" max="1" label="Mismatch rate" help="The mismatch rate w.r.t to the read length"/>
145 </when> 185 </when>
146 <when value="2"> 186 <when value="2">
147 <param argument="-v" type="integer" value="3" min="0" label="Mismatch number" help="The maximum number of mismatches allowed on a read"/> 187 <param argument="-v" type="integer" value="3" min="0" label="Mismatch number" help="The maximum number of mismatches allowed on a read"/>
148 </when> 188 </when>
149 </conditional> 189 </conditional>
150 <param argument="-n" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Mapping to four strands?" help="Yes: map SE or PE reads to all 4 strands, i.e. ++, +-, -+, --; No: only map to 2 forward strands, i.e. BSW(++) and BSC(-+)"/> 190 <param argument="-w" type="integer" value="1000" min="0" max="1000" label="Maximum number of equal best hits to count" help="Maximum number of equal best hits to count. When multiple mapping occurs for a read, it should control the number of records to report. Default: 1000."/>
151 <param argument="-r" type="select" label="How to report repeat hits" help="0=none(unique hit/pair); 1=random one; 2=all(slow)"> 191 <param argument="-D" type="text" label="Restriction enzyme digestion sites for RRBS mode" help="For RRBS data, this option activates RRBS mapping mode and set restriction enzyme digestion sites. Digestion position marked by '-', example: -D C-CGG for MspI digestion. default: none (WGBS mode).">
192 <validator type="regex" message="Use A/T/C/G/- for restriction enzyme digestion sites">^[ATCG-]*$</validator>
193 </param>
194 <param argument="-S" type="integer" value="0" label="Random seed" help="Seed for random number generation used in selecting multiple hits. Other seed values generate pseudo random number based on read index number, to allow reproducible mapping results. Default=0 (get seed from system clock, mapping results not resproducible)."/>
195 <param argument="-n" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Mapping to four strands?" help="Yes: map SE or PE reads to all 4 strands, i.e. ++, +-, -+, --; No: only map to 2 forward strands, i.e. BSW(++) and BSC(-+). For example, for a traditional library construction, two forward strands, ++ and -+, are sufficient for alignments. However, a Pico library construction needs all four-strand mappings."/>
196 <param argument="-q" type="integer" value="0" min="0" max="40" label="Quality threshold in trimming" help="The quality threshold to trim read bases. To obtain an accurate mapping, low-quality bases should be skipped beforehand. Min=0, max=40. Default=0 (no trim)."/>
197 <param argument="-z" type="select" label="Base quality" help="Base quality for sequencing reads, Illumina or Sanger.">
198 <option value="33" selected="true">Sanger</option>
199 <option value="64">Illumina</option>
200 </param>
201 <param argument="-f" type="integer" value="5" min="0" label="Maximum number of Ns in a read to filter out" help="To filter out low-quality reads containing >n Ns. Default=5."/>
202 <param argument="-A" type="text" label="3' adapter sequence to trim" help="To trim 3' adapter sequence. Default: none (no trim).">
203 <validator type="regex" message="Use A/T/C/G for adapter sequences, and the length should be greater equal to 12 bases.">^[ATCG]{12,}$|^$</validator>
204 </param>
205 <param argument="-r" type="select" label="How to report repeat hits" help="0=none(unique hit/pair); 1=random one; 2=all(slow). When input reads coverage is high, it is suggested to report only unique hits (r=0). For a low-depth library, a random one from multiple mappings (r=1), or all multiple mappings (r=2) can be specified to increase read coverage, yet be cautious about bias caused by ambiguous mappings.">
152 <option value="0" selected="true">0</option> 206 <option value="0" selected="true">0</option>
153 <option value="1">1</option> 207 <option value="1">1</option>
154 <option value="2">2</option> 208 <option value="2">2</option>
155 </param> 209 </param>
210 <param argument="-R" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Print corresponding reference sequences?" help="Yes: print corresponding reference sequences in mapping records, a `RS:` tag will be added in record attributes; No: do not print reference sequences."/>
211 <param argument="-u" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Report unmapped reads?" help="Yes: print unmapped reads; No: do not print unmapped reads."/>
212 <param argument="-m" type="integer" value="28" min="0" label="Minimal insert size allowed in paired-end mapping" help="For paired-end reads, the minimal insert size allowed in two end mapping. Default=28."/>
213 <param argument="-x" type="integer" value="1000" min="0" label="Maximal insert size allowed in paired-end mapping" help="For paired-end reads, the maximal insert size allowed in two end mapping. Default=1000."/>
214 </section>
215 <section name="mcall_advanced" title="Advanced options for MCALL" expanded="False">
216 <param argument="--cytosineMinScore" type="integer" value="20" min="0" label="Threshold for cytosine quality score" help="Threshold for cytosine quality score. Discard the base if threshold is not reached. Default=20."/>
217 <param argument="--nextBaseMinScore" type="integer" value="3" min="-1" label="Threshold for the next base quality score" help="Threshold for the next base quality score. Possible values: -1 makes the program not to check if next base matches reference; any positive integer or zero makes the program to check if next base matches reference and reaches this score threshold; default=3, i.e., better than 'B' or '#'."/>
218 <param argument="--qualityScoreBase" type="select" label="Specify the quality score system" help="Sanger, Solexa, or Illumina. See wiki FASTQ_format for details. Default: auto-detection.">
219 <option value="0" selected="true">Auto-detection</option>
220 <option value="33">Sanger</option>
221 <option value="59">Solexa</option>
222 <option value="64">Illumina</option>
223 </param>
224 <param argument="--trimWGBSEndRepairPE2Seq" type="integer" value="3" min="0" label="Bases to trim end-repair sequences from +-/--" help="To trim end-repair sequence from begin of +-/-- reads from Pair End WGBS Sequencing. 0: no trim; n (positive integer): trim n bases from begin of +-/-- reads. Default: trim 3 bases."/>
225 <param argument="--trimWGBSEndRepairPE1Seq" type="integer" value="3" min="0" label="Bases to trim end-repair sequences from ++/-+" help="To trim end-repair sequence from end of ++/-+ reads from Pair End WGBS Sequencing. 0: no trim; n (positive integer): trim n bases from end of ++/-+ reads. Default: trim 3 bases."/>
226 <param argument="--processPEOverlapSeq" type="select" label="Count once or twice the overlap sequence of two pairs" help="Two ends of paired-end reads may be overlapped in mapping. The overlap sequencce will be counted once or twice for cytosine methylation measurements. Default: once.">
227 <option value="1" selected="true">Once</option>
228 <option value="0">Twice</option>
229 </param>
230 <param argument="--trimRRBSEndRepairSeq" type="select" label="How to trim end-repair sequence for RRBS reads?" help="To trim end-repair sequence for RRBS reads. 0: no trim; 2: trim the last CG at exactly end of ++/-+ reads and trim the first CG at exactly begin of +-/-- reads like the WGBS situation. Default=2.">
231 <option value="2" selected="true">2</option>
232 <option value="0">0</option>
233 </param>
234 <param argument="--minFragSize" type="integer" value="0" min="0" label="Minimal fragment size for properly mapped reads" help="To retain properly mapped and large enough fragment sizes. The 9th field in the BAM file is the fragment size of the mapping, and non-properly-paired read has 0 at the 9th field. This option is set to require properly paired and large enough fragment size. Default=0 for all records."/>
235 <param argument="--minMMFragSize" type="integer" value="0" min="0" label="Minimal fragment size for multiply matched read" help="Same as --minFragSize but this option is only applicable to reads with flag 0x100 set as 1, i.e., reads multiply mapped. Default=0 for all records."/>
236 <param argument="--reportCpX" type="select" label="Generates CpG/A/C/T methylation?" help="To generate methylation for CpG, or CpA/CpC/CpT. Default=CpG.">
237 <option value="G" selected="true">CpG</option>
238 <option value="C">CpC</option>
239 <option value="A">CpA</option>
240 <option value="T">CpT</option>
241 </param>
156 </section> 242 </section>
157 <section name="mcomp_advanced" title="Advanced options for MCOMP" expanded="False"> 243 <section name="mcomp_advanced" title="Advanced options for MCOMP" expanded="False">
158 <conditional name="doComp"> 244 <conditional name="doComp">
159 <param name="compare_selector" type="select" label="Run the comparison or not" help="Yes: compare; No: do not compare, using the comparison result by `-c`"> 245 <param name="compare_selector" type="select" label="Run the comparison or not" help="Yes: compare; No: do not compare, using the comparison result by `-c`">
160 <option value="1">Yes</option> 246 <option value="1" selected="true">Yes</option>
161 <option value="0">No</option> 247 <option value="0">No</option>
162 </param> 248 </param>
163 <when value="0"> 249 <when value="0">
164 <param argument="-c" name="compFile" type="data" format="txt" label="Input comparison results" help="Previously generated comparison file from history"/> 250 <param argument="-c" name="compFile" type="data" format="txt" label="Input comparison results" help="Previously generated comparison file from history"/>
165 </when> 251 </when>
166 </conditional> 252 </conditional>
253 <param argument="-d" type="integer" value="3" min="0" label="Minimum depth for a site coverage" help="If a site has depth less than `d`, this site is ignored for statistical tests. This option affects much of nominal ratios but none of credible ratios. This option may be reset during later DMC/DMR rescan to filter sites with depth less than `d`. Default=3."/>
254 <param argument="--filterCredibleDif" type="float" value="-10" label="Minimum absolute credible methylation difference (CDIF)" help="If absolute value of CDIF for a site less than filterCredibleDif, this site is ignored for regional calculation. Use a small value, such as 0.01, to filter all sites with no difference; use 0.20 (for example) to select DMCs. Any negative number means no filter. Default=-10."/>
255 <param argument="--pFetDmc" type="float" value="0.05" min="0" max="1" label="Cutoff of Pvalue from Fisher Exact Test for DMC scan" help="Cutoff of P value from Fisher Exact Test for DMC scan. Default=`0.05`."/>
256 <param argument="--pFetDmr" type="float" value="0.05" min="0" max="1" label="Cutoff of Pvalue from Fisher Exact Test for DMR scan" help="Cutoff of P value from Fisher Exact Test for DMR scan. Default=`0.05`."/>
257 <param argument="--minNominalDif" type="float" value="0.33333" min="0" max="1" label="Minimal nominal methylation difference for DMC and DMR calling" help="Minimal nominal methylation difference for DMC and DMR. Default=`0.3333`."/>
258 <param argument="--minCredibleDif" type="float" value="0.2" min="0" max="1" label="Minimal credible methylation difference for DMC calling" help="Minimal CDIF for DMC calling. Default=`0.2`."/>
259 <param argument="--minDmcsInDmr" type="integer" value="3" min="1" label="Minimum number of DMCs in a DMR" help="Minimum number of DMCs in a DMR. Default=3."/>
260 <param argument="--maxDistConsDmcs" type="integer" value="300" min="1" label="Maximum distance between two consective DMCs for a DMR" help="Maximum distance between two consective DMCs for a DMR. Default=300."/>
167 </section> 261 </section>
168 <param name="output_selector" type="select" multiple="true" optional="true" label="Select output files" help=""> 262 <param name="output_selector" type="select" multiple="true" optional="true" label="Select output files" help="">
169 <option value="1"> dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr </option> 263 <option value="1" selected="true">dmc_M2_g1.G.bed_vs_g2.G.bed.txt</option>
170 <option value="2"> comp.g1.vs.g2.txt </option> 264 <option value="2" selected="true">dmr_M2_g1.G.bed_vs_g2.G.bed.txt</option>
171 <option value="3"> BAM files </option> 265 <option value="3" selected="true">comp.g1.vs.g2.txt</option>
172 <option value="4"> Methylation calling BED files </option> 266 <option value="4" selected="true">BAM files</option>
267 <option value="5" selected="true">Methylation calling BED files</option>
173 </param> 268 </param>
174 </inputs> 269 </inputs>
175 <outputs> 270 <outputs>
176 <data name="output1" format="interval" label="${tool.name} on ${on_string} : dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr"> 271 <data name="output1" format="interval" label="${tool.name} on ${on_string} : dmc_M2_g1.G.bed_vs_g2.G.bed.txt">
177 <filter> "1" in output_selector </filter> 272 <filter> "1" in output_selector </filter>
178 </data> 273 </data>
179 <data name="output2" format="interval" label="${tool.name} on ${on_string} : comp.g1.vs.g2.txt"> 274 <data name="output2" format="interval" label="${tool.name} on ${on_string} : dmr_M2_g1.G.bed_vs_g2.G.bed.txt">
180 <filter> "2" in output_selector </filter> 275 <filter> "2" in output_selector </filter>
181 </data> 276 </data>
277 <data name="output3" format="interval" label="${tool.name} on ${on_string} : comp.g1.vs.g2.txt">
278 <filter> "3" in output_selector </filter>
279 </data>
182 <collection name="output_collection_bam" type="list" label="BAM files"> 280 <collection name="output_collection_bam" type="list" label="BAM files">
183 <filter> "3" in output_selector </filter> 281 <filter> "4" in output_selector </filter>
184 <discover_datasets pattern="(?P&lt;designation&gt;.+\.bam$)" ext='bam'/> 282 <discover_datasets pattern="(?P&lt;designation&gt;.+\.bam$)" ext='bam'/>
185 </collection> 283 </collection>
186 <collection name="output_collection_bed" type="list" label="Methylation calling BED files"> 284 <collection name="output_collection_bed" type="list" label="Methylation calling BED files">
187 <filter> "4" in output_selector </filter> 285 <filter> "5" in output_selector </filter>
188 <discover_datasets pattern="(?P&lt;designation&gt;g[12]\.G\.bed$)" ext='interval'/> 286 <discover_datasets pattern="(?P&lt;designation&gt;g[12]\.G\.bed$)" ext='interval'/>
189 </collection> 287 </collection>
190 </outputs> 288 </outputs>
191 <tests> 289 <tests>
192 <test> 290 <test>
224 <!-- 322 <!--
225 <conditional name="doComp"> 323 <conditional name="doComp">
226 <param name="compare_selector" value="1"/> 324 <param name="compare_selector" value="1"/>
227 </conditional> 325 </conditional>
228 --> 326 -->
229 <param name="output_selector" value="1,2,3,4"/> 327 <param name="output_selector" value="1,2,3,4,5"/>
230 <output name="output1" file="SE_dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr" ftype="interval" lines_diff="1"/> 328 <output name="output1" file="SE_dmc_M2_g1.G.bed_vs_g2.G.bed.txt" ftype="interval" lines_diff="1"/>
231 <output name="output2" file="SE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/> 329 <output name="output2" file="SE_dmr_M2_g1.G.bed_vs_g2.G.bed.txt" ftype="interval" lines_diff="1"/>
330 <output name="output3" file="SE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/>
232 <output_collection name="output_collection_bam" count="4"> 331 <output_collection name="output_collection_bam" count="4">
233 <element name="g1_r1.bam" file="SE_g1_r1.bam" compare="sim_size"/> 332 <element name="g1_r1.bam" file="SE_g1_r1.bam" ftype="bam" lines_diff="2"/>
234 <element name="g1_r2.bam" file="SE_g1_r2.bam" compare="sim_size"/> 333 <element name="g1_r2.bam" file="SE_g1_r2.bam" ftype="bam" lines_diff="2"/>
235 <element name="g2_r1.bam" file="SE_g2_r1.bam" compare="sim_size"/> 334 <element name="g2_r1.bam" file="SE_g2_r1.bam" ftype="bam" lines_diff="2"/>
236 <element name="g2_r2.bam" file="SE_g2_r2.bam" compare="sim_size"/> 335 <element name="g2_r2.bam" file="SE_g2_r2.bam" ftype="bam" lines_diff="2"/>
237 </output_collection> 336 </output_collection>
238 <output_collection name="output_collection_bed" count="2"> 337 <output_collection name="output_collection_bed" count="2">
239 <element name="g1.G.bed" file="SE_g1.G.bed" ftype="interval" lines_diff="1"/> 338 <element name="g1.G.bed" file="SE_g1.G.bed" ftype="interval" lines_diff="1"/>
240 <element name="g2.G.bed" file="SE_g2.G.bed" ftype="interval" lines_diff="1"/> 339 <element name="g2.G.bed" file="SE_g2.G.bed" ftype="interval" lines_diff="1"/>
241 </output_collection> 340 </output_collection>
265 <!-- 364 <!--
266 <conditional name="doComp"> 365 <conditional name="doComp">
267 <param name="compare_selector" value="1"/> 366 <param name="compare_selector" value="1"/>
268 </conditional> 367 </conditional>
269 --> 368 -->
270 <param name="output_selector" value="1,2"/> 369 <param name="output_selector" value="1,2,3"/>
271 <output name="output1" file="PE_dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr" ftype="interval" lines_diff="1"/> 370 <output name="output1" file="PE_dmc_M2_g1.G.bed_vs_g2.G.bed.txt" ftype="interval" lines_diff="1"/>
272 <output name="output2" file="PE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/> 371 <output name="output2" file="PE_dmr_M2_g1.G.bed_vs_g2.G.bed.txt" ftype="interval" lines_diff="1"/>
372 <output name="output3" file="PE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/>
273 </test> 373 </test>
274 <test> 374 <test>
275 <!-- test paired collection --> 375 <!-- test paired collection -->
276 <param name="reference_source_selector" value="history"/> 376 <param name="reference_source_selector" value="history"/>
277 <param name="ref_file" ftype="fasta" value="seg.fa"/> 377 <param name="ref_file" ftype="fasta" value="seg.fa"/>
304 <!-- 404 <!--
305 <conditional name="doComp"> 405 <conditional name="doComp">
306 <param name="compare_selector" value="1"/> 406 <param name="compare_selector" value="1"/>
307 </conditional> 407 </conditional>
308 --> 408 -->
309 <param name="output_selector" value="1,2"/> 409 <param name="output_selector" value="1,2,3"/>
310 <output name="output1" file="PE_dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr" ftype="interval" lines_diff="1"/> 410 <output name="output1" file="PE_dmc_M2_g1.G.bed_vs_g2.G.bed.txt" ftype="interval" lines_diff="1"/>
311 <output name="output2" file="PE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/> 411 <output name="output2" file="PE_dmr_M2_g1.G.bed_vs_g2.G.bed.txt" ftype="interval" lines_diff="1"/>
412 <output name="output3" file="PE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/>
312 </test> 413 </test>
313 <test> 414 <test>
314 <!-- test data table reference --> 415 <!-- test data table reference -->
315 <param name="reference_source_selector" value="cached"/> 416 <param name="reference_source_selector" value="cached"/>
316 <param name="ref_file" value="chr11"/> 417 <param name="ref_file" value="chr11"/>
345 <!-- 446 <!--
346 <conditional name="doComp"> 447 <conditional name="doComp">
347 <param name="compare_selector" value="1"/> 448 <param name="compare_selector" value="1"/>
348 </conditional> 449 </conditional>
349 --> 450 -->
350 <param name="output_selector" value="1,2"/> 451 <param name="output_selector" value="1,2,3"/>
351 <output name="output1" file="SE_dmr_M3_g1.G.bed_vs_g2.G.bed.txt.dmr" ftype="interval" lines_diff="1"/> 452 <output name="output1" file="SE_dmc_M2_g1.G.bed_vs_g2.G.bed.txt" ftype="interval" lines_diff="1"/>
352 <output name="output2" file="SE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/> 453 <output name="output2" file="SE_dmr_M2_g1.G.bed_vs_g2.G.bed.txt" ftype="interval" lines_diff="1"/>
454 <output name="output3" file="SE_comp.g1.vs.g2.txt" ftype="interval" lines_diff="1"/>
353 </test> 455 </test>
354 </tests> 456 </tests>
355 <help> 457 <help>
356 <![CDATA[ 458 <![CDATA[
357 **MOABS: MOdel based Analysis of Bisulfite Sequencing data** 459 **MOABS: MOdel based Analysis of Bisulfite Sequencing data**
374 KO vs WT. Each group of reads may have combined sequencing library, i.e. 476 KO vs WT. Each group of reads may have combined sequencing library, i.e.
375 single-end reads and/or paired-end reads. Multiple replicates can be specified in each group. 477 single-end reads and/or paired-end reads. Multiple replicates can be specified in each group.
376 478
377 **Outputs** 479 **Outputs**
378 480
379 Four output files can be selected to report, namely 481 Five output files can be selected to report, namely
380 482
381 1. **DMR region file** - the major result file 483 1. **DMC site file** - the major DMC result file
382 2. **Comparison file between two groups** - the intermediate comparison result 484 2. **DMR region file** - the major DMR result file
383 3. **BAM files** - intermediate BAM files 485 3. **Comparison file between two groups** - the intermediate comparison result
384 4. **Methylation BED files** - intermediate methylation BED files 486 4. **BAM files** - intermediate BAM files
487 5. **Methylation BED files** - intermediate methylation BED files
385 488
386 ----- 489 -----
387 490
388 MOABS will detect differential methylated regions (DMRs) using the input BS-Seq 491 MOABS detects differentially methylated cytosines (DMCs) and differentially
389 reads. The output file is a tab-delimited text file (not strictly a BED 492 methylated regions (DMRs) using the input BS-Seq reads. The output DMC and DMR
390 format), representing DMRs. It has 8 columns as below. 493 files are tab-delimited text files (not strictly a BED format), representing
391 494 DMCs and DMRs.
392 chrom<TAB>start<TAB>end<TAB>methylation_state<TAB>CpGsites<TAB>DMCcount<TAB>nonDMCcount<TAB>hidden_state 495
496 A DMC site file has 15 columns as below.
497
498 chrom<TAB>start<TAB>end<TAB>totalC_0<TAB>nominalRatio_0<TAB>ratioCI_0<TAB>totalC_1<TAB>nominalRatio_1<TAB>ratioCI_1<TAB>nominalDif_1-0<TAB>credibleDif_1-0<TAB>difCI_1-0<TAB>p_sim_1_v_0<TAB>p_fet_1_v_0<TAB>class
499
500 1. **chrom** - The chromosome of the CpG site.
501 2. **start** - The start genomic locus of the CpG site.
502 3. **end** - The end genomic locus of the CpG site.
503 4. **totalC_0** - The total number of CpG read coverage in group 0.
504 5. **nominalRatio_0** - The nominal methylation ratio of the CpG in group 0.
505 6. **ratioCI_0** - The confidence interval (CI) of the nominal methylation ratio at the CpG site in group 0.
506 7. **totalC_1** - The total number of CpG read coverage in group 1.
507 8. **nominalRatio_1** - The nominal methylation ratio of the CpG in group 1.
508 9. **ratioCI_1** - The confidence interval (CI) of the nominal methylation ratio at the CpG site in group 1.
509 10. **nominalDif_1-0** - The nominal methylation difference between the group 1 and the group 0.
510 11. **credibleDif_1-0** - The credible methylation difference (CDIF) between the group 1 and the group 0.
511 12. **difCI_1-0** - The difference of ratio CIs between the group 1 and the group 0.
512 13. **p_sim_1_v_0** - P-value according to the similarity probabilities.
513 14. **p_fet_1_v_0** - P-value according to the Fisher exact test.
514 15. **class** - 5-state class labels by methylation differences and p-values.
515
516 For example, CpGs in the DMC file are recorded in the following format.
517
518 @DMCExample@
519
520 A DMR result file has 12 columns as below.
521
522 chrom<TAB>start<TAB>end<TAB>meanRatio_0<TAB>totalC_0<TAB>cSites_0<TAB>meanRatio_1<TAB>totalC_1<TAB>cSites_1<TAB>methDif_1-0<TAB>p_1_v_0<TAB>class_1_v_0
393 523
394 1. **chrom** - The chromosome of the region. 524 1. **chrom** - The chromosome of the region.
395 2. **start** - The start genomic locus of the region. 525 2. **start** - The start genomic locus of the region.
396 3. **end** - The end genomic locus of the region. 526 3. **end** - The end genomic locus of the region.
397 4. **methylation_state** - The methylation state of the region, "+"/"-" representing hyper- or hypo-methylation regions. 527 4. **meanRatio_0** - Mean methylation ratio of the region in group 0.
398 5. **CpGsites** - Total number of CpG sites in the region. 528 5. **totalC_0** - Total cytosine coverage of the region in group 0.
399 6. **DMCcount** - The number of differential methylated CpG sites (DMCs) in the region. 529 6. **cSites_0** - The number of CpG sites of the region in group 0.
400 7. **nonDMCcount** - The number of non-DMCs in the region. 530 7. **meanRatio_1** - Mean methylation ratio of the region in group 1.
401 8. **hidden_state** - The hidden state predicted by Hidden Markov Model (HMM), "1"/"-1" representing hyper- or hypo-methylation states. 531 8. **totalC_1** - Total cytosine coverage of the region in group 1.
402 532 9. **cSites_1** - The number of CpG sites of the region in group 1.
403 For example, six DMRs are identified in the following format. 533 10. **methDif_1-0** - Average methylation difference of the region between group 1 and group 0.
534 11. **p_1_v_0** - P-value from Fisher exact test of the region between group 1 and group 0.
535 12. **class_1_v_0** - 5-state class labels for the DMR.
536
537 For example, four DMRs are identified in the following format.
404 538
405 @DMRExample@ 539 @DMRExample@
406 540
407 ----- 541 -----
408 542