<!--
    Galaxy tool definition for MOSAiCS (ChIP-seq peak calling).

    The tool runs mosaics_wrapper.pl through the Perl interpreter. Every
    argument in the command template is positional, so the order of the
    lines inside the command element is part of the contract with the
    wrapper script and must not be rearranged.

    Layout of this file:
      * command : Cheetah template that builds the wrapper call
      * inputs  : read type (SET or PET), ChIP and control files, output
                  format, optional diagnostic reports, model and peak settings
      * outputs : peak file (tabular, BED or GFF) plus three optional reports
      * help    : reStructuredText shown below the tool form
-->
<tool id="MOSAiCS" name="MOSAiCS: MOdel-based one and two Sample Analysis and inference for ChIP-Seq Data" version="2.0.0">

    <description></description>

    <parallelism method="basic"></parallelism>

    <requirements>
        <requirement type="binary">R</requirement>
    </requirements>

    <command interpreter="perl">
        mosaics_wrapper.pl
            ## NOTE: all arguments below are positional; keep their order unchanged.
            ## ChIP file info
            $readFileType.chipParams.chip
            $readFileType.chipParams.chipFileFormat
            ## control file info
            $readFileType.controlParams.control
            $readFileType.controlParams.controlFileFormat
            ## peak file info
            $out_peak
            $OutfileFormat
            ## analysis type
            IO
            ## optional output
            $report_summary
            $report_gof
            $report_exploratory
            ## settings for model fitting and peak calling: required
            ## (form defaults: FALSE, FALSE, 0.05, 200, 200, 0)
            $readFileType.pet
            $by_chr
            $fdrLevel
            $fragLen
            $binSize
            $capping
            #if $fitParams.fSettingsType == "preSet"
                ## settings for model fitting and peak calling: optional
                ## These literals mirror the form defaults of the "full" branch
                ## (signalModel, bgEst, d, maxgap, minsize, thres); keep them in sync.
                BIC
                automatic
                0.25
                200
                50
                10
                ## setting for parallel computing (parallel, nCore)
                TRUE
                8
            #else
                $fitParams.signalModel
                $fitParams.bgEst
                $fitParams.d
                $fitParams.maxgap
                $fitParams.minsize
                $fitParams.thres
                $fitParams.parallel
                $fitParams.nCore
            #end if
    </command>

    <inputs>
        <!-- Read type decides which file formats are offered for ChIP and control samples. -->
        <conditional name="readFileType">
            <param name="pet" type="select" label="Paired-end tag (PET) or single-end tag (SET) data">
                <option value="FALSE">Single-end tag (SET) data</option>
                <option value="TRUE">Paired-end tag (PET) data</option>
            </param>
            <when value="FALSE"> <!-- SET -->
                <conditional name="chipParams">
                    <param name="chipFileFormat" type="select" label="Select file format for ChIP sample" help="MOSAiCS accepts aligned read files as input. MOSAiCS accepts Eland result, Eland extended, Eland export, Bowtie default, SAM, BED, and CSEM file formats for single-end tag (SET) data.">
                        <option value="eland_result">Eland result</option>
                        <option value="eland_extended">Eland extended</option>
                        <option value="eland_export">Eland export</option>
                        <option value="bowtie">Bowtie default</option>
                        <option value="sam">SAM</option>
                        <option value="bed">BED</option>
                        <option value="csem">CSEM</option>
                    </param>
                    <when value="eland_result">
                        <param name="chip" type="data" format="eland" label="Eland result file for ChIP sample"/>
                    </when>
                    <when value="eland_extended">
                        <param name="chip" type="data" format="eland" label="Eland extended file for ChIP sample"/>
                    </when>
                    <when value="eland_export">
                        <param name="chip" type="data" format="eland" label="Eland export file for ChIP sample"/>
                    </when>
                    <when value="bowtie">
                        <param name="chip" type="data" label="Bowtie default file for ChIP sample"/>
                    </when>
                    <when value="sam">
                        <param name="chip" type="data" format="sam" label="SAM file for ChIP sample"/>
                    </when>
                    <when value="bed">
                        <param name="chip" type="data" format="bed" label="BED file for ChIP sample"/>
                    </when>
                    <when value="csem">
                        <param name="chip" type="data" format="csem" label="CSEM file for ChIP sample"/>
                    </when>
                </conditional> <!-- chipParams -->

                <conditional name="controlParams">
                    <param name="controlFileFormat" type="select" label="Select file format for control sample" help="MOSAiCS accepts aligned read files as input. MOSAiCS accepts Eland result, Eland extended, Eland export, Bowtie default, SAM, BED, and CSEM file formats for single-end tag (SET) data.">
                        <option value="eland_result">Eland result</option>
                        <option value="eland_extended">Eland extended</option>
                        <option value="eland_export">Eland export</option>
                        <option value="bowtie">Bowtie default</option>
                        <option value="sam">SAM</option>
                        <option value="bed">BED</option>
                        <option value="csem">CSEM</option>
                    </param>
                    <when value="eland_result">
                        <param name="control" type="data" format="eland" label="Eland result file for control sample"/>
                    </when>
                    <when value="eland_extended">
                        <param name="control" type="data" format="eland" label="Eland extended file for control sample"/>
                    </when>
                    <when value="eland_export">
                        <param name="control" type="data" format="eland" label="Eland export file for control sample"/>
                    </when>
                    <when value="bowtie">
                        <param name="control" type="data" label="Bowtie default file for control sample"/>
                    </when>
                    <when value="sam">
                        <param name="control" type="data" format="sam" label="SAM file for control sample"/>
                    </when>
                    <when value="bed">
                        <param name="control" type="data" format="bed" label="BED file for control sample"/>
                    </when>
                    <when value="csem">
                        <param name="control" type="data" format="csem" label="CSEM file for control sample"/>
                    </when>
                </conditional> <!-- controlParams -->
            </when>
            <when value="TRUE"> <!-- PET -->
                <conditional name="chipParams">
                    <param name="chipFileFormat" type="select" label="Select file format for ChIP sample" help="MOSAiCS accepts aligned read files as input. MOSAiCS accepts Eland result and SAM file formats for paired-end tag (PET) data.">
                        <option value="eland_result">Eland result</option>
                        <option value="sam">SAM</option>
                    </param>
                    <when value="eland_result">
                        <param name="chip" type="data" format="eland" label="Eland result file for ChIP sample"/>
                    </when>
                    <when value="sam">
                        <param name="chip" type="data" format="sam" label="SAM file for ChIP sample"/>
                    </when>
                </conditional> <!-- chipParams -->

                <conditional name="controlParams">
                    <param name="controlFileFormat" type="select" label="Select file format for control sample" help="MOSAiCS accepts aligned read files as input. MOSAiCS accepts Eland result and SAM file formats for paired-end tag (PET) data.">
                        <option value="eland_result">Eland result</option>
                        <option value="sam">SAM</option>
                    </param>
                    <when value="eland_result">
                        <param name="control" type="data" format="eland" label="Eland result file for control sample"/>
                    </when>
                    <when value="sam">
                        <param name="control" type="data" format="sam" label="SAM file for control sample"/>
                    </when>
                </conditional> <!-- controlParams -->
            </when>
        </conditional><!-- readFileType -->

        <!-- Drives both the wrapper argument and the datatype of the out_peak output. -->
        <param name="OutfileFormat" type="select" label="Select file format for peak calling results" help="MOSAiCS can export peak calling results into BED or GFF file formats, or as a table.">
            <option value="bed">BED</option>
            <option value="gff">GFF</option>
            <option value="txt">table</option>
        </param>
        <!-- Each checkbox below enables the matching optional output through its filter. -->
        <param name="summary" type="boolean" truevalue="1" falsevalue="0" display="checkboxes" label="Reports for diagnostics: Summary of model fitting and peak calling" />
        <param name="gof" type="boolean" truevalue="1" falsevalue="0" display="checkboxes" label="Reports for diagnostics: Goodness of fit (GOF) plots" />
        <param name="exploratory" type="boolean" truevalue="1" falsevalue="0" display="checkboxes" label="Reports for diagnostics: Plots of exploratory analysis" />

        <param name="by_chr" type="select" label="Genome-wide analysis or chromosome-wise analysis" help="If genome-wide analysis is used, one model is fitted for all the chromosomes. If chromosome-wise analysis is used, different model is fitted for each chromosome separately.">
            <option value="FALSE">Genome-wide analysis</option>
            <option value="TRUE">Chromosome-wise analysis</option>
        </param>
        <param name="fdrLevel" type="float" value="0.05" min="0" max="1" label="False discovery rate (FDR)" help="FDR level for peak detection (default: 0.05)" />
        <!-- min="1": a fragment length or bin size of zero or less is not meaningful. -->
        <param name="fragLen" type="integer" value="200" min="1" label="Average fragment length" help="Default: 200." />
        <param name="binSize" type="integer" value="200" min="1" label="Bin size" help="By default, bin size equals to the average fragment length." />
        <!-- capping intentionally has no lower bound: non-positive values disable filtering. -->
        <param name="capping" type="integer" value="0" label="Maximum number of reads allowed to start at each nucleotide position" help="If non-positive value is specified (e.g., 0), any number of reads are allowed at each nucleotide position (i.e., no filtering). By default, filtering is NOT used." />

        <conditional name="fitParams">
            <param name="fSettingsType" type="select" label="Settings for model fitting and peak calling" help="For most peak calling applications, use the 'Commonly used' setting. If you want access to all parameters, use 'Full parameter list'.">
                <option value="preSet">Commonly used</option>
                <option value="full">Full parameter list</option>
            </param>
            <!-- preSet exposes no params; the command template supplies fixed values instead. -->
            <when value="preSet" />
            <when value="full">
                <param name="signalModel" type="select" label="Signal model" help="By default, signal model is chosen using BIC.">
                    <option value="BIC">Automatic model selection based on BIC</option>
                    <option value="1S">One-signal-component model</option>
                    <option value="2S">Two-signal-component model</option>
                </param>
                <param name="bgEst" type="select" label="Background estimation approach" help="By default, background estimation approach is automatically determined based on the data.">
                    <option value="automatic">Automatic selection based on the data</option>
                    <option value="matchLow">Based on bins with low tag counts</option>
                    <option value="rMOM">Robust method of moment (MOM)</option>
                </param>
                <param name="d" type="float" value="0.25" label="d" help="Parameter for estimating background distribution. Default is 0.25." />
                <param name="maxgap" type="integer" value="200" label="maxgap" help="Initial nearby peaks are merged if the distance (in bp) between them is less than 'maxgap'. Default is 200." />
                <param name="minsize" type="integer" value="50" label="minsize" help="An initial peak is removed if its width is narrower than 'minsize'. Default is 50." />
                <param name="thres" type="integer" value="10" label="thres" help="A bin within initial peak is removed if its ChIP tag counts are less than 'thres'. Default is 10." />
                <param name="parallel" type="select" label="Use parallel computing?">
                    <option value="TRUE">Use parallel computing.</option>
                    <option value="FALSE">NOT use parallel computing.</option>
                </param>
                <!-- min="1": at least one CPU is needed when parallel computing is used. -->
                <param name="nCore" type="integer" value="8" min="1" label="Number of CPUs" help="Number of CPUs used for parallel computing. Relevant only when parallel computing is used. Default is to use 8 CPUs." />
            </when> <!-- full -->
        </conditional> <!-- fitParams -->
    </inputs>

    <outputs>
        <!-- Peak list: tabular unless OutfileFormat selects BED or GFF. -->
        <data format="tabular" name="out_peak">
            <change_format>
                <when input="OutfileFormat" value="bed" format="bed" />
                <when input="OutfileFormat" value="gff" format="gff" />
            </change_format>
        </data>
        <!-- Optional diagnostic reports, created only when the matching checkbox is ticked. -->
        <data format="txt" name="report_summary">
            <filter>summary == 1</filter>
        </data>
        <data format="pdf" name="report_gof">
            <filter>gof == 1</filter>
        </data>
        <data format="pdf" name="report_exploratory">
            <filter>exploratory == 1</filter>
        </data>
    </outputs>

    <help>

**What it does**

MOSAiCS is a statistical framework for the analysis of ChIP-seq data and it stands for MOdel-based one and two Sample Analysis and Inference for ChIP-Seq Data. MOSAiCS is based on a flexible parametric mixture modeling approach for detecting peaks (i.e., enriched regions).
MOSAiCS is also available in Bioconductor_ as an R package.
We encourage questions or requests regarding MOSAiCS to be posted on our `Google group`_.

Please cite: Kuan PF, Chung D, Pan G, Thomson JA, Stewart R, and Keles S (2011), "`A statistical framework for the analysis of ChIP-Seq data`_," *Journal of the American Statistical Association*, Vol. 106, pp. 891--903.

.. _Bioconductor: http://www.bioconductor.org/help/bioc-views/2.11/bioc/html/mosaics.html
.. _Google group: http://groups.google.com/group/mosaics_user_group
.. _A statistical framework for the analysis of ChIP-Seq data: http://pubs.amstat.org/doi/abs/10.1198/jasa.2011.ap09706

------

**Input formats**

MOSAiCS accepts aligned read files of ChIP and control samples as input. Currently, MOSAiCS accepts Eland result, Eland extended, Eland export, Bowtie default, SAM, BED, and CSEM formats for single-end tag (SET) data. For paired-end tag (PET) data, MOSAiCS accepts Eland result and SAM formats.

------

**Outputs**

Peak calling results of MOSAiCS can be exported into BED or GFF file formats, or as a table. Each line of the output file specifies a single peak.

If the output is a table, it has the following columns::

    Column    Description
    --------  --------------------------------------------------------
    1         Chromosome of the peak
    2         Start position of the peak
    3         End position of the peak
    4         Width of the peak
    5         Averaged posterior probability of the peak
    6         Minimum posterior probability of the peak
    7         Averaged ChIP tag counts of the peak
    8         Maximum ChIP tag counts of the peak
    9         Averaged control tag counts of the peak
    10        Averaged control tag counts of the peak, scaled by sequencing depth
    11        Averaged log base 2 ratio of ChIP over input tag counts

If the output is in BED format, it has the following columns::

    Column        Description
    ------------  --------------------------------------------------------
    1 chrom       Chromosome of the peak
    2 chromStart  Start position of the peak
    3 chromEnd    End position of the peak
    4 name        Always "MOSAiCS_peak"
    5 score       Averaged ChIP tag counts of the peak

If the output is in GFF format, it has the following columns::

    Column     Description
    ---------  --------------------------------------------------------
    1 seqname  Chromosome of the peak
    2 source   Always "MOSAiCS"
    3 feature  Always "MOSAiCS_peak"
    4 start    Start position of the peak
    5 end      End position of the peak
    6 score    Averaged ChIP tag counts of the peak
    7 strand   Always "."
    8 frame    Always "."
    9 group    Always "."

------

**Reports for diagnostics**

*Summary of model fitting and peak calling*: This report provides information about input and output files, parameter settings used for model fitting and peak calling, and a brief summary of peak calling results.

*Goodness of fit (GOF) plots*: This report allows visual comparisons of the fits of the background, one-signal-component, and two-signal-component models with the actual data.

*Plots of exploratory analysis*: This report provides the histograms of ChIP and control samples and the scatter plots of ChIP versus control tag counts.

More details regarding these reports can be found here_:

------

**Settings for model fitting and peak calling**

More details about the tuning of these parameters can be found here_:

.. _here: http://www.bioconductor.org/packages/2.11/bioc/vignettes/mosaics/inst/doc/mosaics-example.pdf

    </help>
</tool>