<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy tool definition for MOSAiCS (peak calling on ChIP-seq data).

  The tool runs mosaics_wrapper.pl through perl and hands it purely
  positional arguments, so the ORDER of the lines inside <command> is part
  of the wrapper's contract: do not reorder, add, or drop a line there
  without changing the wrapper as well.
-->
<tool id="MOSAiCS" name="MOSAiCS: MOdel-based one and two Sample Analysis and inference for ChIP-Seq Data" version="1.0.0">

    <description></description>

    <parallelism method="basic"></parallelism>

    <requirements>
        <requirement type="binary">R</requirement>
    </requirements>

    <!--
      Cheetah template. Lines starting with "##" are template comments and
      never reach the command line; every other line becomes one positional
      argument of mosaics_wrapper.pl.
    -->
    <command interpreter="perl">
        mosaics_wrapper.pl
        ## input file name (chip and control)
        $chipParams.chip
        $controlParams.control
        ## input file format (chip and control)
        $chipParams.chipFileFormat
        $controlParams.controlFileFormat
        ## peak file name
        $out_peak
        ## peak file format
        $OutfileFormat
        ## analysis type
        IO
        ## optional output (report files selected by the summary / gof / exploratory checkboxes)
        $report_summary
        $report_gof
        $report_exploratory
        ## settings for model fitting and peak calling: required
        ## (form defaults: FDR 0.05, fragment length 200, bin size 200, capping 3)
        $fdrLevel
        $fragLen
        $binSize
        $capping
        ## settings for model fitting and peak calling: optional
        ## ("preSet" passes the same values the "full" form offers as defaults)
        #if $fitParams.fSettingsType == "preSet"
            BIC
            0.25
            200
            50
            10
        #else
            $fitParams.signalModel
            $fitParams.d
            $fitParams.maxgap
            $fitParams.minsize
            $fitParams.thres
        #end if
        ## Number of cores to use: the slots Galaxy allocated to this job when
        ## GALAXY_SLOTS is exported, otherwise the previous fixed value of 8.
        ## The backslash keeps Cheetah from expanding the shell variable.
        \${GALAXY_SLOTS:-8}
    </command>

    <inputs>
        <!-- ChIP sample: the chosen format decides which dataset selector is shown. -->
        <conditional name="chipParams">
            <param name="chipFileFormat" type="select" label="Select file format for ChIP sample" help="MOSAiCS can accept aligned read files.">
                <option value="eland_result">Eland result</option>
                <option value="eland_extended">Eland extended</option>
                <option value="eland_export">Eland export</option>
                <option value="bowtie">Bowtie default</option>
                <option value="sam">SAM</option>
            </param>
            <when value="eland_result">
                <param name="chip" type="data" format="eland" label="Eland result file for ChIP sample"/>
            </when>
            <when value="eland_extended">
                <param name="chip" type="data" format="eland" label="Eland extended file for ChIP sample"/>
            </when>
            <when value="eland_export">
                <param name="chip" type="data" format="eland" label="Eland export file for ChIP sample"/>
            </when>
            <when value="bowtie">
                <!-- No format attribute here, so the selector is not restricted to one datatype. -->
                <param name="chip" type="data" label="Bowtie default file for ChIP sample"/>
            </when>
            <when value="sam">
                <param name="chip" type="data" format="sam" label="SAM file for ChIP sample"/>
            </when>
        </conditional> <!-- chipParams -->

        <!-- Control sample: mirrors the ChIP conditional above. -->
        <conditional name="controlParams">
            <param name="controlFileFormat" type="select" label="Select file format for control sample" help="MOSAiCS can accept aligned read files.">
                <option value="eland_result">Eland result</option>
                <option value="eland_extended">Eland extended</option>
                <option value="eland_export">Eland export</option>
                <option value="bowtie">Bowtie default</option>
                <option value="sam">SAM</option>
            </param>
            <when value="eland_result">
                <param name="control" type="data" format="eland" label="Eland result file for control sample"/>
            </when>
            <when value="eland_extended">
                <param name="control" type="data" format="eland" label="Eland extended file for control sample"/>
            </when>
            <when value="eland_export">
                <param name="control" type="data" format="eland" label="Eland export file for control sample"/>
            </when>
            <when value="bowtie">
                <!-- No format attribute here, so the selector is not restricted to one datatype. -->
                <param name="control" type="data" label="Bowtie default file for control sample"/>
            </when>
            <when value="sam">
                <param name="control" type="data" format="sam" label="SAM file for control sample"/>
            </when>
        </conditional> <!-- controlParams -->

        <!-- Output format; also drives the datatype of out_peak via <change_format>. -->
        <param name="OutfileFormat" type="select" label="Select file format for peak calling results" help="MOSAiCS can export peak calling results into BED or GFF file formats, or as a table.">
            <option value="bed">BED</option>
            <option value="gff">GFF</option>
            <option value="txt">table</option>
        </param>

        <!-- Each checkbox enables the matching optional report in <outputs>. -->
        <param name="summary" type="boolean" truevalue="1" falsevalue="0" display="checkboxes" label="Reports for diagnostics: Summary of model fitting and peak calling" />
        <param name="gof" type="boolean" truevalue="1" falsevalue="0" display="checkboxes" label="Reports for diagnostics: Goodness of fit (GOF) plots" />
        <param name="exploratory" type="boolean" truevalue="1" falsevalue="0" display="checkboxes" label="Reports for diagnostics: Plots of exploratory analysis" />

        <!-- Settings that are always passed to the wrapper. -->
        <param name="fdrLevel" type="float" value="0.05" min="0" max="1" label="False discovery rate (FDR)" help="FDR level for peak detection (default: 0.05)" />
        <param name="fragLen" type="integer" value="200" label="Average fragment length" help="Default: 200." />
        <param name="binSize" type="integer" value="200" label="Bin size" help="By default, bin size is equal to the average fragment length." />
        <param name="capping" type="integer" value="3" label="Maximum number of reads allowed to start at each nucleotide position" help="Small values (e.g., 3) are recommended for ChIP-seq data with low sequencing depth and large values (e.g., 10000) for ChIP-seq data with high sequencing depth." />

        <!-- "preSet" exposes no extra fields; <command> then passes fixed values instead. -->
        <conditional name="fitParams">
            <param name="fSettingsType" type="select" label="Settings for model fitting and peak calling" help="For most peak calling applications, use the 'Commonly used' setting. If you want access to all parameters, use 'Full parameter list'.">
                <option value="preSet">Commonly used</option>
                <option value="full">Full parameter list</option>
            </param>
            <when value="preSet" />
            <when value="full">
                <param name="signalModel" type="select" label="Signal model" help="By default, the signal model is chosen using BIC. Alternatively, the user can specify either the one- or the two-signal-component model.">
                    <option value="BIC">Automatic model selection based on BIC</option>
                    <option value="1S">One-signal-component model</option>
                    <option value="2S">Two-signal-component model</option>
                </param>
                <param name="d" type="float" value="0.25" label="d" help="Parameter for estimating background distribution. Default is 0.25." />
                <param name="maxgap" type="integer" value="200" label="maxgap" help="Initial nearby peaks are merged if the distance (in bp) between them is less than 'maxgap'. Default is 200." />
                <param name="minsize" type="integer" value="50" label="minsize" help="An initial peak is removed if its width is narrower than 'minsize'. Default is 50." />
                <param name="thres" type="integer" value="10" label="thres" help="A bin within an initial peak is removed if its ChIP tag counts are less than 'thres'. Default is 10." />
            </when> <!-- full -->
        </conditional> <!-- fitParams -->
    </inputs>

    <outputs>
        <!-- Peak list: tabular unless BED or GFF was requested. -->
        <data format="tabular" name="out_peak">
            <change_format>
                <when input="OutfileFormat" value="bed" format="bed" />
                <when input="OutfileFormat" value="gff" format="gff" />
            </change_format>
        </data>
        <!-- Optional diagnostic reports, each gated by its checkbox in <inputs>. -->
        <data format="txt" name="report_summary">
            <filter>summary == 1</filter>
        </data>
        <data format="pdf" name="report_gof">
            <filter>gof == 1</filter>
        </data>
        <data format="pdf" name="report_exploratory">
            <filter>exploratory == 1</filter>
        </data>
    </outputs>

    <!-- reStructuredText; the indented tables below are literal blocks, so their indentation is significant. -->
    <help>

**What it does**

MOSAiCS is a statistical framework for the analysis of ChIP-seq data and it stands for MOdel-based one and two Sample Analysis and Inference for ChIP-Seq Data. MOSAiCS is based on a flexible parametric mixture modeling approach for detecting peaks (i.e., enriched regions).
MOSAiCS is also available in Bioconductor_ as an R package.
We encourage questions or requests regarding MOSAiCS to be posted on our `Google group`_.

Please cite: Kuan PF, Chung D, Pan G, Thomson JA, Stewart R, and Keles S (2011), "`A statistical framework for the analysis of ChIP-Seq data`_," To appear in the *Journal of the American Statistical Association*.

.. _Bioconductor: http://www.bioconductor.org/help/bioc-views/2.8/bioc/html/mosaics.html
.. _Google group: http://groups.google.com/group/mosaics_user_group
.. _A statistical framework for the analysis of ChIP-Seq data: http://pubs.amstat.org/doi/abs/10.1198/jasa.2011.ap09706

------

**Input formats**

MOSAiCS accepts aligned read files of ChIP and control samples as input. Currently, MOSAiCS accepts single-end reads, in Eland result, Eland extended, Eland export, Bowtie default, and SAM formats.

------

**Outputs**

Peak calling results of MOSAiCS can be exported into BED or GFF file formats, or as a table. Each line of the output file specifies a single peak.

If the output is a table, it has the following columns::

    Column    Description
    --------  --------------------------------------------------------
    1         Chromosome of the peak
    2         Start position of the peak
    3         End position of the peak
    4         Width of the peak
    5         Averaged posterior probability of the peak
    6         Minimum posterior probability of the peak
    7         Averaged ChIP tag counts of the peak
    8         Maximum ChIP tag counts of the peak
    9         Averaged control tag counts of the peak
    10        Averaged control tag counts of the peak, scaled by sequencing depth
    11        Averaged log base 2 ratio of ChIP over input tag counts

If the output is in BED format, it has the following columns::

    Column        Description
    ------------  --------------------------------------------------------
    1 chrom       Chromosome of the peak
    2 chromStart  Start position of the peak
    3 chromEnd    End position of the peak
    4 name        Always "MOSAiCS_peak"
    5 score       Averaged ChIP tag counts of the peak

If the output is in GFF format, it has the following columns::

    Column     Description
    ---------  --------------------------------------------------------
    1 seqname  Chromosome of the peak
    2 source   Always "MOSAiCS"
    3 feature  Always "MOSAiCS_peak"
    4 start    Start position of the peak
    5 end      End position of the peak
    6 score    Averaged ChIP tag counts of the peak
    7 strand   Always "."
    8 frame    Always "."
    9 group    Always "."

------

**Reports for diagnostics**

*Summary of model fitting and peak calling*: This report provides information about input and output files, parameter settings used for model fitting and peak calling, and a brief summary of peak calling results.

*Goodness of fit (GOF) plots*: This report allows visual comparisons of the fits of the background, one-signal-component, and two-signal-component models with the actual data.

*Plots of exploratory analysis*: This report provides the histograms of ChIP and control samples and the scatter plots of ChIP versus control tag counts.

More details regarding these reports can be found here_.

------

**Settings for model fitting and peak calling**

More details about the tuning of these parameters can be found here_.

.. _here: http://www.bioconductor.org/packages/2.8/bioc/vignettes/mosaics/inst/doc/mosaics-example.pdf

    </help>
</tool>