comparison mosaics.xml @ 0:b2567f7ff12f

Uploaded
author dongjun
date Wed, 21 Sep 2011 03:27:06 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:b2567f7ff12f
1 <tool id="MOSAiCS" name="MOSAiCS: MOdel-based one and two Sample Analysis and inference for ChIP-Seq Data" version="1.0.0">
2
3 <description></description> <!-- left empty; the expanded acronym is already part of the tool's name attribute -->
4
5 <parallelism method="basic"></parallelism> <!-- NOTE(review): presumably enables Galaxy's basic job splitting; confirm it is wanted alongside the core count passed at the end of the command section -->
6
7 <requirements> <!-- declared dependencies of this tool -->
8 <requirement type="binary">R</requirement> <!-- only R is declared; the perl interpreter named on the command element is not listed here -->
9 </requirements>
10
11 <command interpreter="perl">
12 mosaics_wrapper.pl
13 ## All arguments are positional; keep the order below in sync with mosaics_wrapper.pl. Input datasets first (ChIP, then control)
14 $chipParams.chip
15 $controlParams.control
16 ## aligned-read file format of each input (ChIP, then control)
17 $chipParams.chipFileFormat
18 $controlParams.controlFileFormat
19 ## output dataset that receives the peak list
20 $out_peak
21 ## peak list format chosen by the user (bed, gff or txt)
22 $OutfileFormat
23 ## analysis type, always passed as the literal IO
24 IO
25 ## optional diagnostic report datasets (summary, goodness-of-fit plots, exploratory plots)
26 $report_summary
27 $report_gof
28 $report_exploratory
29 ## required settings for model fitting and peak calling; defaults are declared on the matching params in the inputs section
30 $fdrLevel
31 $fragLen
32 $binSize
33 $capping
34 ## optional settings: the preSet branch passes fixed literals that equal the defaults of the "full" parameter list
35 #if $fitParams.fSettingsType == "preSet"
36 BIC
37 0.25
38 200
39 50
40 10
41 #else
42 $fitParams.signalModel
43 $fitParams.d
44 $fitParams.maxgap
45 $fitParams.minsize
46 $fitParams.thres
47 #end if
48 ## number of cores: use the slots Galaxy allocated to this job, falling back to the previous fixed value of 8 when GALAXY_SLOTS is unset
49 \${GALAXY_SLOTS:-8}
50 </command>
51
52 <inputs>
53 <conditional name="chipParams"> <!-- ChIP sample: the selected format decides which "chip" dataset input is offered -->
54 <param name="chipFileFormat" type="select" label="Select file format for ChIP sample" help="MOSAiCS can accept aligned read files."> <!-- the chosen value is also passed to the wrapper as the ChIP file format -->
55 <option value="eland_result">Eland result</option>
56 <option value="eland_extended">Eland extended</option>
57 <option value="eland_export">Eland export</option>
58 <option value="bowtie">Bowtie default</option>
59 <option value="sam">SAM</option>
60 </param>
61 <when value="eland_result"> <!-- all three Eland variants accept datasets of format "eland" and differ only in the label -->
62 <param name="chip" type="data" format="eland" label="Eland result file for ChIP sample"/>
63 </when>
64 <when value="eland_extended">
65 <param name="chip" type="data" format="eland" label="Eland extended file for ChIP sample"/>
66 </when>
67 <when value="eland_export">
68 <param name="chip" type="data" format="eland" label="Eland export file for ChIP sample"/>
69 </when>
70 <when value="bowtie">
71 <param name="chip" type="data" label="Bowtie default file for ChIP sample"/> <!-- no format attribute on this input, unlike the other branches -->
72 </when>
73 <when value="sam">
74 <param name="chip" type="data" format="sam" label="SAM file for ChIP sample"/>
75 </when>
76 </conditional> <!-- chipParams -->
77 <conditional name="controlParams"> <!-- control sample: same layout as chipParams, with a "control" dataset input per format -->
78 <param name="controlFileFormat" type="select" label="Select file format for control sample" help="MOSAiCS can accept aligned read files."> <!-- the chosen value is also passed to the wrapper as the control file format -->
79 <option value="eland_result">Eland result</option>
80 <option value="eland_extended">Eland extended</option>
81 <option value="eland_export">Eland export</option>
82 <option value="bowtie">Bowtie default</option>
83 <option value="sam">SAM</option>
84 </param>
85 <when value="eland_result"> <!-- all three Eland variants accept datasets of format "eland" and differ only in the label -->
86 <param name="control" type="data" format="eland" label="Eland result file for control sample"/>
87 </when>
88 <when value="eland_extended">
89 <param name="control" type="data" format="eland" label="Eland extended file for control sample"/>
90 </when>
91 <when value="eland_export">
92 <param name="control" type="data" format="eland" label="Eland export file for control sample"/>
93 </when>
94 <when value="bowtie">
95 <param name="control" type="data" label="Bowtie default file for control sample"/> <!-- no format attribute on this input, unlike the other branches -->
96 </when>
97 <when value="sam">
98 <param name="control" type="data" format="sam" label="SAM file for control sample"/>
99 </when>
100 </conditional> <!-- controlParams -->
101
102 <param name="OutfileFormat" type="select" label="Select file format for peak calling results" help="MOSAiCS can export peak calling results into BED or GFF file formats, or as a table."> <!-- passed to the wrapper and also read by change_format on the out_peak output -->
103 <option value="bed">BED</option>
104 <option value="gff">GFF</option>
105 <option value="txt">table</option>
106 </param>
107 <param name="summary" type="boolean" truevalue="1" falsevalue="0" display="checkboxes" label="Reports for diagnostics: Summary of model fitting and peak calling" /> <!-- this and the next two checkboxes are tested by the filter elements of the report_* outputs -->
108 <param name="gof" type="boolean" truevalue="1" falsevalue="0" display="checkboxes" label="Reports for diagnostics: Goodness of fit (GOF) plots" />
109 <param name="exploratory" type="boolean" truevalue="1" falsevalue="0" display="checkboxes" label="Reports for diagnostics: Plots of exploratory analysis" />
110
111 <param name="fdrLevel" type="float" value="0.05" min="0" max="1" label="False discovery rate (FDR)" help="FDR level for peak detection (default: 0.05)" /> <!-- the only numeric setting here that carries min/max bounds -->
112 <param name="fragLen" type="integer" value="200" label="Average fragment length" help="Default: 200." />
113 <param name="binSize" type="integer" value="200" label="Bin size" help="By default, bin size equals to the average fragment length." /> <!-- the default is the literal 200; it is not derived from fragLen, so changing fragLen does not change it -->
114 <param name="capping" type="integer" value="3" label="Maximum number of reads allowed to start at each nucleotide position" help="Small value (e.g., 3) are recommended for the ChIP-seq data with low sequencing depth and large value (e.g., 10000) for the ChIP-seq data with high sequencing depth." />
115
116 <conditional name="fitParams"> <!-- chooses between fixed literals and user-supplied settings for model fitting and peak calling -->
117 <param name="fSettingsType" type="select" label="Settings for model fitting and peak calling" help="For most peak calling applications, use the 'Commonly used' setting. If you want access to all parameters, use 'Full parameter list'.">
118 <option value="preSet">Commonly used</option>
119 <option value="full">Full parameter list</option>
120 </param>
121 <when value="preSet" /> <!-- no inputs: for this choice the command section passes the literals BIC, 0.25, 200, 50 and 10 -->
122 <when value="full"> <!-- the defaults below equal the preSet literals, in the same order -->
123 <param name="signalModel" type="select" label="Signal model" help="By default, signal model is chosen using BIC. Instead, user can specify signal model among one or two signal component models.">
124 <option value="BIC">Automatic model selection based on BIC</option>
125 <option value="1S">One-signal-component model</option>
126 <option value="2S">Two-signal-component model</option>
127 </param>
128 <param name="d" type="float" value="0.25" label="d" help="Parameter for estimating background distribution. Default is 0.25." />
129 <param name="maxgap" type="integer" value="200" label="maxgap" help="Initial nearby peaks are merged if the distance (in bp) between them is less than 'maxgap'. Default is 200." />
130 <param name="minsize" type="integer" value="50" label="minsize" help="An initial peak is removed if its width is narrower than 'minsize'. Default is 50." />
131 <param name="thres" type="integer" value="10" label="thres" help="A bin within initial peak is removed if its ChIP tag counts are less than 'thres'. Default is 10." />
132 </when> <!-- full -->
133 </conditional> <!-- fitParams -->
134 </inputs>
135
136 <outputs> <!-- one peak list plus up to three optional diagnostic reports -->
137 <data format="tabular" name="out_peak"> <!-- peak list; stays tabular unless a change_format rule below matches -->
138 <change_format>
139 <when input="OutfileFormat" value="bed" format="bed" />
140 <when input="OutfileFormat" value="gff" format="gff" />
141 </change_format> <!-- there is no rule for "txt", so the table output keeps the tabular format -->
142 </data>
143 <data format="txt" name="report_summary"> <!-- text report, filtered on the "summary" checkbox -->
144 <filter>summary == 1</filter>
145 </data>
146 <data format="pdf" name="report_gof"> <!-- PDF report, filtered on the "gof" checkbox -->
147 <filter>gof == 1</filter>
148 </data>
149 <data format="pdf" name="report_exploratory"> <!-- PDF report, filtered on the "exploratory" checkbox -->
150 <filter>exploratory == 1</filter>
151 </data>
152 </outputs>
153
154 <help>
155
156 **What it does**
157
158 MOSAiCS is a statistical framework for the analysis of ChIP-seq data and it stands for MOdel-based one and two Sample Analysis and Inference for ChIP-Seq Data. MOSAiCS is based on a flexible parametric mixture modeling approach for detecting peaks (i.e., enriched regions).
159 MOSAiCS is also available in Bioconductor_ as an R package.
160 We encourage questions or requests regarding MOSAiCS to be posted on our `Google group`_.
161
162 Please cite: Kuan PF, Chung D, Pan G, Thomson JA, Stewart R, and Keles S (2011), "`A statistical framework for the analysis of ChIP-Seq data`_," To appear in the *Journal of the American Statistical Association*.
163
164 .. _Bioconductor: http://www.bioconductor.org/help/bioc-views/2.8/bioc/html/mosaics.html
165 .. _Google group: http://groups.google.com/group/mosaics_user_group
166 .. _A statistical framework for the analysis of ChIP-Seq data: http://pubs.amstat.org/doi/abs/10.1198/jasa.2011.ap09706
167
168 ------
169
170 **Input formats**
171
172 MOSAiCS accepts aligned read files of ChIP and control samples as input. Currently, MOSAiCS accepts single-end reads, in Eland result, Eland extended, Eland export, Bowtie default, and SAM formats.
173
174 ------
175
176 **Outputs**
177
178 Peak calling results of MOSAiCS can be exported into BED or GFF file formats, or as a table. Each line of the output file specifies a single peak.
179
180 If the output is a table, it has the following columns::
181
182 Column Description
183 -------- --------------------------------------------------------
184 1 Chromosome of the peak
185 2 Start position of the peak
186 3 End position of the peak
187 4 Width of the peak
188 5 Averaged posterior probability of the peak
189 6 Minimum posterior probability of the peak
190 7 Averaged ChIP tag counts of the peak
191 8 Maximum ChIP tag counts of the peak
192 9 Averaged control tag counts of the peak
193 10 Averaged control tag counts of the peak, scaled by sequencing depth
194 11 Averaged log base 2 ratio of ChIP over input tag counts
195
196 If the output is in BED format, it has the following columns::
197
198 Column Description
199 ------------ --------------------------------------------------------
200 1 chrom Chromosome of the peak
201 2 chromStart Start position of the peak
202 3 chromEnd End position of the peak
203 4 name Always "MOSAiCS_peak"
204 5 score Averaged ChIP tag counts of the peak
205
206 If the output is in GFF format, it has the following columns::
207
208 Column Description
209 --------- --------------------------------------------------------
210 1 seqname Chromosome of the peak
211 2 source Always "MOSAiCS"
212 3 feature Always "MOSAiCS_peak"
213 4 start Start position of the peak
214 5 end End position of the peak
215 6 score Averaged ChIP tag counts of the peak
216 7 strand Always "."
217 8 frame Always "."
218 9 group Always "."
219
220 ------
221
222 **Reports for diagnostics**
223
224 *Summary of model fitting and peak calling*: This report provides information about input and output files, parameter settings used for model fitting and peak calling, and a brief summary of peak calling results.
225
226 *Goodness of fit (GOF) plots*: This report allows visual comparisons of the fits of the background, one-signal-component, and two-signal-component models with the actual data.
227
228 *Plots of exploratory analysis*: This report provides the histograms of ChIP and control samples and the scatter plots of ChIP versus control tag counts.
229
230 More details regarding these reports can be found here_.
231
232 ------
233
234 **Settings for model fitting and peak calling**
235
236 More details about the tuning of these parameters can be found here_.
237
238 .. _here: http://www.bioconductor.org/packages/2.8/bioc/vignettes/mosaics/inst/doc/mosaics-example.pdf
239
240 </help>
241 </tool>