Mercurial > repos > dongjun > mosaics
comparison mosaics.xml @ 0:b2567f7ff12f
Uploaded
author | dongjun |
---|---|
date | Wed, 21 Sep 2011 03:27:06 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:b2567f7ff12f |
---|---|
1 <tool id="MOSAiCS" name="MOSAiCS: MOdel-based one and two Sample Analysis and inference for ChIP-Seq Data" version="1.0.0"> | |
2 | |
3 <description></description> | |
4 | |
5 <parallelism method="basic"></parallelism> | |
6 | |
7 <requirements> | |
8 <requirement type="binary">R</requirement> | |
9 </requirements> | |
10 | |
11 <command interpreter="perl"> | |
12 mosaics_wrapper.pl | |
13 ## positional arguments for the wrapper follow, one per line; order matters | |
14 $chipParams.chip | |
15 $controlParams.control | |
16 ## input file format (chip and control) | |
17 $chipParams.chipFileFormat | |
18 $controlParams.controlFileFormat | |
19 ## peak file name | |
20 $out_peak | |
21 ## peak file format (bed, gff or txt) | |
22 $OutfileFormat | |
23 ## analysis type (fixed value, not exposed in the form) | |
24 IO | |
25 ## optional report outputs (summary text, GOF plots, exploratory plots) | |
26 $report_summary | |
27 $report_gof | |
28 $report_exploratory | |
29 ## settings for model fitting and peak calling: required (form defaults: FDR 0.05, fragment length 200, bin size 200, capping 3) | |
30 $fdrLevel | |
31 $fragLen | |
32 $binSize | |
33 $capping | |
34 ## settings for model fitting and peak calling: optional (signal model, d, maxgap, minsize, thres); preSet uses the same values as the form defaults | |
35 #if $fitParams.fSettingsType == "preSet" | |
36 BIC | |
37 0.25 | |
38 200 | |
39 50 | |
40 10 | |
41 #else | |
42 $fitParams.signalModel | |
43 $fitParams.d | |
44 $fitParams.maxgap | |
45 $fitParams.minsize | |
46 $fitParams.thres | |
47 #end if | |
48 ## Number of cores to use: the slot count allocated by Galaxy when GALAXY_SLOTS is set, otherwise 8 | |
49 \${GALAXY_SLOTS:-8} | |
50 </command> | |
51 | |
52 <inputs> | |
53 <conditional name="chipParams"> <!-- ChIP sample: the selected file format decides which "chip" dataset input applies --> | |
54 <param name="chipFileFormat" type="select" label="Select file format for ChIP sample" help="MOSAiCS can accept aligned read files."> | |
55 <option value="eland_result">Eland result</option> | |
56 <option value="eland_extended">Eland extended</option> | |
57 <option value="eland_export">Eland export</option> | |
58 <option value="bowtie">Bowtie default</option> | |
59 <option value="sam">SAM</option> | |
60 </param> | |
61 <when value="eland_result"> | |
62 <param name="chip" type="data" format="eland" label="Eland result file for ChIP sample"/> | |
63 </when> | |
64 <when value="eland_extended"> | |
65 <param name="chip" type="data" format="eland" label="Eland extended file for ChIP sample"/> | |
66 </when> | |
67 <when value="eland_export"> | |
68 <param name="chip" type="data" format="eland" label="Eland export file for ChIP sample"/> | |
69 </when> | |
70 <when value="bowtie"> <!-- NOTE(review): unlike the other cases, this input declares no format attribute; confirm that is intended --> | |
71 <param name="chip" type="data" label="Bowtie default file for ChIP sample"/> | |
72 </when> | |
73 <when value="sam"> | |
74 <param name="chip" type="data" format="sam" label="SAM file for ChIP sample"/> | |
75 </when> | |
76 </conditional> <!-- chipParams --> | |
77 <conditional name="controlParams"> <!-- control sample: the selected file format decides which "control" dataset input applies --> | |
78 <param name="controlFileFormat" type="select" label="Select file format for control sample" help="MOSAiCS can accept aligned read files."> | |
79 <option value="eland_result">Eland result</option> | |
80 <option value="eland_extended">Eland extended</option> | |
81 <option value="eland_export">Eland export</option> | |
82 <option value="bowtie">Bowtie default</option> | |
83 <option value="sam">SAM</option> | |
84 </param> | |
85 <when value="eland_result"> | |
86 <param name="control" type="data" format="eland" label="Eland result file for control sample"/> | |
87 </when> | |
88 <when value="eland_extended"> | |
89 <param name="control" type="data" format="eland" label="Eland extended file for control sample"/> | |
90 </when> | |
91 <when value="eland_export"> | |
92 <param name="control" type="data" format="eland" label="Eland export file for control sample"/> | |
93 </when> | |
94 <when value="bowtie"> <!-- NOTE(review): unlike the other cases, this input declares no format attribute; confirm that is intended --> | |
95 <param name="control" type="data" label="Bowtie default file for control sample"/> | |
96 </when> | |
97 <when value="sam"> | |
98 <param name="control" type="data" format="sam" label="SAM file for control sample"/> | |
99 </when> | |
100 </conditional> <!-- controlParams --> | |
101 | |
102 <param name="OutfileFormat" type="select" label="Select file format for peak calling results" help="MOSAiCS can export peak calling results into BED or GFF file formats, or as a table."> <!-- passed to the wrapper and also used by change_format on out_peak --> | |
103 <option value="bed">BED</option> | |
104 <option value="gff">GFF</option> | |
105 <option value="txt">table</option> | |
106 </param> | |
107 <param name="summary" type="boolean" truevalue="1" falsevalue="0" display="checkboxes" label="Reports for diagnostics: Summary of model fitting and peak calling" /> <!-- referenced by the filter on output report_summary --> | |
108 <param name="gof" type="boolean" truevalue="1" falsevalue="0" display="checkboxes" label="Reports for diagnostics: Goodness of fit (GOF) plots" /> <!-- referenced by the filter on output report_gof --> | |
109 <param name="exploratory" type="boolean" truevalue="1" falsevalue="0" display="checkboxes" label="Reports for diagnostics: Plots of exploratory analysis" /> <!-- referenced by the filter on output report_exploratory --> | |
110 | |
111 <param name="fdrLevel" type="float" value="0.05" min="0" max="1" label="False discovery rate (FDR)" help="FDR level for peak detection (default: 0.05)" /> | |
112 <param name="fragLen" type="integer" value="200" label="Average fragment length" help="Default: 200." /> | |
113 <param name="binSize" type="integer" value="200" label="Bin size" help="By default, bin size equals to the average fragment length." /> <!-- NOTE(review): the default is a static 200, not derived from fragLen; it only matches while fragLen keeps its default --> | |
114 <param name="capping" type="integer" value="3" label="Maximum number of reads allowed to start at each nucleotide position" help="Small value (e.g., 3) are recommended for the ChIP-seq data with low sequencing depth and large value (e.g., 10000) for the ChIP-seq data with high sequencing depth." /> | |
115 | |
116 <conditional name="fitParams"> <!-- optional tuning: either fixed presets or the full parameter list --> | |
117 <param name="fSettingsType" type="select" label="Settings for model fitting and peak calling" help="For most peak calling applications, use the 'Commonly used' setting. If you want access to all parameters, use 'Full parameter list'."> | |
118 <option value="preSet">Commonly used</option> | |
119 <option value="full">Full parameter list</option> | |
120 </param> | |
121 <when value="preSet" /> <!-- no inputs here: the command template supplies BIC, 0.25, 200, 50, 10 for this case --> | |
122 <when value="full"> <!-- defaults below match the preSet constants in the command template --> | |
123 <param name="signalModel" type="select" label="Signal model" help="By default, signal model is chosen using BIC. Instead, user can specify signal model among one or two signal component models."> | |
124 <option value="BIC">Automatic model selection based on BIC</option> | |
125 <option value="1S">One-signal-component model</option> | |
126 <option value="2S">Two-signal-component model</option> | |
127 </param> | |
128 <param name="d" type="float" value="0.25" label="d" help="Parameter for estimating background distribution. Default is 0.25." /> | |
129 <param name="maxgap" type="integer" value="200" label="maxgap" help="Initial nearby peaks are merged if the distance (in bp) between them is less than 'maxgap'. Default is 200." /> | |
130 <param name="minsize" type="integer" value="50" label="minsize" help="An initial peak is removed if its width is narrower than 'minsize'. Default is 50." /> | |
131 <param name="thres" type="integer" value="10" label="thres" help="A bin within initial peak is removed if its ChIP tag counts are less than 'thres'. Default is 10." /> | |
132 </when> <!-- full --> | |
133 </conditional> <!-- fitParams --> | |
134 </inputs> | |
135 | |
136 <outputs> <!-- one peak file plus up to three optional diagnostic reports --> | |
137 <data format="tabular" name="out_peak"> <!-- stays tabular for the "txt" choice; switched to bed or gff below --> | |
138 <change_format> | |
139 <when input="OutfileFormat" value="bed" format="bed" /> | |
140 <when input="OutfileFormat" value="gff" format="gff" /> | |
141 </change_format> | |
142 </data> | |
143 <data format="txt" name="report_summary"> <!-- filtered on the "summary" checkbox --> | |
144 <filter>summary == 1</filter> | |
145 </data> | |
146 <data format="pdf" name="report_gof"> <!-- filtered on the "gof" checkbox --> | |
147 <filter>gof == 1</filter> | |
148 </data> | |
149 <data format="pdf" name="report_exploratory"> <!-- filtered on the "exploratory" checkbox --> | |
150 <filter>exploratory == 1</filter> | |
151 </data> | |
152 </outputs> | |
153 | |
154 <help> | |
155 | |
156 **What it does** | |
157 | |
158 MOSAiCS is a statistical framework for the analysis of ChIP-seq data and it stands for MOdel-based one and two Sample Analysis and Inference for ChIP-Seq Data. MOSAiCS is based on a flexible parametric mixture modeling approach for detecting peaks (i.e., enriched regions). | |
159 MOSAiCS is also available in Bioconductor_ as an R package. | |
160 We encourage questions or requests regarding MOSAiCS to be posted on our `Google group`_. | |
161 | |
162 Please cite: Kuan PF, Chung D, Pan G, Thomson JA, Stewart R, and Keles S (2011), "`A statistical framework for the analysis of ChIP-Seq data`_," To appear in the *Journal of the American Statistical Association*. | |
163 | |
164 .. _Bioconductor: http://www.bioconductor.org/help/bioc-views/2.8/bioc/html/mosaics.html | |
165 .. _Google group: http://groups.google.com/group/mosaics_user_group | |
166 .. _A statistical framework for the analysis of ChIP-Seq data: http://pubs.amstat.org/doi/abs/10.1198/jasa.2011.ap09706 | |
167 | |
168 ------ | |
169 | |
170 **Input formats** | |
171 | |
172 MOSAiCS accepts aligned read files of ChIP and control samples as input. Currently, MOSAiCS accepts single-end reads, in Eland result, Eland extended, Eland export, Bowtie default, and SAM formats. | |
173 | |
174 ------ | |
175 | |
176 **Outputs** | |
177 | |
178 Peak calling results of MOSAiCS can be exported into BED or GFF file formats, or as a table. Each line of the output file specifies a single peak. | |
179 | |
180 If the output is a table, it has the following columns:: | |
181 | |
182 Column Description | |
183 -------- -------------------------------------------------------- | |
184 1 Chromosome of the peak | |
185 2 Start position of the peak | |
186 3 End position of the peak | |
187 4 Width of the peak | |
188 5 Averaged posterior probability of the peak | |
189 6 Minimum posterior probability of the peak | |
190 7 Averaged ChIP tag counts of the peak | |
191 8 Maximum ChIP tag counts of the peak | |
192 9 Averaged control tag counts of the peak | |
193 10 Averaged control tag counts of the peak, scaled by sequencing depth | |
194 11 Averaged log base 2 ratio of ChIP over input tag counts | |
195 | |
196 If the output is in BED format, it has the following columns:: | |
197 | |
198 Column Description | |
199 ------------ -------------------------------------------------------- | |
200 1 chrom Chromosome of the peak | |
201 2 chromStart Start position of the peak | |
202 3 chromEnd End position of the peak | |
203 4 name Always "MOSAiCS_peak" | |
204 5 score Averaged ChIP tag counts of the peak | |
205 | |
206 If the output is in GFF format, it has the following columns:: | |
207 | |
208 Column Description | |
209 --------- -------------------------------------------------------- | |
210 1 seqname Chromosome of the peak | |
211 2 source Always "MOSAiCS" | |
212 3 feature Always "MOSAiCS_peak" | |
213 4 start Start position of the peak | |
214 5 end End position of the peak | |
215 6 score Averaged ChIP tag counts of the peak | |
216 7 strand Always "." | |
217 8 frame Always "." | |
218 9 group Always "." | |
219 | |
220 ------ | |
221 | |
222 **Reports for diagnostics** | |
223 | |
224 *Summary of model fitting and peak calling*: This report provides information about input and output files, the parameter settings used for model fitting and peak calling, and a brief summary of the peak calling results. | |
225 | |
226 *Goodness of fit (GOF) plots*: This report allows visual comparisons of the fits of the background, one-signal-component, and two-signal-component models with the actual data. | |
227 | |
228 *Plots of exploratory analysis*: This report provides the histograms of ChIP and control samples and the scatter plots of ChIP versus control tag counts. | |
229 | |
230 More details regarding these reports can be found here_. | |
231 | |
232 ------ | |
233 | |
234 **Settings for model fitting and peak calling** | |
235 | |
236 More details about the tuning of these parameters can be found here_. | |
237 | |
238 .. _here: http://www.bioconductor.org/packages/2.8/bioc/vignettes/mosaics/inst/doc/mosaics-example.pdf | |
239 | |
240 </help> | |
241 </tool> |