Mercurial > repos > shians > shrnaseq
comparison hairpinTool.xml @ 2:076ca575208f
First commit
author | shian_su <registertonysu@gmail.com> |
---|---|
date | Fri, 21 Feb 2014 12:52:56 +1100 |
parents | |
children | 3d04308a99f9 |
comparison
equal
deleted
inserted
replaced
1:aa02cf19e1b3 | 2:076ca575208f |
---|---|
1 <tool id="shRNAseq" name="shRNAseq Tool" version="1.0.5"> | |
2 <description> | |
3 Analyse hairpin differential representation using edgeR | |
4 </description> | |
5 | |
6 <requirements> | |
7 <requirement type="R-module">edgeR</requirement> | |
8 <requirement type="R-module">limma</requirement> | |
9 </requirements> | |
10 | |
11 <stdio> | |
12 <exit_code range="1:" level="fatal" description="Tool exception" /> | |
13 </stdio> | |
14 | |
15 <command interpreter="Rscript"> | |
16 hairpinTool.R $inputOpt.type | |
17 #if $inputOpt.type=="fastq": | |
18 #for $i, $fas in enumerate($inputOpt.fastq): | |
19 fastq::$fas.file | |
20 #end for | |
21 | |
22 $inputOpt.hairpin | |
23 $inputOpt.samples | |
24 | |
25 #if $inputOpt.positions.option=="yes": | |
26 $inputOpt.positions.barstart | |
27 $inputOpt.positions.barend | |
28 $inputOpt.positions.hpstart | |
29 $inputOpt.positions.hpend | |
30 #else: | |
31 1 | |
32 5 | |
33 37 | |
34 57 | |
35 #end if | |
36 #else: | |
37 $inputOpt.counts | |
38 $inputOpt.anno | |
39 "$inputOpt.factors" | |
40 0 0 0 | |
41 #end if | |
42 | |
43 #if $filterCPM.option=="yes": | |
44 $filterCPM.cpmReq | |
45 $filterCPM.sampleReq | |
46 #else: | |
47 -Inf | |
48 -Inf | |
49 #end if | |
50 | |
51 $fdr | |
52 $lfc | |
53 $workMode.mode | |
54 $outFile | |
55 $outFile.files_path | |
56 | |
57 #if $workMode.mode=="classic": | |
58 "$workMode.pair1" | |
59 "$workMode.pair2" | |
60 #else: | |
61 "$workMode.contrast" | |
62 $workMode.roast.option | |
63 #if $workMode.roast.option=="yes": | |
64 $workMode.roast.hairpinReq | |
65 $workMode.roast.select.option | |
66 "$workMode.roast.select.selection" | |
67 #else: | |
68 0 | |
69 0 | |
70 0 | |
71 #end if | |
72 #end if | |
73 </command> | |
74 | |
75 <inputs> | |
76 <conditional name="inputOpt"> | |
77 <param name="type" type="select" label="Input File Type"> | |
78 <option value="fastq">FastQ File</option> | |
79 <option value="counts">Table of Counts</option> | |
80 </param> | |
81 | |
82 <when value="fastq"> | |
83 <param name="hairpin" type="data" format="tabular" | |
84 label="Hairpin Annotation"/> | |
85 | |
86 | |
87 <param name="samples" type="data" format="tabular" | |
88 label="Sample Annotation"/> | |
89 | |
90 <repeat name="fastq" title="FastQ Files"> | |
91 <param name="file" type="data" format="fastq"/> | |
92 </repeat> | |
93 | |
94 <conditional name="positions"> | |
95 <param name="option" type="select" | |
96 label="Specify Barcode and Hairpin Locations?" | |
97 help="Default Positions: Barcode: 1 to 5, Hairpin: 37 to 57."> | |
98 <option value="no" selected="True">No</option> | |
99 <option value="yes">Yes</option> | |
100 </param> | |
101 | |
102 <when value="yes"> | |
103 <param name="barstart" type="integer" value="1" | |
104 label="Barcode Starting Position"/> | |
105 <param name="barend" type="integer" value="5" | |
106 label="Barcode Ending Position"/> | |
107 | |
108 <param name="hpstart" type="integer" value="37" | |
109 label="Hairpin Starting Position"/> | |
110 | |
111 <param name="hpend" type="integer" value="57" | |
112 label="Hairpin Ending Position"/> | |
113 </when> | |
114 | |
115 <when value="no"/> | |
116 </conditional> | |
117 </when> | |
118 | |
119 <when value="counts"> | |
120 <param name="counts" type="data" format="tabular" label="Counts Table"/> | |
121 <param name="anno" type="data" format="tabular" | |
122 label="Hairpin Annotation"/> | |
123 <param name="factors" type="data" format="tabular" | |
124 label="Sample Annotation"/> | |
125 </when> | |
126 </conditional> | |
127 | |
128 <conditional name="filterCPM"> | |
129 <param name="option" type="select" label="Filter Low CPM?" | |
130 help="Ignore hairpins with very low representation when performing | |
131 analysis."> | |
132 <option value="yes">Yes</option> | |
133 <option value="no">No</option> | |
134 </param> | |
135 | |
136 <when value="yes"> | |
137 <param name="cpmReq" type="float" value="0.5" min="0" max="1" | |
138 label="Minimum CPM"/> | |
139 | |
140 <param name="sampleReq" type="integer" value="1" min="0" | |
141 label="Minimum Samples" | |
142 help="Filter out all the genes that do not meet the minimum | |
143 CPM in at least this many samples."/> | |
144 </when> | |
145 | |
146 <when value="no"/> | |
147 | |
148 </conditional> | |
149 | |
150 <conditional name="workMode"> | |
151 <param name="mode" type="select" label="Analysis Type" | |
152 help="Classic Exact Tests are useful for simple comparisons across | |
153 two sampling groups. Generalised linear models allow for more | |
154 complex contrasts and gene level analysis to be made."> | |
155 <option value="classic">Classic Exact Test</option> | |
156 <option value="glm">Generalised Linear Model</option> | |
157 </param> | |
158 | |
159 <when value="classic"> | |
160 <param name="pair1" type="text" label="Compare" size="40"/> | |
161 <param name="pair2" type="text" label="To" size="40" | |
162 help="The analysis will subtract values of this group from those | |
163 in the group above to establish the difference."/> | |
164 </when> | |
165 | |
166 <when value="glm"> | |
167 <param name="contrast" type="text" size="60" | |
168 label="Contrasts of interest" | |
169 help="Specify equations defining contrasts to be made. Eg. | |
170 KD-Control will result in positive fold change if KD has | |
171 greater expression and negative if Control has greater | |
172 expression."/> | |
173 | |
174 <conditional name="roast"> | |
175 <param name="option" type="select" | |
176 label="Perform Gene Level Analysis?" | |
177 help="Analyse LogFC tendencies for hairpins belonging | |
178 to the same gene."> | |
179 <option value="no">No</option> | |
180 <option value="yes">Yes</option> | |
181 </param> | |
182 | |
183 <when value="yes"> | |
184 <param name="hairpinReq" type="integer" value="2" min="2" | |
185 label="Minimum Hairpins" | |
186 help="Only genes with at least this many hairpins will | |
187 be analysed."/> | |
188 | |
189 <conditional name="select"> | |
190 <param name="option" type="select" | |
191 label="Gene Selection Method"> | |
192 <option value="rank">By p-value Rank</option> | |
193 <option value="geneID">By Gene Identifier</option> | |
194 </param> | |
195 <when value="rank"> | |
196 <param name="selection" type="text" size="40" value="1:5" | |
197 label="Ranks of Top Genes to Plot" | |
198 help="Genes are ranked in ascending p-value for | |
199 differential representation, individual ranks can | |
200 be entered separated by comma or a range separated | |
201 by colon."/> | |
202 </when> | |
203 <when value="geneID"> | |
204 <param name="selection" type="text" size="80" value="" | |
205 label="Symbols of Genes to Plot" | |
206 help="Select genes based on their identifier in the | |
207 'Gene' column of the hairpin annotation file. | |
208 Please ensure exact match with the values in input | |
209 file and separate selections with commas."/> | |
210 </when> | |
211 </conditional> | |
212 | |
213 | |
214 </when> | |
215 | |
216 <when value="no"/> | |
217 </conditional> | |
218 </when> | |
219 </conditional> | |
220 | |
221 <param name="fdr" type="float" value="0.05" min="0" max="1" | |
222 label="FDR Threshold" | |
223 help="All observations below this threshold will be highlighted | |
224 in the smear plot."/> | |
225 <param name="lfc" type="float" value="0" min="0" | |
226 label="Absolute LogFC Threshold" | |
227 help="In addition to meeting the FDR requirement, the absolute | |
228 value of the log-fold-change of the observation must be above | |
229 this threshold to be highlighted."/> | |
230 </inputs> | |
231 | |
232 <outputs> | |
233 <data format="html" name="outFile" label="shRNAseq Analysis"/> | |
234 </outputs> | |
235 | |
236 <help> | |
237 .. class:: infomark | |
238 | |
239 **What it does** | |
240 | |
241 Given tables containing information about the hairpins and their associated | |
242 barcodes, information about the samples and a fastq file containing the hairpin | |
243 reads, this tool will generate plots and tables for the analysis of differential | |
244 representation. | |
245 | |
246 ----- | |
247 | |
248 .. class:: infomark | |
249 | |
250 **INPUTS** | |
251 | |
252 **Input File Type:** | |
253 | |
254 This tool is able to generate counts from a raw FastQ file given the | |
255 information regarding the samples and hairpins. Alternatively, if a table of | |
256 counts has already been generated, it can be used instead. | |
257 | |
258 **Counts Table (Counts Input):** | |
259 | |
260 A tab delimited text table of information regarding the counts of hairpins. | |
261 Should have a column 'ID' to denote the hairpins that counts correspond to. Each | |
262 additional column should have titles corresponding to the label for the sample. | |
263 | |
264 Example:: | |
265 | |
266 ID Sample1 Sample2 Sample3 | |
267 Control1 49802 48014 40148 | |
268 Control2 12441 16352 14232 | |
269 Control3 9842 9148 9111 | |
270 Hairpin1 3300 3418 2914 | |
271 Hairpin2 91418 95812 93174 | |
272 Hairpin3 32985 31975 35104 | |
273 Hairpin4 12082 14081 14981 | |
274 Hairpin5 2491 2769 2691 | |
275 Hairpin6 1294 1486 1642 | |
276 Hairpin7 49501 49076 47611 | |
277 ... | |
278 | |
279 **Hairpin Annotation:** | |
280 | |
281 A tab delimited text table of information regarding the hairpins. Should have | |
282 columns 'ID', 'Sequences' and 'Gene' to uniquely identify the hairpin, align it | |
283 with the reads to produce counts and identify which gene the hairpin acts on. | |
284 | |
285 NOTE: the column names are case sensitive and should be input exactly as they | |
286 are shown here. | |
287 | |
288 Example:: | |
289 | |
290 ID Sequences Gene | |
291 Control1 TCTCGCTTGGGCGAGAGTAAG 2 | |
292 Control2 CCGCCTGAAGTCTCTGATTAA 2 | |
293 Control3 AGGAATTATAATGCTTATCTA 2 | |
294 Hairpin1 AAGGCAGAGACTGACCACCTA 4 | |
295 Hairpin2 GAGCGACCTGGTGTTACTCTA 4 | |
296 Hairpin3 ATGGTGTAAATAGAGCTGTTA 4 | |
297 Hairpin4 CAGCTCATCTTCTGTGAAGAA 4 | |
298 Hairpin5 CAGCTCTGTGGGTCAGAAGAA 4 | |
299 Hairpin6 CCAGGCACAGATCTCAAGATA 4 | |
300 Hairpin7 ATGACAAGAAAGACATCTCAA 7 | |
301 ... | |
302 | |
303 **Sample Annotation (FastQ Input):** | |
304 | |
305 A tab delimited text table of information regarding the samples. Should have | |
306 columns 'ID', 'Sequences' and 'group' to uniquely identify each sample, identify | |
307 the sample in the reads by its barcode sequence and correctly group replicates | |
308 for analysis. Additional columns may be inserted for annotation purposes and will | |
309 not interfere with analysis as long as the necessary columns are present. | |
310 | |
311 NOTE: the column names are case sensitive and should be input exactly as they | |
312 are shown here. | |
313 | |
314 Example:: | |
315 | |
316 ID Sequences group Replicate | |
317 3 GAAAG Day 2 1 | |
318 6 GAACC Day 10 1 | |
319 9 GAAGA Day 5 GFP neg 1 | |
320 16 GAATT Day 5 GFP pos 1 | |
321 18 GACAC Day 2 2 | |
322 21 GACCA Day 10 2 | |
323 28 GACGT Day 5 GFP neg 2 | |
324 31 GACTG Day 5 GFP pos 2 | |
325 33 GAGAA Day 2 3 | |
326 40 GAGCT Day 10 3 | |
327 ... | |
328 | |
329 **Specify Barcode and Hairpin Locations (FastQ Input):** | |
330 | |
331 It is assumed that in the sequencing reads the first 5 bases are the | |
332 barcodes and that bases 37-57 are the hairpins. If this is not the case then the | |
333 values of the positions can be changed; however, the barcodes and hairpins must | |
334 still be in a consistent location and in a continuous sequence. | |
335 | |
336 **Filter Low CPM?:** | |
337 | |
338 Often in a large screen there may be members with very low counts which are of no | |
339 interest in the experiment; these may be filtered out to speed up computations. | |
340 Filtering will be based on counts per million in a required number of samples. | |
341 | |
342 **Analysis Type:** | |
343 | |
344 * **Classic Exact Test:** This allows two experimental groups to be compared and | |
345 p-values for differential representation derived for each hairpin. Simple and | |
346 fast for straightforward comparisons. In this option you will have the option of | |
347 "*Compare* x *To* y" which implicitly subtracts the data from y from that of x | |
348 to produce the comparison. | |
349 | |
350 * **Generalised Linear Model:** This allows for complex contrasts to be specified | |
351 and also gene level analysis to be performed. If this option is chosen then | |
352 contrasts must be explicitly stated in equations and multiple contrasts can be | |
353 made. In addition there will be the option to analyse hairpins on a per-gene | |
354 basis to see if hairpins belonging to a particular gene have any overall | |
355 tendencies for the direction of their log-fold-change. | |
356 | |
357 **FDR Threshold:** | |
358 The smear plot in the output will have hairpins highlighted to signify | |
359 significant differential representation. The significance is determined by | |
360 controlling the false discovery rate, only those with a FDR lower than the | |
361 threshold will be highlighted in the plot. | |
362 | |
363 ----- | |
364 | |
365 **Citations:** | |
366 | |
367 .. class:: infomark | |
368 | |
369 limma | |
370 | |
371 Please cite the paper below for the limma software itself. Please also try | |
372 to cite the appropriate methodology articles that describe the statistical | |
373 methods implemented in limma, depending on which limma functions you are | |
374 using. The methodology articles are listed in Section 2.1 of the limma | |
375 User's Guide. | |
376 | |
377 * Smyth, GK (2005). Limma: linear models for microarray data. In: | |
378 'Bioinformatics and Computational Biology Solutions using R and | |
379 Bioconductor'. R. Gentleman, V. Carey, S. Dudoit, R. Irizarry, | |
380 W. Huber (eds), Springer, New York, pages 397-420. | |
381 | |
382 .. class:: infomark | |
383 | |
384 edgeR | |
385 | |
386 Please cite the first paper for the software itself and the other papers for | |
387 the various original statistical methods implemented in edgeR. See | |
388 Section 1.2 in the User's Guide for more detail. | |
389 | |
390 * Robinson MD, McCarthy DJ and Smyth GK (2010). edgeR: a Bioconductor | |
391 package for differential expression analysis of digital gene expression | |
392 data. Bioinformatics 26, 139-140 | |
393 | |
394 * Robinson MD and Smyth GK (2007). Moderated statistical tests for assessing | |
395 differences in tag abundance. Bioinformatics 23, 2881-2887 | |
396 | |
397 * Robinson MD and Smyth GK (2008). Small-sample estimation of negative | |
398 binomial dispersion, with applications to SAGE data. | |
399 Biostatistics, 9, 321-332 | |
400 | |
401 * McCarthy DJ, Chen Y and Smyth GK (2012). Differential expression analysis | |
402 of multifactor RNA-Seq experiments with respect to biological variation. | |
403 Nucleic Acids Research 40, 4288-4297 | |
404 | |
405 .. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html | |
406 .. _limma: http://www.bioconductor.org/packages/release/bioc/html/limma.html | |
407 </help> | |
408 </tool> | |
409 |