0
|
1 <macros>
|
|
<!-- Package dependencies of the wrapper: r-sartools 1.2.0 and r-optparse 1.3.0. -->
<xml name="requirements">
    <requirements>
        <requirement version="1.2.0" type="package">r-sartools</requirement>
        <requirement version="1.3.0" type="package">r-optparse</requirement>
    </requirements>
</xml>
|
|
8
|
|
<!--
    Job failure detection.
    * Any strictly positive exit status is fatal (range "1:"), not only status 1,
      so that other non-zero statuses are no longer reported as success.
    * "Execution halted" and "rsync error" found on either stdout or stderr
      are also treated as fatal.
-->
<xml name="stdio">
    <stdio>
        <exit_code range="1:" level="fatal" />
        <regex match="Execution halted"
               source="both"
               level="fatal"
               description="Execution halted" />
        <regex match="rsync error"
               source="both"
               level="fatal"
               description="rsync error" />
    </stdio>
</xml>
|
|
22
|
|
<!--
    Command-line fragment passing the basic parameters to the script.
    Every interpolated value is single-quoted so that a value containing
    whitespace or shell metacharacters stays a single argument.
-->
<token name="@COMMAND_BASIC_PARAMETERS@">
    --projectName '$projectName'
    --author '$author'
    --targetFile '$targetFile'
    --rawDir '$rawDir'
    --featuresToRemove '$featuresToRemove'
    --varInt '$varInt'
    --condRef '$condRef'
</token>
|
|
32
|
|
<!--
    Command-line fragment for the blocking factor.
    When the "condition" test parameter is true the user-supplied column name
    is passed (single-quoted, as it is free text); otherwise the literal NULL
    is passed.
-->
<token name="@COMMAND_BATCH_PARAM@">
    #if $advanced_parameters.batch_condition.condition:
        --batch '$advanced_parameters.batch_condition.batch'
    #else:
        --batch NULL
    #end if
</token>
|
|
40
|
|
<!--
    Command-line fragment passing the output datasets (and the extra-files
    directories of the two HTML galleries) to the script.
    Paths are single-quoted so each one is always passed as a single argument.
-->
<token name="@COMMAND_OUTPUTS@">
    --figures_html '$figures_html'
    --figures_html_files_path '$figures_html.files_path'
    --tables_html '$tables_html'
    --tables_html_files_path '$tables_html.files_path'
    --rdata '$rdata'
    --report_html '$report_html'
    --log '$log'
</token>
|
|
50
|
|
<!--
    Basic inputs: project name, author, target file, zipped raw counts,
    features to remove, factor of interest and reference condition.

    The text validators are anchored (^\S+$) so the whole value must be free
    of whitespace. An unanchored \S+ is satisfied as soon as part of the value
    matches, which lets values such as "my project" through despite the
    "No space allowed" message.
-->
<macro name="basic_parameters">
    <param name="projectName" type="text" value="Project"
           label="Name of the project used for the report"
           help="(-P, --projectName) No space allowed.">
        <validator type="regex" message="Field requires a value. No space allowed.">^\S+$</validator>
    </param>
    <param name="author" type="text" value="Galaxy"
           label="Name of the report author"
           help="(-A, --author) No space allowed.">
        <validator type="regex" message="Field requires a value. No space allowed.">^\S+$</validator>
    </param>
    <param name="targetFile" type="data" format="txt"
           label="Design / target file"
           help="(-t, --targetFile) See the help section below for details on the required format." />
    <param name="rawDir" type="data" format="no_unzip.zip,zip"
           label="Zip file containing raw counts files"
           help="(-r, --rawDir) See the help section below for details on the required format." />
    <param name="featuresToRemove" type="text" size="100"
           value="alignment_not_unique,ambiguous,no_feature,not_aligned,too_low_aQual"
           label="Names of the features to be removed"
           help="(-F, --featuresToRemove) Separate the features with a comma, no space allowed. More than once can be specified. Specific HTSeq-count information and rRNA for example. Default are 'alignment_not_unique,ambiguous,no_feature,not_aligned,too_low_aQual'.">
        <validator type="regex" message="Field requires a value. No space allowed.">^\S+$</validator>
    </param>
    <param name="varInt" type="text" value="group"
           label="Factor of interest"
           help="(-v, --varInt) Biological condition in the target file. Default is 'group'.">
        <validator type="regex" message="Field requires a value. No space allowed.">^\S+$</validator>
    </param>
    <param name="condRef" type="text" value="WT"
           label="Reference biological condition"
           help="(-c, --condRef) Reference biological condition used to compute fold-changes, must be one of the levels of 'Factor of interest'.">
        <validator type="regex" message="Field requires a value. No space allowed.">^\S+$</validator>
    </param>
</macro>
|
|
70
|
|
<!--
    Optional blocking factor.
    The checkbox drives the conditional: unchecked selects the empty "NULL"
    branch, checked selects the "batch" branch, which asks for a non-empty
    column name (default "batch").
-->
<macro name="batch_param">
    <conditional name="batch_condition">
        <param name="condition"
               type="boolean"
               truevalue="batch"
               falsevalue="NULL"
               checked="false"
               label="Add a blocking factor"
               help="(-b, --batch) Adjustment variable to use as a batch effect. Default: unchecked if no batch effect needs to be taken into account." />
        <when value="NULL" />
        <when value="batch">
            <param name="batch"
                   type="text"
                   value="batch"
                   label="Blocking factor value"
                   help="Must be a column of the target file">
                <validator type="empty_field" />
            </param>
        </when>
    </conditional>
</macro>
|
|
82
|
|
<!-- Significance threshold: a float restricted to [0, 1], defaulting to 0.05. -->
<macro name="alpha_param">
    <param name="alpha"
           type="float"
           min="0"
           max="1"
           value="0.05"
           label="Threshold of statistical significance"
           help="(-a, --alpha) Significance threshold applied to the adjusted p-values to select the differentially expressed features. Default is 0.05. The comma is not allowed as decimal separator, use a point instead." />
</macro>
|
|
86
|
|
<!-- Multiple-testing correction method; "BH" is preselected. -->
<macro name="padjustmethod_param">
    <param name="pAdjustMethod"
           type="select"
           label="p-value adjustment method"
           help="(-p, --pAdjustMethod) p-value adjustment method for multiple testing. 'BH' by default, 'BY' or any value of p.adjust.methods.">
        <option selected="true" value="BH">BH</option>
        <option value="BY">BY</option>
        <option value="bonferroni">bonferroni</option>
        <option value="fdr">fdr</option>
        <option value="hochberg">hochberg</option>
        <option value="holm">holm</option>
        <option value="hommel">hommel</option>
    </param>
</macro>
|
|
98
|
|
<!--
    Comma-separated list of plot colors, one per biological condition.
    The validator is anchored (^\S+$) so the whole value must be free of
    whitespace; an unanchored \S+ would accept "red, blue" even though the
    message states that no space is allowed.
-->
<macro name="colors_param">
    <param name="colors" type="text" size="100"
           value="dodgerblue,firebrick1,MediumVioletRed,SpringGreen,chartreuse,cyan,darkorchid,darkorange"
           label="Colors of each biological condition on the plots: 'col1,col2,col3,col4'"
           help="(-C, --colors) Separate the colors with a comma, no space allowed. Default are 'dodgerblue,firebrick1,MediumVioletRed,SpringGreen,chartreuse,cyan,darkorchid,darkorange'.">
        <validator type="regex" message="Field requires a value. No space allowed.">^\S+$</validator>
    </param>
</macro>
|
|
104
|
|
<!--
    Output datasets: three HTML pages (report, tables, figures),
    the R log as text and the saved R objects as a generic data file.
-->
<macro name="outputs">
    <data format="html" name="report_html" label="${tool.name} report" />
    <data format="html" name="tables_html" label="${tool.name} tables" />
    <data format="html" name="figures_html" label="${tool.name} figures" />
    <data format="txt" name="log" label="${tool.name} R log" />
    <data format="data" name="rdata" label="${tool.name} R objects (.RData)" />
</macro>
|
|
112
|
|
113 <token name="@HELP_AUTHORS@">
|
|
114 .. class:: infomark
|
|
115
|
|
116 **Authors** M.-A. Dillies and H. Varet
|
|
117
|
|
118 | If you use this tool, please cite: H. Varet, L. Brillet-Guéguen, J.-Y. Coppee and M.-A. Dillies, SARTools: A DESeq2- and EdgeR-Based R Pipeline for Comprehensive Differential Analysis of RNA-Seq Data, PLoS One, 2016, doi: http://dx.doi.org/10.1371/journal.pone.0157022
|
|
119 | For details about this tool, please go to https://github.com/PF2-pasteur-fr/SARTools
|
|
120
|
|
121 .. class:: infomark
|
|
122
|
|
123 **Galaxy integration** Loraine Brillet-Guéguen, Institut Français de Bioinformatique
|
|
124
|
|
125 | Contact support.abims@sb-roscoff.fr for any questions or concerns about the Galaxy implementation of this tool.
|
|
126
|
|
127 ---------------------------------------------------
|
|
128 </token>
|
|
129
|
|
130 <token name="@HELP_DESCRIPTION@">
|
|
131 | SARTools is an R package dedicated to the differential analysis of RNA-seq data. It provides tools to generate descriptive and diagnostic graphs, to run the differential analysis with one of the well-known DESeq2 or edgeR packages and to export the results into easily readable tab-delimited files. It also facilitates the generation of an HTML report which displays all the figures produced, explains the statistical methods and gives the results of the differential analysis.
|
|
132 | Note that SARTools does not intend to replace DESeq2 or edgeR: it simply provides an environment to go with them. For more details about the methodology behind DESeq2 or edgeR, the user should read their documentation and papers.
|
|
133 </token>
|
|
134
|
|
135 <token name="@HELP_INPUT_FILES@">
|
|
136 .. class:: warningmark
|
|
137
|
|
138 If the counts and the target files are not supplied in the required formats, the workflow will probably crash and will not be able to run the analysis.
|
|
139
|
|
140
|
|
141 +---------------------------+-----------+
|
|
142 | Parameter : num + label | Format |
|
|
143 +===========================+===========+
|
|
144 | 1 : Design / target file | tabular |
|
|
145 +---------------------------+-----------+
|
|
146 | 2 : Raw counts files | zip |
|
|
147 +---------------------------+-----------+
|
|
148
|
|
149
|
|
150 Design/target file:
|
|
151 | The user has to supply a tab delimited file which describes the experiment, i.e. which contains the name of the biological condition associated with each sample. This file is called "target" as a reference to the target file needed when using the limma package [1]. This file has one row per sample and is composed of at least three columns with headers:
|
|
152
|
|
153 * column 1 : unique names of the samples (short but informative as they will be displayed on all the figures);
|
|
154 * column 2 : name of the count files;
|
|
155 * column 3 : biological conditions;
|
|
156 * optional columns : further information about the samples (day of library preparation for example).
|
|
157
|
|
158
|
|
159 - Example of a target file::
|
|
160
|
|
161 label files group
|
|
162 s1c1 count_file_sample1_cond1.txt cond1
|
|
163 s2c1 count_file_sample2_cond1.txt cond1
|
|
164 s1c2 count_file_sample1_cond2.txt cond2
|
|
165 s2c2 count_file_sample2_cond2.txt cond2
|
|
166
|
|
167
|
|
168 Zip file containing raw counts files:
|
|
169 | The statistical analysis assumes that reads have already been mapped and that counts per feature (gene or transcript) are available. If counting has been done with HTSeq-count [2, 3], output files are ready to be loaded in R with the dedicated SARTools function. If not, the user must supply, in a zip file, one count file per sample with two tab delimited columns without header:
|
|
170
|
|
171 * column 1 : the unique IDs of the features;
|
|
172 * column 2 : the raw counts associated with these features (null or positive integers).
|
|
173 </token>
|
|
174
|
|
175 <token name="@HELP_BASIC_PARAMETERS@">
|
|
176 * **projectName:** name of the project;
|
|
177 * **author:** author of the analysis;
|
|
178 * **featuresToRemove:** character vector containing the IDs of the features to remove before running the analysis (default are "alignment_not_unique", "ambiguous", "no_feature", "not_aligned", "too_low_aQual" to remove HTSeq-count specific rows);
|
|
179 * **varInt:** variable of interest, i.e. biological condition, in the target file ("group" by default);
|
|
180 * **condRef:** reference biological condition used to compute fold-changes (no default, must be one of the levels of varInt);
|
|
181 </token>
|
|
182
|
|
183 <token name="@HELP_OUTPUT_FILES@">
|
|
184 **Report:**
|
|
185
|
|
186
|
|
187 | Gives details about the methodology, the different steps and the results. It displays all the figures produced and the most important results of the differential analysis, such as the number of up- and down-regulated features.
|
|
188 | The user should read the full HTML report and closely analyze each figure to check that the analysis ran smoothly.
|
|
189
|
|
190
|
|
191 **Tables:**
|
|
192
|
|
193
|
|
194 * **TestVsRef.complete.txt:** contains all the features studied;
|
|
195 * **TestVsRef.down.txt:** contains only significant down-regulated features, i.e. less expressed in Test than in Ref;
|
|
196 * **TestVsRef.up.txt:** contains only significant up-regulated features i.e. more expressed in Test than in Ref.
|
|
197
|
|
198
|
|
199 **Figures:**
|
|
200
|
|
201
|
|
202 * **MAplot.png:** MA-plot for each comparison (log ratio of the means vs intensity).
|
|
203 * **PCA.png:** first and second factorial planes of the PCA on the samples based on VST or rlog data;
|
|
204 * **barplotNull.png:** percentage of null counts per sample;
|
|
205 * **barplotTC.png:** total number of reads per sample;
|
|
206 * **cluster.png:** hierarchical clustering of the samples (based on VST or rlog data);
|
|
207 * **countsBoxplot.png:** boxplots on raw and normalized counts;
|
|
208 * **densplot.png:** estimation of the density of the counts for each sample;
|
|
209 * **diagSizeFactorsHist.png:** diagnostic of the estimation of the size factors;
|
|
210 * **diagSizeFactorsTC.png:** plot of the size factors vs the total number of reads;
|
|
211 * **dispersionsPlot.png:** graph of the estimations of the dispersions and diagnostic of log-linearity of the dispersions;
|
|
212 * **majSeq.png:** percentage of reads caught by the feature having the highest count in each sample;
|
|
213 * **pairwiseScatter.png:** pairwise scatter plot between each pair of samples and SERE values;
|
|
214 * **rawpHist.png:** histogram of the raw p-values for each comparison;
|
|
215 * **volcanoPlot.png:** volcano plot for each comparison (− log10 (adjusted P value) vs log ratio of the means).
|
|
216
|
|
217
|
|
218 **R log file:**
|
|
219
|
|
220
|
|
221 | Gives the R console outputs.
|
|
222
|
|
223
|
|
224 **R objects (.RData file):**
|
|
225
|
|
226
|
|
227 | All the R objects created during the analysis are saved: they may be used to perform downstream analyses.
|
|
228 </token>
|
|
229
|
|
<!--
    Citations: two DOI references plus three BibTeX entries
    (Smyth 2005, Benjamini and Hochberg 1995, Benjamini and Yekutieli 2001).

    BibTeX conventions applied: the editor names are joined with "and"
    (a comma-separated list is parsed as a single malformed name) and page
    ranges use the ASCII double hyphen instead of a Unicode en dash.
-->
<macro name="common_citations">
    <citation type="doi">10.1371/journal.pone.0157022</citation>
    <citation type="bibtex">@INBOOK{Smyth05,
        author = {G.-K. Smyth},
        editor = {R. Gentleman and V. Carey and S. Dudoit and R. Irizarry and W. Huber},
        chapter = {Limma: linear models for microarray data},
        title = {Bioinformatics and Computational Biology Solutions Using R and Bioconductor},
        publisher = {Springer},
        year = {2005},
        pages = {397--420}
    }</citation>
    <citation type="doi">10.1093/bioinformatics/btu638</citation>
    <citation type="bibtex">@ARTICLE{Benjamini95,
        author = {Y. Benjamini and Y. Hochberg},
        title = {Controlling the false discovery rate: a practical and powerful approach to multiple testing},
        journal = {Journal of the Royal Statistical Society B},
        year = {1995},
        volume = {57},
        pages = {289--300}
    }</citation>
    <citation type="bibtex">@ARTICLE{Benjamini01,
        author = {Y. Benjamini and D. Yekutieli},
        title = {The control of the false discovery rate in multiple testing under dependency},
        journal = {Ann. Statist.},
        year = {2001},
        volume = {29},
        number = {4},
        pages = {1165--1188}
    }</citation>
</macro>
|
|
260
|
|
261 </macros>
|