Mercurial > repos > mbernt > proteomicsr_msigdb_workflow
comparison msigdb_workflow.xml @ 0:0fbb062e0cf5 draft default tip
planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/tools/proteomicsr commit a73787be689a9af5641ff1b594c9a35d29093247-dirty
author | mbernt |
---|---|
date | Tue, 19 Dec 2023 15:51:04 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0fbb062e0cf5 |
---|---|
1 <tool id="proteomicsr_msigdb_workflow" name="proteomicsr: enrichment using MSigDB gene sets" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05"> | |
2 <macros> | |
3 <import>macros.xml</import> | |
4 </macros> | |
5 <expand macro="requirements"/> | |
6 <stdio> | |
7 <regex source="stdout" level="fatal" match="ERROR: Timeout" description="The ENSEMBL server timed out. A retry may help."/> | |
8 </stdio> | |
9 <command detect_errors="exit_code"><![CDATA[ | |
10 Rscript '$rscript' | |
11 && mv Rdata/Summary_ALL.csv . | |
12 ]]></command> | |
13 <configfiles> | |
14 <configfile name="rscript"><![CDATA[ | |
15 library(proteomicsr) | |
16 | |
17 #if $dat_calculated.ext == 'csv' | |
18 dat_calculated <- read.csv("$dat_calculated", row.names = 1) | |
19 #else | |
20 dat_calculated <- read.delim("$dat_calculated", header = TRUE, row.names = 1, sep = "\t") | |
21 #end if | |
22 @READ_SAMPLE_GENES_MAPPING@ | |
23 | |
24 null <- run_msigdb_workflow( | |
25 dat_calculated, | |
26 msigdb_category = "$msigdb_category", | |
27 #if $msigdb_subcategory | |
28 msigdb_subcategory = "$msigdb_subcategory", | |
29 #end if | |
30 ## knowledgebase = NULL, not needed. NOTE(review): sampleGenes and sampleMapping below are hard-coded to NULL although the READ_SAMPLE_GENES_MAPPING macro token is expanded above; confirm whether the values it reads should be passed instead | |
31 sampleGenes = NULL, | |
32 sampleMapping = NULL, | |
33 pvalue_decision = "$pvalue_decision", | |
34 significance_cutoff_candidates = $significance_cutoff_candidates, | |
35 get_ID_to_map = NULL, | |
36 ID_provided = "$ID_provided", | |
37 organism = "$organism", | |
38 padjust_method = "$padjust_method", | |
39 significance_cutoff_terms = $significance_cutoff_terms, | |
40 direction_calculation = "$direction_calculation", | |
41 topx = $topx, | |
42 topx_per_comparison = $topx_per_comparison, | |
43 #if $plot_term_candidates | |
44 plot_term_candidates = "$plot_term_candidates", | |
45 #end if | |
46 color_up = "${color_up}FF", | |
47 color_down = "${color_down}FF" | |
48 ) | |
49 ]]></configfile> | |
50 </configfiles> | |
51 <inputs> | |
52 | |
53 <param argument="dat_calculated" type="data" format="csv,tabular" label="Sample table" help="Rows: unique identifiers (e.g. uniprot accessions), Columns: samples. Replicates should be indicated using _1, _2, .... Content should be numeric."/> | |
54 <param argument="msigdb_category" type="select" label="Gene set knowledgebase" help="Visit https://www.gsea-msigdb.org/gsea/msigdb/human/collections.jsp to get more information on MSigDB categories and if the chosen category needs the definition of a subcategory. MEDICUS and LEGACY gene sets seem to be not supported yet."> | |
55 <option value="H">Hallmark gene sets</option> | |
56 <option value="C1">Positional gene sets</option> | |
57 <option value="C2">Curated gene sets</option> | |
58 <option value="C3">Regulatory target gene sets</option> | |
59 <option value="C4">Computational gene sets</option> | |
60 <option value="C5">Ontology gene sets</option> | |
61 <option value="C6">Oncogenic signature gene sets</option> | |
62 <option value="C7">Immunologic signature gene sets</option> | |
63 <option value="C8">Cell type signature gene sets</option> | |
64 </param> | |
65 <param argument="msigdb_subcategory" type="select" optional="true" label="Gene set knowledgebase subcategory" help="Visit https://www.gsea-msigdb.org/gsea/msigdb/human/collections.jsp to get more information on MSigDB categories and if the chosen category needs the definition of a subcategory. MEDICUS and LEGACY gene sets seem to be not supported yet."> | |
66 <option value="CGP">C2 subcategory: chemical and genetic perturbations</option> | |
67 <option value="CP">C2 subcategory: canonical pathways</option> | |
68 <option value="CP:BIOCARTA">C2 subcategory: BioCarta canonical pathways</option> | |
69 <option value="CP:KEGG">C2 subcategory: KEGG canonical pathways (KEGG_MEDICUS and KEGG_LEGACY seem to be not supported yet)</option> | |
70 <option value="CP:PID">C2 subcategory: PID canonical pathways</option> | |
71 <option value="CP:REACTOME">C2 subcategory: Reactome canonical pathways</option> | |
72 <option value="MIR:MIRDB">C3 subcategory: gene sets containing high-confidence gene-level predictions of human miRNA targets as catalogued by miRDB v6.0 algorithm (MIR_LEGACY seems to be not supported yet)</option> | |
73 <option value="TFT:GTRD">C3 subcategory: genes that share GTRD predicted transcription factor binding sites in the region -1000,+100 bp around the TSS for the indicated transcription factor.</option> | |
74 <option value="CGN">C4 subcategory: cancer gene neighborhoods</option> | |
75 <option value="CM">C4 subcategory: cancer modules</option> | |
76 <option value="GO:BP">C5 subcategory: gene sets derived from the GO Biological Process ontology</option> | |
77 <option value="GO:CC">C5 subcategory: gene sets derived from the GO Cellular Component ontology</option> | |
78 <option value="GO:MF">C5 subcategory: gene sets derived from the GO Molecular Function ontology</option> | |
79 <option value="HPO">C5 subcategory: Human Phenotype Ontology</option> | |
80 <option value="IMMUNESIGDB">C7 subcategory: gene sets representing chemical and genetic perturbations of the immune system generated by manual curation of published studies in human and mouse immunology</option> | |
81 <option value="VAX">C7 subcategory: gene sets curated by the Human Immunology Project Consortium (HIPC) describing human transcriptomic immune responses to vaccinations</option> | |
82 </param> | |
83 <!-- <param argument="knowledgebase" type="text" value="" label="Pattern to add to output, i.e. the database used for enrichment" help="Default is NULL, thus nothing is added to the output."/> --> | |
84 <expand macro="sample_genes_mapping"/> | |
85 <param argument="ID_provided" type="text" value="uniprotswissprot" label="Define provided identifier" help="Define the ID type used in your dataframe of average Log2(FCs) and (adjusted) p-values. The ID should relate to attributes available using attributes = biomaRt::listAttributes(biomaRt::useMart(biomart = &quot;ENSEMBL_MART_ENSEMBL&quot;, dataset = &quot;hsapiens_gene_ensembl&quot;)) or the attributes specific for the defined organism (e.g. &quot;mmusculus_gene_ensembl&quot; or &quot;rnorvegicus_gene_ensembl&quot;)."/> | |
86 <!-- set of supported species can be determined with msigdbr::msigdbr_species() | |
87 TODO commented species require input to get_ID_to_map --> | |
88 <param argument="organism" type="select" label="Organism used" help=""> | |
89 <!-- <option value="Anolis carolinensis"/> --> | |
90 <option value="Bos taurus"/> | |
91 <option value="Caenorhabditis elegans"/> | |
92 <option value="Canis lupus familiaris"/> | |
93 <option value="Danio rerio"/> | |
94 <option value="Drosophila melanogaster"/> | |
95 <!-- <option value="Equus caballus"/> --> | |
96 <!-- <option value="Felis catus"/> --> | |
97 <option value="Gallus gallus"/> | |
98 <option value="Homo sapiens" selected="true"/> | |
99 <!-- <option value="Macaca mulatta"/> --> | |
100 <!-- <option value="Monodelphis domestica"/> --> | |
101 <option value="Mus musculus"/> | |
102 <!-- <option value="Ornithorhynchus anatinus"/> --> | |
103 <!-- <option value="Pan troglodytes"/> --> | |
104 <option value="Rattus norvegicus"/> | |
105 <option value="Saccharomyces cerevisiae"/> | |
106 <!-- <option value="Schizosaccharomyces pombe 972h-"/> --> | |
107 <option value="Sus scrofa"/> | |
108 <!-- <option value="Xenopus tropicalis"/> --> | |
109 </param> | |
110 <!-- should be pvalue pvalueadj if used downstream of fc_workflow or intensity_workflow --> | |
111 <param argument="pvalue_decision" type="text" value="pvalueadj" label="Pattern to select columns containing p-values to use" help="Examples: When pvalue, all columns ending on _pvalue are used to filter for significantly altered candidates, whereas the pattern pvalueadj will use all columns ending with this pattern"/> | |
112 <param argument="significance_cutoff_candidates" type="float" value="0.05" min="0" max="1" label="Significance cutoff to filter for candidates used for enrichment" help="All candidates with (adjusted) p-value below this threshold will be subjected to enrichment analysis"/> | |
113 <param argument="significance_cutoff_terms" type="float" value="0.05" min="0" max="1" label="Significance cutoff to identify significantly enriched terms" help="All terms with (adjusted) p-value below this threshold will be considered significantly enriched"/> | |
114 <param argument="padjust_method" type="select" label="Method for p-value adjustment during enrichment analysis" help=""> | |
115 <option value="holm">Holm</option> | |
116 <option value="hochberg">Hochberg</option> | |
117 <option value="hommel">Hommel</option> | |
118 <option value="bonferroni">Bonferroni</option> | |
119 <option value="BY">Benjamini &amp; Yekutieli (BY)</option> | |
120 <option value="fdr" selected="true">Benjamini &amp; Hochberg (BH/fdr)</option> | |
121 <option value="none">None</option> | |
122 </param> | |
123 <param argument="direction_calculation" type="select" label="Decide how to calculate the direction of the term regulation" help="Decide whether to use median or mean values of the Log2(fold changes) of the candidates used for enrichment and assigned to the term."> | |
124 <option value="median">Median</option> | |
125 <option value="mean">Mean</option> | |
126 </param> | |
127 <param argument="topx" type="integer" min="1" value="10" label="Number of top enriched pathways to return and visualize" help="In addition to exporting and visualizing all enriched terms and the significantly enriched terms, the top enriched terms will be exported and visualized based on the value defined here."/> | |
128 <param argument="topx_per_comparison" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Extract the top enriched terms condition-wise" help="Decide whether to extract the top enriched terms condition-wise or based on their summed enrichment over all conditions"/> | |
129 <param argument="plot_term_candidates" type="select" optional="true" label="Decide whether to visualize candidates assigned to enriched terms" help=""> | |
130 <option value="significant">Candidates of significantly enriched terms</option> | |
131 <option value="all">Candidates of all enriched terms</option> | |
132 </param> | |
133 <param argument="color_up" type="color" value="#DC0000" label="Color for up-regulated candidates"/> | |
134 <param argument="color_down" type="color" value="#3C5488" label="Color for down-regulated candidates"/> | |
135 <param name="out_select" type="select" multiple="true" optional="true" label="Optional outputs"> | |
136 <option value="tables" selected="true">Detailed tables</option> | |
137 <option value="plots" selected="true">Plots</option> | |
138 </param> | |
139 </inputs> | |
140 <outputs> | |
141 <data name="summary" format="csv" from_work_dir="Summary_ALL.csv"/> | |
142 <collection name="output" type="list" label="${tool.name} on ${on_string}: additional tables"> | |
143 <discover_datasets pattern="__name_and_ext__" directory="Rdata"/> | |
144 <filter>out_select and "tables" in out_select</filter> | |
145 </collection> | |
146 <collection name="plots" type="list" label="${tool.name} on ${on_string}: plots"> | |
147 <discover_datasets pattern="__name_and_ext__" directory="Plots"/> | |
148 <filter>out_select and "plots" in out_select</filter> | |
149 </collection> | |
150 | |
151 <collection name="sig_output" type="list" label="${tool.name} on ${on_string}: additional tables for significantly enriched terms"> | |
152 <discover_datasets pattern="__name_and_ext__" directory="CandidatesSignificantTerms/Rdata"/> | |
153 <filter>plot_term_candidates and "significant" in plot_term_candidates</filter> <!-- guard against an unset optional select, as the out_select filter does --> | |
154 <filter>out_select and "tables" in out_select</filter> | |
155 </collection> | |
156 <collection name="sig_plots" type="list" label="${tool.name} on ${on_string}: plots for significantly enriched terms"> | |
157 <discover_datasets pattern="__name_and_ext__" directory="CandidatesSignificantTerms/Plots"/> | |
158 <filter>plot_term_candidates and "significant" in plot_term_candidates</filter> <!-- guard against an unset optional select, as the out_select filter does --> | |
159 <filter>out_select and "plots" in out_select</filter> | |
160 </collection> | |
161 | |
162 <collection name="all_output" type="list" label="${tool.name} on ${on_string}: additional tables for all enriched terms"> | |
163 <discover_datasets pattern="__name_and_ext__" directory="CandidatesAllTerms/Rdata"/> | |
164 <filter>plot_term_candidates and "all" in plot_term_candidates</filter> <!-- guard against an unset optional select, as the out_select filter does --> | |
165 <filter>out_select and "tables" in out_select</filter> | |
166 </collection> | |
167 <collection name="all_plots" type="list" label="${tool.name} on ${on_string}: plots for all enriched terms"> | |
168 <discover_datasets pattern="__name_and_ext__" directory="CandidatesAllTerms/Plots"/> | |
169 <filter>plot_term_candidates and "all" in plot_term_candidates</filter> <!-- guard against an unset optional select, as the out_select filter does --> | |
170 <filter>out_select and "plots" in out_select</filter> | |
171 </collection> | |
172 </outputs> | |
173 <tests> | |
174 <test expect_num_outputs="5"> | |
175 <param name="dat_calculated" value="dat_calculated.csv" ftype="csv"/> | |
176 <param name="plot_term_candidates" value="significant"/> | |
177 <output name="summary"> | |
178 <assert_contents> | |
179 <has_n_lines n="89"/> | |
180 <has_n_columns sep="," n="7"/> | |
181 </assert_contents> | |
182 </output> | |
183 <output_collection name="output" count="5" type="list"> | |
184 <element name="Enrichment_results_log.p.adjust_pvalueadj_0.05" ftype="csv"> | |
185 <assert_contents> | |
186 <has_n_lines n="45"/> | |
187 <has_n_columns sep="," n="3"/> | |
188 </assert_contents> | |
189 </element> | |
190 <element name="Enrichment_results_long_pvalueadj_0.05_median_FC" ftype="csv"> | |
191 <assert_contents> | |
192 <has_n_lines n="70"/> | |
193 <has_n_columns sep="," n="12"/> | |
194 </assert_contents> | |
195 </element> | |
196 <element name="Enrichment_results_median_FC_pvalueadj_0.05" ftype="csv"> | |
197 <assert_contents> | |
198 <has_n_lines n="45"/> | |
199 <has_n_columns sep="," n="3"/> | |
200 </assert_contents> | |
201 </element> | |
202 <element name="MSigDB_gene_set_ID_mapping" ftype="csv"> | |
203 <assert_contents> | |
204 <has_n_lines n="8210"/> | |
205 <has_n_columns sep="," n="2"/> | |
206 </assert_contents> | |
207 </element> | |
208 <element name="Summary_Top10_combined" ftype="csv"> | |
209 <assert_contents> | |
210 <has_n_lines n="29"/> | |
211 <has_n_columns sep="," n="7"/> | |
212 </assert_contents> | |
213 </element> | |
214 </output_collection> | |
215 <output_collection name="plots" count="9" type="list"/> | |
216 <output_collection name="sig_output" count="8" type="list"> | |
217 <element name="Candidates_HALLMARK_COMPLEMENT" ftype="csv"> | |
218 <assert_contents> | |
219 <has_n_lines n="13"/> | |
220 <has_n_columns sep="," n="8"/> | |
221 </assert_contents> | |
222 </element> | |
223 <element name="Candidates_HALLMARK_INFLAMMATORY_RESPONSE" ftype="csv"> | |
224 <assert_contents> | |
225 <has_n_lines n="10"/> | |
226 <has_n_columns sep="," n="8"/> | |
227 </assert_contents> | |
228 </element> | |
229 <element name="Candidates_HALLMARK_TNFA_SIGNALING_VIA_NFKB" ftype="csv"> | |
230 <assert_contents> | |
231 <has_n_lines n="16"/> | |
232 <has_n_columns sep="," n="8"/> | |
233 </assert_contents> | |
234 </element> | |
235 <element name="Candidates_HALLMARK_UV_RESPONSE_UP" ftype="csv"> | |
236 <assert_contents> | |
237 <has_n_lines n="8"/> | |
238 <has_n_columns sep="," n="8"/> | |
239 </assert_contents> | |
240 </element> | |
241 <element name="Candidates_ggplot_HALLMARK_COMPLEMENT" ftype="csv"> | |
242 <assert_contents> | |
243 <has_n_lines n="25"/> | |
244 <has_n_columns sep="," n="8"/> | |
245 </assert_contents> | |
246 </element> | |
247 <element name="Candidates_ggplot_HALLMARK_INFLAMMATORY_RESPONSE" ftype="csv"> | |
248 <assert_contents> | |
249 <has_n_lines n="19"/> | |
250 <has_n_columns sep="," n="8"/> | |
251 </assert_contents> | |
252 </element> | |
253 <element name="Candidates_ggplot_HALLMARK_TNFA_SIGNALING_VIA_NFKB" ftype="csv"> | |
254 <assert_contents> | |
255 <has_n_lines n="31"/> | |
256 <has_n_columns sep="," n="8"/> | |
257 </assert_contents> | |
258 </element> | |
259 <element name="Candidates_ggplot_HALLMARK_UV_RESPONSE_UP" ftype="csv"> | |
260 <assert_contents> | |
261 <has_n_lines n="15"/> | |
262 <has_n_columns sep="," n="8"/> | |
263 </assert_contents> | |
264 </element> | |
265 </output_collection> | |
266 <output_collection name="sig_plots" count="8" type="list"/> | |
267 </test> | |
268 <!-- same + sample genes --> | |
269 <test expect_num_outputs="5"> | |
270 <param name="dat_calculated" value="dat_calculated.csv" ftype="csv"/> | |
271 <param name="sampleGenes" value="sampleGenes.csv" ftype="csv"/> | |
272 <param name="plot_term_candidates" value="significant"/> | |
273 <output name="summary"> | |
274 <assert_contents> | |
275 <has_n_lines n="89"/> | |
276 <has_n_columns sep="," n="7"/> | |
277 </assert_contents> | |
278 </output> | |
279 <output_collection name="output" count="5" type="list"/> | |
280 <output_collection name="plots" count="9" type="list"/> | |
281 <output_collection name="sig_output" count="8" type="list"/> | |
282 <output_collection name="sig_plots" count="8" type="list"/> | |
283 </test> | |
284 <!-- same + sample genes + sample mapping --> | |
285 <test expect_num_outputs="5"> | |
286 <param name="dat_calculated" value="dat_calculated.csv" ftype="csv"/> | |
287 <param name="sampleGenes" value="sampleGenes.csv" ftype="csv"/> | |
288 <param name="sampleMapping" value="sampleMapping.csv" ftype="csv"/> | |
289 <param name="plot_term_candidates" value="significant"/> | |
290 <output name="summary"> | |
291 <assert_contents> | |
292 <has_n_lines n="89"/> | |
293 <has_n_columns sep="," n="7"/> | |
294 </assert_contents> | |
295 </output> | |
296 <output_collection name="output" count="5" type="list"/> | |
297 <output_collection name="plots" count="9" type="list"/> | |
298 <output_collection name="sig_output" count="8" type="list"/> | |
299 <output_collection name="sig_plots" count="8" type="list"/> | |
300 </test> | |
301 <!-- same as 1st test but plot all candidates + only output tables --> | |
302 <test expect_num_outputs="3"> | |
303 <param name="dat_calculated" value="dat_calculated.csv" ftype="csv"/> | |
304 <param name="plot_term_candidates" value="all"/> | |
305 <param name="out_select" value="tables"/> | |
306 <output name="summary"> | |
307 <assert_contents> | |
308 <has_n_lines n="89"/> | |
309 <has_n_columns sep="," n="7"/> | |
310 </assert_contents> | |
311 </output> | |
312 <output_collection name="output" count="5" type="list"/> | |
313 <output_collection name="all_output" count="88" type="list"/> | |
314 </test> | |
315 </tests> | |
316 <help><![CDATA[ | |
317 Enrichment analysis workflow using MSigDB gene sets | |
318 | |
319 Given a table with average Log2(FCs) and (adjusted) p-values, an enrichment analysis is conducted against the gene sets provided by MSigDB. | |
320 ]]></help> | |
321 <expand macro="citations"/> | |
322 </tool> |