Mercurial > repos > iuc > multigsea
comparison multiGSEA.R @ 0:28e29a3d0eda draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/multigsea commit 5c1b8a2b105a80e236f88e71a743147d79925ac4
author | iuc |
---|---|
date | Wed, 07 Jun 2023 19:48:50 +0000 |
parents | |
children | e48b10ce08b8 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:28e29a3d0eda |
---|---|
1 library(multiGSEA, | |
2 quietly = TRUE, | |
3 warn.conflicts = FALSE) | |
4 library(argparse, quietly = TRUE, warn.conflicts = FALSE) | |
5 | |
6 ################################################################################ | |
7 ### Input Processing | |
8 ################################################################################ | |
9 | |
10 | |
# Command-line interface.
# Every omics layer is optional and is paired with an option that names the
# identifier type for that layer; organism, p-value combination method,
# p-adjustment method and pathway databases are mandatory.
cli <- ArgumentParser(description = "multiGSEA R script")

# Transcriptome layer
cli$add_argument(
  "--transcriptomics",
  help = "Transcriptomics data",
  required = FALSE
)
cli$add_argument(
  "--transcriptome_ids",
  help = "Transcriptomics ids",
  default = "SYMBOL",
  required = FALSE
)

# Proteome layer
cli$add_argument(
  "--proteomics",
  help = "Proteomics data",
  required = FALSE
)
cli$add_argument(
  "--proteome_ids",
  help = "Proteomics ids",
  default = "SYMBOL",
  required = FALSE
)

# Metabolome layer
cli$add_argument(
  "--metabolomics",
  help = "Metabolomics data",
  required = FALSE
)
cli$add_argument(
  "--metabolome_ids",
  help = "Metabolomics ids",
  default = "HMDB",
  required = FALSE
)

# Mandatory analysis settings
cli$add_argument("--organism", help = "Organism", required = TRUE)
cli$add_argument(
  "--combine_pvalues",
  help = "Combine p-values method",
  required = TRUE
)
cli$add_argument(
  "--padj_method",
  help = "P-adjustment method",
  required = TRUE
)
cli$add_argument(
  "--databases",
  help = "Pathway databases",
  required = TRUE
)

# Parsed options; the rest of the script reads them through `args`.
args <- cli$parse_args()
49 | |
## ----Load library-------------------------------------------------------------

# Lookup from the --organism keyword to the organism annotation package that
# is attached for it.
# Fix: zebrafish (drerio) belongs to org.Dr.eg.db and Xenopus (xlaevis) to
# org.Xl.eg.db; the two entries used to be swapped.
organism_mapping <- c(
  "hsapiens" = "org.Hs.eg.db",
  "mmusculus" = "org.Mm.eg.db",
  "rnorvegicus" = "org.Rn.eg.db",
  "cfamiliaris" = "org.Cf.eg.db",
  "btaurus" = "org.Bt.eg.db",
  "sscrofa" = "org.Ss.eg.db",
  "ggallus" = "org.Gg.eg.db",
  "drerio" = "org.Dr.eg.db",
  "xlaevis" = "org.Xl.eg.db",
  "dmelanogaster" = "org.Dm.eg.db",
  "celegans" = "org.Ce.eg.db"
)

# An unknown keyword would index to NA and make library() fail with an
# unhelpful message, so reject it explicitly and list the valid choices.
if (!args$organism %in% names(organism_mapping)) {
  stop(
    "Unsupported organism '", args$organism, "'. Supported organisms: ",
    paste(names(organism_mapping), collapse = ", "),
    call. = FALSE
  )
}

# `[[` yields the bare package name (no names attribute) for library().
library(organism_mapping[[args$organism]], character.only = TRUE)
67 | |
68 | |
## ----Load omics data----------------------------------------------------------

# Each layer given on the command line is read as a tab-separated table with a
# header row and "." as decimal mark (the read.delim() defaults). `layer`
# collects the names of the layers that were supplied, in the order
# transcriptome, proteome, metabolome.
layer <- c()

if (!is.null(args$transcriptomics)) {
  transcriptome <- read.delim(args$transcriptomics)
  layer <- c(layer, "transcriptome")
}

if (!is.null(args$proteomics)) {
  proteome <- read.delim(args$proteomics)
  layer <- c(layer, "proteome")
}

if (!is.null(args$metabolomics)) {
  metabolome <- read.delim(args$metabolomics)
  layer <- c(layer, "metabolome")
}
98 | |
## ----rank_features------------------------------------------------------------

# Build the named rank vector for one omics table.
#
# tbl        data frame read for the layer
# id_col     name of the column holding the feature identifiers
# layer_name label used only in the error message
#
# Returns the result of rankFeatures() on the logFC and pValue columns, named
# by the identifier column. Stops with a clear message when a required column
# is absent; `$` would silently return NULL in that case and leave the ranks
# unnamed.
rank_layer <- function(tbl, id_col, layer_name) {
  needed <- c("logFC", "pValue", id_col)
  absent <- setdiff(needed, colnames(tbl))
  if (length(absent) > 0) {
    stop(
      "The ", layer_name, " table lacks required column(s): ",
      paste(absent, collapse = ", "),
      call. = FALSE
    )
  }
  # `[[` matches column names exactly, unlike `$`.
  ranks <- rankFeatures(tbl[["logFC"]], tbl[["pValue"]])
  names(ranks) <- tbl[[id_col]]
  ranks
}

# create data structure
omics_data <- initOmicsDataStructure(layer)

## add transcriptome layer
if (!is.null(args$transcriptomics)) {
  omics_data$transcriptome <- rank_layer(transcriptome, "Symbol",
                                         "transcriptomics")
}

## add proteome layer
if (!is.null(args$proteomics)) {
  omics_data$proteome <- rank_layer(proteome, "Symbol", "proteomics")
}

## add metabolome layer
## HMDB features have to be updated to the new HMDB format
if (!is.null(args$metabolomics)) {
  omics_data$metabolome <- rank_layer(metabolome, "HMDB", "metabolomics")
  # Pad only old-style accessions (HMDB + exactly 5 digits) to 7 digits.
  # The previous unanchored replacement of "HMDB" also padded identifiers that
  # were already in the 7-digit format, producing invalid 9-digit accessions.
  names(omics_data$metabolome) <- gsub(
    "HMDB(\\d{5})(?!\\d)",
    "HMDB00\\1",
    names(omics_data$metabolome),
    perl = TRUE
  )
}


## remove NA's and sort feature ranks
omics_data <- lapply(omics_data, function(vec) {
  sort(vec[!is.na(vec)])
})
132 | |
## ----Pathway definitions------------------------------------------------------

# --databases is passed as one comma-separated string; split it into the
# individual database names.
selected_dbs <- strsplit(args$databases, ",", fixed = TRUE)[[1]]

# Pathway feature sets for the supplied layers, using the identifier type
# requested for each layer.
pathways <- getMultiOmicsFeatures(
  dbs = selected_dbs,
  layer = layer,
  returnTranscriptome = args$transcriptome_ids,
  returnProteome = args$proteome_ids,
  returnMetabolome = args$metabolome_ids,
  organism = args$organism,
  useLocal = FALSE
)

## ----calculate enrichment-----------------------------------------------------

# Run the enrichment on the ranked features against the pathway definitions.
enrichment_scores <- multiGSEA(pathways, omics_data)
150 | |
## ----combine_pvalues----------------------------------------------------------

# Pathway names are taken from the first element of `pathways` and reused for
# both the p-value extraction and the leading column of the output.
pathway_names <- names(pathways[[1]])

results <- extractPvalues(
  enrichmentScores = enrichment_scores,
  pathwayNames = pathway_names
)

# Combine the p-values with the requested method, then adjust the combined
# values with the requested correction.
results[["combined_pval"]] <- combinePvalues(
  results,
  method = args$combine_pvalues
)
results[["combined_padj"]] <- p.adjust(
  results[["combined_pval"]],
  method = args$padj_method
)

# Put the pathway name in front of the p-value columns.
results <- cbind(data.frame(pathway = pathway_names), results)

## ----Write output-------------------------------------------------------------

# Tab-separated, unquoted table with a header row and no row names.
write.table(
  results,
  file = "results.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)