comparison multiGSEA.R @ 0:28e29a3d0eda draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/multigsea commit 5c1b8a2b105a80e236f88e71a743147d79925ac4
author iuc
date Wed, 07 Jun 2023 19:48:50 +0000
parents
children e48b10ce08b8
comparison
equal deleted inserted replaced
-1:000000000000 0:28e29a3d0eda
1 library(multiGSEA,
2 quietly = TRUE,
3 warn.conflicts = FALSE)
4 library(argparse, quietly = TRUE, warn.conflicts = FALSE)
5
6 ################################################################################
7 ### Input Processing
8 ################################################################################
9
10
# Command-line interface.
# The three omics inputs are optional; each one has a companion "*_ids"
# option with a default identifier type. Organism, p-value combination
# method, p-value adjustment method and pathway databases are mandatory.
parser <- ArgumentParser(description = "multiGSEA R script")

# Transcriptome layer
parser$add_argument(
  "--transcriptomics",
  required = FALSE,
  help = "Transcriptomics data"
)
parser$add_argument(
  "--transcriptome_ids",
  required = FALSE,
  help = "Transcriptomics ids",
  default = "SYMBOL"
)

# Proteome layer
parser$add_argument(
  "--proteomics",
  required = FALSE,
  help = "Proteomics data"
)
parser$add_argument(
  "--proteome_ids",
  required = FALSE,
  help = "Proteomics ids",
  default = "SYMBOL"
)

# Metabolome layer
parser$add_argument(
  "--metabolomics",
  required = FALSE,
  help = "Metabolomics data"
)
parser$add_argument(
  "--metabolome_ids",
  required = FALSE,
  help = "Metabolomics ids",
  default = "HMDB"
)

# Analysis settings
parser$add_argument(
  "--organism",
  required = TRUE,
  help = "Organism"
)
parser$add_argument(
  "--combine_pvalues",
  required = TRUE,
  help = "Combine p-values method"
)
parser$add_argument(
  "--padj_method",
  required = TRUE,
  help = "P-adjustment method"
)
parser$add_argument(
  "--databases",
  required = TRUE,
  help = "Pathway databases"
)

# Parsed options; options that were not supplied and have no default are NULL
# (the rest of the script tests the omics inputs with is.null()).
args <- parser$parse_args()
49
50 ## ----Load library-------------------------------------------------------------
51
# Lookup from the organism code given via --organism to the org.*.eg.db
# package that gets attached for it. The two-letter tag in the package name
# is the species abbreviation, so zebrafish (Danio rerio) is "Dr" and the
# African clawed frog (Xenopus laevis) is "Xl"; these two entries were
# previously swapped.
organism_mapping <- c(
  hsapiens = "org.Hs.eg.db",
  mmusculus = "org.Mm.eg.db",
  rnorvegicus = "org.Rn.eg.db",
  cfamiliaris = "org.Cf.eg.db",
  btaurus = "org.Bt.eg.db",
  sscrofa = "org.Ss.eg.db",
  ggallus = "org.Gg.eg.db",
  drerio = "org.Dr.eg.db",
  xlaevis = "org.Xl.eg.db",
  dmelanogaster = "org.Dm.eg.db",
  celegans = "org.Ce.eg.db"
)
65
# Attach the package that belongs to the requested organism.
# An unknown organism code would index the mapping to NA and make library()
# fail with an unhelpful "invalid package name"; report the problem and the
# accepted values explicitly instead.
if (!args$organism %in% names(organism_mapping)) {
  stop(
    "Unsupported organism '", args$organism, "'. Supported values: ",
    paste(names(organism_mapping), collapse = ", "),
    call. = FALSE
  )
}
# `[[` extracts the bare package name (no names attribute) for library().
library(organism_mapping[[args$organism]], character.only = TRUE)
67
68
69 ## ----Load omics data----------------------------------------------------------
70
# Names of the omics layers for which an input file was supplied, collected
# in the order transcriptome, proteome, metabolome.
layer <- c()

# Read one omics input: tab-separated, with a header row and "." as the
# decimal mark.
read_omics_table <- function(path) {
  read.csv(path, header = TRUE, sep = "\t", dec = ".")
}

if (!is.null(args$transcriptomics)) {
  transcriptome <- read_omics_table(args$transcriptomics)
  layer <- c(layer, "transcriptome")
}

if (!is.null(args$proteomics)) {
  proteome <- read_omics_table(args$proteomics)
  layer <- c(layer, "proteome")
}

if (!is.null(args$metabolomics)) {
  metabolome <- read_omics_table(args$metabolomics)
  layer <- c(layer, "metabolome")
}
98
99 ## ----rank_features------------------------------------------------------------
100
# Container that receives one ranked feature vector per selected layer.
omics_data <- initOmicsDataStructure(layer)

## Transcriptome layer: ranks are computed from the logFC and pValue columns
## and labelled with the Symbol column of the input table.
if (!is.null(args$transcriptomics)) {
  transcriptome_ranks <- rankFeatures(
    transcriptome$logFC,
    transcriptome$pValue
  )
  omics_data$transcriptome <- setNames(
    transcriptome_ranks,
    transcriptome$Symbol
  )
}

## Proteome layer: same procedure on the proteomics table.
if (!is.null(args$proteomics)) {
  proteome_ranks <- rankFeatures(proteome$logFC, proteome$pValue)
  omics_data$proteome <- setNames(proteome_ranks, proteome$Symbol)
}
116
## add metabolome layer
## Ranks come from the logFC and pValue columns and are labelled with the
## HMDB column. Old-style HMDB accessions ("HMDB" followed by 5 digits) are
## widened to the current 7-digit form by inserting "00" after the prefix.
if (!is.null(args$metabolomics)) {
  omics_data$metabolome <-
    rankFeatures(metabolome$logFC, metabolome$pValue)
  # The pattern is anchored to a complete old-style accession. A blanket
  # gsub("HMDB", "HMDB00", ...) would also prefix accessions that already
  # use the 7-digit format and turn them into invalid 9-digit IDs that match
  # nothing in the pathway definitions.
  names(omics_data$metabolome) <- sub(
    "^HMDB([0-9]{5})$",
    "HMDB00\\1",
    metabolome$HMDB
  )
}
126
127
## For every layer, discard features whose rank is NA and order the
## remaining ranks ascending.
omics_data <- lapply(omics_data, function(ranks) {
  has_rank <- !is.na(ranks)
  sort(ranks[has_rank])
})
132
133 ## ----Pathway definitions------------------------------------------------------
134
# --databases is a single comma-separated string; split it into the
# individual database names.
requested_dbs <- unlist(strsplit(args$databases, ",", fixed = TRUE))

# Retrieve the pathway feature sets for the selected layers, using the
# identifier type requested for each layer on the command line.
pathways <- getMultiOmicsFeatures(
  dbs = requested_dbs,
  layer = layer,
  returnTranscriptome = args$transcriptome_ids,
  returnProteome = args$proteome_ids,
  returnMetabolome = args$metabolome_ids,
  organism = args$organism,
  useLocal = FALSE
)
145
146 ## ----calculate enrichment-----------------------------------------------------
147
# Run the enrichment on the ranked features against the pathway definitions.
enrichment_scores <- multiGSEA(pathways, omics_data)

## ----combine_pvalues----------------------------------------------------------

# Pathway names are read from the first element of the pathway definitions
# and reused both for p-value extraction and as the leading output column.
pathway_names <- names(pathways[[1]])

df <- extractPvalues(
  enrichmentScores = enrichment_scores,
  pathwayNames = pathway_names
)

# Combine the extracted p-values with the method chosen on the command line,
# then adjust the combined p-values with the requested correction method.
df$combined_pval <- combinePvalues(df, method = args$combine_pvalues)
df$combined_padj <- p.adjust(df$combined_pval, method = args$padj_method)

# Put the pathway name in front of the p-value columns.
df <- cbind(data.frame(pathway = pathway_names), df)
162
163 ## ----Write output-------------------------------------------------------------
164
# Write the result table to "results.tsv" as tab-separated text with a header
# row, without row names and without quoting.
write.table(
  x = df,
  file = "results.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)