diff multiGSEA.R @ 1:e48b10ce08b8 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/multigsea commit 945cc63f011002e3f61d7e848d556b647e9c8878
author iuc
date Wed, 21 Feb 2024 15:41:52 +0000
parents 28e29a3d0eda
children
line wrap: on
line diff
--- a/multiGSEA.R	Wed Jun 07 19:48:50 2023 +0000
+++ b/multiGSEA.R	Wed Feb 21 15:41:52 2024 +0000
@@ -1,6 +1,7 @@
 library(multiGSEA,
-        quietly = TRUE,
-        warn.conflicts = FALSE)
+    quietly = TRUE,
+    warn.conflicts = FALSE
+)
 library(argparse, quietly = TRUE, warn.conflicts = FALSE)
 
 ################################################################################
@@ -11,56 +12,69 @@
 # Collect arguments from command line
 parser <- ArgumentParser(description = "multiGSEA R script")
 
-parser$add_argument("--transcriptomics",  required = FALSE,
-                    help = "Transcriptomics data")
+parser$add_argument("--transcriptomics",
+    required = FALSE,
+    help = "Transcriptomics data"
+)
 parser$add_argument(
-  "--transcriptome_ids",
-  required = FALSE,
-  help = "Transcriptomics ids",
-  default = "SYMBOL"
+    "--transcriptome_ids",
+    required = FALSE,
+    help = "Transcriptomics ids",
+    default = "SYMBOL"
 )
-parser$add_argument("--proteomics",  required = FALSE,
-                    help = "Proteomics data")
+parser$add_argument("--proteomics",
+    required = FALSE,
+    help = "Proteomics data"
+)
 parser$add_argument(
-  "--proteome_ids",
-  required = FALSE,
-  help = "Proteomics ids",
-  default = "SYMBOL"
+    "--proteome_ids",
+    required = FALSE,
+    help = "Proteomics ids",
+    default = "SYMBOL"
 )
-parser$add_argument("--metabolomics",  required = FALSE,
-                    help = "Metabolomics data")
+parser$add_argument("--metabolomics",
+    required = FALSE,
+    help = "Metabolomics data"
+)
 parser$add_argument(
-  "--metabolome_ids",
-  required = FALSE,
-  help = "Metabolomics ids",
-  default = "HMDB"
+    "--metabolome_ids",
+    required = FALSE,
+    help = "Metabolomics ids",
+    default = "HMDB"
 )
-parser$add_argument("--organism",  required = TRUE,
-                    help = "Organism")
-parser$add_argument("--combine_pvalues",  required = TRUE,
-                    help = "Combine p-values method")
-parser$add_argument("--padj_method",  required = TRUE,
-                    help = "P-adjustment method")
+parser$add_argument("--organism",
+    required = TRUE,
+    help = "Organism"
+)
+parser$add_argument("--combine_pvalues",
+    required = TRUE,
+    help = "Combine p-values method"
+)
+parser$add_argument("--padj_method",
+    required = TRUE,
+    help = "P-adjustment method"
+)
 parser$add_argument("--databases",
-                    required = TRUE,
-                    help = "Pathway databases")
+    required = TRUE,
+    help = "Pathway databases"
+)
 
 args <- parser$parse_args()
 
 ## ----Load library-------------------------------------------------------------
 
 organism_mapping <- c(
-  "hsapiens" = "org.Hs.eg.db",
-  "mmusculus" = "org.Mm.eg.db",
-  "rnorvegicus" = "org.Rn.eg.db",
-  "cfamiliaris" = "org.Cf.eg.db",
-  "btaurus" = "org.Bt.eg.db",
-  "sscrofa" = "org.Ss.eg.db",
-  "ggallus" = "org.Gg.eg.db",
-  "drerio" = "org.Xl.eg.db",
-  "xlaevis" = "org.Dr.eg.db",
-  "dmelanogaster" = "org.Dm.eg.db",
-  "celegans" = "org.Ce.eg.db"
+    "hsapiens" = "org.Hs.eg.db",
+    "mmusculus" = "org.Mm.eg.db",
+    "rnorvegicus" = "org.Rn.eg.db",
+    "cfamiliaris" = "org.Cf.eg.db",
+    "btaurus" = "org.Bt.eg.db",
+    "sscrofa" = "org.Ss.eg.db",
+    "ggallus" = "org.Gg.eg.db",
+    "drerio" = "org.Xl.eg.db",
+    "xlaevis" = "org.Dr.eg.db",
+    "dmelanogaster" = "org.Dm.eg.db",
+    "celegans" = "org.Ce.eg.db"
 )
 
 library(organism_mapping[args$organism], character.only = TRUE)
@@ -71,29 +85,31 @@
 layer <- c()
 
 if (!is.null(args$transcriptomics)) {
-  transcriptome <- read.csv(
-    args$transcriptomics,
-    header = TRUE,
-    sep = "\t",
-    dec = "."
-  )
-  layer <- append(layer, "transcriptome")
+    transcriptome <- read.csv(
+        args$transcriptomics,
+        header = TRUE,
+        sep = "\t",
+        dec = "."
+    )
+    layer <- append(layer, "transcriptome")
 }
 
 if (!is.null(args$proteomics)) {
-  proteome <- read.csv(args$proteomics,
-                       header = TRUE,
-                       sep = "\t",
-                       dec = ".")
-  layer <- append(layer, "proteome")
+    proteome <- read.csv(args$proteomics,
+        header = TRUE,
+        sep = "\t",
+        dec = "."
+    )
+    layer <- append(layer, "proteome")
 }
 
 if (!is.null(args$metabolomics)) {
-  metabolome <- read.csv(args$metabolomics,
-                         header = TRUE,
-                         sep = "\t",
-                         dec = ".")
-  layer <- append(layer, "metabolome")
+    metabolome <- read.csv(args$metabolomics,
+        header = TRUE,
+        sep = "\t",
+        dec = "."
+    )
+    layer <- append(layer, "metabolome")
 }
 
 ## ----rank_features------------------------------------------------------------
@@ -103,70 +119,76 @@
 
 ## add transcriptome layer
 if (!is.null(args$transcriptomics)) {
-  omics_data$transcriptome <- rankFeatures(transcriptome$logFC,
-                                           transcriptome$pValue)
-  names(omics_data$transcriptome) <- transcriptome$Symbol
+    omics_data$transcriptome <- rankFeatures(
+        transcriptome$logFC,
+        transcriptome$pValue
+    )
+    names(omics_data$transcriptome) <- transcriptome$Symbol
 }
 
 ## add proteome layer
 if (!is.null(args$proteomics)) {
-  omics_data$proteome <- rankFeatures(proteome$logFC, proteome$pValue)
-  names(omics_data$proteome) <- proteome$Symbol
+    omics_data$proteome <- rankFeatures(proteome$logFC, proteome$pValue)
+    names(omics_data$proteome) <- proteome$Symbol
 }
 
 ## add metabolome layer
 ## HMDB features have to be updated to the new HMDB format
 if (!is.null(args$metabolomics)) {
-  omics_data$metabolome <-
-    rankFeatures(metabolome$logFC, metabolome$pValue)
-  names(omics_data$metabolome) <- metabolome$HMDB
-  names(omics_data$metabolome) <- gsub("HMDB", "HMDB00",
-                                       names(omics_data$metabolome))
+    omics_data$metabolome <-
+        rankFeatures(metabolome$logFC, metabolome$pValue)
+    names(omics_data$metabolome) <- metabolome$HMDB
+    names(omics_data$metabolome) <- gsub(
+        "HMDB", "HMDB00",
+        names(omics_data$metabolome)
+    )
 }
 
 
 ## remove NA's and sort feature ranks
 omics_data <- lapply(omics_data, function(vec) {
-  sort(vec[!is.na(vec)])
+    sort(vec[!is.na(vec)])
 })
 
 ## ----Pathway definitions------------------------------------------------------
 
 pathways <-
-  getMultiOmicsFeatures(
-    dbs = unlist(strsplit(args$databases, ",", fixed = TRUE)),
-    layer = layer,
-    returnTranscriptome = args$transcriptome_ids,
-    returnProteome = args$proteome_ids,
-    returnMetabolome = args$metabolome_ids,
-    organism = args$organism,
-    useLocal = FALSE
-  )
+    getMultiOmicsFeatures(
+        dbs = unlist(strsplit(args$databases, ",", fixed = TRUE)),
+        layer = layer,
+        returnTranscriptome = args$transcriptome_ids,
+        returnProteome = args$proteome_ids,
+        returnMetabolome = args$metabolome_ids,
+        organism = args$organism,
+        useLocal = FALSE
+    )
 
 ## ----calculate enrichment-----------------------------------------------------
 
 enrichment_scores <-
-  multiGSEA(pathways, omics_data)
+    multiGSEA(pathways, omics_data)
 
 ## ----combine_pvalues----------------------------------------------------------
 
-df <- extractPvalues(enrichmentScores = enrichment_scores,
-                     pathwayNames = names(pathways[[1]]))
+df <- extractPvalues(
+    enrichmentScores = enrichment_scores,
+    pathwayNames = names(pathways[[1]])
+)
 
 df$combined_pval <-
-  combinePvalues(df, method = args$combine_pvalues)
+    combinePvalues(df, method = args$combine_pvalues)
 df$combined_padj <-
-  p.adjust(df$combined_pval, method = args$padj_method)
+    p.adjust(df$combined_pval, method = args$padj_method)
 
 df <- cbind(data.frame(pathway = names(pathways[[1]])), df)
 
 ## ----Write output-------------------------------------------------------------
 
 write.table(
-  df,
-  file = "results.tsv",
-  quote = FALSE,
-  sep = "\t",
-  col.names = TRUE,
-  row.names = FALSE
+    df,
+    file = "results.tsv",
+    quote = FALSE,
+    sep = "\t",
+    col.names = TRUE,
+    row.names = FALSE
 )