mt2mq: MT2MQ.R comparison

comparison MT2MQ.R @ 2:9c8e7137d331 draft

"planemo upload commit 59afcdaf7afdf574c475f0faae73127f0e563328"

author	galaxyp
date	Wed, 12 Aug 2020 17:36:53 -0400
parents	e50ec3a9a3f9
children

comparison

equal deleted inserted replaced

-:e50ec3a9a3f9
+:9c8e7137d331
 # MT2MQ: prepares metatranscriptomic outputs from ASaiM (HUMAnN2 and metaphlan) for metaquantome
 # Load libraries
 suppressPackageStartupMessages(library(tidyverse))
-#default_locale()
+suppressPackageStartupMessages(library(taxize))
 # Set parameters from arguments
-args = commandArgs(trailingOnly = TRUE)
+args <- commandArgs(trailingOnly = TRUE)
 data <- args[1]
 # data: full path to file or directory:
 #   - if in functional or f-t mode, should be a tsv file of HUMAnN2 gene families, after regrouping and renaming to GO, joining samples, and renormalizing to CPM.
 #   - if in taxonomic mode, should be a directory of tsv files of metaphlan genus-level results
 mode <- args[2]
 #   -"t": taxonomy
 #   -"ft": function-taxonomy
 ontology <- unlist(strsplit(args[3], split = ","))
 # ontology: only for function or f-t mode. A string of the GO namespace(s) to include, separated by commas.
 #   ex: to include all: "molecular_function,biological_process,cellular_component"
-outfile <- args[4]
-# outfile: full path with pathname and extension for output
+int_file <- args[4]
+# int_file: full path and file name and extension to write intensity file
+func_file <- args[5]
+# func_file: full path and file name and extension to write func file
+tax_file <- args[6]
+# tax_file: full path and file name and extension to write tax file
 # Functional mode
-if (mode == "f"){
+if (mode == "f") {
-out <- read.delim(file=data, header=TRUE, sep='\t') %>%
+int <- read.delim(file = data, header = TRUE, sep = "\t") %>%
-filter(!grepl(".+g__.+",X..Gene.Family)) %>%
+filter(!grepl(".+g__.+", X..Gene.Family)) %>%
-separate(col=X..Gene.Family, into=c("id", "Extra"), sep=": ", fill="left") %>%
+separate(col = X..Gene.Family, into = c("id", "Extra"), sep = ": ", fill = "left") %>%
-separate(col=Extra, into = c("namespace", "name"), sep = " ", fill="left", extra="merge") %>%
+separate(col = Extra, into = c("namespace", "name"), sep = " ", fill = "left", extra = "merge") %>%
 mutate(namespace = if_else(namespace == "[MF]", true = "molecular_function", false = if_else(namespace == "[BP]", true = "biological_process", false = "cellular_component"))) %>%
 filter(namespace %in% ontology) %>%
 select(id, name, namespace, 4:ncol(.))
+func <- int %>%
+select(id) %>%
+mutate(gos = id)
+write.table(x = int, file = int_file, quote = FALSE, sep = "\t", row.names = FALSE)
+write.table(x = func, file = func_file, quote = FALSE, sep = "\t", row.names = FALSE)
 }
 # Taxonomic mode
-if (mode == "t"){
+if (mode == "t") {
 files <- dir(path = data)
-out <- tibble(filename = files) %>%
+int <- tibble(filename = files) %>%
-mutate(file_contents= map(filename, ~read.delim(file=file.path(data, .), header=TRUE, sep = "\t"))) %>%
+mutate(file_contents = map(filename, ~read.delim(file = file.path(data, .), header = TRUE, sep = "\t"))) %>%
 unnest(cols = c(file_contents)) %>%
 rename(sample = filename) %>%
-separate(col = sample, into = c("sample",NA), sep=".tsv") %>%
+separate(col = sample, into = c("sample", NA), sep = ".tsv") %>%
 pivot_wider(names_from = sample, values_from = abundance) %>%
 mutate(rank = "genus") %>%
 rename(name = genus) %>%
-mutate(id = row_number(name)) %>% # filler for taxon id but should eventually find a way to get id from ncbi database
+mutate(name = as.character(name)) %>%
+mutate(id = get_uid(name, key = NULL, messages = FALSE)) %>%
 select(id, name, rank, 2:ncol(.))
+tax <- int %>%
+select(id) %>%
+mutate(tax = id)
+write.table(x = int, file = int_file, quote = FALSE, sep = "\t", row.names = FALSE)
+write.table(x = tax, file = tax_file, quote = FALSE, sep = "\t", row.names = FALSE)
 }
 # Function-taxonomy mode
-if (mode == "ft"){
+if (mode == "ft") {
-out <- read.delim(file=data, header=TRUE, sep='\t') %>%
+ft <- read.delim(file = data, header = TRUE, sep = "\t") %>%
-filter(grepl(".+g__.+",X..Gene.Family)) %>%
+filter(grepl(".+g__.+", X..Gene.Family)) %>%
-separate(col=X..Gene.Family, into=c("id", "Extra"), sep=": ", fill="left") %>%
+separate(col = X..Gene.Family, into = c("id", "Extra"), sep = ": ", fill = "left") %>%
-separate(col=Extra, into = c("namespace", "name"), sep = " ", fill="left", extra="merge") %>%
+separate(col = Extra, into = c("namespace", "name"), sep = " ", fill = "left", extra = "merge") %>%
-separate(col = name, into = c("name", "taxa"), sep="\\|", extra = "merge") %>%
+separate(col = name, into = c("name", "taxa"), sep = "\\|", extra = "merge") %>%
-separate(col = taxa, into = c("Extra", "genus", "species"), sep = "__") %>% select(-"Extra") %>%
+separate(col = taxa, into = c("Extra", "genus", "species"), sep = "__") %>%
-mutate_if(is.character, str_replace_all, pattern = "\\.s", replacement = "") %>%
+select(-"Extra") %>%
-mutate_at(c("species"), str_replace_all, pattern = "_", replacement = " ") %>%
+mutate_if(is.character, str_replace_all, pattern = "\\.s", replacement = "") %>%
-mutate(namespace = if_else(namespace == "[MF]", true = "molecular_function", false = if_else(namespace == "[BP]", true = "biological_process", false = "cellular_component"))) %>%
+mutate_at(c("species"), str_replace_all, pattern = "_", replacement = " ") %>%
-filter(namespace %in% ontology) %>%
+mutate(namespace = if_else(namespace == "[MF]", true = "molecular_function", false = if_else(namespace == "[BP]", true = "biological_process", false = "cellular_component"))) %>%
+filter(namespace %in% ontology) %>%
 select(id, name, namespace, 4:ncol(.))
+write.table(x = ft, file = int_file, quote = FALSE, sep = "\t", row.names = FALSE)
 }
-# Write file
-write.table(x = out, file = outfile, quote = FALSE, sep = "\t", row.names = FALSE)

Mercurial > repos > galaxyp > mt2mq

comparison MT2MQ.R @ 2:9c8e7137d331 draft