# HG changeset patch # User galaxyp # Date 1597268213 14400 # Node ID 9c8e7137d331f892a0f8d7e6e20fa85137bbeca8 # Parent e50ec3a9a3f9aff53a4fe4a72d585ff4c56949ba "planemo upload commit 59afcdaf7afdf574c475f0faae73127f0e563328" diff -r e50ec3a9a3f9 -r 9c8e7137d331 MT2MQ.R --- a/MT2MQ.R Fri Jun 26 11:15:17 2020 -0400 +++ b/MT2MQ.R Wed Aug 12 17:36:53 2020 -0400 @@ -2,10 +2,10 @@ # Load libraries suppressPackageStartupMessages(library(tidyverse)) -#default_locale() +suppressPackageStartupMessages(library(taxize)) # Set parameters from arguments -args = commandArgs(trailingOnly = TRUE) +args <- commandArgs(trailingOnly = TRUE) data <- args[1] # data: full path to file or directory: # - if in functional or f-t mode, should be a tsv file of HUMAnN2 gene families, after regrouping and renaming to GO, joining samples, and renormalizing to CPM. @@ -18,49 +18,67 @@ ontology <- unlist(strsplit(args[3], split = ",")) # ontology: only for function or f-t mode. A string of the GO namespace(s) to include, separated by commas. # ex: to include all: "molecular_function,biological_process,cellular_component" -outfile <- args[4] - # outfile: full path with pathname and extension for output + +int_file <- args[4] + # int_file: full path and file name and extension to write intensity file + +func_file <- args[5] + # func_file: full path and file name and extension to write func file + +tax_file <- args[6] + # tax_file: full path and file name and extension to write tax file + # Functional mode -if (mode == "f"){ - out <- read.delim(file=data, header=TRUE, sep='\t') %>% - filter(!grepl(".+g__.+",X..Gene.Family)) %>% - separate(col=X..Gene.Family, into=c("id", "Extra"), sep=": ", fill="left") %>% - separate(col=Extra, into = c("namespace", "name"), sep = " ", fill="left", extra="merge") %>% - mutate(namespace = if_else(namespace == "[MF]", true = "molecular_function", false = if_else(namespace == "[BP]", true = "biological_process", false = "cellular_component"))) %>% - filter(namespace %in% ontology) %>% +if (mode == "f") { + int <- read.delim(file = data, header = TRUE, sep = "\t") %>% + filter(!grepl(".+g__.+", X..Gene.Family)) %>% + separate(col = X..Gene.Family, into = c("id", "Extra"), sep = ": ", fill = "left") %>% + separate(col = Extra, into = c("namespace", "name"), sep = " ", fill = "left", extra = "merge") %>% + mutate(namespace = if_else(namespace == "[MF]", true = "molecular_function", false = if_else(namespace == "[BP]", true = "biological_process", false = "cellular_component"))) %>% + filter(namespace %in% ontology) %>% select(id, name, namespace, 4:ncol(.)) + func <- int %>% + select(id) %>% + mutate(gos = id) + write.table(x = int, file = int_file, quote = FALSE, sep = "\t", row.names = FALSE) + write.table(x = func, file = func_file, quote = FALSE, sep = "\t", row.names = FALSE) } # Taxonomic mode -if (mode == "t"){ +if (mode == "t") { files <- dir(path = data) - out <- tibble(filename = files) %>% - mutate(file_contents= map(filename, ~read.delim(file=file.path(data, .), header=TRUE, sep = "\t"))) %>% - unnest(cols = c(file_contents)) %>% - rename(sample = filename) %>% - separate(col = sample, into = c("sample",NA), sep=".tsv") %>% - pivot_wider(names_from = sample, values_from = abundance) %>% - mutate(rank = "genus") %>% - rename(name = genus) %>% - mutate(id = row_number(name)) %>% # filler for taxon id but should eventually find a way to get id from ncbi database + int <- tibble(filename = files) %>% + mutate(file_contents = map(filename, ~read.delim(file = file.path(data, .), header = TRUE, sep = "\t"))) %>% + unnest(cols = c(file_contents)) %>% + rename(sample = filename) %>% + separate(col = sample, into = c("sample", NA), sep = ".tsv") %>% + pivot_wider(names_from = sample, values_from = abundance) %>% + mutate(rank = "genus") %>% + rename(name = genus) %>% + mutate(name = as.character(name)) %>% + mutate(id = get_uid(name, key = NULL, messages = FALSE)) %>% select(id, name, rank, 2:ncol(.)) + tax <- int %>% + select(id) %>% + mutate(tax = id) + write.table(x = int, file = int_file, quote = FALSE, sep = "\t", row.names = FALSE) + write.table(x = tax, file = tax_file, quote = FALSE, sep = "\t", row.names = FALSE) } # Function-taxonomy mode -if (mode == "ft"){ - out <- read.delim(file=data, header=TRUE, sep='\t') %>% - filter(grepl(".+g__.+",X..Gene.Family)) %>% - separate(col=X..Gene.Family, into=c("id", "Extra"), sep=": ", fill="left") %>% - separate(col=Extra, into = c("namespace", "name"), sep = " ", fill="left", extra="merge") %>% - separate(col = name, into = c("name", "taxa"), sep="\\|", extra = "merge") %>% - separate(col = taxa, into = c("Extra", "genus", "species"), sep = "__") %>% select(-"Extra") %>% - mutate_if(is.character, str_replace_all, pattern = "\\.s", replacement = "") %>% - mutate_at(c("species"), str_replace_all, pattern = "_", replacement = " ") %>% - mutate(namespace = if_else(namespace == "[MF]", true = "molecular_function", false = if_else(namespace == "[BP]", true = "biological_process", false = "cellular_component"))) %>% - filter(namespace %in% ontology) %>% +if (mode == "ft") { + ft <- read.delim(file = data, header = TRUE, sep = "\t") %>% + filter(grepl(".+g__.+", X..Gene.Family)) %>% + separate(col = X..Gene.Family, into = c("id", "Extra"), sep = ": ", fill = "left") %>% + separate(col = Extra, into = c("namespace", "name"), sep = " ", fill = "left", extra = "merge") %>% + separate(col = name, into = c("name", "taxa"), sep = "\\|", extra = "merge") %>% + separate(col = taxa, into = c("Extra", "genus", "species"), sep = "__") %>% + select(-"Extra") %>% + mutate_if(is.character, str_replace_all, pattern = "\\.s", replacement = "") %>% + mutate_at(c("species"), str_replace_all, pattern = "_", replacement = " ") %>% + mutate(namespace = if_else(namespace == "[MF]", true = "molecular_function", false = if_else(namespace == "[BP]", true = "biological_process", false = "cellular_component"))) %>% + filter(namespace %in% ontology) %>% select(id, name, namespace, 4:ncol(.)) + write.table(x = ft, file = int_file, quote = FALSE, sep = "\t", row.names = FALSE) } - -# Write file -write.table(x = out, file = outfile, quote = FALSE, sep = "\t", row.names = FALSE) diff -r e50ec3a9a3f9 -r 9c8e7137d331 MT2MQ.xml --- a/MT2MQ.xml Fri Jun 26 11:15:17 2020 -0400 +++ b/MT2MQ.xml Wed Aug 12 17:36:53 2020 -0400 @@ -1,18 +1,19 @@ - + Tool to prepare metatranscriptomic outputs from ASaiM for Metaquantome r-tidyverse + r-taxize @@ -49,7 +50,14 @@ - + + + options['mode'] == "f" + + + options['mode'] == "t" + + @@ -60,7 +68,7 @@ - + @@ -74,7 +82,7 @@ - + @@ -88,7 +96,7 @@ - + @@ -111,21 +119,16 @@ - **Taxonomic**: takes in genus-level MetaPhlAn2 results for each sample. The input files should be named as the sample. - - Output: a single tabular file formatted for use as input for Metaquantome's taxonomic mode. + - Output: a taxonomy file and an intensity file to use in Metaquantome's taxonomy mode. The "peptide" column name is "id" and the taxon column name is "tax". - **Functional**: takes in a single file of HUMAnN2 results, regrouped and renamed to GO terms, with all samples joined together into one table, and renormalized to CPM. See the MT2MQ functional workflow for these processing steps. User can choose which GO namespace(s) to include. - - Output: a single tabular file formatted for use as input for Metaquantome's functional mode. + - Output: a function file and an intensity file to use in Metaquantome's functional mode. The "peptide" column name is "id" and the functional column name is "gos". - **Functional/taxonomic**: takes the same input as the functional mode. User can choose which GO namespace(s) to include. - Output: a single tabular file including all GO terms and the taxa which express them and their abundances for each sample. This file *cannot* be used as input for Metaquantome. -**Outputs**: ------------- - -MT2MQ produces a single tabular output, formatted to be used as input for Metaquantome or for other analysis. - ]]>