Mercurial > repos > galaxyp > mt2mq
comparison MT2MQ.R @ 2:9c8e7137d331 draft
"planemo upload commit 59afcdaf7afdf574c475f0faae73127f0e563328"
author | galaxyp |
---|---|
date | Wed, 12 Aug 2020 17:36:53 -0400 |
parents | e50ec3a9a3f9 |
children |
comparison
equal
deleted
inserted
replaced
1:e50ec3a9a3f9 | 2:9c8e7137d331 |
---|---|
1 # MT2MQ: prepares metatranscriptomic outputs from ASaiM (HUMAnN2 and MetaPhlAn) for metaQuantome | 1 # MT2MQ: prepares metatranscriptomic outputs from ASaiM (HUMAnN2 and MetaPhlAn) for metaQuantome |
2 | 2 |
3 # Load libraries | 3 # Load libraries |
4 suppressPackageStartupMessages(library(tidyverse)) | 4 suppressPackageStartupMessages(library(tidyverse)) |
5 #default_locale() | 5 suppressPackageStartupMessages(library(taxize)) |
6 | 6 |
7 # Set parameters from arguments | 7 # Set parameters from arguments |
8 args = commandArgs(trailingOnly = TRUE) | 8 args <- commandArgs(trailingOnly = TRUE) |
9 data <- args[1] | 9 data <- args[1] |
10 # data: full path to file or directory: | 10 # data: full path to file or directory: |
11 # - if in functional or f-t mode, should be a tsv file of HUMAnN2 gene families, after regrouping and renaming to GO, joining samples, and renormalizing to CPM. | 11 # - if in functional or f-t mode, should be a tsv file of HUMAnN2 gene families, after regrouping and renaming to GO, joining samples, and renormalizing to CPM. |
12 # - if in taxonomic mode, should be a directory of tsv files of MetaPhlAn genus-level results | 12 # - if in taxonomic mode, should be a directory of tsv files of MetaPhlAn genus-level results |
13 mode <- args[2] | 13 mode <- args[2] |
16 # -"t": taxonomy | 16 # -"t": taxonomy |
17 # -"ft": function-taxonomy | 17 # -"ft": function-taxonomy |
18 ontology <- unlist(strsplit(args[3], split = ",")) | 18 ontology <- unlist(strsplit(args[3], split = ",")) |
19 # ontology: only for function or f-t mode. A string of the GO namespace(s) to include, separated by commas. | 19 # ontology: only for function or f-t mode. A string of the GO namespace(s) to include, separated by commas. |
20 # ex: to include all: "molecular_function,biological_process,cellular_component" | 20 # ex: to include all: "molecular_function,biological_process,cellular_component" |
21 outfile <- args[4] | 21 |
22 # outfile: full path with pathname and extension for output | 22 int_file <- args[4] |
23 # int_file: full path and file name and extension to write intensity file | |
24 | |
25 func_file <- args[5] | |
26 # func_file: full path and file name and extension to write func file | |
27 | |
28 tax_file <- args[6] | |
29 # tax_file: full path and file name and extension to write tax file | |
30 | |
23 | 31 |
24 # Functional mode | 32 # Functional mode |
25 if (mode == "f"){ | 33 if (mode == "f") { |
26 out <- read.delim(file=data, header=TRUE, sep='\t') %>% | 34 int <- read.delim(file = data, header = TRUE, sep = "\t") %>% |
27 filter(!grepl(".+g__.+",X..Gene.Family)) %>% | 35 filter(!grepl(".+g__.+", X..Gene.Family)) %>% |
28 separate(col=X..Gene.Family, into=c("id", "Extra"), sep=": ", fill="left") %>% | 36 separate(col = X..Gene.Family, into = c("id", "Extra"), sep = ": ", fill = "left") %>% |
29 separate(col=Extra, into = c("namespace", "name"), sep = " ", fill="left", extra="merge") %>% | 37 separate(col = Extra, into = c("namespace", "name"), sep = " ", fill = "left", extra = "merge") %>% |
30 mutate(namespace = if_else(namespace == "[MF]", true = "molecular_function", false = if_else(namespace == "[BP]", true = "biological_process", false = "cellular_component"))) %>% | 38 mutate(namespace = if_else(namespace == "[MF]", true = "molecular_function", false = if_else(namespace == "[BP]", true = "biological_process", false = "cellular_component"))) %>% |
31 filter(namespace %in% ontology) %>% | 39 filter(namespace %in% ontology) %>% |
32 select(id, name, namespace, 4:ncol(.)) | 40 select(id, name, namespace, 4:ncol(.)) |
41 func <- int %>% | |
42 select(id) %>% | |
43 mutate(gos = id) | |
44 write.table(x = int, file = int_file, quote = FALSE, sep = "\t", row.names = FALSE) | |
45 write.table(x = func, file = func_file, quote = FALSE, sep = "\t", row.names = FALSE) | |
33 } | 46 } |
34 | 47 |
35 # Taxonomic mode | 48 # Taxonomic mode |
36 if (mode == "t"){ | 49 if (mode == "t") { |
37 files <- dir(path = data) | 50 files <- dir(path = data) |
38 out <- tibble(filename = files) %>% | 51 int <- tibble(filename = files) %>% |
39 mutate(file_contents= map(filename, ~read.delim(file=file.path(data, .), header=TRUE, sep = "\t"))) %>% | 52 mutate(file_contents = map(filename, ~read.delim(file = file.path(data, .), header = TRUE, sep = "\t"))) %>% |
40 unnest(cols = c(file_contents)) %>% | 53 unnest(cols = c(file_contents)) %>% |
41 rename(sample = filename) %>% | 54 rename(sample = filename) %>% |
42 separate(col = sample, into = c("sample",NA), sep=".tsv") %>% | 55 separate(col = sample, into = c("sample", NA), sep = ".tsv") %>% |
43 pivot_wider(names_from = sample, values_from = abundance) %>% | 56 pivot_wider(names_from = sample, values_from = abundance) %>% |
44 mutate(rank = "genus") %>% | 57 mutate(rank = "genus") %>% |
45 rename(name = genus) %>% | 58 rename(name = genus) %>% |
46 mutate(id = row_number(name)) %>% # filler for taxon id but should eventually find a way to get id from ncbi database | 59 mutate(name = as.character(name)) %>% |
60 mutate(id = get_uid(name, key = NULL, messages = FALSE)) %>% | |
47 select(id, name, rank, 2:ncol(.)) | 61 select(id, name, rank, 2:ncol(.)) |
62 tax <- int %>% | |
63 select(id) %>% | |
64 mutate(tax = id) | |
65 write.table(x = int, file = int_file, quote = FALSE, sep = "\t", row.names = FALSE) | |
66 write.table(x = tax, file = tax_file, quote = FALSE, sep = "\t", row.names = FALSE) | |
48 } | 67 } |
49 | 68 |
50 # Function-taxonomy mode | 69 # Function-taxonomy mode |
51 if (mode == "ft"){ | 70 if (mode == "ft") { |
52 out <- read.delim(file=data, header=TRUE, sep='\t') %>% | 71 ft <- read.delim(file = data, header = TRUE, sep = "\t") %>% |
53 filter(grepl(".+g__.+",X..Gene.Family)) %>% | 72 filter(grepl(".+g__.+", X..Gene.Family)) %>% |
54 separate(col=X..Gene.Family, into=c("id", "Extra"), sep=": ", fill="left") %>% | 73 separate(col = X..Gene.Family, into = c("id", "Extra"), sep = ": ", fill = "left") %>% |
55 separate(col=Extra, into = c("namespace", "name"), sep = " ", fill="left", extra="merge") %>% | 74 separate(col = Extra, into = c("namespace", "name"), sep = " ", fill = "left", extra = "merge") %>% |
56 separate(col = name, into = c("name", "taxa"), sep="\\|", extra = "merge") %>% | 75 separate(col = name, into = c("name", "taxa"), sep = "\\|", extra = "merge") %>% |
57 separate(col = taxa, into = c("Extra", "genus", "species"), sep = "__") %>% select(-"Extra") %>% | 76 separate(col = taxa, into = c("Extra", "genus", "species"), sep = "__") %>% |
58 mutate_if(is.character, str_replace_all, pattern = "\\.s", replacement = "") %>% | 77 select(-"Extra") %>% |
59 mutate_at(c("species"), str_replace_all, pattern = "_", replacement = " ") %>% | 78 mutate_if(is.character, str_replace_all, pattern = "\\.s", replacement = "") %>% |
60 mutate(namespace = if_else(namespace == "[MF]", true = "molecular_function", false = if_else(namespace == "[BP]", true = "biological_process", false = "cellular_component"))) %>% | 79 mutate_at(c("species"), str_replace_all, pattern = "_", replacement = " ") %>% |
61 filter(namespace %in% ontology) %>% | 80 mutate(namespace = if_else(namespace == "[MF]", true = "molecular_function", false = if_else(namespace == "[BP]", true = "biological_process", false = "cellular_component"))) %>% |
81 filter(namespace %in% ontology) %>% | |
62 select(id, name, namespace, 4:ncol(.)) | 82 select(id, name, namespace, 4:ncol(.)) |
83 write.table(x = ft, file = int_file, quote = FALSE, sep = "\t", row.names = FALSE) | |
63 } | 84 } |
64 | |
65 # Write file | |
66 write.table(x = out, file = outfile, quote = FALSE, sep = "\t", row.names = FALSE) |