changeset 2:9c8e7137d331 draft

"planemo upload commit 59afcdaf7afdf574c475f0faae73127f0e563328"
author galaxyp
date Wed, 12 Aug 2020 17:36:53 -0400
parents e50ec3a9a3f9
children 08dda0f86758
files MT2MQ.R MT2MQ.xml
diffstat 2 files changed, 70 insertions(+), 49 deletions(-) [+]
line wrap: on
line diff
--- a/MT2MQ.R	Fri Jun 26 11:15:17 2020 -0400
+++ b/MT2MQ.R	Wed Aug 12 17:36:53 2020 -0400
@@ -2,10 +2,10 @@
 
 # Load libraries
 suppressPackageStartupMessages(library(tidyverse))
-#default_locale()
+suppressPackageStartupMessages(library(taxize))
 
 # Set parameters from arguments
-args = commandArgs(trailingOnly = TRUE)
+args <- commandArgs(trailingOnly = TRUE)
 data <- args[1]
   # data: full path to file or directory:
   #   - if in functional or f-t mode, should be a tsv file of HUMAnN2 gene families, after regrouping and renaming to GO, joining samples, and renormalizing to CPM.
@@ -18,49 +18,67 @@
 ontology <- unlist(strsplit(args[3], split = ","))
   # ontology: only for function or f-t mode. A string of the GO namespace(s) to include, separated by commas.
   #   ex: to include all: "molecular_function,biological_process,cellular_component"
-outfile <- args[4]
-  # outfile: full path with pathname and extension for output
+
+int_file <- args[4]
+  # int_file: full path and file name and extension to write intensity file
+
+func_file <- args[5]
+  # func_file: full path and file name and extension to write func file
+
+tax_file <- args[6]
+  # tax_file: full path and file name and extension to write tax file
+
 
 # Functional mode
-if (mode == "f"){
-  out <- read.delim(file=data, header=TRUE, sep='\t') %>% 
-    filter(!grepl(".+g__.+",X..Gene.Family)) %>% 
-    separate(col=X..Gene.Family, into=c("id", "Extra"), sep=": ", fill="left") %>% 
-    separate(col=Extra, into = c("namespace", "name"), sep = " ", fill="left", extra="merge") %>% 
-    mutate(namespace = if_else(namespace == "[MF]", true = "molecular_function", false = if_else(namespace == "[BP]", true = "biological_process", false = "cellular_component"))) %>% 
-    filter(namespace %in% ontology) %>% 
+if (mode == "f") {
+  int <- read.delim(file = data, header = TRUE, sep = "\t") %>%
+    filter(!grepl(".+g__.+", X..Gene.Family)) %>%
+    separate(col = X..Gene.Family, into = c("id", "Extra"), sep = ": ", fill = "left") %>%
+    separate(col = Extra, into = c("namespace", "name"), sep = " ", fill = "left", extra = "merge") %>%
+    mutate(namespace = if_else(namespace == "[MF]", true = "molecular_function", false = if_else(namespace == "[BP]", true = "biological_process", false = "cellular_component"))) %>%
+    filter(namespace %in% ontology) %>%
     select(id, name, namespace, 4:ncol(.))
+  func <- int %>%
+    select(id) %>%
+    mutate(gos = id)
+  write.table(x = int, file = int_file, quote = FALSE, sep = "\t", row.names = FALSE)
+  write.table(x = func, file = func_file, quote = FALSE, sep = "\t", row.names = FALSE)
 }
 
 # Taxonomic mode
-if (mode == "t"){
+if (mode == "t") {
   files <- dir(path = data)
-  out <- tibble(filename = files) %>% 
-    mutate(file_contents= map(filename, ~read.delim(file=file.path(data, .), header=TRUE, sep = "\t"))) %>% 
-    unnest(cols = c(file_contents)) %>% 
-    rename(sample = filename) %>% 
-    separate(col = sample, into = c("sample",NA), sep=".tsv") %>% 
-    pivot_wider(names_from = sample, values_from = abundance) %>% 
-    mutate(rank = "genus") %>% 
-    rename(name = genus) %>% 
-    mutate(id = row_number(name)) %>% # filler for taxon id but should eventually find a way to get id from ncbi database
+  int <- tibble(filename = files) %>%
+    mutate(file_contents = map(filename, ~read.delim(file = file.path(data, .), header = TRUE, sep = "\t"))) %>%
+    unnest(cols = c(file_contents)) %>%
+    rename(sample = filename) %>%
+    separate(col = sample, into = c("sample", NA), sep = ".tsv") %>%
+    pivot_wider(names_from = sample, values_from = abundance) %>%
+    mutate(rank = "genus") %>%
+    rename(name = genus) %>%
+    mutate(name = as.character(name)) %>%
+    mutate(id = get_uid(name, key = NULL, messages = FALSE)) %>%
     select(id, name, rank, 2:ncol(.))
+  tax <- int %>%
+    select(id) %>%
+    mutate(tax = id)
+  write.table(x = int, file = int_file, quote = FALSE, sep = "\t", row.names = FALSE)
+  write.table(x = tax, file = tax_file, quote = FALSE, sep = "\t", row.names = FALSE)
 }
 
 # Function-taxonomy mode
-if (mode == "ft"){
-  out <- read.delim(file=data, header=TRUE, sep='\t') %>% 
-    filter(grepl(".+g__.+",X..Gene.Family)) %>% 
-    separate(col=X..Gene.Family, into=c("id", "Extra"), sep=": ", fill="left") %>% 
-    separate(col=Extra, into = c("namespace", "name"), sep = " ", fill="left", extra="merge") %>% 
-    separate(col = name, into = c("name", "taxa"), sep="\\|", extra = "merge") %>%
-    separate(col = taxa, into = c("Extra", "genus", "species"), sep = "__") %>% select(-"Extra") %>%
-    mutate_if(is.character, str_replace_all, pattern = "\\.s", replacement = "") %>% 
-    mutate_at(c("species"), str_replace_all, pattern = "_", replacement = " ") %>% 
-    mutate(namespace = if_else(namespace == "[MF]", true = "molecular_function", false = if_else(namespace == "[BP]", true = "biological_process", false = "cellular_component"))) %>% 
-    filter(namespace %in% ontology) %>% 
+if (mode == "ft") {
+  ft <- read.delim(file = data, header = TRUE, sep = "\t") %>%
+    filter(grepl(".+g__.+", X..Gene.Family)) %>%
+    separate(col = X..Gene.Family, into = c("id", "Extra"), sep = ": ", fill = "left") %>%
+    separate(col = Extra, into = c("namespace", "name"), sep = " ", fill = "left", extra = "merge") %>%
+    separate(col = name, into = c("name", "taxa"), sep = "\\|", extra = "merge") %>%
+    separate(col = taxa, into = c("Extra", "genus", "species"), sep = "__") %>%
+    select(-"Extra") %>%
+    mutate_if(is.character, str_replace_all, pattern = "\\.s", replacement = "") %>%
+    mutate_at(c("species"), str_replace_all, pattern = "_", replacement = " ") %>%
+    mutate(namespace = if_else(namespace == "[MF]", true = "molecular_function", false = if_else(namespace == "[BP]", true = "biological_process", false = "cellular_component"))) %>%
+    filter(namespace %in% ontology) %>%
     select(id, name, namespace, 4:ncol(.))
+  write.table(x = ft, file = int_file, quote = FALSE, sep = "\t", row.names = FALSE)
 }
-
-# Write file
-write.table(x = out, file = outfile, quote = FALSE, sep = "\t", row.names = FALSE)
--- a/MT2MQ.xml	Fri Jun 26 11:15:17 2020 -0400
+++ b/MT2MQ.xml	Wed Aug 12 17:36:53 2020 -0400
@@ -1,18 +1,19 @@
-<tool id="mt2mq" name="MT2MQ" version="1.0">
+<tool id="mt2mq" name="MT2MQ" version="1.1.0">
     <description>Tool to prepare metatranscriptomic outputs from ASaiM for Metaquantome</description>
     <requirements>
         <requirement type="package" version="1.2.1">r-tidyverse</requirement>
+        <requirement type="package" version="0.9.97">r-taxize</requirement>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
 
 #if $options.mode == "f" or $options.mode == "ft":
-    Rscript '$__tool_directory__/MT2MQ.R' '$options.input_files' '$options.mode' $options.ontology '$mq_output'
+    Rscript --vanilla '$__tool_directory__/MT2MQ.R' '$options.input_files' '$options.mode' '$options.ontology' '$int_output' '$func_output' '$tax_output'
 #elif $options.mode == "t":
     mkdir in_dir
     #for $input in $options.input_files:
         && cp '$input' 'in_dir/${input.name.rsplit('.',1)[0]}'
     #end for
-    && Rscript '$__tool_directory__/MT2MQ.R' in_dir t NA '$mq_output'
+    && Rscript --vanilla '$__tool_directory__/MT2MQ.R' in_dir t NA '$int_output' '$func_output' '$tax_output'
 #end if
 
     ]]>
@@ -49,7 +50,14 @@
     </inputs>
     
     <outputs>
-        <data name="mq_output" format="tabular" label="${options.mode}_output.tabular"/>
+        <data name="int_output" format="tabular" label="${options.mode}_int.tabular"/>
+        <data name="func_output" format="tabular" label="func.tabular">
+            <filter>options['mode'] == "f"</filter>
+        </data>
+        <data name="tax_output" format="tabular" label="tax.tabular">
+            <filter>options['mode'] == "t"</filter>
+        </data>
+
     </outputs>
     
     
@@ -60,7 +68,7 @@
                 <param name="input_files" value="T4A.tsv,T4B.tsv,T4C.tsv,T7A.tsv,T7B.tsv,T7C.tsv" ftype="tsv"/>
                 <param name="ontology" value="NA"/>
             </conditional>
-            <output name="mq_output">
+            <output name="int_output">
                 <assert_contents>
                     <has_text text="rank"/>
                     <has_text text="genus"/>
@@ -74,7 +82,7 @@
                 <param name="input_files" value="T4T7_func.tsv" ftype="tsv"/>
                 <param name="ontology" value="molecular_function"/>
             </conditional>
-            <output name="mq_output">
+            <output name="int_output">
                 <assert_contents>
                     <has_text text="namespace"/>
                     <has_text text="molecular_function"/>
@@ -88,7 +96,7 @@
                 <param name="input_files" value="T4T7_func.tsv" ftype="tsv"/>
                 <param name="ontology" value="biological_process"/>
             </conditional>
-            <output name="mq_output">
+            <output name="int_output">
                 <assert_contents>
                     <has_text text="namespace"/>
                     <has_text text="genus"/>
@@ -111,21 +119,16 @@
 
 - **Taxonomic**: takes in genus-level MetaPhlAn2 results for each sample. The input files should be named as the sample. 
 
-	- Output: a single tabular file formatted for use as input for Metaquantome's taxonomic mode.
+	- Output: a taxonomy file and an intensity file to use in Metaquantome's taxonomy mode. The "peptide" column name is "id" and the taxon column name is "tax".
 
 - **Functional**: takes in a single file of HUMAnN2 results, regrouped and renamed to GO terms, with all samples joined together into one table, and renormalized to CPM. See the MT2MQ functional workflow for these processing steps. User can choose which GO namespace(s) to include.
 
-	- Output: a single tabular file formatted for use as input for Metaquantome's functional mode.
+	- Output: a function file and an intensity file to use in Metaquantome's functional mode. The "peptide" column name is "id" and the functional column name is "gos". 
 
 - **Functional/taxonomic**: takes the same input as the functional mode. User can choose which GO namespace(s) to include.
 
 	- Output: a single tabular file including all GO terms and the taxa which express them and their abundances for each sample. This file *cannot* be used as input for Metaquantome.
 
-**Outputs**:
-------------
-
-MT2MQ produces a single tabular output, formatted to be used as input for Metaquantome or for other analysis.
-
     ]]></help>
     
     <citations>