Repository 'mt2mq'
hg clone https://toolshed.g2.bx.psu.edu/repos/galaxyp/mt2mq

Changeset 0:6bee94458567 (2020-06-23)
Next changeset 1:e50ec3a9a3f9 (2020-06-26)
Commit message:
"planemo upload commit 53bcf55b73cb251446150026242b4d47d49d3469"
added:
MT2MQ.R
MT2MQ.xml
test-data/T4A.tsv
test-data/T4B.tsv
test-data/T4C.tsv
test-data/T4T7_func.tsv
test-data/T7A.tsv
test-data/T7B.tsv
test-data/T7C.tsv
test-data/f_output.tabular
test-data/ft_output.tabular
test-data/t_output.tabular
b
diff -r 000000000000 -r 6bee94458567 MT2MQ.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/MT2MQ.R Tue Jun 23 07:46:07 2020 -0400
[
@@ -0,0 +1,66 @@
+# MT2MQ: prepares metatranscriptomic outputs from ASaiM (HUMAnN2 and metaphlan) for metaquantome
+
+# Load libraries
+suppressPackageStartupMessages(library(tidyverse))
+#default_locale()
+
+# Set parameters from arguments
+# Usage: Rscript MT2MQ.R <data> <mode> <ontology> <outfile>
+args <- commandArgs(trailingOnly = TRUE)
+# All four positional arguments are read below; fail early with a usage message
+# instead of an obscure error further down the script.
+if (length(args) < 4) {
+  stop("Usage: Rscript MT2MQ.R <data> <mode> <ontology> <outfile>", call. = FALSE)
+}
+data <- args[1]
+  # data: full path to file or directory:
+  #   - if in functional or f-t mode, should be a tsv file of HUMAnN2 gene families, after regrouping and renaming to GO, joining samples, and renormalizing to CPM.
+  #   - if in taxonomic mode, should be a directory of tsv files of metaphlan genus-level results
+mode <- args[2]
+  # mode:
+  #   -"f": function
+  #   -"t": taxonomy
+  #   -"ft": function-taxonomy
+# An unrecognised mode would skip every branch below and leave `out` undefined,
+# so reject it here with a clear message.
+if (!mode %in% c("f", "t", "ft")) {
+  stop("mode must be one of \"f\", \"t\" or \"ft\", got: ", mode, call. = FALSE)
+}
+ontology <- unlist(strsplit(args[3], split = ","))
+  # ontology: only for function or f-t mode. A string of the GO namespace(s) to include, separated by commas.
+  #   ex: to include all: "molecular_function,biological_process,cellular_component"
+outfile <- args[4]
+  # outfile: full path with pathname and extension for output
+
+# Functional mode
+# Reads the gene-family table, drops rows whose label carries a "g__" taxon
+# annotation, splits the label into GO id / namespace tag / name, expands the
+# namespace tag to its full name and keeps only the requested namespaces.
+if (mode == "f"){
+  gene_families <- read.delim(file = data, header = TRUE, sep = "\t")
+  unstratified <- filter(gene_families, !grepl(".+g__.+", X..Gene.Family))
+
+  # "GO:0000014: [MF] some name" -> id = "GO:0000014", namespace = "[MF]", name = "some name"
+  go_terms <- separate(unstratified, col = X..Gene.Family, into = c("id", "Extra"), sep = ": ", fill = "left")
+  go_terms <- separate(go_terms, col = Extra, into = c("namespace", "name"), sep = " ", fill = "left", extra = "merge")
+
+  # [MF] and [BP] are mapped explicitly; any other non-missing tag becomes cellular_component
+  go_terms <- mutate(
+    go_terms,
+    namespace = if_else(
+      namespace == "[MF]",
+      true = "molecular_function",
+      false = if_else(
+        namespace == "[BP]",
+        true = "biological_process",
+        false = "cellular_component"
+      )
+    )
+  )
+
+  requested <- filter(go_terms, namespace %in% ontology)
+  # Annotation columns first, then every column from position 4 onwards (the sample columns)
+  out <- select(requested, id, name, namespace, 4:ncol(requested))
+}
+
+# Taxonomic mode
+# Reads every file in the `data` directory (each with `genus` and `abundance`
+# columns), labels rows with the sample name derived from the file name, and
+# pivots to one row per genus with one abundance column per sample.
+if (mode == "t"){
+  files <- dir(path = data)
+  if (length(files) == 0) {
+    stop("No input files found in directory: ", data, call. = FALSE)
+  }
+  out <- tibble(filename = files) %>%
+    mutate(file_contents = map(filename, ~read.delim(file = file.path(data, .), header = TRUE, sep = "\t"))) %>%
+    unnest(cols = c(file_contents)) %>%
+    # Sample name = file name minus a literal trailing ".tsv". The pattern is
+    # escaped and anchored, so names without the extension pass through as-is
+    # and names that merely contain "tsv" are not truncated.
+    mutate(filename = str_replace(filename, "\\.tsv$", "")) %>%
+    rename(sample = filename) %>%
+    pivot_wider(names_from = sample, values_from = abundance) %>%
+    mutate(rank = "genus") %>%
+    rename(name = genus) %>%
+    mutate(id = row_number(name)) %>% # filler for taxon id but should eventually find a way to get id from ncbi database
+    select(id, name, rank, 2:ncol(.))
+}
+
+# Function-taxonomy mode
+# Keeps only rows whose label carries a "g__" taxon annotation, splits the label
+# into GO id / namespace tag / name / genus / species, expands the namespace tag
+# to its full name and keeps only the requested namespaces.
+if (mode == "ft"){
+  out <- read.delim(file = data, header = TRUE, sep = "\t") %>%
+    filter(grepl(".+g__.+", X..Gene.Family)) %>%
+    separate(col = X..Gene.Family, into = c("id", "Extra"), sep = ": ", fill = "left") %>%
+    separate(col = Extra, into = c("namespace", "name"), sep = " ", fill = "left", extra = "merge") %>%
+    separate(col = name, into = c("name", "taxa"), sep = "\\|", extra = "merge") %>%
+    # "g__Genus.s__Genus_species" -> Extra = "g", genus = "Genus.s", species = "Genus_species"
+    separate(col = taxa, into = c("Extra", "genus", "species"), sep = "__") %>%
+    select(-"Extra") %>%
+    # Remove only the ".s" left at the end of the genus by the split above;
+    # GO ids, GO names and species are left untouched.
+    mutate(genus = str_replace(genus, "\\.s$", "")) %>%
+    mutate(species = str_replace_all(species, "_", " ")) %>%
+    # [MF] and [BP] are mapped explicitly; any other non-missing tag becomes cellular_component
+    mutate(namespace = if_else(
+      namespace == "[MF]",
+      true = "molecular_function",
+      false = if_else(
+        namespace == "[BP]",
+        true = "biological_process",
+        false = "cellular_component"
+      )
+    )) %>%
+    filter(namespace %in% ontology) %>%
+    # Annotation columns first, then everything from position 4 on (genus, species, samples)
+    select(id, name, namespace, 4:ncol(.))
+}
+
+# Write file
+# row.names = FALSE: by default write.table() prepends an unnamed row-number
+# column, which leaves the header one field shorter than every data row and
+# breaks tab-separated consumers.
+write.table(x = out, file = outfile, quote = FALSE, sep = "\t", row.names = FALSE)
b
diff -r 000000000000 -r 6bee94458567 MT2MQ.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/MT2MQ.xml Tue Jun 23 07:46:07 2020 -0400
[
@@ -0,0 +1,140 @@
+<tool id="mt2mq" name="MT2MQ" version="1.0">
+    <description>Tool to prepare metatranscriptomic outputs from ASaiM for Metaquantome</description>
+    <requirements>
+        <requirement type="package" version="1.2.1">r-tidyverse</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+
+## Functional and functional-taxonomic modes: pass the single input table straight to the script.
+#if $options.mode == "f" or $options.mode == "ft":
+    Rscript '$__tool_directory__/MT2MQ.R' '$options.input_files' '$options.mode' '$options.ontology' '$mq_output'
+## Taxonomic mode: copy each input into in_dir under its dataset name (minus the last extension),
+## then pass the directory; the ontology argument is the placeholder NA.
+#elif $options.mode == "t":
+    mkdir in_dir
+    #for $input in $options.input_files:
+        && cp '$input' 'in_dir/${input.name.rsplit('.',1)[0]}'
+    #end for
+    && Rscript '$__tool_directory__/MT2MQ.R' in_dir t NA '$mq_output'
+#end if
+
+    ]]>
+    </command>
+    
+    <inputs>
+        <!-- The selected mode decides which inputs are shown: several per-sample files for
+             taxonomic mode, or one table plus a GO namespace choice for the other two modes. -->
+        <conditional name="options">
+            <param type="select" display="radio" name="mode" label="Mode">
+                <option value="f">Functional</option>
+                <option value="t" selected="true">Taxonomic</option>
+                <option value="ft">Functional-Taxonomic</option>
+            </param>
+            <when value="t">
+                <!-- Ontology is not selectable in taxonomic mode; a hidden placeholder is supplied -->
+                <param name="ontology" type="hidden" value="NA" />
+                <param type="data" name="input_files" format="tsv,tabular,txt" label="Files from ASaiM for all samples (named after sample)" multiple="true" />
+            </when>
+            <when value="f">
+                <param type="select" name="ontology" label="GO namespace" multiple="true" optional="false">
+                    <option value="molecular_function">molecular function</option>
+                    <option value="biological_process">biological process</option>
+                    <option value="cellular_component">cellular component</option>
+                </param>
+                <param type="data" name="input_files" format="tsv,tabular,txt" label="File from HUMAnN2 after regrouping, renaming, joining, and renormalizing" />
+            </when>
+            <when value="ft">
+                <param type="select" name="ontology" label="GO namespace" multiple="true" optional="false">
+                    <option value="molecular_function">molecular function</option>
+                    <option value="biological_process">biological process</option>
+                    <option value="cellular_component">cellular component</option>
+                </param>
+                <param type="data" name="input_files" format="tsv,tabular,txt" label="File from HUMAnN2 after regrouping, renaming, joining, and renormalizing" />
+            </when>
+        </conditional>
+    </inputs>
+    
+    <outputs>
+        <!-- Single tabular result; its label is the selected mode followed by "_output.tabular" -->
+        <data name="mq_output" format="tabular" label="${options.mode}_output.tabular"/>
+    </outputs>
+    
+    
+    <tests>
+        <!-- Taxonomic mode: six per-sample files; output must contain "rank", "genus" and "Clostridium" -->
+        <test>
+            <conditional name="options">
+ <param name="mode" value="t"/>
+                <param name="input_files" value="T4A.tsv,T4B.tsv,T4C.tsv,T7A.tsv,T7B.tsv,T7C.tsv" ftype="tsv"/>
+                <param name="ontology" value="NA"/>
+            </conditional>
+            <output name="mq_output">
+                <assert_contents>
+                    <has_text text="rank"/>
+                    <has_text text="genus"/>
+                    <has_text text="Clostridium"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Functional mode, molecular_function only; output must contain "namespace", "molecular_function" and "0000014" -->
+        <test>
+            <conditional name="options">
+ <param name="mode" value="f"/>
+                <param name="input_files" value="T4T7_func.tsv" ftype="tsv"/>
+                <param name="ontology" value="molecular_function"/>
+            </conditional>
+            <output name="mq_output">
+                <assert_contents>
+                    <has_text text="namespace"/>
+                    <has_text text="molecular_function"/>
+                    <has_text text="0000014"/> 
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Functional-taxonomic mode, biological_process only; output must contain "namespace", "genus", "biological_process" and "Clostridium" -->
+        <test>
+            <conditional name="options">
+ <param name="mode" value="ft"/>
+                <param name="input_files" value="T4T7_func.tsv" ftype="tsv"/>
+                <param name="ontology" value="biological_process"/>
+            </conditional>
+            <output name="mq_output">
+                <assert_contents>
+                    <has_text text="namespace"/>
+                    <has_text text="genus"/>
+                    <has_text text="biological_process"/>
+                    <has_text text="Clostridium"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    
+    
+    
+    
+    <help><![CDATA[
+
+**MT2MQ**: Metatranscriptomics to Metaquantome
+----------------------------------------------------
+
+MT2MQ is a tool to prepare metatranscriptomic results from ASaiM for further analysis with MetaQuantome, which currently only supports metaproteomic data. This tool has three modes:
+
+- **Taxonomic**: takes in genus-level MetaPhlAn2 results for each sample. The input files should be named as the sample. 
+
+ - Output: a single tabular file formatted for use as input for Metaquantome's taxonomic mode.
+
+- **Functional**: takes in a single file of HUMAnN2 results, regrouped and renamed to GO terms, with all samples joined together into one table, and renormalized to CPM. See the MT2MQ functional workflow for these processing steps. User can choose which GO namespace(s) to include.
+
+ - Output: a single tabular file formatted for use as input for Metaquantome's functional mode.
+
+- **Functional/taxonomic**: takes the same input as the functional mode. User can choose which GO namespace(s) to include.
+
+ - Output: a single tabular file including all GO terms and the taxa which express them and their abundances for each sample. This file *cannot* be used as input for Metaquantome.
+
+**Outputs**:
+------------
+
+MT2MQ produces a single tabular output, formatted to be used as input for Metaquantome or for other analysis.
+
+    ]]></help>
+    
+    <citations>
+        <citation type="bibtex">
+            @misc{MT2MQ, 
+            author={Crane, Marie},
+            year={2020},
+            title={Metatranscriptomics to MetaQuantome}
+            }
+        </citation>
+    </citations>
+</tool>
b
diff -r 000000000000 -r 6bee94458567 test-data/T4A.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/T4A.tsv Tue Jun 23 07:46:07 2020 -0400
b
@@ -0,0 +1,5 @@
+genus abundance
+Clostridium 68.36603
+Coprothermobacter 31.23635
+Methanothermobacter 0.3807
+Escherichia 0.01692
b
diff -r 000000000000 -r 6bee94458567 test-data/T4B.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/T4B.tsv Tue Jun 23 07:46:07 2020 -0400
b
@@ -0,0 +1,4 @@
+genus abundance
+Clostridium 60.78776
+Coprothermobacter 38.9515
+Methanothermobacter 0.26075
b
diff -r 000000000000 -r 6bee94458567 test-data/T4C.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/T4C.tsv Tue Jun 23 07:46:07 2020 -0400
b
@@ -0,0 +1,4 @@
+genus abundance
+Clostridium 68.49482
+Coprothermobacter 31.0739
+Methanothermobacter 0.43128
b
diff -r 000000000000 -r 6bee94458567 test-data/T4T7_func.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/T4T7_func.tsv Tue Jun 23 07:46:07 2020 -0400
[
b'@@ -0,0 +1,7475 @@\n+# Gene Family\tdataset_21368137\tdataset_21368138\tdataset_21368139\tdataset_21368140\tdataset_21368141\tdataset_21368142\n+GO:0000014: [MF] single-stranded DNA endodeoxyribonuclease activity\t0.158851\t0.457588\t0.367933\t0.0406378\t0.212081\t0.0528417\n+GO:0000014: [MF] single-stranded DNA endodeoxyribonuclease activity|g__Methanothermobacter.s__Methanothermobacter_thermautotrophicus\t0.158851\t0.457588\t0.367933\t0.0406378\t0.212081\t0.0528417\n+GO:0000015: [CC] phosphopyruvate hydratase complex\t49.83\t66.6584\t66.4705\t147.031\t140.778\t92.7014\n+GO:0000015: [CC] phosphopyruvate hydratase complex|g__Clostridium.s__Clostridium_thermocellum\t3.85773\t22.1513\t21.5629\t82.9536\t74.2882\t59.5495\n+GO:0000015: [CC] phosphopyruvate hydratase complex|g__Coprothermobacter.s__Coprothermobacter_proteolyticus\t45.8416\t44.4224\t44.6618\t63.8743\t66.3513\t33.0344\n+GO:0000015: [CC] phosphopyruvate hydratase complex|g__Escherichia.s__Escherichia_coli\t0.0423134\t0\t0\t0\t0\t0\n+GO:0000015: [CC] phosphopyruvate hydratase complex|g__Methanothermobacter.s__Methanothermobacter_thermautotrophicus\t0.0882966\t0.0847593\t0.245785\t0.20289\t0.137987\t0.117487\n+GO:0000023: [BP] maltose metabolic process\t14.0012\t10.3973\t8.39276\t14.0098\t14.1239\t9.89496\n+GO:0000023: [BP] maltose metabolic process|g__Coprothermobacter.s__Coprothermobacter_proteolyticus\t14.0012\t10.3973\t8.39276\t14.0098\t14.1239\t9.89496\n+GO:0000025: [BP] maltose catabolic process\t0.0251304\t0\t0\t0\t0\t0\n+GO:0000025: [BP] maltose catabolic process|g__Escherichia.s__Escherichia_coli\t0.0251304\t0\t0\t0\t0\t0\n+GO:0000027: [BP] ribosomal large subunit assembly\t49.0504\t105.108\t91.5859\t184.412\t170.33\t145.589\n+GO:0000027: [BP] ribosomal large subunit assembly|g__Clostridium.s__Clostridium_thermocellum\t4.99521\t34.9239\t31.4675\t127.056\t96.2502\t105.199\n+GO:0000027: [BP] ribosomal large subunit 
assembly|g__Coprothermobacter.s__Coprothermobacter_proteolyticus\t44.0266\t70.1837\t60.1184\t57.3553\t74.0795\t40.3249\n+GO:0000027: [BP] ribosomal large subunit assembly|g__Escherichia.s__Escherichia_coli\t0.0285816\t0\t0\t0\t0\t0.0653892\n+GO:0000030: [MF] mannosyltransferase activity\t2.53579\t4.38419\t2.75771\t22.4856\t20.2827\t17.4859\n+GO:0000030: [MF] mannosyltransferase activity|g__Clostridium.s__Clostridium_thermocellum\t2.50346\t4.38419\t2.69903\t22.4856\t20.2827\t17.4859\n+GO:0000030: [MF] mannosyltransferase activity|g__Escherichia.s__Escherichia_coli\t0.0323487\t0\t0.0586834\t0\t0\t0\n+GO:0000034: [MF] adenine deaminase activity\t0.163663\t0.127232\t0.273796\t0.0508594\t0.103349\t0.308642\n+GO:0000034: [MF] adenine deaminase activity|g__Escherichia.s__Escherichia_coli\t0.0149956\t0\t0.0278306\t0\t0\t0\n+GO:0000034: [MF] adenine deaminase activity|g__Methanothermobacter.s__Methanothermobacter_thermautotrophicus\t0.148644\t0.127232\t0.24592\t0.0508594\t0.103349\t0.308642\n+GO:0000036: [MF] ACP phosphopantetheine attachment site binding involved in fatty acid biosynthetic process\t43.4545\t136.691\t71.8587\t537.892\t408.492\t557.491\n+GO:0000036: [MF] ACP phosphopantetheine attachment site binding involved in fatty acid biosynthetic process|g__Clostridium.s__Clostridium_thermocellum\t26.7934\t119.275\t48.5853\t525.102\t397.776\t543.996\n+GO:0000036: [MF] ACP phosphopantetheine attachment site binding involved in fatty acid biosynthetic process|g__Coprothermobacter.s__Coprothermobacter_proteolyticus\t16.661\t17.4161\t23.2322\t12.7903\t10.7163\t13.3766\n+GO:0000036: [MF] ACP phosphopantetheine attachment site binding involved in fatty acid biosynthetic process|g__Escherichia.s__Escherichia_coli\t0\t0\t0.0413174\t0\t0\t0.118199\n+GO:0000041: [BP] transition metal ion transport\t2.90142\t29.9683\t29.9432\t38.8478\t38.0279\t31.5089\n+GO:0000041: [BP] transition metal ion 
transport|g__Clostridium.s__Clostridium_thermocellum\t2.80498\t29.9683\t29.3116\t37.9571\t37.3748\t29.8371\n+GO:0000041: [BP] transition metal ion transport|g__Methanothermobacter.s__Methanothermobacter_thermautotrophicus\t0.0964385\t0\t0.631534\t0.890674\t0.653019\t1.67182\n+GO:0000049: [MF] tRNA binding\t637.77\t984.375\t722.889\t2383.93\t2185.47\t2207.09\n+GO:0000049: [MF] tRNA bindin'..b'dehydrogenase (fumarate) activity\t0.864033\t0.484005\t0.13365\t0.257804\t0.353627\t0.431174\n+GO:1990663: [MF] dihydroorotate dehydrogenase (fumarate) activity|g__Coprothermobacter.s__Coprothermobacter_proteolyticus\t0.864033\t0.484005\t0.13365\t0.257804\t0.353627\t0.431174\n+GO:2000142: [BP] regulation of DNA-templated transcription, initiation\t0\t0\t0.0968433\t0\t0\t0\n+GO:2000142: [BP] regulation of DNA-templated transcription, initiation|g__Escherichia.s__Escherichia_coli\t0\t0\t0.0968433\t0\t0\t0\n+GO:2000143: [BP] negative regulation of DNA-templated transcription, initiation\t1.42047\t0\t0.406363\t0\t0\t0\n+GO:2000143: [BP] negative regulation of DNA-templated transcription, initiation|g__Escherichia.s__Escherichia_coli\t1.42047\t0\t0.406363\t0\t0\t0\n+GO:2000144: [BP] positive regulation of DNA-templated transcription, initiation\t0.090727\t0\t0\t0\t0\t0\n+GO:2000144: [BP] positive regulation of DNA-templated transcription, initiation|g__Escherichia.s__Escherichia_coli\t0.090727\t0\t0\t0\t0\t0\n+GO:2000145: [BP] regulation of cell motility\t0.16534\t0\t0\t0\t0\t0\n+GO:2000145: [BP] regulation of cell motility|g__Escherichia.s__Escherichia_coli\t0.16534\t0\t0\t0\t0\t0\n+GO:2000147: [BP] positive regulation of cell motility\t1.05166\t0\t0\t0\t0\t0\n+GO:2000147: [BP] positive regulation of cell motility|g__Escherichia.s__Escherichia_coli\t1.05166\t0\t0\t0\t0\t0\n+GO:2000186: [BP] negative regulation of phosphate transmembrane transport\t0.0476602\t0.184127\t1.33312\t0.689474\t0.256333\t0.748484\n+GO:2000186: [BP] negative regulation of phosphate transmembrane 
transport|g__Escherichia.s__Escherichia_coli\t0\t0\t0\t0\t0\t0.112378\n+GO:2000186: [BP] negative regulation of phosphate transmembrane transport|g__Methanothermobacter.s__Methanothermobacter_thermautotrophicus\t0.0476602\t0.184127\t1.33312\t0.689474\t0.256333\t0.636106\n+GO:2000678: [BP] negative regulation of transcription regulatory region DNA binding\t0.0430911\t0\t0\t0\t0\t0\n+GO:2000678: [BP] negative regulation of transcription regulatory region DNA binding|g__Escherichia.s__Escherichia_coli\t0.0430911\t0\t0\t0\t0\t0\n+GO:2000884: [BP] glucomannan catabolic process\t7.87794\t11.7939\t11.5945\t20.9763\t16.3192\t17.9495\n+GO:2000884: [BP] glucomannan catabolic process|g__Clostridium.s__Clostridium_thermocellum\t7.87794\t11.7939\t11.5945\t20.9763\t16.3192\t17.9495\n+GO:2001059: [BP] D-tagatose 6-phosphate catabolic process\t135.133\t134.531\t133.449\t207.117\t206.169\t216.128\n+GO:2001059: [BP] D-tagatose 6-phosphate catabolic process|g__Clostridium.s__Clostridium_thermocellum\t9.86686\t28.7165\t21.6438\t133.033\t107.045\t143.081\n+GO:2001059: [BP] D-tagatose 6-phosphate catabolic process|g__Coprothermobacter.s__Coprothermobacter_proteolyticus\t125.074\t105.814\t111.725\t74.0842\t99.1241\t73.0471\n+GO:2001059: [BP] D-tagatose 6-phosphate catabolic process|g__Escherichia.s__Escherichia_coli\t0.191467\t0\t0.0810561\t0\t0\t0\n+GO:2001070: [MF] starch binding\t11.5\t46.4104\t40.8239\t383.819\t405.859\t540.485\n+GO:2001070: [MF] starch binding|g__Clostridium.s__Clostridium_thermocellum\t11.5\t46.4104\t40.8239\t383.819\t405.859\t540.485\n+GO:2001118: [BP] tetrahydromethanopterin biosynthetic process\t0.265619\t0.71938\t1.42789\t0.202194\t0.11852\t0.439097\n+GO:2001118: [BP] tetrahydromethanopterin biosynthetic process|g__Methanothermobacter.s__Methanothermobacter_thermautotrophicus\t0.265619\t0.71938\t1.42789\t0.202194\t0.11852\t0.439097\n+GO:2001295: [BP] malonyl-CoA biosynthetic process\t0.0405149\t0\t0.150294\t0\t0\t0.0539089\n+GO:2001295: [BP] malonyl-CoA 
biosynthetic process|g__Escherichia.s__Escherichia_coli\t0.0405149\t0\t0.150294\t0\t0\t0.0539089\n+UNGROUPED\t21895.8\t45848.3\t45265\t92374.4\t80593.4\t104793\n+UNGROUPED|g__Clostridium.s__Clostridium_thermocellum\t7879.65\t38500.4\t34463.3\t86554\t73736.3\t97953\n+UNGROUPED|g__Coprothermobacter.s__Coprothermobacter_proteolyticus\t8867.57\t7005.55\t8228.2\t5587.68\t6707.31\t5857.03\n+UNGROUPED|g__Escherichia.s__Escherichia_coli\t4965.26\t0\t2303.56\t0\t0\t633.422\n+UNGROUPED|g__Methanothermobacter.s__Methanothermobacter_thermautotrophicus\t183.337\t342.351\t269.96\t232.667\t149.877\t349.377\n+UNMAPPED\t136621\t308057\t290676\t242912\t242851\t243733\n'
b
diff -r 000000000000 -r 6bee94458567 test-data/T7A.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/T7A.tsv Tue Jun 23 07:46:07 2020 -0400
b
@@ -0,0 +1,5 @@
+genus abundance
+Coprothermobacter 64.59208
+Clostridium 34.51004
+Methanothermobacter 0.83333
+Escherichia 0.06455
b
diff -r 000000000000 -r 6bee94458567 test-data/T7B.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/T7B.tsv Tue Jun 23 07:46:07 2020 -0400
b
@@ -0,0 +1,5 @@
+genus abundance
+Coprothermobacter 60.9348
+Clostridium 38.22246
+Methanothermobacter 0.77289
+Escherichia 0.06985
b
diff -r 000000000000 -r 6bee94458567 test-data/T7C.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/T7C.tsv Tue Jun 23 07:46:07 2020 -0400
b
@@ -0,0 +1,5 @@
+genus abundance
+Coprothermobacter 90.44415
+Clostridium 8.90992
+Methanothermobacter 0.57389
+Escherichia 0.07204
b
diff -r 000000000000 -r 6bee94458567 test-data/f_output.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/f_output.tabular Tue Jun 23 07:46:07 2020 -0400
b
b"@@ -0,0 +1,2457 @@\n+id\tname\tnamespace\tdataset_21368137\tdataset_21368138\tdataset_21368139\tdataset_21368140\tdataset_21368141\tdataset_21368142\n+GO:0000014\tsingle-stranded DNA endodeoxyribonuclease activity\tmolecular_function\t0.158851\t0.457588\t0.367933\t0.0406378\t0.212081\t0.0528417\n+GO:0000023\tmaltose metabolic process\tbiological_process\t14.0012\t10.3973\t8.3927600000000009\t14.0098\t14.1239\t9.89496\n+GO:0000025\tmaltose catabolic process\tbiological_process\t0.0251304\t0\t0\t0\t0\t0\n+GO:0000027\tribosomal large subunit assembly\tbiological_process\t49.0504\t105.108\t91.5859\t184.412\t170.33\t145.589\n+GO:0000030\tmannosyltransferase activity\tmolecular_function\t2.53579\t4.38419\t2.75771\t22.4856\t20.2827\t17.4859\n+GO:0000034\tadenine deaminase activity\tmolecular_function\t0.163663\t0.127232\t0.273796\t0.0508594\t0.103349\t0.308642\n+GO:0000036\tACP phosphopantetheine attachment site binding involved in fatty acid biosynthetic process\tmolecular_function\t43.4545\t136.691\t71.8587\t537.892\t408.492\t557.491\n+GO:0000041\ttransition metal ion transport\tbiological_process\t2.90142\t29.9683\t29.9432\t38.8478\t38.0279\t31.5089\n+GO:0000049\ttRNA binding\tmolecular_function\t637.77\t984.375\t722.889\t2383.93\t2185.47\t2207.09\n+GO:0000050\turea cycle\tbiological_process\t0.101785\t0\t0.0755531\t0\t0\t0\n+GO:0000053\targininosuccinate metabolic process\tbiological_process\t0.101785\t0\t0.0755531\t0\t0\t0\n+GO:0000062\tfatty-acyl-CoA binding\tmolecular_function\t0.065864\t0\t0.0611642\t0\t0\t0\n+GO:0000103\tsulfate assimilation\tbiological_process\t60.966\t642.736\t558.276\t836.882\t834.009\t1015.57\n+GO:0000104\tsuccinate dehydrogenase activity\tmolecular_function\t0.227194\t0.0702438\t0.13577\t0.299411\t0.260999\t0.0973401\n+GO:0000105\thistidine biosynthetic process\tbiological_process\t70.3367\t124.495\t86.3493\t328.999\t320.599\t334.269\n+GO:0000107\timidazoleglycerol-phosphate synthase 
activity\tmolecular_function\t9.78513\t22.2865\t12.5845\t59.3043\t65.3597\t51.9861\n+GO:0000150\trecombinase activity\tmolecular_function\t2.14012\t110.849\t78.5557\t19.3377\t20.0455\t17.1905\n+GO:0000155\tphosphorelay sensor kinase activity\tmolecular_function\t71.7961\t132.911\t124.388\t305.795\t298.096\t267.332\n+GO:0000156\tphosphorelay response regulator activity\tmolecular_function\t3.93995\t9.1295\t9.92015\t86.9172\t82.5069\t83.8827\n+GO:0000160\tphosphorelay signal transduction system\tbiological_process\t175.21\t590.524\t562.222\t1563.84\t1551.44\t1764.96\n+GO:0000162\ttryptophan biosynthetic process\tbiological_process\t27.7221\t153.213\t351.528\t478.509\t266.019\t315.287\n+GO:0000166\tnucleotide binding\tmolecular_function\t401.183\t277.657\t300.909\t533.263\t507.807\t472.773\n+GO:0000175\t3'-5'-exoribonuclease activity\tmolecular_function\t35.4858\t71.3135\t57.6284\t96.6615\t103.215\t97.4708\n+GO:0000179\trRNA (adenine-N6,N6-)-dimethyltransferase activity\tmolecular_function\t11.6088\t12.831\t10.0897\t19.3102\t19.6073\t19.7291\n+GO:0000213\ttRNA-intron endonuclease activity\tmolecular_function\t0.265668\t0.514297\t0\t0.411078\t0.0597915\t0\n+GO:0000256\tallantoin catabolic process\tbiological_process\t0.0191516\t0\t0\t0\t0\t0.0947853\n+GO:0000270\tpeptidoglycan metabolic process\tbiological_process\t3.74251\t22.5746\t12.9726\t63.0136\t57.2558\t69.5533\n+GO:0000271\tpolysaccharide biosynthetic process\tbiological_process\t12.6439\t82.0637\t66.8783\t187.799\t171.763\t234.555\n+GO:0000272\tpolysaccharide catabolic process\tbiological_process\t274.016\t450.016\t409.901\t1307.84\t1243.32\t1384.78\n+GO:0000286\talanine dehydrogenase activity\tmolecular_function\t0.111701\t0.321908\t0.103745\t0\t0.124727\t0.0743471\n+GO:0000287\tmagnesium ion binding\tmolecular_function\t795.245\t1511.25\t1421.82\t2903.56\t2775.35\t2674.32\n+GO:0000302\tresponse to reactive oxygen 
species\tbiological_process\t16.8257\t159.381\t159.194\t520.725\t417.94\t348.992\n+GO:0000309\tnicotinamide-nucleotide adenylyltransferase activity\tmolecular_function\t0\t0.235748\t0.227832\t0\t0.109621\t0.235621\n+GO:0000398\tmRNA splicing, via spliceosome\tbiological_process\t0\t0\t0.939701\t0.259072\t1.13035\t1.35099\n+GO:0000413\tprotein peptidyl-prolyl isomerization\tbiological_process\t0.103098\t0\t0\t0\t0\t0\n+GO:0000453\tenzyme-directed rRNA 2'-O-methylation\tbiological"..b'\tregulation of single-species biofilm formation\tbiological_process\t0.0483165\t0\t0\t0\t0\t0\n+GO:1900191\tnegative regulation of single-species biofilm formation\tbiological_process\t1.52821\t0\t0\t0\t0\t0\n+GO:1900751\t4-(trimethylammonio)butanoate transport\tbiological_process\t0.0349249\t0\t0\t0\t0\t0\n+GO:1900753\tdoxorubicin transport\tbiological_process\t0\t0.391685\t0.32851\t0.1772\t0\t0.225143\n+GO:1901137\tcarbohydrate derivative biosynthetic process\tbiological_process\t19.441\t17.737\t11.7986\t51.7246\t42.9469\t34.2831\n+GO:1901264\tcarbohydrate derivative transport\tbiological_process\t0.0959524\t0\t0\t0\t0\t0.0780984\n+GO:1901285\t5,6,7,8-tetrahydromethanopterin biosynthetic process\tbiological_process\t0.170687\t0.166391\t0.479796\t0.177249\t0.232048\t0.115288\n+GO:1901530\tresponse to hypochlorite\tbiological_process\t0.174382\t0\t0\t0\t0\t0\n+GO:1901652\tresponse to peptide\tbiological_process\t0.0514031\t0\t0\t0\t0\t0\n+GO:1901682\tsulfur compound transmembrane transporter activity\tmolecular_function\t0.300422\t0\t0\t0\t0\t0\n+GO:1902021\tregulation of bacterial-type flagellum-dependent cell motility\tbiological_process\t0.0604685\t0\t0\t0\t0\t0\n+GO:1902201\tnegative regulation of bacterial-type flagellum-dependent cell motility\tbiological_process\t0.519645\t0\t0\t0\t0\t0.122855\n+GO:1902209\tnegative regulation of bacterial-type flagellum assembly\tbiological_process\t0.0632148\t0\t0\t0\t0\t0\n+GO:1902475\tL-alpha-amino acid transmembrane 
transport\tbiological_process\t0.707175\t0\t0.523098\t0\t0\t0.179351\n+GO:1902599\tsulfathiazole transmembrane transport\tbiological_process\t0.0266372\t0\t0.0494366\t0\t0\t0\n+GO:1902760\tMo(VI)-molybdopterin cytosine dinucleotide biosynthetic process\tbiological_process\t0.169958\t0\t0.210286\t0\t0\t0\n+GO:1902765\tL-arginine import into cell\tbiological_process\t0.0835087\t0\t0.154985\t0\t0\t0\n+GO:1902777\t6-sulfoquinovose(1-) catabolic process\tbiological_process\t0.162084\t0\t0.0826348\t0\t0\t0\n+GO:1903401\tL-lysine transmembrane transport\tbiological_process\t0.129711\t0\t0\t0\t0\t0\n+GO:1903506\tregulation of nucleic acid-templated transcription\tbiological_process\t0.0604685\t0\t0\t0\t0\t0\n+GO:1903658\tpositive regulation of type IV pilus biogenesis\tbiological_process\t0.201602\t0\t0.187552\t0\t0\t0\n+GO:1903716\tguanine transmembrane transport\tbiological_process\t0.0803249\t0\t0\t0\t0\t0.0530681\n+GO:1903785\tL-valine transmembrane transport\tbiological_process\t0.26941\t0\t0\t0\t0\t0\n+GO:1903791\turacil transmembrane transport\tbiological_process\t0\t0\t0\t0\t0\t0.0548468\n+GO:1903825\torganic acid transmembrane transport\tbiological_process\t0.0374282\t0\t0.0694638\t0\t0\t0\n+GO:1903874\tferrous iron transmembrane transport\tbiological_process\t0.135349\t0\t0\t0\t0\t0\n+GO:1990663\tdihydroorotate dehydrogenase (fumarate) activity\tmolecular_function\t0.864033\t0.484005\t0.13365\t0.257804\t0.353627\t0.431174\n+GO:2000142\tregulation of DNA-templated transcription, initiation\tbiological_process\t0\t0\t0.0968433\t0\t0\t0\n+GO:2000143\tnegative regulation of DNA-templated transcription, initiation\tbiological_process\t1.42047\t0\t0.406363\t0\t0\t0\n+GO:2000144\tpositive regulation of DNA-templated transcription, initiation\tbiological_process\t0.090727\t0\t0\t0\t0\t0\n+GO:2000145\tregulation of cell motility\tbiological_process\t0.16534\t0\t0\t0\t0\t0\n+GO:2000147\tpositive regulation of cell 
motility\tbiological_process\t1.05166\t0\t0\t0\t0\t0\n+GO:2000186\tnegative regulation of phosphate transmembrane transport\tbiological_process\t0.0476602\t0.184127\t1.33312\t0.689474\t0.256333\t0.748484\n+GO:2000678\tnegative regulation of transcription regulatory region DNA binding\tbiological_process\t0.0430911\t0\t0\t0\t0\t0\n+GO:2000884\tglucomannan catabolic process\tbiological_process\t7.87794\t11.7939\t11.5945\t20.9763\t16.3192\t17.9495\n+GO:2001059\tD-tagatose 6-phosphate catabolic process\tbiological_process\t135.133\t134.531\t133.449\t207.117\t206.169\t216.128\n+GO:2001070\tstarch binding\tmolecular_function\t11.5\t46.4104\t40.8239\t383.819\t405.859\t540.485\n+GO:2001118\ttetrahydromethanopterin biosynthetic process\tbiological_process\t0.265619\t0.71938\t1.42789\t0.202194\t0.11852\t0.439097\n+GO:2001295\tmalonyl-CoA biosynthetic process\tbiological_process\t0.0405149\t0\t0.150294\t0\t0\t0.0539089\n'
b
diff -r 000000000000 -r 6bee94458567 test-data/ft_output.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ft_output.tabular Tue Jun 23 07:46:07 2020 -0400
b
b'@@ -0,0 +1,4605 @@\n+id\tname\tnamespace\tgenus\tspecies\tdataset_21368137\tdataset_21368138\tdataset_21368139\tdataset_21368140\tdataset_21368141\tdataset_21368142\n+GO:0000014\tsingle-stranded DNA endodeoxyribonuclease activity\tmolecular_function\tMethanothermobacter\tMethanothermobacter thermautotrophicus\t0.158851\t0.457588\t0.367933\t0.0406378\t0.212081\t0.0528417\n+GO:0000023\tmaltose metabolic process\tbiological_process\tCoprothermobacter\tCoprothermobacter proteolyticus\t14.0012\t10.3973\t8.3927600000000009\t14.0098\t14.1239\t9.89496\n+GO:0000025\tmaltose catabolic process\tbiological_process\tEscherichia\tEscherichia coli\t0.0251304\t0\t0\t0\t0\t0\n+GO:0000027\tribosomal large subunit assembly\tbiological_process\tClostridium\tClostridium thermocellum\t4.99521\t34.9239\t31.4675\t127.056\t96.2502\t105.199\n+GO:0000027\tribosomal large subunit assembly\tbiological_process\tCoprothermobacter\tCoprothermobacter proteolyticus\t44.0266\t70.1837\t60.1184\t57.3553\t74.0795\t40.3249\n+GO:0000027\tribosomal large subunit assembly\tbiological_process\tEscherichia\tEscherichia coli\t0.0285816\t0\t0\t0\t0\t0.0653892\n+GO:0000030\tmannosyltransferase activity\tmolecular_function\tClostridium\tClostridium thermocellum\t2.50346\t4.38419\t2.69903\t22.4856\t20.2827\t17.4859\n+GO:0000030\tmannosyltransferase activity\tmolecular_function\tEscherichia\tEscherichia coli\t0.0323487\t0\t0.0586834\t0\t0\t0\n+GO:0000034\tadenine deaminase activity\tmolecular_function\tEscherichia\tEscherichia coli\t0.0149956\t0\t0.0278306\t0\t0\t0\n+GO:0000034\tadenine deaminase activity\tmolecular_function\tMethanothermobacter\tMethanothermobacter thermautotrophicus\t0.148644\t0.127232\t0.24592\t0.0508594\t0.103349\t0.308642\n+GO:0000036\tACP phosphopantetheine attachment site binding involved in fatty acid biosynthetic process\tmolecular_function\tClostridium\tClostridium thermocellum\t26.7934\t119.275\t48.5853\t525.102\t397.776\t543.996\n+GO:0000036\tACP phosphopantetheine attachment site 
binding involved in fatty acid biosynthetic process\tmolecular_function\tCoprothermobacter\tCoprothermobacter proteolyticus\t16.661\t17.4161\t23.2322\t12.7903\t10.7163\t13.3766\n+GO:0000036\tACP phosphopantetheine attachment site binding involved in fatty acid biosynthetic process\tmolecular_function\tEscherichia\tEscherichia coli\t0\t0\t0.0413174\t0\t0\t0.118199\n+GO:0000041\ttransition metal ion transport\tbiological_process\tClostridium\tClostridium thermocellum\t2.80498\t29.9683\t29.3116\t37.9571\t37.3748\t29.8371\n+GO:0000041\ttransition metal ion transport\tbiological_process\tMethanothermobacter\tMethanothermobacter thermautotrophicus\t0.0964385\t0\t0.631534\t0.890674\t0.653019\t1.67182\n+GO:0000049\ttRNA binding\tmolecular_function\tClostridium\tClostridium thermocellum\t120.077\t577.193\t382.252\t1919.14\t1656.7\t1821.14\n+GO:0000049\ttRNA binding\tmolecular_function\tCoprothermobacter\tCoprothermobacter proteolyticus\t514.943\t404.284\t333.851\t458.679\t525.602\t378.856\n+GO:0000049\ttRNA binding\tmolecular_function\tEscherichia\tEscherichia coli\t0.979988\t0\t0.574971\t0\t0\t0.0577249\n+GO:0000049\ttRNA binding\tmolecular_function\tMethanothermobacter\tMethanothermobacter thermautotrophicus\t1.76975\t2.89815\t6.21105\t6.1176\t3.16754\t7.03193\n+GO:0000050\turea cycle\tbiological_process\tEscherichia\tEscherichia coli\t0.101785\t0\t0.0755531\t0\t0\t0\n+GO:0000053\targininosuccinate metabolic process\tbiological_process\tEscherichia\tEscherichia coli\t0.101785\t0\t0.0755531\t0\t0\t0\n+GO:0000062\tfatty-acyl-CoA binding\tmolecular_function\tEscherichia\tEscherichia coli\t0.065864\t0\t0.0611642\t0\t0\t0\n+GO:0000103\tsulfate assimilation\tbiological_process\tClostridium\tClostridium thermocellum\t60.4918\t642.736\t558.276\t836.882\t834.009\t1015.38\n+GO:0000103\tsulfate assimilation\tbiological_process\tEscherichia\tEscherichia coli\t0.474196\t0\t0\t0\t0\t0.189053\n+GO:0000104\tsuccinate dehydrogenase activity\tmolecular_function\tEscherichia\tEscherichia 
coli\t0.190617\t0\t0\t0\t0\t0\n+GO:0000104\tsuccinate dehydrogenase activity\tmolecular_function\tMethanothermobacter\tMethanothermobacter thermautotrophicus\t0.0365776\t0.0702438\t0.13577\t0.299411\t0.260999\t0.0973401\n+GO:0000105\thistidine biosynthetic process\tbiological_process\tClostridium\tClostridiu'..b'ichia coli\t0.0632148\t0\t0\t0\t0\t0\n+GO:1902475\tL-alpha-amino acid transmembrane transport\tbiological_process\tEscherichia\tEscherichia coli\t0.707175\t0\t0.523098\t0\t0\t0.179351\n+GO:1902599\tsulfathiazole transmembrane transport\tbiological_process\tEscherichia\tEscherichia coli\t0.0266372\t0\t0.0494366\t0\t0\t0\n+GO:1902760\tMo(VI)-molybdopterin cytosine dinucleotide biosynthetic process\tbiological_process\tEscherichia\tEscherichia coli\t0.169958\t0\t0.210286\t0\t0\t0\n+GO:1902765\tL-arginine import into cell\tbiological_process\tEscherichia\tEscherichia coli\t0.0835087\t0\t0.154985\t0\t0\t0\n+GO:1902777\t6-sulfoquinovose(1-) catabolic process\tbiological_process\tEscherichia\tEscherichia coli\t0.162084\t0\t0.0826348\t0\t0\t0\n+GO:1903401\tL-lysine transmembrane transport\tbiological_process\tEscherichia\tEscherichia coli\t0.129711\t0\t0\t0\t0\t0\n+GO:1903506\tregulation of nucleic acid-templated transcription\tbiological_process\tEscherichia\tEscherichia coli\t0.0604685\t0\t0\t0\t0\t0\n+GO:1903658\tpositive regulation of type IV pilus biogenesis\tbiological_process\tEscherichia\tEscherichia coli\t0.201602\t0\t0.187552\t0\t0\t0\n+GO:1903716\tguanine transmembrane transport\tbiological_process\tEscherichia\tEscherichia coli\t0.0803249\t0\t0\t0\t0\t0.0530681\n+GO:1903785\tL-valine transmembrane transport\tbiological_process\tEscherichia\tEscherichia coli\t0.26941\t0\t0\t0\t0\t0\n+GO:1903791\turacil transmembrane transport\tbiological_process\tEscherichia\tEscherichia coli\t0\t0\t0\t0\t0\t0.0548468\n+GO:1903825\torganic acid transmembrane transport\tbiological_process\tEscherichia\tEscherichia 
coli\t0.0374282\t0\t0.0694638\t0\t0\t0\n+GO:1903874\tferrous iron transmembrane transport\tbiological_process\tEscherichia\tEscherichia coli\t0.135349\t0\t0\t0\t0\t0\n+GO:1990663\tdihydroorotate dehydrogenase (fumarate) activity\tmolecular_function\tCoprothermobacter\tCoprothermobacter proteolyticus\t0.864033\t0.484005\t0.13365\t0.257804\t0.353627\t0.431174\n+GO:2000142\tregulation of DNA-templated transcription, initiation\tbiological_process\tEscherichia\tEscherichia coli\t0\t0\t0.0968433\t0\t0\t0\n+GO:2000143\tnegative regulation of DNA-templated transcription, initiation\tbiological_process\tEscherichia\tEscherichia coli\t1.42047\t0\t0.406363\t0\t0\t0\n+GO:2000144\tpositive regulation of DNA-templated transcription, initiation\tbiological_process\tEscherichia\tEscherichia coli\t0.090727\t0\t0\t0\t0\t0\n+GO:2000145\tregulation of cell motility\tbiological_process\tEscherichia\tEscherichia coli\t0.16534\t0\t0\t0\t0\t0\n+GO:2000147\tpositive regulation of cell motility\tbiological_process\tEscherichia\tEscherichia coli\t1.05166\t0\t0\t0\t0\t0\n+GO:2000186\tnegative regulation of phosphate transmembrane transport\tbiological_process\tEscherichia\tEscherichia coli\t0\t0\t0\t0\t0\t0.112378\n+GO:2000186\tnegative regulation of phosphate transmembrane transport\tbiological_process\tMethanothermobacter\tMethanothermobacter thermautotrophicus\t0.0476602\t0.184127\t1.33312\t0.689474\t0.256333\t0.636106\n+GO:2000678\tnegative regulation of transcription regulatory region DNA binding\tbiological_process\tEscherichia\tEscherichia coli\t0.0430911\t0\t0\t0\t0\t0\n+GO:2000884\tglucomannan catabolic process\tbiological_process\tClostridium\tClostridium thermocellum\t7.87794\t11.7939\t11.5945\t20.9763\t16.3192\t17.9495\n+GO:2001059\tD-tagatose 6-phosphate catabolic process\tbiological_process\tClostridium\tClostridium thermocellum\t9.86686\t28.7165\t21.6438\t133.033\t107.045\t143.081\n+GO:2001059\tD-tagatose 6-phosphate catabolic 
process\tbiological_process\tCoprothermobacter\tCoprothermobacter proteolyticus\t125.074\t105.814\t111.725\t74.0842\t99.1241\t73.0471\n+GO:2001059\tD-tagatose 6-phosphate catabolic process\tbiological_process\tEscherichia\tEscherichia coli\t0.191467\t0\t0.0810561\t0\t0\t0\n+GO:2001070\tstarch binding\tmolecular_function\tClostridium\tClostridium thermocellum\t11.5\t46.4104\t40.8239\t383.819\t405.859\t540.485\n+GO:2001118\ttetrahydromethanopterin biosynthetic process\tbiological_process\tMethanothermobacter\tMethanothermobacter thermautotrophicus\t0.265619\t0.71938\t1.42789\t0.202194\t0.11852\t0.439097\n+GO:2001295\tmalonyl-CoA biosynthetic process\tbiological_process\tEscherichia\tEscherichia coli\t0.0405149\t0\t0.150294\t0\t0\t0.0539089\n'
b
diff -r 000000000000 -r 6bee94458567 test-data/t_output.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/t_output.tabular Tue Jun 23 07:46:07 2020 -0400
b
@@ -0,0 +1,5 @@
+id name rank T4A T4B T4C T7A T7B T7C
+1 Clostridium genus 68.36603 60.78776 68.49482 34.51004 38.22246 8.90992
+2 Coprothermobacter genus 31.23635 38.9515 31.0739 64.59208 60.9348 90.44415
+4 Methanothermobacter genus 0.3807 0.26075 0.43128 0.83333 0.77289 0.57389
+3 Escherichia genus 0.01692 NA NA 0.06455 0.06985 0.07204