Repository 'merge_metaphlan_tables'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/merge_metaphlan_tables

Changeset 0:3b469e230e82 (2017-03-04)
Next changeset 1:35c831cdbf7a (2021-03-19)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan2/ commit 345fb7ef485456ae833be5ad2d2ce4f8765652c8
added:
generate_test_data.sh
merge_metaphlan_tables.xml
metaphlan2_macros.xml
repository_dependencies.xml
test-data/community_profile.tabular
test-data/formatted_to_krona.tabular
test-data/heatmap.pdf
test-data/heatmap.png
test-data/heatmap.svg
test-data/input_sequences.fasta
test-data/marker_metadata.json
test-data/marker_sequences.fasta
test-data/merged_community_profile.tabular
tool-data/metaphlan2_database.loc.sample
tool_data_table_conf.xml.sample
transform_json_to_pkl.py
b
diff -r 000000000000 -r 3b469e230e82 generate_test_data.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/generate_test_data.sh Sat Mar 04 12:23:25 2017 -0500
b
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+# Rebuild the heatmap fixtures from the merged profile. PNG run: -m average, -d braycurtis, -f correlation, -c jet.
+metaphlan_hclust_heatmap.py \
+    --in test-data/merged_community_profile.tabular \
+    --out test-data/heatmap.png \
+    -m 'average' \
+    -d 'braycurtis' \
+    -f 'correlation' \
+    --minv '0' \
+    --tax_lev 'a' \
+    --sdend_h '0.1' \
+    --fdend_w '0.1' \
+    --cm_h '0.03' \
+    --font_size '7' \
+    --clust_line_w '1' \
+    --perc '90' \
+    -c 'jet'
+# PDF run: -m ward, -d and -f euclidean, -c pink.
+metaphlan_hclust_heatmap.py \
+    --in test-data/merged_community_profile.tabular \
+    --out test-data/heatmap.pdf \
+    -m 'ward' \
+    -d 'euclidean' \
+    -f 'euclidean' \
+    --minv '0' \
+    --tax_lev 'a' \
+    --sdend_h '0.1' \
+    --fdend_w '0.1' \
+    --cm_h '0.03' \
+    --font_size '7' \
+    --clust_line_w '1' \
+    --perc '90' \
+    -c 'pink'
+# SVG run: -m complete, -d hamming, -f matching, -c pink.
+metaphlan_hclust_heatmap.py \
+    --in test-data/merged_community_profile.tabular \
+    --out test-data/heatmap.svg \
+    -m 'complete' \
+    -d 'hamming' \
+    -f 'matching' \
+    --minv '0' \
+    --tax_lev 'a' \
+    --sdend_h '0.1' \
+    --fdend_w '0.1' \
+    --cm_h '0.03' \
+    --font_size '7' \
+    --clust_line_w '1' \
+    --perc '90' \
+    -c 'pink'
\ No newline at end of file
b
diff -r 000000000000 -r 3b469e230e82 merge_metaphlan_tables.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/merge_metaphlan_tables.xml Sat Mar 04 12:23:25 2017 -0500
[
@@ -0,0 +1,55 @@
+<tool id="merge_metaphlan_tables" name="Merge" version="@WRAPPER_VERSION@.0">
+    <!-- Galaxy wrapper around merge_metaphlan_tables.py: joins several MetaPhlAn2 profile tables into one. -->
+    <description>MetaPhlAn2 files</description>
+
+    <macros>
+        <import>metaphlan2_macros.xml</import>
+    </macros>
+    <!-- The requirements, stdio and citations macros expanded in this file come from the imported macro file. -->
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+
+    <version_command>merge_metaphlan_tables.py -v</version_command>
+    <!-- Every selected dataset is passed as a quoted positional argument; stdout is redirected to the output dataset. -->
+    <command>
+<![CDATA[
+        merge_metaphlan_tables.py
+            #for $element in $input_files
+                '$element'
+            #end for
+            > '$output'
+]]>
+    </command>
+
+    <inputs>
+        <param name="input_files" type="data" format="tabular,txt" multiple="True" label="MetaPhlAn2 output files to join" />
+    </inputs>
+
+    <outputs>
+        <data format="tabular" name="output" label="${tool.name} on ${on_string}: Merged MetaPhlAn2 files" />
+    </outputs>
+    <!-- Smoke test: merging the same profile twice must still contain the #SampleID header and a known strain-level clade. -->
+    <tests>
+        <test>
+            <param name="input_files" value="community_profile.tabular,community_profile.tabular"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="#SampleID" />
+                    <has_text text="k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron|t__Bacteroides_thetaiotaomicron_unclassified" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+
+**What it does**
+
+MetaPhlAn is a computational tool to profile the structure and the composition of microbial communities (Bacteria, Archaea, Eukaryotes and Viruses) from metagenomic shotgun sequencing data with species level resolution. For more information, check the `user manual <https://bitbucket.org/biobakery/metaphlan2/>`_.
+
+This tool performs a table join on one or more MetaPhlAn2 output files.
+
+    ]]></help>
+
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r 3b469e230e82 metaphlan2_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/metaphlan2_macros.xml Sat Mar 04 12:23:25 2017 -0500
b
@@ -0,0 +1,40 @@
+<?xml version="1.0"?>
+<macros> <!-- Reusable macro definitions; merge_metaphlan_tables.xml imports this file. -->
+    <token name="@WRAPPER_VERSION@">2.6.0</token> <!-- also used below as the required metaphlan2 package version -->
+    <xml name="requirements"> <!-- package requirements for tools that expand this macro -->
+        <requirements>
+            <requirement type="package" version="@WRAPPER_VERSION@">metaphlan2</requirement>
+            <yield/> <!-- placeholder for extra requirements supplied by the expanding tool -->
+        </requirements>
+    </xml>
+    <xml name="stdio"> <!-- maps exit codes and stderr text to job error levels -->
+        <stdio>
+            <exit_code range="1:" level="fatal"   description="" /> <!-- any exit code of 1 or above is fatal -->
+            <regex match="ERROR"
+                   source="stderr"
+                   level="fatal"
+                   description="" /> <!-- "ERROR" on stderr is treated as fatal -->
+            <regex match="WARNING"
+                   source="stderr"
+                   level="warning"
+                   description="" /> <!-- "WARNING" on stderr is reported at warning level only -->
+        </stdio>
+    </xml>
+    <xml name="citations"> <!-- DOI citation for tools that expand this macro -->
+      <citations>
+          <citation type="doi">10.1038/nmeth.3589</citation>
+      </citations>
+    </xml>
+    <xml name="tax_lev"> <!-- select parameter for the tax_lev option; defaults to all levels -->
+      <param argument="--tax_lev" type="select" label="Taxonomic level for the relative abundance output">
+        <option value="a" selected="true">All taxonomic levels</option>
+        <option value="k">Kingdoms (Bacteria and Archaea) only</option>
+        <option value="p">Phyla only</option>
+        <option value="c">Classes only</option>
+        <option value="o">Orders only</option>
+        <option value="f">Families only</option>
+        <option value="g">Genera only</option>
+        <option value="s">Species only</option>
+      </param>
+    </xml>
+</macros>
b
diff -r 000000000000 -r 3b469e230e82 repository_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml Sat Mar 04 12:23:25 2017 -0500
b
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<repositories description="This requires the MetaPhlAn2 data manager definition to install all required databases.">
+    <repository changeset_revision="9c4ad82be5bd" name="data_manager_metaphlan2_database_downloader" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+</repositories>
b
diff -r 000000000000 -r 3b469e230e82 test-data/community_profile.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/community_profile.tabular Sat Mar 04 12:23:25 2017 -0500
b
@@ -0,0 +1,11 @@
+#SampleID Metaphlan2_Analysis
+k__Bacteria 100.0
+k__Bacteria|p__Bacteroidetes 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris 68.05418
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron 31.94582
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris|t__Bacteroides_stercoris_unclassified 68.05418
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron|t__Bacteroides_thetaiotaomicron_unclassified 31.94582
b
diff -r 000000000000 -r 3b469e230e82 test-data/formatted_to_krona.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/formatted_to_krona.tabular Sat Mar 04 12:23:25 2017 -0500
b
@@ -0,0 +1,4 @@
+68.05418 Bacteria Bacteroidetes Bacteroidia Bacteroidales Bacteroidaceae Bacteroides Bacteroides_stercoris
+31.94582 Bacteria Bacteroidetes Bacteroidia Bacteroidales Bacteroidaceae Bacteroides Bacteroides_thetaiotaomicron
+68.05418 Bacteria Bacteroidetes Bacteroidia Bacteroidales Bacteroidaceae Bacteroides Bacteroides_stercoris Bacteroides_stercoris_unclassified
+31.94582 Bacteria Bacteroidetes Bacteroidia Bacteroidales Bacteroidaceae Bacteroides Bacteroides_thetaiotaomicron Bacteroides_thetaiotaomicron_unclassified
b
diff -r 000000000000 -r 3b469e230e82 test-data/heatmap.pdf
b
Binary file test-data/heatmap.pdf has changed
b
diff -r 000000000000 -r 3b469e230e82 test-data/heatmap.png
b
Binary file test-data/heatmap.png has changed
b
diff -r 000000000000 -r 3b469e230e82 test-data/heatmap.svg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/heatmap.svg Sat Mar 04 12:23:25 2017 -0500
b
b'@@ -0,0 +1,1291 @@\n+<?xml version="1.0" encoding="utf-8" standalone="no"?>\n+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"\n+  "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n+<!-- Created with matplotlib (http://matplotlib.org/) -->\n+<svg height="316pt" version="1.1" viewBox="0 0 382 316" width="382pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">\n+ <defs>\n+  <style type="text/css">\n+*{stroke-linecap:butt;stroke-linejoin:round;}\n+  </style>\n+ </defs>\n+ <g id="figure_1">\n+  <g id="patch_1">\n+   <path d="M 0 316.460719 \n+L 382.864688 316.460719 \n+L 382.864688 0 \n+L 0 0 \n+z\n+" style="fill:#ffffff;"/>\n+  </g>\n+  <g id="axes_1">\n+   <g id="matplotlib.axis_1"/>\n+   <g id="matplotlib.axis_2"/>\n+   <g id="LineCollection_1">\n+    <path clip-path="url(#p35d92c4cfd)" d="M 39.5 118.124 \n+L 39.5 118.124 \n+L 39.5 103.724 \n+L 39.5 103.724 \n+" style="fill:none;stroke:#000000;"/>\n+    <path clip-path="url(#p35d92c4cfd)" d="M 39.5 132.524 \n+L 39.5 132.524 \n+L 39.5 110.924 \n+L 39.5 110.924 \n+" style="fill:none;stroke:#000000;"/>\n+    <path clip-path="url(#p35d92c4cfd)" d="M 39.5 146.924 \n+L 39.5 146.924 \n+L 39.5 121.724 \n+L 39.5 121.724 \n+" style="fill:none;stroke:#000000;"/>\n+    <path clip-path="url(#p35d92c4cfd)" d="M 39.5 161.324 \n+L 39.5 161.324 \n+L 39.5 134.324 \n+L 39.5 134.324 \n+" style="fill:none;stroke:#000000;"/>\n+    <path clip-path="url(#p35d92c4cfd)" d="M 39.5 175.724 \n+L 39.5 175.724 \n+L 39.5 147.824 \n+L 39.5 147.824 \n+" style="fill:none;stroke:#000000;"/>\n+    <path clip-path="url(#p35d92c4cfd)" d="M 39.5 190.124 \n+L 39.5 190.124 \n+L 39.5 161.774 \n+L 39.5 161.774 \n+" style="fill:none;stroke:#000000;"/>\n+    <path clip-path="url(#p35d92c4cfd)" d="M 39.5 204.524 \n+L 39.5 204.524 \n+L 39.5 175.949 \n+L 39.5 175.949 \n+" style="fill:none;stroke:#000000;"/>\n+    <path clip-path="url(#p35d92c4cfd)" d="M 39.5 218.924 \n+L 39.5 218.924 \n+L 39.5 190.2365 \n+L 39.5 190.2365 \n+" 
style="fill:none;stroke:#000000;"/>\n+    <path clip-path="url(#p35d92c4cfd)" d="M 39.5 233.324 \n+L 39.5 233.324 \n+L 39.5 204.58025 \n+L 39.5 204.58025 \n+" style="fill:none;stroke:#000000;"/>\n+   </g>\n+  </g>\n+  <g id="axes_2">\n+   <g id="matplotlib.axis_3"/>\n+   <g id="matplotlib.axis_4"/>\n+   <g id="LineCollection_2">\n+    <path clip-path="url(#p4770d5a667)" d="M 104.3 67.724 \n+L 104.3 67.724 \n+L 176.3 67.724 \n+L 176.3 67.724 \n+" style="fill:none;stroke:#000000;"/>\n+   </g>\n+  </g>\n+  <g id="axes_3">\n+   <g id="patch_2">\n+    <path d="M 68.3 240.524 \n+L 212.3 240.524 \n+L 212.3 96.524 \n+L 68.3 96.524 \n+z\n+" style="fill:#1e0000;"/>\n+   </g>\n+   <g clip-path="url(#p82d1fad6e8)">\n+    <image height="144" id="imageaa5496bdc6" transform="scale(1 -1)translate(0 -144)" width="144" x="68.3" xlink:href="data:image/png;base64,\n+iVBORw0KGgoAAAANSUhEUgAAAlgAAAJYCAYAAAC+ZpjcAAAABHNCSVQICAgIfAhkiAAACrxJREFUeJzt2EENAzEQBEE7QEL7gByUANqQaMmSVYVgnq3Z37VmAQCQ+ZweAABwG4EFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAMYEFABATWAAAsf17nzk9AgDgJh4sAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksAICYwAIAi
AksAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksAICYwAIAiAksA'..b'8125 37.15625 52.796875 32.3125 \n+Q 56.78125 27.484375 56.78125 20.515625 \n+Q 56.78125 9.90625 50.3125 4.234375 \n+Q 43.84375 -1.421875 31.78125 -1.421875 \n+Q 19.734375 -1.421875 13.25 4.234375 \n+Q 6.78125 9.90625 6.78125 20.515625 \n+Q 6.78125 27.484375 10.78125 32.3125 \n+Q 14.796875 37.15625 21.921875 38.8125 \n+M 18.3125 54.390625 \n+Q 18.3125 48.734375 21.84375 45.5625 \n+Q 25.390625 42.390625 31.78125 42.390625 \n+Q 38.140625 42.390625 41.71875 45.5625 \n+Q 45.3125 48.734375 45.3125 54.390625 \n+Q 45.3125 60.0625 41.71875 63.234375 \n+Q 38.140625 66.40625 31.78125 66.40625 \n+Q 25.390625 66.40625 21.84375 63.234375 \n+Q 18.3125 60.0625 18.3125 54.390625 \n+" id="DejaVuSans-38"/>\n+      </defs>\n+      <g transform="translate(165.527027 36.842906)scale(0.07 -0.07)">\n+       <use xlink:href="#DejaVuSans-38"/>\n+       <use x="63.623047" xlink:href="#DejaVuSans-30"/>\n+      </g>\n+     </g>\n+    </g>\n+    <g id="xtick_8">\n+     <g id="line2d_18">\n+      <g>\n+       <use style="stroke:#000000;stroke-width:0.8;" x="191.140389" xlink:href="#m479c52ad5c" y="24.524"/>\n+      </g>\n+     </g>\n+     <g id="text_18">\n+      <!-- 90 -->\n+      <defs>\n+       <path d="M 10.984375 1.515625 \n+L 10.984375 10.5 \n+Q 14.703125 8.734375 18.5 7.8125 \n+Q 22.3125 6.890625 25.984375 6.890625 \n+Q 35.75 6.890625 40.890625 13.453125 \n+Q 46.046875 20.015625 46.78125 33.40625 \n+Q 43.953125 29.203125 39.59375 26.953125 \n+Q 35.25 24.703125 29.984375 24.703125 \n+Q 19.046875 24.703125 12.671875 31.3125 \n+Q 6.296875 37.9375 6.296875 49.421875 \n+Q 6.296875 60.640625 12.9375 67.421875 \n+Q 19.578125 74.21875 30.609375 74.21875 \n+Q 43.265625 74.21875 49.921875 64.515625 \n+Q 56.59375 54.828125 56.59375 36.375 \n+Q 56.59375 19.140625 48.40625 8.859375 \n+Q 40.234375 -1.421875 26.421875 -1.421875 \n+Q 22.703125 -1.421875 18.890625 -0.6875 \n+Q 15.09375 0.046875 
10.984375 1.515625 \n+M 30.609375 32.421875 \n+Q 37.25 32.421875 41.125 36.953125 \n+Q 45.015625 41.5 45.015625 49.421875 \n+Q 45.015625 57.28125 41.125 61.84375 \n+Q 37.25 66.40625 30.609375 66.40625 \n+Q 23.96875 66.40625 20.09375 61.84375 \n+Q 16.21875 57.28125 16.21875 49.421875 \n+Q 16.21875 41.5 20.09375 36.953125 \n+Q 23.96875 32.421875 30.609375 32.421875 \n+" id="DejaVuSans-39"/>\n+      </defs>\n+      <g transform="translate(186.686639 36.842906)scale(0.07 -0.07)">\n+       <use xlink:href="#DejaVuSans-39"/>\n+       <use x="63.623047" xlink:href="#DejaVuSans-30"/>\n+      </g>\n+     </g>\n+    </g>\n+    <g id="xtick_9">\n+     <g id="line2d_19">\n+      <g>\n+       <use style="stroke:#000000;stroke-width:0.8;" x="212.3" xlink:href="#m479c52ad5c" y="24.524"/>\n+      </g>\n+     </g>\n+     <g id="text_19">\n+      <!-- 100 -->\n+      <defs>\n+       <path d="M 12.40625 8.296875 \n+L 28.515625 8.296875 \n+L 28.515625 63.921875 \n+L 10.984375 60.40625 \n+L 10.984375 69.390625 \n+L 28.421875 72.90625 \n+L 38.28125 72.90625 \n+L 38.28125 8.296875 \n+L 54.390625 8.296875 \n+L 54.390625 0 \n+L 12.40625 0 \n+z\n+" id="DejaVuSans-31"/>\n+      </defs>\n+      <g transform="translate(205.619375 36.842906)scale(0.07 -0.07)">\n+       <use xlink:href="#DejaVuSans-31"/>\n+       <use x="63.623047" xlink:href="#DejaVuSans-30"/>\n+       <use x="127.246094" xlink:href="#DejaVuSans-30"/>\n+      </g>\n+     </g>\n+    </g>\n+   </g>\n+   <g id="matplotlib.axis_10"/>\n+   <g id="patch_16">\n+    <path d="M 68.3 24.524 \n+L 68.8625 24.524 \n+L 211.7375 24.524 \n+L 212.3 24.524 \n+L 212.3 10.7 \n+L 211.7375 10.7 \n+L 68.8625 10.7 \n+L 68.3 10.7 \n+z\n+" style="fill:none;stroke:#000000;stroke-linejoin:miter;stroke-width:0.8;"/>\n+   </g>\n+  </g>\n+ </g>\n+ <defs>\n+  <clipPath id="p35d92c4cfd">\n+   <rect height="144" width="57.6" x="10.7" y="96.524"/>\n+  </clipPath>\n+  <clipPath id="p4770d5a667">\n+   <rect height="57.6" width="144" x="68.3" y="38.924"/>\n+  
</clipPath>\n+  <clipPath id="p82d1fad6e8">\n+   <rect height="144" width="144" x="68.3" y="96.524"/>\n+  </clipPath>\n+  <clipPath id="pa21fd958a4">\n+   <rect height="13.824" width="144" x="68.3" y="10.7"/>\n+  </clipPath>\n+ </defs>\n+</svg>\n'
b
diff -r 000000000000 -r 3b469e230e82 test-data/input_sequences.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_sequences.fasta Sat Mar 04 12:23:25 2017 -0500
b
b'@@ -0,0 +1,2260 @@\n+>r2|637000026.fna|5753889|5754040|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln383_#0/1\n+CTCCGTCCGCTGGTAGAACGTCTGGTAGAAGCTCAAAAAGCATTGGCAACCAAATACCTTTCCGAAGCCAAACGACTGATTGCCTCCAACGACAAGAAGGAAGTGGAAGAAGGATTCCTTGCCCTTTATCGTAGCCACAAGTGTCTTCCGA\n+>r3|637000026.fna|1749333|1749484|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln165_#0/1\n+AAGTAAAAGTCTACACACAGGAAGGAACGATAATGGAAAGTACGATTGAAGTAAATCGTCCGATGGAGATAGCAGGATGGAAAATTTATCAGCTTAGTTATGATGAGTCGAAAGGTCGCTGGAGCGATATCAGTGTCTTTGAGCTGGTTCG\n+>r8|637000026.fna|2991814|2991965|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln402_#0/1\n+AGAAAGTTTTGATCAAAGCATGGGATGCTGAAAACGGACCGGTAATTATCGACTCAGGAGAAAGTACCTACAACACAACCGCCAAAAAGTTCAGCCTCAAATATACGATCGGTAACACGCTATATGAAGAGCAACTGACCAAAGAAAAAGA\n+>r24|641736196.fna|214158|214309|_from_ |NZ_ABFZ02000018_Bacteroides_stercoris_ATCC_43183,|_ln388_#0/1\n+CAGATACGAGTACCAATTGGCCGTCAACGACTATTGGAAGGAAGTCGGCGGATTGCAGATGCTGCCCGGAACCAACCGCTCCAGCGACCGCTTTGTACGCGCGTCATTCTACATTCATGCCATTCCGCAGACAGCAGACGCGGCGATTGCA\n+>r32|637000026.fna|4140951|4141102|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln422_#0/1\n+TATTCTCAGGGGACTAATTGGTGATATAGTGCTCGATTATGATAGAGGTGAAGGAATTATGTCCTTTAAGACTCAAAAGGTAGGTTATATATCAGGATATTATTTGGGATGTACTACATATTGTTATGCCATGGGATTTTATCCTACATAT\n+>r55|637000026.fna|6212867|6213018|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln342_#0/1\n+CAGGCGTCGGGTGAGCTGGACTTTGAAGAGTTGTGCGAAGCGATCACCAGTCGTTCTACTTGTACGGAAACGGATGTACGTGCGGCTATTTCGGGTATTCTTTACGAAGTGAAGCGTGCGTTGAAGGCAGGAAGAATTGCGAGACTGGGTG\n+>r94|641736196.fna|124792|124943|_from_ |NZ_ABFZ02000020_Bacteroides_stercoris_ATCC_43183,|_ln396_#0/1\n+AATCAGTTTCATACGCTATCAATATCCGGGGTCAATAGTTTCCATTTGTTGCTCGCTTGACACGCTTTCAAAGCTAACATCTTCCGCATCAATCAACTGTTTGTTTGCGTAATTGTCGGTCAATATTTCACGCTGTGAAGTTTCGTCACCC\n+>r115|641736196.fna|175677|175828|_from_ 
|NZ_ABFZ02000018_Bacteroides_stercoris_ATCC_43183,|_ln360_#0/1\n+CTGTCAGATACGTGAAAGATTACATTCGATACGTGAGTATGTAATGAAATATCCTGAGTTTGACGATAGAAACGATATTGACCCGTCAATGCGAATGTTCTATATCCAGTCTGTTGAAGCACAAGCAAAAAATCGAAGTTATTCAGATGCA\n+>r121|641736196.fna|7934|8085|_from_ |NZ_ABFZ02000017_Bacteroides_stercoris_ATCC_43183,|_ln383_#0/1\n+TGAGCGGTTCGGTCTATACCCTTATGAATAAAATCTTCAGGGATTCAAAAGAACCGCTGTATGGCCGTGCTGACAATATCATGAAGTTAGCTCCTTTTACTACTTCCGTATTGAAAGAAATCATATCGGATCATAAGGCCGACTATACTAA\n+>r145|637000026.fna|1378791|1378942|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln396_#0/1\n+TCCGTCTGATTCGTAGTGTACCAGCATATCCGCCATGACATCGCTCACTGTAGCGCTGAAATCTTTCAGAAGTTTATGGCTGCCCACCCAGTAAGTCTTGCCTTGATAAGACACCTTTATACCTTTGCCTGTGATGCTCTCAAAACTGCTC\n+>r158|637000026.fna|3242194|3242345|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln388_#0/1\n+GGACAGCAGTATCAAACGGTTAAAATACGTGAGATACGCAGATGACTTTCTGATTGGAATTACCGGTAATCTTGAAGACTGCAAAACAGTAAAAGAGGATATTAAGAATTATTTGAATGAAGCTCTTAAACTGGAACTGTCAGACGAAAAG\n+>r160|637000026.fna|3948359|3948510|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln347_#0/1\n+CGGAAGTGACTCATACATATCAGGTGGTTCTTCCAGTTATAAGAACCTGTCTGCAAGTGAAATGAAGAACACATCTTCATTCACAAACTGGAATTTCAGTACGGTTTGGGAAATGGGAAGTGAATACCCGACATTACAGGGCTTATTAAAA\n+>r187|637000026.fna|5220534|5220685|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln396_#0/1\n+GTTACCTTGATCAATGGATTTGCATATTCCAGCAACCACGAGATAGAGGCGTGTCCTGCTTCATGCAAGGCAATCGAGCGTCTTTCCGCTTCTGTGGTAATCTTGGTCTTCTTTTCCAGACCACCGATGATACGGTCTACCGCATCCAGAA\n+>r192|637000026.fna|2858128|2858279|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln325_#0/1\n+ACGTGGACGGTTACGCACTGCCGGAAAAAATCAGGAACGCATTCCTGGGATTGGAGGAAAAGGAGAAAACACTCATCAGTTACTTCACCCAGCACAATGAACAGTATGCAAAGAAGGTCGGCAAGACTGCCACGCAAAAGACCTATTCCCG\n+>r218|641736196.fna|272465|272616|_from_ 
|NZ_ABFZ02000022_Bacteroides_stercoris_ATCC_43183,|_ln348_#0/1\n+ATTTCTTCGTCGGTTTCCGTATGGAAGAAGTACACCACGGCCAACTAGGTGAAGTGACGGATGTGGACACCTCTACCATCAACACTCTGTTCGTGGTAGATTATAAAGGAGAAGAATTGCTGATTCCGGCACAGGAAGATTTCATTATGGA\n+>r222|637000026.fna|2630231|2630382|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_'..b'CGGACTTTCGGTATCTATCTATATGTCTGAGGTCGCCAATCCGAAGGTGCGCAACTGGCTGAAACCGATCATTGAA\n+>r12070|641736196.fna|375596|375747|_from_ |NZ_ABFZ02000018_Bacteroides_stercoris_ATCC_43183,|_ln407_#0/1\n+GAATGTTTTTCCATGAATATTTATGTTTTCTGACCGGGAGTCTTTCATGGAATTAATCAATAAATCATACATATGTTCCATTGCTCCCTTAGAAGGAGCTTTGATGTAGCTAACGGTTAAACTGCTGACACCATGTCCTCGAAGTTGCAAT\n+>r12072|637000026.fna|2532372|2532523|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln392_#0/1\n+GGCTCCGGGATTGATGTGCAGCATGTCCAACGTTTTGTCGTATTTCACCTTTAATATATGGGAGTGTCCGCTGATGAAAAGTTTCGGCGGACGGGCCATCAAACTGCCGATAACCGAAGGGTCGTACTTGCCCGGATACCCGCCGATATGC\n+>r12073|637000026.fna|6194446|6194597|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln420_#0/1\n+CTGAATATTATAGTCATATCCCTTCACCGGTTCTTCCAGCTCTTTGAATATCACAAAGGCGCGTTGCCCGTCTTTATTCTTGTCACTGATCCATCCGTTTCCATTGCTCGGGAACATTGTTTTTCCATCATCCAGAGTGAAATAGAAATCC\n+>r12077|641736196.fna|245225|245376|_from_ |NZ_ABFZ02000016_Bacteroides_stercoris_ATCC_43183,|_ln342_#0/1\n+ATGCAGTGTCGCCCACCATGTCCATGTCGCTCTCACGGTCGAAAAGCAATGTATAATAATTCTGTTTGGTAAACAGGTAGTCGTGCTTCCAGAAGCGTTCCTCTATCTTCGAAGGAGTGTCCCGGTTGTAATAAGGCAAAGGAAACTCCGT\n+>r12078|641736196.fna|76492|76643|_from_ |NZ_ABFZ02000017_Bacteroides_stercoris_ATCC_43183,|_ln385_#0/1\n+GATCCTTTCAGTTCGTACCACTCGTCATCCGGCAATCCGTTTCCGTTTATATCTTGCATGACCCATACGATACCCGGTTCGGAACTGCCGTCAAAGGCGTTCCCCTGAACACAGAAATCATATTGGTTACCCGAATTGGGAATACTGTGGT\n+>r12088|641736196.fna|108362|108513|_from_ 
|NZ_ABFZ02000016_Bacteroides_stercoris_ATCC_43183,|_ln368_#0/1\n+AGGTTTCCACACTTCCTCGCCAAAATAGTTGTGAGGACCACCGAAGCGGCAGTCCAATATTCCGGCAAGCGCCGCTTCCGGATAGCCGGAATTGGGACTGGCATGCTTGCTGCCGTATTTCCCCACAAAAAGCAGCAACGGGAATCTGCCG\n+>r12120|637000026.fna|4257090|4257241|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln262_#0/1\n+ACGGGCTTCTTCGCCAATGCTCTTCCCATAAAGATAAGACATTTCGGGATTCCATGTTGCAGAAAGGCAAGTCAATGCAGGATAGGCTATACAGGAATCGTTTGTCCATCCTGCCTGATCCCACTCATCCCATAATACTTCGGGACGGATG\n+>r12152|637000026.fna|2685134|2685285|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln353_#0/1\n+CACTCAAATCATCAATCAAGCTGACTTTCTTTAATGTATCCTTCTTAGCATCATAATATTTCATCTCCAGATTATTAGCAATCGAGAAACTAACCGTTCCCGAACCCTCACGTGAAGGGACTCCAAAAGGCTGTCCCGTATAGGGTGAATA\n+>r12163|637000026.fna|5651468|5651619|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln367_#0/1\n+AATGAACAACACCATTAAAAACGGTTGGCTATTCGACTGTGATGATGAGAGCTTCGAGCAAGTAAGCAAGCGCATTGAAGATGCTGTATATGTATATAATCATGTGCGGCCTCATCAAGGGATAAACATGAGGACACCTATGGAAGTGGTC\n+>r12201|641736196.fna|253613|253764|_from_ |NZ_ABFZ02000023_Bacteroides_stercoris_ATCC_43183,|_ln348_#0/1\n+CCACTCCGGAAGGTATTATTGGCAATGAAGACTGCGGGCAGATGTCGGCATGGTATATTCTTTCAAGTATGGGACTTTATTCGGTTTGTCCTGGTAGCAACGAATATGTTCTTACTACTCCCTTGTTCGAGAAAGTTGTAGTGCATTTGGC\n+>r12208|637000026.fna|1887139|1887290|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln435_#0/1\n+ACATCGCGGTTGCTTTTCGCTACCCGTACCTTTCCGCGTCTCTTTCCCCAGTCCAGAATCGGAATCTGAACGCCTACCTGCACAATCTGATTATCCTGCAAGTTCCTGTAGACACTGGAAAGTTCACGACTCTCTCCCGTATATCCTACAC\n+>r12218|641736196.fna|422226|422377|_from_ |NZ_ABFZ02000018_Bacteroides_stercoris_ATCC_43183,|_ln296_#0/1\n+GTCCAATTGCGCACGTGCCAGCCGGGTGGCATTAACCAGTTTTGTACCGTTGAAGAGTGTCCAACTGAGGCTTGGCGCTATTTCAAACGTCATACTGTTCCGTTTGGTGAAATCTTTCAGGTCATGGGAAGAATATCCTACGGAACCTTTT\n+>r12223|637000026.fna|4646221|4646372|_from_ 
|NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln436_#0/1\n+CGGTACTGCTTTTAGAAAGGAACGGATACGGGGTTGAGTCCATTTCTTTTTGTCATAGAAGAACATCCAGGTCATTTGTAACCATTGGGCTGCTGAGTCTACCTGATGAATCGACTCGTAAATCTTGCTTGAAACATTGGCTAAAAAATCC\n+>r12228|641736196.fna|168782|168933|_from_ |NZ_ABFZ02000023_Bacteroides_stercoris_ATCC_43183,|_ln401_#0/1\n+GGTCTCTACCATTTCCGAAATGTTACCTTTCTCTACACGGAACATTTTATAATCACTGCCCACCTTATGCAGAATACGATCCAAATGTTCACGATTGGTATCTGTTATAAAGATCTGTCCAAAGTTATCACCTGCCACCAGTTTAATTATC\n+>r12236|637000026.fna|5656063|5656214|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln344_#0/1\n+TTCAGTGAGACAGATGTGCGGACCATGTTTCAGTACTATAAAGACGCCGGACAACTGCCGGCAGATACAGACATAGATGCACTGATTCGGGAAATAAAACCGTGGTATGACAATTATTGCTTTGCCAAAGAAAGTCTGGAACGTGATCCGA\n'
b
diff -r 000000000000 -r 3b469e230e82 test-data/marker_metadata.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/marker_metadata.json Sat Mar 04 12:23:25 2017 -0500
[
b'@@ -0,0 +1,1 @@\n+{"taxonomy": {"k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Micromonosporaceae|g__Salinispora|s__Salinispora_pacifica|t__GCF_000374725": 5438612, "k__Viruses|p__Viruses_noname|c__Viruses_noname|o__Mononegavirales|f__Rhabdoviridae|g__Nucleorhabdovirus|s__Maize_mosaic_virus|t__PRJNA14920": 12133, "k__Archaea|p__Euryarchaeota|c__Halobacteria|o__Halobacteriales|f__Halobacteriaceae|g__Halorubrum|s__Halorubrum_sp_T3|t__GCF_000296615": 3168011, "k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Propionibacteriaceae|g__Propionibacterium|s__Propionibacterium_acnes|t__GCF_000145075": 2614131, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteriales|f__Enterobacteriaceae|g__Escherichia|s__Escherichia_coli|t__GCF_000485655": 5022965, "k__Bacteria|p__Chlamydiae|c__Chlamydiia|o__Chlamydiales|f__Chlamydiaceae|g__Chlamydia|s__Chlamydia_trachomatis|t__GCF_000304515": 1042736, "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Leuconostocaceae|g__Weissella|s__Weissella_koreensis|t__GCF_000277645": 1728940, "k__Bacteria|p__Spirochaetes|c__Spirochaetia|o__Spirochaetales|f__Leptospiraceae|g__Leptospira|s__Leptospira_interrogans|t__GCF_000244635": 4459519, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteriales|f__Enterobacteriaceae|g__Buchnera|s__Buchnera_aphidicola|t__GCF_000007365": 641454, "k__Bacteria|p__Firmicutes|c__Clostridia|o__Clostridiales|f__Ruminococcaceae|g__Ruminococcus|s__Ruminococcus_sp|t__GCF_000209835": 3545606, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Acinetobacter|s__Acinetobacter_baumannii|t__GCF_000302035": 3973165, "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Streptococcaceae|g__Streptococcus|s__Streptococcus_mitis_oralis_pneumoniae|t__GCF_000495335": 2144825, 
"k__Bacteria|p__Proteobacteria|c__Alphaproteobacteria|o__Rickettsiales|f__Rickettsiaceae|g__Rickettsia|s__Rickettsia_parkeri|t__GCF_000284195": 1300386, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteriales|f__Enterobacteriaceae|g__Escherichia|s__Escherichia_coli|t__GCF_000316745": 5198097, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteriales|f__Enterobacteriaceae|g__Escherichia|s__Escherichia_coli|t__GCF_000355055": 5070534, "k__Bacteria|p__Firmicutes|c__Clostridia|o__Clostridiales|f__Clostridiaceae|g__Candidatus_Arthromitus|s__Candidatus_Arthromitus_sp_SFB_2|t__GCF_000252685": 1135256, "k__Viruses|p__Viruses_noname|c__Viruses_noname|o__Caudovirales|f__Siphoviridae|g__Siphoviridae_noname|s__Lactobacillus_phage_J_1|t__PRJNA227005": 40931, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Vibrionales|f__Vibrionaceae|g__Vibrio|s__Vibrio_cholerae|t__GCF_000305055": 3943387, "k__Viruses|p__Viruses_noname|c__Viruses_noname|o__Caudovirales|f__Siphoviridae|g__Siphoviridae_noname|s__Bacillus_phage_Fah|t__PRJNA16382": 37974, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Acinetobacter|s__Acinetobacter_johnsonii|t__GCF_000162055": 3690010, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteriales|f__Enterobacteriaceae|g__Klebsiella|s__Klebsiella_pneumoniae|t__GCF_000409125": 5671251, "k__Viruses|p__Viruses_noname|c__Viruses_noname|o__Viruses_noname|f__Viruses_noname|g__Viruses_noname|s__Geobacillus_virus_E2|t__PRJNA19797": 40863, "k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinobacteria_noname|f__Actinobacteria_noname|g__Actinobacteria_noname|s__actinobacterium_SCGC_AAA278_O22|t__GCF_000372185": 1138490, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Alteromonadales|f__Pseudoalteromonadaceae|g__Pseudoalteromonas|s__Pseudoalteromonas_sp_BSi20429|t__GCF_000238895": 4495777, 
"k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Streptococcaceae|g__Streptococcus|s__Streptococcus_agalactiae|t__GCF_000427035": 2138694, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__P'..b'teria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron"}, "gi|514974163|ref|NZ_KE340313.1|:332123-333478": {"ext": [], "score": 0.0, "clade": "s__Bacteroides_stercoris", "len": 1356, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris"}, "gi|384096987|ref|NZ_JH636042.1|:382588-383040": {"ext": ["GCF_000162515", "GCF_000159875"], "score": 2.0, "clade": "s__Bacteroides_thetaiotaomicron", "len": 453, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron"}, "gi|514974745|ref|NZ_KE340318.1|:c38550-38392": {"ext": [], "score": 0.0, "clade": "s__Bacteroides_stercoris", "len": 159, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris"}, "gi|512436172|ref|NZ_KE159460.1|:c349752-348871": {"ext": ["GCF_000159875"], "score": 1.0, "clade": "s__Bacteroides_thetaiotaomicron", "len": 882, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron"}, "gi|514974549|ref|NZ_KE340315.1|:c186066-185032": {"ext": ["GCF_000374365"], "score": 1.0, "clade": "s__Bacteroides_stercoris", "len": 1035, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris"}, "gi|384096980|ref|NZ_JH636035.1|:50682-52622": {"ext": ["GCF_000159875"], "score": 1.0, "clade": "s__Bacteroides_thetaiotaomicron", "len": 1941, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron"}, 
"gi|223955876|ref|NZ_DS499677.1|:c358511-356622": {"ext": [], "score": 0.0, "clade": "s__Bacteroides_stercoris", "len": 1890, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris"}, "gi|223955871|ref|NZ_DS499672.1|:29963-30130": {"ext": [], "score": 0.0, "clade": "s__Bacteroides_stercoris", "len": 168, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris"}, "gi|514973000|ref|NZ_KE340309.1|:c227121-226093": {"ext": [], "score": 0.0, "clade": "s__Bacteroides_stercoris", "len": 1029, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris"}, "gi|512436172|ref|NZ_KE159460.1|:c266283-265069": {"ext": ["GCF_000159875"], "score": 1.0, "clade": "s__Bacteroides_thetaiotaomicron", "len": 1215, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron"}, "gi|298384780|ref|NZ_GG774704.1|:496496-497020": {"ext": ["GCF_000162515", "GCF_000159875"], "score": 2.0, "clade": "s__Bacteroides_thetaiotaomicron", "len": 525, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron"}, "gi|223955871|ref|NZ_DS499672.1|:193538-194158": {"ext": [], "score": 0.0, "clade": "s__Bacteroides_stercoris", "len": 621, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris"}, "gi|298385318|ref|NZ_GG774705.1|:324177-324773": {"ext": ["GCF_000162515", "GCF_000159875"], "score": 2.0, "clade": "s__Bacteroides_thetaiotaomicron", "len": 597, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron"}, "gi|223955873|ref|NZ_DS499674.1|:c270435-267715": {"ext": ["GCF_000243215"], 
"score": 1.0, "clade": "s__Bacteroides_stercoris", "len": 2721, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris"}}}\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 3b469e230e82 test-data/marker_sequences.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/marker_sequences.fasta Sat Mar 04 12:23:25 2017 -0500
b
b'@@ -0,0 +1,7704 @@\n+>gi|223955874|ref|NZ_DS499675.1|:82124-83086\n+ATGAAAAAATCGATACTGTTAATGGGCCTTGCAGCTCTCATAGCAAGCTGTTCGGACGAT\n+GACCGGAACAACGATACTGATAATCAGCATCTTGTCAGTCCCTCAATGGCTGTGAGCATC\n+AGCAACACCGACGGCCGCCAAAGCCCGTTTACCGGCATTCTTACAATTATGCCATGCGAT\n+GCAAACAGTTCCATATACTATGGAAATTATGTAAAAGGGAAACTGTCTCCTTTCTACGGC\n+TATTACCGGGTAAAGGACGGAAGTTTCCATGACAATTCCATCAATAGGGAGATTTCTCTG\n+CCGATAGGCACTTACAATATGATTTATTGGGGAACACCCCAATACCAAACTCCCATTTAT\n+GCCCACCCTGCCATAAAAGAGGCGGCTCACAGCATAGGTGCCGATATGTCCAAACAGTCT\n+CTCGGCTTGTTCAGAATATCAGCCGATACCATTTACTATCCCGTATTCGACCTGGTGTAC\n+GCCACACAGCCGGTAAACATCGGAAGCGAAAGCCTCAGTGGCTCTTTAAAACGAGTGGTA\n+GCCGGAATAAAGGTTGTCCTGAAGGACAAAGACAATGCCGTACTAAGTTCCAGTATAGAC\n+AGTGTGTCCGTACGCATCACCAACATTTACAGCGAACTCAACTATTATACGGGAAAACCG\n+CAAGGTACACCACGTACAATCGCGTTCCCTCTGATTCGTTCGAACGATGGTACTCAGATG\n+AGCAACAGCACCGTTATGCTGTTCCCGTCCGCCGGAAAACCGGAATTCCAACTAGCAATC\n+ATTCTTAAAAACGGTAATGTGAAAAGCTTCAGACAAGCATTGAGCAGCCCGCTCGATGCC\n+AATGCCAAACTGACATTAACGTTAAGTCTGGGTGATATTTTCTCTGAAGAATCCTCCGGT\n+GACTTTACCATCGATGACTGGAACGAGAAAAACGAGAATATTGATATACCGATAATAGAG\n+TAG\n+>gi|29345410|ref|NC_004663.1|:2392014-2392814\n+ATGAAAATTACAATTATAGGAGCCGGAAACATAGGGAGCGCAGTTGCTGCCTGTCTGGCA\n+AAAGGACATCTCTACAACGAAAAAGATATTATCATTTCTACCCCTCATACAGACAAGCTA\n+GAAAATCTGCACAAACAATTTCCTGCGATACGCATAATGACAGAGAATCAGTATGCCATA\n+TCGGAAGCCGACATTATTATTCTGGCTGTCAAACCATGTATAGTAGACGAAGTATTATCT\n+CCGTTACGATTCTCACGCACCCAAATCCTGGTTTCACTGGTAACCGGAATATCCATTTCT\n+CACCTGGCACATTTATCGGAAACCGAAATGCCTATATTCCGGGTTGTTCCAAACATCGCC\n+ATTACGGAACATTCGAGCCTGACCCTGATAACTTCACGCAAAGCCGGCAAAGAACACCAG\n+CAACTCATAAAACAGACATTTGAAGAAGGAGGAAAGTGTTTGTTCGCAGAAGAGAAACAG\n+CTGGATATCATTTCTGCATTAACTTCCAGTGGAATCGCTTTCGCATTAAAATATATTCAT\n+GCAGCCATGCAAGCCGGCATAGAATTGGGTATCTCCGCTGAAGATGCAATGCGAATGACG\n+GCATATAGCATGGAAGGAGCTACAGAACTGATTTTAAATCACGACACCCACCCGCTGTTA\n+GAAATAGAAAAAGCAGCTACTCCCGGTGGAGCCACCATTAAAGGATTGAATGAACTGGAG\n+CACAGGGGATTTACTTCCGCCGTTATTCATGCCATAAAAAGTAGCGCCACAGTATCGACC\n+GATAAAGAGACTGAAGAATAG\n+>gi|298385318|ref|NZ_GG77
4705.1|:275923-276495\n+ATGATAAATTCAATTAATATACAAATCAGAGAGACCAATACAGATGATTTCGACAGCATA\n+ATGACTGTTGAAAAACAAGCGTTTGGCTATGACAAAGAAGCACAGTTGGTAGCCGATCTG\n+CTGGCTGACAAAACTGCAAAACCAATGGTTTCGTTGCTCGCTTTCTACAAAGGTGAGGCT\n+GTCGGACACATTCTCTTCACCAGAGCCTATTTTGATGGTCAGGGAGCACAACAGATGATG\n+CACATTCTTGCACCTTTAGCTGTCAAGCCGGAATATCAACGGCAGGGTATAGGCGGAATG\n+CTGATACGGGCAGGTATAGAGAGGTTGCAGGAAAAAGGTTCGTGCCTTGTGTTTGTATTG\n+GGGCATAAAGAATATTATCCAAAATATGGTTTTATACCGGATGCAGCCAGGCTGGGTTAT\n+CCTGCTCCTTACCCGATACTGGAACAGTTCTCGGATTATTGGATGGTTCAGGCAATCAGT\n+CCGAAGGGATTTGATGTGGATAAAGGAAAGATCAGGTGTTCGGATGAGTTGAACAAACCT\n+GAACATTGGAGAGATGATGAATCGGACAGATAG\n+>gi|514973850|ref|NZ_KE340312.1|:c10221-7987\n+ATGAAACATTTGTCTGTTATTATATCTGTATTATTTTCAATACTTTGTATCGGATGCTCT\n+AAGGAGGATTTTAACGATGGAAAAACCAATGATGTTCAGAACAAGCATTCTCTCCGGCTT\n+TTGGTCTACACACCGACTTCAGAAACTGTTCTTAGTACGGACCTGCCGGGAAACATAGAG\n+GCTTATCTGTTCAAAGAGGGAGTTCTGTCCGACGTTTACAAAAATCTGACAGTGGATAAG\n+AACGGATACACTACAATCAGCTCGTTAGCCGAAGGAGAGCAAATCTATTTCTTTGTAAAC\n+ACCGGTAATCTGCTGGATGGGATTACACAGGAAATAGGCCGATTGAAAGAGAACGAACTG\n+CTTGCCACTACAATACTCTCTGCCTCTCCTCAAGCAGATGGAGAGAAACCGGTGATGACC\n+GGAAAGGCAGACTTAACCGGAAGCCAGGAGAGTACCACCCAAGTATTGCTTACACGGGCC\n+ATTGCCAGAGTAGACTTAAACATAGCTGACGATGCCGACATACAAATCAATAGGATAAGT\n+ATGGATAACATACATTGTGAAGCATTCCTGCTTCCGCAAAATCCGGTCAGTTCGCCGTCC\n+GGTGCAGCATTGGCAAAAATAGACACTACATTCAATACCCCGTTGAAGCCGGGTGAATAT\n+GCCGGATTGGTCCACCTGTATGAACAAGTGGGGGACGGTATTCCTGTGGAGCTGCATGGC\n+ACCCTTGAAGGCGATCCTGTCACACTTTCACTGGCACTGCCTAACACAATCCATAGAAAC\n+CACATTTATAAAATAAAGTTATTTAGCGGAGACAGTTCTAATTTGCAGGCAAGCATATCC\n+GTGGAAAATGAAAGTTGGGAAGTCGAAGAAACTATCACGGCAAAACCTTCTACCAATATT\n+CTGGTTAATAGTGAACTTTCTACTTTGGCTGAAGGGGCGTATATAAGTGCAACAAAAGAT\n+ACGGTTTACCTCCCAAGCAAGGAGTCAACGTCAATACTTGTATTGGATAAGGTGCCGGAA\n+GATGCGGAATTCACAATTGATGGGACAACAGCCTCCATTACTCCGTATACGGAAACTCGT\n+GCCGACCTTCAAGGGAAAAAGTTCTTGGTACGCAATTCTTGGAAAAAGCCGGGTACTAAG\n+ACGGAATATATGTACCTGAATATGCACAGCAAGAGGCACCCGGACTACTACAGTGGCCGG\n+TTGGTAATTGTTCAGTCCAATGCAACGACCTTCAAGGGAGAATTGTATAATCATCTTACA\n+AAC
ACTCCTCCCTATAACATTCATTTCAATAAATATGTCGACAGTGCTTTAGGCCAGATA\n+GAGGTACCT'..b'CGCTTTTCCTATAGGGGGAATTGGTGCAGGTATGTATTGT\n+CTGGAAGGAACAGGCTATATATCTCATATGTCAGTATGGCATCGACCGGAAGTTTTTCAT\n+GAACCGGGAATGTTTGCTGCTCTGTATGTGAAGGGGGTATGTAATGGGGCTAAGGTACTT\n+GAAGGACCTGTATCTGATTGGAGAAAATTTGGAATGCCCAATTACGGTACAGGAGGCAGT\n+ATGGGATCAATATTAGGACTTCCCCGTTTTGATACGGTTGAATTTGAAGCACGTTTTCCG\n+TTTGCCAAAGTTTCATTAACAGATAAAGATATCCCCGTTAAGGTAACCATTTTGGGGTGG\n+AGTCCTTTTATTCCGGGTGATCCGGATAATTCCAGTTTACCGGTAGGAGGATTGGAATAT\n+AGTTTAGAGAATACTAGCAAAGAGGTTCAGGAAACTATTTTCTCCTACCATGCTCGTAAC\n+TTTCTGAGTTCGGGTAAAGGATTGGATGCTATAAAAACGATGCCTCATGGGTTTATCCTT\n+TCACAGTCAGGTACAGAAACGGAGCCTCATTTGCAGGGTGATTTTGCTATCTTTACGGAT\n+CAGGATTCTTTAAAGATCAATTATTGTTGGTTTCGTGGAGGATGGTTTGATAGTCTTACG\n+ATGGTTTGGAACGCTATTGAGACAGGTTTAATGCCACAATCTCCAGCAATAGAGAAAGGT\n+GCACCAGGAGCTTCTATGTTTGTCCCGGTAACATTGATGCCAGGGGAAAAGAAAACAATC\n+AGAATTTATACGGCGTGGTATGTTCCTAACTCGACTTTAAGGCTTGGAAAGGAACCGGAA\n+GACTGGAATGACAATAATGTCGACTCCGCAAGACTAGCTGTAGAAAAGGCAGATAAGGGT\n+AATTATAAACCTTGGTATAGTAGCCGCTTTACAGGAGTGAATGAGGTTATTGATTATTTT\n+CTGTCTCATTATAAGATTTTGCGCAATCAGACGGAAAGGTTCACAGACTCTTTTTATCGT\n+TCTACCTTACCGCCTGAAGTGATTGAAGCTGTTTCTGCTAACTTAAGTATTTTAAAGTCT\n+CCAACGGTGATGCGGCAATATGATGGACGCTTATGGACTTGGGAAGGATGTGCCGATAAT\n+TGGGGCTCGTGTCATGGCTCATGTACTCATGTCTGGAATTATGCACAAGCTATTCCACAC\n+TTATTTCCTTCCTTGGAACGTTCGTTAAGGCATACTGAATTTGAAGAAGGGCAAGATTTG\n+AAAGGCCATCAAGTGTTTCGTGCCAATTTACCGATTCGTCCTACTCGGCACGACTTTCAC\n+TCAGCTGCTGACGGGCAATTGGGAGGTATAATGAAAGTATATCGTGAGTGGCGAATTTCA\n+GGCGATAATGAGTTCCTTATCTCTATGTATCCAAAAGTAAAAAAGAGTTTGGACTACTGT\n+ATCTCAACTTGGGATCCTCGTAGGGTAGGAAGTATTGAAGAACCACACCATAATACTTAT\n+GATATTGAGTTCTGGGGGCCGGATGGTATGCATAATAGTTTCTATTATGGAGCTTTATCA\n+GCTTTCATTCGTATGAGTGAGTTTCTTGATAAAGACGTTACTGAATATAAGAAACTATTG\n+AAAAAAGGACGAAAATTTACTGAAACAGGCTTGTTTAATGGTGAGTATTTTATCCAAAAG\n+ATAGAGTGGAGGGGATTGAATGCAAAAGATCCGACTGTTGCACAAAGTTTCCATAGTTCT\n+TATTCTCCCGAAGCGAAAGAAATACTGGAGAAGGAAGGTCCTAAATATCAGTATGGGAAC\n+GGTTGTCTGTCTGATGGAGTTTTGGGGTCGTGGCTCTCCCGAATGTGTGGGATGGA
AGAA\n+ACTCTGAATACAGAAAAAGTGAAAAGCCATTTATTATCAGTACATCGATATAATTTTAAA\n+AAAGATCTGACTGATCACGCCAATCCTCAACGTTCCCCTTACGCTTTAGGCAAAGAAGGA\n+GGCTTATTATTAGGAAGTTGGCCTAAAGGGAGCAAGTTGTCATTGCCTTTTGTCTATAGT\n+AATGAAGTCTGGACAGGAATAGAATATCAAGTAGCTTCACATTTGATGCTACAGGGAGAG\n+GTAGAAAAAGGCCTTGAGATCGTGCGTGCCTGTAGACAACGTTATGATGGAAGTGTCCGT\n+AACCCTTTTAATGAGTATGAGTGTGGACATTGGTACGGACGAGCGTTATCTAGTTATGGC\n+TTACTTCAAGGATTGACAGGAGTCCGATATGATGCTGTAGATAAAACACTGTATATTAAT\n+TCAAAAATAGGAGACTTTATTAGCTTTATCAGTACGGAGTCTGGATTTGGTAATGTAGAA\n+CTTCGTTCCGGAAAACCTTTTGTAAAAGTAGTATCCGGTCATATAGAGGTCGACAGATTT\n+ATCGTATCAGGGAAAGTAGTTGAATAA\n+>gi|298386634|ref|NZ_GG774708.1|:c143839-142661\n+ATGGAGCAAAAGAAAATCGTGCTATTCATCCTGATCATTCATCTGGCAGCATTTCTAGCA\n+GGATGCAGCGGAAACAAAAATAGCGGAAATAATGACTCCTCTGATCTATGGAATAAGTTA\n+TCGAGCTACTTCCGCACGCCCGCAGAATACGAAAATGTATATGGGAATTTTCGCTCTCCT\n+TTATTATATTATAATGGGGATACAGTCAGAACCGTTGAAGACTGGCAAAGACGACGAACT\n+GAAATCAAGGACAGATGGATGAGCCTGTTAGGACAATGGCCGCCTGTCATTACCGGACAA\n+ACATTTGAAATTCTGGATACTCTCCACCGTGAAAACTTCATGCAATACCGTGTCCGCTTC\n+TACTGGACTCCCAACGAACAGACTGAAGGTTATTTGCTGGTTCCGGACAAAGAAGGCAAA\n+AAGCCTGCCGTTATCACTACCTTTTATGAACCCGAAACGGCTATCGGATTGGGTGGAAAA\n+CCTTATAGAGACTTTGCATATCAATTGACGAAAAGAGGATTTGTCACATTATCAATAGGA\n+ACAACGAAGACCACAGAGAATCAGACATATTCCATTTATTACCCCAGCATTGAAAATGCA\n+ACTCTCCAACCACTTTCAGCATTAGCTTATGCAGCCGCAAATGCATGGGAAGTATTAGCC\n+AAAGTACAGGACGTCGATTCTACAAGAATAGGCATCACAGGGCATTCTTATGGTGGGAAG\n+TGGGCAATGTTTGCCTCATGCCTATACGAAAAGTTCGCTTGTGCGGCATGGGGAGACCCC\n+GGAATTGTATTCGACGAAACAAAAGAGGGATATATCAATTACTGGGAACCCTGGTATTTG\n+GGATATTATCCGCCACCATGGGAAAATACATGGAGTAAAAATGGGCATGATTATGCTAAA\n+GGCATTTATCCGAAGCTCCGCAAAGAAGGATATGATTTGCATGAATTGCATGCGCTGATG\n+GCACCTCGCCCATTTCTTGTTTCCGGAGGATACTCTGACGGAACAGACCGGTGGATAGCG\n+CTAAACCATACAATAGCGGTCAACCGGCTCTTAGGATACCGCAATAATGTCGCGATGAGC\n+AACAGAGTCAATCATGACCCAACCCCTGAATCAAACGAAATTATATATGATTTTTTTAAA\n+TGGTACTTGCATTCAGCAAATAAATCTACCAAAGAGTAG\n+>gi|223955875|ref|NZ_DS499676.1|:c386522-386337\n+TTGGCTTTTAGTTCTGTCCAGTTTTCTTATATTTTTGTAGAAAAAGTATGGGACAGG
TAT\n+ATAAAAACAGCCTGGGTTTACGTATTCAACGTAATCCCAGGCTGTTTTTACGGGTATTTG\n+CGGGGGCTTGTTTACCGATTGTTTTTTATTCCGCCAAAGTCGCCCTTCTACTTCAGTCCG\n+AAATAA\n'
b
diff -r 000000000000 -r 3b469e230e82 test-data/merged_community_profile.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merged_community_profile.tabular Sat Mar 04 12:23:25 2017 -0500
b
@@ -0,0 +1,11 @@
+ID community_profile community_profile
+k__Bacteria 100.0 100.0
+k__Bacteria|p__Bacteroidetes 100.0 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia 100.0 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales 100.0 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae 100.0 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides 100.0 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris 68.05418 68.05418
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris|t__Bacteroides_stercoris_unclassified 68.05418 68.05418
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron 31.94582 31.94582
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron|t__Bacteroides_thetaiotaomicron_unclassified 31.94582 31.94582
b
diff -r 000000000000 -r 3b469e230e82 tool-data/metaphlan2_database.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/metaphlan2_database.loc.sample Sat Mar 04 12:23:25 2017 -0500
b
@@ -0,0 +1,4 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of metagenomics files.  
+#The file has this format (whitespace characters are TAB characters):
+#02_16_2014  MetaPhlAn2 clade-specific marker genes db_v20 /path/to/data
\ No newline at end of file
b
diff -r 000000000000 -r 3b469e230e82 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Sat Mar 04 12:23:25 2017 -0500
b
@@ -0,0 +1,6 @@
+<tables>
+    <table name="metaphlan2_database" comment_char="#">
+        <columns>value, name, dbkey, path</columns>
+        <file path="tool-data/metaphlan2_database.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r 3b469e230e82 transform_json_to_pkl.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/transform_json_to_pkl.py Sat Mar 04 12:23:25 2017 -0500
[
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import argparse
+import bz2
+import cPickle as pickle
+import json
+
+
+def transform_json_to_pkl(args):
+    with open(args.json_input, 'r') as json_file:
+        json_str = json_file.read()
+        metadata = json.loads(json_str)
+
+        for marker in metadata["markers"]:
+            a_set = set(metadata["markers"][marker]["ext"])
+            metadata["markers"][marker]["ext"] = a_set
+
+    pkl_output = bz2.BZ2File(args.pkl_output, 'w')
+    pickle.dump(metadata, pkl_output, pickle.HIGHEST_PROTOCOL)
+    pkl_output.close()
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--json_input', required=True)
+    parser.add_argument('--pkl_output', required=True)
+    args = parser.parse_args()
+
+    transform_json_to_pkl(args)