Repository 'recentrifuge'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/recentrifuge

Changeset 0:09b7b0b2e2c2 (2022-06-27)
Next changeset 1:fe733f05c2f8 (2022-08-26)
Commit message:
planemo upload for repository https://github.com/mesocentre-clermont-auvergne/galaxy-tools/tree/master/tools/recentrifuge commit fdcec50b71967011e4351eb347a9df2840be6bee
added:
macro.xml
recentrifuge.xml
test-data/centrifuge_test/centrifuge.out
test-data/centrifuge_test/test2_csv.log
test-data/centrifuge_test/test2_csv.rcf.data.csv
test-data/centrifuge_test/test2_csv.rcf.stat.csv
test-data/kraken_test/kraken.out
test-data/kraken_test/test1_csv.log
test-data/kraken_test/test1_csv.rcf.data.csv
test-data/kraken_test/test1_csv.rcf.html
test-data/kraken_test/test1_csv.rcf.stat.csv
test-data/kraken_test/test3_rcf.data.tsv
test-data/kraken_test/test3_rcf.stat.tsv
test-data/kraken_test/test3_tsv.log
test-data/ncbi_taxonomy.loc
test-data/test-db/delnodes.dmp
test-data/test-db/division.dmp
test-data/test-db/gc.prt
test-data/test-db/gencode.dmp
test-data/test-db/merged.dmp
test-data/test-db/names.dmp
test-data/test-db/nodes.dmp
test-data/test-db/readme.txt
tool-data/ncbi_taxonomy.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 000000000000 -r 09b7b0b2e2c2 macro.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macro.xml Mon Jun 27 11:03:22 2022 +0000
[
@@ -0,0 +1,45 @@
+<?xml version="1.0"?>
+<macros>
+  <!-- Shared tokens and XML fragments for the Recentrifuge tool wrapper (imported by recentrifuge.xml). -->
+  <!-- Version of the recentrifuge package requested by the "requirements" macro below.
+       NOTE(review): the test logs added in this changeset report rcf v1.8.1; confirm 1.9.1 is intended. -->
+  <token name="@TOOL_VERSION@">1.9.1</token>
+  <!-- Wrapper revision; recentrifuge.xml appends it to the tool version after "+galaxy". -->
+  <token name="@VERSION_SUFFIX@">0</token>
+  <!-- Value substituted into the "profile" attribute of the tool element in recentrifuge.xml. -->
+  <token name="@PROFILE@">21.05</token>
+  <!-- Version command of the tool: runs "rcf -V". -->
+  <xml name="version_command">
+    <version_command><![CDATA[rcf -V]]></version_command>
+  </xml>
+  <!-- Cross-reference to the tool's bio.tools entry. -->
+  <xml name="xrefs">
+    <xrefs>
+      <xref type="bio.tools">Recentrifuge</xref>
+    </xrefs>
+  </xml>
+  <!-- Package dependency, pinned to @TOOL_VERSION@. -->
+  <xml name="requirements">
+    <requirements>
+      <requirement type="package" version="@TOOL_VERSION@">recentrifuge</requirement>
+    </requirements>
+  </xml>
+  <!-- Select box filled from the "recentrifuge_database" data table; the validator rejects an empty table.
+       NOTE(review): the visible command in recentrifuge.xml reads $database.database_name and the loc files
+       in this changeset are named ncbi_taxonomy.loc; confirm this param name and data table are the ones in use. -->
+  <xml name="input_database">
+    <section name="database" title="Database type" expanded="true">
+      <param name="cached_db" label="Cached NCBI taxonomy database (nodes.dmp and names.dmp)" type="select">
+        <options from_data_table="recentrifuge_database">
+          <validator message="No recentrifuge database is available" type="no_options" />
+        </options>
+      </param>
+    </section>
+  </xml>
+  <!-- DOI of the publication to cite for the tool. -->
+  <xml name="citations">
+    <citations>
+      <citation type="doi">10.1371/journal.pcbi.1006967</citation>
+    </citations>
+  </xml>
+</macros>
b
diff -r 000000000000 -r 09b7b0b2e2c2 recentrifuge.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/recentrifuge.xml Mon Jun 27 11:03:22 2022 +0000
[
b'@@ -0,0 +1,349 @@\n+<?xml version="1.0" encoding="UTF-8"?>\n+\n+<tool id="recentrifuge" name="Recentrifuge" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">\n+    <description>\n+        Robust comparative analysis and contamination removal for metagenomics\n+    </description>\n+    <macros>\n+        <import>macro.xml</import>\n+    </macros>\n+    <expand macro=\'xrefs\'/>\n+    <expand macro="requirements" />\n+    <expand macro="version_command" />\n+    <command detect_errors="aggressive"><![CDATA[\n+        #*======================================\n+                  database input\n+        ======================================*#\n+        rcf\n+        -n $database.database_name.fields.path\n+        #*======================================\n+                  Recentrifuge input file\n+        ======================================*#\n+        #if $file_type.filetype == "centrifuge"\n+            -f \'$input_file\'\n+            #set $default_scoring = \'SHEL\'\n+        #else if $file_type.filetype == "lmat"\n+            -l \'$input_file\'\n+            #set $default_scoring = \'LMAT\'\n+        #else if $file_type.filetype == "clark"\n+            -r \'$input_file\'\n+            #set $default_scoring = \'SHEL\'\n+        #else if $file_type.filetype == "kraken"\n+            -k \'$input_file\'\n+            #set $default_scoring = \'KRAKEN\'\n+        #else if $file_type.filetype == "generic"\n+            -g \'$input_file\'\n+            --format \'$file_type.format\'\n+            #set $default_scoring = \'GENERIC\'\n+        #end if\n+        #*======================================\n+                  Output options\n+        ======================================*#\n+        -e $output_option.extra\n+        -o output\n+        $output_option.nohtml\n+        #*======================================\n+                  Advanced options\n+        ======================================*#\n+        #if 
$advanced_option.controls\n+            --controls \'$advanced_option.controls\'\n+        #end if\n+        #if $advanced_option.scoring\n+            --scoring \'$advanced_option.scoring\'\n+        #else\n+            --scoring \'$default_scoring\'\n+        #end if\n+        #if $advanced_option.minscore_value\n+            --minscore \'$advanced_option.minscore_value\'\n+        #end if\n+        #if $advanced_option.mintaxa\n+            --mintaxa \'$advanced_option.mintaxa\'\n+        #end if\n+        #if $advanced_option.exclude_taxa_name\n+            --exclude \'$advanced_option.exclude_taxa_name\'\n+        #end if\n+        #if $advanced_option.include_taxa_name\n+            --include \'$advanced_option.include_taxa_name\'\n+        #end if\n+        $advanced_option.avoidcross\n+        #*======================================\n+                  More advanced options\n+        ======================================*#\n+        #if $more_advanced_option.ctrlminscore\n+            --ctrlminscore \'$more_advanced_option.ctrlminscore\'\n+        #end if\n+        #if $more_advanced_option.ctrlmintaxa\n+            --ctrlmintaxa \'$more_advanced_option.ctrlmintaxa\'\n+        #end if\n+            --summary $more_advanced_option.summary\n+        $more_advanced_option.takeoutroot\n+        $more_advanced_option.nokollapse\n+        $more_advanced_option.strain\n+        $more_advanced_option.sequential\n+        #*======================================\n+                  Log file output\n+        ======================================*#\n+        &> $logfile\n+        ]]>\n+    </command>\n+    <inputs>\n+        <!-- INPUT FILES -->\n+        <param name="input_file" type="data" format="tabular" label="Select taxonomy file tabular formated"/>\n+        <conditional name="file_type">\n+            <param name="filetype" type="select" display="radio" label="Type of input file (Centrifuge, CLARK, Generic, Kraken, LMAT)" help="(-f, -r, -g, -k, -l)">\n+     
           <option value="centrifuge">Centrifuge</option>\n+                <option value="clark">CLARK</option>\n+                <option value="generic">Generic</o'..b'format need a string like : \'TYP:csv,TID:1,LEN:3,SCO:6,UNC:0\'.\n+Where TYP are csv/tsv/ssv, and the rest of fields indicate the number of column used (starting in 1)\n+for the TaxIDs assigned,the LENgth of the read, the SCOre given to the assignment"\n+\n+\n+**Database for recentrifuge**\n+  Recentrifuge first need the taxonomic database from NCBI (nodes.dmp and names.dmp).\n+  We also provide the option to directly load necessary files from history as a dataset list.\n+  1. cached for already installed taxonomic databases\n+  2. history to load from your history\n+\n+\n+**Output options**\n+  1. Depending of the option provided, the file output format can be csv, tsv or xlsx and be combine in one or more files (extra).\n+  3. By default a html file is generated to visualize data, could be remove using the nohtml option\n+\n+\n+**Advanced options**\n+  1. Recentrifuge can integrate sample in the data which are negative control to normalize the data\n+  2. Scoring is an option to choose the score method for the read classified by taxonomic tools :\n+     SHEL (Single Hit Equivalent Length): This is a score value in pair bases roughly equivalent to a single hit to the database.\n+     KRAKEN: This scoring scheme is only available for this classifier. It divides the k-mer hit count of the top assignment by the total k-mers in the read and multiplies the result by 100 to give a percentage of coverage (the fraction of the read k-mers covered by k-mers belonging to the read final assignment). 
This is the default scoring scheme for Kraken samples, and it supports the mixing of samples with different read length.\n+     LENGTH: The score of a read will be its length (or the combined length of mate pairs).\n+     LOGLENGTH: Logarithm (base 10) of the length score.\n+     NORMA: This score is the normalized score SHEL / LENGTH in percentage, so it takes into account both the assignment quality and the length of the read. Very useful when both the score assignments and lengths are variable among the reads.\n+     LMAT: This scoring scheme is only available for this classifier.\n+     CLARK_C: This scoring scheme is not available for other classifiers. It takes the confidence score as the score for a read, conf=h1/(h1+h2), or 1-conf=h2/(h1+h2) in case the majority of a read is not classified (1st assignment unclassified). See CLARK\'s README file for details on how h1 and h2 are calculated. If you use this scoring, you will probably want to filter to a minimum of 0.5 (-y 0.5) or beyond, as under 0.5 the assignments have very low confidence.\n+     CLARK_G: This scheme scores every read with its CLARK gamma score, so it is only available for this classifier.\n+  3. You can choose a filter for read quality using the minscore option (--minscore)\n+  4. You can include or exclude specific taxa using the NCBI taxid code\n+\n+\n+**More advanced options**\n+  1. You can choose a filter for read quality specifically on the control samples\n+  2. You cans specify the minimum taxa value to avoid collapsing one level into parent\n+  3. 
A summary option is available to produce a summary file\n+     Some other options are available and explained in the more advanced panel of the tool\n+\n+\n+rcf - Release 1.8.1 - Mar 2022\n+\n+  Copyright (C) 2017\xe2\x80\x932022, Jose Manuel Mart\xc3\xad Mart\xc3\xadnez\n+\n+  This program is free software: you can redistribute it and/or modify\n+  it under the terms of the GNU Affero General Public License as\n+  published by the Free Software Foundation, either version 3 of the\n+  License, or (at your option) any later version.\n+\n+  This program is distributed in the hope that it will be useful,\n+  but WITHOUT ANY WARRANTY; without even the implied warranty of\n+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n+  GNU Affero General Public License for more details.\n+\n+  You should have received a copy of the GNU Affero General Public License\n+  along with this program.  If not, see <https://www.gnu.org/licenses/>.\n+  ]]></help>\n+    <expand macro="citations"/>\n+</tool>\n'
b
diff -r 000000000000 -r 09b7b0b2e2c2 test-data/centrifuge_test/centrifuge.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/centrifuge_test/centrifuge.out Mon Jun 27 11:03:22 2022 +0000
b
b'@@ -0,0 +1,11000 @@\n+readID\tseqID\ttaxID\tscore\t2ndBestScore\thitLength\tqueryLength\tnumMatches\n+M03963:301:000000000-CT5Y5:1:1101:10606:1000\tunclassified\t0\t0\t0\t0\t486\t1\n+M03963:301:000000000-CT5Y5:1:1101:17509:1002\tNZ_KI973153.1\t208224\t38416\t38416\t211\t513\t2\n+M03963:301:000000000-CT5Y5:1:1101:17509:1002\tNZ_AP022126.1\t208224\t38416\t38416\t211\t513\t2\n+M03963:301:000000000-CT5Y5:1:1101:16741:1003\tNZ_AP022126.1\t208224\t10513\t10513\t175\t176\t3\n+M03963:301:000000000-CT5Y5:1:1101:16741:1003\tNZ_CP082147.1\t208224\t10513\t10513\t175\t176\t3\n+M03963:301:000000000-CT5Y5:1:1101:16741:1003\tNZ_KI973153.1\t208224\t10513\t10513\t175\t176\t3\n+M03963:301:000000000-CT5Y5:1:1101:20657:1003\tNZ_CP082147.1\t208224\t60516\t0\t261\t602\t1\n+M03963:301:000000000-CT5Y5:1:1101:11961:1003\tunclassified\t0\t0\t0\t0\t192\t1\n+M03963:301:000000000-CT5Y5:1:1101:17530:1006\tNZ_CP082147.1\t208224\t31329\t0\t192\t494\t1\n+M03963:301:000000000-CT5Y5:1:1101:11134:1006\tNZ_KI973153.1\t208224\t81225\t81225\t300\t602\t3\n+M03963:301:000000000-CT5Y5:1:1101:11134:1006\tNZ_CP082147.1\t208224\t81225\t81225\t300\t602\t3\n+M03963:301:000000000-CT5Y5:1:1101:11134:1006\tNZ_AP022126.1\t208224\t81225\t81225\t300\t602\t3\n+M03963:301:000000000-CT5Y5:1:1101:16099:1007\tNZ_AP022126.1\t208224\t55585\t55585\t299\t602\t2\n+M03963:301:000000000-CT5Y5:1:1101:16099:1007\tNZ_KI973153.1\t208224\t55585\t55585\t299\t602\t2\n+M03963:301:000000000-CT5Y5:1:1101:14350:1007\tNZ_KI973153.1\t208224\t28132\t28132\t220\t602\t2\n+M03963:301:000000000-CT5Y5:1:1101:14350:1007\tNZ_AP022126.1\t208224\t28132\t28132\t220\t602\t2\n+M03963:301:000000000-CT5Y5:1:1101:14857:1012\tspecies\t208224\t1741\t1741\t89\t90\t5\n+M03963:301:000000000-CT5Y5:1:1101:14857:1012\tNZ_CP050966.1\t158484\t1741\t1741\t89\t90\t5\n+M03963:301:000000000-CT5Y5:1:1101:14857:1012\tNZ_LR134485.1\t133448\t1741\t1741\t89\t90\t5\n+M03963:301:000000000-CT5Y5:1:1101:14857:1012\tNZ_CP044098.1\t1639133\t1741\t1741\t89\t90\t5\n+M03963:301:000000
000-CT5Y5:1:1101:14857:1012\tNZ_CP073048.1\t1639133\t1741\t1741\t89\t90\t5\n+M03963:301:000000000-CT5Y5:1:1101:16190:1013\tNZ_AP022126.1\t208224\t15665\t0\t207\t208\t1\n+M03963:301:000000000-CT5Y5:1:1101:9353:1014\tNZ_KI973153.1\t208224\t10225\t10225\t173\t174\t3\n+M03963:301:000000000-CT5Y5:1:1101:9353:1014\tNZ_AP022126.1\t208224\t10225\t10225\t173\t174\t3\n+M03963:301:000000000-CT5Y5:1:1101:9353:1014\tNZ_CP082147.1\t208224\t10225\t10225\t173\t174\t3\n+M03963:301:000000000-CT5Y5:1:1101:17080:1007\tgenus\t547\t56169\t0\t252\t554\t1\n+M03963:301:000000000-CT5Y5:1:1101:12530:1014\tNZ_KI973153.1\t208224\t15440\t15440\t202\t505\t2\n+M03963:301:000000000-CT5Y5:1:1101:12530:1014\tNZ_AP022126.1\t208224\t15440\t15440\t202\t505\t2\n+M03963:301:000000000-CT5Y5:1:1101:14081:1015\tNZ_KI973153.1\t208224\t21013\t21013\t235\t236\t3\n+M03963:301:000000000-CT5Y5:1:1101:14081:1015\tNZ_CP082147.1\t208224\t21013\t21013\t235\t236\t3\n+M03963:301:000000000-CT5Y5:1:1101:14081:1015\tNZ_AP022126.1\t208224\t21013\t21013\t235\t236\t3\n+M03963:301:000000000-CT5Y5:1:1101:10572:1018\tNZ_CP082147.1\t208224\t23741\t3136\t298\t602\t1\n+M03963:301:000000000-CT5Y5:1:1101:21652:1019\tNZ_AP022126.1\t208224\t50625\t50625\t240\t548\t2\n+M03963:301:000000000-CT5Y5:1:1101:21652:1019\tNZ_KI973153.1\t208224\t50625\t50625\t240\t548\t2\n+M03963:301:000000000-CT5Y5:1:1101:15156:1022\tNZ_CP082147.1\t208224\t11552\t11552\t182\t485\t3\n+M03963:301:000000000-CT5Y5:1:1101:15156:1022\tNZ_AP022126.1\t208224\t11552\t11552\t182\t485\t3\n+M03963:301:000000000-CT5Y5:1:1101:15156:1022\tNZ_KI973153.1\t208224\t11552\t11552\t182\t485\t3\n+M03963:301:000000000-CT5Y5:1:1101:19202:1023\tNZ_AP022126.1\t208224\t24336\t24336\t171\t473\t3\n+M03963:301:000000000-CT5Y5:1:1101:19202:1023\tNZ_CP082147.1\t208224\t24336\t24336\t171\t473\t3\n+M03963:301:000000000-CT5Y5:1:1101:19202:1023\tNZ_KI973153.1\t208224\t24336\t24336\t171\t473\t3\n+M03963:301:000000000-CT5Y5:1:1101:22318:1029\tunclassified\t0\t0\t0\t0\t601\t1\n+M03963:301:000000000-C
T5Y5:1:1101:14070:1030\tNZ_CP082147.1\t208224\t36389\t0\t237\t540\t1\n+M03963:301:000000000-CT5Y5:1:1101:20703:1032\tNZ_AP022126.1\t208224\t4500\t4500\t192\t206\t3\n+M03963:301:000000000-CT5Y5:1:1101:20703:1032\tNZ_CP082147.1\t208224\t4500\t4500\t192\t206\t3\n+M03963:301:000000000-CT5Y5:1:1101:20703:1032\tNZ_KI973153.1\t208224\t4500\t4500\t192\t206\t3\n+M03963:301:000'..b'00000000-CT5Y5:1:1101:24054:5307\tNZ_CP022525.1\t2908851\t123412\t0\t520\t602\t1\n+M03963:301:000000000-CT5Y5:1:1101:24773:5318\tfamily\t543\t20000\t20000\t230\t230\t3\n+M03963:301:000000000-CT5Y5:1:1101:24773:5318\tgenus\t2100764\t20000\t20000\t230\t230\t3\n+M03963:301:000000000-CT5Y5:1:1101:24773:5318\tgenus\t613\t20000\t20000\t230\t230\t3\n+M03963:301:000000000-CT5Y5:1:1101:9413:5315\tNZ_KI973153.1\t208224\t100990\t100990\t546\t602\t2\n+M03963:301:000000000-CT5Y5:1:1101:9413:5315\tNZ_AP022126.1\t208224\t100990\t100990\t546\t602\t2\n+M03963:301:000000000-CT5Y5:1:1101:6038:5318\tfamily\t543\t18818\t18818\t224\t224\t3\n+M03963:301:000000000-CT5Y5:1:1101:6038:5318\tgenus\t613\t18818\t18818\t224\t224\t3\n+M03963:301:000000000-CT5Y5:1:1101:6038:5318\tgenus\t2100764\t18818\t18818\t224\t224\t3\n+M03963:301:000000000-CT5Y5:1:1101:21823:5318\tNZ_CP082147.1\t208224\t70688\t0\t406\t406\t1\n+M03963:301:000000000-CT5Y5:1:1101:24209:5321\tNZ_CP011863.1\t61645\t13070\t4045\t219\t220\t1\n+M03963:301:000000000-CT5Y5:1:1101:15800:5318\tNZ_AP022126.1\t208224\t49834\t49834\t454\t456\t2\n+M03963:301:000000000-CT5Y5:1:1101:15800:5318\tNZ_KI973153.1\t208224\t49834\t49834\t454\t456\t2\n+M03963:301:000000000-CT5Y5:1:1101:5824:5316\tNZ_AP022126.1\t208224\t56455\t0\t469\t601\t1\n+M03963:301:000000000-CT5Y5:1:1101:16336:5322\tNZ_KI973153.1\t208224\t69938\t69938\t404\t404\t3\n+M03963:301:000000000-CT5Y5:1:1101:16336:5322\tNZ_AP022126.1\t208224\t69938\t69938\t404\t404\t3\n+M03963:301:000000000-CT5Y5:1:1101:16336:5322\tNZ_CP082147.1\t208224\t69938\t69938\t404\t404\t3\n+M03963:301:000000000-CT5Y5:1:1101:18232:5323\tNZ_AP022126.
1\t208224\t9522\t9522\t168\t168\t3\n+M03963:301:000000000-CT5Y5:1:1101:18232:5323\tNZ_KI973153.1\t208224\t9522\t9522\t168\t168\t3\n+M03963:301:000000000-CT5Y5:1:1101:18232:5323\tNZ_CP082147.1\t208224\t9522\t9522\t168\t168\t3\n+M03963:301:000000000-CT5Y5:1:1101:10058:5318\tNZ_AP022126.1\t208224\t37421\t29988\t442\t602\t1\n+M03963:301:000000000-CT5Y5:1:1101:4834:5320\tNZ_CP082147.1\t208224\t65536\t3349\t271\t602\t1\n+M03963:301:000000000-CT5Y5:1:1101:10449:5326\tNZ_AP022126.1\t208224\t14600\t14600\t296\t298\t3\n+M03963:301:000000000-CT5Y5:1:1101:10449:5326\tNZ_CP082147.1\t208224\t14600\t14600\t296\t298\t3\n+M03963:301:000000000-CT5Y5:1:1101:10449:5326\tNZ_KI973153.1\t208224\t14600\t14600\t296\t298\t3\n+M03963:301:000000000-CT5Y5:1:1101:24457:5327\tunclassified\t0\t0\t0\t0\t362\t1\n+M03963:301:000000000-CT5Y5:1:1101:23515:5334\tNZ_AP022126.1\t208224\t7938\t7938\t156\t156\t2\n+M03963:301:000000000-CT5Y5:1:1101:23515:5334\tNZ_KI973153.1\t208224\t7938\t7938\t156\t156\t2\n+M03963:301:000000000-CT5Y5:1:1101:8587:5327\tNZ_AP022126.1\t208224\t59226\t59226\t505\t566\t2\n+M03963:301:000000000-CT5Y5:1:1101:8587:5327\tNZ_KI973153.1\t208224\t59226\t59226\t505\t566\t2\n+M03963:301:000000000-CT5Y5:1:1101:17592:5335\tNZ_KI973153.1\t208224\t38642\t38642\t308\t308\t3\n+M03963:301:000000000-CT5Y5:1:1101:17592:5335\tNZ_AP022126.1\t208224\t38642\t38642\t308\t308\t3\n+M03963:301:000000000-CT5Y5:1:1101:17592:5335\tNZ_CP082147.1\t208224\t38642\t38642\t308\t308\t3\n+M03963:301:000000000-CT5Y5:1:1101:19382:5328\tNZ_AP022126.1\t208224\t146312\t146312\t570\t602\t2\n+M03963:301:000000000-CT5Y5:1:1101:19382:5328\tNZ_CP082147.1\t208224\t146312\t146312\t570\t602\t2\n+M03963:301:000000000-CT5Y5:1:1101:18774:5334\tNZ_KI973153.1\t208224\t119538\t119538\t575\t602\t2\n+M03963:301:000000000-CT5Y5:1:1101:18774:5334\tNZ_AP022126.1\t208224\t119538\t119538\t575\t602\t2\n+M03963:301:000000000-CT5Y5:1:1101:7082:5336\tNZ_CP082147.1\t208224\t9090\t7586\t156\t300\t1\n+M03963:301:000000000-CT5Y5:1:1101:18565:5334\t
unclassified\t0\t0\t0\t0\t602\t1\n+M03963:301:000000000-CT5Y5:1:1101:5483:5337\tunclassified\t0\t0\t0\t0\t238\t1\n+M03963:301:000000000-CT5Y5:1:1101:20275:5333\tNZ_CP082147.1\t208224\t47075\t0\t492\t600\t1\n+M03963:301:000000000-CT5Y5:1:1101:19208:5335\tgenus\t642\t57122\t57122\t368\t368\t4\n+M03963:301:000000000-CT5Y5:1:1101:19208:5335\tgenus\t2100764\t57122\t57122\t368\t368\t4\n+M03963:301:000000000-CT5Y5:1:1101:19208:5335\tgenus\t613\t57122\t57122\t368\t368\t4\n+M03963:301:000000000-CT5Y5:1:1101:19208:5335\tfamily\t543\t57122\t57122\t368\t368\t4\n+M03963:301:000000000-CT5Y5:1:1101:19048:5331\tNZ_KI973153.1\t208224\t81796\t81796\t301\t602\t3\n+M03963:301:000000000-CT5Y5:1:1101:19048:5331\tNZ_AP022126.1\t208224\t81796\t81796\t301\t602\t3\n'
b
diff -r 000000000000 -r 09b7b0b2e2c2 test-data/centrifuge_test/test2_csv.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/centrifuge_test/test2_csv.log Mon Jun 27 11:03:22 2022 +0000
[
@@ -0,0 +1,28 @@
+
+=-= /home/pierre/anaconda3/envs/rcf/bin/rcf =-= v1.8.1 - Mar 2022 =-= by Jose Manuel Martí =-=
+
+Loading NCBI nodes... OK! 
+Loading NCBI names... OK! 
+Building dict of parent to children taxa... OK! 
+
+Please, wait, processing files in parallel...
+
+Loading output file centrifuge.out... OK!
+  Seqs read: 10_999 [4.22 Mnt]
+  Seqs clas: 10_354 (5.86% unclassified)
+  Seqs pass: 10_354 (0.00% rejected)
+  Scores: min = 23.0, max = 400.1, avr = 190.7
+  Length: min = 70 nt, max = 602 nt, avr = 387 nt
+  TaxIds: by classifier = 215, by filter = 215
+Building from raw data with mintaxa = 4 ... 
+  Check for more seqs lost ([in/ex]clude affects)... 
+  Info: 10279 additional seqs discarded (99.276% of accepted)
+
+  Warning! 210 orphan taxids (rerun with --debug for details)
+centrifuge sample OK!
+Load elapsed time: 0.0305 sec
+
+
+Building the taxonomy multiple tree... OK!
+Generating csv extra output ([test2_csv.rcf.]*.csv)... OK!
+Total elapsed time: 00:00:00
b
diff -r 000000000000 -r 09b7b0b2e2c2 test-data/centrifuge_test/test2_csv.rcf.data.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/centrifuge_test/test2_csv.rcf.data.csv Mon Jun 27 11:03:22 2022 +0000
b
@@ -0,0 +1,11 @@
+Samples,centrifuge,centrifuge,centrifuge,Details,Details
+Stats,count,unassigned,score,Rank,Name
+Id,,,,,
+1,75,0,51.07625068955332,no_rank,root
+2,75,0,51.07625068955332,superkingdom,Bacteria
+1224,75,0,51.07625068955332,phylum,Proteobacteria
+1236,75,2,51.07625068955332,class,Gammaproteobacteria
+91347,73,6,51.061174675863136,order,Enterobacteriales
+543,67,32,53.07536131266087,family,Enterobacteriaceae
+561,35,4,49.49719057816962,genus,Escherichia
+562,31,31,47.36655707970506,species,Escherichia coli
b
diff -r 000000000000 -r 09b7b0b2e2c2 test-data/centrifuge_test/test2_csv.rcf.stat.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/centrifuge_test/test2_csv.rcf.stat.csv Mon Jun 27 11:03:22 2022 +0000
b
@@ -0,0 +1,16 @@
+,centrifuge
+Seqs. read,10999.0
+Seqs. unclass.,645.0
+Seqs. class.,10354.0
+Seqs. filtered,10354.0
+Score min,23.0
+Score mean,190.73954464627033
+Score max,400.1051804377604
+Length min,70.0
+Length mean,387.0
+Length max,602.0
+Total nt read,4218210.0
+TIDs class.,215.0
+TIDs filtered,215.0
+TIDs folded,5.0
+Score limit,1.0
b
diff -r 000000000000 -r 09b7b0b2e2c2 test-data/kraken_test/kraken.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kraken_test/kraken.out Mon Jun 27 11:03:22 2022 +0000
b
b'@@ -0,0 +1,100 @@\n+C\tM03963:215:000000000-C292Y:1:1101:14399:1896\tEnterobacter cloacae (taxid 550)\t193|300\t543:3 547:1 543:4 547:11 543:5 550:3 0:66 354276:5 0:17 543:2 91347:5 547:9 543:5 547:4 91347:4 547:9 91347:2 547:4 |:| 0:213 9606:3 0:50\n+C\tM03963:215:000000000-C292Y:1:1101:21024:1897\tEnterobacteriaceae (taxid 543)\t208|300\t543:6 0:5 543:2 0:9 1:1 0:16 543:36 1:3 543:1 0:9 1:5 0:21 1:2 0:3 1:1 0:4 1:36 543:4 0:10 |:| 0:223 9606:5 0:1 9606:5 0:6 9606:5 0:9 9606:3 0:9\n+C\tM03963:215:000000000-C292Y:1:1101:13613:1913\tEnterobacter cloacae complex (taxid 354276)\t259|300\t543:9 91347:7 543:3 547:14 354276:11 543:3 354276:6 547:2 354276:6 543:4 547:22 543:5 547:9 0:32 547:31 354276:7 547:5 354276:1 547:5 0:8 547:1 0:7 547:5 0:12 543:1 0:7 543:1 0:1 |:| 0:9 547:12 0:1 547:7 0:199 9606:1 0:37\n+C\tM03963:215:000000000-C292Y:1:1101:23661:1934\tEnterobacter cloacae complex (taxid 354276)\t299|300\t0:11 354276:1 0:5 354276:1 0:5 547:34 354276:20 547:9 354276:15 0:5 547:1 0:13 547:2 0:10 547:17 354276:17 547:5 354276:20 547:1 0:10 354276:3 0:60 |:| 0:39 9606:1 0:5 9606:1 0:63 9606:1 0:33 9606:5 0:7 1379:5 0:40 807:5 0:32 9606:4 0:25\n+C\tM03963:215:000000000-C292Y:1:1101:12218:1949\tEscherichia coli (taxid 562)\t128|128\t1:8 562:11 1:5 562:1 1:5 562:5 1:29 1236:23 1:7 |:| 1:2 0:5 1236:4 2562449:1 1236:5 2:1 0:6 1236:3 0:19 1:13 0:10 1236:1 0:24\n+C\tM03963:215:000000000-C292Y:1:1101:18165:1952\tEnterobacter (taxid 547)\t221|300\t547:5 543:13 547:54 543:1 0:1 547:5 0:27 547:4 0:11 543:4 0:18 91347:3 543:12 91347:1 543:19 547:7 543:2 |:| 0:248 9606:5 0:13\n+C\tM03963:215:000000000-C292Y:1:1101:12867:1959\tEnterobacter cloacae complex (taxid 354276)\t299|300\t543:5 91347:2 543:5 547:24 543:1 547:3 543:6 547:6 543:4 547:22 354276:5 547:2 354276:2 547:25 543:4 547:69 0:1 547:9 0:70 |:| 0:266\n+C\tM03963:215:000000000-C292Y:1:1101:18362:1968\tEnterobacter (taxid 547)\t197|300\t547:56 543:5 547:48 0:14 547:1 0:1 547:2 0:10 547:26 |:| 0:6 547:1 0:44 547:2 0:140 
2759:2 0:5 9606:1 0:57 9606:3 0:5\n+C\tM03963:215:000000000-C292Y:1:1101:22758:1973\tEnterobacter cloacae complex (taxid 354276)\t300|300\t543:1 547:14 354276:5 547:1 354276:3 543:4 547:6 354276:5 543:5 354276:3 547:6 543:5 547:5 91347:3 543:7 547:14 354276:15 547:3 543:13 547:27 543:7 91347:2 543:5 547:4 0:8 91347:6 547:2 0:5 543:3 0:79 |:| 0:266\n+C\tM03963:215:000000000-C292Y:1:1101:8446:1974\tEnterobacter (taxid 547)\t260|300\t547:11 543:4 547:5 91347:4 547:12 91347:3 547:17 543:6 91347:25 547:10 543:1 547:5 543:3 547:31 91347:7 547:11 543:3 547:6 91347:4 547:8 543:2 91347:14 543:21 547:5 0:4 547:2 0:2 |:| 0:266\n+C\tM03963:215:000000000-C292Y:1:1101:8672:1975\tEnterobacter cloacae complex (taxid 354276)\t216|300\t547:4 354276:9 547:3 354276:9 547:3 354276:7 547:20 543:9 2:5 547:25 543:2 547:9 543:1 547:19 91347:5 547:1 543:2 91347:6 543:43 |:| 543:2 0:193 9606:6 0:2 9606:6 0:4 9606:1 0:21 9606:1 0:30\n+C\tM03963:215:000000000-C292Y:1:1101:12735:1975\tEnterobacter sp. R4-368 (taxid 1166130)\t253|300\t547:1 0:1 547:4 0:106 543:5 547:4 0:7 547:1 0:3 547:5 0:82 |:| 0:22 1166130:5 543:2 0:235 9606:2\n+C\tM03963:215:000000000-C292Y:1:1101:14378:1976\tEnterobacter cloacae (taxid 550)\t162|162\t547:9 354276:15 0:6 550:1 0:7 354276:2 0:16 547:12 543:36 547:10 543:1 547:13 |:| 547:13 543:1 547:10 543:36 547:3 0:6 547:3 0:32 354276:15 547:9\n+C\tM03963:215:000000000-C292Y:1:1101:9860:1977\tHomo sapiens (taxid 9606)\t52|300\t0:18 |:| 0:79 9606:2 0:46 9606:5 0:2 9606:2 0:9 9606:21 0:8 9606:2 0:2 9606:7 0:1 9606:39 0:41\n+C\tM03963:215:000000000-C292Y:1:1101:14448:1979\tEnterobacter (taxid 547)\t300|300\t543:1 0:123 91347:1 0:47 547:5 0:3 543:5 0:5 543:1 547:5 0:70 |:| 0:195 9606:5 0:66\n+C\tM03963:215:000000000-C292Y:1:1101:14609:1981\tEnterobacter hormaechei subsp. 
steigerwaltii (taxid 299766)\t202|300\t158836:18 543:1 158836:19 547:5 158836:7 547:1 158836:16 0:7 158836:7 0:13 299766:2 158836:25 543:1 158836:2 543:5 158836:5 543:7 158836:2 354276:1 547:1 543:8 547:5 158836:10 |:| 0:10 547:5 0:5 543:3 0:21 158836:2 0:1 158836:4 0:42 158836:1 0:172\n+C\tM03963'..b'6:7 0:7 9606:5 0:15\n+C\tM03963:215:000000000-C292Y:1:1101:14334:2098\tEnterobacter cloacae complex (taxid 354276)\t293|299\t547:18 543:2 91347:18 547:1 543:5 547:45 543:2 547:5 543:15 354276:2 543:5 354276:13 547:5 354276:9 547:10 543:3 547:18 354276:3 547:5 91347:1 547:1 91347:1 354276:5 91347:3 354276:7 547:2 354276:5 547:1 91347:4 543:9 354276:4 547:27 354276:5 |:| 354276:5 547:27 354276:4 543:9 91347:4 547:1 354276:5 547:2 354276:7 91347:3 354276:5 91347:1 547:1 91347:1 547:5 354276:3 547:4 0:4 547:5 0:5 543:3 0:9 547:1 0:7 354276:2 547:5 354276:13 543:5 354276:2 543:15 0:102\n+C\tM03963:215:000000000-C292Y:1:1101:11162:2099\tEnterobacter cloacae complex (taxid 354276)\t112|112\t354276:6 547:15 354276:2 547:5 354276:2 547:22 354276:14 547:3 354276:6 547:3 |:| 547:3 354276:6 547:3 354276:14 547:22 354276:2 547:5 354276:2 547:15 354276:6\n+C\tM03963:215:000000000-C292Y:1:1101:10571:2100\tEnterobacter cloacae complex (taxid 354276)\t301|300\t547:5 543:5 1236:1 354276:5 547:1 354276:3 547:7 354276:19 543:7 547:8 543:1 547:5 543:5 547:6 543:5 547:2 543:2 1236:11 0:1 543:5 0:11 354276:2 0:16 547:1 0:25 543:2 354276:7 547:3 354276:2 543:5 1236:2 354276:6 547:8 0:5 543:7 0:1 61646:7 0:1 547:5 0:3 61646:2 543:5 0:37 |:| 0:266\n+C\tM03963:215:000000000-C292Y:1:1101:10821:2103\tEnterobacter cloacae complex (taxid 354276)\t298|300\t543:1 547:28 354276:1 547:7 354276:13 543:5 354276:2 547:2 543:15 547:2 543:3 547:4 543:5 547:20 543:2 547:5 543:7 547:26 0:7 547:1 0:13 543:3 0:1 543:3 0:1 547:22 0:22 547:5 0:38 |:| 0:18 543:5 0:243\n+C\tM03963:215:000000000-C292Y:1:1101:15960:2104\tEnterobacter cloacae complex (taxid 354276)\t300|301\t543:8 91347:7 354276:8 543:17 91347:5 543:2 
91347:20 543:9 91347:9 543:15 91347:9 543:4 91347:5 543:11 91347:3 543:5 91347:26 354276:20 91347:5 354276:30 543:3 354276:3 543:42 |:| 91347:33 543:44 0:2 543:7 0:3 543:5 0:5 543:5 0:4 543:13 354276:3 543:3 354276:30 91347:5 354276:11 0:35 91347:8 543:5 91347:1 543:2 0:43\n+C\tM03963:215:000000000-C292Y:1:1101:8204:2104\tEnterobacter cloacae complex (taxid 354276)\t183|183\t91347:6 547:7 91347:3 547:30 543:8 547:5 543:23 0:16 543:5 0:11 354276:3 543:10 354276:1 91347:7 547:6 0:3 547:5 |:| 0:27 543:5 0:3 91347:5 0:5 543:8 0:96\n+C\tM03963:215:000000000-C292Y:1:1101:15359:2107\tEnterobacter cloacae complex (taxid 354276)\t161|161\t547:14 543:4 547:2 543:5 547:5 543:1 547:12 543:4 547:56 354276:4 547:3 354276:1 547:5 354276:11 |:| 354276:11 547:5 354276:1 547:3 354276:4 547:56 543:4 547:12 543:1 547:5 543:5 547:2 543:4 547:14\n+C\tM03963:215:000000000-C292Y:1:1101:15652:2109\tEnterobacter (taxid 547)\t266|266\t547:5 543:12 547:5 543:1 547:13 543:17 547:4 543:8 91347:4 547:1 543:5 547:3 91347:4 547:48 543:3 547:6 543:1 547:2 91347:5 543:2 547:16 543:1 547:10 543:5 91347:10 543:2 91347:3 543:36 |:| 543:36 91347:3 543:2 91347:10 543:5 547:10 543:1 547:13 0:27 547:43 91347:4 547:3 543:5 547:1 91347:4 0:39 547:3 0:23\n+C\tM03963:215:000000000-C292Y:1:1101:9247:2109\tEnterobacter cloacae complex (taxid 354276)\t300|300\t543:67 354276:43 543:11 354276:29 543:19 354276:22 543:5 354276:5 543:24 0:2 543:1 0:33 543:5 |:| 0:19 543:26 354276:9 543:5 0:207\n+C\tM03963:215:000000000-C292Y:1:1101:22441:2110\tEnterobacter cloacae (taxid 550)\t253|300\t0:1 354276:9 547:2 0:8 354276:1 0:6 354276:12 547:11 354276:24 0:29 547:37 543:5 547:52 0:4 547:2 0:10 354276:6 |:| 0:18 547:9 0:4 550:5 0:20 547:2 0:208\n+C\tM03963:215:000000000-C292Y:1:1101:16192:2111\tEnterobacter cloacae complex (taxid 354276)\t301|300\t547:1 543:1 547:4 543:5 547:1 543:5 547:11 543:8 547:1 543:10 91347:5 543:12 91347:12 543:10 91347:2 543:8 547:5 543:1 547:89 354276:1 547:5 354276:21 547:5 354276:1 547:5 354276:9 
547:3 354276:2 547:6 354276:3 0:15 |:| 547:5 354276:13 547:1 354276:3 547:3 543:5 547:23 0:19 547:7 0:5 547:33 0:4 547:1 0:31 547:1 0:14 354276:2 0:3 354276:8 0:17 354276:4 0:64\n+C\tM03963:215:000000000-C292Y:1:1101:19635:2112\tEnterobacter (taxid 547)\t99|99\t543:16 547:7 543:1 547:8 543:3 0:7 547:2 0:21 |:| 0:7 547:6 0:8 547:9 543:3 547:8 543:1 547:7 543:16\n'
b
diff -r 000000000000 -r 09b7b0b2e2c2 test-data/kraken_test/test1_csv.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kraken_test/test1_csv.log Mon Jun 27 11:03:22 2022 +0000
[
@@ -0,0 +1,30 @@
+
+=-= /home/pierre/anaconda3/envs/__recentrifuge@1.8.1/bin/rcf =-= v1.8.1 - Mar 2022 =-= by Jose Manuel Martí =-=
+
+Loading NCBI nodes... OK! 
+Loading NCBI names... OK! 
+Building dict of parent to children taxa... OK! 
+
+Please, wait, processing files in parallel...
+
+Loading output file /tmp/tmpz5ieggzi/files/d/5/5/dataset_d55a31f8-0f74-4938-9a4f-dce196b7467b.dat... OK!
+  Seqs read: 99 [52.81 knt]
+  Seqs clas: 99 (0.00% unclassified)
+  Seqs pass: 99 (0.00% rejected)
+  Scores SHEL: min = 36.0, max = 347.0, avr = 99.9
+  Coverage(%): min = 0.2, max = 88.1, avr = 15.4
+  Read length: min = 198 nt, max = 602 nt, avr = 533 nt
+  TaxIds: by classifier = 13, by filter = 13
+Building from raw data with mintaxa = 2 ... 
+  Check for more seqs lost ([in/ex]clude affects)... 
+  Info: 90 additional seqs discarded (90.909% of accepted)
+
+  Warning! 11 orphan taxids (rerun with --debug for details)
+/tmp/tmpz5ieggzi/files/d/5/5/dataset_d55a31f8-0f74-4938-9a4f-dce196b7467b sample OK!
+Load elapsed time: 0.0028 sec
+
+
+Building the taxonomy multiple tree... OK!
+Generating interactive plot (output.rcf.html)... OK!
+Generating csv extra output ([output.rcf.]*.csv)... OK!
+Total elapsed time: 00:00:00
b
diff -r 000000000000 -r 09b7b0b2e2c2 test-data/kraken_test/test1_csv.rcf.data.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kraken_test/test1_csv.rcf.data.csv Mon Jun 27 11:03:22 2022 +0000
b
@@ -0,0 +1,9 @@
+Samples,/tmp/tmpz5ieggzi/files/d/5/5/dataset_d55a31f8-0f74-4938-9a4f-dce196b7467b,/tmp/tmpz5ieggzi/files/d/5/5/dataset_d55a31f8-0f74-4938-9a4f-dce196b7467b,/tmp/tmpz5ieggzi/files/d/5/5/dataset_d55a31f8-0f74-4938-9a4f-dce196b7467b,Details,Details
+Stats,count,unassigned,score,Rank,Name
+Id,,,,,
+1,9,0,13.614815253856928,no_rank,root
+2,9,0,13.614815253856928,superkingdom,Bacteria
+1224,9,0,13.614815253856928,phylum,Proteobacteria
+1236,9,0,13.614815253856928,class,Gammaproteobacteria
+91347,9,0,13.614815253856928,order,Enterobacteriales
+543,9,9,13.614815253856928,family,Enterobacteriaceae
b
diff -r 000000000000 -r 09b7b0b2e2c2 test-data/kraken_test/test1_csv.rcf.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kraken_test/test1_csv.rcf.html Mon Jun 27 11:03:22 2022 +0000
b
b'@@ -0,0 +1,6581 @@\n+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head><meta charset="utf-8"><link rel="shortcut icon" href="data:image/x-icon;base64,AAABAAMAEBAAAAEAIABoBAAANgAAABgYAAABACAAiAkAAJ4EAAAgIAAAAQAgAKgQAAAmDgAAKAAAABAAAAAgAAAAAQAgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP///wCAgIAC////AP///wC0tKJHlZWSqI6OmuRxcXn9koaK9J2Ym8uNm5V73PPoFv///wAAAAAD////AP///wCAgIAC////AP//7xCHh4OodHSi/2xqw/9nZ9H/XVuU/9eFqv/njb7/yYiq/5GAiOifqKJV////AAAAAAL///8A////AO3t2w56eoHPdHLH/3Vz3vx4dtz7dXTb/mVimv/XhKn/8ZPG/fSVx/vvlcT/kG+E/32Himb///8AAAAAA////wB0dHageHbO/3585fl5d9r/dnTX/3V02/5mYpz80X2i/eaIuv/ukcD/wXak/I6Maf+OkXD9iIKjL////wCEhFo+c3Kv/4F+6P18etz/e3nd/3l33P57eeT/aWag/9F5oP/sib3+tmuZ/3uEXf/P1YX7zdKG/3x9dbX///8Ac3J9nYKA4f+CgOb7fnzf/3174P58et//bm2v8Vtgc7SdbIHNqWmM/4aLZP/L0YT/0dWH/tfdiP+go3P6YlyWJ3BvmduGg+n/g4Hk/oF+4/6AfuX/bm2k7pCUUUX///8Ajv/jCVBmXqPBxoH/1NmI/c7Uhf/b4Yn9wMWB/2BdcGBraqX6iYbs/4SB5P+Eguj7hYPm/2Vlcov///8AACRtB////wAAAIAWrLB29uHojv/X3on+3eSK+9PYh/9XWVN7bWum+ouI7/+Gg+f/hoTq+4iF6f9lZXKL////ACQAbQf///8AAABoFoF+Xva0snL/y818/uDmiPvZ34n/WVlTe3Jxm9uMiu//iofr/oiG6f6Jhu3/cnGo7oyQQ0X///8A/znjCXF8dKNotpL/Za2O/WuVe/+FkGv9mJ5w/2pqaGBzc3+djYrr/42K8fuKh+v/i4js/oqH6/93dbfxYmJztGaTec1yxJr/gOGu/4nptf+K6Lf+i+q8/2eXhfqDNG8ngIBKPnt6uP+Rjvf9jInr/4yJ7f+Miu3+jor0/25xoP920pz/gOGw/oHerv+H4rL/kPC9+5Ttvf98ioC1////AP///wB0dHSgioff/5SR+/mOi+7/jYvt/4+L8P5scJ78etKe/YXis/+H47P/kO+8/JLxv/92m4f9nXKNL////wD///8A7e3bDnt7hM+Ihdv/ko/4/JSS+PuUkPb+bnKg/4Lapv+Q7739kfO/+43puf9/qJH/mYeUZv///wAAAAADgICAAv///wD//+8QhoaBqIB/sf+Iht7/jYjw/2tvnf+D3Kf/ieS1/4bIpP+AkYfoqJOfVf///wAAAAAC////AP///wCAgIAC////AP///wCwsJdHlZWQqJORn+RzdHv9hZOK9Jecmcuej5l7/+jzFv///wAAAAAD////AP///wD4PwAA4A8AAMAHAACAAwAAgAEAAAABAAADgQAAA8EAAAPBAAADgQAAAAEAAIABAACAAwAAwAcAAOAPAAD4PwAAKAAAABgAAAAwAAAAAQAgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP///wD///8A////AP///wEAAAAB////AP///wi6urdDubmxj6ennsuQkIzvc3Nz/YSIh/WWoZzWrrezoLe6t
1X///8T////AP///wCAgIAC////AP///wD///8A////AP///wD///8A////Af///wD///8Ax8fDQJeYkLV4eH39WlmM/19dpP9dX6b/WFRr/611j/+/fJ//om2I/4N3ff+Jko7Ntri2Xv///wD///8AgICAAv///wD///8A////AP///wD///8B////AP///wOfn5t9c3N29GRjn/9oZ8P/c3LU/XVy3P9rbdH/YVt8/9eIrv/5mcz/75bE/t+Mt/67epz/fnN5/4uTkKP///8W////AAAAAAL///8A////AP///wH///8A////A5iYkZJoaH3/bWvB/3h23Px4dt3+dHPX/3Rx1v9sbcv/YFp6/82ApP/rj8D/65DA//KVxv/1mcn83424/5t1iP98gX7A////F////wCAgIAC////AAAAAAL///8AkJCHdWZlf/9zcs7/fHrj/Hh32/92ddn/dXTX/3Zz2P9vcM//YFt7/8t9of/pi73/54u7/+eNvf/vk8P/7ZbE/pxsh/xLVU7/iomJqP///wD///8A////Af///wCfn5Y4ZmV07XZ0zv9+fOX8enjc/3p43P94dtv/dnXZ/3d12f9xcdD/YFp6/8h5nf/mh7r/44e3/+qMvf/ljrv/jV59/3F+V/68wH3/gYNp/4uIlWf///8AAAAAAv///wJ3d22pc3G6/4KA5/t9e9//fHre/3t53f96eNz/eHbZ/3l23P50dNb9Ylx+/ch2m/3jg7b+44W2/+CItv+NXXz/bnpW/8XIgv/Y3Yv8ur57/3Fxbtni4uIa////AHl5XTdoZ4b4f3zb/4F+5P5+fN//fXvf/3x63v96edz/ennf/nt54P9xccb/Xlxy/7Rvj//gf7P/3IKw/olZeP9te1b/xsqD/9DVh//P1IX/09iG/pWYcf9nZ3Nt////AISEb35ycLb/hYPr/YB+4f9/feH/fnzh/3173/99e+L+enjX/2dmmfhlZXiwYGBiknJkaqGVZ37pg11x/3eBXP7GyoP/0NWH/87Uhf/R1ob/2eCK+8HFff9tbXWv////BG9vdb16eMn/hYPq/4F/4/+BfuP/gH7i/4B94v59e93/Y2KO+ISEbX7y8uQT////AP///weAjo5aT1tP5L7Cf//T2Ij+ztOF/9HWhv/S14b/2d+K/s3Sgv+BgnLcg4OSI11deeaBf9r/hYPo/4OB5P+DgeX/gX/i/4OB6v10c77/cXFjmv///wL///8A////AP///wD///8AeHN9apuecP/U2Yf/0teG/tPZhv/V24f/2uCJ/tTahv+KjGj6AABINV9egfuGg+P/hoTp/4WC5v+EgeX/goDk/4WD6/1wbq//WFgAQ////wAzMzMFAAAAAVVVVQP///8AAABjEoiLZ/Df5Y7/2d+J/dfch//W3If/2d+H/9vhif6ZnW3/AAA5P19egvuHheX/iIXq/4eE6P+Gg+f/hILl/4eE7P1xb7D/WFgAQ////wAzMzMFAAAAAVVVVQP///8AAABVEnd1WvDEx37/y9GD/dnfif/g54z/4OeL/97kiv6anWz/AAA1P15eeuaFg93/ioft/4iF6f+Hhen/hoPn/4mG7/14d8L/cXFimv///wP///8A////AP///wD///8AenV3a1dyZf9QY1//bGVW/pORY/+trnD/xch7/tTZhP+Pkmn6AABINXBwdb1/fc7/jYry/4mH6v+Jhur/iIbp/4iG6/6Gg+X/Z2aT+IKEaX7y8uQT////AP///wiXgY9bXndp5HXLnv95z6P+dsif/2Wrjv9RcWv/Z3Rk/nyBYv9xcWbbfHyDI4SEa354d73/kI31/YuI6/+LiOz/iofr/4mG6v+Lh+3+hoPi/21soPhnZ3qwYGBikmNza6JhinXpcsWa/3vdq/6C4a//iOi1/4zpt/+O6Lf/kOu8+3O1l/9jVmKv////BHl5WDdraor4iofn/4+M8v6Miez/i4nt/4uI7
P+L'..b'XxM7eBAVcqcRKtBisXXT16HWfKsKP2ozuIlVL2HV6zvUQb4q9ziStwQ74/NPRzOO4Bdb47y8Nsara2Os2FlxwWqaJxjQyselHf2c39ZP0HE7XD4lJkeuM2OswUPyUMv+J6L2PnNQnsmrUYdZ11Rh3Tqilv47WEIQQx2yXY5apdpt85nR6IVbe82yaLwYd7ugQka1tMa4XKsNf0StXC1xqGutEylDRYRqHAH25vwRxJ24CNm0rUiyZEucJVvirNipHCQ27JUU2qSjzw1Aw5DgmAaCzg19HNtIcFoLH8c31l59jiNFLzGl6HNzxwwGWypbpDJGwNxLwyGzlnFEIXKPHJ3zLD7xVySX60xmmuQKBrf3M9jmjNVu66BvVELdoCDbX9FK8glS+szdMhhqPuZBN9jrwJTIejEpfAVC9AL5FhXYjG2QI2iQIzgiVGGB+gLEBWJS+BQxJfypuUsGQ83HWFKG5GI1qegz4Fw5JtgJ6b8ZFYo/vxKrEEPKtxG+x5hc9Jow0RgMBiNSBkN5sSpZCdwob2vwR0rDg0D8AjiHzKQyl8D/gOcIiH+LCeHN5g4YajhHkDzJ4kEK0Us54q8pDWNEyuBOrO7fvQflPjpDjiCX/NyzgN4gT0F5BaZjZUWBtQjxMcTfwed/Rzxw4EfT2oZaxFjgVo1yT6JcvlPRSPN7i6t7wxiRMqQvWE9QBEXzsKJiyCH4aZHTkgBtiNMGfG1A5iBkPeL4ED4/yH1IuRshChFsJu7/hnr714hxaZ/+NxhqArou7gOtcTvVGaPumtfbZpreYDAYDKk4Gf1grAUprtUWdchX51p3maY3GAwGQyoCqIDSOsKyD+jncJ1sVFQdXcE71zS9wWAwGHR42YW4lAL/QgUi7oqKY3odKt6n7jWKUZE2DAaDwWBIyRVkLl+T3esd0+QGg8Fg0CUIbK5Ekfq1aXKDwWAwuGFEJQnUJlJnQjcYDAaD4RAEKjVLpkVqiGlqg8FgMKRDG1RKi0wJ1D9NExsMBoOhIvRCZTX3WqDmY5b5DAaDweABfVDJDb0SqJk4ZN42GAwGgyEdmgFzKihOu4FrMUluDQaDwZAhBgFvoFLH64rTNuCvQMPD+YcbZTUYDIbDh9aoILP9gBNRKT4ao0Iq7UMFqv0KWAD8h8Mgynkq/h885rfKXRQafwAAAABJRU5ErkJggg==\n+" style="display:none"><noscript>Javascript must be enabled to view this page.</noscript><div style="display:none"><krona collapse="true" key="true" chart="TAXOMIC"><attributes magnitude="count"><attribute display="Count" dataAll="members" tip="Number of reads assigned to this and child taxa">count</attribute><attribute display="Unassigned" dataNode="members" tip="Number of reads assigned specifically to this taxon">unassigned</attribute><attribute display="TaxID" mono="true" hrefBase="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=" tip="Taxonomic identifier">tid</attribute><attribute display="Rank" mono="true" tip="Taxonomic rank/level">rank</attribute><attribute display="Kmer coverage (%)" tip="Averaged score of reads assigned to this and child 
taxa">score</attribute></attributes><datasets rawSamples="1"><dataset isctr="False" sread="99" sclas="99" sfilt="99" scmin="36" scavg="99.94949494949495" scmax="347" lnmin="198 nt" lnavg="533 nt" lnmax="602 nt" tclas="13" tfilt="13" tfold="1" sclim="None" totnt="52.81 knt">/tmp/tmpz5ieggzi/files/d/5/5/dataset_d55a31f8-0f74-4938-9a4f-dce196b7467b</dataset></datasets><color attribute="score" hueStart="0" hueEnd="300" valueStart="13.6" valueEnd="13.6" default="true"> </color><node name="root" href="https://www.google.com/search?q=root"><count><val>9</val></count><unassigned><val></val></unassigned><tid><val href="1">1</val></tid><rank><val>no_rank</val></rank><score><val>13.6</val></score><node name="Bacteria" href="https://www.google.com/search?q=Bacteria"><count><val>9</val></count><unassigned><val></val></unassigned><tid><val href="2">2</val></tid><rank><val>superkingdom</val></rank><score><val>13.6</val></score><node name="Proteobacteria" href="https://www.google.com/search?q=Proteobacteria"><count><val>9</val></count><unassigned><val></val></unassigned><tid><val href="1224">1224</val></tid><rank><val>phylum</val></rank><score><val>13.6</val></score><node name="Gammaproteobacteria" href="https://www.google.com/search?q=Gammaproteobacteria"><count><val>9</val></count><unassigned><val></val></unassigned><tid><val href="1236">1236</val></tid><rank><val>class</val></rank><score><val>13.6</val></score><node name="Enterobacteriales" href="https://www.google.com/search?q=Enterobacteriales"><count><val>9</val></count><unassigned><val></val></unassigned><tid><val href="91347">91347</val></tid><rank><val>order</val></rank><score><val>13.6</val></score><node name="Enterobacteriaceae" href="https://www.google.com/search?q=Enterobacteriaceae"><count><val>9</val></count><unassigned><val>9</val></unassigned><tid><val href="543">543</val></tid><rank><val>family</val></rank><score><val>13.6</val></score></node></node></node></node></node></node></krona></div></body></html>\n\\ No 
newline at end of file\n'
b
diff -r 000000000000 -r 09b7b0b2e2c2 test-data/kraken_test/test1_csv.rcf.stat.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kraken_test/test1_csv.rcf.stat.csv Mon Jun 27 11:03:22 2022 +0000
b
@@ -0,0 +1,16 @@
+,/tmp/tmpz5ieggzi/files/d/5/5/dataset_d55a31f8-0f74-4938-9a4f-dce196b7467b
+Seqs. read,99.0
+Seqs. unclass.,0.0
+Seqs. class.,99.0
+Seqs. filtered,99.0
+Score min,36.0
+Score mean,99.94949494949495
+Score max,347.0
+Length min,198.0
+Length mean,533.0
+Length max,602.0
+Total nt read,52814.0
+TIDs class.,13.0
+TIDs filtered,13.0
+TIDs folded,1.0
+Score limit,
b
diff -r 000000000000 -r 09b7b0b2e2c2 test-data/kraken_test/test3_rcf.data.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kraken_test/test3_rcf.data.tsv Mon Jun 27 11:03:22 2022 +0000
b
@@ -0,0 +1,9 @@
+Samples /tmp/tmpz5ieggzi/files/f/9/b/dataset_f9b4a772-d0f2-4391-a7bb-7f055b4fce1e /tmp/tmpz5ieggzi/files/f/9/b/dataset_f9b4a772-d0f2-4391-a7bb-7f055b4fce1e /tmp/tmpz5ieggzi/files/f/9/b/dataset_f9b4a772-d0f2-4391-a7bb-7f055b4fce1e Details Details
+Stats count unassigned score Rank Name
+Id
+1 9 0 2.7230463906265316 no_rank root
+2 9 0 2.7230463906265316 superkingdom Bacteria
+1224 9 0 2.7230463906265316 phylum Proteobacteria
+1236 9 0 2.7230463906265316 class Gammaproteobacteria
+91347 9 0 2.7230463906265316 order Enterobacteriales
+543 9 9 2.7230463906265316 family Enterobacteriaceae
b
diff -r 000000000000 -r 09b7b0b2e2c2 test-data/kraken_test/test3_rcf.stat.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kraken_test/test3_rcf.stat.tsv Mon Jun 27 11:03:22 2022 +0000
b
@@ -0,0 +1,16 @@
+ /tmp/tmpz5ieggzi/files/f/9/b/dataset_f9b4a772-d0f2-4391-a7bb-7f055b4fce1e
+Seqs. read 99.0
+Seqs. unclass. 0.0
+Seqs. class. 99.0
+Seqs. filtered 99.0
+Score min 36.0
+Score mean 99.94949494949495
+Score max 347.0
+Length min 198.0
+Length mean 533.0
+Length max 602.0
+Total nt read 52814.0
+TIDs class. 13.0
+TIDs filtered 13.0
+TIDs folded 1.0
+Score limit
b
diff -r 000000000000 -r 09b7b0b2e2c2 test-data/kraken_test/test3_tsv.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kraken_test/test3_tsv.log Mon Jun 27 11:03:22 2022 +0000
[
@@ -0,0 +1,30 @@
+
+=-= /home/pierre/anaconda3/envs/__recentrifuge@1.8.1/bin/rcf =-= v1.8.1 - Mar 2022 =-= by Jose Manuel Martí =-=
+
+CAUTION! --strain experimental mode activated!
+Loading NCBI nodes... OK! 
+Loading NCBI names... OK! 
+Building dict of parent to children taxa... OK! 
+
+Please, wait, processing files in parallel...
+
+Loading output file /tmp/tmpz5ieggzi/files/f/9/b/dataset_f9b4a772-d0f2-4391-a7bb-7f055b4fce1e.dat... OK!
+  Seqs read: 99 [52.81 knt]
+  Seqs clas: 99 (0.00% unclassified)
+  Seqs pass: 99 (0.00% rejected)
+  Scores SHEL: min = 36.0, max = 347.0, avr = 99.9
+  Coverage(%): min = 0.2, max = 88.1, avr = 15.4
+  Read length: min = 198 nt, max = 602 nt, avr = 533 nt
+  TaxIds: by classifier = 13, by filter = 13
+Building from raw data with mintaxa = 2 ... 
+  Check for more seqs lost ([in/ex]clude affects)... 
+  Info: 90 additional seqs discarded (90.909% of accepted)
+
+  Warning! 11 orphan taxids (rerun with --debug for details)
+/tmp/tmpz5ieggzi/files/f/9/b/dataset_f9b4a772-d0f2-4391-a7bb-7f055b4fce1e sample OK!
+Load elapsed time: 0.00373 sec
+
+
+Building the taxonomy multiple tree... OK!
+Generating tsv extra output ([output.rcf.]*.tsv)... OK!
+Total elapsed time: 00:00:00
b
diff -r 000000000000 -r 09b7b0b2e2c2 test-data/ncbi_taxonomy.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ncbi_taxonomy.loc Mon Jun 27 11:03:22 2022 +0000
b
@@ -0,0 +1,1 @@
+test-db-2022 Test Database ${__HERE__}/test-db
b
diff -r 000000000000 -r 09b7b0b2e2c2 test-data/test-db/delnodes.dmp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/delnodes.dmp Mon Jun 27 11:03:22 2022 +0000
b
b'@@ -0,0 +1,70000 @@\n+2923441\t|\n+2923440\t|\n+2923439\t|\n+2923438\t|\n+2923437\t|\n+2923436\t|\n+2923435\t|\n+2923434\t|\n+2923433\t|\n+2923432\t|\n+2923431\t|\n+2923430\t|\n+2923429\t|\n+2923428\t|\n+2923427\t|\n+2923426\t|\n+2923425\t|\n+2923424\t|\n+2923423\t|\n+2923422\t|\n+2923421\t|\n+2923420\t|\n+2923419\t|\n+2923418\t|\n+2923417\t|\n+2923416\t|\n+2923415\t|\n+2923414\t|\n+2923413\t|\n+2923412\t|\n+2923411\t|\n+2923410\t|\n+2923409\t|\n+2923408\t|\n+2923407\t|\n+2923406\t|\n+2923405\t|\n+2923404\t|\n+2923403\t|\n+2923402\t|\n+2923401\t|\n+2923400\t|\n+2923399\t|\n+2923398\t|\n+2923397\t|\n+2923396\t|\n+2923395\t|\n+2923394\t|\n+2923393\t|\n+2923392\t|\n+2923391\t|\n+2923390\t|\n+2923389\t|\n+2923388\t|\n+2923387\t|\n+2923386\t|\n+2923385\t|\n+2923384\t|\n+2923383\t|\n+2923382\t|\n+2923381\t|\n+2923380\t|\n+2923379\t|\n+2923378\t|\n+2923377\t|\n+2923376\t|\n+2923375\t|\n+2923374\t|\n+2923373\t|\n+2923372\t|\n+2923371\t|\n+2923370\t|\n+2923369\t|\n+2923367\t|\n+2923366\t|\n+2923365\t|\n+2923364\t|\n+2923363\t|\n+2923362\t|\n+2923361\t|\n+2923360\t|\n+2923359\t|\n+2923358\t|\n+2923357\t|\n+2923356\t|\n+2923355\t|\n+2923354\t|\n+2923353\t|\n+2923351\t|\n+2923350\t|\n+2923349\t|\n+2923348\t|\n+2923347\t|\n+2923346\t|\n+2923345\t|\n+2923344\t|\n+2923343\t|\n+2923342\t|\n+2923341\t|\n+2923340\t|\n+2923339\t|\n+2923338\t|\n+2923337\t|\n+2923336\t|\n+2923335\t|\n+2923334\t|\n+2923333\t|\n+2923332\t|\n+2923331\t|\n+2923330\t|\n+2923329\t|\n+2923328\t|\n+2923327\t|\n+2923326\t|\n+2923324\t|\n+2923323\t|\n+2923322\t|\n+2923321\t|\n+2923320\t|\n+2923319\t|\n+2923318\t|\n+2923317\t|\n+2923316\t|\n+2923315\t|\n+2923314\t|\n+2923313\t|\n+2923312\t|\n+2923311\t|\n+2923310\t|\n+2923309\t|\n+2923308\t|\n+2923307\t|\n+2923306\t|\n+2923305\t|\n+2923304\t|\n+2923303\t|\n+2923302\t|\n+2923301\t|\n+2923300\t|\n+2923299\t|\n+2923298\t|\n+2923297\t|\n+2923296\t|\n+2923295\t|\n+2923294\t|\n+2923293\t|\n+2923292\t|\n+2923291\t|\n+2923287\t|\n+2923286\t|\n+2923285\t|\n+2923284\t|\n+
2923283\t|\n+2923282\t|\n+2923281\t|\n+2923280\t|\n+2923279\t|\n+2923278\t|\n+2923277\t|\n+2923276\t|\n+2923275\t|\n+2923274\t|\n+2923273\t|\n+2923272\t|\n+2923271\t|\n+2923270\t|\n+2923269\t|\n+2923268\t|\n+2923267\t|\n+2923266\t|\n+2923264\t|\n+2923263\t|\n+2923262\t|\n+2923261\t|\n+2923260\t|\n+2923259\t|\n+2923258\t|\n+2923257\t|\n+2923256\t|\n+2923255\t|\n+2923254\t|\n+2923253\t|\n+2923252\t|\n+2923251\t|\n+2923250\t|\n+2923249\t|\n+2923247\t|\n+2923246\t|\n+2923245\t|\n+2923244\t|\n+2923243\t|\n+2923242\t|\n+2923241\t|\n+2923240\t|\n+2923239\t|\n+2923238\t|\n+2923237\t|\n+2923236\t|\n+2923235\t|\n+2923234\t|\n+2923233\t|\n+2923232\t|\n+2923231\t|\n+2923230\t|\n+2923229\t|\n+2923228\t|\n+2923227\t|\n+2923226\t|\n+2923225\t|\n+2923224\t|\n+2923223\t|\n+2923222\t|\n+2923221\t|\n+2923220\t|\n+2923219\t|\n+2923218\t|\n+2923217\t|\n+2923216\t|\n+2923215\t|\n+2923214\t|\n+2923213\t|\n+2923212\t|\n+2923211\t|\n+2923210\t|\n+2923209\t|\n+2923208\t|\n+2923207\t|\n+2923206\t|\n+2923205\t|\n+2923204\t|\n+2923203\t|\n+2923202\t|\n+2923201\t|\n+2923200\t|\n+2923199\t|\n+2923198\t|\n+2923197\t|\n+2923196\t|\n+2923195\t|\n+2923194\t|\n+2923193\t|\n+2923192\t|\n+2923191\t|\n+2923190\t|\n+2923189\t|\n+2923188\t|\n+2923187\t|\n+2923186\t|\n+2923185\t|\n+2923184\t|\n+2923183\t|\n+2923182\t|\n+2923181\t|\n+2923180\t|\n+2923179\t|\n+2923178\t|\n+2923177\t|\n+2923176\t|\n+2923175\t|\n+2923174\t|\n+2923173\t|\n+2923172\t|\n+2923171\t|\n+2923170\t|\n+2923169\t|\n+2923168\t|\n+2923167\t|\n+2923166\t|\n+2923165\t|\n+2923164\t|\n+2923163\t|\n+2923162\t|\n+2923161\t|\n+2923160\t|\n+2923159\t|\n+2923158\t|\n+2923157\t|\n+2923156\t|\n+2923155\t|\n+2923154\t|\n+2923153\t|\n+2923152\t|\n+2923151\t|\n+2923150\t|\n+2923149\t|\n+2923148\t|\n+2923147\t|\n+2923146\t|\n+2923145\t|\n+2923144\t|\n+2923143\t|\n+2923142\t|\n+2923141\t|\n+2923140\t|\n+2923139\t|\n+2923138\t|\n+2923137\t|\n+2923136\t|\n+2923135\t|\n+2923134\t|\n+2923133\t|\n+2923132\t|\n+2923131\t|\n+2923130\t|\n+2923129\t|\n+2923128\t|\
n+2923127\t|\n+2923126\t|\n+2923125\t|\n+2923124\t|\n+2923123\t|\n+2923122\t|\n+2923121\t|\n+2923120\t|\n+2923119\t|\n+2923118\t|\n+2923117\t|\n+2923116\t|\n+2923115\t|\n+2923114\t|\n+2923113\t|\n+2923112\t|\n+2923111\t|\n+2923110\t|\n+2923109\t|\n+2923108\t|\n+2923107\t|\n+2923106\t|\n+2923105\t|\n+2923104\t|\n+2923103\t|\n+2923102\t|\n+2923101\t|\n+2923100\t|\n+2923099\t|\n+2923098\t|\n+2923097\t|\n+2923096\t|\n+2923095\t|\n+2923094\t|\n+2923093\t|\n+2923092\t|\n+2923091\t|\n+2923090\t|\n+2923089\t|\n+2923088\t|\n+2923087\t|\n+2923086\t|\n+2923085\t|\n+2923084\t|\n+2923083\t|\n+2923082\t|\n+2923081\t|\n+2923080\t|\n+2923078\t|\n+2923077\t|\n+2923076\t|\n+2923075\t|\n+2923074\t|\n+2923073\t|\n+2923072\t|\n+2923071'..b'004\t|\n+2673003\t|\n+2673001\t|\n+2673000\t|\n+2672999\t|\n+2672998\t|\n+2672997\t|\n+2672996\t|\n+2672994\t|\n+2672992\t|\n+2672991\t|\n+2672990\t|\n+2672989\t|\n+2672988\t|\n+2672986\t|\n+2672985\t|\n+2672984\t|\n+2672983\t|\n+2672981\t|\n+2672980\t|\n+2672979\t|\n+2672978\t|\n+2672977\t|\n+2672976\t|\n+2672973\t|\n+2672972\t|\n+2672971\t|\n+2672970\t|\n+2672969\t|\n+2672968\t|\n+2672966\t|\n+2672965\t|\n+2672964\t|\n+2672962\t|\n+2672961\t|\n+2672958\t|\n+2672957\t|\n+2672956\t|\n+2672955\t|\n+2672953\t|\n+2672952\t|\n+2672950\t|\n+2672949\t|\n+2672946\t|\n+2672945\t|\n+2672944\t|\n+2672943\t|\n+2672942\t|\n+2672941\t|\n+2672939\t|\n+2672938\t|\n+2672936\t|\n+2672934\t|\n+2672933\t|\n+2672932\t|\n+2672930\t|\n+2672929\t|\n+2672928\t|\n+2672927\t|\n+2672926\t|\n+2672923\t|\n+2672922\t|\n+2672920\t|\n+2672919\t|\n+2672918\t|\n+2672917\t|\n+2672916\t|\n+2672915\t|\n+2672914\t|\n+2672913\t|\n+2672912\t|\n+2672910\t|\n+2672909\t|\n+2672908\t|\n+2672907\t|\n+2672906\t|\n+2672905\t|\n+2672904\t|\n+2672900\t|\n+2672899\t|\n+2672895\t|\n+2672894\t|\n+2672893\t|\n+2672891\t|\n+2672890\t|\n+2672889\t|\n+2672887\t|\n+2672886\t|\n+2672885\t|\n+2672881\t|\n+2672880\t|\n+2672878\t|\n+2672877\t|\n+2672876\t|\n+2672875\t|\n+2672874\t|\n+2672872\t|\n+2672870\t|\n+2
672869\t|\n+2672868\t|\n+2672866\t|\n+2672863\t|\n+2672860\t|\n+2672859\t|\n+2672858\t|\n+2672854\t|\n+2672853\t|\n+2672851\t|\n+2672850\t|\n+2672849\t|\n+2672848\t|\n+2672847\t|\n+2672846\t|\n+2672845\t|\n+2672844\t|\n+2672843\t|\n+2672840\t|\n+2672837\t|\n+2672833\t|\n+2672832\t|\n+2672831\t|\n+2672830\t|\n+2672829\t|\n+2672828\t|\n+2672825\t|\n+2672824\t|\n+2672823\t|\n+2672817\t|\n+2672815\t|\n+2672814\t|\n+2672810\t|\n+2672809\t|\n+2672808\t|\n+2672807\t|\n+2672805\t|\n+2672804\t|\n+2672803\t|\n+2672802\t|\n+2672801\t|\n+2672800\t|\n+2672799\t|\n+2672798\t|\n+2672797\t|\n+2672795\t|\n+2672794\t|\n+2672792\t|\n+2672791\t|\n+2672790\t|\n+2672789\t|\n+2672788\t|\n+2672786\t|\n+2672783\t|\n+2672782\t|\n+2672781\t|\n+2672780\t|\n+2672779\t|\n+2672778\t|\n+2672776\t|\n+2672775\t|\n+2672773\t|\n+2672772\t|\n+2672770\t|\n+2672769\t|\n+2672768\t|\n+2672767\t|\n+2672766\t|\n+2672765\t|\n+2672764\t|\n+2672762\t|\n+2672759\t|\n+2672756\t|\n+2672755\t|\n+2672754\t|\n+2672753\t|\n+2672752\t|\n+2672751\t|\n+2672750\t|\n+2672748\t|\n+2672745\t|\n+2672744\t|\n+2672743\t|\n+2672741\t|\n+2672740\t|\n+2672739\t|\n+2672738\t|\n+2672737\t|\n+2672736\t|\n+2672735\t|\n+2672734\t|\n+2672733\t|\n+2672732\t|\n+2672731\t|\n+2672730\t|\n+2672729\t|\n+2672728\t|\n+2672726\t|\n+2672724\t|\n+2672723\t|\n+2672721\t|\n+2672719\t|\n+2672718\t|\n+2672717\t|\n+2672716\t|\n+2672715\t|\n+2672714\t|\n+2672712\t|\n+2672710\t|\n+2672707\t|\n+2672706\t|\n+2672705\t|\n+2672704\t|\n+2672703\t|\n+2672702\t|\n+2672700\t|\n+2672699\t|\n+2672698\t|\n+2672694\t|\n+2672693\t|\n+2672692\t|\n+2672691\t|\n+2672689\t|\n+2672688\t|\n+2672687\t|\n+2672685\t|\n+2672684\t|\n+2672683\t|\n+2672682\t|\n+2672681\t|\n+2672679\t|\n+2672678\t|\n+2672676\t|\n+2672675\t|\n+2672674\t|\n+2672673\t|\n+2672672\t|\n+2672671\t|\n+2672670\t|\n+2672669\t|\n+2672667\t|\n+2672666\t|\n+2672665\t|\n+2672664\t|\n+2672663\t|\n+2672662\t|\n+2672661\t|\n+2672658\t|\n+2672656\t|\n+2672655\t|\n+2672654\t|\n+2672649\t|\n+2672648\t|\n+2672647\t|\n
+2672646\t|\n+2672640\t|\n+2672635\t|\n+2672634\t|\n+2672632\t|\n+2672631\t|\n+2672630\t|\n+2672629\t|\n+2672628\t|\n+2672627\t|\n+2672626\t|\n+2672624\t|\n+2672623\t|\n+2672620\t|\n+2672619\t|\n+2672618\t|\n+2672617\t|\n+2672616\t|\n+2672614\t|\n+2672613\t|\n+2672612\t|\n+2672611\t|\n+2672610\t|\n+2672609\t|\n+2672608\t|\n+2672607\t|\n+2672606\t|\n+2672605\t|\n+2672604\t|\n+2672603\t|\n+2672602\t|\n+2672598\t|\n+2672597\t|\n+2672592\t|\n+2672591\t|\n+2672589\t|\n+2672588\t|\n+2672585\t|\n+2672584\t|\n+2672582\t|\n+2672580\t|\n+2672578\t|\n+2672577\t|\n+2672576\t|\n+2672575\t|\n+2672573\t|\n+2672566\t|\n+2672154\t|\n+2672150\t|\n+2672147\t|\n+2672146\t|\n+2672145\t|\n+2672144\t|\n+2672143\t|\n+2672140\t|\n+2672139\t|\n+2672138\t|\n+2672137\t|\n+2672136\t|\n+2672134\t|\n+2672133\t|\n+2672132\t|\n+2672131\t|\n+2672129\t|\n+2672127\t|\n+2672126\t|\n+2672124\t|\n+2672123\t|\n+2672122\t|\n+2672121\t|\n+2672120\t|\n+2672119\t|\n+2672118\t|\n+2672117\t|\n+2672114\t|\n+2672109\t|\n+2672108\t|\n+2672106\t|\n+2672105\t|\n+2672104\t|\n+2672099\t|\n+2672096\t|\n+2672095\t|\n+2672093\t|\n+2672090\t|\n+2672087\t|\n+2672086\t|\n+2672085\t|\n+2672082\t|\n+2672079\t|\n+2672075\t|\n+2672074\t|\n+2672073\t|\n+2672071\t|\n+2672068\t|\n+2672065\t|\n+2672064\t|\n+2672063\t|\n+2672062\t|\n+2672060\t|\n+2672059\t|\n+2672058\t|\n+2672057\t|\n+2672056\t|\n+2672055\t|\n+2672054\t|\n+2672053\t|\n+2672052\t|\n+2672050\t|\n+2672048\t|\n+2672047\t|\n+2672046\t|\n'
b
diff -r 000000000000 -r 09b7b0b2e2c2 test-data/test-db/division.dmp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/division.dmp Mon Jun 27 11:03:22 2022 +0000
b
@@ -0,0 +1,12 @@
+0 | BCT | Bacteria |   |
+1 | INV | Invertebrates |   |
+2 | MAM | Mammals |   |
+3 | PHG | Phages |   |
+4 | PLN | Plants and Fungi |   |
+5 | PRI | Primates |   |
+6 | ROD | Rodents |   |
+7 | SYN | Synthetic and Chimeric |   |
+8 | UNA | Unassigned | No species nodes should inherit this division assignment |
+9 | VRL | Viruses |   |
+10 | VRT | Vertebrates |   |
+11 | ENV | Environmental samples | Anonymous sequences cloned directly from the environment |
b
diff -r 000000000000 -r 09b7b0b2e2c2 test-data/test-db/gc.prt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/gc.prt Mon Jun 27 11:03:22 2022 +0000
b
b'@@ -0,0 +1,358 @@\n+--**************************************************************************\n+--  This is the NCBI genetic code table\n+--  Initial base data set from Andrzej Elzanowski while at PIR International\n+--  Addition of Eubacterial and Alternative Yeast by J.Ostell at NCBI\n+--  Base 1-3 of each codon have been added as comments to facilitate\n+--    readability at the suggestion of Peter Rice, EMBL\n+--  Later additions by Taxonomy Group staff at NCBI\n+--\n+--  Version 4.6\n+--     Renamed genetic code 24 to Rhabdopleuridae Mitochondrial\n+--\n+--  Version 4.5\n+--     Added Cephalodiscidae mitochondrial genetic code 33\n+--\n+--  Version 4.4\n+--     Added GTG as start codon for genetic code 3\n+--     Added Balanophoraceae plastid genetic code 32\n+--\n+--  Version 4.3\n+--     Change to CTG -> Leu in genetic codes 27, 28, 29, 30\n+--\n+--  Version 4.2\n+--     Added Karyorelict nuclear genetic code 27\n+--     Added Condylostoma nuclear genetic code 28\n+--     Added Mesodinium nuclear genetic code 29\n+--     Added Peritrich nuclear genetic code 30\n+--     Added Blastocrithidia nuclear genetic code 31\n+--\n+--  Version 4.1\n+--     Added Pachysolen tannophilus nuclear genetic code 26\n+--\n+--  Version 4.0\n+--     Updated version to reflect numerous undocumented changes:\n+--     Corrected start codons for genetic code 25\n+--     Name of new genetic code is Candidate Division SR1 and Gracilibacteria\n+--     Added candidate division SR1 nuclear genetic code 25\n+--     Added GTG as start codon for genetic code 24\n+--     Corrected Pterobranchia Mitochondrial genetic code (24)\n+--     Added genetic code 24, Pterobranchia Mitochondrial\n+--     Genetic code 11 is now Bacterial, Archaeal and Plant Plastid\n+--     Fixed capitalization of mitochondrial in codes 22 and 23\n+--     Added GTG, ATA, and TTG as alternative start codons to code 13\n+--\n+--  Version 3.9\n+--     Code 14 differs from code 9 only by translating UAA to Tyr rather 
than\n+--     STOP.  A recent study (Telford et al, 2000) has found no evidence that\n+--     the codon UAA codes for Tyr in the flatworms, but other opinions exist.\n+--     There are very few GenBank records that are translated with code 14,\n+--     but a test translation shows that retranslating these records with code\n+--     9 can cause premature terminations.  Therefore, GenBank will maintain\n+--     code 14 until further information becomes available.\n+--\n+--  Version 3.8\n+--     Added GTG start to Echinoderm mitochondrial code, code 9\n+--\n+--  Version 3.7\n+--     Added code 23 Thraustochytrium mitochondrial code\n+--        formerly OGMP code 93\n+--        submitted by Gertraude Berger, Ph.D.\n+--\n+--  Version 3.6\n+--     Added code 22 TAG-Leu, TCA-stop\n+--        found in mitochondrial DNA of Scenedesmus obliquus\n+--        submitted by Gertraude Berger, Ph.D.\n+--        Organelle Genome Megasequencing Program, Univ Montreal\n+--\n+--  Version 3.5\n+--     Added code 21, Trematode Mitochondrial\n+--       (as deduced from: Garey & Wolstenholme,1989; Ohama et al, 1990)\n+--     Added code 16, Chlorophycean Mitochondrial\n+--       (TAG can translated to Leucine instaed to STOP in chlorophyceans\n+--        and fungi)\n+--\n+--  Version 3.4\n+--     Added CTG,TTG as allowed alternate start codons in Standard code.\n+--        Prats et al. 1989, Hann et al. 1992\n+--\n+--  Version 3.3 - 10/13/95\n+--     Added alternate intiation codon ATC to code 5\n+--        based on complete mitochondrial genome of honeybee\n+--        Crozier and Crozier (1993)\n+--\n+--  Version 3.2 - 6/24/95\n+--  Code       Comments\n+--   10        Alternative Ciliate Macronuclear renamed to Euplotid Macro...\n+--   15        Blepharisma Macro.. code added\n+--    5        Invertebrate Mito.. 
GTG allowed as alternate initiator\n+--   11        Eubacterial renamed to Bacterial as most alternate starts\n+--               have been found in Archea\n+--\n+--\n+--  Version 3.1 - 1995\n+--  Updated as per Andrzej Elzanowski at NCBI\n+--     Complete documentation in NCBI'..b'5 ,\n+  ncbieaa  "FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",\n+  sncbieaa "---M------**-----------------------M---------------M------------"\n+  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ } ,\n+ {\n+  name "Pachysolen tannophilus Nuclear" ,\n+  id 26 ,\n+  ncbieaa  "FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",\n+  sncbieaa "----------**--*----M---------------M----------------------------"\n+  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ } ,\n+ {\n+  name "Karyorelict Nuclear" ,\n+  id 27 ,\n+  ncbieaa  "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",\n+  sncbieaa "--------------*--------------------M----------------------------"\n+  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ } ,\n+ {\n+  name "Condylostoma Nuclear" ,\n+  id 28 ,\n+  ncbieaa  "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",\n+  sncbieaa "----------**--*--------------------M----------------------------"\n+  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+  -- Base3  
TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ } ,\n+ {\n+  name "Mesodinium Nuclear" ,\n+  id 29 ,\n+  ncbieaa  "FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",\n+  sncbieaa "--------------*--------------------M----------------------------"\n+  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ } ,\n+ {\n+  name "Peritrich Nuclear" ,\n+  id 30 ,\n+  ncbieaa  "FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",\n+  sncbieaa "--------------*--------------------M----------------------------"\n+  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ } ,\n+ {\n+  name "Blastocrithidia Nuclear" ,\n+  id 31 ,\n+  ncbieaa  "FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",\n+  sncbieaa "----------**-----------------------M----------------------------"\n+  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ } ,\n+ {\n+  name "Balanophoraceae Plastid" ,\n+  id 32 ,\n+  ncbieaa  "FFLLSSSSYY*WCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",\n+  sncbieaa "---M------*---*----M------------MMMM---------------M------------"\n+  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ } ,\n+ {\n+  name "Cephalodiscidae Mitochondrial" ,\n+  id 33 ,\n+  ncbieaa  
"FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",\n+  sncbieaa "---M-------*-------M---------------M---------------M------------"\n+  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ }\n+}\n'
b
diff -r 000000000000 -r 09b7b0b2e2c2 test-data/test-db/gencode.dmp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/gencode.dmp Mon Jun 27 11:03:22 2022 +0000
b
@@ -0,0 +1,28 @@
+0 | | Unspecified |                                                                   |                                                                   |
+1 | | Standard | FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | ---M------**--*----M---------------M----------------------------  |
+2 | | Vertebrate Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG  | ----------**--------------------MMMM----------**---M------------  |
+3 | | Yeast Mitochondrial | FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | ----------**----------------------MM---------------M------------  |
+4 | | Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | --MM------**-------M------------MMMM---------------M------------  |
+5 | | Invertebrate Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG  | ---M------**--------------------MMMM---------------M------------  |
+6 | | Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear | FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | --------------*--------------------M----------------------------  |
+9 | | Echinoderm Mitochondrial; Flatworm Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG  | ----------**-----------------------M---------------M------------  |
+10 | | Euplotid Nuclear | FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | ----------**-----------------------M----------------------------  |
+11 | | Bacterial, Archaeal and Plant Plastid | FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | ---M------**--*----M------------MMMM---------------M------------  |
+12 | | Alternative Yeast Nuclear | FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | ----------**--*----M---------------M----------------------------  |
+13 | | Ascidian Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG  | ---M------**----------------------MM---------------M------------  |
+14 | | Alternative Flatworm Mitochondrial | FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG  | -----------*-----------------------M----------------------------  |
+15 | | Blepharisma Macronuclear | FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | ----------*---*--------------------M----------------------------  |
+16 | | Chlorophycean Mitochondrial | FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | ----------*---*--------------------M----------------------------  |
+21 | | Trematode Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG  | ----------**-----------------------M---------------M------------  |
+22 | | Scenedesmus obliquus mitochondrial | FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | ------*---*---*--------------------M----------------------------  |
+23 | | Thraustochytrium mitochondrial code | FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | --*-------**--*-----------------M--M---------------M------------  |
+24 | | Rhabdopleuridae Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG | ---M------**-------M---------------M---------------M------------ |
+25 | | Candidate Division SR1 and Gracilibacteria | FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | ---M------**-----------------------M---------------M------------  |
+26 | | Pachysolen tannophilus Nuclear | FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | ----------**--*----M---------------M----------------------------  |
+27 | | Karyorelict Nuclear | FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | --------------*--------------------M----------------------------  |
+28 | | Condylostoma Nuclear | FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | ----------**--*--------------------M----------------------------  |
+29 | | Mesodinium Nuclear | FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | --------------*--------------------M----------------------------  |
+30 | | Peritrich Nuclear | FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | --------------*--------------------M----------------------------  |
+31 | | Blastocrithidia Nuclear | FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | ----------**-----------------------M----------------------------  |
+32 | | Balanophoraceae Plastid | FFLLSSSSYY*WCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | ---M------*---*----M------------MMMM---------------M------------  |
+33 | | Cephalodiscidae Mitochondrial | FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG  | ---M-------*-------M---------------M---------------M------------  |
b
diff -r 000000000000 -r 09b7b0b2e2c2 test-data/test-db/merged.dmp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/merged.dmp Mon Jun 27 11:03:22 2022 +0000
b
b'@@ -0,0 +1,55000 @@\n+12\t|\t74109\t|\n+30\t|\t29\t|\n+36\t|\t184914\t|\n+37\t|\t42\t|\n+46\t|\t39\t|\n+67\t|\t32033\t|\n+76\t|\t155892\t|\n+77\t|\t74311\t|\n+79\t|\t74313\t|\n+80\t|\t155892\t|\n+90\t|\t34102\t|\n+91\t|\t34103\t|\n+111\t|\t328552\t|\n+121\t|\t125\t|\n+129\t|\t99\t|\n+130\t|\t223389\t|\n+177\t|\t183\t|\n+219\t|\t210\t|\n+229\t|\t23\t|\n+230\t|\t24\t|\n+257\t|\t143223\t|\n+260\t|\t143224\t|\n+261\t|\t37931\t|\n+273\t|\t274\t|\n+284\t|\t80866\t|\n+289\t|\t47421\t|\n+291\t|\t40137\t|\n+330\t|\t301\t|\n+334\t|\t50946\t|\n+341\t|\t456327\t|\n+345\t|\t40324\t|\n+348\t|\t29444\t|\n+360\t|\t359\t|\n+362\t|\t358\t|\n+363\t|\t358\t|\n+364\t|\t358\t|\n+365\t|\t358\t|\n+366\t|\t358\t|\n+367\t|\t358\t|\n+368\t|\t358\t|\n+369\t|\t358\t|\n+377\t|\t186901\t|\n+383\t|\t382\t|\n+395\t|\t7\t|\n+397\t|\t384\t|\n+401\t|\t400\t|\n+412\t|\t410\t|\n+419\t|\t39774\t|\n+420\t|\t39775\t|\n+443\t|\t442\t|\n+457\t|\t466\t|\n+473\t|\t107673\t|\n+509\t|\t85698\t|\n+510\t|\t106590\t|\n+514\t|\t518\t|\n+515\t|\t85698\t|\n+530\t|\t52959\t|\n+557\t|\t549\t|\n+559\t|\t66271\t|\n+560\t|\t553\t|\n+567\t|\t66695\t|\n+591\t|\t28901\t|\n+592\t|\t149539\t|\n+593\t|\t98360\t|\n+601\t|\t90370\t|\n+602\t|\t90371\t|\n+603\t|\t59203\t|\n+627\t|\t29488\t|\n+643\t|\t648\t|\n+653\t|\t29489\t|\n+665\t|\t55601\t|\n+667\t|\t51367\t|\n+677\t|\t90736\t|\n+681\t|\t669\t|\n+684\t|\t85581\t|\n+692\t|\t32013\t|\n+705\t|\t662\t|\n+706\t|\t28171\t|\n+707\t|\t28174\t|\n+708\t|\t29498\t|\n+710\t|\t657\t|\n+711\t|\t660\t|\n+737\t|\t732\t|\n+746\t|\t75985\t|\n+748\t|\t44283\t|\n+778\t|\t943\t|\n+791\t|\t77133\t|\n+802\t|\t773\t|\n+804\t|\t807\t|\n+805\t|\t33047\t|\n+806\t|\t38323\t|\n+811\t|\t83558\t|\n+812\t|\t83554\t|\n+825\t|\t28131\t|\n+857\t|\t143361\t|\n+874\t|\t899\t|\n+877\t|\t52561\t|\n+887\t|\t453230\t|\n+900\t|\t872\t|\n+910\t|\t41294\t|\n+922\t|\t34007\t|\n+925\t|\t76588\t|\n+946\t|\t948\t|\n+977\t|\t89373\t|\n+993\t|\t70993\t|\n+994\t|\t986\t|\n+1000\t|\t107401\t|\n+1020\t|\t135617\t|\n+1024\t|\t709
93\t|\n+1055\t|\t1054\t|\n+1077\t|\t1076\t|\n+1139\t|\t269084\t|\n+1141\t|\t32046\t|\n+1155\t|\t129910\t|\n+1156\t|\t118562\t|\n+1170\t|\t264691\t|\n+1171\t|\t264691\t|\n+1172\t|\t264691\t|\n+1174\t|\t103690\t|\n+1196\t|\t188910\t|\n+1212\t|\t1213\t|\n+1217\t|\t1213\t|\n+1221\t|\t1890436\t|\n+1230\t|\t35798\t|\n+1250\t|\t33964\t|\n+1256\t|\t51669\t|\n+1278\t|\t1275\t|\n+1289\t|\t1309\t|\n+1291\t|\t1287\t|\n+1312\t|\t119602\t|\n+1315\t|\t1335\t|\n+1330\t|\t44008\t|\n+1342\t|\t41997\t|\n+1344\t|\t1902\t|\n+1389\t|\t44250\t|\n+1403\t|\t44252\t|\n+1414\t|\t86665\t|\n+1453\t|\t44251\t|\n+1460\t|\t44253\t|\n+1463\t|\t44161\t|\n+1466\t|\t78058\t|\n+1469\t|\t1478\t|\n+1475\t|\t78057\t|\n+1546\t|\t146817\t|\n+1594\t|\t1585\t|\n+1617\t|\t29397\t|\n+1619\t|\t2751\t|\n+1644\t|\t1641\t|\n+1675\t|\t1710\t|\n+1700\t|\t37929\t|\n+1706\t|\t41170\t|\n+1723\t|\t144185\t|\n+1734\t|\t29347\t|\n+1746\t|\t1752\t|\n+1756\t|\t28896\t|\n+1761\t|\t85007\t|\n+1803\t|\t36812\t|\n+1815\t|\t36819\t|\n+1819\t|\t37919\t|\n+1820\t|\t33910\t|\n+1825\t|\t1707\t|\n+1826\t|\t1710\t|\n+1834\t|\t43767\t|\n+1840\t|\t2045\t|\n+1842\t|\t52699\t|\n+1845\t|\t1835\t|\n+1846\t|\t28042\t|\n+1864\t|\t28056\t|\n+1870\t|\t1865\t|\n+1875\t|\t1877\t|\n+1882\t|\t85011\t|\n+1899\t|\t1892\t|\n+1937\t|\t68280\t|\n+1939\t|\t1919\t|\n+1953\t|\t68280\t|\n+1959\t|\t65497\t|\n+1966\t|\t66429\t|\n+1973\t|\t1883\t|\n+1974\t|\t68215\t|\n+1975\t|\t1931\t|\n+1977\t|\t97398\t|\n+1978\t|\t66429\t|\n+1979\t|\t53446\t|\n+1980\t|\t51201\t|\n+1981\t|\t35621\t|\n+1982\t|\t35621\t|\n+1983\t|\t68246\t|\n+1984\t|\t97400\t|\n+1986\t|\t1951\t|\n+2003\t|\t47990\t|\n+2007\t|\t147065\t|\n+2008\t|\t58118\t|\n+2009\t|\t58118\t|\n+2010\t|\t147065\t|\n+2011\t|\t147065\t|\n+2027\t|\t37482\t|\n+2032\t|\t33882\t|\n+2058\t|\t53355\t|\n+2059\t|\t60443\t|\n+2069\t|\t1970\t|\n+2073\t|\t1847\t|\n+2076\t|\t37331\t|\n+2081\t|\t862\t|\n+2091\t|\t2085\t|\n+2126\t|\t880447\t|\n+2140\t|\t2136\t|\n+2154\t|\t2155\t|\n+2165\t|\t145262\t|\n+2166\t|\t145262\t|\n+2167\t
|\t145261\t|\n+2169\t|\t2186\t|\n+2170\t|\t2180\t|\n+2185\t|\t2163\t|\n+2212\t|\t2209\t|\n+2227\t|\t2175\t|\n+2228\t|\t39664\t|\n+2240\t|\t2242\t|\n+2241\t|\t2242\t|\n+2244\t|\t335819\t|\n+2274\t|\t2275\t|\n+2282\t|\t12914\t|\n+2324\t|\t1754\t|\n+2328\t|\t44000\t|\n+2329\t|\t44001\t|\n+2338\t|\t77133\t|\n+2435\t|\t36549\t|\n+2519\t|\t38738\t|\n+2521\t|\t2102\t|\n+2610\t|\t394\t|\n+2703\t|\t68335\t|\n+2729\t|\t118099\t|\n+2732\t|\t2334\t|\n+2775\t|\t2510777\t|\n+2776\t|\t2510778\t|\n+2778\t|\t2782\t|\n+2780\t|\t172962\t|\n+2793\t|\t35688\t|\n+2795\t|\t2822\t|\n+2827\t|\t2081491\t|\n+2832\t|\t2081491\t|\n+2839\t|\t1514140\t|\n+2859\t|\t1003145\t|\n+2863\t|\t426639\t|\n+2883\t|\t2885\t|\n+2884\t|\t2882\t|\n+2895\t|\t3027\t|\n+2906\t|\t2907\t|\n+2913\t|\t160621\t|\n+2921\t|\t35672\t|\n+2930\t|\t2666306\t|\n+2941\t|\t2920\t|\n+2956\t|\t407301\t|\n+2963\t|\t326570\t|\n+3045\t|\t3046\t|\n+3053\t|\t3054\t|\n+3058\t|\t3055\t|\n+3060\t|\t35704\t|\n+3069\t|\t3042\t|\n+3096\t|\t577483\t|\n+3115\t|\t3118\t|\n+3126\t|\t33104\t|\n+3137\t|\t35862\t|'..b'265\t|\t1816064\t|\n+1814268\t|\t1816065\t|\n+1814269\t|\t1806990\t|\n+1814314\t|\t1833893\t|\n+1814315\t|\t1833893\t|\n+1814316\t|\t1833893\t|\n+1814317\t|\t1833893\t|\n+1814318\t|\t1833893\t|\n+1814319\t|\t1833893\t|\n+1814320\t|\t1833894\t|\n+1814321\t|\t1833894\t|\n+1814322\t|\t1833894\t|\n+1814323\t|\t1833894\t|\n+1814324\t|\t1833894\t|\n+1814325\t|\t1833894\t|\n+1814326\t|\t1833894\t|\n+1814327\t|\t1833894\t|\n+1814328\t|\t1833894\t|\n+1814898\t|\t411221\t|\n+1814900\t|\t415350\t|\n+1814904\t|\t289425\t|\n+1814905\t|\t430494\t|\n+1814906\t|\t430493\t|\n+1814907\t|\t430498\t|\n+1814915\t|\t289425\t|\n+1814932\t|\t1814931\t|\n+1814936\t|\t166462\t|\n+1814938\t|\t1814939\t|\n+1814941\t|\t1929509\t|\n+1815513\t|\t1819601\t|\n+1815514\t|\t1819601\t|\n+1815531\t|\t754041\t|\n+1815584\t|\t1815583\t|\n+1815585\t|\t1815583\t|\n+1815586\t|\t1815583\t|\n+1815600\t|\t481034\t|\n+1815617\t|\t2136233\t|\n+1815618\t|\t2136232\t|\n+1815622\t|
\t2136234\t|\n+1815624\t|\t1165970\t|\n+1815626\t|\t700572\t|\n+1816076\t|\t1816067\t|\n+1816312\t|\t259674\t|\n+1816323\t|\t1561751\t|\n+1816454\t|\t223910\t|\n+1816604\t|\t1751046\t|\n+1816615\t|\t1751046\t|\n+1817281\t|\t704294\t|\n+1817673\t|\t1422\t|\n+1817674\t|\t33936\t|\n+1817959\t|\t1783356\t|\n+1818569\t|\t240491\t|\n+1818578\t|\t27753\t|\n+1818607\t|\t2025795\t|\n+1818879\t|\t1818608\t|\n+1818902\t|\t68879\t|\n+1818941\t|\t68879\t|\n+1818954\t|\t68879\t|\n+1819010\t|\t68879\t|\n+1819069\t|\t68879\t|\n+1819116\t|\t68879\t|\n+1819140\t|\t68879\t|\n+1819150\t|\t68879\t|\n+1819237\t|\t1395084\t|\n+1819238\t|\t1395081\t|\n+1819374\t|\t511518\t|\n+1819375\t|\t511520\t|\n+1819376\t|\t265234\t|\n+1819401\t|\t1002073\t|\n+1819504\t|\t1819505\t|\n+1819728\t|\t2527775\t|\n+1819729\t|\t2527775\t|\n+1819731\t|\t2527775\t|\n+1819733\t|\t576611\t|\n+1819734\t|\t2527775\t|\n+1819756\t|\t191328\t|\n+1819788\t|\t1814128\t|\n+1819790\t|\t2899743\t|\n+1819866\t|\t2731660\t|\n+1819997\t|\t1820002\t|\n+1819999\t|\t1820002\t|\n+1820005\t|\t2911857\t|\n+1820017\t|\t1812935\t|\n+1820018\t|\t1812935\t|\n+1820019\t|\t881260\t|\n+1820020\t|\t2494701\t|\n+1820021\t|\t2364150\t|\n+1820022\t|\t1812935\t|\n+1820023\t|\t2364151\t|\n+1820024\t|\t1812935\t|\n+1820063\t|\t198641\t|\n+1820132\t|\t1843190\t|\n+1820133\t|\t1820131\t|\n+1820134\t|\t1857294\t|\n+1820298\t|\t2108222\t|\n+1820606\t|\t1513794\t|\n+1820916\t|\t1564506\t|\n+1820918\t|\t651032\t|\n+1821223\t|\t86664\t|\n+1821261\t|\t94625\t|\n+1821277\t|\t1732530\t|\n+1821577\t|\t652142\t|\n+1821756\t|\t2170064\t|\n+1821982\t|\t1816081\t|\n+1821983\t|\t1816081\t|\n+1821984\t|\t1816081\t|\n+1821985\t|\t1816081\t|\n+1821986\t|\t1816081\t|\n+1821987\t|\t1816081\t|\n+1821988\t|\t1816081\t|\n+1821989\t|\t1816081\t|\n+1821990\t|\t1816081\t|\n+1821991\t|\t1816081\t|\n+1821992\t|\t1816081\t|\n+1821993\t|\t1816081\t|\n+1821994\t|\t1816081\t|\n+1821995\t|\t1816081\t|\n+1822014\t|\t1785128\t|\n+1822015\t|\t1785128\t|\n+1822016\t|\t1785128\t|\n+1
822070\t|\t2755460\t|\n+1822335\t|\t2021423\t|\n+1822368\t|\t2804545\t|\n+1822420\t|\t2748684\t|\n+1822506\t|\t2887155\t|\n+1822520\t|\t2339898\t|\n+1822537\t|\t498667\t|\n+1823646\t|\t2707526\t|\n+1823647\t|\t1912848\t|\n+1823648\t|\t1912848\t|\n+1823649\t|\t1912848\t|\n+1823650\t|\t1912847\t|\n+1823651\t|\t1912847\t|\n+1823652\t|\t1817995\t|\n+1823654\t|\t1573944\t|\n+1823655\t|\t1573944\t|\n+1823656\t|\t1912924\t|\n+1823657\t|\t1912924\t|\n+1823659\t|\t1912924\t|\n+1823660\t|\t1912926\t|\n+1823661\t|\t1912926\t|\n+1823663\t|\t2707527\t|\n+1823664\t|\t2707527\t|\n+1823666\t|\t2043609\t|\n+1823667\t|\t1499204\t|\n+1823668\t|\t1817995\t|\n+1823670\t|\t2043609\t|\n+1823671\t|\t2043609\t|\n+1823672\t|\t2043609\t|\n+1823745\t|\t1815593\t|\n+1823749\t|\t1938882\t|\n+1823750\t|\t1938882\t|\n+1823756\t|\t2741497\t|\n+1824476\t|\t659607\t|\n+1824477\t|\t659607\t|\n+1824478\t|\t659607\t|\n+1824534\t|\t643435\t|\n+1824540\t|\t209159\t|\n+1824894\t|\t228088\t|\n+1824915\t|\t1986089\t|\n+1824939\t|\t2516209\t|\n+1825003\t|\t1262309\t|\n+1825038\t|\t1825039\t|\n+1825043\t|\t181160\t|\n+1825056\t|\t2516207\t|\n+1825057\t|\t2516207\t|\n+1825058\t|\t2516206\t|\n+1825059\t|\t2516204\t|\n+1825060\t|\t2516204\t|\n+1825061\t|\t2516204\t|\n+1825062\t|\t2516206\t|\n+1825063\t|\t2516204\t|\n+1825064\t|\t2516204\t|\n+1825065\t|\t2516204\t|\n+1825066\t|\t2516205\t|\n+1825290\t|\t1793177\t|\n+1825539\t|\t1245\t|\n+1825592\t|\t1896326\t|\n+1825594\t|\t1825591\t|\n+1825597\t|\t1896331\t|\n+1825658\t|\t1825660\t|\n+1825659\t|\t1825660\t|\n+1825667\t|\t1892481\t|\n+1825731\t|\t102325\t|\n+1825750\t|\t137691\t|\n+1825753\t|\t1620164\t|\n+1825862\t|\t2804545\t|\n+1825934\t|\t1184720\t|\n+1825947\t|\t67631\t|\n+1825951\t|\t1118826\t|\n+1825952\t|\t155669\t|\n+1825954\t|\t96390\t|\n+1825955\t|\t155672\t|\n+1825956\t|\t2100102\t|\n'
b
diff -r 000000000000 -r 09b7b0b2e2c2 test-data/test-db/names.dmp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/names.dmp Mon Jun 27 11:03:22 2022 +0000
[
@@ -0,0 +1,74 @@
+83333 | Escherichia coli K-12 | | scientific name |
+83333 | Escherichia coli K12 | | equivalent name |
+562 | "Bacillus coli" Migula 1895 | | authority |
+562 | "Bacterium coli commune" Escherich 1885 | | authority |
+562 | "Bacterium coli" (Migula 1895) Lehmann and Neumann 1896 | | authority |
+562 | ATCC 11775 | | type material |
+562 | Bacillus coli | | synonym |
+562 | Bacterium coli | | synonym |
+562 | Bacterium coli commune | | synonym |
+562 | CCUG 24 | | type material |
+562 | CCUG 29300 | | type material |
+562 | CIP 54.8 | | type material |
+562 | DSM 30083 | | type material |
+562 | Enterococcus coli | | synonym |
+562 | Escherchia coli | | misspelling |
+562 | Escherichia coli | | scientific name |
+562 | Escherichia coli (Migula 1895) Castellani and Chalmers 1919 | | authority |
+562 | Escherichia sp. MAR | | includes |
+562 | Escherichia/Shigella coli | | equivalent name |
+562 | Eschericia coli | | misspelling |
+562 | JCM 1649 | | type material |
+562 | LMG 2092 | | type material |
+562 | NBRC 102203 | | type material |
+562 | NCCB 54008 | | type material |
+562 | NCTC 9001 | | type material |
+562 | bacterium 10a | | includes |
+562 | bacterium E3 | | includes |
+561 | Escherchia | | misspelling |
+561 | Escherichia | | scientific name |
+561 | Escherichia Castellani and Chalmers 1919 | | authority |
+543 | Enterobacteraceae | | synonym |
+543 | Enterobacteraceae (ex Lapage 1979) Lapage 1982, fam. nov., nom. rev. | | synonym |
+543 | Enterobacteriaceae | | scientific name |
+543 | Enterobacteriaceae (ex Rahn 1937) Ewing et al. 1980, fam. nov., nom. rev. | | synonym |
+543 | Enterobacteriaceae Rahn 1937 | | synonym |
+543 | gamma-3 proteobacteria | gamma-3 proteobacteria <#1> | in-part |
+91347 | 'Enterobacteriales' | | synonym |
+91347 | Enterobacteriaceae and related endosymbionts | | synonym |
+91347 | Enterobacteriaceae group | | synonym |
+91347 | Enterobacteriales | | scientific name |
+91347 | enterobacteria | enterobacteria<blast91347> | blast name |
+91347 | gamma-3 proteobacteria | gamma-3 proteobacteria <#5> | in-part |
+1236 | Gammaproteobacteria | | scientific name |
+1236 | Gammaproteobacteria Garrity et al. 2005 | | synonym |
+1236 | Proteobacteria gamma subdivision | | synonym |
+1236 | Purple bacteria, gamma subdivision | | synonym |
+1236 | g-proteobacteria | gamma proteos<blast1236> | blast name |
+1236 | gamma proteobacteria | | synonym |
+1236 | gamma subdivision | | synonym |
+1236 | gamma subgroup | | synonym |
+1224 | Proteobacteria | | scientific name |
+1224 | Proteobacteria Garrity et al. 2005 | | authority |
+1224 | Proteobacteria [class] Stackebrandt et al. 1988 | | authority |
+1224 | not Proteobacteria Cavalier-Smith 2002 | | authority |
+1224 | proteobacteria | proteobacteria<blast1224> | blast name |
+1224 | purple bacteria | | common name |
+1224 | purple bacteria and relatives | | common name |
+1224 | purple non-sulfur bacteria | | common name |
+1224 | purple photosynthetic bacteria | | common name |
+1224 | purple photosynthetic bacteria and relatives | | common name |
+2 | Bacteria | Bacteria <prokaryote> | scientific name |
+2 | Monera | Monera <Bacteria> | in-part |
+2 | Procaryotae | Procaryotae <Bacteria> | in-part |
+2 | Prokaryota | Prokaryota <Bacteria> | in-part |
+2 | Prokaryotae | Prokaryotae <Bacteria> | in-part |
+2 | bacteria | bacteria <blast2> | blast name |
+2 | eubacteria | | genbank common name |
+2 | not Bacteria Haeckel 1894 | | synonym |
+2 | prokaryote | prokaryote <Bacteria> | in-part |
+2 | prokaryotes | prokaryotes <Bacteria> | in-part |
+1 | all | | synonym |
+1 | root | | scientific name |
+131567 | biota | | synonym |
+131567 | cellular organisms | | scientific name |
b
diff -r 000000000000 -r 09b7b0b2e2c2 test-data/test-db/nodes.dmp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/nodes.dmp Mon Jun 27 11:03:22 2022 +0000
b
@@ -0,0 +1,10 @@
+83333 | 562 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | |
+562 | 561 | species | EC | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | |
+561 | 543 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+543 | 91347 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+91347 | 1236 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+1236 | 1224 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+1224 | 2 | phylum | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+2 | 131567 | superkingdom | | 0 | 0 | 11 | 0 | 0 | 0 | 0 | 0 | |
+131567 | 1 | no rank | | 8 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | |
+1 | 1 | no rank | | 8 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | |
b
diff -r 000000000000 -r 09b7b0b2e2c2 test-data/test-db/readme.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/readme.txt Mon Jun 27 11:03:22 2022 +0000
b
@@ -0,0 +1,61 @@
+*.dmp files are bcp-like dumps from the GenBank taxonomy database.
+
+General information.
+Field terminator is "\t|\t"
+Row terminator is "\t|\n"
+
+nodes.dmp file consists of taxonomy nodes. The description for each node includes the following
+fields:
+ tax_id -- node id in GenBank taxonomy database
+  parent tax_id -- parent node id in GenBank taxonomy database
+  rank -- rank of this node (superkingdom, kingdom, ...) 
+  embl code -- locus-name prefix; not unique
+  division id -- see division.dmp file
+  inherited div flag  (1 or 0) -- 1 if node inherits division from parent
+  genetic code id -- see gencode.dmp file
+  inherited GC  flag  (1 or 0) -- 1 if node inherits genetic code from parent
+  mitochondrial genetic code id -- see gencode.dmp file
+  inherited MGC flag  (1 or 0) -- 1 if node inherits mitochondrial gencode from parent
+  GenBank hidden flag (1 or 0)            -- 1 if name is suppressed in GenBank entry lineage
+  hidden subtree root flag (1 or 0)       -- 1 if this subtree has no sequence data yet
+  comments -- free-text comments and citations
+
+Taxonomy names file (names.dmp):
+ tax_id -- the id of node associated with this name
+ name_txt -- name itself
+ unique name -- the unique variant of this name if name not unique
+ name class -- (synonym, common name, ...)
+
+Divisions file (division.dmp):
+ division id -- taxonomy database division id
+ division cde -- GenBank division code (three characters), e.g. BCT, PLN, VRT, MAM, PRI...
+ division name -- full division name, e.g. Bacteria, Mammals, Primates...
+ comments
+
+Genetic codes file (gencode.dmp):
+ genetic code id -- GenBank genetic code id
+ abbreviation -- genetic code name abbreviation
+ name -- genetic code name
+ cde -- translation table for this genetic code
+ starts -- start codons for this genetic code
+
+Deleted nodes file (delnodes.dmp):
+ tax_id -- deleted node id
+
+Merged nodes file (merged.dmp):
+ old_tax_id                              -- id of the node that has been merged
+ new_tax_id                              -- id of the node that is the result of the merge
+
+Citations file (citations.dmp):
+ cit_id -- the unique id of citation
+ cit_key -- citation key
+ pubmed_id -- unique id in PubMed database (0 if not in PubMed)
+ medline_id -- unique id in MedLine database (0 if not in MedLine)
+ url -- URL associated with citation
+ text -- any text (usually article name and authors).
+ -- The following characters are escaped in this text by a backslash:
+ -- newline (appear as "\n"),
+ -- tab character ("\t"),
+ -- double quotes ('\"'),
+ -- backslash character ("\\").
+ taxid_list -- list of node ids separated by a single space
b
diff -r 000000000000 -r 09b7b0b2e2c2 tool-data/ncbi_taxonomy.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/ncbi_taxonomy.loc.sample Mon Jun 27 11:03:22 2022 +0000
b
@@ -0,0 +1,2 @@
+#value name path
+test-db-2022 "Test Database"  ${__HERE__}/test-db
b
diff -r 000000000000 -r 09b7b0b2e2c2 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Mon Jun 27 11:03:22 2022 +0000
b
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<tables>
+    <!-- Locations of taxonomy data downloaded from NCBI; the .loc file is the installed copy of tool-data/ncbi_taxonomy.loc.sample (test runs use tool_data_table_conf.xml.test instead) -->
+    <table name="ncbi_taxonomy" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/ncbi_taxonomy.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r 09b7b0b2e2c2 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Mon Jun 27 11:03:22 2022 +0000
b
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<tables>
+    <!-- Test-only data table: NCBI taxonomy locations are read from the .loc file under test-data -->
+    <table comment_char="#" name="ncbi_taxonomy">
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/test-data/ncbi_taxonomy.loc"/>
+    </table>
+</tables>