Repository 'biobox_add_taxid'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/biobox_add_taxid

Changeset 0:1e10251b9615 (2024-08-25)
Next changeset 1:d368e312aebd (2024-08-30)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/amber/ commit 91090b1565baeddbd3c96e74bc08a37990fafd3a
added:
biobox_add_taxid.xml
macros.xml
test-data/cami2_mouse_gut_average_genome_coverage.tsv
test-data/elated_franklin_0
test-data/goofy_hypatia_2
test-data/gsa_mapping.binning
test-data/naughty_carson_2
test-data/ncbi_taxonomy.loc
test-data/test-db/delnodes.dmp
test-data/test-db/division.dmp
test-data/test-db/gc.prt
test-data/test-db/gencode.dmp
test-data/test-db/merged.dmp
test-data/test-db/names.dmp
test-data/test-db/nodes.amber.ft
test-data/test-db/nodes.dmp
test-data/test-db/readme.txt
test-data/test_add.fasta.gz
test-data/test_add.tsv
test-data/test_add1.fasta
test-data/test_add2.fasta
test-data/test_bat_file.tsv
test-data/test_binning.tsv
test-data/test_binning2.tsv
test-data/test_biobox_file.tsv
test-data/test_biobox_file_add_taxid_bat.tsv
test-data/test_biobox_file_add_taxid_gtdb.tsv
test-data/test_gold.tsv
test-data/test_gtdb_file.tsv
test-data/test_gtdb_to_taxdump_file.tsv
test-data/test_taxonkit_file.tsv
test-data/unique_common.tsv
tool-data/ncbi_taxonomy.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 000000000000 -r 1e10251b9615 biobox_add_taxid.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/biobox_add_taxid.xml Sun Aug 25 13:19:14 2024 +0000
[
@@ -0,0 +1,119 @@
+<tool id="biobox_add_taxid" name="Biobox add taxid" version="@SCRIPT_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Add taxid output from BAT or GTDB to biobox binning data</description>
+    <macros>
+        <import>macros.xml</import>
+        <token name="@SCRIPT_VERSION@">0.4</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@SCRIPT_VERSION@">biobox_add_taxid</requirement>
+    </requirements>
+    <command detect_errors="exit_code">
+    <![CDATA[
+        ## Dataset names are user-controlled: keep only characters that are safe
+        ## inside a single-quoted shell word and as a plain file name.
+        #import re
+        #set $biobox_name = re.sub('[^\w\-.]', '_', str($biobox_file.element_identifier))
+
+        mkdir -p input &&
+
+        ## The script receives the biobox file under its (sanitized) dataset name.
+        ln -s '${biobox_file}' '${biobox_name}' &&
+
+        ## Stage every selected BAT/GTDB-Tk table as input/<index>.tsv.
+        #for $i, $f in enumerate($tool_type.input):
+            ln -s '${f}' 'input/${i}.tsv' &&
+        #end for
+
+        biobox_add_taxid.py
+        '${biobox_name}'
+        '${tool_type.is_select}'
+        'input'
+        #if $tool_type.is_select == 'GTDB':
+            ## These parameters are declared inside the tool_type conditional,
+            ## so they are addressed through it.
+            -g '${tool_type.gtdb_to_taxdump}'
+            -t '${tool_type.taxonkit}'
+            -c '${tool_type.column}'
+        #end if
+        &&
+
+        ## Copy the result file (matched by its mode-specific name part) to the
+        ## Galaxy output dataset; the path is quoted like every other path here.
+        #if $tool_type.is_select == 'GTDB':
+            cp *_add_taxid_GTDB* '${output}'
+        #else:
+            cp *_add_taxid_BAT* '${output}'
+        #end if
+
+    ]]>
+    </command>
+    <inputs>
+        <!-- Biobox binning file that receives the additional taxid column. -->
+        <param name="biobox_file" type="data" format="tabular" label="CAMI amber biobox file input" help="Input the CAMI amber biobox file that corresponds to the binner whose output was used as input for BAT/GTDB-Tk."/>
+        <!-- The selected source tool decides which extra inputs are shown and which options the command passes. -->
+        <conditional name="tool_type">
+            <param name="is_select" type="select" label="Select the tool whose output should be used here" help="Select BAT when you use the bin2classifier file(s) or select GTDB when using the summary file(s)">
+                <option value="BAT">BAT</option>
+                <option value="GTDB">GTDB</option>
+            </param>
+            <when value="GTDB">
+                <param argument="--gtdb_to_taxdump" type="data" format="tabular" label="Input the output from gtdb_to_taxdump here" help="Use the output from gtdb_to_taxdump here since we need the names mapped from GTDB to NCBI to get the corresponding taxids from NCBI"/>
+                <param argument="--taxonkit" type="data" format="tabular" label="Input the output from Taxonkit here" help="Use the output from Taxonkit here since we need the NCBI names mapped to the corresponding taxids"/>
+                <!-- Column picker bound to the Taxonkit dataset above. -->
+                <param argument="--column" type="data_column" data_ref="taxonkit" label="Input the column with the NCBI names"/>
+                <param name="input" type="data" multiple="true" format="tabular" label="Input the GTDB-Tk summary file(s) here"
+                    help="Use the GTDB-Tk file(s) which correspond to the binning file!"/>
+            </when>
+            <when value="BAT">
+                <param name="input" type="data" multiple="true" format="tabular" label="Input bin2classifier file(s) from BAT here"
+                    help="Use the BAT file(s) which correspond to the binning file!"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- Single tabular dataset; the command copies the script's result file onto it. -->
+        <data format="tabular" name="output" label="${tool.name}: BIOBOX ADD TAXID COLUMN" />
+    </outputs>
+    <tests>
+        <!-- BAT mode: biobox file plus one bin2classifier table. -->
+        <test expect_num_outputs="1">
+            <param value="test_biobox_file.tsv" name="biobox_file" ftype="tabular" />
+            <conditional name="tool_type">
+                <param value="BAT" name="is_select" />
+                <param value="test_bat_file.tsv" name="input" ftype="tabular" />
+            </conditional>
+            <output file="test_biobox_file_add_taxid_bat.tsv" name="output" />
+        </test>
+        <!-- GTDB mode: additionally needs the gtdb_to_taxdump table, the Taxonkit table and the name column. -->
+        <test expect_num_outputs="1">
+            <param value="test_biobox_file.tsv" name="biobox_file" ftype="tabular" />
+            <conditional name="tool_type">
+                <param value="GTDB" name="is_select" />
+                <param value="test_gtdb_to_taxdump_file.tsv" name="gtdb_to_taxdump" ftype="tabular" />
+                <param value="test_taxonkit_file.tsv" name="taxonkit" ftype="tabular" />
+                <param value="1" name="column" />
+                <param value="test_gtdb_file.tsv" name="input" ftype="tabular" />
+            </conditional>
+            <output file="test_biobox_file_add_taxid_gtdb.tsv" name="output" />
+        </test>
+    </tests>
+    <help>
+    <![CDATA[
+        
+        **USAGE OF THIS TOOL**
+
+        This tool was designed to use BAT/GTDB-Tk output to extend the CAMI amber biobox file with a taxid column, which enables the taxonomic evaluation with amber!
+
+        **IMPORTANT NOTE WHEN USING GTDB OUTPUT**
+
+        Before using GTDB-Tk output you have to use two other tools to map the GTDB names to the NCBI names and then map the NCBI names to their taxids.
+        To map the GTDB names to the NCBI names, extract the GTDB names from the summary file and pass them to the gtdb_to_taxdump tool!
+        After using the gtdb_to_taxdump tool, extract the NCBI names from its output and then use the tool Taxonkit[name2taxid] to finally get the taxids.
+
+        **INPUTS**
+
+        - The CAMI amber biobox file
+        - BAT[bin2classifier] file(s) OR GTDB-Tk[Summary] file(s)
+        - The gtdb_to_taxdump output[WHEN USING GTDB-Tk]
+        - The Taxonkit[name2taxid] output[WHEN USING GTDB-Tk]
+
+        **OUTPUT**
+
+        A CAMI amber biobox file with a taxid column
+    ]]> 
+    </help>
+    <citations>
+        <!-- BibTeX entry for the wrapped biobox_add_taxid script. -->
+        <!-- NOTE(review): the url below names release-0.3 while the script version token of this tool is 0.4; confirm which release should be cited. -->
+        <citation type="bibtex">@misc{BibEntry2024Aug,
+            title = {{biobox{$\_$}add{$\_$}taxid}},
+            author = {Santino Faack (SantaMcCloud)},
+            journal = {GitHub},
+            year = {2024},
+            month = aug,
+            note = {[Online; accessed 22. Aug. 2024]},
+            url = {https://github.com/SantaMcCloud/biobox_add_taxid/tree/release-0.3}
+        }</citation>
+    </citations>
+</tool>
b
diff -r 000000000000 -r 1e10251b9615 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Sun Aug 25 13:19:14 2024 +0000
b
@@ -0,0 +1,17 @@
+<macros>
+    <!-- Tokens substituted into tool files that import this macro file. -->
+    <token name="@TOOL_VERSION@">2.0.4</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">21.05</token>
+
+    <!-- Requirements macro: the cami-amber package at the tool version; yield lets the expanding tool append its own entries. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement version="@TOOL_VERSION@" type="package">cami-amber</requirement>
+            <yield />
+        </requirements>
+    </xml>
+
+    <!-- Citations macro: one DOI citation; yield lets the expanding tool append further citations. -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1093/gigascience/giy069</citation>
+            <yield />
+        </citations>
+    </xml>
+</macros>
\ No newline at end of file
b
diff -r 000000000000 -r 1e10251b9615 test-data/cami2_mouse_gut_average_genome_coverage.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cami2_mouse_gut_average_genome_coverage.tsv Sun Aug 25 13:19:14 2024 +0000
b
b'@@ -0,0 +1,793 @@\n+@SampleID:gsa_pooled\n+@@GENOMEID\tCOVERAGE\n+4378740.0\t82.85111527272727\n+4378740.1\t27.159305090909097\n+denovo10559.0\t2.1596957142857143\n+179927.0\t1.6946866666666667\n+denovo8373.1\t2.07144\n+136604.0\t9.150489565217391\n+denovo8373.0\t1.1413460000000002\n+269378.0\t8.051563333333332\n+190114.0\t18.253119629629627\n+228140.0\t3.078681818181818\n+135956.0\t121.52672015625001\n+259846.0\t12.298210588235296\n+162576.0\t9.57867191489362\n+184966.0\t14.461031521739134\n+609589.0\t1.5715442857142856\n+264155.1\t1.1258205000000001\n+264155.0\t1.7604114285714287\n+167420.0\t2.1672111111111114\n+3957916.0\t3.1968282352941175\n+336277.0\t26.192326190476187\n+denovo4860.0\t1.7104116666666669\n+228785.0\t1.4085062500000003\n+173916.0\t5.575463703703704\n+129394.0\t5.108499999999999\n+190547.0\t3.5334664705882353\n+230268.0\t2.673463809523809\n+851733.0\t153.3585745901639\n+189840.0\t10.205699821428576\n+182712.1\t2.2765548387096772\n+182712.0\t2.175317096774194\n+3919797.0\t4.9489\n+189789.0\t1.5305385714285715\n+187400.0\t3.648125882352941\n+187400.1\t1.1916073529411764\n+denovo3138.0\t1.8725216666666669\n+180535.0\t2.469453809523809\n+164237.0\t15.727489782608691\n+665703.0\t4.169013333333334\n+1105328.0\t1.8377339285714287\n+denovo404.0\t1.5360288888888889\n+193680.1\t1.8556033333333335\n+174824.0\t17.01344791666667\n+180944.0\t17.386915312499998\n+322900.0\t1.2745033333333335\n+322900.1\t1.8254233333333332\n+134065.1\t3.4375706666666668\n+134065.0\t6.014496444444443\n+330296.0\t1.5970694736842104\n+180972.0\t6.945454117647059\n+463794.1\t53.0255842857143\n+463794.0\t3.409252857142857\n+318764.0\t10.406176136363635\n+649107.1\t1.613303\n+649107.0\t1.284263\n+831178.0\t5.336312666666667\n+1107303.0\t11.12115\n+133892.0\t2.8669129411764707\n+259983.0\t2.4335405882352945\n+267042.0\t6.82141923076923\n+2883968.1\t1.7986943999999998\n+2883968.0\t1.8012223999999994\n+denovo6222.0\t2.4582062500000004\n+263687.0\t4.310272888888889\n+178480.0\t5.46815205
8823529\n+175758.0\t6.541879333333333\n+271185.0\t1.3544850000000002\n+271185.1\t1.2028362499999998\n+268844.0\t2.0753652380952383\n+268844.1\t1.3869676190476188\n+197875.0\t14.972434799999997\n+269125.1\t1.1814268181818184\n+269125.0\t2.3737795454545454\n+289627.0\t1.515152222222222\n+denovo1457.0\t2.2307066666666664\n+193680.0\t1.742774\n+denovo2280.0\t4.200409743589743\n+331878.0\t1.3200399999999999\n+637934.0\t1.89482\n+denovo4635.0\t1.7092482352941178\n+4372003.0\t16.02861487804878\n+4380294.0\t2.040776923076923\n+denovo2990.1\t1.1583124999999999\n+denovo2990.0\t1.3709716666666667\n+70830.1\t1.654118\n+70830.0\t3.399687\n+460611.0\t35.80852840909092\n+173965.0\t2.793195\n+4449518.0\t40.59577175\n+288521.0\t2.0466306060606065\n+269828.0\t1.5252745\n+197644.1\t1.35091\n+197644.0\t1.1274035714285715\n+4476780.0\t6.827021470588235\n+denovo11208.0\t1.878425\n+1992.0\t6.613449318181819\n+denovo12065.0\t1.1252816666666665\n+1033413.0\t2.905784\n+174573.0\t46.430335714285725\n+335965.0\t1.8228435294117644\n+175485.0\t1.3735716666666666\n+4094866.0\t2.642081111111111\n+denovo6432.0\t1.4932475\n+3141094.0\t1.425924375\n+4390319.0\t1.7217578571428571\n+339549.0\t1.8419992592592591\n+denovo10472.0\t1.389072\n+denovo3780.1\t1.1407775\n+denovo3780.0\t2.5409425\n+575041.0\t1.3064888888888886\n+denovo2874.0\t1.7649675\n+162639.0\t2.5080925925925923\n+47365.0\t36.200065714285714\n+188610.0\t4.44078\n+denovo8413.0\t2.38694\n+4339832.1\t1.3861775\n+denovo717.1\t1.7312518181818182\n+177917.0\t1.8002877272727273\n+192316.0\t8.26455279069768\n+192316.1\t3.217902325581395\n+1756274.0\t1.77177\n+163997.0\t1.8599438888888888\n+denovo4995.1\t1.1159\n+denovo10319.0\t1.54661\n+190673.0\t5.8158084\n+denovo1211.0\t2.1048324999999997\n+4390319.1\t1.1700242857142857\n+178546.0\t3.74888448275862\n+171232.0\t60.66539861111112\n+276484.0\t36.12193222222223\n+174805.0\t2.2008216666666667\n+denovo4995.0\t1.5272266666666665\n+denovo2465.0\t2.505867142857143\n+denovo12065.1\t1.392035\n+4455308.0\t3.
4237583333333337\n+denovo159.0\t1.840666\n+306306.0\t1.7133856666666667\n+4397402.0\t16.11684301886792\n+denovo9105.0\t2.974808888888889\n+4339832.0\t1.1458575\n+4340358.0\t3.9148617948717943\n+denovo7143.0\t2.8229515789473685\n+denovo12129.0\t2.9517788888888887\n+denovo1842.0\t1.617959999'..b'4783333333334\n+denovo944.0\t1.5240216666666666\n+denovo7784.1\t1.2434660000000002\n+denovo1684.0\t1.52699\n+denovo2276.0\t1.282336\n+denovo2276.1\t1.9411040000000002\n+denovo12370.0\t2.5710957894736843\n+denovo2333.0\t1.4894864285714284\n+328617.0\t1.3604233333333333\n+4464113.0\t1.14693875\n+4464113.1\t1.47087875\n+denovo4399.0\t1.4359316666666666\n+3016478.0\t2.008706\n+169845.0\t6.580205\n+437137.0\t2.7682249999999997\n+4444771.0\t1.3548569230769232\n+denovo10694.0\t1.363445\n+denovo4276.0\t1.4869480000000002\n+1678333.0\t1.408765\n+1678333.1\t3.8684187499999996\n+180869.0\t3.258762222222222\n+denovo4266.0\t1.7358233333333333\n+271602.0\t2.134602\n+270984.0\t1.3818425\n+denovo5877.0\t1.661642857142857\n+324894.1\t1.22871125\n+1871227.0\t2.63583125\n+4417539.0\t22.10454333333334\n+358007.1\t1.2915514285714285\n+358007.0\t1.2368914285714288\n+denovo5418.0\t1.6863737499999998\n+denovo8991.0\t1.4045477777777777\n+denovo329.0\t1.5336966666666667\n+327900.0\t1.9920683333333336\n+denovo7795.0\t1.3862385714285714\n+1109559.0\t5.674169999999999\n+denovo9496.0\t1.41716\n+324894.0\t1.36645\n+denovo5602.0\t2.5779677777777783\n+175712.0\t1.439605\n+181961.0\t5.828380714285713\n+193418.0\t2.3771136363636365\n+193418.1\t3.1666690909090907\n+4444771.1\t1.6866776923076923\n+258485.0\t1.9761415384615382\n+258485.1\t1.3308069230769233\n+184470.0\t1.633547\n+14094.0\t19.643514999999997\n+denovo1235.0\t1.632027142857143\n+267451.0\t7.209709999999999\n+denovo2950.0\t2.235615\n+132338.0\t1.76194\n+denovo4689.0\t1.776592857142857\n+262642.0\t2.8247864285714286\n+denovo8298.0\t26.416171666666667\n+denovo8298.1\t4.104799999999999\n+denovo9942.1\t1.5720314285714285\n+denovo9942.0\t1.132172857142857\
n+262326.0\t1.506686923076923\n+denovo6995.0\t1.259812857142857\n+denovo6995.1\t1.4129157142857143\n+270448.1\t1.547211111111111\n+270448.0\t1.14935\n+4327881.0\t4.055258\n+174358.0\t2.8452849999999996\n+321902.0\t1.6972900000000002\n+132929.0\t2.5845619999999996\n+262917.0\t9.391\n+denovo1064.0\t3.830295\n+denovo13345.0\t2.4504333333333332\n+262326.1\t1.8635176923076922\n+262104.0\t5.03491\n+10512.0\t5.898263333333333\n+4365109.0\t8.2298475\n+184925.0\t3.50690875\n+denovo8648.0\t2.15935125\n+176850.0\t13.794300399999997\n+324985.0\t1.42818\n+denovo3289.1\t1.269956\n+denovo3289.0\t1.281234\n+4439360.0\t1.3851820000000001\n+269546.0\t2.0251725\n+827195.0\t1.6229333333333331\n+denovo10247.0\t1.2836083333333332\n+denovo10247.1\t1.5977866666666665\n+denovo12921.0\t2.036295714285714\n+denovo9305.0\t2.120885\n+3377773.0\t1.5157440000000002\n+denovo8150.0\t1.669357142857143\n+denovo420.0\t2.1933128571428573\n+denovo1369.0\t1.8959074999999999\n+denovo9737.0\t1.617026\n+661259.1\t1.566606\n+661259.0\t3.020904\n+denovo2181.1\t1.1970625\n+denovo2181.0\t1.3529841666666667\n+392918.0\t3.14006\n+259087.0\t1.6471485714285714\n+4413214.0\t2.6894292857142856\n+334395.0\t1.8571783333333334\n+4355314.0\t1.6827836363636364\n+276236.0\t1.771365\n+denovo6811.0\t1.7322949999999997\n+271230.0\t3.20669\n+denovo9575.0\t1.6087300000000002\n+4379247.0\t1.6002522222222224\n+denovo4069.0\t3.776562\n+3029968.0\t3.6485949999999994\n+164664.0\t1.533347142857143\n+denovo8312.0\t2.4125924999999997\n+261315.0\t1.3815999999999997\n+261315.1\t1.11611875\n+995147.0\t1.8867720000000001\n+denovo3118.0\t1.3638081818181818\n+denovo5222.0\t2.2673900000000002\n+denovo5222.1\t1.5511666666666666\n+4315396.0\t4.306291666666666\n+denovo1719.0\t3.392265\n+denovo6615.1\t1.4909085714285712\n+denovo6615.0\t1.23878\n+261373.0\t1.6409980000000002\n+denovo1277.0\t2.01133\n+4365130.0\t2.6612250000000004\n+denovo8808.1\t13.514236\n+denovo8808.0\t20.840552\n+264225.1\t1.5198928571428572\n+264225.0\t1.113217142857143\n+denov
o2529.0\t4.42105\n+4336940.0\t2.0403977777777778\n+316381.0\t2.1045225000000003\n+denovo1739.0\t1.620848888888889\n+denovo10769.0\t1.6201025\n+188299.0\t1.6661728571428571\n+833816.0\t6.542272857142858\n+denovo7482.0\t1.7217740000000001\n+192684.0\t1.4398116666666667\n+denovo9308.0\t1.8554099999999998\n+denovo12977.0\t1.598456\n+832848.0\t3.6251450000000003\n+839200.0\t2.013764\n+denovo5086.0\t1.5241740000000001\n+816702.0\t2.17915\n+191251.0\t1.6226375000000002\n+denovo6433.0\t1.676135\n+268099.0\t1.4233933333333333\n+556126.0\t1.91262\n+denovo7014.0\t1.760235\n+782953.0\t3.75117\n'
b
diff -r 000000000000 -r 1e10251b9615 test-data/elated_franklin_0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/elated_franklin_0 Sun Aug 25 13:19:14 2024 +0000
b
b'@@ -0,0 +1,5680 @@\n+@Version:0.9.0\n+@SampleID:CAMI_low\n+@@SEQUENCEID\tBINID\n+RL|S1|C7433\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.1.fa\n+RL|S1|C8350\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.1.fa\n+RL|S1|C13557\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.1.fa\n+RL|S1|C11124\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.1.fa\n+RL|S1|C1599\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.1.fa\n+RL|S1|C2428\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.1.fa\n+RL|S1|C10143\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.1.fa\n+RL|S1|C16047\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.1.fa\n+RL|S1|C8738\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.1.fa\n+RL|S1|C10539\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.1.fa\n+RL|S1|C2706\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.1.fa\n+RL|S1|C14243\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.1.fa\n+RL|S1|C12558\tB
INS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.1.fa\n+RL|S1|C17887\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.1.fa\n+RL|S1|C14870\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.1.fa\n+RL|S1|C11005\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.1.fa\n+RL|S1|C4642\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.1.fa\n+RL|S1|C13291\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.1.fa\n+RL|S1|C8674\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.10.fa\n+RL|S1|C8023\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.10.fa\n+RL|S1|C1203\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.10.fa\n+RL|S1|C60\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.10.fa\n+RL|S1|C16829\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.10.fa\n+RL|S1|C4203\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.10.fa\n+RL|S1|C9605\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--mi
nSamples_5_--numThreads_8_--superspecific.10.fa\n+RL|S1|'..b'bly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.9.fa\n+RL|S1|C1192\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.9.fa\n+RL|S1|C18480\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.9.fa\n+RL|S1|C9199\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.9.fa\n+RL|S1|C8810\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.9.fa\n+RL|S1|C7829\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.9.fa\n+RL|S1|C5607\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.9.fa\n+RL|S1|C919\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.9.fa\n+RL|S1|C3296\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.9.fa\n+RL|S1|C5475\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.9.fa\n+RL|S1|C328\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.9.fa\n+RL|S1|C9205\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.9.fa\n+RL|S1|C4046\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--m
inSamples_5_--numThreads_8_--superspecific.9.fa\n+RL|S1|C8043\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.9.fa\n+RL|S1|C15302\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.9.fa\n+RL|S1|C18097\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.9.fa\n+RL|S1|C17450\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.9.fa\n+RL|S1|C18322\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.9.fa\n+RL|S1|C19115\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.9.fa\n+RL|S1|C7707\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.9.fa\n+RL|S1|C6687\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.9.fa\n+RL|S1|C5294\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.9.fa\n+RL|S1|C11379\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.9.fa\n+RL|S1|C11166\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.9.fa\n+RL|S1|C7045\tBINS/CAMI_low_RL_S001__insert_270_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.9.fa\n+RL|S1|C10012\tBINS/CAMI_low_RL_S001__insert_270
_GoldStandardAssembly.fasta.metabat-bins-_--minContig_1500_--minSamples_5_--numThreads_8_--superspecific.9.fa\n'
b
diff -r 000000000000 -r 1e10251b9615 test-data/goofy_hypatia_2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/goofy_hypatia_2 Sun Aug 25 13:19:14 2024 +0000
b
b'@@ -0,0 +1,8440 @@\n+#CAMI concoct default\n+@SampleID:CAMI_low\n+@Version:0.9.0\n+@@SEQUENCEID\tBINID\n+RL|S1|C10\t30\n+RL|S1|C100\t5\n+RL|S1|C1000\t30\n+RL|S1|C10000\t4\n+RL|S1|C10002\t8\n+RL|S1|C10004\t11\n+RL|S1|C10008\t30\n+RL|S1|C10011\t11\n+RL|S1|C10012\t16\n+RL|S1|C10014\t8\n+RL|S1|C10015\t14\n+RL|S1|C10017\t30\n+RL|S1|C1002\t5\n+RL|S1|C10020\t11\n+RL|S1|C10022\t30\n+RL|S1|C10026\t30\n+RL|S1|C10028\t30\n+RL|S1|C1003\t0\n+RL|S1|C10032\t1\n+RL|S1|C10034\t18\n+RL|S1|C10036\t33\n+RL|S1|C10037\t17\n+RL|S1|C10039\t5\n+RL|S1|C1004\t25\n+RL|S1|C10040\t11\n+RL|S1|C10041\t8\n+RL|S1|C10046\t17\n+RL|S1|C10047\t30\n+RL|S1|C10052\t5\n+RL|S1|C10056\t14\n+RL|S1|C10059\t14\n+RL|S1|C1006\t30\n+RL|S1|C10060\t30\n+RL|S1|C10069\t17\n+RL|S1|C1007\t18\n+RL|S1|C10070\t24\n+RL|S1|C10071\t28\n+RL|S1|C10073\t30\n+RL|S1|C10075\t30\n+RL|S1|C10080\t18\n+RL|S1|C10083\t30\n+RL|S1|C10084\t30\n+RL|S1|C10091\t14\n+RL|S1|C10093\t30\n+RL|S1|C10095\t11\n+RL|S1|C10096\t30\n+RL|S1|C10098\t12\n+RL|S1|C101\t14\n+RL|S1|C1010\t11\n+RL|S1|C10103\t30\n+RL|S1|C10104\t30\n+RL|S1|C10108\t18\n+RL|S1|C10109\t14\n+RL|S1|C1011\t5\n+RL|S1|C10110\t11\n+RL|S1|C10113\t5\n+RL|S1|C10117\t11\n+RL|S1|C10118\t16\n+RL|S1|C10122\t9\n+RL|S1|C10124\t17\n+RL|S1|C10127\t17\n+RL|S1|C10128\t5\n+RL|S1|C10129\t11\n+RL|S1|C1013\t30\n+RL|S1|C10132\t5\n+RL|S1|C10136\t18\n+RL|S1|C10143\t2\n+RL|S1|C1015\t11\n+RL|S1|C10150\t33\n+RL|S1|C10153\t17\n+RL|S1|C10154\t1\n+RL|S1|C10157\t11\n+RL|S1|C10162\t30\n+RL|S1|C10168\t30\n+RL|S1|C10170\t8\n+RL|S1|C10171\t30\n+RL|S1|C10173\t30\n+RL|S1|C10175\t30\n+RL|S1|C10179\t3\n+RL|S1|C10183\t11\n+RL|S1|C10184\t14\n+RL|S1|C10187\t8\n+RL|S1|C10188\t11\n+RL|S1|C1019\t4\n+RL|S1|C10193\t11\n+RL|S1|C10194\t15\n+RL|S1|C10197\t30\n+RL|S1|C102\t11\n+RL|S1|C10204\t5\n+RL|S1|C10207\t30\n+RL|S1|C10208\t30\n+RL|S1|C10209\t14\n+RL|S1|C10210\t30\n+RL|S1|C10212\t5\n+RL|S1|C10213\t9\n+RL|S1|C1022\t30\n+RL|S1|C10221\t30\n+RL|S1|C10223\t14\n+RL|S1|C10224\t8\n+RL|S1|C10225\t30\n+RL|S1|C10226\t30\n+RL|S1|C10229\t5\n+RL
|S1|C1023\t14\n+RL|S1|C10230\t17\n+RL|S1|C10234\t8\n+RL|S1|C10235\t30\n+RL|S1|C10236\t30\n+RL|S1|C10237\t28\n+RL|S1|C10240\t30\n+RL|S1|C10242\t9\n+RL|S1|C10243\t11\n+RL|S1|C10244\t11\n+RL|S1|C10246\t30\n+RL|S1|C10249\t4\n+RL|S1|C1025\t31\n+RL|S1|C10251\t8\n+RL|S1|C10252\t8\n+RL|S1|C10255\t11\n+RL|S1|C10258\t30\n+RL|S1|C10259\t11\n+RL|S1|C1026\t30\n+RL|S1|C10261\t18\n+RL|S1|C10262\t11\n+RL|S1|C10266\t5\n+RL|S1|C10267\t30\n+RL|S1|C10268\t11\n+RL|S1|C10269\t11\n+RL|S1|C1027\t7\n+RL|S1|C10271\t30\n+RL|S1|C10273\t14\n+RL|S1|C10277\t1\n+RL|S1|C10278\t11\n+RL|S1|C10279\t5\n+RL|S1|C10280\t8\n+RL|S1|C10286\t30\n+RL|S1|C10287\t5\n+RL|S1|C10289\t18\n+RL|S1|C1029\t30\n+RL|S1|C10290\t5\n+RL|S1|C10294\t11\n+RL|S1|C10295\t30\n+RL|S1|C10297\t11\n+RL|S1|C10298\t1\n+RL|S1|C10299\t30\n+RL|S1|C1030\t14\n+RL|S1|C10302\t11\n+RL|S1|C10303\t30\n+RL|S1|C10306\t30\n+RL|S1|C10307\t8\n+RL|S1|C10308\t18\n+RL|S1|C10309\t11\n+RL|S1|C10311\t8\n+RL|S1|C10313\t17\n+RL|S1|C10315\t31\n+RL|S1|C10319\t30\n+RL|S1|C1032\t18\n+RL|S1|C10322\t14\n+RL|S1|C10323\t11\n+RL|S1|C10325\t18\n+RL|S1|C10328\t30\n+RL|S1|C10330\t5\n+RL|S1|C10332\t5\n+RL|S1|C10333\t8\n+RL|S1|C10335\t18\n+RL|S1|C10337\t8\n+RL|S1|C10349\t17\n+RL|S1|C1035\t11\n+RL|S1|C10350\t8\n+RL|S1|C10355\t30\n+RL|S1|C10356\t11\n+RL|S1|C10358\t11\n+RL|S1|C10359\t3\n+RL|S1|C10360\t14\n+RL|S1|C10361\t5\n+RL|S1|C10363\t33\n+RL|S1|C10364\t11\n+RL|S1|C10365\t30\n+RL|S1|C10366\t31\n+RL|S1|C10368\t5\n+RL|S1|C1037\t13\n+RL|S1|C10373\t9\n+RL|S1|C10374\t13\n+RL|S1|C10379\t24\n+RL|S1|C10383\t26\n+RL|S1|C10386\t30\n+RL|S1|C10388\t14\n+RL|S1|C1039\t11\n+RL|S1|C10390\t8\n+RL|S1|C10391\t30\n+RL|S1|C10392\t30\n+RL|S1|C10394\t30\n+RL|S1|C10395\t30\n+RL|S1|C10396\t5\n+RL|S1|C10397\t30\n+RL|S1|C10398\t30\n+RL|S1|C10399\t8\n+RL|S1|C104\t30\n+RL|S1|C1040\t8\n+RL|S1|C10408\t24\n+RL|S1|C10413\t8\n+RL|S1|C10414\t5\n+RL|S1|C10415\t14\n+RL|S1|C10417\t18\n+RL|S1|C10419\t5\n+RL|S1|C10420\t11\n+RL|S1|C10421\t8\n+RL|S1|C10422\t5\n+RL|S1|C10424\t1\n+RL|S1|C10426\t30\n+RL|S1|C10428\t8\
n+RL|S1|C10430\t16\n+RL|S1|C10431\t8\n+RL|S1|C10432\t14\n+RL|S1|C10434\t30\n+RL|S1|C10436\t13\n+RL|S1|C10437\t30\n+RL|S1|C10438\t13\n+RL|S1|C10439\t30\n+RL|S1|C1044\t30\n+RL|S1|C10440\t5\n+RL|S1|C10442\t8\n+RL|S1|C10447\t8\n+RL|S1|C1045\t30\n+RL|S1|C10450\t9\n+RL|S1|C10453\t11\n+RL|S1|C10454\t22\n+RL|S1|C10455\t31\n+RL|S1|C10456\t24\n+RL|S1|C10457\t5\n+RL|S1|C10458\t8\n+RL|S1|C10459\t4\n+RL|S1|C1046\t14\n+RL|S1|C10461\t30\n+RL|S1|C10463\t8\n+RL|S1|C10464\t8\n+RL|S1|C1047\t3'..b'\t28\n+RL|S1|C9474\t11\n+RL|S1|C9475\t8\n+RL|S1|C9478\t8\n+RL|S1|C9479\t9\n+RL|S1|C9482\t8\n+RL|S1|C9489\t11\n+RL|S1|C9490\t32\n+RL|S1|C9492\t11\n+RL|S1|C9493\t17\n+RL|S1|C9494\t11\n+RL|S1|C9498\t9\n+RL|S1|C9499\t30\n+RL|S1|C95\t8\n+RL|S1|C950\t30\n+RL|S1|C9504\t8\n+RL|S1|C9505\t14\n+RL|S1|C9507\t30\n+RL|S1|C9508\t11\n+RL|S1|C9511\t30\n+RL|S1|C9514\t12\n+RL|S1|C9516\t15\n+RL|S1|C9517\t14\n+RL|S1|C952\t8\n+RL|S1|C9520\t18\n+RL|S1|C9521\t8\n+RL|S1|C9525\t24\n+RL|S1|C9527\t12\n+RL|S1|C953\t5\n+RL|S1|C9531\t17\n+RL|S1|C9537\t31\n+RL|S1|C9540\t30\n+RL|S1|C9541\t5\n+RL|S1|C9544\t11\n+RL|S1|C9545\t1\n+RL|S1|C9546\t11\n+RL|S1|C9547\t30\n+RL|S1|C9550\t30\n+RL|S1|C9555\t10\n+RL|S1|C9557\t14\n+RL|S1|C9558\t14\n+RL|S1|C9559\t14\n+RL|S1|C956\t8\n+RL|S1|C9561\t11\n+RL|S1|C9563\t8\n+RL|S1|C9567\t8\n+RL|S1|C9568\t8\n+RL|S1|C9569\t17\n+RL|S1|C957\t30\n+RL|S1|C9570\t11\n+RL|S1|C9571\t30\n+RL|S1|C9573\t18\n+RL|S1|C9576\t5\n+RL|S1|C958\t31\n+RL|S1|C9580\t8\n+RL|S1|C9581\t11\n+RL|S1|C9582\t5\n+RL|S1|C9583\t33\n+RL|S1|C9586\t14\n+RL|S1|C9588\t30\n+RL|S1|C9589\t11\n+RL|S1|C9590\t30\n+RL|S1|C9594\t30\n+RL|S1|C9595\t18\n+RL|S1|C9597\t32\n+RL|S1|C9603\t30\n+RL|S1|C9604\t31\n+RL|S1|C9605\t13\n+RL|S1|C9608\t18\n+RL|S1|C961\t11\n+RL|S1|C9610\t11\n+RL|S1|C9613\t11\n+RL|S1|C9614\t30\n+RL|S1|C9618\t30\n+RL|S1|C9619\t30\n+RL|S1|C9622\t8\n+RL|S1|C9624\t9\n+RL|S1|C9625\t30\n+RL|S1|C9626\t8\n+RL|S1|C9627\t9\n+RL|S1|C9628\t11\n+RL|S1|C9632\t30\n+RL|S1|C9634\t11\n+RL|S1|C9636\t11\n+RL|S1|C9639\t14\n+RL|S1|C964\t30\n+RL|S1|C9642\t
8\n+RL|S1|C9643\t31\n+RL|S1|C9645\t30\n+RL|S1|C9647\t30\n+RL|S1|C9649\t1\n+RL|S1|C965\t23\n+RL|S1|C9653\t8\n+RL|S1|C9657\t8\n+RL|S1|C9659\t8\n+RL|S1|C9660\t11\n+RL|S1|C9661\t8\n+RL|S1|C9664\t30\n+RL|S1|C9667\t8\n+RL|S1|C967\t14\n+RL|S1|C9671\t13\n+RL|S1|C9672\t5\n+RL|S1|C9675\t30\n+RL|S1|C9678\t30\n+RL|S1|C9680\t30\n+RL|S1|C9681\t11\n+RL|S1|C9683\t14\n+RL|S1|C9685\t33\n+RL|S1|C9688\t17\n+RL|S1|C9691\t30\n+RL|S1|C9692\t12\n+RL|S1|C9693\t33\n+RL|S1|C9696\t30\n+RL|S1|C9698\t30\n+RL|S1|C970\t30\n+RL|S1|C9700\t14\n+RL|S1|C9706\t30\n+RL|S1|C9708\t8\n+RL|S1|C9712\t8\n+RL|S1|C9713\t30\n+RL|S1|C9717\t11\n+RL|S1|C9719\t11\n+RL|S1|C9726\t30\n+RL|S1|C9732\t12\n+RL|S1|C9733\t14\n+RL|S1|C9734\t11\n+RL|S1|C9735\t15\n+RL|S1|C9736\t8\n+RL|S1|C9737\t8\n+RL|S1|C9740\t11\n+RL|S1|C9745\t8\n+RL|S1|C9747\t8\n+RL|S1|C9750\t19\n+RL|S1|C9752\t18\n+RL|S1|C9753\t18\n+RL|S1|C9754\t8\n+RL|S1|C9757\t30\n+RL|S1|C9762\t18\n+RL|S1|C9763\t8\n+RL|S1|C9765\t14\n+RL|S1|C9766\t27\n+RL|S1|C9768\t11\n+RL|S1|C977\t30\n+RL|S1|C9774\t11\n+RL|S1|C9775\t17\n+RL|S1|C9776\t33\n+RL|S1|C9777\t17\n+RL|S1|C9778\t30\n+RL|S1|C9779\t11\n+RL|S1|C9780\t12\n+RL|S1|C9782\t11\n+RL|S1|C9783\t8\n+RL|S1|C9784\t11\n+RL|S1|C9785\t28\n+RL|S1|C9786\t18\n+RL|S1|C9789\t11\n+RL|S1|C9792\t8\n+RL|S1|C9793\t5\n+RL|S1|C9796\t11\n+RL|S1|C9798\t30\n+RL|S1|C9799\t8\n+RL|S1|C980\t30\n+RL|S1|C9801\t8\n+RL|S1|C9803\t11\n+RL|S1|C9805\t30\n+RL|S1|C9806\t8\n+RL|S1|C9807\t11\n+RL|S1|C9809\t30\n+RL|S1|C9810\t11\n+RL|S1|C9815\t30\n+RL|S1|C9816\t30\n+RL|S1|C9818\t8\n+RL|S1|C9819\t27\n+RL|S1|C982\t30\n+RL|S1|C9823\t30\n+RL|S1|C9824\t30\n+RL|S1|C9825\t11\n+RL|S1|C9826\t5\n+RL|S1|C983\t12\n+RL|S1|C9832\t30\n+RL|S1|C9834\t9\n+RL|S1|C9836\t11\n+RL|S1|C9837\t30\n+RL|S1|C9839\t14\n+RL|S1|C9841\t8\n+RL|S1|C9846\t17\n+RL|S1|C985\t8\n+RL|S1|C9851\t18\n+RL|S1|C9853\t11\n+RL|S1|C9855\t30\n+RL|S1|C9856\t14\n+RL|S1|C9857\t8\n+RL|S1|C9858\t12\n+RL|S1|C986\t30\n+RL|S1|C9863\t4\n+RL|S1|C9868\t8\n+RL|S1|C9869\t16\n+RL|S1|C9871\t11\n+RL|S1|C9873\t12\n+RL|S1|C9877\t8\n+R
L|S1|C9879\t7\n+RL|S1|C9882\t8\n+RL|S1|C9883\t5\n+RL|S1|C9885\t18\n+RL|S1|C9890\t30\n+RL|S1|C9891\t18\n+RL|S1|C9892\t8\n+RL|S1|C9894\t27\n+RL|S1|C9895\t27\n+RL|S1|C9897\t8\n+RL|S1|C9898\t5\n+RL|S1|C9899\t11\n+RL|S1|C99\t11\n+RL|S1|C9901\t5\n+RL|S1|C9902\t22\n+RL|S1|C9904\t4\n+RL|S1|C9905\t8\n+RL|S1|C9907\t30\n+RL|S1|C9909\t14\n+RL|S1|C9910\t30\n+RL|S1|C9912\t11\n+RL|S1|C9913\t30\n+RL|S1|C9915\t30\n+RL|S1|C9917\t18\n+RL|S1|C992\t8\n+RL|S1|C9924\t8\n+RL|S1|C9925\t8\n+RL|S1|C9927\t30\n+RL|S1|C9928\t8\n+RL|S1|C9932\t9\n+RL|S1|C9941\t18\n+RL|S1|C9943\t30\n+RL|S1|C9947\t31\n+RL|S1|C9951\t30\n+RL|S1|C9956\t11\n+RL|S1|C9958\t8\n+RL|S1|C996\t15\n+RL|S1|C9963\t5\n+RL|S1|C9964\t11\n+RL|S1|C9965\t14\n+RL|S1|C9967\t17\n+RL|S1|C9970\t11\n+RL|S1|C9972\t11\n+RL|S1|C9973\t8\n+RL|S1|C9974\t5\n+RL|S1|C9976\t30\n+RL|S1|C9979\t8\n+RL|S1|C998\t8\n+RL|S1|C9984\t18\n+RL|S1|C9986\t30\n+RL|S1|C9987\t30\n+RL|S1|C9988\t30\n+RL|S1|C9990\t11\n+RL|S1|C9991\t8\n+RL|S1|C9994\t11\n+RL|S1|C9995\t5\n+RL|S1|C9997\t5\n'
b
diff -r 000000000000 -r 1e10251b9615 test-data/gsa_mapping.binning
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gsa_mapping.binning Sun Aug 25 13:19:14 2024 +0000
b
b'@@ -0,0 +1,19503 @@\n+@Version:0.9.1\n+@SampleID:CAMI_low\n+\n+@@SEQUENCEID\tBINID\t_LENGTH\n+RL|S1|C10817\tSample18_57\t20518\n+RL|S1|C11497\tSample22_57\t37672\n+RL|S1|C6571\tevo_1286_AP.033\t69914\n+RL|S1|C10560\tevo_1286_AP.033\t995657\n+RL|S1|C13546\tevo_1286_AP.033\t626775\n+RL|S1|C7390\tevo_1286_AP.033\t307287\n+RL|S1|C9605\tevo_1286_AP.033\t260633\n+RL|S1|C2337\tevo_1286_AP.033\t198840\n+RL|S1|C6899\tevo_1286_AP.033\t1141\n+RL|S1|C17063\tevo_1286_AP.033\t8496\n+RL|S1|C18596\tevo_1286_AP.033\t162263\n+RL|S1|C18150\tevo_1286_AP.033\t584\n+RL|S1|C6305\tevo_1286_AP.033\t158795\n+RL|S1|C1203\tevo_1286_AP.033\t194915\n+RL|S1|C4627\tevo_1286_AP.033\t54407\n+RL|S1|C702\tevo_1286_AP.033\t41852\n+RL|S1|C1928\tevo_1286_AP.033\t14239\n+RL|S1|C50\tevo_1286_AP.033\t1157\n+RL|S1|C18657\tevo_1286_AP.033\t1046\n+RL|S1|C18863\tevo_1286_AP.026\t70515\n+RL|S1|C14115\tevo_1286_AP.026\t1152509\n+RL|S1|C16829\tevo_1286_AP.026\t680165\n+RL|S1|C2013\tevo_1286_AP.026\t278809\n+RL|S1|C17254\tevo_1286_AP.026\t259780\n+RL|S1|C254\tevo_1286_AP.026\t168127\n+RL|S1|C10773\tevo_1286_AP.026\t1149\n+RL|S1|C7050\tevo_1286_AP.026\t8495\n+RL|S1|C7899\tevo_1286_AP.026\t163218\n+RL|S1|C17647\tevo_1286_AP.026\t594\n+RL|S1|C5284\tevo_1286_AP.026\t159261\n+RL|S1|C4841\tevo_1286_AP.026\t194513\n+RL|S1|C17586\tevo_1286_AP.026\t54414\n+RL|S1|C10374\tevo_1286_AP.026\t13916\n+RL|S1|C2878\tevo_1286_AP.026\t3269\n+RL|S1|C16502\tevo_1286_AP.026\t1156\n+RL|S1|C17522\tevo_1286_AP.026\t1048\n+RL|S1|C9894\t1036755\t134159\n+RL|S1|C16110\t1036755\t26817\n+RL|S1|C16018\t1036755\t343251\n+RL|S1|C1907\t1036755\t336391\n+RL|S1|C8157\t1036755\t296234\n+RL|S1|C18843\t1036755\t258302\n+RL|S1|C9330\t1036755\t239653\n+RL|S1|C16258\t1036755\t213990\n+RL|S1|C16354\t1036755\t213652\n+RL|S1|C4285\t1036755\t210921\n+RL|S1|C14455\t1036755\t156208\n+RL|S1|C8670\t1036755\t150320\n+RL|S1|C1152\t1036755\t60049\n+RL|S1|C5546\t1036755\t84466\n+RL|S1|C5748\t1036755\t140977\n+RL|S1|C2422\t1036755\t136556\n+RL|S1|C1409\t1036755\t13209
5\n+RL|S1|C17415\t1036755\t128018\n+RL|S1|C562\t1036755\t126726\n+RL|S1|C2330\t1036755\t121401\n+RL|S1|C5910\t1036755\t494\n+RL|S1|C13983\t1036755\t117146\n+RL|S1|C16410\t1036755\t113667\n+RL|S1|C11887\t1036755\t103196\n+RL|S1|C7755\t1036755\t3054\n+RL|S1|C7046\t1036755\t101545\n+RL|S1|C5208\t1036755\t94701\n+RL|S1|C8160\t1036755\t93996\n+RL|S1|C9766\t1036755\t92205\n+RL|S1|C5229\t1036755\t91166\n+RL|S1|C4512\t1036755\t86003\n+RL|S1|C3775\t1036755\t83170\n+RL|S1|C10490\t1036755\t1073\n+RL|S1|C8477\t1036755\t84080\n+RL|S1|C17860\t1036755\t81707\n+RL|S1|C6822\t1036755\t51796\n+RL|S1|C7429\t1036755\t29736\n+RL|S1|C5872\t1036755\t77171\n+RL|S1|C9895\t1036755\t76710\n+RL|S1|C16820\t1036755\t74605\n+RL|S1|C19174\t1036755\t71556\n+RL|S1|C11385\t1036755\t65101\n+RL|S1|C18700\t1036755\t64936\n+RL|S1|C15519\t1036755\t62839\n+RL|S1|C19312\t1036755\t59179\n+RL|S1|C17561\t1036755\t55270\n+RL|S1|C6546\t1036755\t51470\n+RL|S1|C8943\t1036755\t51286\n+RL|S1|C15261\t1036755\t47945\n+RL|S1|C660\t1036755\t46364\n+RL|S1|C199\t1036755\t46297\n+RL|S1|C121\t1036755\t43173\n+RL|S1|C7922\t1036755\t42491\n+RL|S1|C12353\t1036755\t42301\n+RL|S1|C12115\t1036755\t39094\n+RL|S1|C17259\t1036755\t36116\n+RL|S1|C5558\t1036755\t35446\n+RL|S1|C9819\t1036755\t34716\n+RL|S1|C15452\t1036755\t34397\n+RL|S1|C3281\t1036755\t32966\n+RL|S1|C5197\t1036755\t31018\n+RL|S1|C15571\t1036755\t30385\n+RL|S1|C12837\t1036755\t30294\n+RL|S1|C13458\t1036755\t30183\n+RL|S1|C703\t1036755\t29718\n+RL|S1|C4153\t1036755\t25253\n+RL|S1|C16440\t1036755\t24764\n+RL|S1|C1242\t1036755\t24490\n+RL|S1|C8453\t1036755\t21580\n+RL|S1|C1606\t1036755\t21335\n+RL|S1|C4585\t1036755\t21233\n+RL|S1|C4906\t1036755\t18237\n+RL|S1|C2894\t1036755\t15336\n+RL|S1|C15761\t1036755\t15392\n+RL|S1|C15447\t1036755\t15352\n+RL|S1|C795\t1036755\t15067\n+RL|S1|C7601\t1036755\t14817\n+RL|S1|C9023\t1036755\t13773\n+RL|S1|C8184\t1036755\t12763\n+RL|S1|C3003\t1036755\t12405\n+RL|S1|C6795\t1036755\t11843\n+RL|S1|C694\t1036755\t11545\n+RL|S1|C13338\t1036755\t112
54\n+RL|S1|C16360\t1036755\t8397\n+RL|S1|C7888\t1036755\t7910\n+RL|S1|C13323\t1036755\t6977\n+RL|S1|C18851\t1036755\t6033\n+RL|S1|C14415\t1036755\t4680\n+RL|S1|C5820\t1036755\t4006\n+RL|S1|C2253\t1036755\t3336\n+RL|S1|C922\t1036755\t3147\n+RL|S1|C17273\t1036755\t2796\n+RL|S1|C680\t1036755\t2408\n+RL|S1|C5060\t1036755\t2198\n+RL|S1|C1782\t1036755\t2124\n+RL|S1|C5157\t1036755\t2112\n+RL|S1|C12222\t103675'..b'0919\n+RL|S1|C424\tevo_1049056.031\t9564\n+RL|S1|C4817\tevo_1049056.031\t6599\n+RL|S1|C2803\tevo_1049056.031\t6234\n+RL|S1|C1594\tevo_1049056.031\t6012\n+RL|S1|C3199\tevo_1049056.031\t5545\n+RL|S1|C17521\tevo_1049056.031\t3960\n+RL|S1|C4113\tevo_1049056.031\t2610\n+RL|S1|C57\tevo_1049056.031\t2389\n+RL|S1|C18020\tevo_1049056.031\t2346\n+RL|S1|C6672\tevo_1049056.031\t2214\n+RL|S1|C17127\tevo_1049056.031\t2076\n+RL|S1|C11378\tevo_1049056.031\t2041\n+RL|S1|C14633\tevo_1049056.031\t1895\n+RL|S1|C11238\tevo_1049056.031\t1494\n+RL|S1|C15889\tevo_1049056.031\t1421\n+RL|S1|C2730\tevo_1049056.031\t1275\n+RL|S1|C13529\tSample9_65\t4862\n+RL|S1|C10071\tSample9_65\t7461\n+RL|S1|C5242\t1053058\t272601\n+RL|S1|C10000\t1053058\t253048\n+RL|S1|C8113\t1053058\t248323\n+RL|S1|C2316\t1053058\t247019\n+RL|S1|C10818\t1053058\t242042\n+RL|S1|C17437\t1053058\t233932\n+RL|S1|C5818\t1053058\t200472\n+RL|S1|C7763\t1053058\t190976\n+RL|S1|C2691\t1053058\t189586\n+RL|S1|C12627\t1053058\t182671\n+RL|S1|C12813\t1053058\t181611\n+RL|S1|C8059\t1053058\t162117\n+RL|S1|C2333\t1053058\t157517\n+RL|S1|C7221\t1053058\t155007\n+RL|S1|C10643\t1053058\t141711\n+RL|S1|C2240\t1053058\t128620\n+RL|S1|C8459\t1053058\t115073\n+RL|S1|C15604\t1053058\t113320\n+RL|S1|C10249\t1053058\t107627\n+RL|S1|C6318\t1053058\t91424\n+RL|S1|C15437\t1053058\t85494\n+RL|S1|C1948\t1053058\t85441\n+RL|S1|C15200\t1053058\t73938\n+RL|S1|C19151\t1053058\t67790\n+RL|S1|C18447\t1053058\t65804\n+RL|S1|C7554\t1053058\t61799\n+RL|S1|C10459\t1053058\t54435\n+RL|S1|C12315\t1053058\t54282\n+RL|S1|C12566\t1053058\t52261\n+RL|S1|C8167\t1053
058\t51845\n+RL|S1|C3964\t1053058\t51263\n+RL|S1|C13808\t1053058\t48250\n+RL|S1|C14311\t1053058\t47996\n+RL|S1|C5454\t1053058\t42812\n+RL|S1|C5232\t1053058\t38071\n+RL|S1|C5605\t1053058\t36622\n+RL|S1|C11994\t1053058\t33294\n+RL|S1|C6683\t1053058\t32703\n+RL|S1|C14519\t1053058\t30990\n+RL|S1|C8346\t1053058\t30653\n+RL|S1|C16581\t1053058\t30412\n+RL|S1|C8547\t1053058\t29458\n+RL|S1|C9199\t1053058\t28627\n+RL|S1|C19284\t1053058\t27543\n+RL|S1|C14364\t1053058\t25656\n+RL|S1|C9863\t1053058\t24264\n+RL|S1|C2114\t1053058\t22884\n+RL|S1|C16787\t1053058\t21704\n+RL|S1|C7715\t1053058\t21339\n+RL|S1|C9433\t1053058\t20839\n+RL|S1|C12841\t1053058\t20547\n+RL|S1|C290\t1053058\t19481\n+RL|S1|C16514\t1053058\t16419\n+RL|S1|C2332\t1053058\t16085\n+RL|S1|C10816\t1053058\t14669\n+RL|S1|C15540\t1053058\t14232\n+RL|S1|C7885\t1053058\t12434\n+RL|S1|C2317\t1053058\t11831\n+RL|S1|C269\t1053058\t11505\n+RL|S1|C2200\t1053058\t2138\n+RL|S1|C2728\t1053058\t7100\n+RL|S1|C16907\t1053058\t9207\n+RL|S1|C6145\t1053058\t9124\n+RL|S1|C5957\t1053058\t8701\n+RL|S1|C14541\t1053058\t8590\n+RL|S1|C10430\t1053058\t7522\n+RL|S1|C7174\t1053058\t7410\n+RL|S1|C15272\t1053058\t6251\n+RL|S1|C16909\t1053058\t6024\n+RL|S1|C1862\t1053058\t5882\n+RL|S1|C15246\t1053058\t5605\n+RL|S1|C10118\t1053058\t4822\n+RL|S1|C9147\t1053058\t4267\n+RL|S1|C4971\t1053058\t4027\n+RL|S1|C5607\t1053058\t3525\n+RL|S1|C18556\t1053058\t3326\n+RL|S1|C17832\t1053058\t3169\n+RL|S1|C15631\t1053058\t2592\n+RL|S1|C18519\t1053058\t2559\n+RL|S1|C16754\t1053058\t2447\n+RL|S1|C7334\t1053058\t2270\n+RL|S1|C13363\t1053058\t2271\n+RL|S1|C16269\t1053058\t2070\n+RL|S1|C1952\t1053058\t2042\n+RL|S1|C17071\t1053058\t1926\n+RL|S1|C4634\t1053058\t1626\n+RL|S1|C11892\t1053058\t1589\n+RL|S1|C17054\t1053058\t1586\n+RL|S1|C14756\t1053058\t1289\n+RL|S1|C8291\t1053058\t1234\n+RL|S1|C435\t1053058\t1223\n+RL|S1|C19089\t1053058\t1191\n+RL|S1|C17978\t1053058\t1177\n+RL|S1|C18379\t1053058\t1175\n+RL|S1|C3083\t1053058\t1021\n+RL|S1|C9869\t1053058\t1019\n+RL|S1|C15312\t
1049089\t785947\n+RL|S1|C6421\t1049089\t777941\n+RL|S1|C19031\t1049089\t164316\n+RL|S1|C2849\t1049089\t441324\n+RL|S1|C6216\t1049089\t284772\n+RL|S1|C14058\t1049089\t122679\n+RL|S1|C12357\t1049089\t119931\n+RL|S1|C4286\t1049089\t73158\n+RL|S1|C1478\t1049089\t65193\n+RL|S1|C19376\t1049089\t62404\n+RL|S1|C13381\t1049089\t49792\n+RL|S1|C16012\t1049089\t39978\n+RL|S1|C6274\t1049089\t22507\n+RL|S1|C543\t1049089\t21101\n+RL|S1|C16689\t1049089\t20266\n+RL|S1|C9750\t1049089\t19732\n+RL|S1|C15516\t1049089\t13393\n+RL|S1|C18602\t1049089\t8352\n+RL|S1|C6687\t1049089\t8049\n+RL|S1|C8043\t1049089\t6824\n+RL|S1|C1117\t1049089\t6539\n+RL|S1|C1747\t1049089\t3696\n+RL|S1|C10098\t1049089\t1992\n+RL|S1|C3619\t1049089\t1211\n+RL|S1|C9260\t1049089\t1088\n+RL|S1|C6466\t1049089\t1080\n+RL|S1|C6018\t1049089\t1024\n+RL|S1|C4514\tSample19_56\t29366\n'
b
diff -r 000000000000 -r 1e10251b9615 test-data/naughty_carson_2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/naughty_carson_2 Sun Aug 25 13:19:14 2024 +0000
b
b'@@ -0,0 +1,8176 @@\n+#CAMI Format for Binning\n+@Version:0.9.0\n+@SampleID:CAMI_low\n+@@SEQUENCEID\tBINID\n+RL|S1|C10\tBin_034\n+RL|S1|C100\tBin_023\n+RL|S1|C1000\tBin_034\n+RL|S1|C10000\tBin_019\n+RL|S1|C10002\tBin_035\n+RL|S1|C10004\tBin_035\n+RL|S1|C10008\tBin_034\n+RL|S1|C10011\tBin_035\n+RL|S1|C10012\tBin_013\n+RL|S1|C10014\tBin_035\n+RL|S1|C10015\tBin_033\n+RL|S1|C10017\tBin_034\n+RL|S1|C1002\tBin_018\n+RL|S1|C10020\tBin_036\n+RL|S1|C10022\tBin_034\n+RL|S1|C10026\tBin_034\n+RL|S1|C10028\tBin_034\n+RL|S1|C10032\tBin_027\n+RL|S1|C10034\tBin_032\n+RL|S1|C10036\tBin_006\n+RL|S1|C10037\tBin_034\n+RL|S1|C10039\tBin_018\n+RL|S1|C1004\tBin_003\n+RL|S1|C10040\tBin_036\n+RL|S1|C10041\tBin_035\n+RL|S1|C10046\tBin_034\n+RL|S1|C10047\tBin_034\n+RL|S1|C10052\tBin_018\n+RL|S1|C10056\tBin_033\n+RL|S1|C10059\tBin_033\n+RL|S1|C1006\tBin_034\n+RL|S1|C10060\tBin_034\n+RL|S1|C10069\tBin_030\n+RL|S1|C1007\tBin_032\n+RL|S1|C10070\tBin_008\n+RL|S1|C10071\tBin_029\n+RL|S1|C10073\tBin_034\n+RL|S1|C10075\tBin_029\n+RL|S1|C10080\tBin_032\n+RL|S1|C10083\tBin_034\n+RL|S1|C10084\tBin_034\n+RL|S1|C10091\tBin_033\n+RL|S1|C10093\tBin_034\n+RL|S1|C10095\tBin_036\n+RL|S1|C10096\tBin_034\n+RL|S1|C10098\tBin_029\n+RL|S1|C101\tBin_033\n+RL|S1|C1010\tBin_036\n+RL|S1|C10103\tBin_034\n+RL|S1|C10104\tBin_034\n+RL|S1|C10108\tBin_032\n+RL|S1|C10109\tBin_033\n+RL|S1|C1011\tBin_012\n+RL|S1|C10110\tBin_036\n+RL|S1|C10113\tBin_018\n+RL|S1|C10117\tBin_036\n+RL|S1|C10122\tBin_016\n+RL|S1|C10124\tBin_034\n+RL|S1|C10127\tBin_034\n+RL|S1|C10128\tBin_018\n+RL|S1|C10129\tBin_036\n+RL|S1|C1013\tBin_034\n+RL|S1|C10132\tBin_018\n+RL|S1|C10136\tBin_032\n+RL|S1|C10143\tBin_001\n+RL|S1|C1015\tBin_036\n+RL|S1|C10150\tBin_006\n+RL|S1|C10153\tBin_034\n+RL|S1|C10154\tBin_027\n+RL|S1|C10157\tBin_036\n+RL|S1|C10162\tBin_034\n+RL|S1|C10168\tBin_034\n+RL|S1|C10170\tBin_035\n+RL|S1|C10171\tBin_034\n+RL|S1|C10173\tBin_034\n+RL|S1|C10175\tBin_034\n+RL|S1|C10179\tBin_030\n+RL|S1|C10183\tBin_036\n+RL|S1|C10184\tBin_027\n+RL|S1|C101
87\tBin_036\n+RL|S1|C10188\tBin_036\n+RL|S1|C1019\tBin_019\n+RL|S1|C10193\tBin_036\n+RL|S1|C10194\tBin_004\n+RL|S1|C10197\tBin_034\n+RL|S1|C102\tBin_036\n+RL|S1|C10204\tBin_018\n+RL|S1|C10207\tBin_034\n+RL|S1|C10208\tBin_031\n+RL|S1|C10209\tBin_033\n+RL|S1|C10210\tBin_031\n+RL|S1|C10212\tBin_018\n+RL|S1|C10213\tBin_016\n+RL|S1|C1022\tBin_034\n+RL|S1|C10221\tBin_034\n+RL|S1|C10223\tBin_033\n+RL|S1|C10224\tBin_035\n+RL|S1|C10225\tBin_034\n+RL|S1|C10226\tBin_034\n+RL|S1|C10229\tBin_018\n+RL|S1|C1023\tBin_033\n+RL|S1|C10230\tBin_034\n+RL|S1|C10234\tBin_035\n+RL|S1|C10235\tBin_034\n+RL|S1|C10236\tBin_034\n+RL|S1|C10237\tBin_029\n+RL|S1|C10240\tBin_034\n+RL|S1|C10242\tBin_016\n+RL|S1|C10243\tBin_033\n+RL|S1|C10244\tBin_033\n+RL|S1|C10246\tBin_034\n+RL|S1|C1025\tBin_025\n+RL|S1|C10251\tBin_033\n+RL|S1|C10252\tBin_035\n+RL|S1|C10255\tBin_036\n+RL|S1|C10258\tBin_034\n+RL|S1|C10259\tBin_036\n+RL|S1|C1026\tBin_034\n+RL|S1|C10261\tBin_032\n+RL|S1|C10262\tBin_036\n+RL|S1|C10266\tBin_018\n+RL|S1|C10267\tBin_034\n+RL|S1|C10268\tBin_036\n+RL|S1|C10269\tBin_036\n+RL|S1|C1027\tBin_031\n+RL|S1|C10271\tBin_034\n+RL|S1|C10273\tBin_033\n+RL|S1|C10277\tBin_027\n+RL|S1|C10278\tBin_036\n+RL|S1|C10279\tBin_018\n+RL|S1|C10280\tBin_035\n+RL|S1|C10286\tBin_034\n+RL|S1|C10287\tBin_018\n+RL|S1|C10289\tBin_032\n+RL|S1|C1029\tBin_031\n+RL|S1|C10290\tBin_018\n+RL|S1|C10294\tBin_036\n+RL|S1|C10295\tBin_031\n+RL|S1|C10297\tBin_036\n+RL|S1|C10298\tBin_027\n+RL|S1|C10299\tBin_034\n+RL|S1|C1030\tBin_035\n+RL|S1|C10302\tBin_036\n+RL|S1|C10303\tBin_034\n+RL|S1|C10306\tBin_034\n+RL|S1|C10307\tBin_035\n+RL|S1|C10308\tBin_032\n+RL|S1|C10309\tBin_036\n+RL|S1|C10311\tBin_035\n+RL|S1|C10313\tBin_034\n+RL|S1|C10315\tBin_012\n+RL|S1|C10319\tBin_031\n+RL|S1|C1032\tBin_032\n+RL|S1|C10322\tBin_033\n+RL|S1|C10323\tBin_035\n+RL|S1|C10325\tBin_032\n+RL|S1|C10328\tBin_034\n+RL|S1|C10330\tBin_023\n+RL|S1|C10332\tBin_018\n+RL|S1|C10333\tBin_035\n+RL|S1|C10335\tBin_032\n+RL|S1|C10337\tBin_035\n+RL|S1|C10349\tBin_034\n+RL|S1
|C1035\tBin_036\n+RL|S1|C10350\tBin_035\n+RL|S1|C10355\tBin_034\n+RL|S1|C10356\tBin_036\n+RL|S1|C10358\tBin_036\n+RL|S1|C10359\tBin_030\n+RL|S1|C10360\tBin_033\n+RL|S1|C10361\tBin_018\n+RL|S1|C10363\tBin_006\n+RL|S1|C10364\tBin_036\n+RL|S1|C10365\tBin_034\n+RL|S1|C10366\tBin_012\n+RL|S1|C10368\tBin_018\n+RL|S1|C10373\tBin_027\n+RL|S1|C10374\tBin_011\n+RL|S1|C10'..b'018\n+RL|S1|C9583\tBin_006\n+RL|S1|C9586\tBin_033\n+RL|S1|C9588\tBin_034\n+RL|S1|C9589\tBin_036\n+RL|S1|C9590\tBin_031\n+RL|S1|C9594\tBin_034\n+RL|S1|C9595\tBin_032\n+RL|S1|C9597\tBin_023\n+RL|S1|C9603\tBin_034\n+RL|S1|C9605\tBin_015\n+RL|S1|C9608\tBin_032\n+RL|S1|C961\tBin_036\n+RL|S1|C9610\tBin_036\n+RL|S1|C9613\tBin_035\n+RL|S1|C9614\tBin_030\n+RL|S1|C9618\tBin_034\n+RL|S1|C9619\tBin_034\n+RL|S1|C9622\tBin_035\n+RL|S1|C9624\tBin_016\n+RL|S1|C9625\tBin_034\n+RL|S1|C9626\tBin_035\n+RL|S1|C9627\tBin_024\n+RL|S1|C9628\tBin_036\n+RL|S1|C9632\tBin_034\n+RL|S1|C9634\tBin_036\n+RL|S1|C9636\tBin_036\n+RL|S1|C9639\tBin_033\n+RL|S1|C964\tBin_034\n+RL|S1|C9642\tBin_035\n+RL|S1|C9643\tBin_025\n+RL|S1|C9645\tBin_034\n+RL|S1|C9647\tBin_034\n+RL|S1|C9649\tBin_027\n+RL|S1|C965\tBin_020\n+RL|S1|C9653\tBin_035\n+RL|S1|C9657\tBin_035\n+RL|S1|C9659\tBin_035\n+RL|S1|C9660\tBin_036\n+RL|S1|C9661\tBin_035\n+RL|S1|C9664\tBin_034\n+RL|S1|C9667\tBin_035\n+RL|S1|C967\tBin_036\n+RL|S1|C9672\tBin_018\n+RL|S1|C9675\tBin_034\n+RL|S1|C9678\tBin_030\n+RL|S1|C9680\tBin_034\n+RL|S1|C9681\tBin_036\n+RL|S1|C9683\tBin_032\n+RL|S1|C9685\tBin_006\n+RL|S1|C9688\tBin_034\n+RL|S1|C9691\tBin_034\n+RL|S1|C9693\tBin_006\n+RL|S1|C9696\tBin_034\n+RL|S1|C9698\tBin_031\n+RL|S1|C970\tBin_034\n+RL|S1|C9700\tBin_033\n+RL|S1|C9706\tBin_034\n+RL|S1|C9708\tBin_035\n+RL|S1|C9712\tBin_036\n+RL|S1|C9713\tBin_034\n+RL|S1|C9717\tBin_036\n+RL|S1|C9719\tBin_036\n+RL|S1|C9726\tBin_034\n+RL|S1|C9732\tBin_035\n+RL|S1|C9733\tBin_033\n+RL|S1|C9734\tBin_033\n+RL|S1|C9735\tBin_004\n+RL|S1|C9736\tBin_035\n+RL|S1|C9737\tBin_035\n+RL|S1|C9740\tBin_036\n+RL|S1|C9745\tBin_035\
n+RL|S1|C9747\tBin_035\n+RL|S1|C9750\tBin_028\n+RL|S1|C9752\tBin_032\n+RL|S1|C9753\tBin_032\n+RL|S1|C9754\tBin_035\n+RL|S1|C9757\tBin_034\n+RL|S1|C9763\tBin_035\n+RL|S1|C9765\tBin_033\n+RL|S1|C9766\tBin_017\n+RL|S1|C9768\tBin_036\n+RL|S1|C977\tBin_034\n+RL|S1|C9774\tBin_036\n+RL|S1|C9775\tBin_034\n+RL|S1|C9776\tBin_006\n+RL|S1|C9777\tBin_034\n+RL|S1|C9778\tBin_034\n+RL|S1|C9779\tBin_036\n+RL|S1|C9780\tBin_006\n+RL|S1|C9782\tBin_036\n+RL|S1|C9783\tBin_035\n+RL|S1|C9784\tBin_035\n+RL|S1|C9785\tBin_029\n+RL|S1|C9786\tBin_032\n+RL|S1|C9789\tBin_033\n+RL|S1|C9792\tBin_036\n+RL|S1|C9793\tBin_018\n+RL|S1|C9796\tBin_033\n+RL|S1|C9798\tBin_034\n+RL|S1|C9799\tBin_035\n+RL|S1|C980\tBin_030\n+RL|S1|C9801\tBin_035\n+RL|S1|C9803\tBin_036\n+RL|S1|C9805\tBin_034\n+RL|S1|C9806\tBin_036\n+RL|S1|C9807\tBin_036\n+RL|S1|C9809\tBin_034\n+RL|S1|C9810\tBin_036\n+RL|S1|C9815\tBin_034\n+RL|S1|C9816\tBin_034\n+RL|S1|C9818\tBin_035\n+RL|S1|C9819\tBin_017\n+RL|S1|C982\tBin_034\n+RL|S1|C9823\tBin_034\n+RL|S1|C9824\tBin_034\n+RL|S1|C9825\tBin_036\n+RL|S1|C9826\tBin_018\n+RL|S1|C983\tBin_023\n+RL|S1|C9832\tBin_030\n+RL|S1|C9834\tBin_024\n+RL|S1|C9836\tBin_036\n+RL|S1|C9837\tBin_034\n+RL|S1|C9839\tBin_033\n+RL|S1|C9841\tBin_035\n+RL|S1|C9846\tBin_034\n+RL|S1|C985\tBin_035\n+RL|S1|C9851\tBin_032\n+RL|S1|C9853\tBin_036\n+RL|S1|C9855\tBin_034\n+RL|S1|C9856\tBin_033\n+RL|S1|C9857\tBin_035\n+RL|S1|C986\tBin_034\n+RL|S1|C9868\tBin_035\n+RL|S1|C9871\tBin_036\n+RL|S1|C9877\tBin_035\n+RL|S1|C9879\tBin_031\n+RL|S1|C9882\tBin_035\n+RL|S1|C9883\tBin_012\n+RL|S1|C9885\tBin_032\n+RL|S1|C9890\tBin_034\n+RL|S1|C9891\tBin_032\n+RL|S1|C9892\tBin_035\n+RL|S1|C9894\tBin_017\n+RL|S1|C9895\tBin_017\n+RL|S1|C9897\tBin_035\n+RL|S1|C9898\tBin_018\n+RL|S1|C9899\tBin_036\n+RL|S1|C99\tBin_036\n+RL|S1|C9901\tBin_018\n+RL|S1|C9902\tBin_002\n+RL|S1|C9904\tBin_019\n+RL|S1|C9905\tBin_035\n+RL|S1|C9907\tBin_034\n+RL|S1|C9909\tBin_033\n+RL|S1|C9910\tBin_029\n+RL|S1|C9912\tBin_036\n+RL|S1|C9913\tBin_034\n+RL|S1|C9915\tBin_034\n+RL|S1
|C9917\tBin_032\n+RL|S1|C992\tBin_035\n+RL|S1|C9924\tBin_033\n+RL|S1|C9925\tBin_035\n+RL|S1|C9927\tBin_034\n+RL|S1|C9928\tBin_035\n+RL|S1|C9932\tBin_007\n+RL|S1|C9941\tBin_032\n+RL|S1|C9943\tBin_034\n+RL|S1|C9947\tBin_027\n+RL|S1|C9951\tBin_034\n+RL|S1|C9956\tBin_036\n+RL|S1|C9958\tBin_036\n+RL|S1|C996\tBin_004\n+RL|S1|C9963\tBin_018\n+RL|S1|C9964\tBin_036\n+RL|S1|C9965\tBin_033\n+RL|S1|C9967\tBin_034\n+RL|S1|C9970\tBin_036\n+RL|S1|C9972\tBin_036\n+RL|S1|C9973\tBin_035\n+RL|S1|C9974\tBin_018\n+RL|S1|C9976\tBin_034\n+RL|S1|C9979\tBin_035\n+RL|S1|C998\tBin_035\n+RL|S1|C9984\tBin_032\n+RL|S1|C9986\tBin_034\n+RL|S1|C9988\tBin_031\n+RL|S1|C9990\tBin_036\n+RL|S1|C9991\tBin_035\n+RL|S1|C9994\tBin_036\n+RL|S1|C9995\tBin_018\n+RL|S1|C9997\tBin_018\n'
b
diff -r 000000000000 -r 1e10251b9615 test-data/ncbi_taxonomy.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ncbi_taxonomy.loc Sun Aug 25 13:19:14 2024 +0000
b
@@ -0,0 +1,1 @@
+test-db-tox Test Database ${__HERE__}/test-db
\ No newline at end of file
b
diff -r 000000000000 -r 1e10251b9615 test-data/test-db/delnodes.dmp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/delnodes.dmp Sun Aug 25 13:19:14 2024 +0000
b
b'@@ -0,0 +1,70000 @@\n+2923441\t|\n+2923440\t|\n+2923439\t|\n+2923438\t|\n+2923437\t|\n+2923436\t|\n+2923435\t|\n+2923434\t|\n+2923433\t|\n+2923432\t|\n+2923431\t|\n+2923430\t|\n+2923429\t|\n+2923428\t|\n+2923427\t|\n+2923426\t|\n+2923425\t|\n+2923424\t|\n+2923423\t|\n+2923422\t|\n+2923421\t|\n+2923420\t|\n+2923419\t|\n+2923418\t|\n+2923417\t|\n+2923416\t|\n+2923415\t|\n+2923414\t|\n+2923413\t|\n+2923412\t|\n+2923411\t|\n+2923410\t|\n+2923409\t|\n+2923408\t|\n+2923407\t|\n+2923406\t|\n+2923405\t|\n+2923404\t|\n+2923403\t|\n+2923402\t|\n+2923401\t|\n+2923400\t|\n+2923399\t|\n+2923398\t|\n+2923397\t|\n+2923396\t|\n+2923395\t|\n+2923394\t|\n+2923393\t|\n+2923392\t|\n+2923391\t|\n+2923390\t|\n+2923389\t|\n+2923388\t|\n+2923387\t|\n+2923386\t|\n+2923385\t|\n+2923384\t|\n+2923383\t|\n+2923382\t|\n+2923381\t|\n+2923380\t|\n+2923379\t|\n+2923378\t|\n+2923377\t|\n+2923376\t|\n+2923375\t|\n+2923374\t|\n+2923373\t|\n+2923372\t|\n+2923371\t|\n+2923370\t|\n+2923369\t|\n+2923367\t|\n+2923366\t|\n+2923365\t|\n+2923364\t|\n+2923363\t|\n+2923362\t|\n+2923361\t|\n+2923360\t|\n+2923359\t|\n+2923358\t|\n+2923357\t|\n+2923356\t|\n+2923355\t|\n+2923354\t|\n+2923353\t|\n+2923351\t|\n+2923350\t|\n+2923349\t|\n+2923348\t|\n+2923347\t|\n+2923346\t|\n+2923345\t|\n+2923344\t|\n+2923343\t|\n+2923342\t|\n+2923341\t|\n+2923340\t|\n+2923339\t|\n+2923338\t|\n+2923337\t|\n+2923336\t|\n+2923335\t|\n+2923334\t|\n+2923333\t|\n+2923332\t|\n+2923331\t|\n+2923330\t|\n+2923329\t|\n+2923328\t|\n+2923327\t|\n+2923326\t|\n+2923324\t|\n+2923323\t|\n+2923322\t|\n+2923321\t|\n+2923320\t|\n+2923319\t|\n+2923318\t|\n+2923317\t|\n+2923316\t|\n+2923315\t|\n+2923314\t|\n+2923313\t|\n+2923312\t|\n+2923311\t|\n+2923310\t|\n+2923309\t|\n+2923308\t|\n+2923307\t|\n+2923306\t|\n+2923305\t|\n+2923304\t|\n+2923303\t|\n+2923302\t|\n+2923301\t|\n+2923300\t|\n+2923299\t|\n+2923298\t|\n+2923297\t|\n+2923296\t|\n+2923295\t|\n+2923294\t|\n+2923293\t|\n+2923292\t|\n+2923291\t|\n+2923287\t|\n+2923286\t|\n+2923285\t|\n+2923284\t|\n+
2923283\t|\n+2923282\t|\n+2923281\t|\n+2923280\t|\n+2923279\t|\n+2923278\t|\n+2923277\t|\n+2923276\t|\n+2923275\t|\n+2923274\t|\n+2923273\t|\n+2923272\t|\n+2923271\t|\n+2923270\t|\n+2923269\t|\n+2923268\t|\n+2923267\t|\n+2923266\t|\n+2923264\t|\n+2923263\t|\n+2923262\t|\n+2923261\t|\n+2923260\t|\n+2923259\t|\n+2923258\t|\n+2923257\t|\n+2923256\t|\n+2923255\t|\n+2923254\t|\n+2923253\t|\n+2923252\t|\n+2923251\t|\n+2923250\t|\n+2923249\t|\n+2923247\t|\n+2923246\t|\n+2923245\t|\n+2923244\t|\n+2923243\t|\n+2923242\t|\n+2923241\t|\n+2923240\t|\n+2923239\t|\n+2923238\t|\n+2923237\t|\n+2923236\t|\n+2923235\t|\n+2923234\t|\n+2923233\t|\n+2923232\t|\n+2923231\t|\n+2923230\t|\n+2923229\t|\n+2923228\t|\n+2923227\t|\n+2923226\t|\n+2923225\t|\n+2923224\t|\n+2923223\t|\n+2923222\t|\n+2923221\t|\n+2923220\t|\n+2923219\t|\n+2923218\t|\n+2923217\t|\n+2923216\t|\n+2923215\t|\n+2923214\t|\n+2923213\t|\n+2923212\t|\n+2923211\t|\n+2923210\t|\n+2923209\t|\n+2923208\t|\n+2923207\t|\n+2923206\t|\n+2923205\t|\n+2923204\t|\n+2923203\t|\n+2923202\t|\n+2923201\t|\n+2923200\t|\n+2923199\t|\n+2923198\t|\n+2923197\t|\n+2923196\t|\n+2923195\t|\n+2923194\t|\n+2923193\t|\n+2923192\t|\n+2923191\t|\n+2923190\t|\n+2923189\t|\n+2923188\t|\n+2923187\t|\n+2923186\t|\n+2923185\t|\n+2923184\t|\n+2923183\t|\n+2923182\t|\n+2923181\t|\n+2923180\t|\n+2923179\t|\n+2923178\t|\n+2923177\t|\n+2923176\t|\n+2923175\t|\n+2923174\t|\n+2923173\t|\n+2923172\t|\n+2923171\t|\n+2923170\t|\n+2923169\t|\n+2923168\t|\n+2923167\t|\n+2923166\t|\n+2923165\t|\n+2923164\t|\n+2923163\t|\n+2923162\t|\n+2923161\t|\n+2923160\t|\n+2923159\t|\n+2923158\t|\n+2923157\t|\n+2923156\t|\n+2923155\t|\n+2923154\t|\n+2923153\t|\n+2923152\t|\n+2923151\t|\n+2923150\t|\n+2923149\t|\n+2923148\t|\n+2923147\t|\n+2923146\t|\n+2923145\t|\n+2923144\t|\n+2923143\t|\n+2923142\t|\n+2923141\t|\n+2923140\t|\n+2923139\t|\n+2923138\t|\n+2923137\t|\n+2923136\t|\n+2923135\t|\n+2923134\t|\n+2923133\t|\n+2923132\t|\n+2923131\t|\n+2923130\t|\n+2923129\t|\n+2923128\t|\
n+2923127\t|\n+2923126\t|\n+2923125\t|\n+2923124\t|\n+2923123\t|\n+2923122\t|\n+2923121\t|\n+2923120\t|\n+2923119\t|\n+2923118\t|\n+2923117\t|\n+2923116\t|\n+2923115\t|\n+2923114\t|\n+2923113\t|\n+2923112\t|\n+2923111\t|\n+2923110\t|\n+2923109\t|\n+2923108\t|\n+2923107\t|\n+2923106\t|\n+2923105\t|\n+2923104\t|\n+2923103\t|\n+2923102\t|\n+2923101\t|\n+2923100\t|\n+2923099\t|\n+2923098\t|\n+2923097\t|\n+2923096\t|\n+2923095\t|\n+2923094\t|\n+2923093\t|\n+2923092\t|\n+2923091\t|\n+2923090\t|\n+2923089\t|\n+2923088\t|\n+2923087\t|\n+2923086\t|\n+2923085\t|\n+2923084\t|\n+2923083\t|\n+2923082\t|\n+2923081\t|\n+2923080\t|\n+2923078\t|\n+2923077\t|\n+2923076\t|\n+2923075\t|\n+2923074\t|\n+2923073\t|\n+2923072\t|\n+2923071'..b'004\t|\n+2673003\t|\n+2673001\t|\n+2673000\t|\n+2672999\t|\n+2672998\t|\n+2672997\t|\n+2672996\t|\n+2672994\t|\n+2672992\t|\n+2672991\t|\n+2672990\t|\n+2672989\t|\n+2672988\t|\n+2672986\t|\n+2672985\t|\n+2672984\t|\n+2672983\t|\n+2672981\t|\n+2672980\t|\n+2672979\t|\n+2672978\t|\n+2672977\t|\n+2672976\t|\n+2672973\t|\n+2672972\t|\n+2672971\t|\n+2672970\t|\n+2672969\t|\n+2672968\t|\n+2672966\t|\n+2672965\t|\n+2672964\t|\n+2672962\t|\n+2672961\t|\n+2672958\t|\n+2672957\t|\n+2672956\t|\n+2672955\t|\n+2672953\t|\n+2672952\t|\n+2672950\t|\n+2672949\t|\n+2672946\t|\n+2672945\t|\n+2672944\t|\n+2672943\t|\n+2672942\t|\n+2672941\t|\n+2672939\t|\n+2672938\t|\n+2672936\t|\n+2672934\t|\n+2672933\t|\n+2672932\t|\n+2672930\t|\n+2672929\t|\n+2672928\t|\n+2672927\t|\n+2672926\t|\n+2672923\t|\n+2672922\t|\n+2672920\t|\n+2672919\t|\n+2672918\t|\n+2672917\t|\n+2672916\t|\n+2672915\t|\n+2672914\t|\n+2672913\t|\n+2672912\t|\n+2672910\t|\n+2672909\t|\n+2672908\t|\n+2672907\t|\n+2672906\t|\n+2672905\t|\n+2672904\t|\n+2672900\t|\n+2672899\t|\n+2672895\t|\n+2672894\t|\n+2672893\t|\n+2672891\t|\n+2672890\t|\n+2672889\t|\n+2672887\t|\n+2672886\t|\n+2672885\t|\n+2672881\t|\n+2672880\t|\n+2672878\t|\n+2672877\t|\n+2672876\t|\n+2672875\t|\n+2672874\t|\n+2672872\t|\n+2672870\t|\n+2
672869\t|\n+2672868\t|\n+2672866\t|\n+2672863\t|\n+2672860\t|\n+2672859\t|\n+2672858\t|\n+2672854\t|\n+2672853\t|\n+2672851\t|\n+2672850\t|\n+2672849\t|\n+2672848\t|\n+2672847\t|\n+2672846\t|\n+2672845\t|\n+2672844\t|\n+2672843\t|\n+2672840\t|\n+2672837\t|\n+2672833\t|\n+2672832\t|\n+2672831\t|\n+2672830\t|\n+2672829\t|\n+2672828\t|\n+2672825\t|\n+2672824\t|\n+2672823\t|\n+2672817\t|\n+2672815\t|\n+2672814\t|\n+2672810\t|\n+2672809\t|\n+2672808\t|\n+2672807\t|\n+2672805\t|\n+2672804\t|\n+2672803\t|\n+2672802\t|\n+2672801\t|\n+2672800\t|\n+2672799\t|\n+2672798\t|\n+2672797\t|\n+2672795\t|\n+2672794\t|\n+2672792\t|\n+2672791\t|\n+2672790\t|\n+2672789\t|\n+2672788\t|\n+2672786\t|\n+2672783\t|\n+2672782\t|\n+2672781\t|\n+2672780\t|\n+2672779\t|\n+2672778\t|\n+2672776\t|\n+2672775\t|\n+2672773\t|\n+2672772\t|\n+2672770\t|\n+2672769\t|\n+2672768\t|\n+2672767\t|\n+2672766\t|\n+2672765\t|\n+2672764\t|\n+2672762\t|\n+2672759\t|\n+2672756\t|\n+2672755\t|\n+2672754\t|\n+2672753\t|\n+2672752\t|\n+2672751\t|\n+2672750\t|\n+2672748\t|\n+2672745\t|\n+2672744\t|\n+2672743\t|\n+2672741\t|\n+2672740\t|\n+2672739\t|\n+2672738\t|\n+2672737\t|\n+2672736\t|\n+2672735\t|\n+2672734\t|\n+2672733\t|\n+2672732\t|\n+2672731\t|\n+2672730\t|\n+2672729\t|\n+2672728\t|\n+2672726\t|\n+2672724\t|\n+2672723\t|\n+2672721\t|\n+2672719\t|\n+2672718\t|\n+2672717\t|\n+2672716\t|\n+2672715\t|\n+2672714\t|\n+2672712\t|\n+2672710\t|\n+2672707\t|\n+2672706\t|\n+2672705\t|\n+2672704\t|\n+2672703\t|\n+2672702\t|\n+2672700\t|\n+2672699\t|\n+2672698\t|\n+2672694\t|\n+2672693\t|\n+2672692\t|\n+2672691\t|\n+2672689\t|\n+2672688\t|\n+2672687\t|\n+2672685\t|\n+2672684\t|\n+2672683\t|\n+2672682\t|\n+2672681\t|\n+2672679\t|\n+2672678\t|\n+2672676\t|\n+2672675\t|\n+2672674\t|\n+2672673\t|\n+2672672\t|\n+2672671\t|\n+2672670\t|\n+2672669\t|\n+2672667\t|\n+2672666\t|\n+2672665\t|\n+2672664\t|\n+2672663\t|\n+2672662\t|\n+2672661\t|\n+2672658\t|\n+2672656\t|\n+2672655\t|\n+2672654\t|\n+2672649\t|\n+2672648\t|\n+2672647\t|\n
+2672646\t|\n+2672640\t|\n+2672635\t|\n+2672634\t|\n+2672632\t|\n+2672631\t|\n+2672630\t|\n+2672629\t|\n+2672628\t|\n+2672627\t|\n+2672626\t|\n+2672624\t|\n+2672623\t|\n+2672620\t|\n+2672619\t|\n+2672618\t|\n+2672617\t|\n+2672616\t|\n+2672614\t|\n+2672613\t|\n+2672612\t|\n+2672611\t|\n+2672610\t|\n+2672609\t|\n+2672608\t|\n+2672607\t|\n+2672606\t|\n+2672605\t|\n+2672604\t|\n+2672603\t|\n+2672602\t|\n+2672598\t|\n+2672597\t|\n+2672592\t|\n+2672591\t|\n+2672589\t|\n+2672588\t|\n+2672585\t|\n+2672584\t|\n+2672582\t|\n+2672580\t|\n+2672578\t|\n+2672577\t|\n+2672576\t|\n+2672575\t|\n+2672573\t|\n+2672566\t|\n+2672154\t|\n+2672150\t|\n+2672147\t|\n+2672146\t|\n+2672145\t|\n+2672144\t|\n+2672143\t|\n+2672140\t|\n+2672139\t|\n+2672138\t|\n+2672137\t|\n+2672136\t|\n+2672134\t|\n+2672133\t|\n+2672132\t|\n+2672131\t|\n+2672129\t|\n+2672127\t|\n+2672126\t|\n+2672124\t|\n+2672123\t|\n+2672122\t|\n+2672121\t|\n+2672120\t|\n+2672119\t|\n+2672118\t|\n+2672117\t|\n+2672114\t|\n+2672109\t|\n+2672108\t|\n+2672106\t|\n+2672105\t|\n+2672104\t|\n+2672099\t|\n+2672096\t|\n+2672095\t|\n+2672093\t|\n+2672090\t|\n+2672087\t|\n+2672086\t|\n+2672085\t|\n+2672082\t|\n+2672079\t|\n+2672075\t|\n+2672074\t|\n+2672073\t|\n+2672071\t|\n+2672068\t|\n+2672065\t|\n+2672064\t|\n+2672063\t|\n+2672062\t|\n+2672060\t|\n+2672059\t|\n+2672058\t|\n+2672057\t|\n+2672056\t|\n+2672055\t|\n+2672054\t|\n+2672053\t|\n+2672052\t|\n+2672050\t|\n+2672048\t|\n+2672047\t|\n+2672046\t|\n'
b
diff -r 000000000000 -r 1e10251b9615 test-data/test-db/division.dmp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/division.dmp Sun Aug 25 13:19:14 2024 +0000
b
@@ -0,0 +1,12 @@
+0 | BCT | Bacteria |   |
+1 | INV | Invertebrates |   |
+2 | MAM | Mammals |   |
+3 | PHG | Phages |   |
+4 | PLN | Plants and Fungi |   |
+5 | PRI | Primates |   |
+6 | ROD | Rodents |   |
+7 | SYN | Synthetic and Chimeric |   |
+8 | UNA | Unassigned | No species nodes should inherit this division assignment |
+9 | VRL | Viruses |   |
+10 | VRT | Vertebrates |   |
+11 | ENV | Environmental samples | Anonymous sequences cloned directly from the environment |
b
diff -r 000000000000 -r 1e10251b9615 test-data/test-db/gc.prt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/gc.prt Sun Aug 25 13:19:14 2024 +0000
b
b'@@ -0,0 +1,358 @@\n+--**************************************************************************\n+--  This is the NCBI genetic code table\n+--  Initial base data set from Andrzej Elzanowski while at PIR International\n+--  Addition of Eubacterial and Alternative Yeast by J.Ostell at NCBI\n+--  Base 1-3 of each codon have been added as comments to facilitate\n+--    readability at the suggestion of Peter Rice, EMBL\n+--  Later additions by Taxonomy Group staff at NCBI\n+--\n+--  Version 4.6\n+--     Renamed genetic code 24 to Rhabdopleuridae Mitochondrial\n+--\n+--  Version 4.5\n+--     Added Cephalodiscidae mitochondrial genetic code 33\n+--\n+--  Version 4.4\n+--     Added GTG as start codon for genetic code 3\n+--     Added Balanophoraceae plastid genetic code 32\n+--\n+--  Version 4.3\n+--     Change to CTG -> Leu in genetic codes 27, 28, 29, 30\n+--\n+--  Version 4.2\n+--     Added Karyorelict nuclear genetic code 27\n+--     Added Condylostoma nuclear genetic code 28\n+--     Added Mesodinium nuclear genetic code 29\n+--     Added Peritrich nuclear genetic code 30\n+--     Added Blastocrithidia nuclear genetic code 31\n+--\n+--  Version 4.1\n+--     Added Pachysolen tannophilus nuclear genetic code 26\n+--\n+--  Version 4.0\n+--     Updated version to reflect numerous undocumented changes:\n+--     Corrected start codons for genetic code 25\n+--     Name of new genetic code is Candidate Division SR1 and Gracilibacteria\n+--     Added candidate division SR1 nuclear genetic code 25\n+--     Added GTG as start codon for genetic code 24\n+--     Corrected Pterobranchia Mitochondrial genetic code (24)\n+--     Added genetic code 24, Pterobranchia Mitochondrial\n+--     Genetic code 11 is now Bacterial, Archaeal and Plant Plastid\n+--     Fixed capitalization of mitochondrial in codes 22 and 23\n+--     Added GTG, ATA, and TTG as alternative start codons to code 13\n+--\n+--  Version 3.9\n+--     Code 14 differs from code 9 only by translating UAA to Tyr rather 
than\n+--     STOP.  A recent study (Telford et al, 2000) has found no evidence that\n+--     the codon UAA codes for Tyr in the flatworms, but other opinions exist.\n+--     There are very few GenBank records that are translated with code 14,\n+--     but a test translation shows that retranslating these records with code\n+--     9 can cause premature terminations.  Therefore, GenBank will maintain\n+--     code 14 until further information becomes available.\n+--\n+--  Version 3.8\n+--     Added GTG start to Echinoderm mitochondrial code, code 9\n+--\n+--  Version 3.7\n+--     Added code 23 Thraustochytrium mitochondrial code\n+--        formerly OGMP code 93\n+--        submitted by Gertraude Berger, Ph.D.\n+--\n+--  Version 3.6\n+--     Added code 22 TAG-Leu, TCA-stop\n+--        found in mitochondrial DNA of Scenedesmus obliquus\n+--        submitted by Gertraude Berger, Ph.D.\n+--        Organelle Genome Megasequencing Program, Univ Montreal\n+--\n+--  Version 3.5\n+--     Added code 21, Trematode Mitochondrial\n+--       (as deduced from: Garey & Wolstenholme,1989; Ohama et al, 1990)\n+--     Added code 16, Chlorophycean Mitochondrial\n+--       (TAG can translated to Leucine instaed to STOP in chlorophyceans\n+--        and fungi)\n+--\n+--  Version 3.4\n+--     Added CTG,TTG as allowed alternate start codons in Standard code.\n+--        Prats et al. 1989, Hann et al. 1992\n+--\n+--  Version 3.3 - 10/13/95\n+--     Added alternate intiation codon ATC to code 5\n+--        based on complete mitochondrial genome of honeybee\n+--        Crozier and Crozier (1993)\n+--\n+--  Version 3.2 - 6/24/95\n+--  Code       Comments\n+--   10        Alternative Ciliate Macronuclear renamed to Euplotid Macro...\n+--   15        Blepharisma Macro.. code added\n+--    5        Invertebrate Mito.. 
GTG allowed as alternate initiator\n+--   11        Eubacterial renamed to Bacterial as most alternate starts\n+--               have been found in Archea\n+--\n+--\n+--  Version 3.1 - 1995\n+--  Updated as per Andrzej Elzanowski at NCBI\n+--     Complete documentation in NCBI'..b'5 ,\n+  ncbieaa  "FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",\n+  sncbieaa "---M------**-----------------------M---------------M------------"\n+  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ } ,\n+ {\n+  name "Pachysolen tannophilus Nuclear" ,\n+  id 26 ,\n+  ncbieaa  "FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",\n+  sncbieaa "----------**--*----M---------------M----------------------------"\n+  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ } ,\n+ {\n+  name "Karyorelict Nuclear" ,\n+  id 27 ,\n+  ncbieaa  "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",\n+  sncbieaa "--------------*--------------------M----------------------------"\n+  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ } ,\n+ {\n+  name "Condylostoma Nuclear" ,\n+  id 28 ,\n+  ncbieaa  "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",\n+  sncbieaa "----------**--*--------------------M----------------------------"\n+  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+  -- Base3  
TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ } ,\n+ {\n+  name "Mesodinium Nuclear" ,\n+  id 29 ,\n+  ncbieaa  "FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",\n+  sncbieaa "--------------*--------------------M----------------------------"\n+  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ } ,\n+ {\n+  name "Peritrich Nuclear" ,\n+  id 30 ,\n+  ncbieaa  "FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",\n+  sncbieaa "--------------*--------------------M----------------------------"\n+  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ } ,\n+ {\n+  name "Blastocrithidia Nuclear" ,\n+  id 31 ,\n+  ncbieaa  "FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",\n+  sncbieaa "----------**-----------------------M----------------------------"\n+  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ } ,\n+ {\n+  name "Balanophoraceae Plastid" ,\n+  id 32 ,\n+  ncbieaa  "FFLLSSSSYY*WCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",\n+  sncbieaa "---M------*---*----M------------MMMM---------------M------------"\n+  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ } ,\n+ {\n+  name "Cephalodiscidae Mitochondrial" ,\n+  id 33 ,\n+  ncbieaa  
"FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",\n+  sncbieaa "---M-------*-------M---------------M---------------M------------"\n+  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ }\n+}\n'
b
diff -r 000000000000 -r 1e10251b9615 test-data/test-db/gencode.dmp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/gencode.dmp Sun Aug 25 13:19:14 2024 +0000
b
@@ -0,0 +1,28 @@
+0 | | Unspecified |                                                                   |                                                                   |
+1 | | Standard | FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | ---M------**--*----M---------------M----------------------------  |
+2 | | Vertebrate Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG  | ----------**--------------------MMMM----------**---M------------  |
+3 | | Yeast Mitochondrial | FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | ----------**----------------------MM---------------M------------  |
+4 | | Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | --MM------**-------M------------MMMM---------------M------------  |
+5 | | Invertebrate Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG  | ---M------**--------------------MMMM---------------M------------  |
+6 | | Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear | FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | --------------*--------------------M----------------------------  |
+9 | | Echinoderm Mitochondrial; Flatworm Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG  | ----------**-----------------------M---------------M------------  |
+10 | | Euplotid Nuclear | FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | ----------**-----------------------M----------------------------  |
+11 | | Bacterial, Archaeal and Plant Plastid | FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | ---M------**--*----M------------MMMM---------------M------------  |
+12 | | Alternative Yeast Nuclear | FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | ----------**--*----M---------------M----------------------------  |
+13 | | Ascidian Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG  | ---M------**----------------------MM---------------M------------  |
+14 | | Alternative Flatworm Mitochondrial | FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG  | -----------*-----------------------M----------------------------  |
+15 | | Blepharisma Macronuclear | FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | ----------*---*--------------------M----------------------------  |
+16 | | Chlorophycean Mitochondrial | FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | ----------*---*--------------------M----------------------------  |
+21 | | Trematode Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG  | ----------**-----------------------M---------------M------------  |
+22 | | Scenedesmus obliquus mitochondrial | FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | ------*---*---*--------------------M----------------------------  |
+23 | | Thraustochytrium mitochondrial code | FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | --*-------**--*-----------------M--M---------------M------------  |
+24 | | Rhabdopleuridae Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG | ---M------**-------M---------------M---------------M------------ |
+25 | | Candidate Division SR1 and Gracilibacteria | FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | ---M------**-----------------------M---------------M------------  |
+26 | | Pachysolen tannophilus Nuclear | FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | ----------**--*----M---------------M----------------------------  |
+27 | | Karyorelict Nuclear | FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | --------------*--------------------M----------------------------  |
+28 | | Condylostoma Nuclear | FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | ----------**--*--------------------M----------------------------  |
+29 | | Mesodinium Nuclear | FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | --------------*--------------------M----------------------------  |
+30 | | Peritrich Nuclear | FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | --------------*--------------------M----------------------------  |
+31 | | Blastocrithidia Nuclear | FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | ----------**-----------------------M----------------------------  |
+32 | | Balanophoraceae Plastid | FFLLSSSSYY*WCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG  | ---M------*---*----M------------MMMM---------------M------------  |
+33 | | Cephalodiscidae Mitochondrial | FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG  | ---M-------*-------M---------------M---------------M------------  |
b
diff -r 000000000000 -r 1e10251b9615 test-data/test-db/names.dmp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/names.dmp Sun Aug 25 13:19:14 2024 +0000
[
@@ -0,0 +1,74 @@
+83333 | Escherichia coli K-12 | | scientific name |
+83333 | Escherichia coli K12 | | equivalent name |
+562 | "Bacillus coli" Migula 1895 | | authority |
+562 | "Bacterium coli commune" Escherich 1885 | | authority |
+562 | "Bacterium coli" (Migula 1895) Lehmann and Neumann 1896 | | authority |
+562 | ATCC 11775 | | type material |
+562 | Bacillus coli | | synonym |
+562 | Bacterium coli | | synonym |
+562 | Bacterium coli commune | | synonym |
+562 | CCUG 24 | | type material |
+562 | CCUG 29300 | | type material |
+562 | CIP 54.8 | | type material |
+562 | DSM 30083 | | type material |
+562 | Enterococcus coli | | synonym |
+562 | Escherchia coli | | misspelling |
+562 | Escherichia coli | | scientific name |
+562 | Escherichia coli (Migula 1895) Castellani and Chalmers 1919 | | authority |
+562 | Escherichia sp. MAR | | includes |
+562 | Escherichia/Shigella coli | | equivalent name |
+562 | Eschericia coli | | misspelling |
+562 | JCM 1649 | | type material |
+562 | LMG 2092 | | type material |
+562 | NBRC 102203 | | type material |
+562 | NCCB 54008 | | type material |
+562 | NCTC 9001 | | type material |
+562 | bacterium 10a | | includes |
+562 | bacterium E3 | | includes |
+561 | Escherchia | | misspelling |
+561 | Escherichia | | scientific name |
+561 | Escherichia Castellani and Chalmers 1919 | | authority |
+543 | Enterobacteraceae | | synonym |
+543 | Enterobacteraceae (ex Lapage 1979) Lapage 1982, fam. nov., nom. rev. | | synonym |
+543 | Enterobacteriaceae | | scientific name |
+543 | Enterobacteriaceae (ex Rahn 1937) Ewing et al. 1980, fam. nov., nom. rev. | | synonym |
+543 | Enterobacteriaceae Rahn 1937 | | synonym |
+543 | gamma-3 proteobacteria | gamma-3 proteobacteria <#1> | in-part |
+91347 | 'Enterobacteriales' | | synonym |
+91347 | Enterobacteriaceae and related endosymbionts | | synonym |
+91347 | Enterobacteriaceae group | | synonym |
+91347 | Enterobacteriales | | scientific name |
+91347 | enterobacteria | enterobacteria<blast91347> | blast name |
+91347 | gamma-3 proteobacteria | gamma-3 proteobacteria <#5> | in-part |
+1236 | Gammaproteobacteria | | scientific name |
+1236 | Gammaproteobacteria Garrity et al. 2005 | | synonym |
+1236 | Proteobacteria gamma subdivision | | synonym |
+1236 | Purple bacteria, gamma subdivision | | synonym |
+1236 | g-proteobacteria | gamma proteos<blast1236> | blast name |
+1236 | gamma proteobacteria | | synonym |
+1236 | gamma subdivision | | synonym |
+1236 | gamma subgroup | | synonym |
+1224 | Proteobacteria | | scientific name |
+1224 | Proteobacteria Garrity et al. 2005 | | authority |
+1224 | Proteobacteria [class] Stackebrandt et al. 1988 | | authority |
+1224 | not Proteobacteria Cavalier-Smith 2002 | | authority |
+1224 | proteobacteria | proteobacteria<blast1224> | blast name |
+1224 | purple bacteria | | common name |
+1224 | purple bacteria and relatives | | common name |
+1224 | purple non-sulfur bacteria | | common name |
+1224 | purple photosynthetic bacteria | | common name |
+1224 | purple photosynthetic bacteria and relatives | | common name |
+2 | Bacteria | Bacteria <prokaryote> | scientific name |
+2 | Monera | Monera <Bacteria> | in-part |
+2 | Procaryotae | Procaryotae <Bacteria> | in-part |
+2 | Prokaryota | Prokaryota <Bacteria> | in-part |
+2 | Prokaryotae | Prokaryotae <Bacteria> | in-part |
+2 | bacteria | bacteria <blast2> | blast name |
+2 | eubacteria | | genbank common name |
+2 | not Bacteria Haeckel 1894 | | synonym |
+2 | prokaryote | prokaryote <Bacteria> | in-part |
+2 | prokaryotes | prokaryotes <Bacteria> | in-part |
+1 | all | | synonym |
+1 | root | | scientific name |
+131567 | biota | | synonym |
+131567 | cellular organisms | | scientific name |
b
diff -r 000000000000 -r 1e10251b9615 test-data/test-db/nodes.amber.ft
b
Binary file test-data/test-db/nodes.amber.ft has changed
b
diff -r 000000000000 -r 1e10251b9615 test-data/test-db/nodes.dmp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/nodes.dmp Sun Aug 25 13:19:14 2024 +0000
b
@@ -0,0 +1,10 @@
+83333 | 562 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | |
+562 | 561 | species | EC | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | |
+561 | 543 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+543 | 91347 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+91347 | 1236 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+1236 | 1224 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+1224 | 2 | phylum | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+2 | 131567 | superkingdom | | 0 | 0 | 11 | 0 | 0 | 0 | 0 | 0 | |
+131567 | 1 | no rank | | 8 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | |
+1 | 1 | no rank | | 8 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | |
b
diff -r 000000000000 -r 1e10251b9615 test-data/test-db/readme.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/readme.txt Sun Aug 25 13:19:14 2024 +0000
b
@@ -0,0 +1,61 @@
+*.dmp files are bcp-like dumps from the GenBank taxonomy database.
+
+General information.
+Field terminator is "\t|\t"
+Row terminator is "\t|\n"
+
+nodes.dmp file consists of taxonomy nodes. The description for each node includes the following
+fields:
+ tax_id -- node id in GenBank taxonomy database
+  parent tax_id -- parent node id in GenBank taxonomy database
+  rank -- rank of this node (superkingdom, kingdom, ...) 
+  embl code -- locus-name prefix; not unique
+  division id -- see division.dmp file
+  inherited div flag  (1 or 0) -- 1 if node inherits division from parent
+  genetic code id -- see gencode.dmp file
+  inherited GC  flag  (1 or 0) -- 1 if node inherits genetic code from parent
+  mitochondrial genetic code id -- see gencode.dmp file
+  inherited MGC flag  (1 or 0) -- 1 if node inherits mitochondrial gencode from parent
+  GenBank hidden flag (1 or 0)            -- 1 if name is suppressed in GenBank entry lineage
+  hidden subtree root flag (1 or 0)       -- 1 if this subtree has no sequence data yet
+  comments -- free-text comments and citations
+
+Taxonomy names file (names.dmp):
+ tax_id -- the id of node associated with this name
+ name_txt -- name itself
+ unique name -- the unique variant of this name if name not unique
+ name class -- (synonym, common name, ...)
+
+Divisions file (division.dmp):
+ division id -- taxonomy database division id
+ division cde -- GenBank division code (three characters), e.g. BCT, PLN, VRT, MAM, PRI...
+ division name -- full name of the division
+ comments
+
+Genetic codes file (gencode.dmp):
+ genetic code id -- GenBank genetic code id
+ abbreviation -- genetic code name abbreviation
+ name -- genetic code name
+ cde -- translation table for this genetic code
+ starts -- start codons for this genetic code
+
+Deleted nodes file (delnodes.dmp):
+ tax_id -- deleted node id
+
+Merged nodes file (merged.dmp):
+ old_tax_id                              -- id of the node which has been merged
+ new_tax_id                              -- id of the node which is the result of merging
+
+Citations file (citations.dmp):
+ cit_id -- the unique id of citation
+ cit_key -- citation key
+ pubmed_id -- unique id in PubMed database (0 if not in PubMed)
+ medline_id -- unique id in MedLine database (0 if not in MedLine)
+ url -- URL associated with citation
+ text -- any text (usually article name and authors).
+ -- The following characters are escaped in this text by a backslash:
+ -- newline (appear as "\n"),
+ -- tab character ("\t"),
+ -- double quotes ('\"'),
+ -- backslash character ("\\").
+ taxid_list -- list of node ids separated by a single space
b
diff -r 000000000000 -r 1e10251b9615 test-data/test_add.fasta.gz
b
Binary file test-data/test_add.fasta.gz has changed
b
diff -r 000000000000 -r 1e10251b9615 test-data/test_add.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_add.tsv Sun Aug 25 13:19:14 2024 +0000
b
b'@@ -0,0 +1,1464 @@\n+#CAMI Format for Binning\n+@Version:0.9.0\n+@SampleID:_SAMPLEID_\n+@@SEQUENCEID\tBINID\n+ERR1553846.1-ORL20E0-7-1-SSU_rRNA_eukarya/283-222\ttest_add/test_add.fasta\n+ERR1553846.10-ORL20E0-77-1-SSU_rRNA_eukarya/285-239\ttest_add/test_add.fasta\n+ERR1553846.100-ORL20E0-553-1-SSU_rRNA_eukarya/239-203\ttest_add/test_add.fasta\n+ERR1553846.10000-ORL20E0-51074-1-SSU_rRNA_eukarya/262-177\ttest_add/test_add.fasta\n+ERR1553846.100000-ORL20E0-512558-1-SSU_rRNA_eukarya/264-177\ttest_add/test_add.fasta\n+ERR1553846.100001-ORL20E0-512560-1-SSU_rRNA_eukarya/278-205\ttest_add/test_add.fasta\n+ERR1553846.100003-ORL20E0-512571-1-SSU_rRNA_eukarya/252-202\ttest_add/test_add.fasta\n+ERR1553846.100005-ORL20E0-512583-1-SSU_rRNA_eukarya/261-177\ttest_add/test_add.fasta\n+ERR1553846.100006-ORL20E0-512585-1-SSU_rRNA_eukarya/286-231\ttest_add/test_add.fasta\n+ERR1553846.100007-ORL20E0-512586-1-SSU_rRNA_eukarya/263-177\ttest_add/test_add.fasta\n+ERR1553846.100008-ORL20E0-512587-1-SSU_rRNA_eukarya/259-177\ttest_add/test_add.fasta\n+ERR1553846.100013-ORL20E0-512607-1-SSU_rRNA_eukarya/264-177\ttest_add/test_add.fasta\n+ERR1553846.100014-ORL20E0-512618-1-SSU_rRNA_eukarya/259-177\ttest_add/test_add.fasta\n+ERR1553846.100015-ORL20E0-512621-1-SSU_rRNA_eukarya/220-177\ttest_add/test_add.fasta\n+ERR1553846.100016-ORL20E0-512624-1-SSU_rRNA_eukarya/256-177\ttest_add/test_add.fasta\n+ERR1553846.10002-ORL20E0-51084-1-SSU_rRNA_eukarya/285-226\ttest_add/test_add.fasta\n+ERR1553846.100020-ORL20E0-512673-1-SSU_rRNA_eukarya/233-202\ttest_add/test_add.fasta\n+ERR1553846.100023-ORL20E0-512696-1-SSU_rRNA_eukarya/257-193\ttest_add/test_add.fasta\n+ERR1553846.100029-ORL20E0-512721-1-SSU_rRNA_eukarya/239-177\ttest_add/test_add.fasta\n+ERR1553846.100030-ORL20E0-512739-1-SSU_rRNA_eukarya/233-193\ttest_add/test_add.fasta\n+ERR1553846.100031-ORL20E0-512754-1-SSU_rRNA_eukarya/275-202\ttest_add/test_add.fasta\n+ERR1553846.100032-ORL20E0-512758-1-SSU_rRNA_eukarya/235-193\ttest_add/test_add.fasta\n+ER
R1553846.100033-ORL20E0-512763-1-SSU_rRNA_eukarya/240-193\ttest_add/test_add.fasta\n+ERR1553846.100035-ORL20E0-512775-1-SSU_rRNA_eukarya/273-203\ttest_add/test_add.fasta\n+ERR1553846.100037-ORL20E0-512804-1-SSU_rRNA_eukarya/262-177\ttest_add/test_add.fasta\n+ERR1553846.100038-ORL20E0-512807-1-SSU_rRNA_eukarya/249-193\ttest_add/test_add.fasta\n+ERR1553846.10004-ORL20E0-51096-1-SSU_rRNA_eukarya/248-177\ttest_add/test_add.fasta\n+ERR1553846.100041-ORL20E0-512821-1-SSU_rRNA_eukarya/263-177\ttest_add/test_add.fasta\n+ERR1553846.100042-ORL20E0-512822-1-SSU_rRNA_eukarya/252-172\ttest_add/test_add.fasta\n+ERR1553846.100048-ORL20E0-512847-1-SSU_rRNA_eukarya/283-202\ttest_add/test_add.fasta\n+ERR1553846.100049-ORL20E0-512851-1-SSU_rRNA_eukarya/250-193\ttest_add/test_add.fasta\n+ERR1553846.100050-ORL20E0-512852-1-SSU_rRNA_eukarya/279-210\ttest_add/test_add.fasta\n+ERR1553846.100054-ORL20E0-512879-1-SSU_rRNA_eukarya/230-177\ttest_add/test_add.fasta\n+ERR1553846.100055-ORL20E0-512906-1-SSU_rRNA_eukarya/251-193\ttest_add/test_add.fasta\n+ERR1553846.100058-ORL20E0-512912-1-SSU_rRNA_eukarya/259-177\ttest_add/test_add.fasta\n+ERR1553846.100060-ORL20E0-512922-1-SSU_rRNA_eukarya/274-193\ttest_add/test_add.fasta\n+ERR1553846.100061-ORL20E0-512924-1-SSU_rRNA_eukarya/257-199\ttest_add/test_add.fasta\n+ERR1553846.100066-ORL20E0-512972-1-SSU_rRNA_eukarya/264-177\ttest_add/test_add.fasta\n+ERR1553846.100068-ORL20E0-512984-1-SSU_rRNA_eukarya/275-205\ttest_add/test_add.fasta\n+ERR1553846.100069-ORL20E0-512993-1-SSU_rRNA_eukarya/284-203\ttest_add/test_add.fasta\n+ERR1553846.100071-ORL20E0-513003-1-SSU_rRNA_eukarya/273-205\ttest_add/test_add.fasta\n+ERR1553846.100074-ORL20E0-513010-1-SSU_rRNA_eukarya/240-199\ttest_add/test_add.fasta\n+ERR1553846.100076-ORL20E0-513032-1-SSU_rRNA_eukarya/276-193\ttest_add/test_add.fasta\n+ERR1553846.100077-ORL20E0-513033-1-SSU_rRNA_eukarya/244-88\ttest_add/test_add.fasta\n+ERR1553846.100078-ORL20E0-513038-1-SSU_rRNA_eukarya/251-193\ttest_add/test_add.fasta\n+ERR1
553846.100079-ORL20E0-513041-1-SSU_rRNA_eukarya/264-177\ttest_add/test_add.fasta\n+ERR1553846.10008'..b'.fasta\n+ERR1553846.102614-ORL20E0-526193-1-SSU_rRNA_eukarya/270-202\ttest_add/test_add.fasta\n+ERR1553846.102615-ORL20E0-526197-1-SSU_rRNA_eukarya/264-177\ttest_add/test_add.fasta\n+ERR1553846.102618-ORL20E0-526226-1-SSU_rRNA_eukarya/269-207\ttest_add/test_add.fasta\n+ERR1553846.102620-ORL20E0-526232-1-SSU_rRNA_eukarya/259-177\ttest_add/test_add.fasta\n+ERR1553846.102623-ORL20E0-526242-1-SSU_rRNA_eukarya/256-172\ttest_add/test_add.fasta\n+ERR1553846.102624-ORL20E0-526253-1-SSU_rRNA_eukarya/239-177\ttest_add/test_add.fasta\n+ERR1553846.102625-ORL20E0-526254-1-SSU_rRNA_eukarya/284-208\ttest_add/test_add.fasta\n+ERR1553846.102626-ORL20E0-526265-1-SSU_rRNA_eukarya/211-177\ttest_add/test_add.fasta\n+ERR1553846.102628-ORL20E0-526281-1-SSU_rRNA_eukarya/282-201\ttest_add/test_add.fasta\n+ERR1553846.102632-ORL20E0-526294-1-SSU_rRNA_eukarya/259-177\ttest_add/test_add.fasta\n+ERR1553846.102634-ORL20E0-526297-1-SSU_rRNA_eukarya/250-198\ttest_add/test_add.fasta\n+ERR1553846.102636-ORL20E0-526305-1-SSU_rRNA_eukarya/283-202\ttest_add/test_add.fasta\n+ERR1553846.102637-ORL20E0-526307-1-SSU_rRNA_eukarya/244-177\ttest_add/test_add.fasta\n+ERR1553846.102638-ORL20E0-526318-1-SSU_rRNA_eukarya/247-193\ttest_add/test_add.fasta\n+ERR1553846.102641-ORL20E0-526323-1-SSU_rRNA_eukarya/286-231\ttest_add/test_add.fasta\n+ERR1553846.102642-ORL20E0-526327-1-SSU_rRNA_eukarya/256-199\ttest_add/test_add.fasta\n+ERR1553846.102643-ORL20E0-526332-1-SSU_rRNA_eukarya/262-177\ttest_add/test_add.fasta\n+ERR1553846.102644-ORL20E0-526340-1-SSU_rRNA_eukarya/236-189\ttest_add/test_add.fasta\n+ERR1553846.102645-ORL20E0-526341-1-SSU_rRNA_eukarya/264-177\ttest_add/test_add.fasta\n+ERR1553846.102647-ORL20E0-526356-1-SSU_rRNA_eukarya/258-202\ttest_add/test_add.fasta\n+ERR1553846.102649-ORL20E0-526361-1-SSU_rRNA_eukarya/245-212\ttest_add/test_add.fasta\n+ERR1553846.102652-ORL20E0-526384-1-SSU_rRNA_eukarya/233-193\
ttest_add/test_add.fasta\n+ERR1553846.102654-ORL20E0-526401-1-SSU_rRNA_eukarya/261-177\ttest_add/test_add.fasta\n+ERR1553846.102655-ORL20E0-526406-1-SSU_rRNA_eukarya/233-193\ttest_add/test_add.fasta\n+ERR1553846.102657-ORL20E0-526415-1-SSU_rRNA_eukarya/263-177\ttest_add/test_add.fasta\n+ERR1553846.102658-ORL20E0-526427-1-SSU_rRNA_eukarya/256-187\ttest_add/test_add.fasta\n+ERR1553846.102660-ORL20E0-526431-1-SSU_rRNA_eukarya/229-177\ttest_add/test_add.fasta\n+ERR1553846.102663-ORL20E0-526449-1-SSU_rRNA_eukarya/284-232\ttest_add/test_add.fasta\n+ERR1553846.102665-ORL20E0-526459-1-SSU_rRNA_eukarya/227-78\ttest_add/test_add.fasta\n+ERR1553846.102666-ORL20E0-526460-1-SSU_rRNA_eukarya/254-210\ttest_add/test_add.fasta\n+ERR1553846.102667-ORL20E0-526464-1-SSU_rRNA_eukarya/272-232\ttest_add/test_add.fasta\n+ERR1553846.102670-ORL20E0-526477-1-SSU_rRNA_eukarya/284-210\ttest_add/test_add.fasta\n+ERR1553846.102671-ORL20E0-526479-1-SSU_rRNA_eukarya/283-202\ttest_add/test_add.fasta\n+ERR1553846.102672-ORL20E0-526487-1-SSU_rRNA_eukarya/284-204\ttest_add/test_add.fasta\n+ERR1553846.102673-ORL20E0-526491-1-SSU_rRNA_eukarya/246-177\ttest_add/test_add.fasta\n+ERR1553846.102675-ORL20E0-526502-1-SSU_rRNA_eukarya/260-177\ttest_add/test_add.fasta\n+ERR1553846.102676-ORL20E0-526503-1-SSU_rRNA_eukarya/264-177\ttest_add/test_add.fasta\n+ERR1553846.102680-ORL20E0-526508-1-SSU_rRNA_eukarya/259-203\ttest_add/test_add.fasta\n+ERR1553846.102682-ORL20E0-526514-1-SSU_rRNA_eukarya/284-203\ttest_add/test_add.fasta\n+ERR1553846.102683-ORL20E0-526528-1-SSU_rRNA_eukarya/264-177\ttest_add/test_add.fasta\n+ERR1553846.102687-ORL20E0-526545-1-SSU_rRNA_eukarya/263-177\ttest_add/test_add.fasta\n+ERR1553846.10269-ORL20E0-52574-1-SSU_rRNA_eukarya/237-192\ttest_add/test_add.fasta\n+ERR1553846.102691-ORL20E0-526566-1-SSU_rRNA_eukarya/214-177\ttest_add/test_add.fasta\n+ERR1553846.102693-ORL20E0-526588-1-SSU_rRNA_eukarya/277-193\ttest_add/test_add.fasta\n+ERR1553846.102694-ORL20E0-526594-1-SSU_rRNA_eukarya/281-201\tt
est_add/test_add.fasta\n+ERR1553846.102695-ORL20E0-526600-1-SSU_rRNA_eukarya/281-199\ttest_add/test_add.fasta\n+ERR1553846.102696-ORL20E0-526601-1-SSU_rRNA_eukarya/277-208\ttest_add/test_add.fasta\n'
b
diff -r 000000000000 -r 1e10251b9615 test-data/test_add1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_add1.fasta Sun Aug 25 13:19:14 2024 +0000
b
b'@@ -0,0 +1,3836 @@\n+>ERR1553846.1-ORL20E0-7-1-SSU_rRNA_eukarya/283-222\n+TCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCA\n+TT\n+>ERR1553846.10-ORL20E0-77-1-SSU_rRNA_eukarya/285-239\n+AAAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATT\n+>ERR1553846.100-ORL20E0-553-1-SSU_rRNA_eukarya/239-203\n+ACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATT\n+>ERR1553846.10000-ORL20E0-51074-1-SSU_rRNA_eukarya/262-177\n+GGCCGGAAAGTTATCCAAACTTGGTCATTTAGAGGAAGTAAGAGTCGTAACAAGGTTTCC\n+GTTGGTGAACCAGCGGAAGGATCATT\n+>ERR1553846.100000-ORL20E0-512558-1-SSU_rRNA_eukarya/264-177\n+CGGGCCGGAAAGTTATCCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTT\n+CCGTTGGTGAACCAGCGGAAGGATCATT\n+>ERR1553846.100001-ORL20E0-512560-1-SSU_rRNA_eukarya/278-205\n+GTTCAAACTTGGGCATTTAGATGAAGTAAAAGTCGTAACAAGGTTTCCGTAGGTGAACCT\n+GCGGCAGGATCATT\n+>ERR1553846.100003-ORL20E0-512571-1-SSU_rRNA_eukarya/252-202\n+AAGTAAAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATT\n+>ERR1553846.100005-ORL20E0-512583-1-SSU_rRNA_eukarya/261-177\n+GCCGGAAAGTTATCCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCCG\n+TTGGTGAACCAGCGGAAGGATCATT\n+>ERR1553846.100006-ORL20E0-512585-1-SSU_rRNA_eukarya/286-231\n+AGAGGAAGGTGAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATT\n+>ERR1553846.100007-ORL20E0-512586-1-SSU_rRNA_eukarya/263-177\n+GGGCCGGAAAGTTATCCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTC\n+GGTTGGTGAACCAGCGGAAGGATCATT\n+>ERR1553846.100008-ORL20E0-512587-1-SSU_rRNA_eukarya/259-177\n+CGGAAAGTTATCCAAACTTGGTCATTTAGAGGAATTAAAAGTCGTAACAAGGTTTCCGTT\n+GGTGAACTAGCGGAAGGATCATT\n+>ERR1553846.100013-ORL20E0-512607-1-SSU_rRNA_eukarya/264-177\n+CGGGCCGGAAAGTTATCCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTT\n+CCGTTGGCGAACCAGCGGAAGGATCATT\n+>ERR1553846.100014-ORL20E0-512618-1-SSU_rRNA_eukarya/259-177\n+CGGAAAGTTATCCAAACTTGGGCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCCGTT\n+GGTGAACCAGCGGAAGGATCATT\n+>ERR1553846.100015-ORL20E0-512621-1-SSU_rRNA_eukarya/220-177\n+AGTCGTAACAAGGTTTCCGTTGGTGAACCAGCGGAAGGATCATT\n+>ERR1553846.100016-ORL20E0-512624-1-SSU_rRNA_eukarya/256-177\n+AAAGTTATCCAAACTTGG
TCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCCGTTGGT\n+GAACCAGCGGAAGGATCATT\n+>ERR1553846.10002-ORL20E0-51084-1-SSU_rRNA_eukarya/285-226\n+ATTTAGAGGAAGTAAAAGTCGTAACAAGGTCTCCGTTGGTGAACCAGCGGAGGGATCATT\n+>ERR1553846.100020-ORL20E0-512673-1-SSU_rRNA_eukarya/233-202\n+GTTTCCGTAGGTGAACCTGCGGAAGGATCATT\n+>ERR1553846.100023-ORL20E0-512696-1-SSU_rRNA_eukarya/257-193\n+CGGTCGTTTAGAGGAAGTAAAAGTCGTAACAAGGTCTCCGTTGGTGAACCAGCGGAGGGA\n+TCATT\n+>ERR1553846.100029-ORL20E0-512721-1-SSU_rRNA_eukarya/239-177\n+GTCAATTAGAGGAAGTAAAAGTCGTAACAAGGTTTCCGTTGGTGAACCAGCGGAAGGATC\n+ATT\n+>ERR1553846.100030-ORL20E0-512739-1-SSU_rRNA_eukarya/233-193\n+CGTAACAAGGTCTCCGTTGGTGAAACAGCGGAGGGATCATT\n+>ERR1553846.100031-ORL20E0-512754-1-SSU_rRNA_eukarya/275-202\n+GTTCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCCGTAGGTGAACCT\n+GCGGAAGGATCATT\n+>ERR1553846.100032-ORL20E0-512758-1-SSU_rRNA_eukarya/235-193\n+GTCGTAACAAGGTCTCTGTTGGTGAACCAGCGGAGGGATCATT\n+>ERR1553846.100033-ORL20E0-512763-1-SSU_rRNA_eukarya/240-193\n+TAAAAGTCGTAACAAGGTCTCCGTTGGTGAACCAGCGGAGGGATCATT\n+>ERR1553846.100035-ORL20E0-512775-1-SSU_rRNA_eukarya/273-203\n+CAAACTTGGTTATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCCGTAGGGGAACCTGCG\n+GAAGGATCATT\n+>ERR1553846.100037-ORL20E0-512804-1-SSU_rRNA_eukarya/262-177\n+GGCCGGAAAGTTATCCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCC\n+GTTGGTGAACCAGCGGAAGGATCATT\n+>ERR1553846.100038-ORL20E0-512807-1-SSU_rRNA_eukarya/249-193\n+TAGAGGAAGTAAAAGTCGTAACAAGGTCTCCGTTGGTGAACCAGCGGAGGGATCATT\n+>ERR1553846.10004-ORL20E0-51096-1-SSU_rRNA_eukarya/248-177\n+CCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCCGTTGGTGAACCAGC\n+GGAAGGATCATT\n+>ERR1553846.100041-ORL20E0-512821-1-SSU_rRNA_eukarya/263-177\n+GGGCCGGAAAGTTATCCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTC\n+CGTTGGTGAACCAGCGGAAGGATCATT\n+>ERR1553846.100042-ORL20E0-512822-1-SSU_rRNA_eukarya/252-172\n+GGGAAATTGCCCAAACTTGCTTATTTAGAGGAAGTACAAGTCGTAACAAGGTTTCCGTAG\n+GTGAACCTGCGGAAGGATCAT\n+>ERR1553846.100048-ORL20E0-512847-1-SSU_rRNA_eukarya/283-202\n+GGAAAGTTGTTCAAACTTGGGCATTTAGAGGAAGTAAAAGTCGTA
ACAAGGTTTCCGTAG\n+GTGAACCTGCGGAAGGATCATT\n+>ERR1553846.100049-ORL20E0-512851-1-SS'..b'ACAAGGTCTCCGGAGGTGAACCTGCGGAGGGATCATT\n+>ERR1553846.102645-ORL20E0-526341-1-SSU_rRNA_eukarya/264-177\n+CGGGCCGGAAAGTTATCCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTT\n+CCGTTGGTGAACCAGCGGAAGGATCATT\n+>ERR1553846.102647-ORL20E0-526356-1-SSU_rRNA_eukarya/258-202\n+TGGTGGAAGTAAAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATT\n+>ERR1553846.102649-ORL20E0-526361-1-SSU_rRNA_eukarya/245-212\n+AGGTAACCGGAGGTGAACCTGCGGTTGGATCATT\n+>ERR1553846.102652-ORL20E0-526384-1-SSU_rRNA_eukarya/233-193\n+CGTAACAAGGTCTCCGTTGGTGAACCAGCGGAGGGATCATT\n+>ERR1553846.102654-ORL20E0-526401-1-SSU_rRNA_eukarya/261-177\n+GCCGGAAAGTTATCCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCCG\n+TTGGTGAACCAGCGGAAGGATCATT\n+>ERR1553846.102655-ORL20E0-526406-1-SSU_rRNA_eukarya/233-193\n+CGTAACAAGGTCTCCGTTGGTGAACCAGCGGAGGGATCATT\n+>ERR1553846.102657-ORL20E0-526415-1-SSU_rRNA_eukarya/263-177\n+AGGCCGGAAAGTTATCCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTC\n+CGTTGGTGAACCAGCGGAAGGATCATT\n+>ERR1553846.102658-ORL20E0-526427-1-SSU_rRNA_eukarya/256-187\n+AAACTCGGTCATTTAGAGGAAGTAAAAGTCGTAACGAGGTTTCCGTAGGTGAACCTGCGG\n+AAGGATCATT\n+>ERR1553846.102660-ORL20E0-526431-1-SSU_rRNA_eukarya/229-177\n+GGAAGTAAAAGTCGTAACAAGGTTTCCGTTGGTGAACCAGCGGAAGGATCATT\n+>ERR1553846.102663-ORL20E0-526449-1-SSU_rRNA_eukarya/284-232\n+GGAAGTAAAAGTCGTAAAAAGGTTTCTGTAGGTGAACCTGCAGAAGGATCATT\n+>ERR1553846.102665-ORL20E0-526459-1-SSU_rRNA_eukarya/227-78\n+ACACCGCCCGTCGCTCCTACCGATTTCGAGTGATGAGGTGAACCCTCCGGACTGTGCGCG\n+CAAGCGTTGGGAAGTTGAGTAAACCTTATCACTTAGAGGAAGGAGAAGTCGTAACAAGGT\n+ATCTGTAGGTGAACCTGCAGATGGATCATT\n+>ERR1553846.102666-ORL20E0-526460-1-SSU_rRNA_eukarya/254-210\n+AAGTCGTAACAAGGTCTCCGTAGGTGAACCTGCTGAGGGATCATT\n+>ERR1553846.102667-ORL20E0-526464-1-SSU_rRNA_eukarya/272-232\n+CGTAACAAGGTTTCTGTAGGTGAACCTGCAGAAGGATCATT\n+>ERR1553846.102670-ORL20E0-526477-1-SSU_rRNA_eukarya/284-210\n+TGATCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCCGTAGGTGAACC\n+TGCGGAAGGATCATT\n+>ERR155384
6.102671-ORL20E0-526479-1-SSU_rRNA_eukarya/283-202\n+GGAAAGCTGTCCAAACTGTGTCATTTAGAGGAAGGAAAAGTCGTAACAAGGTTTCCGTAG\n+GTGAACCTGCGGAAGGATCATT\n+>ERR1553846.102672-ORL20E0-526487-1-SSU_rRNA_eukarya/284-204\n+GAAAGTTGTTCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCCGTAGG\n+TGAACCTGCGGAAGGATCATT\n+>ERR1553846.102673-ORL20E0-526491-1-SSU_rRNA_eukarya/246-177\n+AAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCCGTTGGTGAACCAGCGG\n+AAGGATCAGT\n+>ERR1553846.102675-ORL20E0-526502-1-SSU_rRNA_eukarya/260-177\n+CCGGAAAGTTATCCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCCGT\n+TGGTGAACCAGCGGAAGGATCATT\n+>ERR1553846.102676-ORL20E0-526503-1-SSU_rRNA_eukarya/264-177\n+CGGGCCGGAAAGTTATCCAAACTTGGTCATTTAGAGGAAGAAAAAGTCGTAACAAGGTTT\n+CCGTTGGTGAACCAGCGGAAGGATCATT\n+>ERR1553846.102680-ORL20E0-526508-1-SSU_rRNA_eukarya/259-203\n+TAGAGGAAGTAAAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATT\n+>ERR1553846.102682-ORL20E0-526514-1-SSU_rRNA_eukarya/284-203\n+GGAAAGTTATTCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCCGTTG\n+GTGAACCAGCGGAAGGGTCATT\n+>ERR1553846.102683-ORL20E0-526528-1-SSU_rRNA_eukarya/264-177\n+CGGGCCGGAAAGTTATCCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTT\n+CCGTTGGTGAACCAGCGGAAGGATCATT\n+>ERR1553846.102687-ORL20E0-526545-1-SSU_rRNA_eukarya/263-177\n+GGGCCGGAAAGTTATCCAAACTTGGGCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTC\n+CGTTGGTGAACCAGCGGAAGGATCATT\n+>ERR1553846.10269-ORL20E0-52574-1-SSU_rRNA_eukarya/237-192\n+AAAGTCGTAACAAGGTCTCCGTTGGTGAACCAGCGGAGGGATCATT\n+>ERR1553846.102691-ORL20E0-526566-1-SSU_rRNA_eukarya/214-177\n+AACAAGGTTTCCGTTGGTGAACCAGCGGAAGGATCATT\n+>ERR1553846.102693-ORL20E0-526588-1-SSU_rRNA_eukarya/277-193\n+GCCGGAAAGCTATCCAAACTCGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTCTCCG\n+TTGGTGAACCAGCGGAGGGATCATT\n+>ERR1553846.102694-ORL20E0-526594-1-SSU_rRNA_eukarya/281-201\n+GAGAGGTTGGTCAAACTTGGTCATTTAGATGAACTAAAGTCGTAACAAGGTAACCGTAGG\n+TGAACCTGCGGTTGGATCATT\n+>ERR1553846.102695-ORL20E0-526600-1-SSU_rRNA_eukarya/281-199\n+CGGAAAGTTGTCCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTCTCCGTA\n+GGTGAACCTGCGGAGGGATCATT\n+>ER
R1553846.102696-ORL20E0-526601-1-SSU_rRNA_eukarya/277-208\n+AAACTTGGTCATTTAGAGGAACTAAAAGTCGTAACAAGGTAACCGTAGGTGAACCTGCGG\n+TTGGATCATT\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 1e10251b9615 test-data/test_add2.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_add2.fasta Sun Aug 25 13:19:14 2024 +0000
b
@@ -0,0 +1,84 @@
+>ERR1553846.1-ORL20E0-7-1-SSU_rRNA_eukarya/283-222
+TCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCA
+TT
+>ERR1553846.10-ORL20E0-77-1-SSU_rRNA_eukarya/285-239
+AAAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATT
+>ERR1553846.100-ORL20E0-553-1-SSU_rRNA_eukarya/239-203
+ACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATT
+>ERR1553846.10000-ORL20E0-51074-1-SSU_rRNA_eukarya/262-177
+GGCCGGAAAGTTATCCAAACTTGGTCATTTAGAGGAAGTAAGAGTCGTAACAAGGTTTCC
+GTTGGTGAACCAGCGGAAGGATCATT
+>ERR1553846.100000-ORL20E0-512558-1-SSU_rRNA_eukarya/264-177
+CGGGCCGGAAAGTTATCCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTT
+CCGTTGGTGAACCAGCGGAAGGATCATT
+>ERR1553846.100001-ORL20E0-512560-1-SSU_rRNA_eukarya/278-205
+GTTCAAACTTGGGCATTTAGATGAAGTAAAAGTCGTAACAAGGTTTCCGTAGGTGAACCT
+GCGGCAGGATCATT
+>ERR1553846.100003-ORL20E0-512571-1-SSU_rRNA_eukarya/252-202
+AAGTAAAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATT
+>ERR1553846.100005-ORL20E0-512583-1-SSU_rRNA_eukarya/261-177
+GCCGGAAAGTTATCCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCCG
+TTGGTGAACCAGCGGAAGGATCATT
+>ERR1553846.100006-ORL20E0-512585-1-SSU_rRNA_eukarya/286-231
+AGAGGAAGGTGAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATT
+>ERR1553846.100007-ORL20E0-512586-1-SSU_rRNA_eukarya/263-177
+GGGCCGGAAAGTTATCCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTC
+GGTTGGTGAACCAGCGGAAGGATCATT
+>ERR1553846.100008-ORL20E0-512587-1-SSU_rRNA_eukarya/259-177
+CGGAAAGTTATCCAAACTTGGTCATTTAGAGGAATTAAAAGTCGTAACAAGGTTTCCGTT
+GGTGAACTAGCGGAAGGATCATT
+>ERR1553846.100013-ORL20E0-512607-1-SSU_rRNA_eukarya/264-177
+CGGGCCGGAAAGTTATCCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTT
+CCGTTGGCGAACCAGCGGAAGGATCATT
+>ERR1553846.100014-ORL20E0-512618-1-SSU_rRNA_eukarya/259-177
+CGGAAAGTTATCCAAACTTGGGCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCCGTT
+GGTGAACCAGCGGAAGGATCATT
+>ERR1553846.100015-ORL20E0-512621-1-SSU_rRNA_eukarya/220-177
+AGTCGTAACAAGGTTTCCGTTGGTGAACCAGCGGAAGGATCATT
+>ERR1553846.100016-ORL20E0-512624-1-SSU_rRNA_eukarya/256-177
+AAAGTTATCCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCCGTTGGT
+GAACCAGCGGAAGGATCATT
+>ERR1553846.10002-ORL20E0-51084-1-SSU_rRNA_eukarya/285-226
+ATTTAGAGGAAGTAAAAGTCGTAACAAGGTCTCCGTTGGTGAACCAGCGGAGGGATCATT
+>ERR1553846.100020-ORL20E0-512673-1-SSU_rRNA_eukarya/233-202
+GTTTCCGTAGGTGAACCTGCGGAAGGATCATT
+>ERR1553846.100023-ORL20E0-512696-1-SSU_rRNA_eukarya/257-193
+CGGTCGTTTAGAGGAAGTAAAAGTCGTAACAAGGTCTCCGTTGGTGAACCAGCGGAGGGA
+TCATT
+>ERR1553846.100029-ORL20E0-512721-1-SSU_rRNA_eukarya/239-177
+GTCAATTAGAGGAAGTAAAAGTCGTAACAAGGTTTCCGTTGGTGAACCAGCGGAAGGATC
+ATT
+>ERR1553846.100030-ORL20E0-512739-1-SSU_rRNA_eukarya/233-193
+CGTAACAAGGTCTCCGTTGGTGAAACAGCGGAGGGATCATT
+>ERR1553846.100031-ORL20E0-512754-1-SSU_rRNA_eukarya/275-202
+GTTCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCCGTAGGTGAACCT
+GCGGAAGGATCATT
+>ERR1553846.100032-ORL20E0-512758-1-SSU_rRNA_eukarya/235-193
+GTCGTAACAAGGTCTCTGTTGGTGAACCAGCGGAGGGATCATT
+>ERR1553846.100033-ORL20E0-512763-1-SSU_rRNA_eukarya/240-193
+TAAAAGTCGTAACAAGGTCTCCGTTGGTGAACCAGCGGAGGGATCATT
+>ERR1553846.100035-ORL20E0-512775-1-SSU_rRNA_eukarya/273-203
+CAAACTTGGTTATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCCGTAGGGGAACCTGCG
+GAAGGATCATT
+>ERR1553846.100037-ORL20E0-512804-1-SSU_rRNA_eukarya/262-177
+GGCCGGAAAGTTATCCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCC
+GTTGGTGAACCAGCGGAAGGATCATT
+>ERR1553846.100038-ORL20E0-512807-1-SSU_rRNA_eukarya/249-193
+TAGAGGAAGTAAAAGTCGTAACAAGGTCTCCGTTGGTGAACCAGCGGAGGGATCATT
+>ERR1553846.10004-ORL20E0-51096-1-SSU_rRNA_eukarya/248-177
+CCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCCGTTGGTGAACCAGC
+GGAAGGATCATT
+>ERR1553846.100041-ORL20E0-512821-1-SSU_rRNA_eukarya/263-177
+GGGCCGGAAAGTTATCCAAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTC
+CGTTGGTGAACCAGCGGAAGGATCATT
+>ERR1553846.100042-ORL20E0-512822-1-SSU_rRNA_eukarya/252-172
+GGGAAATTGCCCAAACTTGCTTATTTAGAGGAAGTACAAGTCGTAACAAGGTTTCCGTAG
+GTGAACCTGCGGAAGGATCAT
+>ERR1553846.100048-ORL20E0-512847-1-SSU_rRNA_eukarya/283-202
+GGAAAGTTGTTCAAACTTGGGCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCCGTAG
+GTGAACCTGCGGAAGGATCATT
+>ERR1553846.100049-ORL20E0-512851-1-SSU_rRNA_eukarya/250-193
+TTAGAGGAAGTAAAAGTCGTAACAAGGTCTCCGTTGGTGAACCAGCGGAGGGATCATT
+>ERR1553846.100050-ORL20E0-512852-1-SSU_rRNA_eukarya/279-210
+AAACTTGGTCATTTAGAGGAAGTAAAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCAG
+AAGGATCATT
\ No newline at end of file
b
diff -r 000000000000 -r 1e10251b9615 test-data/test_bat_file.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_bat_file.tsv Sun Aug 25 13:19:14 2024 +0000
b
@@ -0,0 +1,39 @@
+# bin classification reason lineage lineage scores
+1.FASTA taxid assigned based on 4161/4187 ORFs 1;131567;2;1783272;201174;1760;85007;182102;85043;1736349 1.00;1.00;1.00;1.00;1.00;1.00;0.99;0.99;0.99;0.31
+10.FASTA taxid assigned based on 1895/1915 ORFs 1;131567;2;1224;1236;135614;32033;2370;2371 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;0.94
+11.FASTA taxid assigned based on 4793/4923 ORFs 1;131567;2;1783272;1239;186801;186802;31979;1884934;191027* 1.00;1.00;1.00;0.99;0.99;0.98;0.98;0.83;0.81;0.81
+12.FASTA taxid assigned based on 4056/4136 ORFs 1;131567;2;1783270;68336;976;1853228;1853229;563835;398041;462367 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;0.99;0.99
+13.FASTA taxid assigned based on 3575/3644 ORFs 1;131567;2;1783272;544448;31969;186332;186333;2086;2088 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00
+14.FASTA taxid assigned based on 245/248 ORFs 1;131567;2;1224;28216;80840;75682;149698;1736522 1.00;1.00;1.00;0.99;0.91;0.91;0.91;0.89;0.35
+15.FASTA taxid assigned based on 3834/3853 ORFs 1;131567;2;1224;28211;204455;31989;1097466;404881 1.00;1.00;1.00;1.00;1.00;0.99;0.97;0.96;0.96
+16.FASTA taxid assigned based on 2824/2869 ORFs 1;131567;2;1783272;544448;31969;186332;186333;2086;2088 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00
+17.FASTA taxid assigned based on 2646/2678 ORFs 1;131567;2;1783272;1239;186801;186802;186804;214904;227138 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;0.90
+18.FASTA taxid assigned based on 6183/6302 ORFs 1;131567;2;1783272;200795;388447;388448;768649;768650* 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00
+19.FASTA taxid assigned based on 8855/8930 ORFs 1;131567;2;1783272;1239;91061;1385;539002;539742;33986* 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00
+2.FASTA taxid assigned based on 3440/3461 ORFs 1;131567;2;1783272;1239;91061;1385;539002;539742;33986* 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00
+20.FASTA taxid assigned based on 4264/4287 ORFs 1;131567;2;1224;28216;206351;1499392;397275;1121000 1.00;1.00;1.00;1.00;1.00;1.00;0.99;0.99;0.97
+21.FASTA taxid assigned based on 5835/5867 ORFs 1;131567;2;1224;28216;80840;75682;149698;1736522 1.00;1.00;1.00;1.00;0.98;0.98;0.98;0.97;0.59
+22.FASTA taxid assigned based on 2576/2598 ORFs 1;131567;2;1783270;68336;976;117743;200644;49546;363408;328515 1.00;1.00;1.00;1.00;1.00;1.00;1.00;0.97;0.97;0.96;0.89
+23.FASTA taxid assigned based on 5759/5763 ORFs 1;131567;2 1.00;0.74;0.73
+24.FASTA taxid assigned based on 6458/6478 ORFs 1;131567;2;1224;28211;204455;31989;265;266 1.00;1.00;1.00;1.00;1.00;0.99;0.99;0.95;0.46
+25.FASTA taxid assigned based on 4145/4341 ORFs 1;131567;2;1783270;68336;976;1853228;1853229;563835;398041;462367 1.00;1.00;1.00;0.99;0.99;0.99;0.93;0.93;0.93;0.89;0.88
+26.FASTA taxid assigned based on 2208/2231 ORFs 1;131567;2;1783272;1239;186801;186802;1491775;42447;182411 1.00;1.00;1.00;1.00;1.00;1.00;1.00;0.97;0.97;0.72
+27.FASTA taxid assigned based on 4557/4633 ORFs 1;131567;2;1783270;68336;976;1853228;1853229;563835;1004300;1004304 1.00;1.00;1.00;1.00;1.00;1.00;0.89;0.89;0.89;0.89;0.88
+28.FASTA taxid assigned based on 1856/1860 ORFs 1;131567;2;1783272;1239;91061;1385;539002;539742;33986* 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00
+29.FASTA taxid assigned based on 3780/3817 ORFs 1;131567;2;1783272;1239;91061;1385;539002;539742;33986* 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00
+3.FASTA taxid assigned based on 205/207 ORFs 1;131567;2;1783272;1239;909932;909929;1843491;55506;55507* 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;0.99;0.99
+30.FASTA taxid assigned based on 6218/6259 ORFs 1;131567;2;1224;28216;80840;75682;149698;1736532 1.00;1.00;1.00;1.00;0.98;0.98;0.98;0.97;0.57
+31.FASTA taxid assigned based on 2720/2744 ORFs 1;131567;2;1224;28216;80840;80864;198705;198706 1.00;1.00;1.00;1.00;0.99;0.99;0.98;0.97;0.97
+32.FASTA taxid assigned based on 3222/3245 ORFs 1;131567;2;1783272;201174;1760;85006;85021;99479;1736399 1.00;1.00;1.00;1.00;1.00;1.00;1.00;0.99;0.96;0.53
+33.FASTA taxid assigned based on 292/297 ORFs 1;131567;2;1224;28211;204455;31989 1.00;1.00;0.99;0.89;0.84;0.50;0.49
+34.FASTA taxid assigned based on 3557/3604 ORFs 1;131567;2;1783272;1239;186801;186802;186803;653683;264463* 1.00;1.00;1.00;1.00;1.00;1.00;1.00;0.99;0.99;0.99
+35.FASTA taxid assigned based on 2175/2205 ORFs 1;131567;2;1783272;1239;909932;909929;1843491;55506;55507* 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;0.99;0.99
+36.FASTA taxid assigned based on 3370/3422 ORFs 1;131567;2;1783270;68336;976;1853228;1853229;563835;1004300;1004304 1.00;1.00;1.00;1.00;1.00;1.00;0.89;0.89;0.89;0.89;0.87
+37.FASTA taxid assigned based on 4212/4264 ORFs 1;131567;2;1783270;68336;976;1853228;1853229;563835;1004300;1004304 1.00;1.00;1.00;1.00;1.00;1.00;0.92;0.92;0.92;0.92;0.91
+38.FASTA taxid assigned based on 8008/8038 ORFs 1;131567;2;1224;28211;204455;31989;265;266 1.00;1.00;1.00;1.00;1.00;0.99;0.99;0.98;0.40
+4.FASTA taxid assigned based on 8568/8682 ORFs 1;131567;2;1783270;68336;976;1853228;1853229;563835;1004300;1004304 1.00;1.00;1.00;1.00;1.00;1.00;0.92;0.92;0.92;0.92;0.90
+5.FASTA taxid assigned based on 2948/2952 ORFs 1;131567;2;1224;28211;204455;31989;205889;990712 1.00;1.00;1.00;1.00;1.00;0.87;0.87;0.86;0.86
+6.FASTA taxid assigned based on 2846/2901 ORFs 1;131567;2;1224;1236;135614;32033;68;1736549 1.00;1.00;1.00;0.96;0.94;0.92;0.85;0.65;0.52
+7.FASTA taxid assigned based on 5030/5081 ORFs 1;131567;2;1224;68525;28221;213118;213119;218207;259354 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;0.38
+8.FASTA taxid assigned based on 2537/2579 ORFs 1;131567;2;1783272;544448;31969;186332;186333;2086;2088 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00
+9.FASTA taxid assigned based on 1648/1673 ORFs 1;131567;2;1783272;544448;31969;186332;186333;2086;2088 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00
\ No newline at end of file
b
diff -r 000000000000 -r 1e10251b9615 test-data/test_binning.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_binning.tsv Sun Aug 25 13:19:14 2024 +0000
b
@@ -0,0 +1,7 @@
+@Version:0.9.1
+@SampleID:gsa
+
+@@SEQUENCEID BINID TAXID
+RH|P|C37126 Sample6_89 45202
+RH|P|C3274 Sample9_91 32644
+RH|P|C26099 1053046 765201
\ No newline at end of file
b
diff -r 000000000000 -r 1e10251b9615 test-data/test_binning2.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_binning2.tsv Sun Aug 25 13:19:14 2024 +0000
b
@@ -0,0 +1,6 @@
+@Version:0.9.1
+@SampleID:gsa
+
+@@SEQUENCEID BINID TAXID
+RH|P|C35075 1053046 765201
+RH|P|C20873 1053046 765201
\ No newline at end of file
b
diff -r 000000000000 -r 1e10251b9615 test-data/test_biobox_file.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_biobox_file.tsv Sun Aug 25 13:19:14 2024 +0000
b
@@ -0,0 +1,13 @@
+#CAMI Format for Binning
+@Version:0.9.0
+@SampleID:gsa
+@@SEQUENCEID BINID
+RL|S1|C1198 1
+RL|S1|C3912 1
+RL|S1|C131 10
+RL|S1|C1410 10
+RL|S1|C2929 10
+RL|S1|C3754 10
+RL|S1|C1335 11
+RL|S1|C1344 11
+RL|S1|C1352 11
\ No newline at end of file
b
diff -r 000000000000 -r 1e10251b9615 test-data/test_biobox_file_add_taxid_bat.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_biobox_file_add_taxid_bat.tsv Sun Aug 25 13:19:14 2024 +0000
b
@@ -0,0 +1,13 @@
+#CAMI Format for Binning
+@Version:0.9.0
+@SampleID:_SAMPLEID_
+@@SEQUENCEID BINID TAXID
+RL|S1|C1198 1 1736349
+RL|S1|C3912 1 1736349
+RL|S1|C131 10 2371
+RL|S1|C1410 10 2371
+RL|S1|C2929 10 2371
+RL|S1|C3754 10 2371
+RL|S1|C1335 11 191027
+RL|S1|C1344 11 191027
+RL|S1|C1352 11 191027
b
diff -r 000000000000 -r 1e10251b9615 test-data/test_biobox_file_add_taxid_gtdb.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_biobox_file_add_taxid_gtdb.tsv Sun Aug 25 13:19:14 2024 +0000
b
@@ -0,0 +1,13 @@
+#CAMI Format for Binning
+@Version:0.9.0
+@SampleID:_SAMPLEID_
+@@SEQUENCEID BINID TAXID
+RL|S1|C1198 1 12908
+RL|S1|C3912 1 12908
+RL|S1|C131 10 2371
+RL|S1|C1410 10 2371
+RL|S1|C2929 10 2371
+RL|S1|C3754 10 2371
+RL|S1|C1335 11 191027
+RL|S1|C1344 11 191027
+RL|S1|C1352 11 191027
b
diff -r 000000000000 -r 1e10251b9615 test-data/test_gold.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_gold.tsv Sun Aug 25 13:19:14 2024 +0000
b
@@ -0,0 +1,9 @@
+@Version:0.9.1
+@SampleID:gsa
+
+@@SEQUENCEID BINID TAXID LENGTH
+RH|P|C37126 Sample6_89 45202 25096
+RH|P|C3274 Sample9_91 32644 10009
+RH|P|C26099 1053046 765201 689201
+RH|P|C35075 1053046 765201 173282
+RH|P|C20873 1053046 765201 339258
\ No newline at end of file
b
diff -r 000000000000 -r 1e10251b9615 test-data/test_gtdb_file.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_gtdb_file.tsv Sun Aug 25 13:19:14 2024 +0000
b
b'@@ -0,0 +1,39 @@\n+user_genome\tclassification\tfastani_reference\tfastani_reference_radius\tfastani_taxonomy\tfastani_ani\tfastani_af\tclosest_placement_reference\tclosest_placement_radius\tclosest_placement_taxonomy\tclosest_placement_ani\tclosest_placement_af\tpplacer_taxonomy\tclassification_method\tnote\tother_related_references(genome_id,species_name,radius,ANI,AF)\tmsa_percent\ttranslation_table\tred_value\twarnings\n+1\td__Bacteria;p__Actinobacteriota;c__Actinomycetia;o__Mycobacteriales;f__Mycobacteriaceae;g__Williamsia_A;s__Williamsia_A herbipolensis\tGCF_000964005.1\t95.0\td__Bacteria;p__Actinobacteriota;c__Actinomycetia;o__Mycobacteriales;f__Mycobacteriaceae;g__Williamsia_A;s__Williamsia_A herbipolensis\t95.01\t0.891\tGCF_000964005.1\t95.0\td__Bacteria;p__Actinobacteriota;c__Actinomycetia;o__Mycobacteriales;f__Mycobacteriaceae;g__Williamsia_A;s__Williamsia_A herbipolensis\t95.01\t0.891\td__Bacteria;p__Actinobacteriota;c__Actinomycetia;o__Mycobacteriales;f__Mycobacteriaceae;g__Williamsia_A;s__\ttaxonomic classification defined by topology and ANI\ttopological placement and ANI have congruent species assignments\tGCF_014635305.1, s__Williamsia_A phyllosphaerae, 95.0, 82.43, 0.702; GCA_016462415.1, s__Williamsia_A sp016462415, 95.0, 82.15, 0.649; GCF_018474015.1, s__Williamsia_A sp018474015, 95.0, 80.23, 0.523\t96.49\t11\tN/A\tN/A\n+10\td__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Xanthomonadales;f__Xanthomonadaceae;g__Xylella;s__Xylella fastidiosa\tGCF_011801475.1\t95.0\td__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Xanthomonadales;f__Xanthomonadaceae;g__Xylella;s__Xylella fastidiosa\t99.99\t1.0\tGCF_011801475.1\t95.0\td__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Xanthomonadales;f__Xanthomonadaceae;g__Xylella;s__Xylella fastidiosa\t99.99\t1.0\td__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Xanthomonadales;f__Xanthomonadaceae;g__Xylella;s__\ttaxonomic classification defined by topology and ANI\ttopological 
placement and ANI have congruent species assignments\tGCF_013177435.1, s__Xylella taiwanensis, 95.0, 82.85, 0.849\t85.13\t11\tN/A\tN/A\n+11\td__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Caminicellaceae;g__Paramaledivibacter;s__Paramaledivibacter caminithermalis\tGCF_900142245.1\t95.0\td__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Caminicellaceae;g__Paramaledivibacter;s__Paramaledivibacter caminithermalis\t99.97\t0.872\tGCF_900142245.1\t95.0\td__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Caminicellaceae;g__Paramaledivibacter;s__Paramaledivibacter caminithermalis\t99.97\t0.872\td__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Caminicellaceae;g__;s__\ttaxonomic classification defined by topology and ANI\ttopological placement and ANI have congruent species assignments\tN/A\t78.49\t11\tN/A\tGenome has more than 10.0% of markers with multiple hits\n+12\td__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Chitinophagales;f__Chitinophagaceae;g__Flavisolibacter;s__Flavisolibacter ginsengisoli\tGCF_900129295.1\t95.0\td__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Chitinophagales;f__Chitinophagaceae;g__Flavisolibacter;s__Flavisolibacter ginsengisoli\t100.0\t0.999\tGCF_900129295.1\t95.0\td__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Chitinophagales;f__Chitinophagaceae;g__Flavisolibacter;s__Flavisolibacter ginsengisoli\t100.0\t0.999\td__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Chitinophagales;f__Chitinophagaceae;g__Flavisolibacter;s__\ttaxonomic classification defined by topology and ANI\ttopological placement and ANI have congruent species assignments\tGCA_017745135.1, s__Flavisolibacter sp017745135, 95.0, 77.32, 0.1; GCA_013327885.1, s__Flavisolibacter sp013327885, 95.0, 77.25, 0.081; GCF_007970805.1, s__Flavisolibacter ginsenosidimutans, 95.0, 76.92, 0.056; GCF_003628535.1, s__Flavisolibacter nicotianae, 95.0, 76.88, 0.08; GCA_014163555.1, s__Flavisolibacter sp014163555, 95.0, 76.86, 0.077; 
GCA_002050435.1, s__Flavisolibacter sp002050435, 95.0, 76.62, 0.139\t97.8\t11\tN/A\tN/A\n+13\td__Bacteria;p__Fir'..b'eae;g__Anaeroplasma;s__Anaeroplasma bactoclasticum\tGCF_003550015.1\t95.0\td__Bacteria;p__Firmicutes;c__Bacilli;o__Acholeplasmatales;f__Anaeroplasmataceae;g__Anaeroplasma;s__Anaeroplasma bactoclasticum\t99.22\t0.892\tGCF_003550015.1\t95.0\td__Bacteria;p__Firmicutes;c__Bacilli;o__Acholeplasmatales;f__Anaeroplasmataceae;g__Anaeroplasma;s__Anaeroplasma bactoclasticum\t99.22\t0.892\td__Bacteria;p__Firmicutes;c__Bacilli;o__Acholeplasmatales;f__Anaeroplasmataceae;g__Anaeroplasma;s__\ttaxonomic classification defined by topology and ANI\ttopological placement and ANI have congruent species assignments\tGCA_017388605.1, s__Anaeroplasma sp017388605, 95.0, 83.89, 0.71; GCA_017449215.1, s__Anaeroplasma sp017449215, 95.0, 78.37, 0.262; GCA_017441525.1, s__Anaeroplasma sp017441525, 95.0, 78.17, 0.272; GCA_002449755.1, s__Anaeroplasma sp002449755, 95.0, 78.14, 0.275; GCA_017462645.1, s__Anaeroplasma sp017462645, 95.0, 78.0, 0.236; GCA_017524905.1, s__Anaeroplasma sp017524905, 95.0, 77.92, 0.242; GCA_016280975.1, s__Anaeroplasma sp016280975, 95.0, 77.86, 0.273; GCA_017444645.1, s__Anaeroplasma sp017444645, 95.0, 77.57, 0.26; GCA_900770055.1, s__Anaeroplasma sp900770055, 95.0, 77.54, 0.192; GCA_902783745.1, s__Anaeroplasma sp902783745, 95.0, 77.5, 0.231; GCA_016282815.1, s__Anaeroplasma sp016282815, 95.0, 77.44, 0.216; GCA_017542485.1, s__Anaeroplasma sp017542485, 95.0, 77.43, 0.189; GCA_016288855.1, s__Anaeroplasma sp016288855, 95.0, 77.42, 0.212; GCA_015059145.1, s__Anaeroplasma sp015059145, 95.0, 77.4, 0.199; GCA_900767915.1, s__Anaeroplasma sp900767915, 95.0, 77.38, 0.187; GCA_017454375.1, s__Anaeroplasma sp017454375, 95.0, 77.11, 0.128; GCA_902796895.1, s__Anaeroplasma sp902796895, 95.0, 76.99, 0.242; GCA_018715825.1, s__Anaeroplasma faecigallinarum, 95.0, 76.73, 0.118; GCA_905236965.1, s__Anaeroplasma sp905236965, 95.0, 76.07, 0.2; GCA_905236105.1, 
s__Anaeroplasma sp905236105, 95.0, 76.05, 0.133\t57.15\t11\tN/A\tGenome has more than 10.0% of markers with multiple hits\n+9\td__Bacteria;p__Firmicutes;c__Bacilli;o__Acholeplasmatales;f__Anaeroplasmataceae;g__Anaeroplasma;s__Anaeroplasma bactoclasticum\tGCF_003550015.1\t95.0\td__Bacteria;p__Firmicutes;c__Bacilli;o__Acholeplasmatales;f__Anaeroplasmataceae;g__Anaeroplasma;s__Anaeroplasma bactoclasticum\t100.0\t1.0\tGCF_003550015.1\t95.0\td__Bacteria;p__Firmicutes;c__Bacilli;o__Acholeplasmatales;f__Anaeroplasmataceae;g__Anaeroplasma;s__Anaeroplasma bactoclasticum\t100.0\t1.0\td__Bacteria;p__Firmicutes;c__Bacilli;o__Acholeplasmatales;f__Anaeroplasmataceae;g__Anaeroplasma;s__\ttaxonomic classification defined by topology and ANI\ttopological placement and ANI have congruent species assignments\tGCA_017388605.1, s__Anaeroplasma sp017388605, 95.0, 84.15, 0.658; GCA_002449755.1, s__Anaeroplasma sp002449755, 95.0, 78.65, 0.256; GCA_017441525.1, s__Anaeroplasma sp017441525, 95.0, 78.34, 0.265; GCA_017449215.1, s__Anaeroplasma sp017449215, 95.0, 78.33, 0.263; GCA_017524905.1, s__Anaeroplasma sp017524905, 95.0, 78.01, 0.244; GCA_017462645.1, s__Anaeroplasma sp017462645, 95.0, 78.0, 0.23; GCA_016280975.1, s__Anaeroplasma sp016280975, 95.0, 77.96, 0.23; GCA_017542485.1, s__Anaeroplasma sp017542485, 95.0, 77.74, 0.195; GCA_017444645.1, s__Anaeroplasma sp017444645, 95.0, 77.7, 0.258; GCA_900767915.1, s__Anaeroplasma sp900767915, 95.0, 77.52, 0.172; GCA_016288855.1, s__Anaeroplasma sp016288855, 95.0, 77.51, 0.196; GCA_902783745.1, s__Anaeroplasma sp902783745, 95.0, 77.5, 0.209; GCA_016282815.1, s__Anaeroplasma sp016282815, 95.0, 77.5, 0.201; GCA_900770055.1, s__Anaeroplasma sp900770055, 95.0, 77.43, 0.184; GCA_015059145.1, s__Anaeroplasma sp015059145, 95.0, 77.28, 0.183; GCA_902796895.1, s__Anaeroplasma sp902796895, 95.0, 77.2, 0.221; GCA_017454375.1, s__Anaeroplasma sp017454375, 95.0, 77.19, 0.129; GCA_018715825.1, s__Anaeroplasma faecigallinarum, 95.0, 76.79, 0.111; 
GCA_905236965.1, s__Anaeroplasma sp905236965, 95.0, 76.42, 0.19; GCA_905236105.1, s__Anaeroplasma sp905236105, 95.0, 76.12, 0.118\t66.58\t11\tN/A\tN/A\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 1e10251b9615 test-data/test_gtdb_to_taxdump_file.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_gtdb_to_taxdump_file.tsv Sun Aug 25 13:19:14 2024 +0000
b
@@ -0,0 +1,25 @@
+ncbi_taxonomy gtdb_taxonomy lca_frac target_tax_level lineage
+s__Williamsia_A herbipolensis unclassified NA NA NA
+s__Xylella fastidiosa s__Xylella fastidiosa 1.0 species d__bacteria;p__pseudomonadota;c__gammaproteobacteria;o__xanthomonadales;f__xanthomonadaceae;g__xylella;s__xylella fastidiosa
+s__Paramaledivibacter caminithermalis s__Paramaledivibacter caminithermalis 1.0 species d__bacteria;p__bacillota_a;c__clostridia;o__peptostreptococcales;f__caminicellaceae;g__paramaledivibacter;s__paramaledivibacter caminithermalis
+s__Flavisolibacter ginsengisoli s__Flavisolibacter ginsengisoli 1.0 species d__bacteria;p__bacteroidota;c__bacteroidia;o__chitinophagales;f__chitinophagaceae;g__flavisolibacter;s__flavisolibacter ginsengisoli
+s__Anaeroplasma bactoclasticum c__Bacilli 1.0 class d__bacteria;p__bacillota;c__bacilli
+s__Pseudoduganella sp001425045 unclassified NA NA NA
+s__Albidovulum denitrificans unclassified NA NA NA
+s__Tepidibacter formicigenes s__Tepidibacter formicigenes 1.0 species d__bacteria;p__bacillota_a;c__clostridia;o__peptostreptococcales;f__peptostreptococcaceae;g__tepidibacter;s__tepidibacter formicigenes
+s__Thermosporothrix hazakensis unclassified NA NA NA
+s__Exiguobacterium_A sp001423965 unclassified NA NA NA
+s__Andreprevotia lacus s__Andreprevotia lacus 1.0 species d__bacteria;p__pseudomonadota;c__gammaproteobacteria;o__burkholderiales;f__chitinibacteraceae;g__andreprevotia;s__andreprevotia lacus
+s__Nonlabens dokdonensis g__Nonlabens 1.0 genus d__bacteria;p__bacteroidota;c__bacteroidia;o__flavobacteriales;f__flavobacteriaceae;g__nonlabens
+s__Pseudomonas aeruginosa s__Pseudomonas aeruginosa 0.99 species d__bacteria;p__pseudomonadota;c__gammaproteobacteria;o__pseudomonadales;f__pseudomonadaceae;g__pseudomonas;s__pseudomonas aeruginosa
+s__Paracoccus denitrificans g__Paracoccus 1.0 genus d__bacteria;p__pseudomonadota;c__alphaproteobacteria;o__rhodobacterales;f__rhodobacteraceae;g__paracoccus
+N/A unclassified NA NA NA
+s__Anaerobranca californiensis s__Anaerobranca californiensis 1.0 species d__bacteria;p__bacillota_d;c__proteinivoracia;o__proteinivoracales;f__proteinivoraceae;g__anaerobranca;s__anaerobranca californiensis
+s__Hydrotalea sandarakina s__Hydrotalea sandarakina 1.0 species d__bacteria;p__bacteroidota;c__bacteroidia;o__chitinophagales;f__chitinophagaceae;g__hydrotalea;s__hydrotalea sandarakina
+s__Schwartzia succinivorans f__Selenomonadaceae 1.0 family d__bacteria;p__bacillota_c;c__negativicutes;o__selenomonadales;f__selenomonadaceae
+s__Pseudoduganella sp001425265 unclassified NA NA NA
+s__Lampropedia hyalina s__Lampropedia hyalina 1.0 species d__bacteria;p__pseudomonadota;c__gammaproteobacteria;o__burkholderiales;f__burkholderiaceae_b;g__lampropedia;s__lampropedia hyalina
+s__Phycicoccus sp001428065 unclassified NA NA NA
+s__Anaerosporobacter mobilis s__Anaerosporobacter mobilis 1.0 species d__bacteria;p__bacillota_a;c__clostridia;o__lachnospirales;f__lachnospiraceae;g__anaerosporobacter;s__anaerosporobacter mobilis
+s__Meinhardsimonia xiamenensis s__Meinhardsimonia xiamenensis 1.0 species d__bacteria;p__pseudomonadota;c__alphaproteobacteria;o__rhodobacterales;f__rhodobacteraceae;g__meinhardsimonia;s__meinhardsimonia xiamenensis
+s__Desulfatibacillum alkenivorans s__Desulfatibacillum alkenivorans 1.0 species d__bacteria;p__desulfobacterota;c__desulfobacteria;o__desulfobacterales;f__desulfatibacillaceae;g__desulfatibacillum;s__desulfatibacillum alkenivorans
b
diff -r 000000000000 -r 1e10251b9615 test-data/test_taxonkit_file.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_taxonkit_file.tsv Sun Aug 25 13:19:14 2024 +0000
b
@@ -0,0 +1,25 @@
+unclassified 12908
+Xylella fastidiosa 2371
+Paramaledivibacter caminithermalis 191027
+Flavisolibacter ginsengisoli 462367
+Bacilli 91061
+unclassified 12908
+unclassified 12908
+Tepidibacter formicigenes 227138
+unclassified 12908
+unclassified 12908
+Andreprevotia lacus 1121000
+Nonlabens 363408
+Pseudomonas aeruginosa 287
+Paracoccus 265
+Paracoccus 249411
+unclassified 12908
+Anaerobranca californiensis 182411
+Hydrotalea sandarakina 1004304
+Selenomonadaceae 1843491
+unclassified 12908
+Lampropedia hyalina 198706
+unclassified 12908
+Anaerosporobacter mobilis 264463
+Meinhardsimonia xiamenensis 990712
+Desulfatibacillum alkenivorans 259354
\ No newline at end of file
b
diff -r 000000000000 -r 1e10251b9615 test-data/unique_common.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unique_common.tsv Sun Aug 25 13:19:14 2024 +0000
b
b'@@ -0,0 +1,1366 @@\n+evo_1035930.029\tcommon strain\n+1035930\tcommon strain\n+evo_1035930.032\tcommon strain\n+evo_1035930.011\tcommon strain\n+evo_1286_AP.033\tcommon strain\n+1286_AP\tcommon strain\n+evo_1286_AP.026\tcommon strain\n+evo_1286_AP.037\tcommon strain\n+evo_1286_AP.008\tcommon strain\n+1052944\tcommon strain\n+1053058\tcommon strain\n+1052947\tcommon strain\n+evo_1049056.013\tcommon strain\n+evo_1049056.031\tcommon strain\n+evo_1049056.011\tcommon strain\n+1049056\tcommon strain\n+evo_1049056.039\tcommon strain\n+evo_1049056.015\tcommon strain\n+1048783\tunique strain\n+1220_AD\tunique strain\n+1036608\tunique strain\n+1036728\tunique strain\n+1036743\tunique strain\n+1036539\tunique strain\n+1139_AG\tunique strain\n+1285_BH\tunique strain\n+1036707\tunique strain\n+1030755\tunique strain\n+1048993\tunique strain\n+1036755\tunique strain\n+1049005\tunique strain\n+1036554\tunique strain\n+1030752\tunique strain\n+1035900\tunique strain\n+1030878\tunique strain\n+1030836\tunique strain\n+1049089\tunique strain\n+1220_AJ\tunique strain\n+1365_A\tunique strain\n+1036641\tunique strain\n+evo_1030728.011\tcommon strain\n+evo_1030728.001\tcommon strain\n+1030728\tcommon strain\n+evo_1030728.035\tcommon strain\n+evo_1030728.009\tcommon strain\n+evo_1030728.038\tcommon strain\n+1052977\tcommon strain\n+1052974\tcommon strain\n+evo_1035921.028\tcommon strain\n+1035921\tcommon strain\n+evo_1035921.030\tcommon strain\n+evo_1035921.008\tcommon strain\n+evo_1035921.007\tcommon strain\n+1053034\tcommon strain\n+1053037\tcommon strain\n+1286_AO\tcommon strain\n+1286_Y\tcommon strain\n+1139_T\tcommon strain\n+1139_AW\tcommon strain\n+1220_BB\tcommon strain\n+1021_AS\tcommon strain\n+1021_AQ\tcommon strain\n+evo_1139_Y.018\tcommon strain\n+evo_1139_Y.035\tcommon strain\n+1139_Y\tcommon strain\n+1220_BD\tcommon strain\n+1053028\tcommon strain\n+1053022\tcommon strain\n+1053025\tcommon strain\n+1052968\tcommon strain\n+1048966\tcommon strain\n+1021_P\tcommon 
strain\n+1220_AX\tcommon strain\n+1139_B\tcommon strain\n+1287_N\tcommon strain\n+1052992\tcommon strain\n+1052995\tcommon strain\n+1285_BO\tcommon strain\n+1285_CK\tcommon strain\n+1285_AC\tcommon strain\n+1287_BD\tcommon strain\n+1220_AR\tcommon strain\n+1286_BU\tcommon strain\n+1285_BV\tcommon strain\n+1286_BD\tcommon strain\n+1286_G\tcommon strain\n+evo_1286_G.034\tcommon strain\n+evo_1286_G.010\tcommon strain\n+1052965\tcommon strain\n+1052959\tcommon strain\n+1287_BC\tcommon strain\n+1021_C\tcommon strain\n+1053004\tcommon strain\n+1052998\tcommon strain\n+1053055\tunique strain\n+1052953\tunique strain\n+1286_AF\tunique strain\n+1030905\tunique strain\n+1053031\tunique strain\n+1048861\tunique strain\n+1036782\tunique strain\n+1048870\tunique strain\n+1139_CO\tunique strain\n+1030776\tunique strain\n+1030740\tunique strain\n+1285_BD\tunique strain\n+1139_BF\tunique strain\n+1035939\tunique strain\n+1139_CJ\tunique strain\n+1030782\tunique strain\n+1036695\tunique strain\n+1139_CF\tunique strain\n+1285_BF\tunique strain\n+1030881\tunique strain\n+1048978\tunique strain\n+1036803\tunique strain\n+1052950\tunique strain\n+1036629\tunique strain\n+1035993\tunique strain\n+1220_E\tunique strain\n+1285_CF\tunique strain\n+1036614\tunique strain\n+1030896\tunique strain\n+1049035\tunique strain\n+1030869\tunique strain\n+1035906\tunique strain\n+1036011\tunique strain\n+1048942\tunique strain\n+1035969\tunique strain\n+1035981\tunique strain\n+1030806\tunique strain\n+1036635\tunique strain\n+1049032\tunique strain\n+1287_W\tunique strain\n+1030800\tunique strain\n+1030716\tunique strain\n+1053049\tunique strain\n+1287_BO\tunique strain\n+1021_AP\tunique strain\n+1035951\tunique strain\n+1220_CF\tunique strain\n+1030968\tunique strain\n+1030812\tunique strain\n+1287_BN\tunique strain\n+1139_AL\tunique strain\n+1036557\tunique strain\n+1285_AD\tunique strain\n+1286_BV\tunique strain\n+1036716\tunique strain\n+1036566\tunique strain\n+1053040\tunique 
strain\n+1220_AO\tunique strain\n+1049011\tunique strain\n+1048945\tunique strain\n+1036731\tunique strain\n+1036701\tunique strain\n+1139_AA\tunique strain\n+1220_A\tunique strain\n+1048885\tunique strain\n+1220_X\tunique strain\n+1049083\tunique strain\n+1139_AF\tunique strain\n+1052941\tunique strain\n+1036785\tunique strain\n+1036704\tunique strain\n+1'..b'ircular element\n+Sample18_70\tcircular element\n+Sample18_82\tcircular element\n+Sample18_83\tcircular element\n+Sample18_86\tcircular element\n+Sample18_87\tcircular element\n+Sample18_90\tcircular element\n+Sample18_91\tcircular element\n+Sample18_94\tcircular element\n+Sample18_97\tcircular element\n+Sample19_105\tcircular element\n+Sample19_106\tcircular element\n+Sample19_117\tcircular element\n+Sample19_13\tcircular element\n+Sample19_146\tcircular element\n+Sample19_153\tcircular element\n+Sample19_175\tcircular element\n+Sample19_180\tcircular element\n+Sample19_19\tcircular element\n+Sample19_209\tcircular element\n+Sample19_21\tcircular element\n+Sample19_212\tcircular element\n+Sample19_215\tcircular element\n+Sample19_220\tcircular element\n+Sample19_241\tcircular element\n+Sample19_242\tcircular element\n+Sample19_247\tcircular element\n+Sample19_254\tcircular element\n+Sample19_275\tcircular element\n+Sample19_280\tcircular element\n+Sample19_286\tcircular element\n+Sample19_293\tcircular element\n+Sample19_315\tcircular element\n+Sample19_32\tcircular element\n+Sample19_350\tcircular element\n+Sample19_366\tcircular element\n+Sample19_367\tcircular element\n+Sample19_37\tcircular element\n+Sample19_39\tcircular element\n+Sample19_391\tcircular element\n+Sample19_392\tcircular element\n+Sample19_62\tcircular element\n+Sample19_67\tcircular element\n+Sample19_75\tcircular element\n+Sample19_77\tcircular element\n+Sample19_78\tcircular element\n+Sample19_89\tcircular element\n+Sample20_1\tcircular element\n+Sample20_10\tcircular element\n+Sample20_101\tcircular element\n+Sample20_108\tcircular 
element\n+Sample20_122\tcircular element\n+Sample20_124\tcircular element\n+Sample20_130\tcircular element\n+Sample20_131\tcircular element\n+Sample20_136\tcircular element\n+Sample20_145\tcircular element\n+Sample20_154\tcircular element\n+Sample20_158\tcircular element\n+Sample20_175\tcircular element\n+Sample20_20\tcircular element\n+Sample20_24\tcircular element\n+Sample20_25\tcircular element\n+Sample20_33\tcircular element\n+Sample20_37\tcircular element\n+Sample20_40\tcircular element\n+Sample20_43\tcircular element\n+Sample20_5\tcircular element\n+Sample20_58\tcircular element\n+Sample20_70\tcircular element\n+Sample20_78\tcircular element\n+Sample20_79\tcircular element\n+Sample20_80\tcircular element\n+Sample20_85\tcircular element\n+Sample20_94\tcircular element\n+Sample6_102\tcircular element\n+Sample6_107\tcircular element\n+Sample6_110\tcircular element\n+Sample6_125\tcircular element\n+Sample6_130\tcircular element\n+Sample6_16\tcircular element\n+Sample6_171\tcircular element\n+Sample6_173\tcircular element\n+Sample6_180\tcircular element\n+Sample6_183\tcircular element\n+Sample6_186\tcircular element\n+Sample6_188\tcircular element\n+Sample6_206\tcircular element\n+Sample6_217\tcircular element\n+Sample6_219\tcircular element\n+Sample6_224\tcircular element\n+Sample6_227\tcircular element\n+Sample6_260\tcircular element\n+Sample6_273\tcircular element\n+Sample6_306\tcircular element\n+Sample6_33\tcircular element\n+Sample6_333\tcircular element\n+Sample6_342\tcircular element\n+Sample6_35\tcircular element\n+Sample6_389\tcircular element\n+Sample6_39\tcircular element\n+Sample6_391\tcircular element\n+Sample6_40\tcircular element\n+Sample6_421\tcircular element\n+Sample6_423\tcircular element\n+Sample6_47\tcircular element\n+Sample6_485\tcircular element\n+Sample6_506\tcircular element\n+Sample6_508\tcircular element\n+Sample6_535\tcircular element\n+Sample6_553\tcircular element\n+Sample6_564\tcircular element\n+Sample6_566\tcircular 
element\n+Sample6_588\tcircular element\n+Sample6_590\tcircular element\n+Sample6_608\tcircular element\n+Sample6_632\tcircular element\n+Sample6_634\tcircular element\n+Sample6_670\tcircular element\n+Sample6_72\tcircular element\n+Sample6_746\tcircular element\n+Sample6_752\tcircular element\n+Sample6_793\tcircular element\n+Sample6_823\tcircular element\n+Sample6_837\tcircular element\n+Sample6_84\tcircular element\n+Sample6_843\tcircular element\n+Sample6_863\tcircular element\n+Sample6_89\tcircular element\n+Sample6_894\tcircular element\n+Sample6_936\tcircular element\n+Sample6_94\tcircular element\n+Sample6_96\tcircular element\n'
b
diff -r 000000000000 -r 1e10251b9615 tool-data/ncbi_taxonomy.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/ncbi_taxonomy.loc.sample Sun Aug 25 13:19:14 2024 +0000
b
@@ -0,0 +1,2 @@
+#value name path
+test-db-tox "Test Database"  ${__HERE__}/test-db
\ No newline at end of file
b
diff -r 000000000000 -r 1e10251b9615 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Sun Aug 25 13:19:14 2024 +0000
b
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<tables>
+        <!-- Locations of taxonomy data downloaded from NCBI -->
+    <table name="ncbi_taxonomy" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/test-data/ncbi_taxonomy.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r 1e10251b9615 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Sun Aug 25 13:19:14 2024 +0000
b
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<tables>
+        <!-- Locations of taxonomy data downloaded from NCBI -->
+    <table name="ncbi_taxonomy" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/test-data/ncbi_taxonomy.loc" />
+    </table>
+</tables>