Next changeset 1:b6dd55c620f8 (2024-08-14) |
Commit message:
planemo upload for repository https://github.com/shenwei356/taxonkit commit 695ea582a8d3bf7845dd4cddbc8b591e4b6c4e82 |
added:
macros.xml taxonkit_profile2cami.xml test-data/abundance.tsv test-data/ncbi_taxonomy.loc.test test-data/output1_basic_functionality.tsv test-data/output2_percentage_flag.tsv test-data/output3_recompute_abd.tsv test-data/output4_all_param.tsv test-data/test-db/delnodes.dmp test-data/test-db/division.dmp test-data/test-db/gc.prt test-data/test-db/gencode.dmp test-data/test-db/merged.dmp test-data/test-db/names.dmp test-data/test-db/nodes.dmp test-data/test-db/readme.txt tool-data/ncbi_taxonomy.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
b |
diff -r 000000000000 -r 0fd79958fac6 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Fri Jul 26 09:26:02 2024 +0000 |
b |
@@ -0,0 +1,29 @@
+<macros>
+    <!-- Version tokens substituted into the tool XML that imports this file. -->
+    <token name="@TOOL_VERSION@">0.17.0</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">21.05</token>
+
+    <!-- bio.tools cross-reference for TaxonKit. -->
+    <xml name="biotools">
+        <xrefs>
+            <xref type="bio.tools">taxonkit</xref>
+        </xrefs>
+    </xml>
+
+    <!-- Package requirement pinned to @TOOL_VERSION@; yield is replaced by any children of the expand call. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">taxonkit</requirement>
+            <yield />
+        </requirements>
+    </xml>
+
+    <!-- TaxonKit publication; yield is replaced by any children of the expand call. -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1016/j.jgg.2021.03.006</citation>
+            <yield />
+        </citations>
+    </xml>
+</macros>
\ No newline at end of file |
b |
diff -r 000000000000 -r 0fd79958fac6 taxonkit_profile2cami.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/taxonkit_profile2cami.xml Fri Jul 26 09:26:02 2024 +0000 |
[ |
@@ -0,0 +1,106 @@
+<tool id="profile2cami" name="Profile2CAMI" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Convert metagenomic profile table to CAMI format</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="biotools"/>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Every value handed to the shell is single-quoted, including the dataset paths.
+        taxonkit profile2cami
+            --data-dir '${taxonomy.fields.path}'
+            --abundance-field '${abundance_field}'
+            --taxid-field '${taxid_field}'
+            ## Each boolean expands to its truevalue flag when checked and to an empty string otherwise.
+            $percentage
+            $recompute_abd
+            $keep_zero
+            $no_sum_up
+            #if $sample_id:
+                -s '${sample_id}'
+            #end if
+            #if $taxonomy_id:
+                -t '${taxonomy_id}'
+            #end if
+            #if $ranks:
+                --show-rank '${ranks}'
+            #end if
+            '${input_file}'
+            > '${cami_output}'
+    ]]></command>
+    <inputs>
+        <param name="input_file" type="data" format="txt" label="Input Profile File" help="A tab-delimited profile file with TaxId and abundance columns."/>
+        <param name="taxonomy" argument="--data-dir" type="select" label="NCBI taxonomy" help="NCBI taxonomy database used to resolve each TaxId to its lineage (TAXPATH and TAXPATHSN columns).">
+            <options from_data_table="ncbi_taxonomy">
+                <validator type="no_options" message="No NCBI database is available"/>
+            </options>
+        </param>
+        <param name="abundance_field" type="integer" min="1" value="2" label="Abundance Field Index" help="Field index (1-based column number) of abundance in the input data."/>
+        <param name="taxid_field" type="integer" min="1" value="1" label="TaxId Field Index" help="Field index (1-based column number) of TaxId in the input data."/>
+        <!-- Optional switches: the truevalue is placed on the command line only when the box is checked. -->
+        <param name="percentage" type="boolean" checked="false" truevalue="-p" falsevalue="" label="Abundance in Percentage" help="Check if the abundance values are in percentage."/>
+        <param name="recompute_abd" type="boolean" checked="false" truevalue="-R" falsevalue="" label="Recompute Abundance" help="Check to recompute abundance if some TaxIds are deleted in the current taxonomy version."/>
+        <param name="keep_zero" type="boolean" checked="false" truevalue="-0" falsevalue="" label="Keep Zero Abundances" help="Check to keep taxons with abundance of zero."/>
+        <param name="no_sum_up" type="boolean" checked="false" truevalue="-S" falsevalue="" label="Do Not Sum Up Abundance" help="Do not sum up abundance from child to parent TaxIds."/>
+        <param name="sample_id" type="text" value="" label="Sample ID" help="Optional sample ID to include in the result file."/>
+        <param name="taxonomy_id" type="text" value="" label="Taxonomy ID" help="Optional taxonomy ID to include in the result file."/>
+        <param name="ranks" argument="--show-rank" type="select" multiple="true" label="Show Ranks" help="Specify the ranks to show in the result file (default [superkingdom,phylum,class,order,family,genus,species,strain]).">
+            <option value="superkingdom">Superkingdom</option>
+            <option value="phylum">Phylum</option>
+            <option value="class">Class</option>
+            <option value="order">Order</option>
+            <option value="family">Family</option>
+            <option value="genus">Genus</option>
+            <option value="species">Species</option>
+            <option value="strain">Strain</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="cami_output" format="tsv" label="${tool.name} on ${on_string}"/>
+    </outputs>
+    <tests>
+        <!-- Test 1: Basic functionality with default parameters -->
+        <test expect_num_outputs="1">
+            <param name="input_file" value="abundance.tsv" ftype="tsv"/>
+            <output name="cami_output" file="output1_basic_functionality.tsv"/>
+        </test>
+        <!-- Test 2: Using percentage flag -->
+        <test expect_num_outputs="1">
+            <param name="input_file" value="abundance.tsv" ftype="tsv"/>
+            <param name="percentage" value="true"/>
+            <output name="cami_output" file="output2_percentage_flag.tsv"/>
+        </test>
+        <!-- Test 3: Recomputing abundance with deleted TaxIds -->
+        <test expect_num_outputs="1">
+            <param name="input_file" value="abundance.tsv" ftype="tsv"/>
+            <param name="recompute_abd" value="true"/>
+            <output name="cami_output" file="output3_recompute_abd.tsv"/>
+        </test>
+        <!-- Test 4: Profile2Cami with all boolean parameters checked -->
+        <test expect_num_outputs="1">
+            <param name="input_file" value="abundance.tsv" ftype="tsv"/>
+            <param name="percentage" value="true"/>
+            <param name="recompute_abd" value="true"/>
+            <param name="keep_zero" value="true"/>
+            <param name="no_sum_up" value="true"/>
+            <output name="cami_output" file="output4_all_param.tsv"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+**What is Profile2CAMI**
+
+Profile2CAMI runs ``taxonkit profile2cami`` to convert a metagenomic profile table into CAMI format.
+
+**Inputs**
+
+- A tab-delimited profile file with TaxId and abundance columns. The column numbers are set with *TaxId Field Index* and *Abundance Field Index*.
+- An NCBI taxonomy selected from the ``ncbi_taxonomy`` data table.
+
+**Outputs**
+
+- A CAMI formatted file: ``@SampleID``, ``@Version``, ``@Ranks`` and ``@TaxonomyID`` header lines followed by a table with the columns TAXID, RANK, TAXPATH, TAXPATHSN and PERCENTAGE.
+
+For more information, please refer to the tool's documentation.
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file |
b |
diff -r 000000000000 -r 0fd79958fac6 test-data/abundance.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/abundance.tsv Fri Jul 26 09:26:02 2024 +0000 |
b |
@@ -0,0 +1,4 @@ +83333 0.2 merged to 562 +83333 0.2 absord 562 +561 0.5 no change +91347 0.1 deleted \ No newline at end of file |
b |
diff -r 000000000000 -r 0fd79958fac6 test-data/ncbi_taxonomy.loc.test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ncbi_taxonomy.loc.test Fri Jul 26 09:26:02 2024 +0000 |
b |
@@ -0,0 +1,1 @@ +test-db-tox Test Database ${__HERE__}/test-db \ No newline at end of file |
b |
diff -r 000000000000 -r 0fd79958fac6 test-data/output1_basic_functionality.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output1_basic_functionality.tsv Fri Jul 26 09:26:02 2024 +0000 |
b |
@@ -0,0 +1,12 @@ +@SampleID: +@Version:0.10.0 +@Ranks:superkingdom|phylum|class|order|family|genus|species|strain +@TaxonomyID: +@@TAXID RANK TAXPATH TAXPATHSN PERCENTAGE +2 superkingdom 2 Bacteria 100.000000000000000 +1224 phylum 2|1224 Bacteria|Proteobacteria 100.000000000000000 +1236 class 2|1224|1236 Bacteria|Proteobacteria|Gammaproteobacteria 100.000000000000000 +91347 order 2|1224|1236|91347 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales 100.000000000000000 +543 family 2|1224|1236|91347|543 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae 90.000000000000000 +561 genus 2|1224|1236|91347|543|561 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae|Escherichia 90.000000000000000 +562 species 2|1224|1236|91347|543|561|562 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae|Escherichia|Escherichia coli 40.000000000000000 |
b |
diff -r 000000000000 -r 0fd79958fac6 test-data/output2_percentage_flag.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output2_percentage_flag.tsv Fri Jul 26 09:26:02 2024 +0000 |
b |
@@ -0,0 +1,12 @@ +@SampleID: +@Version:0.10.0 +@Ranks:superkingdom|phylum|class|order|family|genus|species|strain +@TaxonomyID: +@@TAXID RANK TAXPATH TAXPATHSN PERCENTAGE +2 superkingdom 2 Bacteria 1.000000000000000 +1224 phylum 2|1224 Bacteria|Proteobacteria 1.000000000000000 +1236 class 2|1224|1236 Bacteria|Proteobacteria|Gammaproteobacteria 1.000000000000000 +91347 order 2|1224|1236|91347 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales 1.000000000000000 +543 family 2|1224|1236|91347|543 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae 0.900000000000000 +561 genus 2|1224|1236|91347|543|561 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae|Escherichia 0.900000000000000 +562 species 2|1224|1236|91347|543|561|562 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae|Escherichia|Escherichia coli 0.400000000000000 |
b |
diff -r 000000000000 -r 0fd79958fac6 test-data/output3_recompute_abd.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output3_recompute_abd.tsv Fri Jul 26 09:26:02 2024 +0000 |
b |
@@ -0,0 +1,12 @@ +@SampleID: +@Version:0.10.0 +@Ranks:superkingdom|phylum|class|order|family|genus|species|strain +@TaxonomyID: +@@TAXID RANK TAXPATH TAXPATHSN PERCENTAGE +2 superkingdom 2 Bacteria 190.000000000000000 +1224 phylum 2|1224 Bacteria|Proteobacteria 190.000000000000000 +1236 class 2|1224|1236 Bacteria|Proteobacteria|Gammaproteobacteria 190.000000000000000 +91347 order 2|1224|1236|91347 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales 190.000000000000000 +543 family 2|1224|1236|91347|543 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae 90.000000000000000 +561 genus 2|1224|1236|91347|543|561 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae|Escherichia 90.000000000000000 +562 species 2|1224|1236|91347|543|561|562 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae|Escherichia|Escherichia coli 40.000000000000000 |
b |
diff -r 000000000000 -r 0fd79958fac6 test-data/output4_all_param.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output4_all_param.tsv Fri Jul 26 09:26:02 2024 +0000 |
b |
@@ -0,0 +1,12 @@ +@SampleID: +@Version:0.10.0 +@Ranks:superkingdom|phylum|class|order|family|genus|species|strain +@TaxonomyID: +@@TAXID RANK TAXPATH TAXPATHSN PERCENTAGE +2 superkingdom 2 Bacteria 0.500000000000000 +1224 phylum 2|1224 Bacteria|Proteobacteria 0.500000000000000 +1236 class 2|1224|1236 Bacteria|Proteobacteria|Gammaproteobacteria 0.500000000000000 +91347 order 2|1224|1236|91347 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales 0.500000000000000 +543 family 2|1224|1236|91347|543 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae 0.500000000000000 +561 genus 2|1224|1236|91347|543|561 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae|Escherichia 0.500000000000000 +562 species 2|1224|1236|91347|543|561|562 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae|Escherichia|Escherichia coli 0.400000000000000 |
b |
diff -r 000000000000 -r 0fd79958fac6 test-data/test-db/delnodes.dmp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-db/delnodes.dmp Fri Jul 26 09:26:02 2024 +0000 |
b |
b'@@ -0,0 +1,15000 @@\n+2923441\t|\n+2923440\t|\n+2923439\t|\n+2923438\t|\n+2923437\t|\n+2923436\t|\n+2923435\t|\n+2923434\t|\n+2923433\t|\n+2923432\t|\n+2923431\t|\n+2923430\t|\n+2923429\t|\n+2923428\t|\n+2923427\t|\n+2923426\t|\n+2923425\t|\n+2923424\t|\n+2923423\t|\n+2923422\t|\n+2923421\t|\n+2923420\t|\n+2923419\t|\n+2923418\t|\n+2923417\t|\n+2923416\t|\n+2923415\t|\n+2923414\t|\n+2923413\t|\n+2923412\t|\n+2923411\t|\n+2923410\t|\n+2923409\t|\n+2923408\t|\n+2923407\t|\n+2923406\t|\n+2923405\t|\n+2923404\t|\n+2923403\t|\n+2923402\t|\n+2923401\t|\n+2923400\t|\n+2923399\t|\n+2923398\t|\n+2923397\t|\n+2923396\t|\n+2923395\t|\n+2923394\t|\n+2923393\t|\n+2923392\t|\n+2923391\t|\n+2923390\t|\n+2923389\t|\n+2923388\t|\n+2923387\t|\n+2923386\t|\n+2923385\t|\n+2923384\t|\n+2923383\t|\n+2923382\t|\n+2923381\t|\n+2923380\t|\n+2923379\t|\n+2923378\t|\n+2923377\t|\n+2923376\t|\n+2923375\t|\n+2923374\t|\n+2923373\t|\n+2923372\t|\n+2923371\t|\n+2923370\t|\n+2923369\t|\n+2923367\t|\n+2923366\t|\n+2923365\t|\n+2923364\t|\n+2923363\t|\n+2923362\t|\n+2923361\t|\n+2923360\t|\n+2923359\t|\n+2923358\t|\n+2923357\t|\n+2923356\t|\n+2923355\t|\n+2923354\t|\n+2923353\t|\n+2923351\t|\n+2923350\t|\n+2923349\t|\n+2923348\t|\n+2923347\t|\n+2923346\t|\n+2923345\t|\n+2923344\t|\n+2923343\t|\n+2923342\t|\n+2923341\t|\n+2923340\t|\n+2923339\t|\n+2923338\t|\n+2923337\t|\n+2923336\t|\n+2923335\t|\n+2923334\t|\n+2923333\t|\n+2923332\t|\n+2923331\t|\n+2923330\t|\n+2923329\t|\n+2923328\t|\n+2923327\t|\n+2923326\t|\n+2923324\t|\n+2923323\t|\n+2923322\t|\n+2923321\t|\n+2923320\t|\n+2923319\t|\n+2923318\t|\n+2923317\t|\n+2923316\t|\n+2923315\t|\n+2923314\t|\n+2923313\t|\n+2923312\t|\n+2923311\t|\n+2923310\t|\n+2923309\t|\n+2923308\t|\n+2923307\t|\n+2923306\t|\n+2923305\t|\n+2923304\t|\n+2923303\t|\n+2923302\t|\n+2923301\t|\n+2923300\t|\n+2923299\t|\n+2923298\t|\n+2923297\t|\n+2923296\t|\n+2923295\t|\n+2923294\t|\n+2923293\t|\n+2923292\t|\n+2923291\t|\n+2923287\t|\n+2923286\t|\n+2923285\t|\n+2923284\t|\n+
2923283\t|\n+2923282\t|\n+2923281\t|\n+2923280\t|\n+2923279\t|\n+2923278\t|\n+2923277\t|\n+2923276\t|\n+2923275\t|\n+2923274\t|\n+2923273\t|\n+2923272\t|\n+2923271\t|\n+2923270\t|\n+2923269\t|\n+2923268\t|\n+2923267\t|\n+2923266\t|\n+2923264\t|\n+2923263\t|\n+2923262\t|\n+2923261\t|\n+2923260\t|\n+2923259\t|\n+2923258\t|\n+2923257\t|\n+2923256\t|\n+2923255\t|\n+2923254\t|\n+2923253\t|\n+2923252\t|\n+2923251\t|\n+2923250\t|\n+2923249\t|\n+2923247\t|\n+2923246\t|\n+2923245\t|\n+2923244\t|\n+2923243\t|\n+2923242\t|\n+2923241\t|\n+2923240\t|\n+2923239\t|\n+2923238\t|\n+2923237\t|\n+2923236\t|\n+2923235\t|\n+2923234\t|\n+2923233\t|\n+2923232\t|\n+2923231\t|\n+2923230\t|\n+2923229\t|\n+2923228\t|\n+2923227\t|\n+2923226\t|\n+2923225\t|\n+2923224\t|\n+2923223\t|\n+2923222\t|\n+2923221\t|\n+2923220\t|\n+2923219\t|\n+2923218\t|\n+2923217\t|\n+2923216\t|\n+2923215\t|\n+2923214\t|\n+2923213\t|\n+2923212\t|\n+2923211\t|\n+2923210\t|\n+2923209\t|\n+2923208\t|\n+2923207\t|\n+2923206\t|\n+2923205\t|\n+2923204\t|\n+2923203\t|\n+2923202\t|\n+2923201\t|\n+2923200\t|\n+2923199\t|\n+2923198\t|\n+2923197\t|\n+2923196\t|\n+2923195\t|\n+2923194\t|\n+2923193\t|\n+2923192\t|\n+2923191\t|\n+2923190\t|\n+2923189\t|\n+2923188\t|\n+2923187\t|\n+2923186\t|\n+2923185\t|\n+2923184\t|\n+2923183\t|\n+2923182\t|\n+2923181\t|\n+2923180\t|\n+2923179\t|\n+2923178\t|\n+2923177\t|\n+2923176\t|\n+2923175\t|\n+2923174\t|\n+2923173\t|\n+2923172\t|\n+2923171\t|\n+2923170\t|\n+2923169\t|\n+2923168\t|\n+2923167\t|\n+2923166\t|\n+2923165\t|\n+2923164\t|\n+2923163\t|\n+2923162\t|\n+2923161\t|\n+2923160\t|\n+2923159\t|\n+2923158\t|\n+2923157\t|\n+2923156\t|\n+2923155\t|\n+2923154\t|\n+2923153\t|\n+2923152\t|\n+2923151\t|\n+2923150\t|\n+2923149\t|\n+2923148\t|\n+2923147\t|\n+2923146\t|\n+2923145\t|\n+2923144\t|\n+2923143\t|\n+2923142\t|\n+2923141\t|\n+2923140\t|\n+2923139\t|\n+2923138\t|\n+2923137\t|\n+2923136\t|\n+2923135\t|\n+2923134\t|\n+2923133\t|\n+2923132\t|\n+2923131\t|\n+2923130\t|\n+2923129\t|\n+2923128\t|\
n+2923127\t|\n+2923126\t|\n+2923125\t|\n+2923124\t|\n+2923123\t|\n+2923122\t|\n+2923121\t|\n+2923120\t|\n+2923119\t|\n+2923118\t|\n+2923117\t|\n+2923116\t|\n+2923115\t|\n+2923114\t|\n+2923113\t|\n+2923112\t|\n+2923111\t|\n+2923110\t|\n+2923109\t|\n+2923108\t|\n+2923107\t|\n+2923106\t|\n+2923105\t|\n+2923104\t|\n+2923103\t|\n+2923102\t|\n+2923101\t|\n+2923100\t|\n+2923099\t|\n+2923098\t|\n+2923097\t|\n+2923096\t|\n+2923095\t|\n+2923094\t|\n+2923093\t|\n+2923092\t|\n+2923091\t|\n+2923090\t|\n+2923089\t|\n+2923088\t|\n+2923087\t|\n+2923086\t|\n+2923085\t|\n+2923084\t|\n+2923083\t|\n+2923082\t|\n+2923081\t|\n+2923080\t|\n+2923078\t|\n+2923077\t|\n+2923076\t|\n+2923075\t|\n+2923074\t|\n+2923073\t|\n+2923072\t|\n+2923071'..b'302\t|\n+2901301\t|\n+2901300\t|\n+2901297\t|\n+2901292\t|\n+2901289\t|\n+2901278\t|\n+2901277\t|\n+2901276\t|\n+2901274\t|\n+2901273\t|\n+2901272\t|\n+2901265\t|\n+2901256\t|\n+2901255\t|\n+2901254\t|\n+2901253\t|\n+2901252\t|\n+2901251\t|\n+2901250\t|\n+2901249\t|\n+2901248\t|\n+2901247\t|\n+2901246\t|\n+2901245\t|\n+2901244\t|\n+2901243\t|\n+2901242\t|\n+2901238\t|\n+2901237\t|\n+2901235\t|\n+2901234\t|\n+2901233\t|\n+2901232\t|\n+2901231\t|\n+2901230\t|\n+2901229\t|\n+2901222\t|\n+2901221\t|\n+2901220\t|\n+2901219\t|\n+2901218\t|\n+2901217\t|\n+2901216\t|\n+2901215\t|\n+2901214\t|\n+2901213\t|\n+2901212\t|\n+2901211\t|\n+2901210\t|\n+2901208\t|\n+2901207\t|\n+2901206\t|\n+2901205\t|\n+2901204\t|\n+2901202\t|\n+2901201\t|\n+2901200\t|\n+2901199\t|\n+2901198\t|\n+2901197\t|\n+2901193\t|\n+2901191\t|\n+2901188\t|\n+2901186\t|\n+2901185\t|\n+2901184\t|\n+2901183\t|\n+2901182\t|\n+2901181\t|\n+2901180\t|\n+2901179\t|\n+2901178\t|\n+2901173\t|\n+2901171\t|\n+2901170\t|\n+2901169\t|\n+2901168\t|\n+2901167\t|\n+2901166\t|\n+2901165\t|\n+2901164\t|\n+2901163\t|\n+2901162\t|\n+2901161\t|\n+2901160\t|\n+2901159\t|\n+2901157\t|\n+2901156\t|\n+2901155\t|\n+2901147\t|\n+2901145\t|\n+2901144\t|\n+2901143\t|\n+2901139\t|\n+2901138\t|\n+2901137\t|\n+2901136\t|\n+2
901135\t|\n+2901134\t|\n+2901133\t|\n+2901132\t|\n+2901131\t|\n+2901130\t|\n+2901129\t|\n+2901128\t|\n+2901127\t|\n+2901126\t|\n+2901125\t|\n+2901124\t|\n+2901123\t|\n+2901122\t|\n+2901121\t|\n+2901120\t|\n+2901119\t|\n+2901118\t|\n+2901117\t|\n+2901116\t|\n+2901115\t|\n+2901114\t|\n+2901113\t|\n+2901112\t|\n+2901111\t|\n+2901110\t|\n+2901109\t|\n+2901108\t|\n+2901107\t|\n+2901106\t|\n+2901105\t|\n+2901104\t|\n+2901103\t|\n+2901102\t|\n+2901101\t|\n+2901100\t|\n+2901099\t|\n+2901098\t|\n+2901097\t|\n+2901096\t|\n+2901095\t|\n+2901094\t|\n+2901093\t|\n+2901092\t|\n+2901091\t|\n+2901090\t|\n+2901089\t|\n+2901088\t|\n+2901087\t|\n+2901086\t|\n+2901085\t|\n+2901084\t|\n+2901083\t|\n+2901082\t|\n+2901081\t|\n+2901080\t|\n+2901079\t|\n+2901078\t|\n+2901077\t|\n+2901076\t|\n+2901075\t|\n+2901074\t|\n+2901073\t|\n+2901072\t|\n+2901071\t|\n+2901070\t|\n+2901069\t|\n+2901068\t|\n+2901067\t|\n+2901066\t|\n+2901065\t|\n+2901064\t|\n+2901063\t|\n+2901062\t|\n+2901061\t|\n+2901060\t|\n+2901059\t|\n+2901058\t|\n+2901057\t|\n+2901056\t|\n+2901055\t|\n+2901054\t|\n+2901053\t|\n+2901052\t|\n+2901051\t|\n+2901050\t|\n+2901049\t|\n+2901048\t|\n+2901047\t|\n+2901046\t|\n+2901045\t|\n+2901044\t|\n+2901043\t|\n+2901042\t|\n+2901041\t|\n+2901040\t|\n+2901039\t|\n+2901038\t|\n+2901037\t|\n+2901036\t|\n+2901035\t|\n+2901034\t|\n+2901033\t|\n+2901032\t|\n+2901031\t|\n+2901030\t|\n+2901029\t|\n+2901028\t|\n+2901027\t|\n+2901026\t|\n+2901025\t|\n+2901024\t|\n+2901023\t|\n+2901022\t|\n+2901021\t|\n+2901020\t|\n+2901019\t|\n+2901018\t|\n+2901017\t|\n+2901016\t|\n+2901015\t|\n+2901014\t|\n+2901013\t|\n+2901012\t|\n+2901011\t|\n+2901010\t|\n+2901009\t|\n+2901008\t|\n+2901007\t|\n+2901006\t|\n+2901005\t|\n+2901004\t|\n+2901003\t|\n+2901002\t|\n+2901001\t|\n+2901000\t|\n+2900999\t|\n+2900998\t|\n+2900997\t|\n+2900996\t|\n+2900995\t|\n+2900994\t|\n+2900993\t|\n+2900992\t|\n+2900991\t|\n+2900990\t|\n+2900989\t|\n+2900988\t|\n+2900987\t|\n+2900986\t|\n+2900985\t|\n+2900984\t|\n+2900983\t|\n+2900982\t|\n
+2900981\t|\n+2900980\t|\n+2900979\t|\n+2900978\t|\n+2900977\t|\n+2900976\t|\n+2900975\t|\n+2900974\t|\n+2900973\t|\n+2900972\t|\n+2900971\t|\n+2900970\t|\n+2900969\t|\n+2900968\t|\n+2900967\t|\n+2900966\t|\n+2900965\t|\n+2900964\t|\n+2900963\t|\n+2900962\t|\n+2900961\t|\n+2900960\t|\n+2900959\t|\n+2900958\t|\n+2900957\t|\n+2900956\t|\n+2900955\t|\n+2900954\t|\n+2900953\t|\n+2900952\t|\n+2900951\t|\n+2900950\t|\n+2900949\t|\n+2900948\t|\n+2900947\t|\n+2900946\t|\n+2900945\t|\n+2900944\t|\n+2900943\t|\n+2900942\t|\n+2900941\t|\n+2900940\t|\n+2900939\t|\n+2900938\t|\n+2900937\t|\n+2900936\t|\n+2900935\t|\n+2900934\t|\n+2900933\t|\n+2900932\t|\n+2900931\t|\n+2900930\t|\n+2900929\t|\n+2900928\t|\n+2900927\t|\n+2900926\t|\n+2900925\t|\n+2900924\t|\n+2900923\t|\n+2900922\t|\n+2900921\t|\n+2900920\t|\n+2900919\t|\n+2900918\t|\n+2900917\t|\n+2900916\t|\n+2900915\t|\n+2900914\t|\n+2900913\t|\n+2900912\t|\n+2900911\t|\n+2900910\t|\n+2900909\t|\n+2900908\t|\n+2900907\t|\n+2900906\t|\n+2900905\t|\n+2900904\t|\n+2900903\t|\n+2900902\t|\n+2900901\t|\n+2900900\t|\n+2900899\t|\n+2900898\t|\n+2900897\t|\n+2900896\t|\n+2900895\t|\n+2900894\t|\n+2900893\t|\n+2900892\t|\n+2900891\t|\n+2900890\t|\n+2900889\t|\n+2900888\t|\n+2900887\t|\n+2900886\t|\n+2900885\t|\n+2900884\t|\n+2900883\t|\n+2900882\t|\n+2900881\t|\n+2900880\t|\n+2900879\t|\n+2900878\t|\n+2900877\t|\n+2900876\t|\n+2900875\t|\n+2900874\t|\n+2900873\t|\n+2900872\t|\n+2900871\t|\n+2900870\t|\n' |
b |
diff -r 000000000000 -r 0fd79958fac6 test-data/test-db/division.dmp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-db/division.dmp Fri Jul 26 09:26:02 2024 +0000 |
b |
@@ -0,0 +1,12 @@ +0 | BCT | Bacteria | | +1 | INV | Invertebrates | | +2 | MAM | Mammals | | +3 | PHG | Phages | | +4 | PLN | Plants and Fungi | | +5 | PRI | Primates | | +6 | ROD | Rodents | | +7 | SYN | Synthetic and Chimeric | | +8 | UNA | Unassigned | No species nodes should inherit this division assignment | +9 | VRL | Viruses | | +10 | VRT | Vertebrates | | +11 | ENV | Environmental samples | Anonymous sequences cloned directly from the environment | |
b |
diff -r 000000000000 -r 0fd79958fac6 test-data/test-db/gc.prt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-db/gc.prt Fri Jul 26 09:26:02 2024 +0000 |
b |
b'@@ -0,0 +1,358 @@\n+--**************************************************************************\n+-- This is the NCBI genetic code table\n+-- Initial base data set from Andrzej Elzanowski while at PIR International\n+-- Addition of Eubacterial and Alternative Yeast by J.Ostell at NCBI\n+-- Base 1-3 of each codon have been added as comments to facilitate\n+-- readability at the suggestion of Peter Rice, EMBL\n+-- Later additions by Taxonomy Group staff at NCBI\n+--\n+-- Version 4.6\n+-- Renamed genetic code 24 to Rhabdopleuridae Mitochondrial\n+--\n+-- Version 4.5\n+-- Added Cephalodiscidae mitochondrial genetic code 33\n+--\n+-- Version 4.4\n+-- Added GTG as start codon for genetic code 3\n+-- Added Balanophoraceae plastid genetic code 32\n+--\n+-- Version 4.3\n+-- Change to CTG -> Leu in genetic codes 27, 28, 29, 30\n+--\n+-- Version 4.2\n+-- Added Karyorelict nuclear genetic code 27\n+-- Added Condylostoma nuclear genetic code 28\n+-- Added Mesodinium nuclear genetic code 29\n+-- Added Peritrich nuclear genetic code 30\n+-- Added Blastocrithidia nuclear genetic code 31\n+--\n+-- Version 4.1\n+-- Added Pachysolen tannophilus nuclear genetic code 26\n+--\n+-- Version 4.0\n+-- Updated version to reflect numerous undocumented changes:\n+-- Corrected start codons for genetic code 25\n+-- Name of new genetic code is Candidate Division SR1 and Gracilibacteria\n+-- Added candidate division SR1 nuclear genetic code 25\n+-- Added GTG as start codon for genetic code 24\n+-- Corrected Pterobranchia Mitochondrial genetic code (24)\n+-- Added genetic code 24, Pterobranchia Mitochondrial\n+-- Genetic code 11 is now Bacterial, Archaeal and Plant Plastid\n+-- Fixed capitalization of mitochondrial in codes 22 and 23\n+-- Added GTG, ATA, and TTG as alternative start codons to code 13\n+--\n+-- Version 3.9\n+-- Code 14 differs from code 9 only by translating UAA to Tyr rather than\n+-- STOP. 
A recent study (Telford et al, 2000) has found no evidence that\n+-- the codon UAA codes for Tyr in the flatworms, but other opinions exist.\n+-- There are very few GenBank records that are translated with code 14,\n+-- but a test translation shows that retranslating these records with code\n+-- 9 can cause premature terminations. Therefore, GenBank will maintain\n+-- code 14 until further information becomes available.\n+--\n+-- Version 3.8\n+-- Added GTG start to Echinoderm mitochondrial code, code 9\n+--\n+-- Version 3.7\n+-- Added code 23 Thraustochytrium mitochondrial code\n+-- formerly OGMP code 93\n+-- submitted by Gertraude Berger, Ph.D.\n+--\n+-- Version 3.6\n+-- Added code 22 TAG-Leu, TCA-stop\n+-- found in mitochondrial DNA of Scenedesmus obliquus\n+-- submitted by Gertraude Berger, Ph.D.\n+-- Organelle Genome Megasequencing Program, Univ Montreal\n+--\n+-- Version 3.5\n+-- Added code 21, Trematode Mitochondrial\n+-- (as deduced from: Garey & Wolstenholme,1989; Ohama et al, 1990)\n+-- Added code 16, Chlorophycean Mitochondrial\n+-- (TAG can translated to Leucine instaed to STOP in chlorophyceans\n+-- and fungi)\n+--\n+-- Version 3.4\n+-- Added CTG,TTG as allowed alternate start codons in Standard code.\n+-- Prats et al. 1989, Hann et al. 1992\n+--\n+-- Version 3.3 - 10/13/95\n+-- Added alternate intiation codon ATC to code 5\n+-- based on complete mitochondrial genome of honeybee\n+-- Crozier and Crozier (1993)\n+--\n+-- Version 3.2 - 6/24/95\n+-- Code Comments\n+-- 10 Alternative Ciliate Macronuclear renamed to Euplotid Macro...\n+-- 15 Blepharisma Macro.. code added\n+-- 5 Invertebrate Mito.. 
GTG allowed as alternate initiator\n+-- 11 Eubacterial renamed to Bacterial as most alternate starts\n+-- have been found in Archea\n+--\n+--\n+-- Version 3.1 - 1995\n+-- Updated as per Andrzej Elzanowski at NCBI\n+-- Complete documentation in NCBI'..b'5 ,\n+ ncbieaa "FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",\n+ sncbieaa "---M------**-----------------------M---------------M------------"\n+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ } ,\n+ {\n+ name "Pachysolen tannophilus Nuclear" ,\n+ id 26 ,\n+ ncbieaa "FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",\n+ sncbieaa "----------**--*----M---------------M----------------------------"\n+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ } ,\n+ {\n+ name "Karyorelict Nuclear" ,\n+ id 27 ,\n+ ncbieaa "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",\n+ sncbieaa "--------------*--------------------M----------------------------"\n+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ } ,\n+ {\n+ name "Condylostoma Nuclear" ,\n+ id 28 ,\n+ ncbieaa "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",\n+ sncbieaa "----------**--*--------------------M----------------------------"\n+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ } ,\n+ 
{\n+ name "Mesodinium Nuclear" ,\n+ id 29 ,\n+ ncbieaa "FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",\n+ sncbieaa "--------------*--------------------M----------------------------"\n+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ } ,\n+ {\n+ name "Peritrich Nuclear" ,\n+ id 30 ,\n+ ncbieaa "FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",\n+ sncbieaa "--------------*--------------------M----------------------------"\n+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ } ,\n+ {\n+ name "Blastocrithidia Nuclear" ,\n+ id 31 ,\n+ ncbieaa "FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",\n+ sncbieaa "----------**-----------------------M----------------------------"\n+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ } ,\n+ {\n+ name "Balanophoraceae Plastid" ,\n+ id 32 ,\n+ ncbieaa "FFLLSSSSYY*WCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",\n+ sncbieaa "---M------*---*----M------------MMMM---------------M------------"\n+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ } ,\n+ {\n+ name "Cephalodiscidae Mitochondrial" ,\n+ id 33 ,\n+ ncbieaa "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",\n+ sncbieaa "---M-------*-------M---------------M---------------M------------"\n+ -- 
Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG\n+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG\n+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG\n+ }\n+}\n' |
b |
diff -r 000000000000 -r 0fd79958fac6 test-data/test-db/gencode.dmp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-db/gencode.dmp Fri Jul 26 09:26:02 2024 +0000 |
b |
@@ -0,0 +1,28 @@ +0 | | Unspecified | | | +1 | | Standard | FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ---M------**--*----M---------------M---------------------------- | +2 | | Vertebrate Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG | ----------**--------------------MMMM----------**---M------------ | +3 | | Yeast Mitochondrial | FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ----------**----------------------MM---------------M------------ | +4 | | Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | --MM------**-------M------------MMMM---------------M------------ | +5 | | Invertebrate Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG | ---M------**--------------------MMMM---------------M------------ | +6 | | Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear | FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | --------------*--------------------M---------------------------- | +9 | | Echinoderm Mitochondrial; Flatworm Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG | ----------**-----------------------M---------------M------------ | +10 | | Euplotid Nuclear | FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ----------**-----------------------M---------------------------- | +11 | | Bacterial, Archaeal and Plant Plastid | FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ---M------**--*----M------------MMMM---------------M------------ | +12 | | Alternative Yeast Nuclear | FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ----------**--*----M---------------M---------------------------- | +13 | | Ascidian Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG | 
---M------**----------------------MM---------------M------------ | +14 | | Alternative Flatworm Mitochondrial | FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG | -----------*-----------------------M---------------------------- | +15 | | Blepharisma Macronuclear | FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ----------*---*--------------------M---------------------------- | +16 | | Chlorophycean Mitochondrial | FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ----------*---*--------------------M---------------------------- | +21 | | Trematode Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG | ----------**-----------------------M---------------M------------ | +22 | | Scenedesmus obliquus mitochondrial | FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ------*---*---*--------------------M---------------------------- | +23 | | Thraustochytrium mitochondrial code | FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | --*-------**--*-----------------M--M---------------M------------ | +24 | | Rhabdopleuridae Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG | ---M------**-------M---------------M---------------M------------ | +25 | | Candidate Division SR1 and Gracilibacteria | FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ---M------**-----------------------M---------------M------------ | +26 | | Pachysolen tannophilus Nuclear | FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ----------**--*----M---------------M---------------------------- | +27 | | Karyorelict Nuclear | FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | --------------*--------------------M---------------------------- | +28 | | Condylostoma Nuclear | FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ----------**--*--------------------M---------------------------- | +29 | | Mesodinium 
Nuclear | FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | --------------*--------------------M---------------------------- | +30 | | Peritrich Nuclear | FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | --------------*--------------------M---------------------------- | +31 | | Blastocrithidia Nuclear | FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ----------**-----------------------M---------------------------- | +32 | | Balanophoraceae Plastid | FFLLSSSSYY*WCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ---M------*---*----M------------MMMM---------------M------------ | +33 | | Cephalodiscidae Mitochondrial | FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG | ---M-------*-------M---------------M---------------M------------ | |
b |
diff -r 000000000000 -r 0fd79958fac6 test-data/test-db/merged.dmp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-db/merged.dmp Fri Jul 26 09:26:02 2024 +0000 |
b |
@@ -0,0 +1,2 @@ +2824115 | 483329 | +2923440 | 2824115 | |
b |
diff -r 000000000000 -r 0fd79958fac6 test-data/test-db/names.dmp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-db/names.dmp Fri Jul 26 09:26:02 2024 +0000 |
[ |
@@ -0,0 +1,75 @@ +83333 | Escherichia coli K-12 | | scientific name | +83333 | Escherichia coli K12 | | equivalent name | +562 | "Bacillus coli" Migula 1895 | | authority | +562 | "Bacterium coli commune" Escherich 1885 | | authority | +562 | "Bacterium coli" (Migula 1895) Lehmann and Neumann 1896 | | authority | +562 | ATCC 11775 | | type material | +562 | Bacillus coli | | synonym | +562 | Bacterium coli | | synonym | +562 | Bacterium coli commune | | synonym | +562 | CCUG 24 | | type material | +562 | CCUG 29300 | | type material | +562 | CIP 54.8 | | type material | +562 | DSM 30083 | | type material | +562 | Enterococcus coli | | synonym | +562 | Escherchia coli | | misspelling | +562 | Escherichia coli | | scientific name | +562 | Escherichia coli (Migula 1895) Castellani and Chalmers 1919 | | authority | +562 | Escherichia sp. MAR | | includes | +562 | Escherichia/Shigella coli | | equivalent name | +562 | Eschericia coli | | misspelling | +562 | JCM 1649 | | type material | +562 | LMG 2092 | | type material | +562 | NBRC 102203 | | type material | +562 | NCCB 54008 | | type material | +562 | NCTC 9001 | | type material | +562 | bacterium 10a | | includes | +562 | bacterium E3 | | includes | +561 | Escherchia | | misspelling | +561 | Escherichia | | scientific name | +561 | Escherichia Castellani and Chalmers 1919 | | authority | +543 | Enterobacteraceae | | synonym | +543 | Enterobacteraceae (ex Lapage 1979) Lapage 1982, fam. nov., nom. rev. | | synonym | +543 | Enterobacteriaceae | | scientific name | +543 | Enterobacteriaceae (ex Rahn 1937) Ewing et al. 1980, fam. nov., nom. rev. 
| | synonym | +543 | Enterobacteriaceae Rahn 1937 | | synonym | +543 | gamma-3 proteobacteria | gamma-3 proteobacteria <#1> | in-part | +91347 | 'Enterobacteriales' | | synonym | +91347 | Enterobacteriaceae and related endosymbionts | | synonym | +91347 | Enterobacteriaceae group | | synonym | +91347 | Enterobacteriales | | scientific name | +91347 | enterobacteria | enterobacteria<blast91347> | blast name | +91347 | gamma-3 proteobacteria | gamma-3 proteobacteria <#5> | in-part | +1236 | Gammaproteobacteria | | scientific name | +1236 | Gammaproteobacteria Garrity et al. 2005 | | synonym | +1236 | Proteobacteria gamma subdivision | | synonym | +1236 | Purple bacteria, gamma subdivision | | synonym | +1236 | g-proteobacteria | gamma proteos<blast1236> | blast name | +1236 | gamma proteobacteria | | synonym | +1236 | gamma subdivision | | synonym | +1236 | gamma subgroup | | synonym | +1224 | Proteobacteria | | scientific name | +1224 | Proteobacteria Garrity et al. 2005 | | authority | +1224 | Proteobacteria [class] Stackebrandt et al. 
1988 | | authority | +1224 | not Proteobacteria Cavalier-Smith 2002 | | authority | +1224 | proteobacteria | proteobacteria<blast1224> | blast name | +1224 | purple bacteria | | common name | +1224 | purple bacteria and relatives | | common name | +1224 | purple non-sulfur bacteria | | common name | +1224 | purple photosynthetic bacteria | | common name | +1224 | purple photosynthetic bacteria and relatives | | common name | +2 | Bacteria | Bacteria <prokaryote> | scientific name | +2 | Monera | Monera <Bacteria> | in-part | +2 | Procaryotae | Procaryotae <Bacteria> | in-part | +2 | Prokaryota | Prokaryota <Bacteria> | in-part | +2 | Prokaryotae | Prokaryotae <Bacteria> | in-part | +2 | bacteria | bacteria <blast2> | blast name | +2 | eubacteria | | genbank common name | +2 | not Bacteria Haeckel 1894 | | synonym | +2 | prokaryote | prokaryote <Bacteria> | in-part | +2 | prokaryotes | prokaryotes <Bacteria> | in-part | +1 | all | | synonym | +1 | root | | scientific name | +131567 | biota | | synonym | +131567 | cellular organisms | | scientific name | + |
b |
diff -r 000000000000 -r 0fd79958fac6 test-data/test-db/nodes.dmp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-db/nodes.dmp Fri Jul 26 09:26:02 2024 +0000 |
b |
@@ -0,0 +1,10 @@ +83333 | 562 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | +562 | 561 | species | EC | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | +561 | 543 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +543 | 91347 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +91347 | 1236 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +1236 | 1224 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +1224 | 2 | phylum | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +2 | 131567 | superkingdom | | 0 | 0 | 11 | 0 | 0 | 0 | 0 | 0 | | +131567 | 1 | no rank | | 8 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | | +1 | 1 | no rank | | 8 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | | |
b |
diff -r 000000000000 -r 0fd79958fac6 test-data/test-db/readme.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-db/readme.txt Fri Jul 26 09:26:02 2024 +0000 |
b |
@@ -0,0 +1,61 @@ +*.dmp files are bcp-like dump from GenBank taxonomy database. + +General information. +Field terminator is "\t|\t" +Row terminator is "\t|\n" + +nodes.dmp file consists of taxonomy nodes. The description for each node includes the following +fields: + tax_id -- node id in GenBank taxonomy database + parent tax_id -- parent node id in GenBank taxonomy database + rank -- rank of this node (superkingdom, kingdom, ...) + embl code -- locus-name prefix; not unique + division id -- see division.dmp file + inherited div flag (1 or 0) -- 1 if node inherits division from parent + genetic code id -- see gencode.dmp file + inherited GC flag (1 or 0) -- 1 if node inherits genetic code from parent + mitochondrial genetic code id -- see gencode.dmp file + inherited MGC flag (1 or 0) -- 1 if node inherits mitochondrial gencode from parent + GenBank hidden flag (1 or 0) -- 1 if name is suppressed in GenBank entry lineage + hidden subtree root flag (1 or 0) -- 1 if this subtree has no sequence data yet + comments -- free-text comments and citations + +Taxonomy names file (names.dmp): + tax_id -- the id of node associated with this name + name_txt -- name itself + unique name -- the unique variant of this name if name not unique + name class -- (synonym, common name, ...) + +Divisions file (division.dmp): + division id -- taxonomy database division id + division cde -- GenBank division code (three characters) + division name -- e.g. BCT, PLN, VRT, MAM, PRI... 
+ comments + +Genetic codes file (gencode.dmp): + genetic code id -- GenBank genetic code id + abbreviation -- genetic code name abbreviation + name -- genetic code name + cde -- translation table for this genetic code + starts -- start codons for this genetic code + +Deleted nodes file (delnodes.dmp): + tax_id -- deleted node id + +Merged nodes file (merged.dmp): + old_tax_id -- id of nodes which has been merged + new_tax_id -- id of nodes which is result of merging + +Citations file (citations.dmp): + cit_id -- the unique id of citation + cit_key -- citation key + pubmed_id -- unique id in PubMed database (0 if not in PubMed) + medline_id -- unique id in MedLine database (0 if not in MedLine) + url -- URL associated with citation + text -- any text (usually article name and authors). + -- The following characters are escaped in this text by a backslash: + -- newline (appear as "\n"), + -- tab character ("\t"), + -- double quotes ('\"'), + -- backslash character ("\\"). + taxid_list -- list of node ids separated by a single space |
b |
diff -r 000000000000 -r 0fd79958fac6 tool-data/ncbi_taxonomy.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/ncbi_taxonomy.loc.sample Fri Jul 26 09:26:02 2024 +0000 |
b |
@@ -0,0 +1,2 @@ +#value name path +# test-db-tox "Test Database" tool-data/test-db \ No newline at end of file |
b |
diff -r 000000000000 -r 0fd79958fac6 tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Fri Jul 26 09:26:02 2024 +0000 |
b |
@@ -0,0 +1,8 @@ +<?xml version="1.0"?> +<tables> + <!-- Locations of taxonomy data downloaded from NCBI --> + <table name="ncbi_taxonomy" comment_char="#"> + <columns>value, name, path</columns> + <file path="tool-data/ncbi_taxonomy.loc" /> + </table> +</tables> |
b |
diff -r 000000000000 -r 0fd79958fac6 tool_data_table_conf.xml.test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Fri Jul 26 09:26:02 2024 +0000 |
b |
@@ -0,0 +1,8 @@ +<?xml version="1.0"?> +<tables> + <!-- Locations of taxonomy data downloaded from NCBI --> + <table name="ncbi_taxonomy" comment_char="#"> + <columns>value, name, path</columns> + <file path="${__HERE__}/test-data/ncbi_taxonomy.loc.test" /> + </table> +</tables> |