Repository 'spaln'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/spaln

Changeset 1:37b5e1f0b544 (2020-07-16)
Previous changeset 0:95ea8d97abb4 (2019-01-11) Next changeset 2:dd0cd2319ae5 (2021-11-19)
Commit message:
"planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
modified:
spaln.xml
test-data/output1.bed12
test-data/output1.tabular
test-data/output1_gff_genes.gff3
test-data/output1_gff_matches.gff3
added:
list_spaln_tables.py
list_spaln_tables.xml
macros.xml
test-data/gnm2tab
test-data/ncbi_taxonomy_sqlite.loc
test-data/output2.tabular
test-data/sqlite_taxdb/tax.ncbitaxonomy.sqlite
tool-data/ncbi_taxonomy_sqlite.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 95ea8d97abb4 -r 37b5e1f0b544 list_spaln_tables.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/list_spaln_tables.py Thu Jul 16 07:57:10 2020 -0400
[
@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+
+import argparse
+import shlex
+import sys
+from subprocess import run
+from typing import TextIO
+
+
+def find_common_ancestor_distance(
+    taxon: str, other_taxon: str, taxonomy_db_path: str, only_canonical: bool
+):
+    """Run ``taxonomy_util common_ancestor_distance`` for a pair of taxa.
+
+    The argument vector is built as a list rather than by shlex-splitting a
+    quoted command string, so database paths containing whitespace and taxon
+    names containing quote characters reach the tool unchanged.
+
+    Args:
+        taxon: Name of the query taxon (passed as the last argument).
+        other_taxon: Name of the taxon to compare against.
+        taxonomy_db_path: Path handed to ``taxonomy_util -d``.
+        only_canonical: When true, add the ``--only_canonical`` flag.
+
+    Returns:
+        The ``subprocess.CompletedProcess`` of the call, with stdout and
+        stderr captured and decoded as UTF-8.
+    """
+    cmd = ["taxonomy_util", "-d", taxonomy_db_path, "common_ancestor_distance"]
+    if only_canonical:
+        cmd.append("--only_canonical")
+    cmd.extend([other_taxon, taxon])
+    return run(cmd, encoding="utf8", capture_output=True)
+
+
+def find_distances(gnm2tab_file: TextIO, taxon: str, taxonomy_db_path: str):
+    """Print the distance from ``taxon`` to every entry of a gnm2tab file.
+
+    For each gnm2tab line the common-ancestor distance is queried twice:
+    once restricted to canonical ranks and once unrestricted. One
+    tab-separated row is written to stdout per entry: the unrestricted
+    distance, the raw output of the canonical query, and the first three
+    gnm2tab columns (species code, settings name, taxon name).
+
+    Args:
+        gnm2tab_file: Open, tab-separated gnm2tab file.
+        taxon: Name of the query taxon.
+        taxonomy_db_path: Path handed to ``taxonomy_util -d``.
+
+    Raises:
+        SystemExit: If ``taxonomy_util get_id`` reports that ``taxon`` is
+            not found in the database.
+    """
+    # Fail early when the query taxon itself is unknown to the database.
+    cmd = ["taxonomy_util", "-d", taxonomy_db_path, "get_id", taxon]
+    proc = run(cmd, capture_output=True, encoding="utf8")
+    if "not found in" in proc.stderr:
+        # sys.exit is always available; the exit() builtin is only injected
+        # by the site module.
+        sys.exit("Error: " + proc.stderr.strip())
+    for line in gnm2tab_file:
+        if not line.strip():
+            # Blank lines carry no entry.
+            continue
+        fields = [field.strip() for field in line.split("\t")[:3]]
+        if len(fields) < 3:
+            print(
+                "Warning: skipping malformed gnm2tab line:",
+                line.rstrip(),
+                file=sys.stderr,
+            )
+            continue
+        species_code, settings, other_taxon = fields
+        proc = find_common_ancestor_distance(taxon, other_taxon, taxonomy_db_path, True)
+        if proc.stderr != "":
+            # Any stderr output from the canonical query means this entry
+            # could not be resolved: report it and move on.
+            print("Warning:", other_taxon, proc.stderr.rstrip(), file=sys.stderr)
+            continue
+        ancestor_info = proc.stdout.rstrip()
+        proc = find_common_ancestor_distance(
+            taxon, other_taxon, taxonomy_db_path, False
+        )
+        # Only the first tab-separated field of the unrestricted query is kept.
+        non_canonical_distance = proc.stdout.split("\t")[0]
+        print(
+            non_canonical_distance,
+            ancestor_info,
+            species_code,
+            settings,
+            other_taxon,
+            sep="\t",
+        )
+
+
+if __name__ == "__main__":
+    # Command-line entry point: parse the arguments and print the distance
+    # table for the requested taxon to stdout.
+    parser = argparse.ArgumentParser(description="Find distance to common ancestor")
+    parser.add_argument(
+        "--taxonomy_db", required=True, help="NCBI Taxonomy database (SQLite format)"
+    )
+    parser.add_argument(
+        "--gnm2tab_file",
+        required=True,
+        type=argparse.FileType(),
+        help="gnm2tab file from spaln",
+    )
+    parser.add_argument("taxon")
+    args = parser.parse_args()
+
+    # argparse.FileType hands back an already-open file; close it when done.
+    with args.gnm2tab_file as gnm2tab_file:
+        find_distances(gnm2tab_file, args.taxon, args.taxonomy_db)
b
diff -r 95ea8d97abb4 -r 37b5e1f0b544 list_spaln_tables.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/list_spaln_tables.xml Thu Jul 16 07:57:10 2020 -0400
[
@@ -0,0 +1,132 @@
+<tool id="list_spaln_tables" name="List spaln parameter tables" version="@TOOL_VERSION@+galaxy0">
+    <description>Given a query species, list the spaln settings tables that exist, from closest related species to most different</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <requirement type="package" version="3.8">python</requirement>
+        <requirement type="package" version="@TOOL_VERSION@">spaln</requirement>
+        <requirement type="package" version="1.0.7">rust-ncbitaxonomy</requirement>
+    </requirements>
+    <command detect_errors="aggressive"><![CDATA[
+        #if $gnm2tab.gnm2tab_source == "spaln_tool"
+            SPALN_LOCATION=\$(dirname \$(dirname \$(which spaln))) &&
+            GNM2TAB_PATH="\$SPALN_LOCATION/share/spaln/table/gnm2tab" &&
+            if [[ -f "\$GNM2TAB_PATH" ]] ; then
+                ln -s "\$GNM2TAB_PATH" gnm2tab ;
+            else
+                echo "Cannot find builtin gnm2tab file \$SPALN_LOCATION : \$GNM2TAB_PATH" >&2 ; exit 1 ;
+            fi &&
+        #else
+            ln -s '${gnm2tab.gnm2tab_file}' gnm2tab &&
+        #end if
+        #if $taxonomy.taxonomy_source == "cached"
+            ln -s '${taxonomy.taxonomy_sqlite_table.fields.path}/tax.ncbitaxonomy.sqlite' tax.ncbitaxonomy.sqlite &&
+        #else
+            ln -s '${taxonomy.taxonomy_sqlite_file}' tax.ncbitaxonomy.sqlite &&
+        #end if
+     python '${__tool_directory__}/list_spaln_tables.py' --taxonomy_db tax.ncbitaxonomy.sqlite --gnm2tab_file gnm2tab '$taxon' | sort -k1n > '${output}'
+    ]]></command>
+    <inputs>
+        <param name="taxon" label="Scientific name" type="text" />
+        <conditional name="gnm2tab">
+            <param name="gnm2tab_source" type="select" label="Choose source of gnm2tab file">
+                <option value="spaln_tool" selected="true">From the spaln tool installation</option>
+                <option value="history">From dataset in history</option>
+            </param>
+            <when value="spaln_tool" />
+            <when value="history">
+                <param name="gnm2tab_file" type="data" format="tabular" label="gnm2tab file from spaln" help="The gnm2tab file should be from the table directory of spaln v. @TOOL_VERSION@" />
+            </when>
+        </conditional>
+        <conditional name="taxonomy">
+            <param type="select" name="taxonomy_source">
+                <option value="cached" selected="true">Use built-in NCBI Taxonomy SQLite database</option>
+                <option value="history">Use NCBI Taxonomy SQLite database from history</option>
+            </param>
+            <when value="cached">
+                <param type="select" name="taxonomy_sqlite_table"  label="NCBI Taxonomy SQLite database">
+                    <options from_data_table="ncbi_taxonomy_sqlite">
+                        <filter type="sort_by" column="1" />
+                        <validator type="no_options" message="No NCBI Taxonomy SQLite database is available" />
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param name="taxonomy_sqlite_file" type="data" format="sqlite" label="NCBI Taxonomy SQLite database" />
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output" format="tabular">
+            <actions>
+                <action name="column_names" type="metadata" default="dist_all,dist_canonical,common_ancestor,species_code,settings_group,scientific_name" />
+            </actions>
+        </data>
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="taxon" value="Lates calcarifer" />
+            <conditional name="gnm2tab">
+                <param name="gnm2tab_source" value="spaln_tool" />
+            </conditional>
+            <conditional name="taxonomy">
+                <param name="taxonomy_source" value="history" />
+                <param name="taxonomy_sqlite_file" ftype="sqlite" value="sqlite_taxdb/tax.ncbitaxonomy.sqlite" />
+            </conditional>
+            <output name="output">
+                <assert_contents>
+                    <has_line_matching expression="5\t4\tActinopteri\tcynosemi\tFish_mdl\tCynoglossus semilaevis" />
+                    <has_line_matching expression="30\t7\tEukaryota\tzea_mays\tMagnolio\tZea mays" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="taxon" value="Lates calcarifer" />
+            <conditional name="gnm2tab">
+                <param name="gnm2tab_source" value="spaln_tool" />
+            </conditional>
+            <conditional name="taxonomy">
+                <param name="taxonomy_source" value="cached" />
+                <param name="taxonomy_sqlite_table" value="sample" />
+            </conditional>
+            <output name="output">
+                <assert_contents>
+                    <has_line_matching expression="5\t4\tActinopteri\tcynosemi\tFish_mdl\tCynoglossus semilaevis" />
+                    <has_line_matching expression="30\t7\tEukaryota\tzea_mays\tMagnolio\tZea mays" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="taxon" value="Lates calcarifer" />
+            <conditional name="gnm2tab">
+                <param name="gnm2tab_source" value="history" />
+                <param name="gnm2tab_file" ftype="tabular" value="gnm2tab" />
+            </conditional>
+            <conditional name="taxonomy">
+                <param name="taxonomy_source" value="cached" />
+                <param name="taxonomy_sqlite_table" value="sample" />
+            </conditional>
+            <output name="output">
+                <assert_contents>
+                    <has_line_matching expression="5\t4\tActinopteri\tcynosemi\tFish_mdl\tCynoglossus semilaevis" />
+                    <has_line_matching expression="30\t7\tEukaryota\tzea_mays\tMagnolio\tZea mays" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+        Spaln has a number of pre-computed settings files to tune its predictions for different species. These
+        are listed in a file named gnm2tab_ in the package. This tool uses the NCBI Taxonomy database to search
+        that table for a suitable (i.e. taxonomically close) set of settings for optimising spaln's alignment
+        predictions. 
+        
+        Input is the scientific name of a species (as reflected in the NCBI Taxonomy DB), output is
+        information from the gnm2tab file sorted by taxonomic distance from the query species.
+
+        .. _gnm2tab: https://github.com/ogotoh/spaln/blob/master/table/gnm2tab
+    ]]></help>
+    <citations>
+    </citations>
+</tool>
b
diff -r 95ea8d97abb4 -r 37b5e1f0b544 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Jul 16 07:57:10 2020 -0400
b
@@ -0,0 +1,3 @@
+<macros>
+    <token name="@TOOL_VERSION@">2.4.03</token>
+</macros>
\ No newline at end of file
b
diff -r 95ea8d97abb4 -r 37b5e1f0b544 spaln.xml
--- a/spaln.xml Fri Jan 11 18:15:21 2019 -0500
+++ b/spaln.xml Thu Jul 16 07:57:10 2020 -0400
[
b'@@ -1,7 +1,7 @@\n <tool id="spaln" name="Spaln: align cDNA or Protein to genome" version="@TOOL_VERSION@+galaxy0">\n     <description>Maps and aligns a set of cDNA or protein sequences onto a whole genomic sequence.</description>\n     <macros>\n-        <token name="@TOOL_VERSION@">2.3.2</token>\n+        <import>macros.xml</import>\n     </macros>\n     <edam_topics>\n         <edam_topic>topic_3512</edam_topic>\n@@ -10,7 +10,36 @@\n         <requirement type="package" version="@TOOL_VERSION@">spaln</requirement>\n     </requirements>\n     <command detect_errors="aggressive"><![CDATA[\n-\t    spaln -t\\${GALAXY_SLOTS:-1} -O$format -o \'$output1\' \'$genome\' \'$query\'\n+\t    spaln -t\\${GALAXY_SLOTS:-1} -O$format\n+        #if str($species_params).strip() != \'\'\n+            -T\'${species_params}\'\n+        #end if\n+        #if $adv.use == "yes"\n+            -S\'${adv.query_orientation}\'\n+            -V\'${adv.hirschberg_threshold}\'\n+            -pa\'${adv.polya_trim}\'\n+            ${adv.all_results}\n+            -yu\'${adv.gap_extension_penalty}\'\n+            -yv\'${adv.gap_open_penalty}\'\n+            -yw\'${adv.dp_matrix_scan_width}\'\n+            -ya\'${adv.splice_stringency}\'\n+            -yj\'${adv.gap_penalty_incline}\'\n+            -yk\'${adv.gap_penalty_flex}\'\n+            \'${adv.double_affine_gap}\'\n+            -ym\'${adv.match_score}\'\n+            -yn\'${adv.mismatch_score}\'\n+            -yo\'${adv.stop_codon_penalty}\'\n+            -yx\'${adv.frameshift_penalty}\'\n+            -yy\'${adv.splice_site_weight}\n+            -yz\'${adv.coding_potential_weight}\'\n+            -yB\'${adv.branch_point_weight}\n+            -yL\'${adv.min_intron_len}\'\n+            -yZ\'${adv.intron_potential_weight}\'\n+            #if str($adv.max_gene_length).strip() != \'\'\n+                -XG\'${adv.max_gene_length}\'\n+            #end if\n+        #end if\n+        \'$genome\' \'$query\' >\'$output1\' \n     ]]></command>\n     
<inputs>\n         <param type="data" name="genome" format="fasta" label="Genome sequence to search (FASTA format)" />\n@@ -20,7 +49,49 @@\n \t    <option value="2">GFF3 format matches</option>\n \t    <option value="3">BED format</option>\n \t    <option value="4">Tabular format exon information</option>\n+    </param>\n+    <param argument="-T" name="species_params" type="text" optional="true" label="Species to use for parameter setting" help="Choose a species table (e.g. cynosemi) from which to read parameters to optimise spaln" />\n+    <conditional name="adv">\n+        <param type="select" name="use" label="Advanced settings">\n+            <option selected="true" value="no">No</option>\n+            <option value="yes">Yes</option>\n         </param>\n+        <when value="no">\n+        </when>\n+        <when value="yes">\n+            <param argument="-S" name="query_orientation" type="select" label="DNA query orientation" help="Determines how to treat orientation of query sequence when searching">\n+                <option value="0">Infer orientation from sequence header (no poly-A/poly-T trimming)</option>\n+                <option value="1">Forward orientation only. Poly-A tail might be trimmed off</option>\n+                <option value="2">Reverse orientation only. Leading poly-T might be trimmed off</option>\n+                <option selected="true" value="3">Examine both orientations. 
Poly-A / Poly-T might be trimmed off</option>\n+            </param>\n+            <param argument="-V" name="hirschberg_threshold" type="integer" value="16777216" label="Minimum space to induce Hirschberg\'s algorithm" help="Default is 16M (16x1024x1024 bytes)" />\n+            <param argument="-pa" name="polya_trim" type="integer" value="12" label="Limit 3\' poly-As to this number of bases" help="poly-A/poly-T trimming is only done if -S (orientation) option is 0 or 3" />\n+            <param argument="-pw" name="all_results" type="boolean" checked="false" truevalue="-pw" falsevalue="" label="Report results even if the score is below the threshold" />\n+            <param argument="-yu" name="gap_exten'..b'ght" type="integer" value="0" label="Weight for branch point signal" />\n+            <param argument="-yL" name="min_intron_len" type="integer" value="30" label="Minimum expected length of intron" />\n+            <param argument="-yZ" name="intron_potential_weight" type="integer" value="0" label="Weight for intron potential" />\n+            <param argument="-XG" name="max_gene_length" type="text" label="Reset maximum expected gene size, suffix k or M is effective" />\n+        </when>\n+    </conditional>\n     </inputs>\n     <outputs>\n         <data name="output1" format="tabular">\n@@ -49,32 +120,61 @@\n             <param name="genome" ftype="fasta" value="genome.fasta" />\n             <param name="query" ftype="fasta" value="query.fasta" />\n             <param name="format" value="0"/>\n-\t        <output name="output1" value="output1_gff_genes.gff3" />\n+            <conditional name="adv">\n+                <param name="use" value="no" />\n+            </conditional>\n+\t        <output name="output1" ftype="gff3" value="output1_gff_genes.gff3" />\n         </test>\n         <test>\n             <param name="genome" ftype="fasta" value="genome.fasta" />\n             <param name="query" ftype="fasta" value="query.fasta" />\n             <param 
name="format" value="2"/>\n-\t        <output name="output1" value="output1_gff_matches.gff3" />\n+            <conditional name="adv">\n+                <param name="use" value="no" />\n+            </conditional>\n+\t        <output name="output1" ftype="gff3" value="output1_gff_matches.gff3" />\n         </test>\n         <test>\n             <param name="genome" ftype="fasta" value="genome.fasta" />\n             <param name="query" ftype="fasta" value="query.fasta" />\n             <param name="format" value="3"/>\n-\t        <output name="output1" value="output1.bed12" />\n+            <conditional name="adv">\n+                <param name="use" value="no" />\n+            </conditional>\n+\t        <output name="output1" ftype="bed12" value="output1.bed12" />\n         </test>\n         <test>\n             <param name="genome" ftype="fasta" value="genome.fasta" />\n             <param name="query" ftype="fasta" value="query.fasta" />\n             <param name="format" value="4"/>\n-\t        <output name="output1" value="output1.tabular" />\n+            <conditional name="adv">\n+                <param name="use" value="no" />\n+            </conditional>\n+\t        <output name="output1" ftype="tabular" value="output1.tabular" />\n+        </test>\n+        <test>\n+            <param name="genome" ftype="fasta" value="genome.fasta" />\n+            <param name="query" ftype="fasta" value="query.fasta" />\n+            <param name="format" value="4"/>\n+            <param name="species_params" value="cynosemi" />\n+            <conditional name="adv">\n+                <param name="use" value="no" />\n+            </conditional>\n+\t        <output name="output1" ftype="tabular" value="output2.tabular" />\n         </test>\n     </tests>\n     <help><![CDATA[\n         Spaln_ (space-efficient spliced alignment) is a stand-alone program that maps and aligns a set of cDNA or\n         protein sequences onto a whole genomic sequence in a single job. 
\n \n-        This Galaxy wrapper currently only supports the default (i.e. *-O3*) algorithm for Spaln with default parameters.\n+        This Galaxy wrapper currently only supports the default (i.e. *-O3*) algorithm for Spaln. This algorithm\n+        takes FASTA format query and genome sequence and finds an alignment of the query (either cDNA or protein)\n+        against the genome.\n+\n+        Spaln optionally takes a species name to use for parameter setting (the "-T" parameter). The \n+        "List spaln parameter tables" (list_spaln_tables) can be used to find a parameter file that is\n+        close (in terms of taxonomic distance) to your species of interest. Use of this setting is recommended.\n+\n \n         .. _Spaln: http://www.genome.ist.i.kyoto-u.ac.jp/~aln_user/spaln/\n     ]]></help>\n'
b
diff -r 95ea8d97abb4 -r 37b5e1f0b544 test-data/gnm2tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gnm2tab Thu Jul 16 07:57:10 2020 -0400
b
b'@@ -0,0 +1,688 @@\n+aaosarxi\tDothide2\tAaosphaeria arxii       \tF\n+acidrich\tDothide1\tAcidomyces richmondensis       \tF\n+acrealca\tSordari1\tAcremonium alcalophilum        \tF\n+acyrpisu\tInsectDm\tAcyrthosiphon pisum       \tA_I\n+aedeaegy\tInsectDm\tAedes aegypti             \tA_I\n+agarbisp\tAgarico1\tAgaricus bisporus            \tF\n+agarhyph\tAgarico2\tAgaricostilbum hyphaenes   \tF\n+ailumela\tTetrapod\tAiluropoda melanoleuca         \tA_M\n+albulaib\talbulaib\tAlbugo laibachii           \tO\n+allomacr\tBlastocl\tAllomyces macrogynus          \tF\n+altebras\tDothide2\tAlternaria brassicicola        \tF\n+amanmusc\tAgaricom\tAmanita muscaria            \tF\n+amanthie\tAgarico2\tAmanita thiersii            \tF\n+ambotric\tambotric\tAmborella trichopoda          \tP\n+amnilign\tDothide2\tAmniculicola lignicola     \tF\n+amorresi\tLeotiom1\tAmorphotheca resinae    \tF\n+amphquee\tamphquee\tAmphimedon queenslandica       \tA_O\n+anasplat\tTetrapod\tAnas platyrhynchos       \tA_B\n+anolcaro\tTetrapod\tAnolis carolinensis        \tA_R\n+anopdarl\tInsectAg\tAnopheles darlingi            \tA_I\n+anopgamb\tInsectAg\tAnopheles gambiae             \tA_I\n+anthavoc\tSordari1\tAnthostoma avocetta            \tF\n+antolocu\tFungiAll\tAntonospora locustae     \tF\n+antrsinu\tAgarico2\tAntrodia sinuosa             \tF\n+apiomont\tSordari1\tApiospora montagnei           \tF\n+apisflor\tInsectAg\tApis florea              \tA_I\n+apismell\tInsectAg\tApis mellifera          \tA_I                 \t\n+aplakerg\tLabyrint\tAplanochytrium kerguelense   \tO\n+aploprun\tDothide2\tAplosporella prunicola  \tF\n+aplycali\tChordatU\tAplysia californica         \tA_mo\n+aquicoer\tEudicoty\tAquilegia coerulea            \tP\n+arablyra\tEudicoty\tArabidopsis lyrata      \tP\n+arabthal\tEudicoty\tArabidopsis thaliana    \tP\n+armimell\tAgaricom\tArmillaria mellea              \tF\n+arthbenh\tEurotio1\tArthroderma benhamiae   \tF\n+artholig\tSordari1\tArthrobotrys oligospora  
\tF\n+ascoimme\tPezizomy\tAscobolus immersus            \tF\n+ascorube\tSaccharo\tAscoidea rubescens           \tF\n+ascosarc\tLeotiom1\tAscocoryne sarcoides           \tF\n+ashbgoss\tSaccharo\tAshbya gossypii            \tF\n+aspeacid\tEurotio2\tAspergillus acidus      \tF\n+aspeacul\tEurotio2\tAspergillus aculeatus   \tF\n+aspebras\tEurotio2\tAspergillus brasiliensis\tF\n+aspecarb\tEurotio2\tAspergillus carbonarius \tF\n+aspeclav\tEurotio2\tAspergillus clavatus    \tF\n+aspeflav\tEurotio2\tAspergillus flavus      \tF\n+aspefumi\tEurotio2\tAspergillus fumigatus   \tF\n+aspeglau\tEurotio2\tAspergillus glaucus     \tF\n+aspekawa\tEurotio2\tAspergillus kawachii    \tF\n+aspenidu\tEurotio2\tAspergillus nidulans    \tF\n+aspenige\tEurotio2\tAspergillus niger\t       \tF\n+aspeoryz\tEurotio2\tAspergillus oryzae\t      \tF\n+aspesydo\tEurotio2\tAspergillus sydowii\t     \tF\n+aspeterr\tEurotio2\tAspergillus terreus\t     \tF\n+aspetubi\tEurotio2\tAspergillus tubingensis\t \tF\n+aspevers\tEurotio2\tAspergillus versicolor\t  \tF\n+aspewent\tEurotio2\tAspergillus wentii\t      \tF\n+aspezona\tEurotio2\tAspergillus zonatus\t     \tF\n+astespec\tastespec\tAsterochloris spec_Cgr/DA1pho  \tP\n+astymexi\tFish_lng\tAstyanax mexicanus\t   \tA_F\n+atraspec\tatraspec\tAtractiellales spec\t\tF\n+attaceph\tInsectAc\tAtta cephalotes          \tA_I\n+aulohede\tDothide2\tAulographum hederae            \tF\n+auralima\tLabyrint\tAurantiochytrium limacinum   \tO\n+aureanop\tPelagoph\tAureococcus anophagefferens    \tO\n+aurepull\tDothide2\tAureobasidium pulm_lulans \tF\n+aurepulm\tDothide2\tAureobasidium pulm_lulans \tF\n+aurepuln\tDothide2\tAureobasidium puln_lulans \tF\n+aurepulp\tDothide2\tAureobasidium pulp_lulans\tF\n+aurepuls\tDothide2\tAureobasidium puls_lulans  \tF\n+aurideli\tAgarico1\tAuricularia delicata\t    \tF\n+aurisubg\tAgarico1\tAuricularia subglabra\t   \tF\n+babjinos\tSaccharo\tBabjeviella inositovor\t  \tF\n+backcirc\tMucorale\tBackusella circina\t     
\tF\n+batrdend\tPuccinia\tBatrachochytrium dendrobatidis\tF\n+baudcomp\tDothide2\tBaudoinia compniacensis       \tF\n+beaubass\tSordario\tBeauveria bassiana\t    \tF\n+bigenata\tPelagoph\tBigelowiella natans\t      \tO\n+bjeradus\tAgarico1\tBjerkandera adusta\t      \tF\n+blasderm\tEurotio3\tBlastomyces dermatitidis\tF\n+blumgram\tLeotiomy\tBlumeria graminis           '..b'\tTaphrina deformans           \tF\n+tarssyri\tTetrapod\tTarsius syrichta            \tA_M\n+terfboud\tPezizomy\tTerfezia boudieri            \tF\n+tetrnigr\tFish_sht\tTetraodon nigroviridis        \tA_F\n+tetrphaf\tSaccharo\tTetrapisispora phaffii     \tF\n+tetrther\ttetrther\tTetrahymena thermophila \tO\n+tetrurti\tInsectAg\tTetranychus urticae     \tA_I\n+thalpseu\tthalpseu\tThalassiosira pseudonana  \tO\n+thectrah\tFungiAll\tThecamonas trahens             \tO\n+theiannu\tTheileri\tTheileria annulata            \tA_O\n+theiparv\tTheileri\tTheileria parva               \tA_O\n+thelhalo\tEudicoty\tThellungiella halophila   \tP\n+thelparv\tEudicoty\tThellungiella parvula     \tP\n+theocaca\tEudicoty\tTheobroma cacao               \tP\n+theraura\tEurotio2\tThermoascus aurantiacus \tF\n+thieanta\tFungiAll\tThielavia antarctica          \tF\n+thieappe\tSordari1\tThielavia appendiculata       \tF\n+thiearen\tSordari1\tThielavia arenaria            \tF\n+thiehyrc\tSordari1\tThielavia hyrcaniae           \tF\n+thieterr\tSordari1\tThielavia terrestris          \tF\n+tillanom\ttillanom\tTilletiaria anomala     \tF\n+torudelb\tSaccharo\tTorulaspora delbrueckii \tF\n+toxogond\tConoidas\tToxoplasma gondii              \tO\n+tramvers\tAgarico2\tTrametes versicolor          \tF\n+tremmese\tTremell2\tTremella mesenterica         \tF\n+trempert\ttrempert\tTrematosphaeria pertusa     \tF\n+tremspec\tTremell2\tTremella species             \tF\n+tribcast\ttribcast\tTribolium castaneum           \tA_I\n+tricabie\tAgarico2\tTrichaptum abietinum           \tF\n+tricadha\ttricadha\tTrichoplax adhaerens           
\tA_O\n+tricaspe\tSordari1\tTrichoderma asperellum  \tF\n+tricatro\tSordari1\tTrichoderma atroviride  \tF\n+triccitr\tSordario\tTrichoderma citrinoviride\tF\n+tricequi\tEurotio2\tTrichophyton equinum     \tF\n+tricharz\tSordari1\tTrichoderma harzianum   \tF\n+triclong\tSordari1\tTrichoderma longibrachiatum\tF\n+tricmana\tTetrapod\tTrichechus manatus      \tA_M\n+tricmats\tAgarico1\tTricholoma matsutake           \tF\n+tricolea\tTremell2\tTrichosporon oleaginosus \tF\n+tricrees\tSordari1\tTrichoderma reesei      \tF\n+tricrubr\tEurotio1\tTrichophyton rubrum      \tF\n+tricspir\ttribcast\tTrichinella spiralis    \tA_O\n+trictons\tEurotio1\tTrichophyton tonsurans   \tF\n+tricverr\tEurotio1\tTrichophyton verrucosum  \tF\n+tricvire\tSordari1\tTrichoderma virens      \tF\n+tritaesa\tMagnolio\tTriticum aestivum            \tP\n+tritaesb\tMagnolio\tTriticum aestivum            \tP\n+tritaesd\tMagnolio\tTriticum aestivum            \tP\n+tritspec\ttritspec\tTritirachium species     \tF\n+trypbruc\tArchamoe\tTrypanosoma brucei      \tO\n+trypelut\tDothide2\tTrypethelium eluteriae   \tF\n+tubemela\tPezizomy\tTuber melanosporum        \tF\n+tulacalo\tAgarico4\tTulasnella calospora           \tF\n+tupabela\tTetrapod\tTupaia belangeri           \tA_M\n+turstrun\tTetrapod\tTursiops truncatus           \tA_M\n+umberama\tMucorale\tUmbelopsis ramanniana          \tF\n+uncirees\tEurotio1\tUncinocarpus reesii      \tF\n+ustimayd\tEurotio3\tUstilago maydis              \tF\n+vertalbo\tSordario\tVerticillium alboatrum   \tF\n+vertalfa\tSordario\tVerticillium alfalfae    \tF\n+vertdahl\tSordario\tVerticillium dahliae     \tF\n+vicupaco\tTetrapod\tVicugna pacos               \tA_\n+vitivini\tEudicoty\tVitis vinifera            \tP\n+volvcart\tChlospec\tVolvox carteri             \tP\n+volvvolv\tAgaricom\tVolvariella volvacea    \tF\n+wallicht\tDothide4\tWallemia ichthyophaga        \tF\n+wallsebi\twallsebi\tWallemia sebi           \tF\n+wickanom\tSaccharo\tWickerhamomyces anomalus    
\tF\n+wilcmiko\tPezizomy\tWilcoxina mikolae             \tF\n+wolfcoco\tAgarico2\tWolfiporia cocos               \tF\n+xantpari\tAgarico2\tXanthoria parietina           \tF\n+xenotrop\tTetrapod\tXenopus tropicalis          \tA_A\n+xiphmacu\tFish_mdl\tXiphophorus maculatus   \tA_F\n+xyloheve\txyloheve\tXylona heveae              \tF\n+yarrlipo\tSaccharo\tYarrowia lipolytica          \tF\n+zasmcell\tDothide2\tZasmidium cellare             \tF\n+zea_mays\tMagnolio\tZea mays                \tP\n+zonoalbi\tTetrapod\tZonotrichia albicollis  \tA_B\n+zopfrhiz\tDothide2\tZopfia rhizophila          \tF\n+zygoroux\tSaccharo\tZygosaccharomyces rouxii       \tF\n+zymotrit\tDothide2\tZymoseptoria tritici     \tF\n'
b
diff -r 95ea8d97abb4 -r 37b5e1f0b544 test-data/ncbi_taxonomy_sqlite.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ncbi_taxonomy_sqlite.loc Thu Jul 16 07:57:10 2020 -0400
b
@@ -0,0 +1,2 @@
+#value description path
+sample sample database ${__HERE__}/sqlite_taxdb
b
diff -r 95ea8d97abb4 -r 37b5e1f0b544 test-data/output1.bed12
--- a/test-data/output1.bed12 Fri Jan 11 18:15:21 2019 -0500
+++ b/test-data/output1.bed12 Thu Jul 16 07:57:10 2020 -0400
b
@@ -1,2 +1,2 @@
 track name=Spaln description="NP_001121846.1" useScore=1
-scaffold_1 233 29800 NP_001121846.1 1000 + 233 29800 255,0,0 43 31,30,125,9,9,106,4,9,5,37,8,18,12,18,9,4,30,18,8,3,2,3,15,6,10,7,5,7,14,2,11,20,12,12,2,19,23,26,5,7,9,23,5, 0,71,184,1922,2587,6911,7425,7504,7626,8016,8114,8185,8347,8632,8746,9149,9725,15480,15549,15615,16511,16714,16848,17115,17191,18118,18504,19271,19365,19443,20377,22213,22440,25892,25951,26121,26227,26308,26406,28178,28715,28868,29562
+scaffold_1 16 10418 NP_001121846.1 530 + 16 10418 255,0,0 16 46,33,122,127,33,4,127,33,6,18,15,11,13,24,24,24, 0,111,401,7107,7546,7642,7721,7908,8278,8849,8963,9050,9959,10028,10212,10378
b
diff -r 95ea8d97abb4 -r 37b5e1f0b544 test-data/output1.tabular
--- a/test-data/output1.tabular Fri Jan 11 18:15:21 2019 -0500
+++ b/test-data/output1.tabular Thu Jul 16 07:57:10 2020 -0400
b
b'@@ -1,45 +1,18 @@\n # rID\t  gID\t   %id\t  ExonL\t MisMch\t Unpair\t ref_l\t  ref_r\t  tgt_l\t  tgt_r\t eScore\t IntrnL\t iScore\t Sig3/I\t Sig5/T  # -  X P DiNuc\n-NP_001121846.1\tscaffold_1\t  30.00\t     31\t      7\t      0\t      1\t     10\t    234\t    264\t   71.9\t      0\t    0.0\t  67.90\t  -1.80  0 0  0 0   .  \n-NP_001121846.1\tscaffold_1\t  16.67\t     30\t      6\t      9\t     11\t     27\t    305\t    334\t   -9.0\t     40\t   -8.2\t   9.90\t  -2.20  0 0  0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t  64.29\t    125\t     15\t      0\t     28\t     69\t    418\t    542\t  226.4\t     83\t    8.4\t  22.20\t  16.00  0 0  0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t  66.67\t      9\t      1\t      0\t     70\t     72\t   2156\t   2164\t   37.9\t   1613\t   -6.6\t   4.40\t  21.90  0 0  0 0 GT.AG\n-NP_001121846.1\tscaffold_1\t  66.67\t      9\t      1\t      0\t     73\t     75\t   2821\t   2829\t   32.3\t    656\t    7.5\t   7.20\t  12.10  0 0  0 0 GT.AG\n-NP_001121846.1\tscaffold_1\t  62.86\t    106\t     13\t      0\t     76\t    110\t   7145\t   7250\t  176.6\t   4315\t    4.0\t  25.10\t  25.30  0 0  0 0 GT.AG\n-NP_001121846.1\tscaffold_1\t   0.00\t      4\t      2\t      0\t    111\t    112\t   7659\t   7662\t   22.9\t    408\t   11.8\t   5.70\t  17.50  0 0  0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t   0.00\t      9\t      2\t      1\t    113\t    114\t   7738\t   7746\t    4.9\t     75\t   20.0\t  13.50\t   2.90  0 0  0 2 GT.AG\n-NP_001121846.1\tscaffold_1\t   0.00\t      5\t      1\t      0\t    115\t    115\t   7860\t   7864\t   24.0\t    113\t   -3.9\t   7.30\t  16.20  0 0  0 2 GT.AG\n-NP_001121846.1\tscaffold_1\t  30.00\t     37\t      7\t      0\t    116\t    125\t   8250\t   8286\t   15.8\t    385\t    9.7\t  12.50\t  11.50  0 0  0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t 100.00\t      8\t      0\t      0\t    126\t    127\t   8348\t   8355\t   28.3\t     61\t   18.0\t  16.20\t  -0.80  0 0  0 2 GT.AG\n-NP_001121846.1\tscaffold_1\t  33.33\t     18\t    
  4\t      0\t    128\t    133\t   8419\t   8436\t   26.2\t     63\t    1.5\t  12.10\t   6.70  0 0  0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t   0.00\t     12\t      4\t      0\t    134\t    137\t   8581\t   8592\t   29.2\t    144\t    9.9\t  18.20\t   9.20  0 0  0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t   0.00\t     18\t      6\t      0\t    138\t    143\t   8866\t   8883\t   29.1\t    273\t    5.7\t  14.00\t  24.90  0 0  0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t   0.00\t      9\t      3\t      0\t    144\t    146\t   8980\t   8988\t   28.2\t     96\t   33.9\t  21.70\t  10.20  0 0  0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t  50.00\t      4\t      1\t      0\t    147\t    148\t   9383\t   9386\t   33.7\t    394\t   -5.5\t   3.40\t  14.10  0 0  0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t  10.00\t     30\t      5\t      4\t    149\t    154\t   9959\t   9988\t   21.1\t    572\t    3.1\t  10.30\t  20.60  0 0  0 2 GT.AG\n-NP_001121846.1\tscaffold_1\t  16.67\t     18\t      5\t      0\t    155\t    160\t  15714\t  15731\t   43.6\t   5725\t    3.9\t  18.20\t  22.40  0 0  0 2 GT.AG\n-NP_001121846.1\tscaffold_1\t  50.00\t      8\t      1\t      0\t    161\t    162\t  15783\t  15790\t   19.0\t     51\t    8.8\t  -3.20\t  17.10  0 0  0 2 GT.AG\n-NP_001121846.1\tscaffold_1\t   0.00\t      3\t      1\t      0\t    163\t    163\t  15849\t  15851\t   17.0\t     58\t   20.8\t  13.40\t   3.40  0 0  0 1 GC.AG\n-NP_001121846.1\tscaffold_1\t   0.00\t      2\t      1\t      0\t    164\t    164\t  16745\t  16746\t   20.9\t    893\t   -5.3\t  14.60\t   8.20  0 0  0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t   0.00\t      3\t      1\t      0\t    165\t    165\t  16948\t  16950\t   27.2\t    201\t   14.2\t  22.30\t   9.10  0 0  0 0 GT.AG\n-NP_001121846.1\tscaffold_1\t 100.00\t     15\t      0\t      0\t    166\t    166\t  17082\t  17096\t   13.3\t    131\t   17.9\t  23.40\t   2.50  0 0  0 0 GT.AG\n-NP_001121846.1\tscaffold_1\t   0.00\t      6\t      1\t      1\t    167\t    167\t  17349\t  17354\t   
28.3\t    252\t   -9.3\t   5.30\t  22.50  0 0  0 0 GT.AG\n-NP_001121846.1\tscaffold_1\t  33.33\t     10\t      2\t      0\t    168\t    170\t  17425\t  17434\t   15.0\t     70\t   22.3\t  10.10\t   2.30  0 0  0 0 GT.AG\n-NP_001121846.1\tscaffold_1\t  33.33\t      7\t      2\t      0\t    171\t    173\t  18352\t  18358\t   24.7\t    917\t  -18.2\t   2.90\t  16.40  0 0  0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t '..b'197\t  26126\t  26137\t   31.8\t   3440\t   -1.3\t  22.70\t  14.00  0 0  0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t   0.00\t      2\t      1\t      0\t    198\t    198\t  26185\t  26186\t   22.5\t     47\t   14.8\t  12.40\t   8.30  0 0  0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t  33.33\t     19\t      4\t      0\t    199\t    204\t  26355\t  26373\t   33.1\t    168\t    7.8\t  15.10\t   7.10  0 0  0 0 GT.AG\n-NP_001121846.1\tscaffold_1\t   0.00\t     23\t      3\t      0\t    205\t    207\t  26461\t  26483\t    8.0\t     87\t   11.9\t  16.80\t   8.10  0 0  0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t  33.33\t     26\t      2\t      4\t    208\t    212\t  26542\t  26567\t   22.6\t     58\t   18.9\t  20.50\t  -0.50  0 0  0 0 GT.AG\n-NP_001121846.1\tscaffold_1\t   0.00\t      5\t      1\t      0\t    213\t    213\t  26640\t  26644\t   24.6\t     72\t    3.9\t  14.90\t  11.30  0 0  0 2 GT.AG\n-NP_001121846.1\tscaffold_1\t  33.33\t      7\t      2\t      0\t    214\t    216\t  28412\t  28418\t   28.0\t   1767\t   -4.5\t  11.80\t  12.30  0 0  0 1 GT.AG\n-NP_001121846.1\tscaffold_1\t 100.00\t      9\t      0\t      0\t    217\t    217\t  28949\t  28957\t   26.9\t    530\t   17.1\t  25.30\t  10.30  0 0  0 2 GT.AG\n-NP_001121846.1\tscaffold_1\t  28.57\t     23\t      2\t      3\t    218\t    221\t  29102\t  29124\t   36.1\t    144\t   16.5\t  21.20\t  10.60  0 0  0 2 GT.AG\n-NP_001121846.1\tscaffold_1\t   0.00\t      5\t      1\t      1\t    222\t    222\t  29796\t  29800\t   24.4\t    671\t    8.0\t  19.10\t  10.60  0 0  0 1 GT.AG\n-@ scaffold_1 + ( 234 29800 ) 
NP_001121846.1 222 ( 1 222 ) S: 1357.3 =: 38.1 C: 110.8 T#: 122 T-: 26 B#: 0 B-: 0 X: 0 Pam: 0\n+NP_001121846.1\tscaffold_1\t  21.43\t     46\t     11\t      0\t      1\t     14\t     17\t     62\t   79.7\t      0\t    0.0\t  66.40\t   0.90  0 0  0 0   .  \n+NP_001121846.1\tscaffold_1\t  12.50\t     33\t      8\t      3\t     15\t     27\t    128\t    160\t   -4.3\t     65\t  -23.9\t  -5.20\t  10.70  0 0  0 1 GT.AG\n+NP_001121846.1\tscaffold_1\t  68.29\t    122\t     13\t      0\t     28\t     68\t    418\t    539\t  210.5\t    257\t   -0.1\t  16.10\t   6.60  0 0  0 1 GT.AG\n+NP_001121846.1\tscaffold_1\t  64.29\t    127\t     15\t      0\t     69\t    110\t   7124\t   7250\t  187.0\t   6584\t  -24.3\t  14.60\t  17.80  0 0  0 0 GT.AG\n+NP_001121846.1\tscaffold_1\t   9.09\t     33\t     10\t      0\t    111\t    121\t   7563\t   7595\t   11.0\t    312\t   -4.2\t   5.70\t   5.20  0 0  0 1 GT.AG\n+NP_001121846.1\tscaffold_1\t   0.00\t      4\t      2\t      0\t    122\t    123\t   7659\t   7662\t   21.2\t     63\t   -6.4\t   7.90\t  13.90  0 0  0 1 GT.AG\n+NP_001121846.1\tscaffold_1\t  20.45\t    127\t     31\t      2\t    124\t    165\t   7738\t   7864\t   21.7\t     75\t    5.0\t  11.80\t  13.20  0 0  0 2 GT.AG\n+NP_001121846.1\tscaffold_1\t   9.09\t     33\t     10\t      0\t    166\t    176\t   7925\t   7957\t   26.1\t     60\t    1.0\t   7.20\t   9.70  0 0  0 0 GT.AG\n+NP_001121846.1\tscaffold_1\t  50.00\t      6\t      1\t      0\t    177\t    178\t   8295\t   8300\t   29.4\t    337\t   -9.5\t   8.90\t  10.80  0 0  0 0 GT.AG\n+NP_001121846.1\tscaffold_1\t  16.67\t     18\t      5\t      0\t    179\t    184\t   8866\t   8883\t   31.0\t    565\t   -7.7\t  12.00\t  17.60  0 0  0 0 GT.AG\n+NP_001121846.1\tscaffold_1\t   0.00\t     15\t      5\t      0\t    185\t    189\t   8980\t   8994\t   30.3\t     96\t   11.1\t  15.90\t   9.10  0 0  0 0 GT.AG\n+NP_001121846.1\tscaffold_1\t   0.00\t     11\t      4\t      0\t    190\t    193\t   9067\t   9077\t   27.0\t     72\t   
-4.0\t   7.10\t   7.60  0 0  0 0 GT.AG\n+NP_001121846.1\tscaffold_1\t  25.00\t     13\t      3\t      0\t    194\t    197\t   9976\t   9988\t   28.5\t    898\t  -22.1\t   6.80\t  15.40  0 0  0 2 GT.AG\n+NP_001121846.1\tscaffold_1\t  37.50\t     24\t      5\t      0\t    198\t    205\t  10045\t  10068\t   27.5\t     56\t    3.7\t   7.70\t   5.10  0 0  0 0 GT.AG\n+NP_001121846.1\tscaffold_1\t  25.00\t     24\t      6\t      0\t    206\t    213\t  10229\t  10252\t   38.3\t    160\t   -6.9\t  13.10\t  12.60  0 0  0 0 GT.AG\n+NP_001121846.1\tscaffold_1\t  20.00\t     24\t      6\t      1\t    214\t    222\t  10395\t  10418\t   23.3\t    142\t    0.3\t  12.40\t  12.60  0 0  0 0 GT.AG\n+@ scaffold_1 + ( 17 10418 ) NP_001121846.1 222 ( 1 222 ) S: 627.0 =: 36.5 C: 97.3 T#: 135 T-: 6 B#: 0 B-: 0 X: 0 Nexn: 16\n'
b
diff -r 95ea8d97abb4 -r 37b5e1f0b544 test-data/output1_gff_genes.gff3
--- a/test-data/output1_gff_genes.gff3 Fri Jan 11 18:15:21 2019 -0500
+++ b/test-data/output1_gff_genes.gff3 Thu Jul 16 07:57:10 2020 -0400
b
@@ -1,47 +1,20 @@
 ##gff-version 3
 ##sequence-region scaffold_1 1 59940
-scaffold_1 ALN gene 234 29800 1357 + . ID=gene00001;Name=scaffold_1_15
-scaffold_1 ALN mRNA 234 29800 1357 + . ID=mRNA00001;Parent=gene00001;Name=scaffold_1_15
-scaffold_1 ALN cds 234 264 71 + 0 ID=cds00001;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 1 10 +
-scaffold_1 ALN cds 305 334 -9 + 2 ID=cds00002;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 11 27 +
-scaffold_1 ALN cds 418 542 226 + 2 ID=cds00003;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 28 69 +
-scaffold_1 ALN cds 2156 2164 37 + 0 ID=cds00004;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 70 72 +
-scaffold_1 ALN cds 2821 2829 32 + 0 ID=cds00005;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 73 75 +
-scaffold_1 ALN cds 7145 7250 176 + 0 ID=cds00006;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 76 110 +
-scaffold_1 ALN cds 7659 7662 22 + 2 ID=cds00007;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 111 112 +
-scaffold_1 ALN cds 7738 7746 4 + 1 ID=cds00008;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 113 114 +
-scaffold_1 ALN cds 7860 7864 24 + 1 ID=cds00009;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 115 115 +
-scaffold_1 ALN cds 8250 8286 15 + 2 ID=cds00010;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 116 125 +
-scaffold_1 ALN cds 8348 8355 28 + 1 ID=cds00011;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 126 127 +
-scaffold_1 ALN cds 8419 8436 26 + 2 ID=cds00012;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 128 133 +
-scaffold_1 ALN cds 8581 8592 29 + 2 ID=cds00013;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 134 137 +
-scaffold_1 ALN cds 8866 8883 29 + 2 ID=cds00014;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 138 143 +
-scaffold_1 ALN cds 8980 8988 28 + 2 ID=cds00015;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 144 146 +
-scaffold_1 ALN cds 9383 9386 33 + 2 ID=cds00016;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 147 148 +
-scaffold_1 ALN cds 9959 9988 21 + 1 ID=cds00017;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 149 154 +
-scaffold_1 ALN cds 15714 15731 43 + 1 ID=cds00018;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 155 160 +
-scaffold_1 ALN cds 15783 15790 19 + 1 ID=cds00019;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 161 162 +
-scaffold_1 ALN cds 15849 15851 17 + 2 ID=cds00020;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 163 163 +
-scaffold_1 ALN cds 16745 16746 20 + 2 ID=cds00021;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 164 164 +
-scaffold_1 ALN cds 16948 16950 27 + 0 ID=cds00022;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 165 165 +
-scaffold_1 ALN cds 17082 17096 13 + 0 ID=cds00023;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 166 166 +
-scaffold_1 ALN cds 17349 17354 28 + 0 ID=cds00024;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 167 167 +
-scaffold_1 ALN cds 17425 17434 15 + 0 ID=cds00025;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 168 170 +
-scaffold_1 ALN cds 18352 18358 24 + 2 ID=cds00026;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 171 173 +
-scaffold_1 ALN cds 18738 18742 17 + 1 ID=cds00027;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 174 174 +
-scaffold_1 ALN cds 19505 19511 32 + 2 ID=cds00028;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 175 177 +
-scaffold_1 ALN cds 19599 19612 16 + 1 ID=cds00029;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 178 181 +
-scaffold_1 ALN cds 19677 19678 17 + 2 ID=cds00030;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 182 182 +
-scaffold_1 ALN cds 20611 20621 30 + 0 ID=cds00031;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 183 184 +
-scaffold_1 ALN cds 22447 22466 27 + 1 ID=cds00032;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 185 188 +
-scaffold_1 ALN cds 22674 22685 31 + 2 ID=cds00033;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 189 192 +
-scaffold_1 ALN cds 26126 26137 31 + 2 ID=cds00034;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 193 197 +
-scaffold_1 ALN cds 26185 26186 22 + 2 ID=cds00035;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 198 198 +
-scaffold_1 ALN cds 26355 26373 33 + 0 ID=cds00036;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 199 204 +
-scaffold_1 ALN cds 26461 26483 8 + 2 ID=cds00037;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 205 207 +
-scaffold_1 ALN cds 26542 26567 22 + 0 ID=cds00038;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 208 212 +
-scaffold_1 ALN cds 26640 26644 24 + 1 ID=cds00039;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 213 213 +
-scaffold_1 ALN cds 28412 28418 28 + 2 ID=cds00040;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 214 216 +
-scaffold_1 ALN cds 28949 28957 26 + 1 ID=cds00041;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 217 217 +
-scaffold_1 ALN cds 29102 29124 36 + 1 ID=cds00042;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 218 221 +
-scaffold_1 ALN cds 29796 29800 24 + 2 ID=cds00043;Parent=mRNA00001;Name=scaffold_1_15;Target=NP_001121846.1 222 222 +
+scaffold_1 ALN gene 17 10418 627 + . ID=gene00001;Name=scaffold_1_5
+scaffold_1 ALN mRNA 17 10418 627 + . ID=mRNA00001;Parent=gene00001;Name=scaffold_1_5
+scaffold_1 ALN cds 17 62 79 + 0 ID=cds00001;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 1 14 +
+scaffold_1 ALN cds 128 160 -4 + 2 ID=cds00002;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 15 27 +
+scaffold_1 ALN cds 418 539 210 + 2 ID=cds00003;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 28 68 +
+scaffold_1 ALN cds 7124 7250 187 + 0 ID=cds00004;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 69 110 +
+scaffold_1 ALN cds 7563 7595 11 + 2 ID=cds00005;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 111 121 +
+scaffold_1 ALN cds 7659 7662 21 + 2 ID=cds00006;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 122 123 +
+scaffold_1 ALN cds 7738 7864 21 + 1 ID=cds00007;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 124 165 +
+scaffold_1 ALN cds 7925 7957 26 + 0 ID=cds00008;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 166 176 +
+scaffold_1 ALN cds 8295 8300 29 + 0 ID=cds00009;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 177 178 +
+scaffold_1 ALN cds 8866 8883 31 + 0 ID=cds00010;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 179 184 +
+scaffold_1 ALN cds 8980 8994 30 + 0 ID=cds00011;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 185 189 +
+scaffold_1 ALN cds 9067 9077 27 + 0 ID=cds00012;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 190 193 +
+scaffold_1 ALN cds 9976 9988 28 + 1 ID=cds00013;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 194 197 +
+scaffold_1 ALN cds 10045 10068 27 + 0 ID=cds00014;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 198 205 +
+scaffold_1 ALN cds 10229 10252 38 + 0 ID=cds00015;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 206 213 +
+scaffold_1 ALN cds 10395 10418 23 + 0 ID=cds00016;Parent=mRNA00001;Name=scaffold_1_5;Target=NP_001121846.1 214 222 +
b
diff -r 95ea8d97abb4 -r 37b5e1f0b544 test-data/output1_gff_matches.gff3
--- a/test-data/output1_gff_matches.gff3 Fri Jan 11 18:15:21 2019 -0500
+++ b/test-data/output1_gff_matches.gff3 Thu Jul 16 07:57:10 2020 -0400
b
b'@@ -1,45 +1,18 @@\n ##gff-version\t3\n ##sequence-region\tscaffold_1 1 59940\n-scaffold_1\tALN\tnucleotide_to_protein_match\t234\t264\t71\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 1 10 +;Gap=M10 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t305\t334\t-9\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 11 27 +;Gap=M2 D1 M5 I8 M2 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t418\t542\t226\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 28 69 +;Gap=M42 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t2156\t2164\t37\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 70 72 +;Gap=M3 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t2821\t2829\t32\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 73 75 +;Gap=M3 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t7145\t7250\t176\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 76 110 +;Gap=M35 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t7659\t7662\t22\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 111 112 +;Gap=M2 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t7738\t7746\t4\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 113 114 +;Gap=D1 M2 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t7860\t7864\t24\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 115 115 +;Gap=M1 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t8250\t8286\t15\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 116 125 +;Gap=M10 D3 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t8348\t8355\t28\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 126 127 +;Gap=M2 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t8419\t8436\t26\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 128 133 +;Gap=M6 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t8581\t8592\t29\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 134 137 +;Gap=M4 
\n-scaffold_1\tALN\tnucleotide_to_protein_match\t8866\t8883\t29\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 138 143 +;Gap=M6 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t8980\t8988\t28\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 144 146 +;Gap=M3 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t9383\t9386\t33\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 147 148 +;Gap=M2 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t9959\t9988\t21\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 149 154 +;Gap=D4 M6 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t15714\t15731\t43\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 155 160 +;Gap=M6 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t15783\t15790\t19\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 161 162 +;Gap=M2 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t15849\t15851\t17\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 163 163 +;Gap=M1 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t16745\t16746\t20\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 164 164 +;Gap=M1 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t16948\t16950\t27\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 165 165 +;Gap=M1 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t17082\t17096\t13\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 166 166 +;Gap=M1 D4 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t17349\t17354\t28\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 167 167 +;Gap=D1 M1 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t17425\t17434\t15\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 168 170 +;Gap=M3 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t18352\t18358\t24\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 171 173 +;Gap=M3 
\n-scaffold_1\tALN\tnucleotide_to_protein_match\t18738\t18742\t17\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 174 174 +;Gap=M1 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t19505\t19511\t32\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 175 177 +;Gap=M3 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t19599\t19612\t16\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 178 181 +;Gap=M4 \n-s'..b'1_15;Target=NP_001121846.1 182 182 +;Gap=M1 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t20611\t20621\t30\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 183 184 +;Gap=M2 D2 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t22447\t22466\t27\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 185 188 +;Gap=D2 M4 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t22674\t22685\t31\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 189 192 +;Gap=M4 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t26126\t26137\t31\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 193 197 +;Gap=I1 M4 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t26185\t26186\t22\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 198 198 +;Gap=M1 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t26355\t26373\t33\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 199 204 +;Gap=M6 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t26461\t26483\t8\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 205 207 +;Gap=M3 D5 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t26542\t26567\t22\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 208 212 +;Gap=D4 M5 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t26640\t26644\t24\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 213 213 +;Gap=M1 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t28412\t28418\t28\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 214 216 +;Gap=M3 
\n-scaffold_1\tALN\tnucleotide_to_protein_match\t28949\t28957\t26\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 217 217 +;Gap=M1 D2 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t29102\t29124\t36\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 218 221 +;Gap=D3 M4 \n-scaffold_1\tALN\tnucleotide_to_protein_match\t29796\t29800\t24\t+\t.\tID=match00001;Name=scaffold_1_15;Target=NP_001121846.1 222 222 +;Gap=M1 D1 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t17\t62\t79\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 1 14 +;Gap=D1 M14 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t128\t160\t-4\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 15 27 +;Gap=M5 I3 M1 D1 M4 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t418\t539\t210\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 28 68 +;Gap=M41 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t7124\t7250\t187\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 69 110 +;Gap=M42 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t7563\t7595\t11\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 111 121 +;Gap=M11 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t7659\t7662\t21\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 122 123 +;Gap=M2 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t7738\t7864\t21\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 124 165 +;Gap=M14 D1 M7 I2 M19 D1 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t7925\t7957\t26\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 166 176 +;Gap=M11 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t8295\t8300\t29\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 177 178 +;Gap=M2 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t8866\t8883\t31\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 179 184 +;Gap=M6 
\n+scaffold_1\tALN\tnucleotide_to_protein_match\t8980\t8994\t30\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 185 189 +;Gap=M5 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t9067\t9077\t27\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 190 193 +;Gap=M4 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t9976\t9988\t28\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 194 197 +;Gap=M4 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t10045\t10068\t27\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 198 205 +;Gap=M8 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t10229\t10252\t38\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 206 213 +;Gap=M8 \n+scaffold_1\tALN\tnucleotide_to_protein_match\t10395\t10418\t23\t+\t.\tID=match00001;Name=scaffold_1_5;Target=NP_001121846.1 214 222 +;Gap=M5 I1 M3 \n'
b
diff -r 95ea8d97abb4 -r 37b5e1f0b544 test-data/output2.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output2.tabular Thu Jul 16 07:57:10 2020 -0400
b
@@ -0,0 +1,7 @@
+# rID   gID    %id   ExonL  MisMch  Unpair  ref_l   ref_r   tgt_l   tgt_r  eScore  IntrnL  iScore  Sig3/I  Sig5/T  # -  X P DiNuc
+NP_001121846.1 scaffold_1   11.11      91      24       0       1      27      17     107    54.8       0     0.0   66.40    5.40  0 0  0 0   .  
+NP_001121846.1 scaffold_1   65.85     122      14       0      28      68     418     539   210.7     310   -20.3   17.40    8.60  0 0  0 1 GT.AG
+NP_001121846.1 scaffold_1   64.29     127      15       0      69     110    7124    7250   188.0    6584   -33.9   16.50   16.90  0 0  0 0 GT.AG
+NP_001121846.1 scaffold_1   28.57     147      30       0     111     152   44187   44333    28.7   36936   -44.6   10.00    9.60  0 0  0 1 GT.AG
+NP_001121846.1 scaffold_1   20.55     221      52       3     153     222   44595   44815    23.4     261   -18.9   14.00    9.60  0 0  0 1 GT.AG
+@ scaffold_1 + ( 17 44815 ) NP_001121846.1 222 ( 1 222 ) S: 349.5 =: 37.8 C: 98.6 T#: 135 T-: 3 B#: 0 B-: 0 X: 0 Nexn: 5
b
diff -r 95ea8d97abb4 -r 37b5e1f0b544 test-data/sqlite_taxdb/tax.ncbitaxonomy.sqlite
b
Binary file test-data/sqlite_taxdb/tax.ncbitaxonomy.sqlite has changed
b
diff -r 95ea8d97abb4 -r 37b5e1f0b544 tool-data/ncbi_taxonomy_sqlite.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/ncbi_taxonomy_sqlite.loc.sample Thu Jul 16 07:57:10 2020 -0400
b
@@ -0,0 +1,2 @@
+#This file describes the storage location for the NCBI Taxonomy SQLite database file
+#<unique_name>      <description>  <file_path>
\ No newline at end of file
b
diff -r 95ea8d97abb4 -r 37b5e1f0b544 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Thu Jul 16 07:57:10 2020 -0400
b
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<tables>
+    <!-- Locations of SQLite format NCBI Taxonomy database -->
+    <table name="ncbi_taxonomy_sqlite" comment_char="#">
+        <columns>value, description, path</columns>
+        <file path="tool-data/ncbi_taxonomy_sqlite.loc" />
+    </table>
+</tables>
b
diff -r 95ea8d97abb4 -r 37b5e1f0b544 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Thu Jul 16 07:57:10 2020 -0400
b
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<tables>
+    <!-- Locations of SQLite format NCBI Taxonomy database -->
+    <table name="ncbi_taxonomy_sqlite" comment_char="#">
+        <columns>value, description, path</columns>
+        <file path="${__HERE__}/test-data/ncbi_taxonomy_sqlite.loc" />
+    </table>
+</tables>